|
|
@@ -19,8 +19,8 @@ import java.util.regex.Pattern;
|
|
|
|
|
|
@Service
|
|
|
public class HtmlToCsvExtractor {
|
|
|
- private static final Pattern NUMERIC_ITEM_PREFIX = Pattern.compile("^\\s*(\\d+)\\s*[-.)]\\s*(.*)$");
|
|
|
- private static final Pattern LETTER_ITEM_PREFIX = Pattern.compile("^\\s*([a-zA-Z])\\s*[-.)]\\s*(.*)$");
|
|
|
+ private static final Pattern NUMERIC_CODE_PREFIX = Pattern.compile("^\\s*(\\d+)\\s*[-.)]\\s*(.*)$");
|
|
|
+ private static final Pattern LETTER_CODE_PREFIX = Pattern.compile("^\\s*([a-zA-Z])\\s*[-.)]\\s*(.*)$");
|
|
|
private static final List<String> CSV_HEADER = List.of(
|
|
|
"Código", "Ítem", "Puntuación"
|
|
|
);
|
|
|
@@ -79,20 +79,20 @@ public class HtmlToCsvExtractor {
|
|
|
.toList();
|
|
|
if (!cells.isEmpty()) {
|
|
|
try {
|
|
|
- if (rowIndex[0] == 0) {
|
|
|
+ if (isHeader(rowIndex)) {
|
|
|
printer.printRecord(CSV_HEADER);
|
|
|
rowIndex[0]++;
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
- ParsedFirstColumn parsed = parseFirstColumn(cells.getFirst(), lastNumericItem[0]);
|
|
|
- if (StringUtils.isNotBlank(parsed.numericItem())) {
|
|
|
- lastNumericItem[0] = parsed.numericItem();
|
|
|
+ ParsedCodeAndItem parsed = extractCodeAndItem(cells.getFirst(), lastNumericItem[0]);
|
|
|
+ if (StringUtils.isNotBlank(parsed.numberedCode())) {
|
|
|
+ lastNumericItem[0] = parsed.numberedCode();
|
|
|
}
|
|
|
|
|
|
List<String> rowWithItem = new ArrayList<>();
|
|
|
+ rowWithItem.add(parsed.code());
|
|
|
rowWithItem.add(parsed.item());
|
|
|
- rowWithItem.add(parsed.description());
|
|
|
rowWithItem.add(cells.size() > 1 ? cells.get(1) : "");
|
|
|
printer.printRecord(rowWithItem);
|
|
|
} catch (IOException e) {
|
|
|
@@ -108,30 +108,38 @@ public class HtmlToCsvExtractor {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- private ParsedFirstColumn parseFirstColumn(String value, String lastNumericItem) {
|
|
|
- Matcher numericMatcher = NUMERIC_ITEM_PREFIX.matcher(value);
|
|
|
+ private static boolean isHeader(int[] rowIndex) {
|
|
|
+ return rowIndex[0] == 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Extracts the code and the item text from the first column.
|
|
|
+ * For example, "4- Los contenidos impartidos..." is split into "4" and "Los contenidos impartidos...".
|
|
|
+ */
|
|
|
+ private ParsedCodeAndItem extractCodeAndItem(String value, String lastNumericItem) {
|
|
|
+ Matcher numericMatcher = NUMERIC_CODE_PREFIX.matcher(value);
|
|
|
if (numericMatcher.matches()) {
|
|
|
- return new ParsedFirstColumn(
|
|
|
+ return new ParsedCodeAndItem(
|
|
|
numericMatcher.group(1),
|
|
|
numericMatcher.group(1),
|
|
|
StringUtils.defaultString(numericMatcher.group(2)).trim()
|
|
|
);
|
|
|
}
|
|
|
|
|
|
- Matcher letterMatcher = LETTER_ITEM_PREFIX.matcher(value);
|
|
|
+ Matcher letterMatcher = LETTER_CODE_PREFIX.matcher(value);
|
|
|
if (letterMatcher.matches()) {
|
|
|
String letter = letterMatcher.group(1).toLowerCase();
|
|
|
String item = StringUtils.isNotBlank(lastNumericItem) ? lastNumericItem + letter : letter;
|
|
|
- return new ParsedFirstColumn(
|
|
|
+ return new ParsedCodeAndItem(
|
|
|
item,
|
|
|
null,
|
|
|
StringUtils.defaultString(letterMatcher.group(2)).trim()
|
|
|
);
|
|
|
}
|
|
|
|
|
|
- return new ParsedFirstColumn("", null, value);
|
|
|
+ return new ParsedCodeAndItem("", null, value);
|
|
|
}
|
|
|
|
|
|
- private record ParsedFirstColumn(String item, String numericItem, String description) {
|
|
|
+ private record ParsedCodeAndItem(String code, String numberedCode, String item) {
|
|
|
}
|
|
|
}
|