atsachlaris 8 ساعت پیش
والد
کامیت
1dd3466bb1
1فایلهای تغییر یافته به همراه22 افزوده شده و 14 حذف شده
  1. 22 14
      src/main/java/es/uv/saic/service/HtmlToCsvExtractor.java

+ 22 - 14
src/main/java/es/uv/saic/service/HtmlToCsvExtractor.java

@@ -19,8 +19,8 @@ import java.util.regex.Pattern;
 
 @Service
 public class HtmlToCsvExtractor {
-    private static final Pattern NUMERIC_ITEM_PREFIX = Pattern.compile("^\\s*(\\d+)\\s*[-.)]\\s*(.*)$");
-    private static final Pattern LETTER_ITEM_PREFIX = Pattern.compile("^\\s*([a-zA-Z])\\s*[-.)]\\s*(.*)$");
+    private static final Pattern NUMERIC_CODE_PREFIX = Pattern.compile("^\\s*(\\d+)\\s*[-.)]\\s*(.*)$");
+    private static final Pattern LETTER_CODE_PREFIX = Pattern.compile("^\\s*([a-zA-Z])\\s*[-.)]\\s*(.*)$");
     private static final List<String> CSV_HEADER = List.of(
             "Código", "Ítem", "Puntuación"
     );
@@ -79,20 +79,20 @@ public class HtmlToCsvExtractor {
                         .toList();
                 if (!cells.isEmpty()) {
                     try {
-                        if (rowIndex[0] == 0) {
+                        if (isHeader(rowIndex)) {
                             printer.printRecord(CSV_HEADER);
                             rowIndex[0]++;
                             return;
                         }
 
-                        ParsedFirstColumn parsed = parseFirstColumn(cells.getFirst(), lastNumericItem[0]);
-                        if (StringUtils.isNotBlank(parsed.numericItem())) {
-                            lastNumericItem[0] = parsed.numericItem();
+                        ParsedCodeAndItem parsed = extractCodeAndItem(cells.getFirst(), lastNumericItem[0]);
+                        if (StringUtils.isNotBlank(parsed.numberedCode())) {
+                            lastNumericItem[0] = parsed.numberedCode();
                         }
 
                         List<String> rowWithItem = new ArrayList<>();
+                        rowWithItem.add(parsed.code());
                         rowWithItem.add(parsed.item());
-                        rowWithItem.add(parsed.description());
                         rowWithItem.add(cells.size() > 1 ? cells.get(1) : "");
                         printer.printRecord(rowWithItem);
                     } catch (IOException e) {
@@ -108,30 +108,38 @@ public class HtmlToCsvExtractor {
         }
     }
 
-    private ParsedFirstColumn parseFirstColumn(String value, String lastNumericItem) {
-        Matcher numericMatcher = NUMERIC_ITEM_PREFIX.matcher(value);
+    private static boolean isHeader(int[] rowIndex) {
+        return rowIndex[0] == 0;
+    }
+
+    /**
+     * Extracts the code and the item text from the first column.
+     * For example, "4- Los contenidos impartidos..." will be split into "4" and "Los contenidos impartidos..."
+     */
+    private ParsedCodeAndItem extractCodeAndItem(String value, String lastNumericItem) {
+        Matcher numericMatcher = NUMERIC_CODE_PREFIX.matcher(value);
         if (numericMatcher.matches()) {
-            return new ParsedFirstColumn(
+            return new ParsedCodeAndItem(
                     numericMatcher.group(1),
                     numericMatcher.group(1),
                     StringUtils.defaultString(numericMatcher.group(2)).trim()
             );
         }
 
-        Matcher letterMatcher = LETTER_ITEM_PREFIX.matcher(value);
+        Matcher letterMatcher = LETTER_CODE_PREFIX.matcher(value);
         if (letterMatcher.matches()) {
             String letter = letterMatcher.group(1).toLowerCase();
             String item = StringUtils.isNotBlank(lastNumericItem) ? lastNumericItem + letter : letter;
-            return new ParsedFirstColumn(
+            return new ParsedCodeAndItem(
                     item,
                     null,
                     StringUtils.defaultString(letterMatcher.group(2)).trim()
             );
         }
 
-        return new ParsedFirstColumn("", null, value);
+        return new ParsedCodeAndItem("", null, value);
     }
 
-    private record ParsedFirstColumn(String item, String numericItem, String description) {
+    private record ParsedCodeAndItem(String code, String numberedCode, String item) {
     }
 }