Selaa lähdekoodia

Remove empty spaces and comments

atsachlaris 6 päivää sitten
vanhempi
commit
2b4948282b
2 muutettua tiedostoa jossa 10 lisäystä ja 17 poistoa
  1. 1 1
      requests/service.http
  2. 9 16
      src/main/java/es/uv/saic/service/EnhancementService.java

+ 1 - 1
requests/service.http

@@ -241,7 +241,7 @@ Content-Type: application/json
   </tbody>
   </table>
   <p><br data-mce-bogus="1"></p>
-  <table data-extraction class="mceEditable"
+  <table class="mceEditable"
   style="border-collapse: collapse; width: 297mm; border-width: 1px; border-spacing: 0px; border-color: rgb(149, 165, 166); margin-left: 0px; margin-right: auto;"
   border="1" width="680" cellspacing="0" cellpadding="8"
   data-mce-style="border-collapse: collapse; width: 297mm; border-width: 1px; border-spacing: 0px; border-color: rgb(149, 165, 166); margin-left: 0px; margin-right: auto;"

+ 9 - 16
src/main/java/es/uv/saic/service/EnhancementService.java

@@ -45,8 +45,6 @@ public class EnhancementService {
 
         Document doc = Jsoup.parse(html);
 
-        removeCodeComments(doc);
-
         Elements tables = doc.select("table[data-extraction]");
 
         Document cleanDoc = Document.createShell("");
@@ -59,23 +57,18 @@ public class EnhancementService {
 
             cleanTable.select("*").forEach(Element::clearAttributes);
 
-            normalizeEmptySpaces(cleanTable);
-
             cleanDoc.body().appendChild(cleanTable);
         }
 
-        return cleanDoc.body().html().trim();
+        return cleanDoc.body().html()
+                .replace("\n", "")
+                .replace("\r", "")
+                .replace("\t", "")
+                .replace("&nbsp;", "")
+                .replaceAll(">\\s+<", "><")
+                .replaceAll("<!--.*?-->", "")
+                .replaceAll("\\s{2,}", " ") // collapse multiple spaces
+                .trim();
     }
 
-    private static void normalizeEmptySpaces(Element cleanTable) {
-        cleanTable.textNodes().forEach(t ->
-                t.text(t.text().replace("\u00A0", " "))
-        );
-    }
-
-    private static void removeCodeComments(Document doc) {
-        doc.select("*").forEach(node ->
-                node.childNodes().removeIf(n -> n.nodeName().equals("#comment"))
-        );
-    }
 }