|
@@ -11,6 +11,8 @@ import org.jsoup.select.Elements;
|
|
|
import org.springframework.ai.chat.client.ChatClient;
|
|
import org.springframework.ai.chat.client.ChatClient;
|
|
|
import org.springframework.stereotype.Service;
|
|
import org.springframework.stereotype.Service;
|
|
|
|
|
|
|
|
|
|
+import java.util.Set;
|
|
|
|
|
+
|
|
|
import static es.uv.saic.service.SystemPrompt.SYSTEM_INSTRUCTIONS;
|
|
import static es.uv.saic.service.SystemPrompt.SYSTEM_INSTRUCTIONS;
|
|
|
|
|
|
|
|
@Service
|
|
@Service
|
|
@@ -18,6 +20,9 @@ public class EnhancementService {
|
|
|
|
|
|
|
|
private final ChatClient chatClient;
|
|
private final ChatClient chatClient;
|
|
|
private final ObjectMapper objectMapper = new ObjectMapper();
|
|
private final ObjectMapper objectMapper = new ObjectMapper();
|
|
|
|
|
+ private final Set<String> allowedTags = Set.of(
|
|
|
|
|
+ "table", "thead", "tbody", "tr", "td", "th"
|
|
|
|
|
+ );
|
|
|
|
|
|
|
|
public EnhancementService(ChatClient.Builder chatClientBuilder) {
|
|
public EnhancementService(ChatClient.Builder chatClientBuilder) {
|
|
|
this.chatClient = chatClientBuilder.build();
|
|
this.chatClient = chatClientBuilder.build();
|
|
@@ -50,11 +55,18 @@ public class EnhancementService {
|
|
|
Document cleanDoc = Document.createShell("");
|
|
Document cleanDoc = Document.createShell("");
|
|
|
cleanDoc.outputSettings().prettyPrint(false);
|
|
cleanDoc.outputSettings().prettyPrint(false);
|
|
|
|
|
|
|
|
|
|
+
|
|
|
for (Element table : tables) {
|
|
for (Element table : tables) {
|
|
|
Element cleanTable = table.clone();
|
|
Element cleanTable = table.clone();
|
|
|
|
|
|
|
|
- cleanTable.select("strong, span, p").unwrap();
|
|
|
|
|
|
|
+ //keep only allowed tags
|
|
|
|
|
+ cleanTable.select("*").forEach(el -> {
|
|
|
|
|
+ if (!allowedTags.contains(el.tagName())) {
|
|
|
|
|
+ el.unwrap(); // remove tag but keep content
|
|
|
|
|
+ }
|
|
|
|
|
+ });
|
|
|
|
|
|
|
|
|
|
+ // Remove all attributes
|
|
|
cleanTable.select("*").forEach(Element::clearAttributes);
|
|
cleanTable.select("*").forEach(Element::clearAttributes);
|
|
|
|
|
|
|
|
cleanDoc.body().appendChild(cleanTable);
|
|
cleanDoc.body().appendChild(cleanTable);
|
|
@@ -65,8 +77,8 @@ public class EnhancementService {
|
|
|
.replace("\r", "")
|
|
.replace("\r", "")
|
|
|
.replace("\t", "")
|
|
.replace("\t", "")
|
|
|
.replace(" ", "")
|
|
.replace(" ", "")
|
|
|
- .replaceAll(">\\s+<", "><")
|
|
|
|
|
- .replaceAll("<!--.*?-->", "")
|
|
|
|
|
|
|
+ .replaceAll(">\\s+<", "><") // remove whitespace between tags
|
|
|
|
|
+ .replaceAll("<!--.*?-->", "") // remove comments
|
|
|
.replaceAll("\\s{2,}", " ") // collapse multiple spaces
|
|
.replaceAll("\\s{2,}", " ") // collapse multiple spaces
|
|
|
.trim();
|
|
.trim();
|
|
|
}
|
|
}
|