|
|
@@ -7,9 +7,12 @@ import org.apache.commons.lang3.StringUtils;
|
|
|
import org.jsoup.Jsoup;
|
|
|
import org.jsoup.nodes.Document;
|
|
|
import org.jsoup.nodes.Element;
|
|
|
+import org.jsoup.select.Elements;
|
|
|
import org.springframework.ai.chat.client.ChatClient;
|
|
|
import org.springframework.stereotype.Service;
|
|
|
|
|
|
+import static es.uv.saic.service.SystemPrompt.SYSTEM_INSTRUCTIONS;
|
|
|
+
|
|
|
@Service
|
|
|
public class EnhancementService {
|
|
|
|
|
|
@@ -23,28 +26,43 @@ public class EnhancementService {
|
|
|
/**
 * Sends {@code message} through {@code chatClient} and returns the result of
 * {@code .call().content()}.
 *
 * The message is not sent as-is: it is first passed through
 * {@code normalizeMessage}, and the result of that call becomes the user turn.
 * The line marked with {@code +} in this diff additionally sets the statically
 * imported {@code SYSTEM_INSTRUCTIONS} constant as the system prompt.
 *
 * {@code normalizeMessage} declares {@code throws JsonProcessingException} while
 * this method declares no checked exceptions; {@code @SneakyThrows} (presumably
 * Lombok's - the import is not visible in this view) is what allows that.
 *
 * @param message raw input forwarded to {@code normalizeMessage}
 * @return whatever {@code content()} yields for the completed call
 */
@SneakyThrows
|
|
|
public String ask(String message) {
|
|
|
return chatClient.prompt()
|
|
|
// Added by this change: supply the shared system instructions on every request.
+ .system(SYSTEM_INSTRUCTIONS)
|
|
|
// The user turn is the normalized form of the message, not the raw argument.
.user(normalizeMessage(message))
|
|
|
.call()
|
|
|
.content();
|
|
|
}
|
|
|
|
|
|
/**
 * Prepares a raw message for use as the user prompt.
 *
 * After this change the message is reduced with {@code extractStructuredTables}
 * (the {@code +} line); before it, {@code removeHtmlAttributes} was used (the
 * {@code -} line). The reduced string is then handed to
 * {@code objectMapper.writeValueAsString} and that result is returned.
 *
 * NOTE(review): {@code objectMapper} is declared outside this view; presumably a
 * Jackson ObjectMapper, in which case the return value is the string encoded as a
 * quoted JSON string literal - confirm.
 *
 * @param message raw input; treated as HTML by the extraction step
 * @return the serialized form of the reduced message
 * @throws JsonProcessingException declared by the signature; presumably raised by
 *         the serialization call
 */
private String normalizeMessage(String message) throws JsonProcessingException {
|
|
|
- String cleanHtml = removeHtmlAttributes(message);
|
|
|
+ String cleanHtml = extractStructuredTables(message);
|
|
|
|
|
|
return objectMapper.writeValueAsString(cleanHtml);
|
|
|
}
|
|
|
|
|
|
/**
 * Reduces an HTML string to only its {@code table[data-extraction]} elements,
 * stripped of attributes and of {@code strong}, {@code span} and {@code p} wrappers.
 *
 * This diff renames the method from {@code removeHtmlAttributes} and changes its
 * behavior. The removed ({@code -}) version cleared the attributes of every element
 * in the parsed document and returned the whole body. The added ({@code +}) version
 * keeps only tables that carry a {@code data-extraction} attribute and builds a new
 * document from cleaned copies of them.
 *
 * NOTE(review): {@code doc.select} also matches a {@code table[data-extraction]}
 * nested inside another matching table, and {@code clone()} copies descendants, so
 * such a nested table would appear twice in the output - confirm whether nesting
 * can occur in the inputs.
 *
 * @param html HTML to reduce; may be {@code null} or empty
 * @return the cleaned tables as an HTML string with whitespace between adjacent
 *         tags removed and surrounding whitespace trimmed; {@code ""} when
 *         {@code html} is {@code null} or empty, and also when no table matches
 */
- public String removeHtmlAttributes(String html) {
|
|
|
+ public String extractStructuredTables(String html) {
|
|
|
// Guard: nothing to parse for null or zero-length input.
if (StringUtils.isEmpty(html)) {
|
|
|
return "";
|
|
|
}
|
|
|
|
|
|
Document doc = Jsoup.parse(html);
|
|
|
|
|
|
- for (Element el : doc.getAllElements()) {
|
|
|
- el.clearAttributes();
|
|
|
// Only tables that carry a data-extraction attribute (any value) are kept.
+ Elements tables = doc.select("table[data-extraction]");
|
|
|
+
|
|
|
// Fresh, empty document that collects the cleaned tables; the parsed input is never modified.
+ Document cleanDoc = Document.createShell("");
|
|
|
+
|
|
|
+ for (Element table : tables) {
|
|
|
// Work on a copy so the unwrap and attribute removal below do not touch `doc`.
+ Element cleanTable = table.clone();
|
|
|
+
|
|
|
// Drop the strong/span/p elements themselves while keeping their children in place.
+ cleanTable.select("strong, span, p").unwrap();
|
|
|
+
|
|
|
// Strip every attribute from the copy, including data-extraction on the table itself.
+ for (Element el : cleanTable.getAllElements()) {
|
|
|
+ el.clearAttributes();
|
|
|
+ }
|
|
|
+
|
|
|
+ cleanDoc.body().appendChild(cleanTable);
|
|
|
}
|
|
|
|
|
|
- return doc.body().html();
|
|
|
// Serialize only the collected tables, then compact the markup.
+ return cleanDoc.body().html()
|
|
|
+ .replaceAll(">\\s+<", "><") // remove spaces between tags
|
|
|
+ .trim();
|
|
|
}
|
|
|
}
|