atsachlaris 1 روز پیش
والد
کامیت
7f73a1c156

تفاوت فایلی نمایش داده نمی‌شود زیرا این فایل بسیار بزرگ است
+ 0 - 0
requests/files/PCG1.html


+ 12 - 0
requests/service.http

@@ -22,6 +22,18 @@ Content-Type: text/html
 
 
 ###
 ###
 
 
+POST http://127.0.0.1:8080/enhancements/chat/file
+Content-Type: multipart/form-data; boundary=WebAppBoundary
+
+--WebAppBoundary
+Content-Disposition: form-data; name="file"; filename="PCG1.html"
+Content-Type: text/html
+
+< ./files/PCG1.html
+--WebAppBoundary--
+
+###
+
 POST http://127.0.0.1:8080/enhancements/chat
 POST http://127.0.0.1:8080/enhancements/chat
 Content-Type: application/json
 Content-Type: application/json
 
 

+ 2 - 1
src/main/java/es/uv/saic/service/HtmlToCsvExtractor.java → src/main/java/es/uv/saic/extractor/HtmlToCsvExtractor.java

@@ -1,5 +1,6 @@
-package es.uv.saic.service;
+package es.uv.saic.extractor;
 
 
+import es.uv.saic.service.ExtractionRequest;
 import org.apache.commons.csv.CSVFormat;
 import org.apache.commons.csv.CSVFormat;
 import org.apache.commons.csv.CSVPrinter;
 import org.apache.commons.csv.CSVPrinter;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.lang3.StringUtils;

+ 3 - 1
src/main/java/es/uv/saic/service/TableExtractor.java → src/main/java/es/uv/saic/extractor/TableExtractor.java

@@ -1,4 +1,6 @@
-package es.uv.saic.service;
+package es.uv.saic.extractor;
+
+import es.uv.saic.service.ExtractionRequest;
 
 
 public interface TableExtractor {
 public interface TableExtractor {
     boolean supports(ExtractionRequest request);
     boolean supports(ExtractionRequest request);

+ 16 - 0
src/main/java/es/uv/saic/extractor/docling/DoclingProperties.java

@@ -0,0 +1,16 @@
+package es.uv.saic.extractor.docling;
+
+import lombok.Getter;
+import lombok.Setter;
+import org.springframework.boot.context.properties.ConfigurationProperties;
+
+@Setter
+@Getter
+@ConfigurationProperties(prefix = "extractor.docling")
+public class DoclingProperties {
+    private boolean enabled = false;
+    private String pythonCommand = "python";
+    private String scriptPath = "scripts/docling_extract.py";
+    private long timeoutMs = 30000;
+
+}

+ 2 - 1
src/main/java/es/uv/saic/service/DoclingPythonClient.java → src/main/java/es/uv/saic/extractor/docling/DoclingPythonClient.java

@@ -1,5 +1,6 @@
-package es.uv.saic.service;
+package es.uv.saic.extractor.docling;
 
 
+import es.uv.saic.service.ExtractionRequest;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.springframework.stereotype.Service;
 import org.springframework.stereotype.Service;
 
 

+ 9 - 9
src/main/java/es/uv/saic/service/DoclingTableExtractor.java → src/main/java/es/uv/saic/extractor/docling/DoclingTableExtractor.java

@@ -1,5 +1,9 @@
-package es.uv.saic.service;
+package es.uv.saic.extractor.docling;
 
 
+import es.uv.saic.service.ExtractionRequest;
+import es.uv.saic.extractor.TableExtractor;
+import groovy.util.logging.Slf4j;
+import lombok.RequiredArgsConstructor;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.jsoup.Jsoup;
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Document;
@@ -7,19 +11,14 @@ import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
 import org.jsoup.select.Elements;
 import org.springframework.stereotype.Service;
 import org.springframework.stereotype.Service;
 
 
+@lombok.extern.slf4j.Slf4j
 @Service
 @Service
+@RequiredArgsConstructor
+@Slf4j
 public class DoclingTableExtractor implements TableExtractor {
 public class DoclingTableExtractor implements TableExtractor {
     private final DoclingPythonClient doclingPythonClient;
     private final DoclingPythonClient doclingPythonClient;
     private final DoclingProperties doclingProperties;
     private final DoclingProperties doclingProperties;
 
 
-    public DoclingTableExtractor(
-            DoclingPythonClient doclingPythonClient,
-            DoclingProperties doclingProperties
-    ) {
-        this.doclingPythonClient = doclingPythonClient;
-        this.doclingProperties = doclingProperties;
-    }
-
     @Override
     @Override
     public boolean supports(ExtractionRequest request) {
     public boolean supports(ExtractionRequest request) {
         return doclingProperties.isEnabled() && request.hasRawHtml();
         return doclingProperties.isEnabled() && request.hasRawHtml();
@@ -27,6 +26,7 @@ public class DoclingTableExtractor implements TableExtractor {
 
 
     @Override
     @Override
     public String extractTablesToCsv(ExtractionRequest request) {
     public String extractTablesToCsv(ExtractionRequest request) {
+        log.info("Extracting tables to CSV using DoclingPythonClient");
         String filteredHtml = keepOnlyDataExtractionTables(request.rawHtml());
         String filteredHtml = keepOnlyDataExtractionTables(request.rawHtml());
         if (StringUtils.isBlank(filteredHtml)) {
         if (StringUtils.isBlank(filteredHtml)) {
             return "";
             return "";

+ 0 - 43
src/main/java/es/uv/saic/service/DoclingProperties.java

@@ -1,43 +0,0 @@
-package es.uv.saic.service;
-
-import org.springframework.boot.context.properties.ConfigurationProperties;
-
-@ConfigurationProperties(prefix = "extractor.docling")
-public class DoclingProperties {
-    private boolean enabled = false;
-    private String pythonCommand = "python";
-    private String scriptPath = "scripts/docling_extract.py";
-    private long timeoutMs = 30000;
-
-    public boolean isEnabled() {
-        return enabled;
-    }
-
-    public void setEnabled(boolean enabled) {
-        this.enabled = enabled;
-    }
-
-    public String getPythonCommand() {
-        return pythonCommand;
-    }
-
-    public void setPythonCommand(String pythonCommand) {
-        this.pythonCommand = pythonCommand;
-    }
-
-    public String getScriptPath() {
-        return scriptPath;
-    }
-
-    public void setScriptPath(String scriptPath) {
-        this.scriptPath = scriptPath;
-    }
-
-    public long getTimeoutMs() {
-        return timeoutMs;
-    }
-
-    public void setTimeoutMs(long timeoutMs) {
-        this.timeoutMs = timeoutMs;
-    }
-}

+ 4 - 7
src/main/java/es/uv/saic/service/EnhancementService.java

@@ -1,6 +1,8 @@
 package es.uv.saic.service;
 package es.uv.saic.service;
 
 
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.ObjectMapper;
+import es.uv.saic.extractor.docling.DoclingTableExtractor;
+import es.uv.saic.extractor.HtmlToCsvExtractor;
 import lombok.SneakyThrows;
 import lombok.SneakyThrows;
 import org.springframework.ai.chat.client.ChatClient;
 import org.springframework.ai.chat.client.ChatClient;
 import org.springframework.stereotype.Service;
 import org.springframework.stereotype.Service;
@@ -27,11 +29,6 @@ public class EnhancementService {
     }
     }
 
 
     @SneakyThrows
     @SneakyThrows
-    public String ask(String html) {
-        return askHtml(html);
-    }
-
-    @SneakyThrows
     public String askHtml(String html) {
     public String askHtml(String html) {
         String asCsv = extractForChatEndpoint(ExtractionRequest.fromHtml(html));
         String asCsv = extractForChatEndpoint(ExtractionRequest.fromHtml(html));
 
 
@@ -44,14 +41,14 @@ public class EnhancementService {
         return askLlm(asCsv);
         return askLlm(asCsv);
     }
     }
 
 
-    String extractForChatEndpoint(ExtractionRequest request) {
+    public String extractForChatEndpoint(ExtractionRequest request) {
         if (doclingTableExtractor.supports(request)) {
         if (doclingTableExtractor.supports(request)) {
             return doclingTableExtractor.extractTablesToCsv(request);
             return doclingTableExtractor.extractTablesToCsv(request);
         }
         }
         return htmlToCsvExtractor.extractTablesToCsv(request);
         return htmlToCsvExtractor.extractTablesToCsv(request);
     }
     }
 
 
-    String extractForFileEndpoint(ExtractionRequest request) {
+    public String extractForFileEndpoint(ExtractionRequest request) {
         String htmlFallback = request.hasFile() ? new String(request.fileBytes(), StandardCharsets.UTF_8) : "";
         String htmlFallback = request.hasFile() ? new String(request.fileBytes(), StandardCharsets.UTF_8) : "";
         return extractForChatEndpoint(ExtractionRequest.fromHtml(htmlFallback));
         return extractForChatEndpoint(ExtractionRequest.fromHtml(htmlFallback));
     }
     }

+ 0 - 5
src/main/java/es/uv/saic/web/EnhancementController.java

@@ -21,11 +21,6 @@ public class EnhancementController {
 
 
     private final EnhancementService enhancementService;
     private final EnhancementService enhancementService;
 
 
-    @GetMapping("hello")
-    public String helloWord() {
-        return "Hello World";
-    }
-
     @PostMapping("chat")
     @PostMapping("chat")
     public String chat(@RequestBody String html) {
     public String chat(@RequestBody String html) {
         return enhancementService.askHtml(html);
         return enhancementService.askHtml(html);

+ 4 - 0
src/main/resources/application-local.properties

@@ -6,3 +6,7 @@ spring.datasource.password=docent1ia2.l6
 spring.ai.openai.base-url=http://tyrion.uv.es:8090
 spring.ai.openai.base-url=http://tyrion.uv.es:8090
 spring.ai.openai.api-key=hhOQ6QBqHKtOO9MKAUhIyU9auBkgIF40QJKa24jWJzdtxvdXMLi10xUAWMsdpFP0
 spring.ai.openai.api-key=hhOQ6QBqHKtOO9MKAUhIyU9auBkgIF40QJKa24jWJzdtxvdXMLi10xUAWMsdpFP0
 spring.ai.openai.chat.options.model=/media/nas/peerobs_sync/shared/2025-ReviewSim/models/Qwen2.5-7B-Instruct-AWQ
 spring.ai.openai.chat.options.model=/media/nas/peerobs_sync/shared/2025-ReviewSim/models/Qwen2.5-7B-Instruct-AWQ
+
+#Docling extractor
+extractor.docling.enabled=true
+extractor.docling.python-command=C:/Users/arist/AppData/Local/Python/bin/python.exe

+ 0 - 2
src/main/resources/application.properties

@@ -22,7 +22,5 @@ spring.datasource.username=postgres
 spring.sql.init.platform=postgres
 spring.sql.init.platform=postgres
 
 
 # Docling extractor
 # Docling extractor
-extractor.docling.enabled=true
-extractor.docling.python-command=C:/Users/arist/AppData/Local/Python/bin/python.exe
 extractor.docling.script-path=scripts/docling_extract.py
 extractor.docling.script-path=scripts/docling_extract.py
 extractor.docling.timeout-ms=30000
 extractor.docling.timeout-ms=30000

+ 2 - 0
src/test/java/es/uv/saic/service/DoclingPythonClientTest.java

@@ -1,5 +1,7 @@
 package es.uv.saic.service;
 package es.uv.saic.service;
 
 
+import es.uv.saic.extractor.docling.DoclingProperties;
+import es.uv.saic.extractor.docling.DoclingPythonClient;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.Test;
 
 
 import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertThrows;

+ 3 - 0
src/test/java/es/uv/saic/service/DoclingTableExtractorTest.java

@@ -1,5 +1,8 @@
 package es.uv.saic.service;
 package es.uv.saic.service;
 
 
+import es.uv.saic.extractor.docling.DoclingProperties;
+import es.uv.saic.extractor.docling.DoclingPythonClient;
+import es.uv.saic.extractor.docling.DoclingTableExtractor;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.Test;
 
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertEquals;

+ 2 - 0
src/test/java/es/uv/saic/service/EnhancementServiceRoutingTest.java

@@ -1,5 +1,7 @@
 package es.uv.saic.service;
 package es.uv.saic.service;
 
 
+import es.uv.saic.extractor.docling.DoclingTableExtractor;
+import es.uv.saic.extractor.HtmlToCsvExtractor;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.Test;
 import org.springframework.ai.chat.client.ChatClient;
 import org.springframework.ai.chat.client.ChatClient;

برخی فایل‌ها در این مقایسه diff نمایش داده نمی‌شوند زیرا تعداد فایل‌ها بسیار زیاد است