|
@@ -6,7 +6,6 @@ import org.springframework.stereotype.Service;
|
|
|
import java.io.IOException;
|
|
import java.io.IOException;
|
|
|
import java.io.InputStream;
|
|
import java.io.InputStream;
|
|
|
import java.nio.charset.StandardCharsets;
|
|
import java.nio.charset.StandardCharsets;
|
|
|
-import java.nio.file.Files;
|
|
|
|
|
import java.nio.file.Path;
|
|
import java.nio.file.Path;
|
|
|
import java.util.ArrayList;
|
|
import java.util.ArrayList;
|
|
|
import java.util.List;
|
|
import java.util.List;
|
|
@@ -27,30 +26,19 @@ public class DoclingPythonClient {
|
|
|
throw new IllegalStateException("Docling extractor is disabled");
|
|
throw new IllegalStateException("Docling extractor is disabled");
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- Path tempFile = null;
|
|
|
|
|
try {
|
|
try {
|
|
|
List<String> command = new ArrayList<>();
|
|
List<String> command = new ArrayList<>();
|
|
|
command.add(properties.getPythonCommand());
|
|
command.add(properties.getPythonCommand());
|
|
|
command.add(Path.of(properties.getScriptPath()).toAbsolutePath().toString());
|
|
command.add(Path.of(properties.getScriptPath()).toAbsolutePath().toString());
|
|
|
- command.add("--output");
|
|
|
|
|
- command.add("csv");
|
|
|
|
|
-
|
|
|
|
|
- Process process;
|
|
|
|
|
- if (request.hasFile()) {
|
|
|
|
|
- tempFile = createTempInputFile(request);
|
|
|
|
|
- command.add("--input-file");
|
|
|
|
|
- command.add(tempFile.toString());
|
|
|
|
|
- process = new ProcessBuilder(command).start();
|
|
|
|
|
- } else if (request.hasRawHtml()) {
|
|
|
|
|
- command.add("--stdin");
|
|
|
|
|
- process = new ProcessBuilder(command).start();
|
|
|
|
|
- process.getOutputStream().write(request.rawHtml().getBytes(StandardCharsets.UTF_8));
|
|
|
|
|
- process.getOutputStream().flush();
|
|
|
|
|
- process.getOutputStream().close();
|
|
|
|
|
- } else {
|
|
|
|
|
- throw new IllegalArgumentException("Unsupported extraction request: no input provided");
|
|
|
|
|
|
|
+ if (!request.hasRawHtml()) {
|
|
|
|
|
+ throw new IllegalArgumentException("Unsupported extraction request: no HTML payload provided");
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ Process process = new ProcessBuilder(command).start();
|
|
|
|
|
+ process.getOutputStream().write(request.rawHtml().getBytes(StandardCharsets.UTF_8));
|
|
|
|
|
+ process.getOutputStream().flush();
|
|
|
|
|
+ process.getOutputStream().close();
|
|
|
|
|
+
|
|
|
CompletableFuture<String> stdoutFuture = CompletableFuture.supplyAsync(() -> readAsString(process.getInputStream()));
|
|
CompletableFuture<String> stdoutFuture = CompletableFuture.supplyAsync(() -> readAsString(process.getInputStream()));
|
|
|
CompletableFuture<String> stderrFuture = CompletableFuture.supplyAsync(() -> readAsString(process.getErrorStream()));
|
|
CompletableFuture<String> stderrFuture = CompletableFuture.supplyAsync(() -> readAsString(process.getErrorStream()));
|
|
|
|
|
|
|
@@ -74,14 +62,6 @@ public class DoclingPythonClient {
|
|
|
throw new IllegalStateException("Docling extraction interrupted", e);
|
|
throw new IllegalStateException("Docling extraction interrupted", e);
|
|
|
} catch (ExecutionException e) {
|
|
} catch (ExecutionException e) {
|
|
|
throw new IllegalStateException("Unable to read Docling extraction output", e);
|
|
throw new IllegalStateException("Unable to read Docling extraction output", e);
|
|
|
- } finally {
|
|
|
|
|
- if (tempFile != null) {
|
|
|
|
|
- try {
|
|
|
|
|
- Files.deleteIfExists(tempFile);
|
|
|
|
|
- } catch (IOException ignored) {
|
|
|
|
|
- // Best effort cleanup.
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -93,12 +73,4 @@ public class DoclingPythonClient {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- private static Path createTempInputFile(ExtractionRequest request) throws IOException {
|
|
|
|
|
- String suffix = request.fileName() != null && request.fileName().contains(".")
|
|
|
|
|
- ? request.fileName().substring(request.fileName().lastIndexOf('.'))
|
|
|
|
|
- : ".bin";
|
|
|
|
|
- Path tempPath = Files.createTempFile("docling-input-", suffix);
|
|
|
|
|
- Files.write(tempPath, request.fileBytes());
|
|
|
|
|
- return tempPath;
|
|
|
|
|
- }
|
|
|
|
|
}
|
|
}
|