From 14d4f5b109fa65d777ab147b3ce9b5174d020a5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=A1clav=20Van=C4=8Dura?= Date: Wed, 21 May 2025 02:47:55 +0200 Subject: [PATCH] fix(integration): update the Apify Actor integration (#1619) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(actor): remove references to missing docling_processor.py Signed-off-by: Václav Vančura * chore(actor): update Actor README.md with recent repo URL changes Signed-off-by: Václav Vančura * chore(actor): improve the Actor README.md local header link Signed-off-by: Václav Vančura * chore(actor): bump the Actor version number Signed-off-by: Václav Vančura * Update .actor/actor.json Co-authored-by: Marek Trunkát Signed-off-by: Jan Čurn --------- Signed-off-by: Václav Vančura Signed-off-by: Jan Čurn Co-authored-by: Jan Čurn Co-authored-by: Marek Trunkát --- .actor/Dockerfile | 1 - .actor/README.md | 6 +++--- .actor/actor.json | 4 ++-- .actor/actor.sh | 11 ----------- 4 files changed, 5 insertions(+), 17 deletions(-) diff --git a/.actor/Dockerfile b/.actor/Dockerfile index 9c7270df..60f77e72 100644 --- a/.actor/Dockerfile +++ b/.actor/Dockerfile @@ -64,7 +64,6 @@ ENV EASYOCR_MODULE_PATH=/tmp/easyocr-models COPY --chown=1000:1000 .actor/actor.sh .actor/actor.sh COPY --chown=1000:1000 .actor/actor.json .actor/actor.json COPY --chown=1000:1000 .actor/input_schema.json .actor/input_schema.json -COPY --chown=1000:1000 .actor/docling_processor.py .actor/docling_processor.py RUN chmod +x .actor/actor.sh # Copy the build files from builder diff --git a/.actor/README.md b/.actor/README.md index a43181f3..1b3d4666 100644 --- a/.actor/README.md +++ b/.actor/README.md @@ -2,7 +2,7 @@ [![Docling Actor](https://apify.com/actor-badge?actor=vancura/docling?fpr=docling)](https://apify.com/vancura/docling) -This Actor (specification v1) wraps the [Docling project](https://ds4sd.github.io/docling/) to provide serverless document processing in the cloud. It can process complex documents (PDF, DOCX, images) and convert them into structured formats (Markdown, JSON, HTML, Text, or DocTags) with optional OCR support. +This Actor (specification v1) wraps the [Docling project](https://github.com/docling-project/docling) to provide serverless document processing in the cloud. It can process complex documents (PDF, DOCX, images) and convert them into structured formats (Markdown, JSON, HTML, Text, or DocTags) with optional OCR support. ## What are Actors? @@ -14,7 +14,7 @@ This Actor (specification v1) wraps the [Docling project](https://ds4sd.github.i 2. [Usage](#usage) 3. [Input Parameters](#input-parameters) 4. [Output](#output) -5. [Performance & Resources](#performance--resources) +5. [Performance and Resources](#performance-and-resources) 6. [Troubleshooting](#troubleshooting) 7. [Local Development](#local-development) 8. [Architecture](#architecture) @@ -190,7 +190,7 @@ Access logs via: apify key-value-stores get-record DOCLING_LOG ``` -## Performance & Resources +## Performance and Resources - **Docker Image Size**: ~4GB - **Memory Requirements**: diff --git a/.actor/actor.json b/.actor/actor.json index e9bf59be..2b2741a7 100644 --- a/.actor/actor.json +++ b/.actor/actor.json @@ -1,10 +1,10 @@ { "actorSpecification": 1, "name": "docling", - "version": "0.0", + "version": "1.0", "environmentVariables": {}, "dockerFile": "./Dockerfile", - "input": "./input_schema.json", + "inputSchema": "./input_schema.json", "scripts": { "run": "./actor.sh" } diff --git a/.actor/actor.sh b/.actor/actor.sh index cbbcf2b8..1498bb9c 100755 --- a/.actor/actor.sh +++ b/.actor/actor.sh @@ -154,17 +154,6 @@ else echo "Warning: No build files directory found. Some tools may be unavailable." fi -# Copy Python processor script to tools directory -PYTHON_SCRIPT_PATH="$(dirname "$0")/docling_processor.py" -if [ -f "$PYTHON_SCRIPT_PATH" ]; then - echo "Copying Python processor script to tools directory..." - cp "$PYTHON_SCRIPT_PATH" "$TOOLS_DIR/" - chmod +x "$TOOLS_DIR/docling_processor.py" -else - echo "ERROR: Python processor script not found at $PYTHON_SCRIPT_PATH" - exit 1 -fi - # Check OCR directories and ensure they're writable echo "Checking OCR directory permissions..." OCR_DIR="/opt/app-root/src/.EasyOCR"