mirror of
https://github.com/docling-project/docling.git
synced 2025-06-27 05:20:05 +00:00
feat: simplify dependencies, switch to uv (#1700)
* refactor with uv Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * constraints for onnxruntime Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * more constraints Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> --------- Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
61d0d6c755
commit
cdd401847a
19
.github/actions/setup-poetry/action.yml
vendored
19
.github/actions/setup-poetry/action.yml
vendored
@ -1,19 +0,0 @@
|
||||
name: 'Set up Poetry and install'
|
||||
description: 'Set up a specific version of Poetry and install dependencies using caching.'
|
||||
inputs:
|
||||
python-version:
|
||||
description: "Version range or exact version of Python or PyPy to use, using SemVer's version range syntax."
|
||||
default: '3.11'
|
||||
runs:
|
||||
using: 'composite'
|
||||
steps:
|
||||
- name: Install poetry
|
||||
run: pipx install poetry==1.8.5
|
||||
shell: bash
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
cache: 'poetry'
|
||||
- name: Install dependencies
|
||||
run: poetry install --all-extras
|
||||
shell: bash
|
7
.github/scripts/release.sh
vendored
7
.github/scripts/release.sh
vendored
@ -10,11 +10,12 @@ fi
|
||||
CHGLOG_FILE="${CHGLOG_FILE:-CHANGELOG.md}"
|
||||
|
||||
# update package version
|
||||
poetry version "${TARGET_VERSION}"
|
||||
uvx --from=toml-cli toml set --toml-path=pyproject.toml project.version "${TARGET_VERSION}"
|
||||
UV_FROZEN=0 uv lock --upgrade-package docling
|
||||
|
||||
# collect release notes
|
||||
REL_NOTES=$(mktemp)
|
||||
poetry run semantic-release changelog --unreleased >> "${REL_NOTES}"
|
||||
uv run --no-sync semantic-release changelog --unreleased >> "${REL_NOTES}"
|
||||
|
||||
# update changelog
|
||||
TMP_CHGLOG=$(mktemp)
|
||||
@ -30,7 +31,7 @@ mv "${TMP_CHGLOG}" "${CHGLOG_FILE}"
|
||||
# push changes
|
||||
git config --global user.name 'github-actions[bot]'
|
||||
git config --global user.email 'github-actions[bot]@users.noreply.github.com'
|
||||
git add pyproject.toml "${CHGLOG_FILE}"
|
||||
git add pyproject.toml uv.lock "${CHGLOG_FILE}"
|
||||
COMMIT_MSG="chore: bump version to ${TARGET_VERSION} [skip ci]"
|
||||
git commit -m "${COMMIT_MSG}"
|
||||
git push origin main
|
||||
|
27
.github/workflows/cd.yml
vendored
27
.github/workflows/cd.yml
vendored
@ -4,9 +4,8 @@ on:
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
# disable keyring (https://github.com/actions/runner-images/issues/6185):
|
||||
PYTHON_KEYRING_BACKEND: keyring.backends.null.Keyring
|
||||
|
||||
UV_FROZEN: "1"
|
||||
|
||||
jobs:
|
||||
code-checks:
|
||||
uses: ./.github/workflows/checks.yml
|
||||
@ -20,15 +19,20 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0 # for fetching tags, required for semantic-release
|
||||
- uses: ./.github/actions/setup-poetry
|
||||
- name: Install uv and set the python version
|
||||
uses: astral-sh/setup-uv@v5
|
||||
with:
|
||||
enable-cache: true
|
||||
- name: Install dependencies
|
||||
run: uv sync --only-dev
|
||||
- name: Check version of potential release
|
||||
id: version_check
|
||||
run: |
|
||||
TRGT_VERSION=$(poetry run semantic-release print-version)
|
||||
echo "TRGT_VERSION=${TRGT_VERSION}" >> $GITHUB_OUTPUT
|
||||
echo "${TRGT_VERSION}"
|
||||
TRGT_VERSION=$(uv run --no-sync semantic-release print-version)
|
||||
echo "TRGT_VERSION=${TRGT_VERSION}" >> "$GITHUB_OUTPUT"
|
||||
echo "${TRGT_VERSION}"
|
||||
- name: Check notes of potential release
|
||||
run: poetry run semantic-release changelog --unreleased
|
||||
run: uv run --no-sync semantic-release changelog --unreleased
|
||||
release:
|
||||
needs: [code-checks, pre-release-check]
|
||||
if: needs.pre-release-check.outputs.TARGET_TAG_V != ''
|
||||
@ -45,7 +49,12 @@ jobs:
|
||||
with:
|
||||
token: ${{ steps.app-token.outputs.token }}
|
||||
fetch-depth: 0 # for fetching tags, required for semantic-release
|
||||
- uses: ./.github/actions/setup-poetry
|
||||
- name: Install uv and set the python version
|
||||
uses: astral-sh/setup-uv@v5
|
||||
with:
|
||||
enable-cache: true
|
||||
- name: Install dependencies
|
||||
run: uv sync --only-dev
|
||||
- name: Run release script
|
||||
env:
|
||||
GH_TOKEN: ${{ steps.app-token.outputs.token }}
|
||||
|
72
.github/workflows/checks.yml
vendored
72
.github/workflows/checks.yml
vendored
@ -12,6 +12,7 @@ on:
|
||||
env:
|
||||
HF_HUB_DOWNLOAD_TIMEOUT: "60"
|
||||
HF_HUB_ETAG_TIMEOUT: "60"
|
||||
UV_FROZEN: "1"
|
||||
|
||||
jobs:
|
||||
run-checks:
|
||||
@ -31,16 +32,24 @@ jobs:
|
||||
with:
|
||||
path: ~/.cache/huggingface
|
||||
key: huggingface-cache-py${{ matrix.python-version }}
|
||||
- uses: ./.github/actions/setup-poetry
|
||||
- name: Install uv and set the python version
|
||||
uses: astral-sh/setup-uv@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Run styling check
|
||||
run: poetry run pre-commit run --all-files
|
||||
- name: Install with poetry
|
||||
run: poetry install --all-extras
|
||||
enable-cache: true
|
||||
- name: pre-commit cache key
|
||||
run: echo "PY=$(python -VV | sha256sum | cut -d' ' -f1)" >> "$GITHUB_ENV"
|
||||
- uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/pre-commit
|
||||
key: pre-commit|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml') }}
|
||||
- name: Install dependencies
|
||||
run: uv sync --frozen --all-extras
|
||||
- name: Check style and run tests
|
||||
run: pre-commit run --all-files
|
||||
- name: Testing
|
||||
run: |
|
||||
poetry run pytest -v --cov=docling --cov-report=xml tests
|
||||
uv run --no-sync pytest -v --cov=docling --cov-report=xml tests
|
||||
- name: Upload coverage to Codecov
|
||||
if: inputs.push_coverage
|
||||
uses: codecov/codecov-action@v5
|
||||
@ -57,7 +66,52 @@ jobs:
|
||||
fi
|
||||
|
||||
echo "Running example $file"
|
||||
poetry run python "$file" || exit 1
|
||||
uv run --no-sync python "$file" || exit 1
|
||||
done
|
||||
- name: Build with poetry
|
||||
run: poetry build
|
||||
|
||||
build-package:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ['3.12']
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Install uv and set the python version
|
||||
uses: astral-sh/setup-uv@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
enable-cache: true
|
||||
- name: Install dependencies
|
||||
run: uv sync --all-extras
|
||||
- name: Build package
|
||||
run: uv build
|
||||
- name: Check content of wheel
|
||||
run: unzip -l dist/*.whl
|
||||
- name: Store the distribution packages
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
|
||||
test-package:
|
||||
needs:
|
||||
- build-package
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ['3.12']
|
||||
steps:
|
||||
- name: Download all the dists
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
- name: Install uv and set the python version
|
||||
uses: astral-sh/setup-uv@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
enable-cache: true
|
||||
- name: Install package
|
||||
run: uv pip install dist/*.whl
|
||||
- name: Run docling
|
||||
run: docling --help
|
||||
|
3
.github/workflows/ci-docs.yml
vendored
3
.github/workflows/ci-docs.yml
vendored
@ -8,6 +8,9 @@ on:
|
||||
- "**"
|
||||
- "!gh-pages"
|
||||
|
||||
env:
|
||||
UV_FROZEN: "1"
|
||||
|
||||
jobs:
|
||||
build-docs:
|
||||
if: ${{ github.event_name == 'push' || (github.event.pull_request.head.repo.full_name != 'docling-project/docling' && github.event.pull_request.head.repo.full_name != 'docling-project/docling') }}
|
||||
|
4
.github/workflows/ci.yml
vendored
4
.github/workflows/ci.yml
vendored
@ -9,10 +9,6 @@ on:
|
||||
- "!main"
|
||||
- "!gh-pages"
|
||||
|
||||
env:
|
||||
# disable keyring (https://github.com/actions/runner-images/issues/6185):
|
||||
PYTHON_KEYRING_BACKEND: keyring.backends.null.Keyring
|
||||
|
||||
jobs:
|
||||
code-checks:
|
||||
if: ${{ github.event_name == 'push' || (github.event.pull_request.head.repo.full_name != 'docling-project/docling' && github.event.pull_request.head.repo.full_name != 'docling-project/docling') }}
|
||||
|
13
.github/workflows/docs.yml
vendored
13
.github/workflows/docs.yml
vendored
@ -6,14 +6,21 @@ on:
|
||||
description: "If true, the docs will be deployed."
|
||||
default: false
|
||||
|
||||
env:
|
||||
UV_FROZEN: "1"
|
||||
|
||||
jobs:
|
||||
run-docs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: ./.github/actions/setup-poetry
|
||||
- name: Install uv and set the python version
|
||||
uses: astral-sh/setup-uv@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
enable-cache: true
|
||||
- name: Build docs
|
||||
run: poetry run mkdocs build --verbose --clean
|
||||
run: uv run mkdocs build --verbose --clean
|
||||
- name: Build and push docs
|
||||
if: inputs.deploy
|
||||
run: poetry run mkdocs gh-deploy --force
|
||||
run: uv run --no-sync mkdocs gh-deploy --force
|
||||
|
22
.github/workflows/pypi.yml
vendored
22
.github/workflows/pypi.yml
vendored
@ -4,16 +4,18 @@ on:
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
env:
|
||||
UV_FROZEN: "1"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
# disable keyring (https://github.com/actions/runner-images/issues/6185):
|
||||
PYTHON_KEYRING_BACKEND: keyring.backends.null.Keyring
|
||||
|
||||
jobs:
|
||||
build-and-publish:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ['3.12']
|
||||
environment:
|
||||
name: pypi
|
||||
url: https://pypi.org/p/docling
|
||||
@ -21,9 +23,15 @@ jobs:
|
||||
id-token: write # IMPORTANT: mandatory for trusted publishing
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: ./.github/actions/setup-poetry
|
||||
- name: Build and publish
|
||||
run: poetry build
|
||||
- name: Install uv and set the python version
|
||||
uses: astral-sh/setup-uv@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
enable-cache: true
|
||||
- name: Install dependencies
|
||||
run: uv sync --all-extras
|
||||
- name: Build package
|
||||
run: uv build
|
||||
- name: Publish distribution 📦 to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
|
@ -17,12 +17,11 @@ repos:
|
||||
hooks:
|
||||
- id: mypy
|
||||
name: MyPy
|
||||
entry: poetry run mypy docling
|
||||
entry: uv run --no-sync mypy docling
|
||||
pass_filenames: false
|
||||
language: system
|
||||
files: '\.py$'
|
||||
- id: poetry
|
||||
name: Poetry check
|
||||
entry: poetry check --lock
|
||||
pass_filenames: false
|
||||
language: system
|
||||
- repo: https://github.com/astral-sh/uv-pre-commit
|
||||
rev: 0.7.8
|
||||
hooks:
|
||||
- id: uv-lock
|
||||
|
@ -6,70 +6,52 @@ For more details on the contributing guidelines head to the Docling Project [com
|
||||
|
||||
## Developing
|
||||
|
||||
### Usage of Poetry
|
||||
### Usage of uv
|
||||
|
||||
We use Poetry to manage dependencies.
|
||||
We use [uv](https://docs.astral.sh/uv/) as package and project manager.
|
||||
|
||||
#### Installation
|
||||
|
||||
To install Poetry, follow the documentation here: https://python-poetry.org/docs/master/#installing-with-the-official-installer
|
||||
To install `uv`, check the documentation on [Installing uv](https://docs.astral.sh/uv/getting-started/installation/).
|
||||
|
||||
1. Install Poetry globally on your machine:
|
||||
```bash
|
||||
curl -sSL https://install.python-poetry.org | python3 -
|
||||
```
|
||||
The installation script will print the installation bin folder `POETRY_BIN` which you need in the next steps.
|
||||
#### Create an environment and sync it
|
||||
|
||||
2. Make sure Poetry is in your `$PATH`:
|
||||
- for `zsh`:
|
||||
```sh
|
||||
echo 'export PATH="POETRY_BIN:$PATH"' >> ~/.zshrc
|
||||
```
|
||||
- for `bash`:
|
||||
```sh
|
||||
echo 'export PATH="POETRY_BIN:$PATH"' >> ~/.bashrc
|
||||
```
|
||||
|
||||
3. The official guidelines linked above include useful details on configuring autocomplete for most shell environments, e.g., Bash and Zsh.
|
||||
|
||||
#### Create a Virtual Environment and Install Dependencies
|
||||
|
||||
To activate the Virtual Environment, run:
|
||||
You can use the `uv sync` command to create a project virtual environment (if it does not already exist) and sync
|
||||
the project's dependencies with the environment.
|
||||
|
||||
```bash
|
||||
poetry shell
|
||||
uv sync
|
||||
```
|
||||
|
||||
This will spawn a shell with the Virtual Environment activated. If the Virtual Environment doesn't exist, Poetry will create one for you. Then, to install dependencies, run:
|
||||
#### Use a specific Python version (optional)
|
||||
|
||||
If you need to work with a specific version of Python, you can create a new virtual environment for that version
|
||||
and run the sync command:
|
||||
|
||||
```bash
|
||||
poetry install
|
||||
uv venv --python 3.12
|
||||
uv sync
|
||||
```
|
||||
|
||||
**(Advanced) Use a Specific Python Version**
|
||||
More detailed options are described in the [Using Python environments](https://docs.astral.sh/uv/pip/environments/) documentation.
|
||||
|
||||
If you need to work with a specific (older) version of Python, run:
|
||||
#### Add a new dependency
|
||||
|
||||
Simply use the `uv add` command. The `pyproject.toml` and `uv.lock` files will be updated.
|
||||
|
||||
```bash
|
||||
poetry env use $(which python3.8)
|
||||
```
|
||||
|
||||
This creates a Virtual Environment with Python 3.8. For other versions, replace `$(which python3.8)` with the path to the interpreter (e.g., `/usr/bin/python3.8`) or use `$(which pythonX.Y)`.
|
||||
|
||||
#### Add a New Dependency
|
||||
|
||||
```bash
|
||||
poetry add NAME
|
||||
uv add [OPTIONS] <PACKAGES|--requirements <REQUIREMENTS>>
|
||||
```
|
||||
|
||||
## Coding Style Guidelines
|
||||
|
||||
We use the following tools to enforce code style:
|
||||
|
||||
- iSort, to sort imports
|
||||
- Black, to format code
|
||||
- [Ruff](https://docs.astral.sh/ruff/), as linter and code formatter
|
||||
- [MyPy](https://mypy.readthedocs.io), as static type checker
|
||||
|
||||
We run a series of checks on the codebase on every commit using `pre-commit`. To install the hooks, run:
|
||||
A set of styling checks, as well as regression tests, are defined and managed through the [pre-commit](https://pre-commit.com/) framework.
|
||||
To ensure that those scripts run automatically before a commit is finalized, install `pre-commit` on your local repository:
|
||||
|
||||
```bash
|
||||
pre-commit install
|
||||
@ -81,7 +63,7 @@ To run the checks on-demand, run:
|
||||
pre-commit run --all-files
|
||||
```
|
||||
|
||||
Note: Checks like `Black` and `isort` will "fail" if they modify files. This is because `pre-commit` doesn't like to see files modified by its hooks. In these cases, `git add` the modified files and `git commit` again.
|
||||
Note: Checks like `Ruff` will "fail" if they modify files. This is because `pre-commit` doesn't like to see files modified by its hooks. In these cases, `git add` the modified files and `git commit` again.
|
||||
|
||||
## Tests
|
||||
|
||||
@ -94,7 +76,7 @@ When a change improves the conversion results, multiple reference documents must
|
||||
The reference data can be regenerated with
|
||||
|
||||
```sh
|
||||
DOCLING_GEN_TEST_DATA=1 poetry run pytest
|
||||
DOCLING_GEN_TEST_DATA=1 uv run pytest
|
||||
```
|
||||
|
||||
All PRs modifying the reference test data require a double review to guarantee we don't miss edge cases.
|
||||
|
@ -14,9 +14,8 @@
|
||||
[](https://docling-project.github.io/docling/)
|
||||
[](https://pypi.org/project/docling/)
|
||||
[](https://pypi.org/project/docling/)
|
||||
[](https://python-poetry.org/)
|
||||
[](https://github.com/psf/black)
|
||||
[](https://pycqa.github.io/isort/)
|
||||
[](https://github.com/astral-sh/uv)
|
||||
[](https://github.com/astral-sh/ruff)
|
||||
[](https://pydantic.dev)
|
||||
[](https://github.com/pre-commit/pre-commit)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
|
17
docs/faq/index.md
vendored
17
docs/faq/index.md
vendored
@ -44,6 +44,23 @@ This is a collection of FAQ collected from the user questions on <https://github
|
||||
Source: Issue [#283](https://github.com/docling-project/docling/issues/283#issuecomment-2465035868)
|
||||
|
||||
|
||||
??? question "Is macOS x86_64 supported?"
|
||||
|
||||
### Is macOS x86_64 supported?
|
||||
|
||||
Yes, Docling (still) supports running the standard pipeline on macOS x86_64.
|
||||
|
||||
However, users might end up with a combination of incompatible dependencies on a fresh install.
|
||||
Because Docling depends on PyTorch, which dropped support for macOS x86_64 after the 2.2.2 release,
|
||||
and this old version of PyTorch works only with NumPy 1.x, users **must** ensure that a compatible NumPy version (`<2.0.0`) is installed.
|
||||
|
||||
```shell
|
||||
pip install docling "numpy<2.0.0"
|
||||
```
|
||||
|
||||
Source: Issue [#1694](https://github.com/docling-project/docling/issues/1694).
|
||||
|
||||
|
||||
??? question "Are text styles (bold, underline, etc) supported?"
|
||||
|
||||
### Are text styles (bold, underline, etc) supported?
|
||||
|
6
docs/index.md
vendored
6
docs/index.md
vendored
@ -6,13 +6,13 @@
|
||||
[](https://arxiv.org/abs/2408.09869)
|
||||
[](https://pypi.org/project/docling/)
|
||||
[](https://pypi.org/project/docling/)
|
||||
[](https://python-poetry.org/)
|
||||
[](https://github.com/psf/black)
|
||||
[](https://pycqa.github.io/isort/)
|
||||
[](https://github.com/astral-sh/uv)
|
||||
[](https://github.com/astral-sh/ruff)
|
||||
[](https://pydantic.dev)
|
||||
[](https://github.com/pre-commit/pre-commit)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
[](https://pepy.tech/projects/docling)
|
||||
[](https://apify.com/vancura/docling)
|
||||
[](https://www.bestpractices.dev/projects/10101)
|
||||
[](https://lfaidata.foundation/projects/)
|
||||
|
||||
|
2
docs/installation/index.md
vendored
2
docs/installation/index.md
vendored
@ -129,5 +129,5 @@ Works on macOS, Linux, and Windows, with support for both x86_64 and arm64 archi
|
||||
To develop Docling features, bugfixes etc., install as follows from your local clone's root dir:
|
||||
|
||||
```bash
|
||||
poetry install --all-extras
|
||||
uv sync --all-extras
|
||||
```
|
||||
|
8099
poetry.lock
generated
8099
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
318
pyproject.toml
318
pyproject.toml
@ -1,20 +1,8 @@
|
||||
[tool.poetry]
|
||||
[project]
|
||||
name = "docling"
|
||||
version = "2.35.0" # DO NOT EDIT, updated automatically
|
||||
description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
|
||||
authors = [
|
||||
"Christoph Auer <cau@zurich.ibm.com>",
|
||||
"Michele Dolfi <dol@zurich.ibm.com>",
|
||||
"Maxim Lysak <mly@zurich.ibm.com>",
|
||||
"Nikos Livathinos <nli@zurich.ibm.com>",
|
||||
"Ahmed Nassar <ahn@zurich.ibm.com>",
|
||||
"Panos Vagenas <pva@zurich.ibm.com>",
|
||||
"Peter Staar <taa@zurich.ibm.com>",
|
||||
]
|
||||
license = "MIT"
|
||||
readme = "README.md"
|
||||
repository = "https://github.com/docling-project/docling"
|
||||
homepage = "https://github.com/docling-project/docling"
|
||||
keywords = [
|
||||
"docling",
|
||||
"convert",
|
||||
@ -29,149 +17,137 @@ keywords = [
|
||||
"table former",
|
||||
]
|
||||
classifiers = [
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: MacOS :: MacOS X",
|
||||
"Operating System :: POSIX :: Linux",
|
||||
"Operating System :: Microsoft :: Windows",
|
||||
"Development Status :: 5 - Production/Stable",
|
||||
"Intended Audience :: Developers",
|
||||
"Intended Audience :: Science/Research",
|
||||
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
]
|
||||
packages = [{ include = "docling" }]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
######################
|
||||
# actual dependencies:
|
||||
######################
|
||||
python = "^3.9"
|
||||
pydantic = "^2.0.0"
|
||||
docling-core = {version = "^2.31.2", extras = ["chunking"]}
|
||||
docling-ibm-models = "^3.4.0"
|
||||
docling-parse = "^4.0.0"
|
||||
filetype = "^1.2.0"
|
||||
pypdfium2 = "^4.30.0"
|
||||
pydantic-settings = "^2.3.0"
|
||||
huggingface_hub = ">=0.23,<1"
|
||||
requests = "^2.32.2"
|
||||
easyocr = "^1.7"
|
||||
tesserocr = { version = "^2.7.1", optional = true }
|
||||
certifi = ">=2024.7.4"
|
||||
rtree = "^1.3.0"
|
||||
scipy = [
|
||||
{ version = "^1.6.0", markers = "python_version >= '3.10'" },
|
||||
{ version = ">=1.6.0,<1.14.0", markers = "python_version < '3.10'" },
|
||||
readme = "README.md"
|
||||
authors = [
|
||||
{ name = "Christoph Auer", email = "cau@zurich.ibm.com" },
|
||||
{ name = "Michele Dolfi", email = "dol@zurich.ibm.com" },
|
||||
{ name = "Maxim Lysak", email = "mly@zurich.ibm.com" },
|
||||
{ name = "Nikos Livathinos", email = "nli@zurich.ibm.com" },
|
||||
{ name = "Ahmed Nassar", email = "ahn@zurich.ibm.com" },
|
||||
{ name = "Panos Vagenas", email = "pva@zurich.ibm.com" },
|
||||
{ name = "Peter Staar", email = "taa@zurich.ibm.com" },
|
||||
]
|
||||
typer = ">=0.12.5,<0.16.0"
|
||||
python-docx = "^1.1.2"
|
||||
python-pptx = "^1.0.2"
|
||||
beautifulsoup4 = "^4.12.3"
|
||||
pandas = "^2.1.4"
|
||||
marko = "^2.1.2"
|
||||
openpyxl = "^3.1.5"
|
||||
lxml = ">=4.0.0,<6.0.0"
|
||||
ocrmac = { version = "^1.0.0", markers = "sys_platform == 'darwin'", optional = true }
|
||||
rapidocr-onnxruntime = { version = "^1.4.0", optional = true, markers = "python_version < '3.13'" }
|
||||
onnxruntime = [
|
||||
# 1.19.2 is the last version with python3.9 support,
|
||||
# see https://github.com/microsoft/onnxruntime/releases/tag/v1.20.0
|
||||
{ version = ">=1.7.0,<1.20.0", optional = true, markers = "python_version < '3.10'" },
|
||||
{ version = "^1.7.0", optional = true, markers = "python_version >= '3.10'" },
|
||||
requires-python = '>=3.9,<4.0'
|
||||
dependencies = [
|
||||
'pydantic (>=2.0.0,<3.0.0)',
|
||||
'docling-core[chunking] (>=2.29.0,<3.0.0)',
|
||||
'docling-ibm-models (>=3.4.4,<4.0.0)',
|
||||
'docling-parse (>=4.0.0,<5.0.0)',
|
||||
'filetype (>=1.2.0,<2.0.0)',
|
||||
'pypdfium2 (>=4.30.0,<5.0.0)',
|
||||
'pydantic-settings (>=2.3.0,<3.0.0)',
|
||||
'huggingface_hub (>=0.23,<1)',
|
||||
'requests (>=2.32.2,<3.0.0)',
|
||||
'easyocr (>=1.7,<2.0)',
|
||||
'certifi (>=2024.7.4)',
|
||||
'rtree (>=1.3.0,<2.0.0)',
|
||||
'typer (>=0.12.5,<0.16.0)',
|
||||
'python-docx (>=1.1.2,<2.0.0)',
|
||||
'python-pptx (>=1.0.2,<2.0.0)',
|
||||
'beautifulsoup4 (>=4.12.3,<5.0.0)',
|
||||
'pandas (>=2.1.4,<3.0.0)',
|
||||
'marko (>=2.1.2,<3.0.0)',
|
||||
'openpyxl (>=3.1.5,<4.0.0)',
|
||||
'lxml (>=4.0.0,<6.0.0)',
|
||||
'pillow (>=10.0.0,<12.0.0)',
|
||||
'tqdm (>=4.65.0,<5.0.0)',
|
||||
'pluggy (>=1.0.0,<2.0.0)',
|
||||
'pylatexenc (>=2.10,<3.0)',
|
||||
'click (<8.2.0)',
|
||||
'scipy (>=1.6.0,<2.0.0)',
|
||||
# 'scipy (>=1.6.0,<2.0.0) ; python_version >= "3.10"',
|
||||
# 'scipy (>=1.6.0,<1.14.0) ; python_version < "3.10"',
|
||||
]
|
||||
|
||||
transformers = [
|
||||
{ markers = "sys_platform != 'darwin' or platform_machine != 'x86_64'", version = "^4.46.0", optional = true },
|
||||
{ markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'", version = "~4.42.0", optional = true },
|
||||
]
|
||||
accelerate = [
|
||||
{ markers = "sys_platform != 'darwin' or platform_machine != 'x86_64'", version = "^1.2.1", optional = true },
|
||||
]
|
||||
pillow = ">=10.0.0,<12.0.0"
|
||||
tqdm = "^4.65.0"
|
||||
pluggy = "^1.0.0"
|
||||
pylatexenc = "^2.10"
|
||||
click = "<8.2.0"
|
||||
[project.urls]
|
||||
homepage = "https://github.com/docling-project/docling"
|
||||
repository = "https://github.com/docling-project/docling"
|
||||
issues = "https://github.com/docling-project/docling/issues"
|
||||
changelog = "https://github.com/docling-project/docling/blob/main/CHANGELOG.md"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
python = "^3.9.2"
|
||||
black = { extras = ["jupyter"], version = "^24.4.2" }
|
||||
pytest = "^7.2.2"
|
||||
pre-commit = "^3.7.1"
|
||||
mypy = "^1.10.1"
|
||||
isort = "^5.10.1"
|
||||
python-semantic-release = "^7.32.2"
|
||||
flake8 = "^6.0.0"
|
||||
pyproject-flake8 = "^6.0.0"
|
||||
pytest-xdist = "^3.3.1"
|
||||
types-requests = "^2.31.0.2"
|
||||
flake8-pyproject = "^1.2.3"
|
||||
pylint = "^2.17.5"
|
||||
pandas-stubs = "^2.1.4.231227"
|
||||
ipykernel = "^6.29.5"
|
||||
ipywidgets = "^8.1.5"
|
||||
nbqa = "^1.9.0"
|
||||
types-openpyxl = "^3.1.5.20241114"
|
||||
types-tqdm = "^4.67.0.20241221"
|
||||
coverage = "^7.6.2"
|
||||
pytest-cov = "^6.0.0"
|
||||
[project.entry-points.docling]
|
||||
"docling_defaults" = "docling.models.plugins.defaults"
|
||||
|
||||
[tool.poetry.group.docs.dependencies]
|
||||
mkdocs-material = "^9.5.40"
|
||||
mkdocs-jupyter = "^0.25.0"
|
||||
mkdocs-click = "^0.8.1"
|
||||
mkdocstrings = { extras = ["python"], version = "^0.27.0" }
|
||||
griffe-pydantic = "^1.1.0"
|
||||
|
||||
[tool.poetry.group.examples.dependencies]
|
||||
datasets = "^2.21.0"
|
||||
python-dotenv = "^1.0.1"
|
||||
langchain-huggingface = "^0.0.3"
|
||||
langchain-milvus = "^0.1.4"
|
||||
langchain-text-splitters = "^0.2.4"
|
||||
|
||||
[tool.poetry.group.constraints]
|
||||
optional = true
|
||||
|
||||
[tool.poetry.group.constraints.dependencies]
|
||||
numpy = [
|
||||
{ version = ">=1.24.4,<3.0.0", markers = 'python_version >= "3.10"' },
|
||||
{ version = ">=1.24.4,<2.1.0", markers = 'python_version < "3.10"' },
|
||||
]
|
||||
|
||||
[tool.poetry.group.mac_intel]
|
||||
optional = true
|
||||
|
||||
[tool.poetry.group.mac_intel.dependencies]
|
||||
torch = [
|
||||
{ markers = "sys_platform != 'darwin' or platform_machine != 'x86_64'", version = "^2.2.2" },
|
||||
{ markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'", version = "~2.2.2" },
|
||||
]
|
||||
torchvision = [
|
||||
{ markers = "sys_platform != 'darwin' or platform_machine != 'x86_64'", version = "^0" },
|
||||
{ markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'", version = "~0.17.2" },
|
||||
]
|
||||
|
||||
|
||||
[tool.poetry.group.lm.dependencies]
|
||||
peft = "^0.15.2"
|
||||
backoff = "^2.2.1"
|
||||
|
||||
[tool.poetry.extras]
|
||||
tesserocr = ["tesserocr"]
|
||||
ocrmac = ["ocrmac"]
|
||||
vlm = ["transformers", "accelerate"]
|
||||
rapidocr = ["rapidocr-onnxruntime", "onnxruntime"]
|
||||
|
||||
[tool.poetry.scripts]
|
||||
[project.scripts]
|
||||
docling = "docling.cli.main:app"
|
||||
docling-tools = "docling.cli.tools:app"
|
||||
|
||||
[tool.poetry.plugins."docling"]
|
||||
"docling_defaults" = "docling.models.plugins.defaults"
|
||||
[project.optional-dependencies]
|
||||
tesserocr = ['tesserocr (>=2.7.1,<3.0.0)']
|
||||
ocrmac = ['ocrmac (>=1.0.0,<2.0.0) ; sys_platform == "darwin"']
|
||||
vlm = [
|
||||
'transformers (>=4.46.0,<5.0.0)',
|
||||
'accelerate (>=1.2.1,<2.0.0)',
|
||||
'mlx-vlm >=0.1.22 ; python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"',
|
||||
]
|
||||
rapidocr = [
|
||||
'rapidocr-onnxruntime (>=1.4.0,<2.0.0) ; python_version < "3.13"',
|
||||
'onnxruntime (>=1.7.0,<2.0.0)',
|
||||
# 'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10"',
|
||||
# 'onnxruntime (>=1.7.0,<1.20.0) ; python_version < "3.10"',
|
||||
]
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
[dependency-groups]
|
||||
dev = [
|
||||
"pre-commit~=3.7",
|
||||
"mypy~=1.10",
|
||||
"types-setuptools~=70.3",
|
||||
"pandas-stubs~=2.1",
|
||||
"types-openpyxl~=3.1",
|
||||
"types-requests~=2.31",
|
||||
"boto3-stubs~=1.37",
|
||||
"types-urllib3~=1.26",
|
||||
"types-tqdm~=4.67",
|
||||
"coverage~=7.6",
|
||||
"pytest~=8.3",
|
||||
"pytest-cov>=6.1.1",
|
||||
"pytest-dependency~=0.6",
|
||||
"pytest-xdist~=3.3",
|
||||
"ipykernel~=6.29",
|
||||
"ipywidgets~=8.1",
|
||||
"nbqa~=1.9",
|
||||
"python-semantic-release~=7.32",
|
||||
]
|
||||
docs = [
|
||||
"mkdocs-material~=9.5",
|
||||
"mkdocs-jupyter~=0.25",
|
||||
"mkdocs-click~=0.8",
|
||||
"mkdocstrings[python]~=0.27",
|
||||
"griffe-pydantic~=1.1",
|
||||
]
|
||||
examples = [
|
||||
"datasets~=2.21",
|
||||
"python-dotenv~=1.0",
|
||||
"langchain-huggingface>=0.0.3",
|
||||
"langchain-milvus~=0.1",
|
||||
"langchain-text-splitters~=0.2",
|
||||
]
|
||||
constraints = [
|
||||
'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10"',
|
||||
'onnxruntime (>=1.7.0,<1.20.0) ; python_version < "3.10"',
|
||||
]
|
||||
|
||||
|
||||
[tool.uv]
|
||||
package = true
|
||||
default-groups = "all"
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
include = ["docling*"]
|
||||
|
||||
[tool.ruff]
|
||||
target-version = "py39"
|
||||
@ -187,51 +163,51 @@ skip-magic-trailing-comma = false
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = [
|
||||
# "B", # flake8-bugbear
|
||||
"C", # flake8-comprehensions
|
||||
"C9", # mccabe
|
||||
# "D", # flake8-docstrings
|
||||
"E", # pycodestyle errors (default)
|
||||
"F", # pyflakes (default)
|
||||
"I", # isort
|
||||
"PD", # pandas-vet
|
||||
"PIE", # pie
|
||||
# "PTH", # pathlib
|
||||
"Q", # flake8-quotes
|
||||
# "RET", # return
|
||||
"RUF", # Enable all ruff-specific checks
|
||||
# "SIM", # simplify
|
||||
"S307", # eval
|
||||
# "T20", # (disallow print statements) keep debugging statements out of the codebase
|
||||
"W", # pycodestyle warnings
|
||||
"ASYNC", # async
|
||||
"UP", # pyupgrade
|
||||
# "B", # flake8-bugbear
|
||||
"C", # flake8-comprehensions
|
||||
"C9", # mccabe
|
||||
# "D", # flake8-docstrings
|
||||
"E", # pycodestyle errors (default)
|
||||
"F", # pyflakes (default)
|
||||
"I", # isort
|
||||
"PD", # pandas-vet
|
||||
"PIE", # pie
|
||||
# "PTH", # pathlib
|
||||
"Q", # flake8-quotes
|
||||
# "RET", # return
|
||||
"RUF", # Enable all ruff-specific checks
|
||||
# "SIM", # simplify
|
||||
"S307", # eval
|
||||
# "T20", # (disallow print statements) keep debugging statements out of the codebase
|
||||
"W", # pycodestyle warnings
|
||||
"ASYNC", # async
|
||||
"UP", # pyupgrade
|
||||
]
|
||||
|
||||
ignore = [
|
||||
"C408", # Unnecessary `dict()` call (rewrite as a literal)
|
||||
"E501", # Line too long, handled by ruff formatter
|
||||
"D107", # "Missing docstring in __init__",
|
||||
"F401", # imported but unused; consider using `importlib.util.find_spec` to test for "
|
||||
"F811", # "redefinition of the same function"
|
||||
"PL", # Pylint
|
||||
"RUF012", # Mutable Class Attributes
|
||||
"UP006", # List vs list, etc
|
||||
"UP007", # Option and Union
|
||||
"UP035", # `typing.Set` is deprecated, use `set` instead"
|
||||
"C408", # Unnecessary `dict()` call (rewrite as a literal)
|
||||
"E501", # Line too long, handled by ruff formatter
|
||||
"D107", # "Missing docstring in __init__",
|
||||
"F401", # imported but unused; consider using `importlib.util.find_spec` to test for "
|
||||
"F811", # "redefinition of the same function"
|
||||
"PL", # Pylint
|
||||
"RUF012", # Mutable Class Attributes
|
||||
"UP006", # List vs list, etc
|
||||
"UP007", # Option and Union
|
||||
"UP035", # `typing.Set` is deprecated, use `set` instead"
|
||||
]
|
||||
|
||||
#extend-select = []
|
||||
|
||||
[tool.ruff.lint.pep8-naming]
|
||||
classmethod-decorators = [
|
||||
# Allow Pydantic's `@validator` decorator to trigger class method treatment.
|
||||
"pydantic.validator",
|
||||
# Allow Pydantic's `@validator` decorator to trigger class method treatment.
|
||||
"pydantic.validator",
|
||||
]
|
||||
|
||||
[tool.ruff.lint.per-file-ignores]
|
||||
"__init__.py" = ["E402", "F401"]
|
||||
"tests/*.py" = ["ASYNC"] # Disable ASYNC check for tests
|
||||
"tests/*.py" = ["ASYNC"] # Disable ASYNC check for tests
|
||||
|
||||
[tool.ruff.lint.mccabe]
|
||||
max-complexity = 20
|
||||
|
Loading…
x
Reference in New Issue
Block a user