# docling/pyproject.toml
# 2025-06-25 16:27:46 +00:00
#
# 272 lines
# 7.4 KiB
# TOML

# Distribution metadata for the `docling` package (PEP 621 `[project]` table).
[project]
name = "docling"
version = "2.38.1" # DO NOT EDIT, updated automatically
description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
license = "MIT"
# Search keywords published with the package metadata.
keywords = [
    "docling",
    "convert",
    "document",
    "pdf",
    "docx",
    "html",
    "markdown",
    "layout model",
    "segmentation",
    "table structure",
    "table former",
]
classifiers = [
    # Supported operating systems
    "Operating System :: MacOS :: MacOS X",
    "Operating System :: POSIX :: Linux",
    "Operating System :: Microsoft :: Windows",
    # Maturity, audience and topic
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    # Python versions advertised as supported
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
]
readme = "README.md"
authors = [
    { name = "Christoph Auer", email = "cau@zurich.ibm.com" },
    { name = "Michele Dolfi", email = "dol@zurich.ibm.com" },
    { name = "Maxim Lysak", email = "mly@zurich.ibm.com" },
    { name = "Nikos Livathinos", email = "nli@zurich.ibm.com" },
    { name = "Ahmed Nassar", email = "ahn@zurich.ibm.com" },
    { name = "Panos Vagenas", email = "pva@zurich.ibm.com" },
    { name = "Peter Staar", email = "taa@zurich.ibm.com" },
]
# Lower bound matches the oldest "Programming Language" classifier above.
requires-python = '>=3.9,<4.0'
# Runtime requirements, written as PEP 508 strings with parenthesised version ranges.
dependencies = [
    'pydantic (>=2.0.0,<3.0.0)',
    'docling-core[chunking] (>=2.29.0,<3.0.0)',
    'docling-ibm-models (>=3.4.4,<4.0.0)',
    'docling-parse (>=4.0.0,<5.0.0)',
    'filetype (>=1.2.0,<2.0.0)',
    'pypdfium2 (>=4.30.0,<5.0.0)',
    'pydantic-settings (>=2.3.0,<3.0.0)',
    'huggingface_hub (>=0.23,<1)',
    'requests (>=2.32.2,<3.0.0)',
    'easyocr (>=1.7,<2.0)',
    'certifi (>=2024.7.4)',
    'rtree (>=1.3.0,<2.0.0)',
    'typer (>=0.12.5,<0.17.0)',
    'python-docx (>=1.1.2,<2.0.0)',
    'python-pptx (>=1.0.2,<2.0.0)',
    'beautifulsoup4 (>=4.12.3,<5.0.0)',
    'pandas (>=2.1.4,<3.0.0)',
    'marko (>=2.1.2,<3.0.0)',
    'openpyxl (>=3.1.5,<4.0.0)',
    'lxml (>=4.0.0,<6.0.0)',
    'pillow (>=10.0.0,<12.0.0)',
    'tqdm (>=4.65.0,<5.0.0)',
    'pluggy (>=1.0.0,<2.0.0)',
    'pylatexenc (>=2.10,<3.0)',
    'scipy (>=1.6.0,<2.0.0)',
    # Commented-out alternative: split the scipy bounds by Python version.
    # 'scipy (>=1.6.0,<2.0.0) ; python_version >= "3.10"',
    # 'scipy (>=1.6.0,<1.14.0) ; python_version < "3.10"',
]
# Project links published with the package metadata; all point at the
# docling-project/docling GitHub repository.
[project.urls]
homepage = "https://github.com/docling-project/docling"
repository = "https://github.com/docling-project/docling"
issues = "https://github.com/docling-project/docling/issues"
changelog = "https://github.com/docling-project/docling/blob/main/CHANGELOG.md"
# Entry-point group `docling`: exposes the module
# `docling.models.plugins.defaults` under the name `docling_defaults`.
[project.entry-points.docling]
docling_defaults = "docling.models.plugins.defaults"
# Console commands installed with the package, each mapped to a
# `module:object` target inside `docling.cli`.
[project.scripts]
docling = "docling.cli.main:app"
docling-tools = "docling.cli.tools:app"
# Optional extras; none of these are part of the base `dependencies` list.
[project.optional-dependencies]
tesserocr = ['tesserocr (>=2.7.1,<3.0.0)']
# The marker restricts ocrmac to macOS (`sys_platform == "darwin"`).
ocrmac = ['ocrmac (>=1.0.0,<2.0.0) ; sys_platform == "darwin"']
vlm = [
    'transformers (>=4.46.0,<5.0.0)',
    'accelerate (>=1.2.1,<2.0.0)',
    # Restricted by marker to Python >= 3.10 on arm64 macOS.
    'mlx-vlm >=0.1.22 ; python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"',
]
rapidocr = [
    # The marker excludes Python 3.13 and newer for rapidocr-onnxruntime;
    # onnxruntime itself carries no marker.
    'rapidocr-onnxruntime (>=1.4.0,<2.0.0) ; python_version < "3.13"',
    'onnxruntime (>=1.7.0,<2.0.0)',
    # Commented-out alternative: split the onnxruntime bounds by Python version.
    # 'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10"',
    # 'onnxruntime (>=1.7.0,<1.20.0) ; python_version < "3.10"',
]
asr = [
    # A git source for this package is declared under [tool.uv.sources].
    "openai-whisper>=20240930",
]
# Dependency groups for development workflows; kept separate from the
# `[project]` dependencies and extras.
[dependency-groups]
# Linting, type checking (with stub packages), testing, notebooks, releases.
dev = [
    "pre-commit~=3.7",
    "mypy~=1.10",
    "types-setuptools~=70.3",
    "pandas-stubs~=2.1",
    "types-openpyxl~=3.1",
    "types-requests~=2.31",
    "boto3-stubs~=1.37",
    "types-urllib3~=1.26",
    "types-tqdm~=4.67",
    "coverage~=7.6",
    "pytest~=8.3",
    "pytest-cov>=6.1.1",
    "pytest-dependency~=0.6",
    "pytest-xdist~=3.3",
    "ipykernel~=6.29",
    "ipywidgets~=8.1",
    "nbqa~=1.9",
    "python-semantic-release~=7.32",
]
# MkDocs-based documentation toolchain.
docs = [
    "mkdocs-material~=9.5",
    "mkdocs-jupyter~=0.25",
    "mkdocs-click~=0.8",
    "mkdocstrings[python]~=0.27",
    "griffe-pydantic~=1.1",
]
# Packages needed only by the example code.
examples = [
    "datasets~=2.21",
    "python-dotenv~=1.0",
    "langchain-huggingface>=0.0.3",
    "langchain-milvus~=0.1",
    "langchain-text-splitters~=0.2",
]
# Per-Python-version onnxruntime bounds (capped below 1.20.0 on Python < 3.10).
# NOTE(review): presumably here to steer dependency resolution rather than to
# install anything extra — confirm.
constraints = [
    'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10"',
    'onnxruntime (>=1.7.0,<1.20.0) ; python_version < "3.10"',
]
# uv project settings.
[tool.uv]
package = true
# NOTE(review): presumably enables every [dependency-groups] entry by
# default — confirm against the uv settings reference.
default-groups = "all"
# uv source override: `openai-whisper` comes from this git repository at the
# pinned revision.
[tool.uv.sources]
openai-whisper = { git = "https://github.com/openai/whisper.git", rev = "dd985ac4b90cafeef8712f2998d62c59c3e62d22" }
# setuptools package discovery: include only packages matching `docling*`.
[tool.setuptools.packages.find]
include = ["docling*"]
# Ruff base settings shared by the linter and formatter.
[tool.ruff]
# Matches the `>=3.9` lower bound declared in `requires-python`.
target-version = "py39"
line-length = 88
respect-gitignore = true
# Commented-out option that would exclude the tests directory.
# extend-exclude = [
# "tests",
# ]
# Ruff formatter options.
[tool.ruff.format]
skip-magic-trailing-comma = false
# Ruff lint rule selection. Commented-out entries are rule families that are
# currently disabled.
[tool.ruff.lint]
select = [
    # "B", # flake8-bugbear
    "C", # flake8-comprehensions
    "C9", # mccabe
    # "D", # flake8-docstrings
    "E", # pycodestyle errors (default)
    "F", # pyflakes (default)
    "I", # isort
    "PD", # pandas-vet
    "PIE", # pie
    # "PTH", # pathlib
    "Q", # flake8-quotes
    # "RET", # return
    "RUF", # all ruff-specific checks
    # "SIM", # simplify
    "S307", # eval
    # "T20", # disallow print statements (keeps debugging output out of the codebase)
    "W", # pycodestyle warnings
    "ASYNC", # async
    "UP", # pyupgrade
]
# NOTE(review): the "D" and "PL" families are not enabled in `select` above,
# so ignoring "D107" and "PL" looks redundant — confirm before removing.
ignore = [
    "C408", # Unnecessary `dict()` call (rewrite as a literal)
    "E501", # Line too long, handled by ruff formatter
    "D107", # Missing docstring in `__init__`
    "F401", # Imported but unused
    "F811", # Redefinition of the same function
    "PL", # Pylint
    "RUF012", # Mutable class attributes
    "UP006", # `List` vs `list`, etc.
    "UP007", # `Optional` and `Union`
    "UP035", # `typing.Set` is deprecated, use `set` instead
]
#extend-select = []
# pep8-naming options.
# NOTE(review): no "N" rule appears in the `select` list of [tool.ruff.lint],
# so this setting may currently have no effect — confirm.
[tool.ruff.lint.pep8-naming]
classmethod-decorators = [
    # Treat methods decorated with Pydantic's `@validator` as class methods.
    "pydantic.validator",
]
# Rules switched off for files matching each pattern.
[tool.ruff.lint.per-file-ignores]
# E402 and F401 are not reported in `__init__.py` files.
"__init__.py" = ["E402", "F401"]
"tests/*.py" = ["ASYNC"] # Disable ASYNC check for tests
# Complexity threshold for the mccabe check ("C9" in the lint `select` list).
[tool.ruff.lint.mccabe]
max-complexity = 20
# Commented-out custom isort section grouping the docling family of packages.
# [tool.ruff.lint.isort.sections]
# "docling" = ["docling_core", "docling_ibm_models", "docling_parse"]
# Import-sorting (isort) options.
[tool.ruff.lint.isort]
combine-as-imports = true
# Commented-out explicit section order; it would place the custom "docling"
# section between third-party and first-party imports.
# section-order = [
# "future",
# "standard-library",
# "third-party",
# "docling",
# "first-party",
# "local-folder",
# ]
# mypy type-checker settings; the pydantic plugin is loaded.
[tool.mypy]
pretty = true
# strict = true
no_implicit_optional = true
plugins = "pydantic.mypy"
# NOTE(review): type checking targets Python 3.10 although `requires-python`
# allows 3.9 and ruff targets py39 — confirm this is intentional.
python_version = "3.10"
# For every module pattern listed here, mypy runs with
# `ignore_missing_imports = true`.
[[tool.mypy.overrides]]
module = [
    "docling_parse.*",
    "pypdfium2.*",
    "networkx.*",
    "scipy.*",
    "filetype.*",
    "tesserocr.*",
    "docling_ibm_models.*",
    "easyocr.*",
    "ocrmac.*",
    "mlx_vlm.*",
    "lxml.*",
    "huggingface_hub.*",
    "transformers.*",
    "pylatexenc.*",
]
ignore_missing_imports = true
# python-semantic-release configuration (the dev group pins `~=7.32`).
[tool.semantic_release]
# for default values check:
# https://github.com/python-semantic-release/python-semantic-release/blob/v7.32.2/semantic_release/defaults.cfg
# NOTE(review): presumably derives the current version from git tags only — confirm against the v7 docs.
version_source = "tag_only"
branch = "main"
# configure types which should trigger minor and patch version bumps respectively
# (note that they must be a subset of the configured allowed types):
parser_angular_allowed_types = "build,chore,ci,docs,feat,fix,perf,style,refactor,test"
parser_angular_minor_types = "feat"
parser_angular_patch_types = "fix,perf"