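# File-viewer header captured along with this file; kept as comments so the
# document parses as TOML.
# olmocr/pyproject.toml
# 2025-06-10 16:14:57 -07:00
#
# 195 lines
# 3.6 KiB
# TOML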

# Build configuration: front ends install `requires` into the build
# environment and then invoke the setuptools backend named below.
[build-system]
# `wheel` is deliberately not listed. The setuptools backend reports any extra
# build requirements itself, and naming `wheel` here forces an unnecessary
# install for source-distribution builds.
requires = ["setuptools"]
build-backend = "setuptools.build_meta"
# Core package metadata.
# See https://setuptools.pypa.io/en/latest/userguide/quickstart.html for more project configuration options.
[project]
name = "olmocr"
# The version is not written here; it is read from the attribute named in
# [tool.setuptools.dynamic].
dynamic = ["version"]
description = "Fast, efficient, and high quality OCR powered by open visual language models"
readme = "README.md"
requires-python = ">=3.11"
license = { file = "LICENSE" }
authors = [
    { name = "Allen Institute for Artificial Intelligence", email = "jakep@allenai.org" },
]
classifiers = [
    "Intended Audience :: Science/Research",
    "Development Status :: 3 - Alpha",
    "License :: OSI Approved :: Apache Software License",
    "Programming Language :: Python :: 3",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
# Requirements installed with the base package (no extras selected).
dependencies = [
    "cached-path",
    "smart_open",
    "pypdf>=5.2.0",
    "pypdfium2",
    "cryptography",
    "lingua-language-detector",
    "Pillow",
    "ftfy",
    "bleach",
    "markdown2",
    "filelock",
    "orjson",
    "requests",
    "zstandard",
    "boto3",
    "httpx",
    "torch>=2.7.0",
    "transformers>=4.51.1",
    "img2pdf",
    "beaker-py",
]
# Project links published with the package metadata.
[project.urls]
Homepage = "https://github.com/allenai/olmocr"
Repository = "https://github.com/allenai/olmocr"
Changelog = "https://github.com/allenai/olmocr/blob/main/CHANGELOG.md"
# The documentation link below is disabled; uncomment it to publish the URL.
# Documentation = "https://olmocr.readthedocs.io/"
# Optional extras, selected at install time as `olmocr[<extra>]`.
[project.optional-dependencies]
# vLLM is pinned to one exact release for the `gpu` extra.
gpu = ["vllm==0.9.1"]
# Development extra: linters, test runner, packaging and documentation tools.
dev = [
    # Linting, type checking and formatting
    "ruff",
    "mypy",
    "black",
    "isort",
    # Testing
    "pytest",
    "pytest-sphinx",
    "pytest-cov",
    # Packaging and release
    "twine>=1.11.0",
    "build",
    "setuptools",
    "wheel",
    # Documentation (Sphinx and its plugins are version-constrained)
    "Sphinx>=4.3.0,<7.1.0",
    "furo==2023.7.26",
    "myst-parser>=1.0,<2.1",
    "sphinx-copybutton==0.5.2",
    "sphinx-autobuild==2021.3.14",
    "sphinx-autodoc-typehints==1.23.3",
    # Miscellaneous libraries
    "packaging",
    "necessary",
    "peft",
    "datasets",
    "omegaconf",
    "spacy",
]
# Packages pulled in by the `bench` extra.
bench = [
    "tinyhost",
    "fuzzysearch",
    "rapidfuzz",
    "sequence_align",
    "syntok",
    "openai",
    "google-genai",
    "playwright",
    "mistralai",
    "lxml",
    "flask",
]
# Packages pulled in by the `train` extra. Extras are installed on top of the
# base `dependencies`, so every bound here combines with the base bounds.
train = [
    "torch",
    "torchvision",
    "accelerate",
    "datasets",
    "peft",
    "wandb",
    "omegaconf",
    "s3fs",
    "necessary",
    "einops",
    # Same floor as the base requirement. The previous `>=4.45.1` could never
    # take effect because the base dependencies already require `>=4.51.1`.
    "transformers>=4.51.1",
]
# Packages pulled in by the `elo` extra.
elo = ["numpy", "scipy", "pandas", "matplotlib"]
# setuptools configuration, parent table first.
[tool.setuptools]
include-package-data = true

# The package version is read from the VERSION attribute of olmocr.version.
[tool.setuptools.dynamic]
version = { attr = "olmocr.version.VERSION" }

# Non-Python files shipped inside the `olmocr` package.
[tool.setuptools.package-data]
olmocr = ["py.typed", "viewer/*.html", "eval/*.html"]

# Package discovery: patterns below are left out of the distribution.
[tool.setuptools.packages.find]
exclude = [
    "*.tests",
    "*.tests.*",
    "tests.*",
    "tests",
    "docs*",
    "scripts*",
]
# Black formatter settings.
[tool.black]
# Same limit as `line-length` under [tool.ruff].
line-length = 160
# Regex selecting file names that end in `.py` or `.pyi`.
include = '\.pyi?$'
# Regex alternation of cache, VCS, editor, virtualenv, dist and doc paths to
# skip. This is a literal string, so the backslashes reach the regex unchanged.
exclude = '''
(
__pycache__
| \.git
| \.mypy_cache
| \.pytest_cache
| \.vscode
| \.venv
| \bdist\b
| \bdoc\b
)
'''
# isort import-sorting settings.
[tool.isort]
# Use isort's black-compatible preset.
profile = "black"
# Wrap mode 3 ("Vertical Hanging Indent" in isort's documentation).
multi_line_output = 3
# Pyright settings. You can override them locally by adding a personal
# pyrightconfig.json file.
[tool.pyright]
# Turn off the reportPrivateImportUsage diagnostic.
reportPrivateImportUsage = false
# Ruff settings that apply to the tool as a whole.
[tool.ruff]
line-length = 160
target-version = "py311"
exclude = ["olmocr/train/molmo", "tests/*"]

# Linter rule selection lives under `lint`. The top-level `ignore` and
# `per-file-ignores` settings are deprecated by ruff.
[tool.ruff.lint]
ignore = ["E722"]  # allow bare `except:`

[tool.ruff.lint.per-file-ignores]
# F401 (unused import) is ignored in package __init__ files.
"__init__.py" = ["F401"]
# mypy type-checker settings.
[tool.mypy]
check_untyped_defs = true
ignore_missing_imports = true
no_site_packages = true
exclude = [
    "olmocr/train/molmo/",
    "tests/*",
]

# Per-module override: modules matching `tests.*` run with strict_optional off.
[[tool.mypy.overrides]]
module = "tests.*"
strict_optional = false
# pytest settings.
[tool.pytest.ini_options]
testpaths = "tests/"
# Class-name patterns collected as test classes.
python_classes = ["Test*", "*Test"]
# Custom markers registered for this project.
markers = [
    "nonci: mark test as not intended for CI runs",
]
# Logging configuration.
log_level = "DEBUG"
log_format = "%(asctime)s - %(levelname)s - %(name)s - %(message)s"