# olmocr/pyproject.toml

[build-system]
# Build backend used to produce sdists and wheels for this project.
# "wheel" is intentionally not listed: the setuptools backend requests it
# itself when a wheel is being built, so naming it here is redundant.
requires = ["setuptools"]
build-backend = "setuptools.build_meta"
[project]
# See https://setuptools.pypa.io/en/latest/userguide/quickstart.html for more project configuration options.
name = "olmocr"
# The version is not hard-coded here; it is resolved through [tool.setuptools.dynamic].
dynamic = ["version"]
readme = "README.md"
classifiers = [
    "Intended Audience :: Science/Research",
    "Development Status :: 3 - Alpha",
    "License :: OSI Approved :: Apache Software License",
    "Programming Language :: Python :: 3",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
authors = [
    { name = "Allen Institute for Artificial Intelligence", email = "jakep@allenai.org" },
]
requires-python = ">=3.11"
# Runtime requirements installed with the base package (PEP 508 strings).
dependencies = [
    "cached-path",
    "smart_open",
    "pypdf>=5.2.0",
    "pypdfium2",
    "cryptography",
    "lingua-language-detector",
    "Pillow",
    "ftfy",
    "bleach",
    "markdown2",
    "filelock",
    "orjson",
    "requests",
    "zstandard",
    "boto3",
    "httpx",
    "torch>=2.5.1",
    # Exact pin; the "train" extra only asks for transformers>=4.45.1.
    "transformers==4.46.2",
    "img2pdf",
    "beaker-py",
]
license = { file = "LICENSE" }
# Project links published with the package metadata.
[project.urls]
Homepage = "https://github.com/allenai/olmocr"
Repository = "https://github.com/allenai/olmocr"
Changelog = "https://github.com/allenai/olmocr/blob/main/CHANGELOG.md"
# NOTE(review): the documentation link is left disabled; presumably no hosted
# docs exist at this URL yet -- confirm before enabling.
# Documentation = "https://olmocr.readthedocs.io/"
# Optional extras, installable as e.g. `pip install olmocr[gpu]`.
[project.optional-dependencies]
# Exact pins for the sglang stack.
gpu = [
    "sgl-kernel==0.0.3.post1",
    "sglang[all]==0.4.2",
]
# Linters/formatters, type checking, test tooling, packaging and Sphinx docs.
dev = [
    "ruff",
    "mypy",
    "black",
    "isort",
    "pytest",
    "pytest-sphinx",
    "pytest-cov",
    "twine>=1.11.0",
    "build",
    "setuptools",
    "wheel",
    "Sphinx>=4.3.0,<7.1.0",
    "furo==2023.7.26",
    "myst-parser>=1.0,<2.1",
    "sphinx-copybutton==0.5.2",
    "sphinx-autobuild==2021.3.14",
    "sphinx-autodoc-typehints==1.23.3",
    "packaging",
    "necessary",
    "peft",
    "datasets",
    "omegaconf",
    "spacy",
]
bench = [
    "tinyhost",
    "fuzzysearch",
    "rapidfuzz",
    "sequence_align",
    "syntok",
    "openai",
    "google-genai",
    "playwright",
    "mistralai",
    "lxml",
    "flask",
]
# NOTE(review): "torch" and "transformers" are already core dependencies
# (torch>=2.5.1, transformers==4.46.2); the entries below add no tighter
# constraint -- confirm whether they are kept deliberately.
train = [
    "torch",
    "torchvision",
    "accelerate",
    "datasets",
    "peft",
    "wandb",
    "omegaconf",
    "s3fs",
    "necessary",
    "einops",
    "transformers>=4.45.1",
]
elo = [
    "numpy",
    "scipy",
    "pandas",
    "matplotlib",
]
# setuptools configuration. The parent table comes first, followed by its
# sub-tables (package discovery, package data, dynamic metadata).
[tool.setuptools]
include-package-data = true

# Automatic package discovery; test, docs and scripts trees are left out.
[tool.setuptools.packages.find]
exclude = [
    "*.tests",
    "*.tests.*",
    "tests.*",
    "tests",
    "docs*",
    "scripts*",
]

# Non-Python files listed for inclusion with the "olmocr" package.
[tool.setuptools.package-data]
olmocr = [
    "py.typed",
    "viewer/*.html",
    "eval/*.html",
]

# Supplies the "version" field declared as dynamic in [project].
[tool.setuptools.dynamic]
version = { attr = "olmocr.version.VERSION" }
[tool.black]
# Same line length as [tool.ruff].
line-length = 160
# Regex selecting .py and .pyi files.
include = '\.pyi?$'
# Regex of paths to skip.
exclude = '''
(
__pycache__
| \.git
| \.mypy_cache
| \.pytest_cache
| \.vscode
| \.venv
| \bdist\b
| \bdoc\b
)
'''
# Import sorting, using isort's "black" profile.
# NOTE(review): no line_length is set here while [tool.black] uses 160 --
# confirm isort's effective line length is the one intended.
[tool.isort]
profile = "black"
multi_line_output = 3
# You can override these pyright settings by adding a personal pyrightconfig.json file.
[tool.pyright]
# Turn off pyright's reportPrivateImportUsage diagnostic for this project.
reportPrivateImportUsage = false
[tool.ruff]
# Same line length as [tool.black].
line-length = 160
target-version = "py311"
exclude = ["olmocr/train/molmo", "tests/*"]

# Linter-specific options live under `lint`; the top-level spellings of
# `ignore` and `per-file-ignores` are deprecated by ruff.
[tool.ruff.lint]
# E722: ignore bare `except`.
ignore = ["E722"]

[tool.ruff.lint.per-file-ignores]
# Do not report F401 in package __init__ files.
"__init__.py" = ["F401"]
[tool.mypy]
ignore_missing_imports = true
no_site_packages = true
check_untyped_defs = true
# NOTE(review): mypy interprets `exclude` entries as regular expressions, so
# "tests/*" means "tests" followed by any number of slashes -- confirm that
# this matches what is intended.
exclude = ["olmocr/train/molmo/", "tests/*"]

# Relaxed Optional handling for test modules.
[[tool.mypy.overrides]]
module = "tests.*"
strict_optional = false
[tool.pytest.ini_options]
testpaths = "tests/"
# Class-name patterns collected as test classes.
python_classes = [
    "Test*",
    "*Test",
]
log_format = "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
log_level = "DEBUG"
# Custom markers, registered here so pytest recognises them.
markers = [
    "nonci: mark test as not intended for CI runs",
]