2024-09-17 07:53:43 -07:00
|
|
|
[build-system]
|
|
|
|
requires = ["setuptools", "wheel"]
|
|
|
|
build-backend = "setuptools.build_meta"
|
|
|
|
|
|
|
|
[project]
|
|
|
|
# See https://setuptools.pypa.io/en/latest/userguide/quickstart.html for more project configuration options.
|
2025-01-27 18:30:41 +00:00
|
|
|
name = "olmocr"
|
2024-09-17 07:53:43 -07:00
|
|
|
dynamic = ["version"]
|
|
|
|
readme = "README.md"
|
|
|
|
classifiers = [
|
|
|
|
"Intended Audience :: Science/Research",
|
|
|
|
"Development Status :: 3 - Alpha",
|
|
|
|
"License :: OSI Approved :: Apache Software License",
|
|
|
|
"Programming Language :: Python :: 3",
|
|
|
|
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
|
|
]
|
|
|
|
authors = [
|
|
|
|
{name = "Allen Institute for Artificial Intelligence", email = "contact@allenai.org"}
|
|
|
|
]
|
|
|
|
requires-python = ">=3.8"
|
|
|
|
dependencies = [
|
2024-09-17 16:26:55 +00:00
|
|
|
"cached-path",
|
2024-09-20 08:22:10 -07:00
|
|
|
"smart_open",
|
2025-01-27 18:41:13 +00:00
|
|
|
"pypdf>=5.2.0",
|
2024-09-17 16:26:55 +00:00
|
|
|
"pymupdf",
|
2024-09-17 18:47:27 +00:00
|
|
|
"pypdfium2",
|
2024-10-16 13:18:24 -07:00
|
|
|
"cryptography",
|
2024-09-23 08:20:08 -07:00
|
|
|
"lingua-language-detector",
|
|
|
|
"Pillow",
|
2024-10-09 17:53:26 +00:00
|
|
|
"ftfy",
|
2024-10-10 22:10:26 +00:00
|
|
|
"bleach",
|
2024-10-16 16:45:07 +00:00
|
|
|
"markdown2",
|
2024-10-16 18:26:25 +00:00
|
|
|
"filelock",
|
2024-10-30 13:24:11 -07:00
|
|
|
"orjson",
|
2024-11-07 13:26:42 -08:00
|
|
|
"requests",
|
|
|
|
"zstandard",
|
2024-11-15 12:48:36 -08:00
|
|
|
"aiohttp>=3.10,<3.11", # Specific timeout thing is causing issues
|
2024-11-13 09:46:08 -08:00
|
|
|
"boto3",
|
2024-11-13 10:25:35 -08:00
|
|
|
"torch>=2.4.0",
|
2024-11-13 09:46:08 -08:00
|
|
|
"transformers>=4.46.2",
|
2024-09-17 07:53:43 -07:00
|
|
|
]
|
|
|
|
license = {file = "LICENSE"}
|
|
|
|
|
|
|
|
[project.urls]
|
2025-01-27 18:30:41 +00:00
|
|
|
Homepage = "https://github.com/allenai/olmocr"
|
|
|
|
Repository = "https://github.com/allenai/olmocr"
|
|
|
|
Changelog = "https://github.com/allenai/olmocr/blob/main/CHANGELOG.md"
|
|
|
|
# Documentation = "https://olmocr.readthedocs.io/"
|
2024-09-17 07:53:43 -07:00
|
|
|
|
|
|
|
[project.optional-dependencies]
|
|
|
|
dev = [
|
|
|
|
"ruff",
|
|
|
|
"mypy>=1.0,<1.5",
|
|
|
|
"black>=23.0,<24.0",
|
|
|
|
"isort>=5.12,<5.13",
|
|
|
|
"pytest",
|
|
|
|
"pytest-sphinx",
|
|
|
|
"pytest-cov",
|
|
|
|
"twine>=1.11.0",
|
|
|
|
"build",
|
|
|
|
"setuptools",
|
|
|
|
"wheel",
|
|
|
|
"Sphinx>=4.3.0,<7.1.0",
|
|
|
|
"furo==2023.7.26",
|
|
|
|
"myst-parser>=1.0,<2.1",
|
|
|
|
"sphinx-copybutton==0.5.2",
|
|
|
|
"sphinx-autobuild==2021.3.14",
|
|
|
|
"sphinx-autodoc-typehints==1.23.3",
|
2024-09-20 08:22:10 -07:00
|
|
|
"packaging",
|
|
|
|
"necessary",
|
2024-11-08 15:02:40 -08:00
|
|
|
]
|
|
|
|
|
|
|
|
inference = [
|
2024-11-22 19:37:31 +00:00
|
|
|
"sglang[all]>=0.3.6",
|
2024-11-15 13:30:27 -08:00
|
|
|
"beaker-py",
|
2024-09-17 07:53:43 -07:00
|
|
|
]
|
|
|
|
|
2024-09-20 15:09:45 +00:00
|
|
|
train = [
|
|
|
|
"torch",
|
|
|
|
"torchvision",
|
2024-09-20 08:22:10 -07:00
|
|
|
"accelerate",
|
|
|
|
"datasets",
|
|
|
|
"peft",
|
|
|
|
"wandb",
|
|
|
|
"omegaconf",
|
|
|
|
"s3fs",
|
2024-09-27 15:16:12 +00:00
|
|
|
"necessary",
|
2025-01-22 15:23:08 -08:00
|
|
|
"einops",
|
2024-10-03 09:00:53 -07:00
|
|
|
"transformers>=4.45.1"
|
2024-09-20 15:09:45 +00:00
|
|
|
]
|
|
|
|
|
2024-09-17 07:53:43 -07:00
|
|
|
[tool.setuptools.packages.find]
|
|
|
|
exclude = [
|
|
|
|
"*.tests",
|
|
|
|
"*.tests.*",
|
|
|
|
"tests.*",
|
|
|
|
"tests",
|
|
|
|
"docs*",
|
|
|
|
"scripts*"
|
|
|
|
]
|
|
|
|
|
|
|
|
[tool.setuptools]
|
|
|
|
include-package-data = true
|
|
|
|
|
|
|
|
[tool.setuptools.package-data]
|
2025-01-27 18:30:41 +00:00
|
|
|
olmocr = ["py.typed"]
|
2024-09-17 07:53:43 -07:00
|
|
|
|
|
|
|
[tool.setuptools.dynamic]
|
2025-01-27 18:30:41 +00:00
|
|
|
version = {attr = "olmocr.version.VERSION"}
|
2024-09-17 07:53:43 -07:00
|
|
|
|
|
|
|
[tool.black]
|
2024-09-18 22:52:42 +00:00
|
|
|
line-length = 120
|
2024-09-17 07:53:43 -07:00
|
|
|
include = '\.pyi?$'
|
|
|
|
exclude = '''
|
|
|
|
(
|
|
|
|
__pycache__
|
|
|
|
| \.git
|
|
|
|
| \.mypy_cache
|
|
|
|
| \.pytest_cache
|
|
|
|
| \.vscode
|
|
|
|
| \.venv
|
|
|
|
| \bdist\b
|
|
|
|
| \bdoc\b
|
|
|
|
)
|
|
|
|
'''
|
|
|
|
|
|
|
|
[tool.isort]
|
|
|
|
profile = "black"
|
|
|
|
multi_line_output = 3
|
|
|
|
|
|
|
|
# You can override these pyright settings by adding a personal pyrightconfig.json file.
|
|
|
|
[tool.pyright]
|
|
|
|
reportPrivateImportUsage = false
|
|
|
|
|
|
|
|
[tool.ruff]
|
|
|
|
line-length = 115
|
|
|
|
# NOTE(review): [project] declares requires-python = ">=3.8", but ruff is told to target py39.
# Confirm which minimum Python version is intended and make the two agree.
target-version = "py39"
|
|
|
|
|
|
|
|
# Lint settings belong under `tool.ruff.lint`; ruff deprecated the top-level form of this table.
[tool.ruff.lint.per-file-ignores]
|
|
|
|
"__init__.py" = ["F401"]
|
|
|
|
|
|
|
|
[tool.mypy]
|
|
|
|
ignore_missing_imports = true
|
|
|
|
no_site_packages = true
|
|
|
|
check_untyped_defs = true
|
|
|
|
|
|
|
|
[[tool.mypy.overrides]]
|
|
|
|
module = "tests.*"
|
|
|
|
strict_optional = false
|
|
|
|
|
|
|
|
[tool.pytest.ini_options]
|
|
|
|
# Directories pytest searches when no paths are given on the command line.
testpaths = ["tests/"]
|
|
|
|
python_classes = [
|
|
|
|
"Test*",
|
|
|
|
"*Test"
|
|
|
|
]
|
|
|
|
log_format = "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
|
|
|
|
log_level = "DEBUG"
|