[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"

[project]
# See https://setuptools.pypa.io/en/latest/userguide/quickstart.html for more project configuration options.
name = "olmocr"
# The version is not written here; it is resolved via [tool.setuptools.dynamic].
dynamic = ["version"]
readme = "README.md"
classifiers = [
    "Intended Audience :: Science/Research",
    "Development Status :: 3 - Alpha",
    "License :: OSI Approved :: Apache Software License",
    "Programming Language :: Python :: 3",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
authors = [
    {name = "Allen Institute for Artificial Intelligence", email = "contact@allenai.org"},
]
requires-python = ">=3.11"
# Core runtime requirements, installed for every user of the package.
dependencies = [
    "cached-path",
    "smart_open",
    "pypdf>=5.2.0",
    "pymupdf",
    "pypdfium2",
    "cryptography",
    "lingua-language-detector",
    "Pillow",
    "ftfy",
    "bleach",
    "markdown2",
    "filelock",
    "orjson",
    "requests",
    "zstandard",
    "boto3",
    "httpx",
    "torch>=2.5.1",
    "transformers>=4.46.2",
    "beaker-py",
]
license = {file = "LICENSE"}

[project.urls]
Homepage = "https://github.com/allenai/olmocr"
Repository = "https://github.com/allenai/olmocr"
Changelog = "https://github.com/allenai/olmocr/blob/main/CHANGELOG.md"
# Documentation = "https://olmocr.readthedocs.io/"

[project.optional-dependencies]
# Installed with `pip install olmocr[dev]`.
dev = [
    "ruff",
    "mypy>=1.0,<1.5",
    "black",
    "isort",
    "pytest",
    "pytest-sphinx",
    "pytest-cov",
    "twine>=1.11.0",
    "build",
    "setuptools",
    "wheel",
    "Sphinx>=4.3.0,<7.1.0",
    "furo==2023.7.26",
    "myst-parser>=1.0,<2.1",
    "sphinx-copybutton==0.5.2",
    "sphinx-autobuild==2021.3.14",
    "sphinx-autodoc-typehints==1.23.3",
    "packaging",
    "necessary",
]
# Installed with `pip install olmocr[train]`.
train = [
    "torch",
    "torchvision",
    "accelerate",
    "datasets",
    "peft",
    "wandb",
    "omegaconf",
    "s3fs",
    "necessary",
    "einops",
    # Same floor as the core `dependencies` entry: extras are resolved together
    # with the core list, so a lower floor here would have no effect.
    "transformers>=4.46.2",
]

[tool.setuptools]
include-package-data = true

[tool.setuptools.packages.find]
exclude = [
    "*.tests",
    "*.tests.*",
    "tests.*",
    "tests",
    "docs*",
    "scripts*",
]

[tool.setuptools.package-data]
olmocr = ["py.typed"]

[tool.setuptools.dynamic]
# Supplies the `version` field declared dynamic in [project].
version = {attr = "olmocr.version.VERSION"}

[tool.black]
line-length = 160
include = '\.pyi?$'
# Written as a multi-line pattern, one alternative per line, so it stays readable.
exclude = '''
(
      __pycache__
    | \.git
    | \.mypy_cache
    | \.pytest_cache
    | \.vscode
    | \.venv
    | \bdist\b
    | \bdoc\b
)
'''

[tool.isort]
profile = "black"
multi_line_output = 3

# You can override these pyright settings by adding a personal pyrightconfig.json file.
[tool.pyright]
reportPrivateImportUsage = false

[tool.ruff]
line-length = 160
target-version = "py311"
exclude = ["olmocr/train/molmo", "tests/*"]

# Rule selection lives under `lint`; Ruff deprecates the top-level
# `ignore` / `per-file-ignores` spellings.
[tool.ruff.lint]
ignore = ["E722"]  # ignore bare except

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]

[tool.mypy]
ignore_missing_imports = true
no_site_packages = true
check_untyped_defs = true

# Relaxed Optional checking applies to the test modules only.
[[tool.mypy.overrides]]
module = "tests.*"
strict_optional = false

[tool.pytest.ini_options]
testpaths = "tests/"
python_classes = [
    "Test*",
    "*Test",
]
log_format = "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
log_level = "DEBUG"