crawl4ai/pyproject.toml

99 lines
2.3 KiB
TOML
Raw Normal View History

[build-system]
requires = ["setuptools>=64.0.0", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "Crawl4AI"
dynamic = ["version"]
description = "🚀🤖 Crawl4AI: Open-source LLM Friendly Web Crawler & scraper"
readme = "README.md"
requires-python = ">=3.9"
license = "Apache-2.0"
authors = [
{name = "Unclecode", email = "unclecode@kidocode.com"}
]
dependencies = [
"aiosqlite~=0.20",
"lxml~=5.3",
"litellm>=1.53.1",
"numpy>=1.26.0,<3",
"pillow~=10.4",
"playwright>=1.49.0",
"python-dotenv~=1.0",
"requests~=2.26",
"beautifulsoup4~=4.12",
"tf-playwright-stealth>=1.1.0",
"xxhash~=3.4",
"rank-bm25~=0.2",
"aiofiles>=24.1.0",
"colorama~=0.4",
"snowballstemmer~=2.2",
"pydantic>=2.10",
"pyOpenSSL>=24.3.0",
"psutil>=6.1.1",
"nltk>=3.9.1",
"playwright",
"aiofiles",
"rich>=13.9.4",
"cssselect>=1.2.0",
"httpx>=0.27.2",
"fake-useragent>=2.0.3",
"click>=8.1.7",
"pyperclip>=1.8.2",
"chardet>=5.2.0",
"aiohttp>=3.11.11",
"brotli>=1.1.0",
"humanize>=4.10.0",
]
classifiers = [
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
]
[project.optional-dependencies]
pdf = ["PyPDF2"]
torch = ["torch", "nltk", "scikit-learn"]
transformer = ["transformers", "tokenizers"]
cosine = ["torch", "transformers", "nltk"]
sync = ["selenium"]
all = [
Release prep (#749) * fix: Update export of URLPatternFilter * chore: Add dependancy for cchardet in requirements * docs: Update example for deep crawl in release note for v0.5 * Docs: update the example for memory dispatcher * docs: updated example for crawl strategies * Refactor: Removed wrapping in if __name__==main block since this is a markdown file. * chore: removed cchardet from dependancy list, since unclecode is planning to remove it * docs: updated the example for proxy rotation to a working example * feat: Introduced ProxyConfig param * Add tutorial for deep crawl & update contributor list for bug fixes in feb alpha-1 * chore: update and test new dependancies * feat:Make PyPDF2 a conditional dependancy * updated tutorial and release note for v0.5 * docs: update docs for deep crawl, and fix a typo in docker-deployment markdown filename * refactor: 1. Deprecate markdown_v2 2. Make markdown backward compatible to behave as a string when needed. 3. Fix LlmConfig usage in cli 4. Deprecate markdown_v2 in cli 5. Update AsyncWebCrawler for changes in CrawlResult * fix: Bug in serialisation of markdown in acache_url * Refactor: Added deprecation errors for fit_html and fit_markdown directly on markdown. Now access them via markdown * fix: remove deprecated markdown_v2 from docker * Refactor: remove deprecated fit_markdown and fit_html from result * refactor: fix cache retrieval for markdown as a string * chore: update all docs, examples and tests with deprecation announcements for markdown_v2, fit_html, fit_markdown
2025-02-28 17:23:35 +05:30
"PyPDF2",
"torch",
"nltk",
"scikit-learn",
"transformers",
"tokenizers",
"selenium",
"PyPDF2"
]
[project.scripts]
crawl4ai-download-models = "crawl4ai.model_loader:main"
crawl4ai-migrate = "crawl4ai.migrations:main"
crawl4ai-setup = "crawl4ai.install:post_install"
crawl4ai-doctor = "crawl4ai.install:doctor"
crwl = "crawl4ai.cli:main"
[tool.setuptools]
packages = {find = {where = ["."], include = ["crawl4ai*"]}}
[tool.setuptools.package-data]
crawl4ai = ["js_snippet/*.js"]
[tool.setuptools.dynamic]
version = {attr = "crawl4ai.__version__.__version__"}
[tool.uv.sources]
crawl4ai = { workspace = true }
[dependency-groups]
dev = [
"crawl4ai",
]