diff --git a/crawl4ai/__init__.py b/crawl4ai/__init__.py
index ea7c219..f4aa60b 100644
--- a/crawl4ai/__init__.py
+++ b/crawl4ai/__init__.py
@@ -33,9 +33,10 @@ from .async_dispatcher import (
 )
 from .docker_client import Crawl4aiDockerClient
 from .hub import CrawlerHub
-
+from .deep_crawling import DeepCrawlStrategy
 __all__ = [
     "AsyncWebCrawler",
+    "DeepCrawlStrategy",
     "CrawlResult",
     "CrawlerHub",
     "CacheMode",
diff --git a/crawl4ai/cli.py b/crawl4ai/cli.py
new file mode 100644
index 0000000..e69de29
diff --git a/crawl4ai/deep_crawling/__init__.py b/crawl4ai/deep_crawling/__init__.py
index bbe27ef..8ebdb58 100644
--- a/crawl4ai/deep_crawling/__init__.py
+++ b/crawl4ai/deep_crawling/__init__.py
@@ -2,10 +2,12 @@
 from .base_strategy import DeepCrawlDecorator, DeepCrawlStrategy
 from .bfs_strategy import BFSDeepCrawlStrategy
 from .bff_strategy import BestFirstCrawlingStrategy
+from .dfs_strategy import DFSDeepCrawlStrategy
 
 __all__ = [
     "DeepCrawlDecorator",
     "DeepCrawlStrategy",
     "BFSDeepCrawlStrategy",
     "BestFirstCrawlingStrategy",
+    "DFSDeepCrawlStrategy",
 ]
\ No newline at end of file
diff --git a/crawl4ai/deep_crawling/filters.py b/crawl4ai/deep_crawling/filters.py
index f74b1c6..68472e9 100644
--- a/crawl4ai/deep_crawling/filters.py
+++ b/crawl4ai/deep_crawling/filters.py
@@ -224,6 +224,7 @@ def create_common_filter_chain() -> FilterChain:
 
 
 # Use __slots__ and array for maximum memory/speed efficiency
+@dataclass
 class FastFilterStats:
     __slots__ = ("_counters",)
 
diff --git a/pyproject.toml b/pyproject.toml
index 38e1f89..ea6c549 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,9 @@ dependencies = [
     "rich>=13.9.4",
     "cssselect>=1.2.0",
     "httpx==0.27.2",
-    "fake-useragent>=2.0.3"
+    "fake-useragent>=2.0.3",
+    "click>=8.1.7",
+    "pyperclip>=1.8.2"
 ]
 classifiers = [
     "Development Status :: 4 - Beta",
@@ -70,7 +72,7 @@ crawl4ai-download-models = "crawl4ai.model_loader:main"
 crawl4ai-migrate = "crawl4ai.migrations:main"
 crawl4ai-setup = "crawl4ai.install:post_install"
 crawl4ai-doctor = "crawl4ai.install:doctor"
-crawl = "crawl4ai.cli:cli"
+crwl = "crawl4ai.cli:cli"
 
 [tool.setuptools]
 packages = {find = {where = ["."], include = ["crawl4ai*"]}}
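
For reviewers, a minimal sketch of how the newly exported strategies could be exercised. Only the imports are confirmed by the `__all__` changes above; `CrawlerRunConfig`, its `deep_crawl_strategy` field, and the `max_depth` constructor argument are assumptions used purely for illustration.

```python
import asyncio

# Exports confirmed by this diff: DeepCrawlStrategy from the package root,
# DFSDeepCrawlStrategy (alongside BFS/best-first) from crawl4ai.deep_crawling.
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, DeepCrawlStrategy
from crawl4ai.deep_crawling import DFSDeepCrawlStrategy


async def main() -> None:
    # Assumed constructor/config parameters -- illustrative only, not part of this patch.
    strategy: DeepCrawlStrategy = DFSDeepCrawlStrategy(max_depth=2)
    config = CrawlerRunConfig(deep_crawl_strategy=strategy)

    async with AsyncWebCrawler() as crawler:
        results = await crawler.arun("https://example.com", config=config)
        print(results)


if __name__ == "__main__":
    asyncio.run(main())
```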
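The new `crwl = "crawl4ai.cli:cli"` entry point requires `crawl4ai/cli.py` (added empty in this diff) to eventually expose a `cli` callable. A minimal click-based placeholder consistent with the new `click`/`pyperclip` dependencies might look like the following; the subcommand name, options, and pyperclip usage are hypothetical and not part of this patch.

```python
# crawl4ai/cli.py -- illustrative placeholder; the file is empty in this diff.
import click
import pyperclip


@click.group()
def cli() -> None:
    """Crawl4AI command-line interface (installed as `crwl`)."""


@cli.command()
@click.argument("url")
@click.option("--copy", is_flag=True, help="Copy the crawled output to the clipboard.")
def crawl(url: str, copy: bool) -> None:
    """Hypothetical subcommand: fetch URL and print (or copy) the result."""
    output = f"would crawl: {url}"  # real crawling logic would go here
    if copy:
        pyperclip.copy(output)
    click.echo(output)


if __name__ == "__main__":
    cli()
```

Note that after installation the console script is invoked as `crwl ...` rather than the previous `crawl ...`, per the `[project.scripts]` rename in `pyproject.toml`.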