# crawl4ai/docs/examples/dispatcher_example.py
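"""Compare four dispatch strategies for AsyncWebCrawler.arun_many():
MemoryAdaptiveDispatcher and SemaphoreDispatcher, each run with and
without a RateLimiter, then print a throughput comparison table."""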

import asyncio
import time
from rich import print
from rich.table import Table
from crawl4ai import (
    AsyncWebCrawler, BrowserConfig, CrawlerRunConfig,
    MemoryAdaptiveDispatcher, SemaphoreDispatcher,
    RateLimiter, CrawlerMonitor, DisplayMode, CacheMode
)


async def memory_adaptive(urls, browser_config, run_config):
    """Memory adaptive crawler with monitoring"""
    start = time.perf_counter()
    async with AsyncWebCrawler(config=browser_config) as crawler:
        dispatcher = MemoryAdaptiveDispatcher(
            memory_threshold_percent=70.0,
            max_session_permit=10,
            monitor=CrawlerMonitor(
                max_visible_rows=15,
                display_mode=DisplayMode.DETAILED
            )
        )
        results = await crawler.arun_many(urls, config=run_config, dispatcher=dispatcher)
    duration = time.perf_counter() - start
    return len(results), duration
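

# Same adaptive dispatch as above, plus a RateLimiter for politeness:
# base_delay=(1.0, 2.0) waits a random 1-2 s between hits to the same
# domain, and on rate-limit responses (HTTP 429/503 by default) it backs
# off up to max_delay seconds, retrying at most max_retries times.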
async def memory_adaptive_with_rate_limit(urls, browser_config, run_config):
    """Memory adaptive crawler with rate limiting"""
    start = time.perf_counter()
    async with AsyncWebCrawler(config=browser_config) as crawler:
        dispatcher = MemoryAdaptiveDispatcher(
            memory_threshold_percent=70.0,
            max_session_permit=10,
            rate_limiter=RateLimiter(
                base_delay=(1.0, 2.0),
                max_delay=30.0,
                max_retries=2
            ),
            monitor=CrawlerMonitor(
                max_visible_rows=15,
                display_mode=DisplayMode.DETAILED
            )
        )
        results = await crawler.arun_many(urls, config=run_config, dispatcher=dispatcher)
    duration = time.perf_counter() - start
    return len(results), duration
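

# SemaphoreDispatcher uses a fixed pool of concurrency slots
# (semaphore_count) instead of adapting to memory pressure the way
# MemoryAdaptiveDispatcher does with memory_threshold_percent.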
async def semaphore(urls, browser_config, run_config):
    """Basic semaphore crawler"""
    start = time.perf_counter()
    async with AsyncWebCrawler(config=browser_config) as crawler:
        dispatcher = SemaphoreDispatcher(
            semaphore_count=5,
            monitor=CrawlerMonitor(
                max_visible_rows=15,
                display_mode=DisplayMode.DETAILED
            )
        )
        results = await crawler.arun_many(urls, config=run_config, dispatcher=dispatcher)
    duration = time.perf_counter() - start
    return len(results), duration


async def semaphore_with_rate_limit(urls, browser_config, run_config):
    """Semaphore crawler with rate limiting"""
    start = time.perf_counter()
    async with AsyncWebCrawler(config=browser_config) as crawler:
        dispatcher = SemaphoreDispatcher(
            semaphore_count=5,
            rate_limiter=RateLimiter(
                base_delay=(1.0, 2.0),
                max_delay=30.0,
                max_retries=2
            ),
            monitor=CrawlerMonitor(
                max_visible_rows=15,
                display_mode=DisplayMode.DETAILED
            )
        )
        results = await crawler.arun_many(urls, config=run_config, dispatcher=dispatcher)
    duration = time.perf_counter() - start
    return len(results), duration
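

# Each strategy above returns (result_count, duration). Note that
# arun_many() yields a result for every URL, failed or not, so the count
# reflects attempts; filter on r.success if you only want successes.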
def create_performance_table(results):
    """Creates a rich table showing performance results"""
    table = Table(title="Crawler Strategy Performance Comparison")
    table.add_column("Strategy", style="cyan")
    table.add_column("URLs Crawled", justify="right", style="green")
    table.add_column("Time (seconds)", justify="right", style="yellow")
    table.add_column("URLs/second", justify="right", style="magenta")

    sorted_results = sorted(results.items(), key=lambda x: x[1][1])
    for strategy, (urls_crawled, duration) in sorted_results:
        urls_per_second = urls_crawled / duration
        table.add_row(
            strategy,
            str(urls_crawled),
            f"{duration:.2f}",
            f"{urls_per_second:.2f}"
        )
    return table
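

# range(1, 20) gives 19 placeholder URLs on example.com; substitute real
# pages to get meaningful throughput numbers.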
async def main():
    urls = [f"https://example.com/page{i}" for i in range(1, 20)]
    browser_config = BrowserConfig(headless=True, verbose=False)
    run_config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS)

    results = {
        "Memory Adaptive": await memory_adaptive(urls, browser_config, run_config),
        "Memory Adaptive + Rate Limit": await memory_adaptive_with_rate_limit(urls, browser_config, run_config),
        "Semaphore": await semaphore(urls, browser_config, run_config),
        "Semaphore + Rate Limit": await semaphore_with_rate_limit(urls, browser_config, run_config),
    }

    table = create_performance_table(results)
    print("\nPerformance Summary:")
    print(table)


if __name__ == "__main__":
    asyncio.run(main())