diff --git a/docs/examples/dispatcher_example.py b/docs/examples/dispatcher_example.py
index ae6406b..cac0818 100644
--- a/docs/examples/dispatcher_example.py
+++ b/docs/examples/dispatcher_example.py
@@ -112,19 +112,19 @@ def create_performance_table(results):
 async def main():
-    urls = [f"https://example.com/page{i}" for i in range(1, 20)]
+    urls = [f"https://example.com/page{i}" for i in range(1, 40)]
     browser_config = BrowserConfig(headless=True, verbose=False)
     run_config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS, scraping_strategy=LXMLWebScrapingStrategy())
     results = {
         "Memory Adaptive": await memory_adaptive(urls, browser_config, run_config),
-        "Memory Adaptive + Rate Limit": await memory_adaptive_with_rate_limit(
-            urls, browser_config, run_config
-        ),
-        "Semaphore": await semaphore(urls, browser_config, run_config),
-        "Semaphore + Rate Limit": await semaphore_with_rate_limit(
-            urls, browser_config, run_config
-        ),
+        # "Memory Adaptive + Rate Limit": await memory_adaptive_with_rate_limit(
+        #     urls, browser_config, run_config
+        # ),
+        # "Semaphore": await semaphore(urls, browser_config, run_config),
+        # "Semaphore + Rate Limit": await semaphore_with_rate_limit(
+        #     urls, browser_config, run_config
+        # ),
     }
     table = create_performance_table(results)
diff --git a/docs/examples/scraping_strategies_performance.py b/docs/examples/scraping_strategies_performance.py
index b8c80be..87fb8ac 100644
--- a/docs/examples/scraping_strategies_performance.py
+++ b/docs/examples/scraping_strategies_performance.py
@@ -117,17 +117,17 @@ def test_scraping():
     timing_stats.report()
     # Print stats of LXML output
-    print("\nLXML Output:")
-    print(f"\nExtracted links: {len(result_selected['links']['internal']) + len(result_selected['links']['external'])}")
-    print(f"Extracted images: {len(result_selected['media']['images'])}")
-    print(f"Clean HTML size: {len(result_selected['cleaned_html'])/1024:.2f} KB")
+    print("\nTurbo Output:")
+    print(f"\nExtracted links: {len(result_selected.links.internal) + len(result_selected.links.external)}")
+    print(f"Extracted images: {len(result_selected.media.images)}")
+    print(f"Clean HTML size: {len(result_selected.cleaned_html)/1024:.2f} KB")
     print(f"Scraping time: {t2 - t1:.2f} seconds")
     # Print stats of original output
     print("\nOriginal Output:")
-    print(f"\nExtracted links: {len(result_original['links']['internal']) + len(result_original['links']['external'])}")
-    print(f"Extracted images: {len(result_original['media']['images'])}")
-    print(f"Clean HTML size: {len(result_original['cleaned_html'])/1024:.2f} KB")
+    print(f"\nExtracted links: {len(result_original.links.internal) + len(result_original.links.external)}")
+    print(f"Extracted images: {len(result_original.media.images)}")
+    print(f"Clean HTML size: {len(result_original.cleaned_html)/1024:.2f} KB")
     print(f"Scraping time: {t3 - t1:.2f} seconds")
diff --git a/docs/examples/v0_4_3_features_demo.py b/docs/examples/v0_4_3_features_demo.py
index 9406b50..6e7a8a0 100644
--- a/docs/examples/v0_4_3_features_demo.py
+++ b/docs/examples/v0_4_3_features_demo.py
@@ -18,6 +18,8 @@ This demonstration showcases three major categories of new features in Crawl4ai
    - Robots.txt compliance
    - Proxy rotation
    - Enhanced URL handling
+   - Shared data among hooks
+   - Add page routes
 Each demo function can be run independently or as part of the full suite.
""" @@ -333,19 +335,19 @@ async def main(): print("\nšŸ“Š Running Crawl4ai v0.4.3 Feature Demos\n") # Efficiency & Speed Demos - # print("\nšŸš€ EFFICIENCY & SPEED DEMOS") - # await demo_memory_dispatcher() - # await demo_streaming_support() - # await demo_content_scraping() + print("\nšŸš€ EFFICIENCY & SPEED DEMOS") + await demo_memory_dispatcher() + await demo_streaming_support() + await demo_content_scraping() # # LLM Integration Demos - # print("\nšŸ¤– LLM INTEGRATION DEMOS") - # await demo_json_schema_generation() - # await demo_llm_markdown() + print("\nšŸ¤– LLM INTEGRATION DEMOS") + await demo_json_schema_generation() + await demo_llm_markdown() # # Core Improvements - # print("\nšŸ”§ CORE IMPROVEMENT DEMOS") - # await demo_robots_compliance() + print("\nšŸ”§ CORE IMPROVEMENT DEMOS") + await demo_robots_compliance() await demo_proxy_rotation() if __name__ == "__main__":