docs(examples): update demo scripts and fix output formats

Update example scripts to reflect latest API changes and improve demonstrations:
- Increase test URLs in dispatcher example from 20 to 40 pages
- Comment out unused dispatcher strategies for cleaner output
- Fix scraping strategies performance script to use correct object notation
- Update v0_4_3_features_demo with additional feature mentions and uncomment demo sections

These changes make the examples more current and better aligned with the actual API.
Author: UncleCode
Date:   2025-01-22 20:40:03 +08:00
Commit: 976ea52167 (parent: 2d69bf2366)

3 changed files with 26 additions and 24 deletions

@@ -112,19 +112,19 @@ def create_performance_table(results):
 async def main():
-    urls = [f"https://example.com/page{i}" for i in range(1, 20)]
+    urls = [f"https://example.com/page{i}" for i in range(1, 40)]
     browser_config = BrowserConfig(headless=True, verbose=False)
     run_config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS, scraping_strategy=LXMLWebScrapingStrategy())

     results = {
         "Memory Adaptive": await memory_adaptive(urls, browser_config, run_config),
-        "Memory Adaptive + Rate Limit": await memory_adaptive_with_rate_limit(
-            urls, browser_config, run_config
-        ),
-        "Semaphore": await semaphore(urls, browser_config, run_config),
-        "Semaphore + Rate Limit": await semaphore_with_rate_limit(
-            urls, browser_config, run_config
-        ),
+        # "Memory Adaptive + Rate Limit": await memory_adaptive_with_rate_limit(
+        #     urls, browser_config, run_config
+        # ),
+        # "Semaphore": await semaphore(urls, browser_config, run_config),
+        # "Semaphore + Rate Limit": await semaphore_with_rate_limit(
+        #     urls, browser_config, run_config
+        # ),
     }

     table = create_performance_table(results)
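
For reference, memory_adaptive and the other helpers benchmarked above each time one dispatch strategy over the same URL list. A minimal sketch of what such a helper could look like, assuming crawl4ai's MemoryAdaptiveDispatcher and AsyncWebCrawler.arun_many APIs; the threshold and permit values, and returning elapsed seconds for the results table, are illustrative assumptions:

import time

from crawl4ai import AsyncWebCrawler
from crawl4ai.async_dispatcher import MemoryAdaptiveDispatcher

async def memory_adaptive(urls, browser_config, run_config):
    # Throttles new browser sessions when system memory crosses the threshold.
    dispatcher = MemoryAdaptiveDispatcher(
        memory_threshold_percent=70.0,  # illustrative value
        max_session_permit=10,          # illustrative value
    )
    start = time.perf_counter()
    async with AsyncWebCrawler(config=browser_config) as crawler:
        await crawler.arun_many(urls, config=run_config, dispatcher=dispatcher)
    return time.perf_counter() - start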

@@ -117,17 +117,17 @@ def test_scraping():
     timing_stats.report()

     # Print stats of LXML output
-    print("\nLXML Output:")
-    print(f"\nExtracted links: {len(result_selected['links']['internal']) + len(result_selected['links']['external'])}")
-    print(f"Extracted images: {len(result_selected['media']['images'])}")
-    print(f"Clean HTML size: {len(result_selected['cleaned_html'])/1024:.2f} KB")
+    print("\nTurbo Output:")
+    print(f"\nExtracted links: {len(result_selected.links.internal) + len(result_selected.links.external)}")
+    print(f"Extracted images: {len(result_selected.media.images)}")
+    print(f"Clean HTML size: {len(result_selected.cleaned_html)/1024:.2f} KB")
     print(f"Scraping time: {t2 - t1:.2f} seconds")

     # Print stats of original output
     print("\nOriginal Output:")
-    print(f"\nExtracted links: {len(result_original['links']['internal']) + len(result_original['links']['external'])}")
-    print(f"Extracted images: {len(result_original['media']['images'])}")
-    print(f"Clean HTML size: {len(result_original['cleaned_html'])/1024:.2f} KB")
+    print(f"\nExtracted links: {len(result_original.links.internal) + len(result_original.links.external)}")
+    print(f"Extracted images: {len(result_original.media.images)}")
+    print(f"Clean HTML size: {len(result_original.cleaned_html)/1024:.2f} KB")
     print(f"Scraping time: {t3 - t1:.2f} seconds")

@@ -18,6 +18,8 @@ This demonstration showcases three major categories of new features in Crawl4ai
 - Robots.txt compliance
 - Proxy rotation
 - Enhanced URL handling
+- Shared data among hooks
+- Page route support
 Each demo function can be run independently or as part of the full suite.
 """
@@ -333,19 +335,19 @@ async def main():
     print("\n📊 Running Crawl4ai v0.4.3 Feature Demos\n")

     # Efficiency & Speed Demos
-    # print("\n🚀 EFFICIENCY & SPEED DEMOS")
-    # await demo_memory_dispatcher()
-    # await demo_streaming_support()
-    # await demo_content_scraping()
+    print("\n🚀 EFFICIENCY & SPEED DEMOS")
+    await demo_memory_dispatcher()
+    await demo_streaming_support()
+    await demo_content_scraping()

     # # LLM Integration Demos
-    # print("\n🤖 LLM INTEGRATION DEMOS")
-    # await demo_json_schema_generation()
-    # await demo_llm_markdown()
+    print("\n🤖 LLM INTEGRATION DEMOS")
+    await demo_json_schema_generation()
+    await demo_llm_markdown()

     # # Core Improvements
-    # print("\n🔧 CORE IMPROVEMENT DEMOS")
-    # await demo_robots_compliance()
+    print("\n🔧 CORE IMPROVEMENT DEMOS")
+    await demo_robots_compliance()
     await demo_proxy_rotation()

 if __name__ == "__main__":
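
The hunk cuts off at the entry-point guard; assuming the script drives main() with asyncio directly, the customary closing lines are:

import asyncio

if __name__ == "__main__":
    asyncio.run(main())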