docs(examples): update demo scripts and fix output formats

Update example scripts to reflect latest API changes and improve demonstrations:
- Increase test URLs in dispatcher example from 20 to 40 pages
- Comment out unused dispatcher strategies for cleaner output
- Fix scraping strategies performance script to use correct object notation
- Update v0_4_3_features_demo with additional feature mentions and uncomment demo sections

These changes make the examples more current and better aligned with the actual API.
This commit is contained in:
UncleCode 2025-01-22 20:40:03 +08:00
parent 2d69bf2366
commit 976ea52167
3 changed files with 26 additions and 24 deletions

View File

@ -112,19 +112,19 @@ def create_performance_table(results):
async def main():
urls = [f"https://example.com/page{i}" for i in range(1, 20)]
urls = [f"https://example.com/page{i}" for i in range(1, 40)]
browser_config = BrowserConfig(headless=True, verbose=False)
run_config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS, scraping_strategy=LXMLWebScrapingStrategy())
results = {
"Memory Adaptive": await memory_adaptive(urls, browser_config, run_config),
"Memory Adaptive + Rate Limit": await memory_adaptive_with_rate_limit(
urls, browser_config, run_config
),
"Semaphore": await semaphore(urls, browser_config, run_config),
"Semaphore + Rate Limit": await semaphore_with_rate_limit(
urls, browser_config, run_config
),
# "Memory Adaptive + Rate Limit": await memory_adaptive_with_rate_limit(
# urls, browser_config, run_config
# ),
# "Semaphore": await semaphore(urls, browser_config, run_config),
# "Semaphore + Rate Limit": await semaphore_with_rate_limit(
# urls, browser_config, run_config
# ),
}
table = create_performance_table(results)

View File

@ -117,17 +117,17 @@ def test_scraping():
timing_stats.report()
# Print stats of LXML output
print("\nLXML Output:")
print(f"\nExtracted links: {len(result_selected['links']['internal']) + len(result_selected['links']['external'])}")
print(f"Extracted images: {len(result_selected['media']['images'])}")
print(f"Clean HTML size: {len(result_selected['cleaned_html'])/1024:.2f} KB")
print("\Turbo Output:")
print(f"\nExtracted links: {len(result_selected.links.internal) + len(result_selected.links.external)}")
print(f"Extracted images: {len(result_selected.media.images)}")
print(f"Clean HTML size: {len(result_selected.cleaned_html)/1024:.2f} KB")
print(f"Scraping time: {t2 - t1:.2f} seconds")
# Print stats of original output
print("\nOriginal Output:")
print(f"\nExtracted links: {len(result_original['links']['internal']) + len(result_original['links']['external'])}")
print(f"Extracted images: {len(result_original['media']['images'])}")
print(f"Clean HTML size: {len(result_original['cleaned_html'])/1024:.2f} KB")
print(f"\nExtracted links: {len(result_original.links.internal) + len(result_original.links.external)}")
print(f"Extracted images: {len(result_original.media.images)}")
print(f"Clean HTML size: {len(result_original.cleaned_html)/1024:.2f} KB")
print(f"Scraping time: {t3 - t1:.2f} seconds")

View File

@ -18,6 +18,8 @@ This demonstration showcases three major categories of new features in Crawl4ai
- Robots.txt compliance
- Proxy rotation
- Enhanced URL handling
- Shared data among hooks
- add page routes
Each demo function can be run independently or as part of the full suite.
"""
@ -333,19 +335,19 @@ async def main():
print("\n📊 Running Crawl4ai v0.4.3 Feature Demos\n")
# Efficiency & Speed Demos
# print("\n🚀 EFFICIENCY & SPEED DEMOS")
# await demo_memory_dispatcher()
# await demo_streaming_support()
# await demo_content_scraping()
print("\n🚀 EFFICIENCY & SPEED DEMOS")
await demo_memory_dispatcher()
await demo_streaming_support()
await demo_content_scraping()
# # LLM Integration Demos
# print("\n🤖 LLM INTEGRATION DEMOS")
# await demo_json_schema_generation()
# await demo_llm_markdown()
print("\n🤖 LLM INTEGRATION DEMOS")
await demo_json_schema_generation()
await demo_llm_markdown()
# # Core Improvements
# print("\n🔧 CORE IMPROVEMENT DEMOS")
# await demo_robots_compliance()
print("\n🔧 CORE IMPROVEMENT DEMOS")
await demo_robots_compliance()
await demo_proxy_rotation()
if __name__ == "__main__":