docs(examples): update demo scripts and fix output formats
Update example scripts to reflect latest API changes and improve demonstrations:
- Increase test URLs in dispatcher example from 20 to 40 pages
- Comment out unused dispatcher strategies for cleaner output
- Fix scraping strategies performance script to use correct object notation
- Update v0_4_3_features_demo with additional feature mentions and uncomment demo sections

These changes make the examples more current and better aligned with the actual API.
This commit is contained in:
parent
2d69bf2366
commit
976ea52167
@ -112,19 +112,19 @@ def create_performance_table(results):
|
||||
|
||||
|
||||
async def main():
|
||||
urls = [f"https://example.com/page{i}" for i in range(1, 20)]
|
||||
urls = [f"https://example.com/page{i}" for i in range(1, 40)]
|
||||
browser_config = BrowserConfig(headless=True, verbose=False)
|
||||
run_config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS, scraping_strategy=LXMLWebScrapingStrategy())
|
||||
|
||||
results = {
|
||||
"Memory Adaptive": await memory_adaptive(urls, browser_config, run_config),
|
||||
"Memory Adaptive + Rate Limit": await memory_adaptive_with_rate_limit(
|
||||
urls, browser_config, run_config
|
||||
),
|
||||
"Semaphore": await semaphore(urls, browser_config, run_config),
|
||||
"Semaphore + Rate Limit": await semaphore_with_rate_limit(
|
||||
urls, browser_config, run_config
|
||||
),
|
||||
# "Memory Adaptive + Rate Limit": await memory_adaptive_with_rate_limit(
|
||||
# urls, browser_config, run_config
|
||||
# ),
|
||||
# "Semaphore": await semaphore(urls, browser_config, run_config),
|
||||
# "Semaphore + Rate Limit": await semaphore_with_rate_limit(
|
||||
# urls, browser_config, run_config
|
||||
# ),
|
||||
}
|
||||
|
||||
table = create_performance_table(results)
|
||||
|
@ -117,17 +117,17 @@ def test_scraping():
|
||||
timing_stats.report()
|
||||
|
||||
# Print stats of LXML output
|
||||
print("\nLXML Output:")
|
||||
print(f"\nExtracted links: {len(result_selected['links']['internal']) + len(result_selected['links']['external'])}")
|
||||
print(f"Extracted images: {len(result_selected['media']['images'])}")
|
||||
print(f"Clean HTML size: {len(result_selected['cleaned_html'])/1024:.2f} KB")
|
||||
print("\Turbo Output:")
|
||||
print(f"\nExtracted links: {len(result_selected.links.internal) + len(result_selected.links.external)}")
|
||||
print(f"Extracted images: {len(result_selected.media.images)}")
|
||||
print(f"Clean HTML size: {len(result_selected.cleaned_html)/1024:.2f} KB")
|
||||
print(f"Scraping time: {t2 - t1:.2f} seconds")
|
||||
|
||||
# Print stats of original output
|
||||
print("\nOriginal Output:")
|
||||
print(f"\nExtracted links: {len(result_original['links']['internal']) + len(result_original['links']['external'])}")
|
||||
print(f"Extracted images: {len(result_original['media']['images'])}")
|
||||
print(f"Clean HTML size: {len(result_original['cleaned_html'])/1024:.2f} KB")
|
||||
print(f"\nExtracted links: {len(result_original.links.internal) + len(result_original.links.external)}")
|
||||
print(f"Extracted images: {len(result_original.media.images)}")
|
||||
print(f"Clean HTML size: {len(result_original.cleaned_html)/1024:.2f} KB")
|
||||
print(f"Scraping time: {t3 - t1:.2f} seconds")
|
||||
|
||||
|
||||
|
@ -18,6 +18,8 @@ This demonstration showcases three major categories of new features in Crawl4ai
|
||||
- Robots.txt compliance
|
||||
- Proxy rotation
|
||||
- Enhanced URL handling
|
||||
- Shared data among hooks
|
||||
- Add page routes
|
||||
|
||||
Each demo function can be run independently or as part of the full suite.
|
||||
"""
|
||||
@ -333,19 +335,19 @@ async def main():
|
||||
print("\n📊 Running Crawl4ai v0.4.3 Feature Demos\n")
|
||||
|
||||
# Efficiency & Speed Demos
|
||||
# print("\n🚀 EFFICIENCY & SPEED DEMOS")
|
||||
# await demo_memory_dispatcher()
|
||||
# await demo_streaming_support()
|
||||
# await demo_content_scraping()
|
||||
print("\n🚀 EFFICIENCY & SPEED DEMOS")
|
||||
await demo_memory_dispatcher()
|
||||
await demo_streaming_support()
|
||||
await demo_content_scraping()
|
||||
|
||||
# # LLM Integration Demos
|
||||
# print("\n🤖 LLM INTEGRATION DEMOS")
|
||||
# await demo_json_schema_generation()
|
||||
# await demo_llm_markdown()
|
||||
print("\n🤖 LLM INTEGRATION DEMOS")
|
||||
await demo_json_schema_generation()
|
||||
await demo_llm_markdown()
|
||||
|
||||
# # Core Improvements
|
||||
# print("\n🔧 CORE IMPROVEMENT DEMOS")
|
||||
# await demo_robots_compliance()
|
||||
print("\n🔧 CORE IMPROVEMENT DEMOS")
|
||||
await demo_robots_compliance()
|
||||
await demo_proxy_rotation()
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
Loading…
x
Reference in New Issue
Block a user