
Implement comprehensive network request and console message capturing functionality:
- Add `capture_network_requests` and `capture_console_messages` config parameters.
- Add `network_requests` and `console_messages` fields to the models.
- Implement Playwright event listeners to capture requests, responses, and console output.
- Create detailed documentation and examples.
- Add comprehensive tests.

This feature enables deep visibility into web-page activity for debugging, security analysis, performance profiling, and API discovery in web applications.
39 lines · 1.3 KiB · Python
import os, sys
|
|
# append 2 parent directories to sys.path to import crawl4ai
|
|
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
|
sys.path.append(parent_dir)
|
|
parent_parent_dir = os.path.dirname(parent_dir)
|
|
sys.path.append(parent_parent_dir)
|
|
|
|
|
|
import asyncio
|
|
from typing import List
|
|
from crawl4ai import *
|
|
from crawl4ai.async_dispatcher import MemoryAdaptiveDispatcher
|
|
|
|
async def test_streaming():
|
|
browser_config = BrowserConfig(headless=True, verbose=True)
|
|
crawler_config = CrawlerRunConfig(
|
|
cache_mode=CacheMode.BYPASS,
|
|
markdown_generator=DefaultMarkdownGenerator(
|
|
# content_filter=PruningContentFilter(
|
|
# threshold=0.48,
|
|
# threshold_type="fixed",
|
|
# min_word_threshold=0
|
|
# )
|
|
),
|
|
)
|
|
|
|
urls = ["http://example.com"] * 10
|
|
|
|
async with AsyncWebCrawler(config=browser_config) as crawler:
|
|
dispatcher = MemoryAdaptiveDispatcher(
|
|
max_session_permit=5,
|
|
check_interval=0.5
|
|
)
|
|
|
|
async for result in dispatcher.run_urls_stream(urls, crawler, crawler_config):
|
|
print(f"Got result for {result.url} - Success: {result.result.success}")
|
|
|
|
if __name__ == "__main__":
|
|
asyncio.run(test_streaming()) |