
Implement comprehensive network request and console message capturing functionality:
- Add capture_network_requests and capture_console_messages config parameters
- Add network_requests and console_messages fields to models
- Implement Playwright event listeners to capture requests, responses, and console output
- Create detailed documentation and examples
- Add comprehensive tests

This feature enables deep visibility into web page activity for debugging, security analysis, performance profiling, and API discovery in web applications.
50 lines
1.6 KiB
Python
50 lines
1.6 KiB
Python
import os, sys
|
|
# Put the parent and grandparent of this file's directory on sys.path so that
# crawl4ai can be imported when the script is run directly.
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
parent_parent_dir = os.path.dirname(parent_dir)
sys.path.extend([parent_dir, parent_parent_dir])
|
|
|
|
import asyncio
|
|
from crawl4ai import *
|
|
|
|
async def test_crawler():
    """Crawl the same URL list with ``arun_many`` in streaming and batch mode.

    Both runs share one ``AsyncWebCrawler`` session. The first run clones the
    run config with ``stream=True`` and prints each result as the awaited
    async iterator yields it. The second run passes the config without the
    stream override, awaits the whole call, and then prints every result.
    """
    # Setup configurations
    browser_config = BrowserConfig(headless=True, verbose=False)
    crawler_config = CrawlerRunConfig(
        cache_mode=CacheMode.BYPASS,
        markdown_generator=DefaultMarkdownGenerator(
            content_filter=PruningContentFilter(
                threshold=0.48,
                threshold_type="fixed",
                min_word_threshold=0,
            )
        ),
    )

    # Test URLs - mix of different sites
    urls = [
        "http://example.com",
        "http://example.org",
        "http://example.net",
    ] * 10  # 3 sites x 10 repetitions = 30 total URLs

    async with AsyncWebCrawler(config=browser_config) as crawler:
        print("\n=== Testing Streaming Mode ===")
        # With stream=True the awaited call is consumed with `async for`,
        # so results are printed one at a time.
        async for result in await crawler.arun_many(
            urls=urls,
            config=crawler_config.clone(stream=True),
        ):
            print(f"Received result for: {result.url} - Success: {result.success}")

        print("\n=== Testing Batch Mode ===")
        # Without the stream override the awaited call is treated as a sized
        # collection: len() is taken before iterating.
        results = await crawler.arun_many(
            urls=urls,
            config=crawler_config,
        )
        print(f"Received all {len(results)} results at once")
        for result in results:
            print(f"Batch result for: {result.url} - Success: {result.success}")
|
|
|
|
# Script entry point: run the crawler demo coroutine to completion when this
# file is executed directly (not when it is imported).
if __name__ == "__main__":
    asyncio.run(test_crawler())