
Implement comprehensive network request and console message capturing functionality:
- Add capture_network_requests and capture_console_messages config parameters
- Add network_requests and console_messages fields to models
- Implement Playwright event listeners to capture requests, responses, and console output
- Create detailed documentation and examples
- Add comprehensive tests

This feature enables deep visibility into web page activity for debugging, security analysis, performance profiling, and API discovery in web applications.
149 lines
4.1 KiB
Python
149 lines
4.1 KiB
Python
import asyncio
|
|
import pytest
|
|
from typing import List
|
|
from crawl4ai import (
|
|
AsyncWebCrawler,
|
|
BrowserConfig,
|
|
CrawlerRunConfig,
|
|
MemoryAdaptiveDispatcher,
|
|
RateLimiter,
|
|
CacheMode
|
|
)
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("viewport", [
    (800, 600),
    (1024, 768),
    (1920, 1080)
])
async def test_viewport_config(viewport):
    """Crawl example.com once per viewport size and expect the crawl to succeed.

    Args:
        viewport: ``(width, height)`` pair forwarded to ``BrowserConfig`` as
            ``viewport_width`` / ``viewport_height``.
    """
    vp_width, vp_height = viewport

    chromium_cfg = BrowserConfig(
        browser_type="chromium",
        headless=True,
        viewport_width=vp_width,
        viewport_height=vp_height,
    )

    # No cache_mode override is passed: the run config's default applies.
    run_cfg = CrawlerRunConfig(page_timeout=30000)  # 30 seconds

    async with AsyncWebCrawler(config=chromium_cfg) as crawler:
        outcome = await crawler.arun(url="https://example.com", config=run_cfg)
        assert outcome.success
|
|
|
|
@pytest.mark.asyncio
async def test_memory_management():
    """Run a small batch through a MemoryAdaptiveDispatcher.

    The same URL is submitted three times; the test only checks that
    ``arun_many`` hands back exactly one result per submitted URL.
    """
    chromium_cfg = BrowserConfig(
        browser_type="chromium",
        headless=True,
        viewport_width=1024,
        viewport_height=768,
    )

    memory_dispatcher = MemoryAdaptiveDispatcher(
        memory_threshold_percent=70.0,
        check_interval=1.0,
        max_session_permit=5,
    )

    # Three identical entries: the batch size matters here, not URL variety.
    targets = ["https://example.com" for _ in range(3)]
    run_cfg = CrawlerRunConfig(page_timeout=30000)

    async with AsyncWebCrawler(config=chromium_cfg) as crawler:
        batch = await crawler.arun_many(
            urls=targets,
            config=run_cfg,
            dispatcher=memory_dispatcher,
        )
        assert len(batch) == len(targets)
|
|
|
|
@pytest.mark.asyncio
async def test_rate_limiting():
    """Crawl three example domains through a dispatcher that has a RateLimiter.

    Only the result count is asserted: one result must come back for each
    URL that was submitted.
    """
    chromium_cfg = BrowserConfig(browser_type="chromium", headless=True)

    limiter = RateLimiter(
        base_delay=(1.0, 2.0),
        max_delay=5.0,
        max_retries=2,
    )
    throttled_dispatcher = MemoryAdaptiveDispatcher(
        rate_limiter=limiter,
        memory_threshold_percent=70.0,
    )

    targets = [
        "https://example.com",
        "https://example.org",
        "https://example.net",
    ]
    run_cfg = CrawlerRunConfig(page_timeout=30000)

    async with AsyncWebCrawler(config=chromium_cfg) as crawler:
        batch = await crawler.arun_many(
            urls=targets,
            config=run_cfg,
            dispatcher=throttled_dispatcher,
        )
        assert len(batch) == len(targets)
|
|
|
|
@pytest.mark.asyncio
async def test_javascript_execution():
    """Crawl example.com with a custom ``js_code`` snippet and expect success.

    The snippet sets the page body's background colour to red and returns the
    resulting style value; the test asserts only that the crawl succeeded.
    """
    chromium_cfg = BrowserConfig(
        browser_type="chromium",
        headless=True,
        java_script_enabled=True,
    )

    js_code = """
    document.body.style.backgroundColor = 'red';
    return document.body.style.backgroundColor;
    """

    run_cfg = CrawlerRunConfig(js_code=js_code, page_timeout=30000)

    async with AsyncWebCrawler(config=chromium_cfg) as crawler:
        outcome = await crawler.arun(url="https://example.com", config=run_cfg)
        assert outcome.success
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("error_url", [
    "https://invalid.domain.test",
    "https://httpbin.org/status/404",
    "https://httpbin.org/status/503",
    "https://httpbin.org/status/403"
])
async def test_error_handling(error_url):
    """Expect a failed result carrying an error message for each bad URL.

    Args:
        error_url: URL that should not crawl successfully (an invalid domain
            or an httpbin endpoint returning an error status).
    """
    chromium_cfg = BrowserConfig(browser_type="chromium", headless=True)

    run_cfg = CrawlerRunConfig(
        page_timeout=10000,  # Short timeout for error cases
        cache_mode=CacheMode.BYPASS,
    )

    async with AsyncWebCrawler(config=chromium_cfg) as crawler:
        outcome = await crawler.arun(url=error_url, config=run_cfg)
        assert not outcome.success
        assert outcome.error_message is not None
|
|
|
|
if __name__ == "__main__":
    # Manual runner for use without pytest: the listed tests run one after
    # another, each inside its own asyncio.run() event loop.
    manual_runs = (
        (test_viewport_config, ((1024, 768),)),
        (test_memory_management, ()),
        (test_rate_limiting, ()),
        (test_javascript_execution, ()),
    )
    for test_fn, test_args in manual_runs:
        asyncio.run(test_fn(*test_args))