
Implement comprehensive network request and console message capturing functionality:
- Add capture_network_requests and capture_console_messages config parameters
- Add network_requests and console_messages fields to models
- Implement Playwright event listeners to capture requests, responses, and console output
- Create detailed documentation and examples
- Add comprehensive tests

This feature enables deep visibility into web page activity for debugging, security analysis, performance profiling, and API discovery in web applications.
46 lines
1.5 KiB
Python
46 lines
1.5 KiB
Python
import asyncio
|
|
import time
|
|
|
|
|
|
from crawl4ai import CrawlerRunConfig, AsyncWebCrawler, CacheMode
|
|
from crawl4ai.content_scraping_strategy import LXMLWebScrapingStrategy
|
|
from crawl4ai.deep_crawling import BFSDeepCrawlStrategy
|
|
# from crawl4ai.deep_crawling import BFSDeepCrawlStrategy, BestFirstCrawlingStrategy
|
|
|
|
|
|
async def main():
    """Run an example deep crawl of the documentation site in two modes.

    A single ``CrawlerRunConfig`` using ``BFSDeepCrawlStrategy`` (``max_depth=2``,
    ``include_external=False``), ``CacheMode.BYPASS`` and
    ``LXMLWebScrapingStrategy`` is used for both runs on one
    ``AsyncWebCrawler`` instance:

    1. Batch mode (``stream=False``): ``arun`` is awaited and the returned
       collection is inspected afterwards.
    2. Streaming mode (``stream=True``): the value returned by awaiting
       ``arun`` is consumed with ``async for``, printing each result as it
       arrives.

    The elapsed wall-clock time of each run is printed.
    """
    start_url = "https://docs.crawl4ai.com"

    config = CrawlerRunConfig(
        deep_crawl_strategy=BFSDeepCrawlStrategy(
            max_depth=2,
            include_external=False,
        ),
        stream=False,
        verbose=True,
        cache_mode=CacheMode.BYPASS,
        scraping_strategy=LXMLWebScrapingStrategy(),
    )

    async with AsyncWebCrawler() as crawler:
        start_time = time.perf_counter()
        print("\nStarting deep crawl in batch mode:")
        results = await crawler.arun(url=start_url, config=config)
        page_count = len(results)
        print(f"Crawled {page_count} pages")
        # Indexing an empty result collection would raise IndexError (e.g. if
        # the start URL could not be crawled), so only show a sample page
        # when at least one result came back.
        if page_count:
            print(f"Example page: {results[0].url}")
        print(f"Duration: {time.perf_counter() - start_time:.2f} seconds\n")

        print("Starting deep crawl in streaming mode:")
        # Reuse the same config; only the delivery mode changes.
        config.stream = True
        start_time = time.perf_counter()
        async for result in await crawler.arun(url=start_url, config=config):
            # NOTE(review): guards against a result whose metadata is unset;
            # confirm against the CrawlResult model whether it can be None.
            depth = (result.metadata or {}).get('depth', 0)
            print(f"→ {result.url} (Depth: {depth})")
        print(f"Duration: {time.perf_counter() - start_time:.2f} seconds")
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the async example to completion on a new
    # event loop when this file is executed directly.
    asyncio.run(main())