"""
Test script for browser_profiler and builtin browser functionality.
This script tests:
1. Creating a builtin browser
2. Getting browser information
3. Killing the browser
4. Restarting the browser
5. Testing crawling with different browser modes
6. Testing edge cases
"""
import asyncio
import os
import sys

from colorama import Fore, init

# Add the project root to the path for imports
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))

from crawl4ai.browser_profiler import BrowserProfiler
from crawl4ai.async_webcrawler import AsyncWebCrawler
from crawl4ai.async_configs import BrowserConfig
from crawl4ai.async_logger import AsyncLogger

# Initialize colorama for cross-platform colored terminal output
init()

# Define colors for pretty output
SUCCESS = Fore.GREEN
WARNING = Fore.YELLOW
ERROR = Fore.RED
INFO = Fore.CYAN
RESET = Fore.RESET

# Create logger
logger = AsyncLogger(verbose=True)


async def test_browser_profiler():
    """Test the BrowserProfiler class functionality."""
    print(f"\n{INFO}========== Testing BrowserProfiler =========={RESET}")

    # Initialize browser profiler
    profiler = BrowserProfiler(logger=logger)

    # Step 1: Check if a builtin browser exists and kill it if it does
    print(f"\n{INFO}1. Checking if builtin browser exists{RESET}")
    browser_info = profiler.get_builtin_browser_info()
    if browser_info:
        print(f"{SUCCESS}Builtin browser found: {browser_info['cdp_url']}{RESET}")
        # Kill it to start from a clean state
        print(f"{INFO}Killing existing browser...{RESET}")
        await profiler.kill_builtin_browser()
        browser_info = profiler.get_builtin_browser_info()
        if not browser_info:
            print(f"{SUCCESS}Browser successfully killed{RESET}")
        else:
            print(f"{ERROR}Failed to kill browser{RESET}")
    else:
        print(f"{WARNING}No builtin browser found{RESET}")

    # Step 2: Launch a new builtin browser
    print(f"\n{INFO}2. Launching new builtin browser{RESET}")
    cdp_url = await profiler.launch_builtin_browser(headless=True)
    if cdp_url:
        print(f"{SUCCESS}Builtin browser launched at: {cdp_url}{RESET}")
    else:
        print(f"{ERROR}Failed to launch builtin browser{RESET}")
        return

    # Step 3: Get and display browser information
    print(f"\n{INFO}3. Getting browser information{RESET}")
    browser_info = profiler.get_builtin_browser_info()
    if browser_info:
        print(f"{SUCCESS}Browser info retrieved:{RESET}")
        for key, value in browser_info.items():
            if key != 'config':  # Skip the verbose config section
                print(f"  {key}: {value}")
    else:
        print(f"{ERROR}Failed to get browser information{RESET}")

    # Step 4: Get browser status
    print(f"\n{INFO}4. Getting browser status{RESET}")
    status = await profiler.get_builtin_browser_status()
    print(f"Running: {status['running']}")
    print(f"CDP URL: {status['cdp_url']}")

    # Pause to let the browser run for a moment
    print(f"\n{INFO}Waiting for 2 seconds...{RESET}")
    await asyncio.sleep(2)

    return cdp_url  # Return the CDP URL for the crawling tests
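

# Note on browser_mode (used in the BrowserConfig instances below): the
# builtin-browser feature adds three modes -- 'builtin' attaches to the shared
# persistent browser over its CDP URL, 'dedicated' launches a fresh browser
# per crawler, and 'custom' is presumably for a caller-supplied endpoint.
# Only 'builtin' and 'dedicated' are exercised in this script.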

async def test_crawling_with_builtin_browser(cdp_url):
    """Test crawling with the builtin browser."""
    print(f"\n{INFO}========== Testing Crawling with Builtin Browser =========={RESET}")

    # Step 1: Create a crawler with 'builtin' browser mode
    print(f"\n{INFO}1. Creating crawler with 'builtin' browser mode{RESET}")
    browser_config = BrowserConfig(
        browser_mode="builtin",
        headless=True
    )
    crawler = AsyncWebCrawler(config=browser_config)

    # Step 2: Test crawling without explicitly starting (should auto-start)
    print(f"\n{INFO}2. Testing auto-start with arun{RESET}")
    try:
        result = await crawler.arun("https://crawl4ai.com")
        print(f"{SUCCESS}Auto-start crawling successful!{RESET}")
        print(f"  Got {len(result.markdown.raw_markdown)} chars of markdown content")
    except Exception as e:
        print(f"{ERROR}Auto-start crawling failed: {str(e)}{RESET}")

    # Close the crawler
    await crawler.close()

    # Step 3: Test with explicit start
    print(f"\n{INFO}3. Testing with explicit start{RESET}")
    crawler = AsyncWebCrawler(config=browser_config)
    try:
        await crawler.start()
        print(f"{SUCCESS}Explicit start successful!{RESET}")
        result = await crawler.arun("https://example.com")
        print(f"  Got {len(result.markdown.raw_markdown)} chars of markdown content")

        # Try a second time; no start needed
        print(f"{INFO}Testing second arun call without start{RESET}")
        result = await crawler.arun("https://example.com")
        print(f"  Got {len(result.markdown.raw_markdown)} chars of markdown content")
    except Exception as e:
        print(f"{ERROR}Explicit start crawling failed: {str(e)}{RESET}")

    # Close the crawler
    await crawler.close()

    # Step 4: Test with context manager
    print(f"\n{INFO}4. Testing with context manager{RESET}")
    try:
        async with AsyncWebCrawler(config=browser_config) as crawler:
            result = await crawler.arun("https://httpbin.org/html")
            print(f"{SUCCESS}Context manager crawling successful!{RESET}")
            print(f"  Got {len(result.markdown.raw_markdown)} chars of markdown content")
    except Exception as e:
        print(f"{ERROR}Context manager crawling failed: {str(e)}{RESET}")

    return True


async def test_crawling_without_builtin_browser():
    """Test crawling after killing the builtin browser."""
    print(f"\n{INFO}========== Testing Crawling Without Builtin Browser =========={RESET}")

    # Step 1: Kill the builtin browser
    print(f"\n{INFO}1. Killing the builtin browser{RESET}")
    profiler = BrowserProfiler(logger=logger)
    await profiler.kill_builtin_browser()

    # Step 2: Create a crawler with 'builtin' mode (should fall back to dedicated)
    print(f"\n{INFO}2. Creating crawler with 'builtin' mode (should fall back){RESET}")
    browser_config = BrowserConfig(
        browser_mode="builtin",
        headless=True
    )
    try:
        async with AsyncWebCrawler(config=browser_config) as crawler:
            result = await crawler.arun("https://httpbin.org/get")
            print(f"{SUCCESS}Fallback to dedicated browser successful!{RESET}")
            print(f"  Got {len(result.markdown.raw_markdown)} chars of markdown content")
    except Exception as e:
        print(f"{ERROR}Fallback crawler failed: {str(e)}{RESET}")

    # Step 3: Test with direct CDP URL
    print(f"\n{INFO}3. Testing with direct CDP URL connection{RESET}")
    # Launch a standalone browser to get a CDP URL
    print(f"{INFO}Launching standalone browser...{RESET}")
    cdp_url = await profiler.launch_standalone_browser(headless=True)
    if not cdp_url:
        print(f"{ERROR}Failed to launch standalone browser{RESET}")
        return
    print(f"{SUCCESS}Got CDP URL: {cdp_url}{RESET}")

    # Create a crawler with the CDP URL
    browser_config = BrowserConfig(
        browser_mode="dedicated",
        cdp_url=cdp_url,
        use_managed_browser=True,
        headless=True
    )
    try:
        async with AsyncWebCrawler(config=browser_config) as crawler:
            result = await crawler.arun("https://httpbin.org/ip")
            print(f"{SUCCESS}Direct CDP URL crawling successful!{RESET}")
            print(f"  Got {len(result.markdown.raw_markdown)} chars of markdown content")
    except Exception as e:
        print(f"{ERROR}Direct CDP URL crawling failed: {str(e)}{RESET}")

    return True
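

# Note: the direct-CDP configuration above (an explicit cdp_url plus
# use_managed_browser=True) is presumably the same attachment mechanism that
# 'builtin' mode uses under the hood, with the shared browser's endpoint
# managed by the builtin-browser CLI (start, stop, status, restart, view).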

async def test_edge_cases():
    """Test edge cases like multiple starts, killing the browser during a crawl, etc."""
    print(f"\n{INFO}========== Testing Edge Cases =========={RESET}")

    # Step 1: Launch the builtin browser if it doesn't exist
    print(f"\n{INFO}1. Ensuring builtin browser exists{RESET}")
    profiler = BrowserProfiler(logger=logger)
    browser_info = profiler.get_builtin_browser_info()
    if not browser_info:
        cdp_url = await profiler.launch_builtin_browser(headless=True)
        if cdp_url:
            print(f"{SUCCESS}Builtin browser launched at: {cdp_url}{RESET}")
        else:
            print(f"{ERROR}Failed to launch builtin browser{RESET}")
            return
    else:
        print(f"{SUCCESS}Using existing builtin browser: {browser_info['cdp_url']}{RESET}")

    # Step 2: Test multiple starts with the same crawler
    print(f"\n{INFO}2. Testing multiple starts with the same crawler{RESET}")
    browser_config = BrowserConfig(browser_mode="builtin", headless=True)
    crawler = AsyncWebCrawler(config=browser_config)
    await crawler.start()
    print(f"{SUCCESS}First start successful!{RESET}")
    try:
        await crawler.start()
        print(f"{SUCCESS}Second start didn't cause errors!{RESET}")
    except Exception as e:
        print(f"{ERROR}Second start failed: {str(e)}{RESET}")

    # Run a crawl to verify functionality
    try:
        result = await crawler.arun("https://httpbin.org/user-agent")
        print(f"{SUCCESS}Crawling after multiple starts successful!{RESET}")
        print(f"  Got {len(result.markdown.raw_markdown)} chars of markdown content")
    except Exception as e:
        print(f"{ERROR}Crawling after multiple starts failed: {str(e)}{RESET}")
    await crawler.close()

    # Step 3: Test killing the browser while a crawler is active
    print(f"\n{INFO}3. Testing killing browser while crawler is active{RESET}")
    # Create and start a crawler
    browser_config = BrowserConfig(browser_mode="builtin", headless=True)
    crawler = AsyncWebCrawler(config=browser_config)
    await crawler.start()

    # Kill the browser out from under the crawler
    print(f"{INFO}Killing the browser...{RESET}")
    await profiler.kill_builtin_browser()

    # Try to crawl (should fail)
    try:
        result = await crawler.arun("https://httpbin.org/get")
        print(f"{WARNING}Crawling succeeded despite killed browser!{RESET}")
    except Exception as e:
        print(f"{SUCCESS}Crawling failed as expected: {str(e)}{RESET}")
    await crawler.close()

    return True


async def main():
    """Run all tests."""
    try:
        print(f"{INFO}Starting browser_profiler and builtin browser tests{RESET}")

        # Run browser profiler tests
        cdp_url = await test_browser_profiler()

        # Run crawling tests with builtin browser
        if cdp_url:
            await test_crawling_with_builtin_browser(cdp_url)

        # Run tests without builtin browser
        # await test_crawling_without_builtin_browser()

        # Run edge case tests
        # await test_edge_cases()

        print(f"\n{SUCCESS}All tests completed!{RESET}")
    except Exception as e:
        print(f"\n{ERROR}Test failed with error: {str(e)}{RESET}")
        import traceback
        traceback.print_exc()
    finally:
        # Clean up: kill any remaining builtin browser
        print(f"\n{INFO}Cleaning up: killing any remaining builtin browser{RESET}")
        profiler = BrowserProfiler(logger=logger)
        await profiler.kill_builtin_browser()
        print(f"{SUCCESS}Test cleanup complete{RESET}")


if __name__ == "__main__":
    asyncio.run(main())