
Implements persistent browser management: a single shared browser instance can be reused across multiple crawler sessions. Key changes:

- Added a browser_mode config option with 'builtin', 'dedicated', and 'custom' modes
- Implemented builtin browser management in BrowserProfiler
- Added CLI commands for managing the builtin browser (start, stop, status, restart, view)
- Modified browser process handling to support detached processes
- Added automatic builtin browser setup during package installation

BREAKING CHANGE: the browser_mode config option changes how browser instances are managed.
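For a quick sense of the new option, here is a minimal sketch of how browser_mode is exercised, mirroring the test script below (the function name `demo` is illustrative): 'builtin' connects to the shared persistent browser and, per the fallback test in the script, falls back to a dedicated browser when none is running.

```python
import asyncio

from crawl4ai.async_webcrawler import AsyncWebCrawler
from crawl4ai.async_configs import BrowserConfig

async def demo():
    # 'builtin' connects to the shared persistent browser instance;
    # if it is not running, the crawler falls back to a dedicated browser.
    config = BrowserConfig(browser_mode="builtin", headless=True)
    async with AsyncWebCrawler(config=config) as crawler:
        result = await crawler.arun("https://example.com")
        print(f"Got {len(result.markdown.raw_markdown)} chars of markdown")

if __name__ == "__main__":
    asyncio.run(demo())
```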
"""
|
|
Test script for browser_profiler and builtin browser functionality.
|
|
|
|
This script tests:
|
|
1. Creating a builtin browser
|
|
2. Getting browser information
|
|
3. Killing the browser
|
|
4. Restarting the browser
|
|
5. Testing crawling with different browser modes
|
|
6. Testing edge cases
|
|
"""
|
|
|
|
import asyncio
import os
import sys
import time

from colorama import Fore, init

# Add the project root to the path for imports
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))

from crawl4ai.browser_profiler import BrowserProfiler
from crawl4ai.async_webcrawler import AsyncWebCrawler
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
from crawl4ai.async_logger import AsyncLogger

# Initialize colorama for cross-platform colored terminal output
init()

# Define colors for pretty output
SUCCESS = Fore.GREEN
WARNING = Fore.YELLOW
ERROR = Fore.RED
INFO = Fore.CYAN
RESET = Fore.RESET

# Create logger
logger = AsyncLogger(verbose=True)

async def test_browser_profiler():
    """Test the BrowserProfiler class functionality"""
    print(f"\n{INFO}========== Testing BrowserProfiler =========={RESET}")

    # Initialize browser profiler
    profiler = BrowserProfiler(logger=logger)

    # Step 1: Check if builtin browser exists and kill it if it does
    print(f"\n{INFO}1. Checking if builtin browser exists{RESET}")
    browser_info = profiler.get_builtin_browser_info()
    if browser_info:
        print(f"{SUCCESS}Builtin browser found: {browser_info['cdp_url']}{RESET}")
        # Kill it to start with a clean state
        print(f"{INFO}Killing existing browser...{RESET}")
        await profiler.kill_builtin_browser()
        browser_info = profiler.get_builtin_browser_info()
        if not browser_info:
            print(f"{SUCCESS}Browser successfully killed{RESET}")
        else:
            print(f"{ERROR}Failed to kill browser{RESET}")
    else:
        print(f"{WARNING}No builtin browser found{RESET}")

    # Step 2: Launch a new builtin browser
    print(f"\n{INFO}2. Launching new builtin browser{RESET}")
    cdp_url = await profiler.launch_builtin_browser(headless=True)
    if cdp_url:
        print(f"{SUCCESS}Builtin browser launched at: {cdp_url}{RESET}")
    else:
        print(f"{ERROR}Failed to launch builtin browser{RESET}")
        return

    # Step 3: Get and display browser information
    print(f"\n{INFO}3. Getting browser information{RESET}")
    browser_info = profiler.get_builtin_browser_info()
    if browser_info:
        print(f"{SUCCESS}Browser info retrieved:{RESET}")
        for key, value in browser_info.items():
            if key != 'config':  # Skip the verbose config section
                print(f"  {key}: {value}")
    else:
        print(f"{ERROR}Failed to get browser information{RESET}")

    # Step 4: Get browser status
    print(f"\n{INFO}4. Getting browser status{RESET}")
    status = await profiler.get_builtin_browser_status()
    print(f"Running: {status['running']}")
    print(f"CDP URL: {status['cdp_url']}")

    # Pause to let the browser run for a moment
    print(f"\n{INFO}Waiting for 2 seconds...{RESET}")
    await asyncio.sleep(2)

    return cdp_url  # Return the CDP URL for the crawling tests

async def test_crawling_with_builtin_browser(cdp_url):
    """Test crawling with the builtin browser"""
    print(f"\n{INFO}========== Testing Crawling with Builtin Browser =========={RESET}")

    # Step 1: Create a crawler with 'builtin' browser mode
    print(f"\n{INFO}1. Creating crawler with 'builtin' browser mode{RESET}")
    browser_config = BrowserConfig(
        browser_mode="builtin",
        headless=True
    )
    crawler = AsyncWebCrawler(config=browser_config)

    # Step 2: Test crawling without explicitly starting (should auto-start)
    print(f"\n{INFO}2. Testing auto-start with arun{RESET}")
    try:
        result = await crawler.arun("https://crawl4ai.com")
        print(f"{SUCCESS}Auto-start crawling successful!{RESET}")
        print(f"  Got {len(result.markdown.raw_markdown)} chars of markdown content")
    except Exception as e:
        print(f"{ERROR}Auto-start crawling failed: {str(e)}{RESET}")

    # Close the crawler
    await crawler.close()

    # Step 3: Test with explicit start
    print(f"\n{INFO}3. Testing with explicit start{RESET}")
    crawler = AsyncWebCrawler(config=browser_config)
    try:
        await crawler.start()
        print(f"{SUCCESS}Explicit start successful!{RESET}")
        result = await crawler.arun("https://example.com")
        print(f"  Got {len(result.markdown.raw_markdown)} chars of markdown content")
        # Try a second time; no start needed
        print(f"{INFO}Testing second arun call without start{RESET}")
        result = await crawler.arun("https://example.com")
        print(f"  Got {len(result.markdown.raw_markdown)} chars of markdown content")
    except Exception as e:
        print(f"{ERROR}Explicit start crawling failed: {str(e)}{RESET}")

    # Close the crawler
    await crawler.close()

    # Step 4: Test with context manager
    print(f"\n{INFO}4. Testing with context manager{RESET}")
    try:
        async with AsyncWebCrawler(config=browser_config) as crawler:
            result = await crawler.arun("https://httpbin.org/html")
            print(f"{SUCCESS}Context manager crawling successful!{RESET}")
            print(f"  Got {len(result.markdown.raw_markdown)} chars of markdown content")
    except Exception as e:
        print(f"{ERROR}Context manager crawling failed: {str(e)}{RESET}")

    return True

async def test_crawling_without_builtin_browser():
    """Test crawling after killing the builtin browser"""
    print(f"\n{INFO}========== Testing Crawling Without Builtin Browser =========={RESET}")

    # Step 1: Kill the builtin browser
    print(f"\n{INFO}1. Killing the builtin browser{RESET}")
    profiler = BrowserProfiler(logger=logger)
    await profiler.kill_builtin_browser()

    # Step 2: Create a crawler with 'builtin' mode (should fall back to dedicated)
    print(f"\n{INFO}2. Creating crawler with 'builtin' mode (should fall back){RESET}")
    browser_config = BrowserConfig(
        browser_mode="builtin",
        headless=True
    )

    try:
        async with AsyncWebCrawler(config=browser_config) as crawler:
            result = await crawler.arun("https://httpbin.org/get")
            print(f"{SUCCESS}Fallback to dedicated browser successful!{RESET}")
            print(f"  Got {len(result.markdown.raw_markdown)} chars of markdown content")
    except Exception as e:
        print(f"{ERROR}Fallback crawler failed: {str(e)}{RESET}")

    # Step 3: Test with direct CDP URL
    print(f"\n{INFO}3. Testing with direct CDP URL connection{RESET}")

    # Launch a standalone browser to get a CDP URL
    print(f"{INFO}Launching standalone browser...{RESET}")
    cdp_url = await profiler.launch_standalone_browser(headless=True)
    if not cdp_url:
        print(f"{ERROR}Failed to launch standalone browser{RESET}")
        return

    print(f"{SUCCESS}Got CDP URL: {cdp_url}{RESET}")

    # Create a crawler with the CDP URL
    browser_config = BrowserConfig(
        browser_mode="dedicated",
        cdp_url=cdp_url,
        use_managed_browser=True,
        headless=True
    )

    try:
        async with AsyncWebCrawler(config=browser_config) as crawler:
            result = await crawler.arun("https://httpbin.org/ip")
            print(f"{SUCCESS}Direct CDP URL crawling successful!{RESET}")
            print(f"  Got {len(result.markdown.raw_markdown)} chars of markdown content")
    except Exception as e:
        print(f"{ERROR}Direct CDP URL crawling failed: {str(e)}{RESET}")

    return True

async def test_edge_cases():
    """Test edge cases like multiple starts, killing browser during crawl, etc."""
    print(f"\n{INFO}========== Testing Edge Cases =========={RESET}")

    # Step 1: Launch the builtin browser if it doesn't exist
    print(f"\n{INFO}1. Ensuring builtin browser exists{RESET}")
    profiler = BrowserProfiler(logger=logger)
    browser_info = profiler.get_builtin_browser_info()
    if not browser_info:
        cdp_url = await profiler.launch_builtin_browser(headless=True)
        if cdp_url:
            print(f"{SUCCESS}Builtin browser launched at: {cdp_url}{RESET}")
        else:
            print(f"{ERROR}Failed to launch builtin browser{RESET}")
            return
    else:
        print(f"{SUCCESS}Using existing builtin browser: {browser_info['cdp_url']}{RESET}")

    # Step 2: Test multiple starts with the same crawler
    print(f"\n{INFO}2. Testing multiple starts with the same crawler{RESET}")
    browser_config = BrowserConfig(browser_mode="builtin", headless=True)
    crawler = AsyncWebCrawler(config=browser_config)

    await crawler.start()
    print(f"{SUCCESS}First start successful!{RESET}")

    try:
        await crawler.start()
        print(f"{SUCCESS}Second start didn't cause errors!{RESET}")
    except Exception as e:
        print(f"{ERROR}Second start failed: {str(e)}{RESET}")

    # Run a crawl to verify functionality
    try:
        result = await crawler.arun("https://httpbin.org/user-agent")
        print(f"{SUCCESS}Crawling after multiple starts successful!{RESET}")
        print(f"  Got {len(result.markdown.raw_markdown)} chars of markdown content")
    except Exception as e:
        print(f"{ERROR}Crawling after multiple starts failed: {str(e)}{RESET}")

    await crawler.close()

    # Step 3: Test killing the browser while a crawler is active
    print(f"\n{INFO}3. Testing killing browser while crawler is active{RESET}")

    # Create and start a crawler
    browser_config = BrowserConfig(browser_mode="builtin", headless=True)
    crawler = AsyncWebCrawler(config=browser_config)
    await crawler.start()

    # Kill the browser
    print(f"{INFO}Killing the browser...{RESET}")
    await profiler.kill_builtin_browser()

    # Try to crawl (should fail)
    try:
        result = await crawler.arun("https://httpbin.org/get")
        print(f"{WARNING}Crawling succeeded despite killed browser!{RESET}")
    except Exception as e:
        print(f"{SUCCESS}Crawling failed as expected: {str(e)}{RESET}")

    await crawler.close()

    return True

async def main():
    """Run all tests"""
    try:
        print(f"{INFO}Starting browser_profiler and builtin browser tests{RESET}")

        # Run browser profiler tests
        cdp_url = await test_browser_profiler()

        # Run crawling tests with builtin browser
        if cdp_url:
            await test_crawling_with_builtin_browser(cdp_url)

        # Run tests without builtin browser
        # await test_crawling_without_builtin_browser()

        # Run edge case tests
        # await test_edge_cases()

        print(f"\n{SUCCESS}All tests completed!{RESET}")

    except Exception as e:
        print(f"\n{ERROR}Test failed with error: {str(e)}{RESET}")
        import traceback
        traceback.print_exc()
    finally:
        # Clean up: kill any remaining builtin browser
        print(f"\n{INFO}Cleaning up: killing any remaining builtin browser{RESET}")
        profiler = BrowserProfiler(logger=logger)
        await profiler.kill_builtin_browser()
        print(f"{SUCCESS}Test cleanup complete{RESET}")

if __name__ == "__main__":
    asyncio.run(main())