
Add new features to enhance browser automation and HTML extraction:

- Add CDP browser launch capability with customizable ports and profiles
- Implement JsonLxmlExtractionStrategy for faster HTML parsing
- Add CLI command 'crwl cdp' for launching standalone CDP browsers
- Support connecting to external CDP browsers via URL
- Optimize selector caching and context-sensitive queries

BREAKING CHANGE: LLMConfig import path changed from crawl4ai.types to crawl4ai
17 lines
540 B
Python
17 lines
540 B
Python
from crawl4ai.browser_profiler import BrowserProfiler
|
|
import asyncio
|
|
|
|
|
|
if __name__ == "__main__":
    # Test launching a standalone browser
    async def test_standalone_browser():
        """Launch a standalone Chromium browser and print the returned CDP URL.

        Manual smoke test: calls ``BrowserProfiler.launch_standalone_browser``
        with a fixed profile directory, debugging port 9222 and
        ``headless=False``, then prints whatever the call returns.
        """
        # Local import keeps this script block self-contained.
        import os

        profiler = BrowserProfiler()
        cdp_url = await profiler.launch_standalone_browser(
            browser_type="chromium",
            # Expand "~" explicitly: Python does not perform shell-style
            # tilde expansion on path strings, so an unexpanded value could
            # end up as a literal "~" directory under the current working
            # directory. expanduser() is a no-op on an already-expanded path,
            # so this is safe even if the callee expands it as well.
            user_data_dir=os.path.expanduser(
                "~/.crawl4ai/browser_profile/test-browser-data"
            ),
            debugging_port=9222,
            headless=False,
        )
        print(f"CDP URL: {cdp_url}")

    asyncio.run(test_standalone_browser())