mirror of
https://github.com/unclecode/crawl4ai.git
synced 2025-10-02 06:00:21 +00:00
27 lines
658 B
Python
27 lines
658 B
Python
import nest_asyncio
|
|
|
|
nest_asyncio.apply()
|
|
|
|
import asyncio
|
|
from crawl4ai import (
|
|
AsyncWebCrawler,
|
|
CrawlerRunConfig,
|
|
LXMLWebScrapingStrategy,
|
|
CacheMode,
|
|
)
|
|
|
|
|
|
async def main():
    """Crawl https://example.com with the LXML scraping strategy and print a summary.

    Builds a ``CrawlerRunConfig`` that bypasses the cache and selects
    ``LXMLWebScrapingStrategy``, runs one crawl inside an ``AsyncWebCrawler``
    context, then prints the success flag. On success the length of the raw
    markdown is printed; on failure the crawl's error message is printed
    instead, so a failed request does not crash the script.
    """
    config = CrawlerRunConfig(
        cache_mode=CacheMode.BYPASS,
        scraping_strategy=LXMLWebScrapingStrategy(),  # Faster alternative to default BeautifulSoup
    )
    async with AsyncWebCrawler() as crawler:
        result = await crawler.arun(url="https://example.com", config=config)
        print(f"Success: {result.success}")

        if not result.success:
            # A failed crawl may carry no markdown at all; report the failure
            # rather than dereferencing a missing attribute.
            print(f"Error: {result.error_message}")
            return

        # NOTE(review): `markdown_v2` appears to be deprecated in newer
        # crawl4ai releases in favour of `markdown` -- confirm against the
        # installed version. The lookup below accepts either a plain string
        # or an object exposing `raw_markdown`.
        markdown = result.markdown
        raw_markdown = getattr(markdown, "raw_markdown", markdown) or ""
        print(f"Markdown length: {len(raw_markdown)}")
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the async demo to completion when executed
    # directly (not when imported as a module).
    asyncio.run(main())
|