crawl4ai/tests/test_scraping_strategy.py

27 lines
658 B
Python
Raw Normal View History

import nest_asyncio
2025-01-13 19:19:58 +08:00
nest_asyncio.apply()
import asyncio
2025-01-13 19:19:58 +08:00
from crawl4ai import (
AsyncWebCrawler,
CrawlerRunConfig,
LXMLWebScrapingStrategy,
CacheMode,
)
async def main():
    """Crawl https://example.com using the lxml scraping strategy and report the outcome.

    Prints the crawl's ``success`` flag and, when markdown was produced,
    the length of the raw markdown. If the crawl failed or produced no
    markdown, a placeholder line is printed instead of raising.
    """
    config = CrawlerRunConfig(
        cache_mode=CacheMode.BYPASS,
        scraping_strategy=LXMLWebScrapingStrategy(),  # Faster alternative to default BeautifulSoup
    )
    async with AsyncWebCrawler() as crawler:
        result = await crawler.arun(url="https://example.com", config=config)
        print(f"Success: {result.success}")

        # A failed crawl may leave ``markdown_v2`` unset; dereferencing it
        # unconditionally would raise AttributeError and hide the failure.
        markdown = result.markdown_v2 if result.success else None
        if markdown is None:
            print("Markdown length: n/a (no markdown generated)")
            return
        print(f"Markdown length: {len(markdown.raw_markdown)}")
2025-01-13 19:19:58 +08:00
if __name__ == "__main__":
    # Script entry point: drive the async ``main`` coroutine to completion.
    # ``nest_asyncio.apply()`` has already run at import time above, so this
    # call also works when an event loop is already running.
    asyncio.run(main())