mirror of
https://github.com/unclecode/crawl4ai.git
synced 2025-12-30 03:48:54 +00:00
Implements a full-featured CLI for Crawl4AI with the following capabilities:
- Basic and advanced web crawling
- Configuration management via YAML/JSON files
- Multiple extraction strategies (CSS, XPath, LLM)
- Content filtering and optimization
- Interactive Q&A capabilities
- Various output formats
- Comprehensive documentation and examples

Also includes:
- Home directory setup for configuration and cache
- Environment variable support for API tokens
- Test suite for CLI functionality
27 lines
520 B
JSON
27 lines
520 B
JSON
{
  "name": "ArticleExtractor",
  "baseSelector": ".cards[data-tax=news] .card__data",
  "fields": [
    {
      "name": "title",
      "selector": "h4.card__title",
      "type": "text"
    },
    {
      "name": "link",
      "selector": "h4.card__title a",
      "type": "attribute",
      "attribute": "href"
    },
    {
      "name": "details",
      "selector": ".card__details",
      "type": "text"
    },
    {
      "name": "topics",
      "selector": ".card__topics.topics",
      "type": "text"
    }
  ]
}