#!/usr/bin/env python3
"""
Example demonstrating the v2 search functionality with individual parameters.
"""
|
2024-04-15 17:01:47 -04:00
|
|
|
|
2025-08-05 17:41:12 -03:00
|
|
|
import os
|
2025-08-06 18:41:54 -03:00
|
|
|
import time
|
2025-08-05 17:41:12 -03:00
|
|
|
from dotenv import load_dotenv
|
|
|
|
|
from firecrawl import Firecrawl
|
2025-08-07 15:52:07 -03:00
|
|
|
from firecrawl.v2.types import ScrapeOptions, ScrapeFormats, WebhookConfig
|
# Load environment variables (e.g. FIRECRAWL_API_KEY, FIRECRAWL_API_URL)
# from a local .env file into the process environment before main() runs.
load_dotenv()
|
|
def _require_env(name):
    """Return the value of environment variable *name*.

    Raises:
        ValueError: if the variable is unset or empty.
    """
    value = os.getenv(name)
    if not value:
        raise ValueError(f"{name} is not set")
    return value


def main():
    """Run a tour of the Firecrawl v2 API: crawl, webhooks, search, and map.

    Requires FIRECRAWL_API_KEY and FIRECRAWL_API_URL to be present in the
    environment (typically loaded from a .env file at module import).
    """
    api_key = _require_env("FIRECRAWL_API_KEY")
    api_url = _require_env("FIRECRAWL_API_URL")

    firecrawl = Firecrawl(api_key=api_key, api_url=api_url)

    # crawl — blocking helper that waits for the crawl to finish
    crawl_response = firecrawl.crawl("docs.firecrawl.dev", limit=5)
    print(crawl_response)

    # start crawl — kicks off an asynchronous job and returns immediately
    crawl_job = firecrawl.start_crawl('docs.firecrawl.dev', limit=5)
    print(crawl_job)

    crawl_response = firecrawl.get_crawl_status(crawl_job.id)
    print(crawl_response)

    # Poll until the job reaches a terminal state. The original loop only
    # stopped on 'completed', which spins forever if the crawl fails or is
    # cancelled; it also slept one extra interval after completion because
    # it re-fetched the status *before* sleeping.
    # NOTE(review): terminal statuses assumed to be completed/failed/cancelled
    # — confirm against the Firecrawl v2 API reference.
    while crawl_response.status not in ("completed", "failed", "cancelled"):
        print(f"Crawl status: {crawl_response.status}")
        time.sleep(2)
        crawl_response = firecrawl.get_crawl_status(crawl_job.id)

    print(crawl_response)

    # crawl params preview — ask the API to derive crawl options from a prompt
    params_data = firecrawl.crawl_params_preview(
        url="https://docs.firecrawl.dev",
        prompt="Extract all blog posts and documentation"
    )
    print(params_data)

    # crawl with webhook example (simple URL form)
    webhook_job = firecrawl.start_crawl(
        "docs.firecrawl.dev",
        limit=3,
        webhook="https://your-webhook-endpoint.com/firecrawl"
    )
    # Original discarded the started job silently; print it for consistency
    # with every other example in this script.
    print(webhook_job)

    # advanced webhook with configuration (custom headers + event filter)
    webhook_config = WebhookConfig(
        url="https://your-webhook-endpoint.com/firecrawl",
        headers={"Authorization": "Bearer your-token"},
        events=["completed", "failed"]
    )

    webhook_job_advanced = firecrawl.start_crawl(
        "docs.firecrawl.dev",
        limit=2,
        webhook=webhook_config
    )
    print(webhook_job_advanced)

    # Check crawl errors
    errors = firecrawl.get_crawl_errors(crawl_job.id)
    print(f"Crawl errors: {errors.errors}")
    print(f"Robots blocked: {errors.robots_blocked}")

    # search examples
    search_response = firecrawl.search(
        query="What is the capital of France?",
        sources=["web", "news", "images"],
        limit=10
    )
    print(search_response)

    # map example
    map_response = firecrawl.map("https://firecrawl.dev")
    print(map_response)
# Run the examples only when executed as a script, not when imported.
if __name__ == "__main__":
    main()