feat(firecrawl): add integration parameter support and enhance kwargs handling

This commit is contained in:
Ademílson Tonato 2025-06-25 22:08:38 +01:00
parent f4714f4849
commit 84431915ca
No known key found for this signature in database
GPG Key ID: 169C7BE271C9FA3A

View File

@ -650,12 +650,16 @@ class FirecrawlApp:
# Add any additional kwargs
search_params.update(kwargs)
_integration = search_params.get('integration')
# Create final params object
final_params = SearchParams(query=query, **search_params)
params_dict = final_params.dict(exclude_none=True)
params_dict['origin'] = f"python-sdk@{version}"
if _integration:
params_dict['integration'] = _integration
# Make request
response = requests.post(
f"{self.api_url}/v1/search",
@ -776,6 +780,7 @@ class FirecrawlApp:
# Add any additional kwargs
crawl_params.update(kwargs)
_integration = crawl_params.get('integration')
# Create final params object
final_params = CrawlParams(**crawl_params)
@ -783,6 +788,9 @@ class FirecrawlApp:
params_dict['url'] = url
params_dict['origin'] = f"python-sdk@{version}"
if _integration:
params_dict['integration'] = _integration
# Make request
headers = self._prepare_headers(idempotency_key)
response = self._post_request(f'{self.api_url}/v1/crawl', params_dict, headers)
@ -815,6 +823,7 @@ class FirecrawlApp:
ignore_query_parameters: Optional[bool] = None,
regex_on_full_url: Optional[bool] = None,
delay: Optional[int] = None,
max_concurrency: Optional[int] = None,
idempotency_key: Optional[str] = None,
**kwargs
) -> CrawlResponse:
@ -1185,6 +1194,7 @@ class FirecrawlApp:
# Add any additional kwargs
map_params.update(kwargs)
_integration = map_params.get('integration')
# Create final params object
final_params = MapParams(**map_params)
@ -1192,6 +1202,9 @@ class FirecrawlApp:
params_dict['url'] = url
params_dict['origin'] = f"python-sdk@{version}"
if _integration:
params_dict['integration'] = _integration
# Make request
response = requests.post(
f"{self.api_url}/v1/map",
@ -1724,7 +1737,8 @@ class FirecrawlApp:
allow_external_links: Optional[bool] = False,
enable_web_search: Optional[bool] = False,
show_sources: Optional[bool] = False,
agent: Optional[Dict[str, Any]] = None) -> ExtractResponse[Any]:
agent: Optional[Dict[str, Any]] = None,
**kwargs) -> ExtractResponse[Any]:
"""
Extract structured information from URLs.
@ -1737,6 +1751,7 @@ class FirecrawlApp:
enable_web_search (Optional[bool]): Enable web search
show_sources (Optional[bool]): Include source URLs
agent (Optional[Dict[str, Any]]): Agent configuration
**kwargs: Additional parameters to pass to the API
Returns:
ExtractResponse[Any] with:
@ -1747,6 +1762,9 @@ class FirecrawlApp:
Raises:
ValueError: If prompt/schema missing or extraction fails
"""
# Validate any additional kwargs
self._validate_kwargs(kwargs, "extract")
headers = self._prepare_headers()
if not prompt and not schema:
@ -1776,6 +1794,9 @@ class FirecrawlApp:
if agent:
request_data['agent'] = agent
# Add any additional kwargs
request_data.update(kwargs)
try:
# Send the initial extract request
response = self._post_request(
@ -2524,12 +2545,13 @@ class FirecrawlApp:
method_params = {
"scrape_url": {"formats", "include_tags", "exclude_tags", "only_main_content", "wait_for",
"timeout", "location", "mobile", "skip_tls_verification", "remove_base64_images",
"block_ads", "proxy", "extract", "json_options", "actions", "change_tracking_options"},
"search": {"limit", "tbs", "filter", "lang", "country", "location", "timeout", "scrape_options"},
"block_ads", "proxy", "extract", "json_options", "actions", "change_tracking_options", "integration"},
"search": {"limit", "tbs", "filter", "lang", "country", "location", "timeout", "scrape_options", "integration"},
"crawl_url": {"include_paths", "exclude_paths", "max_depth", "max_discovery_depth", "limit",
"allow_backward_links", "allow_external_links", "ignore_sitemap", "scrape_options",
"webhook", "deduplicate_similar_urls", "ignore_query_parameters", "regex_on_full_url"},
"map_url": {"search", "ignore_sitemap", "include_subdomains", "sitemap_only", "limit", "timeout"},
"webhook", "deduplicate_similar_urls", "ignore_query_parameters", "regex_on_full_url", "integration"},
"map_url": {"search", "ignore_sitemap", "include_subdomains", "sitemap_only", "limit", "timeout", "integration"},
"extract": {"prompt", "schema", "system_prompt", "allow_external_links", "enable_web_search", "show_sources", "agent", "integration"},
"batch_scrape_urls": {"formats", "headers", "include_tags", "exclude_tags", "only_main_content",
"wait_for", "timeout", "location", "mobile", "skip_tls_verification",
"remove_base64_images", "block_ads", "proxy", "extract", "json_options",