mirror of
https://github.com/mendableai/firecrawl.git
synced 2025-06-27 00:41:33 +00:00
SDK support
This commit is contained in:
parent
852408bf44
commit
2440253fec
@@ -174,6 +174,7 @@ export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchem
|
||||
}
|
||||
actions?: ActionsSchema;
|
||||
agent?: AgentOptions;
|
||||
zeroDataRetention?: boolean;
|
||||
}
|
||||
|
||||
export interface ActionsResult {
|
||||
@@ -228,6 +229,7 @@ export interface CrawlParams {
|
||||
*/
|
||||
delay?: number;
|
||||
maxConcurrency?: number;
|
||||
zeroDataRetention?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -471,6 +471,7 @@ class FirecrawlApp:
|
||||
change_tracking_options: Optional[ChangeTrackingOptions] = None,
|
||||
max_age: Optional[int] = None,
|
||||
store_in_cache: Optional[bool] = None,
|
||||
zero_data_retention: Optional[bool] = None,
|
||||
**kwargs) -> ScrapeResponse[Any]:
|
||||
"""
|
||||
Scrape and extract content from a URL.
|
||||
@@ -493,6 +494,7 @@ class FirecrawlApp:
|
||||
json_options (Optional[JsonConfig]): JSON extraction settings
|
||||
actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]]): Actions to perform
|
||||
change_tracking_options (Optional[ChangeTrackingOptions]): Change tracking settings
|
||||
zero_data_retention (Optional[bool]): Whether to delete data after scrape is done
|
||||
|
||||
|
||||
Returns:
|
||||
@@ -697,6 +699,7 @@ class FirecrawlApp:
|
||||
regex_on_full_url: Optional[bool] = None,
|
||||
delay: Optional[int] = None,
|
||||
max_concurrency: Optional[int] = None,
|
||||
zero_data_retention: Optional[bool] = None,
|
||||
poll_interval: Optional[int] = 2,
|
||||
idempotency_key: Optional[str] = None,
|
||||
**kwargs
|
||||
@@ -722,6 +725,7 @@ class FirecrawlApp:
|
||||
regex_on_full_url (Optional[bool]): Apply regex to full URLs
|
||||
delay (Optional[int]): Delay in seconds between scrapes
|
||||
max_concurrency (Optional[int]): Maximum number of concurrent scrapes
|
||||
zero_data_retention (Optional[bool]): Whether to delete data after 24 hours
|
||||
poll_interval (Optional[int]): Seconds between status checks (default: 2)
|
||||
idempotency_key (Optional[str]): Unique key to prevent duplicate requests
|
||||
**kwargs: Additional parameters to pass to the API
|
||||
@@ -773,7 +777,8 @@ class FirecrawlApp:
|
||||
crawl_params['delay'] = delay
|
||||
if max_concurrency is not None:
|
||||
crawl_params['maxConcurrency'] = max_concurrency
|
||||
|
||||
if zero_data_retention is not None:
|
||||
crawl_params['zeroDataRetention'] = zero_data_retention
|
||||
# Add any additional kwargs
|
||||
crawl_params.update(kwargs)
|
||||
|
||||
@@ -815,6 +820,8 @@ class FirecrawlApp:
|
||||
ignore_query_parameters: Optional[bool] = None,
|
||||
regex_on_full_url: Optional[bool] = None,
|
||||
delay: Optional[int] = None,
|
||||
max_concurrency: Optional[int] = None,
|
||||
zero_data_retention: Optional[bool] = None,
|
||||
idempotency_key: Optional[str] = None,
|
||||
**kwargs
|
||||
) -> CrawlResponse:
|
||||
@@ -839,6 +846,7 @@ class FirecrawlApp:
|
||||
regex_on_full_url (Optional[bool]): Apply regex to full URLs
|
||||
delay (Optional[int]): Delay in seconds between scrapes
|
||||
max_concurrency (Optional[int]): Maximum number of concurrent scrapes
|
||||
zero_data_retention (Optional[bool]): Whether to delete data after 24 hours
|
||||
idempotency_key (Optional[str]): Unique key to prevent duplicate requests
|
||||
**kwargs: Additional parameters to pass to the API
|
||||
|
||||
@@ -890,7 +898,8 @@ class FirecrawlApp:
|
||||
crawl_params['delay'] = delay
|
||||
if max_concurrency is not None:
|
||||
crawl_params['maxConcurrency'] = max_concurrency
|
||||
|
||||
if zero_data_retention is not None:
|
||||
crawl_params['zeroDataRetention'] = zero_data_retention
|
||||
# Add any additional kwargs
|
||||
crawl_params.update(kwargs)
|
||||
|
||||
@@ -1069,6 +1078,7 @@ class FirecrawlApp:
|
||||
regex_on_full_url: Optional[bool] = None,
|
||||
delay: Optional[int] = None,
|
||||
max_concurrency: Optional[int] = None,
|
||||
zero_data_retention: Optional[bool] = None,
|
||||
idempotency_key: Optional[str] = None,
|
||||
**kwargs
|
||||
) -> 'CrawlWatcher':
|
||||
@@ -1093,6 +1103,7 @@ class FirecrawlApp:
|
||||
regex_on_full_url (Optional[bool]): Apply regex to full URLs
|
||||
delay (Optional[int]): Delay in seconds between scrapes
|
||||
max_concurrency (Optional[int]): Maximum number of concurrent scrapes
|
||||
zero_data_retention (Optional[bool]): Whether to delete data after 24 hours
|
||||
idempotency_key (Optional[str]): Unique key to prevent duplicate requests
|
||||
**kwargs: Additional parameters to pass to the API
|
||||
|
||||
@@ -1119,6 +1130,7 @@ class FirecrawlApp:
|
||||
regex_on_full_url=regex_on_full_url,
|
||||
delay=delay,
|
||||
max_concurrency=max_concurrency,
|
||||
zero_data_retention=zero_data_retention,
|
||||
idempotency_key=idempotency_key,
|
||||
**kwargs
|
||||
)
|
||||
@@ -1236,6 +1248,7 @@ class FirecrawlApp:
|
||||
agent: Optional[AgentOptions] = None,
|
||||
poll_interval: Optional[int] = 2,
|
||||
max_concurrency: Optional[int] = None,
|
||||
zero_data_retention: Optional[bool] = None,
|
||||
idempotency_key: Optional[str] = None,
|
||||
**kwargs
|
||||
) -> BatchScrapeStatusResponse:
|
||||
@@ -1399,6 +1412,7 @@ class FirecrawlApp:
|
||||
actions (Optional[List[Union]]): Actions to perform
|
||||
agent (Optional[AgentOptions]): Agent configuration
|
||||
max_concurrency (Optional[int]): Maximum number of concurrent scrapes
|
||||
zero_data_retention (Optional[bool]): Whether to delete data after 24 hours
|
||||
idempotency_key (Optional[str]): Unique key to prevent duplicate requests
|
||||
**kwargs: Additional parameters to pass to the API
|
||||
|
||||
@@ -1460,6 +1474,8 @@ class FirecrawlApp:
|
||||
scrape_params['agent'] = agent.dict(exclude_none=True)
|
||||
if max_concurrency is not None:
|
||||
scrape_params['maxConcurrency'] = max_concurrency
|
||||
if zero_data_retention is not None:
|
||||
scrape_params['zeroDataRetention'] = zero_data_retention
|
||||
|
||||
# Add any additional kwargs
|
||||
scrape_params.update(kwargs)
|
||||
@@ -1509,6 +1525,7 @@ class FirecrawlApp:
|
||||
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
|
||||
agent: Optional[AgentOptions] = None,
|
||||
max_concurrency: Optional[int] = None,
|
||||
zero_data_retention: Optional[bool] = None,
|
||||
idempotency_key: Optional[str] = None,
|
||||
**kwargs
|
||||
) -> 'CrawlWatcher':
|
||||
@@ -1535,6 +1552,7 @@ class FirecrawlApp:
|
||||
actions (Optional[List[Union]]): Actions to perform
|
||||
agent (Optional[AgentOptions]): Agent configuration
|
||||
max_concurrency (Optional[int]): Maximum number of concurrent scrapes
|
||||
zero_data_retention (Optional[bool]): Whether to delete data after 24 hours
|
||||
idempotency_key (Optional[str]): Unique key to prevent duplicate requests
|
||||
**kwargs: Additional parameters to pass to the API
|
||||
|
||||
@@ -1592,6 +1610,8 @@ class FirecrawlApp:
|
||||
scrape_params['agent'] = agent.dict(exclude_none=True)
|
||||
if max_concurrency is not None:
|
||||
scrape_params['maxConcurrency'] = max_concurrency
|
||||
if zero_data_retention is not None:
|
||||
scrape_params['zeroDataRetention'] = zero_data_retention
|
||||
|
||||
# Add any additional kwargs
|
||||
scrape_params.update(kwargs)
|
||||
@@ -3176,6 +3196,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
||||
json_options: Optional[JsonConfig] = None,
|
||||
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
|
||||
agent: Optional[AgentOptions] = None,
|
||||
zero_data_retention: Optional[bool] = None,
|
||||
idempotency_key: Optional[str] = None,
|
||||
**kwargs
|
||||
) -> BatchScrapeResponse:
|
||||
@@ -3201,6 +3222,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
||||
json_options (Optional[JsonConfig]): JSON extraction config
|
||||
actions (Optional[List[Union]]): Actions to perform
|
||||
agent (Optional[AgentOptions]): Agent configuration
|
||||
zero_data_retention (Optional[bool]): Whether to delete data after 24 hours
|
||||
idempotency_key (Optional[str]): Unique key to prevent duplicate requests
|
||||
**kwargs: Additional parameters to pass to the API
|
||||
|
||||
@@ -3260,6 +3282,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
||||
scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
|
||||
if agent is not None:
|
||||
scrape_params['agent'] = agent.dict(exclude_none=True)
|
||||
if zero_data_retention is not None:
|
||||
scrape_params['zeroDataRetention'] = zero_data_retention
|
||||
|
||||
# Add any additional kwargs
|
||||
scrape_params.update(kwargs)
|
||||
|
Loading…
x
Reference in New Issue
Block a user