Nick: sdk async and get status

This commit is contained in:
Nicolas 2025-01-07 17:27:40 -03:00
parent 9ec08d7020
commit a185c05a5c
2 changed files with 129 additions and 0 deletions

View File

@ -922,6 +922,72 @@ export default class FirecrawlApp {
return { success: false, error: "Internal server error." };
}
/**
 * Starts an asynchronous extract job on the Firecrawl API and returns the
 * API's response without waiting for the job to finish.
 *
 * The request is POSTed to `/v1/extract` with the target URLs under the
 * `urls` key (the Python SDK's `async_extract` posts `urls` to the same
 * route), followed by every entry of `params`, with `schema` replaced by its
 * JSON-schema form.
 *
 * @param url - The URL, or list of URLs, to extract data from. A single
 *   string is wrapped in a one-element array.
 * @param params - Additional parameters for the extract request. A Zod
 *   `schema` is converted with `zodToJsonSchema`; any other value is sent
 *   as-is.
 * @param idempotencyKey - Optional idempotency key for the request.
 * @returns The response body on HTTP 200, or a generic error object if
 *   `handleError` returns instead of throwing.
 * @throws FirecrawlError with status 400 if the schema cannot be converted;
 *   any FirecrawlError raised while sending the request is rethrown
 *   unchanged; every other failure is wrapped in a FirecrawlError with
 *   status 500.
 */
async asyncExtract(
  url: string | string[],
  params?: ExtractParams,
  idempotencyKey?: string
): Promise<ExtractResponse | ErrorResponse> {
  const headers = this.prepareHeaders(idempotencyKey);
  const urls = Array.isArray(url) ? url : [url];

  let jsonSchema: any;
  try {
    jsonSchema =
      params?.schema instanceof zt.ZodType
        ? zodToJsonSchema(params.schema)
        : params?.schema;
  } catch (error: any) {
    throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
  }

  try {
    const response: AxiosResponse = await this.postRequest(
      this.apiUrl + `/v1/extract`,
      // `params` may override `urls`, exactly as it could override `url`
      // before; the converted schema always wins over `params.schema`.
      { urls, ...params, schema: jsonSchema },
      headers
    );
    if (response.status === 200) {
      return response.data;
    }
    // NOTE(review): handleError is presumably expected to throw; its body is
    // not visible here, hence the fallback return below.
    this.handleError(response, "start extract job");
  } catch (error: unknown) {
    // Keep the original status code of errors that are already typed
    // instead of flattening everything to 500.
    if (error instanceof FirecrawlError) {
      throw error;
    }
    const message = error instanceof Error ? error.message : String(error);
    throw new FirecrawlError(message, 500);
  }
  return { success: false, error: "Internal server error." };
}
/**
 * Retrieves the status of an extract job by issuing a GET request to
 * `/v1/extract/{jobId}`.
 *
 * @param jobId - The ID of the extract job. Must be a non-empty string.
 * @returns The response body on HTTP 200, or a generic error object if
 *   `handleError` returns instead of throwing.
 * @throws FirecrawlError with status 400 if `jobId` is empty; any
 *   FirecrawlError raised while sending the request is rethrown unchanged;
 *   every other failure is wrapped in a FirecrawlError with status 500.
 */
async getExtractStatus(jobId: string): Promise<any> {
  // An empty ID would silently query `/v1/extract/` instead of a job.
  if (!jobId) {
    throw new FirecrawlError("No extract job ID provided", 400);
  }

  try {
    const response: AxiosResponse = await this.getRequest(
      `${this.apiUrl}/v1/extract/${jobId}`,
      this.prepareHeaders()
    );
    if (response.status === 200) {
      return response.data;
    }
    // NOTE(review): handleError is presumably expected to throw; its body is
    // not visible here, hence the fallback return below.
    this.handleError(response, "get extract status");
  } catch (error: unknown) {
    // Keep the original status code of errors that are already typed
    // instead of flattening everything to 500.
    if (error instanceof FirecrawlError) {
      throw error;
    }
    const message = error instanceof Error ? error.message : String(error);
    throw new FirecrawlError(message, 500);
  }
  return { success: false, error: "Internal server error." };
}
/**
* Prepares the headers for an API request.
* @param idempotencyKey - Optional key to ensure idempotency.

View File

@ -582,6 +582,69 @@ class FirecrawlApp:
raise ValueError(str(e), 500)
return {'success': False, 'error': "Internal server error."}
def get_extract_status(self, job_id: str) -> Dict[str, Any]:
    """
    Retrieve the status of an extract job.

    Sends a GET request to ``{api_url}/v1/extract/{job_id}`` and returns the
    decoded JSON body when the response status is 200.

    Args:
        job_id (str): The ID of the extract job. Must be non-empty.

    Returns:
        Dict[str, Any]: The status of the extract job, or a generic error
        dict if ``_handle_error`` returns instead of raising.

    Raises:
        ValueError: If ``job_id`` is empty or there is an error retrieving
            the status. The underlying exception is attached as the cause.
    """
    # An empty ID would silently query `/v1/extract/` instead of a job.
    if not job_id:
        raise ValueError("No extract job ID provided", 400)

    headers = self._prepare_headers()
    try:
        response = self._get_request(f'{self.api_url}/v1/extract/{job_id}', headers)
        if response.status_code == 200:
            return response.json()
        # NOTE(review): _handle_error presumably raises; its body is not
        # visible here, hence the fallback return below.
        self._handle_error(response, "get extract status")
    except Exception as e:
        raise ValueError(str(e), 500) from e
    return {'success': False, 'error': "Internal server error."}
def async_extract(self, urls: List[str], params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> Dict[str, Any]:
    """
    Initiate an asynchronous extract job.

    POSTs to ``{api_url}/v1/extract`` a payload made of ``urls``, every entry
    of ``params``, the ``allowExternalLinks`` flag and the ``schema`` (in its
    JSON-schema form), then returns the decoded JSON body when the response
    status is 200.

    Args:
        urls (List[str]): The URLs to extract data from.
        params (Optional[Dict[str, Any]]): Additional parameters for the extract request.
            A ``schema`` exposing ``model_json_schema`` (e.g. a Pydantic model) is
            converted by calling it; anything else is assumed to already be a
            JSON schema dict. The external-links flag may be given as
            ``allow_external_links`` or ``allowExternalLinks``; the snake_case
            spelling takes precedence when both are present.
        idempotency_key (Optional[str]): A unique key to ensure idempotency of requests.

    Returns:
        Dict[str, Any]: The response from the extract operation, or a generic
        error dict if ``_handle_error`` returns instead of raising.

    Raises:
        ValueError: If there is an error initiating the extract job. The
            underlying exception is attached as the cause.
    """
    headers = self._prepare_headers(idempotency_key)
    options = params or {}

    schema = options.get('schema')
    if schema and hasattr(schema, 'model_json_schema'):
        # Convert Pydantic model to JSON schema
        schema = schema.model_json_schema()
    # Otherwise assume it's already a JSON schema dict

    # Honour a caller-supplied camelCase flag instead of overwriting it with
    # False when only the snake_case spelling is missing.
    allow_external_links = options.get(
        'allow_external_links',
        options.get('allowExternalLinks', False),
    )

    request_data = {
        'urls': urls,
        **options,
        'allowExternalLinks': allow_external_links,
        'schema': schema,
    }

    try:
        response = self._post_request(f'{self.api_url}/v1/extract', request_data, headers)
        if response.status_code == 200:
            return response.json()
        # NOTE(review): _handle_error presumably raises; its body is not
        # visible here, hence the fallback return below.
        self._handle_error(response, "async extract")
    except Exception as e:
        raise ValueError(str(e), 500) from e
    return {'success': False, 'error': "Internal server error."}
def _prepare_headers(self, idempotency_key: Optional[str] = None) -> Dict[str, str]:
"""