diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts
index 644be7f65..bb9fcf6ab 100644
--- a/apps/js-sdk/firecrawl/src/index.ts
+++ b/apps/js-sdk/firecrawl/src/index.ts
@@ -922,6 +922,95 @@ export default class FirecrawlApp {
     return { success: false, error: "Internal server error." };
   }
 
+  /**
+   * Initiates an asynchronous extract job using the Firecrawl API.
+   *
+   * Posts to `/v1/extract` and returns the response body on HTTP 200; any other
+   * status is handed to `handleError`.
+   *
+   * @param url - The URL, or list of URLs, to extract data from. Sent as a
+   *   `urls` array, the same field the Python SDK's `async_extract` posts to this
+   *   endpoint.
+   * @param params - Additional parameters for the extract request. A Zod `schema`
+   *   is converted to JSON schema; any other `schema` value is sent unchanged.
+   * @param idempotencyKey - Optional idempotency key for the request.
+   * @returns The response from the extract operation.
+   * @throws FirecrawlError with status 400 if schema conversion throws, or with
+   *   status 500 if sending the request or handling a non-200 response throws.
+   */
+  async asyncExtract(
+    url: string | string[],
+    params?: ExtractParams,
+    idempotencyKey?: string
+  ): Promise<any> {
+    const headers = this.prepareHeaders(idempotencyKey);
+    // The request body carries a `urls` array, so wrap a single URL.
+    const urls = Array.isArray(url) ? url : [url];
+    let jsonSchema: any;
+
+    try {
+      if (params?.schema instanceof zt.ZodType) {
+        jsonSchema = zodToJsonSchema(params.schema);
+      } else {
+        jsonSchema = params?.schema;
+      }
+    } catch (error: any) {
+      throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
+    }
+
+    try {
+      const response: AxiosResponse = await this.postRequest(
+        this.apiUrl + `/v1/extract`,
+        { urls, ...params, schema: jsonSchema },
+        headers
+      );
+
+      if (response.status === 200) {
+        return response.data;
+      } else {
+        this.handleError(response, "start extract job");
+      }
+    } catch (error: any) {
+      throw new FirecrawlError(error.message, 500);
+    }
+    return { success: false, error: "Internal server error." };
+  }
+
+  /**
+   * Retrieves the status of an extract job.
+   *
+   * Issues a GET to `/v1/extract/{jobId}` and returns the response body on HTTP 200;
+   * any other status is handed to `handleError`.
+   *
+   * @param jobId - The ID of the extract job.
+   * @returns The status of the extract job.
+   * @throws FirecrawlError with status 400 if `jobId` is empty, or with status 500
+   *   if sending the request or handling a non-200 response throws.
+   */
+  async getExtractStatus(jobId: string): Promise<any> {
+    if (!jobId) {
+      // An empty ID would address `/v1/extract/` rather than a specific job.
+      throw new FirecrawlError("No extract job ID was provided.", 400);
+    }
+
+    try {
+      const response: AxiosResponse = await this.getRequest(
+        `${this.apiUrl}/v1/extract/${jobId}`,
+        this.prepareHeaders()
+      );
+
+      if (response.status === 200) {
+        return response.data;
+      } else {
+        this.handleError(response, "get extract status");
+      }
+    } catch (error: any) {
+      throw new FirecrawlError(error.message, 500);
+    }
+    // Same fallback as asyncExtract, reached only if handleError returns.
+    return { success: false, error: "Internal server error." };
+  }
+
   /**
    * Prepares the headers for an API request.
    * @param idempotencyKey - Optional key to ensure idempotency.
diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py
index e844b7330..3fff1c4e4 100644
--- a/apps/python-sdk/firecrawl/firecrawl.py
+++ b/apps/python-sdk/firecrawl/firecrawl.py
@@ -582,6 +582,86 @@ class FirecrawlApp:
             raise ValueError(str(e), 500)
 
         return {'success': False, 'error': "Internal server error."}
+
+    def get_extract_status(self, job_id: str) -> Dict[str, Any]:
+        """
+        Retrieve the status of an extract job.
+
+        Sends a GET request to ``/v1/extract/{job_id}`` and returns the decoded JSON
+        body when the response status is 200; any other status is handed to
+        ``_handle_error``.
+
+        Args:
+            job_id (str): The ID of the extract job.
+
+        Returns:
+            Dict[str, Any]: The status of the extract job.
+
+        Raises:
+            ValueError: If ``job_id`` is empty or there is an error retrieving the status.
+        """
+        if not job_id:
+            # An empty ID would address ``/v1/extract/`` rather than a specific job.
+            raise ValueError("No extract job ID was provided.", 400)
+
+        headers = self._prepare_headers()
+        try:
+            response = self._get_request(f'{self.api_url}/v1/extract/{job_id}', headers)
+            if response.status_code == 200:
+                return response.json()
+            else:
+                self._handle_error(response, "get extract status")
+        except Exception as e:
+            # Chain the cause so the original traceback is preserved.
+            raise ValueError(str(e), 500) from e
+
+    def async_extract(self, urls: List[str], params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> Dict[str, Any]:
+        """
+        Initiate an asynchronous extract job.
+
+        Args:
+            urls (List[str]): The URLs to extract data from.
+            params (Optional[Dict[str, Any]]): Additional parameters for the extract request.
+                A ``schema`` exposing ``model_json_schema()`` (e.g. a Pydantic model) is
+                converted to a JSON schema dict; any other value is sent unchanged.
+                ``allow_external_links`` is sent as ``allowExternalLinks``, which
+                defaults to False when neither spelling is supplied.
+            idempotency_key (Optional[str]): A unique key to ensure idempotency of requests.
+
+        Returns:
+            Dict[str, Any]: The response from the extract operation.
+
+        Raises:
+            ValueError: If there is an error initiating the extract job.
+        """
+        headers = self._prepare_headers(idempotency_key)
+
+        # Build the payload from a copy so the caller's dict is never mutated.
+        request_data = {'urls': urls, **(params or {})}
+
+        schema = request_data.get('schema')
+        if schema and hasattr(schema, 'model_json_schema'):
+            # Convert Pydantic model to JSON schema
+            schema = schema.model_json_schema()
+        # Otherwise assume it's already a JSON schema dict
+        request_data['schema'] = schema
+
+        # Send only the camelCase field: translate the snake_case option when present,
+        # otherwise keep a caller-supplied ``allowExternalLinks`` instead of resetting it.
+        if 'allow_external_links' in request_data:
+            request_data['allowExternalLinks'] = request_data.pop('allow_external_links')
+        else:
+            request_data.setdefault('allowExternalLinks', False)
+
+        try:
+            response = self._post_request(f'{self.api_url}/v1/extract', request_data, headers)
+            if response.status_code == 200:
+                return response.json()
+            else:
+                self._handle_error(response, "async extract")
+        except Exception as e:
+            # Chain the cause so the original traceback is preserved.
+            raise ValueError(str(e), 500) from e
 
     def _prepare_headers(self, idempotency_key: Optional[str] = None) -> Dict[str, str]:
         """