mirror of
https://github.com/mendableai/firecrawl.git
synced 2025-12-24 21:54:33 +00:00
Nick: sdk async and get status
This commit is contained in:
parent
9ec08d7020
commit
a185c05a5c
@ -922,6 +922,72 @@ export default class FirecrawlApp {
|
||||
return { success: false, error: "Internal server error." };
|
||||
}
|
||||
|
||||
/**
 * Initiates an asynchronous extract job using the Firecrawl API.
 *
 * The job is started with a POST to `/v1/extract`. On HTTP 200 the response
 * body is returned as-is; any other status is passed to `handleError`.
 *
 * @param url - The URL, or list of URLs, to extract data from. A single
 *   string is wrapped into a one-element list before sending.
 * @param params - Additional parameters for the extract request. `schema`
 *   may be a Zod schema (converted to JSON schema before sending) or a plain
 *   JSON schema object (sent unchanged).
 * @param idempotencyKey - Optional idempotency key for the request.
 * @returns The response from the extract operation.
 * @throws FirecrawlError with status 400 if the schema conversion throws, or
 *   with status 500 if sending the request or handling the response throws.
 */
async asyncExtract(
  url: string | string[],
  params?: ExtractParams,
  idempotencyKey?: string
): Promise<ExtractResponse | ErrorResponse> {
  const headers = this.prepareHeaders(idempotencyKey);

  // The extract endpoint expects a `urls` array, so a lone URL is wrapped.
  const urls: string[] = Array.isArray(url) ? url : [url];

  let jsonSchema: any;
  try {
    if (params?.schema instanceof zt.ZodType) {
      jsonSchema = zodToJsonSchema(params.schema);
    } else {
      jsonSchema = params?.schema;
    }
  } catch (error: any) {
    throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
  }

  try {
    // `schema` is listed last so the converted JSON schema replaces any Zod
    // object that was spread in from `params`.
    const response: AxiosResponse = await this.postRequest(
      this.apiUrl + `/v1/extract`,
      { urls, ...params, schema: jsonSchema },
      headers
    );

    if (response.status === 200) {
      return response.data;
    } else {
      this.handleError(response, "start extract job");
    }
  } catch (error: any) {
    throw new FirecrawlError(error.message, 500);
  }

  // Only reached if handleError returns instead of throwing.
  return { success: false, error: "Internal server error." };
}
|
||||
|
||||
/**
 * Fetches the current status of an extract job.
 *
 * Issues a GET to `/v1/extract/{jobId}`. On HTTP 200 the response body is
 * returned; any other status is passed to `handleError`.
 *
 * @param jobId - The ID of the extract job.
 * @returns The status of the extract job.
 * @throws FirecrawlError with status 500 if the request or the error
 *   handling throws.
 */
async getExtractStatus(jobId: string): Promise<any> {
  try {
    const statusUrl = `${this.apiUrl}/v1/extract/${jobId}`;
    const result: AxiosResponse = await this.getRequest(
      statusUrl,
      this.prepareHeaders()
    );

    if (result.status !== 200) {
      this.handleError(result, "get extract status");
    } else {
      return result.data;
    }
  } catch (error: any) {
    throw new FirecrawlError(error.message, 500);
  }
}
|
||||
|
||||
/**
|
||||
* Prepares the headers for an API request.
|
||||
* @param idempotencyKey - Optional key to ensure idempotency.
|
||||
|
||||
@ -582,6 +582,69 @@ class FirecrawlApp:
|
||||
raise ValueError(str(e), 500)
|
||||
|
||||
return {'success': False, 'error': "Internal server error."}
|
||||
|
||||
def get_extract_status(self, job_id: str) -> Dict[str, Any]:
    """
    Fetch the current status of an extract job.

    Sends a GET request to ``/v1/extract/{job_id}``. On HTTP 200 the decoded
    JSON body is returned; any other status is passed to ``_handle_error``.

    Args:
        job_id (str): The ID of the extract job.

    Returns:
        Dict[str, Any]: The status of the extract job.

    Raises:
        ValueError: If there is an error retrieving the status. The original
            message and the code 500 are carried in ``args``.
    """
    request_headers = self._prepare_headers()
    try:
        endpoint = f'{self.api_url}/v1/extract/{job_id}'
        reply = self._get_request(endpoint, request_headers)
        if reply.status_code != 200:
            self._handle_error(reply, "get extract status")
        else:
            return reply.json()
    except Exception as err:
        raise ValueError(str(err), 500)
|
||||
|
||||
def async_extract(self, urls: List[str], params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> Dict[str, Any]:
    """
    Initiate an asynchronous extract job.

    Sends a POST request to ``/v1/extract``. On HTTP 200 the decoded JSON body
    is returned; any other status is passed to ``_handle_error``.

    Args:
        urls (List[str]): The URLs to extract data from.
        params (Optional[Dict[str, Any]]): Additional parameters for the extract request.
            ``schema`` may be a JSON schema dict (sent unchanged) or an object
            exposing ``model_json_schema()`` such as a Pydantic model (converted
            before sending). The external-links flag is read from
            ``allow_external_links`` or, if that key is absent, from
            ``allowExternalLinks``; it defaults to ``False``.
        idempotency_key (Optional[str]): A unique key to ensure idempotency of requests.

    Returns:
        Dict[str, Any]: The response from the extract operation, or a generic
            failure dict if a non-200 response was handled without raising.

    Raises:
        ValueError: If there is an error initiating the extract job. The
            original message and the code 500 are carried in ``args``.
    """
    headers = self._prepare_headers(idempotency_key)
    options = params or {}

    schema = options.get('schema')
    if schema and hasattr(schema, 'model_json_schema'):
        # Convert Pydantic model to JSON schema
        schema = schema.model_json_schema()
    # Otherwise assume it's already a JSON schema dict

    # Honour either spelling so a caller-supplied camelCase flag is not
    # silently reset to False; the snake_case key keeps precedence.
    allow_external_links = options.get(
        'allow_external_links',
        options.get('allowExternalLinks', False),
    )

    # Later keys win: the resolved flag and the converted schema replace
    # whatever was spread in from the caller's params.
    request_data = {
        'urls': urls,
        **options,
        'allowExternalLinks': allow_external_links,
        'schema': schema,
    }

    try:
        response = self._post_request(f'{self.api_url}/v1/extract', request_data, headers)
        if response.status_code == 200:
            return response.json()
        self._handle_error(response, "async extract")
    except Exception as e:
        raise ValueError(str(e), 500) from e

    # Only reached if _handle_error returns instead of raising.
    return {'success': False, 'error': "Internal server error."}
|
||||
|
||||
def _prepare_headers(self, idempotency_key: Optional[str] = None) -> Dict[str, str]:
|
||||
"""
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user