Mirror of https://github.com/mendableai/firecrawl.git (synced 2025-12-27 23:24:08 +00:00)
Merge pull request #1632 from mendableai/feat/api-integration-parameter
feat(api): add integration field to jobs and update related controllers and types
This commit is contained in commit 71caf8ae57.
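The change threads an optional "integration" identifier from public request bodies through job payloads and into job logging, so requests arriving via known integrations (Zapier, n8n, Dify, and the others enumerated below) can be attributed. As a rough sketch of how a caller might set the field — the endpoint URL, auth header, and other body fields here are illustrative assumptions; only the integration parameter itself comes from this change:

    // Hypothetical client call; only the `integration` field is introduced by this PR.
    const res = await fetch("https://api.firecrawl.dev/v1/scrape", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: "Bearer fc-YOUR-API-KEY", // placeholder key
      },
      body: JSON.stringify({
        url: "https://example.com",
        integration: "n8n", // one of the IntegrationEnum values added below
      }),
    });
    console.log(res.status);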
@@ -204,6 +204,7 @@ export async function crawlController(req: Request, res: Response) {
   internalOptions,
   team_id,
   origin: req.body.origin ?? defaultOrigin,
+  integration: req.body.integration,
   crawl_id: id,
   sitemapped: true,
 },

@@ -245,6 +246,7 @@ export async function crawlController(req: Request, res: Response) {
   internalOptions,
   team_id,
   origin: req.body.origin ?? defaultOrigin,
+  integration: req.body.integration,
   crawl_id: id,
 },
 {

@@ -81,6 +81,7 @@ export async function scrapeHelper(
   scrapeOptions,
   internalOptions,
   origin: req.body.origin ?? defaultOrigin,
+  integration: req.body.integration,
   is_scrape: true,
   startTime: Date.now(),
 },
@@ -222,7 +222,8 @@ export async function searchController(req: Request, res: Response) {
   url: req.body.query,
   scrapeOptions: fromLegacyScrapeOptions(req.body.pageOptions, undefined, 60000, team_id),
   crawlerOptions: crawlerOptions,
-  origin: origin,
+  origin,
+  integration: req.body.integration,
 });
 return res.status(result.returnCode).json(result);
 } catch (error) {
@@ -134,6 +134,7 @@ export async function batchScrapeController(
   crawlerOptions: null,
   scrapeOptions,
   origin: "api",
+  integration: req.body.integration,
   crawl_id: id,
   sitemapped: true,
   v1: true,

@@ -114,6 +114,7 @@ export async function crawlController(
   scrapeOptions: sc.scrapeOptions,
   internalOptions: sc.internalOptions,
   origin: req.body.origin,
+  integration: req.body.integration,
   crawl_id: id,
   webhook: req.body.webhook,
   v1: true,

@@ -390,6 +390,7 @@ export async function mapController(
   crawlerOptions: {},
   scrapeOptions: {},
   origin: req.body.origin ?? "api",
+  integration: req.body.integration,
   num_tokens: 0,
 });
@@ -57,7 +57,8 @@ export async function scrapeController(
   useCache: req.body.__experimental_cache ? true : false,
   bypassBilling: isDirectToBullMQ,
 },
-  origin: req.body.origin,
+  origin,
+  integration: req.body.integration,
   startTime,
 },
 {},
@@ -307,6 +307,7 @@ export async function searchController(
   url: req.body.query,
   scrapeOptions: req.body.scrapeOptions,
   origin: req.body.origin,
+  integration: req.body.integration,
   cost_tracking: costTracking,
 },
 false,
@@ -10,6 +10,21 @@ import {
 } from "../../lib/entities";
 import { InternalOptions } from "../../scraper/scrapeURL";

+export enum IntegrationEnum {
+  DIFY = "dify",
+  ZAPIER = "zapier",
+  PIPEDREAM = "pipedream",
+  RAYCAST = "raycast",
+  LANGCHAIN = "langchain",
+  CREWAI = "crewai",
+  LLAMAINDEX = "llamaindex",
+  N8N = "n8n",
+  CAMELAI = "camelai",
+  MAKE = "make",
+  FLOWISE = "flowise",
+  METAGPT = "metagpt",
+}
+
 export type Format =
   | "markdown"
   | "html"

@@ -470,6 +485,7 @@ export const extractV1Options = z
   enableWebSearch: z.boolean().default(false),
   scrapeOptions: baseScrapeOptions.default({ onlyMainContent: false }).optional(),
   origin: z.string().optional().default("api"),
+  integration: z.nativeEnum(IntegrationEnum).optional().transform(val => val || null),
   urlTrace: z.boolean().default(false),
   timeout: z.number().int().positive().finite().safe().default(60000),
   __experimental_streamSteps: z.boolean().default(false),

@@ -528,6 +544,7 @@ export const scrapeRequestSchema = baseScrapeOptions
   extract: extractOptionsWithAgent.optional(),
   jsonOptions: extractOptionsWithAgent.optional(),
   origin: z.string().optional().default("api"),
+  integration: z.nativeEnum(IntegrationEnum).optional().transform(val => val || null),
   timeout: z.number().int().positive().finite().safe().default(30000),
 })
 .strict(strictMessage)

@@ -562,6 +579,7 @@ export const batchScrapeRequestSchema = baseScrapeOptions
 .extend({
   urls: url.array(),
   origin: z.string().optional().default("api"),
+  integration: z.nativeEnum(IntegrationEnum).optional().transform(val => val || null),
   webhook: webhookSchema.optional(),
   appendToId: z.string().uuid().optional(),
   ignoreInvalidURLs: z.boolean().default(false),

@@ -575,6 +593,7 @@ export const batchScrapeRequestSchemaNoURLValidation = baseScrapeOptions
 .extend({
   urls: z.string().array(),
   origin: z.string().optional().default("api"),
+  integration: z.nativeEnum(IntegrationEnum).optional().transform(val => val || null),
   webhook: webhookSchema.optional(),
   appendToId: z.string().uuid().optional(),
   ignoreInvalidURLs: z.boolean().default(false),

@@ -622,6 +641,7 @@ export const crawlRequestSchema = crawlerOptions
 .extend({
   url,
   origin: z.string().optional().default("api"),
+  integration: z.nativeEnum(IntegrationEnum).optional().transform(val => val || null),
   scrapeOptions: baseScrapeOptions.default({}),
   webhook: webhookSchema.optional(),
   limit: z.number().default(10000),

@@ -653,6 +673,7 @@ export const mapRequestSchema = crawlerOptions
 .extend({
   url,
   origin: z.string().optional().default("api"),
+  integration: z.nativeEnum(IntegrationEnum).optional().transform(val => val || null),
   includeSubdomains: z.boolean().default(true),
   search: z.string().optional(),
   ignoreSitemap: z.boolean().default(false),

@@ -1204,6 +1225,7 @@ export const searchRequestSchema = z
   country: z.string().optional().default("us"),
   location: z.string().optional(),
   origin: z.string().optional().default("api"),
+  integration: z.nativeEnum(IntegrationEnum).optional().transform(val => val || null),
   timeout: z.number().int().positive().finite().safe().default(60000),
   ignoreInvalidURLs: z.boolean().optional().default(false),
   __searchPreviewToken: z.string().optional(),
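Every public request schema gains the same optional field, validated against IntegrationEnum and normalized to null when the caller omits it. A minimal stand-alone sketch of that validation behavior (the abbreviated enum and the isolated field schema below are illustrative, not the actual exports):

    import { z } from "zod";

    enum IntegrationEnum {
      ZAPIER = "zapier",
      N8N = "n8n",
      // ...remaining members as declared in the diff above
    }

    // Same pattern as the request schemas: unknown values are rejected,
    // and a missing value becomes null instead of staying undefined.
    const integrationField = z
      .nativeEnum(IntegrationEnum)
      .optional()
      .transform(val => val || null);

    integrationField.parse("n8n");      // => "n8n"
    integrationField.parse(undefined);  // => null
    integrationField.parse("unknown");  // throws ZodError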
@@ -185,6 +185,7 @@ export async function performExtraction(
   url: request.urls?.join(", ") || "",
   scrapeOptions: request,
   origin: request.origin ?? "api",
+  integration: request.integration,
   num_tokens: 0,
   tokens_billed,
   sources,

@@ -680,6 +681,7 @@ export async function performExtraction(
   url: request.urls?.join(", ") || "",
   scrapeOptions: request,
   origin: request.origin ?? "api",
+  integration: request.integration,
   num_tokens: 0,
   tokens_billed,
   sources,

@@ -787,6 +789,7 @@ export async function performExtraction(
   url: request.urls?.join(", ") || "",
   scrapeOptions: request,
   origin: request.origin ?? "api",
+  integration: request.integration,
   num_tokens: 0,
   tokens_billed,
   sources,

@@ -827,6 +830,7 @@ export async function performExtraction(
   url: request.urls?.join(", ") || "",
   scrapeOptions: request,
   origin: request.origin ?? "api",
+  integration: request.integration,
   num_tokens: 0,
   tokens_billed,
   sources,

@@ -1011,6 +1015,7 @@ export async function performExtraction(
   url: request.urls?.join(", ") || "",
   scrapeOptions: request,
   origin: request.origin ?? "api",
+  integration: request.integration,
   num_tokens: totalTokensUsed,
   tokens_billed: tokensToBill,
   sources,

@@ -1079,6 +1084,7 @@ export async function performExtraction(
   url: request.urls?.join(", ") || "",
   scrapeOptions: request,
   origin: request.origin ?? "api",
+  integration: request.integration,
   num_tokens: 0,
   tokens_billed,
   sources,
@@ -118,6 +118,7 @@ import { getACUCTeam } from "../../../controllers/auth";
   url: request.urls?.join(", ") || "",
   scrapeOptions: request,
   origin: request.origin ?? "api",
+  integration: request.integration,
   num_tokens: 0,
   tokens_billed: 0,
   sources,

@@ -219,6 +220,7 @@ import { getACUCTeam } from "../../../controllers/auth";
   url: request.urls?.join(", ") || "",
   scrapeOptions: request,
   origin: request.origin ?? "api",
+  integration: request.integration,
   num_tokens: 0,
   tokens_billed: 0,
   sources,

@@ -568,6 +570,7 @@ import { getACUCTeam } from "../../../controllers/auth";
   url: request.urls?.join(", ") || "",
   scrapeOptions: request,
   origin: request.origin ?? "api",
+  integration: request.integration,
   num_tokens: 0,
   tokens_billed: 0,
   sources,

@@ -663,6 +666,7 @@ import { getACUCTeam } from "../../../controllers/auth";
   url: request.urls?.join(", ") || "",
   scrapeOptions: request,
   origin: request.origin ?? "api",
+  integration: request.integration,
   num_tokens: 0,
   tokens_billed: 0,
   sources,

@@ -691,6 +695,7 @@ import { getACUCTeam } from "../../../controllers/auth";
   url: request.urls?.join(", ") || "",
   scrapeOptions: request,
   origin: request.origin ?? "api",
+  integration: request.integration,
   num_tokens: 0,
   tokens_billed: 0,
   sources,

@@ -852,6 +857,7 @@ import { getACUCTeam } from "../../../controllers/auth";
   url: request.urls?.join(", ") || "",
   scrapeOptions: request,
   origin: request.origin ?? "api",
+  integration: request.integration,
   num_tokens: totalTokensUsed,
   tokens_billed: tokensToBill,
   sources,
@@ -48,6 +48,7 @@ export async function saveJobToGCS(job: FirecrawlJob): Promise<void> {
   crawler_options: JSON.stringify(job.crawlerOptions),
   page_options: JSON.stringify(job.scrapeOptions),
   origin: job.origin,
+  integration: job.integration ?? null,
   num_tokens: job.num_tokens ?? null,
   retry: !!job.retry,
   crawl_id: job.crawl_id ?? null,

@@ -57,6 +57,7 @@ export async function logJob(job: FirecrawlJob, force: boolean = false, bypassLo
   crawler_options: job.crawlerOptions,
   page_options: job.scrapeOptions,
   origin: job.origin,
+  integration: job.integration ?? null,
   num_tokens: job.num_tokens,
   retry: !!job.retry,
   crawl_id: job.crawl_id,
@@ -323,6 +323,7 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
   scrapeOptions: sc.scrapeOptions,
   crawlerOptions: sc.crawlerOptions,
   origin: job.data.origin,
+  integration: job.data.integration,
 }, false, job.data.internalOptions?.bypassBilling ?? false);
 logger.info("Logged crawl!");

@@ -373,6 +374,7 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
   (job.data.crawlerOptions === null ? "Batch Scrape" : "Unknown"),
   crawlerOptions: sc.crawlerOptions,
   origin: job.data.origin,
+  integration: job.data.integration,
 },
 true,
 job.data.internalOptions?.bypassBilling ?? false,

@@ -1456,6 +1458,7 @@ async function processJob(job: Job & { id: string }, token: string) {
   crawlerOptions: sc.crawlerOptions,
   scrapeOptions: job.data.scrapeOptions,
   origin: job.data.origin,
+  integration: job.data.integration,
   crawl_id: job.data.crawl_id,
   cost_tracking: costTracking,
   pdf_num_pages: doc.metadata.numPages,

@@ -1506,6 +1509,7 @@ async function processJob(job: Job & { id: string }, token: string) {
   url: job.data.url,
   scrapeOptions: job.data.scrapeOptions,
   origin: job.data.origin,
+  integration: job.data.integration,
   num_tokens: 0, // TODO: fix
   cost_tracking: costTracking,
   pdf_num_pages: doc.metadata.numPages,

@@ -1606,6 +1610,7 @@ async function processJob(job: Job & { id: string }, token: string) {
   crawlerOptions: job.data.crawlerOptions,
   scrapeOptions: job.data.scrapeOptions,
   origin: job.data.origin,
+  integration: job.data.integration,
   crawl_id: job.data.crawl_id,
   cost_tracking: costTracking,
 },
@@ -44,6 +44,7 @@ export interface WebScraperOptions {
   sitemapped?: boolean;
   webhook?: z.infer<typeof webhookSchema>;
   v1?: boolean;
+  integration?: string | null;

   /**
    * Disables billing on the worker side.

@@ -94,6 +95,7 @@ export interface FirecrawlJob {
   crawlerOptions?: any;
   scrapeOptions?: any;
   origin: string;
+  integration?: string | null;
   num_tokens?: number;
   retry?: boolean;
   crawl_id?: string;
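WebScraperOptions is the queued job payload and FirecrawlJob is the record handed to logJob and saveJobToGCS, so these two interfaces are where the field travels end to end. A hypothetical fragment (not a complete FirecrawlJob; unrelated required fields are omitted) showing where the value lands:

    // Illustrative fragment only; values are placeholders.
    const jobFragment: Partial<FirecrawlJob> = {
      origin: "api",
      integration: "zapier", // may also be null, or omitted entirely
      crawl_id: "placeholder-crawl-id",
      num_tokens: 0,
    };

    // logJob and saveJobToGCS persist it with a null fallback:
    //   integration: job.integration ?? null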