diff --git a/apps/api/src/controllers/v0/crawl.ts b/apps/api/src/controllers/v0/crawl.ts
index e70c79d9d..92b452108 100644
--- a/apps/api/src/controllers/v0/crawl.ts
+++ b/apps/api/src/controllers/v0/crawl.ts
@@ -204,6 +204,7 @@ export async function crawlController(req: Request, res: Response) {
         internalOptions,
         team_id,
         origin: req.body.origin ?? defaultOrigin,
+        integration: req.body.integration,
         crawl_id: id,
         sitemapped: true,
       },
@@ -245,6 +246,7 @@ export async function crawlController(req: Request, res: Response) {
         internalOptions,
         team_id,
         origin: req.body.origin ?? defaultOrigin,
+        integration: req.body.integration,
         crawl_id: id,
       },
       {
diff --git a/apps/api/src/controllers/v0/scrape.ts b/apps/api/src/controllers/v0/scrape.ts
index 559d222cc..9d1ce86dd 100644
--- a/apps/api/src/controllers/v0/scrape.ts
+++ b/apps/api/src/controllers/v0/scrape.ts
@@ -81,6 +81,7 @@ export async function scrapeHelper(
       scrapeOptions,
       internalOptions,
       origin: req.body.origin ?? defaultOrigin,
+      integration: req.body.integration,
       is_scrape: true,
       startTime: Date.now(),
     },
diff --git a/apps/api/src/controllers/v0/search.ts b/apps/api/src/controllers/v0/search.ts
index e4be9bd75..3bbb45ce5 100644
--- a/apps/api/src/controllers/v0/search.ts
+++ b/apps/api/src/controllers/v0/search.ts
@@ -222,7 +222,8 @@ export async function searchController(req: Request, res: Response) {
       url: req.body.query,
       scrapeOptions: fromLegacyScrapeOptions(req.body.pageOptions, undefined, 60000, team_id),
       crawlerOptions: crawlerOptions,
-      origin: origin,
+      origin,
+      integration: req.body.integration,
     });
     return res.status(result.returnCode).json(result);
   } catch (error) {
diff --git a/apps/api/src/controllers/v1/batch-scrape.ts b/apps/api/src/controllers/v1/batch-scrape.ts
index d14f8cd7f..5016e2ab7 100644
--- a/apps/api/src/controllers/v1/batch-scrape.ts
+++ b/apps/api/src/controllers/v1/batch-scrape.ts
@@ -134,6 +134,7 @@ export async function batchScrapeController(
         crawlerOptions: null,
         scrapeOptions,
         origin: "api",
+        integration: req.body.integration,
         crawl_id: id,
         sitemapped: true,
         v1: true,
diff --git a/apps/api/src/controllers/v1/crawl.ts b/apps/api/src/controllers/v1/crawl.ts
index fe9d6afd9..81234b2d6 100644
--- a/apps/api/src/controllers/v1/crawl.ts
+++ b/apps/api/src/controllers/v1/crawl.ts
@@ -114,6 +114,7 @@ export async function crawlController(
       scrapeOptions: sc.scrapeOptions,
       internalOptions: sc.internalOptions,
       origin: req.body.origin,
+      integration: req.body.integration,
       crawl_id: id,
       webhook: req.body.webhook,
       v1: true,
diff --git a/apps/api/src/controllers/v1/map.ts b/apps/api/src/controllers/v1/map.ts
index a7a47d05e..332fade6d 100644
--- a/apps/api/src/controllers/v1/map.ts
+++ b/apps/api/src/controllers/v1/map.ts
@@ -390,6 +390,7 @@ export async function mapController(
     crawlerOptions: {},
     scrapeOptions: {},
     origin: req.body.origin ?? "api",
+    integration: req.body.integration,
     num_tokens: 0,
   });
 
diff --git a/apps/api/src/controllers/v1/scrape.ts b/apps/api/src/controllers/v1/scrape.ts
index a97d0e63d..fb68f7ffb 100644
--- a/apps/api/src/controllers/v1/scrape.ts
+++ b/apps/api/src/controllers/v1/scrape.ts
@@ -57,7 +57,8 @@ export async function scrapeController(
         useCache: req.body.__experimental_cache ? true : false,
         bypassBilling: isDirectToBullMQ,
       },
-      origin: req.body.origin,
+      origin,
+      integration: req.body.integration,
       startTime,
     },
     {},
diff --git a/apps/api/src/controllers/v1/search.ts b/apps/api/src/controllers/v1/search.ts
index ceacce4cc..c94480db6 100644
--- a/apps/api/src/controllers/v1/search.ts
+++ b/apps/api/src/controllers/v1/search.ts
@@ -307,6 +307,7 @@ export async function searchController(
         url: req.body.query,
         scrapeOptions: req.body.scrapeOptions,
         origin: req.body.origin,
+        integration: req.body.integration,
         cost_tracking: costTracking,
       },
       false,
diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts
index ba68c9953..8399cdef5 100644
--- a/apps/api/src/controllers/v1/types.ts
+++ b/apps/api/src/controllers/v1/types.ts
@@ -10,6 +10,21 @@ import {
 } from "../../lib/entities";
 import { InternalOptions } from "../../scraper/scrapeURL";
 
+export enum IntegrationEnum {
+  DIFY = "dify",
+  ZAPIER = "zapier",
+  PIPEDREAM = "pipedream",
+  RAYCAST = "raycast",
+  LANGCHAIN = "langchain",
+  CREWAI = "crewai",
+  LLAMAINDEX = "llamaindex",
+  N8N = "n8n",
+  CAMELAI = "camelai",
+  MAKE = "make",
+  FLOWISE = "flowise",
+  METAGPT = "metagpt",
+}
+
 export type Format =
   | "markdown"
   | "html"
@@ -470,6 +485,7 @@ export const extractV1Options = z
     enableWebSearch: z.boolean().default(false),
     scrapeOptions: baseScrapeOptions.default({ onlyMainContent: false }).optional(),
     origin: z.string().optional().default("api"),
+    integration: z.nativeEnum(IntegrationEnum).optional().transform(val => val || null),
     urlTrace: z.boolean().default(false),
     timeout: z.number().int().positive().finite().safe().default(60000),
     __experimental_streamSteps: z.boolean().default(false),
@@ -528,6 +544,7 @@ export const scrapeRequestSchema = baseScrapeOptions
     extract: extractOptionsWithAgent.optional(),
     jsonOptions: extractOptionsWithAgent.optional(),
     origin: z.string().optional().default("api"),
+    integration: z.nativeEnum(IntegrationEnum).optional().transform(val => val || null),
     timeout: z.number().int().positive().finite().safe().default(30000),
   })
   .strict(strictMessage)
@@ -562,6 +579,7 @@ export const batchScrapeRequestSchema = baseScrapeOptions
   .extend({
     urls: url.array(),
     origin: z.string().optional().default("api"),
+    integration: z.nativeEnum(IntegrationEnum).optional().transform(val => val || null),
     webhook: webhookSchema.optional(),
     appendToId: z.string().uuid().optional(),
     ignoreInvalidURLs: z.boolean().default(false),
@@ -575,6 +593,7 @@ export const batchScrapeRequestSchemaNoURLValidation = baseScrapeOptions
   .extend({
     urls: z.string().array(),
     origin: z.string().optional().default("api"),
+    integration: z.nativeEnum(IntegrationEnum).optional().transform(val => val || null),
     webhook: webhookSchema.optional(),
     appendToId: z.string().uuid().optional(),
     ignoreInvalidURLs: z.boolean().default(false),
@@ -622,6 +641,7 @@ export const crawlRequestSchema = crawlerOptions
   .extend({
     url,
     origin: z.string().optional().default("api"),
+    integration: z.nativeEnum(IntegrationEnum).optional().transform(val => val || null),
     scrapeOptions: baseScrapeOptions.default({}),
     webhook: webhookSchema.optional(),
     limit: z.number().default(10000),
@@ -653,6 +673,7 @@ export const mapRequestSchema = crawlerOptions
   .extend({
     url,
     origin: z.string().optional().default("api"),
+    integration: z.nativeEnum(IntegrationEnum).optional().transform(val => val || null),
     includeSubdomains: z.boolean().default(true),
     search: z.string().optional(),
     ignoreSitemap: z.boolean().default(false),
@@ -1204,6 +1225,7 @@ export const searchRequestSchema = z
     country: z.string().optional().default("us"),
     location: z.string().optional(),
     origin: z.string().optional().default("api"),
+    integration: z.nativeEnum(IntegrationEnum).optional().transform(val => val || null),
     timeout: z.number().int().positive().finite().safe().default(60000),
     ignoreInvalidURLs: z.boolean().optional().default(false),
     __searchPreviewToken: z.string().optional(),
diff --git a/apps/api/src/lib/extract/extraction-service.ts b/apps/api/src/lib/extract/extraction-service.ts
index 1bd73dd81..6b6e23841 100644
--- a/apps/api/src/lib/extract/extraction-service.ts
+++ b/apps/api/src/lib/extract/extraction-service.ts
@@ -185,6 +185,7 @@ export async function performExtraction(
       url: request.urls?.join(", ") || "",
       scrapeOptions: request,
       origin: request.origin ?? "api",
+      integration: request.integration,
       num_tokens: 0,
       tokens_billed,
       sources,
@@ -680,6 +681,7 @@ export async function performExtraction(
       url: request.urls?.join(", ") || "",
       scrapeOptions: request,
       origin: request.origin ?? "api",
+      integration: request.integration,
       num_tokens: 0,
       tokens_billed,
       sources,
@@ -787,6 +789,7 @@ export async function performExtraction(
       url: request.urls?.join(", ") || "",
       scrapeOptions: request,
       origin: request.origin ?? "api",
+      integration: request.integration,
       num_tokens: 0,
       tokens_billed,
       sources,
@@ -827,6 +830,7 @@ export async function performExtraction(
       url: request.urls?.join(", ") || "",
       scrapeOptions: request,
       origin: request.origin ?? "api",
+      integration: request.integration,
       num_tokens: 0,
       tokens_billed,
       sources,
@@ -1011,6 +1015,7 @@ export async function performExtraction(
       url: request.urls?.join(", ") || "",
       scrapeOptions: request,
       origin: request.origin ?? "api",
+      integration: request.integration,
       num_tokens: totalTokensUsed,
       tokens_billed: tokensToBill,
       sources,
@@ -1079,6 +1084,7 @@ export async function performExtraction(
       url: request.urls?.join(", ") || "",
       scrapeOptions: request,
       origin: request.origin ?? "api",
+      integration: request.integration,
       num_tokens: 0,
       tokens_billed,
       sources,
diff --git a/apps/api/src/lib/extract/fire-0/extraction-service-f0.ts b/apps/api/src/lib/extract/fire-0/extraction-service-f0.ts
index 1ddf17f38..3f4e041ee 100644
--- a/apps/api/src/lib/extract/fire-0/extraction-service-f0.ts
+++ b/apps/api/src/lib/extract/fire-0/extraction-service-f0.ts
@@ -118,6 +118,7 @@ import { getACUCTeam } from "../../../controllers/auth";
       url: request.urls?.join(", ") || "",
       scrapeOptions: request,
       origin: request.origin ?? "api",
+      integration: request.integration,
       num_tokens: 0,
       tokens_billed: 0,
       sources,
@@ -219,6 +220,7 @@ import { getACUCTeam } from "../../../controllers/auth";
       url: request.urls?.join(", ") || "",
       scrapeOptions: request,
       origin: request.origin ?? "api",
+      integration: request.integration,
       num_tokens: 0,
       tokens_billed: 0,
       sources,
@@ -568,6 +570,7 @@ import { getACUCTeam } from "../../../controllers/auth";
       url: request.urls?.join(", ") || "",
       scrapeOptions: request,
       origin: request.origin ?? "api",
+      integration: request.integration,
       num_tokens: 0,
       tokens_billed: 0,
       sources,
@@ -663,6 +666,7 @@ import { getACUCTeam } from "../../../controllers/auth";
       url: request.urls?.join(", ") || "",
       scrapeOptions: request,
       origin: request.origin ?? "api",
+      integration: request.integration,
       num_tokens: 0,
       tokens_billed: 0,
       sources,
@@ -691,6 +695,7 @@ import { getACUCTeam } from "../../../controllers/auth";
       url: request.urls?.join(", ") || "",
       scrapeOptions: request,
       origin: request.origin ?? "api",
+      integration: request.integration,
       num_tokens: 0,
       tokens_billed: 0,
       sources,
@@ -852,6 +857,7 @@ import { getACUCTeam } from "../../../controllers/auth";
       url: request.urls?.join(", ") || "",
       scrapeOptions: request,
       origin: request.origin ?? "api",
+      integration: request.integration,
       num_tokens: totalTokensUsed,
       tokens_billed: tokensToBill,
       sources,
diff --git a/apps/api/src/lib/gcs-jobs.ts b/apps/api/src/lib/gcs-jobs.ts
index 3e780457b..92047f1ea 100644
--- a/apps/api/src/lib/gcs-jobs.ts
+++ b/apps/api/src/lib/gcs-jobs.ts
@@ -48,6 +48,7 @@ export async function saveJobToGCS(job: FirecrawlJob): Promise<void> {
     crawler_options: JSON.stringify(job.crawlerOptions),
     page_options: JSON.stringify(job.scrapeOptions),
     origin: job.origin,
+    integration: job.integration ?? null,
     num_tokens: job.num_tokens ?? null,
     retry: !!job.retry,
     crawl_id: job.crawl_id ?? null,
diff --git a/apps/api/src/services/logging/log_job.ts b/apps/api/src/services/logging/log_job.ts
index b26a3a9a3..584c1ac71 100644
--- a/apps/api/src/services/logging/log_job.ts
+++ b/apps/api/src/services/logging/log_job.ts
@@ -57,6 +57,7 @@ export async function logJob(job: FirecrawlJob, force: boolean = false, bypassLo
     crawler_options: job.crawlerOptions,
     page_options: job.scrapeOptions,
     origin: job.origin,
+    integration: job.integration ?? null,
     num_tokens: job.num_tokens,
     retry: !!job.retry,
     crawl_id: job.crawl_id,
diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts
index 4f025e38a..8fcfdcb2d 100644
--- a/apps/api/src/services/queue-worker.ts
+++ b/apps/api/src/services/queue-worker.ts
@@ -323,6 +323,7 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
         scrapeOptions: sc.scrapeOptions,
         crawlerOptions: sc.crawlerOptions,
         origin: job.data.origin,
+        integration: job.data.integration,
       }, false, job.data.internalOptions?.bypassBilling ?? false);
       logger.info("Logged crawl!");
 
@@ -373,6 +374,7 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
             (job.data.crawlerOptions === null ? "Batch Scrape" : "Unknown"),
           crawlerOptions: sc.crawlerOptions,
           origin: job.data.origin,
+          integration: job.data.integration,
         },
         true,
         job.data.internalOptions?.bypassBilling ?? false,
@@ -1456,6 +1458,7 @@ async function processJob(job: Job & { id: string }, token: string) {
         crawlerOptions: sc.crawlerOptions,
         scrapeOptions: job.data.scrapeOptions,
         origin: job.data.origin,
+        integration: job.data.integration,
         crawl_id: job.data.crawl_id,
         cost_tracking: costTracking,
         pdf_num_pages: doc.metadata.numPages,
@@ -1506,6 +1509,7 @@ async function processJob(job: Job & { id: string }, token: string) {
         url: job.data.url,
         scrapeOptions: job.data.scrapeOptions,
         origin: job.data.origin,
+        integration: job.data.integration,
         num_tokens: 0, // TODO: fix
         cost_tracking: costTracking,
         pdf_num_pages: doc.metadata.numPages,
@@ -1606,6 +1610,7 @@ async function processJob(job: Job & { id: string }, token: string) {
         crawlerOptions: job.data.crawlerOptions,
         scrapeOptions: job.data.scrapeOptions,
         origin: job.data.origin,
+        integration: job.data.integration,
         crawl_id: job.data.crawl_id,
         cost_tracking: costTracking,
       },
diff --git a/apps/api/src/types.ts b/apps/api/src/types.ts
index 1d488ff01..f1625ee78 100644
--- a/apps/api/src/types.ts
+++ b/apps/api/src/types.ts
@@ -44,6 +44,7 @@ export interface WebScraperOptions {
   sitemapped?: boolean;
   webhook?: z.infer<typeof webhookSchema>;
   v1?: boolean;
+  integration?: string | null;
 
   /**
    * Disables billing on the worker side.
@@ -94,6 +95,7 @@ export interface FirecrawlJob {
   crawlerOptions?: any;
   scrapeOptions?: any;
   origin: string;
+  integration?: string | null;
   num_tokens?: number;
   retry?: boolean;
   crawl_id?: string;
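
Reviewer sketch (not part of the diff): how the new integration field is expected to behave when a request is parsed, assuming zod v3 semantics for .optional().transform(...). The standalone schema below only mirrors the shape added in apps/api/src/controllers/v1/types.ts; it is illustrative, not the actual request schema.

// sketch.ts - run with ts-node; requires zod
import { z } from "zod";

// Subset of the IntegrationEnum added in the diff; remaining members omitted here.
enum IntegrationEnum {
  DIFY = "dify",
  ZAPIER = "zapier",
  N8N = "n8n",
}

// Shape mirroring the diff's request schemas: optional enum, normalized to null.
const requestSchema = z.object({
  origin: z.string().optional().default("api"),
  integration: z
    .nativeEnum(IntegrationEnum)
    .optional()
    .transform(val => val || null),
});

// A recognized integration passes validation and is kept as-is.
console.log(requestSchema.parse({ integration: IntegrationEnum.ZAPIER }));
// -> { origin: "api", integration: "zapier" }

// An omitted field normalizes to null, matching the `job.integration ?? null`
// fallbacks in log_job.ts and gcs-jobs.ts.
console.log(requestSchema.parse({}));
// -> { origin: "api", integration: null }

// A value outside the enum is rejected by z.nativeEnum, so the request fails validation.
console.log(requestSchema.safeParse({ integration: "not-a-tool" }).success); // false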