Merge pull request #1632 from mendableai/feat/api-integration-parameter

feat(api): add integration field to jobs and update related controllers and types
Ademílson Tonato 2025-06-05 11:44:02 +01:00 committed by GitHub
commit 71caf8ae57
15 changed files with 54 additions and 2 deletions
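
For context, the new `integration` parameter lets API callers identify which integration (Zapier, n8n, LangChain, etc.) originated a request, alongside the existing `origin` field. A minimal client-side sketch, assuming the public v1 scrape endpoint and a placeholder API key (neither is part of this diff):

```ts
// Hypothetical request tagging a scrape with the new integration field.
// Endpoint URL, payload shape, and env var name are assumptions for illustration.
const response = await fetch("https://api.firecrawl.dev/v1/scrape", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    Authorization: `Bearer ${process.env.FIRECRAWL_API_KEY}`,
  },
  body: JSON.stringify({
    url: "https://example.com",
    formats: ["markdown"],
    integration: "zapier", // must be one of the IntegrationEnum values added below
  }),
});
const result = await response.json();
console.log(result.success);
```

Unknown values are rejected by the request schemas; omitting the field is allowed and is normalized to `null` (see the schema changes below).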

View File

@@ -204,6 +204,7 @@ export async function crawlController(req: Request, res: Response) {
internalOptions,
team_id,
origin: req.body.origin ?? defaultOrigin,
integration: req.body.integration,
crawl_id: id,
sitemapped: true,
},
@@ -245,6 +246,7 @@ export async function crawlController(req: Request, res: Response) {
internalOptions,
team_id,
origin: req.body.origin ?? defaultOrigin,
integration: req.body.integration,
crawl_id: id,
},
{

View File

@@ -81,6 +81,7 @@ export async function scrapeHelper(
scrapeOptions,
internalOptions,
origin: req.body.origin ?? defaultOrigin,
integration: req.body.integration,
is_scrape: true,
startTime: Date.now(),
},

View File

@@ -222,7 +222,8 @@ export async function searchController(req: Request, res: Response) {
url: req.body.query,
scrapeOptions: fromLegacyScrapeOptions(req.body.pageOptions, undefined, 60000, team_id),
crawlerOptions: crawlerOptions,
origin: origin,
origin,
integration: req.body.integration,
});
return res.status(result.returnCode).json(result);
} catch (error) {

View File

@@ -134,6 +134,7 @@ export async function batchScrapeController(
crawlerOptions: null,
scrapeOptions,
origin: "api",
integration: req.body.integration,
crawl_id: id,
sitemapped: true,
v1: true,

View File

@@ -114,6 +114,7 @@ export async function crawlController(
scrapeOptions: sc.scrapeOptions,
internalOptions: sc.internalOptions,
origin: req.body.origin,
integration: req.body.integration,
crawl_id: id,
webhook: req.body.webhook,
v1: true,

View File

@@ -390,6 +390,7 @@ export async function mapController(
crawlerOptions: {},
scrapeOptions: {},
origin: req.body.origin ?? "api",
integration: req.body.integration,
num_tokens: 0,
});

View File

@@ -57,7 +57,8 @@ export async function scrapeController(
useCache: req.body.__experimental_cache ? true : false,
bypassBilling: isDirectToBullMQ,
},
origin: req.body.origin,
origin,
integration: req.body.integration,
startTime,
},
{},

View File

@@ -307,6 +307,7 @@ export async function searchController(
url: req.body.query,
scrapeOptions: req.body.scrapeOptions,
origin: req.body.origin,
integration: req.body.integration,
cost_tracking: costTracking,
},
false,

View File

@@ -10,6 +10,21 @@ import {
} from "../../lib/entities";
import { InternalOptions } from "../../scraper/scrapeURL";
export enum IntegrationEnum {
DIFY = "dify",
ZAPIER = "zapier",
PIPEDREAM = "pipedream",
RAYCAST = "raycast",
LANGCHAIN = "langchain",
CREWAI = "crewai",
LLAMAINDEX = "llamaindex",
N8N = "n8n",
CAMELAI = "camelai",
MAKE = "make",
FLOWISE = "flowise",
METAGPT = "metagpt",
}
export type Format =
| "markdown"
| "html"
@@ -470,6 +485,7 @@ export const extractV1Options = z
enableWebSearch: z.boolean().default(false),
scrapeOptions: baseScrapeOptions.default({ onlyMainContent: false }).optional(),
origin: z.string().optional().default("api"),
integration: z.nativeEnum(IntegrationEnum).optional().transform(val => val || null),
urlTrace: z.boolean().default(false),
timeout: z.number().int().positive().finite().safe().default(60000),
__experimental_streamSteps: z.boolean().default(false),
@@ -528,6 +544,7 @@ export const scrapeRequestSchema = baseScrapeOptions
extract: extractOptionsWithAgent.optional(),
jsonOptions: extractOptionsWithAgent.optional(),
origin: z.string().optional().default("api"),
integration: z.nativeEnum(IntegrationEnum).optional().transform(val => val || null),
timeout: z.number().int().positive().finite().safe().default(30000),
})
.strict(strictMessage)
@@ -562,6 +579,7 @@ export const batchScrapeRequestSchema = baseScrapeOptions
.extend({
urls: url.array(),
origin: z.string().optional().default("api"),
integration: z.nativeEnum(IntegrationEnum).optional().transform(val => val || null),
webhook: webhookSchema.optional(),
appendToId: z.string().uuid().optional(),
ignoreInvalidURLs: z.boolean().default(false),
@@ -575,6 +593,7 @@ export const batchScrapeRequestSchemaNoURLValidation = baseScrapeOptions
.extend({
urls: z.string().array(),
origin: z.string().optional().default("api"),
integration: z.nativeEnum(IntegrationEnum).optional().transform(val => val || null),
webhook: webhookSchema.optional(),
appendToId: z.string().uuid().optional(),
ignoreInvalidURLs: z.boolean().default(false),
@@ -622,6 +641,7 @@ export const crawlRequestSchema = crawlerOptions
.extend({
url,
origin: z.string().optional().default("api"),
integration: z.nativeEnum(IntegrationEnum).optional().transform(val => val || null),
scrapeOptions: baseScrapeOptions.default({}),
webhook: webhookSchema.optional(),
limit: z.number().default(10000),
@@ -653,6 +673,7 @@ export const mapRequestSchema = crawlerOptions
.extend({
url,
origin: z.string().optional().default("api"),
integration: z.nativeEnum(IntegrationEnum).optional().transform(val => val || null),
includeSubdomains: z.boolean().default(true),
search: z.string().optional(),
ignoreSitemap: z.boolean().default(false),
@@ -1204,6 +1225,7 @@ export const searchRequestSchema = z
country: z.string().optional().default("us"),
location: z.string().optional(),
origin: z.string().optional().default("api"),
integration: z.nativeEnum(IntegrationEnum).optional().transform(val => val || null),
timeout: z.number().int().positive().finite().safe().default(60000),
ignoreInvalidURLs: z.boolean().optional().default(false),
__searchPreviewToken: z.string().optional(),
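
Each request schema gains the same optional field: `z.nativeEnum(IntegrationEnum).optional().transform(val => val || null)`, so an unknown value fails validation while a missing value is normalized to `null`. A standalone sketch of that behavior (trimmed enum, illustrative names only):

```ts
import { z } from "zod";

// Mirrors the integration field added to the schemas above.
enum IntegrationEnum {
  DIFY = "dify",
  ZAPIER = "zapier",
}

const integrationField = z
  .nativeEnum(IntegrationEnum)
  .optional()
  .transform(val => val || null);

integrationField.parse("zapier");  // => "zapier"
integrationField.parse(undefined); // => null (field omitted)
// integrationField.parse("foo");  // throws ZodError: not a member of the enum
```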

View File

@@ -185,6 +185,7 @@ export async function performExtraction(
url: request.urls?.join(", ") || "",
scrapeOptions: request,
origin: request.origin ?? "api",
integration: request.integration,
num_tokens: 0,
tokens_billed,
sources,
@@ -680,6 +681,7 @@ export async function performExtraction(
url: request.urls?.join(", ") || "",
scrapeOptions: request,
origin: request.origin ?? "api",
integration: request.integration,
num_tokens: 0,
tokens_billed,
sources,
@@ -787,6 +789,7 @@ export async function performExtraction(
url: request.urls?.join(", ") || "",
scrapeOptions: request,
origin: request.origin ?? "api",
integration: request.integration,
num_tokens: 0,
tokens_billed,
sources,
@@ -827,6 +830,7 @@ export async function performExtraction(
url: request.urls?.join(", ") || "",
scrapeOptions: request,
origin: request.origin ?? "api",
integration: request.integration,
num_tokens: 0,
tokens_billed,
sources,
@@ -1011,6 +1015,7 @@ export async function performExtraction(
url: request.urls?.join(", ") || "",
scrapeOptions: request,
origin: request.origin ?? "api",
integration: request.integration,
num_tokens: totalTokensUsed,
tokens_billed: tokensToBill,
sources,
@@ -1079,6 +1084,7 @@ export async function performExtraction(
url: request.urls?.join(", ") || "",
scrapeOptions: request,
origin: request.origin ?? "api",
integration: request.integration,
num_tokens: 0,
tokens_billed,
sources,

View File

@@ -118,6 +118,7 @@ import { getACUCTeam } from "../../../controllers/auth";
url: request.urls?.join(", ") || "",
scrapeOptions: request,
origin: request.origin ?? "api",
integration: request.integration,
num_tokens: 0,
tokens_billed: 0,
sources,
@@ -219,6 +220,7 @@ import { getACUCTeam } from "../../../controllers/auth";
url: request.urls?.join(", ") || "",
scrapeOptions: request,
origin: request.origin ?? "api",
integration: request.integration,
num_tokens: 0,
tokens_billed: 0,
sources,
@@ -568,6 +570,7 @@ import { getACUCTeam } from "../../../controllers/auth";
url: request.urls?.join(", ") || "",
scrapeOptions: request,
origin: request.origin ?? "api",
integration: request.integration,
num_tokens: 0,
tokens_billed: 0,
sources,
@@ -663,6 +666,7 @@ import { getACUCTeam } from "../../../controllers/auth";
url: request.urls?.join(", ") || "",
scrapeOptions: request,
origin: request.origin ?? "api",
integration: request.integration,
num_tokens: 0,
tokens_billed: 0,
sources,
@@ -691,6 +695,7 @@ import { getACUCTeam } from "../../../controllers/auth";
url: request.urls?.join(", ") || "",
scrapeOptions: request,
origin: request.origin ?? "api",
integration: request.integration,
num_tokens: 0,
tokens_billed: 0,
sources,
@@ -852,6 +857,7 @@ import { getACUCTeam } from "../../../controllers/auth";
url: request.urls?.join(", ") || "",
scrapeOptions: request,
origin: request.origin ?? "api",
integration: request.integration,
num_tokens: totalTokensUsed,
tokens_billed: tokensToBill,
sources,

View File

@@ -48,6 +48,7 @@ export async function saveJobToGCS(job: FirecrawlJob): Promise<void> {
crawler_options: JSON.stringify(job.crawlerOptions),
page_options: JSON.stringify(job.scrapeOptions),
origin: job.origin,
integration: job.integration ?? null,
num_tokens: job.num_tokens ?? null,
retry: !!job.retry,
crawl_id: job.crawl_id ?? null,

View File

@@ -57,6 +57,7 @@ export async function logJob(job: FirecrawlJob, force: boolean = false, bypassLo
crawler_options: job.crawlerOptions,
page_options: job.scrapeOptions,
origin: job.origin,
integration: job.integration ?? null,
num_tokens: job.num_tokens,
retry: !!job.retry,
crawl_id: job.crawl_id,

View File

@@ -323,6 +323,7 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
scrapeOptions: sc.scrapeOptions,
crawlerOptions: sc.crawlerOptions,
origin: job.data.origin,
integration: job.data.integration,
}, false, job.data.internalOptions?.bypassBilling ?? false);
logger.info("Logged crawl!");
@@ -373,6 +374,7 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
(job.data.crawlerOptions === null ? "Batch Scrape" : "Unknown"),
crawlerOptions: sc.crawlerOptions,
origin: job.data.origin,
integration: job.data.integration,
},
true,
job.data.internalOptions?.bypassBilling ?? false,
@@ -1456,6 +1458,7 @@ async function processJob(job: Job & { id: string }, token: string) {
crawlerOptions: sc.crawlerOptions,
scrapeOptions: job.data.scrapeOptions,
origin: job.data.origin,
integration: job.data.integration,
crawl_id: job.data.crawl_id,
cost_tracking: costTracking,
pdf_num_pages: doc.metadata.numPages,
@@ -1506,6 +1509,7 @@ async function processJob(job: Job & { id: string }, token: string) {
url: job.data.url,
scrapeOptions: job.data.scrapeOptions,
origin: job.data.origin,
integration: job.data.integration,
num_tokens: 0, // TODO: fix
cost_tracking: costTracking,
pdf_num_pages: doc.metadata.numPages,
@@ -1606,6 +1610,7 @@ async function processJob(job: Job & { id: string }, token: string) {
crawlerOptions: job.data.crawlerOptions,
scrapeOptions: job.data.scrapeOptions,
origin: job.data.origin,
integration: job.data.integration,
crawl_id: job.data.crawl_id,
cost_tracking: costTracking,
},

View File

@@ -44,6 +44,7 @@ export interface WebScraperOptions {
sitemapped?: boolean;
webhook?: z.infer<typeof webhookSchema>;
v1?: boolean;
integration?: string | null;
/**
* Disables billing on the worker side.
@@ -94,6 +95,7 @@ export interface FirecrawlJob {
crawlerOptions?: any;
scrapeOptions?: any;
origin: string;
integration?: string | null;
num_tokens?: number;
retry?: boolean;
crawl_id?: string;
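
End to end, the value flows from the validated request body into the queue payload (`WebScraperOptions`) and finally into the logged job record (`FirecrawlJob`), where `job.integration ?? null` keeps the stored value nullable. A simplified sketch of that hand-off (reduced interfaces, hypothetical helper name):

```ts
// Reduced versions of the interfaces above, for illustration only.
interface WebScraperOptions {
  url: string;
  origin: string;
  integration?: string | null;
}

interface FirecrawlJob {
  url: string;
  origin: string;
  integration?: string | null;
}

// Hypothetical mapping mirroring how logJob/saveJobToGCS treat the field.
function toLoggedJob(data: WebScraperOptions): FirecrawlJob {
  return {
    url: data.url,
    origin: data.origin,
    integration: data.integration ?? null, // absent -> null, matching the diff
  };
}
```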