Mirror of https://github.com/mendableai/firecrawl.git (synced 2025-06-27 00:41:33 +00:00)
Allow international URLs to pass validation (#1717)
This commit is contained in:
Parent: 1919799bed
Commit: 9a5d40c3cf
@ -76,4 +76,9 @@ describe("URL Schema Validation", () => {
|
||||
it("should reject malformed URLs containing multiple 'http://'", () => {
|
||||
expect(() => url.parse("http://ex ample.com/")).toThrow("Invalid URL");
|
||||
});
|
||||
|
||||
it("should accept URLs with international domain names", () => {
|
||||
expect(() => url.parse("http://xn--1lqv92a901a.xn--ses554g/")).not.toThrow();
|
||||
});
|
||||
|
||||
});
|
||||
|
@ -42,7 +42,7 @@ export const url = z.preprocess(
|
||||
if (!protocolIncluded(x as string)) {
|
||||
x = `http://${x}`;
|
||||
}
|
||||
|
||||
|
||||
// transforming the query parameters is breaking certain sites, so we're not doing it - mogery
|
||||
// try {
|
||||
// const urlObj = new URL(x as string);
|
||||
@ -52,7 +52,7 @@ export const url = z.preprocess(
|
||||
// }
|
||||
// } catch (e) {
|
||||
// }
|
||||
|
||||
|
||||
return x;
|
||||
},
|
||||
z
|
||||
@ -61,7 +61,7 @@ export const url = z.preprocess(
|
||||
.regex(/^https?:\/\//, "URL uses unsupported protocol")
|
||||
.refine(
|
||||
(x) =>
|
||||
/\.[a-zA-Z\u0400-\u04FF\u0500-\u052F\u2DE0-\u2DFF\uA640-\uA69F]{2,}(:\d+)?([\/?#]|$)/i.test(
|
||||
/\.[a-zA-Z0-9-\u0400-\u04FF\u0500-\u052F\u2DE0-\u2DFF\uA640-\uA69F]{2,}(:\d+)?([\/?#]|$)/i.test(
|
||||
x,
|
||||
),
|
||||
"URL must have a valid top-level domain or be a valid path",
|
||||
@ -74,7 +74,7 @@ export const url = z.preprocess(
|
||||
return false;
|
||||
}
|
||||
}, "Invalid URL")
|
||||
// .refine((x) => !isUrlBlocked(x as string), BLOCKLISTED_URL_MESSAGE),
|
||||
// .refine((x) => !isUrlBlocked(x as string), BLOCKLISTED_URL_MESSAGE),
|
||||
);
|
||||
|
||||
const strictMessage =
|
||||
@ -823,11 +823,11 @@ export type ErrorResponse = {
|
||||
export type ScrapeResponse =
|
||||
| ErrorResponse
|
||||
| {
|
||||
success: true;
|
||||
warning?: string;
|
||||
data: Document;
|
||||
scrape_id?: string;
|
||||
};
|
||||
success: true;
|
||||
warning?: string;
|
||||
data: Document;
|
||||
scrape_id?: string;
|
||||
};
|
||||
|
||||
export interface ScrapeResponseRequestTest {
|
||||
statusCode: number;
|
||||
@ -878,27 +878,27 @@ export interface ExtractResponseRequestTest {
|
||||
export type CrawlResponse =
|
||||
| ErrorResponse
|
||||
| {
|
||||
success: true;
|
||||
id: string;
|
||||
url: string;
|
||||
};
|
||||
success: true;
|
||||
id: string;
|
||||
url: string;
|
||||
};
|
||||
|
||||
export type BatchScrapeResponse =
|
||||
| ErrorResponse
|
||||
| {
|
||||
success: true;
|
||||
id: string;
|
||||
url: string;
|
||||
invalidURLs?: string[];
|
||||
};
|
||||
success: true;
|
||||
id: string;
|
||||
url: string;
|
||||
invalidURLs?: string[];
|
||||
};
|
||||
|
||||
export type MapResponse =
|
||||
| ErrorResponse
|
||||
| {
|
||||
success: true;
|
||||
links: string[];
|
||||
scrape_id?: string;
|
||||
};
|
||||
success: true;
|
||||
links: string[];
|
||||
scrape_id?: string;
|
||||
};
|
||||
|
||||
export type CrawlStatusParams = {
|
||||
jobId: string;
|
||||
@ -911,47 +911,47 @@ export type ConcurrencyCheckParams = {
|
||||
export type ConcurrencyCheckResponse =
|
||||
| ErrorResponse
|
||||
| {
|
||||
success: true;
|
||||
concurrency: number;
|
||||
maxConcurrency: number;
|
||||
};
|
||||
success: true;
|
||||
concurrency: number;
|
||||
maxConcurrency: number;
|
||||
};
|
||||
|
||||
export type CrawlStatusResponse =
|
||||
| ErrorResponse
|
||||
| {
|
||||
success: true;
|
||||
status: "scraping" | "completed" | "failed" | "cancelled";
|
||||
completed: number;
|
||||
total: number;
|
||||
creditsUsed: number;
|
||||
expiresAt: string;
|
||||
next?: string;
|
||||
data: Document[];
|
||||
};
|
||||
success: true;
|
||||
status: "scraping" | "completed" | "failed" | "cancelled";
|
||||
completed: number;
|
||||
total: number;
|
||||
creditsUsed: number;
|
||||
expiresAt: string;
|
||||
next?: string;
|
||||
data: Document[];
|
||||
};
|
||||
|
||||
export type OngoingCrawlsResponse =
|
||||
| ErrorResponse
|
||||
| {
|
||||
success: true;
|
||||
crawls: {
|
||||
id: string;
|
||||
teamId: string;
|
||||
url: string;
|
||||
options: CrawlerOptions;
|
||||
}[];
|
||||
success: true;
|
||||
crawls: {
|
||||
id: string;
|
||||
teamId: string;
|
||||
url: string;
|
||||
options: CrawlerOptions;
|
||||
}[];
|
||||
};
|
||||
|
||||
|
||||
export type CrawlErrorsResponse =
|
||||
| ErrorResponse
|
||||
| {
|
||||
errors: {
|
||||
id: string;
|
||||
timestamp?: string;
|
||||
url: string;
|
||||
error: string;
|
||||
}[];
|
||||
robotsBlocked: string[];
|
||||
};
|
||||
errors: {
|
||||
id: string;
|
||||
timestamp?: string;
|
||||
url: string;
|
||||
error: string;
|
||||
}[];
|
||||
robotsBlocked: string[];
|
||||
};
|
||||
|
||||
type AuthObject = {
|
||||
team_id: string;
|
||||
@ -1146,7 +1146,7 @@ export function fromLegacyScrapeOptions(
|
||||
? ("screenshot@fullPage" as const)
|
||||
: null,
|
||||
extractorOptions !== undefined &&
|
||||
extractorOptions.mode.includes("llm-extraction")
|
||||
extractorOptions.mode.includes("llm-extraction")
|
||||
? ("extract" as const)
|
||||
: null,
|
||||
"links",
|
||||
@ -1170,12 +1170,12 @@ export function fromLegacyScrapeOptions(
|
||||
removeBase64Images: pageOptions.removeBase64Images,
|
||||
extract:
|
||||
extractorOptions !== undefined &&
|
||||
extractorOptions.mode.includes("llm-extraction")
|
||||
extractorOptions.mode.includes("llm-extraction")
|
||||
? {
|
||||
systemPrompt: extractorOptions.extractionPrompt,
|
||||
prompt: extractorOptions.userPrompt,
|
||||
schema: extractorOptions.extractionSchema,
|
||||
}
|
||||
systemPrompt: extractorOptions.extractionPrompt,
|
||||
prompt: extractorOptions.userPrompt,
|
||||
schema: extractorOptions.extractionSchema,
|
||||
}
|
||||
: undefined,
|
||||
mobile: pageOptions.mobile,
|
||||
fastMode: pageOptions.useFastMode,
|
||||
@ -1290,10 +1290,10 @@ export type SearchRequestInput = z.input<typeof searchRequestSchema>;
|
||||
export type SearchResponse =
|
||||
| ErrorResponse
|
||||
| {
|
||||
success: true;
|
||||
warning?: string;
|
||||
data: Document[];
|
||||
};
|
||||
success: true;
|
||||
warning?: string;
|
||||
data: Document[];
|
||||
};
|
||||
|
||||
export type TokenUsage = {
|
||||
promptTokens: number;
|
||||
|
Loading…
x
Reference in New Issue
Block a user