Mirror of https://github.com/mendableai/firecrawl.git (synced 2025-09-27 17:29:20 +00:00)
feat(scrape): add error tallying instead of empty response

commit 01f42b980d (parent 712ca31615)
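In short: scrapSingleUrl now tallies each scraper's pageError into an errors map while it retries scraping methods. If every method fails, it throws a structured JSON error of the form {"type":"all","errors":[...]} instead of returning an empty document, and scrapeController recognizes that payload and answers with a 500 that lists the per-scraper failures. The failure log is also promoted from debug to error level, and the old "!!! PANIC DOC IS" console.error is commented out.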
@@ -64,22 +64,21 @@ export async function scrapeController(
         success: false,
         error: "Request timed out",
       });
-    } else {
+    } else if (typeof e === "string" && e.startsWith("{\"type\":\"all\",")) {
       return res.status(500).json({
         success: false,
-        error: `(Internal server error) - ${e && e?.message ? e.message : e} ${
-          extractorOptions && extractorOptions.mode !== "markdown"
-            ? " - Could be due to LLM parsing issues"
-            : ""
-        }`,
+        error: "All scraping methods failed for URL: " + req.body.url,
+        details: JSON.parse(e).errors as string[],
       });
+    } else {
+      throw e;
     }
   }
 
   await job.remove();
 
   if (!doc) {
-    console.error("!!! PANIC DOC IS", doc, job);
+    // console.error("!!! PANIC DOC IS", doc, job);
     return res.status(200).json({
       success: true,
       warning: "No page found",
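For illustration, when every method fails, the new branch produces a response shaped like the following (a hypothetical example: the URL and error strings are invented, the field names come from the diff above). Anything that is not the structured "all" payload is now re-thrown instead of being flattened into the old generic "(Internal server error)" message.

```typescript
// Hypothetical 500 body from the new "all scraping methods failed" branch.
const exampleBody = {
  success: false,
  error: "All scraping methods failed for URL: https://example.com",
  details: [
    // one entry per scraping method that was attempted
    "fetch: 403 Forbidden",
    "playwright: net::ERR_TIMED_OUT",
  ],
};
```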
@@ -351,6 +351,9 @@ export async function scrapSingleUrl(
     pageStatusCode: 200,
     pageError: undefined,
   };
+
+  const errors: Record<string, string> = {};
+
   try {
     let urlKey = urlToScrap;
     try {
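The new errors map is keyed by scraper name, with the pageError each attempt reports as its value; the loop in the next hunk fills it in, and the catch block at the end drains it into the structured error.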
@@ -392,6 +395,12 @@ export async function scrapSingleUrl(
         pageError = undefined;
       }
 
+      if (attempt.pageError) {
+        errors[scraper] = attempt.pageError;
+      } else {
+        errors[scraper] = null;
+      }
+
       if ((text && text.trim().length >= 100) || (typeof screenshot === "string" && screenshot.length > 0)) {
         Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100 or screenshot, breaking`);
         break;
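One typing caveat worth noting: errors is declared as Record<string, string>, yet the else branch assigns null, which only compiles with strictNullChecks disabled. A minimal strict-mode-friendly sketch of the same tally (Attempt and tally are hypothetical stand-ins for the retry loop in the diff):

```typescript
// Hypothetical stand-in for one scraping attempt from the retry loop.
type Attempt = { pageError?: string };

// Value type widened so "no error" can be recorded explicitly as null.
const errors: Record<string, string | null> = {};

function tally(scraper: string, attempt: Attempt): void {
  // Keep the scraper's error message, or null for an error-free attempt.
  errors[scraper] = attempt.pageError ?? null;
}

tally("fetch", { pageError: "403 Forbidden" });
tally("playwright", {});
// errors -> { fetch: "403 Forbidden", playwright: null }
```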
@@ -443,12 +452,17 @@ export async function scrapSingleUrl(
 
     return document;
   } catch (error) {
-    Logger.debug(`⛏️ Error: ${error.message} - Failed to fetch URL: ${urlToScrap}`);
+    Logger.error(`⛏️ Error: ${error.message} - Failed to fetch URL: ${urlToScrap}`);
     ScrapeEvents.insert(jobId, {
       type: "error",
       message: typeof error === "string" ? error : typeof error.message === "string" ? error.message : JSON.stringify(error),
       stack: error.stack,
     });
 
+    if (error instanceof Error && error.message.startsWith("All scraping methods failed")) {
+      throw new Error(JSON.stringify({"type": "all", "errors": Object.values(errors)}));
+    }
+
     return {
       content: "",
       markdown: pageOptions.includeMarkdown || pageOptions.includeExtract ? "" : undefined,
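Putting the two halves together, the round trip looks roughly like this (a self-contained sketch, not the actual firecrawl code: raiseAllFailed, toResponse, and the sample messages are invented; the {"type":"all"} payload, the startsWith check, and the 500 body mirror the hunks above). The controller branch tests typeof e === "string", which assumes the Error's message has been reduced to a plain string by the time it reaches the controller, e.g. as a job's failed reason:

```typescript
// Producer (mirrors scrapSingleUrl): once all methods fail, throw a
// structured JSON error carrying the tallied per-scraper messages.
function raiseAllFailed(errors: Record<string, string | null>): never {
  throw new Error(JSON.stringify({ type: "all", errors: Object.values(errors) }));
}

// Consumer (mirrors the new scrapeController branch): recognize the
// structured payload and unpack its details; re-throw anything else.
function toResponse(e: unknown, url: string): { status: number; body: unknown } {
  if (typeof e === "string" && e.startsWith('{"type":"all",')) {
    return {
      status: 500,
      body: {
        success: false,
        error: "All scraping methods failed for URL: " + url,
        details: JSON.parse(e).errors as string[],
      },
    };
  }
  throw e;
}

// Usage: unwrap the Error's message before handing it to the consumer.
try {
  raiseAllFailed({ fetch: "403 Forbidden", playwright: null });
} catch (err) {
  const reason = err instanceof Error ? err.message : err;
  console.log(toResponse(reason, "https://example.com"));
  // -> { status: 500, body: { ..., details: ["403 Forbidden", null] } }
}
```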