Revert "Merge pull request #561 from mendableai/bug/dealing-with-dns-error"

This reverts commit 2030ec603109d6ce8786a011d431bc5c83917f1b, reversing
changes made to f494d2b707d40b690ae41611d17f77f683570fc2.
This commit is contained in:
Nicolas 2024-08-20 18:16:11 -03:00
parent 2030ec6031
commit ffe11a5bf7

View File

@@ -136,29 +136,27 @@ export async function scrapWithFireEngine({
return { html: "", screenshot: "", pageStatusCode: null, pageError: "" }; return { html: "", screenshot: "", pageStatusCode: null, pageError: "" };
} }
if (checkStatusResponse.status !== 200 || checkStatusResponse.data.error) { if (response.status !== 200) {
Logger.debug( Logger.debug(
`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${checkStatusResponse.status}` `⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`
); );
logParams.error_message = checkStatusResponse.data?.pageError ?? checkStatusResponse.data?.error; logParams.error_message = response.data?.pageError;
logParams.response_code = checkStatusResponse.data?.pageStatusCode; logParams.response_code = response.data?.pageStatusCode;
if(checkStatusResponse.data && checkStatusResponse.data?.pageStatusCode !== 200) { if(response.data && response.data?.pageStatusCode !== 200) {
Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`); Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`);
} }
const pageStatusCode = checkStatusResponse.data?.pageStatusCode ? checkStatusResponse.data?.pageStatusCode : checkStatusResponse.data?.error && checkStatusResponse.data?.error.includes("Dns resolution error for hostname") ? 404 : undefined;
return { return {
html: "", html: "",
screenshot: "", screenshot: "",
pageStatusCode, pageStatusCode: response.data?.pageStatusCode,
pageError: checkStatusResponse.data?.pageError ?? checkStatusResponse.data?.error, pageError: response.data?.pageError,
}; };
} }
const contentType = checkStatusResponse.headers["content-type"]; const contentType = response.headers["content-type"];
if (contentType && contentType.includes("application/pdf")) { if (contentType && contentType.includes("application/pdf")) {
const { content, pageStatusCode, pageError } = await fetchAndProcessPdf( const { content, pageStatusCode, pageError } = await fetchAndProcessPdf(
url, url,
@@ -169,18 +167,18 @@ export async function scrapWithFireEngine({
logParams.error_message = pageError; logParams.error_message = pageError;
return { html: content, screenshot: "", pageStatusCode, pageError }; return { html: content, screenshot: "", pageStatusCode, pageError };
} else { } else {
const data = checkStatusResponse.data; const data = response.data;
logParams.success = logParams.success =
(data.pageStatusCode >= 200 && data.pageStatusCode < 300) || (data.pageStatusCode >= 200 && data.pageStatusCode < 300) ||
data.pageStatusCode === 404; data.pageStatusCode === 404;
logParams.html = data.content ?? ""; logParams.html = data.content ?? "";
logParams.response_code = data.pageStatusCode; logParams.response_code = data.pageStatusCode;
logParams.error_message = data.pageError ?? data.error; logParams.error_message = data.pageError;
return { return {
html: data.content ?? "", html: data.content ?? "",
screenshot: data.screenshot ?? "", screenshot: data.screenshot ?? "",
pageStatusCode: data.pageStatusCode, pageStatusCode: data.pageStatusCode,
pageError: data.pageError ?? data.error, pageError: data.pageError,
}; };
} }
} catch (error) { } catch (error) {