Nick: fixes

This commit is contained in:
Nicolas 2024-07-18 13:19:44 -04:00
parent f11137352c
commit d2de01d342
5 changed files with 18 additions and 10 deletions

View File

@@ -19,7 +19,10 @@ export async function crawlJobStatusPreviewController(req: Request, res: Respons
}
}
const jobStatus = await job.getState();
let jobStatus = await job.getState();
if (jobStatus === 'waiting' || jobStatus === 'stuck') {
jobStatus = 'active';
}
res.json({
status: jobStatus,

View File

@@ -31,7 +31,6 @@ export async function scrapWithFireEngine({
fireEngineOptions?: FireEngineOptions;
headers?: Record<string, string>;
options?: any;
engine?: 'playwright' | 'chrome-cdp' | 'tlsclient';
}): Promise<FireEngineResponse> {
const logParams = {
url,
@@ -47,6 +46,7 @@ export async function scrapWithFireEngine({
try {
const reqParams = await generateRequestParams(url);
const waitParam = reqParams["params"]?.wait ?? waitFor;
const engineParam = reqParams["params"]?.engine ?? fireEngineOptions?.engine ?? "playwright";
const screenshotParam = reqParams["params"]?.screenshot ?? screenshot;
const fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions;
@@ -57,13 +57,13 @@ export async function scrapWithFireEngine({
endpoint = "/request";
}
let engine = fireEngineOptions?.engine ?? options?.engine ?? "playwright"; // do we want fireEngineOptions as first choice?
let engine = engineParam; // do we want fireEngineOptions as first choice?
console.log(
`[Fire-Engine] Scraping ${url} with wait: ${waitParam} and screenshot: ${screenshotParam} and method: ${fireEngineOptionsParam?.method ?? "null"}`
`[Fire-Engine][${engine}] Scraping ${url} with wait: ${waitParam} and screenshot: ${screenshotParam} and method: ${fireEngineOptionsParam?.method ?? "null"}`
);
console.log(fireEngineOptionsParam)
// console.log(fireEngineOptionsParam)
const response = await axios.post(
process.env.FIRE_ENGINE_BETA_URL + endpoint,
@@ -73,7 +73,6 @@ export async function scrapWithFireEngine({
screenshot: screenshotParam,
headers: headers,
pageOptions: pageOptions,
engine: engine,
...fireEngineOptionsParam,
},
{
@@ -86,14 +85,14 @@ export async function scrapWithFireEngine({
if (response.status !== 200) {
console.error(
`[Fire-Engine] Error fetching url: ${url} with status: ${response.status}`
`[Fire-Engine][${engine}] Error fetching url: ${url} with status: ${response.status}`
);
logParams.error_message = response.data?.pageError;
logParams.response_code = response.data?.pageStatusCode;
if(response.data && response.data?.pageStatusCode !== 200) {
console.error(`[Fire-Engine] Error fetching url: ${url} with status: ${response.status}`);
console.error(`[Fire-Engine][${engine}] Error fetching url: ${url} with status: ${response.status}`);
}
return {

View File

@@ -161,7 +161,9 @@ export async function scrapSingleUrl(
screenshot: pageOptions.screenshot,
pageOptions: pageOptions,
headers: pageOptions.headers,
fireEngineOptions: {
engine: engine,
}
});
scraperResponse.text = response.html;
scraperResponse.screenshot = response.screenshot;

View File

@@ -175,6 +175,7 @@ export const urlSpecificParams = {
"firecrawl.dev":{
defaultScraper: "fire-engine",
params: {
engine: "playwright",
headers: {
"User-Agent":
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",

View File

@@ -7,11 +7,14 @@ export function getWebScraperQueue() {
if (!webScraperQueue) {
webScraperQueue = new Queue("web-scraper", process.env.REDIS_URL, {
settings: {
lockDuration: 2 * 60 * 1000, // 1 minute in milliseconds,
lockDuration: 1 * 60 * 1000, // 1 minute in milliseconds,
lockRenewTime: 15 * 1000, // 15 seconds in milliseconds
stalledInterval: 30 * 1000,
maxStalledCount: 10,
},
defaultJobOptions:{
attempts: 5
}
});
console.log("Web scraper queue created");
}