mirror of
				https://github.com/mendableai/firecrawl.git
				synced 2025-11-04 03:53:17 +00:00 
			
		
		
		
	Nick:
This commit is contained in:
		
							parent
							
								
									bfc7f5882e
								
							
						
					
					
						commit
						e098e88ea7
					
				@ -129,3 +129,11 @@ export interface FireEngineResponse {
 | 
			
		||||
  pageError?: string;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
export interface FireEngineOptions{
 | 
			
		||||
  mobileProxy?: boolean;
 | 
			
		||||
  method?: string;
 | 
			
		||||
  engine?: string;
 | 
			
		||||
  blockMedia?: boolean;
 | 
			
		||||
  blockAds?: boolean;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -8,7 +8,6 @@ import { scrapSingleUrl } from "./single_url";
 | 
			
		||||
import robotsParser from "robots-parser";
 | 
			
		||||
import { getURLDepth } from "./utils/maxDepthUtils";
 | 
			
		||||
import { axiosTimeout } from "../../../src/lib/timeout";
 | 
			
		||||
import { scrapWithFireEngine } from "./scrapers/fireEngine";
 | 
			
		||||
 | 
			
		||||
export class WebCrawler {
 | 
			
		||||
  private initialUrl: string;
 | 
			
		||||
 | 
			
		||||
@ -1,5 +1,5 @@
 | 
			
		||||
import axios from "axios";
 | 
			
		||||
import { FireEngineResponse } from "../../../lib/entities";
 | 
			
		||||
import { FireEngineOptions, FireEngineResponse } from "../../../lib/entities";
 | 
			
		||||
import { logScrape } from "../../../services/logging/scrape_log";
 | 
			
		||||
import { generateRequestParams } from "../single_url";
 | 
			
		||||
import { fetchAndProcessPdf } from "../utils/pdfProcessor";
 | 
			
		||||
@ -20,6 +20,7 @@ export async function scrapWithFireEngine({
 | 
			
		||||
  waitFor = 0,
 | 
			
		||||
  screenshot = false,
 | 
			
		||||
  pageOptions = { parsePDF: true },
 | 
			
		||||
  fireEngineOptions = {},
 | 
			
		||||
  headers,
 | 
			
		||||
  options,
 | 
			
		||||
}: {
 | 
			
		||||
@ -27,6 +28,7 @@ export async function scrapWithFireEngine({
 | 
			
		||||
  waitFor?: number;
 | 
			
		||||
  screenshot?: boolean;
 | 
			
		||||
  pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean };
 | 
			
		||||
  fireEngineOptions?: FireEngineOptions;
 | 
			
		||||
  headers?: Record<string, string>;
 | 
			
		||||
  options?: any;
 | 
			
		||||
}): Promise<FireEngineResponse> {
 | 
			
		||||
@ -57,6 +59,7 @@ export async function scrapWithFireEngine({
 | 
			
		||||
        screenshot: screenshotParam,
 | 
			
		||||
        headers: headers,
 | 
			
		||||
        pageOptions: pageOptions,
 | 
			
		||||
        ...fireEngineOptions,
 | 
			
		||||
      },
 | 
			
		||||
      {
 | 
			
		||||
        headers: {
 | 
			
		||||
 | 
			
		||||
@ -21,7 +21,7 @@ export async function getLinksFromSitemap(
 | 
			
		||||
        const response = await axios.get(sitemapUrl, { timeout: axiosTimeout });
 | 
			
		||||
        content = response.data;
 | 
			
		||||
      } else if (mode === 'fire-engine') {
 | 
			
		||||
        const response = await scrapWithFireEngine({ url: sitemapUrl });
 | 
			
		||||
        const response = await scrapWithFireEngine({ url: sitemapUrl, fireEngineOptions: { engine: "request", method: "get", mobileProxy: true } });
 | 
			
		||||
        content = response.html;
 | 
			
		||||
      }
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user