mirror of
				https://github.com/mendableai/firecrawl.git
				synced 2025-11-03 19:43:01 +00:00 
			
		
		
		
	fix(crawl): never invalidate first crawl scrape if redirects
This commit is contained in:
		
							parent
							
								
									b96b97ed72
								
							
						
					
					
						commit
						7d73ebdbf1
					
				@@ -558,6 +558,7 @@ async function processKickoffJob(job: Job & { id: string }, token: string) {
 | 
			
		||||
          crawl_id: job.data.crawl_id,
 | 
			
		||||
          webhook: job.data.webhook,
 | 
			
		||||
          v1: job.data.v1,
 | 
			
		||||
          isCrawlSourceScrape: true,
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
          priority: 15,
 | 
			
		||||
@@ -721,10 +722,14 @@ async function processJob(job: Job & { id: string }, token: string) {
 | 
			
		||||
        if (
 | 
			
		||||
          crawler.filterURL(doc.metadata.url, doc.metadata.sourceURL) === null
 | 
			
		||||
        ) {
 | 
			
		||||
          if (job.data.isCrawlSourceScrape) {
 | 
			
		||||
            // TODO: re-fetch sitemap for redirect target domain
 | 
			
		||||
          } else {
 | 
			
		||||
            throw new Error(
 | 
			
		||||
              "Redirected target URL is not allowed by crawlOptions",
 | 
			
		||||
            ); // TODO: make this its own error type that is ignored by error tracking
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (isUrlBlocked(doc.metadata.url)) {
 | 
			
		||||
          throw new Error(BLOCKLISTED_URL_MESSAGE); // TODO: make this its own error type that is ignored by error tracking
 | 
			
		||||
 | 
			
		||||
@@ -44,6 +44,7 @@ export interface WebScraperOptions {
 | 
			
		||||
  webhook?: z.infer<typeof webhookSchema>;
 | 
			
		||||
  v1?: boolean;
 | 
			
		||||
  is_scrape?: boolean;
 | 
			
		||||
  isCrawlSourceScrape?: boolean;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
export interface RunWebScraperParams {
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user