mirror of https://github.com/mendableai/firecrawl.git
synced 2025-11-04 03:53:17 +00:00
	NIck: rm scrape events
This commit is contained in:
parent feda4dede7
commit 22f7efed35
@@ -1,109 +1,109 @@
-import { Job } from "bullmq";
-import { supabase_service as supabase } from "../services/supabase";
-import { logger } from "./logger";
-import { configDotenv } from "dotenv";
-import { Engine } from "../scraper/scrapeURL/engines";
-configDotenv();
+// import { Job } from "bullmq";
+// import { supabase_service as supabase } from "../services/supabase";
+// import { logger } from "./logger";
+// import { configDotenv } from "dotenv";
+// import { Engine } from "../scraper/scrapeURL/engines";
+// configDotenv();

-export type ScrapeErrorEvent = {
-  type: "error";
-  message: string;
-  stack?: string;
-};
+// export type ScrapeErrorEvent = {
+//   type: "error";
+//   message: string;
+//   stack?: string;
+// };

-export type ScrapeScrapeEvent = {
-  type: "scrape";
-  url: string;
-  worker?: string;
-  method: Engine;
-  result: null | {
-    success: boolean;
-    response_code?: number;
-    response_size?: number;
-    error?: string | object;
-    // proxy?: string,
-    time_taken: number;
-  };
-};
+// export type ScrapeScrapeEvent = {
+//   type: "scrape";
+//   url: string;
+//   worker?: string;
+//   method: Engine;
+//   result: null | {
+//     success: boolean;
+//     response_code?: number;
+//     response_size?: number;
+//     error?: string | object;
+//     // proxy?: string,
+//     time_taken: number;
+//   };
+// };

-export type ScrapeQueueEvent = {
-  type: "queue";
-  event:
-    | "waiting"
-    | "active"
-    | "completed"
-    | "paused"
-    | "resumed"
-    | "removed"
-    | "failed";
-  worker?: string;
-};
+// export type ScrapeQueueEvent = {
+//   type: "queue";
+//   event:
+//     | "waiting"
+//     | "active"
+//     | "completed"
+//     | "paused"
+//     | "resumed"
+//     | "removed"
+//     | "failed";
+//   worker?: string;
+// };

-export type ScrapeEvent =
-  | ScrapeErrorEvent
-  | ScrapeScrapeEvent
-  | ScrapeQueueEvent;
+// export type ScrapeEvent =
+//   | ScrapeErrorEvent
+//   | ScrapeScrapeEvent
+//   | ScrapeQueueEvent;

-export class ScrapeEvents {
-  static async insert(jobId: string, content: ScrapeEvent) {
-    if (jobId === "TEST") return null;
+// export class ScrapeEvents {
+//   static async insert(jobId: string, content: ScrapeEvent) {
+//     if (jobId === "TEST") return null;

-    const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
-    if (useDbAuthentication) {
-      try {
-        const result = await supabase
-          .from("scrape_events")
-          .insert({
-            job_id: jobId,
-            type: content.type,
-            content: content,
-            // created_at
-          })
-          .select()
-          .single();
-        return (result.data as any).id;
-      } catch (error) {
-        // logger.error(`Error inserting scrape event: ${error}`);
-        return null;
-      }
-    }
+//     const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
+//     if (useDbAuthentication) {
+//       try {
+//         const result = await supabase
+//           .from("scrape_events")
+//           .insert({
+//             job_id: jobId,
+//             type: content.type,
+//             content: content,
+//             // created_at
+//           })
+//           .select()
+//           .single();
+//         return (result.data as any).id;
+//       } catch (error) {
+//         // logger.error(`Error inserting scrape event: ${error}`);
+//         return null;
+//       }
+//     }

-    return null;
-  }
+//     return null;
+//   }

-  static async updateScrapeResult(
-    logId: number | null,
-    result: ScrapeScrapeEvent["result"],
-  ) {
-    if (logId === null) return;
+//   static async updateScrapeResult(
+//     logId: number | null,
+//     result: ScrapeScrapeEvent["result"],
+//   ) {
+//     if (logId === null) return;

-    try {
-      const previousLog = (
-        await supabase.from("scrape_events").select().eq("id", logId).single()
-      ).data as any;
-      await supabase
-        .from("scrape_events")
-        .update({
-          content: {
-            ...previousLog.content,
-            result,
-          },
-        })
-        .eq("id", logId);
-    } catch (error) {
-      logger.error(`Error updating scrape result: ${error}`);
-    }
-  }
+//     try {
+//       const previousLog = (
+//         await supabase.from("scrape_events").select().eq("id", logId).single()
+//       ).data as any;
+//       await supabase
+//         .from("scrape_events")
+//         .update({
+//           content: {
+//             ...previousLog.content,
+//             result,
+//           },
+//         })
+//         .eq("id", logId);
+//     } catch (error) {
+//       logger.error(`Error updating scrape result: ${error}`);
+//     }
+//   }

-  static async logJobEvent(job: Job | any, event: ScrapeQueueEvent["event"]) {
-    try {
-      await this.insert(((job as any).id ? (job as any).id : job) as string, {
-        type: "queue",
-        event,
-        worker: process.env.FLY_MACHINE_ID,
-      });
-    } catch (error) {
-      logger.error(`Error logging job event: ${error}`);
-    }
-  }
-}
+//   static async logJobEvent(job: Job | any, event: ScrapeQueueEvent["event"]) {
+//     try {
+//       await this.insert(((job as any).id ? (job as any).id : job) as string, {
+//         type: "queue",
+//         event,
+//         worker: process.env.FLY_MACHINE_ID,
+//       });
+//     } catch (error) {
+//       logger.error(`Error logging job event: ${error}`);
+//     }
+//   }
+// }
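For reference, the API commented out above follows a two-phase pattern: insert() writes a "scrape" event with result: null and, when Supabase logging is enabled (USE_DB_AUTHENTICATION=true), returns the new row's id; updateScrapeResult() later merges the final result into that row. Below is a minimal caller sketch, assuming the pre-removal module; the job id, engine name, and result numbers are hypothetical illustration values.

    // Sketch only: exercises the pre-removal ScrapeEvents API shown above.
    import { ScrapeEvents } from "../lib/scrape-events";

    async function recordScrape() {
      // Phase 1: log the attempt before the engine runs; result is null for now.
      // Returns the scrape_events row id, or null when DB logging is disabled.
      const logId = await ScrapeEvents.insert("job-123" /* hypothetical */, {
        type: "scrape",
        url: "https://example.com",
        method: "fetch" as any, // hypothetical Engine value
        result: null,
      });

      // Phase 2: patch the same row once the outcome is known.
      await ScrapeEvents.updateScrapeResult(logId, {
        success: true,
        response_code: 200, // hypothetical
        response_size: 1024, // hypothetical
        time_taken: 350, // finishedAt - startedAt, hypothetical
      });
    }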
@@ -8,7 +8,6 @@ import { billTeam } from "../services/billing/credit_billing";
 import { Document } from "../controllers/v1/types";
 import { supabase_service } from "../services/supabase";
 import { logger as _logger } from "../lib/logger";
-import { ScrapeEvents } from "../lib/scrape-events";
 import { configDotenv } from "dotenv";
 import {
   EngineResultsTracker,
@@ -146,35 +145,35 @@ export async function runWebScraper({
     }
   }

-  const engineOrder = Object.entries(engines)
-    .sort((a, b) => a[1].startedAt - b[1].startedAt)
-    .map((x) => x[0]) as Engine[];
+  // const engineOrder = Object.entries(engines)
+  //   .sort((a, b) => a[1].startedAt - b[1].startedAt)
+  //   .map((x) => x[0]) as Engine[];

-  for (const engine of engineOrder) {
-    const result = engines[engine] as Exclude<
-      EngineResultsTracker[Engine],
-      undefined
-    >;
-    ScrapeEvents.insert(bull_job_id, {
-      type: "scrape",
-      url,
-      method: engine,
-      result: {
-        success: result.state === "success",
-        response_code:
-          result.state === "success" ? result.result.statusCode : undefined,
-        response_size:
-          result.state === "success" ? result.result.html.length : undefined,
-        error:
-          result.state === "error"
-            ? result.error
-            : result.state === "timeout"
-              ? "Timed out"
-              : undefined,
-        time_taken: result.finishedAt - result.startedAt,
-      },
-    });
-  }
+  // for (const engine of engineOrder) {
+  //   const result = engines[engine] as Exclude<
+  //     EngineResultsTracker[Engine],
+  //     undefined
+  //   >;
+  //   ScrapeEvents.insert(bull_job_id, {
+  //     type: "scrape",
+  //     url,
+  //     method: engine,
+  //     result: {
+  //       success: result.state === "success",
+  //       response_code:
+  //         result.state === "success" ? result.result.statusCode : undefined,
+  //       response_size:
+  //         result.state === "success" ? result.result.html.length : undefined,
+  //       error:
+  //         result.state === "error"
+  //           ? result.error
+  //           : result.state === "timeout"
+  //             ? "Timed out"
+  //             : undefined,
+  //       time_taken: result.finishedAt - result.startedAt,
+  //     },
+  //   });
+  // }

   if (error === undefined && response?.success) {
     return response;
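The loop removed above is the only reader of the per-engine tracker entries in this hunk, and it touches just a few fields. EngineResultsTracker itself is not shown in the diff; inferring only from that usage, each entry looks roughly like the following hedged sketch, not the actual definition.

    // Inferred shape only: reconstructed from the fields the removed loop
    // reads; the real EngineResultsTracker entries may carry more.
    type InferredTrackerEntry = {
      state: "success" | "error" | "timeout";
      startedAt: number; // used to sort engines into engineOrder
      finishedAt: number; // time_taken = finishedAt - startedAt
      result?: { statusCode: number; html: string }; // read when state === "success"
      error?: unknown; // read when state === "error"
    };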
@@ -228,7 +227,7 @@ const saveJob = async (
       //     // I think the job won't exist here anymore
       //   }
     }
-    ScrapeEvents.logJobEvent(job, "completed");
+    // ScrapeEvents.logJobEvent(job, "completed");
   } catch (error) {
     _logger.error(`🐂 Failed to update job status`, {
       module: "runWebScraper",
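The call removed in this last hunk was fire-and-forget job-state logging. As the id coercion inside logJobEvent shows, it accepted either a BullMQ Job (whose .id is used) or a bare job-id string, and both shapes route through the same gated insert(). A sketch of the two call shapes, assuming the pre-removal module; the ids are hypothetical.

    // Sketch only: both shapes end up in the same scrape_events insert path.
    import { Job } from "bullmq";
    import { ScrapeEvents } from "../lib/scrape-events";

    async function onJobDone(job: Job) {
      // Passing the Job object: its .id becomes the job_id column.
      await ScrapeEvents.logJobEvent(job, "completed");

      // Passing a bare id string: the string itself is used as the job id.
      await ScrapeEvents.logJobEvent("job-123" /* hypothetical */, "failed");
    }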