mirror of
https://github.com/mendableai/firecrawl.git
synced 2025-06-27 00:41:33 +00:00
feat: update mu (#1639)
* update to mu v2
* feat(ci): add RUNPOD_MUV2_POD_ID
* stupid change to make CI run

---------

Co-authored-by: Gergő Móricz <mo.geryy@gmail.com>
This commit is contained in:
parent
6ca551a887
commit
af88218fad
1
.github/workflows/test-server.yml
vendored
1
.github/workflows/test-server.yml
vendored
@@ -30,6 +30,7 @@ env:
|
||||
SERPER_API_KEY: ${{ secrets.SERPER_API_KEY }}
|
||||
ENV: ${{ secrets.ENV }}
|
||||
RUNPOD_MU_POD_ID: ${{ secrets.RUNPOD_MU_POD_ID }}
|
||||
RUNPOD_MUV2_POD_ID: ${{ secrets.RUNPOD_MUV2_POD_ID }}
|
||||
RUNPOD_MU_API_KEY: ${{ secrets.RUNPOD_MU_API_KEY }}
|
||||
GCS_CREDENTIALS: ${{ secrets.GCS_CREDENTIALS }}
|
||||
GCS_BUCKET_NAME: ${{ secrets.GCS_BUCKET_NAME }}
|
||||
|
@@ -3,7 +3,7 @@ import { logger } from "./logger";
|
||||
import crypto from "crypto";
|
||||
|
||||
const credentials = process.env.GCS_CREDENTIALS ? JSON.parse(atob(process.env.GCS_CREDENTIALS)) : undefined;
|
||||
const PDF_CACHE_PREFIX = "pdf-cache/";
|
||||
const PDF_CACHE_PREFIX = "pdf-cache-v2/";
|
||||
|
||||
/**
|
||||
* Creates a SHA-256 hash of the PDF content to use as a cache key
|
||||
|
@@ -41,7 +41,6 @@ async function scrapePDFWithRunPodMU(
|
||||
|
||||
try {
|
||||
const cachedResult = await getPdfResultFromCache(base64Content);
|
||||
|
||||
if (cachedResult) {
|
||||
meta.logger.info("Using cached RunPod MU result for PDF", {
|
||||
tempFilePath,
|
||||
@@ -65,38 +64,6 @@ async function scrapePDFWithRunPodMU(
|
||||
const abort = timeout ? AbortSignal.timeout(timeout) : undefined;
|
||||
|
||||
const podStart = await robustFetch({
|
||||
url:
|
||||
"https://api.runpod.ai/v2/" + process.env.RUNPOD_MU_POD_ID + "/runsync",
|
||||
method: "POST",
|
||||
headers: {
|
||||
Authorization: `Bearer ${process.env.RUNPOD_MU_API_KEY}`,
|
||||
},
|
||||
body: {
|
||||
input: {
|
||||
file_content: base64Content,
|
||||
filename: path.basename(tempFilePath) + ".pdf",
|
||||
timeout,
|
||||
created_at: Date.now(),
|
||||
},
|
||||
},
|
||||
logger: meta.logger.child({
|
||||
method: "scrapePDFWithRunPodMU/runsync/robustFetch",
|
||||
}),
|
||||
schema: z.object({
|
||||
id: z.string(),
|
||||
status: z.string(),
|
||||
output: z
|
||||
.object({
|
||||
markdown: z.string(),
|
||||
})
|
||||
.optional(),
|
||||
}),
|
||||
mock: meta.mock,
|
||||
abort,
|
||||
});
|
||||
|
||||
//this is just so we can test in parallel and compare results
|
||||
robustFetch({
|
||||
url:
|
||||
"https://api.runpod.ai/v2/" + process.env.RUNPOD_MUV2_POD_ID + "/runsync",
|
||||
method: "POST",
|
||||
@@ -125,11 +92,6 @@ async function scrapePDFWithRunPodMU(
|
||||
}),
|
||||
mock: meta.mock,
|
||||
abort,
|
||||
}).catch(error => {
|
||||
meta.logger.warn("Error scraping PDF with RunPod MU V2", {
|
||||
error,
|
||||
tempFilePath,
|
||||
});
|
||||
});
|
||||
|
||||
let status: string = podStart.status;
|
||||
|
Loading…
x
Reference in New Issue
Block a user