mirror of
https://github.com/mendableai/firecrawl.git
synced 2025-12-08 13:30:07 +00:00
Add parsePDF parameter to JS SDK (clean implementation)
- Add an optional `parsePDF` boolean parameter to the `CrawlScrapeOptions` interface.
- The parameter automatically flows through scrape and crawl operations via the spread operator.
- Add test cases for `parsePDF` in both scrape and crawl scenarios.
- The scrape tests exercise `parsePDF=true` and `parsePDF=false` against a PDF file; the crawl test passes `parsePDF` through `scrapeOptions`.

Co-Authored-By: Micah Stairs <micah@sideguide.dev>
This commit is contained in:
parent
9a5d40c3cf
commit
bd19ee6ff3
@ -103,6 +103,31 @@ describe('FirecrawlApp E2E Tests', () => {
|
|||||||
expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
||||||
}, 30000); // 30 seconds timeout
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
|
test.concurrent('should return successful response for valid scrape with PDF file and parsePDF true', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf', {
|
||||||
|
parsePDF: true
|
||||||
|
});
|
||||||
|
if (!response.success) {
|
||||||
|
throw new Error(response.error);
|
||||||
|
}
|
||||||
|
|
||||||
|
expect(response).not.toBeNull();
|
||||||
|
expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
||||||
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
|
test.concurrent('should return successful response for valid scrape with PDF file and parsePDF false', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf', {
|
||||||
|
parsePDF: false
|
||||||
|
});
|
||||||
|
if (!response.success) {
|
||||||
|
throw new Error(response.error);
|
||||||
|
}
|
||||||
|
|
||||||
|
expect(response).not.toBeNull();
|
||||||
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
test.concurrent('should throw error for invalid API key on crawl', async () => {
|
test.concurrent('should throw error for invalid API key on crawl', async () => {
|
||||||
if (API_URL.includes('api.firecrawl.dev')) {
|
if (API_URL.includes('api.firecrawl.dev')) {
|
||||||
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
||||||
@ -154,6 +179,23 @@ describe('FirecrawlApp E2E Tests', () => {
|
|||||||
}
|
}
|
||||||
}, 60000); // 60 seconds timeout
|
}, 60000); // 60 seconds timeout
|
||||||
|
|
||||||
|
test.concurrent('should handle parsePDF parameter in crawl scrapeOptions', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const response = await app.crawlUrl('https://roastmywebsite.ai', {
|
||||||
|
limit: 1,
|
||||||
|
scrapeOptions: {
|
||||||
|
formats: ['markdown'],
|
||||||
|
parsePDF: true
|
||||||
|
}
|
||||||
|
} as CrawlParams, 30) as CrawlStatusResponse;
|
||||||
|
|
||||||
|
expect(response).not.toHaveProperty("next");
|
||||||
|
expect(response.data.length).toBeGreaterThan(0);
|
||||||
|
if (response.data[0]) {
|
||||||
|
expect(response.data[0]).toHaveProperty("markdown");
|
||||||
|
}
|
||||||
|
}, 60000); // 60 seconds timeout
|
||||||
|
|
||||||
test.concurrent('should handle idempotency key for crawl', async () => {
|
test.concurrent('should handle idempotency key for crawl', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
const uniqueIdempotencyKey = uuidv4();
|
const uniqueIdempotencyKey = uuidv4();
|
||||||
|
|||||||
@ -125,6 +125,7 @@ export interface CrawlScrapeOptions {
|
|||||||
proxy?: "basic" | "stealth" | "auto";
|
proxy?: "basic" | "stealth" | "auto";
|
||||||
storeInCache?: boolean;
|
storeInCache?: boolean;
|
||||||
maxAge?: number;
|
maxAge?: number;
|
||||||
|
parsePDF?: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
export type Action = {
|
export type Action = {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user