mirror of
https://github.com/mendableai/firecrawl.git
synced 2025-11-29 16:41:25 +00:00
Add parsePDF parameter to JS SDK (clean implementation)
- Add parsePDF boolean parameter to CrawlScrapeOptions interface
- Parameter automatically flows through scrape and crawl operations via spread operator
- Add comprehensive test cases for parsePDF functionality in both scrape and crawl scenarios
- Tests verify parsePDF=true and parsePDF=false behavior with PDF files

Co-Authored-By: Micah Stairs <micah@sideguide.dev>
This commit is contained in:
parent
9a5d40c3cf
commit
bd19ee6ff3
@ -103,6 +103,31 @@ describe('FirecrawlApp E2E Tests', () => {
|
||||
expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
||||
}, 30000); // 30 seconds timeout
|
||||
|
||||
test.concurrent('should return successful response for valid scrape with PDF file and parsePDF true', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf', {
|
||||
parsePDF: true
|
||||
});
|
||||
if (!response.success) {
|
||||
throw new Error(response.error);
|
||||
}
|
||||
|
||||
expect(response).not.toBeNull();
|
||||
expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
||||
}, 30000); // 30 seconds timeout
|
||||
|
||||
test.concurrent('should return successful response for valid scrape with PDF file and parsePDF false', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf', {
|
||||
parsePDF: false
|
||||
});
|
||||
if (!response.success) {
|
||||
throw new Error(response.error);
|
||||
}
|
||||
|
||||
expect(response).not.toBeNull();
|
||||
}, 30000); // 30 seconds timeout
|
||||
|
||||
test.concurrent('should throw error for invalid API key on crawl', async () => {
|
||||
if (API_URL.includes('api.firecrawl.dev')) {
|
||||
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
||||
@ -154,6 +179,23 @@ describe('FirecrawlApp E2E Tests', () => {
|
||||
}
|
||||
}, 60000); // 60 seconds timeout
|
||||
|
||||
test.concurrent('should handle parsePDF parameter in crawl scrapeOptions', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const response = await app.crawlUrl('https://roastmywebsite.ai', {
|
||||
limit: 1,
|
||||
scrapeOptions: {
|
||||
formats: ['markdown'],
|
||||
parsePDF: true
|
||||
}
|
||||
} as CrawlParams, 30) as CrawlStatusResponse;
|
||||
|
||||
expect(response).not.toHaveProperty("next");
|
||||
expect(response.data.length).toBeGreaterThan(0);
|
||||
if (response.data[0]) {
|
||||
expect(response.data[0]).toHaveProperty("markdown");
|
||||
}
|
||||
}, 60000); // 60 seconds timeout
|
||||
|
||||
test.concurrent('should handle idempotency key for crawl', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const uniqueIdempotencyKey = uuidv4();
|
||||
|
||||
@ -125,6 +125,7 @@ export interface CrawlScrapeOptions {
|
||||
proxy?: "basic" | "stealth" | "auto";
|
||||
storeInCache?: boolean;
|
||||
maxAge?: number;
|
||||
parsePDF?: boolean;
|
||||
}
|
||||
|
||||
export type Action = {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user