Add parsePDF parameter to JS SDK (clean implementation)

- Add parsePDF boolean parameter to CrawlScrapeOptions interface
- Parameter automatically flows through scrape and crawl operations via spread operator
- Add comprehensive test cases for parsePDF functionality in both scrape and crawl scenarios
- Tests verify parsePDF=true and parsePDF=false behavior with PDF files

Co-Authored-By: Micah Stairs <micah@sideguide.dev>
2025-11-29 16:41:25 +00:00 · 2025-06-26 20:59:01 +00:00 · 2025-06-26 20:59:01 +00:00 · bd19ee6ff3
commit bd19ee6ff3
parent 9a5d40c3cf
2 changed files with 43 additions and 0 deletions
--- a/apps/js-sdk/firecrawl/src/tests/v1/e2e_withAuth/index.test.ts
+++ b/apps/js-sdk/firecrawl/src/tests/v1/e2e_withAuth/index.test.ts
@ -103,6 +103,31 @@ describe('FirecrawlApp E2E Tests', () => {
    expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
  }, 30000); // 30 seconds timeout

+  test.concurrent('should return successful response for valid scrape with PDF file and parsePDF true', async () => {
+    const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
+    const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf', {
+      parsePDF: true
+    });
+    if (!response.success) {
+      throw new Error(response.error);
+    }
+
+    expect(response).not.toBeNull();
+    expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
+  }, 30000); // 30 seconds timeout
+
+  test.concurrent('should return successful response for valid scrape with PDF file and parsePDF false', async () => {
+    const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
+    const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf', {
+      parsePDF: false
+    });
+    if (!response.success) {
+      throw new Error(response.error);
+    }
+
+    expect(response).not.toBeNull();
+  }, 30000); // 30 seconds timeout
+
  test.concurrent('should throw error for invalid API key on crawl', async () => {
    if (API_URL.includes('api.firecrawl.dev')) {
      const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
@ -154,6 +179,23 @@ describe('FirecrawlApp E2E Tests', () => {
    }
  }, 60000); // 60 seconds timeout

+  test.concurrent('should handle parsePDF parameter in crawl scrapeOptions', async () => {
+    const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
+    const response = await app.crawlUrl('https://roastmywebsite.ai', {
+      limit: 1,
+      scrapeOptions: {
+        formats: ['markdown'],
+        parsePDF: true
+      }
+    } as CrawlParams, 30) as CrawlStatusResponse;
+    
+    expect(response).not.toHaveProperty("next");
+    expect(response.data.length).toBeGreaterThan(0);
+    if (response.data[0]) {
+      expect(response.data[0]).toHaveProperty("markdown");
+    }
+  }, 60000); // 60 seconds timeout
+
  test.concurrent('should handle idempotency key for crawl', async () => {
    const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
    const uniqueIdempotencyKey = uuidv4();
--- a/apps/js-sdk/firecrawl/src/index.ts
+++ b/apps/js-sdk/firecrawl/src/index.ts
@ -125,6 +125,7 @@ export interface CrawlScrapeOptions {
  proxy?: "basic" | "stealth" | "auto";
  storeInCache?: boolean;
  maxAge?: number;
+  parsePDF?: boolean;
 }

 export type Action = {