This commit is contained in:
Nicolas 2025-06-06 17:29:09 -03:00
commit 623d39801f
6 changed files with 24 additions and 609 deletions

View File

@ -1,12 +1,12 @@
{ {
"name": "@mendable/firecrawl-js", "name": "@mendable/firecrawl-js",
"version": "1.23.6", "version": "1.25.5",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "@mendable/firecrawl-js", "name": "@mendable/firecrawl-js",
"version": "1.23.6", "version": "1.25.5",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"axios": "^1.6.8", "axios": "^1.6.8",
@ -18,7 +18,7 @@
"@jest/globals": "^29.7.0", "@jest/globals": "^29.7.0",
"@types/axios": "^0.14.0", "@types/axios": "^0.14.0",
"@types/dotenv": "^8.2.0", "@types/dotenv": "^8.2.0",
"@types/jest": "^29.5.12", "@types/jest": "^29.5.14",
"@types/mocha": "^10.0.6", "@types/mocha": "^10.0.6",
"@types/node": "^20.12.12", "@types/node": "^20.12.12",
"@types/uuid": "^9.0.8", "@types/uuid": "^9.0.8",
@ -1812,10 +1812,11 @@
} }
}, },
"node_modules/@types/jest": { "node_modules/@types/jest": {
"version": "29.5.12", "version": "29.5.14",
"resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.12.tgz", "resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.14.tgz",
"integrity": "sha512-eDC8bTvT/QhYdxJAulQikueigY5AsdBRH2yDKW3yveW7svY3+DzN84/2NUgkw10RTiJbWqZrTtoGVdYlvFJdLw==", "integrity": "sha512-ZN+4sdnLUbo8EVvVc2ao0GFW6oVrQRPn4K2lglySj7APvSrgzxHiNNK99us4WDMi57xxA2yggblIAMNhXOotLQ==",
"dev": true, "dev": true,
"license": "MIT",
"dependencies": { "dependencies": {
"expect": "^29.0.0", "expect": "^29.0.0",
"pretty-format": "^29.0.0" "pretty-format": "^29.0.0"

View File

@ -11,9 +11,6 @@ importers:
axios: axios:
specifier: ^1.6.8 specifier: ^1.6.8
version: 1.7.9 version: 1.7.9
isows:
specifier: ^1.0.4
version: 1.0.6(ws@8.18.0)
typescript-event-target: typescript-event-target:
specifier: ^1.1.1 specifier: ^1.1.1
version: 1.1.1 version: 1.1.1
@ -34,7 +31,7 @@ importers:
specifier: ^8.2.0 specifier: ^8.2.0
version: 8.2.3 version: 8.2.3
'@types/jest': '@types/jest':
specifier: ^29.5.12 specifier: ^29.5.14
version: 29.5.14 version: 29.5.14
'@types/mocha': '@types/mocha':
specifier: ^10.0.6 specifier: ^10.0.6
@ -1052,11 +1049,6 @@ packages:
isexe@2.0.0: isexe@2.0.0:
resolution: {integrity: sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==} resolution: {integrity: sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==}
isows@1.0.6:
resolution: {integrity: sha512-lPHCayd40oW98/I0uvgaHKWCSvkzY27LjWLbtzOm64yQ+G3Q5npjjbdppU65iZXkK1Zt+kH9pfegli0AYfwYYw==}
peerDependencies:
ws: '*'
istanbul-lib-coverage@3.2.2: istanbul-lib-coverage@3.2.2:
resolution: {integrity: sha512-O8dpsF+r0WV/8MNRKfnmrtCWhuKjxrq2w+jpzBL5UZKTi2LeVWnWOmWRxFlesJONmc+wLAGvKQZEOanko0LFTg==} resolution: {integrity: sha512-O8dpsF+r0WV/8MNRKfnmrtCWhuKjxrq2w+jpzBL5UZKTi2LeVWnWOmWRxFlesJONmc+wLAGvKQZEOanko0LFTg==}
engines: {node: '>=8'} engines: {node: '>=8'}
@ -1722,18 +1714,6 @@ packages:
resolution: {integrity: sha512-7KxauUdBmSdWnmpaGFg+ppNjKF8uNLry8LyzjauQDOVONfFLNKrKvQOxZ/VuTIcS/gge/YNahf5RIIQWTSarlg==} resolution: {integrity: sha512-7KxauUdBmSdWnmpaGFg+ppNjKF8uNLry8LyzjauQDOVONfFLNKrKvQOxZ/VuTIcS/gge/YNahf5RIIQWTSarlg==}
engines: {node: ^12.13.0 || ^14.15.0 || >=16.0.0} engines: {node: ^12.13.0 || ^14.15.0 || >=16.0.0}
ws@8.18.0:
resolution: {integrity: sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==}
engines: {node: '>=10.0.0'}
peerDependencies:
bufferutil: ^4.0.1
utf-8-validate: '>=5.0.2'
peerDependenciesMeta:
bufferutil:
optional: true
utf-8-validate:
optional: true
y18n@5.0.8: y18n@5.0.8:
resolution: {integrity: sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==} resolution: {integrity: sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==}
engines: {node: '>=10'} engines: {node: '>=10'}
@ -2770,10 +2750,6 @@ snapshots:
isexe@2.0.0: {} isexe@2.0.0: {}
isows@1.0.6(ws@8.18.0):
dependencies:
ws: 8.18.0
istanbul-lib-coverage@3.2.2: {} istanbul-lib-coverage@3.2.2: {}
istanbul-lib-instrument@5.2.1: istanbul-lib-instrument@5.2.1:
@ -3584,8 +3560,6 @@ snapshots:
imurmurhash: 0.1.4 imurmurhash: 0.1.4
signal-exit: 3.0.7 signal-exit: 3.0.7
ws@8.18.0: {}
y18n@5.0.8: {} y18n@5.0.8: {}
yallist@3.1.1: {} yallist@3.1.1: {}

View File

@ -1,330 +0,0 @@
import FirecrawlApp, {
CrawlResponseV0,
CrawlStatusResponse,
CrawlStatusResponseV0,
FirecrawlDocumentV0,
ScrapeResponseV0,
SearchResponseV0,
} from "../../index";
import { v4 as uuidv4 } from "uuid";
import dotenv from "dotenv";
import { describe, test, expect } from "@jest/globals";
// Load environment variables (TEST_API_KEY, PREVIEW_TOKEN) from a local .env file.
dotenv.config();

// API key used by every authenticated test below; undefined if the variable is not set.
const TEST_API_KEY = process.env.TEST_API_KEY;
// All requests in this suite are sent to this base URL.
const API_URL = "http://127.0.0.1:3002";

/**
 * End-to-end tests for the "v0" flavour of FirecrawlApp.
 *
 * Every test builds its own client pointed at API_URL and performs real
 * requests (scrape, crawl, crawl status, search, LLM extraction), so the
 * suite needs a reachable server plus the TEST_API_KEY / PREVIEW_TOKEN
 * environment variables.
 */
describe('FirecrawlApp<"v0"> E2E Tests', () => {
  // Polling budget for the crawl-status test. The jest timeout is derived
  // from it so that the `checks < STATUS_MAX_CHECKS` guard is reachable: with a
  // fixed 35 s timeout, 15 polls of 5 s each could never complete and a slow
  // crawl surfaced as a generic jest timeout instead of a failed assertion.
  const STATUS_POLL_INTERVAL_MS = 5000;
  const STATUS_MAX_CHECKS = 15;
  const STATUS_TEST_TIMEOUT_MS =
    STATUS_MAX_CHECKS * STATUS_POLL_INTERVAL_MS + 15000; // + margin for the HTTP calls

  test.concurrent("should throw error for no API key", async () => {
    expect(() => {
      new FirecrawlApp<"v0">({ apiKey: null, apiUrl: API_URL, version: "v0" });
    }).toThrow("No API key provided");
  });

  test.concurrent(
    "should throw error for invalid API key on scrape",
    async () => {
      const invalidApp = new FirecrawlApp<"v0">({
        apiKey: "invalid_api_key",
        apiUrl: API_URL,
        version: "v0",
      });
      await expect(
        invalidApp.scrapeUrl("https://roastmywebsite.ai")
      ).rejects.toThrow("Request failed with status code 401");
    }
  );

  test.concurrent(
    "should throw error for blocklisted URL on scrape",
    async () => {
      const app = new FirecrawlApp<"v0">({
        apiKey: TEST_API_KEY,
        apiUrl: API_URL,
        version: "v0",
      });
      const blocklistedUrl = "https://facebook.com/fake-test";
      await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow(
        "Request failed with status code 403"
      );
    }
  );

  test.concurrent(
    "should return successful response with valid preview token",
    async () => {
      const app = new FirecrawlApp<"v0">({
        apiKey: process.env.PREVIEW_TOKEN,
        apiUrl: API_URL,
        version: "v0",
      });
      const response = (await app.scrapeUrl(
        "https://roastmywebsite.ai"
      )) as ScrapeResponseV0;
      expect(response).not.toBeNull();
      expect(response.data?.content).toContain("_Roast_");
    },
    30000
  ); // 30 seconds timeout

  test.concurrent(
    "should return successful response for valid scrape",
    async () => {
      const app = new FirecrawlApp<"v0">({
        apiKey: TEST_API_KEY,
        apiUrl: API_URL,
        version: "v0",
      });
      const response = (await app.scrapeUrl(
        "https://roastmywebsite.ai"
      )) as ScrapeResponseV0;
      expect(response).not.toBeNull();
      expect(response.data?.content).toContain("_Roast_");
      expect(response.data).toHaveProperty("markdown");
      expect(response.data).toHaveProperty("metadata");
      // HTML is only expected when explicitly requested (see the next test).
      expect(response.data).not.toHaveProperty("html");
    },
    30000
  ); // 30 seconds timeout

  test.concurrent(
    "should return successful response with valid API key and include HTML",
    async () => {
      const app = new FirecrawlApp<"v0">({
        apiKey: TEST_API_KEY,
        apiUrl: API_URL,
        version: "v0",
      });
      const response = (await app.scrapeUrl("https://roastmywebsite.ai", {
        pageOptions: { includeHtml: true },
      })) as ScrapeResponseV0;
      expect(response).not.toBeNull();
      expect(response.data?.content).toContain("_Roast_");
      expect(response.data?.markdown).toContain("_Roast_");
      expect(response.data?.html).toContain("<h1");
    },
    30000
  ); // 30 seconds timeout

  test.concurrent(
    "should return successful response for valid scrape with PDF file",
    async () => {
      const app = new FirecrawlApp<"v0">({
        apiKey: TEST_API_KEY,
        apiUrl: API_URL,
        version: "v0",
      });
      const response = (await app.scrapeUrl(
        "https://arxiv.org/pdf/astro-ph/9301001.pdf"
      )) as ScrapeResponseV0;
      expect(response).not.toBeNull();
      expect(response.data?.content).toContain(
        "We present spectrophotometric observations of the Broad Line Radio Galaxy"
      );
    },
    30000
  ); // 30 seconds timeout

  test.concurrent(
    "should return successful response for valid scrape with PDF file without explicit extension",
    async () => {
      const app = new FirecrawlApp<"v0">({
        apiKey: TEST_API_KEY,
        apiUrl: API_URL,
        version: "v0",
      });
      const response = (await app.scrapeUrl(
        "https://arxiv.org/pdf/astro-ph/9301001"
      )) as ScrapeResponseV0;
      expect(response).not.toBeNull();
      expect(response.data?.content).toContain(
        "We present spectrophotometric observations of the Broad Line Radio Galaxy"
      );
    },
    30000
  ); // 30 seconds timeout

  test.concurrent(
    "should throw error for invalid API key on crawl",
    async () => {
      const invalidApp = new FirecrawlApp<"v0">({
        apiKey: "invalid_api_key",
        apiUrl: API_URL,
        version: "v0",
      });
      await expect(
        invalidApp.crawlUrl("https://roastmywebsite.ai")
      ).rejects.toThrow("Request failed with status code 401");
    }
  );

  test.concurrent(
    "should throw error for blocklisted URL on crawl",
    async () => {
      const app = new FirecrawlApp<"v0">({
        apiKey: TEST_API_KEY,
        apiUrl: API_URL,
        version: "v0",
      });
      const blocklistedUrl = "https://twitter.com/fake-test";
      await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow(
        "Request failed with status code 403"
      );
    }
  );

  test.concurrent(
    "should return successful response for crawl and wait for completion",
    async () => {
      const app = new FirecrawlApp<"v0">({
        apiKey: TEST_API_KEY,
        apiUrl: API_URL,
        version: "v0",
      });
      const response = (await app.crawlUrl(
        "https://roastmywebsite.ai",
        { crawlerOptions: { excludes: ["blog/*"] } },
        true,
        10
      )) as FirecrawlDocumentV0[];
      expect(response).not.toBeNull();
      expect(response[0].content).toContain("_Roast_");
    },
    60000
  ); // 60 seconds timeout

  test.concurrent("should handle idempotency key for crawl", async () => {
    const app = new FirecrawlApp<"v0">({
      apiKey: TEST_API_KEY,
      apiUrl: API_URL,
      version: "v0",
    });
    const uniqueIdempotencyKey = uuidv4();
    const response = (await app.crawlUrl(
      "https://roastmywebsite.ai",
      { crawlerOptions: { excludes: ["blog/*"] } },
      false,
      2,
      uniqueIdempotencyKey
    )) as CrawlResponseV0;
    expect(response).not.toBeNull();
    expect(response.jobId).toBeDefined();

    // Re-submitting with the same idempotency key is expected to be rejected.
    await expect(
      app.crawlUrl(
        "https://roastmywebsite.ai",
        { crawlerOptions: { excludes: ["blog/*"] } },
        true,
        2,
        uniqueIdempotencyKey
      )
    ).rejects.toThrow("Request failed with status code 409");
  });

  test.concurrent(
    "should check crawl status",
    async () => {
      const app = new FirecrawlApp<"v0">({
        apiKey: TEST_API_KEY,
        apiUrl: API_URL,
        version: "v0",
      });
      const response: any = (await app.crawlUrl(
        "https://roastmywebsite.ai",
        { crawlerOptions: { excludes: ["blog/*"] } },
        false
      )) as CrawlResponseV0;
      expect(response).not.toBeNull();
      expect(response.jobId).toBeDefined();

      let statusResponse = await app.checkCrawlStatus(response.jobId);
      let checks = 0;
      // Poll while the job is still active, up to the shared polling budget.
      while (statusResponse.status === "active" && checks < STATUS_MAX_CHECKS) {
        // Check the snapshot we already hold before waiting for the next one.
        expect(statusResponse.partial_data).not.toBeNull();
        await new Promise((resolve) =>
          setTimeout(resolve, STATUS_POLL_INTERVAL_MS)
        );
        statusResponse = (await app.checkCrawlStatus(
          response.jobId
        )) as CrawlStatusResponseV0;
        checks++;
      }

      expect(statusResponse).not.toBeNull();
      expect(statusResponse.success).toBe(true);
      expect(statusResponse.status).toBe("completed");
      expect(statusResponse.total).toEqual(statusResponse.current);
      expect(statusResponse.current_step).not.toBeNull();
      expect(statusResponse.current).toBeGreaterThanOrEqual(1);
      expect(statusResponse?.data?.length).toBeGreaterThan(0);
    },
    STATUS_TEST_TIMEOUT_MS
  ); // covers the full polling budget

  test.concurrent(
    "should return successful response for search",
    async () => {
      const app = new FirecrawlApp<"v0">({
        apiKey: TEST_API_KEY,
        apiUrl: API_URL,
        version: "v0",
      });
      const response = (await app.search("test query")) as SearchResponseV0;
      expect(response).not.toBeNull();
      expect(response?.data?.[0]?.content).toBeDefined();
      expect(response?.data?.length).toBeGreaterThan(2);
    },
    30000
  ); // 30 seconds timeout

  test.concurrent(
    "should throw error for invalid API key on search",
    async () => {
      const invalidApp = new FirecrawlApp<"v0">({
        apiKey: "invalid_api_key",
        apiUrl: API_URL,
        version: "v0",
      });
      await expect(invalidApp.search("test query")).rejects.toThrow(
        "Request failed with status code 401"
      );
    }
  );

  test.concurrent(
    "should perform LLM extraction",
    async () => {
      const app = new FirecrawlApp<"v0">({
        apiKey: TEST_API_KEY,
        apiUrl: API_URL,
        version: "v0",
      });
      const response = (await app.scrapeUrl("https://mendable.ai", {
        extractorOptions: {
          mode: "llm-extraction",
          extractionPrompt:
            "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
          extractionSchema: {
            type: "object",
            properties: {
              company_mission: { type: "string" },
              supports_sso: { type: "boolean" },
              is_open_source: { type: "boolean" },
            },
            required: ["company_mission", "supports_sso", "is_open_source"],
          },
        },
      })) as ScrapeResponseV0;
      expect(response).not.toBeNull();
      expect(response.data?.llm_extraction).toBeDefined();

      const llmExtraction = response.data?.llm_extraction;
      expect(llmExtraction?.company_mission).toBeDefined();
      expect(typeof llmExtraction?.supports_sso).toBe("boolean");
      expect(typeof llmExtraction?.is_open_source).toBe("boolean");
    },
    30000
  ); // 30 seconds timeout
});

File diff suppressed because one or more lines are too long

View File

@ -1,57 +0,0 @@
import { describe, expect, jest, test } from '@jest/globals';
import FirecrawlApp from '../index';
import axios from 'axios';
import { join } from 'path';
import { readFile } from 'fs/promises';
// Replace the axios module with jest's automatic mock, and expose it with
// jest.Mocked typings so tests can stub its methods (e.g. `post`) directly.
jest.mock('axios');
const mockedAxios = axios as jest.Mocked<typeof axios>;
/**
 * Reads a JSON fixture from the `fixtures` directory next to this test file.
 *
 * @param name - Fixture base name, without the `.json` extension.
 * @returns The raw UTF-8 text of the file; parsing is left to the caller.
 */
async function loadFixture(name: string): Promise<string> {
  const fixturePath = join(__dirname, 'fixtures', `${name}.json`);
  const contents = await readFile(fixturePath, 'utf-8');
  return contents;
}
// Base URL of the API under test; falls back to the hosted service when the
// API_URL environment variable is not set.
const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";

/**
 * Unit tests for the SDK client. HTTP traffic goes through the mocked axios
 * module, so no real request is made by the scrape test.
 */
describe('the firecrawl JS SDK', () => {
  test('Should require an API key only for cloud service', async () => {
    if (API_URL.includes('api.firecrawl.dev')) {
      // Should throw for cloud service
      expect(() => {
        new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL });
      }).toThrow('No API key provided');
    } else {
      // Should not throw for self-hosted
      expect(() => {
        new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL });
      }).not.toThrow();
    }
  });

  test('Should return scraped data from a /scrape API call', async () => {
    // Serve the recorded fixture as the response of every axios.post call.
    const mockData = await loadFixture('scrape');
    mockedAxios.post.mockResolvedValue({
      status: 200,
      data: JSON.parse(mockData),
    });

    const apiKey = 'YOUR_API_KEY';
    const app = new FirecrawlApp<"v0">({ apiKey });

    // Scrape a single URL
    const url = 'https://mendable.ai';
    const scrapedData = await app.scrapeUrl(url);

    expect(mockedAxios.post).toHaveBeenCalledTimes(1);
    expect(mockedAxios.post).toHaveBeenCalledWith(
      // Dots are escaped so that only the literal host name matches; an
      // unescaped `.` would accept any character in that position.
      expect.stringMatching(/^https:\/\/api\.firecrawl\.dev/),
      expect.objectContaining({ url }),
      expect.objectContaining({ headers: expect.objectContaining({'Authorization': `Bearer ${apiKey}`}) }),
    );
    expect(scrapedData.success).toBe(true);
    expect(scrapedData?.data?.metadata.title).toEqual('Mendable');
  });
});

View File

@ -26,7 +26,7 @@ describe('FirecrawlApp E2E Tests', () => {
test.concurrent('should throw error for invalid API key on scrape', async () => { test.concurrent('should throw error for invalid API key on scrape', async () => {
if (API_URL.includes('api.firecrawl.dev')) { if (API_URL.includes('api.firecrawl.dev')) {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Unexpected error occurred while trying to scrape URL. Status code: 404"); await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Unexpected error occurred while trying to scrape URL. Status code: 401");
} else { } else {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).resolves.not.toThrow(); await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
@ -36,36 +36,16 @@ describe('FirecrawlApp E2E Tests', () => {
test.concurrent('should throw error for blocklisted URL on scrape', async () => { test.concurrent('should throw error for blocklisted URL on scrape', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const blocklistedUrl = "https://facebook.com/fake-test"; const blocklistedUrl = "https://facebook.com/fake-test";
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403"); await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("This website is no longer supported");
}); });
test.concurrent('should return successful response with valid preview token', async () => {
const app = new FirecrawlApp({ apiKey: process.env.PREVIEW_TOKEN, apiUrl: API_URL });
const response = await app.scrapeUrl('https://roastmywebsite.ai');
if (!response.success) {
throw new Error(response.error);
}
expect(response).not.toBeNull();
expect(response?.markdown).toContain("_Roast_");
}, 30000); // 30 seconds timeout
test.concurrent('should return successful response for valid scrape', async () => { test.concurrent('should return successful response for valid scrape', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl('https://roastmywebsite.ai'); const response = await app.scrapeUrl('https://roastmywebsite.ai');
if (!response.success) { if (!response.success) {
throw new Error(response.error); throw new Error(response.error);
} }
expect(response).not.toBeNull();
expect(response).not.toHaveProperty('content'); // v0
expect(response).not.toHaveProperty('html');
expect(response).not.toHaveProperty('rawHtml');
expect(response).not.toHaveProperty('screenshot');
expect(response).not.toHaveProperty('links');
expect(response).toHaveProperty('markdown');
expect(response).toHaveProperty('metadata');
}, 30000); // 30 seconds timeout }, 30000); // 30 seconds timeout
test.concurrent('should return successful response with valid API key and options', async () => { test.concurrent('should return successful response with valid API key and options', async () => {
@ -80,53 +60,10 @@ describe('FirecrawlApp E2E Tests', () => {
timeout: 30000, timeout: 30000,
waitFor: 1000 waitFor: 1000
}); });
if (!response.success) { if (!response.success) {
throw new Error(response.error); throw new Error(response.error);
} }
expect(response).not.toBeNull();
expect(response).not.toHaveProperty('content'); // v0
expect(response.markdown).toContain("_Roast_");
expect(response.html).toContain("<h1");
expect(response.rawHtml).toContain("<h1");
expect(response.screenshot).not.toBeUndefined();
expect(response.screenshot).not.toBeNull();
expect(response.screenshot).toContain("https://");
expect(response.links).not.toBeNull();
expect(response.links?.length).toBeGreaterThan(0);
expect(response.links?.[0]).toContain("https://");
expect(response.metadata).not.toBeNull();
expect(response.metadata).not.toBeUndefined();
expect(response.metadata).toHaveProperty("title");
expect(response.metadata).toHaveProperty("description");
expect(response.metadata).toHaveProperty("keywords");
expect(response.metadata).toHaveProperty("robots");
expect(response.metadata).toHaveProperty("ogTitle");
expect(response.metadata).toHaveProperty("ogDescription");
expect(response.metadata).toHaveProperty("ogUrl");
expect(response.metadata).toHaveProperty("ogImage");
expect(response.metadata).toHaveProperty("ogLocaleAlternate");
expect(response.metadata).toHaveProperty("ogSiteName");
expect(response.metadata).toHaveProperty("sourceURL");
expect(response.metadata).not.toHaveProperty("pageStatusCode");
expect(response.metadata).toHaveProperty("statusCode");
expect(response.metadata).not.toHaveProperty("pageError");
if (response.metadata !== undefined) {
expect(response.metadata.error).toBeUndefined();
expect(response.metadata.title).toBe("Roast My Website");
expect(response.metadata.description).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
expect(response.metadata.keywords).toBe("Roast My Website,Roast,Website,GitHub,Firecrawl");
expect(response.metadata.robots).toBe("follow, index");
expect(response.metadata.ogTitle).toBe("Roast My Website");
expect(response.metadata.ogDescription).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
expect(response.metadata.ogUrl).toBe("https://www.roastmywebsite.ai");
expect(response.metadata.ogImage).toBe("https://www.roastmywebsite.ai/og.png");
expect(response.metadata.ogLocaleAlternate).toStrictEqual([]);
expect(response.metadata.ogSiteName).toBe("Roast My Website");
expect(response.metadata.sourceURL).toBe("https://roastmywebsite.ai");
expect(response.metadata.statusCode).toBe(200);
}
}, 30000); // 30 seconds timeout }, 30000); // 30 seconds timeout
test.concurrent('should return successful response with valid API key and screenshot fullPage', async () => { test.concurrent('should return successful response with valid API key and screenshot fullPage', async () => {
@ -139,7 +76,6 @@ describe('FirecrawlApp E2E Tests', () => {
throw new Error(response.error); throw new Error(response.error);
} }
expect(response).not.toBeNull();
expect(response.screenshot).not.toBeUndefined(); expect(response.screenshot).not.toBeUndefined();
expect(response.screenshot).not.toBeNull(); expect(response.screenshot).not.toBeNull();
expect(response.screenshot).toContain("https://"); expect(response.screenshot).toContain("https://");
@ -170,7 +106,7 @@ describe('FirecrawlApp E2E Tests', () => {
test.concurrent('should throw error for invalid API key on crawl', async () => { test.concurrent('should throw error for invalid API key on crawl', async () => {
if (API_URL.includes('api.firecrawl.dev')) { if (API_URL.includes('api.firecrawl.dev')) {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 404"); await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
} else { } else {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).resolves.not.toThrow(); await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
@ -180,34 +116,10 @@ describe('FirecrawlApp E2E Tests', () => {
test.concurrent('should return successful response for crawl and wait for completion', async () => { test.concurrent('should return successful response for crawl and wait for completion', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.crawlUrl('https://roastmywebsite.ai', {}, 30) as CrawlStatusResponse; const response = await app.crawlUrl('https://roastmywebsite.ai', {}, 30) as CrawlStatusResponse;
expect(response).not.toBeNull();
expect(response).toHaveProperty("total");
expect(response.total).toBeGreaterThan(0);
expect(response).toHaveProperty("creditsUsed");
expect(response.creditsUsed).toBeGreaterThan(0);
expect(response).toHaveProperty("expiresAt");
expect(new Date(response.expiresAt).getTime()).toBeGreaterThan(Date.now());
expect(response).toHaveProperty("status");
expect(response.status).toBe("completed");
expect(response).not.toHaveProperty("next"); // wait until done expect(response).not.toHaveProperty("next"); // wait until done
expect(response.data.length).toBeGreaterThan(0); expect(response.data.length).toBeGreaterThan(0);
expect(response.data[0]).not.toBeNull();
expect(response.data[0]).not.toBeUndefined();
if (response.data[0]) { if (response.data[0]) {
expect(response.data[0]).toHaveProperty("markdown"); expect(response.data[0]).toHaveProperty("markdown");
expect(response.data[0].markdown).toContain("_Roast_");
expect(response.data[0]).not.toHaveProperty('content'); // v0
expect(response.data[0]).not.toHaveProperty("html");
expect(response.data[0]).not.toHaveProperty("rawHtml");
expect(response.data[0]).not.toHaveProperty("screenshot");
expect(response.data[0]).not.toHaveProperty("links");
expect(response.data[0]).toHaveProperty("metadata");
expect(response.data[0].metadata).toHaveProperty("title");
expect(response.data[0].metadata).toHaveProperty("description");
expect(response.data[0].metadata).toHaveProperty("language");
expect(response.data[0].metadata).toHaveProperty("sourceURL");
expect(response.data[0].metadata).toHaveProperty("statusCode");
expect(response.data[0].metadata).not.toHaveProperty("error");
} }
}, 60000); // 60 seconds timeout }, 60000); // 60 seconds timeout
@ -230,39 +142,15 @@ describe('FirecrawlApp E2E Tests', () => {
waitFor: 1000 waitFor: 1000
} }
} as CrawlParams, 30) as CrawlStatusResponse; } as CrawlParams, 30) as CrawlStatusResponse;
expect(response).not.toBeNull();
expect(response).toHaveProperty("total");
expect(response.total).toBeGreaterThan(0);
expect(response).toHaveProperty("creditsUsed");
expect(response.creditsUsed).toBeGreaterThan(0);
expect(response).toHaveProperty("expiresAt");
expect(new Date(response.expiresAt).getTime()).toBeGreaterThan(Date.now());
expect(response).toHaveProperty("status");
expect(response.status).toBe("completed");
expect(response).not.toHaveProperty("next"); expect(response).not.toHaveProperty("next");
expect(response.data.length).toBeGreaterThan(0); expect(response.data.length).toBeGreaterThan(0);
expect(response.data[0]).not.toBeNull();
expect(response.data[0]).not.toBeUndefined();
if (response.data[0]) { if (response.data[0]) {
expect(response.data[0]).toHaveProperty("markdown"); expect(response.data[0]).toHaveProperty("markdown");
expect(response.data[0].markdown).toContain("_Roast_");
expect(response.data[0]).not.toHaveProperty('content'); // v0 expect(response.data[0]).not.toHaveProperty('content'); // v0
expect(response.data[0]).toHaveProperty("html"); expect(response.data[0]).toHaveProperty("html");
expect(response.data[0].html).toContain("<h1");
expect(response.data[0]).toHaveProperty("rawHtml"); expect(response.data[0]).toHaveProperty("rawHtml");
expect(response.data[0].rawHtml).toContain("<h1");
expect(response.data[0]).toHaveProperty("screenshot"); expect(response.data[0]).toHaveProperty("screenshot");
expect(response.data[0].screenshot).toContain("https://");
expect(response.data[0]).toHaveProperty("links"); expect(response.data[0]).toHaveProperty("links");
expect(response.data[0].links).not.toBeNull();
expect(response.data[0].links?.length).toBeGreaterThan(0);
expect(response.data[0]).toHaveProperty("metadata");
expect(response.data[0].metadata).toHaveProperty("title");
expect(response.data[0].metadata).toHaveProperty("description");
expect(response.data[0].metadata).toHaveProperty("language");
expect(response.data[0].metadata).toHaveProperty("sourceURL");
expect(response.data[0].metadata).toHaveProperty("statusCode");
expect(response.data[0].metadata).not.toHaveProperty("error");
} }
}, 60000); // 60 seconds timeout }, 60000); // 60 seconds timeout
@ -278,7 +166,7 @@ describe('FirecrawlApp E2E Tests', () => {
test.concurrent('should check crawl status', async () => { test.concurrent('should check crawl status', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.asyncCrawlUrl('https://firecrawl.dev', { scrapeOptions: { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links']}} as CrawlParams) as CrawlResponse; const response = await app.asyncCrawlUrl('https://firecrawl.dev', { limit: 20, scrapeOptions: { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links']}} as CrawlParams) as CrawlResponse;
expect(response).not.toBeNull(); expect(response).not.toBeNull();
expect(response.id).toBeDefined(); expect(response.id).toBeDefined();
@ -314,44 +202,15 @@ describe('FirecrawlApp E2E Tests', () => {
expect(statusResponse).toHaveProperty("total"); expect(statusResponse).toHaveProperty("total");
expect(statusResponse.success).toBe(true); expect(statusResponse.success).toBe(true);
if (statusResponse.success === true) { if (statusResponse.success === true) {
expect(statusResponse.total).toBeGreaterThan(0);
expect(statusResponse).toHaveProperty("creditsUsed");
expect(statusResponse.creditsUsed).toBeGreaterThan(0);
expect(statusResponse).toHaveProperty("expiresAt");
expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
expect(statusResponse).toHaveProperty("status");
expect(statusResponse.status).toBe("completed"); expect(statusResponse.status).toBe("completed");
expect(statusResponse.data.length).toBeGreaterThan(0); expect(statusResponse.data.length).toBeGreaterThan(0);
expect(statusResponse.data[0]).not.toBeNull();
expect(statusResponse.data[0]).not.toBeUndefined();
if (statusResponse.data[0]) {
expect(statusResponse.data[0]).toHaveProperty("markdown");
expect(statusResponse.data[0].markdown?.length).toBeGreaterThan(10);
expect(statusResponse.data[0]).not.toHaveProperty('content'); // v0
expect(statusResponse.data[0]).toHaveProperty("html");
expect(statusResponse.data[0].html).toContain("<div");
expect(statusResponse.data[0]).toHaveProperty("rawHtml");
expect(statusResponse.data[0].rawHtml).toContain("<div");
expect(statusResponse.data[0]).toHaveProperty("screenshot");
expect(statusResponse.data[0].screenshot).toContain("https://");
expect(statusResponse.data[0]).toHaveProperty("links");
expect(statusResponse.data[0].links).not.toBeNull();
expect(statusResponse.data[0].links?.length).toBeGreaterThan(0);
expect(statusResponse.data[0]).toHaveProperty("metadata");
expect(statusResponse.data[0].metadata).toHaveProperty("title");
expect(statusResponse.data[0].metadata).toHaveProperty("description");
expect(statusResponse.data[0].metadata).toHaveProperty("language");
expect(statusResponse.data[0].metadata).toHaveProperty("sourceURL");
expect(statusResponse.data[0].metadata).toHaveProperty("statusCode");
expect(statusResponse.data[0].metadata).not.toHaveProperty("error");
}
} }
}, 60000); // 60 seconds timeout }, 60000); // 60 seconds timeout
test.concurrent('should throw error for invalid API key on map', async () => { test.concurrent('should throw error for invalid API key on map', async () => {
if (API_URL.includes('api.firecrawl.dev')) { if (API_URL.includes('api.firecrawl.dev')) {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 404"); await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
} else { } else {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).resolves.not.toThrow(); await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
@ -361,16 +220,9 @@ describe('FirecrawlApp E2E Tests', () => {
test.concurrent('should throw error for blocklisted URL on map', async () => { test.concurrent('should throw error for blocklisted URL on map', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const blocklistedUrl = "https://facebook.com/fake-test"; const blocklistedUrl = "https://facebook.com/fake-test";
await expect(app.mapUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403"); await expect(app.mapUrl(blocklistedUrl)).rejects.toThrow("403");
}); });
test.concurrent('should return successful response with valid preview token', async () => {
const app = new FirecrawlApp({ apiKey: process.env.PREVIEW_TOKEN, apiUrl: API_URL });
const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse;
expect(response).not.toBeNull();
expect(response.links?.length).toBeGreaterThan(0);
}, 30000); // 30 seconds timeout
test.concurrent('should return successful response for valid map', async () => { test.concurrent('should return successful response for valid map', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse; const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse;
expect(response).not.toBeNull(); expect(response).not.toBeNull();
@ -387,13 +239,11 @@ describe('FirecrawlApp E2E Tests', () => {
const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: TEST_API_KEY }); const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: TEST_API_KEY });
const response = await app.search("firecrawl"); const response = await app.search("firecrawl");
expect(response.success).toBe(true); expect(response.success).toBe(true);
console.log(response.data);
expect(response.data?.length).toBeGreaterThan(0); expect(response.data?.length).toBeGreaterThan(0);
expect(response.data?.[0]?.markdown).toBeDefined(); expect(response.data?.[0]?.markdown).not.toBeDefined();
expect(response.data?.[0]?.metadata).toBeDefined(); expect(response.data?.[0]?.title).toBeDefined();
expect(response.data?.[0]?.metadata?.title).toBeDefined(); expect(response.data?.[0]?.description).toBeDefined();
expect(response.data?.[0]?.metadata?.description).toBeDefined(); }, 30000); // 30 seconds timeout
});
test('should search with params object', async () => { test('should search with params object', async () => {
const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: TEST_API_KEY }); const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: TEST_API_KEY });
@ -412,14 +262,13 @@ describe('FirecrawlApp E2E Tests', () => {
expect(doc.markdown).toBeDefined(); expect(doc.markdown).toBeDefined();
expect(doc.html).toBeDefined(); expect(doc.html).toBeDefined();
expect(doc.links).toBeDefined(); expect(doc.links).toBeDefined();
expect(doc.metadata).toBeDefined(); expect(doc.title).toBeDefined();
expect(doc.metadata?.title).toBeDefined(); expect(doc.description).toBeDefined();
expect(doc.metadata?.description).toBeDefined();
} }
}); }, 30000); // 30 seconds timeout
test('should handle invalid API key for search', async () => { test('should handle invalid API key for search', async () => {
const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: "invalid_api_key" }); const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: "invalid_api_key" });
await expect(app.search("test query")).rejects.toThrow("Request failed with status code 404"); await expect(app.search("test query")).rejects.toThrow("Request failed with status code 401");
}); });
}); });