diff --git a/apps/api/requests.http b/apps/api/requests.http index 0e3b92066..8aa3788db 100644 --- a/apps/api/requests.http +++ b/apps/api/requests.http @@ -70,8 +70,8 @@ content-type: application/json "urls": ["firecrawl.dev"], "prompt": "What is the title, description and main product of the page?", "schema": { - "title": "string", - "description": "string", - "mainProduct": "string" + "title": { "type": "string" }, + "description": { "type": "string" }, + "mainProduct": { "type": "string" } } -} \ No newline at end of file +} diff --git a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.test.ts b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.test.ts new file mode 100644 index 000000000..f23f506f0 --- /dev/null +++ b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.test.ts @@ -0,0 +1,33 @@ +import { removeDefaultProperty } from "./llmExtract"; + +describe("removeDefaultProperty", () => { + it("should remove the default property from a simple object", () => { + const input = { default: "test", test: "test" }; + const expectedOutput = { test: "test" }; + expect(removeDefaultProperty(input)).toEqual(expectedOutput); + }); + + it("should remove the default property from a nested object", () => { + const input = { default: "test", nested: { default: "nestedTest", test: "nestedTest" } }; + const expectedOutput = { nested: { test: "nestedTest" } }; + expect(removeDefaultProperty(input)).toEqual(expectedOutput); + }); + + it("should remove the default property from an array of objects", () => { + const input = { array: [{ default: "test1", test: "test1" }, { default: "test2", test: "test2" }] }; + const expectedOutput = { array: [{ test: "test1" }, { test: "test2" }] }; + expect(removeDefaultProperty(input)).toEqual(expectedOutput); + }); + + it("should handle objects without a default property", () => { + const input = { test: "test" }; + const expectedOutput = { test: "test" }; + expect(removeDefaultProperty(input)).toEqual(expectedOutput); + }); + 
/**
 * Recursively strips every `default` key from a JSON-schema-like value and
 * returns a cleaned copy; the input is never mutated.
 *
 * - Primitives and `null` are returned unchanged.
 * - Arrays are returned as arrays, with each element cleaned recursively.
 * - Plain objects are shallow-copied without their own `default` key, and
 *   every remaining value is cleaned recursively.
 *
 * @param schema - Any value; typically a (possibly nested) schema object.
 * @returns A copy of `schema` with all `default` keys removed at every depth.
 */
export function removeDefaultProperty(schema: any): any {
  // Primitives and null have no keys to strip.
  if (typeof schema !== "object" || schema === null) return schema;

  // Arrays must stay arrays. Without this branch an array would be
  // object-destructured below and come back as `{ "0": ..., "1": ... }`,
  // which also corrupted arrays nested directly inside other arrays.
  if (Array.isArray(schema)) {
    return schema.map((item: any) => removeDefaultProperty(item));
  }

  // Drop this level's `default`; `rest` is a fresh shallow copy, so the
  // reassignments below never touch the caller's object.
  const { default: _discarded, ...rest } = schema;

  for (const key of Object.keys(rest)) {
    rest[key] = removeDefaultProperty(rest[key]);
  }

  return rest;
}