firecrawl/apps/api/src/lib/extract/helpers/mix-schema-objs.ts
Nicolas 5e5b5ee0e2
(feat/extract) New re-ranker + multi entity extraction (#1061)
* agent that decides if splits schema or not

* split and merge properties done

* wip

* wip

* changes

* ch

* array merge working!

* comment

* wip

* dereferentiate schema

* dereference schemas

* Nick: new re-ranker

* Create llm-links.txt

* Nick: format

* Update extraction-service.ts

* wip: cooking schema mix and spread functions

* wip

* wip getting there!!!

* nick:

* moved functions to helpers

* nick:

* cant reproduce the error anymore

* error handling all scrapes failed

* fix

* Nick: added the sitemap index

* Update sitemap-index.ts

* Update map.ts

* deduplicate and merge arrays

* added error handler for object transformations

* Update url-processor.ts

* Nick:

* Nick: fixes

* Nick: big improvements to rerank of multi-entity

* Nick: working

* Update reranker.ts

* fixed transformations for nested objs

* fix merge nulls

* Nick: fixed error piping

* Update queue-worker.ts

* Update extraction-service.ts

* Nick: format

* Update queue-worker.ts

* Update pnpm-lock.yaml

* Update queue-worker.ts

---------

Co-authored-by: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com>
Co-authored-by: Thomas Kosmas <thomas510111@gmail.com>
2025-01-13 22:30:15 -03:00

35 lines
1.3 KiB
TypeScript

/**
 * Combines the single-answer and multi-entity extraction results into one
 * object, driven by the keys listed in `finalSchema.properties`.
 *
 * For every key in `properties`:
 * - a property of type `object` that declares its own `properties` is merged
 *   recursively;
 * - a property of type `array` whose multi-entity value is an array takes that
 *   value, flattened by one level;
 * - otherwise the single-answer value is used when the key is present there,
 *   falling back to the multi-entity value;
 * - a key present in neither result is omitted from the output.
 *
 * `null`/`undefined` schemas, property schemas, and result objects are treated
 * as empty instead of throwing.
 *
 * @param finalSchema - Schema object; only `properties`, and each property's
 *   `type` and nested `properties`, are read.
 * @param singleAnswerResult - Values that take precedence for non-array keys.
 * @param multiEntityResult - Values used for array keys and as a fallback.
 * @returns A newly created object holding the merged values.
 */
export async function mixSchemaObjects(
  finalSchema: any,
  singleAnswerResult: any,
  multiEntityResult: any
) {
  // Null-safe own-property check. Going through Object.prototype keeps it
  // working for prototype-less objects and for data that happens to contain a
  // key named "hasOwnProperty".
  const hasOwn = (target: any, key: string): boolean =>
    target != null && Object.prototype.hasOwnProperty.call(target, key);

  // Recursive helper that merges the two results level by level, following the schema.
  function mergeResults(schema: any, singleResult: any, multiResult: any) {
    const result: any = {};
    const properties = schema?.properties ?? {};

    // Object.keys restricts the walk to the schema's own enumerable keys.
    for (const key of Object.keys(properties)) {
      const propertySchema = properties[key];
      const multiValue = multiResult?.[key];

      if (propertySchema?.type === 'object' && propertySchema.properties) {
        // Nested object: recurse, substituting an empty object for a missing
        // or falsy branch on either side.
        result[key] = mergeResults(
          propertySchema,
          singleResult?.[key] || {},
          multiValue || {}
        );
      } else if (propertySchema?.type === 'array' && Array.isArray(multiValue)) {
        // Array property: the multi-entity value wins and is flattened one level.
        result[key] = multiValue.flat();
      } else if (hasOwn(singleResult, key)) {
        result[key] = singleResult[key];
      } else if (hasOwn(multiResult, key)) {
        result[key] = multiValue;
      }
    }
    return result;
  }

  // mergeResults already returns a fresh object, so no extra copy is needed.
  return mergeResults(finalSchema, singleAnswerResult, multiEntityResult);
}