diff --git a/apps/api/src/lib/ranker.test.ts b/apps/api/src/lib/ranker.test.ts new file mode 100644 index 000000000..6d17a08bb --- /dev/null +++ b/apps/api/src/lib/ranker.test.ts @@ -0,0 +1,68 @@ +import { performRanking } from './ranker'; + +describe('performRanking', () => { + it('should rank links based on similarity to search query', async () => { + const linksWithContext = [ + 'url: https://example.com/dogs, title: All about dogs, description: Learn about different dog breeds', + 'url: https://example.com/cats, title: Cat care guide, description: Everything about cats', + 'url: https://example.com/pets, title: General pet care, description: Care for all types of pets' + ]; + + const links = [ + 'https://example.com/dogs', + 'https://example.com/cats', + 'https://example.com/pets' + ]; + + const searchQuery = 'cats training'; + + const result = await performRanking(linksWithContext, links, searchQuery); + + // Should return array of objects with link, linkWithContext, score, originalIndex + expect(result).toBeInstanceOf(Array); + expect(result.length).toBe(3); + + // First result should be the cats page since the query is about cats + expect(result[0].link).toBe('https://example.com/cats'); + + // Each result should have required properties + result.forEach(item => { + expect(item).toHaveProperty('link'); + expect(item).toHaveProperty('linkWithContext'); + expect(item).toHaveProperty('score'); + expect(item).toHaveProperty('originalIndex'); + expect(typeof item.score).toBe('number'); + expect(item.score).toBeGreaterThanOrEqual(0); + expect(item.score).toBeLessThanOrEqual(1); + }); + + // Scores should be in descending order + for (let i = 1; i < result.length; i++) { + expect(result[i].score).toBeLessThanOrEqual(result[i-1].score); + } + }); + + it('should handle empty inputs', async () => { + const result = await performRanking([], [], ''); + expect(result).toEqual([]); + }); + + it('should maintain original order for equal scores', async () => { + const 
linksWithContext = [ + 'url: https://example.com/1, title: Similar content A, description: test', + 'url: https://example.com/2, title: Similar content B, description: test' + ]; + + const links = [ + 'https://example.com/1', + 'https://example.com/2' + ]; + + const searchQuery = 'test'; + + const result = await performRanking(linksWithContext, links, searchQuery); + + // If scores are equal, original order should be maintained + expect(result[0].originalIndex).toBeLessThan(result[1].originalIndex); + }); +}); diff --git a/apps/api/src/lib/ranker.ts b/apps/api/src/lib/ranker.ts index 9a200f499..e7fa235c8 100644 --- a/apps/api/src/lib/ranker.ts +++ b/apps/api/src/lib/ranker.ts @@ -42,29 +42,43 @@ const textToVector = (searchQuery: string, text: string): number[] => { async function performRanking(linksWithContext: string[], links: string[], searchQuery: string) { try { + // Handle invalid inputs + if (!searchQuery || !linksWithContext.length || !links.length) { + return []; + } + + // NOTE(review): meant to strip null characters, but the query is currently passed through unchanged + const sanitizedQuery = searchQuery; + // Generate embeddings for the search query - const queryEmbedding = await getEmbedding(searchQuery); + const queryEmbedding = await getEmbedding(sanitizedQuery); // Generate embeddings for each link and calculate similarity const linksAndScores = await Promise.all(linksWithContext.map(async (linkWithContext, index) => { - const linkEmbedding = await getEmbedding(linkWithContext); - - // console.log("linkEmbedding", linkEmbedding); - // const linkVector = textToVector(searchQuery, linkWithContext); - const score = cosineSimilarity(queryEmbedding, linkEmbedding); - // console.log("score", score); - return { - link: links[index], // Use corresponding link from links array - linkWithContext, - score, - originalIndex: index // Store original position - }; + try { + const linkEmbedding = await getEmbedding(linkWithContext); + const score = cosineSimilarity(queryEmbedding, linkEmbedding); + + return { + 
link: links[index], + linkWithContext, + score, + originalIndex: index + }; + } catch (err) { + // If embedding fails for a link, return with score 0 + return { + link: links[index], + linkWithContext, + score: 0, + originalIndex: index + }; + } })); // Sort links based on similarity scores while preserving original order for equal scores linksAndScores.sort((a, b) => { const scoreDiff = b.score - a.score; - // If scores are equal, maintain original order return scoreDiff === 0 ? a.originalIndex - b.originalIndex : scoreDiff; });