feat: enhance query performance by restructuring relationships, entities, and chunks retrieval in PostgreSQL.

Fixed: duplicate items query
This commit is contained in:
Matt23-star 2025-08-16 22:49:54 +08:00
parent 6a7e3092ea
commit a0593ec1c9

View File

@ -4578,50 +4578,86 @@ SQL_TEMPLATES = {
update_time = EXCLUDED.update_time update_time = EXCLUDED.update_time
""", """,
"relationships": """ "relationships": """
WITH relevant_chunks AS ( WITH relevant_chunks AS (SELECT id as chunk_id
SELECT id as chunk_id FROM LIGHTRAG_VDB_CHUNKS
FROM LIGHTRAG_VDB_CHUNKS WHERE $2
WHERE $2::varchar[] IS NULL OR full_doc_id = ANY($2::varchar[]) :: varchar [] IS NULL OR full_doc_id = ANY ($2:: varchar [])
) )
SELECT r.source_id as src_id, r.target_id as tgt_id, , rc AS (
EXTRACT(EPOCH FROM r.create_time)::BIGINT as created_at SELECT array_agg(chunk_id) AS chunk_arr
FROM LIGHTRAG_VDB_RELATION r FROM relevant_chunks
JOIN relevant_chunks c ON c.chunk_id = ANY(r.chunk_ids) ), cand AS (
WHERE r.workspace = $1 SELECT
AND r.content_vector <=> '[{embedding_string}]'::vector < $3 r.id, r.source_id AS src_id, r.target_id AS tgt_id, r.chunk_ids, r.create_time, r.content_vector <=> '[{embedding_string}]'::vector AS dist
ORDER BY r.content_vector <=> '[{embedding_string}]'::vector FROM LIGHTRAG_VDB_RELATION r
LIMIT $4 WHERE r.workspace = $1
""", ORDER BY r.content_vector <=> '[{embedding_string}]'::vector
LIMIT ($4 * 50)
)
SELECT c.src_id,
c.tgt_id,
EXTRACT(EPOCH FROM c.create_time) ::BIGINT AS created_at
FROM cand c
JOIN rc ON TRUE
WHERE c.dist < $3
AND c.chunk_ids && (rc.chunk_arr::varchar[])
ORDER BY c.dist, c.id
LIMIT $4;
""",
"entities": """ "entities": """
WITH relevant_chunks AS ( WITH relevant_chunks AS (SELECT id as chunk_id
SELECT id as chunk_id FROM LIGHTRAG_VDB_CHUNKS
FROM LIGHTRAG_VDB_CHUNKS WHERE $2
WHERE $2::varchar[] IS NULL OR full_doc_id = ANY($2::varchar[]) :: varchar [] IS NULL OR full_doc_id = ANY ($2:: varchar [])
) )
SELECT e.entity_name, , rc AS (
EXTRACT(EPOCH FROM e.create_time)::BIGINT as created_at SELECT array_agg(chunk_id) AS chunk_arr
FROM LIGHTRAG_VDB_ENTITY e FROM relevant_chunks
JOIN relevant_chunks c ON c.chunk_id = ANY(e.chunk_ids) ), cand AS (
WHERE e.workspace = $1 SELECT
AND e.content_vector <=> '[{embedding_string}]'::vector < $3 e.id, e.entity_name, e.chunk_ids, e.create_time, e.content_vector <=> '[{embedding_string}]'::vector AS dist
ORDER BY e.content_vector <=> '[{embedding_string}]'::vector FROM LIGHTRAG_VDB_ENTITY e
LIMIT $4 WHERE e.workspace = $1
""", ORDER BY e.content_vector <=> '[{embedding_string}]'::vector
LIMIT ($4 * 50)
)
SELECT c.entity_name,
EXTRACT(EPOCH FROM c.create_time) ::BIGINT AS created_at
FROM cand c
JOIN rc ON TRUE
WHERE c.dist < $3
AND c.chunk_ids && (rc.chunk_arr::varchar[])
ORDER BY c.dist, c.id
LIMIT $4;
""",
"chunks": """ "chunks": """
WITH relevant_chunks AS ( WITH relevant_chunks AS (SELECT id as chunk_id
SELECT id as chunk_id FROM LIGHTRAG_VDB_CHUNKS
FROM LIGHTRAG_VDB_CHUNKS WHERE $2
WHERE $2::varchar[] IS NULL OR full_doc_id = ANY($2::varchar[]) :: varchar [] IS NULL OR full_doc_id = ANY ($2:: varchar [])
) )
SELECT id, content, file_path, , rc AS (
EXTRACT(EPOCH FROM create_time)::BIGINT as created_at SELECT array_agg(chunk_id) AS chunk_arr
FROM LIGHTRAG_VDB_CHUNKS FROM relevant_chunks
WHERE workspace = $1 ), cand AS (
AND id IN (SELECT chunk_id FROM relevant_chunks) SELECT
AND content_vector <=> '[{embedding_string}]'::vector < $3 id, content, file_path, create_time, content_vector <=> '[{embedding_string}]'::vector AS dist
ORDER BY content_vector <=> '[{embedding_string}]'::vector FROM LIGHTRAG_VDB_CHUNKS
LIMIT $4 WHERE workspace = $1
""", ORDER BY content_vector <=> '[{embedding_string}]'::vector
LIMIT ($4 * 50)
)
SELECT c.id,
c.content,
c.file_path,
EXTRACT(EPOCH FROM c.create_time) ::BIGINT AS created_at
FROM cand c
JOIN rc ON TRUE
WHERE c.dist < $3
AND c.id = ANY (rc.chunk_arr)
ORDER BY c.dist, c.id
LIMIT $4;
""",
# DROP tables # DROP tables
"drop_specifiy_table_workspace": """ "drop_specifiy_table_workspace": """
DELETE FROM {table_name} WHERE workspace=$1 DELETE FROM {table_name} WHERE workspace=$1