mirror of
https://github.com/HKUDS/LightRAG.git
synced 2025-11-09 14:23:48 +00:00
fix: Deduplicate entities and relationships in a single chunk with multiple gleaning results during KG rebuild
This commit is contained in:
parent
70e154b0aa
commit
6b6d14bc3a
@ -284,6 +284,7 @@ async def _rebuild_knowledge_from_chunks(
|
|||||||
pipeline_status["history_messages"].append(status_message)
|
pipeline_status["history_messages"].append(status_message)
|
||||||
|
|
||||||
# Get cached extraction results for these chunks using storage
|
# Get cached extraction results for these chunks using storage
|
||||||
|
# cached_results: chunk_id -> [list of extraction results from the LLM cache, sorted by created_at]
|
||||||
cached_results = await _get_cached_extraction_results(
|
cached_results = await _get_cached_extraction_results(
|
||||||
llm_response_cache,
|
llm_response_cache,
|
||||||
all_referenced_chunk_ids,
|
all_referenced_chunk_ids,
|
||||||
@ -309,6 +310,7 @@ async def _rebuild_knowledge_from_chunks(
|
|||||||
chunk_entities[chunk_id] = defaultdict(list)
|
chunk_entities[chunk_id] = defaultdict(list)
|
||||||
chunk_relationships[chunk_id] = defaultdict(list)
|
chunk_relationships[chunk_id] = defaultdict(list)
|
||||||
|
|
||||||
|
# process multiple LLM extraction results for a single chunk_id
|
||||||
for extraction_result in extraction_results:
|
for extraction_result in extraction_results:
|
||||||
entities, relationships = await _parse_extraction_result(
|
entities, relationships = await _parse_extraction_result(
|
||||||
text_chunks_storage=text_chunks_storage,
|
text_chunks_storage=text_chunks_storage,
|
||||||
@ -317,9 +319,20 @@ async def _rebuild_knowledge_from_chunks(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Merge entities and relationships from this extraction result
|
# Merge entities and relationships from this extraction result
|
||||||
|
# Only keep the first occurrence of each entity_name in the same chunk_id
|
||||||
for entity_name, entity_list in entities.items():
|
for entity_name, entity_list in entities.items():
|
||||||
|
if (
|
||||||
|
entity_name not in chunk_entities[chunk_id]
|
||||||
|
or len(chunk_entities[chunk_id][entity_name]) == 0
|
||||||
|
):
|
||||||
chunk_entities[chunk_id][entity_name].extend(entity_list)
|
chunk_entities[chunk_id][entity_name].extend(entity_list)
|
||||||
|
|
||||||
|
# Only keep the first occurrence of each rel_key in the same chunk_id
|
||||||
for rel_key, rel_list in relationships.items():
|
for rel_key, rel_list in relationships.items():
|
||||||
|
if (
|
||||||
|
rel_key not in chunk_relationships[chunk_id]
|
||||||
|
or len(chunk_relationships[chunk_id][rel_key]) == 0
|
||||||
|
):
|
||||||
chunk_relationships[chunk_id][rel_key].extend(rel_list)
|
chunk_relationships[chunk_id][rel_key].extend(rel_list)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user