Merge pull request #811 from da-luggas/main

Fixed broken ainsert_custom_kg()
This commit is contained in:
Yannick Stephan 2025-02-19 15:20:37 +01:00 committed by GitHub
commit 6f95ad92bf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 38 additions and 3 deletions

View File

@ -461,14 +461,22 @@ custom_kg = {
{
"content": "ProductX, developed by CompanyA, has revolutionized the market with its cutting-edge features.",
"source_id": "Source1",
"chunk_order_index": 0,
},
{
"content": "One outstanding feature of ProductX is its advanced AI capabilities.",
"source_id": "Source1",
"chunk_order_index": 1,
},
{
"content": "PersonA is a prominent researcher at UniversityB, focusing on artificial intelligence and machine learning.",
"source_id": "Source2",
"chunk_order_index": 0,
},
{
"content": "None",
"source_id": "UNKNOWN",
"chunk_order_index": 0,
},
],
}

View File

@ -87,18 +87,27 @@ custom_kg = {
{
"content": "ProductX, developed by CompanyA, has revolutionized the market with its cutting-edge features.",
"source_id": "Source1",
"chunk_order_index": 0,
},
{
"content": "One outstanding feature of ProductX is its advanced AI capabilities.",
"source_id": "Source1",
"chunk_order_index": 1,
},
{
"content": "PersonA is a prominent researcher at UniversityB, focusing on artificial intelligence and machine learning.",
"source_id": "Source2",
"chunk_order_index": 0,
},
{
"content": "EventY, held in CityC, attracts technology enthusiasts and companies from around the globe.",
"source_id": "Source3",
"chunk_order_index": 0,
},
{
"content": "None",
"source_id": "UNKNOWN",
"chunk_order_index": 0,
},
],
}

View File

@ -37,6 +37,7 @@ from .utils import (
limit_async_func_call,
logger,
set_logger,
encode_string_by_tiktoken,
)
from .types import KnowledgeGraph
@ -926,11 +927,28 @@ class LightRAG:
all_chunks_data: dict[str, dict[str, str]] = {}
chunk_to_source_map: dict[str, str] = {}
for chunk_data in custom_kg.get("chunks", {}):
chunk_content = chunk_data["content"]
chunk_content = chunk_data["content"].strip()
source_id = chunk_data["source_id"]
chunk_id = compute_mdhash_id(chunk_content.strip(), prefix="chunk-")
tokens = len(
encode_string_by_tiktoken(
chunk_content, model_name=self.tiktoken_model_name
)
)
chunk_order_index = (
0
if "chunk_order_index" not in chunk_data.keys()
else chunk_data["chunk_order_index"]
)
chunk_id = compute_mdhash_id(chunk_content, prefix="chunk-")
chunk_entry = {"content": chunk_content.strip(), "source_id": source_id}
chunk_entry = {
"content": chunk_content,
"source_id": source_id,
"tokens": tokens,
"chunk_order_index": chunk_order_index,
"full_doc_id": source_id,
"status": DocStatus.PROCESSED,
}
all_chunks_data[chunk_id] = chunk_entry
chunk_to_source_map[source_id] = chunk_id
update_storage = True