Mirror of https://github.com/HKUDS/LightRAG.git

commit 6f95ad92bf

Merge pull request #811 from da-luggas/main

Fixed broken ainsert_custom_kg()
@@ -461,14 +461,22 @@ custom_kg = {
         {
             "content": "ProductX, developed by CompanyA, has revolutionized the market with its cutting-edge features.",
             "source_id": "Source1",
+            "chunk_order_index": 0,
+        },
+        {
+            "content": "One outstanding feature of ProductX is its advanced AI capabilities.",
+            "source_id": "Source1",
+            "chunk_order_index": 1,
         },
         {
             "content": "PersonA is a prominent researcher at UniversityB, focusing on artificial intelligence and machine learning.",
             "source_id": "Source2",
+            "chunk_order_index": 0,
         },
         {
             "content": "None",
             "source_id": "UNKNOWN",
+            "chunk_order_index": 0,
         },
     ],
 }
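For orientation, a minimal sketch of how a custom_kg dict shaped like the example above might be handed to the method this PR fixes. Here `rag` is assumed to be an already-configured LightRAG instance and is passed in rather than constructed; this is an illustration, not part of the change.

import asyncio

async def load_custom_kg(rag, custom_kg: dict) -> None:
    # ainsert_custom_kg() is the async entry point repaired by this PR;
    # custom_kg carries the "chunks" list shown above (content, source_id,
    # chunk_order_index), alongside any entities/relationships keys.
    await rag.ainsert_custom_kg(custom_kg)

# Example (assumes `rag` and `custom_kg` are already defined):
# asyncio.run(load_custom_kg(rag, custom_kg))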
@@ -87,18 +87,27 @@ custom_kg = {
         {
             "content": "ProductX, developed by CompanyA, has revolutionized the market with its cutting-edge features.",
             "source_id": "Source1",
+            "source_chunk_index": 0,
+        },
+        {
+            "content": "One outstanding feature of ProductX is its advanced AI capabilities.",
+            "source_id": "Source1",
+            "chunk_order_index": 1,
         },
         {
             "content": "PersonA is a prominent researcher at UniversityB, focusing on artificial intelligence and machine learning.",
             "source_id": "Source2",
+            "source_chunk_index": 0,
         },
         {
             "content": "EventY, held in CityC, attracts technology enthusiasts and companies from around the globe.",
             "source_id": "Source3",
+            "source_chunk_index": 0,
         },
         {
             "content": "None",
             "source_id": "UNKNOWN",
+            "source_chunk_index": 0,
         },
     ],
 }
@@ -37,6 +37,7 @@ from .utils import (
     limit_async_func_call,
     logger,
     set_logger,
+    encode_string_by_tiktoken,
 )
 from .types import KnowledgeGraph
 
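As context for the new import: in the hunk below, encode_string_by_tiktoken() is only used to count tokens per chunk. A rough standalone equivalent, written against the tiktoken package directly rather than LightRAG's wrapper (the model name is an illustrative default, not necessarily the library's):

import tiktoken

def count_tokens(text: str, model_name: str = "gpt-4o-mini") -> int:
    # Tokenize with the encoding that matches the target model and return
    # the token count, approximating len(encode_string_by_tiktoken(...)).
    encoding = tiktoken.encoding_for_model(model_name)
    return len(encoding.encode(text))

print(count_tokens("One outstanding feature of ProductX is its advanced AI capabilities."))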
@@ -926,11 +927,28 @@ class LightRAG:
         all_chunks_data: dict[str, dict[str, str]] = {}
         chunk_to_source_map: dict[str, str] = {}
         for chunk_data in custom_kg.get("chunks", {}):
-            chunk_content = chunk_data["content"]
+            chunk_content = chunk_data["content"].strip()
             source_id = chunk_data["source_id"]
-            chunk_id = compute_mdhash_id(chunk_content.strip(), prefix="chunk-")
+            tokens = len(
+                encode_string_by_tiktoken(
+                    chunk_content, model_name=self.tiktoken_model_name
+                )
+            )
+            chunk_order_index = (
+                0
+                if "chunk_order_index" not in chunk_data.keys()
+                else chunk_data["chunk_order_index"]
+            )
+            chunk_id = compute_mdhash_id(chunk_content, prefix="chunk-")
 
-            chunk_entry = {"content": chunk_content.strip(), "source_id": source_id}
+            chunk_entry = {
+                "content": chunk_content,
+                "source_id": source_id,
+                "tokens": tokens,
+                "chunk_order_index": chunk_order_index,
+                "full_doc_id": source_id,
+                "status": DocStatus.PROCESSED,
+            }
             all_chunks_data[chunk_id] = chunk_entry
             chunk_to_source_map[source_id] = chunk_id
             update_storage = True
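A self-contained approximation of the per-chunk handling added above, with LightRAG's compute_mdhash_id() and encode_string_by_tiktoken() helpers stood in by hashlib and tiktoken, so the ids and token counts are illustrative, and the DocStatus enum replaced by a plain string:

import hashlib
import tiktoken

def build_chunk_entry(chunk_data: dict, tiktoken_model_name: str = "gpt-4o-mini") -> tuple[str, dict]:
    # Mirror of the logic in the hunk above: strip the content, count tokens,
    # default chunk_order_index to 0, derive an md5-based chunk id, and build
    # the richer chunk entry that the fix stores.
    chunk_content = chunk_data["content"].strip()
    source_id = chunk_data["source_id"]
    tokens = len(tiktoken.encoding_for_model(tiktoken_model_name).encode(chunk_content))
    chunk_order_index = chunk_data.get("chunk_order_index", 0)
    chunk_id = "chunk-" + hashlib.md5(chunk_content.encode()).hexdigest()
    chunk_entry = {
        "content": chunk_content,
        "source_id": source_id,
        "tokens": tokens,
        "chunk_order_index": chunk_order_index,
        "full_doc_id": source_id,
        "status": "processed",  # the real code uses DocStatus.PROCESSED
    }
    return chunk_id, chunk_entry

chunk_id, entry = build_chunk_entry({"content": " None ", "source_id": "UNKNOWN"})
print(chunk_id, entry["tokens"], entry["chunk_order_index"])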