Mirror of https://github.com/HKUDS/LightRAG.git (synced 2025-06-26 22:00:19 +00:00)

Update insert_custom_kg

This commit is contained in:
parent 5385616e7e
commit 40b10e8fcf

Changed files: README.md (21 changed lines) and the LightRAG source files whose hunks follow.

README.md
@@ -903,7 +903,8 @@ custom_kg = {
     "chunks": [
         {
             "content": "Alice and Bob are collaborating on quantum computing research.",
-            "source_id": "doc-1"
+            "source_id": "doc-1",
+            "file_path": "test_file",
         }
     ],
     "entities": [
@@ -911,19 +912,22 @@ custom_kg = {
             "entity_name": "Alice",
             "entity_type": "person",
             "description": "Alice is a researcher specializing in quantum physics.",
-            "source_id": "doc-1"
+            "source_id": "doc-1",
+            "file_path": "test_file"
         },
         {
             "entity_name": "Bob",
             "entity_type": "person",
             "description": "Bob is a mathematician.",
-            "source_id": "doc-1"
+            "source_id": "doc-1",
+            "file_path": "test_file"
         },
         {
             "entity_name": "Quantum Computing",
             "entity_type": "technology",
             "description": "Quantum computing utilizes quantum mechanical phenomena for computation.",
-            "source_id": "doc-1"
+            "source_id": "doc-1",
+            "file_path": "test_file"
         }
     ],
     "relationships": [
@@ -933,7 +937,8 @@ custom_kg = {
             "description": "Alice and Bob are research partners.",
             "keywords": "collaboration research",
             "weight": 1.0,
-            "source_id": "doc-1"
+            "source_id": "doc-1",
+            "file_path": "test_file"
         },
         {
             "src_id": "Alice",
@@ -941,7 +946,8 @@ custom_kg = {
             "description": "Alice conducts research on quantum computing.",
             "keywords": "research expertise",
             "weight": 1.0,
-            "source_id": "doc-1"
+            "source_id": "doc-1",
+            "file_path": "test_file"
         },
         {
             "src_id": "Bob",
@@ -949,7 +955,8 @@ custom_kg = {
             "description": "Bob researches quantum computing.",
             "keywords": "research application",
             "weight": 1.0,
-            "source_id": "doc-1"
+            "source_id": "doc-1",
+            "file_path": "test_file"
         }
     ]
 }
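Taken together, the README hunks above extend the documented custom_kg example so that every chunk, entity, and relationship can carry its own file_path. A minimal sketch of inserting such a dictionary, assuming an already-initialized LightRAG instance named rag and abbreviating each list to one item (field names follow the README example above; values are illustrative):

# Minimal sketch, assuming `rag` is an initialized LightRAG instance.
custom_kg = {
    "chunks": [
        {
            "content": "Alice and Bob are collaborating on quantum computing research.",
            "source_id": "doc-1",
            "file_path": "test_file",   # new: provenance recorded per chunk
        }
    ],
    "entities": [
        {
            "entity_name": "Alice",
            "entity_type": "person",
            "description": "Alice is a researcher specializing in quantum physics.",
            "source_id": "doc-1",
            "file_path": "test_file",   # new: provenance recorded per entity
        }
    ],
    "relationships": [
        {
            "src_id": "Alice",
            "tgt_id": "Bob",
            "description": "Alice and Bob are research partners.",
            "keywords": "collaboration research",
            "weight": 1.0,
            "source_id": "doc-1",
            "file_path": "test_file",   # new: provenance recorded per relationship
        }
    ],
}

rag.insert_custom_kg(custom_kg)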
@@ -4,6 +4,7 @@ import traceback
 import asyncio
 import configparser
 import os
+import time
 import warnings
 from dataclasses import asdict, dataclass, field
 from datetime import datetime, timezone
@@ -1235,7 +1236,6 @@ class LightRAG:
         self,
         custom_kg: dict[str, Any],
         full_doc_id: str = None,
-        file_path: str = "custom_kg",
     ) -> None:
         update_storage = False
         try:
@@ -1245,6 +1245,7 @@ class LightRAG:
             for chunk_data in custom_kg.get("chunks", []):
                 chunk_content = clean_text(chunk_data["content"])
                 source_id = chunk_data["source_id"]
+                file_path = chunk_data.get("file_path", "custom_kg")
                 tokens = len(self.tokenizer.encode(chunk_content))
                 chunk_order_index = (
                     0
@@ -1261,7 +1262,7 @@ class LightRAG:
                     "full_doc_id": full_doc_id
                     if full_doc_id is not None
                     else source_id,
-                    "file_path": file_path,  # Add file path
+                    "file_path": file_path,
                     "status": DocStatus.PROCESSED,
                 }
                 all_chunks_data[chunk_id] = chunk_entry
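The chunk loop above now reads file_path from each chunk record rather than from the removed method-level parameter. A small standalone sketch of that fallback behaviour (illustrative data, not the library's variables):

# Standalone sketch of the per-chunk fallback introduced above.
chunk_with_path = {"content": "Alice and Bob ...", "source_id": "doc-1", "file_path": "test_file"}
chunk_without_path = {"content": "Quantum computing ...", "source_id": "doc-2"}

print(chunk_with_path.get("file_path", "custom_kg"))     # -> "test_file"
print(chunk_without_path.get("file_path", "custom_kg"))  # -> "custom_kg" (default)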
@@ -1282,6 +1283,7 @@ class LightRAG:
                 description = entity_data.get("description", "No description provided")
                 source_chunk_id = entity_data.get("source_id", "UNKNOWN")
                 source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN")
+                file_path = entity_data.get("file_path", "custom_kg")

                 # Log if source_id is UNKNOWN
                 if source_id == "UNKNOWN":
@@ -1296,6 +1298,7 @@ class LightRAG:
                     "description": description,
                     "source_id": source_id,
+                    "file_path": file_path,
                     "created_at": int(time.time()),
                 }
                 # Insert node data into the knowledge graph
                 await self.chunk_entity_relation_graph.upsert_node(
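With the change above, an entity from the README example ends up stored with its own file_path. A sketch of the resulting node payload, abridged to the fields visible in the hunk (the "Alice" values come from the README example; this mirrors the shape, not the library's literal dict):

import time

# Abridged illustration of the node payload shape after this commit.
node_data = {
    "description": "Alice is a researcher specializing in quantum physics.",
    "source_id": "<chunk id resolved via chunk_to_source_map>",  # placeholder
    "file_path": "test_file",          # now carried per entity
    "created_at": int(time.time()),    # unix timestamp
}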
@@ -1315,6 +1318,7 @@ class LightRAG:
                 weight = relationship_data.get("weight", 1.0)
                 source_chunk_id = relationship_data.get("source_id", "UNKNOWN")
                 source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN")
+                file_path = relationship_data.get("file_path", "custom_kg")

                 # Log if source_id is UNKNOWN
                 if source_id == "UNKNOWN":
@@ -1334,6 +1338,8 @@ class LightRAG:
                         "source_id": source_id,
                         "description": "UNKNOWN",
                         "entity_type": "UNKNOWN",
+                        "file_path": file_path,
+                        "created_at": int(time.time()),
                     },
                 )

@@ -1346,8 +1352,11 @@ class LightRAG:
                         "description": description,
                         "keywords": keywords,
                         "source_id": source_id,
+                        "file_path": file_path,
+                        "created_at": int(time.time()),
                     },
                 )
+
                 edge_data: dict[str, str] = {
                     "src_id": src_id,
                     "tgt_id": tgt_id,
@@ -1355,6 +1364,8 @@ class LightRAG:
                     "keywords": keywords,
                     "source_id": source_id,
                     "weight": weight,
+                    "file_path": file_path,
+                    "created_at": int(time.time()),
                 }
                 all_relationships_data.append(edge_data)
                 update_storage = True
@@ -1367,7 +1378,7 @@ class LightRAG:
                     "source_id": dp["source_id"],
                     "description": dp["description"],
                     "entity_type": dp["entity_type"],
-                    "file_path": file_path,  # Add file path
+                    "file_path": dp.get("file_path", "custom_kg"),
                 }
                 for dp in all_entities_data
             }
@@ -1383,7 +1394,7 @@ class LightRAG:
                     "keywords": dp["keywords"],
                     "description": dp["description"],
                     "weight": dp["weight"],
-                    "file_path": file_path,  # Add file path
+                    "file_path": dp.get("file_path", "custom_kg"),
                 }
                 for dp in all_relationships_data
             }
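The two hunks above switch the vector-store payloads from the single method-level file_path to the value carried by each entity or relationship record. A standalone sketch of that per-record lookup (hypothetical data, not the library's variables):

# Standalone sketch: each record contributes its own file_path, with a
# "custom_kg" fallback, instead of one shared value for the whole call.
all_entities_data = [
    {"entity_name": "Alice", "source_id": "doc-1", "file_path": "test_file"},
    {"entity_name": "Bob", "source_id": "doc-1"},  # no file_path supplied
]

payloads = {
    dp["entity_name"]: {"file_path": dp.get("file_path", "custom_kg")}
    for dp in all_entities_data
}
print(payloads)
# {'Alice': {'file_path': 'test_file'}, 'Bob': {'file_path': 'custom_kg'}}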
@@ -496,6 +496,7 @@ async def _merge_edges_then_upsert(
             keywords=keywords,
             source_id=source_id,
+            file_path=file_path,
             created_at=int(time.time()),
         )

     return edge_data
@@ -1,5 +1,6 @@
 from __future__ import annotations

+import time
 import asyncio
 from typing import Any, cast

@@ -479,7 +480,9 @@ async def acreate_entity(
             "entity_id": entity_name,
             "entity_type": entity_data.get("entity_type", "UNKNOWN"),
             "description": entity_data.get("description", ""),
-            "source_id": entity_data.get("source_id", "manual"),
+            "source_id": entity_data.get("source_id", "manual_creation"),
+            "file_path": entity_data.get("file_path", "manual_creation"),
+            "created_at": int(time.time()),
         }

         # Add entity to knowledge graph
@@ -575,8 +578,10 @@ async def acreate_relation(
         edge_data = {
             "description": relation_data.get("description", ""),
             "keywords": relation_data.get("keywords", ""),
-            "source_id": relation_data.get("source_id", "manual"),
+            "source_id": relation_data.get("source_id", "manual_creation"),
             "weight": float(relation_data.get("weight", 1.0)),
+            "file_path": relation_data.get("file_path", "manual_creation"),
+            "created_at": int(time.time()),
         }

         # Add relation to knowledge graph
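The two hunks above let manually created entities and relations carry a file_path as well, and both now default source_id to "manual_creation" instead of "manual". A hedged usage sketch, assuming an initialized LightRAG instance named rag that exposes these helpers as async methods with the argument names visible in the hunks; entity and file names are hypothetical:

import asyncio

# Hedged sketch: rag, acreate_entity(entity_name, entity_data) and
# acreate_relation(src, tgt, relation_data) are assumptions based on the hunks above.
async def add_manual_kg(rag):
    await rag.acreate_entity(
        "Carol",
        {
            "entity_type": "person",
            "description": "Carol studies quantum error correction.",
            "file_path": "manual_notes.md",  # recorded; defaults to "manual_creation" if omitted
        },
    )
    await rag.acreate_relation(
        "Carol",
        "Quantum Computing",
        {
            "description": "Carol researches quantum computing.",
            "keywords": "research",
            "weight": 1.0,
            # source_id omitted -> defaults to "manual_creation" after this commit
        },
    )

# asyncio.run(add_manual_kg(rag))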