Update insert_custom_kg

This commit is contained in:
zrguo 2025-05-27 16:07:04 +08:00
parent 5385616e7e
commit 40b10e8fcf
4 changed files with 83 additions and 59 deletions

113
README.md
View File

@ -900,59 +900,66 @@ All operations are available in both synchronous and asynchronous versions. The
```python
custom_kg = {
"chunks": [
{
"content": "Alice and Bob are collaborating on quantum computing research.",
"source_id": "doc-1"
}
],
"entities": [
{
"entity_name": "Alice",
"entity_type": "person",
"description": "Alice is a researcher specializing in quantum physics.",
"source_id": "doc-1"
},
{
"entity_name": "Bob",
"entity_type": "person",
"description": "Bob is a mathematician.",
"source_id": "doc-1"
},
{
"entity_name": "Quantum Computing",
"entity_type": "technology",
"description": "Quantum computing utilizes quantum mechanical phenomena for computation.",
"source_id": "doc-1"
}
],
"relationships": [
{
"src_id": "Alice",
"tgt_id": "Bob",
"description": "Alice and Bob are research partners.",
"keywords": "collaboration research",
"weight": 1.0,
"source_id": "doc-1"
},
{
"src_id": "Alice",
"tgt_id": "Quantum Computing",
"description": "Alice conducts research on quantum computing.",
"keywords": "research expertise",
"weight": 1.0,
"source_id": "doc-1"
},
{
"src_id": "Bob",
"tgt_id": "Quantum Computing",
"description": "Bob researches quantum computing.",
"keywords": "research application",
"weight": 1.0,
"source_id": "doc-1"
}
]
}
"chunks": [
{
"content": "Alice and Bob are collaborating on quantum computing research.",
"source_id": "doc-1",
"file_path": "test_file"
}
],
"entities": [
{
"entity_name": "Alice",
"entity_type": "person",
"description": "Alice is a researcher specializing in quantum physics.",
"source_id": "doc-1",
"file_path": "test_file"
},
{
"entity_name": "Bob",
"entity_type": "person",
"description": "Bob is a mathematician.",
"source_id": "doc-1",
"file_path": "test_file"
},
{
"entity_name": "Quantum Computing",
"entity_type": "technology",
"description": "Quantum computing utilizes quantum mechanical phenomena for computation.",
"source_id": "doc-1",
"file_path": "test_file"
}
],
"relationships": [
{
"src_id": "Alice",
"tgt_id": "Bob",
"description": "Alice and Bob are research partners.",
"keywords": "collaboration research",
"weight": 1.0,
"source_id": "doc-1",
"file_path": "test_file"
},
{
"src_id": "Alice",
"tgt_id": "Quantum Computing",
"description": "Alice conducts research on quantum computing.",
"keywords": "research expertise",
"weight": 1.0,
"source_id": "doc-1",
"file_path": "test_file"
},
{
"src_id": "Bob",
"tgt_id": "Quantum Computing",
"description": "Bob researches quantum computing.",
"keywords": "research application",
"weight": 1.0,
"source_id": "doc-1",
"file_path": "test_file"
}
]
}
rag.insert_custom_kg(custom_kg)
```

View File

@ -4,6 +4,7 @@ import traceback
import asyncio
import configparser
import os
import time
import warnings
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
@ -1235,7 +1236,6 @@ class LightRAG:
self,
custom_kg: dict[str, Any],
full_doc_id: str = None,
file_path: str = "custom_kg",
) -> None:
update_storage = False
try:
@ -1245,6 +1245,7 @@ class LightRAG:
for chunk_data in custom_kg.get("chunks", []):
chunk_content = clean_text(chunk_data["content"])
source_id = chunk_data["source_id"]
file_path = chunk_data.get("file_path", "custom_kg")
tokens = len(self.tokenizer.encode(chunk_content))
chunk_order_index = (
0
@ -1261,7 +1262,7 @@ class LightRAG:
"full_doc_id": full_doc_id
if full_doc_id is not None
else source_id,
"file_path": file_path, # Add file path
"file_path": file_path,
"status": DocStatus.PROCESSED,
}
all_chunks_data[chunk_id] = chunk_entry
@ -1282,6 +1283,7 @@ class LightRAG:
description = entity_data.get("description", "No description provided")
source_chunk_id = entity_data.get("source_id", "UNKNOWN")
source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN")
file_path = entity_data.get("file_path", "custom_kg")
# Log if source_id is UNKNOWN
if source_id == "UNKNOWN":
@ -1296,6 +1298,7 @@ class LightRAG:
"description": description,
"source_id": source_id,
"file_path": file_path,
"created_at": int(time.time()),
}
# Insert node data into the knowledge graph
await self.chunk_entity_relation_graph.upsert_node(
@ -1315,6 +1318,7 @@ class LightRAG:
weight = relationship_data.get("weight", 1.0)
source_chunk_id = relationship_data.get("source_id", "UNKNOWN")
source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN")
file_path = relationship_data.get("file_path", "custom_kg")
# Log if source_id is UNKNOWN
if source_id == "UNKNOWN":
@ -1334,6 +1338,8 @@ class LightRAG:
"source_id": source_id,
"description": "UNKNOWN",
"entity_type": "UNKNOWN",
"file_path": file_path,
"created_at": int(time.time()),
},
)
@ -1346,8 +1352,11 @@ class LightRAG:
"description": description,
"keywords": keywords,
"source_id": source_id,
"file_path": file_path,
"created_at": int(time.time()),
},
)
edge_data: dict[str, str] = {
"src_id": src_id,
"tgt_id": tgt_id,
@ -1355,6 +1364,8 @@ class LightRAG:
"keywords": keywords,
"source_id": source_id,
"weight": weight,
"file_path": file_path,
"created_at": int(time.time()),
}
all_relationships_data.append(edge_data)
update_storage = True
@ -1367,7 +1378,7 @@ class LightRAG:
"source_id": dp["source_id"],
"description": dp["description"],
"entity_type": dp["entity_type"],
"file_path": file_path, # Add file path
"file_path": dp.get("file_path", "custom_kg"),
}
for dp in all_entities_data
}
@ -1383,7 +1394,7 @@ class LightRAG:
"keywords": dp["keywords"],
"description": dp["description"],
"weight": dp["weight"],
"file_path": file_path, # Add file path
"file_path": dp.get("file_path", "custom_kg"),
}
for dp in all_relationships_data
}

View File

@ -496,6 +496,7 @@ async def _merge_edges_then_upsert(
keywords=keywords,
source_id=source_id,
file_path=file_path,
created_at=int(time.time()),
)
return edge_data

View File

@ -1,5 +1,6 @@
from __future__ import annotations
import time
import asyncio
from typing import Any, cast
@ -479,7 +480,9 @@ async def acreate_entity(
"entity_id": entity_name,
"entity_type": entity_data.get("entity_type", "UNKNOWN"),
"description": entity_data.get("description", ""),
"source_id": entity_data.get("source_id", "manual"),
"source_id": entity_data.get("source_id", "manual_creation"),
"file_path": entity_data.get("file_path", "manual_creation"),
"created_at": int(time.time()),
}
# Add entity to knowledge graph
@ -575,8 +578,10 @@ async def acreate_relation(
edge_data = {
"description": relation_data.get("description", ""),
"keywords": relation_data.get("keywords", ""),
"source_id": relation_data.get("source_id", "manual"),
"source_id": relation_data.get("source_id", "manual_creation"),
"weight": float(relation_data.get("weight", 1.0)),
"file_path": relation_data.get("file_path", "manual_creation"),
"created_at": int(time.time()),
}
# Add relation to knowledge graph