mirror of
https://github.com/HKUDS/LightRAG.git
synced 2025-06-26 22:00:19 +00:00
Update insert_custom_kg
This commit is contained in:
parent
5385616e7e
commit
40b10e8fcf
23
README.md
23
README.md
@ -903,7 +903,8 @@ custom_kg = {
|
|||||||
"chunks": [
|
"chunks": [
|
||||||
{
|
{
|
||||||
"content": "Alice and Bob are collaborating on quantum computing research.",
|
"content": "Alice and Bob are collaborating on quantum computing research.",
|
||||||
"source_id": "doc-1"
|
"source_id": "doc-1",
|
||||||
|
"file_path": "test_file",
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"entities": [
|
"entities": [
|
||||||
@ -911,19 +912,22 @@ custom_kg = {
|
|||||||
"entity_name": "Alice",
|
"entity_name": "Alice",
|
||||||
"entity_type": "person",
|
"entity_type": "person",
|
||||||
"description": "Alice is a researcher specializing in quantum physics.",
|
"description": "Alice is a researcher specializing in quantum physics.",
|
||||||
"source_id": "doc-1"
|
"source_id": "doc-1",
|
||||||
|
"file_path": "test_file"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"entity_name": "Bob",
|
"entity_name": "Bob",
|
||||||
"entity_type": "person",
|
"entity_type": "person",
|
||||||
"description": "Bob is a mathematician.",
|
"description": "Bob is a mathematician.",
|
||||||
"source_id": "doc-1"
|
"source_id": "doc-1",
|
||||||
|
"file_path": "test_file"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"entity_name": "Quantum Computing",
|
"entity_name": "Quantum Computing",
|
||||||
"entity_type": "technology",
|
"entity_type": "technology",
|
||||||
"description": "Quantum computing utilizes quantum mechanical phenomena for computation.",
|
"description": "Quantum computing utilizes quantum mechanical phenomena for computation.",
|
||||||
"source_id": "doc-1"
|
"source_id": "doc-1",
|
||||||
|
"file_path": "test_file"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"relationships": [
|
"relationships": [
|
||||||
@ -933,7 +937,8 @@ custom_kg = {
|
|||||||
"description": "Alice and Bob are research partners.",
|
"description": "Alice and Bob are research partners.",
|
||||||
"keywords": "collaboration research",
|
"keywords": "collaboration research",
|
||||||
"weight": 1.0,
|
"weight": 1.0,
|
||||||
"source_id": "doc-1"
|
"source_id": "doc-1",
|
||||||
|
"file_path": "test_file"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"src_id": "Alice",
|
"src_id": "Alice",
|
||||||
@ -941,7 +946,8 @@ custom_kg = {
|
|||||||
"description": "Alice conducts research on quantum computing.",
|
"description": "Alice conducts research on quantum computing.",
|
||||||
"keywords": "research expertise",
|
"keywords": "research expertise",
|
||||||
"weight": 1.0,
|
"weight": 1.0,
|
||||||
"source_id": "doc-1"
|
"source_id": "doc-1",
|
||||||
|
"file_path": "test_file"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"src_id": "Bob",
|
"src_id": "Bob",
|
||||||
@ -949,10 +955,11 @@ custom_kg = {
|
|||||||
"description": "Bob researches quantum computing.",
|
"description": "Bob researches quantum computing.",
|
||||||
"keywords": "research application",
|
"keywords": "research application",
|
||||||
"weight": 1.0,
|
"weight": 1.0,
|
||||||
"source_id": "doc-1"
|
"source_id": "doc-1",
|
||||||
|
"file_path": "test_file"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
rag.insert_custom_kg(custom_kg)
|
rag.insert_custom_kg(custom_kg)
|
||||||
```
|
```
|
||||||
|
@ -4,6 +4,7 @@ import traceback
|
|||||||
import asyncio
|
import asyncio
|
||||||
import configparser
|
import configparser
|
||||||
import os
|
import os
|
||||||
|
import time
|
||||||
import warnings
|
import warnings
|
||||||
from dataclasses import asdict, dataclass, field
|
from dataclasses import asdict, dataclass, field
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
@ -1235,7 +1236,6 @@ class LightRAG:
|
|||||||
self,
|
self,
|
||||||
custom_kg: dict[str, Any],
|
custom_kg: dict[str, Any],
|
||||||
full_doc_id: str = None,
|
full_doc_id: str = None,
|
||||||
file_path: str = "custom_kg",
|
|
||||||
) -> None:
|
) -> None:
|
||||||
update_storage = False
|
update_storage = False
|
||||||
try:
|
try:
|
||||||
@ -1245,6 +1245,7 @@ class LightRAG:
|
|||||||
for chunk_data in custom_kg.get("chunks", []):
|
for chunk_data in custom_kg.get("chunks", []):
|
||||||
chunk_content = clean_text(chunk_data["content"])
|
chunk_content = clean_text(chunk_data["content"])
|
||||||
source_id = chunk_data["source_id"]
|
source_id = chunk_data["source_id"]
|
||||||
|
file_path = chunk_data.get("file_path", "custom_kg")
|
||||||
tokens = len(self.tokenizer.encode(chunk_content))
|
tokens = len(self.tokenizer.encode(chunk_content))
|
||||||
chunk_order_index = (
|
chunk_order_index = (
|
||||||
0
|
0
|
||||||
@ -1261,7 +1262,7 @@ class LightRAG:
|
|||||||
"full_doc_id": full_doc_id
|
"full_doc_id": full_doc_id
|
||||||
if full_doc_id is not None
|
if full_doc_id is not None
|
||||||
else source_id,
|
else source_id,
|
||||||
"file_path": file_path, # Add file path
|
"file_path": file_path,
|
||||||
"status": DocStatus.PROCESSED,
|
"status": DocStatus.PROCESSED,
|
||||||
}
|
}
|
||||||
all_chunks_data[chunk_id] = chunk_entry
|
all_chunks_data[chunk_id] = chunk_entry
|
||||||
@ -1282,6 +1283,7 @@ class LightRAG:
|
|||||||
description = entity_data.get("description", "No description provided")
|
description = entity_data.get("description", "No description provided")
|
||||||
source_chunk_id = entity_data.get("source_id", "UNKNOWN")
|
source_chunk_id = entity_data.get("source_id", "UNKNOWN")
|
||||||
source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN")
|
source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN")
|
||||||
|
file_path = entity_data.get("file_path", "custom_kg")
|
||||||
|
|
||||||
# Log if source_id is UNKNOWN
|
# Log if source_id is UNKNOWN
|
||||||
if source_id == "UNKNOWN":
|
if source_id == "UNKNOWN":
|
||||||
@ -1296,6 +1298,7 @@ class LightRAG:
|
|||||||
"description": description,
|
"description": description,
|
||||||
"source_id": source_id,
|
"source_id": source_id,
|
||||||
"file_path": file_path,
|
"file_path": file_path,
|
||||||
|
"created_at": int(time.time()),
|
||||||
}
|
}
|
||||||
# Insert node data into the knowledge graph
|
# Insert node data into the knowledge graph
|
||||||
await self.chunk_entity_relation_graph.upsert_node(
|
await self.chunk_entity_relation_graph.upsert_node(
|
||||||
@ -1315,6 +1318,7 @@ class LightRAG:
|
|||||||
weight = relationship_data.get("weight", 1.0)
|
weight = relationship_data.get("weight", 1.0)
|
||||||
source_chunk_id = relationship_data.get("source_id", "UNKNOWN")
|
source_chunk_id = relationship_data.get("source_id", "UNKNOWN")
|
||||||
source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN")
|
source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN")
|
||||||
|
file_path = relationship_data.get("file_path", "custom_kg")
|
||||||
|
|
||||||
# Log if source_id is UNKNOWN
|
# Log if source_id is UNKNOWN
|
||||||
if source_id == "UNKNOWN":
|
if source_id == "UNKNOWN":
|
||||||
@ -1334,6 +1338,8 @@ class LightRAG:
|
|||||||
"source_id": source_id,
|
"source_id": source_id,
|
||||||
"description": "UNKNOWN",
|
"description": "UNKNOWN",
|
||||||
"entity_type": "UNKNOWN",
|
"entity_type": "UNKNOWN",
|
||||||
|
"file_path": file_path,
|
||||||
|
"created_at": int(time.time()),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -1346,8 +1352,11 @@ class LightRAG:
|
|||||||
"description": description,
|
"description": description,
|
||||||
"keywords": keywords,
|
"keywords": keywords,
|
||||||
"source_id": source_id,
|
"source_id": source_id,
|
||||||
|
"file_path": file_path,
|
||||||
|
"created_at": int(time.time()),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
edge_data: dict[str, str] = {
|
edge_data: dict[str, str] = {
|
||||||
"src_id": src_id,
|
"src_id": src_id,
|
||||||
"tgt_id": tgt_id,
|
"tgt_id": tgt_id,
|
||||||
@ -1355,6 +1364,8 @@ class LightRAG:
|
|||||||
"keywords": keywords,
|
"keywords": keywords,
|
||||||
"source_id": source_id,
|
"source_id": source_id,
|
||||||
"weight": weight,
|
"weight": weight,
|
||||||
|
"file_path": file_path,
|
||||||
|
"created_at": int(time.time()),
|
||||||
}
|
}
|
||||||
all_relationships_data.append(edge_data)
|
all_relationships_data.append(edge_data)
|
||||||
update_storage = True
|
update_storage = True
|
||||||
@ -1367,7 +1378,7 @@ class LightRAG:
|
|||||||
"source_id": dp["source_id"],
|
"source_id": dp["source_id"],
|
||||||
"description": dp["description"],
|
"description": dp["description"],
|
||||||
"entity_type": dp["entity_type"],
|
"entity_type": dp["entity_type"],
|
||||||
"file_path": file_path, # Add file path
|
"file_path": dp.get("file_path", "custom_kg"),
|
||||||
}
|
}
|
||||||
for dp in all_entities_data
|
for dp in all_entities_data
|
||||||
}
|
}
|
||||||
@ -1383,7 +1394,7 @@ class LightRAG:
|
|||||||
"keywords": dp["keywords"],
|
"keywords": dp["keywords"],
|
||||||
"description": dp["description"],
|
"description": dp["description"],
|
||||||
"weight": dp["weight"],
|
"weight": dp["weight"],
|
||||||
"file_path": file_path, # Add file path
|
"file_path": dp.get("file_path", "custom_kg"),
|
||||||
}
|
}
|
||||||
for dp in all_relationships_data
|
for dp in all_relationships_data
|
||||||
}
|
}
|
||||||
|
@ -496,6 +496,7 @@ async def _merge_edges_then_upsert(
|
|||||||
keywords=keywords,
|
keywords=keywords,
|
||||||
source_id=source_id,
|
source_id=source_id,
|
||||||
file_path=file_path,
|
file_path=file_path,
|
||||||
|
created_at=int(time.time()),
|
||||||
)
|
)
|
||||||
|
|
||||||
return edge_data
|
return edge_data
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import time
|
||||||
import asyncio
|
import asyncio
|
||||||
from typing import Any, cast
|
from typing import Any, cast
|
||||||
|
|
||||||
@ -479,7 +480,9 @@ async def acreate_entity(
|
|||||||
"entity_id": entity_name,
|
"entity_id": entity_name,
|
||||||
"entity_type": entity_data.get("entity_type", "UNKNOWN"),
|
"entity_type": entity_data.get("entity_type", "UNKNOWN"),
|
||||||
"description": entity_data.get("description", ""),
|
"description": entity_data.get("description", ""),
|
||||||
"source_id": entity_data.get("source_id", "manual"),
|
"source_id": entity_data.get("source_id", "manual_creation"),
|
||||||
|
"file_path": entity_data.get("file_path", "manual_creation"),
|
||||||
|
"created_at": int(time.time()),
|
||||||
}
|
}
|
||||||
|
|
||||||
# Add entity to knowledge graph
|
# Add entity to knowledge graph
|
||||||
@ -575,8 +578,10 @@ async def acreate_relation(
|
|||||||
edge_data = {
|
edge_data = {
|
||||||
"description": relation_data.get("description", ""),
|
"description": relation_data.get("description", ""),
|
||||||
"keywords": relation_data.get("keywords", ""),
|
"keywords": relation_data.get("keywords", ""),
|
||||||
"source_id": relation_data.get("source_id", "manual"),
|
"source_id": relation_data.get("source_id", "manual_creation"),
|
||||||
"weight": float(relation_data.get("weight", 1.0)),
|
"weight": float(relation_data.get("weight", 1.0)),
|
||||||
|
"file_path": relation_data.get("file_path", "manual_creation"),
|
||||||
|
"created_at": int(time.time()),
|
||||||
}
|
}
|
||||||
|
|
||||||
# Add relation to knowledge graph
|
# Add relation to knowledge graph
|
||||||
|
Loading…
x
Reference in New Issue
Block a user