Update insert_custom_kg

This commit is contained in:
zrguo 2025-05-27 16:07:04 +08:00
parent 5385616e7e
commit 40b10e8fcf
4 changed files with 83 additions and 59 deletions

View File

@@ -903,7 +903,8 @@ custom_kg = {
"chunks": [ "chunks": [
{ {
"content": "Alice and Bob are collaborating on quantum computing research.", "content": "Alice and Bob are collaborating on quantum computing research.",
"source_id": "doc-1" "source_id": "doc-1",
"file_path": "test_file",
} }
], ],
"entities": [ "entities": [
@@ -911,19 +912,22 @@ custom_kg = {
"entity_name": "Alice", "entity_name": "Alice",
"entity_type": "person", "entity_type": "person",
"description": "Alice is a researcher specializing in quantum physics.", "description": "Alice is a researcher specializing in quantum physics.",
"source_id": "doc-1" "source_id": "doc-1",
"file_path": "test_file"
}, },
{ {
"entity_name": "Bob", "entity_name": "Bob",
"entity_type": "person", "entity_type": "person",
"description": "Bob is a mathematician.", "description": "Bob is a mathematician.",
"source_id": "doc-1" "source_id": "doc-1",
"file_path": "test_file"
}, },
{ {
"entity_name": "Quantum Computing", "entity_name": "Quantum Computing",
"entity_type": "technology", "entity_type": "technology",
"description": "Quantum computing utilizes quantum mechanical phenomena for computation.", "description": "Quantum computing utilizes quantum mechanical phenomena for computation.",
"source_id": "doc-1" "source_id": "doc-1",
"file_path": "test_file"
} }
], ],
"relationships": [ "relationships": [
@@ -933,7 +937,8 @@ custom_kg = {
"description": "Alice and Bob are research partners.", "description": "Alice and Bob are research partners.",
"keywords": "collaboration research", "keywords": "collaboration research",
"weight": 1.0, "weight": 1.0,
"source_id": "doc-1" "source_id": "doc-1",
"file_path": "test_file"
}, },
{ {
"src_id": "Alice", "src_id": "Alice",
@@ -941,7 +946,8 @@ custom_kg = {
"description": "Alice conducts research on quantum computing.", "description": "Alice conducts research on quantum computing.",
"keywords": "research expertise", "keywords": "research expertise",
"weight": 1.0, "weight": 1.0,
"source_id": "doc-1" "source_id": "doc-1",
"file_path": "test_file"
}, },
{ {
"src_id": "Bob", "src_id": "Bob",
@@ -949,10 +955,11 @@ custom_kg = {
"description": "Bob researches quantum computing.", "description": "Bob researches quantum computing.",
"keywords": "research application", "keywords": "research application",
"weight": 1.0, "weight": 1.0,
"source_id": "doc-1" "source_id": "doc-1",
"file_path": "test_file"
} }
] ]
} }
rag.insert_custom_kg(custom_kg) rag.insert_custom_kg(custom_kg)
``` ```

View File

@@ -4,6 +4,7 @@ import traceback
import asyncio import asyncio
import configparser import configparser
import os import os
import time
import warnings import warnings
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone from datetime import datetime, timezone
@@ -1235,7 +1236,6 @@ class LightRAG:
self, self,
custom_kg: dict[str, Any], custom_kg: dict[str, Any],
full_doc_id: str = None, full_doc_id: str = None,
file_path: str = "custom_kg",
) -> None: ) -> None:
update_storage = False update_storage = False
try: try:
@@ -1245,6 +1245,7 @@
for chunk_data in custom_kg.get("chunks", []): for chunk_data in custom_kg.get("chunks", []):
chunk_content = clean_text(chunk_data["content"]) chunk_content = clean_text(chunk_data["content"])
source_id = chunk_data["source_id"] source_id = chunk_data["source_id"]
file_path = chunk_data.get("file_path", "custom_kg")
tokens = len(self.tokenizer.encode(chunk_content)) tokens = len(self.tokenizer.encode(chunk_content))
chunk_order_index = ( chunk_order_index = (
0 0
@@ -1261,7 +1262,7 @@
"full_doc_id": full_doc_id "full_doc_id": full_doc_id
if full_doc_id is not None if full_doc_id is not None
else source_id, else source_id,
"file_path": file_path, # Add file path "file_path": file_path,
"status": DocStatus.PROCESSED, "status": DocStatus.PROCESSED,
} }
all_chunks_data[chunk_id] = chunk_entry all_chunks_data[chunk_id] = chunk_entry
@@ -1282,6 +1283,7 @@
description = entity_data.get("description", "No description provided") description = entity_data.get("description", "No description provided")
source_chunk_id = entity_data.get("source_id", "UNKNOWN") source_chunk_id = entity_data.get("source_id", "UNKNOWN")
source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN") source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN")
file_path = entity_data.get("file_path", "custom_kg")
# Log if source_id is UNKNOWN # Log if source_id is UNKNOWN
if source_id == "UNKNOWN": if source_id == "UNKNOWN":
@@ -1296,6 +1298,7 @@
"description": description, "description": description,
"source_id": source_id, "source_id": source_id,
"file_path": file_path, "file_path": file_path,
"created_at": int(time.time()),
} }
# Insert node data into the knowledge graph # Insert node data into the knowledge graph
await self.chunk_entity_relation_graph.upsert_node( await self.chunk_entity_relation_graph.upsert_node(
@@ -1315,6 +1318,7 @@
weight = relationship_data.get("weight", 1.0) weight = relationship_data.get("weight", 1.0)
source_chunk_id = relationship_data.get("source_id", "UNKNOWN") source_chunk_id = relationship_data.get("source_id", "UNKNOWN")
source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN") source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN")
file_path = relationship_data.get("file_path", "custom_kg")
# Log if source_id is UNKNOWN # Log if source_id is UNKNOWN
if source_id == "UNKNOWN": if source_id == "UNKNOWN":
@@ -1334,6 +1338,8 @@
"source_id": source_id, "source_id": source_id,
"description": "UNKNOWN", "description": "UNKNOWN",
"entity_type": "UNKNOWN", "entity_type": "UNKNOWN",
"file_path": file_path,
"created_at": int(time.time()),
}, },
) )
@@ -1346,8 +1352,11 @@
"description": description, "description": description,
"keywords": keywords, "keywords": keywords,
"source_id": source_id, "source_id": source_id,
"file_path": file_path,
"created_at": int(time.time()),
}, },
) )
edge_data: dict[str, str] = { edge_data: dict[str, str] = {
"src_id": src_id, "src_id": src_id,
"tgt_id": tgt_id, "tgt_id": tgt_id,
@@ -1355,6 +1364,8 @@
"keywords": keywords, "keywords": keywords,
"source_id": source_id, "source_id": source_id,
"weight": weight, "weight": weight,
"file_path": file_path,
"created_at": int(time.time()),
} }
all_relationships_data.append(edge_data) all_relationships_data.append(edge_data)
update_storage = True update_storage = True
@@ -1367,7 +1378,7 @@
"source_id": dp["source_id"], "source_id": dp["source_id"],
"description": dp["description"], "description": dp["description"],
"entity_type": dp["entity_type"], "entity_type": dp["entity_type"],
"file_path": file_path, # Add file path "file_path": dp.get("file_path", "custom_kg"),
} }
for dp in all_entities_data for dp in all_entities_data
} }
@@ -1383,7 +1394,7 @@
"keywords": dp["keywords"], "keywords": dp["keywords"],
"description": dp["description"], "description": dp["description"],
"weight": dp["weight"], "weight": dp["weight"],
"file_path": file_path, # Add file path "file_path": dp.get("file_path", "custom_kg"),
} }
for dp in all_relationships_data for dp in all_relationships_data
} }

View File

@@ -496,6 +496,7 @@ async def _merge_edges_then_upsert(
keywords=keywords, keywords=keywords,
source_id=source_id, source_id=source_id,
file_path=file_path, file_path=file_path,
created_at=int(time.time()),
) )
return edge_data return edge_data

View File

@@ -1,5 +1,6 @@
from __future__ import annotations from __future__ import annotations
import time
import asyncio import asyncio
from typing import Any, cast from typing import Any, cast
@@ -479,7 +480,9 @@ async def acreate_entity(
"entity_id": entity_name, "entity_id": entity_name,
"entity_type": entity_data.get("entity_type", "UNKNOWN"), "entity_type": entity_data.get("entity_type", "UNKNOWN"),
"description": entity_data.get("description", ""), "description": entity_data.get("description", ""),
"source_id": entity_data.get("source_id", "manual"), "source_id": entity_data.get("source_id", "manual_creation"),
"file_path": entity_data.get("file_path", "manual_creation"),
"created_at": int(time.time()),
} }
# Add entity to knowledge graph # Add entity to knowledge graph
@@ -575,8 +578,10 @@ async def acreate_relation(
edge_data = { edge_data = {
"description": relation_data.get("description", ""), "description": relation_data.get("description", ""),
"keywords": relation_data.get("keywords", ""), "keywords": relation_data.get("keywords", ""),
"source_id": relation_data.get("source_id", "manual"), "source_id": relation_data.get("source_id", "manual_creation"),
"weight": float(relation_data.get("weight", 1.0)), "weight": float(relation_data.get("weight", 1.0)),
"file_path": relation_data.get("file_path", "manual_creation"),
"created_at": int(time.time()),
} }
# Add relation to knowledge graph # Add relation to knowledge graph