From 40b10e8fcf40b3a2d818ab8a231bcad1ae8de837 Mon Sep 17 00:00:00 2001 From: zrguo <49157727+LarFii@users.noreply.github.com> Date: Tue, 27 May 2025 16:07:04 +0800 Subject: [PATCH] Update insert_custom_kg --- README.md | 113 +++++++++++++++++++++------------------- lightrag/lightrag.py | 19 +++++-- lightrag/operate.py | 1 + lightrag/utils_graph.py | 9 +++- 4 files changed, 83 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index 9a71465b..e45ca155 100644 --- a/README.md +++ b/README.md @@ -900,59 +900,66 @@ All operations are available in both synchronous and asynchronous versions. The ```python custom_kg = { - "chunks": [ - { - "content": "Alice and Bob are collaborating on quantum computing research.", - "source_id": "doc-1" - } - ], - "entities": [ - { - "entity_name": "Alice", - "entity_type": "person", - "description": "Alice is a researcher specializing in quantum physics.", - "source_id": "doc-1" - }, - { - "entity_name": "Bob", - "entity_type": "person", - "description": "Bob is a mathematician.", - "source_id": "doc-1" - }, - { - "entity_name": "Quantum Computing", - "entity_type": "technology", - "description": "Quantum computing utilizes quantum mechanical phenomena for computation.", - "source_id": "doc-1" - } - ], - "relationships": [ - { - "src_id": "Alice", - "tgt_id": "Bob", - "description": "Alice and Bob are research partners.", - "keywords": "collaboration research", - "weight": 1.0, - "source_id": "doc-1" - }, - { - "src_id": "Alice", - "tgt_id": "Quantum Computing", - "description": "Alice conducts research on quantum computing.", - "keywords": "research expertise", - "weight": 1.0, - "source_id": "doc-1" - }, - { - "src_id": "Bob", - "tgt_id": "Quantum Computing", - "description": "Bob researches quantum computing.", - "keywords": "research application", - "weight": 1.0, - "source_id": "doc-1" - } - ] -} + "chunks": [ + { + "content": "Alice and Bob are collaborating on quantum computing research.", + "source_id": "doc-1", + "file_path": "test_file", + } + ], + "entities": [ + { + "entity_name": "Alice", + "entity_type": "person", + "description": "Alice is a researcher specializing in quantum physics.", + "source_id": "doc-1", + "file_path": "test_file" + }, + { + "entity_name": "Bob", + "entity_type": "person", + "description": "Bob is a mathematician.", + "source_id": "doc-1", + "file_path": "test_file" + }, + { + "entity_name": "Quantum Computing", + "entity_type": "technology", + "description": "Quantum computing utilizes quantum mechanical phenomena for computation.", + "source_id": "doc-1", + "file_path": "test_file" + } + ], + "relationships": [ + { + "src_id": "Alice", + "tgt_id": "Bob", + "description": "Alice and Bob are research partners.", + "keywords": "collaboration research", + "weight": 1.0, + "source_id": "doc-1", + "file_path": "test_file" + }, + { + "src_id": "Alice", + "tgt_id": "Quantum Computing", + "description": "Alice conducts research on quantum computing.", + "keywords": "research expertise", + "weight": 1.0, + "source_id": "doc-1", + "file_path": "test_file" + }, + { + "src_id": "Bob", + "tgt_id": "Quantum Computing", + "description": "Bob researches quantum computing.", + "keywords": "research application", + "weight": 1.0, + "source_id": "doc-1", + "file_path": "test_file" + } + ] + } rag.insert_custom_kg(custom_kg) ``` diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index dcebc174..0bf7de83 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -4,6 +4,7 @@ import traceback import asyncio import configparser import os +import time import warnings from dataclasses import asdict, dataclass, field from datetime import datetime, timezone @@ -1235,7 +1236,6 @@ class LightRAG: self, custom_kg: dict[str, Any], full_doc_id: str = None, - file_path: str = "custom_kg", ) -> None: update_storage = False try: @@ -1245,6 +1245,7 @@ class LightRAG: for chunk_data in custom_kg.get("chunks", []): chunk_content = clean_text(chunk_data["content"]) source_id = chunk_data["source_id"] + file_path = chunk_data.get("file_path", "custom_kg") tokens = len(self.tokenizer.encode(chunk_content)) chunk_order_index = ( 0 @@ -1261,7 +1262,7 @@ class LightRAG: "full_doc_id": full_doc_id if full_doc_id is not None else source_id, - "file_path": file_path, # Add file path + "file_path": file_path, "status": DocStatus.PROCESSED, } all_chunks_data[chunk_id] = chunk_entry @@ -1282,6 +1283,7 @@ class LightRAG: description = entity_data.get("description", "No description provided") source_chunk_id = entity_data.get("source_id", "UNKNOWN") source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN") + file_path = entity_data.get("file_path", "custom_kg") # Log if source_id is UNKNOWN if source_id == "UNKNOWN": @@ -1296,6 +1298,7 @@ class LightRAG: "description": description, "source_id": source_id, "file_path": file_path, + "created_at": int(time.time()), } # Insert node data into the knowledge graph await self.chunk_entity_relation_graph.upsert_node( @@ -1315,6 +1318,7 @@ class LightRAG: weight = relationship_data.get("weight", 1.0) source_chunk_id = relationship_data.get("source_id", "UNKNOWN") source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN") + file_path = relationship_data.get("file_path", "custom_kg") # Log if source_id is UNKNOWN if source_id == "UNKNOWN": @@ -1334,6 +1338,8 @@ class LightRAG: "source_id": source_id, "description": "UNKNOWN", "entity_type": "UNKNOWN", + "file_path": file_path, + "created_at": int(time.time()), }, ) @@ -1346,8 +1352,11 @@ class LightRAG: "description": description, "keywords": keywords, "source_id": source_id, + "file_path": file_path, + "created_at": int(time.time()), }, ) + edge_data: dict[str, str] = { "src_id": src_id, "tgt_id": tgt_id, @@ -1355,6 +1364,8 @@ class LightRAG: "keywords": keywords, "source_id": source_id, "weight": weight, + "file_path": file_path, + "created_at": int(time.time()), } all_relationships_data.append(edge_data) update_storage = True @@ -1367,7 +1378,7 @@ class LightRAG: "source_id": dp["source_id"], "description": dp["description"], "entity_type": dp["entity_type"], - "file_path": file_path, # Add file path + "file_path": dp.get("file_path", "custom_kg"), } for dp in all_entities_data } @@ -1383,7 +1394,7 @@ class LightRAG: "keywords": dp["keywords"], "description": dp["description"], "weight": dp["weight"], - "file_path": file_path, # Add file path + "file_path": dp.get("file_path", "custom_kg"), } for dp in all_relationships_data } diff --git a/lightrag/operate.py b/lightrag/operate.py index 8b39527f..fd8d5006 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -496,6 +496,7 @@ async def _merge_edges_then_upsert( keywords=keywords, source_id=source_id, file_path=file_path, + created_at=int(time.time()), ) return edge_data diff --git a/lightrag/utils_graph.py b/lightrag/utils_graph.py index 60369ee5..54876fa7 100644 --- a/lightrag/utils_graph.py +++ b/lightrag/utils_graph.py @@ -1,5 +1,6 @@ from __future__ import annotations +import time import asyncio from typing import Any, cast @@ -479,7 +480,9 @@ async def acreate_entity( "entity_id": entity_name, "entity_type": entity_data.get("entity_type", "UNKNOWN"), "description": entity_data.get("description", ""), - "source_id": entity_data.get("source_id", "manual"), + "source_id": entity_data.get("source_id", "manual_creation"), + "file_path": entity_data.get("file_path", "manual_creation"), + "created_at": int(time.time()), } # Add entity to knowledge graph @@ -575,8 +578,10 @@ async def acreate_relation( edge_data = { "description": relation_data.get("description", ""), "keywords": relation_data.get("keywords", ""), - "source_id": relation_data.get("source_id", "manual"), + "source_id": relation_data.get("source_id", "manual_creation"), "weight": float(relation_data.get("weight", 1.0)), + "file_path": relation_data.get("file_path", "manual_creation"), + "created_at": int(time.time()), } # Add relation to knowledge graph