mirror of
https://github.com/HKUDS/LightRAG.git
synced 2025-07-24 01:10:54 +00:00
Add delete method
This commit is contained in:
parent
39cf939d15
commit
b1f35004b1
1
.gitignore
vendored
1
.gitignore
vendored
@ -10,3 +10,4 @@ local_neo4jWorkDir/
|
||||
neo4jWorkDir/
|
||||
ignore_this.txt
|
||||
.venv/
|
||||
.ruff_cache/
|
18
README.md
18
README.md
@ -22,6 +22,7 @@ This repository hosts the code of LightRAG. The structure of this code is based
|
||||
</div>
|
||||
|
||||
## 🎉 News
|
||||
- [x] [2024.11.11]🎯📢LightRAG now supports [deleting entities by their names](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#delete-entity).
|
||||
- [x] [2024.11.09]🎯📢Now comes [LightRAG Gui](https://lightrag-gui.streamlit.app) that lets you insert, query, visualize, and download LightRAG knowledge.
|
||||
- [x] [2024.11.04]🎯📢You can [use Neo4J for Storage](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-neo4j-for-storage) now.
|
||||
- [x] [2024.10.29]🎯📢LightRAG now supports multiple file types, including PDF, DOC, PPT, and CSV via `textract`.
|
||||
@ -318,6 +319,23 @@ with open("./newText.txt") as f:
|
||||
rag.insert(f.read())
|
||||
```
|
||||
|
||||
### Delete Entity
|
||||
|
||||
```python
|
||||
# Delete Entity: Deleting entities by their names
|
||||
rag = LightRAG(
|
||||
working_dir=WORKING_DIR,
|
||||
llm_model_func=llm_model_func,
|
||||
embedding_func=EmbeddingFunc(
|
||||
embedding_dim=embedding_dimension,
|
||||
max_token_size=8192,
|
||||
func=embedding_func,
|
||||
),
|
||||
)
|
||||
|
||||
rag.delete_by_entity("Project Gutenberg")
|
||||
```
|
||||
|
||||
### Multi-file Type Support
|
||||
|
||||
`textract` supports reading file types such as TXT, DOCX, PPTX, CSV, and PDF.
|
||||
|
@ -1,5 +1,5 @@
|
||||
from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
|
||||
|
||||
__version__ = "0.0.9"
|
||||
__version__ = "1.0.0"
|
||||
__author__ = "Zirui Guo"
|
||||
__url__ = "https://github.com/HKUDS/LightRAG"
|
||||
|
@ -116,7 +116,7 @@ class BaseGraphStorage(StorageNameSpace):
|
||||
):
|
||||
raise NotImplementedError
|
||||
|
||||
async def clustering(self, algorithm: str):
|
||||
async def delete_node(self, node_id: str):
    """Delete the node identified by ``node_id`` from the graph storage.

    This base implementation does nothing except signal that the operation
    is not provided.

    :param node_id: Identifier of the node to delete.
    :raises NotImplementedError: Always.
    """
    raise NotImplementedError
|
||||
|
||||
async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
|
||||
|
@ -188,7 +188,6 @@ class LightRAG:
|
||||
return {
|
||||
"Neo4JStorage": Neo4JStorage,
|
||||
"NetworkXStorage": NetworkXStorage,
|
||||
# "ArangoDBStorage": ArangoDBStorage
|
||||
}
|
||||
|
||||
def insert(self, string_or_strings):
|
||||
@ -328,3 +327,32 @@ class LightRAG:
|
||||
continue
|
||||
tasks.append(cast(StorageNameSpace, storage_inst).index_done_callback())
|
||||
await asyncio.gather(*tasks)
|
||||
|
||||
def delete_by_entity(self, entity_name: str):
    """Synchronous wrapper around :meth:`adelete_by_entity`.

    Obtains an event loop through ``always_get_an_event_loop`` and runs the
    asynchronous deletion on it until it completes.

    :param entity_name: Name of the entity to delete.
    :return: Whatever ``adelete_by_entity`` returns.
    """
    event_loop = always_get_an_event_loop()
    deletion = self.adelete_by_entity(entity_name)
    return event_loop.run_until_complete(deletion)
|
||||
|
||||
async def adelete_by_entity(self, entity_name: str):
    """Asynchronously delete an entity by name from the three backing stores.

    The name is upper-cased and wrapped in double quotes, then handed, in
    order, to ``entities_vdb.delete_entity``,
    ``relationships_vdb.delete_relation`` and
    ``chunk_entity_relation_graph.delete_node``. After that
    ``_delete_by_entity_done`` is awaited. Any exception raised by these
    steps is logged and not re-raised.

    :param entity_name: Entity name as given by the caller.
    """
    # NOTE(review): presumably this mirrors how entity names are keyed in
    # the stores (upper-case, double-quoted) -- confirm against the insert path.
    quoted_name = '"' + entity_name.upper() + '"'

    try:
        await self.entities_vdb.delete_entity(quoted_name)
        await self.relationships_vdb.delete_relation(quoted_name)
        await self.chunk_entity_relation_graph.delete_node(quoted_name)

        logger.info(f"Entity '{quoted_name}' and its relationships have been deleted.")
        await self._delete_by_entity_done()
    except Exception as exc:
        logger.error(f"Error while deleting entity '{quoted_name}': {exc}")
|
||||
|
||||
async def _delete_by_entity_done(self):
    """Run ``index_done_callback`` on every storage touched by a deletion.

    The callbacks of ``entities_vdb``, ``relationships_vdb`` and
    ``chunk_entity_relation_graph`` are awaited together via
    ``asyncio.gather``; a storage that is ``None`` is skipped.
    """
    touched_storages = (
        self.entities_vdb,
        self.relationships_vdb,
        self.chunk_entity_relation_graph,
    )
    pending_callbacks = [
        cast(StorageNameSpace, storage).index_done_callback()
        for storage in touched_storages
        if storage is not None
    ]
    await asyncio.gather(*pending_callbacks)
|
@ -7,7 +7,13 @@ import networkx as nx
|
||||
import numpy as np
|
||||
from nano_vectordb import NanoVectorDB
|
||||
|
||||
from .utils import load_json, logger, write_json
|
||||
from .utils import (
|
||||
logger,
|
||||
load_json,
|
||||
write_json,
|
||||
compute_mdhash_id,
|
||||
)
|
||||
|
||||
from .base import (
|
||||
BaseGraphStorage,
|
||||
BaseKVStorage,
|
||||
@ -110,6 +116,37 @@ class NanoVectorDBStorage(BaseVectorStorage):
|
||||
{**dp, "id": dp["__id__"], "distance": dp["__metrics__"]} for dp in results
|
||||
]
|
||||
return results
|
||||
|
||||
@property
def client_storage(self):
    """Return the ``_NanoVectorDB__storage`` attribute of ``self._client``.

    The attribute is looked up by its name-mangled form, i.e. the private
    ``__storage`` member as seen from outside the ``NanoVectorDB`` class.

    :raises AttributeError: If the client has no such attribute.
    """
    mangled_attr = "_NanoVectorDB__storage"
    return getattr(self._client, mangled_attr)
|
||||
|
||||
async def delete_entity(self, entity_name: str):
    """Remove the vector record of a single entity, if present.

    The record id is derived from ``entity_name`` with
    ``compute_mdhash_id(..., prefix="ent-")``. The outcome (deleted / not
    found) is logged at info level. Any exception is logged as an error and
    not re-raised.

    :param entity_name: Entity name used to derive the record id.
    """
    try:
        # The client is queried and asked to delete with a one-element id list.
        target_ids = [compute_mdhash_id(entity_name, prefix="ent-")]

        if not self._client.get(target_ids):
            logger.info(f"No entity found with name {entity_name}.")
            return

        self._client.delete(target_ids)
        logger.info(f"Entity {entity_name} have been deleted.")
    except Exception as exc:
        logger.error(f"Error while deleting entity {entity_name}: {exc}")
|
||||
|
||||
async def delete_relation(self, entity_name: str):
    """Delete every stored record that has ``entity_name`` as an endpoint.

    Scans ``self.client_storage["data"]`` and removes each record whose
    ``src_id`` or ``tgt_id`` equals ``entity_name``. The outcome is logged at
    info level. Any exception is logged as an error and not re-raised.

    :param entity_name: Endpoint name to match against ``src_id`` / ``tgt_id``.
    """
    try:
        ids_to_delete = [
            dp["__id__"]
            for dp in self.client_storage["data"]
            # .get() instead of indexing: a record without endpoint keys is
            # treated as "no match" rather than raising KeyError and
            # aborting the deletion of all the records that do match.
            if dp.get("src_id") == entity_name or dp.get("tgt_id") == entity_name
        ]

        if ids_to_delete:
            self._client.delete(ids_to_delete)
            logger.info(f"All relations related to entity {entity_name} have been deleted.")
        else:
            logger.info(f"No relations found for entity {entity_name}.")
    except Exception as e:
        logger.error(f"Error while deleting relations for entity {entity_name}: {e}")
|
||||
|
||||
async def index_done_callback(self):
|
||||
self._client.save()
|
||||
@ -228,6 +265,18 @@ class NetworkXStorage(BaseGraphStorage):
|
||||
):
|
||||
self._graph.add_edge(source_node_id, target_node_id, **edge_data)
|
||||
|
||||
async def delete_node(self, node_id: str):
    """
    Remove the node with the given ``node_id`` from the graph.

    A missing node is not an error: a warning is logged and nothing else
    happens.

    :param node_id: The node_id to delete
    """
    if not self._graph.has_node(node_id):
        logger.warning(f"Node {node_id} not found in the graph for deletion.")
        return

    self._graph.remove_node(node_id)
    logger.info(f"Node {node_id} deleted from the graph.")
|
||||
|
||||
async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
|
||||
if algorithm not in self._node_embed_algorithms:
|
||||
raise ValueError(f"Node embedding algorithm {algorithm} not supported")
|
||||
|
Loading…
x
Reference in New Issue
Block a user