diff --git a/.gitignore b/.gitignore index def738b2..b5c4bb11 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ local_neo4jWorkDir/ neo4jWorkDir/ ignore_this.txt .venv/ +.ruff_cache/ \ No newline at end of file diff --git a/README.md b/README.md index 59245c44..d32458a8 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ This repository hosts the code of LightRAG. The structure of this code is based ## 🎉 News +- [x] [2024.11.11]🎯📢LightRAG now supports [deleting entities by their names](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#delete-entity). - [x] [2024.11.09]🎯📢Now comes [LightRAG Gui](https://lightrag-gui.streamlit.app) that lets you insert, query, visualize, and download LightRAG knowledge. - [x] [2024.11.04]🎯📢You can [use Neo4J for Storage](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-neo4j-for-storage) now. - [x] [2024.10.29]🎯📢LightRAG now supports multiple file types, including PDF, DOC, PPT, and CSV via `textract`. @@ -318,6 +319,23 @@ with open("./newText.txt") as f: rag.insert(f.read()) ``` +### Delete Entity + +```python +# Delete Entity: Deleting entities by their names +rag = LightRAG( + working_dir=WORKING_DIR, + llm_model_func=llm_model_func, + embedding_func=EmbeddingFunc( + embedding_dim=embedding_dimension, + max_token_size=8192, + func=embedding_func, + ), +) + +rag.delete_by_entity("Project Gutenberg") +``` + ### Multi-file Type Support The `textract` supports reading file types such as TXT, DOCX, PPTX, CSV, and PDF. 
# --- lightrag/base.py :: BaseGraphStorage -----------------------------------
async def delete_node(self, node_id: str):
    """Delete the node identified by ``node_id``.

    This base implementation always raises ``NotImplementedError``.
    """
    raise NotImplementedError


# --- lightrag/lightrag.py :: LightRAG ----------------------------------------
def delete_by_entity(self, entity_name: str):
    """Blocking wrapper around :meth:`adelete_by_entity`.

    Obtains a loop from ``always_get_an_event_loop()``, runs the coroutine to
    completion on it and returns whatever the coroutine returned.

    :param entity_name: Name of the entity to delete.
    """
    event_loop = always_get_an_event_loop()
    deletion = self.adelete_by_entity(entity_name)
    return event_loop.run_until_complete(deletion)


async def adelete_by_entity(self, entity_name: str):
    """Delete an entity, its relations and its graph node by name.

    The name is upper-cased and wrapped in double quotes before it is handed
    to ``entities_vdb.delete_entity``, ``relationships_vdb.delete_relation``
    and ``chunk_entity_relation_graph.delete_node`` (in that order). After
    those calls return, ``_delete_by_entity_done`` is awaited.

    Any ``Exception`` raised along the way is logged with ``logger.error``
    and not re-raised, so the coroutine always returns ``None``.

    :param entity_name: Name of the entity to delete.
    """
    # Normalise to the upper-cased, double-quoted form passed to the storages.
    entity_name = '"' + entity_name.upper() + '"'

    try:
        await self.entities_vdb.delete_entity(entity_name)
        await self.relationships_vdb.delete_relation(entity_name)
        await self.chunk_entity_relation_graph.delete_node(entity_name)

        logger.info(
            f"Entity '{entity_name}' and its relationships have been deleted."
        )
        await self._delete_by_entity_done()
    except Exception as e:
        logger.error(f"Error while deleting entity '{entity_name}': {e}")


async def _delete_by_entity_done(self):
    """Run ``index_done_callback()`` on every storage touched by a deletion.

    Storages that are ``None`` are skipped; the remaining callbacks are
    awaited together through ``asyncio.gather``.
    """
    touched_storages = (
        self.entities_vdb,
        self.relationships_vdb,
        self.chunk_entity_relation_graph,
    )
    pending_callbacks = [
        cast(StorageNameSpace, storage).index_done_callback()
        for storage in touched_storages
        if storage is not None
    ]
    await asyncio.gather(*pending_callbacks)
# --- lightrag/storage.py :: NanoVectorDBStorage ------------------------------
@property
def client_storage(self):
    """Return the ``_NanoVectorDB__storage`` attribute of ``self._client``.

    NOTE(review): presumably the client's name-mangled private storage
    mapping -- confirm against the nano_vectordb version in use.
    ``delete_relation`` iterates its ``"data"`` entry.
    """
    return getattr(self._client, "_NanoVectorDB__storage")


async def delete_entity(self, entity_name: str):
    """Delete the vector record of a single entity, if one exists.

    The record id is derived with ``compute_mdhash_id(entity_name,
    prefix="ent-")``. Whether a record was found is reported via
    ``logger.info``. Any ``Exception`` is logged with ``logger.error`` and not
    re-raised.

    :param entity_name: Entity name used to derive the record id.
    """
    try:
        # The client's get/delete calls are given a list of ids.
        entity_id = [compute_mdhash_id(entity_name, prefix="ent-")]

        if self._client.get(entity_id):
            self._client.delete(entity_id)
            logger.info(f"Entity {entity_name} have been deleted.")
        else:
            logger.info(f"No entity found with name {entity_name}.")
    except Exception as e:
        logger.error(f"Error while deleting entity {entity_name}: {e}")


async def delete_relation(self, entity_name: str):
    """Delete every stored record whose ``src_id`` or ``tgt_id`` is the entity.

    Records are read from ``self.client_storage["data"]``. Records that have
    no ``src_id`` / ``tgt_id`` key are treated as non-matching instead of
    raising ``KeyError``, so one such record no longer aborts the deletion of
    the records that do match. Any ``Exception`` is logged with
    ``logger.error`` and not re-raised.

    :param entity_name: Entity name to match against ``src_id`` / ``tgt_id``.
    """
    try:
        # .get() keeps a record without src_id/tgt_id from raising KeyError
        # and silently cancelling the whole deletion via the except below.
        ids_to_delete = [
            dp["__id__"]
            for dp in self.client_storage["data"]
            if dp.get("src_id") == entity_name or dp.get("tgt_id") == entity_name
        ]

        if ids_to_delete:
            self._client.delete(ids_to_delete)
            logger.info(
                f"All relations related to entity {entity_name} have been deleted."
            )
        else:
            logger.info(f"No relations found for entity {entity_name}.")
    except Exception as e:
        logger.error(f"Error while deleting relations for entity {entity_name}: {e}")
# --- lightrag/storage.py :: NetworkXStorage ----------------------------------
async def delete_node(self, node_id: str):
    """
    Remove ``node_id`` from ``self._graph`` when the graph contains it.

    A successful removal is logged at INFO level; a missing node is logged
    at WARNING level and nothing else happens.

    :param node_id: The node_id to delete
    """
    # Guard clause: nothing to remove, just report it.
    if not self._graph.has_node(node_id):
        logger.warning(f"Node {node_id} not found in the graph for deletion.")
        return

    self._graph.remove_node(node_id)
    logger.info(f"Node {node_id} deleted from the graph.")