From e0bb6093b03b632fcee6f25eb27766be9799bf7a Mon Sep 17 00:00:00 2001 From: xuewei <728857235@qq.com> Date: Wed, 16 Jul 2025 21:59:17 +0800 Subject: [PATCH 1/5] Fix Milvus DataNotMatchException --- lightrag/lightrag.py | 4 ++-- lightrag/operate.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 092e06eb..0d072e61 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -457,13 +457,13 @@ class LightRAG: namespace=NameSpace.VECTOR_STORE_ENTITIES, workspace=self.workspace, embedding_func=self.embedding_func, - meta_fields={"entity_name", "source_id", "content", "file_path"}, + meta_fields={"entity_name", "source_id", "content", "file_path", "entity_type"}, ) self.relationships_vdb: BaseVectorStorage = self.vector_db_storage_cls( # type: ignore namespace=NameSpace.VECTOR_STORE_RELATIONSHIPS, workspace=self.workspace, embedding_func=self.embedding_func, - meta_fields={"src_id", "tgt_id", "source_id", "content", "file_path"}, + meta_fields={"src_id", "tgt_id", "source_id", "content", "file_path", "weight"}, ) self.chunks_vdb: BaseVectorStorage = self.vector_db_storage_cls( # type: ignore namespace=NameSpace.VECTOR_STORE_CHUNKS, diff --git a/lightrag/operate.py b/lightrag/operate.py index e3456d92..1eac8357 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -1284,6 +1284,7 @@ async def merge_nodes_and_edges( "content": f"{edge_data['src_id']}\t{edge_data['tgt_id']}\n{edge_data['keywords']}\n{edge_data['description']}", "source_id": edge_data["source_id"], "file_path": edge_data.get("file_path", "unknown_source"), + "weight": 0 } } await relationships_vdb.upsert(data_for_vdb) From 77a42a37cab7e596338323656d376853bfa8457f Mon Sep 17 00:00:00 2001 From: xuewei <728857235@qq.com> Date: Wed, 16 Jul 2025 22:07:39 +0800 Subject: [PATCH 2/5] Fix Milvus DataNotMatchException lint-and-format --- lightrag/operate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index 1eac8357..de29cf11 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -1284,7 +1284,7 @@ async def merge_nodes_and_edges( "content": f"{edge_data['src_id']}\t{edge_data['tgt_id']}\n{edge_data['keywords']}\n{edge_data['description']}", "source_id": edge_data["source_id"], "file_path": edge_data.get("file_path", "unknown_source"), - "weight": 0 + "weight": 0, } } await relationships_vdb.upsert(data_for_vdb) From a896e789694a2598321528b0f333b571aed1d258 Mon Sep 17 00:00:00 2001 From: okxuewei <48642811+okxuewei@users.noreply.github.com> Date: Thu, 17 Jul 2025 12:06:49 +0800 Subject: [PATCH 3/5] Remove "entity_type" and "weight" from the schema --- lightrag/kg/milvus_impl.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/lightrag/kg/milvus_impl.py b/lightrag/kg/milvus_impl.py index faf0a485..eed87289 100644 --- a/lightrag/kg/milvus_impl.py +++ b/lightrag/kg/milvus_impl.py @@ -44,12 +44,6 @@ class MilvusVectorDBStorage(BaseVectorStorage): max_length=512, nullable=True, ), - FieldSchema( - name="entity_type", - dtype=DataType.VARCHAR, - max_length=128, - nullable=True, - ), FieldSchema( name="file_path", dtype=DataType.VARCHAR, @@ -67,7 +61,6 @@ class MilvusVectorDBStorage(BaseVectorStorage): FieldSchema( name="tgt_id", dtype=DataType.VARCHAR, max_length=512, nullable=True ), - FieldSchema(name="weight", dtype=DataType.DOUBLE, nullable=True), FieldSchema( name="file_path", dtype=DataType.VARCHAR, From 14d9fe49b0b785e3ff3ecf4c27f7e1825c628c39 Mon Sep 17 00:00:00 2001 From: yangdx Date: Thu, 17 Jul 2025 12:08:35 +0800 Subject: [PATCH 4/5] refactor(milvus): remove entity_type and weight fields from schema - Remove entity_type field from entities collections - Remove weight field from relationships collections - Update schema definitions and index creation logic - Maintain backward compatibility with existing data via dynamic fields --- lightrag/kg/milvus_impl.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/lightrag/kg/milvus_impl.py b/lightrag/kg/milvus_impl.py index faf0a485..8a13fd21 100644 --- a/lightrag/kg/milvus_impl.py +++ b/lightrag/kg/milvus_impl.py @@ -44,12 +44,6 @@ class MilvusVectorDBStorage(BaseVectorStorage): max_length=512, nullable=True, ), - FieldSchema( - name="entity_type", - dtype=DataType.VARCHAR, - max_length=128, - nullable=True, - ), FieldSchema( name="file_path", dtype=DataType.VARCHAR, @@ -67,7 +61,6 @@ class MilvusVectorDBStorage(BaseVectorStorage): FieldSchema( name="tgt_id", dtype=DataType.VARCHAR, max_length=512, nullable=True ), - FieldSchema(name="weight", dtype=DataType.DOUBLE, nullable=True), FieldSchema( name="file_path", dtype=DataType.VARCHAR, @@ -227,19 +220,6 @@ class MilvusVectorDBStorage(BaseVectorStorage): logger.debug(f"IndexParams method failed for entity_name: {e}") self._create_scalar_index_fallback("entity_name", "INVERTED") - try: - entity_type_index = self._get_index_params() - entity_type_index.add_index( - field_name="entity_type", index_type="INVERTED" - ) - self._client.create_index( - collection_name=self.namespace, - index_params=entity_type_index, - ) - except Exception as e: - logger.debug(f"IndexParams method failed for entity_type: {e}") - self._create_scalar_index_fallback("entity_type", "INVERTED") - elif "relationships" in self.namespace.lower(): # Create indexes for relationship fields try: @@ -294,7 +274,6 @@ class MilvusVectorDBStorage(BaseVectorStorage): # Create scalar indexes using fallback if "entities" in self.namespace.lower(): self._create_scalar_index_fallback("entity_name", "INVERTED") - self._create_scalar_index_fallback("entity_type", "INVERTED") elif "relationships" in self.namespace.lower(): self._create_scalar_index_fallback("src_id", "INVERTED") self._create_scalar_index_fallback("tgt_id", "INVERTED") @@ -320,14 +299,12 @@ class MilvusVectorDBStorage(BaseVectorStorage): if "entities" in self.namespace.lower(): specific_fields = { "entity_name": {"type": "VarChar"}, - "entity_type": {"type": "VarChar"}, "file_path": {"type": "VarChar"}, } elif "relationships" in self.namespace.lower(): specific_fields = { "src_id": {"type": "VarChar"}, "tgt_id": {"type": "VarChar"}, - "weight": {"type": "Double"}, "file_path": {"type": "VarChar"}, } elif "chunks" in self.namespace.lower(): From e6a7dc94f31e7641f26d2b25ee98b8697ce9f6de Mon Sep 17 00:00:00 2001 From: okxuewei <48642811+okxuewei@users.noreply.github.com> Date: Thu, 17 Jul 2025 12:09:16 +0800 Subject: [PATCH 5/5] Rollback the update --- lightrag/lightrag.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 0d072e61..092e06eb 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -457,13 +457,13 @@ class LightRAG: namespace=NameSpace.VECTOR_STORE_ENTITIES, workspace=self.workspace, embedding_func=self.embedding_func, - meta_fields={"entity_name", "source_id", "content", "file_path", "entity_type"}, + meta_fields={"entity_name", "source_id", "content", "file_path"}, ) self.relationships_vdb: BaseVectorStorage = self.vector_db_storage_cls( # type: ignore namespace=NameSpace.VECTOR_STORE_RELATIONSHIPS, workspace=self.workspace, embedding_func=self.embedding_func, - meta_fields={"src_id", "tgt_id", "source_id", "content", "file_path", "weight"}, + meta_fields={"src_id", "tgt_id", "source_id", "content", "file_path"}, ) self.chunks_vdb: BaseVectorStorage = self.vector_db_storage_cls( # type: ignore namespace=NameSpace.VECTOR_STORE_CHUNKS,