From bfa0844ecb2f4cec3aaf4fbd431493909c8fbd7c Mon Sep 17 00:00:00 2001
From: yangdx <gzdaniel@me.com>
Date: Wed, 9 Jul 2025 15:17:05 +0800
Subject: [PATCH] Update README

---
 README-zh.md | 30 +++++++++++++++++++++---------
 README.md    | 30 +++++++++++++++++++++---------
 2 files changed, 42 insertions(+), 18 deletions(-)
diff --git a/README-zh.md b/README-zh.md
index e9599099..678d727b 100644
--- a/README-zh.md
+++ b/README-zh.md
@@ -824,7 +824,7 @@ rag = LightRAG(
   create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
   CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
   ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
-
+  
   -- 如有必要可以删除
   drop INDEX entity_p_idx;
   drop INDEX vertex_p_idx;
@@ -849,6 +849,18 @@ rag = LightRAG(
 
 </details>
 
+### LightRAG实例间的数据隔离
+
+通过 workspace 参数可以实现不同LightRAG实例之间的存储数据隔离。LightRAG在初始化后workspace就已经确定，之后修改workspace是无效的。下面是不同类型的存储实现工作空间的方式：
+
+- **对于本地基于文件的数据库，数据隔离通过工作空间子目录实现：** JsonKVStorage, JsonDocStatusStorage, NetworkXStorage, NanoVectorDBStorage, FaissVectorDBStorage。
+- **对于将数据存储在集合（collection）中的数据库，通过在集合名称前添加工作空间前缀来实现：** RedisKVStorage, RedisDocStatusStorage, MilvusVectorDBStorage, QdrantVectorDBStorage, MongoKVStorage, MongoDocStatusStorage, MongoVectorDBStorage, MongoGraphStorage, PGGraphStorage。
+- **对于关系型数据库，数据隔离通过向表中添加 `workspace` 字段进行数据的逻辑隔离：** PGKVStorage, PGVectorStorage, PGDocStatusStorage。
+
+- **对于Neo4j图数据库，通过label来实现数据的逻辑隔离：** Neo4JStorage。
+
+为了保持对遗留数据的兼容，在未配置工作空间时PostgreSQL的默认工作空间为`default`，Neo4j的默认工作空间为`base`。对于所有的外部存储，系统都提供了专用的工作空间环境变量，用于覆盖公共的 `WORKSPACE`环境变量配置。这些适用于指定存储类型的工作空间环境变量为：`REDIS_WORKSPACE`, `MILVUS_WORKSPACE`, `QDRANT_WORKSPACE`, `MONGODB_WORKSPACE`, `POSTGRES_WORKSPACE`, `NEO4J_WORKSPACE`。
+
 ## 编辑实体和关系
 
 LightRAG现在支持全面的知识图谱管理功能，允许您在知识图谱中创建、编辑和删除实体和关系。
@@ -1170,17 +1182,17 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
         from lightrag.llm.openai import openai_complete_if_cache, openai_embed
         from lightrag.utils import EmbeddingFunc
         import os
-
+    
         async def load_existing_lightrag():
             # 首先，创建或加载现有的 LightRAG 实例
             lightrag_working_dir = "./existing_lightrag_storage"
-
+    
             # 检查是否存在之前的 LightRAG 实例
             if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
                 print("✅ Found existing LightRAG instance, loading...")
             else:
                 print("❌ No existing LightRAG instance found, will create new one")
-
+    
             # 使用您的配置创建/加载 LightRAG 实例
             lightrag_instance = LightRAG(
                 working_dir=lightrag_working_dir,
@@ -1203,10 +1215,10 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
                     ),
                 )
             )
-
+    
             # 初始化存储（如果有现有数据，这将加载现有数据）
             await lightrag_instance.initialize_storages()
-
+    
             # 现在使用现有的 LightRAG 实例初始化 RAGAnything
             rag = RAGAnything(
                 lightrag=lightrag_instance,  # 传递现有的 LightRAG 实例
@@ -1235,20 +1247,20 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
                 )
                 # 注意：working_dir、llm_model_func、embedding_func 等都从 lightrag_instance 继承
             )
-
+    
             # 查询现有的知识库
             result = await rag.query_with_multimodal(
                 "What data has been processed in this LightRAG instance?",
                 mode="hybrid"
             )
             print("Query result:", result)
-
+    
             # 向现有的 LightRAG 实例添加新的多模态文档
             await rag.process_document_complete(
                 file_path="path/to/new/multimodal_document.pdf",
                 output_dir="./output"
             )
-
+    
         if __name__ == "__main__":
             asyncio.run(load_existing_lightrag())
     ```
diff --git a/README.md b/README.md
index 39353ef8..6650ada8 100644
--- a/README.md
+++ b/README.md
@@ -239,6 +239,7 @@ A full list of LightRAG init parameters:
 | **Parameter** | **Type** | **Explanation** | **Default** |
 |--------------|----------|-----------------|-------------|
 | **working_dir** | `str` | Directory where the cache will be stored | `lightrag_cache+timestamp` |
+| **workspace** | `str` | Workspace name for data isolation between different LightRAG instances |  |
 | **kv_storage** | `str` | Storage type for documents and text chunks. Supported types: `JsonKVStorage`,`PGKVStorage`,`RedisKVStorage`,`MongoKVStorage` | `JsonKVStorage` |
 | **vector_storage** | `str` | Storage type for embedding vectors. Supported types: `NanoVectorDBStorage`,`PGVectorStorage`,`MilvusVectorDBStorage`,`ChromaVectorDBStorage`,`FaissVectorDBStorage`,`MongoVectorDBStorage`,`QdrantVectorDBStorage` | `NanoVectorDBStorage` |
 | **graph_storage** | `str` | Storage type for graph edges and nodes. Supported types: `NetworkXStorage`,`Neo4JStorage`,`PGGraphStorage`,`AGEStorage` | `NetworkXStorage` |
@@ -796,7 +797,7 @@ For production level scenarios you will most likely want to leverage an enterpri
   create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
   CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
   ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
-
+  
   -- drop if necessary
   drop INDEX entity_p_idx;
   drop INDEX vertex_p_idx;
@@ -895,6 +896,17 @@ async def initialize_rag():
 
 </details>
 
+### Data Isolation Between LightRAG Instances
+
+The `workspace` parameter ensures data isolation between different LightRAG instances. Once initialized, the `workspace` is immutable and cannot be changed. Here is how workspaces are implemented for different types of storage:
+
+- **For local file-based databases, data isolation is achieved through workspace subdirectories:** `JsonKVStorage`, `JsonDocStatusStorage`, `NetworkXStorage`, `NanoVectorDBStorage`, `FaissVectorDBStorage`.
+- **For databases that store data in collections, it's done by adding a workspace prefix to the collection name:** `RedisKVStorage`, `RedisDocStatusStorage`, `MilvusVectorDBStorage`, `QdrantVectorDBStorage`, `MongoKVStorage`, `MongoDocStatusStorage`, `MongoVectorDBStorage`, `MongoGraphStorage`, `PGGraphStorage`.
+- **For relational databases, data isolation is achieved by adding a `workspace` field to the tables for logical data separation:** `PGKVStorage`, `PGVectorStorage`, `PGDocStatusStorage`.
+- **For the Neo4j graph database, logical data isolation is achieved through labels:** `Neo4JStorage`.
+
+To maintain compatibility with legacy data, the default workspace for PostgreSQL is `default` and for Neo4j is `base` when no workspace is configured. For all external storages, the system provides dedicated workspace environment variables to override the common `WORKSPACE` environment variable configuration. These storage-specific workspace environment variables are: `REDIS_WORKSPACE`, `MILVUS_WORKSPACE`, `QDRANT_WORKSPACE`, `MONGODB_WORKSPACE`, `POSTGRES_WORKSPACE`, `NEO4J_WORKSPACE`.
+
 ## Edit Entities and Relations
 
 LightRAG now supports comprehensive knowledge graph management capabilities, allowing you to create, edit, and delete entities and relationships within your knowledge graph.
@@ -1219,17 +1231,17 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
         from lightrag.llm.openai import openai_complete_if_cache, openai_embed
         from lightrag.utils import EmbeddingFunc
         import os
-
+    
         async def load_existing_lightrag():
             # First, create or load an existing LightRAG instance
             lightrag_working_dir = "./existing_lightrag_storage"
-
+    
             # Check if previous LightRAG instance exists
             if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
                 print("✅ Found existing LightRAG instance, loading...")
             else:
                 print("❌ No existing LightRAG instance found, will create new one")
-
+    
             # Create/Load LightRAG instance with your configurations
             lightrag_instance = LightRAG(
                 working_dir=lightrag_working_dir,
@@ -1252,10 +1264,10 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
                     ),
                 )
             )
-
+    
             # Initialize storage (this will load existing data if available)
             await lightrag_instance.initialize_storages()
-
+    
             # Now initialize RAGAnything with the existing LightRAG instance
             rag = RAGAnything(
                 lightrag=lightrag_instance,  # Pass the existing LightRAG instance
@@ -1284,20 +1296,20 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
                 )
                 # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
             )
-
+    
             # Query the existing knowledge base
             result = await rag.query_with_multimodal(
                 "What data has been processed in this LightRAG instance?",
                 mode="hybrid"
             )
             print("Query result:", result)
-
+    
             # Add new multimodal documents to the existing LightRAG instance
             await rag.process_document_complete(
                 file_path="path/to/new/multimodal_document.pdf",
                 output_dir="./output"
             )
-
+    
         if __name__ == "__main__":
             asyncio.run(load_existing_lightrag())
     ```