From 2e2b9f3b4831bbb4f45b063f3a807cf523c98dc0 Mon Sep 17 00:00:00 2001
From: yangdx <gzdaniel@me.com>
Date: Sat, 5 Jul 2025 11:19:00 +0800
Subject: [PATCH] Refactor `setup.py` to utilize `pyproject.toml` for project
 installation.

---
 README-zh.md              |  20 +++----
 README.md                 |  20 +++----
 lightrag/kg/faiss_impl.py |   6 +--
 pyproject.toml            |  93 ++++++++++++++++++++++++++++++++
 setup.py                  | 108 ++------------------------------------
 5 files changed, 120 insertions(+), 127 deletions(-)
 create mode 100644 pyproject.toml
diff --git a/README-zh.md b/README-zh.md
index 917736a6..07c1375a 100644
--- a/README-zh.md
+++ b/README-zh.md
@@ -757,6 +757,8 @@ async def initialize_rag():
 
 <details>
 <summary> <b>使用Faiss进行存储</b> </summary>
+在使用Faiss向量数据库之前必须手工安装`faiss-cpu`或`faiss-gpu`。
+
 
 - 安装所需依赖：
 
@@ -818,7 +820,7 @@ rag = LightRAG(
   create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
   CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
   ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
-
+  
   -- 如有必要可以删除
   drop INDEX entity_p_idx;
   drop INDEX vertex_p_idx;
@@ -1164,17 +1166,17 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
         from lightrag.llm.openai import openai_complete_if_cache, openai_embed
         from lightrag.utils import EmbeddingFunc
         import os
-
+    
         async def load_existing_lightrag():
             # 首先，创建或加载现有的 LightRAG 实例
             lightrag_working_dir = "./existing_lightrag_storage"
-
+    
             # 检查是否存在之前的 LightRAG 实例
             if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
                 print("✅ Found existing LightRAG instance, loading...")
             else:
                 print("❌ No existing LightRAG instance found, will create new one")
-
+    
             # 使用您的配置创建/加载 LightRAG 实例
             lightrag_instance = LightRAG(
                 working_dir=lightrag_working_dir,
@@ -1197,10 +1199,10 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
                     ),
                 )
             )
-
+    
             # 初始化存储（如果有现有数据，这将加载现有数据）
             await lightrag_instance.initialize_storages()
-
+    
             # 现在使用现有的 LightRAG 实例初始化 RAGAnything
             rag = RAGAnything(
                 lightrag=lightrag_instance,  # 传递现有的 LightRAG 实例
@@ -1229,20 +1231,20 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
                 )
                 # 注意：working_dir、llm_model_func、embedding_func 等都从 lightrag_instance 继承
             )
-
+    
             # 查询现有的知识库
             result = await rag.query_with_multimodal(
                 "What data has been processed in this LightRAG instance?",
                 mode="hybrid"
             )
             print("Query result:", result)
-
+    
             # 向现有的 LightRAG 实例添加新的多模态文档
             await rag.process_document_complete(
                 file_path="path/to/new/multimodal_document.pdf",
                 output_dir="./output"
             )
-
+    
         if __name__ == "__main__":
             asyncio.run(load_existing_lightrag())
     ```
diff --git a/README.md b/README.md
index 617dc5e6..f6fffdf5 100644
--- a/README.md
+++ b/README.md
@@ -792,7 +792,7 @@ For production level scenarios you will most likely want to leverage an enterpri
   create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
   CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
   ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
-
+  
   -- drop if necessary
   drop INDEX entity_p_idx;
   drop INDEX vertex_p_idx;
@@ -819,6 +819,8 @@ For production level scenarios you will most likely want to leverage an enterpri
 
 <details>
 <summary> <b>Using Faiss for Storage</b> </summary>
+You must manually install `faiss-cpu` or `faiss-gpu` before using the Faiss vector database.
+
 
 - Install the required dependencies:
 
@@ -1178,17 +1180,17 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
         from lightrag.llm.openai import openai_complete_if_cache, openai_embed
         from lightrag.utils import EmbeddingFunc
         import os
-
+    
         async def load_existing_lightrag():
             # First, create or load an existing LightRAG instance
             lightrag_working_dir = "./existing_lightrag_storage"
-
+    
             # Check if previous LightRAG instance exists
             if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
                 print("✅ Found existing LightRAG instance, loading...")
             else:
                 print("❌ No existing LightRAG instance found, will create new one")
-
+    
             # Create/Load LightRAG instance with your configurations
             lightrag_instance = LightRAG(
                 working_dir=lightrag_working_dir,
@@ -1211,10 +1213,10 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
                     ),
                 )
             )
-
+    
             # Initialize storage (this will load existing data if available)
             await lightrag_instance.initialize_storages()
-
+    
             # Now initialize RAGAnything with the existing LightRAG instance
             rag = RAGAnything(
                 lightrag=lightrag_instance,  # Pass the existing LightRAG instance
@@ -1243,20 +1245,20 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
                 )
                 # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
             )
-
+    
             # Query the existing knowledge base
             result = await rag.query_with_multimodal(
                 "What data has been processed in this LightRAG instance?",
                 mode="hybrid"
             )
             print("Query result:", result)
-
+    
             # Add new multimodal documents to the existing LightRAG instance
             await rag.process_document_complete(
                 file_path="path/to/new/multimodal_document.pdf",
                 output_dir="./output"
             )
-
+    
         if __name__ == "__main__":
             asyncio.run(load_existing_lightrag())
     ```
diff --git a/lightrag/kg/faiss_impl.py b/lightrag/kg/faiss_impl.py
index fe188ffa..62f3663f 100644
--- a/lightrag/kg/faiss_impl.py
+++ b/lightrag/kg/faiss_impl.py
@@ -17,11 +17,7 @@ from .shared_storage import (
     set_all_update_flags,
 )
 
-USE_GPU = os.getenv("FAISS_USE_GPU", "0") == "1"
-FAISS_PACKAGE = "faiss-gpu" if USE_GPU else "faiss-cpu"
-if not pm.is_installed(FAISS_PACKAGE):
-    pm.install(FAISS_PACKAGE)
-
+# You must manually install faiss-cpu or faiss-gpu before using FAISS vector db
 import faiss  # type: ignore
 
 
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..b87df3bc
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,93 @@
+[build-system]
+requires = ["setuptools>=64", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "lightrag-hku"
+dynamic = ["version"]
+authors = [
+    {name = "Zirui Guo"}
+]
+description = "LightRAG: Simple and Fast Retrieval-Augmented Generation"
+readme = "README.md"
+license = {text = "MIT"}
+requires-python = ">=3.9"
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+    "Intended Audience :: Developers",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+]
+dependencies = [
+    "aiohttp",
+    "configparser",
+    "dotenv",
+    "future",
+    "numpy",
+    "pandas>=2.0.0",
+    "pipmaster",
+    "pydantic",
+    "python-dotenv",
+    "pyuca",
+    "setuptools",
+    "tenacity",
+    "tiktoken",
+    "xlsxwriter>=3.1.0",
+]
+
+[project.optional-dependencies]
+api = [
+    # Core dependencies
+    "aiohttp",
+    "configparser",
+    "dotenv",
+    "future",
+    "numpy",
+    "openai",
+    "pandas>=2.0.0",
+    "pipmaster",
+    "pydantic",
+    "python-dotenv",
+    "pyuca",
+    "setuptools",
+    "tenacity",
+    "tiktoken",
+    "xlsxwriter>=3.1.0",
+    # API-specific dependencies
+    "aiofiles",
+    "ascii_colors",
+    "asyncpg",
+    "distro",
+    "fastapi",
+    "httpcore",
+    "httpx",
+    "jiter",
+    "passlib[bcrypt]",
+    "PyJWT",
+    "python-jose[cryptography]",
+    "python-multipart",
+    "pytz",
+    "uvicorn",
+]
+
+[project.scripts]
+lightrag-server = "lightrag.api.lightrag_server:main"
+lightrag-gunicorn = "lightrag.api.run_with_gunicorn:main"
+
+[project.urls]
+Homepage = "https://github.com/HKUDS/LightRAG"
+Documentation = "https://github.com/HKUDS/LightRAG"
+Repository = "https://github.com/HKUDS/LightRAG"
+"Bug Tracker" = "https://github.com/HKUDS/LightRAG/issues"
+
+[tool.setuptools]
+packages = {find = {include = ["lightrag*"]}}
+include-package-data = true
+
+[tool.setuptools.dynamic]
+version = {attr = "lightrag.__version__"}
+
+[tool.setuptools.package-data]
+lightrag = ["api/webui/**/*"]
diff --git a/setup.py b/setup.py
index 6051a55b..655e2e9e 100644
--- a/setup.py
+++ b/setup.py
@@ -1,106 +1,6 @@
-import setuptools
-from pathlib import Path
+# Minimal setup.py for backward compatibility
+# Primary configuration is now in pyproject.toml
 
+from setuptools import setup
 
-# Reading the long description from README.md
-def read_long_description():
-    try:
-        return Path("README.md").read_text(encoding="utf-8")
-    except FileNotFoundError:
-        return "A description of LightRAG is currently unavailable."
-
-
-# Retrieving metadata from __init__.py
-def retrieve_metadata():
-    vars2find = ["__author__", "__version__", "__url__"]
-    vars2readme = {}
-    try:
-        with open("./lightrag/__init__.py") as f:
-            for line in f.readlines():
-                for v in vars2find:
-                    if line.startswith(v):
-                        line = (
-                            line.replace(" ", "")
-                            .replace('"', "")
-                            .replace("'", "")
-                            .strip()
-                        )
-                        vars2readme[v] = line.split("=")[1]
-    except FileNotFoundError:
-        raise FileNotFoundError("Metadata file './lightrag/__init__.py' not found.")
-
-    # Checking if all required variables are found
-    missing_vars = [v for v in vars2find if v not in vars2readme]
-    if missing_vars:
-        raise ValueError(
-            f"Missing required metadata variables in __init__.py: {missing_vars}"
-        )
-
-    return vars2readme
-
-
-# Reading dependencies from requirements.txt
-def read_requirements(file_path="requirements.txt"):
-    deps = []
-    try:
-        with open(file_path) as f:
-            deps = [
-                line.strip() for line in f if line.strip() and not line.startswith("#")
-            ]
-    except FileNotFoundError:
-        print(f"Warning: '{file_path}' not found. No dependencies will be installed.")
-    return deps
-
-
-def read_api_requirements():
-    return read_requirements("lightrag/api/requirements.txt")
-
-
-def read_extra_requirements():
-    return read_requirements("lightrag/tools/lightrag_visualizer/requirements.txt")
-
-
-metadata = retrieve_metadata()
-long_description = read_long_description()
-requirements = read_requirements()
-
-setuptools.setup(
-    name="lightrag-hku",
-    url=metadata["__url__"],
-    version=metadata["__version__"],
-    author=metadata["__author__"],
-    description="LightRAG: Simple and Fast Retrieval-Augmented Generation",
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    packages=setuptools.find_packages(
-        exclude=("tests*", "docs*")
-    ),  # Automatically find packages
-    classifiers=[
-        "Development Status :: 4 - Beta",
-        "Programming Language :: Python :: 3",
-        "License :: OSI Approved :: MIT License",
-        "Operating System :: OS Independent",
-        "Intended Audience :: Developers",
-        "Topic :: Software Development :: Libraries :: Python Modules",
-    ],
-    python_requires=">=3.9",
-    install_requires=requirements,
-    include_package_data=True,  # Includes non-code files from MANIFEST.in
-    project_urls={  # Additional project metadata
-        "Documentation": metadata.get("__url__", ""),
-        "Source": metadata.get("__url__", ""),
-        "Tracker": f"{metadata.get('__url__', '')}/issues"
-        if metadata.get("__url__")
-        else "",
-    },
-    extras_require={
-        "api": requirements + read_api_requirements(),
-        "tools": read_extra_requirements(),  # API requirements as optional
-    },
-    entry_points={
-        "console_scripts": [
-            "lightrag-server=lightrag.api.lightrag_server:main [api]",
-            "lightrag-gunicorn=lightrag.api.run_with_gunicorn:main [api]",
-        ],
-    },
-)
+setup()