Refactor setup.py to utilize pyproject.toml for project installation.

2026-01-08 04:39:55 +00:00 · 2025-07-05 11:19:00 +08:00 · 2025-07-05 11:19:00 +08:00 · 2e2b9f3b48
commit 2e2b9f3b48
parent 8cb89e35e1
5 changed files with 120 additions and 127 deletions
--- a/README-zh.md
+++ b/README-zh.md
@ -757,6 +757,8 @@ async def initialize_rag():

 <details>
 <summary> <b>使用Faiss进行存储</b> </summary>
+在使用Faiss向量数据库之前必须手工安装`faiss-cpu`或`faiss-gpu`。
+

 - 安装所需依赖：

@ -818,7 +820,7 @@ rag = LightRAG(
  create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
  CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
  ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
-
+  
  -- 如有必要可以删除
  drop INDEX entity_p_idx;
  drop INDEX vertex_p_idx;
@ -1164,17 +1166,17 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
        from lightrag.llm.openai import openai_complete_if_cache, openai_embed
        from lightrag.utils import EmbeddingFunc
        import os
-
+    
        async def load_existing_lightrag():
            # 首先，创建或加载现有的 LightRAG 实例
            lightrag_working_dir = "./existing_lightrag_storage"
-
+    
            # 检查是否存在之前的 LightRAG 实例
            if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
                print("✅ Found existing LightRAG instance, loading...")
            else:
                print("❌ No existing LightRAG instance found, will create new one")
-
+    
            # 使用您的配置创建/加载 LightRAG 实例
            lightrag_instance = LightRAG(
                working_dir=lightrag_working_dir,
@ -1197,10 +1199,10 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
                    ),
                )
            )
-
+    
            # 初始化存储（如果有现有数据，这将加载现有数据）
            await lightrag_instance.initialize_storages()
-
+    
            # 现在使用现有的 LightRAG 实例初始化 RAGAnything
            rag = RAGAnything(
                lightrag=lightrag_instance,  # 传递现有的 LightRAG 实例
@ -1229,20 +1231,20 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
                )
                # 注意：working_dir、llm_model_func、embedding_func 等都从 lightrag_instance 继承
            )
-
+    
            # 查询现有的知识库
            result = await rag.query_with_multimodal(
                "What data has been processed in this LightRAG instance?",
                mode="hybrid"
            )
            print("Query result:", result)
-
+    
            # 向现有的 LightRAG 实例添加新的多模态文档
            await rag.process_document_complete(
                file_path="path/to/new/multimodal_document.pdf",
                output_dir="./output"
            )
-
+    
        if __name__ == "__main__":
            asyncio.run(load_existing_lightrag())
    ```
--- a/README.md
+++ b/README.md
@ -792,7 +792,7 @@ For production level scenarios you will most likely want to leverage an enterpri
  create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
  CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
  ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
-
+  
  -- drop if necessary
  drop INDEX entity_p_idx;
  drop INDEX vertex_p_idx;
@ -819,6 +819,8 @@ For production level scenarios you will most likely want to leverage an enterpri

 <details>
 <summary> <b>Using Faiss for Storage</b> </summary>
+You must manually install `faiss-cpu` or `faiss-gpu` before using the Faiss vector database.
+

 - Install the required dependencies:

@ -1178,17 +1180,17 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
        from lightrag.llm.openai import openai_complete_if_cache, openai_embed
        from lightrag.utils import EmbeddingFunc
        import os
-
+    
        async def load_existing_lightrag():
            # First, create or load an existing LightRAG instance
            lightrag_working_dir = "./existing_lightrag_storage"
-
+    
            # Check if previous LightRAG instance exists
            if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
                print("✅ Found existing LightRAG instance, loading...")
            else:
                print("❌ No existing LightRAG instance found, will create new one")
-
+    
            # Create/Load LightRAG instance with your configurations
            lightrag_instance = LightRAG(
                working_dir=lightrag_working_dir,
@ -1211,10 +1213,10 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
                    ),
                )
            )
-
+    
            # Initialize storage (this will load existing data if available)
            await lightrag_instance.initialize_storages()
-
+    
            # Now initialize RAGAnything with the existing LightRAG instance
            rag = RAGAnything(
                lightrag=lightrag_instance,  # Pass the existing LightRAG instance
@ -1243,20 +1245,20 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
                )
                # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
            )
-
+    
            # Query the existing knowledge base
            result = await rag.query_with_multimodal(
                "What data has been processed in this LightRAG instance?",
                mode="hybrid"
            )
            print("Query result:", result)
-
+    
            # Add new multimodal documents to the existing LightRAG instance
            await rag.process_document_complete(
                file_path="path/to/new/multimodal_document.pdf",
                output_dir="./output"
            )
-
+    
        if __name__ == "__main__":
            asyncio.run(load_existing_lightrag())
    ```
--- a/lightrag/kg/faiss_impl.py
+++ b/lightrag/kg/faiss_impl.py
@ -17,11 +17,7 @@ from .shared_storage import (
    set_all_update_flags,
 )

-USE_GPU = os.getenv("FAISS_USE_GPU", "0") == "1"
-FAISS_PACKAGE = "faiss-gpu" if USE_GPU else "faiss-cpu"
-if not pm.is_installed(FAISS_PACKAGE):
-    pm.install(FAISS_PACKAGE)
-
+# NOTE: faiss is no longer installed automatically at import time; install `faiss-cpu` or `faiss-gpu` manually before using the Faiss vector storage.
 import faiss  # type: ignore


--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,93 @@
+[build-system]
+requires = ["setuptools>=64", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "lightrag-hku"
+dynamic = ["version"]
+authors = [
+    {name = "Zirui Guo"}
+]
+description = "LightRAG: Simple and Fast Retrieval-Augmented Generation"
+readme = "README.md"
+license = {text = "MIT"}
+requires-python = ">=3.9"
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+    "Intended Audience :: Developers",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+]
+dependencies = [
+    "aiohttp",
+    "configparser",
+    "dotenv",
+    "future",
+    "numpy",
+    "pandas>=2.0.0",
+    "pipmaster",
+    "pydantic",
+    "python-dotenv",
+    "pyuca",
+    "setuptools",
+    "tenacity",
+    "tiktoken",
+    "xlsxwriter>=3.1.0",
+]
+
+[project.optional-dependencies]
+api = [
+    # Core dependencies
+    "aiohttp",
+    "configparser",
+    "dotenv",
+    "future",
+    "numpy",
+    "openai",
+    "pandas>=2.0.0",
+    "pipmaster",
+    "pydantic",
+    "python-dotenv",
+    "pyuca",
+    "setuptools",
+    "tenacity",
+    "tiktoken",
+    "xlsxwriter>=3.1.0",
+    # API-specific dependencies
+    "aiofiles",
+    "ascii_colors",
+    "asyncpg",
+    "distro",
+    "fastapi",
+    "httpcore",
+    "httpx",
+    "jiter",
+    "passlib[bcrypt]",
+    "PyJWT",
+    "python-jose[cryptography]",
+    "python-multipart",
+    "pytz",
+    "uvicorn",
+]
+
+[project.scripts]
+lightrag-server = "lightrag.api.lightrag_server:main"
+lightrag-gunicorn = "lightrag.api.run_with_gunicorn:main"
+
+[project.urls]
+Homepage = "https://github.com/HKUDS/LightRAG"
+Documentation = "https://github.com/HKUDS/LightRAG"
+Repository = "https://github.com/HKUDS/LightRAG"
+"Bug Tracker" = "https://github.com/HKUDS/LightRAG/issues"
+
+[tool.setuptools]
+packages = ["lightrag"]
+include-package-data = true
+
+[tool.setuptools.dynamic]
+version = {attr = "lightrag.__version__"}
+
+[tool.setuptools.package-data]
+lightrag = ["api/webui/**/*"]
--- a/setup.py
+++ b/setup.py
@ -1,106 +1,6 @@
-import setuptools
-from pathlib import Path
+# Minimal setup.py for backward compatibility
+# Primary configuration is now in pyproject.toml

+from setuptools import setup

-# Reading the long description from README.md
-def read_long_description():
-    try:
-        return Path("README.md").read_text(encoding="utf-8")
-    except FileNotFoundError:
-        return "A description of LightRAG is currently unavailable."
-
-
-# Retrieving metadata from __init__.py
-def retrieve_metadata():
-    vars2find = ["__author__", "__version__", "__url__"]
-    vars2readme = {}
-    try:
-        with open("./lightrag/__init__.py") as f:
-            for line in f.readlines():
-                for v in vars2find:
-                    if line.startswith(v):
-                        line = (
-                            line.replace(" ", "")
-                            .replace('"', "")
-                            .replace("'", "")
-                            .strip()
-                        )
-                        vars2readme[v] = line.split("=")[1]
-    except FileNotFoundError:
-        raise FileNotFoundError("Metadata file './lightrag/__init__.py' not found.")
-
-    # Checking if all required variables are found
-    missing_vars = [v for v in vars2find if v not in vars2readme]
-    if missing_vars:
-        raise ValueError(
-            f"Missing required metadata variables in __init__.py: {missing_vars}"
-        )
-
-    return vars2readme
-
-
-# Reading dependencies from requirements.txt
-def read_requirements(file_path="requirements.txt"):
-    deps = []
-    try:
-        with open(file_path) as f:
-            deps = [
-                line.strip() for line in f if line.strip() and not line.startswith("#")
-            ]
-    except FileNotFoundError:
-        print(f"Warning: '{file_path}' not found. No dependencies will be installed.")
-    return deps
-
-
-def read_api_requirements():
-    return read_requirements("lightrag/api/requirements.txt")
-
-
-def read_extra_requirements():
-    return read_requirements("lightrag/tools/lightrag_visualizer/requirements.txt")
-
-
-metadata = retrieve_metadata()
-long_description = read_long_description()
-requirements = read_requirements()
-
-setuptools.setup(
-    name="lightrag-hku",
-    url=metadata["__url__"],
-    version=metadata["__version__"],
-    author=metadata["__author__"],
-    description="LightRAG: Simple and Fast Retrieval-Augmented Generation",
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    packages=setuptools.find_packages(
-        exclude=("tests*", "docs*")
-    ),  # Automatically find packages
-    classifiers=[
-        "Development Status :: 4 - Beta",
-        "Programming Language :: Python :: 3",
-        "License :: OSI Approved :: MIT License",
-        "Operating System :: OS Independent",
-        "Intended Audience :: Developers",
-        "Topic :: Software Development :: Libraries :: Python Modules",
-    ],
-    python_requires=">=3.9",
-    install_requires=requirements,
-    include_package_data=True,  # Includes non-code files from MANIFEST.in
-    project_urls={  # Additional project metadata
-        "Documentation": metadata.get("__url__", ""),
-        "Source": metadata.get("__url__", ""),
-        "Tracker": f"{metadata.get('__url__', '')}/issues"
-        if metadata.get("__url__")
-        else "",
-    },
-    extras_require={
-        "api": requirements + read_api_requirements(),
-        "tools": read_extra_requirements(),  # API requirements as optional
-    },
-    entry_points={
-        "console_scripts": [
-            "lightrag-server=lightrag.api.lightrag_server:main [api]",
-            "lightrag-gunicorn=lightrag.api.run_with_gunicorn:main [api]",
-        ],
-    },
-)
+setup()