From 2e2b9f3b4831bbb4f45b063f3a807cf523c98dc0 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sat, 5 Jul 2025 11:19:00 +0800 Subject: [PATCH] Refactor `setup.py` to utilize `pyproject.toml` for project installation. --- README-zh.md | 20 +++---- README.md | 20 +++---- lightrag/kg/faiss_impl.py | 6 +-- pyproject.toml | 93 ++++++++++++++++++++++++++++++++ setup.py | 108 ++------------------------------------ 5 files changed, 120 insertions(+), 127 deletions(-) create mode 100644 pyproject.toml diff --git a/README-zh.md b/README-zh.md index 917736a6..07c1375a 100644 --- a/README-zh.md +++ b/README-zh.md @@ -757,6 +757,8 @@ async def initialize_rag():
使用Faiss进行存储 +在使用Faiss向量数据库之前必须手工安装`faiss-cpu`或`faiss-gpu`。 + - 安装所需依赖: @@ -818,7 +820,7 @@ rag = LightRAG( create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype)); CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties); ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx; - + -- 如有必要可以删除 drop INDEX entity_p_idx; drop INDEX vertex_p_idx; @@ -1164,17 +1166,17 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现 from lightrag.llm.openai import openai_complete_if_cache, openai_embed from lightrag.utils import EmbeddingFunc import os - + async def load_existing_lightrag(): # 首先,创建或加载现有的 LightRAG 实例 lightrag_working_dir = "./existing_lightrag_storage" - + # 检查是否存在之前的 LightRAG 实例 if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir): print("✅ Found existing LightRAG instance, loading...") else: print("❌ No existing LightRAG instance found, will create new one") - + # 使用您的配置创建/加载 LightRAG 实例 lightrag_instance = LightRAG( working_dir=lightrag_working_dir, @@ -1197,10 +1199,10 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现 ), ) ) - + # 初始化存储(如果有现有数据,这将加载现有数据) await lightrag_instance.initialize_storages() - + # 现在使用现有的 LightRAG 实例初始化 RAGAnything rag = RAGAnything( lightrag=lightrag_instance, # 传递现有的 LightRAG 实例 @@ -1229,20 +1231,20 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现 ) # 注意:working_dir、llm_model_func、embedding_func 等都从 lightrag_instance 继承 ) - + # 查询现有的知识库 result = await rag.query_with_multimodal( "What data has been processed in this LightRAG instance?", mode="hybrid" ) print("Query result:", result) - + # 向现有的 LightRAG 实例添加新的多模态文档 await rag.process_document_complete( file_path="path/to/new/multimodal_document.pdf", output_dir="./output" ) - + if __name__ == "__main__": asyncio.run(load_existing_lightrag()) ``` diff --git a/README.md b/README.md index 
617dc5e6..f6fffdf5 100644 --- a/README.md +++ b/README.md @@ -792,7 +792,7 @@ For production level scenarios you will most likely want to leverage an enterpri create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype)); CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties); ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx; - + -- drop if necessary drop INDEX entity_p_idx; drop INDEX vertex_p_idx; @@ -819,6 +819,8 @@ For production level scenarios you will most likely want to leverage an enterpri
Using Faiss for Storage +You must manually install `faiss-cpu` or `faiss-gpu` before using the Faiss vector database. + - Install the required dependencies: @@ -1178,17 +1180,17 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/ from lightrag.llm.openai import openai_complete_if_cache, openai_embed from lightrag.utils import EmbeddingFunc import os - + async def load_existing_lightrag(): # First, create or load an existing LightRAG instance lightrag_working_dir = "./existing_lightrag_storage" - + # Check if previous LightRAG instance exists if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir): print("✅ Found existing LightRAG instance, loading...") else: print("❌ No existing LightRAG instance found, will create new one") - + # Create/Load LightRAG instance with your configurations lightrag_instance = LightRAG( working_dir=lightrag_working_dir, @@ -1211,10 +1213,10 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/ ), ) ) - + # Initialize storage (this will load existing data if available) await lightrag_instance.initialize_storages() - + # Now initialize RAGAnything with the existing LightRAG instance rag = RAGAnything( lightrag=lightrag_instance, # Pass the existing LightRAG instance @@ -1243,20 +1245,20 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/ ) # Note: working_dir, llm_model_func, embedding_func, etc. 
are inherited from lightrag_instance ) - + # Query the existing knowledge base result = await rag.query_with_multimodal( "What data has been processed in this LightRAG instance?", mode="hybrid" ) print("Query result:", result) - + # Add new multimodal documents to the existing LightRAG instance await rag.process_document_complete( file_path="path/to/new/multimodal_document.pdf", output_dir="./output" ) - + if __name__ == "__main__": asyncio.run(load_existing_lightrag()) ``` diff --git a/lightrag/kg/faiss_impl.py b/lightrag/kg/faiss_impl.py index fe188ffa..62f3663f 100644 --- a/lightrag/kg/faiss_impl.py +++ b/lightrag/kg/faiss_impl.py @@ -17,11 +17,7 @@ from .shared_storage import ( set_all_update_flags, ) -USE_GPU = os.getenv("FAISS_USE_GPU", "0") == "1" -FAISS_PACKAGE = "faiss-gpu" if USE_GPU else "faiss-cpu" -if not pm.is_installed(FAISS_PACKAGE): - pm.install(FAISS_PACKAGE) - +# You must manually install faiss-cpu or faiss-gpu before using FAISS vector db import faiss # type: ignore diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..b87df3bc --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,93 @@ +[build-system] +requires = ["setuptools>=64", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "lightrag-hku" +dynamic = ["version"] +authors = [ + {name = "Zirui Guo"} +] +description = "LightRAG: Simple and Fast Retrieval-Augmented Generation" +readme = "README.md" +license = {text = "MIT"} +requires-python = ">=3.9" +classifiers = [ + "Development Status :: 4 - Beta", + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Intended Audience :: Developers", + "Topic :: Software Development :: Libraries :: Python Modules", +] +dependencies = [ + "aiohttp", + "configparser", + "dotenv", + "future", + "numpy", + "pandas>=2.0.0", + "pipmaster", + "pydantic", + "python-dotenv", + "pyuca", + "setuptools", + "tenacity", + "tiktoken", + 
"xlsxwriter>=3.1.0", +] + +[project.optional-dependencies] +api = [ + # Core dependencies + "aiohttp", + "configparser", + "dotenv", + "future", + "numpy", + "openai", + "pandas>=2.0.0", + "pipmaster", + "pydantic", + "python-dotenv", + "pyuca", + "setuptools", + "tenacity", + "tiktoken", + "xlsxwriter>=3.1.0", + # API-specific dependencies + "aiofiles", + "ascii_colors", + "asyncpg", + "distro", + "fastapi", + "httpcore", + "httpx", + "jiter", + "passlib[bcrypt]", + "PyJWT", + "python-jose[cryptography]", + "python-multipart", + "pytz", + "uvicorn", +] + +[project.scripts] +lightrag-server = "lightrag.api.lightrag_server:main" +lightrag-gunicorn = "lightrag.api.run_with_gunicorn:main" + +[project.urls] +Homepage = "https://github.com/HKUDS/LightRAG" +Documentation = "https://github.com/HKUDS/LightRAG" +Repository = "https://github.com/HKUDS/LightRAG" +"Bug Tracker" = "https://github.com/HKUDS/LightRAG/issues" + +[tool.setuptools] +packages = {find = {include = ["lightrag*"]}} # discover subpackages too (lightrag.api, lightrag.kg, lightrag.llm), as find_packages() did +include-package-data = true + +[tool.setuptools.dynamic] +version = {attr = "lightrag.__version__"} + +[tool.setuptools.package-data] +lightrag = ["api/webui/**/*"] diff --git a/setup.py b/setup.py index 6051a55b..655e2e9e 100644 --- a/setup.py +++ b/setup.py @@ -1,106 +1,6 @@ -import setuptools -from pathlib import Path +# Minimal setup.py for backward compatibility +# Primary configuration is now in pyproject.toml +from setuptools import setup -# Reading the long description from README.md -def read_long_description(): - try: - return Path("README.md").read_text(encoding="utf-8") - except FileNotFoundError: - return "A description of LightRAG is currently unavailable." 
- - -# Retrieving metadata from __init__.py -def retrieve_metadata(): - vars2find = ["__author__", "__version__", "__url__"] - vars2readme = {} - try: - with open("./lightrag/__init__.py") as f: - for line in f.readlines(): - for v in vars2find: - if line.startswith(v): - line = ( - line.replace(" ", "") - .replace('"', "") - .replace("'", "") - .strip() - ) - vars2readme[v] = line.split("=")[1] - except FileNotFoundError: - raise FileNotFoundError("Metadata file './lightrag/__init__.py' not found.") - - # Checking if all required variables are found - missing_vars = [v for v in vars2find if v not in vars2readme] - if missing_vars: - raise ValueError( - f"Missing required metadata variables in __init__.py: {missing_vars}" - ) - - return vars2readme - - -# Reading dependencies from requirements.txt -def read_requirements(file_path="requirements.txt"): - deps = [] - try: - with open(file_path) as f: - deps = [ - line.strip() for line in f if line.strip() and not line.startswith("#") - ] - except FileNotFoundError: - print(f"Warning: '{file_path}' not found. 
No dependencies will be installed.") - return deps - - -def read_api_requirements(): - return read_requirements("lightrag/api/requirements.txt") - - -def read_extra_requirements(): - return read_requirements("lightrag/tools/lightrag_visualizer/requirements.txt") - - -metadata = retrieve_metadata() -long_description = read_long_description() -requirements = read_requirements() - -setuptools.setup( - name="lightrag-hku", - url=metadata["__url__"], - version=metadata["__version__"], - author=metadata["__author__"], - description="LightRAG: Simple and Fast Retrieval-Augmented Generation", - long_description=long_description, - long_description_content_type="text/markdown", - packages=setuptools.find_packages( - exclude=("tests*", "docs*") - ), # Automatically find packages - classifiers=[ - "Development Status :: 4 - Beta", - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - "Intended Audience :: Developers", - "Topic :: Software Development :: Libraries :: Python Modules", - ], - python_requires=">=3.9", - install_requires=requirements, - include_package_data=True, # Includes non-code files from MANIFEST.in - project_urls={ # Additional project metadata - "Documentation": metadata.get("__url__", ""), - "Source": metadata.get("__url__", ""), - "Tracker": f"{metadata.get('__url__', '')}/issues" - if metadata.get("__url__") - else "", - }, - extras_require={ - "api": requirements + read_api_requirements(), - "tools": read_extra_requirements(), # API requirements as optional - }, - entry_points={ - "console_scripts": [ - "lightrag-server=lightrag.api.lightrag_server:main [api]", - "lightrag-gunicorn=lightrag.api.run_with_gunicorn:main [api]", - ], - }, -) +setup()