Refactor setup.py to utilize pyproject.toml for project installation.

This commit is contained in:
yangdx 2025-07-05 11:19:00 +08:00
parent 8cb89e35e1
commit 2e2b9f3b48
5 changed files with 120 additions and 127 deletions

View File

@ -757,6 +757,8 @@ async def initialize_rag():
<details>
<summary> <b>使用Faiss进行存储</b> </summary>
在使用 Faiss 向量数据库之前,必须手动安装 `faiss-cpu` 或 `faiss-gpu`。
- 安装所需依赖:
@ -818,7 +820,7 @@ rag = LightRAG(
create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
-- 如有必要可以删除
drop INDEX entity_p_idx;
drop INDEX vertex_p_idx;
@ -1164,17 +1166,17 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc
import os
async def load_existing_lightrag():
# 首先,创建或加载现有的 LightRAG 实例
lightrag_working_dir = "./existing_lightrag_storage"
# 检查是否存在之前的 LightRAG 实例
if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
print("✅ Found existing LightRAG instance, loading...")
else:
print("❌ No existing LightRAG instance found, will create new one")
# 使用您的配置创建/加载 LightRAG 实例
lightrag_instance = LightRAG(
working_dir=lightrag_working_dir,
@ -1197,10 +1199,10 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
),
)
)
# 初始化存储(如果有现有数据,这将加载现有数据)
await lightrag_instance.initialize_storages()
# 现在使用现有的 LightRAG 实例初始化 RAGAnything
rag = RAGAnything(
lightrag=lightrag_instance, # 传递现有的 LightRAG 实例
@ -1229,20 +1231,20 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
)
# 注意:working_dir、llm_model_func、embedding_func 等都从 lightrag_instance 继承
)
# 查询现有的知识库
result = await rag.query_with_multimodal(
"What data has been processed in this LightRAG instance?",
mode="hybrid"
)
print("Query result:", result)
# 向现有的 LightRAG 实例添加新的多模态文档
await rag.process_document_complete(
file_path="path/to/new/multimodal_document.pdf",
output_dir="./output"
)
if __name__ == "__main__":
asyncio.run(load_existing_lightrag())
```

View File

@ -792,7 +792,7 @@ For production level scenarios you will most likely want to leverage an enterpri
create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
-- drop if necessary
drop INDEX entity_p_idx;
drop INDEX vertex_p_idx;
@ -819,6 +819,8 @@ For production level scenarios you will most likely want to leverage an enterpri
<details>
<summary> <b>Using Faiss for Storage</b> </summary>
You must manually install faiss-cpu or faiss-gpu before using FAISS vector db.
Manually install `faiss-cpu` or `faiss-gpu` before using FAISS vector db.
- Install the required dependencies:
@ -1178,17 +1180,17 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc
import os
async def load_existing_lightrag():
# First, create or load an existing LightRAG instance
lightrag_working_dir = "./existing_lightrag_storage"
# Check if previous LightRAG instance exists
if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
print("✅ Found existing LightRAG instance, loading...")
else:
print("❌ No existing LightRAG instance found, will create new one")
# Create/Load LightRAG instance with your configurations
lightrag_instance = LightRAG(
working_dir=lightrag_working_dir,
@ -1211,10 +1213,10 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
),
)
)
# Initialize storage (this will load existing data if available)
await lightrag_instance.initialize_storages()
# Now initialize RAGAnything with the existing LightRAG instance
rag = RAGAnything(
lightrag=lightrag_instance, # Pass the existing LightRAG instance
@ -1243,20 +1245,20 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
)
# Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
)
# Query the existing knowledge base
result = await rag.query_with_multimodal(
"What data has been processed in this LightRAG instance?",
mode="hybrid"
)
print("Query result:", result)
# Add new multimodal documents to the existing LightRAG instance
await rag.process_document_complete(
file_path="path/to/new/multimodal_document.pdf",
output_dir="./output"
)
if __name__ == "__main__":
asyncio.run(load_existing_lightrag())
```

View File

@ -17,11 +17,7 @@ from .shared_storage import (
set_all_update_flags,
)
USE_GPU = os.getenv("FAISS_USE_GPU", "0") == "1"
FAISS_PACKAGE = "faiss-gpu" if USE_GPU else "faiss-cpu"
if not pm.is_installed(FAISS_PACKAGE):
pm.install(FAISS_PACKAGE)
# You must manually install faiss-cpu or faiss-gpu before using FAISS vector db
import faiss # type: ignore

93
pyproject.toml Normal file
View File

@ -0,0 +1,93 @@
# Build configuration: the project is built with the setuptools backend,
# which needs setuptools 64 or newer plus wheel in the build environment.
[build-system]
requires = ["setuptools>=64", "wheel"]
build-backend = "setuptools.build_meta"
# Core distribution metadata for the "lightrag-hku" package.
[project]
name = "lightrag-hku"
# The version is not hard-coded here; it is declared dynamic and resolved by
# the build backend (see the [tool.setuptools.dynamic] table in this file).
dynamic = ["version"]
authors = [
{name = "Zirui Guo"}
]
description = "LightRAG: Simple and Fast Retrieval-Augmented Generation"
readme = "README.md"
license = {text = "MIT"}
requires-python = ">=3.9"
classifiers = [
"Development Status :: 4 - Beta",
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Intended Audience :: Developers",
"Topic :: Software Development :: Libraries :: Python Modules",
]
# Runtime dependencies installed with the base package (no extras selected).
dependencies = [
"aiohttp",
"configparser",
"dotenv",
"future",
"numpy",
"pandas>=2.0.0",
"pipmaster",
"pydantic",
"python-dotenv",
"pyuca",
"setuptools",
"tenacity",
"tiktoken",
"xlsxwriter>=3.1.0",
]
# Optional dependency groups ("extras"). The only group defined is "api".
[project.optional-dependencies]
api = [
# Core dependencies (repeats the base `dependencies` list, plus "openai")
"aiohttp",
"configparser",
"dotenv",
"future",
"numpy",
"openai",
"pandas>=2.0.0",
"pipmaster",
"pydantic",
"python-dotenv",
"pyuca",
"setuptools",
"tenacity",
"tiktoken",
"xlsxwriter>=3.1.0",
# API-specific dependencies
"aiofiles",
"ascii_colors",
"asyncpg",
"distro",
"fastapi",
"httpcore",
"httpx",
"jiter",
"passlib[bcrypt]",
"PyJWT",
"python-jose[cryptography]",
"python-multipart",
"pytz",
"uvicorn",
]
# Console commands created at install time; each value is "module:function".
# Both targets live in the lightrag.api subpackage.
[project.scripts]
lightrag-server = "lightrag.api.lightrag_server:main"
lightrag-gunicorn = "lightrag.api.run_with_gunicorn:main"
# Project links published with the package metadata. Homepage, Documentation
# and Repository all point at the same GitHub repository.
[project.urls]
Homepage = "https://github.com/HKUDS/LightRAG"
Documentation = "https://github.com/HKUDS/LightRAG"
Repository = "https://github.com/HKUDS/LightRAG"
"Bug Tracker" = "https://github.com/HKUDS/LightRAG/issues"
[tool.setuptools]
include-package-data = true

# Discover `lightrag` together with all of its subpackages. An explicit
# `packages = ["lightrag"]` list would ship only the top-level package and
# leave out subpackages such as `lightrag.api`, which the console scripts
# declared in [project.scripts] import from.
[tool.setuptools.packages.find]
include = ["lightrag*"]
# Supplies the dynamic `version` field: setuptools reads it from the
# `__version__` attribute of the `lightrag` package.
[tool.setuptools.dynamic]
version = {attr = "lightrag.__version__"}
# Non-Python files to ship inside the `lightrag` package: everything matched
# by the glob below, relative to the package directory.
[tool.setuptools.package-data]
lightrag = ["api/webui/**/*"]

108
setup.py
View File

@ -1,106 +1,6 @@
import setuptools
from pathlib import Path
# Minimal setup.py for backward compatibility
# Primary configuration is now in pyproject.toml
from setuptools import setup
# Reading the long description from README.md
def read_long_description():
    """Return the contents of ``README.md`` for the package long description.

    The file is looked up relative to the current working directory and
    decoded as UTF-8. If it does not exist, a fixed placeholder sentence is
    returned instead.
    """
    readme = Path("README.md")
    try:
        text = readme.read_text(encoding="utf-8")
    except FileNotFoundError:
        text = "A description of LightRAG is currently unavailable."
    return text
# Retrieving metadata from __init__.py
def retrieve_metadata():
    """Collect package metadata from ``./lightrag/__init__.py``.

    The file is scanned line by line for assignments to ``__author__``,
    ``__version__`` and ``__url__``. Only simple one-line assignments of a
    quoted string are understood; the file is read as text, never imported.
    If a variable is assigned more than once, the last assignment wins.

    Returns:
        dict: Maps each of the three variable names to its value, with the
        surrounding whitespace and quotes removed. Spaces and ``=`` characters
        inside the value are preserved.

    Raises:
        FileNotFoundError: If ``./lightrag/__init__.py`` does not exist.
        ValueError: If any of the three variables is not assigned in the file.
    """
    vars2find = ["__author__", "__version__", "__url__"]
    vars2readme = {}
    try:
        with open("./lightrag/__init__.py", encoding="utf-8") as f:
            for line in f:
                for v in vars2find:
                    if not line.startswith(v):
                        continue
                    # Split on the first "=" only, so values that contain "="
                    # themselves (e.g. a URL with a query string) stay intact.
                    _, sep, value = line.partition("=")
                    if sep:
                        # Trim only the outer whitespace and quotes; spaces
                        # inside the value (e.g. an author name) are kept.
                        vars2readme[v] = value.strip().strip("\"'")
    except FileNotFoundError as err:
        raise FileNotFoundError(
            "Metadata file './lightrag/__init__.py' not found."
        ) from err

    # Checking if all required variables are found
    missing_vars = [v for v in vars2find if v not in vars2readme]
    if missing_vars:
        raise ValueError(
            f"Missing required metadata variables in __init__.py: {missing_vars}"
        )
    return vars2readme
# Reading dependencies from requirements.txt
def read_requirements(file_path="requirements.txt"):
    """Read requirement specifiers from a requirements file.

    Blank lines and comment lines (lines whose first non-blank character is
    ``#``) are skipped; every remaining line is returned with surrounding
    whitespace removed.

    Args:
        file_path: Path of the requirements file to read.

    Returns:
        list[str]: The requirement lines in file order. Empty when the file
        does not exist, in which case a warning is printed instead of raising.
    """
    try:
        with open(file_path, encoding="utf-8") as f:
            # Strip before testing for "#" so indented comments are skipped too.
            stripped_lines = (line.strip() for line in f)
            return [
                line
                for line in stripped_lines
                if line and not line.startswith("#")
            ]
    except FileNotFoundError:
        print(f"Warning: '{file_path}' not found. No dependencies will be installed.")
        return []
def read_api_requirements():
    """Return the requirement lines listed in the API requirements file."""
    api_requirements_file = "lightrag/api/requirements.txt"
    return read_requirements(api_requirements_file)
def read_extra_requirements():
    """Return the requirement lines listed for the lightrag_visualizer tool."""
    visualizer_requirements_file = "lightrag/tools/lightrag_visualizer/requirements.txt"
    return read_requirements(visualizer_requirements_file)
# Gather everything the setup() call needs: metadata parsed from
# lightrag/__init__.py, the README text, and the core requirement list.
metadata = retrieve_metadata()
long_description = read_long_description()
requirements = read_requirements()
setuptools.setup(
    name="lightrag-hku",
    url=metadata["__url__"],
    version=metadata["__version__"],
    author=metadata["__author__"],
    description="LightRAG: Simple and Fast Retrieval-Augmented Generation",
    long_description=long_description,
    long_description_content_type="text/markdown",
    packages=setuptools.find_packages(
        exclude=("tests*", "docs*")
    ),  # Automatically find packages
    classifiers=[
        "Development Status :: 4 - Beta",
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Intended Audience :: Developers",
        "Topic :: Software Development :: Libraries :: Python Modules",
    ],
    python_requires=">=3.9",
    install_requires=requirements,
    include_package_data=True,  # Includes non-code files from MANIFEST.in
    project_urls={  # Additional project metadata
        "Documentation": metadata.get("__url__", ""),
        "Source": metadata.get("__url__", ""),
        # Only append "/issues" when a project URL is actually available.
        "Tracker": f"{metadata.get('__url__', '')}/issues"
        if metadata.get("__url__")
        else "",
    },
    extras_require={
        # Core requirements plus the API server's own requirements file.
        "api": requirements + read_api_requirements(),
        "tools": read_extra_requirements(),  # Requirements of the lightrag_visualizer tool
    },
    entry_points={
        # The trailing "[api]" marks each script as depending on the "api" extra.
        "console_scripts": [
            "lightrag-server=lightrag.api.lightrag_server:main [api]",
            "lightrag-gunicorn=lightrag.api.run_with_gunicorn:main [api]",
        ],
    },
)
# Called with no arguments: the project configuration lives in pyproject.toml,
# and this file is kept only for backward compatibility.
setup()