From c7bc2c63cfaab68a263ebe14a626c24079a123b0 Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Thu, 20 Feb 2025 13:21:41 +0100 Subject: [PATCH] cleanup storages --- .../lightrag_openai_compatible_stream_demo.py | 2 - lightrag/kg/__init__.py | 137 +++++++++++++++- lightrag/lightrag.py | 147 +----------------- lightrag/utils.py | 6 +- reproduce/Step_3.py | 1 - reproduce/Step_3_openai_compatible.py | 3 - 6 files changed, 142 insertions(+), 154 deletions(-) diff --git a/examples/lightrag_openai_compatible_stream_demo.py b/examples/lightrag_openai_compatible_stream_demo.py index 7509e4dc..750f139e 100644 --- a/examples/lightrag_openai_compatible_stream_demo.py +++ b/examples/lightrag_openai_compatible_stream_demo.py @@ -44,5 +44,3 @@ async def print_stream(stream): async for chunk in stream: if chunk: print(chunk, end="", flush=True) - - diff --git a/lightrag/kg/__init__.py b/lightrag/kg/__init__.py index 087eaac9..2f3eae87 100644 --- a/lightrag/kg/__init__.py +++ b/lightrag/kg/__init__.py @@ -1 +1,136 @@ -# print ("init package vars here. ......") +STORAGE_IMPLEMENTATIONS = { + "KV_STORAGE": { + "implementations": [ + "JsonKVStorage", + "MongoKVStorage", + "RedisKVStorage", + "TiDBKVStorage", + "PGKVStorage", + "OracleKVStorage", + ], + "required_methods": ["get_by_id", "upsert"], + }, + "GRAPH_STORAGE": { + "implementations": [ + "NetworkXStorage", + "Neo4JStorage", + "MongoGraphStorage", + "TiDBGraphStorage", + "AGEStorage", + "GremlinStorage", + "PGGraphStorage", + "OracleGraphStorage", + ], + "required_methods": ["upsert_node", "upsert_edge"], + }, + "VECTOR_STORAGE": { + "implementations": [ + "NanoVectorDBStorage", + "MilvusVectorDBStorage", + "ChromaVectorDBStorage", + "TiDBVectorDBStorage", + "PGVectorStorage", + "FaissVectorDBStorage", + "QdrantVectorDBStorage", + "OracleVectorDBStorage", + "MongoVectorDBStorage", + ], + "required_methods": ["query", "upsert"], + }, + "DOC_STATUS_STORAGE": { + "implementations": [ + "JsonDocStatusStorage", + "PGDocStatusStorage", + "PGDocStatusStorage", + "MongoDocStatusStorage", + ], + "required_methods": ["get_docs_by_status"], + }, +} + +# Storage implementation environment variable without default value +STORAGE_ENV_REQUIREMENTS: dict[str, list[str]] = { + # KV Storage Implementations + "JsonKVStorage": [], + "MongoKVStorage": [], + "RedisKVStorage": ["REDIS_URI"], + "TiDBKVStorage": ["TIDB_USER", "TIDB_PASSWORD", "TIDB_DATABASE"], + "PGKVStorage": ["POSTGRES_USER", "POSTGRES_PASSWORD", "POSTGRES_DATABASE"], + "OracleKVStorage": [ + "ORACLE_DSN", + "ORACLE_USER", + "ORACLE_PASSWORD", + "ORACLE_CONFIG_DIR", + ], + # Graph Storage Implementations + "NetworkXStorage": [], + "Neo4JStorage": ["NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD"], + "MongoGraphStorage": [], + "TiDBGraphStorage": ["TIDB_USER", "TIDB_PASSWORD", "TIDB_DATABASE"], + "AGEStorage": [ + "AGE_POSTGRES_DB", + "AGE_POSTGRES_USER", + "AGE_POSTGRES_PASSWORD", + ], + "GremlinStorage": ["GREMLIN_HOST", "GREMLIN_PORT", "GREMLIN_GRAPH"], + "PGGraphStorage": [ + "POSTGRES_USER", + "POSTGRES_PASSWORD", + "POSTGRES_DATABASE", + ], + "OracleGraphStorage": [ + "ORACLE_DSN", + "ORACLE_USER", + "ORACLE_PASSWORD", + "ORACLE_CONFIG_DIR", + ], + # Vector Storage Implementations + "NanoVectorDBStorage": [], + "MilvusVectorDBStorage": [], + "ChromaVectorDBStorage": [], + "TiDBVectorDBStorage": ["TIDB_USER", "TIDB_PASSWORD", "TIDB_DATABASE"], + "PGVectorStorage": ["POSTGRES_USER", "POSTGRES_PASSWORD", "POSTGRES_DATABASE"], + "FaissVectorDBStorage": [], + "QdrantVectorDBStorage": ["QDRANT_URL"], # QDRANT_API_KEY has default value None + "OracleVectorDBStorage": [ + "ORACLE_DSN", + "ORACLE_USER", + "ORACLE_PASSWORD", + "ORACLE_CONFIG_DIR", + ], + "MongoVectorDBStorage": [], + # Document Status Storage Implementations + "JsonDocStatusStorage": [], + "PGDocStatusStorage": ["POSTGRES_USER", "POSTGRES_PASSWORD", "POSTGRES_DATABASE"], + "MongoDocStatusStorage": [], +} + +# Storage implementation module mapping +STORAGES = { + "NetworkXStorage": ".kg.networkx_impl", + "JsonKVStorage": ".kg.json_kv_impl", + "NanoVectorDBStorage": ".kg.nano_vector_db_impl", + "JsonDocStatusStorage": ".kg.json_doc_status_impl", + "Neo4JStorage": ".kg.neo4j_impl", + "OracleKVStorage": ".kg.oracle_impl", + "OracleGraphStorage": ".kg.oracle_impl", + "OracleVectorDBStorage": ".kg.oracle_impl", + "MilvusVectorDBStorage": ".kg.milvus_impl", + "MongoKVStorage": ".kg.mongo_impl", + "MongoDocStatusStorage": ".kg.mongo_impl", + "MongoGraphStorage": ".kg.mongo_impl", + "MongoVectorDBStorage": ".kg.mongo_impl", + "RedisKVStorage": ".kg.redis_impl", + "ChromaVectorDBStorage": ".kg.chroma_impl", + "TiDBKVStorage": ".kg.tidb_impl", + "TiDBVectorDBStorage": ".kg.tidb_impl", + "TiDBGraphStorage": ".kg.tidb_impl", + "PGKVStorage": ".kg.postgres_impl", + "PGVectorStorage": ".kg.postgres_impl", + "AGEStorage": ".kg.age_impl", + "PGGraphStorage": ".kg.postgres_impl", + "GremlinStorage": ".kg.gremlin_impl", + "PGDocStatusStorage": ".kg.postgres_impl", + "FaissVectorDBStorage": ".kg.faiss_impl", + "QdrantVectorDBStorage": ".kg.qdrant_impl", +} diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 8b695883..174947f3 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -8,6 +8,8 @@ from datetime import datetime from functools import partial from typing import Any, AsyncIterator, Callable, Iterator, cast, final +from lightrag.kg import STORAGE_ENV_REQUIREMENTS, STORAGE_IMPLEMENTATIONS, STORAGES + from .base import ( BaseGraphStorage, BaseKVStorage, @@ -45,149 +47,6 @@ from .utils import ( config = configparser.ConfigParser() config.read("config.ini", "utf-8") -# Storage type and implementation compatibility validation table -STORAGE_IMPLEMENTATIONS = { - "KV_STORAGE": { - "implementations": [ - "JsonKVStorage", - "MongoKVStorage", - "RedisKVStorage", - "TiDBKVStorage", - "PGKVStorage", - "OracleKVStorage", - ], - "required_methods": ["get_by_id", "upsert"], - }, - "GRAPH_STORAGE": { - "implementations": [ - "NetworkXStorage", - "Neo4JStorage", - "MongoGraphStorage", - "TiDBGraphStorage", - "AGEStorage", - "GremlinStorage", - "PGGraphStorage", - "OracleGraphStorage", - ], - "required_methods": ["upsert_node", "upsert_edge"], - }, - "VECTOR_STORAGE": { - "implementations": [ - "NanoVectorDBStorage", - "MilvusVectorDBStorage", - "ChromaVectorDBStorage", - "TiDBVectorDBStorage", - "PGVectorStorage", - "FaissVectorDBStorage", - "QdrantVectorDBStorage", - "OracleVectorDBStorage", - "MongoVectorDBStorage", - ], - "required_methods": ["query", "upsert"], - }, - "DOC_STATUS_STORAGE": { - "implementations": [ - "JsonDocStatusStorage", - "PGDocStatusStorage", - "PGDocStatusStorage", - "MongoDocStatusStorage", - ], - "required_methods": ["get_docs_by_status"], - }, -} - -# Storage implementation environment variable without default value -STORAGE_ENV_REQUIREMENTS: dict[str, list[str]] = { - # KV Storage Implementations - "JsonKVStorage": [], - "MongoKVStorage": [], - "RedisKVStorage": ["REDIS_URI"], - "TiDBKVStorage": ["TIDB_USER", "TIDB_PASSWORD", "TIDB_DATABASE"], - "PGKVStorage": ["POSTGRES_USER", "POSTGRES_PASSWORD", "POSTGRES_DATABASE"], - "OracleKVStorage": [ - "ORACLE_DSN", - "ORACLE_USER", - "ORACLE_PASSWORD", - "ORACLE_CONFIG_DIR", - ], - # Graph Storage Implementations - "NetworkXStorage": [], - "Neo4JStorage": ["NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD"], - "MongoGraphStorage": [], - "TiDBGraphStorage": ["TIDB_USER", "TIDB_PASSWORD", "TIDB_DATABASE"], - "AGEStorage": [ - "AGE_POSTGRES_DB", - "AGE_POSTGRES_USER", - "AGE_POSTGRES_PASSWORD", - ], - "GremlinStorage": ["GREMLIN_HOST", "GREMLIN_PORT", "GREMLIN_GRAPH"], - "PGGraphStorage": [ - "POSTGRES_USER", - "POSTGRES_PASSWORD", - "POSTGRES_DATABASE", - ], - "OracleGraphStorage": [ - "ORACLE_DSN", - "ORACLE_USER", - "ORACLE_PASSWORD", - "ORACLE_CONFIG_DIR", - ], - # Vector Storage Implementations - "NanoVectorDBStorage": [], - "MilvusVectorDBStorage": [], - "ChromaVectorDBStorage": [], - "TiDBVectorDBStorage": ["TIDB_USER", "TIDB_PASSWORD", "TIDB_DATABASE"], - "PGVectorStorage": ["POSTGRES_USER", "POSTGRES_PASSWORD", "POSTGRES_DATABASE"], - "FaissVectorDBStorage": [], - "QdrantVectorDBStorage": ["QDRANT_URL"], # QDRANT_API_KEY has default value None - "OracleVectorDBStorage": [ - "ORACLE_DSN", - "ORACLE_USER", - "ORACLE_PASSWORD", - "ORACLE_CONFIG_DIR", - ], - "MongoVectorDBStorage": [], - # Document Status Storage Implementations - "JsonDocStatusStorage": [], - "PGDocStatusStorage": ["POSTGRES_USER", "POSTGRES_PASSWORD", "POSTGRES_DATABASE"], - "MongoDocStatusStorage": [], -} - -# Storage implementation module mapping -STORAGES = { - "NetworkXStorage": ".kg.networkx_impl", - "JsonKVStorage": ".kg.json_kv_impl", - "NanoVectorDBStorage": ".kg.nano_vector_db_impl", - "JsonDocStatusStorage": ".kg.json_doc_status_impl", - "Neo4JStorage": ".kg.neo4j_impl", - "OracleKVStorage": ".kg.oracle_impl", - "OracleGraphStorage": ".kg.oracle_impl", - "OracleVectorDBStorage": ".kg.oracle_impl", - "MilvusVectorDBStorage": ".kg.milvus_impl", - "MongoKVStorage": ".kg.mongo_impl", - "MongoDocStatusStorage": ".kg.mongo_impl", - "MongoGraphStorage": ".kg.mongo_impl", - "MongoVectorDBStorage": ".kg.mongo_impl", - "RedisKVStorage": ".kg.redis_impl", - "ChromaVectorDBStorage": ".kg.chroma_impl", - "TiDBKVStorage": ".kg.tidb_impl", - "TiDBVectorDBStorage": ".kg.tidb_impl", - "TiDBGraphStorage": ".kg.tidb_impl", - "PGKVStorage": ".kg.postgres_impl", - "PGVectorStorage": ".kg.postgres_impl", - "AGEStorage": ".kg.age_impl", - "PGGraphStorage": ".kg.postgres_impl", - "GremlinStorage": ".kg.gremlin_impl", - "PGDocStatusStorage": ".kg.postgres_impl", - "FaissVectorDBStorage": ".kg.faiss_impl", - "QdrantVectorDBStorage": ".kg.qdrant_impl", -} - - - - - - @final @dataclass @@ -1643,4 +1502,4 @@ class LightRAG: raise ValueError( f"Storage implementation '{storage_name}' requires the following " f"environment variables: {', '.join(missing_vars)}" - ) \ No newline at end of file + ) diff --git a/lightrag/utils.py b/lightrag/utils.py index 62f62d4d..d402d14c 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -714,6 +714,7 @@ def get_conversation_turns( return "\n".join(formatted_turns) + def always_get_an_event_loop() -> asyncio.AbstractEventLoop: """ Ensure that there is always an event loop available. @@ -737,8 +738,8 @@ def always_get_an_event_loop() -> asyncio.AbstractEventLoop: new_loop = asyncio.new_event_loop() asyncio.set_event_loop(new_loop) return new_loop - - + + def lazy_external_import(module_name: str, class_name: str) -> Callable[..., Any]: """Lazily import a class from an external module based on the package of the caller.""" # Get the caller's module and package @@ -756,4 +757,3 @@ def lazy_external_import(module_name: str, class_name: str) -> Callable[..., Any return cls(*args, **kwargs) return import_class - \ No newline at end of file diff --git a/reproduce/Step_3.py b/reproduce/Step_3.py index be5ba99d..facb913e 100644 --- a/reproduce/Step_3.py +++ b/reproduce/Step_3.py @@ -23,7 +23,6 @@ async def process_query(query_text, rag_instance, query_param): return None, {"query": query_text, "error": str(e)} - def run_queries_and_save_to_json( queries, rag_instance, query_param, output_file, error_file ): diff --git a/reproduce/Step_3_openai_compatible.py b/reproduce/Step_3_openai_compatible.py index b1d33f93..885220fa 100644 --- a/reproduce/Step_3_openai_compatible.py +++ b/reproduce/Step_3_openai_compatible.py @@ -54,9 +54,6 @@ async def process_query(query_text, rag_instance, query_param): return None, {"query": query_text, "error": str(e)} - - - def run_queries_and_save_to_json( queries, rag_instance, query_param, output_file, error_file ):