LightRAG/lightrag/tools/check_initialization.py

181 lines
5.7 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
Diagnostic tool to check LightRAG initialization status.
2025-08-23 02:39:12 +08:00
This tool helps developers verify that their LightRAG instance is properly
initialized before use, preventing common initialization errors.
Usage:
python -m lightrag.tools.check_initialization
"""
import asyncio
import sys
from pathlib import Path
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from lightrag import LightRAG
from lightrag.base import StoragesStatus
async def check_lightrag_setup(rag_instance: LightRAG, verbose: bool = False) -> bool:
"""
Check if a LightRAG instance is properly initialized.
2025-08-23 02:39:12 +08:00
Args:
rag_instance: The LightRAG instance to check
verbose: If True, print detailed diagnostic information
2025-08-23 02:39:12 +08:00
Returns:
True if properly initialized, False otherwise
"""
issues = []
warnings = []
2025-08-23 02:39:12 +08:00
print("🔍 Checking LightRAG initialization status...\n")
2025-08-23 02:39:12 +08:00
# Check storage initialization status
2025-08-23 02:39:12 +08:00
if not hasattr(rag_instance, "_storages_status"):
issues.append("LightRAG instance missing _storages_status attribute")
elif rag_instance._storages_status != StoragesStatus.INITIALIZED:
2025-08-23 02:39:12 +08:00
issues.append(
f"Storages not initialized (status: {rag_instance._storages_status.name})"
)
else:
print("✅ Storage status: INITIALIZED")
2025-08-23 02:39:12 +08:00
# Check individual storage components
storage_components = [
2025-08-23 02:39:12 +08:00
("full_docs", "Document storage"),
("text_chunks", "Text chunks storage"),
("entities_vdb", "Entity vector database"),
("relationships_vdb", "Relationship vector database"),
("chunks_vdb", "Chunks vector database"),
("doc_status", "Document status tracker"),
("llm_response_cache", "LLM response cache"),
("full_entities", "Entity storage"),
("full_relations", "Relation storage"),
("chunk_entity_relation_graph", "Graph storage"),
]
2025-08-23 02:39:12 +08:00
if verbose:
print("\n📦 Storage Components:")
2025-08-23 02:39:12 +08:00
for component, description in storage_components:
if not hasattr(rag_instance, component):
issues.append(f"Missing storage component: {component} ({description})")
else:
storage = getattr(rag_instance, component)
if storage is None:
warnings.append(f"Storage {component} is None (might be optional)")
2025-08-23 02:39:12 +08:00
elif hasattr(storage, "_storage_lock"):
if storage._storage_lock is None:
issues.append(f"Storage {component} not initialized (lock is None)")
elif verbose:
print(f"{description}: Ready")
elif verbose:
print(f"{description}: Ready")
2025-08-23 02:39:12 +08:00
# Check pipeline status
try:
from lightrag.kg.shared_storage import get_namespace_data
2025-08-23 02:39:12 +08:00
get_namespace_data("pipeline_status")
print("✅ Pipeline status: INITIALIZED")
except KeyError:
2025-08-23 02:39:12 +08:00
issues.append(
"Pipeline status not initialized - call initialize_pipeline_status()"
)
except Exception as e:
issues.append(f"Error checking pipeline status: {str(e)}")
2025-08-23 02:39:12 +08:00
# Print results
print("\n" + "=" * 50)
2025-08-23 02:39:12 +08:00
if issues:
print("❌ Issues found:\n")
for issue in issues:
print(f"{issue}")
2025-08-23 02:39:12 +08:00
print("\n📝 To fix, run this initialization sequence:\n")
print(" await rag.initialize_storages()")
print(" from lightrag.kg.shared_storage import initialize_pipeline_status")
print(" await initialize_pipeline_status()")
2025-08-23 02:39:12 +08:00
print(
"\n📚 Documentation: https://github.com/HKUDS/LightRAG#important-initialization-requirements"
)
if warnings and verbose:
print("\n⚠️ Warnings (might be normal):")
for warning in warnings:
print(f"{warning}")
2025-08-23 02:39:12 +08:00
return False
else:
print("✅ LightRAG is properly initialized and ready to use!")
2025-08-23 02:39:12 +08:00
if warnings and verbose:
print("\n⚠️ Warnings (might be normal):")
for warning in warnings:
print(f"{warning}")
2025-08-23 02:39:12 +08:00
return True
async def demo():
"""Demonstrate the diagnostic tool with a test instance."""
from lightrag.llm.openai import openai_embed, gpt_4o_mini_complete
from lightrag.kg.shared_storage import initialize_pipeline_status
2025-08-23 02:39:12 +08:00
print("=" * 50)
print("LightRAG Initialization Diagnostic Tool")
print("=" * 50)
2025-08-23 02:39:12 +08:00
# Create test instance
rag = LightRAG(
working_dir="./test_diagnostic",
embedding_func=openai_embed,
llm_model_func=gpt_4o_mini_complete,
)
2025-08-23 02:39:12 +08:00
print("\n🔴 BEFORE initialization:\n")
await check_lightrag_setup(rag, verbose=True)
2025-08-23 02:39:12 +08:00
print("\n" + "=" * 50)
print("\n🔄 Initializing...\n")
await rag.initialize_storages()
await initialize_pipeline_status()
2025-08-23 02:39:12 +08:00
print("\n🟢 AFTER initialization:\n")
await check_lightrag_setup(rag, verbose=True)
2025-08-23 02:39:12 +08:00
# Cleanup
import shutil
2025-08-23 02:39:12 +08:00
shutil.rmtree("./test_diagnostic", ignore_errors=True)
if __name__ == "__main__":
import argparse
2025-08-23 02:39:12 +08:00
parser = argparse.ArgumentParser(description="Check LightRAG initialization status")
parser.add_argument(
2025-08-23 02:39:12 +08:00
"--demo", action="store_true", help="Run a demonstration with a test instance"
)
parser.add_argument(
2025-08-23 02:39:12 +08:00
"--verbose",
"-v",
action="store_true",
2025-08-23 02:39:12 +08:00
help="Show detailed diagnostic information",
)
2025-08-23 02:39:12 +08:00
args = parser.parse_args()
2025-08-23 02:39:12 +08:00
if args.demo:
asyncio.run(demo())
else:
print("Run with --demo to see the diagnostic tool in action")
2025-08-23 02:39:12 +08:00
print("Or import this module and use check_lightrag_setup() with your instance")