LightRAG/examples/raganything_example.py

287 lines
9.6 KiB
Python
Raw Normal View History

2025-06-05 17:02:48 +08:00
#!/usr/bin/env python
"""
Example script demonstrating the integration of the MinerU parser with RAGAnything.
This example shows how to:
1. Process parsed documents with RAGAnything
2. Perform multimodal queries on the processed documents
3. Handle different types of content (text, images, tables)
"""
import os
import argparse
import asyncio
2025-07-03 19:22:20 +08:00
import logging
import logging.config
from pathlib import Path
# Add project root directory to Python path
import sys
sys.path.append(str(Path(__file__).parent.parent))
2025-06-05 17:02:48 +08:00
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
2025-07-03 19:22:20 +08:00
from lightrag.utils import EmbeddingFunc, logger, set_verbose_debug
from raganything import RAGAnything, RAGAnythingConfig
def configure_logging():
    """Configure console and rotating-file logging for the example.

    The "lightrag" logger is given two handlers (stderr console and a
    rotating log file), set to INFO, and detached from the root logger
    (``propagate`` is False).

    Environment variables read:
        LOG_DIR: Directory for ``raganything_example.log``
            (default: current working directory).
        LOG_MAX_BYTES: ``maxBytes`` passed to the rotating file handler
            (default: 10485760, i.e. 10MB).
        LOG_BACKUP_COUNT: ``backupCount`` passed to the rotating file
            handler (default: 5).
        VERBOSE: When equal to "true" (case-insensitive), verbose debug is
            requested via ``set_verbose_debug``.
    """
    # Get log directory path from environment variable or use current directory
    log_dir = os.getenv("LOG_DIR", os.getcwd())
    log_file_path = os.path.abspath(os.path.join(log_dir, "raganything_example.log"))

    print(f"\nRAGAnything example log file: {log_file_path}\n")

    # Create the directory that will actually contain the log file.
    # Using dirname(log_dir) here would create only the *parent* of LOG_DIR
    # (and is "" for a relative single-component LOG_DIR, which makes
    # os.makedirs raise). log_file_path is absolute, so its dirname is
    # never empty.
    os.makedirs(os.path.dirname(log_file_path), exist_ok=True)

    # Get log file max size and backup count from environment variables
    log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760))  # Default 10MB
    log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5))  # Default 5 backups

    logging.config.dictConfig(
        {
            "version": 1,
            "disable_existing_loggers": False,
            "formatters": {
                "default": {
                    "format": "%(levelname)s: %(message)s",
                },
                "detailed": {
                    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
                },
            },
            "handlers": {
                "console": {
                    "formatter": "default",
                    "class": "logging.StreamHandler",
                    "stream": "ext://sys.stderr",
                },
                "file": {
                    "formatter": "detailed",
                    "class": "logging.handlers.RotatingFileHandler",
                    "filename": log_file_path,
                    "maxBytes": log_max_bytes,
                    "backupCount": log_backup_count,
                    "encoding": "utf-8",
                },
            },
            "loggers": {
                "lightrag": {
                    "handlers": ["console", "file"],
                    "level": "INFO",
                    "propagate": False,
                },
            },
        }
    )

    # Set the logger level to INFO
    logger.setLevel(logging.INFO)
    # Enable verbose debug if needed
    set_verbose_debug(os.getenv("VERBOSE", "false").lower() == "true")
2025-06-05 17:02:48 +08:00
2025-06-05 17:37:11 +08:00
async def process_with_rag(
    file_path: str,
    output_dir: str,
    api_key: str,
    base_url: str = None,
    working_dir: str = None,
):
    """
    Process document with RAGAnything and run a set of example queries

    The document is processed with ``process_document_complete`` and then
    queried three ways: plain text queries, a multimodal query carrying a
    table, and a multimodal query carrying an equation. Any exception raised
    along the way is logged together with its traceback and is NOT re-raised.

    Args:
        file_path: Path to the document
        output_dir: Output directory for RAG results
        api_key: OpenAI API key
        base_url: Optional base URL for API
        working_dir: Working directory for RAG storage
            (falls back to "./rag_storage" when empty or None)
    """
    try:
        # Create RAGAnything configuration
        config = RAGAnythingConfig(
            working_dir=working_dir or "./rag_storage",
            mineru_parse_method="auto",
            enable_image_processing=True,
            enable_table_processing=True,
            enable_equation_processing=True,
        )

        # Define LLM model function
        def llm_model_func(prompt, system_prompt=None, history_messages=None, **kwargs):
            # A None default avoids sharing one mutable list across calls;
            # it is normalised to a fresh list so the callee still receives
            # a list, exactly as with the previous `[]` default.
            return openai_complete_if_cache(
                "gpt-4o-mini",
                prompt,
                system_prompt=system_prompt,
                history_messages=[] if history_messages is None else history_messages,
                api_key=api_key,
                base_url=base_url,
                **kwargs,
            )

        # Define vision model function for image processing
        def vision_model_func(
            prompt, system_prompt=None, history_messages=None, image_data=None, **kwargs
        ):
            # Without image data this is an ordinary text completion.
            if not image_data:
                return llm_model_func(prompt, system_prompt, history_messages, **kwargs)

            # Build the chat messages explicitly. The system message is only
            # added when a system prompt exists, so the list never contains
            # a None placeholder entry.
            messages = []
            if system_prompt:
                messages.append({"role": "system", "content": system_prompt})
            messages.append(
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            # image_data is embedded as a base64 JPEG data URL
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{image_data}"
                            },
                        },
                    ],
                }
            )
            return openai_complete_if_cache(
                "gpt-4o",
                "",
                system_prompt=None,
                history_messages=[],
                messages=messages,
                api_key=api_key,
                base_url=base_url,
                **kwargs,
            )

        # Define embedding function
        embedding_func = EmbeddingFunc(
            embedding_dim=3072,
            max_token_size=8192,
            func=lambda texts: openai_embed(
                texts,
                model="text-embedding-3-large",
                api_key=api_key,
                base_url=base_url,
            ),
        )

        # Initialize RAGAnything with new dataclass structure
        rag = RAGAnything(
            config=config,
            llm_model_func=llm_model_func,
            vision_model_func=vision_model_func,
            embedding_func=embedding_func,
        )

        # Process document
        await rag.process_document_complete(
            file_path=file_path, output_dir=output_dir, parse_method="auto"
        )

        # Example queries - demonstrating different query approaches
        logger.info("\nQuerying processed document:")

        # 1. Pure text queries using aquery()
        text_queries = [
            "What is the main content of the document?",
            "What are the key topics discussed?",
        ]

        for query in text_queries:
            logger.info(f"\n[Text Query]: {query}")
            result = await rag.aquery(query, mode="hybrid")
            logger.info(f"Answer: {result}")

        # 2. Multimodal query with specific multimodal content using aquery_with_multimodal()
        logger.info(
            "\n[Multimodal Query]: Analyzing performance data in context of document"
        )
        multimodal_result = await rag.aquery_with_multimodal(
            "Compare this performance data with any similar results mentioned in the document",
            multimodal_content=[
                {
                    "type": "table",
                    # CSV-style table, one row per line
                    "table_data": (
                        "Method,Accuracy,Processing_Time\n"
                        "RAGAnything,95.2%,120ms\n"
                        "Traditional_RAG,87.3%,180ms\n"
                        "Baseline,82.1%,200ms"
                    ),
                    "table_caption": "Performance comparison results",
                }
            ],
            mode="hybrid",
        )
        logger.info(f"Answer: {multimodal_result}")

        # 3. Another multimodal query with equation content
        logger.info("\n[Multimodal Query]: Mathematical formula analysis")
        equation_result = await rag.aquery_with_multimodal(
            "Explain this formula and relate it to any mathematical concepts in the document",
            multimodal_content=[
                {
                    "type": "equation",
                    "latex": "F1 = 2 \\cdot \\frac{precision \\cdot recall}{precision + recall}",
                    "equation_caption": "F1-score calculation formula",
                }
            ],
            mode="hybrid",
        )
        logger.info(f"Answer: {equation_result}")

    except Exception as e:
        # Top-level boundary of the example: report the failure with its
        # traceback instead of propagating it to the caller.
        logger.error(f"Error processing with RAG: {str(e)}")
        import traceback

        logger.error(traceback.format_exc())
2025-06-05 17:02:48 +08:00
2025-06-05 17:37:11 +08:00
2025-06-05 17:02:48 +08:00
def main():
    """Parse the command line and run the RAGAnything example.

    Logs an error and returns without processing when no OpenAI API key is
    available from ``--api-key`` or the OPENAI_API_KEY environment variable.
    """
    cli = argparse.ArgumentParser(description="MinerU RAG Example")

    # Positional input document
    cli.add_argument("file_path", help="Path to the document to process")

    # Storage / output locations
    cli.add_argument(
        "--working_dir",
        "-w",
        default="./rag_storage",
        help="Working directory path",
    )
    cli.add_argument(
        "--output",
        "-o",
        default="./output",
        help="Output directory path",
    )

    # API access
    cli.add_argument(
        "--api-key",
        default=os.getenv("OPENAI_API_KEY"),
        help="OpenAI API key (defaults to OPENAI_API_KEY env var)",
    )
    cli.add_argument("--base-url", help="Optional base URL for API")

    options = cli.parse_args()

    if options.api_key:
        # Make sure the output directory exists before processing starts
        if options.output:
            os.makedirs(options.output, exist_ok=True)

        # Run the async pipeline to completion
        pipeline = process_with_rag(
            options.file_path,
            options.output,
            options.api_key,
            options.base_url,
            options.working_dir,
        )
        asyncio.run(pipeline)
    else:
        # Nothing can be done without credentials
        logger.error("Error: OpenAI API key is required")
        logger.error("Set OPENAI_API_KEY environment variable or use --api-key option")
if __name__ == "__main__":
    # Logging is set up before anything else runs
    configure_logging()

    # Startup banner: title and description, each followed by a 30-character rule
    print(
        "RAGAnything Example",
        "=" * 30,
        "Processing document with multimodal RAG pipeline",
        "=" * 30,
        sep="\n",
    )

    main()