Mirror of https://github.com/HKUDS/LightRAG.git, synced 2025-06-26 22:00:19 +00:00.
Update RAGAnything-related README example and example scripts

The README integration example now shows how to reuse an existing LightRAG instance with RAG-Anything, and the example scripts wrap their bare embedding lambdas in EmbeddingFunc so that the embedding dimension and token limit travel with the embedding callable.

This commit is contained in:
parent 687ccd4923
commit 1d788c3e97

README.md (93 changed lines)
README.md

@@ -1162,36 +1162,91 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/RAG-Anything)

The integration example is rewritten: instead of letting RAGAnything create its own storage, it now builds (or reloads) a LightRAG instance first and hands it to RAGAnything.

Before:

```python
import asyncio
from raganything import RAGAnything
from lightrag.llm.openai import openai_complete_if_cache, openai_embed


async def main():
    # Initialize RAGAnything with LightRAG integration
    rag = RAGAnything(
        working_dir="./rag_storage",
        llm_model_func=lambda prompt, **kwargs: openai_complete_if_cache(
            "gpt-4o-mini", prompt, api_key="your-api-key", **kwargs
        ),
        embedding_func=lambda texts: openai_embed(
            texts, model="text-embedding-3-large", api_key="your-api-key"
        ),
        embedding_dim=3072,
        max_token_size=8192,
    )

    # Process multimodal documents
    await rag.process_document_complete(
        file_path="path/to/your/document.pdf",
        output_dir="./output"
    )

    # Query multimodal content
    result = await rag.query_with_multimodal(
        "What are the main findings shown in the figures and tables?",
        mode="hybrid"
    )
    print(result)


if __name__ == "__main__":
    asyncio.run(main())
```

After:

```python
import asyncio
from raganything import RAGAnything
from lightrag import LightRAG
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc
import os


async def load_existing_lightrag():
    # First, create or load an existing LightRAG instance
    lightrag_working_dir = "./existing_lightrag_storage"

    # Check if a previous LightRAG instance exists
    if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
        print("✅ Found existing LightRAG instance, loading...")
    else:
        print("❌ No existing LightRAG instance found, will create a new one")

    # Create/load the LightRAG instance with your configuration
    lightrag_instance = LightRAG(
        working_dir=lightrag_working_dir,
        llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
            "gpt-4o-mini",
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            api_key="your-api-key",
            **kwargs,
        ),
        embedding_func=EmbeddingFunc(
            embedding_dim=3072,
            max_token_size=8192,
            func=lambda texts: openai_embed(
                texts,
                model="text-embedding-3-large",
                api_key="your-api-key",  # the diff referenced undefined api_key/base_url names here
                base_url=None,           # set this if you use a non-default endpoint
            ),
        ),
    )

    # Initialize storage (this will load existing data if available)
    await lightrag_instance.initialize_storages()

    # Now initialize RAGAnything with the existing LightRAG instance
    rag = RAGAnything(
        lightrag=lightrag_instance,  # Pass the existing LightRAG instance
        # Only a vision model is needed for multimodal processing
        vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
            "gpt-4o",
            "",
            system_prompt=None,
            history_messages=[],
            messages=[msg for msg in [
                {"role": "system", "content": system_prompt} if system_prompt else None,
                {"role": "user", "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}},
                ]},
            ] if msg is not None],  # drop the system entry when no system prompt is given
            api_key="your-api-key",
            **kwargs,
        ) if image_data else openai_complete_if_cache(
            "gpt-4o-mini",
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            api_key="your-api-key",
            **kwargs,
        ),
        # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
    )

    # Query the existing knowledge base
    result = await rag.query_with_multimodal(
        "What data has been processed in this LightRAG instance?",
        mode="hybrid"
    )
    print("Query result:", result)

    # Add new multimodal documents to the existing LightRAG instance
    await rag.process_document_complete(
        file_path="path/to/new/multimodal_document.pdf",
        output_dir="./output"
    )


if __name__ == "__main__":
    asyncio.run(load_existing_lightrag())
```

For detailed documentation and advanced usage, please refer to the [RAG-Anything repository](https://github.com/HKUDS/RAG-Anything).
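If you want to run the new example without hard-coding credentials, here is a minimal sketch of the same construction with the key read from an environment variable. The OPENAI_API_KEY variable name and the build_lightrag helper are assumptions for illustration, not part of this commit; every API call mirrors the After block above.

```python
import asyncio
import os

from lightrag import LightRAG
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc

# Assumption: the OpenAI key is supplied via the environment, not hard-coded.
API_KEY = os.environ["OPENAI_API_KEY"]


async def build_lightrag(working_dir: str = "./existing_lightrag_storage") -> LightRAG:
    """Hypothetical helper: build (or reload) the LightRAG instance that
    RAGAnything will reuse, mirroring the After block above."""
    rag = LightRAG(
        working_dir=working_dir,
        llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
            "gpt-4o-mini",
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            api_key=API_KEY,
            **kwargs,
        ),
        embedding_func=EmbeddingFunc(
            embedding_dim=3072,  # must match text-embedding-3-large
            max_token_size=8192,
            func=lambda texts: openai_embed(
                texts, model="text-embedding-3-large", api_key=API_KEY
            ),
        ),
    )
    await rag.initialize_storages()  # loads existing data when the directory is populated
    return rag


if __name__ == "__main__":
    asyncio.run(build_lightrag())
```

The returned instance can then be handed to RAGAnything(lightrag=...) exactly as in the After block.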
In the example script that imports raganything.modalprocessors, the commit adds the EmbeddingFunc import:

```diff
@@ -9,6 +9,7 @@ import argparse
 from lightrag.llm.openai import openai_complete_if_cache, openai_embed
 from lightrag.kg.shared_storage import initialize_pipeline_status
 from lightrag import LightRAG
+from lightrag.utils import EmbeddingFunc
 from raganything.modalprocessors import (
     ImageModalProcessor,
     TableModalProcessor,
```
A later hunk in the same file wraps the bare embedding lambda in EmbeddingFunc:

```diff
@@ -165,12 +166,16 @@ async def process_equation_example(lightrag: LightRAG, llm_model_func):
 async def initialize_rag(api_key: str, base_url: str = None):
     rag = LightRAG(
         working_dir=WORKING_DIR,
-        embedding_func=lambda texts: openai_embed(
-            texts,
-            model="text-embedding-3-large",
-            api_key=api_key,
-            base_url=base_url,
-        ),
+        embedding_func=EmbeddingFunc(
+            embedding_dim=3072,
+            max_token_size=8192,
+            func=lambda texts: openai_embed(
+                texts,
+                model="text-embedding-3-large",
+                api_key=api_key,
+                base_url=base_url,
+            ),
+        ),
         llm_model_func=lambda prompt,
         system_prompt=None,
         history_messages=[],
```
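Why the wrapper matters: LightRAG's storage layer needs the embedding dimension and token budget at setup time, and EmbeddingFunc keeps that metadata attached to the embedding callable instead of scattering it across keyword arguments. A minimal sketch, assuming (as in current lightrag.utils) that EmbeddingFunc is an awaitable callable delegating to its func field; fake_embed is a hypothetical stand-in for a real model call:

```python
import asyncio

from lightrag.utils import EmbeddingFunc


# Hypothetical stand-in for a real embedding model, used only to show the
# interface shape: one 3072-dim vector per input text.
async def fake_embed(texts):
    return [[0.0] * 3072 for _ in texts]


embedding_func = EmbeddingFunc(
    embedding_dim=3072,   # read by LightRAG's vector storage when sizing indexes
    max_token_size=8192,  # upper bound on tokens per embedding call
    func=fake_embed,
)


async def main():
    vectors = await embedding_func(["hello", "world"])  # the wrapper delegates to func
    print(len(vectors), len(vectors[0]))  # 2 3072


asyncio.run(main())
```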
In the example script that imports raganything.raganything, the same import is added:

```diff
@@ -12,6 +12,7 @@ import os
 import argparse
 import asyncio
 from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+from lightrag.utils import EmbeddingFunc
 from raganything.raganything import RAGAnything


```
And its process_with_rag helper gets the same treatment, folding the formerly separate embedding_dim and max_token_size keyword arguments into the wrapper:

```diff
@@ -89,14 +90,16 @@ async def process_with_rag(
             base_url=base_url,
             **kwargs,
         ),
-        embedding_func=lambda texts: openai_embed(
-            texts,
-            model="text-embedding-3-large",
-            api_key=api_key,
-            base_url=base_url,
-        ),
-        embedding_dim=3072,
-        max_token_size=8192,
+        embedding_func=EmbeddingFunc(
+            embedding_dim=3072,
+            max_token_size=8192,
+            func=lambda texts: openai_embed(
+                texts,
+                model="text-embedding-3-large",
+                api_key=api_key,
+                base_url=base_url,
+            ),
+        ),
     )

     # Process document
```
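Design note: across all three files the commit converges on one convention. embedding_dim and max_token_size used to sit as loose keyword arguments beside a bare embedding lambda; they are now fields of the EmbeddingFunc wrapper that owns the callable, which is also why each example script gains a from lightrag.utils import EmbeddingFunc line. Code that still passes embedding_dim or max_token_size directly to RAGAnything should presumably migrate to the wrapper form shown above.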