From 1d788c3e9723f6b35fbefee1ce065e087905ee11 Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Thu, 26 Jun 2025 16:08:14 +0800
Subject: [PATCH] Update RAGAnything related

---
 README.md                           | 115 ++++++++++++++++++++--------
 examples/modalprocessors_example.py |  15 ++--
 examples/raganything_example.py     |  17 ++--
 3 files changed, 105 insertions(+), 42 deletions(-)

diff --git a/README.md b/README.md
index fa227c60..bbae685f 100644
--- a/README.md
+++ b/README.md
@@ -1159,40 +1159,95 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
    pip install raganything
    ```
 2. Process multimodal documents:
-   ```python
-   import asyncio
-   from raganything import RAGAnything
-   from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+   ```python
+   import asyncio
+   from raganything import RAGAnything
+   from lightrag import LightRAG
+   from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+   from lightrag.utils import EmbeddingFunc
+   import os

-   async def main():
-       # Initialize RAGAnything with LightRAG integration
-       rag = RAGAnything(
-           working_dir="./rag_storage",
-           llm_model_func=lambda prompt, **kwargs: openai_complete_if_cache(
-               "gpt-4o-mini", prompt, api_key="your-api-key", **kwargs
-           ),
-           embedding_func=lambda texts: openai_embed(
-               texts, model="text-embedding-3-large", api_key="your-api-key"
-           ),
-           embedding_dim=3072,
-       )
+   async def load_existing_lightrag():
+       # First, create or load an existing LightRAG instance
+       lightrag_working_dir = "./existing_lightrag_storage"

-       # Process multimodal documents
-       await rag.process_document_complete(
-           file_path="path/to/your/document.pdf",
-           output_dir="./output"
-       )
+       # Check if previous LightRAG instance exists
+       if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
+           print("✅ Found existing LightRAG instance, loading...")
+       else:
+           print("❌ No existing LightRAG instance found, will create new one")

-       # Query multimodal content
-       result = await rag.query_with_multimodal(
-           "What are the main findings shown in the figures and tables?",
-           mode="hybrid"
-       )
-       print(result)
+       # Create/Load LightRAG instance with your configurations
+       lightrag_instance = LightRAG(
+           working_dir=lightrag_working_dir,
+           llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
+               "gpt-4o-mini",
+               prompt,
+               system_prompt=system_prompt,
+               history_messages=history_messages,
+               api_key="your-api-key",
+               **kwargs,
+           ),
+           embedding_func=EmbeddingFunc(
+               embedding_dim=3072,
+               max_token_size=8192,
+               func=lambda texts: openai_embed(
+                   texts,
+                   model="text-embedding-3-large",
+                   api_key="your-api-key",
+               ),
+           )
+       )

-   if __name__ == "__main__":
-       asyncio.run(main())
-   ```
+       # Initialize storage (this will load existing data if available)
+       await lightrag_instance.initialize_storages()
+
+       # Now initialize RAGAnything with the existing LightRAG instance
+       rag = RAGAnything(
+           lightrag=lightrag_instance,  # Pass the existing LightRAG instance
+           # Only need vision model for multimodal processing
+           vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
+               "gpt-4o",
+               "",
+               system_prompt=None,
+               history_messages=[],
+               messages=[
+                   {"role": "system", "content": system_prompt} if system_prompt else None,
+                   {"role": "user", "content": [
+                       {"type": "text", "text": prompt},
+                       {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}}
+                   ]} if image_data else {"role": "user", "content": prompt}
+               ],
+               api_key="your-api-key",
+               **kwargs,
+           ) if image_data else openai_complete_if_cache(
+               "gpt-4o-mini",
+               prompt,
+               system_prompt=system_prompt,
+               history_messages=history_messages,
+               api_key="your-api-key",
+               **kwargs,
+           )
+           # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
+       )
+
+       # Query the existing knowledge base
+       result = await rag.query_with_multimodal(
+           "What data has been processed in this LightRAG instance?",
+           mode="hybrid"
+       )
+       print("Query result:", result)
+
+       # Add new multimodal documents to the existing LightRAG instance
+       await rag.process_document_complete(
+           file_path="path/to/new/multimodal_document.pdf",
+           output_dir="./output"
+       )
+
+   if __name__ == "__main__":
+       asyncio.run(load_existing_lightrag())
+   ```

 For detailed documentation and advanced usage, please refer to the [RAG-Anything repository](https://github.com/HKUDS/RAG-Anything).

diff --git a/examples/modalprocessors_example.py b/examples/modalprocessors_example.py
index a45956ff..b25c12c2 100644
--- a/examples/modalprocessors_example.py
+++ b/examples/modalprocessors_example.py
@@ -9,6 +9,7 @@ import argparse
 from lightrag.llm.openai import openai_complete_if_cache, openai_embed
 from lightrag.kg.shared_storage import initialize_pipeline_status
 from lightrag import LightRAG
+from lightrag.utils import EmbeddingFunc
 from raganything.modalprocessors import (
     ImageModalProcessor,
     TableModalProcessor,
@@ -165,11 +166,15 @@ async def process_equation_example(lightrag: LightRAG, llm_model_func):
 async def initialize_rag(api_key: str, base_url: str = None):
     rag = LightRAG(
         working_dir=WORKING_DIR,
-        embedding_func=lambda texts: openai_embed(
-            texts,
-            model="text-embedding-3-large",
-            api_key=api_key,
-            base_url=base_url,
+        embedding_func=EmbeddingFunc(
+            embedding_dim=3072,
+            max_token_size=8192,
+            func=lambda texts: openai_embed(
+                texts,
+                model="text-embedding-3-large",
+                api_key=api_key,
+                base_url=base_url,
+            ),
         ),
         llm_model_func=lambda prompt,
         system_prompt=None,
diff --git a/examples/raganything_example.py b/examples/raganything_example.py
index 2e24d5ee..4933b3d7 100644
--- a/examples/raganything_example.py
+++ b/examples/raganything_example.py
@@ -12,6 +12,7 @@ import os
 import argparse
 import asyncio
 from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+from lightrag.utils import EmbeddingFunc
 from raganything.raganything import RAGAnything


@@ -89,14 +90,16 @@ async def process_with_rag(
             base_url=base_url,
             **kwargs,
         ),
-        embedding_func=lambda texts: openai_embed(
-            texts,
-            model="text-embedding-3-large",
-            api_key=api_key,
-            base_url=base_url,
+        embedding_func=EmbeddingFunc(
+            embedding_dim=3072,
+            max_token_size=8192,
+            func=lambda texts: openai_embed(
+                texts,
+                model="text-embedding-3-large",
+                api_key=api_key,
+                base_url=base_url,
+            ),
         ),
-        embedding_dim=3072,
-        max_token_size=8192,
     )

     # Process document
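
The README and example changes above all pass embeddings through LightRAG's `EmbeddingFunc` wrapper, which bundles the embedding callable with its `embedding_dim` and `max_token_size`. For reference, a minimal standalone sketch of that pattern is below; reading the key from an `OPENAI_API_KEY` environment variable is an assumption for illustration, and the model name and dimensions simply mirror the examples above.

```python
import os

from lightrag.llm.openai import openai_embed
from lightrag.utils import EmbeddingFunc

# Wrap the embedding callable together with its metadata so LightRAG
# knows the vector dimensionality and the per-request token limit.
embedding_func = EmbeddingFunc(
    embedding_dim=3072,   # text-embedding-3-large returns 3072-dim vectors
    max_token_size=8192,  # maximum tokens sent per embedding request
    func=lambda texts: openai_embed(
        texts,
        model="text-embedding-3-large",
        api_key=os.getenv("OPENAI_API_KEY"),  # assumed env var; replace as needed
    ),
)
```

The same `embedding_func` object can then be passed to either `LightRAG(...)` or `RAGAnything(...)`, as the patched examples do.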