mirror of https://github.com/HKUDS/LightRAG.git
synced 2025-11-04 03:39:35 +00:00

Merge pull request #1167 from omdivyatej/om-pr

Feature: Dynamic LLM Selection via QueryParam for Optimized Performance

This commit is contained in:
commit ec15d5a5af
examples/lightrag_multi_model_all_modes_demo.py  +88  (Normal file)
@@ -0,0 +1,88 @@
+import os
+import asyncio
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.openai import gpt_4o_mini_complete, gpt_4o_complete, openai_embed
+from lightrag.kg.shared_storage import initialize_pipeline_status
+
+WORKING_DIR = "./lightrag_demo"
+
+if not os.path.exists(WORKING_DIR):
+    os.mkdir(WORKING_DIR)
+
+
+async def initialize_rag():
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        embedding_func=openai_embed,
+        llm_model_func=gpt_4o_mini_complete,  # Default model for queries
+    )
+
+    await rag.initialize_storages()
+    await initialize_pipeline_status()
+
+    return rag
+
+
+def main():
+    # Initialize RAG instance
+    rag = asyncio.run(initialize_rag())
+
+    # Load the data
+    with open("./book.txt", "r", encoding="utf-8") as f:
+        rag.insert(f.read())
+
+    # Query with naive mode (default model)
+    print("--- NAIVE mode ---")
+    print(
+        rag.query(
+            "What are the main themes in this story?", param=QueryParam(mode="naive")
+        )
+    )
+
+    # Query with local mode (default model)
+    print("\n--- LOCAL mode ---")
+    print(
+        rag.query(
+            "What are the main themes in this story?", param=QueryParam(mode="local")
+        )
+    )
+
+    # Query with global mode (default model)
+    print("\n--- GLOBAL mode ---")
+    print(
+        rag.query(
+            "What are the main themes in this story?", param=QueryParam(mode="global")
+        )
+    )
+
+    # Query with hybrid mode (default model)
+    print("\n--- HYBRID mode ---")
+    print(
+        rag.query(
+            "What are the main themes in this story?", param=QueryParam(mode="hybrid")
+        )
+    )
+
+    # Query with mix mode (default model)
+    print("\n--- MIX mode ---")
+    print(
+        rag.query(
+            "What are the main themes in this story?", param=QueryParam(mode="mix")
+        )
+    )
+
+    # Query with a custom model (gpt-4o) for a more complex question
+    print("\n--- Using custom model for complex analysis ---")
+    print(
+        rag.query(
+            "How does the character development reflect Victorian-era attitudes?",
+            param=QueryParam(
+                mode="global",
+                model_func=gpt_4o_complete,  # Override default model with more capable one
+            ),
+        )
+    )
+
+
+if __name__ == "__main__":
+    main()
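The demo above drives everything through the synchronous rag.query() wrapper. A minimal async sketch of the same override is shown below; it assumes it sits next to the demo so initialize_rag() is in scope, and that aquery() accepts the same QueryParam as query() (the lightrag.py hunks further down touch aquery, so that is the path the override ultimately goes through).

# Sketch only: async variant of the per-query override. Assumes it lives in the
# same file as the demo above so that initialize_rag() is available.
import asyncio

from lightrag import QueryParam
from lightrag.llm.openai import gpt_4o_complete


async def ask_with_override():
    rag = await initialize_rag()  # reuse the demo's setup
    return await rag.aquery(
        "How does the character development reflect Victorian-era attitudes?",
        param=QueryParam(mode="hybrid", model_func=gpt_4o_complete),
    )


print(asyncio.run(ask_with_override()))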
@@ -10,6 +10,7 @@ from typing import (
     Literal,
     TypedDict,
     TypeVar,
+    Callable,
 )
 import numpy as np
 from .utils import EmbeddingFunc
@@ -84,6 +85,12 @@ class QueryParam:
     ids: list[str] | None = None
     """List of ids to filter the results."""
 
+    model_func: Callable[..., object] | None = None
+    """Optional override for the LLM model function to use for this specific query.
+    If provided, this will be used instead of the global model function.
+    This allows using different models for different query modes.
+    """
+
 
 @dataclass
 class StorageNameSpace(ABC):
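Because model_func is typed only as Callable[..., object] | None, any callable with the same calling convention as the bundled completion functions should work, not just the OpenAI helpers used in the demo. The sketch below is illustrative only (route_complete and the length threshold are invented here); the operate.py hunks further down await the override directly, e.g. use_model_func(kw_prompt, keyword_extraction=True), so a custom callable should be async and tolerate extra keyword arguments.

# Illustrative only: a hand-rolled callable for QueryParam.model_func.
# Assumes the same calling convention as gpt_4o_mini_complete/gpt_4o_complete:
# async, first positional argument is the prompt, extra kwargs passed through.
from lightrag import QueryParam
from lightrag.llm.openai import gpt_4o_complete, gpt_4o_mini_complete


async def route_complete(prompt: str, **kwargs) -> str:
    # Hypothetical routing rule: long prompts go to the stronger model.
    if len(prompt) > 4000:
        return await gpt_4o_complete(prompt, **kwargs)
    return await gpt_4o_mini_complete(prompt, **kwargs)


param = QueryParam(mode="hybrid", model_func=route_complete)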
@@ -1330,11 +1330,15 @@ class LightRAG:
         Args:
             query (str): The query to be executed.
             param (QueryParam): Configuration parameters for query execution.
+                If param.model_func is provided, it will be used instead of the global model.
             prompt (Optional[str]): Custom prompts for fine-tuned control over the system's behavior. Defaults to None, which uses PROMPTS["rag_response"].
 
         Returns:
             str: The result of the query execution.
         """
+        # If a custom model is provided in param, temporarily update global config
+        global_config = asdict(self)
+
         if param.mode in ["local", "global", "hybrid"]:
             response = await kg_query(
                 query.strip(),
@@ -1343,7 +1347,7 @@
                 self.relationships_vdb,
                 self.text_chunks,
                 param,
-                asdict(self),
+                global_config,
                 hashing_kv=self.llm_response_cache,  # Directly use llm_response_cache
                 system_prompt=system_prompt,
             )
@@ -1353,7 +1357,7 @@
                 self.chunks_vdb,
                 self.text_chunks,
                 param,
-                asdict(self),
+                global_config,
                 hashing_kv=self.llm_response_cache,  # Directly use llm_response_cache
                 system_prompt=system_prompt,
             )
@@ -1366,7 +1370,7 @@
                 self.chunks_vdb,
                 self.text_chunks,
                 param,
-                asdict(self),
+                global_config,
                 hashing_kv=self.llm_response_cache,  # Directly use llm_response_cache
                 system_prompt=system_prompt,
             )
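In aquery() the hunks above only bind asdict(self) to global_config and pass it through; the actual model selection happens in the query operators below. Each of them replaces the fixed lookup with the same fallback, which (assuming model_func is either None or a callable, never a falsy-but-valid value) boils down to the one-liner sketched here.

# Equivalent shorthand for the fallback repeated in the operate.py hunks below:
use_model_func = query_param.model_func or global_config["llm_model_func"]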
@@ -705,7 +705,11 @@ async def kg_query(
     system_prompt: str | None = None,
 ) -> str | AsyncIterator[str]:
     # Handle cache
-    use_model_func = global_config["llm_model_func"]
+    use_model_func = (
+        query_param.model_func
+        if query_param.model_func
+        else global_config["llm_model_func"]
+    )
     args_hash = compute_args_hash(query_param.mode, query, cache_type="query")
     cached_response, quantized, min_val, max_val = await handle_cache(
         hashing_kv, args_hash, query, query_param.mode, cache_type="query"
@@ -866,7 +870,9 @@ async def extract_keywords_only(
     logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}")
 
     # 5. Call the LLM for keyword extraction
-    use_model_func = global_config["llm_model_func"]
+    use_model_func = (
+        param.model_func if param.model_func else global_config["llm_model_func"]
+    )
     result = await use_model_func(kw_prompt, keyword_extraction=True)
 
     # 6. Parse out JSON from the LLM response
@@ -926,7 +932,11 @@ async def mix_kg_vector_query(
     3. Combining both results for comprehensive answer generation
     """
     # 1. Cache handling
-    use_model_func = global_config["llm_model_func"]
+    use_model_func = (
+        query_param.model_func
+        if query_param.model_func
+        else global_config["llm_model_func"]
+    )
     args_hash = compute_args_hash("mix", query, cache_type="query")
     cached_response, quantized, min_val, max_val = await handle_cache(
         hashing_kv, args_hash, query, "mix", cache_type="query"
@@ -1731,7 +1741,11 @@ async def naive_query(
     system_prompt: str | None = None,
 ) -> str | AsyncIterator[str]:
     # Handle cache
-    use_model_func = global_config["llm_model_func"]
+    use_model_func = (
+        query_param.model_func
+        if query_param.model_func
+        else global_config["llm_model_func"]
+    )
     args_hash = compute_args_hash(query_param.mode, query, cache_type="query")
     cached_response, quantized, min_val, max_val = await handle_cache(
         hashing_kv, args_hash, query, query_param.mode, cache_type="query"
@@ -1850,7 +1864,11 @@ async def kg_query_with_keywords(
     # ---------------------------
     # 1) Handle potential cache for query results
     # ---------------------------
-    use_model_func = global_config["llm_model_func"]
+    use_model_func = (
+        query_param.model_func
+        if query_param.model_func
+        else global_config["llm_model_func"]
+    )
     args_hash = compute_args_hash(query_param.mode, query, cache_type="query")
     cached_response, quantized, min_val, max_val = await handle_cache(
         hashing_kv, args_hash, query, query_param.mode, cache_type="query"
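A quick way to confirm the override actually takes effect is to pass a stub that tags its output; if the tag comes back, the per-query function was used instead of the instance-wide one. This is a throwaway check, not part of the PR: it assumes a rag instance initialized as in the demo above and a stub matching the calling convention noted earlier (async, prompt first, extra kwargs accepted).

# Throwaway check (not from the PR): a stub model_func that tags its answer.
from lightrag import QueryParam


async def stub_model_func(prompt: str, **kwargs) -> str:
    return "[stub-model] override was used"


print(
    rag.query(
        "Who narrates the story?",
        param=QueryParam(mode="naive", model_func=stub_model_func),
    )
)
# If the printed answer starts with "[stub-model]", QueryParam.model_func won.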