Mirror of https://github.com/HKUDS/LightRAG.git, synced 2025-12-05 11:27:30 +00:00

Commit d26d413d97: Merge branch 'patch-1'
@@ -118,8 +118,9 @@ LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key
 
-### Set as num_ctx option for Ollama LLM
-# OLLAMA_NUM_CTX=32768
+### Set as num_ctx option for Ollama LLM (Must be larger than MAX_TOTAL_TOKENS+2000)
+### see also env.ollama-binding-options.example for fine tuning ollama
+# OLLAMA_LLM_NUM_CTX=32768
 
 ### Optional for Azure
 # AZURE_OPENAI_API_VERSION=2024-08-01-preview
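The new comment encodes a hard constraint: the Ollama context window must exceed MAX_TOTAL_TOKENS by at least 2000 tokens. A minimal sanity-check sketch of that rule, assuming both values are read from the environment (the 30000 fallback below is an illustrative placeholder, not a LightRAG default):

```python
import os

# Hypothetical startup check mirroring the comment in env.example.
num_ctx = int(os.environ.get("OLLAMA_LLM_NUM_CTX", "32768"))
max_total_tokens = int(os.environ.get("MAX_TOTAL_TOKENS", "30000"))  # assumed fallback
if num_ctx <= max_total_tokens + 2000:
    raise ValueError("OLLAMA_LLM_NUM_CTX must be larger than MAX_TOTAL_TOKENS+2000")
```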
env.ollama-binding-options.example (new file, 195 lines)
@@ -0,0 +1,195 @@
################################################################################
# Autogenerated .env entries list for LightRAG binding options
#
# To generate run:
# $ python -m lightrag.llm.binding_options
################################################################################
# ollama_embedding -- Context window size (number of tokens)
# OLLAMA_EMBEDDING_NUM_CTX=4096

# ollama_embedding -- Maximum number of tokens to predict
# OLLAMA_EMBEDDING_NUM_PREDICT=128

# ollama_embedding -- Number of tokens to keep from the initial prompt
# OLLAMA_EMBEDDING_NUM_KEEP=0

# ollama_embedding -- Random seed for generation (-1 for random)
# OLLAMA_EMBEDDING_SEED=-1

# ollama_embedding -- Controls randomness (0.0-2.0, higher = more creative)
# OLLAMA_EMBEDDING_TEMPERATURE=0.8

# ollama_embedding -- Top-k sampling parameter (0 = disabled)
# OLLAMA_EMBEDDING_TOP_K=40

# ollama_embedding -- Top-p (nucleus) sampling parameter (0.0-1.0)
# OLLAMA_EMBEDDING_TOP_P=0.9

# ollama_embedding -- Tail free sampling parameter (1.0 = disabled)
# OLLAMA_EMBEDDING_TFS_Z=1.0

# ollama_embedding -- Typical probability mass (1.0 = disabled)
# OLLAMA_EMBEDDING_TYPICAL_P=1.0

# ollama_embedding -- Minimum probability threshold (0.0 = disabled)
# OLLAMA_EMBEDDING_MIN_P=0.0

# ollama_embedding -- Number of tokens to consider for repetition penalty
# OLLAMA_EMBEDDING_REPEAT_LAST_N=64

# ollama_embedding -- Penalty for repetition (1.0 = no penalty)
# OLLAMA_EMBEDDING_REPEAT_PENALTY=1.1

# ollama_embedding -- Penalty for token presence (-2.0 to 2.0)
# OLLAMA_EMBEDDING_PRESENCE_PENALTY=0.0

# ollama_embedding -- Penalty for token frequency (-2.0 to 2.0)
# OLLAMA_EMBEDDING_FREQUENCY_PENALTY=0.0

# ollama_embedding -- Mirostat sampling algorithm (0=disabled, 1=Mirostat 1.0, 2=Mirostat 2.0)
# OLLAMA_EMBEDDING_MIROSTAT=0

# ollama_embedding -- Mirostat target entropy
# OLLAMA_EMBEDDING_MIROSTAT_TAU=5.0

# ollama_embedding -- Mirostat learning rate
# OLLAMA_EMBEDDING_MIROSTAT_ETA=0.1

# ollama_embedding -- Enable NUMA optimization
# OLLAMA_EMBEDDING_NUMA=False

# ollama_embedding -- Batch size for processing
# OLLAMA_EMBEDDING_NUM_BATCH=512

# ollama_embedding -- Number of GPUs to use (-1 for auto)
# OLLAMA_EMBEDDING_NUM_GPU=-1

# ollama_embedding -- Main GPU index
# OLLAMA_EMBEDDING_MAIN_GPU=0

# ollama_embedding -- Optimize for low VRAM
# OLLAMA_EMBEDDING_LOW_VRAM=False

# ollama_embedding -- Number of CPU threads (0 for auto)
# OLLAMA_EMBEDDING_NUM_THREAD=0

# ollama_embedding -- Use half-precision for key/value cache
# OLLAMA_EMBEDDING_F16_KV=True

# ollama_embedding -- Return logits for all tokens
# OLLAMA_EMBEDDING_LOGITS_ALL=False

# ollama_embedding -- Only load vocabulary
# OLLAMA_EMBEDDING_VOCAB_ONLY=False

# ollama_embedding -- Use memory mapping for model files
# OLLAMA_EMBEDDING_USE_MMAP=True

# ollama_embedding -- Lock model in memory
# OLLAMA_EMBEDDING_USE_MLOCK=False

# ollama_embedding -- Only use for embeddings
# OLLAMA_EMBEDDING_EMBEDDING_ONLY=False

# ollama_embedding -- Penalize newline tokens
# OLLAMA_EMBEDDING_PENALIZE_NEWLINE=True

# ollama_embedding -- Stop sequences (comma-separated string)
# OLLAMA_EMBEDDING_STOP=

# ollama_llm -- Context window size (number of tokens)
# OLLAMA_LLM_NUM_CTX=4096

# ollama_llm -- Maximum number of tokens to predict
# OLLAMA_LLM_NUM_PREDICT=128

# ollama_llm -- Number of tokens to keep from the initial prompt
# OLLAMA_LLM_NUM_KEEP=0

# ollama_llm -- Random seed for generation (-1 for random)
# OLLAMA_LLM_SEED=-1

# ollama_llm -- Controls randomness (0.0-2.0, higher = more creative)
# OLLAMA_LLM_TEMPERATURE=0.8

# ollama_llm -- Top-k sampling parameter (0 = disabled)
# OLLAMA_LLM_TOP_K=40

# ollama_llm -- Top-p (nucleus) sampling parameter (0.0-1.0)
# OLLAMA_LLM_TOP_P=0.9

# ollama_llm -- Tail free sampling parameter (1.0 = disabled)
# OLLAMA_LLM_TFS_Z=1.0

# ollama_llm -- Typical probability mass (1.0 = disabled)
# OLLAMA_LLM_TYPICAL_P=1.0

# ollama_llm -- Minimum probability threshold (0.0 = disabled)
# OLLAMA_LLM_MIN_P=0.0

# ollama_llm -- Number of tokens to consider for repetition penalty
# OLLAMA_LLM_REPEAT_LAST_N=64

# ollama_llm -- Penalty for repetition (1.0 = no penalty)
# OLLAMA_LLM_REPEAT_PENALTY=1.1

# ollama_llm -- Penalty for token presence (-2.0 to 2.0)
# OLLAMA_LLM_PRESENCE_PENALTY=0.0

# ollama_llm -- Penalty for token frequency (-2.0 to 2.0)
# OLLAMA_LLM_FREQUENCY_PENALTY=0.0

# ollama_llm -- Mirostat sampling algorithm (0=disabled, 1=Mirostat 1.0, 2=Mirostat 2.0)
# OLLAMA_LLM_MIROSTAT=0

# ollama_llm -- Mirostat target entropy
# OLLAMA_LLM_MIROSTAT_TAU=5.0

# ollama_llm -- Mirostat learning rate
# OLLAMA_LLM_MIROSTAT_ETA=0.1

# ollama_llm -- Enable NUMA optimization
# OLLAMA_LLM_NUMA=False

# ollama_llm -- Batch size for processing
# OLLAMA_LLM_NUM_BATCH=512

# ollama_llm -- Number of GPUs to use (-1 for auto)
# OLLAMA_LLM_NUM_GPU=-1

# ollama_llm -- Main GPU index
# OLLAMA_LLM_MAIN_GPU=0

# ollama_llm -- Optimize for low VRAM
# OLLAMA_LLM_LOW_VRAM=False

# ollama_llm -- Number of CPU threads (0 for auto)
# OLLAMA_LLM_NUM_THREAD=0

# ollama_llm -- Use half-precision for key/value cache
# OLLAMA_LLM_F16_KV=True

# ollama_llm -- Return logits for all tokens
# OLLAMA_LLM_LOGITS_ALL=False

# ollama_llm -- Only load vocabulary
# OLLAMA_LLM_VOCAB_ONLY=False

# ollama_llm -- Use memory mapping for model files
# OLLAMA_LLM_USE_MMAP=True

# ollama_llm -- Lock model in memory
# OLLAMA_LLM_USE_MLOCK=False

# ollama_llm -- Only use for embeddings
# OLLAMA_LLM_EMBEDDING_ONLY=False

# ollama_llm -- Penalize newline tokens
# OLLAMA_LLM_PENALIZE_NEWLINE=True

# ollama_llm -- Stop sequences (comma-separated string)
# OLLAMA_LLM_STOP=

#
# End of .env entries for LightRAG binding options
################################################################################
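Each entry above follows a fixed naming scheme: the binding name plus the option name, upper-cased, so ollama_llm's num_ctx becomes OLLAMA_LLM_NUM_CTX. The sample can be regenerated at any time, as the file header says; a short sketch, assuming an installed LightRAG with this patch applied:

```python
from lightrag.llm.binding_options import BindingOptions

# Reprint the autogenerated sample shown above (same output as
# running `python -m lightrag.llm.binding_options`).
print(BindingOptions.generate_dot_env_sample())
```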
@@ -69,8 +69,8 @@ LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_HOST=http://localhost:11434
 # LLM_BINDING_API_KEY=your_api_key
-### Ollama server context token count (based on your Ollama server capacity)
-OLLAMA_NUM_CTX=8192
+### Ollama server context token count (must be larger than MAX_TOTAL_TOKENS+2000)
+OLLAMA_LLM_NUM_CTX=8192
 
 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434
@@ -69,8 +69,8 @@ LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_HOST=http://localhost:11434
 # LLM_BINDING_API_KEY=your_api_key
-### Ollama Server context length
-OLLAMA_NUM_CTX=8192
+### Ollama Server context length (Must be larger than MAX_TOTAL_TOKENS+2000)
+OLLAMA_LLM_NUM_CTX=16384
 
 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434
@@ -457,6 +457,10 @@ You cannot change storage implementation selection after adding documents to Lig
 | --embedding-binding | ollama | Embedding binding type (lollms, ollama, openai, azure_openai) |
 | --auto-scan-at-startup| - | Scan input directory for new files and start indexing |
 
+### Additional Ollama Binding Options
+
+When using `--llm-binding ollama` or `--embedding-binding ollama`, additional Ollama-specific configuration options are available. To see all available Ollama binding options, add `--help` to the command line when starting the server. These additional options allow for fine-tuning of Ollama model parameters and connection settings.
+
 ### .env Examples
 
 ```bash
@@ -481,6 +485,7 @@ LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your-api-key
 
 ### Embedding Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
+# see also env.ollama-binding-options.example for fine tuning ollama
 EMBEDDING_MODEL=bge-m3:latest
 EMBEDDING_DIM=1024
 EMBEDDING_BINDING=ollama
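For a quick look at what `--help` adds without launching the server, the option groups can be registered on a bare parser; a sketch assuming an installed LightRAG (the prog name is illustrative):

```python
from argparse import ArgumentParser

from lightrag.llm.binding_options import OllamaEmbeddingOptions, OllamaLLMOptions

# Register both Ollama option groups and print the generated help text,
# e.g. --ollama-llm-num_ctx, --ollama-embedding-num_ctx, ...
parser = ArgumentParser(prog="lightrag-server")  # illustrative prog name
OllamaLLMOptions.add_args(parser)
OllamaEmbeddingOptions.add_args(parser)
parser.print_help()
```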
@@ -7,6 +7,8 @@ import argparse
 import logging
 from dotenv import load_dotenv
 from lightrag.utils import get_env_value
+from lightrag.llm.binding_options import OllamaEmbeddingOptions, OllamaLLMOptions
+import sys
 
 from lightrag.constants import (
     DEFAULT_WOKERS,
@@ -248,6 +250,29 @@ def parse_args() -> argparse.Namespace:
         help="Embedding binding type (default: from env or ollama)",
     )
 
+    # Conditionally add binding options defined in binding_options module
+    # This will add command line arguments for all binding options (e.g., --ollama-embedding-num_ctx)
+    # and corresponding environment variables (e.g., OLLAMA_EMBEDDING_NUM_CTX)
+    if "--llm-binding" in sys.argv:
+        try:
+            idx = sys.argv.index("--llm-binding")
+            if idx + 1 < len(sys.argv) and sys.argv[idx + 1] == "ollama":
+                OllamaLLMOptions.add_args(parser)
+        except IndexError:
+            pass
+    elif os.environ.get("LLM_BINDING") == "ollama":
+        OllamaLLMOptions.add_args(parser)
+
+    if "--embedding-binding" in sys.argv:
+        try:
+            idx = sys.argv.index("--embedding-binding")
+            if idx + 1 < len(sys.argv) and sys.argv[idx + 1] == "ollama":
+                OllamaEmbeddingOptions.add_args(parser)
+        except IndexError:
+            pass
+    elif os.environ.get("EMBEDDING_BINDING") == "ollama":
+        OllamaEmbeddingOptions.add_args(parser)
+
     args = parser.parse_args()
 
     # convert relative path to absolute path
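The registration logic peeks at sys.argv before the parser runs, falling back to the LLM_BINDING / EMBEDDING_BINDING environment variables, so Ollama-specific flags only exist when the ollama binding is actually selected. A condensed restatement of the same check (the helper name is ours, not the patch's):

```python
import os
import sys


def ollama_selected(flag: str, env_var: str) -> bool:
    """Hypothetical helper mirroring the patch: True if the given binding
    flag is set to 'ollama' on the command line or via the environment."""
    if flag in sys.argv:
        idx = sys.argv.index(flag)
        return idx + 1 < len(sys.argv) and sys.argv[idx + 1] == "ollama"
    return os.environ.get(env_var) == "ollama"


# Usage mirroring parse_args():
#   if ollama_selected("--llm-binding", "LLM_BINDING"):
#       OllamaLLMOptions.add_args(parser)
```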
@@ -379,7 +404,8 @@ def update_uvicorn_mode_config():
         global_args.workers = 1
         # Log warning directly here
         logging.warning(
-            f"In uvicorn mode, workers parameter was set to {original_workers}. Forcing workers=1"
+            f"In uvicorn mode, workers parameter was set to {
+                original_workers}. Forcing workers=1"
         )
 
 
@@ -209,6 +209,7 @@ def create_app(args):
         from lightrag.llm.lollms import lollms_model_complete, lollms_embed
     if args.llm_binding == "ollama" or args.embedding_binding == "ollama":
         from lightrag.llm.ollama import ollama_model_complete, ollama_embed
+        from lightrag.llm.binding_options import OllamaLLMOptions
     if args.llm_binding == "openai" or args.embedding_binding == "openai":
         from lightrag.llm.openai import openai_complete_if_cache, openai_embed
     if args.llm_binding == "azure_openai" or args.embedding_binding == "azure_openai":
@@ -219,6 +220,7 @@ def create_app(args):
     if args.llm_binding_host == "openai-ollama" or args.embedding_binding == "ollama":
         from lightrag.llm.openai import openai_complete_if_cache
         from lightrag.llm.ollama import ollama_embed
+        from lightrag.llm.binding_options import OllamaEmbeddingOptions
     if args.embedding_binding == "jina":
         from lightrag.llm.jina import jina_embed
 
@@ -284,6 +286,7 @@ def create_app(args):
             embed_model=args.embedding_model,
             host=args.embedding_binding_host,
             api_key=args.embedding_binding_api_key,
+            options=OllamaEmbeddingOptions.options_dict(args),
         )
         if args.embedding_binding == "ollama"
         else azure_openai_embed(
@@ -360,7 +363,7 @@ def create_app(args):
         llm_model_kwargs={
             "host": args.llm_binding_host,
             "timeout": args.timeout,
-            "options": {"num_ctx": args.ollama_num_ctx},
+            "options": OllamaLLMOptions.options_dict(args),
             "api_key": args.llm_binding_api_key,
         }
         if args.llm_binding == "lollms" or args.llm_binding == "ollama"
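End to end, a configured flag lands in `llm_model_kwargs["options"]` with its binding prefix stripped, replacing the old single hard-coded `num_ctx`. A small sketch under the diff's names (the host value is illustrative):

```python
from argparse import ArgumentParser

from lightrag.llm.binding_options import OllamaLLMOptions

parser = ArgumentParser()
OllamaLLMOptions.add_args(parser)
args = parser.parse_args(["--ollama-llm-num_ctx", "16384"])

# Only explicitly-set options survive (unset ones default to argparse.SUPPRESS),
# so this should print {'num_ctx': 16384}.
llm_model_kwargs = {
    "host": "http://localhost:11434",  # illustrative host
    "options": OllamaLLMOptions.options_dict(args),
}
print(llm_model_kwargs["options"])
```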
lightrag/llm/binding_options.py (new file, 446 lines)
@@ -0,0 +1,446 @@
"""
Module that implements containers for specific LLM bindings.

This module provides container implementations for various Large Language Model
bindings and integrations.
"""

from argparse import ArgumentParser, Namespace
import argparse
from dataclasses import asdict, dataclass
from typing import Any, ClassVar

from lightrag.utils import get_env_value


# =============================================================================
# BindingOptions Base Class
# =============================================================================
#
# The BindingOptions class serves as the foundation for all LLM provider bindings
# in LightRAG. It provides a standardized framework for:
#
# 1. Configuration Management:
#    - Defines how each LLM provider's configuration parameters are structured
#    - Handles default values and type information for each parameter
#    - Maps configuration options to command-line arguments and environment variables
#
# 2. Environment Integration:
#    - Automatically generates environment variable names from binding parameters
#    - Provides methods to create sample .env files for easy configuration
#    - Supports configuration via environment variables with fallback to defaults
#
# 3. Command-Line Interface:
#    - Dynamically generates command-line arguments for all registered bindings
#    - Maintains consistent naming conventions across different LLM providers
#    - Provides help text and type validation for each configuration option
#
# 4. Extensibility:
#    - Uses class introspection to automatically discover all binding subclasses
#    - Requires minimal boilerplate code when adding new LLM provider bindings
#    - Maintains separation of concerns between different provider configurations
#
# This design pattern ensures that adding support for a new LLM provider requires
# only defining the provider-specific parameters and help text, while the base
# class handles all the common functionality for argument parsing, environment
# variable handling, and configuration management.
#
# Instances of a derived class of BindingOptions can be used to store multiple
# runtime configurations of options for a single LLM provider, using the
# asdict() method to convert the options to a dictionary.
#
# =============================================================================
@dataclass
class BindingOptions:
    """Base class for binding options."""

    # mandatory name of binding
    _binding_name: ClassVar[str]

    # optional help message for each option
    _help: ClassVar[dict[str, str]]

    @staticmethod
    def _all_class_vars(klass: type, include_inherited=True) -> dict[str, Any]:
        """Return class variables, optionally including inherited ones"""
        if include_inherited:
            # Get all class variables from MRO
            vars_dict = {}
            for base in reversed(klass.__mro__[:-1]):  # Exclude 'object'
                vars_dict.update(
                    {
                        k: v
                        for k, v in base.__dict__.items()
                        if (
                            not k.startswith("_")
                            and not callable(v)
                            and not isinstance(v, classmethod)
                        )
                    }
                )
        else:
            # Only direct class variables
            vars_dict = {
                k: v
                for k, v in klass.__dict__.items()
                if (
                    not k.startswith("_")
                    and not callable(v)
                    and not isinstance(v, classmethod)
                )
            }

        return vars_dict

    @classmethod
    def add_args(cls, parser: ArgumentParser):
        group = parser.add_argument_group(f"{cls._binding_name} binding options")
        for arg_item in cls.args_env_name_type_value():
            group.add_argument(
                f"--{arg_item['argname']}",
                type=arg_item["type"],
                default=get_env_value(f"{arg_item['env_name']}", argparse.SUPPRESS),
                help=arg_item["help"],
            )

    @classmethod
    def args_env_name_type_value(cls):
        args_prefix = f"{cls._binding_name}".replace("_", "-")
        env_var_prefix = f"{cls._binding_name}_".upper()
        class_vars = {
            key: value
            for key, value in cls._all_class_vars(cls).items()
            if not callable(value) and not key.startswith("_")
        }
        help = cls._help

        for class_var in class_vars:
            argdef = {
                "argname": f"{args_prefix}-{class_var}",
                "env_name": f"{env_var_prefix}{class_var.upper()}",
                "type": type(class_vars[class_var]),
                "default": class_vars[class_var],
                "help": f"{cls._binding_name} -- " + help.get(class_var, ""),
            }

            yield argdef

    @classmethod
    def generate_dot_env_sample(cls):
        from io import StringIO

        sample_top = (
            "#" * 80
            + "\n"
            + (
                "# Autogenerated .env entries list for LightRAG binding options\n"
                "#\n"
                "# To generate run:\n"
                "# $ python -m lightrag.llm.binding_options\n"
            )
            + "#" * 80
            + "\n"
        )

        sample_bottom = (
            ("#\n# End of .env entries for LightRAG binding options\n")
            + "#" * 80
            + "\n"
        )

        sample_stream = StringIO()
        sample_stream.write(sample_top)
        for klass in cls.__subclasses__():
            for arg_item in klass.args_env_name_type_value():
                if arg_item["help"]:
                    sample_stream.write(f"# {arg_item['help']}\n")
                sample_stream.write(
                    f"# {arg_item['env_name']}={arg_item['default']}\n\n"
                )

        sample_stream.write(sample_bottom)
        return sample_stream.getvalue()

    @classmethod
    def options_dict(cls, args: Namespace) -> dict[str, Any]:
        """
        Extract options dictionary for a specific binding from parsed arguments.

        This method filters the parsed command-line arguments to return only those
        that belong to the specific binding class. It removes the binding prefix
        from argument names to create a clean options dictionary.

        Args:
            args (Namespace): Parsed command-line arguments containing all binding options

        Returns:
            dict[str, Any]: Dictionary mapping option names (without prefix) to their values

        Example:
            If args contains {'ollama_num_ctx': 512, 'other_option': 'value'}
            and this is called on OllamaOptions, it returns {'num_ctx': 512}
        """
        prefix = cls._binding_name + "_"
        skipchars = len(prefix)
        options = {
            key[skipchars:]: value
            for key, value in vars(args).items()
            if key.startswith(prefix)
        }

        return options

    def asdict(self) -> dict[str, Any]:
        """
        Convert an instance of binding options to a dictionary.

        This method uses dataclasses.asdict() to convert the dataclass instance
        into a dictionary representation, including all its fields and values.

        Returns:
            dict[str, Any]: Dictionary representation of the binding options instance
        """
        return asdict(self)


# =============================================================================
# Binding Options for Different LLM Providers
# =============================================================================
#
# This section contains dataclass definitions for various LLM provider options.
# Each binding option class inherits from BindingOptions and defines:
# - _binding_name: Unique identifier for the binding
# - Configuration parameters with default values
# - _help: Dictionary mapping parameter names to help descriptions
#
# To add a new binding:
# 1. Create a new dataclass inheriting from BindingOptions
# 2. Set the _binding_name class variable
# 3. Define configuration parameters as class attributes
# 4. Add corresponding help strings in the _help dictionary
#
# =============================================================================


# =============================================================================
# Binding Options for Ollama
# =============================================================================
#
# Ollama binding options provide configuration for the Ollama local LLM server.
# These options control model behavior, sampling parameters, hardware utilization,
# and performance settings. The parameters are based on Ollama's API specification
# and provide fine-grained control over model inference and generation.
#
# The _OllamaOptionsMixin defines the complete set of available options, while
# OllamaEmbeddingOptions and OllamaLLMOptions provide specialized configurations
# for embedding and language model tasks respectively.
# =============================================================================
@dataclass
class _OllamaOptionsMixin:
    """Options for Ollama bindings."""

    # Core context and generation parameters
    num_ctx: int = 32768  # Context window size (number of tokens)
    num_predict: int = 128  # Maximum number of tokens to predict
    num_keep: int = 0  # Number of tokens to keep from the initial prompt
    seed: int = -1  # Random seed for generation (-1 for random)

    # Sampling parameters
    temperature: float = 0.8  # Controls randomness (0.0-2.0)
    top_k: int = 40  # Top-k sampling parameter
    top_p: float = 0.9  # Top-p (nucleus) sampling parameter
    tfs_z: float = 1.0  # Tail free sampling parameter
    typical_p: float = 1.0  # Typical probability mass
    min_p: float = 0.0  # Minimum probability threshold

    # Repetition control
    repeat_last_n: int = 64  # Number of tokens to consider for repetition penalty
    repeat_penalty: float = 1.1  # Penalty for repetition
    presence_penalty: float = 0.0  # Penalty for token presence
    frequency_penalty: float = 0.0  # Penalty for token frequency

    # Mirostat sampling
    mirostat: int = 0  # Mirostat sampling algorithm (0=disabled, 1=Mirostat 1.0, 2=Mirostat 2.0)
    mirostat_tau: float = 5.0  # Mirostat target entropy
    mirostat_eta: float = 0.1  # Mirostat learning rate

    # Hardware and performance parameters
    numa: bool = False  # Enable NUMA optimization
    num_batch: int = 512  # Batch size for processing
    num_gpu: int = -1  # Number of GPUs to use (-1 for auto)
    main_gpu: int = 0  # Main GPU index
    low_vram: bool = False  # Optimize for low VRAM
    num_thread: int = 0  # Number of CPU threads (0 for auto)

    # Memory and model parameters
    f16_kv: bool = True  # Use half-precision for key/value cache
    logits_all: bool = False  # Return logits for all tokens
    vocab_only: bool = False  # Only load vocabulary
    use_mmap: bool = True  # Use memory mapping for model files
    use_mlock: bool = False  # Lock model in memory
    embedding_only: bool = False  # Only use for embeddings

    # Output control
    penalize_newline: bool = True  # Penalize newline tokens
    stop: str = ""  # Stop sequences (comma-separated)

    # optional help strings
    _help: ClassVar[dict[str, str]] = {
        "num_ctx": "Context window size (number of tokens)",
        "num_predict": "Maximum number of tokens to predict",
        "num_keep": "Number of tokens to keep from the initial prompt",
        "seed": "Random seed for generation (-1 for random)",
        "temperature": "Controls randomness (0.0-2.0, higher = more creative)",
        "top_k": "Top-k sampling parameter (0 = disabled)",
        "top_p": "Top-p (nucleus) sampling parameter (0.0-1.0)",
        "tfs_z": "Tail free sampling parameter (1.0 = disabled)",
        "typical_p": "Typical probability mass (1.0 = disabled)",
        "min_p": "Minimum probability threshold (0.0 = disabled)",
        "repeat_last_n": "Number of tokens to consider for repetition penalty",
        "repeat_penalty": "Penalty for repetition (1.0 = no penalty)",
        "presence_penalty": "Penalty for token presence (-2.0 to 2.0)",
        "frequency_penalty": "Penalty for token frequency (-2.0 to 2.0)",
        "mirostat": "Mirostat sampling algorithm (0=disabled, 1=Mirostat 1.0, 2=Mirostat 2.0)",
        "mirostat_tau": "Mirostat target entropy",
        "mirostat_eta": "Mirostat learning rate",
        "numa": "Enable NUMA optimization",
        "num_batch": "Batch size for processing",
        "num_gpu": "Number of GPUs to use (-1 for auto)",
        "main_gpu": "Main GPU index",
        "low_vram": "Optimize for low VRAM",
        "num_thread": "Number of CPU threads (0 for auto)",
        "f16_kv": "Use half-precision for key/value cache",
        "logits_all": "Return logits for all tokens",
        "vocab_only": "Only load vocabulary",
        "use_mmap": "Use memory mapping for model files",
        "use_mlock": "Lock model in memory",
        "embedding_only": "Only use for embeddings",
        "penalize_newline": "Penalize newline tokens",
        "stop": "Stop sequences (comma-separated string)",
    }


# =============================================================================
# Ollama Binding Options - Specialized Configurations
# =============================================================================
#
# This section defines specialized binding option classes for different Ollama
# use cases. Both classes inherit from _OllamaOptionsMixin to share the complete
# set of Ollama configuration parameters, while providing distinct binding names
# for command-line argument generation and environment variable handling.
#
# OllamaEmbeddingOptions: Specialized for embedding tasks
# OllamaLLMOptions: Specialized for language model/chat tasks
#
# Each class maintains its own binding name prefix, allowing users to configure
# embedding and LLM options independently when both are used in the same application.
# =============================================================================


@dataclass
class OllamaEmbeddingOptions(_OllamaOptionsMixin, BindingOptions):
    """Options for Ollama embeddings with specialized configuration for embedding tasks."""

    # mandatory name of binding
    _binding_name: ClassVar[str] = "ollama_embedding"


@dataclass
class OllamaLLMOptions(_OllamaOptionsMixin, BindingOptions):
    """Options for Ollama LLM with specialized configuration for LLM tasks."""

    # mandatory name of binding
    _binding_name: ClassVar[str] = "ollama_llm"


# =============================================================================
# Additional LLM Provider Bindings
# =============================================================================
#
# This section is where you can add binding options for other LLM providers.
# Each new binding should follow the same pattern as the Ollama bindings above:
#
# 1. Create a dataclass that inherits from BindingOptions
# 2. Set a unique _binding_name class variable (e.g., "openai", "anthropic")
# 3. Define configuration parameters as class attributes with default values
# 4. Add a _help class variable with descriptions for each parameter
#
# Example template for a new provider:
#
# @dataclass
# class NewProviderOptions(BindingOptions):
#     """Options for NewProvider LLM binding."""
#
#     _binding_name: ClassVar[str] = "newprovider"
#
#     # Configuration parameters
#     api_key: str = ""
#     max_tokens: int = 1000
#     model: str = "default-model"
#
#     # Help descriptions
#     _help: ClassVar[dict[str, str]] = {
#         "api_key": "API key for authentication",
#         "max_tokens": "Maximum tokens to generate",
#         "model": "Model name to use",
#     }
#
# =============================================================================

# TODO: Add binding options for additional LLM providers here
# Common providers to consider: OpenAI, Anthropic, Cohere, Hugging Face, etc.

# =============================================================================
# Main Section - For Testing and Sample Generation
# =============================================================================
#
# When run as a script, this module:
# 1. Generates and prints a sample .env file with all binding options
# 2. If "test" argument is provided, demonstrates argument parsing with Ollama binding
#
# Usage:
#   python -m lightrag.llm.binding_options          # Generate .env sample
#   python -m lightrag.llm.binding_options test     # Test argument parsing
#
# =============================================================================

if __name__ == "__main__":
    import sys
    import dotenv
    from io import StringIO

    print(BindingOptions.generate_dot_env_sample())

    env_strstream = StringIO(
        ("OLLAMA_LLM_TEMPERATURE=0.1\nOLLAMA_EMBEDDING_TEMPERATURE=0.2\n")
    )

    # Load environment variables from .env file
    dotenv.load_dotenv(stream=env_strstream)

    if len(sys.argv) > 1 and sys.argv[1] == "test":
        parser = ArgumentParser(description="Test Ollama binding")
        OllamaEmbeddingOptions.add_args(parser)
        OllamaLLMOptions.add_args(parser)
        args = parser.parse_args(
            [
                "--ollama-embedding-num_ctx",
                "1024",
                "--ollama-llm-num_ctx",
                "2048",
            ]
        )
        print(args)

        # test LLM options
        ollama_options = OllamaLLMOptions.options_dict(args)
        print(ollama_options)
        print(OllamaLLMOptions(num_ctx=30000).asdict())

        # test embedding options
        embedding_options = OllamaEmbeddingOptions.options_dict(args)
        print(embedding_options)
        print(OllamaEmbeddingOptions(**embedding_options).asdict())
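Note the interplay in add_args: the argparse default is `get_env_value(env_name, argparse.SUPPRESS)`, so an option absent from both the command line and the environment never enters the namespace, and options_dict() stays minimal. A sketch of the environment path, assuming an installed LightRAG:

```python
import os
from argparse import ArgumentParser

from lightrag.llm.binding_options import OllamaLLMOptions

os.environ["OLLAMA_LLM_TEMPERATURE"] = "0.2"  # stands in for a .env entry

parser = ArgumentParser()
OllamaLLMOptions.add_args(parser)
args = parser.parse_args([])

# Should print {'temperature': 0.2}; everything left unset is suppressed.
print(OllamaLLMOptions.options_dict(args))
```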
@@ -149,9 +149,11 @@ async def ollama_embed(texts: list[str], embed_model, **kwargs) -> np.ndarray:
     timeout = kwargs.pop("timeout", None) or 300  # Default time out 300s
 
     ollama_client = ollama.AsyncClient(host=host, timeout=timeout, headers=headers)
 
     try:
-        data = await ollama_client.embed(model=embed_model, input=texts)
+        options = kwargs.pop("options", {})
+        data = await ollama_client.embed(
+            model=embed_model, input=texts, options=options
+        )
         return np.array(data["embeddings"])
     except Exception as e:
         logger.error(f"Error in ollama_embed: {str(e)}")
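With the embed path now honoring options, callers can thread Ollama runtime settings through kwargs. A hedged usage sketch (model name and host are placeholders, and a local Ollama server with the model pulled is assumed):

```python
import asyncio

from lightrag.llm.ollama import ollama_embed

# The options dict is forwarded to AsyncClient.embed() by the patched function.
embeddings = asyncio.run(
    ollama_embed(
        ["hello world"],
        embed_model="bge-m3:latest",    # placeholder model
        host="http://localhost:11434",  # placeholder host
        options={"num_ctx": 4096},
    )
)
print(embeddings.shape)
```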