diff --git a/env.example b/env.example
index ab12cc41..79cd1a13 100644
--- a/env.example
+++ b/env.example
@@ -118,8 +118,9 @@ LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key
-### Set as num_ctx option for Ollama LLM
-# OLLAMA_NUM_CTX=32768
+### Set as num_ctx option for Ollama LLM (Must be larger than MAX_TOTAL_TOKENS+2000)
+### See also env.ollama-binding-options.example for fine-tuning Ollama
+# OLLAMA_LLM_NUM_CTX=32768
 
 ### Optional for Azure
 # AZURE_OPENAI_API_VERSION=2024-08-01-preview
diff --git a/env.ollama-binding-options.example b/env.ollama-binding-options.example
new file mode 100644
index 00000000..f3c8181e
--- /dev/null
+++ b/env.ollama-binding-options.example
@@ -0,0 +1,195 @@
+################################################################################
+# Autogenerated .env entries list for LightRAG binding options
+#
+# To generate run:
+# $ python -m lightrag.llm.binding_options
+################################################################################
+# ollama_embedding -- Context window size (number of tokens)
+# OLLAMA_EMBEDDING_NUM_CTX=32768
+
+# ollama_embedding -- Maximum number of tokens to predict
+# OLLAMA_EMBEDDING_NUM_PREDICT=128
+
+# ollama_embedding -- Number of tokens to keep from the initial prompt
+# OLLAMA_EMBEDDING_NUM_KEEP=0
+
+# ollama_embedding -- Random seed for generation (-1 for random)
+# OLLAMA_EMBEDDING_SEED=-1
+
+# ollama_embedding -- Controls randomness (0.0-2.0, higher = more creative)
+# OLLAMA_EMBEDDING_TEMPERATURE=0.8
+
+# ollama_embedding -- Top-k sampling parameter (0 = disabled)
+# OLLAMA_EMBEDDING_TOP_K=40
+
+# ollama_embedding -- Top-p (nucleus) sampling parameter (0.0-1.0)
+# OLLAMA_EMBEDDING_TOP_P=0.9
+
+# ollama_embedding -- Tail free sampling parameter (1.0 = disabled)
+# OLLAMA_EMBEDDING_TFS_Z=1.0
+
+# ollama_embedding -- Typical probability mass (1.0 = disabled)
+# OLLAMA_EMBEDDING_TYPICAL_P=1.0
+
+# ollama_embedding -- Minimum probability threshold (0.0 = disabled)
+# OLLAMA_EMBEDDING_MIN_P=0.0
+
+# ollama_embedding -- Number of tokens to consider for repetition penalty
+# OLLAMA_EMBEDDING_REPEAT_LAST_N=64
+
+# ollama_embedding -- Penalty for repetition (1.0 = no penalty)
+# OLLAMA_EMBEDDING_REPEAT_PENALTY=1.1
+
+# ollama_embedding -- Penalty for token presence (-2.0 to 2.0)
+# OLLAMA_EMBEDDING_PRESENCE_PENALTY=0.0
+
+# ollama_embedding -- Penalty for token frequency (-2.0 to 2.0)
+# OLLAMA_EMBEDDING_FREQUENCY_PENALTY=0.0
+
+# ollama_embedding -- Mirostat sampling algorithm (0=disabled, 1=Mirostat 1.0, 2=Mirostat 2.0)
+# OLLAMA_EMBEDDING_MIROSTAT=0
+
+# ollama_embedding -- Mirostat target entropy
+# OLLAMA_EMBEDDING_MIROSTAT_TAU=5.0
+
+# ollama_embedding -- Mirostat learning rate
+# OLLAMA_EMBEDDING_MIROSTAT_ETA=0.1
+
+# ollama_embedding -- Enable NUMA optimization
+# OLLAMA_EMBEDDING_NUMA=False
+
+# ollama_embedding -- Batch size for processing
+# OLLAMA_EMBEDDING_NUM_BATCH=512
+
+# ollama_embedding -- Number of GPUs to use (-1 for auto)
+# OLLAMA_EMBEDDING_NUM_GPU=-1
+
+# ollama_embedding -- Main GPU index
+# OLLAMA_EMBEDDING_MAIN_GPU=0
+
+# ollama_embedding -- Optimize for low VRAM
+# OLLAMA_EMBEDDING_LOW_VRAM=False
+
+# ollama_embedding -- Number of CPU threads (0 for auto)
+# OLLAMA_EMBEDDING_NUM_THREAD=0
+
+# ollama_embedding -- Use half-precision for key/value cache
+# OLLAMA_EMBEDDING_F16_KV=True
+
+# ollama_embedding -- Return logits for all tokens
+# OLLAMA_EMBEDDING_LOGITS_ALL=False
+
+# ollama_embedding -- Only load vocabulary
+# OLLAMA_EMBEDDING_VOCAB_ONLY=False
+
+# ollama_embedding -- Use memory mapping for model files
+# OLLAMA_EMBEDDING_USE_MMAP=True
+
+# ollama_embedding -- Lock model in memory
+# OLLAMA_EMBEDDING_USE_MLOCK=False
+
+# ollama_embedding -- Only use for embeddings
+# OLLAMA_EMBEDDING_EMBEDDING_ONLY=False
+
+# ollama_embedding -- Penalize newline tokens
+# OLLAMA_EMBEDDING_PENALIZE_NEWLINE=True
+
+# ollama_embedding -- Stop sequences (comma-separated string)
+# OLLAMA_EMBEDDING_STOP=
+
+# ollama_llm -- Context window size (number of tokens)
+# OLLAMA_LLM_NUM_CTX=32768
+
+# ollama_llm -- Maximum number of tokens to predict
+# OLLAMA_LLM_NUM_PREDICT=128
+
+# ollama_llm -- Number of tokens to keep from the initial prompt
+# OLLAMA_LLM_NUM_KEEP=0
+
+# ollama_llm -- Random seed for generation (-1 for random)
+# OLLAMA_LLM_SEED=-1
+
+# ollama_llm -- Controls randomness (0.0-2.0, higher = more creative)
+# OLLAMA_LLM_TEMPERATURE=0.8
+
+# ollama_llm -- Top-k sampling parameter (0 = disabled)
+# OLLAMA_LLM_TOP_K=40
+
+# ollama_llm -- Top-p (nucleus) sampling parameter (0.0-1.0)
+# OLLAMA_LLM_TOP_P=0.9
+
+# ollama_llm -- Tail free sampling parameter (1.0 = disabled)
+# OLLAMA_LLM_TFS_Z=1.0
+
+# ollama_llm -- Typical probability mass (1.0 = disabled)
+# OLLAMA_LLM_TYPICAL_P=1.0
+
+# ollama_llm -- Minimum probability threshold (0.0 = disabled)
+# OLLAMA_LLM_MIN_P=0.0
+
+# ollama_llm -- Number of tokens to consider for repetition penalty
+# OLLAMA_LLM_REPEAT_LAST_N=64
+
+# ollama_llm -- Penalty for repetition (1.0 = no penalty)
+# OLLAMA_LLM_REPEAT_PENALTY=1.1
+
+# ollama_llm -- Penalty for token presence (-2.0 to 2.0)
+# OLLAMA_LLM_PRESENCE_PENALTY=0.0
+
+# ollama_llm -- Penalty for token frequency (-2.0 to 2.0)
+# OLLAMA_LLM_FREQUENCY_PENALTY=0.0
+
+# ollama_llm -- Mirostat sampling algorithm (0=disabled, 1=Mirostat 1.0, 2=Mirostat 2.0)
+# OLLAMA_LLM_MIROSTAT=0
+
+# ollama_llm -- Mirostat target entropy
+# OLLAMA_LLM_MIROSTAT_TAU=5.0
+
+# ollama_llm -- Mirostat learning rate
+# OLLAMA_LLM_MIROSTAT_ETA=0.1
+
+# ollama_llm -- Enable NUMA optimization
+# OLLAMA_LLM_NUMA=False
+
+# ollama_llm -- Batch size for processing
+# OLLAMA_LLM_NUM_BATCH=512
+
+# ollama_llm -- Number of GPUs to use (-1 for auto)
+# OLLAMA_LLM_NUM_GPU=-1
+
+# ollama_llm -- Main GPU index
+# OLLAMA_LLM_MAIN_GPU=0
+
+# ollama_llm -- Optimize for low VRAM
+# OLLAMA_LLM_LOW_VRAM=False
+
+# ollama_llm -- Number of CPU threads (0 for auto)
+# OLLAMA_LLM_NUM_THREAD=0
+
+# ollama_llm -- Use half-precision for key/value cache
+# OLLAMA_LLM_F16_KV=True
+
+# ollama_llm -- Return logits for all tokens
+# OLLAMA_LLM_LOGITS_ALL=False
+
+# ollama_llm -- Only load vocabulary
+# OLLAMA_LLM_VOCAB_ONLY=False
+
+# ollama_llm -- Use memory mapping for model files
+# OLLAMA_LLM_USE_MMAP=True
+
+# ollama_llm -- Lock model in memory
+# OLLAMA_LLM_USE_MLOCK=False
+
+# ollama_llm -- Only use for embeddings
+# OLLAMA_LLM_EMBEDDING_ONLY=False
+
+# ollama_llm -- Penalize newline tokens
+# OLLAMA_LLM_PENALIZE_NEWLINE=True
+
+# ollama_llm -- Stop sequences (comma-separated string)
+# OLLAMA_LLM_STOP=
+
+#
+# End of .env entries for LightRAG binding options
+################################################################################
diff --git a/lightrag/api/README-zh.md b/lightrag/api/README-zh.md
index 95a7d660..b9b8245d 100644
--- a/lightrag/api/README-zh.md
+++ b/lightrag/api/README-zh.md
@@ -69,8 +69,8 @@ LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_HOST=http://localhost:11434
 # LLM_BINDING_API_KEY=your_api_key
-### Ollama 服务器上下文 token 数(基于您的 Ollama 服务器容量)
-OLLAMA_NUM_CTX=8192
+### Ollama 服务器上下文 token 数(必须大于 MAX_TOTAL_TOKENS+2000)
+OLLAMA_LLM_NUM_CTX=8192
 
 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434
diff --git a/lightrag/api/README.md b/lightrag/api/README.md
index 5b7f4dee..84a8eb4a 100644
--- a/lightrag/api/README.md
+++ b/lightrag/api/README.md
@@ -69,8 +69,8 @@ LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_HOST=http://localhost:11434
 # LLM_BINDING_API_KEY=your_api_key
-### Ollama Server context length
-OLLAMA_NUM_CTX=8192
+### Ollama Server context length (Must be larger than MAX_TOTAL_TOKENS+2000)
+OLLAMA_LLM_NUM_CTX=16384
 
 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434
@@ -457,6 +457,10 @@ You cannot change storage implementation selection after adding documents to Lig
 | --embedding-binding | ollama | Embedding binding type (lollms, ollama, openai, azure_openai) |
 | --auto-scan-at-startup| - | Scan input directory for new files and start indexing |
 
+### Additional Ollama Binding Options
+
+When using `--llm-binding ollama` or `--embedding-binding ollama`, additional Ollama-specific configuration options become available. To list them all, start the server with the Ollama binding selected (on the command line or via the `LLM_BINDING`/`EMBEDDING_BINDING` environment variables) and add `--help`; the options are only registered when the Ollama binding is active. These options allow fine-tuning of Ollama model parameters and connection settings.
+
 ### .env Examples
 
 ```bash
@@ -481,6 +485,7 @@
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your-api-key
 ### Embedding Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
+# See also env.ollama-binding-options.example for fine-tuning Ollama
 EMBEDDING_MODEL=bge-m3:latest
 EMBEDDING_DIM=1024
 EMBEDDING_BINDING=ollama
diff --git a/lightrag/api/config.py b/lightrag/api/config.py
index 2302981d..cf8c0492 100644
--- a/lightrag/api/config.py
+++ b/lightrag/api/config.py
@@ -7,6 +7,8 @@ import argparse
 import logging
 from dotenv import load_dotenv
 from lightrag.utils import get_env_value
+from lightrag.llm.binding_options import OllamaEmbeddingOptions, OllamaLLMOptions
+import sys
 
 from lightrag.constants import (
     DEFAULT_WOKERS,
@@ -248,6 +250,29 @@ def parse_args() -> argparse.Namespace:
         help="Embedding binding type (default: from env or ollama)",
     )
 
+    # Conditionally add binding options defined in the binding_options module.
+    # This registers command line arguments for all options of the selected binding
+    # (e.g., --ollama-embedding-num_ctx), with defaults read from the corresponding
+    # environment variables (e.g., OLLAMA_EMBEDDING_NUM_CTX).
+    if "--llm-binding" in sys.argv:
+        idx = sys.argv.index("--llm-binding")
+        if idx + 1 < len(sys.argv) and sys.argv[idx + 1] == "ollama":
+            OllamaLLMOptions.add_args(parser)
+    elif os.environ.get("LLM_BINDING") == "ollama":
+        OllamaLLMOptions.add_args(parser)
+
+    if "--embedding-binding" in sys.argv:
+        idx = sys.argv.index("--embedding-binding")
+        if idx + 1 < len(sys.argv) and sys.argv[idx + 1] == "ollama":
+            OllamaEmbeddingOptions.add_args(parser)
+    elif os.environ.get("EMBEDDING_BINDING") == "ollama":
+        OllamaEmbeddingOptions.add_args(parser)
+
     args = parser.parse_args()
 
     # convert relative path to absolute path
@@ -379,7 +404,8 @@ def update_uvicorn_mode_config():
         global_args.workers = 1
         # Log warning directly here
         logging.warning(
-            f"In uvicorn mode, workers parameter was set to {original_workers}. Forcing workers=1"
+            f"In uvicorn mode, workers parameter was set to {original_workers}. "
+            "Forcing workers=1"
Forcing workers=1" ) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 75e6526f..8845e06c 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -209,6 +209,7 @@ def create_app(args): from lightrag.llm.lollms import lollms_model_complete, lollms_embed if args.llm_binding == "ollama" or args.embedding_binding == "ollama": from lightrag.llm.ollama import ollama_model_complete, ollama_embed + from lightrag.llm.binding_options import OllamaLLMOptions if args.llm_binding == "openai" or args.embedding_binding == "openai": from lightrag.llm.openai import openai_complete_if_cache, openai_embed if args.llm_binding == "azure_openai" or args.embedding_binding == "azure_openai": @@ -219,6 +220,7 @@ def create_app(args): if args.llm_binding_host == "openai-ollama" or args.embedding_binding == "ollama": from lightrag.llm.openai import openai_complete_if_cache from lightrag.llm.ollama import ollama_embed + from lightrag.llm.binding_options import OllamaEmbeddingOptions if args.embedding_binding == "jina": from lightrag.llm.jina import jina_embed @@ -284,6 +286,7 @@ def create_app(args): embed_model=args.embedding_model, host=args.embedding_binding_host, api_key=args.embedding_binding_api_key, + options=OllamaEmbeddingOptions.options_dict(args), ) if args.embedding_binding == "ollama" else azure_openai_embed( @@ -360,7 +363,7 @@ def create_app(args): llm_model_kwargs={ "host": args.llm_binding_host, "timeout": args.timeout, - "options": {"num_ctx": args.ollama_num_ctx}, + "options": OllamaLLMOptions.options_dict(args), "api_key": args.llm_binding_api_key, } if args.llm_binding == "lollms" or args.llm_binding == "ollama" diff --git a/lightrag/llm/binding_options.py b/lightrag/llm/binding_options.py new file mode 100644 index 00000000..c4e873ea --- /dev/null +++ b/lightrag/llm/binding_options.py @@ -0,0 +1,446 @@ +""" +Module that implements containers for specific LLM bindings. + +This module provides container implementations for various Large Language Model +bindings and integrations. +""" + +from argparse import ArgumentParser, Namespace +import argparse +from dataclasses import asdict, dataclass +from typing import Any, ClassVar + +from lightrag.utils import get_env_value + + +# ============================================================================= +# BindingOptions Base Class +# ============================================================================= +# +# The BindingOptions class serves as the foundation for all LLM provider bindings +# in LightRAG. It provides a standardized framework for: +# +# 1. Configuration Management: +# - Defines how each LLM provider's configuration parameters are structured +# - Handles default values and type information for each parameter +# - Maps configuration options to command-line arguments and environment variables +# +# 2. Environment Integration: +# - Automatically generates environment variable names from binding parameters +# - Provides methods to create sample .env files for easy configuration +# - Supports configuration via environment variables with fallback to defaults +# +# 3. Command-Line Interface: +# - Dynamically generates command-line arguments for all registered bindings +# - Maintains consistent naming conventions across different LLM providers +# - Provides help text and type validation for each configuration option +# +# 4. 
+#    - Uses class introspection to automatically discover all binding subclasses
+#    - Requires minimal boilerplate code when adding new LLM provider bindings
+#    - Maintains separation of concerns between different provider configurations
+#
+# This design pattern ensures that adding support for a new LLM provider requires
+# only defining the provider-specific parameters and help text, while the base
+# class handles all the common functionality for argument parsing, environment
+# variable handling, and configuration management.
+#
+# Instances of a derived class of BindingOptions can be used to store multiple
+# runtime configurations of options for a single LLM provider, using the
+# asdict() method to convert the options to a dictionary.
+#
+# =============================================================================
+@dataclass
+class BindingOptions:
+    """Base class for binding options."""
+
+    # mandatory name of binding
+    _binding_name: ClassVar[str]
+
+    # optional help message for each option
+    _help: ClassVar[dict[str, str]]
+
+    @staticmethod
+    def _all_class_vars(klass: type, include_inherited=True) -> dict[str, Any]:
+        """Collect class variables, optionally including inherited ones."""
+        if include_inherited:
+            # Get all class variables from the MRO
+            vars_dict = {}
+            for base in reversed(klass.__mro__[:-1]):  # Exclude 'object'
+                vars_dict.update(
+                    {
+                        k: v
+                        for k, v in base.__dict__.items()
+                        if (
+                            not k.startswith("_")
+                            and not callable(v)
+                            and not isinstance(v, classmethod)
+                        )
+                    }
+                )
+        else:
+            # Only direct class variables
+            vars_dict = {
+                k: v
+                for k, v in klass.__dict__.items()
+                if (
+                    not k.startswith("_")
+                    and not callable(v)
+                    and not isinstance(v, classmethod)
+                )
+            }
+
+        return vars_dict
+
+    @classmethod
+    def add_args(cls, parser: ArgumentParser):
+        group = parser.add_argument_group(f"{cls._binding_name} binding options")
+        for arg_item in cls.args_env_name_type_value():
+            group.add_argument(
+                f"--{arg_item['argname']}",
+                type=arg_item["type"],
+                default=get_env_value(f"{arg_item['env_name']}", argparse.SUPPRESS),
+                help=arg_item["help"],
+            )
+
+    @classmethod
+    def args_env_name_type_value(cls):
+        args_prefix = f"{cls._binding_name}".replace("_", "-")
+        env_var_prefix = f"{cls._binding_name}_".upper()
+        class_vars = {
+            key: value
+            for key, value in cls._all_class_vars(cls).items()
+            if not callable(value) and not key.startswith("_")
+        }
+        help = cls._help
+
+        for class_var in class_vars:
+            argdef = {
+                "argname": f"{args_prefix}-{class_var}",
+                "env_name": f"{env_var_prefix}{class_var.upper()}",
+                "type": type(class_vars[class_var]),
+                "default": class_vars[class_var],
+                "help": f"{cls._binding_name} -- " + help.get(class_var, ""),
+            }
+
+            yield argdef
+
+    @classmethod
+    def generate_dot_env_sample(cls):
+        from io import StringIO
+
+        sample_top = (
+            "#" * 80
+            + "\n"
+            + (
+                "# Autogenerated .env entries list for LightRAG binding options\n"
+                "#\n"
+                "# To generate run:\n"
+                "# $ python -m lightrag.llm.binding_options\n"
+            )
+            + "#" * 80
+            + "\n"
+        )
+
+        sample_bottom = (
+            ("#\n# End of .env entries for LightRAG binding options\n")
+            + "#" * 80
+            + "\n"
+        )
+
+        sample_stream = StringIO()
+        sample_stream.write(sample_top)
+        for klass in cls.__subclasses__():
+            for arg_item in klass.args_env_name_type_value():
+                if arg_item["help"]:
+                    sample_stream.write(f"# {arg_item['help']}\n")
+                sample_stream.write(
+                    f"# {arg_item['env_name']}={arg_item['default']}\n\n"
+                )
+
+        sample_stream.write(sample_bottom)
+        return sample_stream.getvalue()
+
+    @classmethod
+    def options_dict(cls, args: Namespace) -> dict[str, Any]:
+        """
+        Extract the options dictionary for a specific binding from parsed arguments.
+
+        This method filters the parsed command-line arguments to return only those
+        that belong to the specific binding class. It removes the binding prefix
+        from argument names to create a clean options dictionary.
+
+        Args:
+            args (Namespace): Parsed command-line arguments containing all binding options
+
+        Returns:
+            dict[str, Any]: Dictionary mapping option names (without prefix) to their values
+
+        Example:
+            If args contains {'ollama_llm_num_ctx': 512, 'other_option': 'value'}
+            and this is called on OllamaLLMOptions, it returns {'num_ctx': 512}
+        """
+        prefix = cls._binding_name + "_"
+        skipchars = len(prefix)
+        options = {
+            key[skipchars:]: value
+            for key, value in vars(args).items()
+            if key.startswith(prefix)
+        }
+
+        return options
+
+    def asdict(self) -> dict[str, Any]:
+        """
+        Convert an instance of binding options to a dictionary.
+
+        This method uses dataclasses.asdict() to convert the dataclass instance
+        into a dictionary representation, including all its fields and values.
+
+        Returns:
+            dict[str, Any]: Dictionary representation of the binding options instance
+        """
+        return asdict(self)
+
+
+# =============================================================================
+# Binding Options for Different LLM Providers
+# =============================================================================
+#
+# This section contains dataclass definitions for various LLM provider options.
+# Each binding option class inherits from BindingOptions and defines:
+# - _binding_name: Unique identifier for the binding
+# - Configuration parameters with default values
+# - _help: Dictionary mapping parameter names to help descriptions
+#
+# To add a new binding:
+# 1. Create a new dataclass inheriting from BindingOptions
+# 2. Set the _binding_name class variable
+# 3. Define configuration parameters as class attributes
+# 4. Add corresponding help strings in the _help dictionary
+#
+# =============================================================================
+
+
+# =============================================================================
+# Binding Options for Ollama
+# =============================================================================
+#
+# Ollama binding options provide configuration for the Ollama local LLM server.
+# These options control model behavior, sampling parameters, hardware utilization,
+# and performance settings. The parameters are based on Ollama's API specification
+# and provide fine-grained control over model inference and generation.
+#
+# The _OllamaOptionsMixin defines the complete set of available options, while
+# OllamaEmbeddingOptions and OllamaLLMOptions provide specialized configurations
+# for embedding and language model tasks respectively.
+# =============================================================================
+@dataclass
+class _OllamaOptionsMixin:
+    """Options for Ollama bindings."""
+
+    # Core context and generation parameters
+    num_ctx: int = 32768  # Context window size (number of tokens)
+    num_predict: int = 128  # Maximum number of tokens to predict
+    num_keep: int = 0  # Number of tokens to keep from the initial prompt
+    seed: int = -1  # Random seed for generation (-1 for random)
+
+    # Sampling parameters
+    temperature: float = 0.8  # Controls randomness (0.0-2.0)
+    top_k: int = 40  # Top-k sampling parameter
+    top_p: float = 0.9  # Top-p (nucleus) sampling parameter
+    tfs_z: float = 1.0  # Tail free sampling parameter
+    typical_p: float = 1.0  # Typical probability mass
+    min_p: float = 0.0  # Minimum probability threshold
+
+    # Repetition control
+    repeat_last_n: int = 64  # Number of tokens to consider for repetition penalty
+    repeat_penalty: float = 1.1  # Penalty for repetition
+    presence_penalty: float = 0.0  # Penalty for token presence
+    frequency_penalty: float = 0.0  # Penalty for token frequency
+
+    # Mirostat sampling
+    mirostat: int = 0  # Mirostat sampling algorithm (0=disabled, 1=Mirostat 1.0, 2=Mirostat 2.0)
+    mirostat_tau: float = 5.0  # Mirostat target entropy
+    mirostat_eta: float = 0.1  # Mirostat learning rate
+
+    # Hardware and performance parameters
+    numa: bool = False  # Enable NUMA optimization
+    num_batch: int = 512  # Batch size for processing
+    num_gpu: int = -1  # Number of GPUs to use (-1 for auto)
+    main_gpu: int = 0  # Main GPU index
+    low_vram: bool = False  # Optimize for low VRAM
+    num_thread: int = 0  # Number of CPU threads (0 for auto)
+
+    # Memory and model parameters
+    f16_kv: bool = True  # Use half-precision for key/value cache
+    logits_all: bool = False  # Return logits for all tokens
+    vocab_only: bool = False  # Only load vocabulary
+    use_mmap: bool = True  # Use memory mapping for model files
+    use_mlock: bool = False  # Lock model in memory
+    embedding_only: bool = False  # Only use for embeddings
+
+    # Output control
+    penalize_newline: bool = True  # Penalize newline tokens
+    stop: str = ""  # Stop sequences (comma-separated)
+
+    # optional help strings
+    _help: ClassVar[dict[str, str]] = {
+        "num_ctx": "Context window size (number of tokens)",
+        "num_predict": "Maximum number of tokens to predict",
+        "num_keep": "Number of tokens to keep from the initial prompt",
+        "seed": "Random seed for generation (-1 for random)",
+        "temperature": "Controls randomness (0.0-2.0, higher = more creative)",
+        "top_k": "Top-k sampling parameter (0 = disabled)",
+        "top_p": "Top-p (nucleus) sampling parameter (0.0-1.0)",
+        "tfs_z": "Tail free sampling parameter (1.0 = disabled)",
+        "typical_p": "Typical probability mass (1.0 = disabled)",
+        "min_p": "Minimum probability threshold (0.0 = disabled)",
+        "repeat_last_n": "Number of tokens to consider for repetition penalty",
+        "repeat_penalty": "Penalty for repetition (1.0 = no penalty)",
+        "presence_penalty": "Penalty for token presence (-2.0 to 2.0)",
+        "frequency_penalty": "Penalty for token frequency (-2.0 to 2.0)",
+        "mirostat": "Mirostat sampling algorithm (0=disabled, 1=Mirostat 1.0, 2=Mirostat 2.0)",
+        "mirostat_tau": "Mirostat target entropy",
+        "mirostat_eta": "Mirostat learning rate",
+        "numa": "Enable NUMA optimization",
+        "num_batch": "Batch size for processing",
+        "num_gpu": "Number of GPUs to use (-1 for auto)",
+        "main_gpu": "Main GPU index",
+        "low_vram": "Optimize for low VRAM",
+        "num_thread": "Number of CPU threads (0 for auto)",
"f16_kv": "Use half-precision for key/value cache", + "logits_all": "Return logits for all tokens", + "vocab_only": "Only load vocabulary", + "use_mmap": "Use memory mapping for model files", + "use_mlock": "Lock model in memory", + "embedding_only": "Only use for embeddings", + "penalize_newline": "Penalize newline tokens", + "stop": "Stop sequences (comma-separated string)", + } + + +# ============================================================================= +# Ollama Binding Options - Specialized Configurations +# ============================================================================= +# +# This section defines specialized binding option classes for different Ollama +# use cases. Both classes inherit from OllamaOptionsMixin to share the complete +# set of Ollama configuration parameters, while providing distinct binding names +# for command-line argument generation and environment variable handling. +# +# OllamaEmbeddingOptions: Specialized for embedding tasks +# OllamaLLMOptions: Specialized for language model/chat tasks +# +# Each class maintains its own binding name prefix, allowing users to configure +# embedding and LLM options independently when both are used in the same application. +# ============================================================================= + + +@dataclass +class OllamaEmbeddingOptions(_OllamaOptionsMixin, BindingOptions): + """Options for Ollama embeddings with specialized configuration for embedding tasks.""" + + # mandatory name of binding + _binding_name: ClassVar[str] = "ollama_embedding" + + +@dataclass +class OllamaLLMOptions(_OllamaOptionsMixin, BindingOptions): + """Options for Ollama LLM with specialized configuration for LLM tasks.""" + + # mandatory name of binding + _binding_name: ClassVar[str] = "ollama_llm" + + +# ============================================================================= +# Additional LLM Provider Bindings +# ============================================================================= +# +# This section is where you can add binding options for other LLM providers. +# Each new binding should follow the same pattern as the Ollama bindings above: +# +# 1. Create a dataclass that inherits from BindingOptions +# 2. Set a unique _binding_name class variable (e.g., "openai", "anthropic") +# 3. Define configuration parameters as class attributes with default values +# 4. Add a _help class variable with descriptions for each parameter +# +# Example template for a new provider: +# +# @dataclass +# class NewProviderOptions(BindingOptions): +# """Options for NewProvider LLM binding.""" +# +# _binding_name: ClassVar[str] = "newprovider" +# +# # Configuration parameters +# api_key: str = "" +# max_tokens: int = 1000 +# model: str = "default-model" +# +# # Help descriptions +# _help: ClassVar[dict[str, str]] = { +# "api_key": "API key for authentication", +# "max_tokens": "Maximum tokens to generate", +# "model": "Model name to use", +# } +# +# ============================================================================= + +# TODO: Add binding options for additional LLM providers here +# Common providers to consider: OpenAI, Anthropic, Cohere, Hugging Face, etc. + +# ============================================================================= +# Main Section - For Testing and Sample Generation +# ============================================================================= +# +# When run as a script, this module: +# 1. Generates and prints a sample .env file with all binding options +# 2. 
If "test" argument is provided, demonstrates argument parsing with Ollama binding +# +# Usage: +# python -m lightrag.llm.binding_options # Generate .env sample +# python -m lightrag.llm.binding_options test # Test argument parsing +# +# ============================================================================= + +if __name__ == "__main__": + import sys + import dotenv + from io import StringIO + + print(BindingOptions.generate_dot_env_sample()) + + env_strstream = StringIO( + ("OLLAMA_LLM_TEMPERATURE=0.1\nOLLAMA_EMBEDDING_TEMPERATURE=0.2\n") + ) + + # Load environment variables from .env file + dotenv.load_dotenv(stream=env_strstream) + + if len(sys.argv) > 1 and sys.argv[1] == "test": + parser = ArgumentParser(description="Test Ollama binding") + OllamaEmbeddingOptions.add_args(parser) + OllamaLLMOptions.add_args(parser) + args = parser.parse_args( + [ + "--ollama-embedding-num_ctx", + "1024", + "--ollama-llm-num_ctx", + "2048", + ] + ) + print(args) + + # test LLM options + ollama_options = OllamaLLMOptions.options_dict(args) + print(ollama_options) + print(OllamaLLMOptions(num_ctx=30000).asdict()) + + # test embedding options + embedding_options = OllamaEmbeddingOptions.options_dict(args) + print(embedding_options) + print(OllamaEmbeddingOptions(**embedding_options).asdict()) diff --git a/lightrag/llm/ollama.py b/lightrag/llm/ollama.py index 5cd8a721..31888e9a 100644 --- a/lightrag/llm/ollama.py +++ b/lightrag/llm/ollama.py @@ -149,9 +149,11 @@ async def ollama_embed(texts: list[str], embed_model, **kwargs) -> np.ndarray: timeout = kwargs.pop("timeout", None) or 300 # Default time out 300s ollama_client = ollama.AsyncClient(host=host, timeout=timeout, headers=headers) - try: - data = await ollama_client.embed(model=embed_model, input=texts) + options = kwargs.pop("options", {}) + data = await ollama_client.embed( + model=embed_model, input=texts, options=options + ) return np.array(data["embeddings"]) except Exception as e: logger.error(f"Error in ollama_embed: {str(e)}")