Mirror of https://github.com/HKUDS/LightRAG.git, synced 2025-12-05 11:27:30 +00:00

Commit d26d413d97: Merge branch 'patch-1'
@@ -118,8 +118,9 @@ LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key
 
-### Set as num_ctx option for Ollama LLM
-# OLLAMA_NUM_CTX=32768
+### Set as num_ctx option for Ollama LLM (Must be larger than MAX_TOTAL_TOKENS+2000)
+### see also env.ollama-binding-options.example for fine tuning ollama
+# OLLAMA_LLM_NUM_CTX=32768
 
 ### Optional for Azure
 # AZURE_OPENAI_API_VERSION=2024-08-01-preview
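The new comment encodes a hard constraint: the Ollama context window must exceed MAX_TOTAL_TOKENS by at least 2000 tokens. A minimal sanity-check sketch of that rule, assuming both values are read from the environment (the 30000 fallback below is an illustrative placeholder, not a LightRAG default):

```python
import os

# Hypothetical startup check mirroring the comment in env.example.
num_ctx = int(os.environ.get("OLLAMA_LLM_NUM_CTX", "32768"))
max_total_tokens = int(os.environ.get("MAX_TOTAL_TOKENS", "30000"))  # assumed fallback
if num_ctx <= max_total_tokens + 2000:
    raise ValueError("OLLAMA_LLM_NUM_CTX must be larger than MAX_TOTAL_TOKENS+2000")
```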
env.ollama-binding-options.example (new file, 195 lines)
@@ -0,0 +1,195 @@
################################################################################
# Autogenerated .env entries list for LightRAG binding options
#
# To generate run:
# $ python -m lightrag.llm.binding_options
################################################################################
# ollama_embedding -- Context window size (number of tokens)
# OLLAMA_EMBEDDING_NUM_CTX=4096

# ollama_embedding -- Maximum number of tokens to predict
# OLLAMA_EMBEDDING_NUM_PREDICT=128

# ollama_embedding -- Number of tokens to keep from the initial prompt
# OLLAMA_EMBEDDING_NUM_KEEP=0

# ollama_embedding -- Random seed for generation (-1 for random)
# OLLAMA_EMBEDDING_SEED=-1

# ollama_embedding -- Controls randomness (0.0-2.0, higher = more creative)
# OLLAMA_EMBEDDING_TEMPERATURE=0.8

# ollama_embedding -- Top-k sampling parameter (0 = disabled)
# OLLAMA_EMBEDDING_TOP_K=40

# ollama_embedding -- Top-p (nucleus) sampling parameter (0.0-1.0)
# OLLAMA_EMBEDDING_TOP_P=0.9

# ollama_embedding -- Tail free sampling parameter (1.0 = disabled)
# OLLAMA_EMBEDDING_TFS_Z=1.0

# ollama_embedding -- Typical probability mass (1.0 = disabled)
# OLLAMA_EMBEDDING_TYPICAL_P=1.0

# ollama_embedding -- Minimum probability threshold (0.0 = disabled)
# OLLAMA_EMBEDDING_MIN_P=0.0

# ollama_embedding -- Number of tokens to consider for repetition penalty
# OLLAMA_EMBEDDING_REPEAT_LAST_N=64

# ollama_embedding -- Penalty for repetition (1.0 = no penalty)
# OLLAMA_EMBEDDING_REPEAT_PENALTY=1.1

# ollama_embedding -- Penalty for token presence (-2.0 to 2.0)
# OLLAMA_EMBEDDING_PRESENCE_PENALTY=0.0

# ollama_embedding -- Penalty for token frequency (-2.0 to 2.0)
# OLLAMA_EMBEDDING_FREQUENCY_PENALTY=0.0

# ollama_embedding -- Mirostat sampling algorithm (0=disabled, 1=Mirostat 1.0, 2=Mirostat 2.0)
# OLLAMA_EMBEDDING_MIROSTAT=0

# ollama_embedding -- Mirostat target entropy
# OLLAMA_EMBEDDING_MIROSTAT_TAU=5.0

# ollama_embedding -- Mirostat learning rate
# OLLAMA_EMBEDDING_MIROSTAT_ETA=0.1

# ollama_embedding -- Enable NUMA optimization
# OLLAMA_EMBEDDING_NUMA=False

# ollama_embedding -- Batch size for processing
# OLLAMA_EMBEDDING_NUM_BATCH=512

# ollama_embedding -- Number of GPUs to use (-1 for auto)
# OLLAMA_EMBEDDING_NUM_GPU=-1

# ollama_embedding -- Main GPU index
# OLLAMA_EMBEDDING_MAIN_GPU=0

# ollama_embedding -- Optimize for low VRAM
# OLLAMA_EMBEDDING_LOW_VRAM=False

# ollama_embedding -- Number of CPU threads (0 for auto)
# OLLAMA_EMBEDDING_NUM_THREAD=0

# ollama_embedding -- Use half-precision for key/value cache
# OLLAMA_EMBEDDING_F16_KV=True

# ollama_embedding -- Return logits for all tokens
# OLLAMA_EMBEDDING_LOGITS_ALL=False

# ollama_embedding -- Only load vocabulary
# OLLAMA_EMBEDDING_VOCAB_ONLY=False

# ollama_embedding -- Use memory mapping for model files
# OLLAMA_EMBEDDING_USE_MMAP=True

# ollama_embedding -- Lock model in memory
# OLLAMA_EMBEDDING_USE_MLOCK=False

# ollama_embedding -- Only use for embeddings
# OLLAMA_EMBEDDING_EMBEDDING_ONLY=False

# ollama_embedding -- Penalize newline tokens
# OLLAMA_EMBEDDING_PENALIZE_NEWLINE=True

# ollama_embedding -- Stop sequences (comma-separated string)
# OLLAMA_EMBEDDING_STOP=

# ollama_llm -- Context window size (number of tokens)
# OLLAMA_LLM_NUM_CTX=4096

# ollama_llm -- Maximum number of tokens to predict
# OLLAMA_LLM_NUM_PREDICT=128

# ollama_llm -- Number of tokens to keep from the initial prompt
# OLLAMA_LLM_NUM_KEEP=0

# ollama_llm -- Random seed for generation (-1 for random)
# OLLAMA_LLM_SEED=-1

# ollama_llm -- Controls randomness (0.0-2.0, higher = more creative)
# OLLAMA_LLM_TEMPERATURE=0.8

# ollama_llm -- Top-k sampling parameter (0 = disabled)
# OLLAMA_LLM_TOP_K=40

# ollama_llm -- Top-p (nucleus) sampling parameter (0.0-1.0)
# OLLAMA_LLM_TOP_P=0.9

# ollama_llm -- Tail free sampling parameter (1.0 = disabled)
# OLLAMA_LLM_TFS_Z=1.0

# ollama_llm -- Typical probability mass (1.0 = disabled)
# OLLAMA_LLM_TYPICAL_P=1.0

# ollama_llm -- Minimum probability threshold (0.0 = disabled)
# OLLAMA_LLM_MIN_P=0.0

# ollama_llm -- Number of tokens to consider for repetition penalty
# OLLAMA_LLM_REPEAT_LAST_N=64

# ollama_llm -- Penalty for repetition (1.0 = no penalty)
# OLLAMA_LLM_REPEAT_PENALTY=1.1

# ollama_llm -- Penalty for token presence (-2.0 to 2.0)
# OLLAMA_LLM_PRESENCE_PENALTY=0.0

# ollama_llm -- Penalty for token frequency (-2.0 to 2.0)
# OLLAMA_LLM_FREQUENCY_PENALTY=0.0

# ollama_llm -- Mirostat sampling algorithm (0=disabled, 1=Mirostat 1.0, 2=Mirostat 2.0)
# OLLAMA_LLM_MIROSTAT=0

# ollama_llm -- Mirostat target entropy
# OLLAMA_LLM_MIROSTAT_TAU=5.0

# ollama_llm -- Mirostat learning rate
# OLLAMA_LLM_MIROSTAT_ETA=0.1

# ollama_llm -- Enable NUMA optimization
# OLLAMA_LLM_NUMA=False

# ollama_llm -- Batch size for processing
# OLLAMA_LLM_NUM_BATCH=512

# ollama_llm -- Number of GPUs to use (-1 for auto)
# OLLAMA_LLM_NUM_GPU=-1

# ollama_llm -- Main GPU index
# OLLAMA_LLM_MAIN_GPU=0

# ollama_llm -- Optimize for low VRAM
# OLLAMA_LLM_LOW_VRAM=False

# ollama_llm -- Number of CPU threads (0 for auto)
# OLLAMA_LLM_NUM_THREAD=0

# ollama_llm -- Use half-precision for key/value cache
# OLLAMA_LLM_F16_KV=True

# ollama_llm -- Return logits for all tokens
# OLLAMA_LLM_LOGITS_ALL=False

# ollama_llm -- Only load vocabulary
# OLLAMA_LLM_VOCAB_ONLY=False

# ollama_llm -- Use memory mapping for model files
# OLLAMA_LLM_USE_MMAP=True

# ollama_llm -- Lock model in memory
# OLLAMA_LLM_USE_MLOCK=False

# ollama_llm -- Only use for embeddings
# OLLAMA_LLM_EMBEDDING_ONLY=False

# ollama_llm -- Penalize newline tokens
# OLLAMA_LLM_PENALIZE_NEWLINE=True

# ollama_llm -- Stop sequences (comma-separated string)
# OLLAMA_LLM_STOP=

#
# End of .env entries for LightRAG binding options
################################################################################
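Each entry above follows a fixed naming scheme: the binding name plus the option name, upper-cased, so ollama_llm's num_ctx becomes OLLAMA_LLM_NUM_CTX. The sample can be regenerated at any time, as the file header says; a short sketch, assuming an installed LightRAG with this patch applied:

```python
from lightrag.llm.binding_options import BindingOptions

# Reprint the autogenerated sample shown above (same output as
# running `python -m lightrag.llm.binding_options`).
print(BindingOptions.generate_dot_env_sample())
```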
@@ -69,8 +69,8 @@ LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_HOST=http://localhost:11434
 # LLM_BINDING_API_KEY=your_api_key
-### Ollama server context token count (based on your Ollama server capacity)
-OLLAMA_NUM_CTX=8192
+### Ollama server context token count (must be larger than MAX_TOTAL_TOKENS+2000)
+OLLAMA_LLM_NUM_CTX=8192
 
 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434
@@ -69,8 +69,8 @@ LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_HOST=http://localhost:11434
 # LLM_BINDING_API_KEY=your_api_key
-### Ollama Server context length
-OLLAMA_NUM_CTX=8192
+### Ollama Server context length (Must be larger than MAX_TOTAL_TOKENS+2000)
+OLLAMA_LLM_NUM_CTX=16384
 
 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434
@@ -457,6 +457,10 @@ You cannot change storage implementation selection after adding documents to Lig
 | --embedding-binding | ollama | Embedding binding type (lollms, ollama, openai, azure_openai) |
 | --auto-scan-at-startup| - | Scan input directory for new files and start indexing |
 
+### Additional Ollama Binding Options
+
+When using `--llm-binding ollama` or `--embedding-binding ollama`, additional Ollama-specific configuration options are available. To see all available Ollama binding options, add `--help` to the command line when starting the server. These additional options allow for fine-tuning of Ollama model parameters and connection settings.
+
 ### .env Examples
 
 ```bash
@@ -481,6 +485,7 @@ LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your-api-key
 
 ### Embedding Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
+# see also env.ollama-binding-options.example for fine tuning ollama
 EMBEDDING_MODEL=bge-m3:latest
 EMBEDDING_DIM=1024
 EMBEDDING_BINDING=ollama
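For a quick look at what `--help` adds without launching the server, the option groups can be registered on a bare parser; a sketch assuming an installed LightRAG (the prog name is illustrative):

```python
from argparse import ArgumentParser

from lightrag.llm.binding_options import OllamaEmbeddingOptions, OllamaLLMOptions

# Register both Ollama option groups and print the generated help text,
# e.g. --ollama-llm-num_ctx, --ollama-embedding-num_ctx, ...
parser = ArgumentParser(prog="lightrag-server")  # illustrative prog name
OllamaLLMOptions.add_args(parser)
OllamaEmbeddingOptions.add_args(parser)
parser.print_help()
```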
@@ -7,6 +7,8 @@ import argparse
 import logging
 from dotenv import load_dotenv
 from lightrag.utils import get_env_value
+from lightrag.llm.binding_options import OllamaEmbeddingOptions, OllamaLLMOptions
+import sys
 
 from lightrag.constants import (
     DEFAULT_WOKERS,
@@ -248,6 +250,29 @@ def parse_args() -> argparse.Namespace:
         help="Embedding binding type (default: from env or ollama)",
     )
 
+    # Conditionally add binding options defined in binding_options module
+    # This will add command line arguments for all binding options (e.g., --ollama-embedding-num_ctx)
+    # and corresponding environment variables (e.g., OLLAMA_EMBEDDING_NUM_CTX)
+    if "--llm-binding" in sys.argv:
+        try:
+            idx = sys.argv.index("--llm-binding")
+            if idx + 1 < len(sys.argv) and sys.argv[idx + 1] == "ollama":
+                OllamaLLMOptions.add_args(parser)
+        except IndexError:
+            pass
+    elif os.environ.get("LLM_BINDING") == "ollama":
+        OllamaLLMOptions.add_args(parser)
+
+    if "--embedding-binding" in sys.argv:
+        try:
+            idx = sys.argv.index("--embedding-binding")
+            if idx + 1 < len(sys.argv) and sys.argv[idx + 1] == "ollama":
+                OllamaEmbeddingOptions.add_args(parser)
+        except IndexError:
+            pass
+    elif os.environ.get("EMBEDDING_BINDING") == "ollama":
+        OllamaEmbeddingOptions.add_args(parser)
+
     args = parser.parse_args()
 
     # convert relative path to absolute path
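The registration logic peeks at sys.argv before the parser runs, falling back to the LLM_BINDING / EMBEDDING_BINDING environment variables, so Ollama-specific flags only exist when the ollama binding is actually selected. A condensed restatement of the same check (the helper name is ours, not the patch's):

```python
import os
import sys


def ollama_selected(flag: str, env_var: str) -> bool:
    """Hypothetical helper mirroring the patch: True if the given binding
    flag is set to 'ollama' on the command line or via the environment."""
    if flag in sys.argv:
        idx = sys.argv.index(flag)
        return idx + 1 < len(sys.argv) and sys.argv[idx + 1] == "ollama"
    return os.environ.get(env_var) == "ollama"


# Usage mirroring parse_args():
#   if ollama_selected("--llm-binding", "LLM_BINDING"):
#       OllamaLLMOptions.add_args(parser)
```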
@@ -379,7 +404,8 @@ def update_uvicorn_mode_config():
         global_args.workers = 1
         # Log warning directly here
         logging.warning(
-            f"In uvicorn mode, workers parameter was set to {original_workers}. Forcing workers=1"
+            f"In uvicorn mode, workers parameter was set to {
+                original_workers}. Forcing workers=1"
         )
 
 
@@ -209,6 +209,7 @@ def create_app(args):
         from lightrag.llm.lollms import lollms_model_complete, lollms_embed
     if args.llm_binding == "ollama" or args.embedding_binding == "ollama":
         from lightrag.llm.ollama import ollama_model_complete, ollama_embed
+        from lightrag.llm.binding_options import OllamaLLMOptions
     if args.llm_binding == "openai" or args.embedding_binding == "openai":
         from lightrag.llm.openai import openai_complete_if_cache, openai_embed
     if args.llm_binding == "azure_openai" or args.embedding_binding == "azure_openai":
@@ -219,6 +220,7 @@ def create_app(args):
     if args.llm_binding_host == "openai-ollama" or args.embedding_binding == "ollama":
         from lightrag.llm.openai import openai_complete_if_cache
         from lightrag.llm.ollama import ollama_embed
+        from lightrag.llm.binding_options import OllamaEmbeddingOptions
     if args.embedding_binding == "jina":
         from lightrag.llm.jina import jina_embed
 
@@ -284,6 +286,7 @@ def create_app(args):
             embed_model=args.embedding_model,
             host=args.embedding_binding_host,
             api_key=args.embedding_binding_api_key,
+            options=OllamaEmbeddingOptions.options_dict(args),
         )
         if args.embedding_binding == "ollama"
         else azure_openai_embed(
@@ -360,7 +363,7 @@ def create_app(args):
         llm_model_kwargs={
             "host": args.llm_binding_host,
             "timeout": args.timeout,
-            "options": {"num_ctx": args.ollama_num_ctx},
+            "options": OllamaLLMOptions.options_dict(args),
             "api_key": args.llm_binding_api_key,
         }
         if args.llm_binding == "lollms" or args.llm_binding == "ollama"
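End to end, a configured flag lands in `llm_model_kwargs["options"]` with its binding prefix stripped, replacing the old single hard-coded `num_ctx`. A small sketch under the diff's names (the host value is illustrative):

```python
from argparse import ArgumentParser

from lightrag.llm.binding_options import OllamaLLMOptions

parser = ArgumentParser()
OllamaLLMOptions.add_args(parser)
args = parser.parse_args(["--ollama-llm-num_ctx", "16384"])

# Only explicitly-set options survive (unset ones default to argparse.SUPPRESS),
# so this should print {'num_ctx': 16384}.
llm_model_kwargs = {
    "host": "http://localhost:11434",  # illustrative host
    "options": OllamaLLMOptions.options_dict(args),
}
print(llm_model_kwargs["options"])
```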
lightrag/llm/binding_options.py (new file, 446 lines)
@@ -0,0 +1,446 @@
"""
Module that implements containers for specific LLM bindings.

This module provides container implementations for various Large Language Model
bindings and integrations.
"""

from argparse import ArgumentParser, Namespace
import argparse
from dataclasses import asdict, dataclass
from typing import Any, ClassVar

from lightrag.utils import get_env_value


# =============================================================================
# BindingOptions Base Class
# =============================================================================
#
# The BindingOptions class serves as the foundation for all LLM provider bindings
# in LightRAG. It provides a standardized framework for:
#
# 1. Configuration Management:
#    - Defines how each LLM provider's configuration parameters are structured
#    - Handles default values and type information for each parameter
#    - Maps configuration options to command-line arguments and environment variables
#
# 2. Environment Integration:
#    - Automatically generates environment variable names from binding parameters
#    - Provides methods to create sample .env files for easy configuration
#    - Supports configuration via environment variables with fallback to defaults
#
# 3. Command-Line Interface:
#    - Dynamically generates command-line arguments for all registered bindings
#    - Maintains consistent naming conventions across different LLM providers
#    - Provides help text and type validation for each configuration option
#
# 4. Extensibility:
#    - Uses class introspection to automatically discover all binding subclasses
#    - Requires minimal boilerplate code when adding new LLM provider bindings
#    - Maintains separation of concerns between different provider configurations
#
# This design pattern ensures that adding support for a new LLM provider requires
# only defining the provider-specific parameters and help text, while the base
# class handles all the common functionality for argument parsing, environment
# variable handling, and configuration management.
#
# Instances of a derived class of BindingOptions can be used to store multiple
# runtime configurations of options for a single LLM provider, using the
# asdict() method to convert the options to a dictionary.
#
# =============================================================================
@dataclass
class BindingOptions:
    """Base class for binding options."""

    # mandatory name of binding
    _binding_name: ClassVar[str]

    # optional help message for each option
    _help: ClassVar[dict[str, str]]

    @staticmethod
    def _all_class_vars(klass: type, include_inherited=True) -> dict[str, Any]:
        """Return class variables, optionally including inherited ones"""
        if include_inherited:
            # Get all class variables from MRO
            vars_dict = {}
            for base in reversed(klass.__mro__[:-1]):  # Exclude 'object'
                vars_dict.update(
                    {
                        k: v
                        for k, v in base.__dict__.items()
                        if (
                            not k.startswith("_")
                            and not callable(v)
                            and not isinstance(v, classmethod)
                        )
                    }
                )
        else:
            # Only direct class variables
            vars_dict = {
                k: v
                for k, v in klass.__dict__.items()
                if (
                    not k.startswith("_")
                    and not callable(v)
                    and not isinstance(v, classmethod)
                )
            }

        return vars_dict

    @classmethod
    def add_args(cls, parser: ArgumentParser):
        group = parser.add_argument_group(f"{cls._binding_name} binding options")
        for arg_item in cls.args_env_name_type_value():
            group.add_argument(
                f"--{arg_item['argname']}",
                type=arg_item["type"],
                default=get_env_value(f"{arg_item['env_name']}", argparse.SUPPRESS),
                help=arg_item["help"],
            )

    @classmethod
    def args_env_name_type_value(cls):
        args_prefix = f"{cls._binding_name}".replace("_", "-")
        env_var_prefix = f"{cls._binding_name}_".upper()
        class_vars = {
            key: value
            for key, value in cls._all_class_vars(cls).items()
            if not callable(value) and not key.startswith("_")
        }
        help = cls._help

        for class_var in class_vars:
            argdef = {
                "argname": f"{args_prefix}-{class_var}",
                "env_name": f"{env_var_prefix}{class_var.upper()}",
                "type": type(class_vars[class_var]),
                "default": class_vars[class_var],
                "help": f"{cls._binding_name} -- " + help.get(class_var, ""),
            }

            yield argdef

    @classmethod
    def generate_dot_env_sample(cls):
        from io import StringIO

        sample_top = (
            "#" * 80
            + "\n"
            + (
                "# Autogenerated .env entries list for LightRAG binding options\n"
                "#\n"
                "# To generate run:\n"
                "# $ python -m lightrag.llm.binding_options\n"
            )
            + "#" * 80
            + "\n"
        )

        sample_bottom = (
            ("#\n# End of .env entries for LightRAG binding options\n")
            + "#" * 80
            + "\n"
        )

        sample_stream = StringIO()
        sample_stream.write(sample_top)
        for klass in cls.__subclasses__():
            for arg_item in klass.args_env_name_type_value():
                if arg_item["help"]:
                    sample_stream.write(f"# {arg_item['help']}\n")
                sample_stream.write(
                    f"# {arg_item['env_name']}={arg_item['default']}\n\n"
                )

        sample_stream.write(sample_bottom)
        return sample_stream.getvalue()

    @classmethod
    def options_dict(cls, args: Namespace) -> dict[str, Any]:
        """
        Extract options dictionary for a specific binding from parsed arguments.

        This method filters the parsed command-line arguments to return only those
        that belong to the specific binding class. It removes the binding prefix
        from argument names to create a clean options dictionary.

        Args:
            args (Namespace): Parsed command-line arguments containing all binding options

        Returns:
            dict[str, Any]: Dictionary mapping option names (without prefix) to their values

        Example:
            If args contains {'ollama_num_ctx': 512, 'other_option': 'value'}
            and this is called on OllamaOptions, it returns {'num_ctx': 512}
        """
        prefix = cls._binding_name + "_"
        skipchars = len(prefix)
        options = {
            key[skipchars:]: value
            for key, value in vars(args).items()
            if key.startswith(prefix)
        }

        return options

    def asdict(self) -> dict[str, Any]:
        """
        Convert an instance of binding options to a dictionary.

        This method uses dataclasses.asdict() to convert the dataclass instance
        into a dictionary representation, including all its fields and values.

        Returns:
            dict[str, Any]: Dictionary representation of the binding options instance
        """
        return asdict(self)


# =============================================================================
# Binding Options for Different LLM Providers
# =============================================================================
#
# This section contains dataclass definitions for various LLM provider options.
# Each binding option class inherits from BindingOptions and defines:
# - _binding_name: Unique identifier for the binding
# - Configuration parameters with default values
# - _help: Dictionary mapping parameter names to help descriptions
#
# To add a new binding:
# 1. Create a new dataclass inheriting from BindingOptions
# 2. Set the _binding_name class variable
# 3. Define configuration parameters as class attributes
# 4. Add corresponding help strings in the _help dictionary
#
# =============================================================================


# =============================================================================
# Binding Options for Ollama
# =============================================================================
#
# Ollama binding options provide configuration for the Ollama local LLM server.
# These options control model behavior, sampling parameters, hardware utilization,
# and performance settings. The parameters are based on Ollama's API specification
# and provide fine-grained control over model inference and generation.
#
# The _OllamaOptionsMixin defines the complete set of available options, while
# OllamaEmbeddingOptions and OllamaLLMOptions provide specialized configurations
# for embedding and language model tasks respectively.
# =============================================================================
@dataclass
class _OllamaOptionsMixin:
    """Options for Ollama bindings."""

    # Core context and generation parameters
    num_ctx: int = 32768  # Context window size (number of tokens)
    num_predict: int = 128  # Maximum number of tokens to predict
    num_keep: int = 0  # Number of tokens to keep from the initial prompt
    seed: int = -1  # Random seed for generation (-1 for random)

    # Sampling parameters
    temperature: float = 0.8  # Controls randomness (0.0-2.0)
    top_k: int = 40  # Top-k sampling parameter
    top_p: float = 0.9  # Top-p (nucleus) sampling parameter
    tfs_z: float = 1.0  # Tail free sampling parameter
    typical_p: float = 1.0  # Typical probability mass
    min_p: float = 0.0  # Minimum probability threshold

    # Repetition control
    repeat_last_n: int = 64  # Number of tokens to consider for repetition penalty
    repeat_penalty: float = 1.1  # Penalty for repetition
    presence_penalty: float = 0.0  # Penalty for token presence
    frequency_penalty: float = 0.0  # Penalty for token frequency

    # Mirostat sampling
    mirostat: int = 0  # Mirostat sampling algorithm (0=disabled, 1=Mirostat 1.0, 2=Mirostat 2.0)
    mirostat_tau: float = 5.0  # Mirostat target entropy
    mirostat_eta: float = 0.1  # Mirostat learning rate

    # Hardware and performance parameters
    numa: bool = False  # Enable NUMA optimization
    num_batch: int = 512  # Batch size for processing
    num_gpu: int = -1  # Number of GPUs to use (-1 for auto)
    main_gpu: int = 0  # Main GPU index
    low_vram: bool = False  # Optimize for low VRAM
    num_thread: int = 0  # Number of CPU threads (0 for auto)

    # Memory and model parameters
    f16_kv: bool = True  # Use half-precision for key/value cache
    logits_all: bool = False  # Return logits for all tokens
    vocab_only: bool = False  # Only load vocabulary
    use_mmap: bool = True  # Use memory mapping for model files
    use_mlock: bool = False  # Lock model in memory
    embedding_only: bool = False  # Only use for embeddings

    # Output control
    penalize_newline: bool = True  # Penalize newline tokens
    stop: str = ""  # Stop sequences (comma-separated)

    # optional help strings
    _help: ClassVar[dict[str, str]] = {
        "num_ctx": "Context window size (number of tokens)",
        "num_predict": "Maximum number of tokens to predict",
        "num_keep": "Number of tokens to keep from the initial prompt",
        "seed": "Random seed for generation (-1 for random)",
        "temperature": "Controls randomness (0.0-2.0, higher = more creative)",
        "top_k": "Top-k sampling parameter (0 = disabled)",
        "top_p": "Top-p (nucleus) sampling parameter (0.0-1.0)",
        "tfs_z": "Tail free sampling parameter (1.0 = disabled)",
        "typical_p": "Typical probability mass (1.0 = disabled)",
        "min_p": "Minimum probability threshold (0.0 = disabled)",
        "repeat_last_n": "Number of tokens to consider for repetition penalty",
        "repeat_penalty": "Penalty for repetition (1.0 = no penalty)",
        "presence_penalty": "Penalty for token presence (-2.0 to 2.0)",
        "frequency_penalty": "Penalty for token frequency (-2.0 to 2.0)",
        "mirostat": "Mirostat sampling algorithm (0=disabled, 1=Mirostat 1.0, 2=Mirostat 2.0)",
        "mirostat_tau": "Mirostat target entropy",
        "mirostat_eta": "Mirostat learning rate",
        "numa": "Enable NUMA optimization",
        "num_batch": "Batch size for processing",
        "num_gpu": "Number of GPUs to use (-1 for auto)",
        "main_gpu": "Main GPU index",
        "low_vram": "Optimize for low VRAM",
        "num_thread": "Number of CPU threads (0 for auto)",
        "f16_kv": "Use half-precision for key/value cache",
        "logits_all": "Return logits for all tokens",
        "vocab_only": "Only load vocabulary",
        "use_mmap": "Use memory mapping for model files",
        "use_mlock": "Lock model in memory",
        "embedding_only": "Only use for embeddings",
        "penalize_newline": "Penalize newline tokens",
        "stop": "Stop sequences (comma-separated string)",
    }


# =============================================================================
# Ollama Binding Options - Specialized Configurations
# =============================================================================
#
# This section defines specialized binding option classes for different Ollama
# use cases. Both classes inherit from _OllamaOptionsMixin to share the complete
# set of Ollama configuration parameters, while providing distinct binding names
# for command-line argument generation and environment variable handling.
#
# OllamaEmbeddingOptions: Specialized for embedding tasks
# OllamaLLMOptions: Specialized for language model/chat tasks
#
# Each class maintains its own binding name prefix, allowing users to configure
# embedding and LLM options independently when both are used in the same application.
# =============================================================================


@dataclass
class OllamaEmbeddingOptions(_OllamaOptionsMixin, BindingOptions):
    """Options for Ollama embeddings with specialized configuration for embedding tasks."""

    # mandatory name of binding
    _binding_name: ClassVar[str] = "ollama_embedding"


@dataclass
class OllamaLLMOptions(_OllamaOptionsMixin, BindingOptions):
    """Options for Ollama LLM with specialized configuration for LLM tasks."""

    # mandatory name of binding
    _binding_name: ClassVar[str] = "ollama_llm"


# =============================================================================
# Additional LLM Provider Bindings
# =============================================================================
#
# This section is where you can add binding options for other LLM providers.
# Each new binding should follow the same pattern as the Ollama bindings above:
#
# 1. Create a dataclass that inherits from BindingOptions
# 2. Set a unique _binding_name class variable (e.g., "openai", "anthropic")
# 3. Define configuration parameters as class attributes with default values
# 4. Add a _help class variable with descriptions for each parameter
#
# Example template for a new provider:
#
# @dataclass
# class NewProviderOptions(BindingOptions):
#     """Options for NewProvider LLM binding."""
#
#     _binding_name: ClassVar[str] = "newprovider"
#
#     # Configuration parameters
#     api_key: str = ""
#     max_tokens: int = 1000
#     model: str = "default-model"
#
#     # Help descriptions
#     _help: ClassVar[dict[str, str]] = {
#         "api_key": "API key for authentication",
#         "max_tokens": "Maximum tokens to generate",
#         "model": "Model name to use",
#     }
#
# =============================================================================

# TODO: Add binding options for additional LLM providers here
# Common providers to consider: OpenAI, Anthropic, Cohere, Hugging Face, etc.

# =============================================================================
# Main Section - For Testing and Sample Generation
# =============================================================================
#
# When run as a script, this module:
# 1. Generates and prints a sample .env file with all binding options
# 2. If "test" argument is provided, demonstrates argument parsing with Ollama binding
#
# Usage:
#   python -m lightrag.llm.binding_options          # Generate .env sample
#   python -m lightrag.llm.binding_options test     # Test argument parsing
#
# =============================================================================

if __name__ == "__main__":
    import sys
    import dotenv
    from io import StringIO

    print(BindingOptions.generate_dot_env_sample())

    env_strstream = StringIO(
        ("OLLAMA_LLM_TEMPERATURE=0.1\nOLLAMA_EMBEDDING_TEMPERATURE=0.2\n")
    )

    # Load environment variables from .env file
    dotenv.load_dotenv(stream=env_strstream)

    if len(sys.argv) > 1 and sys.argv[1] == "test":
        parser = ArgumentParser(description="Test Ollama binding")
        OllamaEmbeddingOptions.add_args(parser)
        OllamaLLMOptions.add_args(parser)
        args = parser.parse_args(
            [
                "--ollama-embedding-num_ctx",
                "1024",
                "--ollama-llm-num_ctx",
                "2048",
            ]
        )
        print(args)

        # test LLM options
        ollama_options = OllamaLLMOptions.options_dict(args)
        print(ollama_options)
        print(OllamaLLMOptions(num_ctx=30000).asdict())

        # test embedding options
        embedding_options = OllamaEmbeddingOptions.options_dict(args)
        print(embedding_options)
        print(OllamaEmbeddingOptions(**embedding_options).asdict())
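Note the interplay in add_args: the argparse default is `get_env_value(env_name, argparse.SUPPRESS)`, so an option absent from both the command line and the environment never enters the namespace, and options_dict() stays minimal. A sketch of the environment path, assuming an installed LightRAG:

```python
import os
from argparse import ArgumentParser

from lightrag.llm.binding_options import OllamaLLMOptions

os.environ["OLLAMA_LLM_TEMPERATURE"] = "0.2"  # stands in for a .env entry

parser = ArgumentParser()
OllamaLLMOptions.add_args(parser)
args = parser.parse_args([])

# Should print {'temperature': 0.2}; everything left unset is suppressed.
print(OllamaLLMOptions.options_dict(args))
```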
@@ -149,9 +149,11 @@ async def ollama_embed(texts: list[str], embed_model, **kwargs) -> np.ndarray:
     timeout = kwargs.pop("timeout", None) or 300  # Default time out 300s
 
     ollama_client = ollama.AsyncClient(host=host, timeout=timeout, headers=headers)
 
     try:
-        data = await ollama_client.embed(model=embed_model, input=texts)
+        options = kwargs.pop("options", {})
+        data = await ollama_client.embed(
+            model=embed_model, input=texts, options=options
+        )
         return np.array(data["embeddings"])
     except Exception as e:
         logger.error(f"Error in ollama_embed: {str(e)}")
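With the embed path now honoring options, callers can thread Ollama runtime settings through kwargs. A hedged usage sketch (model name and host are placeholders, and a local Ollama server with the model pulled is assumed):

```python
import asyncio

from lightrag.llm.ollama import ollama_embed

# The options dict is forwarded to AsyncClient.embed() by the patched function.
embeddings = asyncio.run(
    ollama_embed(
        ["hello world"],
        embed_model="bge-m3:latest",    # placeholder model
        host="http://localhost:11434",  # placeholder host
        options={"num_ctx": 4096},
    )
)
print(embeddings.shape)
```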