LightRAG/lightrag/llm/openai.py

from ..utils import verbose_debug, VERBOSE_DEBUG
import sys
import os
import logging

if sys.version_info < (3, 9):
    from typing import AsyncIterator
else:
    from collections.abc import AsyncIterator
import pipmaster as pm  # Pipmaster for dynamic library install

# install specific modules
if not pm.is_installed("openai"):
    pm.install("openai")

from openai import (
    AsyncOpenAI,
    APIConnectionError,
    RateLimitError,
    APITimeoutError,
)
from tenacity import (
    retry,
    stop_after_attempt,
    wait_exponential,
    retry_if_exception_type,
)
from lightrag.utils import (
    wrap_embedding_func_with_attrs,
    locate_json_string_body_from_string,
    safe_unicode_decode,
    logger,
)
from lightrag.types import GPTKeywordExtractionFormat
from lightrag.api import __api_version__

import numpy as np
from typing import Any, Union


class InvalidResponseError(Exception):
    """Signal that the OpenAI API returned a response that cannot be used.

    The exception is listed among the retryable exception types of
    ``openai_complete_if_cache``, so raising it inside that function makes
    the retry decorator attempt the request again.
    """


@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    retry=retry_if_exception_type(
        (RateLimitError, APIConnectionError, APITimeoutError, InvalidResponseError)
    ),
)
async def openai_complete_if_cache(
    model: str,
    prompt: str,
    system_prompt: str | None = None,
    history_messages: list[dict[str, Any]] | None = None,
    base_url: str | None = None,
    api_key: str | None = None,
    token_tracker: Any | None = None,
    **kwargs: Any,
) -> Union[str, AsyncIterator[str]]:
    """Send a chat completion request through the OpenAI async client.

    The message list is built as: optional system message, then
    ``history_messages``, then ``prompt`` as the final user message.

    The call is wrapped by the retry decorator above: rate-limit, connection,
    timeout and ``InvalidResponseError`` failures are retried (up to three
    attempts, exponential wait bounded to 4-10 seconds). Errors raised while a
    streamed response is being consumed happen after this function has
    returned and are therefore not covered by the retry.

    Args:
        model: Model name forwarded to the API.
        prompt: Content of the final user message.
        system_prompt: Optional content of a leading system message.
        history_messages: Earlier messages inserted between the system
            message and the prompt. ``None`` means no history.
        base_url: Optional API base URL; when ``None`` the client default
            is used.
        api_key: API key; when falsy, ``OPENAI_API_KEY`` is read from the
            environment.
        token_tracker: Optional object with an ``add_usage(dict)`` method that
            receives prompt/completion/total token counts (non-streaming
            responses only).
        **kwargs: Extra arguments forwarded to the API call. ``hashing_kv``
            and ``keyword_extraction`` are removed first. If
            ``response_format`` is present the ``beta.chat.completions.parse``
            endpoint is used instead of ``chat.completions.create``.

    Returns:
        The response text, or an async iterator yielding text pieces when the
        API response is itself async-iterable (streaming).

    Raises:
        KeyError: No ``api_key`` given and ``OPENAI_API_KEY`` is not set.
        InvalidResponseError: The response has no usable choice/message or
            its content is empty.
        Exception: Any error raised by the API call is logged and re-raised.
    """
    if history_messages is None:
        history_messages = []
    if not api_key:
        api_key = os.environ["OPENAI_API_KEY"]

    # The f-prefix is required so the real API version is interpolated into
    # the header instead of sending the literal text "{__api_version__}".
    default_headers = {
        "User-Agent": f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_8) LightRAG/{__api_version__}",
        "Content-Type": "application/json",
    }

    # Set openai logger level to INFO when VERBOSE_DEBUG is off
    if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
        logging.getLogger("openai").setLevel(logging.INFO)

    # NOTE(review): a new client is created per call and is never explicitly
    # closed here; consider whether it should be closed or shared.
    openai_async_client = (
        AsyncOpenAI(default_headers=default_headers, api_key=api_key)
        if base_url is None
        else AsyncOpenAI(
            base_url=base_url, default_headers=default_headers, api_key=api_key
        )
    )

    # These are LightRAG-internal arguments and must not reach the API call.
    kwargs.pop("hashing_kv", None)
    kwargs.pop("keyword_extraction", None)

    messages: list[dict[str, Any]] = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.extend(history_messages)
    messages.append({"role": "user", "content": prompt})

    logger.debug("===== Entering func of LLM =====")
    logger.debug(f"Model: {model}   Base URL: {base_url}")
    logger.debug(f"Additional kwargs: {kwargs}")
    logger.debug(f"Num of history messages: {len(history_messages)}")
    verbose_debug(f"System prompt: {system_prompt}")
    verbose_debug(f"Query: {prompt}")
    logger.debug("===== Sending Query to LLM =====")

    try:
        if "response_format" in kwargs:
            response = await openai_async_client.beta.chat.completions.parse(
                model=model, messages=messages, **kwargs
            )
        else:
            response = await openai_async_client.chat.completions.create(
                model=model, messages=messages, **kwargs
            )
    except APIConnectionError as e:
        logger.error(f"OpenAI API Connection Error: {e}")
        raise
    except RateLimitError as e:
        logger.error(f"OpenAI API Rate Limit Error: {e}")
        raise
    except APITimeoutError as e:
        logger.error(f"OpenAI API Timeout Error: {e}")
        raise
    except Exception as e:
        logger.error(
            f"OpenAI API Call Failed,\nModel: {model},\nParams: {kwargs}, Got: {e}"
        )
        raise

    # An async-iterable response means the caller asked for streaming.
    if hasattr(response, "__aiter__"):

        async def inner():
            try:
                async for chunk in response:
                    # The API may emit chunks whose ``choices`` list is empty
                    # (for example a trailing usage-only chunk); indexing
                    # them would raise IndexError, so skip them.
                    if not getattr(chunk, "choices", None):
                        continue
                    content = chunk.choices[0].delta.content
                    if content is None:
                        continue
                    if r"\u" in content:
                        content = safe_unicode_decode(content.encode("utf-8"))
                    yield content
            except Exception as e:
                logger.error(f"Error in stream response: {str(e)}")
                raise

        return inner()

    if (
        not response
        or not response.choices
        or not hasattr(response.choices[0], "message")
        or not hasattr(response.choices[0].message, "content")
    ):
        logger.error("Invalid response from OpenAI API")
        raise InvalidResponseError("Invalid response from OpenAI API")

    content = response.choices[0].message.content

    # Empty content is treated as a retryable failure.
    if not content or content.strip() == "":
        logger.error("Received empty content from OpenAI API")
        raise InvalidResponseError("Received empty content from OpenAI API")

    # Content containing literal "\u" escape sequences is passed through the
    # project's unicode decoder.
    if r"\u" in content:
        content = safe_unicode_decode(content.encode("utf-8"))

    if token_tracker and hasattr(response, "usage"):
        token_counts = {
            "prompt_tokens": getattr(response.usage, "prompt_tokens", 0),
            "completion_tokens": getattr(response.usage, "completion_tokens", 0),
            "total_tokens": getattr(response.usage, "total_tokens", 0),
        }
        token_tracker.add_usage(token_counts)

    logger.debug(f"Response content len: {len(content)}")
    verbose_debug(f"Response: {response}")

    return content


async def openai_complete(
    prompt,
    system_prompt=None,
    history_messages=None,
    keyword_extraction=False,
    **kwargs,
) -> Union[str, AsyncIterator[str]]:
    """Complete ``prompt`` with the model configured in ``hashing_kv``.

    The model name is read from
    ``kwargs["hashing_kv"].global_config["llm_model_name"]`` and the request is
    delegated to ``openai_complete_if_cache``.

    Args:
        prompt: Content of the user message.
        system_prompt: Optional system message content.
        history_messages: Earlier conversation messages; ``None`` means none.
        keyword_extraction: When truthy, the request asks the API for JSON
            output via ``response_format``.
        **kwargs: Must contain ``hashing_kv``; everything is forwarded to
            ``openai_complete_if_cache``.

    Returns:
        Whatever ``openai_complete_if_cache`` returns: the response text, or
        an async iterator of text pieces for streamed responses.

    Raises:
        KeyError: ``hashing_kv`` is missing from ``kwargs``.
    """
    if history_messages is None:
        history_messages = []
    # ``keyword_extraction`` is a named parameter, so Python never places it
    # in ``kwargs``. Popping it from there always produced None and silently
    # discarded the caller's flag; use the parameter value directly.
    if keyword_extraction:
        # The API expects ``response_format`` to be an object; the bare
        # string "json" is not a valid value.
        # NOTE(review): JSON mode requires the prompt itself to ask for JSON;
        # confirm the keyword-extraction prompt does so.
        kwargs["response_format"] = {"type": "json_object"}
    model_name = kwargs["hashing_kv"].global_config["llm_model_name"]
    return await openai_complete_if_cache(
        model_name,
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        **kwargs,
    )


async def gpt_4o_complete(
    prompt,
    system_prompt=None,
    history_messages=None,
    keyword_extraction=False,
    **kwargs,
) -> str:
    """Complete ``prompt`` with the ``gpt-4o`` model.

    Args:
        prompt: Content of the user message.
        system_prompt: Optional system message content.
        history_messages: Earlier conversation messages; ``None`` means none.
        keyword_extraction: When truthy, ``GPTKeywordExtractionFormat`` is
            passed as ``response_format``.
        **kwargs: Forwarded to ``openai_complete_if_cache``.

    Returns:
        The result of ``openai_complete_if_cache`` for model ``"gpt-4o"``.
    """
    if history_messages is None:
        history_messages = []
    # ``keyword_extraction`` is a named parameter, so it can never be found in
    # ``kwargs``. Popping it from there always produced None and discarded the
    # caller's flag; use the parameter value directly.
    if keyword_extraction:
        kwargs["response_format"] = GPTKeywordExtractionFormat
    return await openai_complete_if_cache(
        "gpt-4o",
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        **kwargs,
    )


async def gpt_4o_mini_complete(
    prompt,
    system_prompt=None,
    history_messages=None,
    keyword_extraction=False,
    **kwargs,
) -> str:
    """Complete ``prompt`` with the ``gpt-4o-mini`` model.

    Args:
        prompt: Content of the user message.
        system_prompt: Optional system message content.
        history_messages: Earlier conversation messages; ``None`` means none.
        keyword_extraction: When truthy, ``GPTKeywordExtractionFormat`` is
            passed as ``response_format``.
        **kwargs: Forwarded to ``openai_complete_if_cache``.

    Returns:
        The result of ``openai_complete_if_cache`` for model
        ``"gpt-4o-mini"``.
    """
    if history_messages is None:
        history_messages = []
    # ``keyword_extraction`` is a named parameter, so it can never be found in
    # ``kwargs``. Popping it from there always produced None and discarded the
    # caller's flag; use the parameter value directly.
    if keyword_extraction:
        kwargs["response_format"] = GPTKeywordExtractionFormat
    return await openai_complete_if_cache(
        "gpt-4o-mini",
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        **kwargs,
    )


async def nvidia_openai_complete(
    prompt,
    system_prompt=None,
    history_messages=None,
    keyword_extraction=False,
    **kwargs,
) -> str:
    """Complete ``prompt`` with the NVIDIA-hosted Nemotron model.

    The request goes to ``https://integrate.api.nvidia.com/v1`` through
    ``openai_complete_if_cache``.

    Args:
        prompt: Content of the user message.
        system_prompt: Optional system message content.
        history_messages: Earlier conversation messages; ``None`` means none.
        keyword_extraction: When truthy, the result is passed through
            ``locate_json_string_body_from_string`` before being returned.
        **kwargs: Forwarded to ``openai_complete_if_cache``.

    Returns:
        The completion result, or the output of
        ``locate_json_string_body_from_string`` when ``keyword_extraction``
        is truthy.
    """
    if history_messages is None:
        history_messages = []
    # ``keyword_extraction`` is a named parameter, so it can never be found in
    # ``kwargs``. Popping it from there always produced None, which made the
    # JSON extraction below unreachable; use the parameter value directly.
    result = await openai_complete_if_cache(
        "nvidia/llama-3.1-nemotron-70b-instruct",  # context length 128k
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        base_url="https://integrate.api.nvidia.com/v1",
        **kwargs,
    )
    if keyword_extraction:  # TODO: use JSON API
        return locate_json_string_body_from_string(result)
    return result


@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=60),
    retry=retry_if_exception_type(
        (RateLimitError, APIConnectionError, APITimeoutError)
    ),
)
async def openai_embed(
    texts: list[str],
    model: str = "text-embedding-3-small",
    base_url: str = None,
    api_key: str = None,
) -> np.ndarray:
    """Request embeddings for ``texts`` from the OpenAI embeddings endpoint.

    Rate-limit, connection and timeout errors are retried by the decorator
    above.

    Args:
        texts: Input strings sent as the ``input`` of the embeddings request.
        model: Embedding model name.
        base_url: Optional API base URL; when ``None`` the client default is
            used.
        api_key: API key; when falsy, ``OPENAI_API_KEY`` is read from the
            environment.

    Returns:
        A NumPy array built from the ``embedding`` of every item in the
        response's ``data``.

    Raises:
        KeyError: No ``api_key`` given and ``OPENAI_API_KEY`` is not set.
    """
    resolved_key = api_key if api_key else os.environ["OPENAI_API_KEY"]

    client_options = {
        "default_headers": {
            "User-Agent": f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_8) LightRAG/{__api_version__}",
            "Content-Type": "application/json",
        },
        "api_key": resolved_key,
    }
    # Only pass base_url when the caller supplied one, as the original did.
    if base_url is not None:
        client_options["base_url"] = base_url
    client = AsyncOpenAI(**client_options)

    result = await client.embeddings.create(
        model=model, input=texts, encoding_format="float"
    )
    vectors = [item.embedding for item in result.data]
    return np.array(vectors)
Set OpenAI logger level to INFO if VERBOSE_DEBUG is off 2025-02-17 12:20:47 +08:00			`from ..utils import verbose_debug, VERBOSE_DEBUG`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00			`import sys`
			`import os`
Set OpenAI logger level to INFO if VERBOSE_DEBUG is off 2025-02-17 12:20:47 +08:00			`import logging`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00
			`if sys.version_info < (3, 9):`
			`from typing import AsyncIterator`
			`else:`
			`from collections.abc import AsyncIterator`
Fixed missing imports bug and fixed linting 2025-01-25 00:55:07 +01:00			`import pipmaster as pm # Pipmaster for dynamic library install`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00
			`# install specific modules`
			`if not pm.is_installed("openai"):`
			`pm.install("openai")`

			`from openai import (`
			`AsyncOpenAI,`
			`APIConnectionError,`
			`RateLimitError,`
			`APITimeoutError,`
			`)`
			`from tenacity import (`
			`retry,`
			`stop_after_attempt,`
			`wait_exponential,`
			`retry_if_exception_type,`
			`)`
			`from lightrag.utils import (`
			`wrap_embedding_func_with_attrs,`
			`locate_json_string_body_from_string,`
			`safe_unicode_decode,`
			`logger,`
			`)`
			`from lightrag.types import GPTKeywordExtractionFormat`
Add LightRAG version to User-Agent header for better request tracking • Add User-Agent header with version info • Update header creation in Ollama client • Update header creation in OpenAI client • Ensure consistent header format • Include Mozilla UA string for OpenAI 2025-02-06 22:55:22 +08:00			`from lightrag.api import __api_version__`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00
			`import numpy as np`
cleanup code 2025-02-18 16:55:48 +01:00			`from typing import Any, Union`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00
Fix linting 2025-02-06 23:12:35 +08:00
Enhance OpenAI API error handling and logging for better reliability • Add InvalidResponseError custom exception • Improve error logging for API failures • Add empty response content validation • Add more detailed debug logging info • Add retry for invalid response cases 2025-02-06 19:42:57 +08:00			`class InvalidResponseError(Exception):`
			`"""Custom exception class for triggering retry mechanism"""`
Fix linting 2025-02-06 23:12:35 +08:00
Enhance OpenAI API error handling and logging for better reliability • Add InvalidResponseError custom exception • Improve error logging for API failures • Add empty response content validation • Add more detailed debug logging info • Add retry for invalid response cases 2025-02-06 19:42:57 +08:00			`pass`
Fixed missing imports bug and fixed linting 2025-01-25 00:55:07 +01:00
Fix linting 2025-02-06 23:12:35 +08:00
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00			`@retry(`
			`stop=stop_after_attempt(3),`
			`wait=wait_exponential(multiplier=1, min=4, max=10),`
			`retry=retry_if_exception_type(`
Enhance OpenAI API error handling and logging for better reliability • Add InvalidResponseError custom exception • Improve error logging for API failures • Add empty response content validation • Add more detailed debug logging info • Add retry for invalid response cases 2025-02-06 19:42:57 +08:00			`(RateLimitError, APIConnectionError, APITimeoutError, InvalidResponseError)`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00			`),`
			`)`
			`async def openai_complete_if_cache(`
cleanup code 2025-02-18 16:55:48 +01:00			`model: str,`
			`prompt: str,`
			`system_prompt: str \| None = None,`
			`history_messages: list[dict[str, Any]] \| None = None,`
			`base_url: str \| None = None,`
			`api_key: str \| None = None,`
feat: Add TokenTracker to track token usage for LLM calls 2025-03-28 01:25:15 +08:00			`token_tracker: Any \| None = None,`
cleanup code 2025-02-18 16:55:48 +01:00			`**kwargs: Any,`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00			`) -> str:`
fix: Fix potential mutable default parameter issue 2025-02-06 14:46:07 +08:00			`if history_messages is None:`
			`history_messages = []`
Fix cache bugs 2025-02-11 13:28:18 +08:00			`if not api_key:`
			`api_key = os.environ["OPENAI_API_KEY"]`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00
Add LightRAG version to User-Agent header for better request tracking • Add User-Agent header with version info • Update header creation in Ollama client • Update header creation in OpenAI client • Ensure consistent header format • Include Mozilla UA string for OpenAI 2025-02-06 22:55:22 +08:00			`default_headers = {`
			`"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_8) LightRAG/{__api_version__}",`
Fix linting 2025-02-06 23:12:35 +08:00			`"Content-Type": "application/json",`
Add LightRAG version to User-Agent header for better request tracking • Add User-Agent header with version info • Update header creation in Ollama client • Update header creation in OpenAI client • Ensure consistent header format • Include Mozilla UA string for OpenAI 2025-02-06 22:55:22 +08:00			`}`
Set OpenAI logger level to INFO if VERBOSE_DEBUG is off 2025-02-17 12:20:47 +08:00
			`# Set openai logger level to INFO when VERBOSE_DEBUG is off`
			`if not VERBOSE_DEBUG and logger.level == logging.DEBUG:`
			`logging.getLogger("openai").setLevel(logging.INFO)`
Fix linting 2025-02-17 12:34:54 +08:00
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00			`openai_async_client = (`
Fix cache bugs 2025-02-11 13:28:18 +08:00			`AsyncOpenAI(default_headers=default_headers, api_key=api_key)`
Fix linting 2025-02-06 23:12:35 +08:00			`if base_url is None`
Fix linting error 2025-02-11 13:32:24 +08:00			`else AsyncOpenAI(`
			`base_url=base_url, default_headers=default_headers, api_key=api_key`
			`)`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00			`)`
			`kwargs.pop("hashing_kv", None)`
			`kwargs.pop("keyword_extraction", None)`
cleanup code 2025-02-18 16:55:48 +01:00			`messages: list[dict[str, Any]] = []`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00			`if system_prompt:`
			`messages.append({"role": "system", "content": system_prompt})`
			`messages.extend(history_messages)`
			`messages.append({"role": "user", "content": prompt})`

Improve OpenAI LLM logging with more detailed debug information 2025-03-28 21:33:59 +08:00			`logger.debug("===== Entering func of LLM =====")`
Enhance OpenAI API error handling and logging for better reliability • Add InvalidResponseError custom exception • Improve error logging for API failures • Add empty response content validation • Add more detailed debug logging info • Add retry for invalid response cases 2025-02-06 19:42:57 +08:00			`logger.debug(f"Model: {model} Base URL: {base_url}")`
			`logger.debug(f"Additional kwargs: {kwargs}")`
Improve OpenAI LLM logging with more detailed debug information 2025-03-28 21:33:59 +08:00			`logger.debug(f"Num of history messages: {len(history_messages)}")`
Add verbose debug option to control detailed debug output level • Added VERBOSE env var & CLI flag • Implemented verbose_debug() function • Added verbose option to splash screen • Reduced default debug output length • Modified LLM debug logging behavior 2025-02-17 01:38:18 +08:00			`verbose_debug(f"System prompt: {system_prompt}")`
Improve OpenAI LLM logging with more detailed debug information 2025-03-28 21:33:59 +08:00			`verbose_debug(f"Query: {prompt}")`
			`logger.debug("===== Sending Query to LLM =====")`
Enhance OpenAI API error handling and logging for better reliability • Add InvalidResponseError custom exception • Improve error logging for API failures • Add empty response content validation • Add more detailed debug logging info • Add retry for invalid response cases 2025-02-06 19:42:57 +08:00
Fix linting 2025-02-17 12:34:54 +08:00			`try:`
Enhance OpenAI API error handling and logging for better reliability • Add InvalidResponseError custom exception • Improve error logging for API failures • Add empty response content validation • Add more detailed debug logging info • Add retry for invalid response cases 2025-02-06 19:42:57 +08:00			`if "response_format" in kwargs:`
			`response = await openai_async_client.beta.chat.completions.parse(`
			`model=model, messages=messages, **kwargs`
			`)`
			`else:`
			`response = await openai_async_client.chat.completions.create(`
			`model=model, messages=messages, **kwargs`
			`)`
			`except APIConnectionError as e:`
cleanup code 2025-02-18 16:55:48 +01:00			`logger.error(f"OpenAI API Connection Error: {e}")`
Enhance OpenAI API error handling and logging for better reliability • Add InvalidResponseError custom exception • Improve error logging for API failures • Add empty response content validation • Add more detailed debug logging info • Add retry for invalid response cases 2025-02-06 19:42:57 +08:00			`raise`
			`except RateLimitError as e:`
cleanup code 2025-02-18 16:55:48 +01:00			`logger.error(f"OpenAI API Rate Limit Error: {e}")`
Enhance OpenAI API error handling and logging for better reliability • Add InvalidResponseError custom exception • Improve error logging for API failures • Add empty response content validation • Add more detailed debug logging info • Add retry for invalid response cases 2025-02-06 19:42:57 +08:00			`raise`
			`except APITimeoutError as e:`
cleanup code 2025-02-18 16:55:48 +01:00			`logger.error(f"OpenAI API Timeout Error: {e}")`
Enhance OpenAI API error handling and logging for better reliability • Add InvalidResponseError custom exception • Improve error logging for API failures • Add empty response content validation • Add more detailed debug logging info • Add retry for invalid response cases 2025-02-06 19:42:57 +08:00			`raise`
			`except Exception as e:`
cleanup code 2025-02-18 16:55:48 +01:00			`logger.error(`
			`f"OpenAI API Call Failed,\nModel: {model},\nParams: {kwargs}, Got: {e}"`
			`)`
Enhance OpenAI API error handling and logging for better reliability • Add InvalidResponseError custom exception • Improve error logging for API failures • Add empty response content validation • Add more detailed debug logging info • Add retry for invalid response cases 2025-02-06 19:42:57 +08:00			`raise`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00
			`if hasattr(response, "__aiter__"):`

			`async def inner():`
Improve error handling and response consistency in streaming endpoints • Add error message forwarding to client • Handle stream cancellations gracefully • Add logging for stream errors • Ensure clean stream termination • Add try-catch in OpenAI streaming 2025-02-05 10:44:48 +08:00			`try:`
			`async for chunk in response:`
			`content = chunk.choices[0].delta.content`
			`if content is None:`
			`continue`
			`if r"\u" in content:`
			`content = safe_unicode_decode(content.encode("utf-8"))`
fix stream 2025-03-17 11:41:55 +08:00			`yield content`
Improve error handling and response consistency in streaming endpoints • Add error message forwarding to client • Handle stream cancellations gracefully • Add logging for stream errors • Ensure clean stream termination • Add try-catch in OpenAI streaming 2025-02-05 10:44:48 +08:00			`except Exception as e:`
			`logger.error(f"Error in stream response: {str(e)}")`
			`raise`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00
fix stream 2025-03-17 11:41:55 +08:00			`return inner()`
Enhance OpenAI API error handling and logging for better reliability • Add InvalidResponseError custom exception • Improve error logging for API failures • Add empty response content validation • Add more detailed debug logging info • Add retry for invalid response cases 2025-02-06 19:42:57 +08:00
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00			`else:`
Enhance OpenAI API error handling and logging for better reliability • Add InvalidResponseError custom exception • Improve error logging for API failures • Add empty response content validation • Add more detailed debug logging info • Add retry for invalid response cases 2025-02-06 19:42:57 +08:00			`if (`
			`not response`
			`or not response.choices`
			`or not hasattr(response.choices[0], "message")`
			`or not hasattr(response.choices[0].message, "content")`
			`):`
			`logger.error("Invalid response from OpenAI API")`
			`raise InvalidResponseError("Invalid response from OpenAI API")`

Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00			`content = response.choices[0].message.content`
Enhance OpenAI API error handling and logging for better reliability • Add InvalidResponseError custom exception • Improve error logging for API failures • Add empty response content validation • Add more detailed debug logging info • Add retry for invalid response cases 2025-02-06 19:42:57 +08:00
			`if not content or content.strip() == "":`
			`logger.error("Received empty content from OpenAI API")`
			`raise InvalidResponseError("Received empty content from OpenAI API")`

Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00			`if r"\u" in content:`
			`content = safe_unicode_decode(content.encode("utf-8"))`
feat: Add TokenTracker to track token usage for LLM calls 2025-03-28 01:25:15 +08:00
			`if token_tracker and hasattr(response, "usage"):`
			`token_counts = {`
			`"prompt_tokens": getattr(response.usage, "prompt_tokens", 0),`
			`"completion_tokens": getattr(response.usage, "completion_tokens", 0),`
			`"total_tokens": getattr(response.usage, "total_tokens", 0),`
			`}`
			`token_tracker.add_usage(token_counts)`
Improve OpenAI LLM logging with more detailed debug information 2025-03-28 21:33:59 +08:00
			`logger.debug(f"Response content len: {len(content)}")`
			`verbose_debug(f"Response: {response}")`
feat: Add TokenTracker to track token usage for LLM calls 2025-03-28 01:25:15 +08:00
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00			`return content`


			`async def openai_complete(`
Fix linting 2025-02-06 16:24:02 +08:00			`prompt,`
			`system_prompt=None,`
			`history_messages=None,`
			`keyword_extraction=False,`
			`**kwargs,`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00			`) -> Union[str, AsyncIterator[str]]:`
fix: Fix potential mutable default parameter issue 2025-02-06 14:46:07 +08:00			`if history_messages is None:`
			`history_messages = []`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00			`keyword_extraction = kwargs.pop("keyword_extraction", None)`
			`if keyword_extraction:`
			`kwargs["response_format"] = "json"`
			`model_name = kwargs["hashing_kv"].global_config["llm_model_name"]`
			`return await openai_complete_if_cache(`
			`model_name,`
			`prompt,`
			`system_prompt=system_prompt,`
			`history_messages=history_messages,`
			`**kwargs,`
			`)`


			`async def gpt_4o_complete(`
Fix linting 2025-02-06 16:24:02 +08:00			`prompt,`
			`system_prompt=None,`
			`history_messages=None,`
			`keyword_extraction=False,`
			`**kwargs,`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00			`) -> str:`
fix: Fix potential mutable default parameter issue 2025-02-06 14:46:07 +08:00			`if history_messages is None:`
			`history_messages = []`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00			`keyword_extraction = kwargs.pop("keyword_extraction", None)`
			`if keyword_extraction:`
			`kwargs["response_format"] = GPTKeywordExtractionFormat`
			`return await openai_complete_if_cache(`
			`"gpt-4o",`
			`prompt,`
			`system_prompt=system_prompt,`
			`history_messages=history_messages,`
			`**kwargs,`
			`)`


			`async def gpt_4o_mini_complete(`
Fix linting 2025-02-06 16:24:02 +08:00			`prompt,`
			`system_prompt=None,`
			`history_messages=None,`
			`keyword_extraction=False,`
			`**kwargs,`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00			`) -> str:`
fix: Fix potential mutable default parameter issue 2025-02-06 14:46:07 +08:00			`if history_messages is None:`
			`history_messages = []`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00			`keyword_extraction = kwargs.pop("keyword_extraction", None)`
			`if keyword_extraction:`
			`kwargs["response_format"] = GPTKeywordExtractionFormat`
			`return await openai_complete_if_cache(`
			`"gpt-4o-mini",`
			`prompt,`
			`system_prompt=system_prompt,`
			`history_messages=history_messages,`
			`**kwargs,`
			`)`


			`async def nvidia_openai_complete(`
Fix linting 2025-02-06 16:24:02 +08:00			`prompt,`
			`system_prompt=None,`
			`history_messages=None,`
			`keyword_extraction=False,`
			`**kwargs,`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00			`) -> str:`
fix: Fix potential mutable default parameter issue 2025-02-06 14:46:07 +08:00			`if history_messages is None:`
			`history_messages = []`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00			`keyword_extraction = kwargs.pop("keyword_extraction", None)`
			`result = await openai_complete_if_cache(`
			`"nvidia/llama-3.1-nemotron-70b-instruct", # context length 128k`
			`prompt,`
			`system_prompt=system_prompt,`
			`history_messages=history_messages,`
			`base_url="https://integrate.api.nvidia.com/v1",`
			`**kwargs,`
			`)`
			`if keyword_extraction: # TODO: use JSON API`
			`return locate_json_string_body_from_string(result)`
			`return result`


			`@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)`
			`@retry(`
			`stop=stop_after_attempt(3),`
			`wait=wait_exponential(multiplier=1, min=4, max=60),`
			`retry=retry_if_exception_type(`
			`(RateLimitError, APIConnectionError, APITimeoutError)`
			`),`
			`)`
			`async def openai_embed(`
			`texts: list[str],`
			`model: str = "text-embedding-3-small",`
			`base_url: str = None,`
			`api_key: str = None,`
			`) -> np.ndarray:`
Fix cache bugs 2025-02-11 13:28:18 +08:00			`if not api_key:`
			`api_key = os.environ["OPENAI_API_KEY"]`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00
Add LightRAG version to User-Agent header for better request tracking • Add User-Agent header with version info • Update header creation in Ollama client • Update header creation in OpenAI client • Ensure consistent header format • Include Mozilla UA string for OpenAI 2025-02-06 22:55:22 +08:00			`default_headers = {`
			`"User-Agent": f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_8) LightRAG/{__api_version__}",`
Fix linting 2025-02-06 23:12:35 +08:00			`"Content-Type": "application/json",`
Add LightRAG version to User-Agent header for better request tracking • Add User-Agent header with version info • Update header creation in Ollama client • Update header creation in OpenAI client • Ensure consistent header format • Include Mozilla UA string for OpenAI 2025-02-06 22:55:22 +08:00			`}`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00			`openai_async_client = (`
Fix cache bugs 2025-02-11 13:28:18 +08:00			`AsyncOpenAI(default_headers=default_headers, api_key=api_key)`
Fix linting 2025-02-06 23:12:35 +08:00			`if base_url is None`
Fix linting error 2025-02-11 13:32:24 +08:00			`else AsyncOpenAI(`
			`base_url=base_url, default_headers=default_headers, api_key=api_key`
			`)`
Separated llms from the main llm.py file and fixed some deprication bugs 2025-01-25 00:11:00 +01:00			`)`
			`response = await openai_async_client.embeddings.create(`
			`model=model, input=texts, encoding_format="float"`
			`)`
			`return np.array([dp.embedding for dp in response.data])`