graphiti/graphiti_core/llm_client/openai_client.py

"""
Copyright 2024, Zep Software, Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import logging
import typing

import openai
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionMessageParam
from pydantic import BaseModel

from ..prompts.models import Message
from .client import LLMClient
from .config import LLMConfig
from .errors import RateLimitError, RefusalError

# Module-level logger, named after this module so log records can be filtered by origin.
logger = logging.getLogger(__name__)

# Fallback model name, used when the client's configured model is unset or otherwise falsy.
DEFAULT_MODEL = 'gpt-4o-mini'


class OpenAIClient(LLMClient):
    """
    OpenAIClient is a client class for interacting with OpenAI's language models.

    This class extends the LLMClient and provides methods to initialize the client
    and to generate structured responses from the language model.

    Attributes:
        client (AsyncOpenAI): The OpenAI client used to interact with the API.
        model (str): The model name to use for generating responses. DEFAULT_MODEL is
            used when this is falsy.
        temperature (float): The temperature to use for generating responses.
        max_tokens (int): The maximum number of tokens to generate in a response.

        NOTE(review): model, temperature and max_tokens are read from ``self`` but are not
        assigned in this class; presumably LLMClient.__init__ sets them from the config.

    Methods:
        __init__(config: LLMConfig | None = None, cache: bool = False, client: typing.Any = None):
            Initializes the OpenAIClient with the provided configuration, cache setting, and client.

        _generate_response(messages: list[Message], response_model: type[BaseModel] | None = None) -> dict[str, typing.Any]:
            Generates a structured response from the language model based on the provided messages.

        generate_response(messages: list[Message], response_model: type[BaseModel] | None = None) -> dict[str, typing.Any]:
            Public entry point; delegates directly to _generate_response.
    """

    def __init__(
        self, config: LLMConfig | None = None, cache: bool = False, client: typing.Any = None
    ):
        """
        Initialize the OpenAIClient with the provided configuration, cache setting, and client.

        Args:
            config (LLMConfig | None): The configuration for the LLM client, including API key, model, base URL, temperature, and max tokens.
                A default LLMConfig() is created when omitted.
            cache (bool): Whether to use caching for responses. Defaults to False. Must be falsy for this client.
            client (Any | None): An optional async client instance to use. If not provided, a new AsyncOpenAI client is created.

        Raises:
            NotImplementedError: If ``cache`` is truthy; caching is not supported by this client.
        """
        # removed caching to simplify the `generate_response` override
        if cache:
            raise NotImplementedError('Caching is not implemented for OpenAI')

        if config is None:
            config = LLMConfig()

        super().__init__(config, cache)

        if client is None:
            self.client = AsyncOpenAI(api_key=config.api_key, base_url=config.base_url)
        else:
            self.client = client

    async def _generate_response(
        self, messages: list[Message], response_model: type[BaseModel] | None = None
    ) -> dict[str, typing.Any]:
        """
        Send the messages to the chat completions ``parse`` endpoint and return the parsed result.

        Args:
            messages (list[Message]): The conversation to send. Only messages whose role is
                'user' or 'system' are forwarded; messages with any other role are skipped.
            response_model (type[BaseModel] | None): The model class passed to the API as
                ``response_format``.
                NOTE(review): the value is forwarded as-is even when it is None; confirm that
                the OpenAI SDK accepts None here.

        Returns:
            dict[str, Any]: ``model_dump()`` of the parsed message of the first choice.

        Raises:
            RefusalError: If the message carries a refusal instead of parsed content.
            RateLimitError: If the OpenAI client raises ``openai.RateLimitError``.
            Exception: If the output was cut off (``openai.LengthFinishReasonError``), if the
                message has neither parsed content nor a refusal, or for any other error,
                which is logged and re-raised unchanged.
        """
        openai_messages: list[ChatCompletionMessageParam] = []
        for m in messages:
            # Each branch spells the role out as a literal; roles other than these two are dropped.
            if m.role == 'user':
                openai_messages.append({'role': 'user', 'content': m.content})
            elif m.role == 'system':
                openai_messages.append({'role': 'system', 'content': m.content})
        try:
            response = await self.client.beta.chat.completions.parse(
                model=self.model or DEFAULT_MODEL,
                messages=openai_messages,
                temperature=self.temperature,
                max_tokens=self.max_tokens,
                response_format=response_model,  # type: ignore
            )

            response_object = response.choices[0].message

            # Check for presence rather than truthiness: a successfully parsed model that
            # happens to evaluate falsy must not be mistaken for a missing response.
            if response_object.parsed is not None:
                return response_object.parsed.model_dump()
            if response_object.refusal:
                raise RefusalError(response_object.refusal)
            raise Exception('No response from LLM')
        except openai.LengthFinishReasonError as e:
            raise Exception(f'Output length exceeded max tokens {self.max_tokens}: {e}') from e
        except openai.RateLimitError as e:
            # Translate the SDK error into this package's own RateLimitError.
            raise RateLimitError from e
        except Exception as e:
            # Also reached by the RefusalError / "No response" raises above: log, then propagate.
            logger.error(f'Error in generating LLM response: {e}')
            raise

    async def generate_response(
        self, messages: list[Message], response_model: type[BaseModel] | None = None
    ) -> dict[str, typing.Any]:
        """
        Generate a response for the given messages.

        This override delegates directly to ``_generate_response`` and returns its result
        unchanged; exceptions raised there propagate to the caller.

        Args:
            messages (list[Message]): The conversation to send to the model.
            response_model (type[BaseModel] | None): The model class describing the
                structured output, forwarded to ``_generate_response``.

        Returns:
            dict[str, Any]: The dictionary produced by ``_generate_response``.
        """
        response = await self._generate_response(messages, response_model)

        return response
Add Apache License 2.0 boilerplate to all Python files (#30) * Add Apache License 2.0 boilerplate to all Python files --- For more details, open the [Copilot Workspace session](https://copilot-workspace.githubnext.com/getzep/graphiti?shareId=XXXX-XXXX-XXXX-XXXX). * format * format * chore: Add Ellipsis configuration file 2024-08-23 13:01:33 -07:00			`"""`
			`Copyright 2024, Zep Software, Inc.`

			`Licensed under the Apache License, Version 2.0 (the "License");`
			`you may not use this file except in compliance with the License.`
			`You may obtain a copy of the License at`

			`http://www.apache.org/licenses/LICENSE-2.0`

			`Unless required by applicable law or agreed to in writing, software`
			`distributed under the License is distributed on an "AS IS" BASIS,`
			`WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`See the License for the specific language governing permissions and`
			`limitations under the License.`
			`"""`

format and linting (#18) * Makefile and format * fix podcast stuff * refactor: update import statement for transcript_parser in podcast_runner.py * format and linting * chore: Update import statements and remove unused code in maintenance module 2024-08-22 12:26:13 -07:00			`import logging`
chore: Fix Typing Issues (#27) * typing.Any and friends * message * chore: Import Message model in llm_client * fix: :lipstick: mypy errors * clean up mypy stuff * mypy * format * mypy * mypy * mypy --------- Co-authored-by: paulpaliychuk <pavlo.paliychuk.ca@gmail.com> Co-authored-by: prestonrasmussen <prasmuss15@gmail.com> 2024-08-23 08:15:44 -07:00			`import typing`
format and linting (#18) * Makefile and format * fix podcast stuff * refactor: update import statement for transcript_parser in podcast_runner.py * format and linting * chore: Update import statements and remove unused code in maintenance module 2024-08-22 12:26:13 -07:00
Fix llm client retry (#102) * Fix llm client retry * feat: Improve llm client retry error message 2024-09-10 08:15:27 -07:00			`import openai`
Refactor maintenance structure, add prompt library (#4) * chore: Initial draft of stubs * chore: Add comments and mock implementation of the add_episode method * chore: Add success and error callbacks * chore: Add success and error callbacks * refactor: Fix conflicts with the latest merge 2024-08-15 12:03:41 -04:00			`from openai import AsyncOpenAI`
chore: Fix Typing Issues (#27) * typing.Any and friends * message * chore: Import Message model in llm_client * fix: :lipstick: mypy errors * clean up mypy stuff * mypy * format * mypy * mypy * mypy --------- Co-authored-by: paulpaliychuk <pavlo.paliychuk.ca@gmail.com> Co-authored-by: prestonrasmussen <prasmuss15@gmail.com> 2024-08-23 08:15:44 -07:00			`from openai.types.chat import ChatCompletionMessageParam`
Implement OpenAI Structured Output (#225) * implement so * bug fixes and typing * inject schema for non-openai clients * correct datetime format * remove List keyword * Refactor node_operations.py to use updated prompt_library functions * update example 2024-12-05 07:03:18 -08:00			`from pydantic import BaseModel`
format and linting (#18) * Makefile and format * fix podcast stuff * refactor: update import statement for transcript_parser in podcast_runner.py * format and linting * chore: Update import statements and remove unused code in maintenance module 2024-08-22 12:26:13 -07:00
chore: Fix Typing Issues (#27) * typing.Any and friends * message * chore: Import Message model in llm_client * fix: :lipstick: mypy errors * clean up mypy stuff * mypy * format * mypy * mypy * mypy --------- Co-authored-by: paulpaliychuk <pavlo.paliychuk.ca@gmail.com> Co-authored-by: prestonrasmussen <prasmuss15@gmail.com> 2024-08-23 08:15:44 -07:00			`from ..prompts.models import Message`
Refactor maintenance structure, add prompt library (#4) * chore: Initial draft of stubs * chore: Add comments and mock implementation of the add_episode method * chore: Add success and error callbacks * chore: Add success and error callbacks * refactor: Fix conflicts with the latest merge 2024-08-15 12:03:41 -04:00			`from .client import LLMClient`
			`from .config import LLMConfig`
Implement OpenAI Structured Output (#225) * implement so * bug fixes and typing * inject schema for non-openai clients * correct datetime format * remove List keyword * Refactor node_operations.py to use updated prompt_library functions * update example 2024-12-05 07:03:18 -08:00			`from .errors import RateLimitError, RefusalError`
Refactor maintenance structure, add prompt library (#4) * chore: Initial draft of stubs * chore: Add comments and mock implementation of the add_episode method * chore: Add success and error callbacks * chore: Add success and error callbacks * refactor: Fix conflicts with the latest merge 2024-08-15 12:03:41 -04:00
Cleanup maintenance utilities + add podcast runner (#5) * chore: Fix minor issues with episodic edge building + cleanup * feat: Port podcast runner * feat: Port podcast runner 2024-08-16 09:29:57 -04:00			`logger = logging.getLogger(__name__)`

Group id fix (#152) * node distance and group_ids fixed * get all with no group_id passed * push * push * remove comments * mypy * mypy ids * please mypy * trust * last one 2024-09-24 15:55:30 -04:00			`DEFAULT_MODEL = 'gpt-4o-mini'`
implement diskcache (#39) * chore: Add romeo runner * fix: Linter * wip * wip dump * chore: Update romeo parser * chore: Anthropic model fix * wip * allbirds * allbirds runner * format * wip * wip * mypy updates * update * remove r * update tests * format * wip * chore: Strategically update the message * rebase and fix import issues * Update package imports for graphiti_core in examples and utils * nits * chore: Update OpenAI GPT-4o model to gpt-4o-2024-08-06 * implement groq * improvments & linting * cleanup and nits * Refactor package imports for graphiti_core in examples and utils * Refactor package imports for graphiti_core in examples and utils * implement diskcache * remove debug stuff * log cache hit when debugging only * Improve LLM config. Fix bugs (#41) Refactor LLMConfig class to allow None values for model and base_url * chore: Resolve mc --------- Co-authored-by: paulpaliychuk <pavlo.paliychuk.ca@gmail.com> Co-authored-by: prestonrasmussen <prasmuss15@gmail.com> 2024-08-26 10:13:05 -07:00
Refactor maintenance structure, add prompt library (#4) * chore: Initial draft of stubs * chore: Add comments and mock implementation of the add_episode method * chore: Add success and error callbacks * chore: Add success and error callbacks * refactor: Fix conflicts with the latest merge 2024-08-15 12:03:41 -04:00
			`class OpenAIClient(LLMClient):`
feat: Refactor OpenAIClient initialization and add client parameter (#140) The code changes refactor the `OpenAIClient` initialization to accept an optional `client` parameter. This allows the client to be passed in from outside, providing more flexibility and enabling easier testing. 2024-09-21 12:09:04 -07:00			`"""`
			`OpenAIClient is a client class for interacting with OpenAI's language models.`

			`This class extends the LLMClient and provides methods to initialize the client,`
			`get an embedder, and generate responses from the language model.`

			`Attributes:`
			`client (AsyncOpenAI): The OpenAI client used to interact with the API.`
			`model (str): The model name to use for generating responses.`
			`temperature (float): The temperature to use for generating responses.`
			`max_tokens (int): The maximum number of tokens to generate in a response.`

			`Methods:`
			`__init__(config: LLMConfig \| None = None, cache: bool = False, client: typing.Any = None):`
			`Initializes the OpenAIClient with the provided configuration, cache setting, and client.`

			`_generate_response(messages: list[Message]) -> dict[str, typing.Any]:`
			`Generates a response from the language model based on the provided messages.`
			`"""`

			`def __init__(`
			`self, config: LLMConfig \| None = None, cache: bool = False, client: typing.Any = None`
			`):`
			`"""`
			`Initialize the OpenAIClient with the provided configuration, cache setting, and client.`

			`Args:`
			`config (LLMConfig \| None): The configuration for the LLM client, including API key, model, base URL, temperature, and max tokens.`
			`cache (bool): Whether to use caching for responses. Defaults to False.`
			`client (Any \| None): An optional async client instance to use. If not provided, a new AsyncOpenAI client is created.`

			`"""`
Implement OpenAI Structured Output (#225) * implement so * bug fixes and typing * inject schema for non-openai clients * correct datetime format * remove List keyword * Refactor node_operations.py to use updated prompt_library functions * update example 2024-12-05 07:03:18 -08:00			# removed caching to simplify the `generate_response` override
			`if cache:`
			`raise NotImplementedError('Caching is not implemented for OpenAI')`

Controlled example (#37) * chore: Add romeo runner * fix: Linter * dedupe fixes * wip * wip dump * allbirds * chore: Update romeo parser * chore: Anthropic model fix * allbirds runner * format * wip * mypy updates * update * remove r * update tests * format * wip * wip * wip * chore: Strategically update the message * chore: Add romeo runner * fix: Linter * wip * wip dump * chore: Update romeo parser * chore: Anthropic model fix * wip * allbirds * allbirds runner * format * wip * wip * mypy updates * update * remove r * update tests * format * wip * chore: Strategically update the message * rebase and fix import issues * Update package imports for graphiti_core in examples and utils * nits * chore: Update OpenAI GPT-4o model to gpt-4o-2024-08-06 * implement groq * improvments & linting * cleanup and nits * Refactor package imports for graphiti_core in examples and utils * Refactor package imports for graphiti_core in examples and utils * chore: Nuke unused examples * chore: Nuke unused examples * chore: Only run type check on graphiti_core * fix unit tests * reformat * unit test * fix: Unit tests * test: Add coverage for extract_date_strings_from_edge * lint * remove commented code --------- Co-authored-by: prestonrasmussen <prasmuss15@gmail.com> Co-authored-by: Daniel Chalef <131175+danielchalef@users.noreply.github.com> 2024-08-26 10:30:22 -04:00			`if config is None:`
			`config = LLMConfig()`
implement diskcache (#39) * chore: Add romeo runner * fix: Linter * wip * wip dump * chore: Update romeo parser * chore: Anthropic model fix * wip * allbirds * allbirds runner * format * wip * wip * mypy updates * update * remove r * update tests * format * wip * chore: Strategically update the message * rebase and fix import issues * Update package imports for graphiti_core in examples and utils * nits * chore: Update OpenAI GPT-4o model to gpt-4o-2024-08-06 * implement groq * improvments & linting * cleanup and nits * Refactor package imports for graphiti_core in examples and utils * Refactor package imports for graphiti_core in examples and utils * implement diskcache * remove debug stuff * log cache hit when debugging only * Improve LLM config. Fix bugs (#41) Refactor LLMConfig class to allow None values for model and base_url * chore: Resolve mc --------- Co-authored-by: paulpaliychuk <pavlo.paliychuk.ca@gmail.com> Co-authored-by: prestonrasmussen <prasmuss15@gmail.com> 2024-08-26 10:13:05 -07:00
			`super().__init__(config, cache)`

feat: Refactor OpenAIClient initialization and add client parameter (#140) The code changes refactor the `OpenAIClient` initialization to accept an optional `client` parameter. This allows the client to be passed in from outside, providing more flexibility and enabling easier testing. 2024-09-21 12:09:04 -07:00			`if client is None:`
			`self.client = AsyncOpenAI(api_key=config.api_key, base_url=config.base_url)`
			`else:`
			`self.client = client`
Refactor maintenance structure, add prompt library (#4) * chore: Initial draft of stubs * chore: Add comments and mock implementation of the add_episode method * chore: Add success and error callbacks * chore: Add success and error callbacks * refactor: Fix conflicts with the latest merge 2024-08-15 12:03:41 -04:00
Implement OpenAI Structured Output (#225) * implement so * bug fixes and typing * inject schema for non-openai clients * correct datetime format * remove List keyword * Refactor node_operations.py to use updated prompt_library functions * update example 2024-12-05 07:03:18 -08:00			`async def _generate_response(`
			`self, messages: list[Message], response_model: type[BaseModel] \| None = None`
			`) -> dict[str, typing.Any]:`
feat: Add real world dates extraction (#26) * feat: Add real world dates extraction * fix: Linter * fix: :lipstick: mypy errors * chore: handle invalid dates returned by the llm * chore: Polish prompt * reformat * style: :lipstick: reformat 2024-08-23 14:18:45 -04:00			`openai_messages: list[ChatCompletionMessageParam] = []`
			`for m in messages:`
			`if m.role == 'user':`
			`openai_messages.append({'role': 'user', 'content': m.content})`
			`elif m.role == 'system':`
			`openai_messages.append({'role': 'system', 'content': m.content})`
			`try:`
Implement OpenAI Structured Output (#225) * implement so * bug fixes and typing * inject schema for non-openai clients * correct datetime format * remove List keyword * Refactor node_operations.py to use updated prompt_library functions * update example 2024-12-05 07:03:18 -08:00			`response = await self.client.beta.chat.completions.parse(`
implement diskcache (#39) * chore: Add romeo runner * fix: Linter * wip * wip dump * chore: Update romeo parser * chore: Anthropic model fix * wip * allbirds * allbirds runner * format * wip * wip * mypy updates * update * remove r * update tests * format * wip * chore: Strategically update the message * rebase and fix import issues * Update package imports for graphiti_core in examples and utils * nits * chore: Update OpenAI GPT-4o model to gpt-4o-2024-08-06 * implement groq * improvments & linting * cleanup and nits * Refactor package imports for graphiti_core in examples and utils * Refactor package imports for graphiti_core in examples and utils * implement diskcache * remove debug stuff * log cache hit when debugging only * Improve LLM config. Fix bugs (#41) Refactor LLMConfig class to allow None values for model and base_url * chore: Resolve mc --------- Co-authored-by: paulpaliychuk <pavlo.paliychuk.ca@gmail.com> Co-authored-by: prestonrasmussen <prasmuss15@gmail.com> 2024-08-26 10:13:05 -07:00			`model=self.model or DEFAULT_MODEL,`
feat: Add real world dates extraction (#26) * feat: Add real world dates extraction * fix: Linter * fix: :lipstick: mypy errors * chore: handle invalid dates returned by the llm * chore: Polish prompt * reformat * style: :lipstick: reformat 2024-08-23 14:18:45 -04:00			`messages=openai_messages,`
implement diskcache (#39) * chore: Add romeo runner * fix: Linter * wip * wip dump * chore: Update romeo parser * chore: Anthropic model fix * wip * allbirds * allbirds runner * format * wip * wip * mypy updates * update * remove r * update tests * format * wip * chore: Strategically update the message * rebase and fix import issues * Update package imports for graphiti_core in examples and utils * nits * chore: Update OpenAI GPT-4o model to gpt-4o-2024-08-06 * implement groq * improvments & linting * cleanup and nits * Refactor package imports for graphiti_core in examples and utils * Refactor package imports for graphiti_core in examples and utils * implement diskcache * remove debug stuff * log cache hit when debugging only * Improve LLM config. Fix bugs (#41) Refactor LLMConfig class to allow None values for model and base_url * chore: Resolve mc --------- Co-authored-by: paulpaliychuk <pavlo.paliychuk.ca@gmail.com> Co-authored-by: prestonrasmussen <prasmuss15@gmail.com> 2024-08-26 10:13:05 -07:00			`temperature=self.temperature,`
			`max_tokens=self.max_tokens,`
Implement OpenAI Structured Output (#225) * implement so * bug fixes and typing * inject schema for non-openai clients * correct datetime format * remove List keyword * Refactor node_operations.py to use updated prompt_library functions * update example 2024-12-05 07:03:18 -08:00			`response_format=response_model, # type: ignore`
feat: Add real world dates extraction (#26) * feat: Add real world dates extraction * fix: Linter * fix: :lipstick: mypy errors * chore: handle invalid dates returned by the llm * chore: Polish prompt * reformat * style: :lipstick: reformat 2024-08-23 14:18:45 -04:00			`)`
Implement OpenAI Structured Output (#225) * implement so * bug fixes and typing * inject schema for non-openai clients * correct datetime format * remove List keyword * Refactor node_operations.py to use updated prompt_library functions * update example 2024-12-05 07:03:18 -08:00
			`response_object = response.choices[0].message`

			`if response_object.parsed:`
			`return response_object.parsed.model_dump()`
			`elif response_object.refusal:`
			`raise RefusalError(response_object.refusal)`
			`else:`
			`raise Exception('No response from LLM')`
			`except openai.LengthFinishReasonError as e:`
			`raise Exception(f'Output length exceeded max tokens {self.max_tokens}: {e}') from e`
Fix llm client retry (#102) * Fix llm client retry * feat: Improve llm client retry error message 2024-09-10 08:15:27 -07:00			`except openai.RateLimitError as e:`
			`raise RateLimitError from e`
feat: Add real world dates extraction (#26) * feat: Add real world dates extraction * fix: Linter * fix: :lipstick: mypy errors * chore: handle invalid dates returned by the llm * chore: Polish prompt * reformat * style: :lipstick: reformat 2024-08-23 14:18:45 -04:00			`except Exception as e:`
			`logger.error(f'Error in generating LLM response: {e}')`
			`raise`
Implement OpenAI Structured Output (#225) * implement so * bug fixes and typing * inject schema for non-openai clients * correct datetime format * remove List keyword * Refactor node_operations.py to use updated prompt_library functions * update example 2024-12-05 07:03:18 -08:00
			`async def generate_response(`
			`self, messages: list[Message], response_model: type[BaseModel] \| None = None`
			`) -> dict[str, typing.Any]:`
			`response = await self._generate_response(messages, response_model)`

			`return response`