From 5c2a268d95e08e3203f6dfa72f5c9b51b972dd10 Mon Sep 17 00:00:00 2001 From: Aaron Date: Thu, 5 Oct 2023 05:45:11 +0100 Subject: [PATCH] openai_utils.py - functionality for instantiating config_list with a .env file (#68) * FORMATTING * UPDATE - OAI __init__.py * ruff * ADD - notebook covering oai API configuration options and their different purposes * ADD openai util updates so that the function just assumes the same environment variable name for all models, also added functionality for adding API configurations like api_base etc. * ADD - updates to config_list_from_dotenv and tests for openai_util testing, update example notebook * UPDATE - added working config_list_from_dotenv() with passing tests, and updated notebook * UPDATE - code and tests to potentially get around the window build permission error, used different method of producing temporary files --------- Co-authored-by: Ward --- autogen/oai/__init__.py | 2 + autogen/oai/openai_utils.py | 140 +++++++++ notebook/oai_openai_utils.ipynb | 521 ++++++++++++++++++++++++++++++++ setup.py | 1 + test/oai/test_utils.py | 137 ++++++++- 5 files changed, 798 insertions(+), 3 deletions(-) create mode 100644 notebook/oai_openai_utils.ipynb diff --git a/autogen/oai/__init__.py b/autogen/oai/__init__.py index d2b9d2618..a1b34b336 100644 --- a/autogen/oai/__init__.py +++ b/autogen/oai/__init__.py @@ -5,6 +5,7 @@ from autogen.oai.openai_utils import ( config_list_openai_aoai, config_list_from_models, config_list_from_json, + config_list_from_dotenv, ) __all__ = [ @@ -15,4 +16,5 @@ __all__ = [ "config_list_openai_aoai", "config_list_from_models", "config_list_from_json", + "config_list_from_dotenv", ] diff --git a/autogen/oai/openai_utils.py b/autogen/oai/openai_utils.py index 5d2e03919..b34d5d465 100644 --- a/autogen/oai/openai_utils.py +++ b/autogen/oai/openai_utils.py @@ -1,7 +1,11 @@ import os import json +import tempfile +from pathlib import Path from typing import List, Optional, Dict, Set, Union import logging 
+from dotenv import find_dotenv, load_dotenv + NON_CACHE_KEY = ["api_key", "api_base", "api_type", "api_version"] @@ -239,3 +243,139 @@ def config_list_from_json( except FileNotFoundError: return [] return filter_config(config_list, filter_dict) + + +def get_config( + api_key: str, api_base: Optional[str] = None, api_type: Optional[str] = None, api_version: Optional[str] = None +) -> Dict: + """ + Construct a configuration dictionary with the provided API configurations. + Appending the additional configurations to the config only if they're set + + example: + >> model_api_key_map={ + "gpt-4": "OPENAI_API_KEY", + "gpt-3.5-turbo": { + "api_key_env_var": "ANOTHER_API_KEY", + "api_type": "aoai", + "api_version": "v2", + "api_base": "https://api.someotherapi.com" + } + } + Args: + api_key (str): The API key used for authenticating API requests. + api_base (str, optional): The base URL of the API. Defaults to None. + api_type (str, optional): The type or kind of API. Defaults to None. + api_version (str, optional): The API version. Defaults to None. + + Returns: + Dict: A dictionary containing the API configurations. + """ + config = {"api_key": api_key} + if api_base: + config["api_base"] = api_base + if api_type: + config["api_type"] = api_type + if api_version: + config["api_version"] = api_version + return config + + +def config_list_from_dotenv( + dotenv_file_path: Optional[str] = None, model_api_key_map: Optional[dict] = None, filter_dict: Optional[dict] = None +) -> List[Dict[str, Union[str, Set[str]]]]: + """ + Load API configurations from a specified .env file or environment variables and construct a list of configurations. + + This function will: + - Load API keys from a provided .env file or from existing environment variables. + - Create a configuration dictionary for each model using the API keys and additional configurations. + - Filter and return the configurations based on provided filters. 
+ + model_api_key_map will default to `{"gpt-4": "OPENAI_API_KEY", "gpt-3.5-turbo": "OPENAI_API_KEY"}` if none + + Args: + dotenv_file_path (str, optional): The path to the .env file. Defaults to None. + model_api_key_map (str/dict, optional): A dictionary mapping models to their API key configurations. + If a string is provided as configuration, it is considered as an environment + variable name storing the API key. + If a dict is provided, it should contain at least 'api_key_env_var' key, + and optionally other API configurations like 'api_base', 'api_type', and 'api_version'. + Defaults to a basic map with 'gpt-4' and 'gpt-3.5-turbo' mapped to 'OPENAI_API_KEY'. + filter_dict (dict, optional): A dictionary containing the models to be loaded. + Containing a 'model' key mapped to a set of model names to be loaded. + Defaults to None, which loads all found configurations. + + Returns: + List[Dict[str, Union[str, Set[str]]]]: A list of configuration dictionaries for each model. + + Raises: + FileNotFoundError: If the specified .env file does not exist. + TypeError: If an unsupported type of configuration is provided in model_api_key_map. + """ + if dotenv_file_path: + dotenv_path = Path(dotenv_file_path) + if dotenv_path.exists(): + load_dotenv(dotenv_path) + else: + logging.warning(f"The specified .env file {dotenv_path} does not exist.") + else: + dotenv_path = find_dotenv() + if not dotenv_path: + logging.warning("No .env file found. Loading configurations from environment variables.") + load_dotenv(dotenv_path) + + # Ensure the model_api_key_map is not None to prevent TypeErrors during key assignment. + model_api_key_map = model_api_key_map or {} + + # Ensure default models are always considered + default_models = ["gpt-4", "gpt-3.5-turbo"] + + for model in default_models: + # Only assign default API key if the model is not present in the map. + # If model is present but set to invalid/empty, do not overwrite. 
+ if model not in model_api_key_map: + model_api_key_map[model] = "OPENAI_API_KEY" + + env_var = [] + # Loop over the models and create configuration dictionaries + for model, config in model_api_key_map.items(): + if isinstance(config, str): + api_key_env_var = config + config_dict = get_config(api_key=os.getenv(api_key_env_var)) + elif isinstance(config, dict): + api_key = os.getenv(config.get("api_key_env_var", "OPENAI_API_KEY")) + config_without_key_var = {k: v for k, v in config.items() if k != "api_key_env_var"} + config_dict = get_config(api_key=api_key, **config_without_key_var) + else: + logging.warning(f"Unsupported type {type(config)} for model {model} configuration") + + if not config_dict["api_key"] or config_dict["api_key"].strip() == "": + logging.warning( + f"API key not found or empty for model {model}. Please ensure path to .env file is correct." + ) + continue # Skip this configuration and continue with the next + + # Add model to the configuration and append to the list + config_dict["model"] = model + env_var.append(config_dict) + + fd, temp_name = tempfile.mkstemp() + try: + with os.fdopen(fd, "w+") as temp: + env_var_str = json.dumps(env_var) + temp.write(env_var_str) + temp.flush() + + # Assuming config_list_from_json is a valid function from your code + config_list = config_list_from_json(env_or_file=temp_name, filter_dict=filter_dict) + finally: + # The file is deleted after using its name (to prevent windows build from breaking) + os.remove(temp_name) + + if len(config_list) == 0: + logging.error("No configurations loaded.") + return [] + + logging.info(f"Models available: {[config['model'] for config in config_list]}") + return config_list diff --git a/notebook/oai_openai_utils.ipynb b/notebook/oai_openai_utils.ipynb new file mode 100644 index 000000000..a1162ff83 --- /dev/null +++ b/notebook/oai_openai_utils.ipynb @@ -0,0 +1,521 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# In-depth Guide to 
OpenAI Utility Functions\n", + "\n", + "Managing API configurations can be tricky, especially when dealing with multiple models and API versions. The provided utility functions assist users in managing these configurations effectively. Ensure your API keys and other sensitive data are stored securely. For local development, you might store keys in `.txt` or `.env` files or environment variables. Never expose your API keys publicly. If you insist on having your key files stored locally on your repo (you shouldn't), make sure the key file path is added to the `.gitignore` file.\n", + "\n", + "#### Steps:\n", + "1. Obtain API keys from OpenAI and optionally from Azure OpenAI (or other provider).\n", + "2. Store them securely using either:\n", + " - Environment Variables: `export OPENAI_API_KEY='your-key'` in your shell.\n", + " - Text File: Save the key in a `key_openai.txt` file.\n", + " - Env File: Save the key to a `.env` file eg: `OPENAI_API_KEY=sk-********************`\n", + "\n", + "---\n", + "\n", + "**TL;DR:**
\n", + "There are many ways to generate a `config_list` depending on your use case:\n", + "\n", + "- `get_config_list`: Generates configurations for API calls, primarily from provided API keys.\n", + "- `config_list_openai_aoai`: Constructs a list of configurations using both Azure OpenAI and OpenAI endpoints, sourcing API keys from environment variables or local files.\n", + "- `config_list_from_json`: Loads configurations from a JSON structure, either from an environment variable or a local JSON file, with the flexibility of filtering configurations based on given criteria.\n", + "- `config_list_from_models`: Creates configurations based on a provided list of models, useful when targeting specific models without manually specifying each configuration.\n", + "- `config_list_from_dotenv`: Constructs a configuration list from a `.env` file, offering a consolidated way to manage multiple API configurations and keys from a single file." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### What is a `config_list`?\n", + "When instantiating an assistant, such as the example below, you see that it is being passed a `config_list`. This is used to tell the `AssistantAgent` which models or configurations it has access to:\n", + "```python\n", + "\n", + "assistant = AssistantAgent(\n", + " name=\"assistant\",\n", + " llm_config={\n", + " \"request_timeout\": 600,\n", + " \"seed\": 42,\n", + " \"config_list\": config_list,\n", + " \"temperature\": 0,\n", + " },\n", + ")\n", + "```\n", + "\n", + "Consider an intelligent assistant that utilizes OpenAI's GPT models. 
Depending on user requests, it might need to:\n", + "\n", + "- Generate creative content (using gpt-4).\n", + "- Answer general queries (using gpt-3.5-turbo).\n", + "\n", + "Different tasks may require different models, and the `config_list` aids in dynamically selecting the appropriate model configuration, managing API keys, endpoints, and versions for efficient operation of the intelligent assistant. In summary, the `config_list` helps the agents work efficiently, reliably, and optimally by managing various configurations and interactions with the OpenAI API - enhancing the adaptability and functionality of the agents." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# ! pip install pyautogen" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import autogen " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## get_config_list\n", + "\n", + "Used to generate configurations for API calls." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "api_keys = [\"YOUR_OPENAI_API_KEY\"]\n", + "api_bases = None # You can specify API base URLs if needed. eg: localhost:8000\n", + "api_type = \"openai\" # Type of API, e.g., \"openai\" or \"aoai\".\n", + "api_version = None # Specify API version if needed.\n", + "\n", + "config_list = autogen.get_config_list(\n", + " api_keys,\n", + " api_bases=api_bases,\n", + " api_type=api_type,\n", + " api_version=api_version\n", + ")\n", + "\n", + "print(config_list)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## config_list_openai_aoai\n", + "\n", + "This method creates a list of configurations using Azure OpenAI endpoints and OpenAI endpoints.
It tries to extract API keys and bases either from environment variables or from local text files.\n", + "\n", + "Steps:\n", + "- Store OpenAI API key in:\n", + " - Environment variable: `OPENAI_API_KEY`\n", + " - or Local file: `key_openai.txt`\n", + "- Store Azure OpenAI API key in:\n", + " - Environment variable: `AZURE_OPENAI_API_KEY`\n", + " - or Local file: `key_aoai.txt` (Supports multiple keys, one per line)\n", + "- Store Azure OpenAI API base in:\n", + " - Environment variable: `AZURE_OPENAI_API_BASE`\n", + " - or Local file: `base_aoai.txt` (Supports multiple bases, one per line)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "config_list = autogen.config_list_openai_aoai(\n", + " key_file_path=\".\",\n", + " openai_api_key_file=\"key_openai.txt\",\n", + " aoai_api_key_file=\"key_aoai.txt\",\n", + " aoai_api_base_file=\"base_aoai.txt\",\n", + " exclude=None # The API type to exclude, eg: \"openai\" or \"aoai\".\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## config_list_from_json\n", + "\n", + "This method loads configurations from an environment variable or a JSON file. It provides flexibility by allowing users to filter configurations based on certain criteria.\n", + "\n", + "Steps:\n", + "- Setup the JSON Configuration:\n", + " 1. Store configurations in an environment variable named `OAI_CONFIG_LIST` as a valid JSON string.\n", + " 2. Alternatively, save configurations in a local JSON file named `OAI_CONFIG_LIST.json`\n", + " 3. 
Add `OAI_CONFIG_LIST` to your `.gitignore` file on your local repository.\n", + "\n", + "Your JSON structure should look something like this:\n", + "\n", + "```json\n", + "# OAI_CONFIG_LIST file example\n", + "[\n", + " {\n", + " \"model\": \"gpt-4\",\n", + " \"api_key\": \"YOUR_OPENAI_API_KEY\"\n", + " },\n", + " {\n", + " \"model\": \"gpt-3.5-turbo\",\n", + " \"api_key\": \"YOUR_OPENAI_API_KEY\",\n", + " \"api_version\": \"2023-03-01-preview\"\n", + " }\n", + "]\n", + "\n", + "```\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "config_list = autogen.config_list_from_json(\n", + " env_or_file=\"OAI_CONFIG_LIST\", # or OAI_CONFIG_LIST.json if file extension is added\n", + " filter_dict={\n", + " \"model\": {\n", + " \"gpt-4\",\n", + " \"gpt-3.5-turbo\",\n", + " }\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### What is `filter_dict`?\n", + "\n", + "The `filter_dict` parameter of the `autogen.config_list_from_json` function is used to selectively filter the configurations loaded from the environment variable or JSON file based on specified criteria. It allows you to define criteria to select only those configurations that match the defined conditions.\n", + "\n", + "Let's say you want to configure an assistant agent to use only one LLM type.
Take the below example: even though we have \"gpt-3.5-turbo\" and \"gpt-4\" in our `OAI_CONFIG_LIST`, this agent would only be configured to use the model selected by its `filter_dict`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cheap_config_list = autogen.config_list_from_json(\n", + " env_or_file=\"OAI_CONFIG_LIST\", \n", + " filter_dict={\n", + " \"model\": {\n", + " \"gpt-3.5-turbo\",\n", + " }\n", + " }\n", + ")\n", + "\n", + "costly_config_list = autogen.config_list_from_json(\n", + " env_or_file=\"OAI_CONFIG_LIST\", \n", + " filter_dict={\n", + " \"model\": {\n", + " \"gpt-4\",\n", + " }\n", + " }\n", + ")\n", + "\n", + "# Assistant using GPT 3.5 Turbo\n", + "assistant_one = autogen.AssistantAgent(\n", + " name=\"3.5-assistant\",\n", + " llm_config={\n", + " \"request_timeout\": 600,\n", + " \"seed\": 42,\n", + " \"config_list\": cheap_config_list,\n", + " \"temperature\": 0,\n", + " },\n", + ")\n", + "\n", + "# Assistant using GPT 4\n", + "assistant_two = autogen.AssistantAgent(\n", + " name=\"4-assistant\",\n", + " llm_config={\n", + " \"request_timeout\": 600,\n", + " \"seed\": 42,\n", + " \"config_list\": costly_config_list,\n", + " \"temperature\": 0,\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With the `OAI_CONFIG_LIST` we set earlier, there isn't much to filter on. But when the complexity of a project grows and you're managing multiple models for various purposes, you can see how `filter_dict` can be useful.
\n", + "\n", + "A more complex filtering criterion could be the following: assuming we have an `OAI_CONFIG_LIST` with several models used to create various agents, let's say we want to load configurations for `gpt-4` using API version `\"2023-03-01-preview\"` and we want the `api_type` to be `aoai`, we can set up `filter_dict` as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "config_list = autogen.config_list_from_json(\n", + " env_or_file=\"OAI_CONFIG_LIST\",\n", + " filter_dict = {\n", + " \"model\": {\n", + " \"gpt-4\"\n", + " },\n", + " \"api_version\": {\n", + " \"2023-03-01-preview\"\n", + " },\n", + " \"api_type\": \n", + " [\"aoai\"]\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## config_list_from_models\n", + "\n", + "This method creates configurations based on a provided list of models. It's useful when you have specific models in mind and don't want to manually specify each configuration.\n", + "\n", + "Steps:\n", + "- Similar to method 1, store API keys and bases either in environment variables or `.txt` files." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "config_list = autogen.config_list_from_models(\n", + " key_file_path = \".\",\n", + " openai_api_key_file = \"key_openai.txt\",\n", + " aoai_api_key_file = \"key_aoai.txt\",\n", + " aoai_api_base_file = \"base_aoai.txt\",\n", + " exclude=\"aoai\",\n", + " model_list = None,\n", + " model_list=[\"gpt-4\", \"gpt-3.5-turbo\", \"gpt-3.5-turbo-16k\"],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## config_list_from_dotenv\n", + "\n", + "If you are interested in keeping all of your keys in a single location like a `.env` file rather than using a configuration specifically for OpenAI, you can use `config_list_from_dotenv`.
This allows you to conveniently create a config list without creating a complex `OAI_CONFIG_LIST` file.\n", + "\n", + "The `model_api_key_map` parameter is a dictionary that maps model names to the environment variable names in the `.env` file where their respective API keys are stored. It allows the code to know which API key to use for each model. \n", + "\n", + "If not provided, it defaults to using `OPENAI_API_KEY` for both `gpt-4` and `gpt-3.5-turbo`.\n", + "\n", + "```python\n", + " # default key map\n", + " model_api_key_map = {\n", + " \"gpt-4\": \"OPENAI_API_KEY\",\n", + " \"gpt-3.5-turbo\": \"OPENAI_API_KEY\",\n", + " }\n", + "```\n", + "\n", + "Here is an example `.env` file:\n", + "\n", + "```bash\n", + "OPENAI_API_KEY=sk-*********************\n", + "HUGGING_FACE_API_KEY=**************************\n", + "ANOTHER_API_KEY=1234567890234567890\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'api_key': 'sk-*********************', 'model': 'gpt-4'},\n", + " {'api_key': 'sk-*********************', 'model': 'gpt-3.5-turbo'}]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import autogen\n", + "\n", + "config_list = autogen.config_list_from_dotenv(\n", + " dotenv_file_path='.env', # If None the function will try find in the working directory\n", + " filter_dict={\n", + " \"model\": {\n", + " \"gpt-4\",\n", + " \"gpt-3.5-turbo\",\n", + " }\n", + " }\n", + ")\n", + "\n", + "config_list" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'api_key': '1234567890234567890', 'model': 'gpt-4'},\n", + " {'api_key': 'sk-*********************', 'model': 'gpt-3.5-turbo'}]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# gpt-3.5-turbo will default to
OPENAI_API_KEY\n", + "config_list = autogen.config_list_from_dotenv(\n", + " dotenv_file_path='.env', # If None the function will try find in the working directory\n", + " model_api_key_map={\n", + " \"gpt-4\": \"ANOTHER_API_KEY\", # String or dict accepted\n", + " },\n", + " filter_dict={\n", + " \"model\": {\n", + " \"gpt-4\",\n", + " \"gpt-3.5-turbo\",\n", + " }\n", + " }\n", + ")\n", + "\n", + "config_list" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'api_key': 'sk-*********************', 'model': 'gpt-4'},\n", + " {'api_key': '**************************', 'model': 'vicuna'}]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# example using different environment variable names\n", + "config_list = autogen.config_list_from_dotenv(\n", + " dotenv_file_path='.env',\n", + " model_api_key_map={\n", + " \"gpt-4\": \"OPENAI_API_KEY\",\n", + " \"vicuna\": \"HUGGING_FACE_API_KEY\",\n", + " },\n", + " filter_dict={\n", + " \"model\": {\n", + " \"gpt-4\",\n", + " \"vicuna\",\n", + " }\n", + " }\n", + ")\n", + "\n", + "config_list" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also provide additional configurations for APIs, simply by replacing the string value with a dictionary expanding on the configurations. See the example below showing the example of using `gpt-4` on `openai` by default, and using `gpt-3.5-turbo` with additional configurations for `aoai`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'api_key': 'sk-*********************', 'model': 'gpt-4'},\n", + " {'api_key': '1234567890234567890',\n", + " 'api_base': 'https://api.someotherapi.com',\n", + " 'api_type': 'aoai',\n", + " 'api_version': 'v2',\n", + " 'model': 'gpt-3.5-turbo'}]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "config_list = autogen.config_list_from_dotenv(\n", + " dotenv_file_path='.env',\n", + " model_api_key_map={\n", + " \"gpt-4\": \"OPENAI_API_KEY\",\n", + " \"gpt-3.5-turbo\": {\n", + " \"api_key_env_var\": \"ANOTHER_API_KEY\",\n", + " \"api_type\": \"aoai\",\n", + " \"api_version\": \"v2\",\n", + " \"api_base\": \"https://api.someotherapi.com\"\n", + " }\n", + " },\n", + " filter_dict={\n", + " \"model\": {\n", + " \"gpt-4\",\n", + " \"gpt-3.5-turbo\",\n", + " }\n", + " }\n", + ")\n", + "\n", + "config_list" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "masterclass", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/setup.py b/setup.py index b9ec7af33..37c9d2d88 100644 --- a/setup.py +++ b/setup.py @@ -18,6 +18,7 @@ install_requires = [ "diskcache", "termcolor", "flaml", + "python-dotenv", ] diff --git a/test/oai/test_utils.py b/test/oai/test_utils.py index 685bcf904..8813ff0d7 100644 --- a/test/oai/test_utils.py +++ b/test/oai/test_utils.py @@ -1,31 +1,162 @@ -import json import os -import autogen +import sys +import json 
+import pytest +import logging +import tempfile +from unittest import mock from test_completion import KEY_LOC, OAI_CONFIG_LIST +sys.path.append("../../autogen") +import autogen # noqa: E402 + +# Example environment variables +ENV_VARS = { + "OPENAI_API_KEY": "sk-********************", + "HUGGING_FACE_API_KEY": "**************************", + "ANOTHER_API_KEY": "1234567890234567890", +} + +# Example model to API key mappings +MODEL_API_KEY_MAP = { + "gpt-4": "OPENAI_API_KEY", + "gpt-3.5-turbo": { + "api_key_env_var": "ANOTHER_API_KEY", + "api_type": "aoai", + "api_version": "v2", + "api_base": "https://api.someotherapi.com", + }, +} + +# Example filter dictionary +FILTER_DICT = { + "model": { + "gpt-4", + "gpt-3.5-turbo", + } +} + + +@pytest.fixture +def mock_os_environ(): + with mock.patch.dict(os.environ, ENV_VARS): + yield + def test_config_list_from_json(): + # Test the functionality for loading configurations from JSON file + # and ensuring that the loaded configurations are as expected. config_list = autogen.config_list_gpt4_gpt35(key_file_path=KEY_LOC) json_file = os.path.join(KEY_LOC, "config_list_test.json") + with open(json_file, "w") as f: json.dump(config_list, f, indent=4) + config_list_1 = autogen.config_list_from_json(json_file) assert config_list == config_list_1 + os.environ["config_list_test"] = json.dumps(config_list) config_list_2 = autogen.config_list_from_json("config_list_test") assert config_list == config_list_2 + config_list_3 = autogen.config_list_from_json( OAI_CONFIG_LIST, file_location=KEY_LOC, filter_dict={"model": ["gpt4", "gpt-4-32k"]} ) assert all(config.get("model") in ["gpt4", "gpt-4-32k"] for config in config_list_3) + del os.environ["config_list_test"] os.remove(json_file) def test_config_list_openai_aoai(): + # Testing the functionality for loading configurations for different API types + # and ensuring the API types in the loaded configurations are as expected. 
config_list = autogen.config_list_openai_aoai(key_file_path=KEY_LOC) assert all(config.get("api_type") in [None, "open_ai", "azure"] for config in config_list) +def test_config_list_from_dotenv(mock_os_environ, caplog): + # Test with valid .env file + fd, temp_name = tempfile.mkstemp() + try: + with os.fdopen(fd, "w+") as temp: + temp.write("\n".join([f"{k}={v}" for k, v in ENV_VARS.items()])) + temp.flush() + # Use the updated config_list_from_dotenv function + config_list = autogen.config_list_from_dotenv(dotenv_file_path=temp_name) + + # Ensure configurations are loaded and API keys match expected values + assert config_list, "Config list is empty with default API keys" + + # Check that configurations only include models specified in the filter + for config in config_list: + assert config["model"] in FILTER_DICT["model"], f"Model {config['model']} not in filter" + + # Check the default API key for gpt-4 and gpt-3.5-turbo when model_api_key_map is None + config_list = autogen.config_list_from_dotenv(dotenv_file_path=temp_name, model_api_key_map=None) + + expected_api_key = os.getenv("OPENAI_API_KEY") + assert any( + config["model"] == "gpt-4" and config["api_key"] == expected_api_key for config in config_list + ), "Default gpt-4 configuration not found or incorrect" + assert any( + config["model"] == "gpt-3.5-turbo" and config["api_key"] == expected_api_key for config in config_list + ), "Default gpt-3.5-turbo configuration not found or incorrect" + finally: + os.remove(temp_name) # The file is deleted after using its name (to prevent windows build from breaking) + + # Test with missing dotenv file + with caplog.at_level(logging.WARNING): + config_list = autogen.config_list_from_dotenv(dotenv_file_path="non_existent_path") + assert "The specified .env file non_existent_path does not exist." 
in caplog.text + + # Test with invalid API key + ENV_VARS["ANOTHER_API_KEY"] = "" # Removing ANOTHER_API_KEY value + + with caplog.at_level(logging.WARNING): + config_list = autogen.config_list_from_dotenv() + assert "No .env file found. Loading configurations from environment variables." in caplog.text + # The function does not return an empty list if at least one configuration is loaded successfully + assert config_list != [], "Config list is empty" + + # Test with no configurations loaded + invalid_model_api_key_map = { + "gpt-4": "INVALID_API_KEY", # Simulate an environment var name that doesn't exist + } + with caplog.at_level(logging.ERROR): + # Mocking `config_list_from_json` to return an empty list and raise an exception when called + with mock.patch("autogen.config_list_from_json", return_value=[], side_effect=Exception("Mock called")): + # Call the function with the invalid map + config_list = autogen.config_list_from_dotenv( + model_api_key_map=invalid_model_api_key_map, + filter_dict={ + "model": { + "gpt-4", + } + }, + ) + + # Assert that the configuration list is empty + assert not config_list, "Expected no configurations to be loaded" + + # test for mixed validity in the keymap + invalid_model_api_key_map = { + "gpt-4": "INVALID_API_KEY", + "gpt-3.5-turbo": "ANOTHER_API_KEY", # valid according to the example configs + } + + with caplog.at_level(logging.WARNING): + # Call the function with the mixed validity map + config_list = autogen.config_list_from_dotenv(model_api_key_map=invalid_model_api_key_map) + assert config_list, "Expected configurations to be loaded" + assert any( + config["model"] == "gpt-3.5-turbo" for config in config_list + ), "gpt-3.5-turbo configuration not found" + assert all( + config["model"] != "gpt-4" for config in config_list + ), "gpt-4 configuration found, but was not expected" + assert "API key not found or empty for model gpt-4" in caplog.text + + if __name__ == "__main__": - test_config_list_from_json() + pytest.main()