autogen/test/oai/test_utils.py
Gunnar Kudrjavets f68c09b035
Validate the OpenAI API key format (#1635)

* Log a warning when registering a default client fails.

* Validate the OpenAI API key format

Increase the amount of internal validation for OpenAI API keys. The intent is
to shorten the debugging loop in case of typos. The changes do *not* add
validation for Azure OpenAI API keys.

* Add the validation in `__init__` of `OpenAIClient`. We'll log a
  warning when the OpenAI API key isn't valid.

* Introduce the `MOCK_OPEN_AI_API_KEY` constant for testing.

* Add unit test coverage for the `is_valid_api_key` function.

* Check for OpenAI base_url before API key validation
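
For reference, the key shape these changes enforce, as implied by the unit
tests below: a `sk-` prefix followed by a long alphanumeric run. A minimal
sketch of such a check (an assumption inferred from the tests; the regex that
ships in `autogen.oai.openai_utils` may differ):

    import re

    def is_valid_api_key(api_key: str) -> bool:
        # Assumed shape: "sk-" plus 32 or more ASCII letters/digits.
        return bool(re.fullmatch(r"sk-[A-Za-z0-9]{32,}", api_key))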

---------

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
2024-02-14 18:51:38 +00:00

389 lines
15 KiB
Python

import json
import logging
import os
import tempfile
from unittest import mock
from unittest.mock import patch

import pytest

import autogen  # noqa: E402
from autogen.oai.openai_utils import DEFAULT_AZURE_API_VERSION, filter_config, is_valid_api_key
from conftest import MOCK_OPEN_AI_API_KEY
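
# MOCK_OPEN_AI_API_KEY is defined in conftest.py; test_is_valid_api_key below
# expects it to be a syntactically valid OpenAI key.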

# Example environment variables
ENV_VARS = {
    "OPENAI_API_KEY": "sk-********************",
    "HUGGING_FACE_API_KEY": "**************************",
    "ANOTHER_API_KEY": "1234567890234567890",
}

# Example model to API key mappings
MODEL_API_KEY_MAP = {
    "gpt-4": "OPENAI_API_KEY",
    "gpt-3.5-turbo": {
        "api_key_env_var": "ANOTHER_API_KEY",
        "api_type": "aoai",
        "api_version": "v2",
        "base_url": "https://api.someotherapi.com",
    },
}

# Example filter dictionary
FILTER_DICT = {
    "model": {
        "gpt-4",
        "gpt-3.5-turbo",
    }
}
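
# FILTER_DICT is used in test_config_list_from_dotenv as the expected set of
# models allowed to appear in the loaded configurations.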

JSON_SAMPLE = """
[
    {
        "model": "gpt-3.5-turbo",
        "api_type": "openai"
    },
    {
        "model": "gpt-4",
        "api_type": "openai"
    },
    {
        "model": "gpt-35-turbo-v0301",
        "tags": ["gpt-3.5-turbo", "gpt35_turbo"],
        "api_key": "111113fc7e8a46419bfac511bb301111",
        "base_url": "https://1111.openai.azure.com",
        "api_type": "azure",
        "api_version": "2023-07-01-preview"
    },
    {
        "model": "gpt",
        "api_key": "not-needed",
        "base_url": "http://localhost:1234/v1"
    }
]
"""


@pytest.fixture
def mock_os_environ():
    with mock.patch.dict(os.environ, ENV_VARS):
        yield


def test_config_list_from_json():
    # Load configs from: a JSON file, an env var holding JSON, and an env var
    # holding a file path; exercise filter_dict and file_location as well.
    with tempfile.NamedTemporaryFile(mode="w+", delete=False) as tmp_file:
        json_data = json.loads(JSON_SAMPLE)
        tmp_file.write(JSON_SAMPLE)
        tmp_file.flush()
        config_list = autogen.config_list_from_json(tmp_file.name)

        assert len(config_list) == len(json_data)
        i = 0
        for config in config_list:
            assert isinstance(config, dict)
            for key in config:
                assert key in json_data[i]
                assert config[key] == json_data[i][key]
            i += 1

        # Test: the env variable is set to the JSON contents directly.
        os.environ["config_list_test"] = JSON_SAMPLE
        config_list_2 = autogen.config_list_from_json("config_list_test")
        assert config_list == config_list_2

        # Test: passing a full file path (folder name included) plus a filter.
        config_list_3 = autogen.config_list_from_json(
            tmp_file.name, filter_dict={"model": ["gpt", "gpt-4", "gpt-4-32k"]}
        )
        assert all(config.get("model") in ["gpt-4", "gpt"] for config in config_list_3)
        del os.environ["config_list_test"]

        # Test: using the `file_location` parameter.
        config_list_4 = autogen.config_list_from_json(
            os.path.basename(tmp_file.name),
            file_location=os.path.dirname(tmp_file.name),
            filter_dict={"model": ["gpt4", "gpt-4-32k"]},
        )
        assert all(config.get("model") in ["gpt4", "gpt-4-32k"] for config in config_list_4)

    # Test: the env variable is set to a file path.
    fd, temp_name = tempfile.mkstemp()
    json.dump(config_list, os.fdopen(fd, "w+"), indent=4)
    os.environ["config_list_test"] = temp_name
    config_list_5 = autogen.config_list_from_json("config_list_test")
    assert config_list_5 == config_list_2
    del os.environ["config_list_test"]

    # Test that an error is thrown when the config list is missing
    with pytest.raises(FileNotFoundError):
        autogen.config_list_from_json("OAI_CONFIG_LIST.missing")


def test_config_list_openai_aoai():
    # Testing the functionality for loading configurations for different API types
    # and ensuring the API types in the loaded configurations are as expected.
    with tempfile.TemporaryDirectory() as temp_dir:
        # Create temporary files with sample data for keys and base URLs
        openai_key_file = os.path.join(temp_dir, "key_openai.txt")
        aoai_key_file = os.path.join(temp_dir, "key_aoai.txt")
        openai_base_file = os.path.join(temp_dir, "base_openai.txt")
        aoai_base_file = os.path.join(temp_dir, "base_aoai.txt")

        # Write sample data to the temporary files
        with open(openai_key_file, "w") as f:
            f.write("sk-testkeyopenai123\nsk-testkeyopenai456")
        with open(aoai_key_file, "w") as f:
            f.write("sk-testkeyaoai456")
        with open(openai_base_file, "w") as f:
            f.write("https://api.openai.com/v1\nhttps://api.openai.com/v1")
        with open(aoai_base_file, "w") as f:
            f.write("https://api.azure.com/v1")

        # Pass the temporary directory as a parameter to the function
        config_list = autogen.config_list_openai_aoai(key_file_path=temp_dir)
        assert len(config_list) == 3
        expected_config_list = [
            {"api_key": "sk-testkeyopenai123", "base_url": "https://api.openai.com/v1"},
            {"api_key": "sk-testkeyopenai456", "base_url": "https://api.openai.com/v1"},
            {
                "api_key": "sk-testkeyaoai456",
                "base_url": "https://api.azure.com/v1",
                "api_type": "azure",
                "api_version": DEFAULT_AZURE_API_VERSION,
            },
        ]
        assert config_list == expected_config_list


@patch(
    "os.environ",
    {
        "OPENAI_API_KEY": "test_openai_key",
        "OPENAI_API_BASE": "https://api.openai.com",
        "AZURE_OPENAI_API_KEY": "test_aoai_key",
        "AZURE_OPENAI_API_BASE": "https://api.azure.com",
    },
)
def test_config_list_openai_aoai_env_vars():
    # Test the config_list_openai_aoai function with environment variables set
    configs = autogen.oai.openai_utils.config_list_openai_aoai(key_file_path=None)
    assert len(configs) == 2
    assert {"api_key": "test_openai_key", "base_url": "https://api.openai.com"} in configs
    assert {
        "api_key": "test_aoai_key",
        "base_url": "https://api.azure.com",
        "api_type": "azure",
        "api_version": DEFAULT_AZURE_API_VERSION,
    } in configs
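
# The same variables can also hold several newline-separated values; each line
# is expanded into its own config entry, as the next test demonstrates.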


@patch(
    "os.environ",
    {
        "OPENAI_API_KEY": "test_openai_key\ntest_openai_key2",
        "OPENAI_API_BASE": "https://api.openai.com\nhttps://api.openai.com/v2",
        "AZURE_OPENAI_API_KEY": "test_aoai_key\ntest_aoai_key2",
        "AZURE_OPENAI_API_BASE": "https://api.azure.com\nhttps://api.azure.com/v2",
    },
)
def test_config_list_openai_aoai_env_vars_multi():
    # Test the config_list_openai_aoai function with multiple environment variable values (new line separated)
    configs = autogen.oai.openai_utils.config_list_openai_aoai()
    assert len(configs) == 4
    assert {"api_key": "test_openai_key", "base_url": "https://api.openai.com"} in configs
    assert {"api_key": "test_openai_key2", "base_url": "https://api.openai.com/v2"} in configs
    assert {
        "api_key": "test_aoai_key",
        "base_url": "https://api.azure.com",
        "api_type": "azure",
        "api_version": DEFAULT_AZURE_API_VERSION,
    } in configs
    assert {
        "api_key": "test_aoai_key2",
        "base_url": "https://api.azure.com/v2",
        "api_type": "azure",
        "api_version": DEFAULT_AZURE_API_VERSION,
    } in configs


def test_config_list_openai_aoai_file_not_found():
    with mock.patch.dict(os.environ, {}, clear=True):
        config_list = autogen.config_list_openai_aoai(key_file_path="non_existent_path")
        assert len(config_list) == 0


def test_config_list_from_dotenv(mock_os_environ, caplog):
    # Test with a valid .env file
    fd, temp_name = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w+") as temp:
            temp.write("\n".join([f"{k}={v}" for k, v in ENV_VARS.items()]))
            temp.flush()

            # Use the updated config_list_from_dotenv function
            config_list = autogen.config_list_from_dotenv(dotenv_file_path=temp_name)

            # Ensure configurations are loaded and API keys match expected values
            assert config_list, "Config list is empty with default API keys"

            # Check that configurations only include models specified in the filter
            for config in config_list:
                assert config["model"] in FILTER_DICT["model"], f"Model {config['model']} not in filter"

            # Check the default API key for gpt-4 and gpt-3.5-turbo when model_api_key_map is None
            config_list = autogen.config_list_from_dotenv(dotenv_file_path=temp_name, model_api_key_map=None)

            expected_api_key = os.getenv("OPENAI_API_KEY")
            assert any(
                config["model"] == "gpt-4" and config["api_key"] == expected_api_key for config in config_list
            ), "Default gpt-4 configuration not found or incorrect"
            assert any(
                config["model"] == "gpt-3.5-turbo" and config["api_key"] == expected_api_key for config in config_list
            ), "Default gpt-3.5-turbo configuration not found or incorrect"
    finally:
        os.remove(temp_name)  # The file is deleted after using its name (to prevent Windows builds from breaking)

    # Test with a missing dotenv file
    with caplog.at_level(logging.WARNING):
        config_list = autogen.config_list_from_dotenv(dotenv_file_path="non_existent_path")
    assert "The specified .env file non_existent_path does not exist." in caplog.text

    # Test with an invalid API key
    ENV_VARS["ANOTHER_API_KEY"] = ""  # Remove the ANOTHER_API_KEY value
    with caplog.at_level(logging.WARNING):
        config_list = autogen.config_list_from_dotenv()
    assert "No .env file found. Loading configurations from environment variables." in caplog.text
    # The function does not return an empty list if at least one configuration is loaded successfully
    assert config_list != [], "Config list is empty"

    # Test with no configurations loaded
    invalid_model_api_key_map = {
        "gpt-4": "INVALID_API_KEY",  # Simulate an environment var name that doesn't exist
    }
    with caplog.at_level(logging.ERROR):
        # Mocking `config_list_from_json` to return an empty list and raise an exception when called
        with mock.patch("autogen.config_list_from_json", return_value=[], side_effect=Exception("Mock called")):
            # Call the function with the invalid map
            config_list = autogen.config_list_from_dotenv(
                model_api_key_map=invalid_model_api_key_map,
                filter_dict={
                    "model": {
                        "gpt-4",
                    }
                },
            )
            # Assert that the configuration list is empty
            assert not config_list, "Expected no configurations to be loaded"

    # Test for mixed validity in the key map
    invalid_model_api_key_map = {
        "gpt-4": "INVALID_API_KEY",
        "gpt-3.5-turbo": "ANOTHER_API_KEY",  # valid according to the example configs
    }
    with caplog.at_level(logging.WARNING):
        # Call the function with the mixed-validity map
        config_list = autogen.config_list_from_dotenv(model_api_key_map=invalid_model_api_key_map)
        assert config_list, "Expected configurations to be loaded"
        assert any(
            config["model"] == "gpt-3.5-turbo" for config in config_list
        ), "gpt-3.5-turbo configuration not found"
        assert all(
            config["model"] != "gpt-4" for config in config_list
        ), "gpt-4 configuration found, but was not expected"
        assert "API key not found or empty for model gpt-4" in caplog.text


def test_get_config_list():
    # Define a list of API keys and corresponding base URLs
    api_keys = ["key1", "key2", "key3"]
    base_urls = ["https://api.service1.com", "https://api.service2.com", "https://api.service3.com"]
    api_type = "openai"
    api_version = "v1"

    # Call the get_config_list function to get a list of configuration dictionaries
    config_list = autogen.get_config_list(api_keys, base_urls, api_type, api_version)

    # Check that the config_list is not empty
    assert config_list, "The config_list should not be empty."

    # Check that the config_list has the correct length
    assert len(config_list) == len(
        api_keys
    ), "The config_list should have the same number of items as the api_keys list."

    # Check that each config in the config_list has the correct structure and data
    for i, config in enumerate(config_list):
        assert config["api_key"] == api_keys[i], f"The api_key for config {i} is incorrect."
        assert config["base_url"] == base_urls[i], f"The base_url for config {i} is incorrect."
        assert config["api_type"] == api_type, f"The api_type for config {i} is incorrect."
        assert config["api_version"] == api_version, f"The api_version for config {i} is incorrect."

    # Test with mismatched lengths of api_keys and base_urls
    with pytest.raises(AssertionError) as exc_info:
        autogen.get_config_list(api_keys, base_urls[:2], api_type, api_version)
    assert str(exc_info.value) == "The length of api_keys must match the length of base_urls"

    # Test with empty api_keys
    with pytest.raises(AssertionError) as exc_info:
        autogen.get_config_list([], base_urls, api_type, api_version)
    assert str(exc_info.value) == "The length of api_keys must match the length of base_urls"

    # Test with None base_urls
    config_list_without_base = autogen.get_config_list(api_keys, None, api_type, api_version)
    assert all(
        "base_url" not in config for config in config_list_without_base
    ), "The configs should not have base_url when None is provided."

    # Test with an empty string in api_keys
    api_keys_with_empty = ["key1", "", "key3"]
    config_list_with_empty_key = autogen.get_config_list(api_keys_with_empty, base_urls, api_type, api_version)
    assert len(config_list_with_empty_key) == 2, "The config_list should exclude configurations with empty api_keys."


def test_tags():
    config_list = json.loads(JSON_SAMPLE)

    target_list = filter_config(config_list, {"model": ["gpt-35-turbo-v0301"]})
    assert len(target_list) == 1

    list_1 = filter_config(config_list, {"tags": ["gpt35_turbo"]})
    assert len(list_1) == 1
    assert list_1[0] == target_list[0]

    list_2 = filter_config(config_list, {"tags": ["gpt-3.5-turbo"]})
    assert len(list_2) == 1
    assert list_2[0] == target_list[0]

    list_3 = filter_config(config_list, {"tags": ["gpt-3.5-turbo", "gpt35_turbo"]})
    assert len(list_3) == 1
    assert list_3[0] == target_list[0]

    # Will still match because there's a non-empty intersection
    list_4 = filter_config(config_list, {"tags": ["gpt-3.5-turbo", "does_not_exist"]})
    assert len(list_4) == 1
    assert list_4[0] == target_list[0]

    list_5 = filter_config(config_list, {"tags": ["does_not_exist"]})
    assert len(list_5) == 0


def test_is_valid_api_key():
    assert not is_valid_api_key("")
    assert not is_valid_api_key("sk-")
    assert not is_valid_api_key("SK-")
    assert not is_valid_api_key("sk-asajsdjsd2")
    assert not is_valid_api_key("FooBar")
    assert not is_valid_api_key("sk-asajsdjsd22372%23kjdfdfdf2329ffUUDSDS")
    assert is_valid_api_key("sk-asajsdjsd22372X23kjdfdfdf2329ffUUDSDS")
    assert is_valid_api_key("sk-asajsdjsd22372X23kjdfdfdf2329ffUUDSDS1212121221212sssXX")
    assert is_valid_api_key(MOCK_OPEN_AI_API_KEY)
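

def test_assumed_key_format_consistency():
    # Hypothetical cross-check, not part of the original suite: the cases in
    # test_is_valid_api_key are consistent with a "sk-" prefix followed by a
    # long run of ASCII letters and digits. The regex below is an assumption
    # inferred from those cases, not necessarily the exact pattern that
    # autogen.oai.openai_utils.is_valid_api_key uses.
    import re

    assumed_format = re.compile(r"sk-[A-Za-z0-9]{32,}")
    assert assumed_format.fullmatch("sk-asajsdjsd22372X23kjdfdfdf2329ffUUDSDS")
    assert not assumed_format.fullmatch("sk-asajsdjsd22372%23kjdfdfdf2329ffUUDSDS")
    assert not assumed_format.fullmatch("sk-")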


if __name__ == "__main__":
    pytest.main()