make retry_time configurable, add doc (#53)

* make retry_time configurable, add doc

* in seconds

* retry_wait_time

* bump version to 0.1.4

* remove .json

* rename

* time
Chi Wang 2023-09-30 09:21:07 -07:00 committed by GitHub
parent d802b7ae04
commit 904b293aa4
6 changed files with 40 additions and 23 deletions

View File

@@ -105,9 +105,9 @@ class Completion(openai_Completion):
     seed = 41
     cache_path = f".cache/{seed}"
     # retry after this many seconds
-    retry_time = 10
+    retry_wait_time = 10
     # fail a request after hitting RateLimitError for this many seconds
-    retry_timeout = 120
+    max_retry_period = 120
     # time out for request to openai server
     request_timeout = 60
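
The renamed attributes remain plain class-level defaults, so they can still be overridden globally before any call is made. A minimal sketch, assuming the `autogen.oai` module from this package (the values are illustrative, not recommendations):

```python
from autogen import oai

# Override the renamed defaults for all subsequent calls (illustrative values):
oai.Completion.retry_wait_time = 5    # was Completion.retry_time
oai.Completion.max_retry_period = 60  # was Completion.retry_timeout
oai.Completion.request_timeout = 30   # unchanged name: per-request timeout
```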
@@ -181,7 +181,7 @@ class Completion(openai_Completion):
     def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_cache=True):
         """Get the response from the openai api call.

-        Try cache first. If not found, call the openai api. If the api call fails, retry after retry_time.
+        Try cache first. If not found, call the openai api. If the api call fails, retry after retry_wait_time.
         """
         config = config.copy()
         openai.api_key_path = config.pop("api_key_path", openai.api_key_path)
@@ -199,7 +199,8 @@ class Completion(openai_Completion):
         )
         start_time = time.time()
         request_timeout = cls.request_timeout
-        retry_timeout = config.pop("retry_timeout", cls.retry_timeout)
+        max_retry_period = config.pop("max_retry_period", cls.max_retry_period)
+        retry_wait_time = config.pop("retry_wait_time", cls.retry_wait_time)
         while True:
             try:
                 if "request_timeout" in config:
@@ -211,18 +212,18 @@ class Completion(openai_Completion):
                 APIConnectionError,
             ):
                 # transient error
-                logger.info(f"retrying in {cls.retry_time} seconds...", exc_info=1)
-                sleep(cls.retry_time)
+                logger.info(f"retrying in {retry_wait_time} seconds...", exc_info=1)
+                sleep(retry_wait_time)
             except APIError as err:
                 error_code = err and err.json_body and isinstance(err.json_body, dict) and err.json_body.get("error")
                 error_code = error_code and error_code.get("code")
                 if error_code == "content_filter":
                     raise
                 # transient error
-                logger.info(f"retrying in {cls.retry_time} seconds...", exc_info=1)
-                sleep(cls.retry_time)
+                logger.info(f"retrying in {retry_wait_time} seconds...", exc_info=1)
+                sleep(retry_wait_time)
             except (RateLimitError, Timeout) as err:
-                time_left = retry_timeout - (time.time() - start_time + cls.retry_time)
+                time_left = max_retry_period - (time.time() - start_time + retry_wait_time)
                 if (
                     time_left > 0
                     and isinstance(err, RateLimitError)
@@ -233,8 +234,8 @@ class Completion(openai_Completion):
                     if isinstance(err, Timeout):
                         request_timeout <<= 1
                     request_timeout = min(request_timeout, time_left)
-                    logger.info(f"retrying in {cls.retry_time} seconds...", exc_info=1)
-                    sleep(cls.retry_time)
+                    logger.info(f"retrying in {retry_wait_time} seconds...", exc_info=1)
+                    sleep(retry_wait_time)
                 elif raise_on_ratelimit_or_timeout:
                     raise
                 else:
@@ -242,7 +243,7 @@ class Completion(openai_Completion):
                     if use_cache and isinstance(err, Timeout):
                         cls._cache.set(key, response)
                     logger.warning(
-                        f"Failed to get response from openai api due to getting RateLimitError or Timeout for {retry_timeout} seconds."
+                        f"Failed to get response from openai api due to getting RateLimitError or Timeout for {max_retry_period} seconds."
                     )
                     return response
         except InvalidRequestError:
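
Taken together, the hunks above leave the retry logic intact and only swap in the configurable names: sleep `retry_wait_time` between attempts, and stop retrying rate limits and timeouts once `max_retry_period` seconds have elapsed. A simplified, self-contained sketch of that control flow (not the library code; `make_request` is a stand-in):

```python
import time

def call_with_retries(make_request, retry_wait_time=10, max_retry_period=120):
    # Sketch of the loop above: sleep retry_wait_time between attempts and
    # stop retrying once max_retry_period seconds have elapsed overall.
    start_time = time.time()
    while True:
        try:
            return make_request()
        except Exception:  # the real code distinguishes specific OpenAI error types
            time_left = max_retry_period - (time.time() - start_time + retry_wait_time)
            if time_left <= 0:
                raise
            time.sleep(retry_wait_time)
```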
@@ -743,9 +744,11 @@ class Completion(openai_Completion):
                 When set to False, -1 will be returned when all configs fail.
             allow_format_str_template (bool, Optional): Whether to allow format string template in the config.
             **config: Configuration for the openai API call. This is used as parameters for calling openai API.
-                Besides the parameters for the openai API call, it can also contain a seed (int) for the cache.
-                This is useful when implementing "controlled randomness" for the completion.
-                Also, the "prompt" or "messages" parameter can contain a template (str or Callable) which will be instantiated with the context.
+                The "prompt" or "messages" parameter can contain a template (str or Callable) which will be instantiated with the context.
+                Besides the parameters for the openai API call, it can also contain:
+                - `max_retry_period` (int): the total time (in seconds) allowed for retrying failed requests.
+                - `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request.
+                - `seed` (int) for the cache. This is useful when implementing "controlled randomness" for the completion.

         Returns:
             Responses from OpenAI API, with additional fields.
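
Per the updated docstring, both knobs can also be supplied per call through `**config`. A hedged example (the prompt and model are placeholders, not part of this diff):

```python
from autogen import oai

response = oai.Completion.create(
    prompt="What is the capital of France?",  # placeholder prompt
    model="text-davinci-003",                 # placeholder model
    max_retry_period=60,  # total seconds to keep retrying rate limits/timeouts
    retry_wait_time=5,    # seconds to wait between retries
    seed=41,              # cache seed for "controlled randomness"
)
```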
@@ -763,9 +766,9 @@ class Completion(openai_Completion):
             base_config = config.copy()
             base_config["allow_format_str_template"] = allow_format_str_template
             base_config.update(each_config)
-            if i < last and filter_func is None and "retry_timeout" not in base_config:
-                # retry_timeout = 0 to avoid retrying when no filter is given
-                base_config["retry_timeout"] = 0
+            if i < last and filter_func is None and "max_retry_period" not in base_config:
+                # max_retry_period = 0 to avoid retrying when no filter is given
+                base_config["max_retry_period"] = 0
             try:
                 response = cls.create(
                     context,
try: try:
response = cls.create( response = cls.create(
context, context,
@@ -1103,7 +1106,7 @@ class Completion(openai_Completion):
 class ChatCompletion(Completion):
-    """A class for OpenAI API ChatCompletion."""
+    """A class for OpenAI API ChatCompletion. Share the same API as Completion."""

     default_search_space = Completion.default_search_space.copy()
     default_search_space["model"] = tune.choice(["gpt-3.5-turbo", "gpt-4"])

View File

@@ -1 +1 @@
-__version__ = "0.1.3"
+__version__ = "0.1.4"

View File

@@ -227,7 +227,7 @@ def test_humaneval(num_samples=1):
         config_list=autogen.config_list_from_models(KEY_LOC, model_list=["gpt-3.5-turbo"]),
         prompt="",
         max_tokens=1,
-        retry_timeout=0,
+        max_retry_period=0,
         raise_on_ratelimit_or_timeout=False,
     )
     # assert response == -1

View File

@@ -2,7 +2,7 @@ from autogen import AssistantAgent, UserProxyAgent, config_list_from_json
 # Load LLM inference endpoints from an env variable or a file
 # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints
-# and OAI_CONFIG_LIST_sample.json
+# and OAI_CONFIG_LIST_sample
 config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST")
 assistant = AssistantAgent("assistant", llm_config={"config_list": config_list})
 user_proxy = UserProxyAgent("user_proxy", code_execution_config={"work_dir": "coding"})
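
The diff context ends after the two agents are constructed; the sample presumably goes on to start a conversation. An illustrative continuation (the call and message are assumptions, not shown in this diff):

```python
# Illustrative only: kick off a chat between the two agents
user_proxy.initiate_chat(assistant, message="Plot a chart of stock price change.")
```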

View File

@@ -99,3 +99,13 @@ You can also explicitly specify that by:
 ```python
 assistant = autogen.AssistantAgent(name="assistant", llm_config={"api_key": ...})
 ```
+
+## Handle Rate Limit Error and Timeout Error
+
+You can set `retry_wait_time` and `max_retry_period` to handle rate limit error. And you can set `request_timeout` to handle timeout error. They can all be specified in `llm_config` for an agent, which will be used in the [`create`](/docs/reference/oai/completion#create) function for LLM inference.
+
+- `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request.
+- `max_retry_period` (int): the total timeout (in seconds) allowed for retrying failed requests.
+- `request_timeout` (int): the timeout (in seconds) sent with a single request.
+
+Please refer to the [documentation](/docs/Use-Cases/enhanced_inference#runtime-error) for more info.
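
For concreteness, such an `llm_config` might look like the following sketch (the values are illustrative, and `config_list` is assumed to come from `config_list_from_json` as in the sample above):

```python
llm_config = {
    "config_list": config_list,  # endpoints loaded via config_list_from_json
    "retry_wait_time": 5,        # wait 5 seconds between retries
    "max_retry_period": 60,      # stop retrying after 60 seconds total
    "request_timeout": 120,      # per-request timeout in seconds
}
assistant = autogen.AssistantAgent(name="assistant", llm_config=llm_config)
```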

View File

@@ -123,7 +123,11 @@ API call results are cached locally and reused when the same request is issued.
 ### Runtime error

-It is easy to hit error when calling OpenAI APIs, due to connection, rate limit, or timeout. Some of the errors are transient. `autogen.Completion.create` deals with the transient errors and retries automatically. Initial request timeout, retry timeout and retry time interval can be configured via `request_timeout`, `retry_timeout` and `autogen.Completion.retry_time`.
+It is easy to hit error when calling OpenAI APIs, due to connection, rate limit, or timeout. Some of the errors are transient. `autogen.Completion.create` deals with the transient errors and retries automatically. Request timeout, max retry period and retry wait time can be configured via `request_timeout`, `max_retry_period` and `retry_wait_time`.
+
+- `request_timeout` (int): the timeout (in seconds) sent with a single request.
+- `max_retry_period` (int): the total time (in seconds) allowed for retrying failed requests.
+- `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request.

 Moreover, one can pass a list of configurations of different models/endpoints to mitigate the rate limits. For example,
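
The file goes on to show such a list; a hedged sketch of the idea (the keys and models here are placeholders, not the document's own example):

```python
from autogen import oai

# Illustrative only: fall back across models/endpoints to spread rate limits
response = oai.ChatCompletion.create(
    config_list=[
        {"model": "gpt-4", "api_key": "<key-1>"},          # placeholder credentials
        {"model": "gpt-3.5-turbo", "api_key": "<key-2>"},  # tried if the first fails
    ],
    messages=[{"role": "user", "content": "Hi"}],
)
```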