make retry_time configurable, add doc (#53)

* make retry_time configurable, add doc
* in seconds
* retry_wait_time
* bump version to 0.1.4
* remove .json
* rename
* time

parent d802b7ae04
commit 904b293aa4
@@ -105,9 +105,9 @@ class Completion(openai_Completion):
     seed = 41
     cache_path = f".cache/{seed}"
     # retry after this many seconds
-    retry_time = 10
+    retry_wait_time = 10
     # fail a request after hitting RateLimitError for this many seconds
-    retry_timeout = 120
+    max_retry_period = 120
     # time out for request to openai server
     request_timeout = 60

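For context, a minimal sketch (not part of the commit) of how these renamed class-level defaults can be overridden globally; the numeric values below are illustrative, not the library defaults:

```python
import autogen

# Sketch only: override the renamed class-level defaults introduced above.
# The values are illustrative.
autogen.Completion.retry_wait_time = 5     # seconds to wait between retries
autogen.Completion.max_retry_period = 60   # total seconds to keep retrying on RateLimitError/Timeout
autogen.Completion.request_timeout = 30    # per-request timeout sent to the OpenAI server
```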
@@ -181,7 +181,7 @@ class Completion(openai_Completion):
     def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_cache=True):
         """Get the response from the openai api call.

-        Try cache first. If not found, call the openai api. If the api call fails, retry after retry_time.
+        Try cache first. If not found, call the openai api. If the api call fails, retry after retry_wait_time.
         """
         config = config.copy()
         openai.api_key_path = config.pop("api_key_path", openai.api_key_path)
@@ -199,7 +199,8 @@ class Completion(openai_Completion):
         )
         start_time = time.time()
         request_timeout = cls.request_timeout
-        retry_timeout = config.pop("retry_timeout", cls.retry_timeout)
+        max_retry_period = config.pop("max_retry_period", cls.max_retry_period)
+        retry_wait_time = config.pop("retry_wait_time", cls.retry_wait_time)
         while True:
             try:
                 if "request_timeout" in config:
@@ -211,18 +212,18 @@ class Completion(openai_Completion):
                 APIConnectionError,
             ):
                 # transient error
-                logger.info(f"retrying in {cls.retry_time} seconds...", exc_info=1)
-                sleep(cls.retry_time)
+                logger.info(f"retrying in {retry_wait_time} seconds...", exc_info=1)
+                sleep(retry_wait_time)
             except APIError as err:
                 error_code = err and err.json_body and isinstance(err.json_body, dict) and err.json_body.get("error")
                 error_code = error_code and error_code.get("code")
                 if error_code == "content_filter":
                     raise
                 # transient error
-                logger.info(f"retrying in {cls.retry_time} seconds...", exc_info=1)
-                sleep(cls.retry_time)
+                logger.info(f"retrying in {retry_wait_time} seconds...", exc_info=1)
+                sleep(retry_wait_time)
             except (RateLimitError, Timeout) as err:
-                time_left = retry_timeout - (time.time() - start_time + cls.retry_time)
+                time_left = max_retry_period - (time.time() - start_time + retry_wait_time)
                 if (
                     time_left > 0
                     and isinstance(err, RateLimitError)
@@ -233,8 +234,8 @@ class Completion(openai_Completion):
                     if isinstance(err, Timeout):
                         request_timeout <<= 1
                     request_timeout = min(request_timeout, time_left)
-                    logger.info(f"retrying in {cls.retry_time} seconds...", exc_info=1)
-                    sleep(cls.retry_time)
+                    logger.info(f"retrying in {retry_wait_time} seconds...", exc_info=1)
+                    sleep(retry_wait_time)
                 elif raise_on_ratelimit_or_timeout:
                     raise
                 else:
@@ -242,7 +243,7 @@ class Completion(openai_Completion):
                     if use_cache and isinstance(err, Timeout):
                         cls._cache.set(key, response)
                     logger.warning(
-                        f"Failed to get response from openai api due to getting RateLimitError or Timeout for {retry_timeout} seconds."
+                        f"Failed to get response from openai api due to getting RateLimitError or Timeout for {max_retry_period} seconds."
                     )
                     return response
         except InvalidRequestError:
@@ -743,9 +744,11 @@ class Completion(openai_Completion):
                 When set to False, -1 will be returned when all configs fail.
             allow_format_str_template (bool, Optional): Whether to allow format string template in the config.
             **config: Configuration for the openai API call. This is used as parameters for calling openai API.
-                Besides the parameters for the openai API call, it can also contain a seed (int) for the cache.
-                This is useful when implementing "controlled randomness" for the completion.
-                Also, the "prompt" or "messages" parameter can contain a template (str or Callable) which will be instantiated with the context.
+                The "prompt" or "messages" parameter can contain a template (str or Callable) which will be instantiated with the context.
+                Besides the parameters for the openai API call, it can also contain:
+                - `max_retry_period` (int): the total time (in seconds) allowed for retrying failed requests.
+                - `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request.
+                - `seed` (int) for the cache. This is useful when implementing "controlled randomness" for the completion.

         Returns:
             Responses from OpenAI API, with additional fields.
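To illustrate the updated docstring, a hedged sketch of a per-call override; the model, prompt, and numeric values are placeholders rather than part of this commit:

```python
import autogen

# Sketch: per-call overrides of the retry behaviour documented above.
# max_retry_period/retry_wait_time are popped from **config by _get_response;
# the model, prompt, and numbers below are placeholders.
response = autogen.Completion.create(
    model="gpt-3.5-turbo",
    prompt="Say this is a test.",
    max_retry_period=120,   # give up after retrying for 2 minutes in total
    retry_wait_time=10,     # wait 10 seconds between retries
    seed=41,                # cache seed for "controlled randomness"
)
```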
@@ -763,9 +766,9 @@ class Completion(openai_Completion):
             base_config = config.copy()
             base_config["allow_format_str_template"] = allow_format_str_template
             base_config.update(each_config)
-            if i < last and filter_func is None and "retry_timeout" not in base_config:
-                # retry_timeout = 0 to avoid retrying when no filter is given
-                base_config["retry_timeout"] = 0
+            if i < last and filter_func is None and "max_retry_period" not in base_config:
+                # max_retry_period = 0 to avoid retrying when no filter is given
+                base_config["max_retry_period"] = 0
             try:
                 response = cls.create(
                     context,
@@ -1103,7 +1106,7 @@ class Completion(openai_Completion):


 class ChatCompletion(Completion):
-    """A class for OpenAI API ChatCompletion."""
+    """A class for OpenAI API ChatCompletion. Share the same API as Completion."""

     default_search_space = Completion.default_search_space.copy()
     default_search_space["model"] = tune.choice(["gpt-3.5-turbo", "gpt-4"])
@@ -1 +1 @@
-__version__ = "0.1.3"
+__version__ = "0.1.4"
@@ -227,7 +227,7 @@ def test_humaneval(num_samples=1):
        config_list=autogen.config_list_from_models(KEY_LOC, model_list=["gpt-3.5-turbo"]),
        prompt="",
        max_tokens=1,
-       retry_timeout=0,
+       max_retry_period=0,
        raise_on_ratelimit_or_timeout=False,
    )
    # assert response == -1
@@ -2,7 +2,7 @@ from autogen import AssistantAgent, UserProxyAgent, config_list_from_json

 # Load LLM inference endpoints from an env variable or a file
 # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints
-# and OAI_CONFIG_LIST_sample.json
+# and OAI_CONFIG_LIST_sample
 config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST")
 assistant = AssistantAgent("assistant", llm_config={"config_list": config_list})
 user_proxy = UserProxyAgent("user_proxy", code_execution_config={"work_dir": "coding"})
@@ -99,3 +99,13 @@ You can also explicitly specify that by:
 ```python
 assistant = autogen.AssistantAgent(name="assistant", llm_config={"api_key": ...})
 ```
+
+## Handle Rate Limit Error and Timeout Error
+
+You can set `retry_wait_time` and `max_retry_period` to handle rate limit error. And you can set `request_timeout` to handle timeout error. They can all be specified in `llm_config` for an agent, which will be used in the [`create`](/docs/reference/oai/completion#create) function for LLM inference.
+
+- `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request.
+- `max_retry_period` (int): the total timeout (in seconds) allowed for retrying failed requests.
+- `request_timeout` (int): the timeout (in seconds) sent with a single request.
+
+Please refer to the [documentation](/docs/Use-Cases/enhanced_inference#runtime-error) for more info.
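A hedged sketch of what the new FAQ section describes, with the three options passed through `llm_config`; the config-list loading and the numeric values are illustrative:

```python
import autogen

# Sketch: the retry/timeout options from the new FAQ section, passed through
# llm_config so they reach the create() call during agent LLM inference.
config_list = autogen.config_list_from_json(env_or_file="OAI_CONFIG_LIST")
assistant = autogen.AssistantAgent(
    name="assistant",
    llm_config={
        "config_list": config_list,
        "retry_wait_time": 10,     # seconds between retries
        "max_retry_period": 120,   # total seconds allowed for retries
        "request_timeout": 60,     # per-request timeout in seconds
    },
)
```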
@@ -123,7 +123,11 @@ API call results are cached locally and reused when the same request is issued.

 ### Runtime error

-It is easy to hit error when calling OpenAI APIs, due to connection, rate limit, or timeout. Some of the errors are transient. `autogen.Completion.create` deals with the transient errors and retries automatically. Initial request timeout, retry timeout and retry time interval can be configured via `request_timeout`, `retry_timeout` and `autogen.Completion.retry_time`.
+It is easy to hit error when calling OpenAI APIs, due to connection, rate limit, or timeout. Some of the errors are transient. `autogen.Completion.create` deals with the transient errors and retries automatically. Request timeout, max retry period and retry wait time can be configured via `request_timeout`, `max_retry_period` and `retry_wait_time`.
+
+- `request_timeout` (int): the timeout (in seconds) sent with a single request.
+- `max_retry_period` (int): the total time (in seconds) allowed for retrying failed requests.
+- `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request.

 Moreover, one can pass a list of configurations of different models/endpoints to mitigate the rate limits. For example,
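The hunk's trailing context ends at "For example,"; the example itself lies outside this diff. A hedged sketch of the idea, with placeholder models and keys:

```python
import autogen

# Sketch: pass a list of model/endpoint configurations; if one hits a rate
# limit or fails, create() moves on to the next. Models and keys below are
# placeholders. As shown in the diff above, create() sets max_retry_period=0
# for every config except the last when no filter function is given, so a
# failing endpoint is skipped quickly instead of being retried.
response = autogen.Completion.create(
    config_list=[
        {"model": "gpt-4", "api_key": "<key-1>"},
        {"model": "gpt-3.5-turbo", "api_key": "<key-2>"},
    ],
    prompt="Say this is a test.",
)
```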