mirror of https://github.com/microsoft/autogen.git (synced 2025-11-04 11:49:45 +00:00)
	make retry_time configurable, add doc (#53)
* make retry_time configurable, add doc
* in seconds
* retry_wait_time
* bump version to 0.1.4
* remove .json
* rename
* time
This commit is contained in:
parent d802b7ae04
commit 904b293aa4
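In short, this commit renames `retry_time` to `retry_wait_time` and `retry_timeout` to `max_retry_period`, makes both configurable per request, and documents them. A minimal usage sketch under the new names (the prompt and model below are placeholder assumptions, not values from the commit):

```python
import autogen  # assumes autogen==0.1.4, the version this commit bumps to

# Hypothetical call: both renamed knobs can be passed per request;
# `_get_response` pops them out of the call config (see the diff below).
response = autogen.Completion.create(
    prompt="What is the capital of France?",  # placeholder prompt
    model="gpt-3.5-turbo",                    # placeholder model
    retry_wait_time=10,    # seconds between retries (was `retry_time`)
    max_retry_period=120,  # total seconds spent retrying (was `retry_timeout`)
)
```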
					
@@ -105,9 +105,9 @@ class Completion(openai_Completion):
     seed = 41
     cache_path = f".cache/{seed}"
     # retry after this many seconds
-    retry_time = 10
+    retry_wait_time = 10
     # fail a request after hitting RateLimitError for this many seconds
-    retry_timeout = 120
+    max_retry_period = 120
     # time out for request to openai server
     request_timeout = 60
 
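These class attributes are defaults; per the hunks below, a value passed to `create` takes precedence because `_get_response` pops it from the call config. A sketch of overriding the defaults at the class level instead (the values are arbitrary examples):

```python
import autogen

# Override the renamed class-level defaults shown above; any value passed
# directly to `create` still wins for that particular call.
autogen.Completion.retry_wait_time = 5     # default is 10
autogen.Completion.max_retry_period = 60   # default is 120
autogen.Completion.request_timeout = 30    # default is 60
```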
@@ -181,7 +181,7 @@ class Completion(openai_Completion):
     def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_cache=True):
         """Get the response from the openai api call.
 
-        Try cache first. If not found, call the openai api. If the api call fails, retry after retry_time.
+        Try cache first. If not found, call the openai api. If the api call fails, retry after retry_wait_time.
         """
         config = config.copy()
         openai.api_key_path = config.pop("api_key_path", openai.api_key_path)
@@ -199,7 +199,8 @@ class Completion(openai_Completion):
         )
         start_time = time.time()
         request_timeout = cls.request_timeout
-        retry_timeout = config.pop("retry_timeout", cls.retry_timeout)
+        max_retry_period = config.pop("max_retry_period", cls.max_retry_period)
+        retry_wait_time = config.pop("retry_wait_time", cls.retry_wait_time)
         while True:
             try:
                 if "request_timeout" in config:
@@ -211,18 +212,18 @@ class Completion(openai_Completion):
                 APIConnectionError,
             ):
                 # transient error
-                logger.info(f"retrying in {cls.retry_time} seconds...", exc_info=1)
-                sleep(cls.retry_time)
+                logger.info(f"retrying in {retry_wait_time} seconds...", exc_info=1)
+                sleep(retry_wait_time)
             except APIError as err:
                 error_code = err and err.json_body and isinstance(err.json_body, dict) and err.json_body.get("error")
                 error_code = error_code and error_code.get("code")
                 if error_code == "content_filter":
                     raise
                 # transient error
-                logger.info(f"retrying in {cls.retry_time} seconds...", exc_info=1)
-                sleep(cls.retry_time)
+                logger.info(f"retrying in {retry_wait_time} seconds...", exc_info=1)
+                sleep(retry_wait_time)
             except (RateLimitError, Timeout) as err:
-                time_left = retry_timeout - (time.time() - start_time + cls.retry_time)
+                time_left = max_retry_period - (time.time() - start_time + retry_wait_time)
                 if (
                     time_left > 0
                     and isinstance(err, RateLimitError)
@@ -233,8 +234,8 @@ class Completion(openai_Completion):
                     if isinstance(err, Timeout):
                         request_timeout <<= 1
                     request_timeout = min(request_timeout, time_left)
-                    logger.info(f"retrying in {cls.retry_time} seconds...", exc_info=1)
-                    sleep(cls.retry_time)
+                    logger.info(f"retrying in {retry_wait_time} seconds...", exc_info=1)
+                    sleep(retry_wait_time)
                 elif raise_on_ratelimit_or_timeout:
                     raise
                 else:
@@ -242,7 +243,7 @@ class Completion(openai_Completion):
                     if use_cache and isinstance(err, Timeout):
                         cls._cache.set(key, response)
                     logger.warning(
-                        f"Failed to get response from openai api due to getting RateLimitError or Timeout for {retry_timeout} seconds."
+                        f"Failed to get response from openai api due to getting RateLimitError or Timeout for {max_retry_period} seconds."
                     )
                     return response
             except InvalidRequestError:
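Taken together, the hunks above implement a single loop: sleep `retry_wait_time` seconds after each transient failure, and give up once another wait would push the elapsed time past `max_retry_period`. A self-contained sketch of that control flow (a simplified model, not the library's code; `TransientError` stands in for `RateLimitError`/`Timeout`):

```python
import time


class TransientError(Exception):
    """Stand-in for RateLimitError/Timeout in this sketch."""


def call_with_retry(request, retry_wait_time=10, max_retry_period=120):
    """Retry `request()` on transient errors within a total time budget."""
    start_time = time.time()
    while True:
        try:
            return request()
        except TransientError:
            # Give up if another wait would exceed the total retry budget;
            # the library logs a warning and returns a default response here.
            time_left = max_retry_period - (time.time() - start_time + retry_wait_time)
            if time_left <= 0:
                return None
            time.sleep(retry_wait_time)
```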
@@ -743,9 +744,11 @@ class Completion(openai_Completion):
                 When set to False, -1 will be returned when all configs fail.
             allow_format_str_template (bool, Optional): Whether to allow format string template in the config.
             **config: Configuration for the openai API call. This is used as parameters for calling openai API.
-                Besides the parameters for the openai API call, it can also contain a seed (int) for the cache.
-                This is useful when implementing "controlled randomness" for the completion.
-                Also, the "prompt" or "messages" parameter can contain a template (str or Callable) which will be instantiated with the context.
+                The "prompt" or "messages" parameter can contain a template (str or Callable) which will be instantiated with the context.
+                Besides the parameters for the openai API call, it can also contain:
+                - `max_retry_period` (int): the total time (in seconds) allowed for retrying failed requests.
+                - `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request.
+                - `seed` (int) for the cache. This is useful when implementing "controlled randomness" for the completion.
 
         Returns:
             Responses from OpenAI API, with additional fields.
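The updated docstring keeps `seed` as a cache knob alongside the renamed retry settings. A sketch of using it for the "controlled randomness" it mentions (prompt and model are placeholders again):

```python
import autogen

# Different seeds give independent cache entries for otherwise identical
# requests, so repeated runs stay reproducible per seed.
for seed in (41, 42, 43):
    response = autogen.Completion.create(
        prompt="Tell me a joke.",  # placeholder prompt
        model="gpt-3.5-turbo",     # placeholder model
        seed=seed,
        max_retry_period=120,
    )
```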
@@ -763,9 +766,9 @@ class Completion(openai_Completion):
                 base_config = config.copy()
                 base_config["allow_format_str_template"] = allow_format_str_template
                 base_config.update(each_config)
-                if i < last and filter_func is None and "retry_timeout" not in base_config:
-                    # retry_timeout = 0 to avoid retrying when no filter is given
-                    base_config["retry_timeout"] = 0
+                if i < last and filter_func is None and "max_retry_period" not in base_config:
+                    # max_retry_period = 0 to avoid retrying when no filter is given
+                    base_config["max_retry_period"] = 0
                 try:
                     response = cls.create(
                         context,
@@ -1103,7 +1106,7 @@ class Completion(openai_Completion):
 
 
 class ChatCompletion(Completion):
-    """A class for OpenAI API ChatCompletion."""
+    """A class for OpenAI API ChatCompletion. Share the same API as Completion."""
 
     default_search_space = Completion.default_search_space.copy()
     default_search_space["model"] = tune.choice(["gpt-3.5-turbo", "gpt-4"])
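The docstring change notes that `ChatCompletion` shares `Completion`'s API, so the renamed knobs apply there too. A sketch (message content and model are placeholder assumptions):

```python
import autogen

# Same retry knobs as Completion.create, but with chat-style `messages`.
response = autogen.ChatCompletion.create(
    messages=[{"role": "user", "content": "2+2="}],  # placeholder message
    model="gpt-3.5-turbo",                           # placeholder model
    retry_wait_time=10,
    max_retry_period=120,
)
```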
@@ -1 +1 @@
-__version__ = "0.1.3"
+__version__ = "0.1.4"
@@ -227,7 +227,7 @@ def test_humaneval(num_samples=1):
         config_list=autogen.config_list_from_models(KEY_LOC, model_list=["gpt-3.5-turbo"]),
         prompt="",
         max_tokens=1,
-        retry_timeout=0,
+        max_retry_period=0,
         raise_on_ratelimit_or_timeout=False,
     )
     # assert response == -1
@@ -2,7 +2,7 @@ from autogen import AssistantAgent, UserProxyAgent, config_list_from_json
 
 # Load LLM inference endpoints from an env variable or a file
 # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints
-# and OAI_CONFIG_LIST_sample.json
+# and OAI_CONFIG_LIST_sample
 config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST")
 assistant = AssistantAgent("assistant", llm_config={"config_list": config_list})
 user_proxy = UserProxyAgent("user_proxy", code_execution_config={"work_dir": "coding"})
@@ -99,3 +99,13 @@ You can also explicitly specify that by:
 ```python
 assistant = autogen.AssistantAgent(name="assistant", llm_config={"api_key": ...})
 ```
+
+## Handle Rate Limit Error and Timeout Error
+
+You can set `retry_wait_time` and `max_retry_period` to handle rate limit error. And you can set `request_timeout` to handle timeout error. They can all be specified in `llm_config` for an agent, which will be used in the [`create`](/docs/reference/oai/completion#create) function for LLM inference.
+
+- `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request.
+- `max_retry_period` (int): the total timeout (in seconds) allowed for retrying failed requests.
+- `request_timeout` (int): the timeout (in seconds) sent with a single request.
+
+Please refer to the [documentation](/docs/Use-Cases/enhanced_inference#runtime-error) for more info.
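The FAQ addition above says these knobs ride along in an agent's `llm_config` and are forwarded to `create`. A sketch of that wiring (config loading follows the repo's `OAI_CONFIG_LIST` convention; the numbers are arbitrary):

```python
import autogen

config_list = autogen.config_list_from_json(env_or_file="OAI_CONFIG_LIST")
assistant = autogen.AssistantAgent(
    name="assistant",
    llm_config={
        "config_list": config_list,
        "retry_wait_time": 10,    # wait 10s before retrying a failed request
        "max_retry_period": 120,  # stop retrying after 120s in total
        "request_timeout": 60,    # per-request timeout, in seconds
    },
)
```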
@@ -123,7 +123,11 @@ API call results are cached locally and reused when the same request is issued.
 
 ### Runtime error
 
-It is easy to hit error when calling OpenAI APIs, due to connection, rate limit, or timeout. Some of the errors are transient. `autogen.Completion.create` deals with the transient errors and retries automatically. Initial request timeout, retry timeout and retry time interval can be configured via `request_timeout`, `retry_timeout` and `autogen.Completion.retry_time`.
+It is easy to hit error when calling OpenAI APIs, due to connection, rate limit, or timeout. Some of the errors are transient. `autogen.Completion.create` deals with the transient errors and retries automatically. Request timeout, max retry period and retry wait time can be configured via `request_timeout`, `max_retry_period` and `retry_wait_time`.
+
+- `request_timeout` (int): the timeout (in seconds) sent with a single request.
+- `max_retry_period` (int): the total time (in seconds) allowed for retrying failed requests.
+- `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request.
 
 Moreover, one can pass a list of configurations of different models/endpoints to mitigate the rate limits. For example,
 
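The example that "For example," introduces lies outside this hunk. For orientation, such a list is a sequence of per-endpoint dicts that `create` falls back through; a hypothetical two-endpoint sketch (all keys, endpoints, versions, and field names beyond `model`/`api_key` are placeholder assumptions, not values from the commit):

```python
import autogen

# Hypothetical multi-endpoint list; every credential below is a placeholder.
config_list = [
    {"model": "gpt-4", "api_key": "<OpenAI key>"},
    {
        "model": "gpt-3.5-turbo",
        "api_key": "<Azure OpenAI key>",
        "api_base": "<endpoint URL>",         # assumed Azure-style fields
        "api_type": "azure",
        "api_version": "2023-07-01-preview",  # placeholder version
    },
]
# With a config_list, `create` tries the next configuration when one fails.
response = autogen.Completion.create(config_list=config_list, prompt="Hi")
```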