diff --git a/haystack/utils/openai_utils.py b/haystack/utils/openai_utils.py
index 66fef9c5c..eccb5676c 100644
--- a/haystack/utils/openai_utils.py
+++ b/haystack/utils/openai_utils.py
@@ -99,6 +99,10 @@ def _openai_text_completion_tokenization_details(model_name: str):
     max_tokens_limit = 2049  # Based on this ref: https://platform.openai.com/docs/models/gpt-3
     model_tokenizer = MODEL_TO_ENCODING.get(model_name) if USE_TIKTOKEN else None
 
+    # covering the lack of support in Tiktoken. https://github.com/openai/tiktoken/pull/72
+    if model_name == "gpt-35-turbo" and USE_TIKTOKEN:
+        model_tokenizer = "cl100k_base"
+
     if model_tokenizer:
         # Based on OpenAI models page, 'davinci' considers have 2049 tokens,
         ## therefore, it is better to add `text-davinci` instead to the condition.
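
For context, a minimal sketch of what the fallback enables downstream: once the Azure-style model name `gpt-35-turbo` resolves to the `cl100k_base` encoding, the tokenizer can be loaded by encoding name rather than via tiktoken's model table. The snippet is illustrative, not Haystack's actual token-counting code, and assumes tiktoken is installed.

```python
# Illustrative sketch (not Haystack's implementation): load the encoding
# directly by name, as set by the fallback in the diff above, and count tokens.
import tiktoken

model_tokenizer = "cl100k_base"  # value assigned for "gpt-35-turbo" above

tokenizer = tiktoken.get_encoding(model_tokenizer)
num_tokens = len(tokenizer.encode("How many tokens does this prompt use?"))
print(num_tokens)
```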