From 38768bffdf1e89ad199863fbd2668747bc6d5ff5 Mon Sep 17 00:00:00 2001 From: recrudesce Date: Tue, 25 Apr 2023 15:43:24 +0100 Subject: [PATCH] fix: Tiktoken does not support Azure gpt-35-turbo (#4739) * force support for gpt-35-turbo Because Tiktoken doesn't support it yet - see https://github.com/openai/tiktoken/pull/72 * Update openai_utils.py * Appeasing the linting gods Why hast thou forsaken me? * Remove trailing whitespace * chg: remove redundant elif block --- haystack/utils/openai_utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/haystack/utils/openai_utils.py b/haystack/utils/openai_utils.py index 66fef9c5c..eccb5676c 100644 --- a/haystack/utils/openai_utils.py +++ b/haystack/utils/openai_utils.py @@ -99,6 +99,10 @@ def _openai_text_completion_tokenization_details(model_name: str): max_tokens_limit = 2049 # Based on this ref: https://platform.openai.com/docs/models/gpt-3 model_tokenizer = MODEL_TO_ENCODING.get(model_name) if USE_TIKTOKEN else None + # covering the lack of support in Tiktoken. https://github.com/openai/tiktoken/pull/72 + if model_name == "gpt-35-turbo" and USE_TIKTOKEN: + model_tokenizer = "cl100k_base" + if model_tokenizer: # Based on OpenAI models page, 'davinci' considers have 2049 tokens, ## therefore, it is better to add `text-davinci` instead to the condition.