From cfd703fa3efaca1d708aa6589ff363d12ff77a33 Mon Sep 17 00:00:00 2001
From: Michael Feil <63565275+michaelfeil@users.noreply.github.com>
Date: Thu, 22 Jun 2023 14:23:19 +0200
Subject: [PATCH] fix: model_tokenizer in openai text completion tokenization
 details (#5104)

* fix: model_tokenizer

* Update test

---------

Co-authored-by: Sebastian Husch Lee
---
 haystack/utils/openai_utils.py  | 1 +
 test/utils/test_openai_utils.py | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/haystack/utils/openai_utils.py b/haystack/utils/openai_utils.py
index c733c0ae6..b866523eb 100644
--- a/haystack/utils/openai_utils.py
+++ b/haystack/utils/openai_utils.py
@@ -65,6 +65,7 @@ def _openai_text_completion_tokenization_details(model_name: str):
     """
     tokenizer_name = "gpt2"
     max_tokens_limit = 2049  # Based on this ref: https://platform.openai.com/docs/models/gpt-3
+    model_tokenizer = None
 
     if model_name == "gpt-35-turbo":
         # covering the lack of support in Tiktoken. https://github.com/openai/tiktoken/pull/72
diff --git a/test/utils/test_openai_utils.py b/test/utils/test_openai_utils.py
index c9d49a747..4896d5aef 100644
--- a/test/utils/test_openai_utils.py
+++ b/test/utils/test_openai_utils.py
@@ -16,8 +16,8 @@ from haystack.utils.openai_utils import (
 )
 
 
 @pytest.mark.unit
 def test_openai_text_completion_tokenization_details_gpt_default():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="text-ada-001")
-    assert tokenizer_name == "r50k_base"
+    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="not-recognized-name")
+    assert tokenizer_name == "gpt2"
     assert max_tokens_limit == 2049
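
Note for reviewers (not part of the patch): the one-line fix initializes
model_tokenizer before the branch logic so the variable is always bound when
it is checked later. Below is a minimal sketch of the failure mode, assuming
the function's shape matches the hunk above; the helper name
_tokenization_details and the "cl100k_base"/"p50k_base" branches are
illustrative stand-ins, not the actual haystack implementation.

    from typing import Optional, Tuple

    def _tokenization_details(model_name: str) -> Tuple[str, int]:
        tokenizer_name = "gpt2"
        max_tokens_limit = 2049  # per https://platform.openai.com/docs/models/gpt-3

        # The patched line: without it, an unrecognized model_name leaves
        # model_tokenizer unbound, and the check below raises UnboundLocalError.
        model_tokenizer: Optional[str] = None

        if model_name == "gpt-35-turbo":
            # tiktoken lacks a mapping for the Azure-style name, so map it by hand
            model_tokenizer = "cl100k_base"
        elif model_name.startswith("text-davinci"):
            model_tokenizer = "p50k_base"
        # unknown names fall through without assigning model_tokenizer

        if model_tokenizer:  # safe now that model_tokenizer is always defined
            tokenizer_name = model_tokenizer
        return tokenizer_name, max_tokens_limit

    # Mirrors the updated unit test: unknown names fall back to the defaults.
    assert _tokenization_details("not-recognized-name") == ("gpt2", 2049)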