Mirror of https://github.com/deepset-ai/haystack.git, synced 2025-09-01 12:23:31 +00:00
fix: model_tokenizer in openai text completion tokenization details (#5104)

* fix: model_tokenizer
* Update test

Co-authored-by: Sebastian Husch Lee <sjrl423@gmail.com>
parent 6a5fbb7118
commit cfd703fa3e
@@ -65,6 +65,7 @@ def _openai_text_completion_tokenization_details(model_name: str):
     """
     tokenizer_name = "gpt2"
     max_tokens_limit = 2049  # Based on this ref: https://platform.openai.com/docs/models/gpt-3
+    model_tokenizer = None
 
     if model_name == "gpt-35-turbo":
         # covering the lack of support in Tiktoken. https://github.com/openai/tiktoken/pull/72
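The added initialization matters because the function only assigns model_tokenizer inside model-specific branches; for any other model name, the later check would reference an unbound local instead of falling back to the "gpt2" defaults. Below is a minimal sketch of the resulting fallback pattern, assuming tiktoken's MODEL_TO_ENCODING lookup and the cl100k_base mapping for "gpt-35-turbo"; the exact branch logic and per-model token limits in the upstream function may differ.

from tiktoken.model import MODEL_TO_ENCODING


def _openai_text_completion_tokenization_details(model_name: str):
    """Sketch of the fallback pattern; not the verbatim upstream function."""
    tokenizer_name = "gpt2"  # default when the model is not recognized
    max_tokens_limit = 2049  # Based on this ref: https://platform.openai.com/docs/models/gpt-3
    model_tokenizer = None  # the fix: defined up front, so the check below never hits an unbound name

    if model_name == "gpt-35-turbo":
        # covering the lack of support in Tiktoken. https://github.com/openai/tiktoken/pull/72
        model_tokenizer = "cl100k_base"
    elif model_name in MODEL_TO_ENCODING:
        model_tokenizer = MODEL_TO_ENCODING[model_name]

    if model_tokenizer is not None:
        tokenizer_name = model_tokenizer
        # a real implementation would also adjust max_tokens_limit per model (omitted in this sketch)

    return tokenizer_name, max_tokens_limit

With the variable always defined, an unrecognized name such as "not-recognized-name" simply returns ("gpt2", 2049), which is exactly what the updated test below asserts.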
@@ -16,8 +16,8 @@ from haystack.utils.openai_utils import (
 
 @pytest.mark.unit
 def test_openai_text_completion_tokenization_details_gpt_default():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="text-ada-001")
-    assert tokenizer_name == "r50k_base"
+    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="not-recognized-name")
+    assert tokenizer_name == "gpt2"
     assert max_tokens_limit == 2049
 
 
|