Mirror of https://github.com/deepset-ai/haystack.git (synced 2025-08-31 11:56:35 +00:00)

Commit: fix: model_tokenizer in openai text completion tokenization details (#5104)

* fix: model_tokenizer
* Update test

Co-authored-by: Sebastian Husch Lee <sjrl423@gmail.com>
This commit is contained in: parent 6a5fbb7118, commit cfd703fa3e.
@ -65,6 +65,7 @@ def _openai_text_completion_tokenization_details(model_name: str):
|
||||
"""
|
||||
tokenizer_name = "gpt2"
|
||||
max_tokens_limit = 2049 # Based on this ref: https://platform.openai.com/docs/models/gpt-3
|
||||
model_tokenizer = None
|
||||
|
||||
if model_name == "gpt-35-turbo":
|
||||
# covering the lack of support in Tiktoken. https://github.com/openai/tiktoken/pull/72
|
||||
|
@ -16,8 +16,8 @@ from haystack.utils.openai_utils import (
|
||||
|
||||
@pytest.mark.unit
def test_openai_text_completion_tokenization_details_gpt_default():
    """Tokenization details resolve correctly for GPT-3 models and unknown names.

    A recognized GPT-3 completion model ("text-ada-001") should map to the
    "r50k_base" tokenizer, while an unrecognized model name should fall back
    to the "gpt2" tokenizer with the GPT-3 limit of 2049 tokens.
    """
    # Recognized GPT-3 model: only the tokenizer name is checked here.
    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="text-ada-001")
    assert tokenizer_name == "r50k_base"

    # Unknown model name: falls back to the gpt2 tokenizer and default limit.
    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="not-recognized-name")
    assert tokenizer_name == "gpt2"
    assert max_tokens_limit == 2049
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user