Mirror of https://github.com/deepset-ai/haystack.git, synced 2025-09-01 12:23:31 +00:00
fix: model_tokenizer in openai text completion tokenization details (#5104)

* fix: model_tokenizer
* Update test

Co-authored-by: Sebastian Husch Lee <sjrl423@gmail.com>
parent 6a5fbb7118
commit cfd703fa3e
@@ -65,6 +65,7 @@ def _openai_text_completion_tokenization_details(model_name: str):
     """
     tokenizer_name = "gpt2"
     max_tokens_limit = 2049  # Based on this ref: https://platform.openai.com/docs/models/gpt-3
+    model_tokenizer = None
 
     if model_name == "gpt-35-turbo":
         # covering the lack of support in Tiktoken. https://github.com/openai/tiktoken/pull/72
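The added initialization matters because the function only assigns model_tokenizer inside model-specific branches; for any other model name, the later check would reference an unbound local instead of falling back to the "gpt2" defaults. Below is a minimal sketch of the resulting fallback pattern, assuming tiktoken's MODEL_TO_ENCODING lookup and the cl100k_base mapping for "gpt-35-turbo"; the exact branch logic and per-model token limits in the upstream function may differ.

from tiktoken.model import MODEL_TO_ENCODING


def _openai_text_completion_tokenization_details(model_name: str):
    """Sketch of the fallback pattern; not the verbatim upstream function."""
    tokenizer_name = "gpt2"  # default when the model is not recognized
    max_tokens_limit = 2049  # Based on this ref: https://platform.openai.com/docs/models/gpt-3
    model_tokenizer = None  # the fix: defined up front, so the check below never hits an unbound name

    if model_name == "gpt-35-turbo":
        # covering the lack of support in Tiktoken. https://github.com/openai/tiktoken/pull/72
        model_tokenizer = "cl100k_base"
    elif model_name in MODEL_TO_ENCODING:
        model_tokenizer = MODEL_TO_ENCODING[model_name]

    if model_tokenizer is not None:
        tokenizer_name = model_tokenizer
        # a real implementation would also adjust max_tokens_limit per model (omitted in this sketch)

    return tokenizer_name, max_tokens_limit

With the variable always defined, an unrecognized name such as "not-recognized-name" simply returns ("gpt2", 2049), which is exactly what the updated test below asserts.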
@@ -16,8 +16,8 @@ from haystack.utils.openai_utils import (
 
 @pytest.mark.unit
 def test_openai_text_completion_tokenization_details_gpt_default():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="text-ada-001")
-    assert tokenizer_name == "r50k_base"
+    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="not-recognized-name")
+    assert tokenizer_name == "gpt2"
     assert max_tokens_limit == 2049
 
 
|