mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-08-30 03:16:46 +00:00
fix: Support for gpt-4-32k (#4825)
* Add step to look up tokenizers by prefix in openai_utils * Updated tiktoken min version + openai_utils test * Added test case for GPT-4 and Azure model naming * Broken down tests * Added default case --------- Co-authored-by: ZanSara <sara.zanzottera@deepset.ai>
This commit is contained in:
parent
179e9cea08
commit
6eb251d1f0
@ -34,7 +34,7 @@ if sys.version_info >= (3, 8) and (machine in ["amd64", "x86_64"] or (machine ==
|
||||
|
||||
if USE_TIKTOKEN:
|
||||
import tiktoken # pylint: disable=import-error
|
||||
from tiktoken.model import MODEL_TO_ENCODING
|
||||
from tiktoken.model import MODEL_TO_ENCODING, MODEL_PREFIX_TO_ENCODING
|
||||
else:
|
||||
logger.warning(
|
||||
"OpenAI tiktoken module is not available for Python < 3.8,Linux ARM64 and AARCH64. Falling back to GPT2TokenizerFast."
|
||||
@ -97,11 +97,18 @@ def _openai_text_completion_tokenization_details(model_name: str):
|
||||
"""
|
||||
tokenizer_name = "gpt2"
|
||||
max_tokens_limit = 2049 # Based on this ref: https://platform.openai.com/docs/models/gpt-3
|
||||
model_tokenizer = MODEL_TO_ENCODING.get(model_name) if USE_TIKTOKEN else None
|
||||
|
||||
# covering the lack of support in Tiktoken. https://github.com/openai/tiktoken/pull/72
|
||||
if model_name == "gpt-35-turbo" and USE_TIKTOKEN:
|
||||
model_tokenizer = "cl100k_base"
|
||||
if USE_TIKTOKEN:
|
||||
if model_name == "gpt-35-turbo":
|
||||
# covering the lack of support in Tiktoken. https://github.com/openai/tiktoken/pull/72
|
||||
model_tokenizer = "cl100k_base"
|
||||
elif model_name in MODEL_TO_ENCODING:
|
||||
model_tokenizer = MODEL_TO_ENCODING[model_name]
|
||||
else:
|
||||
for model_prefix, tokenizer in MODEL_PREFIX_TO_ENCODING.items():
|
||||
if model_name.startswith(model_prefix):
|
||||
model_tokenizer = tokenizer
|
||||
break
|
||||
|
||||
if model_tokenizer:
|
||||
# Based on OpenAI models page, 'davinci' considers have 2049 tokens,
|
||||
|
@ -76,7 +76,7 @@ dependencies = [
|
||||
"sentence-transformers>=2.2.0",
|
||||
|
||||
# OpenAI tokenizer
|
||||
"tiktoken>=0.3.0; python_version >= '3.8' and (platform_machine == 'AMD64' or platform_machine == 'amd64' or platform_machine == 'x86_64' or (platform_machine == 'arm64' and platform_system == 'Darwin'))",
|
||||
"tiktoken>=0.3.2; python_version >= '3.8' and (platform_machine == 'AMD64' or platform_machine == 'amd64' or platform_machine == 'x86_64' or (platform_machine == 'arm64' and platform_system == 'Darwin'))",
|
||||
|
||||
# Schema validation
|
||||
"jsonschema",
|
||||
|
@ -1,10 +1,53 @@
|
||||
import pytest
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from tenacity import wait_none
|
||||
|
||||
from haystack.errors import OpenAIError, OpenAIRateLimitError, OpenAIUnauthorizedError
|
||||
from haystack.utils.openai_utils import openai_request
|
||||
from haystack.utils.openai_utils import openai_request, _openai_text_completion_tokenization_details
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_openai_text_completion_tokenization_details_gpt_default():
    """A GPT-3 completion model resolves to the r50k_base tokenizer and the 2049-token limit."""
    details = _openai_text_completion_tokenization_details(model_name="text-ada-001")
    assert details == ("r50k_base", 2049)
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_openai_text_completion_tokenization_details_gpt_davinci():
    """text-davinci-003 resolves to the p50k_base tokenizer with a 4097-token limit."""
    name, limit = _openai_text_completion_tokenization_details(model_name="text-davinci-003")
    assert (name, limit) == ("p50k_base", 4097)
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_openai_text_completion_tokenization_details_gpt3_5_azure():
    """The Azure-style name 'gpt-35-turbo' (no dot) still maps to cl100k_base / 4096 tokens."""
    azure_details = _openai_text_completion_tokenization_details(model_name="gpt-35-turbo")
    tokenizer, limit = azure_details
    assert tokenizer == "cl100k_base"
    assert limit == 4096
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_openai_text_completion_tokenization_details_gpt3_5():
    """The OpenAI-style name 'gpt-3.5-turbo' maps to cl100k_base / 4096 tokens."""
    result = _openai_text_completion_tokenization_details(model_name="gpt-3.5-turbo")
    assert result == ("cl100k_base", 4096)
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_openai_text_completion_tokenization_details_gpt_4():
    """gpt-4 maps to the cl100k_base tokenizer with an 8192-token context limit."""
    tokenizer, max_tokens = _openai_text_completion_tokenization_details(model_name="gpt-4")
    assert tokenizer == "cl100k_base"
    assert max_tokens == 8192
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_openai_text_completion_tokenization_details_gpt_4_32k():
    """gpt-4-32k is matched by tokenizer prefix lookup: cl100k_base with a 32768-token limit."""
    details = _openai_text_completion_tokenization_details(model_name="gpt-4-32k")
    assert details[0] == "cl100k_base"
    assert details[1] == 32768
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
|
Loading…
x
Reference in New Issue
Block a user