mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-08-31 11:56:35 +00:00
fix: Support for gpt-4-32k (#4825)
* Add step to look up tokenizers by prefix in openai_utils * Updated tiktoken min version + openai_utils test * Added test case for GPT-4 and Azure model naming * Broken down tests * Added default case --------- Co-authored-by: ZanSara <sara.zanzottera@deepset.ai>
This commit is contained in:
parent
179e9cea08
commit
6eb251d1f0
@ -34,7 +34,7 @@ if sys.version_info >= (3, 8) and (machine in ["amd64", "x86_64"] or (machine ==
|
|||||||
|
|
||||||
if USE_TIKTOKEN:
|
if USE_TIKTOKEN:
|
||||||
import tiktoken # pylint: disable=import-error
|
import tiktoken # pylint: disable=import-error
|
||||||
from tiktoken.model import MODEL_TO_ENCODING
|
from tiktoken.model import MODEL_TO_ENCODING, MODEL_PREFIX_TO_ENCODING
|
||||||
else:
|
else:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"OpenAI tiktoken module is not available for Python < 3.8,Linux ARM64 and AARCH64. Falling back to GPT2TokenizerFast."
|
"OpenAI tiktoken module is not available for Python < 3.8,Linux ARM64 and AARCH64. Falling back to GPT2TokenizerFast."
|
||||||
@ -97,11 +97,18 @@ def _openai_text_completion_tokenization_details(model_name: str):
|
|||||||
"""
|
"""
|
||||||
tokenizer_name = "gpt2"
|
tokenizer_name = "gpt2"
|
||||||
max_tokens_limit = 2049 # Based on this ref: https://platform.openai.com/docs/models/gpt-3
|
max_tokens_limit = 2049 # Based on this ref: https://platform.openai.com/docs/models/gpt-3
|
||||||
model_tokenizer = MODEL_TO_ENCODING.get(model_name) if USE_TIKTOKEN else None
|
|
||||||
|
|
||||||
# covering the lack of support in Tiktoken. https://github.com/openai/tiktoken/pull/72
|
if USE_TIKTOKEN:
|
||||||
if model_name == "gpt-35-turbo" and USE_TIKTOKEN:
|
if model_name == "gpt-35-turbo":
|
||||||
model_tokenizer = "cl100k_base"
|
# covering the lack of support in Tiktoken. https://github.com/openai/tiktoken/pull/72
|
||||||
|
model_tokenizer = "cl100k_base"
|
||||||
|
elif model_name in MODEL_TO_ENCODING:
|
||||||
|
model_tokenizer = MODEL_TO_ENCODING[model_name]
|
||||||
|
else:
|
||||||
|
for model_prefix, tokenizer in MODEL_PREFIX_TO_ENCODING.items():
|
||||||
|
if model_name.startswith(model_prefix):
|
||||||
|
model_tokenizer = tokenizer
|
||||||
|
break
|
||||||
|
|
||||||
if model_tokenizer:
|
if model_tokenizer:
|
||||||
# Based on OpenAI models page, 'davinci' considers have 2049 tokens,
|
# Based on OpenAI models page, 'davinci' considers have 2049 tokens,
|
||||||
|
@ -76,7 +76,7 @@ dependencies = [
|
|||||||
"sentence-transformers>=2.2.0",
|
"sentence-transformers>=2.2.0",
|
||||||
|
|
||||||
# OpenAI tokenizer
|
# OpenAI tokenizer
|
||||||
"tiktoken>=0.3.0; python_version >= '3.8' and (platform_machine == 'AMD64' or platform_machine == 'amd64' or platform_machine == 'x86_64' or (platform_machine == 'arm64' and platform_system == 'Darwin'))",
|
"tiktoken>=0.3.2; python_version >= '3.8' and (platform_machine == 'AMD64' or platform_machine == 'amd64' or platform_machine == 'x86_64' or (platform_machine == 'arm64' and platform_system == 'Darwin'))",
|
||||||
|
|
||||||
# Schema validation
|
# Schema validation
|
||||||
"jsonschema",
|
"jsonschema",
|
||||||
|
@ -1,10 +1,53 @@
|
|||||||
|
import pytest
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from tenacity import wait_none
|
from tenacity import wait_none
|
||||||
|
|
||||||
from haystack.errors import OpenAIError, OpenAIRateLimitError, OpenAIUnauthorizedError
|
from haystack.errors import OpenAIError, OpenAIRateLimitError, OpenAIUnauthorizedError
|
||||||
from haystack.utils.openai_utils import openai_request
|
from haystack.utils.openai_utils import openai_request, _openai_text_completion_tokenization_details
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
|
||||||
|
def test_openai_text_completion_tokenization_details_gpt_default():
|
||||||
|
tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="text-ada-001")
|
||||||
|
assert tokenizer_name == "r50k_base"
|
||||||
|
assert max_tokens_limit == 2049
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
|
||||||
|
def test_openai_text_completion_tokenization_details_gpt_davinci():
|
||||||
|
tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="text-davinci-003")
|
||||||
|
assert tokenizer_name == "p50k_base"
|
||||||
|
assert max_tokens_limit == 4097
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
|
||||||
|
def test_openai_text_completion_tokenization_details_gpt3_5_azure():
|
||||||
|
tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-35-turbo")
|
||||||
|
assert tokenizer_name == "cl100k_base"
|
||||||
|
assert max_tokens_limit == 4096
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
|
||||||
|
def test_openai_text_completion_tokenization_details_gpt3_5():
|
||||||
|
tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-3.5-turbo")
|
||||||
|
assert tokenizer_name == "cl100k_base"
|
||||||
|
assert max_tokens_limit == 4096
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
|
||||||
|
def test_openai_text_completion_tokenization_details_gpt_4():
|
||||||
|
tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-4")
|
||||||
|
assert tokenizer_name == "cl100k_base"
|
||||||
|
assert max_tokens_limit == 8192
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
|
||||||
|
def test_openai_text_completion_tokenization_details_gpt_4_32k():
|
||||||
|
tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-4-32k")
|
||||||
|
assert tokenizer_name == "cl100k_base"
|
||||||
|
assert max_tokens_limit == 32768
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
|
Loading…
x
Reference in New Issue
Block a user