diff --git a/haystack/nodes/answer_generator/openai.py b/haystack/nodes/answer_generator/openai.py
index c44769920..ab6f2f0f0 100644
--- a/haystack/nodes/answer_generator/openai.py
+++ b/haystack/nodes/answer_generator/openai.py
@@ -2,6 +2,7 @@ import json
 import logging
 import sys
 from typing import List, Optional, Tuple, Union
+import platform
 
 import requests
 
@@ -12,14 +13,20 @@ from haystack.utils.reflection import retry_with_exponential_backoff
 
 logger = logging.getLogger(__name__)
 
+# platform.machine() reports "AMD64" on Windows, so compare case-insensitively.
+machine = platform.machine().lower()
+system = platform.system()
+
 USE_TIKTOKEN = False
-if sys.version_info >= (3, 8):
+if sys.version_info >= (3, 8) and (machine in ["amd64", "x86_64"] or (machine == "arm64" and system == "Darwin")):
     USE_TIKTOKEN = True
 
 if USE_TIKTOKEN:
     import tiktoken  # pylint: disable=import-error
 else:
-    logger.warning("OpenAI tiktoken module is not available for Python < 3.8. Falling back to GPT2TokenizerFast.")
+    logger.warning(
+        "OpenAI tiktoken module is not available for Python < 3.8,Linux ARM64 and AARCH64. Falling back to GPT2TokenizerFast."
+    )
     from transformers import GPT2TokenizerFast, PreTrainedTokenizerFast
 
 
diff --git a/haystack/nodes/retriever/_openai_encoder.py b/haystack/nodes/retriever/_openai_encoder.py
index e5063bef7..3569e3be0 100644
--- a/haystack/nodes/retriever/_openai_encoder.py
+++ b/haystack/nodes/retriever/_openai_encoder.py
@@ -3,6 +3,7 @@ import logging
 import sys
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
+import platform
 
 import numpy as np
 import requests
@@ -19,14 +20,20 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 
+# platform.machine() reports "AMD64" on Windows, so compare case-insensitively.
+machine = platform.machine().lower()
+system = platform.system()
+
 USE_TIKTOKEN = False
-if sys.version_info >= (3, 8):
+if sys.version_info >= (3, 8) and (machine in ["amd64", "x86_64"] or (machine == "arm64" and system == "Darwin")):
     USE_TIKTOKEN = True
 
 if USE_TIKTOKEN:
     import tiktoken  # pylint: disable=import-error
 else:
-    logger.warning("OpenAI tiktoken module is not available for Python < 3.8. Falling back to GPT2TokenizerFast.")
+    logger.warning(
+        "OpenAI tiktoken module is not available for Python < 3.8,Linux ARM64 and AARCH64. Falling back to GPT2TokenizerFast."
+    )
     from transformers import GPT2TokenizerFast, PreTrainedTokenizerFast
 
 
diff --git a/pyproject.toml b/pyproject.toml
index c8cbbc6cf..ec33231bf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -88,7 +88,7 @@ dependencies = [
   "elasticsearch>=7.7,<8",
 
   # OpenAI tokenizer
-  "tiktoken>=0.1.2; python_version >= '3.8'",
+  "tiktoken>=0.1.2; python_version >= '3.8' and (platform_machine == 'amd64' or platform_machine == 'AMD64' or platform_machine == 'x86_64' or (platform_machine == 'arm64' and platform_system == 'Darwin'))",
 
   # context matching
   "rapidfuzz>=2.0.15,<2.8.0",  # FIXME https://github.com/deepset-ai/haystack/pull/3199