Mirror of https://github.com/deepset-ai/haystack.git (synced 2026-01-06 03:57:19 +00:00)
feat: add apple silicon GPU acceleration (#6151)
* feat: add apple silicon GPU acceleration
* add release notes
* small fix
* Update utils.py
* Update utils.py
* ci fix mps
* Revert "ci fix mps"

  This reverts commit 783ae503940d9ff8270a970a321549fb9e69dce7.

* mps fix
* Update experiment_tracking.py
* try removing upper watermark limit
* disable mps CI
* Use xl runner
* initialise env
* small fix
* black linting

---------

Co-authored-by: Massimiliano Pippi <mpippi@gmail.com>
parent 789e524de3
commit 708d33a657
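The change adds one consistent device-selection fallback across the codebase: prefer CUDA, then Apple's Metal Performance Shaders (MPS) backend on Apple Silicon, then CPU, with the HAYSTACK_MPS_ENABLED environment variable as an opt-out. A minimal standalone sketch of that pattern as it appears in the hunks below (the helper name pick_device is illustrative, not part of the commit):

import os

import torch


def pick_device() -> torch.device:
    # CUDA keeps priority, as before this change.
    if torch.cuda.is_available():
        return torch.device("cuda")
    # New fallback: use the MPS backend on Apple Silicon unless the user
    # explicitly sets HAYSTACK_MPS_ENABLED=false (as the macOS CI job does).
    if (
        hasattr(torch.backends, "mps")
        and torch.backends.mps.is_available()
        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
    ):
        return torch.device("mps")
    return torch.device("cpu")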
.github/workflows/tests_preview.yml (4 changed lines, vendored)
@@ -219,7 +219,9 @@ jobs:
   integration-tests-macos:
     name: Integration / macos-latest
     needs: unit-tests
-    runs-on: macos-latest
+    runs-on: macos-latest-xl
+    env:
+      HAYSTACK_MPS_ENABLED: false
     steps:
       - uses: actions/checkout@v4
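The workflow hunk above moves the macOS integration job to a larger runner and switches the new MPS path off in CI via HAYSTACK_MPS_ENABLED: false (the commit message notes "disable mps CI" and "Use xl runner"). A hedged sketch of how a test could honour the same switch locally; the pytest marker usage is illustrative and not taken from this diff:

import os

import pytest
import torch

# Same gate the commit uses everywhere: MPS must exist, be available, and not
# be disabled through the environment variable.
MPS_ENABLED = (
    hasattr(torch.backends, "mps")
    and torch.backends.mps.is_available()
    and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
)


@pytest.mark.skipif(not MPS_ENABLED, reason="MPS unavailable or disabled via HAYSTACK_MPS_ENABLED")
def test_tensor_lands_on_mps():
    # Smoke test: allocating on "mps" should succeed when the gate is open.
    x = torch.ones(2, 2, device="mps")
    assert x.device.type == "mps"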
@@ -1,5 +1,6 @@
 from typing import Dict, Any
 from pathlib import Path
+import os

 import numpy as np
 import pytest
@@ -707,6 +708,12 @@ def test_dpr_processor_save_load_non_bert_tokenizer(tmp_path: Path, query_and_pa

     if torch.cuda.is_available():
         device = torch.device("cuda")
+    elif (
+        hasattr(torch.backends, "mps")
+        and torch.backends.mps.is_available()
+        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+    ):
+        device = torch.device("mps")
     else:
         device = torch.device("cpu")
     model = BiAdaptiveModel(
@@ -753,6 +760,12 @@ def test_dpr_processor_save_load_non_bert_tokenizer(tmp_path: Path, query_and_pa

     if torch.cuda.is_available():
         device = torch.device("cuda")
+    elif (
+        hasattr(torch.backends, "mps")
+        and torch.backends.mps.is_available()
+        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+    ):
+        device = torch.device("mps")
     else:
         device = torch.device("cpu")
     loaded_model = BiAdaptiveModel(
@@ -879,6 +892,12 @@ def test_dpr_processor_save_load_non_bert_tokenizer(tmp_path: Path, query_and_pa

     if torch.cuda.is_available():
         device = torch.device("cuda")
+    elif (
+        hasattr(torch.backends, "mps")
+        and torch.backends.mps.is_available()
+        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+    ):
+        device = torch.device("mps")
     else:
         device = torch.device("cpu")
     model = BiAdaptiveModel(
@@ -106,11 +106,16 @@ def collect_static_system_specs() -> Dict[str, Any]:

     try:
         torch_import.check()
+        has_mps = (
+            hasattr(torch.backends, "mps")
+            and torch.backends.mps.is_available()
+            and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+        )
         specs.update(
             {
                 "libraries.torch": torch.__version__,
                 "libraries.cuda": torch.version.cuda if torch.cuda.is_available() else False,
-                "hardware.gpus": torch.cuda.device_count() if torch.cuda.is_available() else 0,
+                "hardware.gpus": torch.cuda.device_count() if torch.cuda.is_available() else 1 if has_mps else 0,
             }
         )
     except ImportError:
@@ -112,6 +112,13 @@ def initialize_device_settings(
         else:
             devices_to_use = [torch.device("cuda:0")]
             n_gpu = 1
+    elif (
+        hasattr(torch.backends, "mps")
+        and torch.backends.mps.is_available()
+        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+    ):
+        devices_to_use = [torch.device("mps")]
+        n_gpu = 1
     else:
         devices_to_use = [torch.device("cpu")]
         n_gpu = 0
@@ -180,6 +187,7 @@ def all_gather_list(data, group=None, max_size=16384):
         data (Any): data from the local worker to be gathered on other workers
         group (optional): group of the collective
     """
+    # pylint: disable=all
     SIZE_STORAGE_BYTES = 4  # int32 to encode the payload size

     enc = pickle.dumps(data)
@@ -2,6 +2,7 @@ from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple, Union
 import math
 import warnings
+import os

 from haystack.preview import component, default_to_dict, ComponentError, Document, ExtractedAnswer
 from haystack.preview.lazy_imports import LazyImport
@@ -111,6 +112,12 @@ class ExtractiveReader:
         if self.model is None:
             if torch.cuda.is_available():
                 self.device = self.device or "cuda:0"
+            elif (
+                hasattr(torch.backends, "mps")
+                and torch.backends.mps.is_available()
+                and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+            ):
+                self.device = self.device or "mps:0"
             else:
                 self.device = self.device or "cpu:0"
             self.model = AutoModelForQuestionAnswering.from_pretrained(self.model_name_or_path, token=self.token).to(
@@ -17,7 +17,7 @@ with LazyImport("Run 'pip install transformers[torch]'") as transformers_import:
     import transformers

 with LazyImport("Run 'pip install farm-haystack[metrics]'") as mlflow_import:
-    import mlflow
+    import mlflow  # pylint: disable=import-error


 logger = logging.getLogger(__name__)
@@ -236,6 +236,11 @@ def get_or_create_env_meta_data() -> Dict[str, Any]:
     from haystack.telemetry import HAYSTACK_EXECUTION_CONTEXT

     global env_meta_data  # pylint: disable=global-statement
+    has_mps = (
+        hasattr(torch.backends, "mps")
+        and torch.backends.mps.is_available()
+        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+    )
     if not env_meta_data:
         env_meta_data = {
             "os_version": platform.release(),
@@ -246,7 +251,7 @@ def get_or_create_env_meta_data() -> Dict[str, Any]:
             "transformers_version": transformers.__version__,
             "torch_version": torch.__version__,
             "torch_cuda_version": torch.version.cuda if torch.cuda.is_available() else 0,
-            "n_gpu": torch.cuda.device_count() if torch.cuda.is_available() else 0,
+            "n_gpu": torch.cuda.device_count() if torch.cuda.is_available() else 1 if has_mps else 0,
             "n_cpu": os.cpu_count(),
             "context": os.environ.get(HAYSTACK_EXECUTION_CONTEXT),
             "execution_env": _get_execution_environment(),
@@ -1,4 +1,5 @@
 from typing import Optional, List, Union
+import os

 import torch
 from torch.utils.data import Dataset
@@ -44,4 +45,10 @@ def get_devices(devices: Optional[List[Union[str, torch.device]]]) -> List[torch
         return [torch.device(device) for device in devices]
     elif torch.cuda.is_available():
         return [torch.device(device) for device in range(torch.cuda.device_count())]
+    elif (
+        hasattr(torch.backends, "mps")
+        and torch.backends.mps.is_available()
+        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+    ):
+        return [torch.device("mps")]
     return [torch.device("cpu")]
@@ -0,0 +1,4 @@
+---
+enhancements:
+  - |
+    Added support for Apple Silicon GPU acceleration through "mps pytorch", enabling better performance on Apple M1 hardware.
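Since the MPS path is on by default (the environment variable defaults to "true"), a quick way to check whether a given machine would actually receive the MPS device rather than fall back to CPU; a standalone snippet, not part of the commit:

import torch

# True only on Apple Silicon with a PyTorch build that includes the MPS backend.
print(hasattr(torch.backends, "mps") and torch.backends.mps.is_available())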