From a1fb70bbbd7aa0f9f08ac8678d11960ec161c3ee Mon Sep 17 00:00:00 2001
From: Alon Eirew <alon.eirew@intel.com>
Date: Mon, 3 Jan 2022 09:33:55 +0200
Subject: [PATCH] Make ctx_segment_ids a list instead of np.zeros_like

* fix #1687

* fix - UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow..

* fix RuntimeError: received 0 items of ancdata

* Remove set_sharing_strategy from this branch and replace numpy.zeros_like with a plain Python list
---
 haystack/modeling/data_handler/processor.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/haystack/modeling/data_handler/processor.py b/haystack/modeling/data_handler/processor.py
index 4208d1929..718443565 100644
--- a/haystack/modeling/data_handler/processor.py
+++ b/haystack/modeling/data_handler/processor.py
@@ -1102,8 +1102,7 @@ class TextSimilarityProcessor(Processor):
                         return_token_type_ids=True
                     )
 
-                    # TODO check if we need this and potentially remove
-                    ctx_segment_ids = np.zeros_like(ctx_inputs["token_type_ids"], dtype=np.int32)
+                    ctx_segment_ids = [[0] * len(ctx_inputs["token_type_ids"][0])] * len(ctx_inputs["token_type_ids"])
 
                     # get tokens in string format
                     tokenized_passage = [self.passage_tokenizer.convert_ids_to_tokens(ctx) for ctx in ctx_inputs["input_ids"]]