From a1fb70bbbd7aa0f9f08ac8678d11960ec161c3ee Mon Sep 17 00:00:00 2001 From: Alon Eirew Date: Mon, 3 Jan 2022 09:33:55 +0200 Subject: [PATCH] Make ctx_segment_ids a list instead of np.zeros_like * fix #1687 * fix - UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow.. * fix RuntimeError: received 0 items of ancdata * Remove set_sharing_strategy from this branch and replace numpy.zeros_like with a Python list --- haystack/modeling/data_handler/processor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/haystack/modeling/data_handler/processor.py b/haystack/modeling/data_handler/processor.py index 4208d1929..718443565 100644 --- a/haystack/modeling/data_handler/processor.py +++ b/haystack/modeling/data_handler/processor.py @@ -1102,8 +1102,7 @@ class TextSimilarityProcessor(Processor): return_token_type_ids=True ) - # TODO check if we need this and potentially remove - ctx_segment_ids = np.zeros_like(ctx_inputs["token_type_ids"], dtype=np.int32) + ctx_segment_ids = [[0] * len(ctx_inputs["token_type_ids"][0])] * len(ctx_inputs["token_type_ids"]) # get tokens in string format tokenized_passage = [self.passage_tokenizer.convert_ids_to_tokens(ctx) for ctx in ctx_inputs["input_ids"]]