From a5501c72cf717396cd1b5238bddcc6ce00cba490 Mon Sep 17 00:00:00 2001 From: bogdankostic Date: Wed, 2 Mar 2022 09:21:54 +0100 Subject: [PATCH] Fix table extraction in `ParsrConverter` (#2262) --- haystack/nodes/file_converter/parsr.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/haystack/nodes/file_converter/parsr.py b/haystack/nodes/file_converter/parsr.py index bb84b8acf..a525ba91d 100644 --- a/haystack/nodes/file_converter/parsr.py +++ b/haystack/nodes/file_converter/parsr.py @@ -197,9 +197,12 @@ class ParsrConverter(BaseConverter): elem_idx: int, meta: Optional[Dict[str, str]] = None, ) -> Dict[str, Any]: + row_idx_start = 0 caption = "" - table_list = [[""] * len(element["content"][0]["content"]) for _ in range(len(element["content"]))] + number_of_columns = max([len(row["content"]) for row in element["content"]]) + number_of_rows = len(element["content"]) + table_list = [[""] * number_of_columns for _ in range(number_of_rows)] for row_idx, row in enumerate(element["content"]): for col_idx, cell in enumerate(row["content"]):