diff --git a/tutorials/Tutorial8_Preprocessing.ipynb b/tutorials/Tutorial8_Preprocessing.ipynb index 00d6bd7d0..3bd71db6a 100644 --- a/tutorials/Tutorial8_Preprocessing.ipynb +++ b/tutorials/Tutorial8_Preprocessing.ipynb @@ -169,7 +169,7 @@ "converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=[\"en\"])\n", "doc_pdf = converter.convert(file_path=\"data/preprocessing_tutorial/bert.pdf\", meta=None)\n", "\n", - "converter = DocxToTextConverter(remove_numeric_tables=True, valid_languages=[\"en\"])\n", + "converter = DocxToTextConverter(remove_numeric_tables=False, valid_languages=[\"en\"])\n", "doc_docx = converter.convert(file_path=\"data/preprocessing_tutorial/heavy_metal.docx\", meta=None)\n" ], "metadata": { @@ -520,4 +520,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file diff --git a/tutorials/Tutorial8_Preprocessing.py b/tutorials/Tutorial8_Preprocessing.py index 394d37c0e..e4f293854 100644 --- a/tutorials/Tutorial8_Preprocessing.py +++ b/tutorials/Tutorial8_Preprocessing.py @@ -52,7 +52,7 @@ def tutorial8_preprocessing(): converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"]) doc_pdf = converter.convert(file_path="data/preprocessing_tutorial/bert.pdf", meta=None) - converter = DocxToTextConverter(remove_numeric_tables=True, valid_languages=["en"]) + converter = DocxToTextConverter(remove_numeric_tables=False, valid_languages=["en"]) doc_docx = converter.convert(file_path="data/preprocessing_tutorial/heavy_metal.docx", meta=None) # Haystack also has a convenience function that will automatically apply the right converter to each file in a directory.