Fix param in tutorial 8

This commit is contained in:
Malte Pietsch 2021-10-13 14:45:09 +02:00
parent 9650f7aed1
commit db2b5d913b
2 changed files with 3 additions and 3 deletions

View File

@@ -169,7 +169,7 @@
"converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=[\"en\"])\n", "converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=[\"en\"])\n",
"doc_pdf = converter.convert(file_path=\"data/preprocessing_tutorial/bert.pdf\", meta=None)\n", "doc_pdf = converter.convert(file_path=\"data/preprocessing_tutorial/bert.pdf\", meta=None)\n",
"\n", "\n",
"converter = DocxToTextConverter(remove_numeric_tables=True, valid_languages=[\"en\"])\n", "converter = DocxToTextConverter(remove_numeric_tables=False, valid_languages=[\"en\"])\n",
"doc_docx = converter.convert(file_path=\"data/preprocessing_tutorial/heavy_metal.docx\", meta=None)\n" "doc_docx = converter.convert(file_path=\"data/preprocessing_tutorial/heavy_metal.docx\", meta=None)\n"
], ],
"metadata": { "metadata": {

View File

@@ -52,7 +52,7 @@ def tutorial8_preprocessing():
converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"]) converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
doc_pdf = converter.convert(file_path="data/preprocessing_tutorial/bert.pdf", meta=None) doc_pdf = converter.convert(file_path="data/preprocessing_tutorial/bert.pdf", meta=None)
converter = DocxToTextConverter(remove_numeric_tables=True, valid_languages=["en"]) converter = DocxToTextConverter(remove_numeric_tables=False, valid_languages=["en"])
doc_docx = converter.convert(file_path="data/preprocessing_tutorial/heavy_metal.docx", meta=None) doc_docx = converter.convert(file_path="data/preprocessing_tutorial/heavy_metal.docx", meta=None)
# Haystack also has a convenience function that will automatically apply the right converter to each file in a directory. # Haystack also has a convenience function that will automatically apply the right converter to each file in a directory.