haystack/test/test_utils.py
Lalit Pagaria 9b58374b7c
Skip file conversion if file type is not supported (#456)
* Skip file converter if file type is not supported. Refer to https://github.com/deepset-ai/haystack/issues/453

* Fixing issue reported by mypy

* Addressing review comments
2020-10-01 14:47:45 +02:00

14 lines
528 B
Python

from haystack.preprocessor import utils
from haystack.preprocessor.cleaning import clean_wiki_text
def test_convert_files_to_dicts(xpdf_fixture):
    """Check that converting the ``samples`` directory yields documents.

    Calls ``utils.convert_files_to_dicts`` on ``"samples"`` with
    ``clean_wiki_text`` as the cleaning function and paragraph splitting
    enabled, then asserts that the result is non-empty.

    ``xpdf_fixture`` is requested only for its setup; it is not used in the
    body (presumably it provides the xpdf tooling -- confirm in conftest).
    """
    converted = utils.convert_files_to_dicts(
        dir_path="samples",
        clean_func=clean_wiki_text,
        split_paragraphs=True,
    )
    # Two-step check: reject a falsy result first, then an empty collection.
    assert converted
    assert len(converted) > 0
def test_tika_convert_files_to_dicts(tika_fixture):
    """Check that Tika-based conversion of ``samples`` yields documents.

    Calls ``utils.tika_convert_files_to_dicts`` on ``"samples"`` with
    ``clean_wiki_text`` as the cleaning function and paragraph splitting
    enabled, then asserts that the result is non-empty.

    ``tika_fixture`` is requested only for its setup; it is not used in the
    body (presumably it makes a Tika service available -- confirm in
    conftest).
    """
    converted = utils.tika_convert_files_to_dicts(
        dir_path="samples",
        clean_func=clean_wiki_text,
        split_paragraphs=True,
    )
    # Two-step check: reject a falsy result first, then an empty collection.
    assert converted
    assert len(converted) > 0