mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-07-27 10:49:52 +00:00

* Skip file converter if file type is not supported. Refer https://github.com/deepset-ai/haystack/issues/453 * Fixing issue reported by mypy * Addressing review comments
14 lines
528 B
Python
14 lines
528 B
Python
from haystack.preprocessor import utils
|
|
from haystack.preprocessor.cleaning import clean_wiki_text
|
|
|
|
|
|
def test_convert_files_to_dicts(xpdf_fixture):
    """Check that ``utils.convert_files_to_dicts`` yields a non-empty result.

    The ``samples`` directory is converted with ``clean_wiki_text`` as the
    cleaning function and paragraph splitting switched on.

    NOTE(review): ``xpdf_fixture`` is requested but never read in the body;
    presumably it only prepares the environment -- confirm in conftest.
    """
    converted = utils.convert_files_to_dicts(
        dir_path="samples",
        clean_func=clean_wiki_text,
        split_paragraphs=True,
    )

    # The conversion must produce at least one document.
    assert converted and len(converted) > 0
|
|
|
|
|
|
def test_tika_convert_files_to_dicts(tika_fixture):
    """Check that ``utils.tika_convert_files_to_dicts`` yields a non-empty result.

    The ``samples`` directory is converted with ``clean_wiki_text`` as the
    cleaning function and paragraph splitting switched on.

    NOTE(review): ``tika_fixture`` is requested but never read in the body;
    presumably it only prepares the environment -- confirm in conftest.
    """
    converted = utils.tika_convert_files_to_dicts(
        dir_path="samples",
        clean_func=clean_wiki_text,
        split_paragraphs=True,
    )

    # The conversion must produce at least one document.
    assert converted and len(converted) > 0
|
|
|