mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-07-27 02:40:41 +00:00
14 lines
528 B
Python
14 lines
528 B
Python
![]() |
from haystack.preprocessor import utils
|
||
|
from haystack.preprocessor.cleaning import clean_wiki_text
|
||
|
|
||
|
|
||
|
def test_convert_files_to_dicts(xpdf_fixture):
    """Check that converting the ``samples`` directory yields documents.

    Calls ``utils.convert_files_to_dicts`` with ``clean_wiki_text`` as the
    cleaning function and paragraph splitting enabled, then verifies that
    the result is non-empty.

    NOTE(review): ``xpdf_fixture`` is requested but never used directly;
    presumably it prepares a dependency needed for the conversion --
    confirm against the fixture definition.
    """
    converted = utils.convert_files_to_dicts(
        dir_path="samples",
        clean_func=clean_wiki_text,
        split_paragraphs=True,
    )

    # The result must be truthy and contain at least one entry.
    assert converted
    assert len(converted) > 0
|
||
|
|
||
|
|
||
|
def test_tika_convert_files_to_dicts(tika_fixture):
    """Check that the Tika-based conversion of ``samples`` yields documents.

    Calls ``utils.tika_convert_files_to_dicts`` with ``clean_wiki_text`` as
    the cleaning function and paragraph splitting enabled, then verifies
    that the result is non-empty.

    NOTE(review): ``tika_fixture`` is requested but never used directly;
    presumably it prepares a dependency needed for the conversion --
    confirm against the fixture definition.
    """
    converted = utils.tika_convert_files_to_dicts(
        dir_path="samples",
        clean_func=clean_wiki_text,
        split_paragraphs=True,
    )

    # The result must be truthy and contain at least one entry.
    assert converted
    assert len(converted) > 0
|
||
|
|