haystack/test/test_docx_conversion.py
Tanay Soni 1637ce1184 Revert "Add Tika Converter (#314)"
This reverts commit 5ef59b1901da6d51bfa085683321a243228d4fc9.
2020-08-17 11:13:52 +02:00

13 lines
528 B
Python

import logging
from pathlib import Path
from haystack.indexing.file_converters.docx import DocxToTextConverter
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def test_extract_pages():
    """Verify the output of ``DocxToTextConverter.extract_pages`` on the sample DOCX.

    The test expects the call to yield 8 entries for the bundled sample
    document, and the entry at index 1 to equal a known sentence.
    The sample path is relative, so it is resolved against the current
    working directory at the time the test runs.
    """
    docx_converter = DocxToTextConverter()
    sample_file = Path("samples/docx/sample_docx.docx")
    extracted = docx_converter.extract_pages(file_path=sample_file)

    # The sample document is expected to contain exactly 8 paragraphs.
    expected_count = 8
    assert len(extracted) == expected_count

    # Spot-check the content of the second entry (index 1).
    expected_second = 'The US has "passed the peak" on new coronavirus cases, President Donald Trump said and predicted that some states would reopen this month.'
    assert extracted[1] == expected_second