unstructured/test_unstructured/file_utils/test_file_conversion.py
Matt Robinson e43cb0e6e0
feat: add partition_epub function (#364)
* add pypandoc dependency

* added epub partitioner and file conversion

* test for partition_epub

* tests for file conversion

* add epub to filetype detection

* added epub to auto partition

* update bricks docs

* updated installing docs

* changelog and version

* add pandoc to dependencies

* add pandoc to debian dependencies

* linting, linting, linting

* typo fix

* typo fix

* file conversion type hints

* more type hints

---------

Co-authored-by: qued <64741807+qued@users.noreply.github.com>
2023-03-14 15:52:21 +00:00

24 lines
833 B
Python

import os
import pathlib
from unittest.mock import patch
import pypandoc
import pytest
from unstructured.file_utils.file_conversion import convert_file_to_text
# Resolved absolute path of the directory containing this test module; the tests
# below build the path to the example EPUB document relative to it.
DIRECTORY = pathlib.Path(__file__).parent.resolve()
def test_convert_file_to_text():
    """Convert the sample EPUB to HTML and check the result opens with a paragraph tag."""
    epub_path = os.path.join(DIRECTORY, "..", "..", "example-docs", "winter-sports.epub")
    converted = convert_file_to_text(
        epub_path,
        source_format="epub",
        target_format="html",
    )
    assert converted.startswith("<p>")
def test_convert_to_file_raises_if_pandoc_not_available():
    """Check that a FileNotFoundError from pypandoc surfaces as a FileNotFoundError.

    ``pypandoc.convert_file`` is patched to raise ``FileNotFoundError`` so the
    conversion fails the way the test name describes (pandoc not available).
    """
    epub_path = os.path.join(DIRECTORY, "..", "..", "example-docs", "winter-sports.epub")
    # Replace the real converter for the duration of the ``with`` block only.
    pandoc_missing = patch.object(pypandoc, "convert_file", side_effect=FileNotFoundError)
    with pandoc_missing, pytest.raises(FileNotFoundError):
        convert_file_to_text(epub_path, source_format="epub", target_format="html")