mirror of
				https://github.com/Unstructured-IO/unstructured.git
				synced 2025-11-04 03:53:45 +00:00 
			
		
		
		
	* docker works * more epub tests * changelog version * support epub + odt + rtf * update dockerfile * revert.. * install pandoc on ci env * pandoc docker grab based on arch * move arch into image * move back to base image
		
			
				
	
	
		
			24 lines
		
	
	
		
			833 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			24 lines
		
	
	
		
			833 B
		
	
	
	
		
			Python
		
	
	
	
	
	
import os
 | 
						|
import pathlib
 | 
						|
from unittest.mock import patch
 | 
						|
 | 
						|
import pypandoc
 | 
						|
import pytest
 | 
						|
 | 
						|
from unstructured.file_utils.file_conversion import convert_file_to_text
 | 
						|
 | 
						|
DIRECTORY = pathlib.Path(__file__).parent.resolve()
 | 
						|
 | 
						|
 | 
						|
def test_convert_file_to_text():
    """Convert the sample EPUB to HTML and check the output opens with a ``<p>`` tag."""
    # The fixture lives in ``example-docs``, two directory levels above this test module.
    relative_parts = ("..", "..", "example-docs", "winter-sports.epub")
    epub_path = os.path.join(DIRECTORY, *relative_parts)

    converted = convert_file_to_text(
        epub_path,
        source_format="epub",
        target_format="html",
    )

    assert converted.startswith("<p>")
 | 
						|
 | 
						|
 | 
						|
def test_convert_to_file_raises_if_pandoc_not_available():
    """Expect ``FileNotFoundError`` when ``pypandoc.convert_file`` cannot run.

    ``pypandoc.convert_file`` is patched to raise ``FileNotFoundError``, and the
    test asserts that ``convert_file_to_text`` raises that exception type.
    """
    epub_path = os.path.join(DIRECTORY, "..", "..", "example-docs", "winter-sports.epub")

    # Stand-in for the real conversion call: every invocation raises immediately.
    failing_convert = patch.object(
        pypandoc,
        "convert_file",
        side_effect=FileNotFoundError,
    )

    # The patch is entered first and exited last, matching a nested ``with`` pair.
    with failing_convert, pytest.raises(FileNotFoundError):
        convert_file_to_text(epub_path, source_format="epub", target_format="html")
 |