mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-11-03 11:34:07 +00:00
test: Increase coverage (#181)
This commit is contained in:
parent
f36e514c6d
commit
243bf7ed5e
@ -132,3 +132,15 @@ def test_partition_pdf(url, api_called, local_called):
|
|||||||
pdf.partition_pdf(filename="fake.pdf", url=url)
|
pdf.partition_pdf(filename="fake.pdf", url=url)
|
||||||
assert pdf._partition_via_api.called == api_called
|
assert pdf._partition_via_api.called == api_called
|
||||||
assert pdf._partition_pdf_or_image_local.called == local_called
|
assert pdf._partition_pdf_or_image_local.called == local_called
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"url, api_called, local_called", [("fakeurl", True, False), (None, False, True)]
|
||||||
|
)
|
||||||
|
def test_partition_pdf_with_template(url, api_called, local_called):
|
||||||
|
with mock.patch.object(
|
||||||
|
pdf, attribute="_partition_via_api", new=mock.MagicMock()
|
||||||
|
), mock.patch.object(pdf, "_partition_pdf_or_image_local", mock.MagicMock()):
|
||||||
|
pdf.partition_pdf(filename="fake.pdf", url=url, template="checkbox")
|
||||||
|
assert pdf._partition_via_api.called == api_called
|
||||||
|
assert pdf._partition_pdf_or_image_local.called == local_called
|
||||||
|
|||||||
@ -1,7 +1,8 @@
|
|||||||
|
import hashlib
|
||||||
from abc import ABC
|
from abc import ABC
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import hashlib
|
|
||||||
from typing import Callable, List, Union
|
from typing import Callable, List, Union
|
||||||
|
|
||||||
from unstructured.documents.elements import Element, Text, NoID
|
from unstructured.documents.elements import Element, Text, NoID
|
||||||
|
|
||||||
|
|
||||||
@ -66,10 +67,7 @@ class Name(EmailElement):
|
|||||||
cleaned_text = cleaner(cleaned_text)
|
cleaned_text = cleaner(cleaned_text)
|
||||||
cleaned_name = cleaner(cleaned_name)
|
cleaned_name = cleaner(cleaned_name)
|
||||||
|
|
||||||
if not isinstance(cleaned_text, str):
|
if not isinstance(cleaned_text, str) or not isinstance(cleaned_name, str):
|
||||||
raise ValueError("Cleaner produced a non-string output.")
|
|
||||||
|
|
||||||
if not isinstance(cleaned_name, str):
|
|
||||||
raise ValueError("Cleaner produced a non-string output.")
|
raise ValueError("Cleaner produced a non-string output.")
|
||||||
|
|
||||||
self.text = cleaned_text
|
self.text = cleaned_text
|
||||||
@ -133,39 +131,3 @@ class Attachment(Name):
|
|||||||
category = "Attachment"
|
category = "Attachment"
|
||||||
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class Email(ABC):
|
|
||||||
"""An email class with it's attributes"""
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.recipient = Recipient
|
|
||||||
self.sender = Sender
|
|
||||||
self.subject = Subject
|
|
||||||
self.body = BodyText
|
|
||||||
self.received_info: ReceivedInfo
|
|
||||||
self.meta_data: MetaData
|
|
||||||
self.attachment: List[Attachment]
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return f"""
|
|
||||||
Recipient: {self.recipient}
|
|
||||||
Sender: {self.sender}
|
|
||||||
Subject: {self.subject}
|
|
||||||
|
|
||||||
Received Header Information:
|
|
||||||
|
|
||||||
{self.received_info}
|
|
||||||
|
|
||||||
Meta Data From Header:
|
|
||||||
|
|
||||||
{self.meta_data}
|
|
||||||
|
|
||||||
Body:
|
|
||||||
|
|
||||||
{self.body}
|
|
||||||
|
|
||||||
Attachment:
|
|
||||||
|
|
||||||
{[file.name for file in self.attachment]}
|
|
||||||
"""
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user