2023-08-17 10:34:08 -07:00
|
|
|
from dataclasses import dataclass
|
|
|
|
|
|
|
|
from unstructured.ingest.interfaces import BaseIngestDoc, IngestDocSessionHandleMixin
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class IngestDocWithSessionHandle(IngestDocSessionHandleMixin, BaseIngestDoc):
    """Ingest doc type that mixes IngestDocSessionHandleMixin into BaseIngestDoc.

    Adds no fields or methods of its own; the tests in this module use it as the
    `spec` for a mocked doc that includes the session handle mixin.
    """
|
|
|
|
|
2023-09-11 11:40:56 -04:00
|
|
|
|
2023-08-17 10:34:08 -07:00
|
|
|
def test_process_document_with_session_handle(mocker):
    """Check process_document against a doc that includes the session handle mixin.

    create_ingest_doc_from_json is patched to return a mocked doc and the module-level
    session_handle is patched with a mock. The test then verifies that the doc's
    get_file, write_result and cleanup_file were called without arguments, that the
    return value is what process_file produced, and that the doc was given the
    patched session handle.
    """
    registry_target = "unstructured.ingest.connector.registry.create_ingest_doc_from_json"
    handle_target = "unstructured.ingest.doc_processor.generalized.session_handle"

    doc = mocker.MagicMock(spec=IngestDocWithSessionHandle)
    handle = mocker.MagicMock()

    # Patch order is kept as-is: the registry factory first, then the session handle.
    mocker.patch(registry_target, return_value=doc)
    mocker.patch(handle_target, handle)

    # Imported only now so that the patches above are already in place.
    from unstructured.ingest.doc_processor.generalized import process_document

    # The argument is irrelevant here because the factory that consumes it is patched.
    outcome = process_document(mocker.MagicMock())

    doc.get_file.assert_called_once_with()
    doc.write_result.assert_called_with()
    doc.cleanup_file.assert_called_once_with()

    expected = doc.process_file.return_value
    assert outcome == expected
    assert doc.session_handle == handle
|
|
|
|
|
|
|
|
|
|
|
|
def test_process_document_no_session_handle(mocker):
    """Check that process_document does not assign a session handle when the IngestDoc
    does not have the session handle mixin."""
    plain_doc = mocker.MagicMock(spec=BaseIngestDoc)

    # Patch order is kept as-is: the registry factory first, then the session handle.
    mocker.patch(
        "unstructured.ingest.connector.registry.create_ingest_doc_from_json",
        return_value=plain_doc,
    )
    mocker.patch(
        "unstructured.ingest.doc_processor.generalized.session_handle",
        mocker.MagicMock(),
    )

    # Imported only now so that the patches above are already in place.
    from unstructured.ingest.doc_processor.generalized import process_document

    process_document(plain_doc)

    # `session_handle` should not be present on the doc mock after the call.
    handle_was_set = hasattr(plain_doc, "session_handle")
    assert not handle_was_set
|