haystack/test/components/converters/test_msg_to_document.py
Sebastian Husch Lee 99a998f90b
feat: Add MSGToDocument converter (#8868)
* Initial commit of MSG converter from Bijay

* Updates to the MSG converter

* Add license header

* Add tests for msg converter

* Update converter

* Expanding tests

* Update docstrings

* add license header

* Add reno

* Add to inits and pydocs

* Add test for empty input

* Fix types

* Fix mypy

---------

Co-authored-by: Bijay Gurung <bijay.learning@gmail.com>
2025-02-24 08:12:32 +01:00

39 lines
1.7 KiB
Python

# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
from haystack.components.converters.msg import MSGToDocument
class TestMSGToDocument:
    """Tests for the ``run`` method of the ``MSGToDocument`` converter."""

    def test_run(self, test_files_path):
        """Convert the sample MSG file and check the document and its attachment."""
        date_added = "2021-09-01T00:00:00"
        msg_path = test_files_path / "msg" / "sample.msg"

        msg_converter = MSGToDocument(store_full_path=True)
        output = msg_converter.run(sources=[msg_path], meta={"date_added": date_added})

        # Exactly one document is expected, starting with the sender header line.
        documents = output["documents"]
        assert len(documents) == 1
        document = documents[0]
        assert document.content.startswith('From: "Sebastian Lee"')
        # The converter was built with store_full_path=True, so the expected
        # document meta carries the full source path next to the user meta.
        assert document.meta == {"date_added": date_added, "file_path": str(msg_path)}

        # Exactly one PDF attachment is expected; its meta points back to the
        # parent MSG file and holds the attachment's bare file name.
        attachments = output["attachments"]
        assert len(attachments) == 1
        attachment = attachments[0]
        assert attachment.mime_type == "application/pdf"
        assert attachment.meta == {
            "date_added": date_added,
            "parent_file_path": str(msg_path),
            "file_path": "sample_pdf_1.pdf",
        }

    def test_run_wrong_file_type(self, test_files_path, caplog):
        """Feed a PDF to the converter: no documents are expected and the error text is logged."""
        pdf_path = test_files_path / "pdf" / "sample_pdf_1.pdf"

        msg_converter = MSGToDocument(store_full_path=False)
        output = msg_converter.run(sources=[pdf_path], meta={"date_added": "2021-09-01T00:00:00"})

        assert len(output["documents"]) == 0
        assert "msg_file is not an Outlook MSG file" in caplog.text

    def test_run_empty_sources(self, test_files_path):
        """Run the converter on an empty source list: both outputs must be empty."""
        msg_converter = MSGToDocument(store_full_path=False)
        output = msg_converter.run(sources=[])

        assert len(output["documents"]) == 0
        assert len(output["attachments"]) == 0