unstructured/pyproject.toml
Steve Canny e4158deaff
fix(msg): use python-oxmsg for MSG email parsing (#3142)
**Summary**
`partition_msg()` previously used the `msg_parser` library for parsing
Outlook MSG email files (.msg files). The `msg_parser` library is
unmaintained and has several major shortcomings such as not being able
to parse MSG files with 8-bit encoded strings and not reliably
extracting attachments.

Use the new and permissively licensed `python-oxmsg` library instead.

**Additional Context**
For reviewability purposes, this PR temporarily places the new
`partition_msg()` implementation in `new_msg.py` and references that
implementation from `msg.py`. `new_msg.py` will be renamed to `msg.py`
in a closely following PR. This avoids a very messy interleaving of
hunks in a diff between the old and re-written `partition_msg()`
implementation.

Fixes #2481 
Fixes #3006
2024-06-05 21:12:27 +00:00

40 lines
1.6 KiB
TOML

# -- Black formatter settings; line length is the same value used under `[tool.ruff]` --
[tool.black]
line-length = 100
# -- Pyright static type-checker settings --
[tool.pyright]
# -- type-check as though targeting Linux and Python 3.9, whatever the host machine is --
pythonPlatform = "Linux"
pythonVersion = "3.9"
# -- flag `cast()` calls and `# type: ignore` comments that are not actually needed --
reportUnnecessaryCast = true
reportUnnecessaryTypeIgnoreComment = true
# -- directory searched for locally maintained type stubs --
stubPath = "./typings"
typeCheckingMode = "strict"
verboseOutput = true
# -- Ruff linter settings; line length is the same value used under `[tool.black]` --
[tool.ruff]
line-length = 100
# -- changes made here should also be made in `.pre-commit-config.yaml` and `Makefile` --
# -- rule families and individual rules that are enabled --
lint.select = [
"C4", # -- flake8-comprehensions --
"COM", # -- flake8-commas --
"E", # -- pycodestyle errors --
"F", # -- pyflakes --
"I", # -- isort (imports) --
# -- NOTE(review): self-comparison like `foo == foo` is PLR0124, not PLR0402; confirm intent --
"PLR0402", # -- manual-from-import: use `from a import b` rather than `import a.b as b` --
"PT", # -- flake8-pytest-style --
"SIM", # -- flake8-simplify --
"UP015", # -- redundant `open()` mode parameter (like "r" is default) --
"UP018", # -- Unnecessary {literal_type} call like `str("abc")`. (rewrite as a literal) --
"UP032", # -- Use f-string instead of `.format()` call --
"UP034", # -- Avoid extraneous parentheses --
"W", # -- Warnings, including invalid escape-sequence --
]
# -- individual rules switched off even though their family is selected above --
lint.ignore = [
"COM812", # -- over-aggressively insists on trailing commas where not desirable --
"PT001", # -- wants empty parens on @pytest.fixture where not used (essentially always) --
"PT005", # -- flags mock fixtures with names intentionally matching private method name --
"PT011", # -- pytest.raises({exc}) too broad, use match param or more specific exception --
"PT012", # -- pytest.raises() block should contain a single simple statement --
"SIM117", # -- merge `with` statements for context managers that have same scope --
]