mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-24 17:41:15 +00:00

**Summary** `partition_msg()` previously used the `msg_parser` library for parsing Outlook MSG email files (.msg files). The `msg_parser` library is unmaintained and has several major shortcomings such as not being able to parse MSG files with 8-bit encoded strings and not reliably extracting attachments. Use the new and permissively licensed `python-oxmsg` library instead. **Additional Context** For reviewability purposes, this PR temporarily places the new `partition_msg()` implementation in `new_msg.py` and references that implementation from `msg.py`. `new_msg.py` will be renamed to `msg.py` in a closely following PR. This avoids a very messy interleaving of hunks in a diff between the old and re-written `partition_msg()` implementation. Fixes #2481 Fixes #3006
40 lines
1.6 KiB
TOML
40 lines
1.6 KiB
TOML
[tool.black]
|
|
line-length = 100
|
|
|
|
[tool.pyright]
|
|
pythonPlatform = "Linux"
|
|
pythonVersion = "3.9"
|
|
reportUnnecessaryCast = true
|
|
reportUnnecessaryTypeIgnoreComment = true
|
|
stubPath = "./typings"
|
|
typeCheckingMode = "strict"
|
|
verboseOutput = true
|
|
|
|
[tool.ruff]
|
|
line-length = 100
|
|
|
|
# -- changes made here should also be made in `.pre-commit-config.yaml` and `Makefile` --
|
|
lint.select = [
|
|
"C4", # -- flake8-comprehensions --
|
|
"COM", # -- flake8-commas --
|
|
"E", # -- pycodestyle errors --
|
|
"F", # -- pyflakes --
|
|
"I", # -- isort (imports) --
|
|
"PLR0402", # -- Name compared with itself like `foo == foo` --
|
|
"PT", # -- flake8-pytest-style --
|
|
"SIM", # -- flake8-simplify --
|
|
"UP015", # -- redundant `open()` mode parameter (like "r" is default) --
|
|
"UP018", # -- Unnecessary {literal_type} call like `str("abc")`. (rewrite as a literal) --
|
|
"UP032", # -- Use f-string instead of `.format()` call --
|
|
"UP034", # -- Avoid extraneous parentheses --
|
|
"W", # -- Warnings, including invalid escape-sequence --
|
|
]
|
|
lint.ignore = [
|
|
"COM812", # -- over aggressively insists on trailing commas where not desireable --
|
|
"PT001", # -- wants empty parens on @pytest.fixture where not used (essentially always) --
|
|
"PT005", # -- flags mock fixtures with names intentionally matching private method name --
|
|
"PT011", # -- pytest.raises({exc}) too broad, use match param or more specific exception --
|
|
"PT012", # -- pytest.raises() block should contain a single simple statement --
|
|
"SIM117", # -- merge `with` statements for context managers that have same scope --
|
|
]
|