Chore: Make linter happy (#1256)

* refactor: remove unused imports

* fix: replace NotImplemented with NotImplementedError

* refactor: resolve E722 (do not use bare 'except')

* refactor: remove unused variable

* refactor: remove unused imports

* refactor: ignore unused imports that will be used in the future

* refactor: resolve W293 (blank line contains whitespace)

* refactor: resolve F541 (f-string is missing placeholders)

---------

Co-authored-by: afourney <adamfo@microsoft.com>
This commit is contained in:
t3tra 2025-05-22 02:02:16 +09:00 committed by GitHub
parent 39e7252940
commit cb421cf9ea
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
26 changed files with 25 additions and 69 deletions

View File

@ -1,5 +1,4 @@
import sys
from typing import Any
from mcp.server.fastmcp import FastMCP
from starlette.applications import Starlette
from mcp.server.sse import SseServerTransport

View File

@ -1,6 +1,5 @@
#!/usr/bin/env python3 -m pytest
import os
import pytest
from markitdown import MarkItDown, StreamInfo
from markitdown_sample_plugin import RtfConverter

View File

@ -4,7 +4,6 @@
import argparse
import sys
import codecs
import locale
from textwrap import dedent
from importlib.metadata import entry_points
from .__about__ import __version__
@ -34,13 +33,13 @@ def main():
OR
markitdown < example.pdf
OR to save to a file use
markitdown example.pdf -o example.md
OR
markitdown example.pdf > example.md
"""
).strip(),

View File

@ -1,7 +1,4 @@
import os
import tempfile
from warnings import warn
from typing import Any, Union, BinaryIO, Optional, List
from typing import Any, BinaryIO, Optional
from ._stream_info import StreamInfo

View File

@ -1,11 +1,8 @@
import copy
import mimetypes
import os
import re
import sys
import shutil
import tempfile
import warnings
import traceback
import io
from dataclasses import dataclass
@ -547,7 +544,7 @@ class MarkItDown:
# Sanity check -- make sure the cur_pos is still the same
assert (
cur_pos == file_stream.tell()
), f"File stream position should NOT change between guess iterations"
), "File stream position should NOT change between guess iterations"
_kwargs = {k: v for k, v in kwargs.items()}
@ -614,7 +611,7 @@ class MarkItDown:
# Nothing can handle it!
raise UnsupportedFormatException(
f"Could not convert stream to Markdown. No converter attempted a conversion, suggesting that the filetype is simply not supported."
"Could not convert stream to Markdown. No converter attempted a conversion, suggesting that the filetype is simply not supported."
)
def register_page_converter(self, converter: DocumentConverter) -> None:

View File

@ -272,7 +272,7 @@ class oMath2Latex(Tag2Method):
if FUNC.get(t):
latex_chars.append(FUNC[t])
else:
raise NotImplemented("Not support func %s" % t)
raise NotImplementedError("Not support func %s" % t)
else:
latex_chars.append(t)
t = BLANK.join(latex_chars)
@ -316,7 +316,7 @@ class oMath2Latex(Tag2Method):
t_dict = self.process_children_dict(elm, include=("e", "lim"))
latex_s = LIM_FUNC.get(t_dict["e"])
if not latex_s:
raise NotImplemented("Not support lim %s" % t_dict["e"])
raise NotImplementedError("Not support lim %s" % t_dict["e"])
else:
return latex_s.format(lim=t_dict.get("lim"))

View File

@ -147,7 +147,7 @@ def pre_process_docx(input_docx: BinaryIO) -> BinaryIO:
updated_content = _pre_process_math(content)
# In the future, if there are more pre-processing steps, they can be added here
zip_output.writestr(name, updated_content)
except:
except Exception:
# If there is an error in processing the content, write the original content
zip_output.writestr(name, content)
else:

View File

@ -1,5 +1,4 @@
import io
from typing import Any, BinaryIO, Optional
from typing import Any, BinaryIO
from ._exiftool import exiftool_metadata
from ._transcribe_audio import transcribe_audio

View File

@ -1,9 +1,8 @@
import io
import re
import base64
import binascii
from urllib.parse import parse_qs, urlparse
from typing import Any, BinaryIO, Optional
from typing import Any, BinaryIO
from bs4 import BeautifulSoup
from .._base_converter import DocumentConverter, DocumentConverterResult

View File

@ -1,9 +1,7 @@
import sys
import csv
import io
from typing import BinaryIO, Any
from charset_normalizer import from_bytes
from ._html_converter import HtmlConverter
from .._base_converter import DocumentConverter, DocumentConverterResult
from .._stream_info import StreamInfo

View File

@ -1,13 +1,12 @@
import sys
import re
import os
from typing import BinaryIO, Any, List, Optional, Union
from typing import BinaryIO, Any, List
from enum import Enum
from ._html_converter import HtmlConverter
from .._base_converter import DocumentConverter, DocumentConverterResult
from .._stream_info import StreamInfo
from .._exceptions import MissingDependencyException, MISSING_DEPENDENCY_MESSAGE
from .._exceptions import MissingDependencyException
# Try loading optional (but in this case, required) dependencies
# Save reporting of any exceptions for later

View File

@ -4,7 +4,7 @@ from typing import BinaryIO, Any
from ._html_converter import HtmlConverter
from ..converter_utils.docx.pre_process import pre_process_docx
from .._base_converter import DocumentConverter, DocumentConverterResult
from .._base_converter import DocumentConverterResult
from .._stream_info import StreamInfo
from .._exceptions import MissingDependencyException, MISSING_DEPENDENCY_MESSAGE

View File

@ -6,7 +6,7 @@ from xml.dom.minidom import Document
from typing import BinaryIO, Any, Dict, List
from ._html_converter import HtmlConverter
from .._base_converter import DocumentConverter, DocumentConverterResult
from .._base_converter import DocumentConverterResult
from .._stream_info import StreamInfo
ACCEPTED_MIME_TYPE_PREFIXES = [

View File

@ -1,10 +1,6 @@
import json
import subprocess
import locale
import sys
import shutil
import os
import warnings
from typing import BinaryIO, Any, Union

View File

@ -50,8 +50,6 @@ class IpynbConverter(DocumentConverter):
**kwargs: Any, # Options to pass to the converter
) -> DocumentConverterResult:
# Parse and convert the notebook
result = None
encoding = stream_info.charset or "utf-8"
notebook_content = file_stream.read().decode(encoding=encoding)
return self._convert(json.loads(notebook_content))

View File

@ -1,4 +1,4 @@
from typing import BinaryIO, Any, Union
from typing import BinaryIO, Union
import base64
import mimetypes
from .._stream_info import StreamInfo

View File

@ -4,7 +4,6 @@ import io
from typing import BinaryIO, Any
from ._html_converter import HtmlConverter
from .._base_converter import DocumentConverter, DocumentConverterResult
from .._stream_info import StreamInfo
from .._exceptions import MissingDependencyException, MISSING_DEPENDENCY_MESSAGE

View File

@ -9,7 +9,7 @@ from .._stream_info import StreamInfo
# Save reporting of any exceptions for later
_dependency_exc_info = None
try:
import mammoth
import mammoth # noqa: F401
except ImportError:
# Preserve the error and stack trace for later
_dependency_exc_info = sys.exc_info()

View File

@ -1,7 +1,6 @@
import io
import re
import bs4
from typing import Any, BinaryIO, Optional
from typing import Any, BinaryIO
from .._base_converter import DocumentConverter, DocumentConverterResult
from .._stream_info import StreamInfo

View File

@ -10,14 +10,14 @@ from .._stream_info import StreamInfo
_xlsx_dependency_exc_info = None
try:
import pandas as pd
import openpyxl
import openpyxl # noqa: F401
except ImportError:
_xlsx_dependency_exc_info = sys.exc_info()
_xls_dependency_exc_info = None
try:
import pandas as pd
import xlrd
import pandas as pd # noqa: F811
import xlrd # noqa: F401
except ImportError:
_xls_dependency_exc_info = sys.exc_info()

View File

@ -1,10 +1,8 @@
import sys
import json
import time
import io
import re
import bs4
from typing import Any, BinaryIO, Optional, Dict, List, Union
from typing import Any, BinaryIO, Dict, List, Union
from urllib.parse import parse_qs, urlparse, unquote
from .._base_converter import DocumentConverter, DocumentConverterResult

View File

@ -1,4 +1,3 @@
import sys
import zipfile
import io
import os

View File

@ -1,6 +1,5 @@
#!/usr/bin/env python3 -m pytest
import subprocess
import pytest
from markitdown import __version__
# This file contains CLI tests that are not directly tested by the FileTestVectors.
@ -24,8 +23,8 @@ def test_invalid_flag() -> None:
assert result.returncode != 0, f"CLI exited with error: {result.stderr}"
assert (
"unrecognized arguments" in result.stderr
), f"Expected 'unrecognized arguments' to appear in STDERR"
assert "SYNTAX" in result.stderr, f"Expected 'SYNTAX' to appear in STDERR"
), "Expected 'unrecognized arguments' to appear in STDERR"
assert "SYNTAX" in result.stderr, "Expected 'SYNTAX' to appear in STDERR"
if __name__ == "__main__":

View File

@ -19,13 +19,6 @@ else:
FileTestVector,
)
from markitdown import (
MarkItDown,
UnsupportedFormatException,
FileConversionException,
StreamInfo,
)
skip_remote = (
True if os.environ.get("GITHUB_ACTIONS") else False
) # Don't run these tests in CI
@ -140,8 +133,6 @@ def test_convert_url(shared_tmp_dir, test_vector):
"""Test the conversion of a stream with no stream info."""
# Note: tmp_dir is not used here, but is needed to match the signature
markitdown = MarkItDown()
time.sleep(1) # Ensure we don't hit rate limits
result = subprocess.run(
["python", "-m", "markitdown", TEST_FILES_URL + "/" + test_vector.filename],
@ -191,7 +182,6 @@ def test_output_to_file_with_data_uris(shared_tmp_dir, test_vector) -> None:
if __name__ == "__main__":
import sys
import tempfile
"""Runs this file's tests from the command line."""

View File

@ -3,7 +3,6 @@ import io
import os
import re
import shutil
import openai
import pytest
from markitdown._uri_utils import parse_data_uri, file_uri_to_path
@ -253,8 +252,6 @@ def test_file_uris() -> None:
def test_docx_comments() -> None:
markitdown = MarkItDown()
# Test DOCX processing, with comments and setting style_map on init
markitdown_with_style_map = MarkItDown(style_map="comment-reference => ")
result = markitdown_with_style_map.convert(

View File

@ -2,7 +2,6 @@
import os
import time
import pytest
import codecs
import base64
from pathlib import Path
@ -14,8 +13,6 @@ else:
from markitdown import (
MarkItDown,
UnsupportedFormatException,
FileConversionException,
StreamInfo,
)
@ -203,8 +200,6 @@ def test_convert_stream_keep_data_uris(test_vector):
if __name__ == "__main__":
import sys
"""Runs this file's tests from the command line."""
# General tests