Replace python-magic with puremagic (#15990)

2025-12-06 11:56:56 +00:00 · 2025-07-08 14:16:25 +08:00 · 2025-07-08 14:16:25 +08:00 · b34d8fa796
commit b34d8fa796
parent fc97e5c2d8
4 changed files with 18 additions and 30 deletions
--- a/docs/version3.x/deployment/mcp_server.en.md
+++ b/docs/version3.x/deployment/mcp_server.en.md
@ -85,7 +85,7 @@ To install `paddleocr-mcp` using pip:
 ```bash
 # Install the wheel
-pip install https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.1.0/paddleocr_mcp-0.1.0-py3-none-any.whl
+pip install https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.2.0/paddleocr_mcp-0.2.0-py3-none-any.whl
 # Or install from source
 # git clone https://github.com/PaddlePaddle/PaddleOCR.git
@ -98,14 +98,7 @@ To verify successful installation:
 paddleocr_mcp --help
 ```
-If the help message is printed, the installation succeeded. This project depends on the `python-magic` library. If you see the following error:
+If the help message is printed, the installation succeeded.
-```
-...
-ImportError: failed to find libmagic.  Check your installation
-```
-You are likely missing a required native library for python-magic. Please refer to the [official python-magic documentation](https://github.com/ahupp/python-magic?tab=readme-ov-file#installation) for installation instructions.
 ## 2. Using with Claude for Desktop
@ -293,7 +286,7 @@ Currently, for both the AI Studio and self-hosted modes, starting the MCP server
          "command": "uvx",
          "args": [
            "--from",
-            "paddleocr-mcp@https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.1.0/paddleocr_mcp-0.1.0-py3-none-any.whl",
+            "paddleocr-mcp@https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.2.0/paddleocr_mcp-0.2.0-py3-none-any.whl",
            "paddleocr_mcp"
          ],
          "env": {
--- a/docs/version3.x/deployment/mcp_server.md
+++ b/docs/version3.x/deployment/mcp_server.md
@ -86,7 +86,7 @@
 ```bash
 # 安装 wheel 包
-pip install https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.1.0/paddleocr_mcp-0.1.0-py3-none-any.whl
+pip install https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.2.0/paddleocr_mcp-0.2.0-py3-none-any.whl
 # 或者，从项目源码安装
 # git clone https://github.com/PaddlePaddle/PaddleOCR.git
@ -99,14 +99,7 @@ pip install https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/pa
 paddleocr_mcp --help
 ```
-如果执行上述命令后打印出了帮助信息，则说明安装成功。本项目依赖 python-magic 库。如果在执行上述命令时出现如下错误提示：
+如果执行上述命令后打印出了帮助信息，则说明安装成功。
-```
-...
-ImportError: failed to find libmagic.  Check your installation
-```
-很可能是因为缺少 python-magic 库所需的底层库。请参考 [python-magic 官方文档](https://github.com/ahupp/python-magic?tab=readme-ov-file#installation) 完成相应依赖库的安装。
 ## 2. 在 Claude for Desktop 中使用
@ -290,7 +283,7 @@ ImportError: failed to find libmagic.  Check your installation
          "command": "uvx",
          "args": [
            "--from",
-            "paddleocr-mcp@https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.1.0/paddleocr_mcp-0.1.0-py3-none-any.whl",
+            "paddleocr-mcp@https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.2.0/paddleocr_mcp-0.2.0-py3-none-any.whl",
            "paddleocr_mcp"
          ],
          "env": {
--- a/mcp_server/paddleocr_mcp/pipelines.py
+++ b/mcp_server/paddleocr_mcp/pipelines.py
@ -31,8 +31,8 @@ from typing import Any, Callable, Dict, List, NoReturn, Optional, Type, Union
 from urllib.parse import urlparse
 import httpx
-import magic
 import numpy as np
+import puremagic
 from fastmcp import Context, FastMCP
 from mcp.types import ImageContent, TextContent
 from PIL import Image as PILImage
@ -70,7 +70,7 @@ def _is_url(s: str) -> bool:
 def _infer_file_type_from_bytes(data: bytes) -> Optional[str]:
-    mime = magic.from_buffer(data, mime=True)
+    mime = puremagic.from_string(data, mime=True)
    if mime.startswith("image/"):
        return "image"
    elif mime == "application/pdf":
@ -411,7 +411,7 @@ class SimpleInferencePipelineHandler(PipelineHandler):
                image_arr = np.array(image_pil.convert("RGB"))
                return np.ascontiguousarray(image_arr[..., ::-1])
            except Exception as e:
-                raise ValueError(f"Failed to decode Base64 image: {e}")
+                raise ValueError(f"Failed to decode Base64 image: {str(e)}") from e
        elif _is_file_path(input_data) or _is_url(input_data):
            return input_data
        else:
@ -439,11 +439,11 @@ class SimpleInferencePipelineHandler(PipelineHandler):
                if file_type_str is None:
                    raise ValueError(
                        "Unsupported file type in Base64 data. "
-                        "Only image files (JPEG, PNG, etc.) and PDF documents are supported."
+                        "Only images (JPEG, PNG, etc.) and PDF documents are supported."
                    )
                return input_data, file_type_str
            except Exception as e:
-                raise ValueError(f"Failed to decode Base64 data: {e}")
+                raise ValueError(f"Failed to decode Base64 data: {str(e)}") from e
        elif _is_file_path(input_data):
            try:
                with open(input_data, "rb") as f:
@ -453,11 +453,11 @@ class SimpleInferencePipelineHandler(PipelineHandler):
                if file_type_str is None:
                    raise ValueError(
                        f"Unsupported file type for '{input_data}'. "
-                        "Only image files (JPEG, PNG, etc.) and PDF documents are supported."
+                        "Only images (JPEG, PNG, etc.) and PDF documents are supported."
                    )
                return input_data, file_type_str
            except Exception as e:
-                raise ValueError(f"Failed to read file: {e}")
+                raise ValueError(f"Failed to read file: {str(e)}") from e
        else:
            raise ValueError("Invalid input data format")
@ -776,7 +776,9 @@ class PPStructureV3Handler(SimpleInferencePipelineHandler):
                    img_bytes = response.content
                    return base64.b64encode(img_bytes).decode("ascii")
            except Exception as e:
-                await ctx.error(f"Failed to download image from URL {img_data}: {e}")
+                await ctx.error(
+                    f"Failed to download image from URL {img_data}: {str(e)}"
+                )
                return img_data
        elif _is_base64(img_data):
            return img_data
--- a/mcp_server/pyproject.toml
+++ b/mcp_server/pyproject.toml
@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "paddleocr_mcp"
-version = "0.1.0"
+version = "0.2.0"
 requires-python = ">=3.10"
 dependencies = [
    "mcp>=1.5.0",
@ -12,7 +12,7 @@ dependencies = [
    "httpx>=0.24.0",
    "numpy>=1.24.0",
    "pillow>=9.0.0",
-    "python-magic>=0.4.24",
+    "puremagic>=1.30.0",
    "typing-extensions>=4.0.0",
 ]