Replace python-magic with puremagic (#15990)

This commit is contained in:
Lin Manhui 2025-07-08 14:16:25 +08:00 committed by GitHub
parent fc97e5c2d8
commit b34d8fa796
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 18 additions and 30 deletions

View File

@@ -85,7 +85,7 @@ To install `paddleocr-mcp` using pip:
```bash ```bash
# Install the wheel # Install the wheel
pip install https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.1.0/paddleocr_mcp-0.1.0-py3-none-any.whl pip install https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.2.0/paddleocr_mcp-0.2.0-py3-none-any.whl
# Or install from source # Or install from source
# git clone https://github.com/PaddlePaddle/PaddleOCR.git # git clone https://github.com/PaddlePaddle/PaddleOCR.git
@@ -98,14 +98,7 @@ To verify successful installation:
paddleocr_mcp --help paddleocr_mcp --help
``` ```
If the help message is printed, the installation succeeded. This project depends on the `python-magic` library. If you see the following error: If the help message is printed, the installation succeeded.
```
...
ImportError: failed to find libmagic. Check your installation
```
You are likely missing a required native library for python-magic. Please refer to the [official python-magic documentation](https://github.com/ahupp/python-magic?tab=readme-ov-file#installation) for installation instructions.
## 2. Using with Claude for Desktop ## 2. Using with Claude for Desktop
@@ -293,7 +286,7 @@ Currently, for both the AI Studio and self-hosted modes, starting the MCP server
"command": "uvx", "command": "uvx",
"args": [ "args": [
"--from", "--from",
"paddleocr-mcp@https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.1.0/paddleocr_mcp-0.1.0-py3-none-any.whl", "paddleocr-mcp@https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.2.0/paddleocr_mcp-0.2.0-py3-none-any.whl",
"paddleocr_mcp" "paddleocr_mcp"
], ],
"env": { "env": {

View File

@@ -86,7 +86,7 @@
```bash ```bash
# 安装 wheel 包 # 安装 wheel 包
pip install https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.1.0/paddleocr_mcp-0.1.0-py3-none-any.whl pip install https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.2.0/paddleocr_mcp-0.2.0-py3-none-any.whl
# 或者,从项目源码安装 # 或者,从项目源码安装
# git clone https://github.com/PaddlePaddle/PaddleOCR.git # git clone https://github.com/PaddlePaddle/PaddleOCR.git
@@ -99,14 +99,7 @@ pip install https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/pa
paddleocr_mcp --help paddleocr_mcp --help
``` ```
如果执行上述命令后打印出了帮助信息,则说明安装成功。本项目依赖 python-magic 库。如果在执行上述命令时出现如下错误提示: 如果执行上述命令后打印出了帮助信息,则说明安装成功。
```
...
ImportError: failed to find libmagic. Check your installation
```
很可能是因为缺少 python-magic 库所需的底层库。请参考 [python-magic 官方文档](https://github.com/ahupp/python-magic?tab=readme-ov-file#installation) 完成相应依赖库的安装。
## 2. 在 Claude for Desktop 中使用 ## 2. 在 Claude for Desktop 中使用
@@ -290,7 +283,7 @@ ImportError: failed to find libmagic. Check your installation
"command": "uvx", "command": "uvx",
"args": [ "args": [
"--from", "--from",
"paddleocr-mcp@https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.1.0/paddleocr_mcp-0.1.0-py3-none-any.whl", "paddleocr-mcp@https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.2.0/paddleocr_mcp-0.2.0-py3-none-any.whl",
"paddleocr_mcp" "paddleocr_mcp"
], ],
"env": { "env": {

View File

@@ -31,8 +31,8 @@ from typing import Any, Callable, Dict, List, NoReturn, Optional, Type, Union
from urllib.parse import urlparse from urllib.parse import urlparse
import httpx import httpx
import magic
import numpy as np import numpy as np
import puremagic
from fastmcp import Context, FastMCP from fastmcp import Context, FastMCP
from mcp.types import ImageContent, TextContent from mcp.types import ImageContent, TextContent
from PIL import Image as PILImage from PIL import Image as PILImage
@@ -70,7 +70,7 @@ def _is_url(s: str) -> bool:
def _infer_file_type_from_bytes(data: bytes) -> Optional[str]: def _infer_file_type_from_bytes(data: bytes) -> Optional[str]:
mime = magic.from_buffer(data, mime=True) mime = puremagic.from_string(data, mime=True)
if mime.startswith("image/"): if mime.startswith("image/"):
return "image" return "image"
elif mime == "application/pdf": elif mime == "application/pdf":
@@ -411,7 +411,7 @@ class SimpleInferencePipelineHandler(PipelineHandler):
image_arr = np.array(image_pil.convert("RGB")) image_arr = np.array(image_pil.convert("RGB"))
return np.ascontiguousarray(image_arr[..., ::-1]) return np.ascontiguousarray(image_arr[..., ::-1])
except Exception as e: except Exception as e:
raise ValueError(f"Failed to decode Base64 image: {e}") raise ValueError(f"Failed to decode Base64 image: {str(e)}") from e
elif _is_file_path(input_data) or _is_url(input_data): elif _is_file_path(input_data) or _is_url(input_data):
return input_data return input_data
else: else:
@@ -439,11 +439,11 @@ class SimpleInferencePipelineHandler(PipelineHandler):
if file_type_str is None: if file_type_str is None:
raise ValueError( raise ValueError(
"Unsupported file type in Base64 data. " "Unsupported file type in Base64 data. "
"Only image files (JPEG, PNG, etc.) and PDF documents are supported." "Only images (JPEG, PNG, etc.) and PDF documents are supported."
) )
return input_data, file_type_str return input_data, file_type_str
except Exception as e: except Exception as e:
raise ValueError(f"Failed to decode Base64 data: {e}") raise ValueError(f"Failed to decode Base64 data: {str(e)}") from e
elif _is_file_path(input_data): elif _is_file_path(input_data):
try: try:
with open(input_data, "rb") as f: with open(input_data, "rb") as f:
@@ -453,11 +453,11 @@ class SimpleInferencePipelineHandler(PipelineHandler):
if file_type_str is None: if file_type_str is None:
raise ValueError( raise ValueError(
f"Unsupported file type for '{input_data}'. " f"Unsupported file type for '{input_data}'. "
"Only image files (JPEG, PNG, etc.) and PDF documents are supported." "Only images (JPEG, PNG, etc.) and PDF documents are supported."
) )
return input_data, file_type_str return input_data, file_type_str
except Exception as e: except Exception as e:
raise ValueError(f"Failed to read file: {e}") raise ValueError(f"Failed to read file: {str(e)}") from e
else: else:
raise ValueError("Invalid input data format") raise ValueError("Invalid input data format")
@@ -776,7 +776,9 @@ class PPStructureV3Handler(SimpleInferencePipelineHandler):
img_bytes = response.content img_bytes = response.content
return base64.b64encode(img_bytes).decode("ascii") return base64.b64encode(img_bytes).decode("ascii")
except Exception as e: except Exception as e:
await ctx.error(f"Failed to download image from URL {img_data}: {e}") await ctx.error(
f"Failed to download image from URL {img_data}: {str(e)}"
)
return img_data return img_data
elif _is_base64(img_data): elif _is_base64(img_data):
return img_data return img_data

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "paddleocr_mcp" name = "paddleocr_mcp"
version = "0.1.0" version = "0.2.0"
requires-python = ">=3.10" requires-python = ">=3.10"
dependencies = [ dependencies = [
"mcp>=1.5.0", "mcp>=1.5.0",
@@ -12,7 +12,7 @@ dependencies = [
"httpx>=0.24.0", "httpx>=0.24.0",
"numpy>=1.24.0", "numpy>=1.24.0",
"pillow>=9.0.0", "pillow>=9.0.0",
"python-magic>=0.4.24", "puremagic>=1.30.0",
"typing-extensions>=4.0.0", "typing-extensions>=4.0.0",
] ]