Replace python-magic with puremagic (#15990)

This commit is contained in:
Lin Manhui 2025-07-08 14:16:25 +08:00 committed by GitHub
parent fc97e5c2d8
commit b34d8fa796
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 18 additions and 30 deletions

View File

@ -85,7 +85,7 @@ To install `paddleocr-mcp` using pip:
```bash
# Install the wheel
pip install https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.1.0/paddleocr_mcp-0.1.0-py3-none-any.whl
pip install https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.2.0/paddleocr_mcp-0.2.0-py3-none-any.whl
# Or install from source
# git clone https://github.com/PaddlePaddle/PaddleOCR.git
@ -98,14 +98,7 @@ To verify successful installation:
paddleocr_mcp --help
```
If the help message is printed, the installation succeeded. This project depends on the `python-magic` library. If you see the following error:
```
...
ImportError: failed to find libmagic. Check your installation
```
You are likely missing a required native library for python-magic. Please refer to the [official python-magic documentation](https://github.com/ahupp/python-magic?tab=readme-ov-file#installation) for installation instructions.
If the help message is printed, the installation succeeded.
## 2. Using with Claude for Desktop
@ -293,7 +286,7 @@ Currently, for both the AI Studio and self-hosted modes, starting the MCP server
"command": "uvx",
"args": [
"--from",
"paddleocr-mcp@https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.1.0/paddleocr_mcp-0.1.0-py3-none-any.whl",
"paddleocr-mcp@https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.2.0/paddleocr_mcp-0.2.0-py3-none-any.whl",
"paddleocr_mcp"
],
"env": {

View File

@ -86,7 +86,7 @@
```bash
# 安装 wheel 包
pip install https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.1.0/paddleocr_mcp-0.1.0-py3-none-any.whl
pip install https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.2.0/paddleocr_mcp-0.2.0-py3-none-any.whl
# 或者,从项目源码安装
# git clone https://github.com/PaddlePaddle/PaddleOCR.git
@ -99,14 +99,7 @@ pip install https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/pa
paddleocr_mcp --help
```
如果执行上述命令后打印出了帮助信息,则说明安装成功。本项目依赖 python-magic 库。如果在执行上述命令时出现如下错误提示:
```
...
ImportError: failed to find libmagic. Check your installation
```
很可能是因为缺少 python-magic 库所需的底层库。请参考 [python-magic 官方文档](https://github.com/ahupp/python-magic?tab=readme-ov-file#installation) 完成相应依赖库的安装。
如果执行上述命令后打印出了帮助信息,则说明安装成功。
## 2. 在 Claude for Desktop 中使用
@ -290,7 +283,7 @@ ImportError: failed to find libmagic. Check your installation
"command": "uvx",
"args": [
"--from",
"paddleocr-mcp@https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.1.0/paddleocr_mcp-0.1.0-py3-none-any.whl",
"paddleocr-mcp@https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/mcp/paddleocr_mcp/releases/v0.2.0/paddleocr_mcp-0.2.0-py3-none-any.whl",
"paddleocr_mcp"
],
"env": {

View File

@ -31,8 +31,8 @@ from typing import Any, Callable, Dict, List, NoReturn, Optional, Type, Union
from urllib.parse import urlparse
import httpx
import magic
import numpy as np
import puremagic
from fastmcp import Context, FastMCP
from mcp.types import ImageContent, TextContent
from PIL import Image as PILImage
@ -70,7 +70,7 @@ def _is_url(s: str) -> bool:
def _infer_file_type_from_bytes(data: bytes) -> Optional[str]:
mime = magic.from_buffer(data, mime=True)
mime = puremagic.from_string(data, mime=True)
if mime.startswith("image/"):
return "image"
elif mime == "application/pdf":
@ -411,7 +411,7 @@ class SimpleInferencePipelineHandler(PipelineHandler):
image_arr = np.array(image_pil.convert("RGB"))
return np.ascontiguousarray(image_arr[..., ::-1])
except Exception as e:
raise ValueError(f"Failed to decode Base64 image: {e}")
raise ValueError(f"Failed to decode Base64 image: {str(e)}") from e
elif _is_file_path(input_data) or _is_url(input_data):
return input_data
else:
@ -439,11 +439,11 @@ class SimpleInferencePipelineHandler(PipelineHandler):
if file_type_str is None:
raise ValueError(
"Unsupported file type in Base64 data. "
"Only image files (JPEG, PNG, etc.) and PDF documents are supported."
"Only images (JPEG, PNG, etc.) and PDF documents are supported."
)
return input_data, file_type_str
except Exception as e:
raise ValueError(f"Failed to decode Base64 data: {e}")
raise ValueError(f"Failed to decode Base64 data: {str(e)}") from e
elif _is_file_path(input_data):
try:
with open(input_data, "rb") as f:
@ -453,11 +453,11 @@ class SimpleInferencePipelineHandler(PipelineHandler):
if file_type_str is None:
raise ValueError(
f"Unsupported file type for '{input_data}'. "
"Only image files (JPEG, PNG, etc.) and PDF documents are supported."
"Only images (JPEG, PNG, etc.) and PDF documents are supported."
)
return input_data, file_type_str
except Exception as e:
raise ValueError(f"Failed to read file: {e}")
raise ValueError(f"Failed to read file: {str(e)}") from e
else:
raise ValueError("Invalid input data format")
@ -776,7 +776,9 @@ class PPStructureV3Handler(SimpleInferencePipelineHandler):
img_bytes = response.content
return base64.b64encode(img_bytes).decode("ascii")
except Exception as e:
await ctx.error(f"Failed to download image from URL {img_data}: {e}")
await ctx.error(
f"Failed to download image from URL {img_data}: {str(e)}"
)
return img_data
elif _is_base64(img_data):
return img_data

View File

@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "paddleocr_mcp"
version = "0.1.0"
version = "0.2.0"
requires-python = ">=3.10"
dependencies = [
"mcp>=1.5.0",
@ -12,7 +12,7 @@ dependencies = [
"httpx>=0.24.0",
"numpy>=1.24.0",
"pillow>=9.0.0",
"python-magic>=0.4.24",
"puremagic>=1.30.0",
"typing-extensions>=4.0.0",
]