Update PP-OCRv4 supported languages and default models (#15561)

Co-authored-by: Sam-gsj <Sam-gsj@users.noreply.github.com>
This commit is contained in:
Lin Manhui 2025-06-04 13:56:15 +08:00 committed by GitHub
parent 3532870f8b
commit fa621efe0e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 16 additions and 1 deletion

View File

@@ -301,7 +301,10 @@ class PaddleOCR(PaddleXPipelineWrapper):
ppocr_version = "PP-OCRv5"
if ppocr_version == "PP-OCRv5":
return "PP-OCRv5_mobile_det", "PP-OCRv5_mobile_rec"
if lang in ("ch", "chinese_cht", "en", "japan"):
return "PP-OCRv5_server_det", "PP-OCRv5_server_rec"
else:
return None, None
elif ppocr_version == "PP-OCRv4":
if lang == "ch":
return "PP-OCRv4_mobile_det", "PP-OCRv4_mobile_rec"

View File

@@ -71,6 +71,18 @@ def test_predict_params(
def test_lang_and_ocr_version():
ocr_engine = PaddleOCR(lang="ch", ocr_version="PP-OCRv5")
assert ocr_engine._params["text_detection_model_name"] == "PP-OCRv5_server_det"
assert ocr_engine._params["text_recognition_model_name"] == "PP-OCRv5_server_rec"
ocr_engine = PaddleOCR(lang="chinese_cht", ocr_version="PP-OCRv5")
assert ocr_engine._params["text_detection_model_name"] == "PP-OCRv5_server_det"
assert ocr_engine._params["text_recognition_model_name"] == "PP-OCRv5_server_rec"
ocr_engine = PaddleOCR(lang="en", ocr_version="PP-OCRv5")
assert ocr_engine._params["text_detection_model_name"] == "PP-OCRv5_server_det"
assert ocr_engine._params["text_recognition_model_name"] == "PP-OCRv5_server_rec"
ocr_engine = PaddleOCR(lang="japan", ocr_version="PP-OCRv5")
assert ocr_engine._params["text_detection_model_name"] == "PP-OCRv5_server_det"
assert ocr_engine._params["text_recognition_model_name"] == "PP-OCRv5_server_rec"
ocr_engine = PaddleOCR(lang="ch", ocr_version="PP-OCRv4")
assert ocr_engine._params["text_detection_model_name"] == "PP-OCRv4_mobile_det"
assert ocr_engine._params["text_recognition_model_name"] == "PP-OCRv4_mobile_rec"