# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddlex.inference import PaddlePredictorOption
from paddlex.utils.device import get_default_device, parse_device

from ._constants import (
    DEFAULT_CPU_THREADS,
    DEFAULT_DEVICE,
    DEFAULT_ENABLE_MKLDNN,
    DEFAULT_MIN_SUBGRAPH_SIZE,
    DEFAULT_PRECISION,
    DEFAULT_USE_TENSORRT,
    SUPPORTED_PRECISION_LIST,
)
from ._utils.cli import str2bool
def parse_common_args(kwargs, *, default_enable_hpi):
    """Validate and normalize the common inference keyword arguments.

    Args:
        kwargs: Mapping of user-supplied argument names to values. Only the
            names listed in ``default_vals`` below are accepted.
        default_enable_hpi: Default value used for ``enable_hpi`` when the
            caller did not supply one.

    Returns:
        A new dict with defaults filled in and the TensorRT-related keys
        renamed to their internal names (``use_pptrt``,
        ``pptrt_min_subgraph_size``, ``pptrt_precision``).

    Raises:
        ValueError: If an unknown argument name is present, or if
            ``precision`` is not in ``SUPPORTED_PRECISION_LIST``.
    """
    default_vals = {
        "device": DEFAULT_DEVICE,
        "enable_hpi": default_enable_hpi,
        "use_tensorrt": DEFAULT_USE_TENSORRT,
        "min_subgraph_size": DEFAULT_MIN_SUBGRAPH_SIZE,
        "precision": DEFAULT_PRECISION,
        "enable_mkldnn": DEFAULT_ENABLE_MKLDNN,
        "cpu_threads": DEFAULT_CPU_THREADS,
    }
    unknown_names = kwargs.keys() - default_vals.keys()
    if unknown_names:
        # Report every unknown name at once (sorted for a deterministic
        # message) instead of only an arbitrary first one from the set.
        raise ValueError(
            f"Unknown argument(s): {', '.join(sorted(unknown_names))}"
        )
    kwargs = {**default_vals, **kwargs}
    if kwargs["precision"] not in SUPPORTED_PRECISION_LIST:
        raise ValueError(
            f"Invalid precision: {kwargs['precision']}. Supported values are: {SUPPORTED_PRECISION_LIST}."
        )
    # Rename the user-facing TensorRT options to the internal key names
    # expected by `prepare_common_init_args`.
    kwargs["use_pptrt"] = kwargs.pop("use_tensorrt")
    kwargs["pptrt_min_subgraph_size"] = kwargs.pop("min_subgraph_size")
    kwargs["pptrt_precision"] = kwargs.pop("precision")
    return kwargs
def prepare_common_init_args(model_name, common_args):
    """Build predictor initialization kwargs from parsed common args.

    Args:
        model_name: Model name, forwarded to ``PaddlePredictorOption``.
        common_args: Dict produced by ``parse_common_args``.

    Returns:
        Dict with ``device``, ``use_hpip``, and a configured ``pp_option``.
    """
    requested_device = common_args["device"]
    if requested_device is None:
        requested_device = get_default_device()
    device_type, _ = parse_device(requested_device)

    # NOTE(review): common_args["pptrt_min_subgraph_size"] is produced by
    # `parse_common_args` but never applied here — confirm this is intended.
    pp_option = PaddlePredictorOption(model_name)
    if device_type == "gpu":
        if common_args["use_pptrt"]:
            precision = common_args["pptrt_precision"]
            if precision == "fp32":
                pp_option.run_mode = "trt_fp32"
            else:
                # `parse_common_args` restricts precision to fp32/fp16.
                assert precision == "fp16", precision
                pp_option.run_mode = "trt_fp16"
    elif device_type == "cpu":
        pp_option.run_mode = "mkldnn" if common_args["enable_mkldnn"] else "paddle"
        pp_option.cpu_threads = common_args["cpu_threads"]

    return {
        "device": requested_device,
        "use_hpip": common_args["enable_hpi"],
        "pp_option": pp_option,
    }
def add_common_cli_opts(parser, *, default_enable_hpi, allow_multiple_devices):
    """Register the common inference options on an argparse parser.

    Args:
        parser: ``argparse.ArgumentParser`` (or compatible) to extend.
        default_enable_hpi: Default for the ``--enable_hpi`` flag.
        allow_multiple_devices: Whether the ``--device`` help text should
            describe the multi-device (parallel inference) syntax.
    """
    if allow_multiple_devices:
        device_help = "Device(s) to use for inference, e.g., `cpu`, `gpu`, `npu`, `gpu:0`, `gpu:0,1`. If multiple devices are specified, inference will be performed in parallel. Note that parallel inference is not always supported. By default, GPU 0 will be used if available; otherwise, the CPU will be used."
    else:
        device_help = "Device to use for inference, e.g., `cpu`, `gpu`, `npu`, `gpu:0`. By default, GPU 0 will be used if available; otherwise, the CPU will be used."

    # One (flag, kwargs) spec per option; registration order matches the
    # original explicit add_argument sequence.
    option_specs = [
        (
            "--device",
            {"type": str, "default": DEFAULT_DEVICE, "help": device_help},
        ),
        (
            "--enable_hpi",
            {
                "type": str2bool,
                "default": default_enable_hpi,
                "help": "Enable the high performance inference.",
            },
        ),
        (
            "--use_tensorrt",
            {
                "type": str2bool,
                "default": DEFAULT_USE_TENSORRT,
                "help": "Whether to use the Paddle Inference TensorRT subgraph engine.",
            },
        ),
        (
            "--min_subgraph_size",
            {
                "type": int,
                "default": DEFAULT_MIN_SUBGRAPH_SIZE,
                "help": "Minimum subgraph size for TensorRT when using the Paddle Inference TensorRT subgraph engine.",
            },
        ),
        (
            "--precision",
            {
                "type": str,
                "default": DEFAULT_PRECISION,
                "choices": SUPPORTED_PRECISION_LIST,
                "help": "Precision for TensorRT when using the Paddle Inference TensorRT subgraph engine.",
            },
        ),
        (
            "--enable_mkldnn",
            {
                "type": str2bool,
                "default": DEFAULT_ENABLE_MKLDNN,
                "help": "Enable oneDNN (formerly MKL-DNN) acceleration for inference.",
            },
        ),
        (
            "--cpu_threads",
            {
                "type": int,
                "default": DEFAULT_CPU_THREADS,
                "help": "Number of threads to use for inference on CPUs.",
            },
        ),
    ]
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)