help_="Device(s) to use for inference, e.g., `cpu`, `gpu`, `npu`, `gpu:0`, `gpu:0,1`. If multiple devices are specified, inference will be performed in parallel. Note that parallel inference is not always supported. By default, GPU 0 will be used if available; otherwise, the CPU will be used."
else:
help_="Device to use for inference, e.g., `cpu`, `gpu`, `npu`, `gpu:0`. By default, GPU 0 will be used if available; otherwise, the CPU will be used."
help="Whether to use the Paddle Inference TensorRT subgraph engine.",
)
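# The device strings documented above follow a `<type>[:<id>[,<id>...]]`
# grammar. A minimal sketch of how such a string could be interpreted
# (hypothetical helper for illustration only; the actual parsing is done by
# the inference backend, not by this module):
def _parse_device(device):
    # `gpu:0,1` -> ("gpu", [0, 1]); `cpu` -> ("cpu", [])
    dev_type, _, id_part = device.partition(":")
    ids = [int(i) for i in id_part.split(",")] if id_part else []
    return dev_type, ids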
parser.add_argument(
"--min_subgraph_size",
type=int,
default=DEFAULT_MIN_SUBGRAPH_SIZE,
help="Minimum subgraph size for TensorRT when using the Paddle Inference TensorRT subgraph engine.",
)
parser.add_argument(
"--precision",
type=str,
default=DEFAULT_PRECISION,
choices=SUPPORTED_PRECISION_LIST,
help="Precision for TensorRT when using the Paddle Inference TensorRT subgraph engine.",
)
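# For reference, the TensorRT options above are typically combined on the
# command line along these lines (program name and values are illustrative;
# valid precisions come from SUPPORTED_PRECISION_LIST):
#   <prog> --use_tensorrt True --min_subgraph_size 15 --precision fp16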
parser.add_argument(
"--enable_mkldnn",
type=str2bool,
default=DEFAULT_ENABLE_MKLDNN,
help="Enable oneDNN (formerly MKL-DNN) acceleration for inference. By default, oneDNN will be used when available, except for models and pipelines that have known oneDNN issues.",
)
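# `str2bool` (defined elsewhere in this module) is the usual argparse
# workaround for boolean options: `type=bool` would turn any non-empty string,
# including "False", into True. A typical implementation looks roughly like
# this sketch (an assumption; the real definition may differ):
#
#     def str2bool(v):
#         if v.lower() in ("1", "true", "t", "yes", "y"):
#             return True
#         if v.lower() in ("0", "false", "f", "no", "n"):
#             return False
#         raise argparse.ArgumentTypeError(f"invalid boolean value: {v!r}")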
parser.add_argument(
"--cpu_threads",
type=int,
default=DEFAULT_CPU_THREADS,
help="Number of threads to use for inference on CPUs.",