Mirror of https://github.com/PaddlePaddle/PaddleOCR.git
Merge pull request #5410 from Intsigstephon/dygraph
add fleet tests for supplementary
Commit: f383963f4a
@ -47,6 +47,13 @@ bash test_tipc/test_train_python.sh ./test_tipc/train_infer_python_PACT.txt 'lit
bash test_tipc/test_train_python.sh ./test_tipc/train_infer_python_FPGM.txt 'lite_train_lite_infer'
```

The configuration files for multi-machine multi-GPU runs are `train_infer_python_fleet.txt`, `train_infer_python_FPGM_fleet.txt`, and `train_infer_python_PACT_fleet.txt`, respectively.
Before running, modify the `gpu_list:xx.xx.xx.xx,yy.yy.yy.yy;0,1` entry in the configuration file: replace `xx.xx.xx.xx` with the actual `ip` addresses, separated by `,`.
In addition, unlike single-machine training, launching multi-machine multi-GPU training requires running the command on every node separately. Taking multi-machine multi-GPU quantization training as an example, the command is:
```
bash test_tipc/test_train_python.sh ./test_tipc/train_infer_python_PACT_fleet.txt 'lite_train_lite_infer'
```
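For example, a filled-in `gpu_list` entry for a hypothetical two-node setup (the IP addresses below are placeholders for illustration, not values from this PR) would look like:
```
gpu_list:192.168.0.1,192.168.0.2;0,1
```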

After running the corresponding command, the run logs are automatically saved under the `test_tipc/extra_output` folder. For example, after running in 'lite_train_lite_infer' mode, the `test_tipc/extra_output` folder contains the following files:

```
@ -35,7 +35,6 @@ use_share_conv_key=$(func_parser_key "${lines[13]}")
use_share_conv_list=$(func_parser_value "${lines[13]}")
run_train_py=$(func_parser_value "${lines[14]}")


LOG_PATH="./test_tipc/extra_output"
mkdir -p ${LOG_PATH}
status_log="${LOG_PATH}/results_python.log"
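As a usage note, the run status written to `status_log` above can be inspected after a test run; a minimal check, assuming the default paths defined in this script and that you run from the repository root:
```
# Show the status log produced by the supplementary test driver
# (path comes from LOG_PATH and status_log defined above).
cat ./test_tipc/extra_output/results_python.log
```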
@ -98,6 +97,8 @@ if [ ${MODE} = "lite_train_lite_infer" ] || [ ${MODE} = "whole_train_whole_infer
    cmd="${python} ${run_train_py} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_checkpoints} ${set_autocast} ${set_batchsize} ${set_use_custom_op} ${set_model_type} ${set_use_share_conv} ${set_amp_config}"
elif [ ${#ips} -le 26 ];then # train with multi-gpu
    cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train_py} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_checkpoints} ${set_autocast} ${set_batchsize} ${set_use_custom_op} ${set_model_type} ${set_use_share_conv} ${set_amp_config}"
else
    cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train_py} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_checkpoints} ${set_autocast} ${set_batchsize} ${set_use_custom_op} ${set_model_type} ${set_use_share_conv} ${set_amp_config}"
fi

# run train
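For orientation, here is a minimal sketch of what the multi-machine branch above could expand to at run time. The two IP addresses are hypothetical placeholders parsed from `gpu_list`, the training command comes from the `norm_train` entry of one of the fleet configs added below, and the `${set_*}` options that the driver appends are omitted:
```
# Hypothetical expansion of the multi-machine cmd; the IPs are placeholders and
# the ${set_*} options normally appended by the driver are left out for brevity.
python3.7 -m paddle.distributed.launch \
    --ips=192.168.0.1,192.168.0.2 \
    --gpus=0,1 \
    train.py -c mv3_large_x0_5.yml -o quant_train=True
```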
@ -4,9 +4,9 @@ python:python3.7
gpu_list:0|0,1
use_gpu:True|True
AMP.use_amp:True|False
epoch:lite_train_lite_infer=20|whole_train_whole_infer=1000
epoch:lite_train_lite_infer=2|whole_train_whole_infer=1000
save_model_dir:./output/
TRAIN.batch_size:lite_train_lite_infer=2|whole_train_whole_infer=4
TRAIN.batch_size:lite_train_lite_infer=1280|whole_train_whole_infer=1280
pretrained_model:null
checkpoints:null
use_custom_relu:False|True
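For readers unfamiliar with the TIPC config format above, each line is a `key:value` pair, and the value may carry per-mode settings separated by `|`. A minimal sketch of splitting such a line (an illustration only, not the actual `func_parser_key`/`func_parser_value` helpers from `common_func.sh`):
```
# Split one config line into key and raw value; per-mode values stay "a=1|b=2".
line="epoch:lite_train_lite_infer=2|whole_train_whole_infer=1000"
key=$(echo "${line}" | cut -d ":" -f 1)      # -> epoch
value=$(echo "${line}" | cut -d ":" -f 2-)   # -> lite_train_lite_infer=2|whole_train_whole_infer=1000
echo "${key} = ${value}"
```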
@ -0,0 +1,17 @@
===========================train_params===========================
model_name:ch_PPOCRv2_det
python:python3.7
gpu_list:xx.xx.xx.xx,yy.yy.yy.yy;0,1
use_gpu:True
AMP.use_amp:True|False
epoch:lite_train_lite_infer=2|whole_train_whole_infer=1000
save_model_dir:./output/
TRAIN.batch_size:lite_train_lite_infer=1280|whole_train_whole_infer=1280
pretrained_model:null
checkpoints:null
use_custom_relu:False|True
model_type:cls|cls_distill|cls_distill_multiopt
MODEL.siamese:False|True
norm_train:train.py -c mv3_large_x0_5.yml -o prune_train=True
quant_train:False
prune_train:False
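Judging from the `prune_train=True` entry in `norm_train`, this appears to be the FPGM fleet configuration named in the documentation above; following the documented pattern, it would presumably be run on every node with:
```
bash test_tipc/test_train_python.sh ./test_tipc/train_infer_python_FPGM_fleet.txt 'lite_train_lite_infer'
```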
@ -4,9 +4,9 @@ python:python3.7
gpu_list:0|0,1
use_gpu:True|True
AMP.use_amp:True|False
epoch:lite_train_lite_infer=20|whole_train_whole_infer=1000
epoch:lite_train_lite_infer=2|whole_train_whole_infer=1000
save_model_dir:./output/
TRAIN.batch_size:lite_train_lite_infer=2|whole_train_whole_infer=4
TRAIN.batch_size:lite_train_lite_infer=1280|whole_train_whole_infer=1280
pretrained_model:null
checkpoints:null
use_custom_relu:False|True
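Complementing the key/value split sketched earlier, once the run mode is known the per-mode portion of a value such as `lite_train_lite_infer=2|whole_train_whole_infer=1000` can be picked out; again a hedged sketch, not the actual TIPC helper:
```
# Pick the value for the active mode out of "modeA=x|modeB=y" (illustrative only).
MODE="lite_train_lite_infer"
value="lite_train_lite_infer=2|whole_train_whole_infer=1000"
epoch=$(echo "${value}" | tr "|" "\n" | grep "^${MODE}=" | cut -d "=" -f 2)
echo "${epoch}"   # -> 2
```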
@ -0,0 +1,17 @@
===========================train_params===========================
model_name:ch_PPOCRv2_det
python:python3.7
gpu_list:xx.xx.xx.xx,yy.yy.yy.yy;0,1
use_gpu:True
AMP.use_amp:True|False
epoch:lite_train_lite_infer=2|whole_train_whole_infer=1000
save_model_dir:./output/
TRAIN.batch_size:lite_train_lite_infer=1280|whole_train_whole_infer=1280
pretrained_model:null
checkpoints:null
use_custom_relu:False|True
model_type:cls|cls_distill|cls_distill_multiopt
MODEL.siamese:False|True
norm_train:train.py -c mv3_large_x0_5.yml -o quant_train=True
quant_train:False
prune_train:False
@ -0,0 +1,17 @@
===========================train_params===========================
model_name:ch_PPOCRv2_det
python:python3.7
gpu_list:xx.xx.xx.xx,yy.yy.yy.yy;0,1
use_gpu:True
AMP.use_amp:True|False
epoch:lite_train_lite_infer=2|whole_train_whole_infer=1000
save_model_dir:./output/
TRAIN.batch_size:lite_train_lite_infer=1280|whole_train_whole_infer=1280
pretrained_model:null
checkpoints:null
use_custom_relu:False|True
model_type:cls|cls_distill|cls_distill_multiopt
MODEL.siamese:False|True
norm_train: train.py -c mv3_large_x0_5.yml -o
quant_train:False
prune_train:False