support benchmark for paddlepaddle3.0 (#13574)

changdazhou 2024-08-02 19:24:40 +08:00 committed by GitHub
parent d69bf81907
commit b6211b936b
4 changed files with 22 additions and 16 deletions


@@ -2,8 +2,6 @@ import os
 from .base_logger import BaseLogger
 from ppocr.utils.logging import get_logger
-logger = get_logger()
 class WandbLogger(BaseLogger):
     def __init__(
@@ -40,6 +38,7 @@ class WandbLogger(BaseLogger):
             resume="allow",
         )
         self._wandb_init.update(**kwargs)
+        self.logger = get_logger()
         _ = self.run
@@ -50,7 +49,7 @@ class WandbLogger(BaseLogger):
     def run(self):
         if self._run is None:
             if self.wandb.run is not None:
-                logger.info(
+                self.logger.info(
                     "There is a wandb run already in progress "
                     "and newly created instances of `WandbLogger` will reuse"
                     " this run. If this is not desired, call `wandb.finish()`"


@@ -26,7 +26,7 @@ logger_initialized = {}
 @functools.lru_cache()
-def get_logger(name="ppocr", log_file=None, log_level=logging.DEBUG):
+def get_logger(name="ppocr", log_file=None, log_level=logging.DEBUG, log_ranks="0"):
     """Initialize and get a logger by name.
     If the logger has not been initialized, this method will initialize the
     logger by adding one or two handlers, otherwise the initialized logger will
@@ -39,6 +39,7 @@ def get_logger(name="ppocr", log_file=None, log_level=logging.DEBUG):
         log_level (int): The logger level. Note that only the process of
             rank 0 is affected, and other processes will set the level to
             "Error" thus be silent most of the time.
+        log_ranks (str): The IDs of the GPU ranks to log, separated by "," when there is more than one. Defaults to "0".
     Returns:
         logging.Logger: The expected logger.
     """
@@ -62,7 +63,13 @@ def get_logger(name="ppocr", log_file=None, log_level=logging.DEBUG):
         file_handler = logging.FileHandler(log_file, "a")
         file_handler.setFormatter(formatter)
         logger.addHandler(file_handler)
-    if dist.get_rank() == 0:
+    if isinstance(log_ranks, str):
+        log_ranks = [int(i) for i in log_ranks.split(",")]
+    elif isinstance(log_ranks, int):
+        log_ranks = [log_ranks]
+    if dist.get_rank() in log_ranks:
         logger.setLevel(log_level)
     else:
         logger.setLevel(logging.ERROR)
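
For context, a minimal standalone sketch of the rank-filtering behaviour this hunk adds; parse_log_ranks and level_for_rank are illustrative helper names, not part of the patch, where the real logic sits inline in get_logger:

import logging

def parse_log_ranks(log_ranks="0"):
    # Hypothetical helper: "0,1" -> [0, 1]; an int such as 2 -> [2].
    if isinstance(log_ranks, str):
        return [int(i) for i in log_ranks.split(",")]
    if isinstance(log_ranks, int):
        return [log_ranks]
    return list(log_ranks)

def level_for_rank(rank, log_ranks="0", log_level=logging.DEBUG):
    # Ranks listed in log_ranks keep the requested level; every other rank
    # is silenced to ERROR, matching the new behaviour of get_logger.
    return log_level if rank in parse_log_ranks(log_ranks) else logging.ERROR

So, for example, get_logger(log_ranks="0,1") keeps full logs on ranks 0 and 1 only.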


@@ -26,9 +26,6 @@ import random
 from ppocr.utils.logging import get_logger
-logger = get_logger()
 def str2bool(v):
     return v.lower() in ("true", "yes", "t", "y", "1")
@@ -340,6 +337,7 @@ def get_infer_gpuid():
     Returns:
         int: The GPU ID to be used for inference.
     """
+    logger = get_logger()
     if not paddle.device.is_compiled_with_rocm:
         gpu_id_str = os.environ.get("CUDA_VISIBLE_DEVICES", "0")
     else:


@@ -204,6 +204,7 @@ def train(
     eval_batch_step = config["Global"]["eval_batch_step"]
     eval_batch_epoch = config["Global"].get("eval_batch_epoch", None)
     profiler_options = config["profiler_options"]
+    print_mem_info = config["Global"].get("print_mem_info", True)
     global_step = 0
     if "global_step" in pre_best_model_dict:
@@ -406,9 +407,8 @@ def train(
                 metrics=train_stats.get(), prefix="TRAIN", step=global_step
             )
-            if dist.get_rank() == 0 and (
-                (global_step > 0 and global_step % print_batch_step == 0)
-                or (idx >= len(train_dataloader) - 1)
+            if (global_step > 0 and global_step % print_batch_step == 0) or (
+                idx >= len(train_dataloader) - 1
             ):
                 logs = train_stats.log()
@@ -418,13 +418,13 @@ def train(
                 eta_sec_format = str(datetime.timedelta(seconds=int(eta_sec)))
                 max_mem_reserved_str = ""
                 max_mem_allocated_str = ""
-                if paddle.device.is_compiled_with_cuda():
-                    max_mem_reserved_str = f"max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // (1024 ** 2)} MB,"
-                    max_mem_allocated_str = f"max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // (1024 ** 2)} MB"
+                if paddle.device.is_compiled_with_cuda() and print_mem_info:
+                    max_mem_reserved_str = f", max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // (1024 ** 2)} MB,"
+                    max_mem_allocated_str = f" max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // (1024 ** 2)} MB"
                 strs = (
                     "epoch: [{}/{}], global_step: {}, {}, avg_reader_cost: "
                     "{:.5f} s, avg_batch_cost: {:.5f} s, avg_samples: {}, "
-                    "ips: {:.5f} samples/s, eta: {}, {} {}".format(
+                    "ips: {:.5f} samples/s, eta: {}{}{}".format(
                         epoch,
                         epoch_num,
                         global_step,
@@ -740,7 +740,9 @@ def preprocess(is_train=False):
         log_file = "{}/train.log".format(save_model_dir)
     else:
         log_file = None
-    logger = get_logger(log_file=log_file)
+    log_ranks = config["Global"].get("log_ranks", "0")
+    logger = get_logger(log_file=log_file, log_ranks=log_ranks)
     # check if set use_gpu=True in paddlepaddle cpu version
     use_gpu = config["Global"].get("use_gpu", False)
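
Taken together, this commit adds two optional Global settings. A hedged sketch of how they would be set and read follows; the dict stands in for the YAML Global section, and the key names and defaults match the hunks above:

from ppocr.utils.logging import get_logger  # requires PaddleOCR on the path

# Illustrative only: a Python dict standing in for the YAML "Global" section.
config = {
    "Global": {
        "print_mem_info": False,  # drop max_mem_reserved / max_mem_allocated from the train log line
        "log_ranks": "0,1",       # ranks that keep full training logs; all others log at ERROR
    }
}

print_mem_info = config["Global"].get("print_mem_info", True)  # defaults to True
log_ranks = config["Global"].get("log_ranks", "0")             # defaults to "0"
logger = get_logger(log_ranks=log_ranks)                       # as preprocess() now does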