logging-format-interpolation (#3907)

ZanSara 2023-02-03 13:30:56 +01:00 committed by GitHub
parent 8824f3a10a
commit 76db26f228
9 changed files with 45 additions and 45 deletions
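
The change is mechanical but repeated across the code base: every logging call that builds its message eagerly with str.format() is rewritten to pass a %-style format string plus separate arguments, which is the pattern pylint's logging-format-interpolation check enforces. A minimal sketch of the before/after pattern (the logger name and the train_file value are illustrative, not taken from the diff):

import logging

logger = logging.getLogger(__name__)
train_file = "data/train.json"  # illustrative stand-in for any logged value

# Before: the message is fully formatted before logger.info() is even called,
# and pylint reports logging-format-interpolation.
logger.info("Loading train set from: {}".format(train_file))

# After: the logging module interpolates %s lazily, only if the record is
# actually handled.
logger.info("Loading train set from: %s", train_file)

A side benefit of the lazy form is that handlers which want the raw template and its arguments (for structured logging or aggregation) can still read record.msg and record.args separately.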

View File

@@ -150,7 +150,7 @@ class DataSilo:
         :return: None
         """
-        logger.info("\nLoading data into the data silo ..." "{}".format(TRACTOR_SMALL))
+        logger.info("\nLoading data into the data silo ... %s", TRACTOR_SMALL)
         # train data
         logger.info("LOADING TRAIN DATA")
         logger.info("==================")
@@ -161,7 +161,7 @@ class DataSilo:
         elif self.processor.train_filename:
             # or from a file (default)
             train_file = self.processor.data_dir / self.processor.train_filename
-            logger.info("Loading train set from: {} ".format(train_file))
+            logger.info("Loading train set from: %s ", train_file)
             self.data["train"], self.tensor_names = self._get_dataset(train_file)
         else:
             logger.info("No train set is being loaded")
@@ -178,7 +178,7 @@ class DataSilo:
         elif self.processor.dev_filename:
             # or from file (default)
             dev_file = self.processor.data_dir / self.processor.dev_filename
-            logger.info("Loading dev set from: {}".format(dev_file))
+            logger.info("Loading dev set from: %s", dev_file)
             self.data["dev"], _ = self._get_dataset(dev_file)
         elif self.processor.dev_split > 0.0:
             # or split it apart from train set
@@ -199,7 +199,7 @@ class DataSilo:
         elif self.processor.test_filename:
             # or from file (default)
             test_file = self.processor.data_dir / self.processor.test_filename
-            logger.info("Loading test set from: {}".format(test_file))
+            logger.info("Loading test set from: %s", test_file)
             if self.tensor_names:
                 self.data["test"], _ = self._get_dataset(test_file)
             else:
@@ -406,16 +406,16 @@ class DataSilo:
         else:
             self.counts["test"] = 0
-        logger.info("Examples in train: {}".format(self.counts["train"]))
-        logger.info("Examples in dev : {}".format(self.counts["dev"]))
-        logger.info("Examples in test : {}".format(self.counts["test"]))
-        logger.info("Total examples : {}".format(self.counts["train"] + self.counts["dev"] + self.counts["test"]))
+        logger.info("Examples in train: %s", self.counts["train"])
+        logger.info("Examples in dev : %s", self.counts["dev"])
+        logger.info("Examples in test : %s", self.counts["test"])
+        logger.info("Total examples : %s", self.counts["train"] + self.counts["dev"] + self.counts["test"])
         logger.info("")
         if self.data["train"]:
             if "input_ids" in self.tensor_names:
-                logger.info("Longest sequence length observed after clipping: {}".format(max(seq_lens)))
-                logger.info("Average sequence length after clipping: {}".format(ave_len))
-                logger.info("Proportion clipped: {}".format(clipped))
+                logger.info("Longest sequence length observed after clipping: %s", max(seq_lens))
+                logger.info("Average sequence length after clipping: %s", ave_len)
+                logger.info("Proportion clipped: %s", clipped)
                 if clipped > 0.5:
                     logger.info(
                         "[Haystack Tip] %s%% of your samples got cut down to %s tokens. "
@@ -429,20 +429,20 @@ class DataSilo:
                     )
             elif "query_input_ids" in self.tensor_names and "passage_input_ids" in self.tensor_names:
                 logger.info(
-                    "Longest query length observed after clipping: {} - for max_query_len: {}".format(
-                        max(seq_lens[0]), max_seq_len[0]
-                    )
+                    "Longest query length observed after clipping: %s - for max_query_len: %s",
+                    max(seq_lens[0]),
+                    max_seq_len[0],
                 )
-                logger.info("Average query length after clipping: {}".format(ave_len[0]))
-                logger.info("Proportion queries clipped: {}".format(clipped[0]))
+                logger.info("Average query length after clipping: %s", ave_len[0])
+                logger.info("Proportion queries clipped: %s", clipped[0])
                 logger.info("")
                 logger.info(
-                    "Longest passage length observed after clipping: {} - for max_passage_len: {}".format(
-                        max(seq_lens[1]), max_seq_len[1]
-                    )
+                    "Longest passage length observed after clipping: %s - for max_passage_len: %s",
+                    max(seq_lens[1]),
+                    max_seq_len[1],
                 )
-                logger.info("Average passage length after clipping: {}".format(ave_len[1]))
-                logger.info("Proportion passages clipped: {}".format(clipped[1]))
+                logger.info("Average passage length after clipping: %s", ave_len[1])
+                logger.info("Proportion passages clipped: %s", clipped[1])
         tracker.track_params(
             {
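
One detail visible in the hunks above: the already-converted "[Haystack Tip] %s%% of your samples got cut down to %s tokens." message doubles the percent sign, because with %-style placeholders a literal "%" has to be written as "%%". A tiny sketch with made-up values:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
clipped = 0.62  # hypothetical proportion of clipped samples

# %s is replaced by an argument; %% renders as a single literal percent sign.
logger.info("[Haystack Tip] %s%% of your samples got cut down to %s tokens.", round(clipped * 100, 1), 256)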

View File

@@ -2271,9 +2271,9 @@ def _download_extract_downstream_data(input_file: str, proxies=None):
     directory = full_path.parent
     taskname = directory.stem
     datadir = directory.parent
-    logger.info("downloading and extracting file {} to dir {}".format(taskname, datadir))
+    logger.info("downloading and extracting file %s to dir %s", taskname, datadir)
     if taskname not in DOWNSTREAM_TASK_MAP:
-        logger.error("Cannot download {}. Unknown data source.".format(taskname))
+        logger.error("Cannot download %s. Unknown data source.", taskname)
     else:
         if os.name == "nt":  # make use of NamedTemporaryFile compatible with Windows
             delete_tmp_file = False

View File

@@ -187,7 +187,7 @@ class Evaluator:
         logger.info(header)
         for head in results:
-            logger.info("\n _________ {} _________".format(head["task_name"]))
+            logger.info("\n _________ %s _________", head["task_name"])
             for metric_name, metric_val in head.items():
                 # log with experiment tracking framework (e.g. Mlflow)
                 if logging:
@@ -201,10 +201,10 @@ class Evaluator:
                 if metric_name == "report":
                     if isinstance(metric_val, str) and len(metric_val) > 8000:
                         metric_val = metric_val[:7500] + "\n ............................. \n" + metric_val[-500:]
-                    logger.info("{}: \n {}".format(metric_name, metric_val))
+                    logger.info("%s: \n %s", metric_name, metric_val)
                 else:
                     if not metric_name in ["preds", "labels"] and not metric_name.startswith("_"):
-                        logger.info("{}: {}".format(metric_name, metric_val))
+                        logger.info("%s: %s", metric_name, metric_val)


 def _to_numpy(container):

View File

@@ -110,7 +110,7 @@ class PredictionHead(nn.Module):
         prediction_head = cls.subclasses[config["name"]](**config)
         if load_weights:
             model_file = cls._get_model_file(config_file=config_file)
-            logger.info("Loading prediction head from {}".format(model_file))
+            logger.info("Loading prediction head from %s", model_file)
             prediction_head.load_state_dict(torch.load(model_file, map_location=torch.device("cpu")), strict=strict)
         return prediction_head

View File

@@ -238,17 +238,13 @@ class Trainer:
                         do_stopping, save_model, eval_value = self.early_stopping.check_stopping(result)
                         if save_model:
                             logger.info(
-                                "Saving current best model to {}, eval={}".format(
-                                    self.early_stopping.save_dir, eval_value
-                                )
+                                "Saving current best model to %s, eval=%s", self.early_stopping.save_dir, eval_value
                             )
                             self.model.save(self.early_stopping.save_dir)
                             self.data_silo.processor.save(self.early_stopping.save_dir)
                         if do_stopping:
                             # log the stopping
-                            logger.info(
-                                "STOPPING EARLY AT EPOCH {}, STEP {}, EVALUATION {}".format(epoch, step, evalnr)
-                            )
+                            logger.info("STOPPING EARLY AT EPOCH %s, STEP %s, EVALUATION %s", epoch, step, evalnr)
                 if do_stopping:
                     break
@@ -280,7 +276,7 @@ class Trainer:
         # With early stopping we want to restore the best model
         if self.early_stopping and self.early_stopping.save_dir:
-            logger.info("Restoring best model so far from {}".format(self.early_stopping.save_dir))
+            logger.info("Restoring best model so far from %s", self.early_stopping.save_dir)
             self.model = self.model.load(self.early_stopping.save_dir, self.device)
             self.model.connect_heads_with_processor(self.data_silo.processor.tasks, require_labels=True)

View File

@@ -787,7 +787,7 @@ class FARMReader(BaseReader):
                         large_files.append(rel_path)
             if len(large_files) > 0:
-                logger.info("Track files with git lfs: {}".format(", ".join(large_files)))
+                logger.info("Track files with git lfs: %s", ", ".join(large_files))
                 repo.lfs_track(large_files)
             logger.info("Push model to the hub. This might take a while")

View File

@@ -112,8 +112,9 @@ def fetch_archive_from_http(
             tar_archive.extractall(output_dir)
         else:
             logger.warning(
-                "Skipped url {0} as file type is not supported here. "
-                "See haystack documentation for support of more file types".format(url)
+                "Skipped url %s as file type is not supported here. "
+                "See haystack documentation for support of more file types",
+                url,
             )
     return True

View File

@@ -45,8 +45,10 @@ def convert_files_to_docs(
             suffix2paths[file_suffix].append(path)
         elif not path.is_dir():
             logger.warning(
-                "Skipped file {0} as type {1} is not supported here. "
-                "See haystack.file_converter for support of more file types".format(path, file_suffix)
+                "Skipped file %s as type %s is not supported here. "
+                "See haystack.file_converter for support of more file types",
+                path,
+                file_suffix,
             )

     # No need to initialize converter if file type not present
@@ -61,7 +63,7 @@ def convert_files_to_docs(
     documents = []
     for suffix, paths in suffix2paths.items():
         for path in paths:
-            logger.info("Converting {}".format(path))
+            logger.info("Converting %s", path)
             # PDFToTextConverter, TextConverter, and DocxToTextConverter return a list containing a single Document
             document = suffix2converter[suffix].convert(
                 file_path=path, meta=None, encoding=encoding, id_hash_keys=id_hash_keys
@@ -108,7 +110,7 @@ def tika_convert_files_to_docs(
     try:
         from haystack.nodes.file_converter import TikaConverter
     except Exception as ex:
-        logger.error("Tika not installed. Please install tika and try again. Error: {}".format(ex))
+        logger.error("Tika not installed. Please install tika and try again. Error: %s", ex)
         raise ex
     converter = TikaConverter()
     paths = [p for p in Path(dir_path).glob("**/*")]
@@ -121,13 +123,15 @@ def tika_convert_files_to_docs(
             file_paths.append(path)
         elif not path.is_dir():
             logger.warning(
-                "Skipped file {0} as type {1} is not supported here. "
-                "See haystack.file_converter for support of more file types".format(path, file_suffix)
+                "Skipped file %s as type %s is not supported here. "
+                "See haystack.file_converter for support of more file types",
+                path,
+                file_suffix,
            )

     documents = []
     for path in file_paths:
-        logger.info("Converting {}".format(path))
+        logger.info("Converting %s", path)
         # TikaConverter returns a list containing a single Document
         document = converter.convert(path)[0]
         meta = document.meta or {}

View File

@@ -273,7 +273,6 @@ disable = [
     "too-many-instance-attributes",
     "super-with-arguments",
     "redefined-builtin",
-    "logging-format-interpolation",
     "abstract-method",
     "too-many-branches",
     "unspecified-encoding",