mirror of https://github.com/deepset-ai/haystack.git (synced 2025-09-26 16:46:58 +00:00)

logging-format-interpolation (#3907)
parent 8824f3a10a
commit 76db26f228
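
The change throughout is the same: logging calls that build their message eagerly with str.format() are rewritten to pass a %-style template plus separate arguments, which is what pylint's logging-format-interpolation check (W1202) asks for. A minimal sketch of the difference, with an illustrative logger and value:

    import logging

    logging.basicConfig(level=logging.WARNING)
    logger = logging.getLogger(__name__)

    train_file = "data/train.json"  # illustrative value

    # Eager: str.format() runs and builds the full message *before*
    # logger.info() is entered, even though INFO is disabled here.
    logger.info("Loading train set from: {}".format(train_file))

    # Lazy: the logger stores the template and arguments and only
    # interpolates them if a handler actually emits the record.
    logger.info("Loading train set from: %s", train_file)

Both forms produce identical output when INFO is enabled; the lazy form skips the string work entirely when it is not.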
@@ -150,7 +150,7 @@ class DataSilo:
         :return: None
         """
 
-        logger.info("\nLoading data into the data silo ..." "{}".format(TRACTOR_SMALL))
+        logger.info("\nLoading data into the data silo ... %s", TRACTOR_SMALL)
         # train data
         logger.info("LOADING TRAIN DATA")
         logger.info("==================")
@@ -161,7 +161,7 @@ class DataSilo:
         elif self.processor.train_filename:
             # or from a file (default)
             train_file = self.processor.data_dir / self.processor.train_filename
-            logger.info("Loading train set from: {} ".format(train_file))
+            logger.info("Loading train set from: %s ", train_file)
             self.data["train"], self.tensor_names = self._get_dataset(train_file)
         else:
             logger.info("No train set is being loaded")
@@ -178,7 +178,7 @@ class DataSilo:
         elif self.processor.dev_filename:
             # or from file (default)
             dev_file = self.processor.data_dir / self.processor.dev_filename
-            logger.info("Loading dev set from: {}".format(dev_file))
+            logger.info("Loading dev set from: %s", dev_file)
             self.data["dev"], _ = self._get_dataset(dev_file)
         elif self.processor.dev_split > 0.0:
             # or split it apart from train set
@@ -199,7 +199,7 @@ class DataSilo:
         elif self.processor.test_filename:
             # or from file (default)
             test_file = self.processor.data_dir / self.processor.test_filename
-            logger.info("Loading test set from: {}".format(test_file))
+            logger.info("Loading test set from: %s", test_file)
             if self.tensor_names:
                 self.data["test"], _ = self._get_dataset(test_file)
             else:
@@ -406,16 +406,16 @@ class DataSilo:
         else:
             self.counts["test"] = 0
 
-        logger.info("Examples in train: {}".format(self.counts["train"]))
-        logger.info("Examples in dev  : {}".format(self.counts["dev"]))
-        logger.info("Examples in test : {}".format(self.counts["test"]))
-        logger.info("Total examples   : {}".format(self.counts["train"] + self.counts["dev"] + self.counts["test"]))
+        logger.info("Examples in train: %s", self.counts["train"])
+        logger.info("Examples in dev  : %s", self.counts["dev"])
+        logger.info("Examples in test : %s", self.counts["test"])
+        logger.info("Total examples   : %s", self.counts["train"] + self.counts["dev"] + self.counts["test"])
         logger.info("")
         if self.data["train"]:
             if "input_ids" in self.tensor_names:
-                logger.info("Longest sequence length observed after clipping: {}".format(max(seq_lens)))
-                logger.info("Average sequence length after clipping: {}".format(ave_len))
-                logger.info("Proportion clipped: {}".format(clipped))
+                logger.info("Longest sequence length observed after clipping: %s", max(seq_lens))
+                logger.info("Average sequence length after clipping: %s", ave_len)
+                logger.info("Proportion clipped: %s", clipped)
                 if clipped > 0.5:
                     logger.info(
                         "[Haystack Tip] %s%% of your samples got cut down to %s tokens. "
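
Note that %s applies str() to its argument, so the integer counts above need no manual conversion, and a literal percent sign in a template must be doubled, as in the %s%% of the [Haystack Tip] line. A small sketch with made-up values:

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    counts = {"train": 1000, "dev": 100, "test": 100}  # made-up numbers
    clipped = 0.62

    # %s stringifies ints, floats, Paths, lists, ... via str().
    logger.info("Examples in train: %s", counts["train"])

    # A literal "%" must be written as "%%" once the template has arguments.
    logger.info("[Haystack Tip] %s%% of your samples got clipped.", clipped * 100)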
@@ -429,20 +429,16 @@ class DataSilo:
                     )
             elif "query_input_ids" in self.tensor_names and "passage_input_ids" in self.tensor_names:
                 logger.info(
-                    "Longest query length observed after clipping: {} - for max_query_len: {}".format(
-                        max(seq_lens[0]), max_seq_len[0]
-                    )
+                    "Longest query length observed after clipping: %s - for max_query_len: %s",
+                    max(seq_lens[0]),
+                    max_seq_len[0],
                 )
-                logger.info("Average query length after clipping: {}".format(ave_len[0]))
-                logger.info("Proportion queries clipped: {}".format(clipped[0]))
+                logger.info("Average query length after clipping: %s", ave_len[0])
+                logger.info("Proportion queries clipped: %s", clipped[0])
                 logger.info("")
                 logger.info(
-                    "Longest passage length observed after clipping: {} - for max_passage_len: {}".format(
-                        max(seq_lens[1]), max_seq_len[1]
-                    )
+                    "Longest passage length observed after clipping: %s - for max_passage_len: %s",
+                    max(seq_lens[1]),
+                    max_seq_len[1],
                 )
-                logger.info("Average passage length after clipping: {}".format(ave_len[1]))
-                logger.info("Proportion passages clipped: {}".format(clipped[1]))
+                logger.info("Average passage length after clipping: %s", ave_len[1])
+                logger.info("Proportion passages clipped: %s", clipped[1])
 
         tracker.track_params(
             {
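
One caveat worth keeping in mind: lazy interpolation defers only the formatting, not the evaluation of the arguments, so max(seq_lens[0]) above still runs even when INFO is suppressed. If computing an argument were itself expensive, the standard-library escape hatch is Logger.isEnabledFor; a sketch with stand-in data:

    import logging

    logging.basicConfig(level=logging.WARNING)
    logger = logging.getLogger(__name__)

    seq_lens = list(range(1_000_000))  # stand-in for observed sequence lengths

    # max(seq_lens) is evaluated here even though INFO is disabled;
    # only the string formatting is skipped.
    logger.info("Longest sequence length observed after clipping: %s", max(seq_lens))

    # Guard the whole call when the argument itself is costly to compute.
    if logger.isEnabledFor(logging.INFO):
        logger.info("Longest sequence length observed after clipping: %s", max(seq_lens))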
@@ -2271,9 +2271,9 @@ def _download_extract_downstream_data(input_file: str, proxies=None):
     directory = full_path.parent
     taskname = directory.stem
     datadir = directory.parent
-    logger.info("downloading and extracting file {} to dir {}".format(taskname, datadir))
+    logger.info("downloading and extracting file %s to dir %s", taskname, datadir)
     if taskname not in DOWNSTREAM_TASK_MAP:
-        logger.error("Cannot download {}. Unknown data source.".format(taskname))
+        logger.error("Cannot download %s. Unknown data source.", taskname)
     else:
         if os.name == "nt":  # make use of NamedTemporaryFile compatible with Windows
             delete_tmp_file = False
@@ -187,7 +187,7 @@ class Evaluator:
         logger.info(header)
 
         for head in results:
-            logger.info("\n _________ {} _________".format(head["task_name"]))
+            logger.info("\n _________ %s _________", head["task_name"])
             for metric_name, metric_val in head.items():
                 # log with experiment tracking framework (e.g. Mlflow)
                 if logging:
@@ -201,10 +201,10 @@ class Evaluator:
                 if metric_name == "report":
                     if isinstance(metric_val, str) and len(metric_val) > 8000:
                         metric_val = metric_val[:7500] + "\n ............................. \n" + metric_val[-500:]
-                    logger.info("{}: \n {}".format(metric_name, metric_val))
+                    logger.info("%s: \n %s", metric_name, metric_val)
                 else:
                     if not metric_name in ["preds", "labels"] and not metric_name.startswith("_"):
-                        logger.info("{}: {}".format(metric_name, metric_val))
+                        logger.info("%s: %s", metric_name, metric_val)
 
 
 def _to_numpy(container):
@@ -110,7 +110,7 @@ class PredictionHead(nn.Module):
         prediction_head = cls.subclasses[config["name"]](**config)
         if load_weights:
             model_file = cls._get_model_file(config_file=config_file)
-            logger.info("Loading prediction head from {}".format(model_file))
+            logger.info("Loading prediction head from %s", model_file)
             prediction_head.load_state_dict(torch.load(model_file, map_location=torch.device("cpu")), strict=strict)
         return prediction_head
 
@@ -238,17 +238,13 @@ class Trainer:
                     do_stopping, save_model, eval_value = self.early_stopping.check_stopping(result)
                     if save_model:
                         logger.info(
-                            "Saving current best model to {}, eval={}".format(
-                                self.early_stopping.save_dir, eval_value
-                            )
+                            "Saving current best model to %s, eval=%s", self.early_stopping.save_dir, eval_value
                         )
                         self.model.save(self.early_stopping.save_dir)
                         self.data_silo.processor.save(self.early_stopping.save_dir)
                     if do_stopping:
                         # log the stopping
-                        logger.info(
-                            "STOPPING EARLY AT EPOCH {}, STEP {}, EVALUATION {}".format(epoch, step, evalnr)
-                        )
+                        logger.info("STOPPING EARLY AT EPOCH %s, STEP %s, EVALUATION %s", epoch, step, evalnr)
                 if do_stopping:
                     break
 
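Besides skipping work, passing the values separately keeps the untouched template on the LogRecord (record.msg) with the values in record.args, which is what lets log aggregators group events by template rather than by rendered string. A minimal demonstration with a custom handler (the handler and values are illustrative):

    import logging

    class TemplateHandler(logging.Handler):
        """Print each record's raw template and its arguments."""

        def emit(self, record: logging.LogRecord) -> None:
            print("template:", record.msg)
            print("args    :", record.args)

    logger = logging.getLogger("demo")
    logger.setLevel(logging.INFO)
    logger.addHandler(TemplateHandler())

    epoch, step, evalnr = 3, 1200, 4  # made-up training state
    logger.info("STOPPING EARLY AT EPOCH %s, STEP %s, EVALUATION %s", epoch, step, evalnr)
    # template: STOPPING EARLY AT EPOCH %s, STEP %s, EVALUATION %s
    # args    : (3, 1200, 4)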
@@ -280,7 +276,7 @@ class Trainer:
 
         # With early stopping we want to restore the best model
         if self.early_stopping and self.early_stopping.save_dir:
-            logger.info("Restoring best model so far from {}".format(self.early_stopping.save_dir))
+            logger.info("Restoring best model so far from %s", self.early_stopping.save_dir)
             self.model = self.model.load(self.early_stopping.save_dir, self.device)
             self.model.connect_heads_with_processor(self.data_silo.processor.tasks, require_labels=True)
 
@@ -787,7 +787,7 @@ class FARMReader(BaseReader):
                 large_files.append(rel_path)
 
         if len(large_files) > 0:
-            logger.info("Track files with git lfs: {}".format(", ".join(large_files)))
+            logger.info("Track files with git lfs: %s", ", ".join(large_files))
             repo.lfs_track(large_files)
 
         logger.info("Push model to the hub. This might take a while")
@@ -112,8 +112,9 @@ def fetch_archive_from_http(
             tar_archive.extractall(output_dir)
     else:
         logger.warning(
-            "Skipped url {0} as file type is not supported here. "
-            "See haystack documentation for support of more file types".format(url)
+            "Skipped url %s as file type is not supported here. "
+            "See haystack documentation for support of more file types",
+            url,
         )
 
     return True
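
The rewritten warning relies on Python joining adjacent string literals at compile time: the two quoted pieces form one template with a single %s, and the argument simply moves after the closing quote. Sketch with an illustrative URL:

    import logging

    logging.basicConfig(level=logging.WARNING)
    logger = logging.getLogger(__name__)

    url = "https://example.com/archive.rar"  # illustrative URL

    # Adjacent string literals are concatenated by the parser,
    # so this is one template containing one %s placeholder.
    logger.warning(
        "Skipped url %s as file type is not supported here. "
        "See haystack documentation for support of more file types",
        url,
    )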
@@ -45,8 +45,10 @@ def convert_files_to_docs(
             suffix2paths[file_suffix].append(path)
         elif not path.is_dir():
             logger.warning(
-                "Skipped file {0} as type {1} is not supported here. "
-                "See haystack.file_converter for support of more file types".format(path, file_suffix)
+                "Skipped file %s as type %s is not supported here. "
+                "See haystack.file_converter for support of more file types",
+                path,
+                file_suffix,
             )
 
     # No need to initialize converter if file type not present
@@ -61,7 +63,7 @@ def convert_files_to_docs(
     documents = []
     for suffix, paths in suffix2paths.items():
         for path in paths:
-            logger.info("Converting {}".format(path))
+            logger.info("Converting %s", path)
             # PDFToTextConverter, TextConverter, and DocxToTextConverter return a list containing a single Document
             document = suffix2converter[suffix].convert(
                 file_path=path, meta=None, encoding=encoding, id_hash_keys=id_hash_keys
@@ -108,7 +110,7 @@ def tika_convert_files_to_docs(
     try:
         from haystack.nodes.file_converter import TikaConverter
     except Exception as ex:
-        logger.error("Tika not installed. Please install tika and try again. Error: {}".format(ex))
+        logger.error("Tika not installed. Please install tika and try again. Error: %s", ex)
         raise ex
     converter = TikaConverter()
     paths = [p for p in Path(dir_path).glob("**/*")]
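
Interpolating the exception with %s preserves the old behavior (only str(ex) appears in the log) before re-raising. If the full traceback were wanted instead, logging's own options are logger.exception(...) inside the except block or exc_info=True; a sketch under that assumption, using a hypothetical module name to force the failure:

    import logging

    logging.basicConfig(level=logging.ERROR)
    logger = logging.getLogger(__name__)

    try:
        import tika_stand_in  # hypothetical module, guaranteed to raise ImportError
    except Exception as ex:
        # Message only, as in the diff above:
        logger.error("Tika not installed. Please install tika and try again. Error: %s", ex)
        # Message plus the full traceback, if that is preferred:
        logger.exception("Tika not installed. Please install tika and try again.")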
@@ -121,13 +123,15 @@ def tika_convert_files_to_docs(
             file_paths.append(path)
         elif not path.is_dir():
             logger.warning(
-                "Skipped file {0} as type {1} is not supported here. "
-                "See haystack.file_converter for support of more file types".format(path, file_suffix)
+                "Skipped file %s as type %s is not supported here. "
+                "See haystack.file_converter for support of more file types",
+                path,
+                file_suffix,
             )
 
     documents = []
     for path in file_paths:
-        logger.info("Converting {}".format(path))
+        logger.info("Converting %s", path)
         # TikaConverter returns a list containing a single Document
         document = converter.convert(path)[0]
         meta = document.meta or {}
@@ -273,7 +273,6 @@ disable = [
     "too-many-instance-attributes",
     "super-with-arguments",
     "redefined-builtin",
-    "logging-format-interpolation",
     "abstract-method",
     "too-many-branches",
     "unspecified-encoding",
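
Finally, removing logging-format-interpolation from the pylint disable list in pyproject.toml re-enables the W1202 check, so regressions to eager formatting now fail linting (its sibling check, W1203 logging-fstring-interpolation, covers f-strings). Against an illustrative file like this, pylint flags the first call and accepts the second:

    import logging

    logger = logging.getLogger(__name__)

    name = "train.json"
    logger.info("Loading {}".format(name))  # W1202: logging-format-interpolation
    logger.info("Loading %s", name)         # OK: lazy %-style interpolation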