diff --git a/haystack/modeling/data_handler/data_silo.py b/haystack/modeling/data_handler/data_silo.py
index 7d51193ec..5ce6bb6c0 100644
--- a/haystack/modeling/data_handler/data_silo.py
+++ b/haystack/modeling/data_handler/data_silo.py
@@ -150,7 +150,7 @@ class DataSilo:
 
         :return: None
         """
-        logger.info("\nLoading data into the data silo ..." "{}".format(TRACTOR_SMALL))
+        logger.info("\nLoading data into the data silo ... %s", TRACTOR_SMALL)
         # train data
         logger.info("LOADING TRAIN DATA")
         logger.info("==================")
@@ -161,7 +161,7 @@ class DataSilo:
         elif self.processor.train_filename:
             # or from a file (default)
             train_file = self.processor.data_dir / self.processor.train_filename
-            logger.info("Loading train set from: {} ".format(train_file))
+            logger.info("Loading train set from: %s ", train_file)
             self.data["train"], self.tensor_names = self._get_dataset(train_file)
         else:
             logger.info("No train set is being loaded")
@@ -178,7 +178,7 @@ class DataSilo:
         elif self.processor.dev_filename:
             # or from file (default)
             dev_file = self.processor.data_dir / self.processor.dev_filename
-            logger.info("Loading dev set from: {}".format(dev_file))
+            logger.info("Loading dev set from: %s", dev_file)
             self.data["dev"], _ = self._get_dataset(dev_file)
         elif self.processor.dev_split > 0.0:
             # or split it apart from train set
@@ -199,7 +199,7 @@ class DataSilo:
         elif self.processor.test_filename:
             # or from file (default)
             test_file = self.processor.data_dir / self.processor.test_filename
-            logger.info("Loading test set from: {}".format(test_file))
+            logger.info("Loading test set from: %s", test_file)
             if self.tensor_names:
                 self.data["test"], _ = self._get_dataset(test_file)
             else:
@@ -406,16 +406,16 @@ class DataSilo:
         else:
             self.counts["test"] = 0
 
-        logger.info("Examples in train: {}".format(self.counts["train"]))
-        logger.info("Examples in dev  : {}".format(self.counts["dev"]))
-        logger.info("Examples in test : {}".format(self.counts["test"]))
-        logger.info("Total examples   : {}".format(self.counts["train"] + self.counts["dev"] + self.counts["test"]))
+        logger.info("Examples in train: %s", self.counts["train"])
+        logger.info("Examples in dev  : %s", self.counts["dev"])
+        logger.info("Examples in test : %s", self.counts["test"])
+        logger.info("Total examples   : %s", self.counts["train"] + self.counts["dev"] + self.counts["test"])
         logger.info("")
         if self.data["train"]:
             if "input_ids" in self.tensor_names:
-                logger.info("Longest sequence length observed after clipping: {}".format(max(seq_lens)))
-                logger.info("Average sequence length after clipping: {}".format(ave_len))
-                logger.info("Proportion clipped: {}".format(clipped))
+                logger.info("Longest sequence length observed after clipping: %s", max(seq_lens))
+                logger.info("Average sequence length after clipping: %s", ave_len)
+                logger.info("Proportion clipped: %s", clipped)
                 if clipped > 0.5:
                     logger.info(
                         "[Haystack Tip] %s%% of your samples got cut down to %s tokens. "
@@ -429,20 +429,20 @@ class DataSilo:
                     )
             elif "query_input_ids" in self.tensor_names and "passage_input_ids" in self.tensor_names:
                 logger.info(
-                    "Longest query length observed after clipping: {} - for max_query_len: {}".format(
-                        max(seq_lens[0]), max_seq_len[0]
-                    )
+                    "Longest query length observed after clipping: %s - for max_query_len: %s",
+                    max(seq_lens[0]),
+                    max_seq_len[0],
                 )
-                logger.info("Average query length after clipping: {}".format(ave_len[0]))
-                logger.info("Proportion queries clipped: {}".format(clipped[0]))
+                logger.info("Average query length after clipping: %s", ave_len[0])
+                logger.info("Proportion queries clipped: %s", clipped[0])
                 logger.info("")
                 logger.info(
-                    "Longest passage length observed after clipping: {} - for max_passage_len: {}".format(
-                        max(seq_lens[1]), max_seq_len[1]
-                    )
+                    "Longest passage length observed after clipping: %s - for max_passage_len: %s",
+                    max(seq_lens[1]),
+                    max_seq_len[1],
                 )
-                logger.info("Average passage length after clipping: {}".format(ave_len[1]))
-                logger.info("Proportion passages clipped: {}".format(clipped[1]))
+                logger.info("Average passage length after clipping: %s", ave_len[1])
+                logger.info("Proportion passages clipped: %s", clipped[1])
 
         tracker.track_params(
             {
diff --git a/haystack/modeling/data_handler/processor.py b/haystack/modeling/data_handler/processor.py
index 693eb5413..004b3eb92 100644
--- a/haystack/modeling/data_handler/processor.py
+++ b/haystack/modeling/data_handler/processor.py
@@ -2271,9 +2271,9 @@ def _download_extract_downstream_data(input_file: str, proxies=None):
     directory = full_path.parent
     taskname = directory.stem
     datadir = directory.parent
-    logger.info("downloading and extracting file {} to dir {}".format(taskname, datadir))
+    logger.info("downloading and extracting file %s to dir %s", taskname, datadir)
     if taskname not in DOWNSTREAM_TASK_MAP:
-        logger.error("Cannot download {}. Unknown data source.".format(taskname))
+        logger.error("Cannot download %s. Unknown data source.", taskname)
     else:
         if os.name == "nt":  # make use of NamedTemporaryFile compatible with Windows
             delete_tmp_file = False
diff --git a/haystack/modeling/evaluation/eval.py b/haystack/modeling/evaluation/eval.py
index 3e5979198..23a791f64 100644
--- a/haystack/modeling/evaluation/eval.py
+++ b/haystack/modeling/evaluation/eval.py
@@ -187,7 +187,7 @@ class Evaluator:
         logger.info(header)
 
         for head in results:
-            logger.info("\n _________ {} _________".format(head["task_name"]))
+            logger.info("\n _________ %s _________", head["task_name"])
             for metric_name, metric_val in head.items():
                 # log with experiment tracking framework (e.g. Mlflow)
                 if logging:
@@ -201,10 +201,10 @@ class Evaluator:
                     if metric_name == "report":
                         if isinstance(metric_val, str) and len(metric_val) > 8000:
                             metric_val = metric_val[:7500] + "\n ............................. \n" + metric_val[-500:]
-                        logger.info("{}: \n {}".format(metric_name, metric_val))
+                        logger.info("%s: \n %s", metric_name, metric_val)
                     else:
                         if not metric_name in ["preds", "labels"] and not metric_name.startswith("_"):
-                            logger.info("{}: {}".format(metric_name, metric_val))
+                            logger.info("%s: %s", metric_name, metric_val)
 
 
 def _to_numpy(container):
diff --git a/haystack/modeling/model/prediction_head.py b/haystack/modeling/model/prediction_head.py
index 5c10befb6..6b05dca53 100644
--- a/haystack/modeling/model/prediction_head.py
+++ b/haystack/modeling/model/prediction_head.py
@@ -110,7 +110,7 @@ class PredictionHead(nn.Module):
         prediction_head = cls.subclasses[config["name"]](**config)
         if load_weights:
             model_file = cls._get_model_file(config_file=config_file)
-            logger.info("Loading prediction head from {}".format(model_file))
+            logger.info("Loading prediction head from %s", model_file)
             prediction_head.load_state_dict(torch.load(model_file, map_location=torch.device("cpu")), strict=strict)
         return prediction_head
 
diff --git a/haystack/modeling/training/base.py b/haystack/modeling/training/base.py
index cef0a8ea7..15ee17a08 100644
--- a/haystack/modeling/training/base.py
+++ b/haystack/modeling/training/base.py
@@ -238,17 +238,13 @@ class Trainer:
                         do_stopping, save_model, eval_value = self.early_stopping.check_stopping(result)
                         if save_model:
                             logger.info(
-                                "Saving current best model to {}, eval={}".format(
-                                    self.early_stopping.save_dir, eval_value
-                                )
+                                "Saving current best model to %s, eval=%s", self.early_stopping.save_dir, eval_value
                             )
                             self.model.save(self.early_stopping.save_dir)
                             self.data_silo.processor.save(self.early_stopping.save_dir)
                         if do_stopping:
                             # log the stopping
-                            logger.info(
-                                "STOPPING EARLY AT EPOCH {}, STEP {}, EVALUATION {}".format(epoch, step, evalnr)
-                            )
+                            logger.info("STOPPING EARLY AT EPOCH %s, STEP %s, EVALUATION %s", epoch, step, evalnr)
 
                 if do_stopping:
                     break
@@ -280,7 +276,7 @@ class Trainer:
 
         # With early stopping we want to restore the best model
        if self.early_stopping and self.early_stopping.save_dir:
-            logger.info("Restoring best model so far from {}".format(self.early_stopping.save_dir))
+            logger.info("Restoring best model so far from %s", self.early_stopping.save_dir)
             self.model = self.model.load(self.early_stopping.save_dir, self.device)
             self.model.connect_heads_with_processor(self.data_silo.processor.tasks, require_labels=True)
 
diff --git a/haystack/nodes/reader/farm.py b/haystack/nodes/reader/farm.py
index 6f8725ba7..56f74a6cd 100644
--- a/haystack/nodes/reader/farm.py
+++ b/haystack/nodes/reader/farm.py
@@ -787,7 +787,7 @@ class FARMReader(BaseReader):
                     large_files.append(rel_path)
 
             if len(large_files) > 0:
-                logger.info("Track files with git lfs: {}".format(", ".join(large_files)))
+                logger.info("Track files with git lfs: %s", ", ".join(large_files))
                 repo.lfs_track(large_files)
 
             logger.info("Push model to the hub. This might take a while")
diff --git a/haystack/utils/import_utils.py b/haystack/utils/import_utils.py
index 6c9fefd3e..97e28cbc0 100644
--- a/haystack/utils/import_utils.py
+++ b/haystack/utils/import_utils.py
@@ -112,8 +112,9 @@ def fetch_archive_from_http(
             tar_archive.extractall(output_dir)
         else:
             logger.warning(
-                "Skipped url {0} as file type is not supported here. "
-                "See haystack documentation for support of more file types".format(url)
+                "Skipped url %s as file type is not supported here. "
+                "See haystack documentation for support of more file types",
+                url,
             )
 
     return True
diff --git a/haystack/utils/preprocessing.py b/haystack/utils/preprocessing.py
index b9cc9659f..dea6b6d31 100644
--- a/haystack/utils/preprocessing.py
+++ b/haystack/utils/preprocessing.py
@@ -45,8 +45,10 @@ def convert_files_to_docs(
             suffix2paths[file_suffix].append(path)
         elif not path.is_dir():
             logger.warning(
-                "Skipped file {0} as type {1} is not supported here. "
-                "See haystack.file_converter for support of more file types".format(path, file_suffix)
+                "Skipped file %s as type %s is not supported here. "
+                "See haystack.file_converter for support of more file types",
+                path,
+                file_suffix,
             )
 
     # No need to initialize converter if file type not present
@@ -61,7 +63,7 @@
     documents = []
     for suffix, paths in suffix2paths.items():
         for path in paths:
-            logger.info("Converting {}".format(path))
+            logger.info("Converting %s", path)
             # PDFToTextConverter, TextConverter, and DocxToTextConverter return a list containing a single Document
             document = suffix2converter[suffix].convert(
                 file_path=path, meta=None, encoding=encoding, id_hash_keys=id_hash_keys
@@ -108,7 +110,7 @@
     try:
         from haystack.nodes.file_converter import TikaConverter
     except Exception as ex:
-        logger.error("Tika not installed. Please install tika and try again. Error: {}".format(ex))
+        logger.error("Tika not installed. Please install tika and try again. Error: %s", ex)
         raise ex
     converter = TikaConverter()
     paths = [p for p in Path(dir_path).glob("**/*")]
@@ -121,13 +123,15 @@
             file_paths.append(path)
         elif not path.is_dir():
             logger.warning(
-                "Skipped file {0} as type {1} is not supported here. "
-                "See haystack.file_converter for support of more file types".format(path, file_suffix)
+                "Skipped file %s as type %s is not supported here. "
+                "See haystack.file_converter for support of more file types",
+                path,
+                file_suffix,
             )
 
     documents = []
     for path in file_paths:
-        logger.info("Converting {}".format(path))
+        logger.info("Converting %s", path)
         # TikaConverter returns a list containing a single Document
         document = converter.convert(path)[0]
         meta = document.meta or {}
diff --git a/pyproject.toml b/pyproject.toml
index 0e0ee4f8f..f424a5ec9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -273,7 +273,6 @@ disable = [
     "too-many-instance-attributes",
     "super-with-arguments",
     "redefined-builtin",
-    "logging-format-interpolation",
     "abstract-method",
     "too-many-branches",
     "unspecified-encoding",