chore: remove f-strings from logs for performance reasons (#3212)

* Use the %s syntax on all debug messages

* Use the %s syntax on some more debug messages

* Use the %s syntax on info messages

* Use the %s syntax on warning messages

* Use the %s syntax on error and exception messages

* mypy

* pylint

* trigger tutorials execution in CI

* trigger tutorials execution on CI

* black

* remove embeddings from repr

* fix Document `__repr__`

* address feedback

* mypy
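
The change applied across all 60 files is the same mechanical pattern: replace eagerly formatted f-string log messages with %s placeholders plus arguments, so the logging module only builds the final string when a record is actually emitted. A minimal before/after sketch of the idea (the logger setup and the `document_count` value are illustrative, not taken from the diff):

import logging

logging.basicConfig(level=logging.WARNING)  # INFO records are filtered out below
logger = logging.getLogger(__name__)

document_count = 1_000_000

# Before: the f-string is rendered eagerly, even though this INFO record is dropped.
logger.info(f"Updating embeddings for {document_count} docs...")

# After: the %s formatting is deferred until a handler actually emits the record,
# so suppressed messages cost almost nothing, and the constant template makes it
# easier for linters and log aggregators to group identical messages.
logger.info("Updating embeddings for %s docs...", document_count)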
Sara Zan 2022-09-19 18:18:32 +02:00 committed by GitHub
parent 8fbccbda82
commit dcb132ba59
60 changed files with 235 additions and 209 deletions
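
Two of the commits above ("remove embeddings from repr" and "fix Document `__repr__`") serve the same readability goal from another angle: a Document that gets logged should not dump its full embedding vector. A minimal sketch of the idea with a simplified class; the field handling and placeholder text are illustrative, not the committed implementation (`getattr` is used here so it also works when the embedding is a plain numpy array):

import numpy as np

class Document:
    def __init__(self, content: str, embedding=None):
        self.content = content
        self.embedding = embedding

    def to_dict(self):
        return {"content": self.content, "embedding": self.embedding}

    def __repr__(self):
        values = self.to_dict()
        # Swap the raw vector for a short summary so repr/log output stays readable.
        if values.get("embedding") is not None:
            values["embedding"] = f"<embedding of shape {getattr(values['embedding'], 'shape', '[no shape]')}>"
        return f"<Document: {values}>"

print(Document("hello world", embedding=np.zeros(768)))
# <Document: {'content': 'hello world', 'embedding': '<embedding of shape (768,)>'}>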


@ -29,7 +29,7 @@ def disable_and_log(func):
args_as_kwargs = args_to_kwargs(args, func)
parameters = {**args_as_kwargs, **kwargs}
logger.info(f"Input to {func.__name__}: {parameters}")
logger.info("Input to %s: %s", func.__name__, parameters)
return wrapper


@ -200,7 +200,7 @@ class BaseElasticsearchDocumentStore(KeywordDocumentStore):
"""
# Check if index_name refers to an alias
if self.client.indices.exists_alias(name=index_name):
logger.debug(f"Index name {index_name} is an alias.")
logger.debug("Index name %s is an alias.", index_name)
# check if the existing index has the embedding field; if not create it
if self.client.indices.exists(index=index_name, headers=headers):
@ -949,7 +949,7 @@ class BaseElasticsearchDocumentStore(KeywordDocumentStore):
all_terms_must_match=all_terms_must_match,
)
logger.debug(f"Retriever query: {body}")
logger.debug("Retriever query: %s", body)
result = self.client.search(index=index, body=body, headers=headers)["hits"]["hits"]
documents = [
@ -1086,7 +1086,7 @@ class BaseElasticsearchDocumentStore(KeywordDocumentStore):
body.append(headers)
body.append(cur_query_body)
logger.debug(f"Retriever query: {body}")
logger.debug("Retriever query: %s", body)
responses = self.client.msearch(index=index, body=body)
all_documents = []
@ -1287,7 +1287,7 @@ class BaseElasticsearchDocumentStore(KeywordDocumentStore):
if excluded_meta_data:
body["_source"] = {"excludes": excluded_meta_data}
logger.debug(f"Retriever query: {body}")
logger.debug("Retriever query: %s", body)
try:
result = self.client.search(index=index, body=body, request_timeout=300, headers=headers)["hits"]["hits"]
if len(result) == 0:
@ -1458,12 +1458,16 @@ class BaseElasticsearchDocumentStore(KeywordDocumentStore):
if update_existing_embeddings:
document_count = self.get_document_count(index=index, headers=headers)
logger.info(f"Updating embeddings for all {document_count} docs ...")
else:
document_count = self.get_document_count(
index=index, filters=filters, only_documents_without_embedding=True, headers=headers
)
logger.info(f"Updating embeddings for {document_count} docs without embeddings ...")
logger.info(
"Updating embeddings for all %s docs %s...",
document_count,
"without embeddings" if not update_existing_embeddings else "",
)
result = self._get_all_documents_in_index(
index=index,
@ -1674,7 +1678,7 @@ class BaseElasticsearchDocumentStore(KeywordDocumentStore):
def _delete_index(self, index: str):
if self.client.indices.exists(index):
self.client.indices.delete(index=index, ignore=[400, 404])
logger.info(f"Index '{index}' deleted.")
logger.info("Index '%s' deleted.", index)
class ElasticsearchDocumentStore(BaseElasticsearchDocumentStore):


@ -346,7 +346,7 @@ class FAISSDocumentStore(SQLDocumentStore):
logger.warning("Calling DocumentStore.update_embeddings() on an empty index")
return
logger.info(f"Updating embeddings for {document_count} docs...")
logger.info("Updating embeddings for %s docs...", document_count)
vector_id = sum(index.ntotal for index in self.faiss_indexes.values())
result = self._query(
@ -568,7 +568,7 @@ class FAISSDocumentStore(SQLDocumentStore):
)
if index in self.faiss_indexes:
del self.faiss_indexes[index]
logger.info(f"Index '{index}' deleted.")
logger.info("Index '%s' deleted.", index)
super().delete_index(index)
def query_by_embedding(


@ -451,11 +451,10 @@ class InMemoryDocumentStore(BaseDocumentStore):
result = self._query(
index=index, filters=filters, only_documents_without_embedding=not update_existing_embeddings
)
document_count = len(result)
logger.info(f"Updating embeddings for {document_count} docs ...")
logger.info("Updating embeddings for %s docs ...", len(result) if logger.level > logging.DEBUG else 0)
batched_documents = get_batches_from_generator(result, batch_size)
with tqdm(
total=document_count, disable=not self.progress_bar, position=0, unit=" docs", desc="Updating Embedding"
total=len(result), disable=not self.progress_bar, position=0, unit=" docs", desc="Updating Embedding"
) as progress_bar:
for document_batch in batched_documents:
embeddings = retriever.embed_documents(document_batch) # type: ignore
@ -782,7 +781,7 @@ class InMemoryDocumentStore(BaseDocumentStore):
"""
if index in self.indexes:
del self.indexes[index]
logger.info(f"Index '{index}' deleted.")
logger.info("Index '%s' deleted.", index)
def delete_labels(
self,


@ -37,7 +37,7 @@ class InMemoryKnowledgeGraph(BaseKnowledgeGraph):
if index not in self.indexes:
self.indexes[index] = Graph()
else:
logger.warning(f"Index '{index}' is already present.")
logger.warning("Index '%s' is already present.", index)
def delete_index(self, index: Optional[str] = None):
"""
@ -49,7 +49,7 @@ class InMemoryKnowledgeGraph(BaseKnowledgeGraph):
if index in self.indexes:
del self.indexes[index]
logger.info(f"Index '{index}' deleted.")
logger.info("Index '%s' deleted.", index)
def import_from_ttl_file(self, path: Path, index: Optional[str] = None):
"""


@ -313,13 +313,6 @@ class Milvus1DocumentStore(SQLDocumentStore):
index = index or self.index
self._create_collection_and_index_if_not_exist(index)
document_count = self.get_document_count(index=index)
if document_count == 0:
logger.warning("Calling DocumentStore.update_embeddings() on an empty index")
return
logger.info(f"Updating embeddings for {document_count} docs...")
result = self._query(
index=index,
vector_ids=None,
@ -327,6 +320,14 @@ class Milvus1DocumentStore(SQLDocumentStore):
filters=filters,
only_documents_without_embedding=not update_existing_embeddings,
)
document_count = len(result)
if document_count == 0:
logger.warning("Calling DocumentStore.update_embeddings() on an empty index")
return
logger.info("Updating embeddings for %s docs...", document_count)
batched_documents = get_batches_from_generator(result, batch_size)
with tqdm(
total=document_count, disable=not self.progress_bar, position=0, unit=" docs", desc="Updating Embedding"
@ -665,10 +666,10 @@ class Milvus1DocumentStore(SQLDocumentStore):
index = index or self.index
status, collection_info = self.milvus_server.get_collection_stats(collection_name=index)
if not status.OK():
logger.info(f"Failed fetch stats from store ...")
logger.info("Failed fetch stats from store ...")
return list()
logger.debug(f"collection_info = {collection_info}")
logger.debug("collection_info = %s", collection_info)
ids = list()
partition_list = collection_info["partitions"]
@ -679,16 +680,16 @@ class Milvus1DocumentStore(SQLDocumentStore):
status, id_list = self.milvus_server.list_id_in_segment(
collection_name=index, segment_name=segment_name
)
logger.debug(f"{status}: segment {segment_name} has {len(id_list)} vectors ...")
logger.debug("%s: segment %s has %s vectors ...", status, segment_name, len(id_list))
ids.extend(id_list)
if len(ids) == 0:
logger.info(f"No documents in the store ...")
logger.info("No documents in the store ...")
return list()
status, vectors = self.milvus_server.get_entity_by_id(collection_name=index, ids=ids)
if not status.OK():
logger.info(f"Failed fetch document for ids {ids} from store ...")
logger.info("Failed fetch document for ids %s from store ...", ids)
return list()
return vectors


@ -204,7 +204,7 @@ class Milvus2DocumentStore(SQLDocumentStore):
for field in custom_fields:
if field.name == self.id_field or field.name == self.embedding_field:
logger.warning(f"Skipping `{field.name}` as it is similar to `id_field` or `embedding_field`")
logger.warning("Skipping '%s' as it is similar to 'id_field' or 'embedding_field'", field.name)
else:
fields.append(field)
@ -353,7 +353,7 @@ class Milvus2DocumentStore(SQLDocumentStore):
logger.warning("Calling DocumentStore.update_embeddings() on an empty index")
return
logger.info(f"Updating embeddings for {document_count} docs...")
logger.info("Updating embeddings for %s docs...", document_count)
result = self._query(
index=index,
@ -516,7 +516,7 @@ class Milvus2DocumentStore(SQLDocumentStore):
def _delete_index(self, index: str):
if utility.has_collection(collection_name=index):
utility.drop_collection(collection_name=index)
logger.info(f"Index '{index}' deleted.")
logger.info("Index '%s' deleted.", index)
super().delete_index(index)
def get_all_documents_generator(


@ -379,7 +379,7 @@ class OpenSearchDocumentStore(BaseElasticsearchDocumentStore):
if excluded_meta_data:
body["_source"] = {"excludes": excluded_meta_data}
logger.debug(f"Retriever query: {body}")
logger.debug("Retriever query: %s", body)
result = self.client.search(index=index, body=body, request_timeout=300, headers=headers)["hits"]["hits"]
documents = [
@ -396,7 +396,7 @@ class OpenSearchDocumentStore(BaseElasticsearchDocumentStore):
"""
# Check if index_name refers to an alias
if self.client.indices.exists_alias(name=index_name):
logger.debug(f"Index name {index_name} is an alias.")
logger.debug("Index name %s is an alias.", index_name)
# check if the existing index has the embedding field; if not create it
if self.client.indices.exists(index=index_name, headers=headers):


@ -216,7 +216,7 @@ class PineconeDocumentStore(BaseDocumentStore):
stats = index_connection.describe_index_stats()
dims = stats["dimension"]
count = stats["namespaces"][""]["vector_count"] if stats["namespaces"].get("") else 0
logger.info(f"Index statistics: name: {index}, embedding dimensions: {dims}, record count: {count}")
logger.info("Index statistics: name: %s embedding dimensions: %s, record count: %s", index, dims, count)
# return index connection
return index_connection
@ -471,7 +471,7 @@ class PineconeDocumentStore(BaseDocumentStore):
logger.warning("Calling DocumentStore.update_embeddings() on an empty index")
return
logger.info(f"Updating embeddings for {document_count} docs...")
logger.info("Updating embeddings for %s docs...", document_count)
# If the embedding namespace is empty or the user does not want to update existing embeddings, we use document namespace
if self.get_embedding_count(index=index) == 0 or not update_existing_embeddings:
@ -1003,7 +1003,7 @@ class PineconeDocumentStore(BaseDocumentStore):
index = self._index_name(index)
if index in pinecone.list_indexes():
pinecone.delete_index(index)
logger.info(f"Index '{index}' deleted.")
logger.info("Index '%s' deleted.", index)
if index in self.pinecone_indexes:
del self.pinecone_indexes[index]
if index in self.all_ids:


@ -402,7 +402,7 @@ class SQLDocumentStore(BaseDocumentStore):
try:
meta_orms.append(MetaDocumentORM(name=key, value=value))
except TypeError as ex:
logger.error(f"Document {doc.id} - {ex}")
logger.error("Document %s - %s", doc.id, ex)
doc_mapping = {
"id": doc.id,
"content": doc.to_dict()["content"],
@ -425,7 +425,7 @@ class SQLDocumentStore(BaseDocumentStore):
try:
self.session.commit()
except Exception as ex:
logger.error(f"Transaction rollback: {ex.__cause__}")
logger.error("Transaction rollback: %s", ex.__cause__)
# Rollback is important here otherwise self.session will be in inconsistent state and next call will fail
self.session.rollback()
raise ex
@ -495,7 +495,7 @@ class SQLDocumentStore(BaseDocumentStore):
try:
self.session.commit()
except Exception as ex:
logger.error(f"Transaction rollback: {ex.__cause__}")
logger.error("Transaction rollback: %s", ex.__cause__)
self.session.rollback()
raise ex


@ -34,7 +34,7 @@ def eval_data_from_json(
with open(filename, "r", encoding="utf-8") as file:
data = json.load(file)
if "title" not in data["data"][0]:
logger.warning(f"No title information found for documents in QA file: {filename}")
logger.warning("No title information found for documents in QA file: %s", filename)
for squad_document in data["data"]:
if max_docs:


@ -313,7 +313,7 @@ class WeaviateDocumentStore(BaseDocumentStore):
try:
result = self.weaviate_client.data_object.get_by_id(id, with_vector=True)
except weaviate.exceptions.UnexpectedStatusCodeException as usce:
logging.debug(f"Weaviate could not get the document requested: {usce}")
logging.debug("Weaviate could not get the document requested: %s", usce)
if result:
document = self._convert_weaviate_result_to_document(result, return_embedding=True)
return document
@ -340,7 +340,7 @@ class WeaviateDocumentStore(BaseDocumentStore):
try:
result = self.weaviate_client.data_object.get_by_id(id, with_vector=True)
except weaviate.exceptions.UnexpectedStatusCodeException as usce:
logging.debug(f"Weaviate could not get the document requested: {usce}")
logging.debug("Weaviate could not get the document requested: %s", usce)
if result:
document = self._convert_weaviate_result_to_document(result, return_embedding=True)
documents.append(document)
@ -560,7 +560,7 @@ class WeaviateDocumentStore(BaseDocumentStore):
and "error" in result["result"]["errors"]
):
for message in result["result"]["errors"]["error"]:
logger.error(f"{message['message']}")
logger.error(message["message"])
progress_bar.update(batch_size)
progress_bar.close()
@ -1215,7 +1215,10 @@ class WeaviateDocumentStore(BaseDocumentStore):
raise RuntimeError("Specify the arg `embedding_field` when initializing WeaviateDocumentStore()")
if update_existing_embeddings:
logger.info(f"Updating embeddings for all {self.get_document_count(index=index)} docs ...")
logger.info(
"Updating embeddings for all %s docs ...",
self.get_document_count(index=index) if logger.level > logging.DEBUG else 0,
)
else:
raise RuntimeError(
"All the documents in Weaviate store have an embedding by default. Only update is allowed!"
@ -1379,7 +1382,7 @@ class WeaviateDocumentStore(BaseDocumentStore):
index = self._sanitize_index_name(index) or index
if any(c for c in self.weaviate_client.schema.get()["classes"] if c["class"] == index):
self.weaviate_client.schema.delete_class(index)
logger.info(f"Index '{index}' deleted.")
logger.info("Index '%s' deleted.", index)
def delete_labels(self):
"""


@ -221,7 +221,7 @@ class DataSilo:
"""
Load serialized dataset from a cache.
"""
logger.info(f"Loading datasets from cache at {cache_dir}")
logger.info("Loading datasets from cache at %s", cache_dir)
self.data["train"] = torch.load(cache_dir / "train_dataset")
dev_dataset_path = cache_dir / "dev_dataset"
@ -277,7 +277,7 @@ class DataSilo:
torch.save(self.data["test"], cache_dir / "test_dataset")
torch.save(self.tensor_names, cache_dir / "tensor_names")
logger.info(f"Cached the datasets at {cache_dir}")
logger.info("Cached the datasets at %s", cache_dir)
def _initialize_data_loaders(self):
"""


@ -63,7 +63,7 @@ def convert_features_to_dataset(features):
)
except:
logger.debug(
f"Could not determine type for feature '{t_name}'. " "Converting now to a tensor of default type long."
"Could not determine type for feature '%s'. Converting now to a tensor of default type long.", t_name
)
# Convert all remaining python objects to torch long tensors


@ -147,7 +147,7 @@ class Processor(ABC):
sig = signature(cls.subclasses[processor_name])
unused_args = {k: v for k, v in kwargs.items() if k not in sig.parameters}
logger.debug(
f"Got more parameters than needed for loading {processor_name}: {unused_args}. " f"Those won't be used!"
"Got more parameters than needed for loading %s: %s. Those won't be used!", processor_name, unused_args
)
processor = cls.subclasses[processor_name](
data_dir=data_dir,
@ -324,7 +324,9 @@ class Processor(ABC):
if problematic_sample_ids:
n_problematic = len(problematic_sample_ids)
problematic_id_str = ", ".join([str(i) for i in problematic_sample_ids])
logger.error(f"Unable to convert {n_problematic} samples to features. Their ids are : {problematic_id_str}")
logger.error(
"Unable to convert %s samples to features. Their ids are : %s", n_problematic, problematic_id_str
)
@staticmethod
def _check_sample_features(basket: SampleBasket):
@ -348,7 +350,7 @@ class Processor(ABC):
return True
def _log_samples(self, n_samples: int, baskets: List[SampleBasket]):
logger.debug("*** Show {} random examples ***".format(n_samples))
logger.debug("*** Show %s random examples ***", n_samples)
if len(baskets) == 0:
logger.debug("*** No samples to show because there are no baskets ***")
return
@ -1817,7 +1819,7 @@ class TextClassificationProcessor(Processor):
self.header = header
self.max_samples = max_samples
self.dev_stratification = dev_stratification
logger.debug(f"Currently no support in Processor for returning problematic ids")
logger.debug("Currently no support in Processor for returning problematic ids")
super(TextClassificationProcessor, self).__init__(
tokenizer=tokenizer,
@ -2141,14 +2143,14 @@ def write_squad_predictions(predictions, out_filename, predictions_filename=None
dev_labels[q["id"]] = q["answers"][0]["text"]
not_included = set(list(dev_labels.keys())) - set(list(predictions_json.keys()))
if len(not_included) > 0:
logger.info(f"There were missing predicitons for question ids: {list(not_included)}")
logger.info("There were missing predicitons for question ids: %s", list(not_included))
for x in not_included:
predictions_json[x] = ""
# os.makedirs("model_output", exist_ok=True)
# filepath = Path("model_output") / out_filename
json.dump(predictions_json, open(out_filename, "w"))
logger.info(f"Written Squad predictions to: {out_filename}")
logger.info("Written Squad predictions to: %s", out_filename)
def _read_dpr_json(
@ -2188,7 +2190,7 @@ def _read_dpr_json(
"""
# get remote dataset if needed
if not os.path.exists(file):
logger.info(f" Couldn't find {file} locally. Trying to download ...")
logger.info("Couldn't find %s locally. Trying to download ...", file)
_download_extract_downstream_data(file, proxies=proxies)
if Path(file).suffix.lower() == ".jsonl":
@ -2250,7 +2252,7 @@ def _read_dpr_json(
def _read_squad_file(filename: str, proxies=None):
"""Read a SQuAD json file"""
if not os.path.exists(filename):
logger.info(f" Couldn't find {filename} locally. Trying to download ...")
logger.info("Couldn't find %s locally. Trying to download ...", filename)
_download_extract_downstream_data(filename, proxies)
with open(filename, "r", encoding="utf-8") as reader:
input_data = json.load(reader)["data"]


@ -112,10 +112,10 @@ class Evaluator:
for head_num, head in enumerate(model.prediction_heads):
if head.model_type == "span_classification" and calibrate_conf_scores:
temperature_previous = head.temperature_for_confidence.item()
logger.info(f"temperature used for confidence scores before calibration: {temperature_previous}")
logger.info("temperature used for confidence scores before calibration: %s", temperature_previous)
head.calibrate_conf(logits_all[head_num], label_all[head_num])
temperature_current = head.temperature_for_confidence.item()
logger.info(f"temperature used for confidence scores after calibration: {temperature_current}")
logger.info("temperature used for confidence scores after calibration: %s", temperature_current)
temperature_change = (abs(temperature_current - temperature_previous) / temperature_previous) * 100.0
if temperature_change > 50:
logger.warning(


@ -283,7 +283,7 @@ class Inferencer:
else:
num_processes = mp.cpu_count()
self.process_pool = mp.Pool(processes=num_processes)
logger.info(f"Got ya {num_processes} parallel workers to do inference ...")
logger.info("Got ya %s parallel workers to do inference ...", num_processes)
log_ascii_workers(n=num_processes, logger=logger)
def close_multiprocessing_pool(self, join: bool = False):


@ -142,7 +142,7 @@ class BaseAdaptiveModel:
"does not currently support saving and loading"
)
assert len(model_files) == len(config_files), error_str
logger.info(f"Found files for loading {len(model_files)} prediction heads")
logger.info("Found files for loading %s prediction heads", len(model_files))
return model_files, config_files
@ -573,7 +573,7 @@ class AdaptiveModel(nn.Module, BaseAdaptiveModel):
try:
tracker.track_params(params)
except Exception as e:
logger.warning(f"ML logging didn't work: {e}")
logger.warning("ML logging didn't work: %s", e)
def verify_vocab_size(self, vocab_size: int):
"""


@ -376,7 +376,7 @@ class BiAdaptiveModel(nn.Module):
try:
tracker.track_params(params)
except Exception as e:
logger.warning(f"ML logging didn't work: {e}")
logger.warning("ML logging didn't work: %s", e)
def verify_vocab_size(self, vocab_size1: int, vocab_size2: int):
"""


@ -488,7 +488,7 @@ class HFLanguageModelNoSegmentIds(HFLanguageModelWithPooler):
specified using the arguments `output_hidden_states` and `output_attentions`.
"""
if segment_ids is not None:
logging.warning(f"`segment_ids` is not None, but {self.name} does not use them. They will be ignored.")
logger.warning(f"'segment_ids' is not None, but %s does not use them. They will be ignored.", self.name)
return super().forward(
input_ids=input_ids,
@ -906,7 +906,7 @@ def get_language_model(
"Ensure that the model class name can be inferred from the directory name "
"when loading a Transformers model."
)
logger.error(f"Using the AutoModel class for '{pretrained_model_name_or_path}'. This can cause crashes!")
logger.error("Using the AutoModel class for '%s'. This can cause crashes!", pretrained_model_name_or_path)
model_type = "Auto"
# Find the class corresponding to this model type
@ -918,7 +918,9 @@ def get_language_model(
f"Supported model types are: {', '.join(HUGGINGFACE_TO_HAYSTACK.keys())}"
)
logger.info(f" * LOADING MODEL: '{pretrained_model_name_or_path}' {'(' + model_type + ')' if model_type else ''}")
logger.info(
" * LOADING MODEL: '%s' %s", pretrained_model_name_or_path, "(" + model_type + ")" if model_type else ""
)
# Instantiate the class for this model
language_model = language_model_class(
@ -930,8 +932,10 @@ def get_language_model(
model_kwargs=model_kwargs,
)
logger.info(
f"Loaded '{pretrained_model_name_or_path}' ({model_type} model) "
f"from {'local file system' if config_file_exists else 'model hub'}."
"Loaded '%s' (%s model) from %s.",
pretrained_model_name_or_path,
model_type,
"local file system" if config_file_exists else "model hub",
)
return language_model
@ -963,7 +967,7 @@ def _get_model_type(
model_type = config.architectures[0] if is_supported_model(config.architectures[0]) else None
except Exception as e:
logger.error(f"AutoConfig failed to load on '{model_name_or_path}': {str(e)}")
logger.error("AutoConfig failed to load on '%s': %s", model_name_or_path, e)
if not model_type:
logger.warning("Could not infer the model type from its config. Looking for clues in the model name.")
@ -991,5 +995,5 @@ def _guess_language(name: str) -> str:
language = languages[0]
else:
language = "english"
logger.info(f"Auto-detected model language: {language}")
logger.info("Auto-detected model language: %s", language)
return language


@ -188,7 +188,7 @@ def _get_optim(model, opts: Dict):
optimizer_name = opts.pop("name", None)
# Logging
logger.info(f"Loading optimizer `{optimizer_name}`: '{opts}'")
logger.info("Loading optimizer '%s': %s", optimizer_name, opts)
tracker.track_params(opts)
tracker.track_params({"optimizer_name": optimizer_name})
@ -271,7 +271,7 @@ def get_scheduler(optimizer, opts):
except AttributeError:
raise AttributeError(f"Scheduler '{schedule_name}' not found in 'torch' or 'transformers'")
logger.info(f"Using scheduler '{schedule_name}'")
logger.info("Using scheduler '%s'", schedule_name)
# get supported args of constructor
allowed_args = inspect.signature(sched_constructor).parameters.keys()
@ -285,7 +285,7 @@ def get_scheduler(optimizer, opts):
constructor_opts = {k: v for k, v in opts.items() if k in allowed_args}
# Logging
logger.info(f"Loading schedule `{schedule_name}`: '{constructor_opts}'")
logger.info("Loading schedule '%s': '%s'", schedule_name, constructor_opts)
tracker.track_params(constructor_opts)
tracker.track_params({"schedule_name": schedule_name})


@ -265,7 +265,7 @@ class QuestionAnsweringHead(PredictionHead):
self.layer_dims = layer_dims
assert self.layer_dims[-1] == 2
self.feed_forward = FeedForwardBlock(self.layer_dims)
logger.debug(f"Prediction head initialized with size {self.layer_dims}")
logger.debug("Prediction head initialized with size %s", self.layer_dims)
self.num_labels = self.layer_dims[-1]
self.ph_output_type = "per_token_squad"
self.model_type = "span_classification" # predicts start and end token of answer


@ -409,7 +409,7 @@ class TriAdaptiveModel(nn.Module):
try:
tracker.track_params(params)
except Exception as e:
logger.warning(f"ML logging didn't work: {e}")
logger.warning("ML logging didn't work: %s", e)
def verify_vocab_size(self, vocab_size1: int, vocab_size2: int, vocab_size3: int):
"""Verifies that the model fits to the tokenizer vocabulary.


@ -192,9 +192,9 @@ class Trainer:
# when resuming training from a checkpoint, we want to fast forward to the step of the checkpoint
if resume_from_step and step <= resume_from_step:
if step % 10000 == 0:
logger.info(f"Skipping {step} out of {resume_from_step} steps ...")
logger.info("Skipping %s out of %s steps ...", step, resume_from_step)
if resume_from_step == step:
logger.info(f"Finished skipping {resume_from_step} steps ...")
logger.info("Finished skipping %s steps ...", resume_from_step)
resume_from_step = None
else:
continue
@ -385,9 +385,9 @@ class Trainer:
trainer = cls._load_checkpoint(
path=checkpoint_to_load, data_silo=data_silo, model=model, optimizer=optimizer, local_rank=local_rank
)
logging.info(f"Resuming training from the train checkpoint at {checkpoint_to_load} ...")
logging.info("Resuming training from the train checkpoint at %s ...", checkpoint_to_load)
else:
logging.info(f"No train checkpoints found. Starting a new training ...")
logging.info("No train checkpoints found. Starting a new training ...")
trainer = cls(
data_silo=data_silo,
model=model,
@ -447,7 +447,7 @@ class Trainer:
data_silo=data_silo, model=model, optimizer=optimizer, lr_schedule=scheduler, **trainer_state_dict
)
logger.info(f"Loaded a train checkpoint from {path}")
logger.info("Loaded a train checkpoint from %s", path)
return trainer
@classmethod
@ -516,7 +516,7 @@ class Trainer:
for cp in saved_checkpoints[self.checkpoints_to_keep :]:
shutil.rmtree(cp)
logger.info(f"Saved a training checkpoint after {checkpoint_name}")
logger.info("Saved a training checkpoint after %s", checkpoint_name)
def _get_state_dict(self):
"""


@ -102,8 +102,9 @@ def initialize_device_settings(
device_to_replace = torch.device("cuda")
devices_to_use = [torch.device("cuda:0") if device == device_to_replace else device for device in devices_to_use]
logger.info(f"Using devices: {', '.join([str(device) for device in devices_to_use]).upper()}")
logger.info(f"Number of GPUs: {n_gpu}")
logger.info(
"Using devices: %s - Number of GPUs: %s", ", ".join([str(device) for device in devices_to_use]).upper(), n_gpu
)
return devices_to_use, n_gpu
@ -135,7 +136,7 @@ def try_get(keys, dictionary):
ret = ret[0]
return ret
except Exception as e:
logger.warning(f"Cannot extract from dict {dictionary} with error: {e}")
logger.warning("Cannot extract from dict %s with error: %s", dictionary, e)
return None


@ -176,7 +176,7 @@ def create_schema_for_node_class(node_class: Type[BaseComponent]) -> Tuple[Dict[
node_name = getattr(node_class, "__name__")
logger.info(f"Creating schema for '{node_name}'")
logger.info("Creating schema for '%s'", node_name)
# Read the relevant init parameters from __init__'s signature
init_method = getattr(node_class, "__init__", None)
@ -400,7 +400,7 @@ def inject_definition_in_schema(node_class: Type[BaseComponent], schema: Dict[st
schema_definition, node_ref = create_schema_for_node_class(node_class)
schema["definitions"].update(schema_definition)
schema["properties"]["components"]["items"]["anyOf"].append(node_ref)
logger.info(f"Added definition for {getattr(node_class, '__name__')}")
logger.info("Added definition for %s", getattr(node_class, "__name__"))
return schema


@ -124,7 +124,7 @@ class RAGenerator(BaseGenerator):
if top_k > self.num_beams:
top_k = self.num_beams
logger.warning(f"top_k value should not be greater than num_beams, hence setting it to {num_beams}")
logger.warning("top_k value should not be greater than num_beams, hence setting it to %s", num_beams)
self.top_k = top_k
@ -238,7 +238,7 @@ class RAGenerator(BaseGenerator):
if top_k > self.num_beams:
top_k = self.num_beams
logger.warning(f"top_k value should not be greater than num_beams, hence setting it to {top_k}")
logger.warning("top_k value should not be greater than num_beams, hence setting it to %s", top_k)
# Flatten the documents so easy to reference
flat_docs_dict = self._flatten_docs(documents)
@ -382,7 +382,7 @@ class Seq2SeqGenerator(BaseGenerator):
if top_k > self.num_beams:
top_k = self.num_beams
logger.warning(f"top_k value should not be greater than num_beams, hence setting it to {num_beams}")
logger.warning("top_k value should not be greater than num_beams, hence setting it to %s", num_beams)
self.top_k = top_k
@ -434,7 +434,7 @@ class Seq2SeqGenerator(BaseGenerator):
if top_k > self.num_beams:
top_k = self.num_beams
logger.warning(f"top_k value should not be greater than num_beams, hence setting it to {top_k}")
logger.warning("top_k value should not be greater than num_beams, hence setting it to %s", top_k)
converter: Callable = Seq2SeqGenerator._get_converter(self.model_name_or_path)
if not converter:


@ -216,9 +216,11 @@ class Crawler(BaseComponent):
file_paths: list = []
is_not_empty = len(list(output_dir.rglob("*"))) > 0
if is_not_empty and not overwrite_existing_files:
logger.info(f"Found data stored in `{output_dir}`. Delete this first if you really want to fetch new data.")
logger.info(
"Found data stored in `%s`. Delete this first if you really want to fetch new data.", output_dir
)
else:
logger.info(f"Fetching from {urls} to `{output_dir}`")
logger.info("Fetching from %s to `%s`", urls, output_dir)
# Start by writing out the initial list of urls
if filter_urls:
@ -278,7 +280,7 @@ class Crawler(BaseComponent):
) -> List[Path]:
paths = []
for link in urls:
logger.info(f"writing contents from `{link}`")
logger.info("writing contents from '%s'", link)
self.driver.get(link)
if loading_wait_time is not None:
time.sleep(loading_wait_time)


@ -37,7 +37,7 @@ class BaseDocumentClassifier(BaseComponent):
results = []
document_ids = [doc.id for doc in results]
logger.debug(f"Classified documents with IDs: {document_ids}")
logger.debug("Classified documents with IDs: %s", document_ids)
# convert back to dicts if we are in an indexing pipeline
if root_node == "File":
@ -54,11 +54,11 @@ class BaseDocumentClassifier(BaseComponent):
if isinstance(documents[0], Document):
document_ids = [doc.id for doc in results]
logger.debug(f"Classified documents with IDs: {document_ids}")
logger.debug("Classified documents with IDs: %s", document_ids)
else:
for doc_list in results:
document_ids = [doc.id for doc in doc_list]
logger.debug(f"Classified documents with IDs: {document_ids}")
logger.debug("Classified documents with IDs: %s", document_ids)
return output, "output_1"


@ -135,7 +135,7 @@ class ImageToTextConverter(BaseConverter):
# remove lines having > 40% of words as digits AND not ending with a period(.)
if remove_numeric_tables:
if words and len(digits) / len(words) > 0.4 and not line.strip().endswith("."):
logger.debug(f"Removing line '{line}' from file")
logger.debug("Removing line '%s' from file", line)
continue
cleaned_lines.append(line)


@ -140,7 +140,7 @@ class PDFToTextConverter(BaseConverter):
# remove lines having > 40% of words as digits AND not ending with a period(.)
if remove_numeric_tables:
if words and len(digits) / len(words) > 0.4 and not line.strip().endswith("."):
logger.debug(f"Removing line '{line}' from {file_path}")
logger.debug("Removing line '%s' from %s", line, file_path)
continue
cleaned_lines.append(line)
@ -256,7 +256,7 @@ class PDFToTextOCRConverter(BaseConverter):
image.save(temp_img.name)
pages.append(self.image_2_text.convert(file_path=temp_img.name)[0].content)
except Exception as exception:
logger.error(f"File {file_path} has an error \n {exception}")
logger.error("File %s has an error:\n%s", file_path, exception)
raw_text = "\f".join(pages)
document = Document(content=raw_text, meta=meta, id_hash_keys=id_hash_keys)


@ -160,7 +160,7 @@ class TikaConverter(BaseConverter):
# remove lines having > 40% of words as digits AND not ending with a period(.)
if remove_numeric_tables:
if words and len(digits) / len(words) > 0.4 and not line.strip().endswith("."):
logger.debug(f"Removing line '{line}' from {file_path}")
logger.debug("Removing line '%s' from %s", line, file_path)
continue
cleaned_lines.append(line)


@ -63,7 +63,7 @@ class TextConverter(BaseConverter):
# remove lines having > 40% of words as digits AND not ending with a period(.)
if remove_numeric_tables:
if words and len(digits) / len(words) > 0.4 and not line.strip().endswith("."):
logger.debug(f"Removing line '{line}' from {file_path}")
logger.debug("Removing line '%s' from %s", line, file_path)
continue
cleaned_lines.append(line)


@ -465,7 +465,7 @@ class PreProcessor(BasePreProcessor):
found_footer = self._find_longest_common_ngram(end_of_pages)
if found_footer:
pages = [page.replace(found_footer, "") for page in pages]
logger.debug(f"Removed header '{found_header}' and footer '{found_footer}' in document")
logger.debug("Removed header '%s' and footer '%s' in document", found_header, found_footer)
text = "\f".join(pages)
return text
@ -537,10 +537,10 @@ class PreProcessor(BasePreProcessor):
sentence_tokenizer = nltk.data.load(f"file:{str(tokenizer_model_path)}", format="pickle")
sentences = sentence_tokenizer.tokenize(text)
except LookupError:
logger.exception(f"PreProcessor couldn't load sentence tokenizer from {str(tokenizer_model_path)}")
logger.exception("PreProcessor couldn't load sentence tokenizer from %s", tokenizer_model_path)
except (UnpicklingError, ValueError) as e:
logger.exception(
f"PreProcessor couldn't determine model format of sentence tokenizer at {str(tokenizer_model_path)}."
"PreProcessor couldn't determine model format of sentence tokenizer at %s", tokenizer_model_path
)
if sentences:
return sentences


@ -41,7 +41,7 @@ class BaseRanker(BaseComponent):
results = []
document_ids = [doc.id for doc in results]
logger.debug(f"Retrieved documents with IDs: {document_ids}")
logger.debug("Retrieved documents with IDs: %s", document_ids)
output = {"documents": results}
return output, "output_1"
@ -59,7 +59,7 @@ class BaseRanker(BaseComponent):
for doc_list in results:
document_ids = [doc.id for doc in doc_list]
logger.debug(f"Ranked documents with IDs: {document_ids}")
logger.debug("Ranked documents with IDs: %s", document_ids)
output = {"documents": results}


@ -728,7 +728,7 @@ class FARMReader(BaseReader):
:param directory: Directory where the Reader model should be saved
"""
logger.info(f"Saving reader model to {directory}")
logger.info("Saving reader model to %s", directory)
self.inferencer.model.save(directory)
self.inferencer.processor.save(directory)
@ -1026,7 +1026,7 @@ class FARMReader(BaseReader):
aggregated_per_doc = defaultdict(list)
for label in labels:
if not label.document.id:
logger.error(f"Label does not contain a document id")
logger.error("Label does not contain a document id")
continue
aggregated_per_doc[label.document.id].append(label)
@ -1036,7 +1036,7 @@ class FARMReader(BaseReader):
for doc_id in all_doc_ids:
doc = document_store.get_document_by_id(doc_id, index=doc_index)
if not doc:
logger.error(f"Document with the ID '{doc_id}' is not present in the document store.")
logger.error("Document with the ID '%s' is not present in the document store.", doc_id)
continue
d[str(doc_id)] = {"context": doc.content}
# get all questions / answers
@ -1047,7 +1047,7 @@ class FARMReader(BaseReader):
for label in aggregated_per_doc[doc_id]:
aggregation_key = (doc_id, label.query)
if label.answer is None:
logger.error(f"Label.answer was None, but Answer object was expected: {label} ")
logger.error("Label.answer was None, but Answer object was expected: %s", label)
continue
if label.answer.offsets_in_document is None:
logger.error(


@ -177,13 +177,13 @@ class TableReader(BaseReader):
no_answer_score = 1.0
for document in documents:
if document.content_type != "table":
logger.warning(f"Skipping document with id '{document.id}' in TableReader as it is not of type table.")
logger.warning("Skipping document with id '%s' in TableReader as it is not of type table.", document.id)
continue
table: pd.DataFrame = document.content
if table.shape[0] == 0:
logger.warning(
f"Skipping document with id '{document.id}' in TableReader as it does not contain any rows."
"Skipping document with id '%s' in TableReader as it does not contain any rows.", document.id
)
continue
# Tokenize query and current table
@ -656,13 +656,13 @@ class RCIReader(BaseReader):
answers = []
for document in documents:
if document.content_type != "table":
logger.warning(f"Skipping document with id '{document.id}' in RCIReader as it is not of type table.")
logger.warning("Skipping document with id '%s' in RCIReader as it is not of type table.", document.id)
continue
table: pd.DataFrame = document.content
if table.shape[0] == 0:
logger.warning(
f"Skipping document with id '{document.id}' in RCIReader as it does not contain any rows."
"Skipping document with id '%s' in RCIReader as it does not contain any rows.", document.id
)
continue
table = table.astype(str)


@ -221,7 +221,7 @@ class _SentenceTransformersEmbeddingEncoder(_BaseEmbeddingEncoder):
else:
train_examples.append(InputExample(texts=texts))
logger.info(f"Training/adapting {self.embedding_model} with {len(train_examples)} examples")
logger.info("Training/adapting %s with %s examples", self.embedding_model, len(train_examples))
train_dataloader = DataLoader(train_examples, batch_size=batch_size, drop_last=True, shuffle=True)
train_loss = st_loss.loss(self.embedding_model)


@ -328,7 +328,7 @@ class BaseRetriever(BaseComponent):
query=query, filters=filters, top_k=top_k, index=index, headers=headers, scale_score=scale_score
)
document_ids = [doc.id for doc in documents]
logger.debug(f"Retrieved documents with IDs: {document_ids}")
logger.debug("Retrieved documents with IDs: %s", document_ids)
output = {"documents": documents}
return output, "output_1"
@ -351,13 +351,13 @@ class BaseRetriever(BaseComponent):
if not isinstance(doc, Document):
raise HaystackError(f"doc was of type {type(doc)}, but expected a Document.")
document_ids.append(doc.id)
logger.debug(f"Retrieved documents with IDs: {document_ids}")
logger.debug("Retrieved documents with IDs: %s", document_ids)
else:
for doc_list in documents:
if not isinstance(doc_list, list):
raise HaystackError(f"doc_list was of type {type(doc_list)}, but expected a list of Documents.")
document_ids = [doc.id for doc in doc_list]
logger.debug(f"Retrieved documents with IDs: {document_ids}")
logger.debug("Retrieved documents with IDs: %s", document_ids)
output = {"documents": documents}
return output, "output_1"


@ -752,7 +752,7 @@ class DensePassageRetriever(BaseRetriever):
use_fast_tokenizers=use_fast_tokenizers,
similarity_function=similarity_function,
)
logger.info(f"DPR model loaded from {load_dir}")
logger.info("DPR model loaded from %s", load_dir)
return dpr
@ -1435,7 +1435,7 @@ class TableTextRetriever(BaseRetriever):
use_fast_tokenizers=use_fast_tokenizers,
similarity_function=similarity_function,
)
logger.info(f"TableTextRetriever model loaded from {load_dir}")
logger.info("TableTextRetriever model loaded from %s", load_dir)
return mm_retriever
@ -1530,7 +1530,7 @@ class EmbeddingRetriever(BaseRetriever):
else model_format
)
logger.info(f"Init retriever using embeddings of model {embedding_model}")
logger.info("Init retriever using embeddings of model %s", embedding_model)
if self.model_format not in _EMBEDDING_ENCODERS.keys():
raise ValueError(f"Unknown retriever embedding model format {model_format}")


@ -438,7 +438,7 @@ class TfidfRetriever(BaseRetriever):
continue
paragraphs.append(Paragraph(document_id=doc.id, paragraph_id=p_id, content=(p,), meta=doc.meta))
p_id += 1
logger.info(f"Found {len(paragraphs)} candidate paragraphs from {len(documents)} docs in DB")
logger.info("Found %s candidate paragraphs from %s docs in DB", len(paragraphs), len(documents))
return paragraphs
def _calc_scores(self, queries: Union[str, List[str]]) -> List[Dict[int, float]]:
@ -514,7 +514,9 @@ class TfidfRetriever(BaseRetriever):
df_sliced = df_sliced[:top_k]
logger.debug(
f"Identified {df_sliced.shape[0]} candidates via retriever:\n {df_sliced.to_string(col_space=10, index=False)}"
"Identified %s candidates via retriever:\n%s",
df_sliced.shape[0],
df_sliced.to_string(col_space=10, index=False),
)
# get actual content for the top candidates
@ -585,8 +587,9 @@ class TfidfRetriever(BaseRetriever):
df_sliced = self.df.loc[query_result.keys()]
df_sliced = df_sliced[:top_k]
logger.debug(
f"Identified {df_sliced.shape[0]} candidates via retriever:"
f"\n {df_sliced.to_string(col_space=10, index=False)}"
"Identified %s candidates via retriever:\n%s",
df_sliced.shape[0],
df_sliced.to_string(col_space=10, index=False),
)
# get actual content for the top candidates


@ -14,7 +14,6 @@ import json
import inspect
import logging
import tempfile
import traceback
from pathlib import Path
import yaml
@ -296,14 +295,14 @@ class Pipeline:
f"Deployed pipeline configs are not allowed to be updated. Please undeploy pipeline config '{pipeline_config_name}' first."
)
client.update_pipeline_config(config=config, pipeline_config_name=pipeline_config_name)
logger.info(f"Pipeline config '{pipeline_config_name}' successfully updated.")
logger.info("Pipeline config '%s' successfully updated.", pipeline_config_name)
else:
raise ValueError(
f"Pipeline config '{pipeline_config_name}' already exists. Set `overwrite=True` to overwrite pipeline config."
)
else:
client.save_pipeline_config(config=config, pipeline_config_name=pipeline_config_name)
logger.info(f"Pipeline config '{pipeline_config_name}' successfully created.")
logger.info("Pipeline config '%s' successfully created.", pipeline_config_name)
@classmethod
def deploy_on_deepset_cloud(
@ -508,13 +507,15 @@ class Pipeline:
predecessors = set(nx.ancestors(self.graph, node_id))
if predecessors.isdisjoint(set(queue.keys())): # only execute if predecessor nodes are executed
try:
logger.debug(f"Running node `{node_id}` with input `{node_input}`")
logger.debug("Running node '%s` with input: %s", node_id, node_input)
node_output, stream_id = self._run_node(node_id, node_input)
except Exception as e:
tb = traceback.format_exc()
# The input might be a really large object with thousands of embeddings.
# If you really want to see it, raise the log level.
logger.debug("Exception while running node '%s' with input %s", node_id, node_input)
raise Exception(
f"Exception while running node `{node_id}` with input `{node_input}`: {e}, full stack trace: {tb}"
)
f"Exception while running node '{node_id}': {e}\nEnable debug logging to see the data that was passed when the pipeline failed."
) from e
queue.pop(node_id)
#
if stream_id == "split":
@ -660,14 +661,15 @@ class Pipeline:
predecessors = set(nx.ancestors(self.graph, node_id))
if predecessors.isdisjoint(set(queue.keys())): # only execute if predecessor nodes are executed
try:
logger.debug(f"Running node `{node_id}` with input `{node_input}`")
logger.debug("Running node '%s` with input: %s", node_id, node_input)
node_output, stream_id = self.graph.nodes[node_id]["component"]._dispatch_run_batch(**node_input)
except Exception as e:
tb = traceback.format_exc()
# The input might be a really large object with thousands of embeddings.
# If you really want to see it, raise the log level.
logger.debug("Exception while running node '%s' with input %s", node_id, node_input)
raise Exception(
f"Exception while running node `{node_id}` with input `{node_input}`: {e}, "
f"full stack trace: {tb}"
)
f"Exception while running node '{node_id}': {e}\nEnable debug logging to see the data that was passed when the pipeline failed."
) from e
queue.pop(node_id)
if stream_id == "split":
@ -746,7 +748,7 @@ class Pipeline:
url = f"https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{dataset}.zip"
data_path = util.download_and_unzip(url, dataset_dir)
logger.info(f"Dataset downloaded here: {data_path}")
logger.info("Dataset downloaded here: %s", data_path)
corpus, queries, qrels = GenericDataLoader(data_path).load(split="test") # or split = "train" or "dev"
# check index before eval
@ -776,11 +778,11 @@ class Pipeline:
# Clean up document store
if not keep_index and document_store is not None and document_store.index is not None:
logger.info(f"Cleaning up: deleting index '{document_store.index}'...")
logger.info("Cleaning up: deleting index '%s' ...", document_store.index)
document_store.delete_index(document_store.index)
# Evaluate your retrieval using NDCG@k, MAP@K ...
logger.info(f"Retriever evaluation for k in: {retriever.k_values}")
logger.info("Retriever evaluation for k in: %s", retriever.k_values)
ndcg, map_, recall, precision = retriever.evaluate(qrels, results, retriever.k_values)
return ndcg, map_, recall, precision
@ -985,10 +987,10 @@ class Pipeline:
if not reuse_index:
raise HaystackError(f"Index '{document_store.index}' is not empty. Please provide an empty index.")
else:
logger.info(f"indexing {len(corpus_file_paths)} documents...")
logger.info("indexing %s documents...", len(corpus_file_paths))
index_pipeline.run(file_paths=corpus_file_paths, meta=corpus_file_metas, params=index_params)
document_count = document_store.get_document_count()
logger.info(f"indexing {len(evaluation_set_labels)} files to {document_count} documents finished.")
logger.info("indexing %s files to %s documents finished.", len(evaluation_set_labels), document_count)
tracker.track_params({"pipeline_index_document_count": document_count})
@ -1056,7 +1058,7 @@ class Pipeline:
# Clean up document store
if not reuse_index and document_store.index is not None:
logger.info(f"Cleaning up: deleting index '{document_store.index}'...")
logger.info("Cleaning up: deleting index '%s'...", document_store.index)
document_store.delete_index(document_store.index)
finally:
@ -1435,7 +1437,7 @@ class Pipeline:
for i, (query, query_labels) in enumerate(zip(queries, query_labels_per_query)):
if query_labels is None or query_labels.labels is None:
logger.warning(f"There is no label for query '{query}'. Query will be omitted.")
logger.warning("There is no label for query '%s'. Query will be omitted.", query)
continue
# remarks for no_answers:
@ -1925,7 +1927,7 @@ class Pipeline:
component_params = definitions[name].get("params", {})
component_type = definitions[name]["type"]
logger.debug(f"Loading component `{name}` of type `{definitions[name]['type']}`")
logger.debug(f"Loading component '%s' of type '%s'", name, definitions[name]["type"])
for key, value in component_params.items():
# Component params can reference to other components. For instance, a Retriever can reference a
@ -2188,9 +2190,9 @@ class _HaystackBeirRetrieverAdapter:
file_paths.append(file_path)
metas.append({"id": id, "name": doc.get("title", None)})
logger.info(f"indexing {len(corpus)} documents...")
logger.info("indexing %s documents...", len(corpus))
self.index_pipeline.run(file_paths=file_paths, meta=metas, params=self.index_params)
logger.info(f"indexing finished.")
logger.info("indexing finished.")
# adjust query_params to ensure top_k is retrieved
query_params = copy.deepcopy(self.query_params)


@ -208,7 +208,7 @@ def validate_yaml(
"""
pipeline_config = read_pipeline_config_from_yaml(path)
validate_config(pipeline_config=pipeline_config, strict_version_check=strict_version_check, extras=extras)
logging.debug(f"'{path}' contains valid Haystack pipelines.")
logging.debug("'%s' contains valid Haystack pipelines.", path)
def validate_config(
@ -343,7 +343,7 @@ def validate_schema(pipeline_config: Dict, strict_version_check: bool = False, e
f"Validation failed. {validation.message}. {error_location} " "See the stacktrace for more information."
) from validation
logging.debug(f"The given configuration is valid according to the JSON schema.")
logging.debug("The given configuration is valid according to the JSON schema.")
def validate_pipeline_graph(pipeline_definition: Dict[str, Any], component_definitions: Dict[str, Any]):
@ -357,7 +357,7 @@ def validate_pipeline_graph(pipeline_definition: Dict[str, Any], component_defin
graph = _init_pipeline_graph(root_node_name=root_node_name)
for node in pipeline_definition["nodes"]:
graph = _add_node_to_pipeline_graph(graph=graph, node=node, components=component_definitions)
logging.debug(f"The graph for pipeline '{pipeline_definition['name']}' is valid.")
logging.debug("The graph for pipeline '%s' is valid.", pipeline_definition["name"])
def _find_root_in_pipeline_definition(pipeline_definition: Dict[str, Any]):


@ -225,6 +225,9 @@ class Document:
)
def __repr__(self):
values = self.to_dict()
if values.get("embedding", False):
values["embedding"] = f"<embedding of shape {values['embedding'].get('shape', '[no shape]')}>"
return f"<Document: {str(self.to_dict())}>"
def __str__(self):
@ -1389,7 +1392,7 @@ class EvaluationResult:
index=False, quoting=csv.QUOTE_NONNUMERIC (to avoid problems with \r chars)
"""
out_dir = out_dir if isinstance(out_dir, Path) else Path(out_dir)
logger.info(f"Saving evaluation results to {out_dir}")
logger.info("Saving evaluation results to %s", out_dir)
if not out_dir.exists():
out_dir.mkdir(parents=True)
for node_name, df in self.node_results.items():


@ -73,7 +73,7 @@ def enable_writing_events_to_file():
Enables writing each event that is sent to the log file specified in LOG_PATH
"""
os.environ[HAYSTACK_TELEMETRY_LOGGING_TO_FILE_ENABLED] = "True"
logger.info(f"Writing events to log file {LOG_PATH} has been enabled.")
logger.info("Writing events to log file %s has been enabled.", LOG_PATH)
def disable_writing_events_to_file():
@ -81,7 +81,7 @@ def disable_writing_events_to_file():
Disables writing each event that is sent to the log file specified in LOG_PATH
"""
os.environ[HAYSTACK_TELEMETRY_LOGGING_TO_FILE_ENABLED] = "False"
logger.info(f"Writing events to log file {LOG_PATH} has been disabled.")
logger.info("Writing events to log file %s has been disabled.", LOG_PATH)
def is_telemetry_enabled() -> bool:
@ -235,7 +235,7 @@ def _read_telemetry_config():
if "user_id" in config and user_id is None:
user_id = config["user_id"]
except Exception as e:
logger.debug(f"Telemetry was not able to read the config file {CONFIG_PATH}.", exc_info=e)
logger.debug("Telemetry was not able to read the config file %s", CONFIG_PATH, exc_info=e)
def _write_telemetry_config():
@ -257,7 +257,7 @@ def _write_telemetry_config():
with open(CONFIG_PATH, "w") as outfile:
yaml.dump(config, outfile, default_flow_style=False)
except Exception:
logger.debug(f"Could not write config file to {CONFIG_PATH}.")
logger.debug("Could not write config file to %s", CONFIG_PATH)
send_custom_event(event="config saving failed")
@ -266,7 +266,7 @@ def _write_event_to_telemetry_log_file(distinct_id: str, event: str, properties:
with open(LOG_PATH, "a") as file_object:
file_object.write(f"{event}, {properties}, {distinct_id}\n")
except Exception as e:
logger.debug(f"Telemetry was not able to write event to log file {LOG_PATH}.", exc_info=e)
logger.debug("Telemetry was not able to write event to log file %s", LOG_PATH, exc_info=e)
def _delete_telemetry_file(file_type_to_delete: TelemetryFileType):
@ -279,7 +279,7 @@ def _delete_telemetry_file(file_type_to_delete: TelemetryFileType):
try:
path.unlink() # todo add missing_ok=True to the unlink() call when upgrading to python>3.7
except Exception as e:
logger.debug(f"Telemetry was not able to delete the {file_type_to_delete} at {path}.", exc_info=e)
logger.debug("Telemetry was not able to delete the %s at %s", file_type_to_delete, path, exc_info=e)
class NonPrivateParameters:


@ -546,7 +546,7 @@ class PipelineClient:
pipelines_url = f"{workspace_url}/pipelines"
response = self.client.post(url=pipelines_url, data=yaml.dump(config), headers=headers).json()
if "name" not in response or response["name"] != pipeline_config_name:
logger.warning(f"Unexpected response from saving pipeline config: {response}")
logger.warning("Unexpected response from saving pipeline config: %s", response)
def update_pipeline_config(
self,
@ -568,7 +568,7 @@ class PipelineClient:
yaml_url = f"{pipeline_url}/yaml"
response = self.client.put(url=yaml_url, data=yaml.dump(config), headers=headers).json()
if "name" not in response or response["name"] != pipeline_config_name:
logger.warning(f"Unexpected response from updating pipeline config: {response}")
logger.warning("Unexpected response from updating pipeline config: %s", response)
def deploy(
self,
@ -609,9 +609,9 @@ class PipelineClient:
if status == PipelineStatus.DEPLOYED:
if changed:
logger.info(f"Pipeline config '{pipeline_config_name}' successfully deployed.")
logger.info("Pipeline config '%s' successfully deployed.", pipeline_config_name)
else:
logger.info(f"Pipeline config '{pipeline_config_name}' is already deployed.")
logger.info("Pipeline config '%s' is already deployed.", pipeline_config_name)
logger.info(
f"Search endpoint for pipeline config '{pipeline_config_name}' is up and running for you under {pipeline_url}"
)
@ -628,7 +628,7 @@ class PipelineClient:
f" ]\n"
f"}}'"
)
logger.info(f"Try it out using the following curl command:\n{curl_cmd}")
logger.info("Try it out using the following curl command:\n%s", curl_cmd)
elif status == PipelineStatus.DEPLOYMENT_FAILED:
raise DeepsetCloudError(
@ -673,9 +673,9 @@ class PipelineClient:
if status == PipelineStatus.UNDEPLOYED:
if changed:
logger.info(f"Pipeline config '{pipeline_config_name}' successfully undeployed.")
logger.info("Pipeline config '%s' successfully undeployed.", pipeline_config_name)
else:
logger.info(f"Pipeline config '{pipeline_config_name}' is already undeployed.")
logger.info("Pipeline config '%s' is already undeployed.", pipeline_config_name)
elif status in [PipelineStatus.DEPLOYMENT_IN_PROGRESS, PipelineStatus.DEPLOYMENT_SCHEDULED]:
raise DeepsetCloudError(
f"Undeployment of pipeline config '{pipeline_config_name}' aborted. Deployment was requested."
@ -754,7 +754,7 @@ class PipelineClient:
raise DeepsetCloudError(f"Pipeline config '{pipeline_config_name}' does not exist anymore.")
status = PipelineStatus.from_str(pipeline_info["status"])
if status in valid_transitioning_states:
logger.info(f"Current status of '{pipeline_config_name}' is: '{status}'")
logger.info("Current status of '%s' is: '%s'", pipeline_config_name, status)
time.sleep(5)
return status, True
@ -907,7 +907,7 @@ class EvaluationSetClient:
f"Successfully uploaded evaluation set file {file_path}. You can access it now under evaluation set '{file_path.name}'."
)
except DeepsetCloudError as e:
logger.error(f"Error uploading evaluation set file {file_path}: {e.args}")
logger.error("Error uploading evaluation set file %s: %s", file_path, e.args)
def get_evaluation_set(
self, evaluation_set: Optional[str] = None, workspace: Optional[str] = None
@ -993,9 +993,9 @@ class FileClient:
file_id = response_file_upload.json().get("file_id")
file_ids.append(file_id)
except Exception as e:
logger.exception(f"Error uploading file {file_path}")
logger.exception("Error uploading file %s", file_path)
logger.info(f"Successfully uploaded {len(file_ids)} files.")
logger.info("Successfully uploaded %s files.", len(file_ids))
def delete_file(self, file_id: str, workspace: Optional[str] = None, headers: dict = None):
"""
@ -1147,7 +1147,7 @@ class EvaluationRunClient:
eval_run_url = f"{workspace_url}/eval_runs/{eval_run_name}"
response = self.client.delete(eval_run_url, headers=headers)
if response.status_code == 204:
logger.info(f"Evaluation run '{eval_run_name}' deleted.")
logger.info("Evaluation run '%s' deleted.", eval_run_name)
def start_eval_run(self, eval_run_name: str, workspace: Optional[str] = None, headers: dict = None):
"""
@ -1162,7 +1162,7 @@ class EvaluationRunClient:
eval_run_url = f"{workspace_url}/eval_runs/{eval_run_name}/start"
response = self.client.post(eval_run_url, headers=headers)
if response.status_code == 204:
logger.info(f"Evaluation run '{eval_run_name}' has been started.")
logger.info("Evaluation run '%s' has been started.", eval_run_name)
def update_eval_run(
self,

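For readers scanning the diff: every change in this commit has the same shape, swapping eager f-string interpolation for logging's lazy %-style arguments. A minimal sketch of the difference, using a made-up pipeline name rather than anything from the client above:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

pipeline_config_name = "my_pipeline"  # placeholder value, for illustration only

# Eager: the f-string is built even when INFO is disabled for this logger.
logger.info(f"Pipeline config '{pipeline_config_name}' successfully deployed.")

# Lazy: interpolation happens only if a handler actually emits the record,
# and the constant template lets log aggregation group identical messages.
logger.info("Pipeline config '%s' successfully deployed.", pipeline_config_name)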
View File

@ -79,7 +79,7 @@ def launch_weaviate(sleep=15):
def stop_container(container_name, delete_container=False):
logger.debug(f"Stopping {container_name}...")
logger.debug("Stopping %s...", container_name)
status = subprocess.run([f"docker stop {container_name}"], shell=True)
if status.returncode:
logger.warning(
@ -117,7 +117,7 @@ def stop_service(document_store, delete_container=False):
elif "WeaviateDocumentStore" in ds_class:
stop_weaviate(delete_container)
else:
logger.warning(f"No support yet for auto stopping the service behind a {type(document_store)}")
logger.warning("No support yet for auto stopping the service behind a %s", type(document_store))
def launch_milvus(sleep=15, delete_existing=False):

View File

@ -28,6 +28,6 @@ def cache_models(models: List[str] = None, use_auth_token: Optional[Union[str, b
import transformers
for model_to_cache in models:
logging.info(f"Caching {model_to_cache}")
logging.info("Caching %s", model_to_cache)
transformers.AutoTokenizer.from_pretrained(model_to_cache, use_auth_token=use_auth_token)
transformers.AutoModel.from_pretrained(model_to_cache, use_auth_token=use_auth_token)

View File

@ -117,19 +117,19 @@ class StdoutTrackingHead(BaseTrackingHead):
def init_experiment(
self, experiment_name: str, run_name: str = None, tags: Dict[str, Any] = None, nested: bool = False
):
logger.info(f"\n **** Starting experiment '{experiment_name}' (Run: {run_name}) ****")
logger.info("\n **** Starting experiment '%s' (Run: %s) ****", experiment_name, run_name)
def track_metrics(self, metrics: Dict[str, Any], step: int):
logger.info(f"Logged metrics at step {step}: \n {metrics}")
logger.info("Logged metrics at step %s: \n %s", step, metrics)
def track_params(self, params: Dict[str, Any]):
logger.info(f"Logged parameters: \n {params}")
logger.info("Logged parameters: \n %s", params)
def track_artifacts(self, dir_path: Union[str, Path], artifact_path: str = None):
logger.warning(f"Cannot log artifacts with StdoutLogger: \n {dir_path}")
logger.warning("Cannot log artifacts with StdoutLogger: \n %s", dir_path)
def end_run(self):
logger.info(f"**** End of Experiment **** ")
logger.info("**** End of Experiment **** ")
class MLflowTrackingHead(BaseTrackingHead):
@ -148,7 +148,9 @@ class MLflowTrackingHead(BaseTrackingHead):
mlflow.set_tracking_uri(self.tracking_uri)
mlflow.set_experiment(experiment_name)
mlflow.start_run(run_name=run_name, nested=nested, tags=tags)
logger.info(f"Tracking run {run_name} of experiment {experiment_name} by mlflow under {self.tracking_uri}")
logger.info(
"Tracking run %s of experiment %s by mlflow under %s", run_name, experiment_name, self.tracking_uri
)
if self.auto_track_environment:
mlflow.log_params(flatten_dict({"environment": get_or_create_env_meta_data()}))
except ConnectionError:
@ -163,9 +165,9 @@ class MLflowTrackingHead(BaseTrackingHead):
metrics = flatten_dict(metrics)
mlflow.log_metrics(metrics, step=step)
except ConnectionError:
logger.warning(f"ConnectionError in logging metrics to MLflow.")
logger.warning("ConnectionError in logging metrics to MLflow.")
except Exception as e:
logger.warning(f"Failed to log metrics: {e}")
logger.warning("Failed to log metrics: %s", e)
def track_params(self, params: Dict[str, Any]):
try:
@ -174,15 +176,15 @@ class MLflowTrackingHead(BaseTrackingHead):
except ConnectionError:
logger.warning("ConnectionError in logging params to MLflow")
except Exception as e:
logger.warning(f"Failed to log params: {e}")
logger.warning("Failed to log params: %s", e)
def track_artifacts(self, dir_path: Union[str, Path], artifact_path: str = None):
try:
mlflow.log_artifacts(dir_path, artifact_path)
except ConnectionError:
logger.warning(f"ConnectionError in logging artifacts to MLflow")
logger.warning("ConnectionError in logging artifacts to MLflow")
except Exception as e:
logger.warning(f"Failed to log artifacts: {e}")
logger.warning("Failed to log artifacts: %s", e)
def end_run(self):
mlflow.end_run()
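The MLflow head repeats the same try/except shape for metrics, params and artifacts; a hypothetical condensed helper (not part of this commit) shows the pattern in one place, assuming the same logger setup as above:

import logging
from typing import Any, Callable

logger = logging.getLogger(__name__)

def track_safely(log_fn: Callable[[Any], None], payload: Any, what: str) -> None:
    # Mirrors the handlers above: a ConnectionError gets a fixed message, while any
    # other failure is passed as a %s argument so str(e) is only computed if the
    # warning record is actually emitted.
    try:
        log_fn(payload)
    except ConnectionError:
        logger.warning("ConnectionError in logging %s to MLflow", what)
    except Exception as e:
        logger.warning("Failed to log %s: %s", what, e)

Usage would look like track_safely(mlflow.log_metrics, flatten_dict(metrics), "metrics"), though the explicit per-method handlers above are arguably clearer when the call signatures differ.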

View File

@ -81,10 +81,10 @@ def fetch_archive_from_http(url: str, output_dir: str, proxies: Optional[dict] =
is_not_empty = len(list(Path(path).rglob("*"))) > 0
if is_not_empty:
logger.info(f"Found data stored in `{output_dir}`. Delete this first if you really want to fetch new data.")
logger.info("Found data stored in '%s'. Delete this first if you really want to fetch new data.", output_dir)
return False
else:
logger.info(f"Fetching from {url} to `{output_dir}`")
logger.info("Fetching from %s to '%s'", url, output_dir)
_, _, archive_extension = url.rpartition(".")
request_data = requests.get(url, proxies=proxies)

View File

@ -192,8 +192,8 @@ def create_dpr_training_dataset(squad_data: dict, retriever: BaseRetriever, num_
n_questions += 1
yield dict_DPR
logger.info(f"Number of skipped questions: {n_non_added_questions}")
logger.info(f"Number of added questions: {n_questions}")
logger.info("Number of skipped questions: %s", n_non_added_questions)
logger.info("Number of added questions: %s", n_questions)
def save_dataset(iter_dpr: Iterator, dpr_output_filename: Path, total_nb_questions: int, split_dataset: bool):

View File

@ -26,17 +26,17 @@ def setup_pipelines() -> Dict[str, Any]:
# Load query pipeline
query_pipeline = Pipeline.load_from_yaml(Path(config.PIPELINE_YAML_PATH), pipeline_name=config.QUERY_PIPELINE_NAME)
logging.info(f"Loaded pipeline nodes: {query_pipeline.graph.nodes.keys()}")
logging.info("Loaded pipeline nodes: %s", query_pipeline.graph.nodes.keys())
pipelines["query_pipeline"] = query_pipeline
# Find document store
document_store = query_pipeline.get_document_store()
logging.info(f"Loaded docstore: {document_store}")
logging.info("Loaded docstore: %s", document_store)
pipelines["document_store"] = document_store
# Setup concurrency limiter
concurrency_limiter = RequestLimiter(config.CONCURRENT_REQUEST_PER_WORKER)
logging.info("Concurrent requests per worker: {CONCURRENT_REQUEST_PER_WORKER}")
logging.info("Concurrent requests per worker: %s", config.CONCURRENT_REQUEST_PER_WORKER)
pipelines["concurrency_limiter"] = concurrency_limiter
# Load indexing pipeline (if available)
@ -53,7 +53,7 @@ def setup_pipelines() -> Dict[str, Any]:
except PipelineConfigError as e:
indexing_pipeline = None
logger.error(f"{e.message}\nFile Upload API will not be available.")
logger.error("%s\nFile Upload API will not be available.", e.message)
finally:
pipelines["indexing_pipeline"] = indexing_pipeline

View File

@ -122,7 +122,7 @@ def main():
for current_config in combine_config(distillation_settings):
descriptions.append(f"Results of student with distillation (config: {current_config}")
# distillation training
logger.info(f"Training student with distillation (config: {current_config}")
logger.info("Training student with distillation (config: %s)", current_config)
results.append(
train_student_with_distillation(
student, teacher, download_folder, train_file, test_file, **current_config, **training_settings
@ -145,9 +145,9 @@ def main():
logger.info("Evaluation results:")
for result, description in zip(results, descriptions):
logger.info(description)
logger.info(f"EM: {result['EM']}")
logger.info(f"F1: {result['f1']}")
logger.info(f"Top n accuracy: {result['top_n_accuracy']}")
logger.info("EM: %s", result["EM"])
logger.info("F1: %s", result["f1"])
logger.info("Top n accuracy: %s", result["top_n_accuracy"])
if __name__ == "__main__":

View File

@ -51,7 +51,7 @@ def benchmark_reader(ci=False, update_json=False, save_markdown=False, **kwargs)
index_to_doc_store(doc_store, docs, None, labels)
for reader_name in reader_models:
for reader_type in reader_types:
logger.info(f"##### Start reader run - model:{reader_name}, type: {reader_type} ##### ")
logger.info("##### Start reader run - model: %s, type: %s ##### ", reader_name, reader_type)
try:
reader = get_reader(reader_name, reader_type)
results = reader.eval(

View File

@ -56,7 +56,7 @@ def benchmark_indexing(
retriever_results = []
for n_docs in n_docs_options:
for retriever_name, doc_store_name in retriever_doc_stores:
logger.info(f"##### Start indexing run: {retriever_name}, {doc_store_name}, {n_docs} docs ##### ")
logger.info("##### Start indexing run: %s, %s, %s docs ##### ", retriever_name, doc_store_name, n_docs)
try:
doc_store = get_document_store(doc_store_name)
retriever = get_retriever(retriever_name, doc_store, DEVICES)
@ -160,7 +160,7 @@ def benchmark_querying(
for n_docs in n_docs_options:
for retriever_name, doc_store_name in retriever_doc_stores:
try:
logger.info(f"##### Start querying run: {retriever_name}, {doc_store_name}, {n_docs} docs ##### ")
logger.info("##### Start querying run: %s, %s, %s docs ##### ", retriever_name, doc_store_name, n_docs)
if retriever_name in ["elastic", "sentence_transformers"]:
similarity = "cosine"
else:
@ -273,7 +273,7 @@ def add_precomputed_embeddings(embeddings_dir, embeddings_filenames, docs):
ret = []
id_to_doc = {x.meta["passage_id"]: x for x in docs}
for ef in embeddings_filenames:
logger.info(f"Adding precomputed embeddings from {embeddings_dir + ef}")
logger.info("Adding precomputed embeddings from %s", embeddings_dir + ef)
filename = embeddings_dir + ef
embeds = pickle.load(open(filename, "rb"))
for i, vec in embeds:
@ -284,7 +284,7 @@ def add_precomputed_embeddings(embeddings_dir, embeddings_filenames, docs):
# In the official DPR repo, there are only 20594995 precomputed embeddings for 21015324 wikipedia passages
# If there isn't an embedding for a given doc, we remove it here
ret = [x for x in ret if x.embedding is not None]
logger.info(f"Embeddings loaded for {len(ret)}/{len(docs)} docs")
logger.info("Embeddings loaded for %s/%s docs", len(ret), len(docs))
return ret

View File

@ -170,16 +170,16 @@ def download_from_url(url: str, filepath: Union[str, Path]):
:return: local path of the downloaded file
"""
logger.info(f"Downloading {url}")
logger.info("Downloading %s", url)
# Create local folder
folder, filename = os.path.split(filepath)
if not os.path.exists(folder):
os.makedirs(folder)
# Download file if not present locally
if os.path.exists(filepath):
logger.info(f"Skipping {url} (exists locally)")
logger.info("Skipping %s (exists locally)", url)
else:
logger.info(f"Downloading {url} to {filepath} ")
logger.info("Downloading %s to %s", filepath)
with open(filepath, "wb") as file:
http_get(url=url, temp_file=file)
return filepath
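Because %-style interpolation is deferred, the number of arguments must match the placeholders exactly: a mismatch does not raise at the call site, but surfaces later as a "--- Logging error ---" traceback printed by the handler when the record is formatted. A minimal sketch, with made-up URL and path:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

url = "https://example.com/data.zip"  # made-up value
filepath = "/tmp/data.zip"            # made-up value

# Two placeholders, two arguments; dropping one would not fail here but would
# produce a TypeError inside the handler once the message is interpolated.
logger.info("Downloading %s to %s", url, filepath)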

View File

@ -1024,7 +1024,7 @@ def adaptive_model_qa(num_processes):
current_process = psutil.Process()
children = current_process.children()
if len(children) != 0:
logging.error(f"Not all the subprocesses are closed! {len(children)} are still running.")
logging.error("Not all the subprocesses are closed! %s are still running.", len(children))
@pytest.fixture

View File

@ -132,7 +132,7 @@ class Index:
response: dict = {"namespace": namespace, "vectors": {}}
if namespace not in self.index_config.namespaces:
# If we query an empty/non-existent namespace, Pinecone will just return an empty response
logger.warning(f"No namespace called '{namespace}'")
logger.warning("No namespace called '%s'", namespace)
return response
records = self.index_config.namespaces[namespace]
namespace_ids = records.keys()

View File

@ -56,11 +56,11 @@ def reduce_windows_recursion_limit():
default_recursion_limit = sys.getrecursionlimit()
if is_windows:
reduced_recursion_limit = default_recursion_limit // 2
logger.warning(f"Reducing recursion limit to {reduced_recursion_limit}")
logger.warning("Reducing recursion limit to %s", reduced_recursion_limit)
sys.setrecursionlimit(reduced_recursion_limit)
yield
if is_windows:
logger.warning(f"Resetting recursion limit to {default_recursion_limit}")
logger.warning("Resetting recursion limit to %s", default_recursion_limit)
sys.setrecursionlimit(default_recursion_limit)

View File

@ -30,9 +30,9 @@ def tutorial5_evaluation():
doc_index = "tutorial5_docs"
label_index = "tutorial5_labels"
##############################################
###############################################
# Code
##############################################
###############################################
launch_es()
# Download evaluation data, which is a subset of Natural Questions development set containing 50 documents with one question per document and multiple annotated answers