chore: enable logging-fstring-interpolation and cleanup (#3843)

* enable logging-fstring-interpolation

* remove logging-fstring-interpolation from exclusion list

* remove implicit string interpolations added by black

* remove from rest_api too

* fix % sign
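
Note: `logging-fstring-interpolation` (pylint W1203) flags f-strings passed to logging
calls: the string is interpolated eagerly, even when the log level is disabled, and the
arguments never reach handlers or filters as structured record data. The pattern applied
throughout this commit defers formatting to the logging framework. A minimal sketch of
the before/after (the logger name and values are illustrative, not from this diff):

    import logging

    logger = logging.getLogger("haystack.example")  # illustrative name
    count, index = 42, "document"

    # Before: flagged by logging-fstring-interpolation (W1203).
    # The f-string is built even if INFO records are discarded.
    logger.info(f"Wrote {count} documents to index '{index}'")

    # After: lazy %-style formatting, as applied throughout this commit.
    # Interpolation happens only if the record is actually emitted.
    logger.info("Wrote %s documents to index '%s'", count, index)

    # Once the message is a %-style format string, a literal percent
    # sign must be escaped as %% (the "fix % sign" commit above).
    logger.info("Clipped %s%% of samples", 12.5)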
ZanSara 2023-01-12 09:31:21 +01:00 committed by GitHub
parent 4cbc8550d6
commit d157e41c1f
64 changed files with 408 additions and 249 deletions

View File

@@ -476,8 +476,9 @@ class BaseDocumentStore(BaseComponent):
         else:
             jsonl_filename = (file_path.parent / (file_path.stem + ".jsonl")).as_posix()
             logger.info(
-                f"Adding evaluation data batch-wise is not compatible with json-formatted SQuAD files. "
-                f"Converting json to jsonl to: {jsonl_filename}"
+                "Adding evaluation data batch-wise is not compatible with json-formatted SQuAD files. "
+                "Converting json to jsonl to: %s",
+                jsonl_filename,
             )
             squad_json_to_jsonl(filename, jsonl_filename)
             self.add_eval_data(
@@ -622,8 +623,9 @@ class BaseDocumentStore(BaseComponent):
         for document in documents:
             if document.id in _hash_ids:
                 logger.info(
-                    f"Duplicate Documents: Document with id '{document.id}' already exists in index "
-                    f"'{index or self.index}'"
+                    "Duplicate Documents: Document with id '%s' already exists in index '%s'",
+                    document.id,
+                    index or self.index,
                 )
                 continue
             _documents.append(document)

View File

@@ -118,23 +118,25 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore):
                 indexing_info = index_info["indexing"]
                 if indexing_info["pending_file_count"] > 0:
                     logger.warning(
-                        f"{indexing_info['pending_file_count']} files are pending to be indexed. "
-                        f"Indexing status: {indexing_info['status']}"
+                        "%s files are pending to be indexed. Indexing status: %s",
+                        indexing_info["pending_file_count"],
+                        indexing_info["status"],
                     )
                 if index in deployed_unhealthy_pipelines:
                     logger.warning(
-                        f"The index '{index}' is unhealthy and should be redeployed using "
-                        f"`Pipeline.undeploy_on_deepset_cloud()` and `Pipeline.deploy_on_deepset_cloud()`."
+                        "The index '%s' is unhealthy and should be redeployed using "
+                        "`Pipeline.undeploy_on_deepset_cloud()` and `Pipeline.deploy_on_deepset_cloud()`.",
+                        index,
                     )
             else:
                 logger.info(
-                    f"You are using a DeepsetCloudDocumentStore with an index that does not exist on deepset Cloud. "
-                    f"This document store always returns empty responses. This can be useful if you want to "
-                    f"create a new pipeline within deepset Cloud.\n"
-                    f"In order to create a new pipeline on deepset Cloud, take the following steps: \n"
-                    f"  - create query and indexing pipelines using this DocumentStore\n"
-                    f"  - call `Pipeline.save_to_deepset_cloud()` passing the pipelines and a `pipeline_config_name`\n"
-                    f"  - call `Pipeline.deploy_on_deepset_cloud()` passing the `pipeline_config_name`"
+                    "You are using a DeepsetCloudDocumentStore with an index that does not exist on deepset Cloud. "
+                    "This document store always returns empty responses. This can be useful if you want to "
+                    "create a new pipeline within deepset Cloud.\n"
+                    "In order to create a new pipeline on deepset Cloud, take the following steps: \n"
+                    "  - create query and indexing pipelines using this DocumentStore\n"
+                    "  - call `Pipeline.save_to_deepset_cloud()` passing the pipelines and a `pipeline_config_name`\n"
+                    "  - call `Pipeline.deploy_on_deepset_cloud()` passing the `pipeline_config_name`"
                 )
         self.evaluation_set_client = DeepsetCloud.get_evaluation_set_client(

View File

@@ -508,9 +508,10 @@ class ElasticsearchDocumentStore(SearchEngineDocumentStore):
         if not any(indices):
             logger.warning(
-                f"To use an index, you must create it first. The index called '{index_name}' doesn't exist. "
-                f"You can create it by setting `create_index=True` on init or by calling `write_documents()` if you prefer to create it on demand. "
-                f"Note that this instance doesn't validate the index after you create it."
+                "To use an index, you must create it first. The index called '%s' doesn't exist. "
+                "You can create it by setting `create_index=True` on init or by calling `write_documents()` if you prefer to create it on demand. "
+                "Note that this instance doesn't validate the index after you create it.",
+                index_name,
             )
         # If the index name is an alias that groups multiple existing indices, each of them must have an embedding_field.

View File

@@ -206,7 +206,10 @@ class FAISSDocumentStore(SQLDocumentStore):
             index.hnsw.efConstruction = ef_construction
             logger.info(
-                f"HNSW params: n_links: {n_links}, efSearch: {index.hnsw.efSearch}, efConstruction: {index.hnsw.efConstruction}"
+                "HNSW params: n_links: %s, efSearch: %s, efConstruction: %s",
+                n_links,
+                index.hnsw.efSearch,
+                index.hnsw.efConstruction,
             )
         else:
             index = faiss.index_factory(embedding_dim, index_factory, metric_type)
@@ -550,8 +553,10 @@ class FAISSDocumentStore(SQLDocumentStore):
         """
         if index == self.index:
             logger.warning(
-                f"Deletion of default index '{index}' detected. "
-                f"If you plan to use this index again, please reinstantiate '{self.__class__.__name__}' in order to avoid side-effects."
+                "Deletion of default index '%s' detected. "
+                "If you plan to use this index again, please reinstantiate '%s' in order to avoid side-effects.",
+                index,
+                self.__class__.__name__,
             )
         if index in self.faiss_indexes:
             del self.faiss_indexes[index]

View File

@@ -110,8 +110,9 @@ class InMemoryDocumentStore(KeywordDocumentStore):
         self.devices, _ = initialize_device_settings(devices=devices, use_cuda=self.use_gpu, multi_gpu=False)
         if len(self.devices) > 1:
             logger.warning(
-                f"Multiple devices are not supported in {self.__class__.__name__} inference, "
-                f"using the first device {self.devices[0]}."
+                "Multiple devices are not supported in %s inference, using the first device %s.",
+                self.__class__.__name__,
+                self.devices[0],
             )
         self.main_device = self.devices[0]
@@ -184,7 +185,7 @@ class InMemoryDocumentStore(KeywordDocumentStore):
                 )
                 if duplicate_documents == "skip":
                     logger.warning(
-                        f"Duplicate Documents: Document with id '{document.id} already exists in index " f"'{index}'"
+                        "Duplicate Documents: Document with id '%s' already exists in index '%s'", document.id, index
                     )
                     continue
             self.indexes[index][document.id] = document
@@ -205,8 +206,9 @@ class InMemoryDocumentStore(KeywordDocumentStore):
         textual_documents = [doc for doc in all_documents if doc.content_type == "text"]
         if len(textual_documents) < len(all_documents):
             logger.warning(
-                f"Some documents in {index} index are non-textual."
-                f" They will be written to the index, but the corresponding BM25 representations will not be generated."
+                "Some documents in %s index are non-textual."
+                " They will be written to the index, but the corresponding BM25 representations will not be generated.",
+                index,
             )
         tokenized_corpus = [
@@ -236,10 +238,11 @@ class InMemoryDocumentStore(KeywordDocumentStore):
         duplicate_ids: list = [label.id for label in self._get_duplicate_labels(label_objects, index=index)]
         if len(duplicate_ids) > 0:
             logger.warning(
-                f"Duplicate Label IDs: Inserting a Label whose id already exists in this document store."
-                f" This will overwrite the old Label. Please make sure Label.id is a unique identifier of"
-                f" the answer annotation and not the question."
-                f" Problematic ids: {','.join(duplicate_ids)}"
+                "Duplicate Label IDs: Inserting a Label whose id already exists in this document store."
+                " This will overwrite the old Label. Please make sure Label.id is a unique identifier of"
+                " the answer annotation and not the question."
+                " Problematic ids: %s",
+                ",".join(duplicate_ids),
             )
         for label in label_objects:

View File

@@ -497,8 +497,10 @@ class MilvusDocumentStore(SQLDocumentStore):
         """
         if index == self.index:
             logger.warning(
-                f"Deletion of default index '{index}' detected. "
-                f"If you plan to use this index again, please reinstantiate '{self.__class__.__name__}' in order to avoid side-effects."
+                "Deletion of default index '%s' detected. "
+                "If you plan to use this index again, please reinstantiate '%s' in order to avoid side-effects.",
+                index,
+                self.__class__.__name__,
             )
         self._delete_index(index)

View File

@@ -527,9 +527,10 @@ class OpenSearchDocumentStore(SearchEngineDocumentStore):
         if not any(indices):
             # We don't want to raise here as creating a query-only document store before the index being created asynchronously is a valid use case.
             logger.warning(
-                f"Before you can use an index, you must create it first. The index '{index_name}' doesn't exist. "
-                f"You can create it by setting `create_index=True` on init or by calling `write_documents()` if you prefer to create it on demand. "
-                f"Note that this instance doesn't validate the index after you created it."
+                "Before you can use an index, you must create it first. The index '%s' doesn't exist. "
+                "You can create it by setting `create_index=True` on init or by calling `write_documents()` if you prefer to create it on demand. "
+                "Note that this instance doesn't validate the index after you created it.",
+                index_name,
             )
         # If the index name is an alias that groups multiple existing indices, each of them must have an embedding_field.
@@ -583,11 +584,11 @@ class OpenSearchDocumentStore(SearchEngineDocumentStore):
         if self.index_type == "hnsw" and ef_search != 20:
             body = {"knn.algo_param.ef_search": 20}
             self.client.indices.put_settings(index=index_id, body=body, headers=headers)
-            logger.info(f"Set ef_search to 20 for hnsw index '{index_id}'.")
+            logger.info("Set ef_search to 20 for hnsw index '%s'.", index_id)
         elif self.index_type == "flat" and ef_search != 512:
             body = {"knn.algo_param.ef_search": 512}
             self.client.indices.put_settings(index=index_id, body=body, headers=headers)
-            logger.info(f"Set ef_search to 512 for hnsw index '{index_id}'.")
+            logger.info("Set ef_search to 512 for hnsw index '%s'.", index_id)

     def _validate_approximate_knn_settings(
         self, existing_embedding_field: Dict[str, Any], index_settings: Dict[str, Any], index_id: str

View File

@@ -216,7 +216,10 @@ class SearchEngineDocumentStore(KeywordDocumentStore):
         except Exception as e:
             if hasattr(e, "status_code") and e.status_code == 429:  # type: ignore
                 logger.warning(
-                    f"Failed to insert a batch of '{len(documents)}' documents because of a 'Too Many Requeset' response. Splitting the number of documents into two chunks with the same size and retrying in {_timeout} seconds."
+                    "Failed to insert a batch of '%s' documents because of a 'Too Many Requeset' response. "
+                    "Splitting the number of documents into two chunks with the same size and retrying in %s seconds.",
+                    len(documents),
+                    _timeout,
                 )
                 if len(documents) == 1:
                     logger.warning(
@@ -478,10 +481,11 @@ class SearchEngineDocumentStore(KeywordDocumentStore):
         duplicate_ids: list = [label.id for label in self._get_duplicate_labels(label_list, index=index)]
         if len(duplicate_ids) > 0:
             logger.warning(
-                f"Duplicate Label IDs: Inserting a Label whose id already exists in this document store."
-                f" This will overwrite the old Label. Please make sure Label.id is a unique identifier of"
-                f" the answer annotation and not the question."
-                f" Problematic ids: {','.join(duplicate_ids)}"
+                "Duplicate Label IDs: Inserting a Label whose id already exists in this document store."
+                " This will overwrite the old Label. Please make sure Label.id is a unique identifier of"
+                " the answer annotation and not the question."
+                " Problematic ids: %s",
+                ",".join(duplicate_ids),
             )
         labels_to_index = []
         for label in label_list:
@@ -1087,7 +1091,8 @@ class SearchEngineDocumentStore(KeywordDocumentStore):
         if not isinstance(query, str):
             logger.warning(
                 "The query provided seems to be not a string, but an object "
-                f"of type {type(query)}. This can cause the query to fail."
+                "of type %s. This can cause the query to fail.",
+                type(query),
             )
         operator = "AND" if all_terms_must_match else "OR"
         body = {
@@ -1599,8 +1604,10 @@ class SearchEngineDocumentStore(KeywordDocumentStore):
         """
         if index == self.index:
             logger.warning(
-                f"Deletion of default index '{index}' detected. "
-                f"If you plan to use this index again, please reinstantiate '{self.__class__.__name__}' in order to avoid side-effects."
+                "Deletion of default index '%s' detected. "
+                "If you plan to use this index again, please reinstantiate '%s' in order to avoid side-effects.",
+                index,
+                self.__class__.__name__,
            )
         self._delete_index(index)

View File

@@ -448,10 +448,11 @@ class SQLDocumentStore(BaseDocumentStore):
         duplicate_ids: list = [label.id for label in self._get_duplicate_labels(labels, index=index)]
         if len(duplicate_ids) > 0:
             logger.warning(
-                f"Duplicate Label IDs: Inserting a Label whose id already exists in this document store."
-                f" This will overwrite the old Label. Please make sure Label.id is a unique identifier of"
-                f" the answer annotation and not the question."
-                f" Problematic ids: {','.join(duplicate_ids)}"
+                "Duplicate Label IDs: Inserting a Label whose id already exists in this document store."
+                " This will overwrite the old Label. Please make sure Label.id is a unique identifier of"
+                " the answer annotation and not the question."
+                " Problematic ids: %s",
+                ",".join(duplicate_ids),
             )
         # TODO: Use batch_size

View File

@@ -52,8 +52,9 @@ def eval_data_from_json(
         problematic_ids.extend(cur_problematic_ids)
     if len(problematic_ids) > 0:
         logger.warning(
-            f"Could not convert an answer for {len(problematic_ids)} questions.\n"
-            f"There were conversion errors for question ids: {problematic_ids}"
+            "Could not convert an answer for %s questions.\nThere were conversion errors for question ids: %s",
+            len(problematic_ids),
+            problematic_ids,
         )
     return docs, labels
@@ -99,8 +100,10 @@ def eval_data_from_jsonl(
             if len(docs) >= batch_size:
                 if len(problematic_ids) > 0:
                     logger.warning(
-                        f"Could not convert an answer for {len(problematic_ids)} questions.\n"
-                        f"There were conversion errors for question ids: {problematic_ids}"
+                        "Could not convert an answer for %s questions.\n"
+                        "There were conversion errors for question ids: %s",
+                        len(problematic_ids),
+                        problematic_ids,
                     )
                 yield docs, labels
                 docs = []

View File

@@ -358,7 +358,9 @@ class WeaviateDocumentStore(KeywordDocumentStore):
             generated_uuid = str(uuid.UUID(hashed_id.hexdigest()[::2]))
             if not self.uuid_format_warning_raised:
                 logger.warning(
-                    f"Document id {id} is not in uuid format. Such ids will be replaced by uuids, in this case {generated_uuid}."
+                    "Document id %s is not in uuid format. Such ids will be replaced by uuids, in this case %s.",
+                    id,
+                    generated_uuid,
                 )
                 self.uuid_format_warning_raised = True
             id = generated_uuid
@@ -1507,8 +1509,10 @@ class WeaviateDocumentStore(KeywordDocumentStore):
         """
         if index == self.index:
             logger.warning(
-                f"Deletion of default index '{index}' detected. "
-                f"If you plan to use this index again, please reinstantiate '{self.__class__.__name__}' in order to avoid side-effects."
+                "Deletion of default index '%s' detected. "
+                "If you plan to use this index again, please reinstantiate '%s' in order to avoid side-effects.",
+                index,
+                self.__class__.__name__,
             )
         self._delete_index(index)

View File

@@ -336,7 +336,9 @@ class DataSilo:
             logger.warning("No dev set created. Please adjust the dev_split parameter.")
         logger.info(
-            f"Took {len(dev_dataset)} samples out of train set to create dev set (dev split is roughly {self.processor.dev_split})"
+            "Took %s samples out of train set to create dev set (dev split is roughly %s)",
+            len(dev_dataset),
+            self.processor.dev_split,
         )

     def random_split_ConcatDataset(self, ds: ConcatDataset, lengths: List[int]):
@@ -387,7 +389,7 @@ class DataSilo:
             clipped, ave_len, seq_lens, max_seq_len = self._calc_length_stats_biencoder()
         else:
             logger.warning(
-                f"Could not compute length statistics because 'input_ids' or 'query_input_ids' and 'passage_input_ids' are missing."
+                "Could not compute length statistics because 'input_ids' or 'query_input_ids' and 'passage_input_ids' are missing."
             )
             clipped = -1
             ave_len = -1
@@ -416,11 +418,14 @@ class DataSilo:
             logger.info("Proportion clipped: {}".format(clipped))
             if clipped > 0.5:
                 logger.info(
-                    f"[Haystack Tip] {round(clipped * 100, 1)}% of your samples got cut down to {max_seq_len} tokens. "
+                    "[Haystack Tip] %s%% of your samples got cut down to %s tokens. "
                     "Consider increasing max_seq_len "
-                    f"(the maximum value allowed with the current model is max_seq_len={self.processor.tokenizer.model_max_length}, "
+                    "(the maximum value allowed with the current model is max_seq_len=%s, "
                     "if this is not enough consider splitting the document in smaller units or changing the model). "
-                    "This will lead to higher memory consumption but is likely to improve your model performance"
+                    "This will lead to higher memory consumption but is likely to improve your model performance",
+                    round(clipped * 100, 1),
+                    max_seq_len,
+                    self.processor.tokenizer.model_max_length,
                 )
         elif "query_input_ids" in self.tensor_names and "passage_input_ids" in self.tensor_names:
             logger.info(

View File

@@ -59,9 +59,11 @@ def convert_features_to_dataset(features):
             base = check.ravel()[0]
             if not np.issubdtype(type(base), np.integer):
                 logger.warning(
-                    f"Problem during conversion to torch tensors:\n"
-                    f"A non-integer value for feature '{t_name}' with a value of: "
-                    f"'{base}' will be converted to a torch tensor of dtype long."
+                    "Problem during conversion to torch tensors:\n"
+                    "A non-integer value for feature '%s' with a value of: "
+                    "'%s' will be converted to a torch tensor of dtype long.",
+                    t_name,
+                    base,
                 )
         except:
             logger.debug(

View File

@@ -38,10 +38,11 @@ def sample_to_features_text(sample, tasks, max_seq_len, tokenizer):
     if (len(inputs["input_ids"]) - inputs["special_tokens_mask"].count(1)) != len(sample.tokenized["tokens"]):
         logger.error(
-            f"FastTokenizer encoded sample {sample.clear_text['text']} to "
-            f"{len(inputs['input_ids']) - inputs['special_tokens_mask'].count(1)} tokens, which differs "
-            f"from number of tokens produced in tokenize_with_metadata(). \n"
-            f"Further processing is likely to be wrong."
+            "FastTokenizer encoded sample %s to %s tokens, which differs "
+            "from number of tokens produced in tokenize_with_metadata(). \n"
+            "Further processing is likely to be wrong.",
+            sample.clear_text["text"],
+            len(inputs["input_ids"]) - inputs["special_tokens_mask"].count(1),
         )
     else:
         # TODO It might be cleaner to adjust the data structure in sample.tokenized

View File

@@ -565,8 +565,9 @@ class SquadProcessor(Processor):
                 )
             except Exception as e:
                 logger.warning(
-                    f"Could not devide document into passages. Document: {basket.raw['document_text'][:200]}\n"
-                    f"With error: {e}"
+                    "Could not devide document into passages. Document: %s\nWith error: %s",
+                    basket.raw["document_text"][:200],
+                    e,
                 )
                 passage_spans = []
@@ -663,8 +664,9 @@ class SquadProcessor(Processor):
             # check if answer string can be found in context
             if answer_text not in doc_text:
                 logger.warning(
-                    f"Answer '{answer['text']}' not contained in context.\n"
-                    f"Example will not be converted for training/evaluation."
+                    "Answer '%s' not contained in context.\n"
+                    "Example will not be converted for training/evaluation.",
+                    answer["text"],
                 )
                 error_in_answer = True
                 label_idxs[i][0] = -100  # TODO remove this hack also from featurization
@@ -672,8 +674,10 @@ class SquadProcessor(Processor):
                 break  # Break loop around answers, so the error message is not shown multiple times
             if answer_indices.strip() != answer_text.strip():
                 logger.warning(
-                    f"Answer using start/end indices is '{answer_indices}' while gold label text is '{answer_text}'.\n"
-                    f"Example will not be converted for training/evaluation."
+                    "Answer using start/end indices is '%s' while gold label text is '%s'.\n"
+                    "Example will not be converted for training/evaluation.",
+                    answer_indices,
+                    answer_text,
                 )
                 error_in_answer = True
                 label_idxs[i][0] = -100  # TODO remove this hack also from featurization
@@ -1025,7 +1029,7 @@ class TextSimilarityProcessor(Processor):
         if problematic_ids:
             logger.error(
-                f"There were {len(problematic_ids)} errors during preprocessing at positions: {problematic_ids}"
+                "There were %s errors during preprocessing at positions: %s", len(problematic_ids), problematic_ids
             )
         if return_baskets:
@@ -1104,7 +1108,7 @@ class TextSimilarityProcessor(Processor):
         if len(tokenized_query) == 0:
             logger.warning(
-                f"The query could not be tokenized, likely because it contains a character that the query tokenizer does not recognize"
+                "The query could not be tokenized, likely because it contains a character that the query tokenizer does not recognize"
             )
             return None
@@ -1222,7 +1226,8 @@ class TextSimilarityProcessor(Processor):
             if title is None:
                 title = ""
                 logger.warning(
-                    f"Couldn't find title although `embed_title` is set to True for DPR. Using title='' now. Related passage text: '{ctx}' "
+                    "Couldn't find title although `embed_title` is set to True for DPR. Using title='' now. Related passage text: '%s' ",
+                    ctx,
                 )
             res.append(tuple((title, ctx)))
         return res
@@ -1545,7 +1550,7 @@ class TableTextSimilarityProcessor(Processor):
         if problematic_ids:
             logger.error(
-                f"There were {len(problematic_ids)} errors during preprocessing at positions: {problematic_ids}"
+                "There were %s errors during preprocessing at positions: %s", len(problematic_ids), problematic_ids
             )
         if return_baskets:
@@ -1588,7 +1593,7 @@ class TableTextSimilarityProcessor(Processor):
         if len(tokenized_query) == 0:
             logger.warning(
-                f"The query could not be tokenized, likely because it contains a character that the query tokenizer does not recognize"
+                "The query could not be tokenized, likely because it contains a character that the query tokenizer does not recognize"
             )
             return None

View File

@@ -125,7 +125,8 @@ class Evaluator:
                 temperature_change = (abs(temperature_current - temperature_previous) / temperature_previous) * 100.0
                 if temperature_change > 50:
                     logger.warning(
-                        f"temperature used for calibration of confidence scores changed by more than {temperature_change} percent"
+                        "temperature used for calibration of confidence scores changed by more than %s percent",
+                        temperature_change,
                     )
             if hasattr(head, "aggregate_preds"):
                 # Needed to convert NQ ids from np arrays to strings
@@ -146,8 +147,11 @@ class Evaluator:
                     result["report"] = compute_report_metrics(head, preds_all[head_num], label_all[head_num])
                 except:
                     logger.error(
-                        f"Couldn't create eval report for head {head_num} with following preds and labels:"
-                        f"\n Preds: {preds_all[head_num]} \n Labels: {label_all[head_num]}"
+                        "Couldn't create eval report for head %s with following preds and labels:"
+                        "\n Preds: %s \n Labels: %s",
+                        head_num,
+                        preds_all[head_num],
+                        label_all[head_num],
                     )
                     result["report"] = "Error"

View File

@@ -77,8 +77,9 @@ class Inferencer:
         self.devices, n_gpu = initialize_device_settings(devices=devices, use_cuda=gpu, multi_gpu=False)
         if len(self.devices) > 1:
             logger.warning(
-                f"Multiple devices are not supported in {self.__class__.__name__} inference, "
-                f"using the first device {self.devices[0]}."
+                "Multiple devices are not supported in %s inference, using the first device %s.",
+                self.__class__.__name__,
+                self.devices[0],
             )
         self.processor = processor
@@ -187,9 +188,7 @@ class Inferencer:
         devices, n_gpu = initialize_device_settings(devices=devices, use_cuda=gpu, multi_gpu=False)
         if len(devices) > 1:
-            logger.warning(
-                f"Multiple devices are not supported in Inferencer, " f"using the first device {devices[0]}."
-            )
+            logger.warning("Multiple devices are not supported in Inferencer, using the first device %s.", devices[0])
         name = os.path.basename(model_name_or_path)

View File

@@ -390,8 +390,9 @@ class AdaptiveModel(nn.Module, BaseAdaptiveModel):
         for prediction_head in self.prediction_heads:
             if len(prediction_head.layer_dims) != 2:
                 logger.error(
-                    f"Currently conversion only works for PredictionHeads that are a single layer Feed Forward NN with dimensions [LM_output_dim, number_classes].\n"
-                    f" Your PredictionHead has {str(prediction_head.layer_dims)} dimensions."
+                    "Currently conversion only works for PredictionHeads that are a single layer Feed Forward NN with dimensions [LM_output_dim, number_classes].\n"
+                    " Your PredictionHead has %s dimensions.",
+                    str(prediction_head.layer_dims),
                 )
                 continue
             if prediction_head.model_type == "span_classification":
@@ -399,8 +400,8 @@ class AdaptiveModel(nn.Module, BaseAdaptiveModel):
                 converted_models.append(transformers_model)
             else:
                 logger.error(
-                    f"Haystack -> Transformers conversion is not supported yet for"
-                    f" prediction heads of type {prediction_head.model_type}"
+                    "Haystack -> Transformers conversion is not supported yet for prediction heads of type %s",
+                    prediction_head.model_type,
                 )
         return converted_models

View File

@@ -93,7 +93,7 @@ class FeatureExtractor:
             with open(config_file) as f:
                 config = json.load(f)
             feature_extractor_classname = config["tokenizer_class"]
-            logger.debug(f"⛏️ Selected feature extractor: {feature_extractor_classname} (from {config_file})")
+            logger.debug("⛏️ Selected feature extractor: %s (from %s)", feature_extractor_classname, config_file)
             # Use FastTokenizers as much as possible
             try:
                 feature_extractor_class = getattr(transformers, feature_extractor_classname + "Fast")
@@ -122,7 +122,7 @@ class FeatureExtractor:
                     f"\n- {f'{chr(10)}- '.join(FEATURE_EXTRACTORS.keys())}"
                 ) from e
             logger.debug(
-                f"⛏️ Selected feature extractor: {feature_extractor_class.__name__} (for model type '{model_type}')"
+                "⛏️ Selected feature extractor: %s (for model type '%s')", feature_extractor_class.__name__, model_type
             )
         self.default_params = DEFAULT_EXTRACTION_PARAMS.get(feature_extractor_class, {})

View File

@@ -293,7 +293,7 @@ class HFLanguageModel(LanguageModel):
             model_emb_size = self.model.resize_token_embeddings(new_num_tokens=None).num_embeddings
             vocab_size = model_emb_size + n_added_tokens
             logger.info(
-                f"Resizing embedding layer of LM from {model_emb_size} to {vocab_size} to cope with custom vocab."
+                "Resizing embedding layer of LM from %s to %s to cope with custom vocab.", model_emb_size, vocab_size
             )
             self.model.resize_token_embeddings(vocab_size)
             # verify
@@ -464,7 +464,7 @@ class HFLanguageModelNoSegmentIds(HFLanguageModelWithPooler):
         specified using the arguments `output_hidden_states` and `output_attentions`.
         """
         if segment_ids is not None:
-            logger.warning(f"'segment_ids' is not None, but %s does not use them. They will be ignored.", self.name)
+            logger.warning("'segment_ids' is not None, but %s does not use them. They will be ignored.", self.name)
         return super().forward(
             input_ids=input_ids,
@@ -636,8 +636,9 @@ class DPREncoder(LanguageModel):
         """
         if model_config.model_type.lower() != "bert":
             logger.warning(
-                f"Using a model of type '{model_config.model_type}' which might be incompatible with DPR encoders. "
-                f"Only Bert-based encoders are supported. They need input_ids, token_type_ids, attention_mask as input tensors."
+                "Using a model of type '%s' which might be incompatible with DPR encoders. "
+                "Only Bert-based encoders are supported. They need input_ids, token_type_ids, attention_mask as input tensors.",
+                model_config.model_type,
             )
         config_dict = vars(model_config)
         if model_kwargs:
@@ -876,12 +877,13 @@ def get_language_model(
     if not model_type:
         logger.error(
-            f"Model type not understood for '{pretrained_model_name_or_path}' "
-            f"({model_type if model_type else 'model_type not set'}). "
+            "Model type not understood for '%s' (%s). "
             "Either supply the local path for a saved model, "
             "or the name of a model that can be downloaded from the Model Hub. "
             "Ensure that the model class name can be inferred from the directory name "
-            "when loading a Transformers model."
+            "when loading a Transformers model.",
+            pretrained_model_name_or_path,
+            model_type if model_type else "model_type not set",
         )
         logger.error("Using the AutoModel class for '%s'. This can cause crashes!", pretrained_model_name_or_path)
         model_type = "Auto"
@@ -957,7 +959,7 @@ def _get_model_type(
     if model_type and model_type.lower() == "roberta" and "mlm" in model_name_or_path.lower():
         logger.error(
-            f"MLM part of codebert is currently not supported in Haystack: '{model_name_or_path}' may crash later."
+            "MLM part of codebert is currently not supported in Haystack: '%s' may crash later.", model_name_or_path
        )
     return model_type

View File

@@ -88,13 +88,14 @@ def get_model(
             config = AutoConfig.from_pretrained(pretrained_model_name_or_path=model_name, **autoconfig_kwargs)
             model_type = config.model_type
         except Exception as e:
-            logger.debug(f"Can't find model type for {pretrained_model_name_or_path}: {e}")
+            logger.debug("Can't find model type for %s: %s", pretrained_model_name_or_path, e)
         if feature_extractor_kwargs is not None:
             logger.debug(
                 "Can't forward feature_extractor_kwargs to a SentenceTransformers model. "
                 "These kwargs are being dropped. "
-                f"Content of feature_extractor_kwargs: {feature_extractor_kwargs}"
+                "Content of feature_extractor_kwargs: %s",
+                feature_extractor_kwargs,
             )
     else:
@@ -102,9 +103,10 @@ def get_model(
         config = AutoConfig.from_pretrained(pretrained_model_name_or_path=model_name, **autoconfig_kwargs)
         if not config.model_type:
             logger.error(
-                f"Model type not understood for '{pretrained_model_name_or_path}'. Please provide the name of "
+                "Model type not understood for '%s'. Please provide the name of "
                 "a model that can be downloaded from the Model Hub.\nUsing the AutoModel class. "
-                "THIS CAN CAUSE CRASHES and won't work for models that are not working with text."
+                "THIS CAN CAUSE CRASHES and won't work for models that are not working with text.",
+                pretrained_model_name_or_path,
             )
             model_type = None
         else:
@@ -112,10 +114,13 @@ def get_model(
                 model_type = HUGGINGFACE_CAPITALIZE[config.model_type.lower()]
             except KeyError as e:
                 logger.error(
-                    f"Haystack doesn't support model '{pretrained_model_name_or_path}' (type '{config.model_type.lower()}') "
+                    "Haystack doesn't support model '%s' (type '%s') "
                     "We'll use the AutoModel class for it. "
                    "THIS CAN CAUSE CRASHES and won't work for models that are not working with text. "
-                    f"Supported model types: {', '.join(HUGGINGFACE_CAPITALIZE.keys())}"
+                    "Supported model types: %s",
+                    pretrained_model_name_or_path,
+                    config.model_type.lower(),
+                    ", ".join(HUGGINGFACE_CAPITALIZE.keys()),
                 )
                 model_type = None

View File

@@ -25,9 +25,11 @@ class HaystackModel(ABC):
            See the values of `haystack.schema.ContentTypes`.
        """
        logger.info(
-            f" 🤖 Loading '{pretrained_model_name_or_path}' "
-            f"({self.__class__.__name__} of type '{model_type if model_type else '<unknown>'}' "
-            f"for {content_type} data)"
+            " 🤖 Loading '%s' (%s of type '%s' for %s data)",
+            pretrained_model_name_or_path,
+            self.__class__.__name__,
+            model_type if model_type else "<unknown>",
+            content_type,
        )
        self.model_name_or_path = pretrained_model_name_or_path
        self.model_type = model_type

View File

@@ -164,8 +164,11 @@ class PredictionHead(nn.Module):
            return
        new_dims = [input_dim] + old_dims[1:]
        logger.info(
-            f"Resizing input dimensions of {type(self).__name__} ({self.task_name}) "
-            f"from {old_dims} to {new_dims} to match language model"
+            "Resizing input dimensions of %s (%s) from %s to %s to match language model",
+            type(self).__name__,
+            self.task_name,
+            old_dims,
+            new_dims,
        )
        self.feed_forward = FeedForwardBlock(new_dims)
        self.layer_dims[0] = input_dim
@@ -260,8 +263,8 @@ class QuestionAnsweringHead(PredictionHead):
        super(QuestionAnsweringHead, self).__init__()
        if len(kwargs) > 0:
            logger.warning(
-                f"Some unused parameters are passed to the QuestionAnsweringHead. "
-                f"Might not be a problem. Params: {json.dumps(kwargs)}"
+                "Some unused parameters are passed to the QuestionAnsweringHead. Might not be a problem. Params: %s",
+                json.dumps(kwargs),
            )
        self.layer_dims = layer_dims
        assert self.layer_dims[-1] == 2

View File

@@ -105,20 +105,23 @@ class QACandidate:
            self.answer = "no_answer"
            if self.offset_answer_start != 0 or self.offset_answer_end != 0:
                logger.error(
-                    f"Both start and end offsets should be 0: \n"
-                    f"{self.offset_answer_start}, {self.offset_answer_end} with a no_answer. "
+                    "Both start and end offsets should be 0: \n%s, %s with a no_answer. ",
+                    self.offset_answer_start,
+                    self.offset_answer_end,
                )
        else:
            self.answer = string
            if self.offset_answer_end - self.offset_answer_start <= 0:
                logger.error(
-                    f"End offset comes before start offset: \n"
-                    f"({self.offset_answer_start}, {self.offset_answer_end}) with a span answer. "
+                    "End offset comes before start offset: \n(%s, %s) with a span answer. ",
+                    self.offset_answer_start,
+                    self.offset_answer_end,
                )
            elif self.offset_answer_end <= 0:
                logger.error(
-                    f"Invalid end offset: \n"
-                    f"({self.offset_answer_start}, {self.offset_answer_end}) with a span answer. "
+                    "Invalid end offset: \n(%s, %s) with a span answer. ",
+                    self.offset_answer_start,
+                    self.offset_answer_end,
                )

    def _create_context_window(self, context_window_size: int, clear_text: str) -> Tuple[str, int, int]:
@@ -167,7 +170,8 @@ class QACandidate:
        """
        if self.offset_unit != "token":
            logger.error(
-                f"QACandidate needs to have self.offset_unit=token before calling _span_to_string() (id = {self.passage_id})"
+                "QACandidate needs to have self.offset_unit=token before calling _span_to_string() (id = %s)",
+                self.passage_id,
            )
        start_t = self.offset_answer_start

View File

@@ -104,8 +104,10 @@ class Trainer:
            if use_amp in amp_mapping:
                logger.warning(
                    "The Trainer only supports native PyTorch automatic mixed precision and no longer supports the Apex library.\n"
-                    f"Because you provided Apex optimization level {use_amp}, automatic mixed precision was set to {amp_mapping[use_amp]}.\n"
-                    "In the future, set `use_amp=True` to turn on automatic mixed precision."
+                    "Because you provided Apex optimization level %s, automatic mixed precision was set to %s.\n"
+                    "In the future, set `use_amp=True` to turn on automatic mixed precision.",
+                    use_amp,
+                    amp_mapping[use_amp],
                )
                use_amp = amp_mapping[use_amp]
            else:
@@ -570,8 +572,11 @@ class Trainer:
        if ranks_with_data < torch.distributed.get_world_size():
            if step is not None:
                logger.info(
-                    f"Stopping epoch {self.from_epoch} at step {step} for rank {self.local_rank} since at least one other rank "
-                    f"(~ one GPU) in distributed training doesn't have any more batches... "
+                    "Stopping epoch %s at step %s for rank %s since at least one other rank "
+                    "(~ one GPU) in distributed training doesn't have any more batches... ",
+                    self.from_epoch,
+                    step,
+                    self.local_rank,
                )
            return False
        else:

View File

@@ -191,13 +191,15 @@ class OpenAIAnswerGenerator(BaseGenerator):
        if len(input_docs) == 0:
            logger.warning(
-                f"Skipping all of the provided Documents, as none of them fits the maximum token limit of "
-                f"{self.MAX_TOKENS_LIMIT}. The generated answers will therefore not be conditioned on any context."
+                "Skipping all of the provided Documents, as none of them fits the maximum token limit of %s"
+                "The generated answers will therefore not be conditioned on any context.",
+                self.MAX_TOKENS_LIMIT,
            )
        elif skipped_docs >= 1:
            logger.warning(
-                f"Skipping {skipped_docs} of the provided Documents, as using them would exceed the maximum token "
-                f"limit of {self.MAX_TOKENS_LIMIT}."
+                "Skipping %s of the provided Documents, as using them would exceed the maximum token limit of %s.",
+                skipped_docs,
+                self.MAX_TOKENS_LIMIT,
            )
        # Top ranked documents should go at the end

View File

@@ -131,8 +131,9 @@ class RAGenerator(BaseGenerator):
        self.devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False)
        if len(self.devices) > 1:
            logger.warning(
-                f"Multiple devices are not supported in {self.__class__.__name__} inference, "
-                f"using the first device {self.devices[0]}."
+                "Multiple devices are not supported in %s inference, using the first device %s.",
+                self.__class__.__name__,
+                self.devices[0],
            )
        self.tokenizer = RagTokenizer.from_pretrained(model_name_or_path, use_auth_token=use_auth_token)
@@ -389,8 +390,9 @@ class Seq2SeqGenerator(BaseGenerator):
        self.devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False)
        if len(self.devices) > 1:
            logger.warning(
-                f"Multiple devices are not supported in {self.__class__.__name__} inference, "
-                f"using the first device {self.devices[0]}."
+                "Multiple devices are not supported in %s inference, using the first device %s.",
+                self.__class__.__name__,
+                self.devices[0],
            )
        Seq2SeqGenerator._register_converters(model_name_or_path, input_converter)

View File

@@ -52,8 +52,9 @@ class TextToSpeech:
        resolved_devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False)
        if len(resolved_devices) > 1:
            logger.warning(
-                f"Multiple devices are not supported in {self.__class__.__name__} inference, "
-                f"using the first device {resolved_devices[0]}."
+                "Multiple devices are not supported in %s inference, using the first device %s.",
+                self.__class__.__name__,
+                resolved_devices[0],
            )
        self.model = _Text2SpeechModel.from_pretrained(

View File

@@ -311,7 +311,12 @@ class Crawler(BaseComponent):
                    json.dump(document.to_dict(), f)
            except Exception as e:
                logging.exception(
-                    f"Crawler can't save the content of '{link}' under '{file_path}'. This webpage will be skipped, but links from this page will still be crawled. Make sure the path above is accessible and the file name is valid. If the file name is invalid, consider setting 'crawler_naming_function' to another function."
+                    "Crawler can't save the content of '%s' under '%s'. "
+                    "This webpage will be skipped, but links from this page will still be crawled. "
+                    "Make sure the path above is accessible and the file name is valid. "
+                    "If the file name is invalid, consider setting 'crawler_naming_function' to another function.",
+                    link,
+                    file_path,
                )
            paths.append(file_path)

View File

@@ -123,15 +123,17 @@ class TransformersDocumentClassifier(BaseDocumentClassifier):
        if labels and task == "text-classification":
            logger.warning(
-                f"Provided labels {labels} will be ignored for task text-classification. Set task to "
-                f"zero-shot-classification to use labels."
+                "Provided labels %s will be ignored for task text-classification. Set task to "
+                "zero-shot-classification to use labels.",
+                labels,
            )
        resolved_devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False)
        if len(resolved_devices) > 1:
            logger.warning(
-                f"Multiple devices are not supported in {self.__class__.__name__} inference, "
-                f"using the first device {resolved_devices[0]}."
+                "Multiple devices are not supported in %s inference, using the first device %s.",
+                self.__class__.__name__,
+                resolved_devices[0],
            )
        if tokenizer is None:

View File

@@ -75,16 +75,18 @@ class EvalDocuments(BaseComponent):
            self.top_k_used = top_k
        elif self.top_k_used != top_k:
            logger.warning(
-                f"EvalDocuments was last run with top_k_eval_documents={self.top_k_used} but is "
-                f"being run again with top_k={self.top_k}. "
-                f"The evaluation counter is being reset from this point so that the evaluation "
-                f"metrics are interpretable."
+                "EvalDocuments was last run with top_k_eval_documents=%s but is "
+                "being run again with top_k=%s. "
+                "The evaluation counter is being reset from this point so that the evaluation "
+                "metrics are interpretable.",
+                self.top_k_used,
+                self.top_k,
            )
            self.init_counts()
        if len(documents) < top_k and not self.too_few_docs_warning:
            logger.warning(
-                f"EvalDocuments is being provided less candidate documents than top_k " f"(currently set to {top_k})."
+                "EvalDocuments is being provided less candidate documents than top_k (currently set to %s).", top_k
            )
            self.too_few_docs_warning = True

View File

@@ -123,8 +123,9 @@ class EntityExtractor(BaseComponent):
        self.devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False)
        if len(self.devices) > 1:
            logger.warning(
-                f"Multiple devices are not supported in {self.__class__.__name__} inference, "
-                f"using the first device {self.devices[0]}."
+                "Multiple devices are not supported in %s inference, using the first device %s.",
+                self.__class__.__name__,
+                self.devices[0],
            )
        self.batch_size = batch_size
        self.progress_bar = progress_bar

View File

@@ -63,8 +63,9 @@ class FileTypeClassifier(BaseComponent):
            return mimetypes.guess_extension(extension) or ""
        except NameError as ne:
            logger.error(
-                f"The type of '{file_path}' could not be guessed, probably because 'python-magic' is not installed. Ignoring this error."
-                "Please make sure the necessary OS libraries are installed if you need this functionality ('python-magic' or 'python-magic-bin' on Windows)."
+                "The type of '%s' could not be guessed, probably because 'python-magic' is not installed. Ignoring this error."
+                "Please make sure the necessary OS libraries are installed if you need this functionality ('python-magic' or 'python-magic-bin' on Windows).",
+                file_path,
            )
            return ""

View File

@@ -201,8 +201,10 @@ class AzureConverter(BaseConverter):
                        file_text += f" {cell}"
            if not self.validate_language(file_text, valid_languages):
                logger.warning(
-                    f"The language for {file_path} is not one of {valid_languages}. The file may not have "
-                    f"been decoded in the correct text format."
+                    "The language for %s is not one of %s. The file may not have "
+                    "been decoded in the correct text format.",
+                    file_path,
+                    valid_languages,
                )
        return docs

View File

@@ -146,8 +146,9 @@ class ImageToTextConverter(BaseConverter):
            document_text = "".join(cleaned_pages)
            if not self.validate_language(document_text, valid_languages):
                logger.warning(
-                    f"The language for image is not one of {valid_languages}. The file may not have "
-                    f"been decoded in the correct text format."
+                    "The language for image is not one of %s. The file may not have "
+                    "been decoded in the correct text format.",
+                    valid_languages,
                )
        text = "\f".join(cleaned_pages)

View File

@@ -200,8 +200,10 @@ class ParsrConverter(BaseConverter):
                        file_text += f" {cell}"
            if not self.validate_language(file_text, valid_languages):
                logger.warning(
-                    f"The language for {file_path} is not one of {valid_languages}. The file may not have "
-                    f"been decoded in the correct text format."
+                    "The language for %s is not one of %s. The file may not have "
+                    "been decoded in the correct text format.",
+                    file_path,
+                    valid_languages,
                )
        if extract_headlines:

View File

@@ -150,8 +150,10 @@ class PDFToTextConverter(BaseConverter):
            document_text = "".join(cleaned_pages)
            if not self.validate_language(document_text, valid_languages):
                logger.warning(
-                    f"The language for {file_path} is not one of {valid_languages}. The file may not have "
-                    f"been decoded in the correct text format."
+                    "The language for %s is not one of %s. The file may not have "
+                    "been decoded in the correct text format.",
+                    file_path,
+                    valid_languages,
                )
        text = "\f".join(cleaned_pages)

View File

@@ -172,8 +172,10 @@ class TikaConverter(BaseConverter):
            document_text = "".join(cleaned_pages)
            if not self.validate_language(document_text, valid_languages):
                logger.warning(
-                    f"The language for {file_path} is not one of {valid_languages}. The file may not have "
-                    f"been decoded in the correct text format."
+                    "The language for %s is not one of %s. The file may not have "
+                    "been decoded in the correct text format.",
+                    file_path,
+                    valid_languages,
                )
        text = "\f".join(cleaned_pages)

View File

@@ -75,8 +75,10 @@ class TextConverter(BaseConverter):
            document_text = "".join(cleaned_pages)
            if not self.validate_language(document_text, valid_languages):
                logger.warning(
-                    f"The language for {file_path} is not one of {valid_languages}. The file may not have "
-                    f"been decoded in the correct text format."
+                    "The language for %s is not one of %s. The file may not have "
+                    "been decoded in the correct text format.",
+                    file_path,
+                    valid_languages,
                )
        text = "".join(cleaned_pages)

View File

@@ -119,8 +119,9 @@ class PseudoLabelGenerator(BaseComponent):
        self.devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False)
        if len(self.devices) > 1:
            logger.warning(
-                f"Multiple devices are not supported in {self.__class__.__name__} inference, "
-                f"using the first device {self.devices[0]}."
+                "Multiple devices are not supported in %s inference, using the first device %s.",
+                self.__class__.__name__,
+                self.devices[0],
            )
        self.retriever = retriever

View File

@ -106,7 +106,7 @@ class PreProcessor(BasePreProcessor):
try: try:
nltk.download("punkt") nltk.download("punkt")
except FileExistsError as error: except FileExistsError as error:
logger.debug(f"NLTK punkt tokenizer seems to be already downloaded. Error message: {error}") logger.debug("NLTK punkt tokenizer seems to be already downloaded. Error message: %s", error)
pass pass
self.clean_whitespace = clean_whitespace self.clean_whitespace = clean_whitespace
self.clean_header_footer = clean_header_footer self.clean_header_footer = clean_header_footer
@ -747,14 +747,16 @@ class PreProcessor(BasePreProcessor):
# NLTK failed to load custom SentenceTokenizer, fallback to the default model or to English # NLTK failed to load custom SentenceTokenizer, fallback to the default model or to English
if language_name is not None: if language_name is not None:
logger.error( logger.error(
f"PreProcessor couldn't find custom sentence tokenizer model for {self.language}. " "PreProcessor couldn't find custom sentence tokenizer model for %s. Using default %s model.",
f"Using default {self.language} model." self.language,
self.language,
) )
sentence_tokenizer = nltk.data.load(f"tokenizers/punkt/{language_name}.pickle") sentence_tokenizer = nltk.data.load(f"tokenizers/punkt/{language_name}.pickle")
else: else:
logger.error( logger.error(
f"PreProcessor couldn't find default or custom sentence tokenizer model for {self.language}. " "PreProcessor couldn't find default or custom sentence tokenizer model for %s. "
f"Using English instead." "Using English instead.",
self.language,
) )
sentence_tokenizer = nltk.data.load(f"tokenizers/punkt/english.pickle") sentence_tokenizer = nltk.data.load(f"tokenizers/punkt/english.pickle")
@ -763,8 +765,9 @@ class PreProcessor(BasePreProcessor):
sentence_tokenizer = nltk.data.load(f"tokenizers/punkt/{language_name}.pickle") sentence_tokenizer = nltk.data.load(f"tokenizers/punkt/{language_name}.pickle")
else: else:
logger.error( logger.error(
f"PreProcessor couldn't find the default sentence tokenizer model for {self.language}. " "PreProcessor couldn't find the default sentence tokenizer model for %s. "
f" Using English instead. You may train your own model and use the 'tokenizer_model_folder' parameter." " Using English instead. You may train your own model and use the 'tokenizer_model_folder' parameter.",
self.language,
) )
sentence_tokenizer = nltk.data.load(f"tokenizers/punkt/english.pickle") sentence_tokenizer = nltk.data.load(f"tokenizers/punkt/english.pickle")

View File

@ -125,8 +125,11 @@ class PromptTemplate(BasePromptTemplate, ABC):
if args: if args:
if len(args) != len(self.prompt_params): if len(args) != len(self.prompt_params):
logger.warning( logger.warning(
f"For {self.name}, expected {self.prompt_params} arguments, instead " "For %s, expected %s arguments, instead got %s arguments %s",
f"got {len(args)} arguments {args}" self.name,
self.prompt_params,
len(args),
args,
) )
for prompt_param, arg in zip(self.prompt_params, args): for prompt_param, arg in zip(self.prompt_params, args):
template_dict[prompt_param] = [arg] if isinstance(arg, str) else arg template_dict[prompt_param] = [arg] if isinstance(arg, str) else arg
@ -229,8 +232,9 @@ class HFLocalInvocationLayer(PromptModelInvocationLayer):
self.devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False) self.devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False)
if len(self.devices) > 1: if len(self.devices) > 1:
logger.warning( logger.warning(
f"Multiple devices are not supported in {self.__class__.__name__} inference, " "Multiple devices are not supported in %s inference, using the first device %s.",
f"using the first device {self.devices[0]}." self.__class__.__name__,
self.devices[0],
) )
# Due to reflective construction of all invocation layers we might receive some # Due to reflective construction of all invocation layers we might receive some
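When a long f-string is split into placeholders plus a trailing argument list, the placeholder and argument counts must stay in step. A mismatch does not raise at the call site; the logging module catches it while emitting and prints a "Logging error" traceback to stderr instead, which is easy to miss. pylint's logging-too-many-args / logging-too-few-args checks guard this. A sketch with matched counts (values are illustrative):

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

name, prompt_params, args = "my-template", ["query"], ("q1", "q2")

# Four %s placeholders, four trailing arguments.
logger.warning(
    "For %s, expected %s arguments, instead got %s arguments: %s",
    name,
    prompt_params,
    len(args),
    args,
)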

View File

@ -100,8 +100,9 @@ class TransformersQueryClassifier(BaseQueryClassifier):
resolved_devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False) resolved_devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False)
if len(resolved_devices) > 1: if len(resolved_devices) > 1:
logger.warning( logger.warning(
f"Multiple devices are not supported in {self.__class__.__name__} inference, " "Multiple devices are not supported in %s inference, using the first device %s.",
f"using the first device {resolved_devices[0]}." self.__class__.__name__,
resolved_devices[0],
) )
self.model = pipeline( self.model = pipeline(

View File

@ -81,8 +81,9 @@ class QuestionGenerator(BaseComponent):
self.devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False) self.devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False)
if len(self.devices) > 1: if len(self.devices) > 1:
logger.warning( logger.warning(
f"Multiple devices are not supported in {self.__class__.__name__} inference, " "Multiple devices are not supported in %s inference, using the first device %s.",
f"using the first device {self.devices[0]}." self.__class__.__name__,
self.devices[0],
) )
self.model = AutoModelForSeq2SeqLM.from_pretrained( self.model = AutoModelForSeq2SeqLM.from_pretrained(
model_name_or_path, revision=model_version, use_auth_token=use_auth_token model_name_or_path, revision=model_version, use_auth_token=use_auth_token

View File

@ -1023,9 +1023,10 @@ class FARMReader(BaseReader):
if self.top_k_per_candidate != 4: if self.top_k_per_candidate != 4:
logger.info( logger.info(
f"Performing Evaluation using top_k_per_candidate = {self.top_k_per_candidate} \n" "Performing Evaluation using top_k_per_candidate = %s \n"
f"and consequently, QuestionAnsweringPredictionHead.n_best = {self.top_k_per_candidate + 1}. \n" "and consequently, QuestionAnsweringPredictionHead.n_best = {self.top_k_per_candidate + 1}. \n"
f"This deviates from FARM's default where QuestionAnsweringPredictionHead.n_best = 5" "This deviates from FARM's default where QuestionAnsweringPredictionHead.n_best = 5",
self.top_k_per_candidate,
) )
# extract all questions for evaluation # extract all questions for evaluation
@ -1062,7 +1063,7 @@ class FARMReader(BaseReader):
continue continue
if label.answer.offsets_in_document is None: if label.answer.offsets_in_document is None:
logger.error( logger.error(
f"Label.answer.offsets_in_document was None, but Span object was expected: {label} " "Label.answer.offsets_in_document was None, but Span object was expected: %s ", label
) )
continue continue
# add to existing answers # add to existing answers
@ -1074,7 +1075,11 @@ class FARMReader(BaseReader):
# Hack to fix problem where duplicate questions are merged by doc_store processing creating a QA example with 8 annotations > 6 annotation max # Hack to fix problem where duplicate questions are merged by doc_store processing creating a QA example with 8 annotations > 6 annotation max
if len(aggregated_per_question[aggregation_key]["answers"]) >= 6: if len(aggregated_per_question[aggregation_key]["answers"]) >= 6:
logger.warning( logger.warning(
f"Answers in this sample are being dropped because it has more than 6 answers. (doc_id: {doc_id}, question: {label.query}, label_id: {label.id})" "Answers in this sample are being dropped because it has more than 6 answers. "
"(doc_id: %s, question: %s, label_id: %s)",
doc_id,
label.query,
label.id,
) )
continue continue
aggregated_per_question[aggregation_key]["answers"].append( aggregated_per_question[aggregation_key]["answers"].append(
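One pitfall when dropping the f prefix: any brace expression left behind in the now-plain literal is logged verbatim instead of evaluated, and computed values such as top_k_per_candidate + 1 must move into the argument list. A reduced illustration (names are illustrative):

import logging

logging.basicConfig(level=logging.INFO)
top_k_per_candidate = 4  # illustrative value

# Without the f prefix the braces are not evaluated; this string keeps
# the literal text "{top_k_per_candidate + 1}".
plain = "n_best = {top_k_per_candidate + 1}"
assert "{" in plain

# Computed values are passed as arguments instead.
logging.getLogger(__name__).info(
    "Performing Evaluation using top_k_per_candidate = %s and n_best = %s",
    top_k_per_candidate,
    top_k_per_candidate + 1,
)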

View File

@ -116,8 +116,9 @@ class TableReader(BaseReader):
self.devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False) self.devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False)
if len(self.devices) > 1: if len(self.devices) > 1:
logger.warning( logger.warning(
f"Multiple devices are not supported in {self.__class__.__name__} inference, " "Multiple devices are not supported in %s inference, using the first device %s.",
f"using the first device {self.devices[0]}." self.__class__.__name__,
self.devices[0],
) )
config = TapasConfig.from_pretrained(model_name_or_path, use_auth_token=use_auth_token) config = TapasConfig.from_pretrained(model_name_or_path, use_auth_token=use_auth_token)
@ -646,8 +647,9 @@ class RCIReader(BaseReader):
self.devices, _ = initialize_device_settings(use_cuda=use_gpu, multi_gpu=False) self.devices, _ = initialize_device_settings(use_cuda=use_gpu, multi_gpu=False)
if len(self.devices) > 1: if len(self.devices) > 1:
logger.warning( logger.warning(
f"Multiple devices are not supported in {self.__class__.__name__} inference, " "Multiple devices are not supported in %s inference, using the first device %s.",
f"using the first device {self.devices[0]}." self.__class__.__name__,
self.devices[0],
) )
self.row_model = AutoModelForSequenceClassification.from_pretrained( self.row_model = AutoModelForSequenceClassification.from_pretrained(

View File

@ -86,8 +86,9 @@ class TransformersReader(BaseReader):
if len(self.devices) > 1: if len(self.devices) > 1:
logger.warning( logger.warning(
f"Multiple devices are not supported in {self.__class__.__name__} inference, " "Multiple devices are not supported in %s inference, using the first device %s.",
f"using the first device {self.devices[0]}." self.__class__.__name__,
self.devices[0],
) )
self.model = pipeline( self.model = pipeline(

View File

@ -104,14 +104,18 @@ class _BaseEmbeddingEncoder:
if model_similarity is not None and document_store.similarity != model_similarity: if model_similarity is not None and document_store.similarity != model_similarity:
logger.warning( logger.warning(
f"You seem to be using {model_name} model with the {document_store.similarity} function instead of the recommended {model_similarity}. " "You seem to be using %s model with the %s function instead of the recommended %s. "
f"This can be set when initializing the DocumentStore" "This can be set when initializing the DocumentStore",
model_name,
document_store.similarity,
model_similarity,
) )
elif "dpr" in model_name.lower() and document_store.similarity != "dot_product": elif "dpr" in model_name.lower() and document_store.similarity != "dot_product":
logger.warning( logger.warning(
f"You seem to be using a DPR model with the {document_store.similarity} function. " "You seem to be using a DPR model with the %s function. "
f"We recommend using dot_product instead. " "We recommend using dot_product instead. "
f"This can be set when initializing the DocumentStore" "This can be set when initializing the DocumentStore",
document_store.similarity,
) )

View File

@ -245,9 +245,8 @@ class BaseRetriever(BaseComponent):
mean_avg_precision = summed_avg_precision / number_of_questions mean_avg_precision = summed_avg_precision / number_of_questions
logger.info( logger.info(
( "For {} out of {} questions ({:.2%}), the answer was in the top-{} candidate passages selected by the retriever.".format(
f"For {correct_retrievals} out of {number_of_questions} questions ({recall:.2%}), the answer was in" correct_retrievals, number_of_questions, recall, top_k
f" the top-{top_k} candidate passages selected by the retriever."
) )
) )
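This hunk keeps an eager str.format() call rather than lazy %-style, presumably because the {:.2%} presentation type has no direct printf equivalent: with %-formatting the value must be scaled by hand, and a literal percent sign in a format string that has arguments must be doubled to %%. A sketch of the equivalence (value is illustrative):

recall = 0.875

# str.format supports the percentage presentation type directly.
eager = "recall was {:.2%}".format(recall)    # 'recall was 87.50%'

# printf-style needs manual scaling and an escaped %% literal.
lazy = "recall was %.2f%%" % (recall * 100)   # 'recall was 87.50%'
assert eager == lazy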

View File

@ -178,9 +178,10 @@ class DensePassageRetriever(DenseRetriever):
if document_store and document_store.similarity != "dot_product": if document_store and document_store.similarity != "dot_product":
logger.warning( logger.warning(
f"You are using a Dense Passage Retriever model with the {document_store.similarity} function. " "You are using a Dense Passage Retriever model with the %s function. "
"We recommend you use dot_product instead. " "We recommend you use dot_product instead. "
"This can be set when initializing the DocumentStore" "This can be set when initializing the DocumentStore",
document_store.similarity,
) )
# Init & Load Encoders # Init & Load Encoders
@ -550,8 +551,9 @@ class DensePassageRetriever(DenseRetriever):
""" """
if self.processor.num_hard_negatives != 0: if self.processor.num_hard_negatives != 0:
logger.warning( logger.warning(
f"'num_hard_negatives' is set to {self.processor.num_hard_negatives}, but inference does " "'num_hard_negatives' is set to %s, but inference does "
f"not require any hard negatives. Setting num_hard_negatives to 0." "not require any hard negatives. Setting num_hard_negatives to 0.",
self.processor.num_hard_negatives,
) )
self.processor.num_hard_negatives = 0 self.processor.num_hard_negatives = 0
@ -1163,8 +1165,9 @@ class TableTextRetriever(DenseRetriever):
if self.processor.num_hard_negatives != 0: if self.processor.num_hard_negatives != 0:
logger.warning( logger.warning(
f"'num_hard_negatives' is set to {self.processor.num_hard_negatives}, but inference does " "'num_hard_negatives' is set to %s, but inference does "
f"not require any hard negatives. Setting num_hard_negatives to 0." "not require any hard negatives. Setting num_hard_negatives to 0.",
self.processor.num_hard_negatives,
) )
self.processor.num_hard_negatives = 0 self.processor.num_hard_negatives = 0
@ -1532,10 +1535,11 @@ class EmbeddingRetriever(DenseRetriever):
and model_format != "sentence_transformers" and model_format != "sentence_transformers"
): ):
logger.warning( logger.warning(
f"You seem to be using a Sentence Transformer embedding model but 'model_format' is set to '{self.model_format}'." "You seem to be using a Sentence Transformer embedding model but 'model_format' is set to '%s'."
f" You may need to set model_format='sentence_transformers' to ensure correct loading of model." " You may need to set model_format='sentence_transformers' to ensure correct loading of model."
f"As an alternative, you can let Haystack derive the format automatically by not setting the " "As an alternative, you can let Haystack derive the format automatically by not setting the "
f"'model_format' parameter at all." "'model_format' parameter at all.",
self.model_format,
) )
self.embedding_encoder = _EMBEDDING_ENCODERS[self.model_format](retriever=self) self.embedding_encoder = _EMBEDDING_ENCODERS[self.model_format](retriever=self)

View File

@ -96,8 +96,9 @@ class TransformersSummarizer(BaseSummarizer):
self.devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False) self.devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False)
if len(self.devices) > 1: if len(self.devices) > 1:
logger.warning( logger.warning(
f"Multiple devices are not supported in {self.__class__.__name__} inference, " "Multiple devices are not supported in %s} inference, using the first device %s.",
f"using the first device {self.devices[0]}." self.__class__.__name__,
self.devices[0],
) )
if tokenizer is None: if tokenizer is None:

View File

@ -83,8 +83,9 @@ class TransformersTranslator(BaseTranslator):
self.devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False) self.devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False)
if len(self.devices) > 1: if len(self.devices) > 1:
logger.warning( logger.warning(
f"Multiple devices are not supported in {self.__class__.__name__} inference, " "Multiple devices are not supported in %s inference, using the first device %s.",
f"using the first device {self.devices[0]}." self.__class__.__name__,
self.devices[0],
) )
self.max_seq_len = max_seq_len self.max_seq_len = max_seq_len

View File

@ -293,10 +293,13 @@ class Pipeline:
for document_store in document_stores: for document_store in document_stores:
if document_store["type"] != "DeepsetCloudDocumentStore": if document_store["type"] != "DeepsetCloudDocumentStore":
logger.info( logger.info(
f"In order to be used on Deepset Cloud, component '{document_store['name']}' of type '{document_store['type']}' " "In order to be used on Deepset Cloud, component '%s' of type '%s' "
f"has been automatically converted to type DeepsetCloudDocumentStore. " "has been automatically converted to type DeepsetCloudDocumentStore. "
f"Usually this replacement will result in equivalent pipeline quality. " "Usually this replacement will result in equivalent pipeline quality. "
f"However depending on chosen settings of '{document_store['name']}' differences might occur." "However depending on chosen settings of '%s' differences might occur.",
document_store["name"],
document_store["type"],
document_store["name"],
) )
document_store["type"] = "DeepsetCloudDocumentStore" document_store["type"] = "DeepsetCloudDocumentStore"
document_store["params"] = {} document_store["params"] = {}
@ -784,7 +787,7 @@ class Pipeline:
# crop dataset if `dataset_size` is provided and is valid # crop dataset if `dataset_size` is provided and is valid
if num_documents is not None and 0 < num_documents < len(corpus): if num_documents is not None and 0 < num_documents < len(corpus):
logger.info(f"Cropping dataset from {len(corpus)} to {num_documents} documents") logger.info("Cropping dataset from %s to %s documents", len(corpus), num_documents)
corpus = dict(itertools.islice(corpus.items(), num_documents)) corpus = dict(itertools.islice(corpus.items(), num_documents))
# Remove queries that don't contain the remaining documents # Remove queries that don't contain the remaining documents
corpus_ids = set(list(corpus.keys())) corpus_ids = set(list(corpus.keys()))
@ -800,8 +803,9 @@ class Pipeline:
qrels = qrels_new qrels = qrels_new
elif num_documents is not None and (num_documents < 1 or num_documents > len(corpus)): elif num_documents is not None and (num_documents < 1 or num_documents > len(corpus)):
logging.warning( logging.warning(
f"'num_documents' variable should be lower than corpus length and have a positive value, but it's {num_documents}." "'num_documents' variable should be lower than corpus length and have a positive value, but it's %s."
" Dataset size remains unchanged." " Dataset size remains unchanged.",
num_documents,
) )
# check index before eval # check index before eval

View File

@ -82,7 +82,11 @@ def get_component_definitions(
param_name = key.replace(env_prefix, "").lower() param_name = key.replace(env_prefix, "").lower()
component_definition["params"][param_name] = value component_definition["params"][param_name] = value
logger.info( logger.info(
f"Param '{param_name}' of component '{name}' overwritten with environment variable '{key}' value '{value}'." "Param '%s' of component '%s' overwritten with environment variable '%s' value '%s'.",
param_name,
name,
key,
value,
) )
return component_definitions return component_definitions
@ -291,11 +295,13 @@ def validate_schema(pipeline_config: Dict, strict_version_check: bool = False, e
ok_to_ignore_version = pipeline_version == "ignore" and "rc" in __version__ ok_to_ignore_version = pipeline_version == "ignore" and "rc" in __version__
if not ok_to_ignore_version: if not ok_to_ignore_version:
logging.warning( logging.warning(
f"This pipeline is version '{pipeline_version}', but you're using Haystack {__version__}\n" "This pipeline is version '%s', but you're using Haystack %s\n"
"This might cause bugs and unexpected behaviors." "This might cause bugs and unexpected behaviors."
"Please check out the release notes (https://github.com/deepset-ai/haystack/releases/latest), " "Please check out the release notes (https://github.com/deepset-ai/haystack/releases/latest), "
"the documentation (https://haystack.deepset.ai/components/pipelines#yaml-file-definitions) " "the documentation (https://haystack.deepset.ai/components/pipelines#yaml-file-definitions) "
"and fix your configuration accordingly." "and fix your configuration accordingly.",
pipeline_version,
__version__,
) )
# Load the json schema, and create one if it doesn't exist yet # Load the json schema, and create one if it doesn't exist yet
@ -317,7 +323,8 @@ def validate_schema(pipeline_config: Dict, strict_version_check: bool = False, e
if validation.instance["type"] not in loaded_custom_nodes: if validation.instance["type"] not in loaded_custom_nodes:
logger.info( logger.info(
f"Missing definition for node of type {validation.instance['type']}. Looking into local classes..." "Missing definition for node of type %s. Looking into local classes...",
validation.instance["type"],
) )
missing_component_class = BaseComponent.get_subclass(validation.instance["type"]) missing_component_class = BaseComponent.get_subclass(validation.instance["type"])
schema = inject_definition_in_schema(node_class=missing_component_class, schema=schema) schema = inject_definition_in_schema(node_class=missing_component_class, schema=schema)
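A recurring hazard with implicitly concatenated literals, as in the multi-line messages above, is the sentence boundary: adjacent literals are fused at compile time with no separator, so if neither side contributes a space the words run together in the emitted message. A quick demonstration:

broken = (
    "This might cause bugs and unexpected behaviors."
    "Please check out the release notes."
)
assert "behaviors.Please" in broken

fixed = (
    "This might cause bugs and unexpected behaviors. "
    "Please check out the release notes."
)
assert "behaviors. Please" in fixed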

View File

@ -1092,8 +1092,10 @@ class EvaluationResult:
query_answers = answers[answers["multilabel_id"] == multilabel_id] query_answers = answers[answers["multilabel_id"] == multilabel_id]
if answer_metric not in metrics: if answer_metric not in metrics:
logger.warning( logger.warning(
f"You specified an answer_metric={answer_metric} not available in calculated metrics={metrics.keys()}." "You specified an answer_metric=%s not available in calculated metrics=%s."
f"Skipping collection of worst performing samples." "Skipping collection of worst performing samples.",
answer_metric,
metrics.keys(),
) )
break break
if metrics[answer_metric] <= answer_metric_threshold: if metrics[answer_metric] <= answer_metric_threshold:
@ -1127,8 +1129,10 @@ class EvaluationResult:
for multilabel_id, metrics in worst_df.iterrows(): for multilabel_id, metrics in worst_df.iterrows():
if document_metric not in metrics: if document_metric not in metrics:
logger.warning( logger.warning(
f"You specified a document_metric={document_metric} not available in calculated metrics={metrics.keys()}." "You specified a document_metric=%s not available in calculated metrics=%s."
f"Skipping collection of worst performing samples." "Skipping collection of worst performing samples.",
document_metric,
metrics.keys(),
) )
break break
if metrics[document_metric] <= document_metric_threshold: if metrics[document_metric] <= document_metric_threshold:
@ -1185,9 +1189,9 @@ class EvaluationResult:
document_relevance_criterion = answer_scope_to_doc_relevance_crit.get(answer_scope, document_scope) document_relevance_criterion = answer_scope_to_doc_relevance_crit.get(answer_scope, document_scope)
elif answer_scope in answer_scope_to_doc_relevance_crit.keys(): elif answer_scope in answer_scope_to_doc_relevance_crit.keys():
logger.warning( logger.warning(
f"You specified a non-answer document_scope together with a non-default answer_scope. " "You specified a non-answer document_scope together with a non-default answer_scope. "
f"This may result in inconsistencies between answer and document metrics. " "This may result in inconsistencies between answer and document metrics. "
f"To enforce the same definition of correctness for both, document_scope must be one of {['answer', 'document_id_or_answer']}." "To enforce the same definition of correctness for both, document_scope must be one of 'answer', 'document_id_or_answer'."
) )
return document_relevance_criterion # type: ignore[return-value] return document_relevance_criterion # type: ignore[return-value]

View File

@ -250,7 +250,11 @@ def _write_telemetry_config():
# show a log message if telemetry config is written for the first time # show a log message if telemetry config is written for the first time
if not CONFIG_PATH.is_file(): if not CONFIG_PATH.is_file():
logger.info( logger.info(
f"Haystack sends anonymous usage data to understand the actual usage and steer dev efforts towards features that are most meaningful to users. You can opt-out at anytime by calling disable_telemetry() or by manually setting the environment variable HAYSTACK_TELEMETRY_ENABLED as described for different operating systems on the documentation page. More information at https://docs.haystack.deepset.ai/docs/telemetry" "Haystack sends anonymous usage data to understand the actual usage and steer dev efforts "
"towards features that are most meaningful to users. You can opt-out at anytime by calling "
"disable_telemetry() or by manually setting the environment variable "
"HAYSTACK_TELEMETRY_ENABLED as described for different operating systems on the documentation "
"page. More information at https://docs.haystack.deepset.ai/docs/telemetry"
) )
CONFIG_PATH.parents[0].mkdir(parents=True, exist_ok=True) CONFIG_PATH.parents[0].mkdir(parents=True, exist_ok=True)
user_id = _get_or_create_user_id() user_id = _get_or_create_user_id()

View File

@ -420,7 +420,11 @@ class IndexClient:
doc = response.json() doc = response.json()
else: else:
logger.warning( logger.warning(
f"Document {id} could not be fetched from deepset Cloud: HTTP {response.status_code} - {response.reason}\n{response.content.decode()}" "Document %s could not be fetched from deepset Cloud: HTTP %s - %s\n%s",
id,
response.status_code,
response.reason,
response.content.decode(),
) )
return doc return doc
@ -625,7 +629,9 @@ class PipelineClient:
else: else:
logger.info("Pipeline config '%s' is already deployed.", pipeline_config_name) logger.info("Pipeline config '%s' is already deployed.", pipeline_config_name)
logger.info( logger.info(
f"Search endpoint for pipeline config '{pipeline_config_name}' is up and running for you under {pipeline_url}" "Search endpoint for pipeline config '%s' is up and running for you under %s",
pipeline_config_name,
pipeline_url,
) )
if show_curl_message: if show_curl_message:
curl_cmd = ( curl_cmd = (
@ -925,7 +931,9 @@ class EvaluationSetClient:
with open(file_path, "rb") as file: with open(file_path, "rb") as file:
self.client.post(url=target_url, files={"file": (file_path.name, file, mime_type)}) self.client.post(url=target_url, files={"file": (file_path.name, file, mime_type)})
logger.info( logger.info(
f"Successfully uploaded evaluation set file {file_path}. You can access it now under evaluation set '{file_path.name}'." "Successfully uploaded evaluation set file %s. You can access it now under evaluation set '%s'.",
file_path,
file_path.name,
) )
except DeepsetCloudError as e: except DeepsetCloudError as e:
logger.error("Error uploading evaluation set file %s: %s", file_path, e.args) logger.error("Error uploading evaluation set file %s: %s", file_path, e.args)

View File

@ -87,8 +87,9 @@ def stop_container(container_name, delete_container=False):
status = subprocess.run([f"docker stop {container_name}"], shell=True) status = subprocess.run([f"docker stop {container_name}"], shell=True)
if status.returncode: if status.returncode:
logger.warning( logger.warning(
f"Tried to stop {container_name} but this failed. " "Tried to stop %s but this failed. It is likely that there was no Docker container with the name %s",
f"It is likely that there was no Docker container with the name {container_name}" container_name,
container_name,
) )
if delete_container: if delete_container:
status = subprocess.run([f"docker rm {container_name}"], shell=True) status = subprocess.run([f"docker rm {container_name}"], shell=True)

View File

@ -61,8 +61,8 @@ def print_answers(results: dict, details: str = "all", max_text_len: Optional[in
filtered_answers = answers filtered_answers = answers
else: else:
valid_values = ", ".join(fields_to_keep_by_level.keys()) + " and 'all'" valid_values = ", ".join(fields_to_keep_by_level.keys()) + " and 'all'"
logging.warn(f"print_answers received details='{details}', which was not understood. ") logging.warn("print_answers received details='%s', which was not understood. ", details)
logging.warn(f"Valid values are {valid_values}. Using 'all'.") logging.warn("Valid values are %s. Using 'all'.", valid_values)
filtered_answers = answers filtered_answers = answers
# Shorten long text fields # Shorten long text fields
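The calls on the left-hand side also use logging.warn, which has been a deprecated alias of logging.warning since Python 3.3 and emits a DeprecationWarning on CPython. A quick way to observe it, assuming an interpreter where the alias is still present:

import logging
import warnings

with warnings.catch_warnings():
    warnings.simplefilter("error", DeprecationWarning)
    try:
        logging.warn("old alias")  # the DeprecationWarning is raised as an error
    except DeprecationWarning:
        pass

# The canonical spelling keeps lazy %-interpolation intact.
logging.warning("Valid values are %s. Using 'all'.", "minimum, medium, all")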

View File

@ -50,8 +50,11 @@ def retry_with_exponential_backoff(
# Sleep for the delay # Sleep for the delay
logger.warning( logger.warning(
f"{e.__class__.__name__ } - {e}, " "%s - %s, retry %s in %s seconds...",
f"retry {function.__name__} in {'{0:.2f}'.format(sleep_time)} seconds..." e.__class__.__name__,
e,
function.__name__,
"{0:.2f}".format(sleep_time),
) )
time.sleep(sleep_time) time.sleep(sleep_time)
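printf-style format strings accept full conversion specifiers, so a float need not be pre-rendered with str.format before logging: a %.2f placeholder keeps the two-decimal rounding while the formatting itself stays lazy. Sketch (values are illustrative):

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

sleep_time = 1.23456

# %.2f is applied by the logging framework only when the record is emitted,
# so no string is built for suppressed levels.
logger.warning("retry in %.2f seconds...", sleep_time)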

View File

@ -178,7 +178,7 @@ def create_dpr_training_dataset(squad_data: dict, retriever: BaseRetriever, num_
if not hard_negative_ctxs or not positive_ctxs: if not hard_negative_ctxs or not positive_ctxs:
logging.error( logging.error(
f"No retrieved candidates for article {article_title}, with question {question['question']}" "No retrieved candidates for article %s, with question %s", article_title, question["question"]
) )
n_non_added_questions += 1 n_non_added_questions += 1
continue continue

View File

@ -260,7 +260,6 @@ disable = [
"too-few-public-methods", "too-few-public-methods",
"raise-missing-from", "raise-missing-from",
"invalid-name", "invalid-name",
"logging-fstring-interpolation",
"too-many-locals", "too-many-locals",
"duplicate-code", "duplicate-code",
"too-many-arguments", "too-many-arguments",

View File

@ -171,8 +171,9 @@ def export_feedback(
context = squad_label["paragraphs"][0]["context"] context = squad_label["paragraphs"][0]["context"]
if not context[start : start + len(answer)] == answer: if not context[start : start + len(answer)] == answer:
logger.error( logger.error(
f"Skipping invalid squad label as string via offsets " "Skipping invalid squad label as string via offsets ('%s') does not match answer string ('%s') ",
f"('{context[start:start + len(answer)]}') does not match answer string ('{answer}') " context[start : start + len(answer)],
answer,
) )
export_data.append(squad_label) export_data.append(squad_label)

View File

@ -97,22 +97,27 @@ def _format_filters(filters):
new_filters = {} new_filters = {}
if filters is None: if filters is None:
logger.warning( logger.warning(
f"Request with deprecated filter format ('\"filters\": null'). " "Request with deprecated filter format ('\"filters\": null'). "
f"Remove empty filters from params to be compliant with future versions" "Remove empty filters from params to be compliant with future versions"
) )
else: else:
for key, values in filters.items(): for key, values in filters.items():
if values is None: if values is None:
logger.warning( logger.warning(
f"Request with deprecated filter format ('{key}: null'). " "Request with deprecated filter format ('%s: null'). "
f"Remove null values from filters to be compliant with future versions" "Remove null values from filters to be compliant with future versions",
key,
) )
continue continue
if not isinstance(values, list): if not isinstance(values, list):
logger.warning( logger.warning(
f"Request with deprecated filter format ('{key}': {values}). " "Request with deprecated filter format ('%s': %s). "
f"Change to '{key}':[{values}]' to be compliant with future versions" "Change to '%s':[%s]' to be compliant with future versions",
key,
values,
key,
values,
) )
values = [values] values = [values]