diff --git a/docs/_src/api/api/pipelines.md b/docs/_src/api/api/pipelines.md index cb40d023a..3faa60f55 100644 --- a/docs/_src/api/api/pipelines.md +++ b/docs/_src/api/api/pipelines.md @@ -137,7 +137,7 @@ Set the component for a node in the Pipeline. #### run ```python - | run(query: Optional[str] = None, file_paths: Optional[List[str]] = None, labels: Optional[MultiLabel] = None, documents: Optional[List[Document]] = None, meta: Optional[dict] = None, params: Optional[dict] = None, debug: Optional[bool] = None, debug_logs: Optional[bool] = None) + | run(query: Optional[str] = None, file_paths: Optional[List[str]] = None, labels: Optional[MultiLabel] = None, documents: Optional[List[Document]] = None, meta: Optional[dict] = None, params: Optional[dict] = None, debug: Optional[bool] = None) ``` Runs the pipeline, one node at a time. @@ -155,11 +155,8 @@ Runs the pipeline, one node at a time. {"Retriever": {"top_k": 10}, "Reader": {"top_k": 3, "debug": True}} - `debug`: Whether the pipeline should instruct nodes to collect debug information about their execution. By default these include the input parameters - they received, the output they generated, and eventual logs (of any severity) - emitted. All debug information can then be found in the dict returned - by this method under the key "_debug" -- `debug_logs`: Whether all the logs of the node should be printed in the console, - regardless of their severity and of the existing logger's settings. + they received and the output they generated. All debug information can + then be found in the dict returned by this method under the key "_debug" #### get\_nodes\_by\_class @@ -509,7 +506,7 @@ Pipeline for Extractive Question Answering. #### run ```python - | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None, debug_logs: Optional[bool] = None) + | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None) ``` **Arguments**: @@ -519,11 +516,9 @@ Pipeline for Extractive Question Answering. params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}} - `debug`: Whether the pipeline should instruct nodes to collect debug information about their execution. By default these include the input parameters - they received, the output they generated, and eventual logs (of any severity) - emitted. All debug information can then be found in the dict returned + they received and the output they generated. + All debug information can then be found in the dict returned by this method under the key "_debug" -- `debug_logs`: Whether all the logs of the node should be printed in the console, - regardless of their severity and of the existing logger's settings. ## DocumentSearchPipeline Objects @@ -549,7 +544,7 @@ Pipeline for semantic document search. #### run ```python - | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None, debug_logs: Optional[bool] = None) + | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None) ``` **Arguments**: @@ -558,11 +553,9 @@ Pipeline for semantic document search. - `params`: params for the `retriever` and `reader`. For instance, params={"retriever": {"top_k": 10}} - `debug`: Whether the pipeline should instruct nodes to collect debug information about their execution. By default these include the input parameters - they received, the output they generated, and eventual logs (of any severity) - emitted. All debug information can then be found in the dict returned + they received and the output they generated. 
+ All debug information can then be found in the dict returned by this method under the key "_debug" -- `debug_logs`: Whether all the logs of the node should be printed in the console, - regardless of their severity and of the existing logger's settings. ## GenerativeQAPipeline Objects @@ -589,7 +582,7 @@ Pipeline for Generative Question Answering. #### run ```python - | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None, debug_logs: Optional[bool] = None) + | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None) ``` **Arguments**: @@ -599,11 +592,9 @@ Pipeline for Generative Question Answering. params={"Retriever": {"top_k": 10}, "Generator": {"top_k": 5}} - `debug`: Whether the pipeline should instruct nodes to collect debug information about their execution. By default these include the input parameters - they received, the output they generated, and eventual logs (of any severity) - emitted. All debug information can then be found in the dict returned + they received and the output they generated. + All debug information can then be found in the dict returned by this method under the key "_debug" -- `debug_logs`: Whether all the logs of the node should be printed in the console, - regardless of their severity and of the existing logger's settings. ## SearchSummarizationPipeline Objects @@ -633,7 +624,7 @@ Pipeline that retrieves documents for a query and then summarizes those document #### run ```python - | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None, debug_logs: Optional[bool] = None) + | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None) ``` **Arguments**: @@ -643,11 +634,9 @@ Pipeline that retrieves documents for a query and then summarizes those document params={"retriever": {"top_k": 10}, "summarizer": {"generate_single_summary": True}} - `debug`: Whether the pipeline should instruct nodes to collect debug information about their execution. By default these include the input parameters - they received, the output they generated, and eventual logs (of any severity) - emitted. All debug information can then be found in the dict returned + they received and the output they generated. + All debug information can then be found in the dict returned by this method under the key "_debug" -- `debug_logs`: Whether all the logs of the node should be printed in the console, - regardless of their severity and of the existing logger's settings. ## FAQPipeline Objects @@ -673,7 +662,7 @@ Pipeline for finding similar FAQs using semantic document search. #### run ```python - | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None, debug_logs: Optional[bool] = None) + | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None) ``` **Arguments**: @@ -682,11 +671,9 @@ Pipeline for finding similar FAQs using semantic document search. - `params`: params for the `retriever`. For instance, params={"retriever": {"top_k": 10}} - `debug`: Whether the pipeline should instruct nodes to collect debug information about their execution. By default these include the input parameters - they received, the output they generated, and eventual logs (of any severity) - emitted. All debug information can then be found in the dict returned + they received and the output they generated. 
+ All debug information can then be found in the dict returned by this method under the key "_debug" -- `debug_logs`: Whether all the logs of the node should be printed in the console, - regardless of their severity and of the existing logger's settings. ## TranslationWrapperPipeline Objects diff --git a/haystack/__init__.py b/haystack/__init__.py index 1787751b9..8399aad76 100644 --- a/haystack/__init__.py +++ b/haystack/__init__.py @@ -1,24 +1,10 @@ import logging + +# This configuration must be done before any import to apply to all submodules +logging.basicConfig(format="%(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.WARNING) +logging.getLogger("haystack").setLevel(logging.INFO) + from haystack import pipelines - -# Configure the root logger t0 DEBUG to allow the "debug" flag to receive the logs -root_logger = logging.getLogger() -root_logger.setLevel(logging.DEBUG) - -# Then reconfigure the StreamHandler not to display anything below WARNING as default -stream_handler = logging.StreamHandler() -stream_handler.setLevel(logging.INFO) -root_logger.addHandler(stream_handler) - -# Change log-levels before modules are loaded to avoid verbose log messages. -logging.getLogger('haystack.modeling').setLevel(logging.WARNING) -logging.getLogger('haystack.modeling.utils').setLevel(logging.INFO) -logging.getLogger('haystack.modeling.infer').setLevel(logging.INFO) -logging.getLogger('transformers').setLevel(logging.WARNING) -logging.getLogger('haystack.modeling.evaluation.eval').setLevel(logging.INFO) -logging.getLogger('haystack.modeling.model.optimization').setLevel(logging.INFO) -logging.getLogger('faiss.loader').setLevel(logging.WARNING) - from haystack.schema import Document, Answer, Label, MultiLabel, Span from haystack.nodes import BaseComponent from haystack.pipelines import Pipeline @@ -27,13 +13,13 @@ from haystack._version import __version__ import pandas as pd pd.options.display.max_colwidth = 80 -logger = logging.getLogger(__name__) - # ########################################### # Enable old style imports (temporary) import sys +logger = logging.getLogger(__name__) + # Wrapper emitting a warning on import def DeprecatedModule(mod, deprecated_attributes=None, is_module_deprecated=True): """ diff --git a/haystack/document_stores/base.py b/haystack/document_stores/base.py index 9967950ac..541e7655b 100644 --- a/haystack/document_stores/base.py +++ b/haystack/document_stores/base.py @@ -325,7 +325,7 @@ class BaseDocumentStore(BaseComponent): for document in documents: if document.id in _hash_ids: - logger.warning(f"Duplicate Documents: Document with id '{document.id}' already exists in index " + logger.info(f"Duplicate Documents: Document with id '{document.id}' already exists in index " f"'{self.index}'") continue _documents.append(document) diff --git a/haystack/modeling/data_handler/processor.py b/haystack/modeling/data_handler/processor.py index 61420a984..d45251805 100644 --- a/haystack/modeling/data_handler/processor.py +++ b/haystack/modeling/data_handler/processor.py @@ -328,14 +328,14 @@ class Processor(ABC): return True def _log_samples(self, n_samples:int, baskets:List[SampleBasket]): - logger.info("*** Show {} random examples ***".format(n_samples)) + logger.debug("*** Show {} random examples ***".format(n_samples)) if len(baskets) == 0: - logger.info("*** No samples to show because there are no baskets ***") + logger.debug("*** No samples to show because there are no baskets ***") return for i in range(n_samples): random_basket = 
random.choice(baskets) random_sample = random.choice(random_basket.samples) # type: ignore - logger.info(random_sample) + logger.debug(random_sample) def _log_params(self): params = { @@ -1727,7 +1727,7 @@ class TextClassificationProcessor(Processor): self.header = header self.max_samples = max_samples self.dev_stratification = dev_stratification - logger.warning(f"Currently no support in Processor for returning problematic ids") + logger.debug(f"Currently no support in Processor for returning problematic ids") super(TextClassificationProcessor, self).__init__( tokenizer=tokenizer, diff --git a/haystack/modeling/infer.py b/haystack/modeling/infer.py index 9a00d6dee..cd105f3ec 100644 --- a/haystack/modeling/infer.py +++ b/haystack/modeling/infer.py @@ -119,6 +119,7 @@ class Inferencer: use_fast: bool = True, tokenizer_args: Dict =None, multithreading_rust: bool = True, + devices: Optional[List[Union[int, str, torch.device]]] = None, **kwargs ): """ @@ -158,12 +159,15 @@ class Inferencer: :param multithreading_rust: Whether to allow multithreading in Rust, e.g. for FastTokenizers. Note: Enabling multithreading in Rust AND multiprocessing in python might cause deadlocks. + :param devices: List of devices to perform inference on. (Currently, only the first device in the list is used.) :return: An instance of the Inferencer. """ if tokenizer_args is None: tokenizer_args = {} - devices, n_gpu = initialize_device_settings(use_cuda=gpu, multi_gpu=False) + if devices is None: + devices, n_gpu = initialize_device_settings(use_cuda=gpu, multi_gpu=False) + name = os.path.basename(model_name_or_path) # a) either from local dir @@ -183,7 +187,7 @@ class Inferencer: model = AdaptiveModel.convert_from_transformers(model_name_or_path, revision=revision, - device=devices[0], + device=devices[0], # type: ignore task_type=task_type, **kwargs) processor = Processor.convert_from_transformers(model_name_or_path, diff --git a/haystack/modeling/logger.py b/haystack/modeling/logger.py index 3920df0e1..0e38a571d 100644 --- a/haystack/modeling/logger.py +++ b/haystack/modeling/logger.py @@ -112,7 +112,7 @@ class MLFlowLogger(BaseMLLogger): @classmethod def disable(cls): - logger.warning("ML Logging is turned off. No parameters, metrics or artifacts will be logged to MLFlow.") + logger.info("ML Logging is turned off. 
No parameters, metrics or artifacts will be logged to MLFlow.") cls.disable_logging = True diff --git a/haystack/modeling/model/language_model.py b/haystack/modeling/model/language_model.py index 1a8d1623f..fc2c278b5 100644 --- a/haystack/modeling/model/language_model.py +++ b/haystack/modeling/model/language_model.py @@ -24,6 +24,7 @@ import json import logging import os from pathlib import Path +from functools import wraps import numpy as np import torch from torch import nn @@ -46,6 +47,29 @@ from transformers.modeling_utils import SequenceSummary logger = logging.getLogger(__name__) +def silence_transformers_logs(from_pretrained_func): + """ + Wrapper that raises the log level of Transformers to + ERROR to hide some unnecessary warnings + """ + @wraps(from_pretrained_func) + def quiet_from_pretrained_func(cls, *args, **kwargs): + + # Raise the log level of Transformers + t_logger = logging.getLogger("transformers") + original_log_level = t_logger.level + t_logger.setLevel(logging.ERROR) + + result = from_pretrained_func(cls, *args, **kwargs) + + # Restore the log level + t_logger.setLevel(original_log_level) + + return result + + return quiet_from_pretrained_func + + # These are the names of the attributes in various model configs which refer to the number of dimensions # in the output vectors OUTPUT_DIM_NAMES = ["dim", "hidden_size", "d_model"] @@ -122,7 +146,6 @@ class LanguageModel(nn.Module): n_added_tokens = kwargs.pop("n_added_tokens", 0) language_model_class = kwargs.pop("language_model_class", None) kwargs["revision"] = kwargs.get("revision", None) - logger.info("") logger.info("LOADING MODEL") logger.info("=============") config_file = Path(pretrained_model_name_or_path) / "language_model_config.json" @@ -426,6 +449,7 @@ class Bert(LanguageModel): return bert @classmethod + @silence_transformers_logs def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs): """ Load a pretrained model by supplying @@ -503,6 +527,7 @@ class Albert(LanguageModel): self.name = "albert" @classmethod + @silence_transformers_logs def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs): """ Load a language model either by supplying @@ -584,6 +609,7 @@ class Roberta(LanguageModel): self.name = "roberta" @classmethod + @silence_transformers_logs def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs): """ Load a language model either by supplying @@ -665,6 +691,7 @@ class XLMRoberta(LanguageModel): self.name = "xlm_roberta" @classmethod + @silence_transformers_logs def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs): """ Load a language model either by supplying @@ -753,6 +780,7 @@ class DistilBert(LanguageModel): self.pooler = None @classmethod + @silence_transformers_logs def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs): """ Load a pretrained model by supplying @@ -840,6 +868,7 @@ class XLNet(LanguageModel): self.pooler = None @classmethod + @silence_transformers_logs def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs): """ Load a language model either by supplying @@ -946,6 +975,7 @@ class Electra(LanguageModel): self.pooler = None @classmethod + @silence_transformers_logs def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs): """ Load a pretrained model by supplying @@ -1037,6 +1067,7 @@ class Camembert(Roberta): 
self.name = "camembert" @classmethod + @silence_transformers_logs def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs): """ Load a language model either by supplying @@ -1080,6 +1111,7 @@ class DPRQuestionEncoder(LanguageModel): self.name = "dpr_question_encoder" @classmethod + @silence_transformers_logs def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs): """ Load a pretrained model by supplying @@ -1212,6 +1244,7 @@ class DPRContextEncoder(LanguageModel): self.name = "dpr_context_encoder" @classmethod + @silence_transformers_logs def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs): """ Load a pretrained model by supplying @@ -1364,6 +1397,7 @@ class BigBird(LanguageModel): return big_bird @classmethod + @silence_transformers_logs def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs): """ Load a pretrained model by supplying diff --git a/haystack/modeling/model/prediction_head.py b/haystack/modeling/model/prediction_head.py index 4d5df6de7..4f7d46010 100644 --- a/haystack/modeling/model/prediction_head.py +++ b/haystack/modeling/model/prediction_head.py @@ -22,7 +22,7 @@ logger = logging.getLogger(__name__) try: from apex.normalization.fused_layer_norm import FusedLayerNorm as BertLayerNorm except (ImportError, AttributeError) as e: - logger.info("Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .") + logger.debug("Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .") BertLayerNorm = torch.nn.LayerNorm @@ -255,7 +255,7 @@ class QuestionAnsweringHead(PredictionHead): self.layer_dims = layer_dims assert self.layer_dims[-1] == 2 self.feed_forward = FeedForwardBlock(self.layer_dims) - logger.info(f"Prediction head initialized with size {self.layer_dims}") + logger.debug(f"Prediction head initialized with size {self.layer_dims}") self.num_labels = self.layer_dims[-1] self.ph_output_type = "per_token_squad" self.model_type = ("span_classification") # predicts start and end token of answer diff --git a/haystack/modeling/model/tokenization.py b/haystack/modeling/model/tokenization.py index 9d2721af3..0d4b9a047 100644 --- a/haystack/modeling/model/tokenization.py +++ b/haystack/modeling/model/tokenization.py @@ -75,7 +75,7 @@ class Tokenizer: if tokenizer_class is None: tokenizer_class = cls._infer_tokenizer_class(pretrained_model_name_or_path) - logger.info(f"Loading tokenizer of type '{tokenizer_class}'") + logger.debug(f"Loading tokenizer of type '{tokenizer_class}'") # return appropriate tokenizer object ret = None if "AlbertTokenizer" in tokenizer_class: diff --git a/haystack/modeling/visual.py b/haystack/modeling/visual.py index fc2d4d836..fec1ba5f9 100644 --- a/haystack/modeling/visual.py +++ b/haystack/modeling/visual.py @@ -92,15 +92,12 @@ WATERING_CAN = """ WORKER_M = """ 0 /|\\ -/'\\ -""" +/'\\""" WORKER_F =""" 0 /w\\ -/ \\ -""" +/ \\""" WORKER_X =""" 0 /w\\ -/'\\ -""" \ No newline at end of file +/'\\""" \ No newline at end of file diff --git a/haystack/nodes/base.py b/haystack/nodes/base.py index d721314dc..32593aabb 100644 --- a/haystack/nodes/base.py +++ b/haystack/nodes/base.py @@ -14,71 +14,6 @@ from haystack.schema import Document, MultiLabel logger = logging.getLogger(__name__) -class InMemoryLogger(io.TextIOBase): - """ - Implementation of a logger that keeps track - of the log lines in a list called `logs`, - from where they can be 
accessed freely. - """ - def __init__(self, *args): - io.TextIOBase.__init__(self, *args) - self.logs = [] - - def write(self, x): - self.logs.append(x) - - -def record_debug_logs(func: Callable, node_name: str, logs: bool) -> Callable: - """ - Captures the debug logs of the wrapped function and - saves them in the `_debug` key of the output dictionary. - If `logs` is True, dumps the same logs to the console as well. - - Used in `BaseComponent.__getattribute__()` to wrap `run()` functions. - This makes sure that every implementation of `run()` by a subclass will - be automagically decorated with this method when requested. - - :param func: the function to decorate (must be an implementation of - `BaseComponent.run()`). - :param logs: whether the captured logs should also be displayed - in the console during the execution of the pipeline. - """ - @wraps(func) - def inner(*args, **kwargs) -> Tuple[Dict[str, Any], str]: - - with InMemoryLogger() as logs_container: - logger = logging.getLogger() - - # Adds a handler that stores the logs in a variable - handler = logging.StreamHandler(logs_container) - handler.setLevel(logger.level or logging.DEBUG) - logger.addHandler(handler) - - # Add a handler that prints log messages in the console - # to the specified level for the node - if logs: - handler_console = logging.StreamHandler() - handler_console.setLevel(logging.DEBUG) - formatter = logging.Formatter(f'[{node_name} logs] %(message)s') - handler_console.setFormatter(formatter) - logger.addHandler(handler_console) - - output, stream = func(*args, **kwargs) - - if not "_debug" in output.keys(): - output["_debug"] = {} - output["_debug"]["logs"] = logs_container.logs - - # Remove both handlers - logger.removeHandler(handler) - if logs: - logger.removeHandler(handler_console) - - return output, stream - - return inner - - class BaseComponent: """ A base class for implementing nodes in a Pipeline. @@ -96,37 +31,6 @@ class BaseComponent: super().__init_subclass__(**kwargs) cls.subclasses[cls.__name__] = cls - def __getattribute__(self, name): - """ - This modified `__getattribute__` method automagically decorates - every `BaseComponent.run()` implementation with the - `record_debug_logs` decorator defined above. - - This decorator makes the function collect its debug logs into a - `_debug` key of the output dictionary. - - The logs collection is not always performed. Before applying the decorator, - it checks for an instance attribute called `debug` to know - whether it should or not. The decorator is applied if the attribute is - defined and True. - - In addition, the value of the instance attribute `debug_logs` is - passed to the decorator. If it's True, it will print the - logs in the console as well. - """ - if name == "run" and self.debug: - func = getattr(type(self), "run") - return record_debug_logs(func=func, node_name=self.__class__.__name__, logs=self.debug_logs).__get__(self) - return object.__getattribute__(self, name) - - def __getattr__(self, name): - """ - Ensures that `debug` and `debug_logs` are always defined. 
- """ - if name in ["debug", "debug_logs"]: - return None - raise AttributeError(name) - @classmethod def get_subclass(cls, component_type: str): if component_type not in cls.subclasses.keys(): @@ -196,7 +100,7 @@ class BaseComponent: It takes care of the following: - inspect run() signature to validate if all necessary arguments are available - - pop `debug` and `debug_logs` and sets them on the instance to control debug output + - pop `debug` and sets them on the instance to control debug output - call run() with the corresponding arguments and gather output - collate `_debug` information if present - merge component output with the preceding output and pass it on to the subsequent Component in the Pipeline @@ -214,8 +118,6 @@ class BaseComponent: # Extract debug attributes if "debug" in value.keys(): self.debug = value.pop("debug") - if "debug_logs" in value.keys(): - self.debug_logs = value.pop("debug_logs") for _k, _v in value.items(): if _k not in run_signature_args: @@ -234,13 +136,10 @@ class BaseComponent: # Collect debug information current_debug = output.get("_debug", {}) - if self.debug: + if getattr(self, "debug", None): current_debug["input"] = {**run_inputs, **run_params} - if self.debug: - current_debug["input"]["debug"] = self.debug - if self.debug_logs: - current_debug["input"]["debug_logs"] = self.debug_logs - filtered_output = {key: value for key, value in output.items() if key != "_debug"} # Exclude _debug to avoid recursion + current_debug["input"]["debug"] = self.debug + filtered_output = {key: value for key, value in output.items() if key != "_debug"} # Exclude _debug to avoid recursion current_debug["output"] = filtered_output # append _debug information from nodes diff --git a/haystack/nodes/reader/farm.py b/haystack/nodes/reader/farm.py index a046e8ebc..f953c9877 100644 --- a/haystack/nodes/reader/farm.py +++ b/haystack/nodes/reader/farm.py @@ -131,6 +131,7 @@ class FARMReader(BaseReader): proxies=proxies, local_files_only=local_files_only, force_download=force_download, + devices=self.devices, **kwargs) self.inferencer.model.prediction_heads[0].context_window_size = context_window_size self.inferencer.model.prediction_heads[0].no_ans_boost = no_ans_boost @@ -443,7 +444,7 @@ class FARMReader(BaseReader): :type device: str """ if device is None: - device = self.device + device = self.devices[0] eval_processor = SquadProcessor( tokenizer=self.inferencer.processor.tokenizer, max_seq_len=self.inferencer.processor.max_seq_len, @@ -493,7 +494,7 @@ class FARMReader(BaseReader): :param calibrate_conf_scores: Whether to calibrate the temperature for temperature scaling of the confidence scores """ if device is None: - device = self.device + device = self.devices[0] if self.top_k_per_candidate != 4: logger.info(f"Performing Evaluation using top_k_per_candidate = {self.top_k_per_candidate} \n" f"and consequently, QuestionAnsweringPredictionHead.n_best = {self.top_k_per_candidate + 1}. 
\n" @@ -661,7 +662,7 @@ class FARMReader(BaseReader): :param label_origin: Field name where the gold labels are stored """ if device is None: - device = self.device + device = self.devices[0] self.eval(document_store=document_store, device=device, label_index=label_index, diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 50bf87fec..cabe837b1 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -261,8 +261,7 @@ class Pipeline(BasePipeline): documents: Optional[List[Document]] = None, meta: Optional[dict] = None, params: Optional[dict] = None, - debug: Optional[bool] = None, - debug_logs: Optional[bool] = None + debug: Optional[bool] = None ): """ Runs the pipeline, one node at a time. @@ -278,11 +277,8 @@ class Pipeline(BasePipeline): {"Retriever": {"top_k": 10}, "Reader": {"top_k": 3, "debug": True}} :param debug: Whether the pipeline should instruct nodes to collect debug information about their execution. By default these include the input parameters - they received, the output they generated, and eventual logs (of any severity) - emitted. All debug information can then be found in the dict returned - by this method under the key "_debug" - :param debug_logs: Whether all the logs of the node should be printed in the console, - regardless of their severity and of the existing logger's settings. + they received and the output they generated. All debug information can + then be found in the dict returned by this method under the key "_debug" """ # validate the node names if params: @@ -327,8 +323,6 @@ class Pipeline(BasePipeline): if node_id not in node_input["params"].keys(): node_input["params"][node_id] = {} node_input["params"][node_id]["debug"] = debug - if debug_logs is not None: - node_input["params"][node_id]["debug_logs"] = debug_logs predecessors = set(nx.ancestors(self.graph, node_id)) if predecessors.isdisjoint(set(queue.keys())): # only execute if predecessor nodes are executed diff --git a/haystack/pipelines/standard_pipelines.py b/haystack/pipelines/standard_pipelines.py index 46f342533..cb1bc7a8a 100644 --- a/haystack/pipelines/standard_pipelines.py +++ b/haystack/pipelines/standard_pipelines.py @@ -87,21 +87,18 @@ class ExtractiveQAPipeline(BaseStandardPipeline): def run(self, query: str, params: Optional[dict] = None, - debug: Optional[bool] = None, - debug_logs: Optional[bool] = None): + debug: Optional[bool] = None): """ :param query: The search query string. :param params: Params for the `retriever` and `reader`. For instance, params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}} :param debug: Whether the pipeline should instruct nodes to collect debug information about their execution. By default these include the input parameters - they received, the output they generated, and eventual logs (of any severity) - emitted. All debug information can then be found in the dict returned + they received and the output they generated. + All debug information can then be found in the dict returned by this method under the key "_debug" - :param debug_logs: Whether all the logs of the node should be printed in the console, - regardless of their severity and of the existing logger's settings. 
""" - output = self.pipeline.run(query=query, params=params, debug=debug, debug_logs=debug_logs) + output = self.pipeline.run(query=query, params=params, debug=debug) return output @@ -119,20 +116,17 @@ class DocumentSearchPipeline(BaseStandardPipeline): def run(self, query: str, params: Optional[dict] = None, - debug: Optional[bool] = None, - debug_logs: Optional[bool] = None): + debug: Optional[bool] = None): """ :param query: the query string. :param params: params for the `retriever` and `reader`. For instance, params={"retriever": {"top_k": 10}} :param debug: Whether the pipeline should instruct nodes to collect debug information about their execution. By default these include the input parameters - they received, the output they generated, and eventual logs (of any severity) - emitted. All debug information can then be found in the dict returned + they received and the output they generated. + All debug information can then be found in the dict returned by this method under the key "_debug" - :param debug_logs: Whether all the logs of the node should be printed in the console, - regardless of their severity and of the existing logger's settings. """ - output = self.pipeline.run(query=query, params=params, debug=debug, debug_logs=debug_logs) + output = self.pipeline.run(query=query, params=params, debug=debug) return output @@ -152,21 +146,18 @@ class GenerativeQAPipeline(BaseStandardPipeline): def run(self, query: str, params: Optional[dict] = None, - debug: Optional[bool] = None, - debug_logs: Optional[bool] = None): + debug: Optional[bool] = None): """ :param query: the query string. :param params: params for the `retriever` and `generator`. For instance, params={"Retriever": {"top_k": 10}, "Generator": {"top_k": 5}} :param debug: Whether the pipeline should instruct nodes to collect debug information about their execution. By default these include the input parameters - they received, the output they generated, and eventual logs (of any severity) - emitted. All debug information can then be found in the dict returned + they received and the output they generated. + All debug information can then be found in the dict returned by this method under the key "_debug" - :param debug_logs: Whether all the logs of the node should be printed in the console, - regardless of their severity and of the existing logger's settings. """ - output = self.pipeline.run(query=query, params=params, debug=debug, debug_logs=debug_logs) + output = self.pipeline.run(query=query, params=params, debug=debug) return output @@ -190,21 +181,18 @@ class SearchSummarizationPipeline(BaseStandardPipeline): def run(self, query: str, params: Optional[dict] = None, - debug: Optional[bool] = None, - debug_logs: Optional[bool] = None): + debug: Optional[bool] = None): """ :param query: the query string. :param params: params for the `retriever` and `summarizer`. For instance, params={"retriever": {"top_k": 10}, "summarizer": {"generate_single_summary": True}} :param debug: Whether the pipeline should instruct nodes to collect debug information about their execution. By default these include the input parameters - they received, the output they generated, and eventual logs (of any severity) - emitted. All debug information can then be found in the dict returned + they received and the output they generated. 
+ All debug information can then be found in the dict returned by this method under the key "_debug" - :param debug_logs: Whether all the logs of the node should be printed in the console, - regardless of their severity and of the existing logger's settings. - """ - output = self.pipeline.run(query=query, params=params, debug=debug, debug_logs=debug_logs) + """ + output = self.pipeline.run(query=query, params=params, debug=debug) # Convert to answer format to allow "drop-in replacement" for other QA pipelines if self.return_in_answer_format: @@ -243,20 +231,17 @@ class FAQPipeline(BaseStandardPipeline): def run(self, query: str, params: Optional[dict] = None, - debug: Optional[bool] = None, - debug_logs: Optional[bool] = None): + debug: Optional[bool] = None): """ :param query: the query string. :param params: params for the `retriever`. For instance, params={"retriever": {"top_k": 10}} :param debug: Whether the pipeline should instruct nodes to collect debug information about their execution. By default these include the input parameters - they received, the output they generated, and eventual logs (of any severity) - emitted. All debug information can then be found in the dict returned + they received and the output they generated. + All debug information can then be found in the dict returned by this method under the key "_debug" - :param debug_logs: Whether all the logs of the node should be printed in the console, - regardless of their severity and of the existing logger's settings. """ - output = self.pipeline.run(query=query, params=params, debug=debug, debug_logs=debug_logs) + output = self.pipeline.run(query=query, params=params, debug=debug) return output @@ -316,10 +301,8 @@ class QuestionGenerationPipeline(BaseStandardPipeline): def run(self, documents, params: Optional[dict] = None, - debug: Optional[bool] = None, - debug_logs: Optional[bool] = None - ): - output = self.pipeline.run(documents=documents, params=params, debug=debug, debug_logs=debug_logs) + debug: Optional[bool] = None): + output = self.pipeline.run(documents=documents, params=params, debug=debug) return output @@ -336,9 +319,8 @@ class RetrieverQuestionGenerationPipeline(BaseStandardPipeline): def run(self, query: str, params: Optional[dict] = None, - debug: Optional[bool] = None, - debug_logs: Optional[bool] = None): - output = self.pipeline.run(query=query, params=params, debug=debug, debug_logs=debug_logs) + debug: Optional[bool] = None): + output = self.pipeline.run(query=query, params=params, debug=debug) return output @@ -372,9 +354,8 @@ class QuestionAnswerGenerationPipeline(BaseStandardPipeline): def run(self, documents: List[Document], # type: ignore params: Optional[dict] = None, - debug: Optional[bool] = None, - debug_logs: Optional[bool] = None): - output = self.pipeline.run(documents=documents, params=params, debug=debug, debug_logs=debug_logs) + debug: Optional[bool] = None): + output = self.pipeline.run(documents=documents, params=params, debug=debug) return output diff --git a/haystack/utils/doc_store.py b/haystack/utils/doc_store.py index afbc46cc9..2f281aeae 100644 --- a/haystack/utils/doc_store.py +++ b/haystack/utils/doc_store.py @@ -9,7 +9,7 @@ logger = logging.getLogger(__name__) def launch_es(sleep=15): # Start an Elasticsearch server via Docker - logger.info("Starting Elasticsearch ...") + logger.debug("Starting Elasticsearch ...") status = subprocess.run( ['docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.9.2'], shell=True ) @@ -22,7 +22,7 @@ def 
launch_es(sleep=15): def launch_open_distro_es(sleep=15): # Start an Open Distro for Elasticsearch server via Docker - logger.info("Starting Open Distro for Elasticsearch ...") + logger.debug("Starting Open Distro for Elasticsearch ...") status = subprocess.run( ['docker run -d -p 9200:9200 -p 9600:9600 -e "discovery.type=single-node" amazon/opendistro-for-elasticsearch:1.13.2'], shell=True ) @@ -35,7 +35,7 @@ def launch_open_distro_es(sleep=15): def launch_opensearch(sleep=15): # Start an OpenSearch server via docker - logger.info("Starting OpenSearch...") + logger.debug("Starting OpenSearch...") # This line is needed since it is not possible to start a new docker container with the name opensearch if there is a stopped image with the same now # docker rm only succeeds if the container is stopped, not if it is running _ = subprocess.run(['docker rm opensearch'], shell=True, stdout=subprocess.DEVNULL) @@ -53,7 +53,7 @@ def launch_opensearch(sleep=15): def launch_weaviate(sleep=15): # Start a Weaviate server via Docker - logger.info("Starting Weaviate ...") + logger.debug("Starting Weaviate ...") status = subprocess.run( ["docker run -d -p 8080:8080 --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' semitechnologies/weaviate:1.7.2"], shell=True ) @@ -65,7 +65,7 @@ def launch_weaviate(sleep=15): def stop_opensearch(): - logger.info("Stopping OpenSearch...") + logger.debug("Stopping OpenSearch...") status = subprocess.run(['docker stop opensearch'], shell=True) if status.returncode: logger.warning("Tried to stop OpenSearch but this failed. " @@ -84,7 +84,7 @@ def stop_service(document_store): def launch_milvus(sleep=15): # Start a Milvus server via docker - logger.info("Starting Milvus ...") + logger.debug("Starting Milvus ...") logger.warning("Automatic Milvus config creation not yet implemented. " "If you are starting Milvus using launch_milvus(), " "make sure you have a properly populated milvus/conf folder. " diff --git a/test/test_pipeline.py b/test/test_pipeline.py index ec32f2f04..ce476ac5c 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -131,8 +131,7 @@ def test_node_names_validation(document_store_with_docs, tmp_path): "top_k": 5, "non-existing-global_param": "wrong", }, - debug=True, - debug_logs=True + debug=True ) exception_raised = str(exc_info.value) assert "non-existing-node" in exception_raised @@ -155,8 +154,7 @@ def test_debug_attributes_global(document_store_with_docs, tmp_path): prediction = pipeline.run( query="Who lives in Berlin?", params={"ESRetriever": {"top_k": 10}, "Reader": {"top_k": 3}}, - debug=True, - debug_logs=True + debug=True ) assert "_debug" in prediction.keys() assert "ESRetriever" in prediction["_debug"].keys() @@ -187,7 +185,7 @@ def test_debug_attributes_per_node(document_store_with_docs, tmp_path): prediction = pipeline.run( query="Who lives in Berlin?", params={ - "ESRetriever": {"top_k": 10, "debug": True, "debug_logs":True}, + "ESRetriever": {"top_k": 10, "debug": True}, "Reader": {"top_k": 3} }, ) @@ -217,7 +215,7 @@ def test_global_debug_attributes_override_node_ones(document_store_with_docs, tm prediction = pipeline.run( query="Who lives in Berlin?", params={ - "ESRetriever": {"top_k": 10, "debug": True, "debug_logs":True}, + "ESRetriever": {"top_k": 10, "debug": True}, "Reader": {"top_k": 3, "debug": True} }, debug=False
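
Reviewer sketch of the surviving debug API. This patch drops `debug_logs` and the log-capturing machinery but keeps per-node `debug` collection; the snippet below is a hedged usage example based only on the signatures and tests shown above, assuming an already-built `ExtractiveQAPipeline` over a populated document store with nodes named "Retriever" and "Reader":

```python
# Sketch only: `pipeline` is assumed to be a built ExtractiveQAPipeline;
# node names and the query are illustrative.
prediction = pipeline.run(
    query="Who lives in Berlin?",
    params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 3}},
    debug=True,  # ask every node to record what it received and produced
)

# After this patch each node's "_debug" entry holds "input" and "output";
# the "logs" key written by the removed InMemoryLogger is gone.
for node_name, info in prediction["_debug"].items():
    print(node_name, "->", sorted(info.keys()))
```

Per the `test_debug_attributes_per_node` change, the same flag can also be scoped to a single node via `params={"Retriever": {"top_k": 10, "debug": True}}`.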
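The new `haystack/__init__.py` replaces the per-module level juggling with a single `basicConfig` call (root at WARNING, the `haystack` logger at INFO). One nit worth flagging: the `datefmt` argument has no effect as long as the format string omits `%(asctime)s`. If this reading is right, downstream code that wants different verbosity only needs to adjust one logger:

```python
import logging

import haystack  # import triggers the one-time basicConfig above

# Opt back into verbose output for debugging sessions...
logging.getLogger("haystack").setLevel(logging.DEBUG)

# ...or mute Haystack to warnings-only in batch scripts.
logging.getLogger("haystack").setLevel(logging.WARNING)
```

Because `haystack` is configured as a child logger that propagates to the root handler, these one-liners affect all `haystack.*` submodules without touching third-party loggers.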
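`silence_transformers_logs` as shipped restores the `transformers` log level only on the success path; if `from_pretrained` raises, the library stays muted at ERROR for the rest of the process. A hypothetical, more defensive variant of the same decorator (not what the patch ships) would wrap the call in `try/finally`:

```python
import logging
from functools import wraps

def silence_transformers_logs(from_pretrained_func):
    """Hedged sketch: temporarily raise the `transformers` log level to ERROR,
    guaranteeing restoration even when model loading fails."""
    @wraps(from_pretrained_func)
    def quiet_from_pretrained_func(cls, *args, **kwargs):
        t_logger = logging.getLogger("transformers")
        original_log_level = t_logger.level
        t_logger.setLevel(logging.ERROR)
        try:
            return from_pretrained_func(cls, *args, **kwargs)
        finally:
            # Runs on both success and exception paths.
            t_logger.setLevel(original_log_level)
    return quiet_from_pretrained_func
```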
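The new `devices` parameter lets callers bypass `initialize_device_settings()`, and `FARMReader` now forwards its own `self.devices` when loading the inferencer; per the added docstring only the first list entry is used. A usage sketch under those assumptions (model name and device choice are illustrative, and `task_type` is assumed to be accepted here, as the `convert_from_transformers()` call in the hunk suggests):

```python
import torch
from haystack.modeling.infer import Inferencer

inferencer = Inferencer.load(
    "deepset/roberta-base-squad2",        # illustrative model
    task_type="question_answering",
    devices=[torch.device("cuda:0")],     # explicit device; skips auto-detection
)
```

One thing a reviewer may want to double-check: when `devices` is passed explicitly, `n_gpu` from `initialize_device_settings()` is never assigned, so any later reference to it inside `load()` would raise a `NameError`.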