diff --git a/docs/_src/api/api/pipelines.md b/docs/_src/api/api/pipelines.md
index cb40d023a..3faa60f55 100644
--- a/docs/_src/api/api/pipelines.md
+++ b/docs/_src/api/api/pipelines.md
@@ -137,7 +137,7 @@ Set the component for a node in the Pipeline.
#### run
```python
- | run(query: Optional[str] = None, file_paths: Optional[List[str]] = None, labels: Optional[MultiLabel] = None, documents: Optional[List[Document]] = None, meta: Optional[dict] = None, params: Optional[dict] = None, debug: Optional[bool] = None, debug_logs: Optional[bool] = None)
+ | run(query: Optional[str] = None, file_paths: Optional[List[str]] = None, labels: Optional[MultiLabel] = None, documents: Optional[List[Document]] = None, meta: Optional[dict] = None, params: Optional[dict] = None, debug: Optional[bool] = None)
```
Runs the pipeline, one node at a time.
@@ -155,11 +155,8 @@ Runs the pipeline, one node at a time.
{"Retriever": {"top_k": 10}, "Reader": {"top_k": 3, "debug": True}}
- `debug`: Whether the pipeline should instruct nodes to collect debug information
about their execution. By default these include the input parameters
- they received, the output they generated, and eventual logs (of any severity)
- emitted. All debug information can then be found in the dict returned
- by this method under the key "_debug"
-- `debug_logs`: Whether all the logs of the node should be printed in the console,
- regardless of their severity and of the existing logger's settings.
+ they received and the output they generated. All debug information can
+ then be found in the dict returned by this method under the key "_debug"
#### get\_nodes\_by\_class
@@ -509,7 +506,7 @@ Pipeline for Extractive Question Answering.
#### run
```python
- | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None, debug_logs: Optional[bool] = None)
+ | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None)
```
**Arguments**:
@@ -519,11 +516,9 @@ Pipeline for Extractive Question Answering.
params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}}
- `debug`: Whether the pipeline should instruct nodes to collect debug information
about their execution. By default these include the input parameters
- they received, the output they generated, and eventual logs (of any severity)
- emitted. All debug information can then be found in the dict returned
+ they received and the output they generated.
+ All debug information can then be found in the dict returned
by this method under the key "_debug"
-- `debug_logs`: Whether all the logs of the node should be printed in the console,
- regardless of their severity and of the existing logger's settings.
## DocumentSearchPipeline Objects
@@ -549,7 +544,7 @@ Pipeline for semantic document search.
#### run
```python
- | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None, debug_logs: Optional[bool] = None)
+ | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None)
```
**Arguments**:
@@ -558,11 +553,9 @@ Pipeline for semantic document search.
- `params`: params for the `retriever` and `reader`. For instance, params={"retriever": {"top_k": 10}}
- `debug`: Whether the pipeline should instruct nodes to collect debug information
about their execution. By default these include the input parameters
- they received, the output they generated, and eventual logs (of any severity)
- emitted. All debug information can then be found in the dict returned
+ they received and the output they generated.
+ All debug information can then be found in the dict returned
by this method under the key "_debug"
-- `debug_logs`: Whether all the logs of the node should be printed in the console,
- regardless of their severity and of the existing logger's settings.
## GenerativeQAPipeline Objects
@@ -589,7 +582,7 @@ Pipeline for Generative Question Answering.
#### run
```python
- | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None, debug_logs: Optional[bool] = None)
+ | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None)
```
**Arguments**:
@@ -599,11 +592,9 @@ Pipeline for Generative Question Answering.
params={"Retriever": {"top_k": 10}, "Generator": {"top_k": 5}}
- `debug`: Whether the pipeline should instruct nodes to collect debug information
about their execution. By default these include the input parameters
- they received, the output they generated, and eventual logs (of any severity)
- emitted. All debug information can then be found in the dict returned
+ they received and the output they generated.
+ All debug information can then be found in the dict returned
by this method under the key "_debug"
-- `debug_logs`: Whether all the logs of the node should be printed in the console,
- regardless of their severity and of the existing logger's settings.
## SearchSummarizationPipeline Objects
@@ -633,7 +624,7 @@ Pipeline that retrieves documents for a query and then summarizes those document
#### run
```python
- | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None, debug_logs: Optional[bool] = None)
+ | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None)
```
**Arguments**:
@@ -643,11 +634,9 @@ Pipeline that retrieves documents for a query and then summarizes those document
params={"retriever": {"top_k": 10}, "summarizer": {"generate_single_summary": True}}
- `debug`: Whether the pipeline should instruct nodes to collect debug information
about their execution. By default these include the input parameters
- they received, the output they generated, and eventual logs (of any severity)
- emitted. All debug information can then be found in the dict returned
+ they received and the output they generated.
+ All debug information can then be found in the dict returned
by this method under the key "_debug"
-- `debug_logs`: Whether all the logs of the node should be printed in the console,
- regardless of their severity and of the existing logger's settings.
## FAQPipeline Objects
@@ -673,7 +662,7 @@ Pipeline for finding similar FAQs using semantic document search.
#### run
```python
- | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None, debug_logs: Optional[bool] = None)
+ | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None)
```
**Arguments**:
@@ -682,11 +671,9 @@ Pipeline for finding similar FAQs using semantic document search.
- `params`: params for the `retriever`. For instance, params={"retriever": {"top_k": 10}}
- `debug`: Whether the pipeline should instruct nodes to collect debug information
about their execution. By default these include the input parameters
- they received, the output they generated, and eventual logs (of any severity)
- emitted. All debug information can then be found in the dict returned
+ they received and the output they generated.
+ All debug information can then be found in the dict returned
by this method under the key "_debug"
-- `debug_logs`: Whether all the logs of the node should be printed in the console,
- regardless of their severity and of the existing logger's settings.
## TranslationWrapperPipeline Objects
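
The docs above describe the slimmed-down debug contract. A minimal sketch of what calling code looks like after this change — the node names and YAML path are illustrative, not part of the diff:

```python
from pathlib import Path
from haystack.pipelines import Pipeline

pipeline = Pipeline.load_from_yaml(Path("pipeline.yml"))  # illustrative path

result = pipeline.run(
    query="Who lives in Berlin?",
    params={"Retriever": {"top_k": 10}},
    debug=True,  # nodes record the input they received and the output they produced
)

# Each node's record sits under its name in the "_debug" dict
for node_name, record in result["_debug"].items():
    print(node_name, record["input"], record["output"])
```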
diff --git a/haystack/__init__.py b/haystack/__init__.py
index 1787751b9..8399aad76 100644
--- a/haystack/__init__.py
+++ b/haystack/__init__.py
@@ -1,24 +1,10 @@
import logging
+
+# This configuration must be done before any import to apply to all submodules
+logging.basicConfig(format="%(levelname)s - %(name)s - %(message)s", level=logging.WARNING)
+logging.getLogger("haystack").setLevel(logging.INFO)
+
from haystack import pipelines
-
-# Configure the root logger t0 DEBUG to allow the "debug" flag to receive the logs
-root_logger = logging.getLogger()
-root_logger.setLevel(logging.DEBUG)
-
-# Then reconfigure the StreamHandler not to display anything below WARNING as default
-stream_handler = logging.StreamHandler()
-stream_handler.setLevel(logging.INFO)
-root_logger.addHandler(stream_handler)
-
-# Change log-levels before modules are loaded to avoid verbose log messages.
-logging.getLogger('haystack.modeling').setLevel(logging.WARNING)
-logging.getLogger('haystack.modeling.utils').setLevel(logging.INFO)
-logging.getLogger('haystack.modeling.infer').setLevel(logging.INFO)
-logging.getLogger('transformers').setLevel(logging.WARNING)
-logging.getLogger('haystack.modeling.evaluation.eval').setLevel(logging.INFO)
-logging.getLogger('haystack.modeling.model.optimization').setLevel(logging.INFO)
-logging.getLogger('faiss.loader').setLevel(logging.WARNING)
-
from haystack.schema import Document, Answer, Label, MultiLabel, Span
from haystack.nodes import BaseComponent
from haystack.pipelines import Pipeline
@@ -27,13 +13,13 @@ from haystack._version import __version__
import pandas as pd
pd.options.display.max_colwidth = 80
-logger = logging.getLogger(__name__)
-
# ###########################################
# Enable old style imports (temporary)
import sys
+logger = logging.getLogger(__name__)
+
# Wrapper emitting a warning on import
def DeprecatedModule(mod, deprecated_attributes=None, is_module_deprecated=True):
"""
diff --git a/haystack/document_stores/base.py b/haystack/document_stores/base.py
index 9967950ac..541e7655b 100644
--- a/haystack/document_stores/base.py
+++ b/haystack/document_stores/base.py
@@ -325,7 +325,7 @@ class BaseDocumentStore(BaseComponent):
for document in documents:
if document.id in _hash_ids:
- logger.warning(f"Duplicate Documents: Document with id '{document.id}' already exists in index "
+ logger.info(f"Duplicate Documents: Document with id '{document.id}' already exists in index "
f"'{self.index}'")
continue
_documents.append(document)
diff --git a/haystack/modeling/data_handler/processor.py b/haystack/modeling/data_handler/processor.py
index 61420a984..d45251805 100644
--- a/haystack/modeling/data_handler/processor.py
+++ b/haystack/modeling/data_handler/processor.py
@@ -328,14 +328,14 @@ class Processor(ABC):
return True
def _log_samples(self, n_samples:int, baskets:List[SampleBasket]):
- logger.info("*** Show {} random examples ***".format(n_samples))
+ logger.debug("*** Show {} random examples ***".format(n_samples))
if len(baskets) == 0:
- logger.info("*** No samples to show because there are no baskets ***")
+ logger.debug("*** No samples to show because there are no baskets ***")
return
for i in range(n_samples):
random_basket = random.choice(baskets)
random_sample = random.choice(random_basket.samples) # type: ignore
- logger.info(random_sample)
+ logger.debug(random_sample)
def _log_params(self):
params = {
@@ -1727,7 +1727,7 @@ class TextClassificationProcessor(Processor):
self.header = header
self.max_samples = max_samples
self.dev_stratification = dev_stratification
- logger.warning(f"Currently no support in Processor for returning problematic ids")
+        logger.debug("Currently no support in Processor for returning problematic ids")
super(TextClassificationProcessor, self).__init__(
tokenizer=tokenizer,
diff --git a/haystack/modeling/infer.py b/haystack/modeling/infer.py
index 9a00d6dee..cd105f3ec 100644
--- a/haystack/modeling/infer.py
+++ b/haystack/modeling/infer.py
@@ -119,6 +119,7 @@ class Inferencer:
use_fast: bool = True,
tokenizer_args: Dict =None,
multithreading_rust: bool = True,
+ devices: Optional[List[Union[int, str, torch.device]]] = None,
**kwargs
):
"""
@@ -158,12 +159,15 @@ class Inferencer:
:param multithreading_rust: Whether to allow multithreading in Rust, e.g. for FastTokenizers.
Note: Enabling multithreading in Rust AND multiprocessing in python might cause
deadlocks.
+ :param devices: List of devices to perform inference on. (Currently, only the first device in the list is used.)
:return: An instance of the Inferencer.
"""
if tokenizer_args is None:
tokenizer_args = {}
- devices, n_gpu = initialize_device_settings(use_cuda=gpu, multi_gpu=False)
+ if devices is None:
+ devices, n_gpu = initialize_device_settings(use_cuda=gpu, multi_gpu=False)
+
name = os.path.basename(model_name_or_path)
# a) either from local dir
@@ -183,7 +187,7 @@ class Inferencer:
model = AdaptiveModel.convert_from_transformers(model_name_or_path,
revision=revision,
- device=devices[0],
+ device=devices[0], # type: ignore
task_type=task_type,
**kwargs)
processor = Processor.convert_from_transformers(model_name_or_path,
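
With the new `devices` parameter, callers can pin the Inferencer to an explicit device instead of relying on `initialize_device_settings`. A hedged sketch — the model name is illustrative:

```python
import torch
from haystack.modeling.infer import Inferencer

# Only the first entry of `devices` is used for now, per the docstring above
inferencer = Inferencer.load(
    "deepset/roberta-base-squad2",  # illustrative model
    task_type="question_answering",
    devices=[torch.device("cuda:0")],
)
```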
diff --git a/haystack/modeling/logger.py b/haystack/modeling/logger.py
index 3920df0e1..0e38a571d 100644
--- a/haystack/modeling/logger.py
+++ b/haystack/modeling/logger.py
@@ -112,7 +112,7 @@ class MLFlowLogger(BaseMLLogger):
@classmethod
def disable(cls):
- logger.warning("ML Logging is turned off. No parameters, metrics or artifacts will be logged to MLFlow.")
+ logger.info("ML Logging is turned off. No parameters, metrics or artifacts will be logged to MLFlow.")
cls.disable_logging = True
diff --git a/haystack/modeling/model/language_model.py b/haystack/modeling/model/language_model.py
index 1a8d1623f..fc2c278b5 100644
--- a/haystack/modeling/model/language_model.py
+++ b/haystack/modeling/model/language_model.py
@@ -24,6 +24,7 @@ import json
import logging
import os
from pathlib import Path
+from functools import wraps
import numpy as np
import torch
from torch import nn
@@ -46,6 +47,29 @@ from transformers.modeling_utils import SequenceSummary
logger = logging.getLogger(__name__)
+def silence_transformers_logs(from_pretrained_func):
+ """
+    Decorator that temporarily raises the log level of the Transformers
+    library to ERROR to hide unnecessary warnings, restoring the original
+    level afterwards
+ """
+ @wraps(from_pretrained_func)
+ def quiet_from_pretrained_func(cls, *args, **kwargs):
+
+ # Raise the log level of Transformers
+ t_logger = logging.getLogger("transformers")
+ original_log_level = t_logger.level
+ t_logger.setLevel(logging.ERROR)
+
+        try:
+            result = from_pretrained_func(cls, *args, **kwargs)
+        finally:
+            # Restore the log level even if loading raises
+            t_logger.setLevel(original_log_level)
+
+        return result
+
+ return quiet_from_pretrained_func
+
+
# These are the names of the attributes in various model configs which refer to the number of dimensions
# in the output vectors
OUTPUT_DIM_NAMES = ["dim", "hidden_size", "d_model"]
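
The decorator above targets `from_pretrained`-style classmethods. The same raise-then-restore pattern can be written as a general context manager for other call sites; this sketch is not part of the PR:

```python
import logging
from contextlib import contextmanager

@contextmanager
def quiet_logger(name: str, level: int = logging.ERROR):
    """Temporarily raise the log level of `name`, restoring it on exit."""
    target = logging.getLogger(name)
    original = target.level
    target.setLevel(level)
    try:
        yield
    finally:
        target.setLevel(original)

# with quiet_logger("transformers"):
#     model = AutoModel.from_pretrained("bert-base-uncased")
```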
@@ -122,7 +146,6 @@ class LanguageModel(nn.Module):
n_added_tokens = kwargs.pop("n_added_tokens", 0)
language_model_class = kwargs.pop("language_model_class", None)
kwargs["revision"] = kwargs.get("revision", None)
- logger.info("")
logger.info("LOADING MODEL")
logger.info("=============")
config_file = Path(pretrained_model_name_or_path) / "language_model_config.json"
@@ -426,6 +449,7 @@ class Bert(LanguageModel):
return bert
@classmethod
+ @silence_transformers_logs
def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs):
"""
Load a pretrained model by supplying
@@ -503,6 +527,7 @@ class Albert(LanguageModel):
self.name = "albert"
@classmethod
+ @silence_transformers_logs
def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs):
"""
Load a language model either by supplying
@@ -584,6 +609,7 @@ class Roberta(LanguageModel):
self.name = "roberta"
@classmethod
+ @silence_transformers_logs
def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs):
"""
Load a language model either by supplying
@@ -665,6 +691,7 @@ class XLMRoberta(LanguageModel):
self.name = "xlm_roberta"
@classmethod
+ @silence_transformers_logs
def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs):
"""
Load a language model either by supplying
@@ -753,6 +780,7 @@ class DistilBert(LanguageModel):
self.pooler = None
@classmethod
+ @silence_transformers_logs
def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs):
"""
Load a pretrained model by supplying
@@ -840,6 +868,7 @@ class XLNet(LanguageModel):
self.pooler = None
@classmethod
+ @silence_transformers_logs
def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs):
"""
Load a language model either by supplying
@@ -946,6 +975,7 @@ class Electra(LanguageModel):
self.pooler = None
@classmethod
+ @silence_transformers_logs
def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs):
"""
Load a pretrained model by supplying
@@ -1037,6 +1067,7 @@ class Camembert(Roberta):
self.name = "camembert"
@classmethod
+ @silence_transformers_logs
def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs):
"""
Load a language model either by supplying
@@ -1080,6 +1111,7 @@ class DPRQuestionEncoder(LanguageModel):
self.name = "dpr_question_encoder"
@classmethod
+ @silence_transformers_logs
def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs):
"""
Load a pretrained model by supplying
@@ -1212,6 +1244,7 @@ class DPRContextEncoder(LanguageModel):
self.name = "dpr_context_encoder"
@classmethod
+ @silence_transformers_logs
def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs):
"""
Load a pretrained model by supplying
@@ -1364,6 +1397,7 @@ class BigBird(LanguageModel):
return big_bird
@classmethod
+ @silence_transformers_logs
def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs):
"""
Load a pretrained model by supplying
diff --git a/haystack/modeling/model/prediction_head.py b/haystack/modeling/model/prediction_head.py
index 4d5df6de7..4f7d46010 100644
--- a/haystack/modeling/model/prediction_head.py
+++ b/haystack/modeling/model/prediction_head.py
@@ -22,7 +22,7 @@ logger = logging.getLogger(__name__)
try:
from apex.normalization.fused_layer_norm import FusedLayerNorm as BertLayerNorm
except (ImportError, AttributeError) as e:
- logger.info("Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .")
+ logger.debug("Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .")
BertLayerNorm = torch.nn.LayerNorm
@@ -255,7 +255,7 @@ class QuestionAnsweringHead(PredictionHead):
self.layer_dims = layer_dims
assert self.layer_dims[-1] == 2
self.feed_forward = FeedForwardBlock(self.layer_dims)
- logger.info(f"Prediction head initialized with size {self.layer_dims}")
+ logger.debug(f"Prediction head initialized with size {self.layer_dims}")
self.num_labels = self.layer_dims[-1]
self.ph_output_type = "per_token_squad"
self.model_type = ("span_classification") # predicts start and end token of answer
diff --git a/haystack/modeling/model/tokenization.py b/haystack/modeling/model/tokenization.py
index 9d2721af3..0d4b9a047 100644
--- a/haystack/modeling/model/tokenization.py
+++ b/haystack/modeling/model/tokenization.py
@@ -75,7 +75,7 @@ class Tokenizer:
if tokenizer_class is None:
tokenizer_class = cls._infer_tokenizer_class(pretrained_model_name_or_path)
- logger.info(f"Loading tokenizer of type '{tokenizer_class}'")
+ logger.debug(f"Loading tokenizer of type '{tokenizer_class}'")
# return appropriate tokenizer object
ret = None
if "AlbertTokenizer" in tokenizer_class:
diff --git a/haystack/modeling/visual.py b/haystack/modeling/visual.py
index fc2d4d836..fec1ba5f9 100644
--- a/haystack/modeling/visual.py
+++ b/haystack/modeling/visual.py
@@ -92,15 +92,12 @@ WATERING_CAN = """
WORKER_M = """ 0
/|\\
-/'\\
-"""
+/'\\"""
WORKER_F =""" 0
/w\\
-/ \\
-"""
+/ \\"""
WORKER_X =""" 0
/w\\
-/'\\
-"""
\ No newline at end of file
+/'\\"""
\ No newline at end of file
diff --git a/haystack/nodes/base.py b/haystack/nodes/base.py
index d721314dc..32593aabb 100644
--- a/haystack/nodes/base.py
+++ b/haystack/nodes/base.py
@@ -14,71 +14,6 @@ from haystack.schema import Document, MultiLabel
logger = logging.getLogger(__name__)
-class InMemoryLogger(io.TextIOBase):
- """
- Implementation of a logger that keeps track
- of the log lines in a list called `logs`,
- from where they can be accessed freely.
- """
- def __init__(self, *args):
- io.TextIOBase.__init__(self, *args)
- self.logs = []
-
- def write(self, x):
- self.logs.append(x)
-
-
-def record_debug_logs(func: Callable, node_name: str, logs: bool) -> Callable:
- """
- Captures the debug logs of the wrapped function and
- saves them in the `_debug` key of the output dictionary.
- If `logs` is True, dumps the same logs to the console as well.
-
- Used in `BaseComponent.__getattribute__()` to wrap `run()` functions.
- This makes sure that every implementation of `run()` by a subclass will
- be automagically decorated with this method when requested.
-
- :param func: the function to decorate (must be an implementation of
- `BaseComponent.run()`).
- :param logs: whether the captured logs should also be displayed
- in the console during the execution of the pipeline.
- """
- @wraps(func)
- def inner(*args, **kwargs) -> Tuple[Dict[str, Any], str]:
-
- with InMemoryLogger() as logs_container:
- logger = logging.getLogger()
-
- # Adds a handler that stores the logs in a variable
- handler = logging.StreamHandler(logs_container)
- handler.setLevel(logger.level or logging.DEBUG)
- logger.addHandler(handler)
-
- # Add a handler that prints log messages in the console
- # to the specified level for the node
- if logs:
- handler_console = logging.StreamHandler()
- handler_console.setLevel(logging.DEBUG)
- formatter = logging.Formatter(f'[{node_name} logs] %(message)s')
- handler_console.setFormatter(formatter)
- logger.addHandler(handler_console)
-
- output, stream = func(*args, **kwargs)
-
- if not "_debug" in output.keys():
- output["_debug"] = {}
- output["_debug"]["logs"] = logs_container.logs
-
- # Remove both handlers
- logger.removeHandler(handler)
- if logs:
- logger.removeHandler(handler_console)
-
- return output, stream
-
- return inner
-
-
class BaseComponent:
"""
A base class for implementing nodes in a Pipeline.
@@ -96,37 +31,6 @@ class BaseComponent:
super().__init_subclass__(**kwargs)
cls.subclasses[cls.__name__] = cls
- def __getattribute__(self, name):
- """
- This modified `__getattribute__` method automagically decorates
- every `BaseComponent.run()` implementation with the
- `record_debug_logs` decorator defined above.
-
- This decorator makes the function collect its debug logs into a
- `_debug` key of the output dictionary.
-
- The logs collection is not always performed. Before applying the decorator,
- it checks for an instance attribute called `debug` to know
- whether it should or not. The decorator is applied if the attribute is
- defined and True.
-
- In addition, the value of the instance attribute `debug_logs` is
- passed to the decorator. If it's True, it will print the
- logs in the console as well.
- """
- if name == "run" and self.debug:
- func = getattr(type(self), "run")
- return record_debug_logs(func=func, node_name=self.__class__.__name__, logs=self.debug_logs).__get__(self)
- return object.__getattribute__(self, name)
-
- def __getattr__(self, name):
- """
- Ensures that `debug` and `debug_logs` are always defined.
- """
- if name in ["debug", "debug_logs"]:
- return None
- raise AttributeError(name)
-
@classmethod
def get_subclass(cls, component_type: str):
if component_type not in cls.subclasses.keys():
@@ -196,7 +100,7 @@ class BaseComponent:
It takes care of the following:
- inspect run() signature to validate if all necessary arguments are available
- - pop `debug` and `debug_logs` and sets them on the instance to control debug output
+    - pop `debug` and set it on the instance to control debug output
- call run() with the corresponding arguments and gather output
- collate `_debug` information if present
- merge component output with the preceding output and pass it on to the subsequent Component in the Pipeline
@@ -214,8 +118,6 @@ class BaseComponent:
# Extract debug attributes
if "debug" in value.keys():
self.debug = value.pop("debug")
- if "debug_logs" in value.keys():
- self.debug_logs = value.pop("debug_logs")
for _k, _v in value.items():
if _k not in run_signature_args:
@@ -234,13 +136,10 @@ class BaseComponent:
# Collect debug information
current_debug = output.get("_debug", {})
- if self.debug:
+ if getattr(self, "debug", None):
current_debug["input"] = {**run_inputs, **run_params}
- if self.debug:
- current_debug["input"]["debug"] = self.debug
- if self.debug_logs:
- current_debug["input"]["debug_logs"] = self.debug_logs
- filtered_output = {key: value for key, value in output.items() if key != "_debug"} # Exclude _debug to avoid recursion
+ current_debug["input"]["debug"] = self.debug
+ filtered_output = {key: value for key, value in output.items() if key != "_debug"} # Exclude _debug to avoid recursion
current_debug["output"] = filtered_output
# append _debug information from nodes
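
For reference, the per-node record that `_dispatch_run` assembles after this change looks roughly like the following — example values are invented:

```python
node_debug = {
    # everything the node received, including the debug flag itself
    "input": {"query": "Who lives in Berlin?", "top_k": 10, "debug": True},
    # the node's own output, with "_debug" filtered out to avoid recursion
    "output": {"documents": []},
}
```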
diff --git a/haystack/nodes/reader/farm.py b/haystack/nodes/reader/farm.py
index a046e8ebc..f953c9877 100644
--- a/haystack/nodes/reader/farm.py
+++ b/haystack/nodes/reader/farm.py
@@ -131,6 +131,7 @@ class FARMReader(BaseReader):
proxies=proxies,
local_files_only=local_files_only,
force_download=force_download,
+ devices=self.devices,
**kwargs)
self.inferencer.model.prediction_heads[0].context_window_size = context_window_size
self.inferencer.model.prediction_heads[0].no_ans_boost = no_ans_boost
@@ -443,7 +444,7 @@ class FARMReader(BaseReader):
:type device: str
"""
if device is None:
- device = self.device
+ device = self.devices[0]
eval_processor = SquadProcessor(
tokenizer=self.inferencer.processor.tokenizer,
max_seq_len=self.inferencer.processor.max_seq_len,
@@ -493,7 +494,7 @@ class FARMReader(BaseReader):
:param calibrate_conf_scores: Whether to calibrate the temperature for temperature scaling of the confidence scores
"""
if device is None:
- device = self.device
+ device = self.devices[0]
if self.top_k_per_candidate != 4:
logger.info(f"Performing Evaluation using top_k_per_candidate = {self.top_k_per_candidate} \n"
f"and consequently, QuestionAnsweringPredictionHead.n_best = {self.top_k_per_candidate + 1}. \n"
@@ -661,7 +662,7 @@ class FARMReader(BaseReader):
:param label_origin: Field name where the gold labels are stored
"""
if device is None:
- device = self.device
+ device = self.devices[0]
self.eval(document_store=document_store,
device=device,
label_index=label_index,
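
The reader now hands its `devices` list to the underlying Inferencer, and its evaluation helpers default to `self.devices[0]` instead of the removed `self.device`. A sketch with an illustrative model name:

```python
from haystack.nodes import FARMReader

reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=True)
# reader's Inferencer is loaded with devices=reader.devices;
# reader.eval(...) falls back to reader.devices[0] when no device is passed
```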
diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py
index 50bf87fec..cabe837b1 100644
--- a/haystack/pipelines/base.py
+++ b/haystack/pipelines/base.py
@@ -261,8 +261,7 @@ class Pipeline(BasePipeline):
documents: Optional[List[Document]] = None,
meta: Optional[dict] = None,
params: Optional[dict] = None,
- debug: Optional[bool] = None,
- debug_logs: Optional[bool] = None
+ debug: Optional[bool] = None
):
"""
Runs the pipeline, one node at a time.
@@ -278,11 +277,8 @@ class Pipeline(BasePipeline):
{"Retriever": {"top_k": 10}, "Reader": {"top_k": 3, "debug": True}}
:param debug: Whether the pipeline should instruct nodes to collect debug information
about their execution. By default these include the input parameters
- they received, the output they generated, and eventual logs (of any severity)
- emitted. All debug information can then be found in the dict returned
- by this method under the key "_debug"
- :param debug_logs: Whether all the logs of the node should be printed in the console,
- regardless of their severity and of the existing logger's settings.
+ they received and the output they generated. All debug information can
+ then be found in the dict returned by this method under the key "_debug"
"""
# validate the node names
if params:
@@ -327,8 +323,6 @@ class Pipeline(BasePipeline):
if node_id not in node_input["params"].keys():
node_input["params"][node_id] = {}
node_input["params"][node_id]["debug"] = debug
- if debug_logs is not None:
- node_input["params"][node_id]["debug_logs"] = debug_logs
predecessors = set(nx.ancestors(self.graph, node_id))
if predecessors.isdisjoint(set(queue.keys())): # only execute if predecessor nodes are executed
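
Per-node debugging still works through `params`, without the global flag. A sketch, reusing a `pipeline` built as above with assumed node names:

```python
result = pipeline.run(
    query="Who lives in Berlin?",
    params={
        "ESRetriever": {"top_k": 10, "debug": True},  # only this node records debug info
        "Reader": {"top_k": 3},
    },
)
```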
diff --git a/haystack/pipelines/standard_pipelines.py b/haystack/pipelines/standard_pipelines.py
index 46f342533..cb1bc7a8a 100644
--- a/haystack/pipelines/standard_pipelines.py
+++ b/haystack/pipelines/standard_pipelines.py
@@ -87,21 +87,18 @@ class ExtractiveQAPipeline(BaseStandardPipeline):
def run(self,
query: str,
params: Optional[dict] = None,
- debug: Optional[bool] = None,
- debug_logs: Optional[bool] = None):
+ debug: Optional[bool] = None):
"""
:param query: The search query string.
:param params: Params for the `retriever` and `reader`. For instance,
params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}}
:param debug: Whether the pipeline should instruct nodes to collect debug information
about their execution. By default these include the input parameters
- they received, the output they generated, and eventual logs (of any severity)
- emitted. All debug information can then be found in the dict returned
+ they received and the output they generated.
+ All debug information can then be found in the dict returned
by this method under the key "_debug"
- :param debug_logs: Whether all the logs of the node should be printed in the console,
- regardless of their severity and of the existing logger's settings.
"""
- output = self.pipeline.run(query=query, params=params, debug=debug, debug_logs=debug_logs)
+ output = self.pipeline.run(query=query, params=params, debug=debug)
return output
@@ -119,20 +116,17 @@ class DocumentSearchPipeline(BaseStandardPipeline):
def run(self,
query: str,
params: Optional[dict] = None,
- debug: Optional[bool] = None,
- debug_logs: Optional[bool] = None):
+ debug: Optional[bool] = None):
"""
:param query: the query string.
:param params: params for the `retriever` and `reader`. For instance, params={"retriever": {"top_k": 10}}
:param debug: Whether the pipeline should instruct nodes to collect debug information
about their execution. By default these include the input parameters
- they received, the output they generated, and eventual logs (of any severity)
- emitted. All debug information can then be found in the dict returned
+ they received and the output they generated.
+ All debug information can then be found in the dict returned
by this method under the key "_debug"
- :param debug_logs: Whether all the logs of the node should be printed in the console,
- regardless of their severity and of the existing logger's settings.
"""
- output = self.pipeline.run(query=query, params=params, debug=debug, debug_logs=debug_logs)
+ output = self.pipeline.run(query=query, params=params, debug=debug)
return output
@@ -152,21 +146,18 @@ class GenerativeQAPipeline(BaseStandardPipeline):
def run(self,
query: str,
params: Optional[dict] = None,
- debug: Optional[bool] = None,
- debug_logs: Optional[bool] = None):
+ debug: Optional[bool] = None):
"""
:param query: the query string.
:param params: params for the `retriever` and `generator`. For instance,
params={"Retriever": {"top_k": 10}, "Generator": {"top_k": 5}}
:param debug: Whether the pipeline should instruct nodes to collect debug information
about their execution. By default these include the input parameters
- they received, the output they generated, and eventual logs (of any severity)
- emitted. All debug information can then be found in the dict returned
+ they received and the output they generated.
+ All debug information can then be found in the dict returned
by this method under the key "_debug"
- :param debug_logs: Whether all the logs of the node should be printed in the console,
- regardless of their severity and of the existing logger's settings.
"""
- output = self.pipeline.run(query=query, params=params, debug=debug, debug_logs=debug_logs)
+ output = self.pipeline.run(query=query, params=params, debug=debug)
return output
@@ -190,21 +181,18 @@ class SearchSummarizationPipeline(BaseStandardPipeline):
def run(self,
query: str,
params: Optional[dict] = None,
- debug: Optional[bool] = None,
- debug_logs: Optional[bool] = None):
+ debug: Optional[bool] = None):
"""
:param query: the query string.
:param params: params for the `retriever` and `summarizer`. For instance,
params={"retriever": {"top_k": 10}, "summarizer": {"generate_single_summary": True}}
:param debug: Whether the pipeline should instruct nodes to collect debug information
about their execution. By default these include the input parameters
- they received, the output they generated, and eventual logs (of any severity)
- emitted. All debug information can then be found in the dict returned
+ they received and the output they generated.
+ All debug information can then be found in the dict returned
by this method under the key "_debug"
- :param debug_logs: Whether all the logs of the node should be printed in the console,
- regardless of their severity and of the existing logger's settings.
- """
- output = self.pipeline.run(query=query, params=params, debug=debug, debug_logs=debug_logs)
+ """
+ output = self.pipeline.run(query=query, params=params, debug=debug)
# Convert to answer format to allow "drop-in replacement" for other QA pipelines
if self.return_in_answer_format:
@@ -243,20 +231,17 @@ class FAQPipeline(BaseStandardPipeline):
def run(self,
query: str,
params: Optional[dict] = None,
- debug: Optional[bool] = None,
- debug_logs: Optional[bool] = None):
+ debug: Optional[bool] = None):
"""
:param query: the query string.
:param params: params for the `retriever`. For instance, params={"retriever": {"top_k": 10}}
:param debug: Whether the pipeline should instruct nodes to collect debug information
about their execution. By default these include the input parameters
- they received, the output they generated, and eventual logs (of any severity)
- emitted. All debug information can then be found in the dict returned
+ they received and the output they generated.
+ All debug information can then be found in the dict returned
by this method under the key "_debug"
- :param debug_logs: Whether all the logs of the node should be printed in the console,
- regardless of their severity and of the existing logger's settings.
"""
- output = self.pipeline.run(query=query, params=params, debug=debug, debug_logs=debug_logs)
+ output = self.pipeline.run(query=query, params=params, debug=debug)
return output
@@ -316,10 +301,8 @@ class QuestionGenerationPipeline(BaseStandardPipeline):
def run(self,
documents,
params: Optional[dict] = None,
- debug: Optional[bool] = None,
- debug_logs: Optional[bool] = None
- ):
- output = self.pipeline.run(documents=documents, params=params, debug=debug, debug_logs=debug_logs)
+ debug: Optional[bool] = None):
+ output = self.pipeline.run(documents=documents, params=params, debug=debug)
return output
@@ -336,9 +319,8 @@ class RetrieverQuestionGenerationPipeline(BaseStandardPipeline):
def run(self,
query: str,
params: Optional[dict] = None,
- debug: Optional[bool] = None,
- debug_logs: Optional[bool] = None):
- output = self.pipeline.run(query=query, params=params, debug=debug, debug_logs=debug_logs)
+ debug: Optional[bool] = None):
+ output = self.pipeline.run(query=query, params=params, debug=debug)
return output
@@ -372,9 +354,8 @@ class QuestionAnswerGenerationPipeline(BaseStandardPipeline):
def run(self,
documents: List[Document], # type: ignore
params: Optional[dict] = None,
- debug: Optional[bool] = None,
- debug_logs: Optional[bool] = None):
- output = self.pipeline.run(documents=documents, params=params, debug=debug, debug_logs=debug_logs)
+ debug: Optional[bool] = None):
+ output = self.pipeline.run(documents=documents, params=params, debug=debug)
return output
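
All standard pipelines lose `debug_logs` and keep only `debug`. A sketch of the simplified call, assuming `reader` and `retriever` are already constructed:

```python
from haystack.pipelines import ExtractiveQAPipeline

pipe = ExtractiveQAPipeline(reader=reader, retriever=retriever)
prediction = pipe.run(
    query="Who lives in Berlin?",
    params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}},
    debug=True,
)
```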
diff --git a/haystack/utils/doc_store.py b/haystack/utils/doc_store.py
index afbc46cc9..2f281aeae 100644
--- a/haystack/utils/doc_store.py
+++ b/haystack/utils/doc_store.py
@@ -9,7 +9,7 @@ logger = logging.getLogger(__name__)
def launch_es(sleep=15):
# Start an Elasticsearch server via Docker
- logger.info("Starting Elasticsearch ...")
+ logger.debug("Starting Elasticsearch ...")
status = subprocess.run(
['docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.9.2'], shell=True
)
@@ -22,7 +22,7 @@ def launch_es(sleep=15):
def launch_open_distro_es(sleep=15):
# Start an Open Distro for Elasticsearch server via Docker
- logger.info("Starting Open Distro for Elasticsearch ...")
+ logger.debug("Starting Open Distro for Elasticsearch ...")
status = subprocess.run(
['docker run -d -p 9200:9200 -p 9600:9600 -e "discovery.type=single-node" amazon/opendistro-for-elasticsearch:1.13.2'], shell=True
)
@@ -35,7 +35,7 @@ def launch_open_distro_es(sleep=15):
def launch_opensearch(sleep=15):
# Start an OpenSearch server via docker
- logger.info("Starting OpenSearch...")
+ logger.debug("Starting OpenSearch...")
# This line is needed since it is not possible to start a new docker container with the name opensearch if there is a stopped image with the same now
# docker rm only succeeds if the container is stopped, not if it is running
_ = subprocess.run(['docker rm opensearch'], shell=True, stdout=subprocess.DEVNULL)
@@ -53,7 +53,7 @@ def launch_opensearch(sleep=15):
def launch_weaviate(sleep=15):
# Start a Weaviate server via Docker
- logger.info("Starting Weaviate ...")
+ logger.debug("Starting Weaviate ...")
status = subprocess.run(
["docker run -d -p 8080:8080 --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' semitechnologies/weaviate:1.7.2"], shell=True
)
@@ -65,7 +65,7 @@ def launch_weaviate(sleep=15):
def stop_opensearch():
- logger.info("Stopping OpenSearch...")
+ logger.debug("Stopping OpenSearch...")
status = subprocess.run(['docker stop opensearch'], shell=True)
if status.returncode:
logger.warning("Tried to stop OpenSearch but this failed. "
@@ -84,7 +84,7 @@ def stop_service(document_store):
def launch_milvus(sleep=15):
# Start a Milvus server via docker
- logger.info("Starting Milvus ...")
+ logger.debug("Starting Milvus ...")
logger.warning("Automatic Milvus config creation not yet implemented. "
"If you are starting Milvus using launch_milvus(), "
"make sure you have a properly populated milvus/conf folder. "
diff --git a/test/test_pipeline.py b/test/test_pipeline.py
index ec32f2f04..ce476ac5c 100644
--- a/test/test_pipeline.py
+++ b/test/test_pipeline.py
@@ -131,8 +131,7 @@ def test_node_names_validation(document_store_with_docs, tmp_path):
"top_k": 5,
"non-existing-global_param": "wrong",
},
- debug=True,
- debug_logs=True
+ debug=True
)
exception_raised = str(exc_info.value)
assert "non-existing-node" in exception_raised
@@ -155,8 +154,7 @@ def test_debug_attributes_global(document_store_with_docs, tmp_path):
prediction = pipeline.run(
query="Who lives in Berlin?",
params={"ESRetriever": {"top_k": 10}, "Reader": {"top_k": 3}},
- debug=True,
- debug_logs=True
+ debug=True
)
assert "_debug" in prediction.keys()
assert "ESRetriever" in prediction["_debug"].keys()
@@ -187,7 +185,7 @@ def test_debug_attributes_per_node(document_store_with_docs, tmp_path):
prediction = pipeline.run(
query="Who lives in Berlin?",
params={
- "ESRetriever": {"top_k": 10, "debug": True, "debug_logs":True},
+ "ESRetriever": {"top_k": 10, "debug": True},
"Reader": {"top_k": 3}
},
)
@@ -217,7 +215,7 @@ def test_global_debug_attributes_override_node_ones(document_store_with_docs, tm
prediction = pipeline.run(
query="Who lives in Berlin?",
params={
- "ESRetriever": {"top_k": 10, "debug": True, "debug_logs":True},
+ "ESRetriever": {"top_k": 10, "debug": True},
"Reader": {"top_k": 3, "debug": True}
},
debug=False