Mirror of https://github.com/deepset-ai/haystack.git (synced 2026-01-06 03:57:19 +00:00)
Simplify logs management (#1696)
* Move each haystack module's logger configuration into the respective file and configure the handlers properly
* Implement most changes from #1714
* Remove accidentally committed git merge tags ':D
* Remove the debug logs capture feature
* Remove more references to debug_logs
* Fix issue with FARMReader that somehow made it to master
* Add devices parameter to Inferencer
* Change log of APEX message to DEBUG and lower the 'Starting <docstore>...' messages to DEBUG as well
* Change log level of a few logs from modeling
* Silence the transformers warning
* Remove empty line below the workers :)
* Fix two more levels in the tutorials logs

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: bogdankostic <bogdankostic@web.de>
This commit is contained in:
parent: b28dd823ef
commit: 42c8edca54
@@ -137,7 +137,7 @@ Set the component for a node in the Pipeline.
 #### run
 
 ```python
- | run(query: Optional[str] = None, file_paths: Optional[List[str]] = None, labels: Optional[MultiLabel] = None, documents: Optional[List[Document]] = None, meta: Optional[dict] = None, params: Optional[dict] = None, debug: Optional[bool] = None, debug_logs: Optional[bool] = None)
+ | run(query: Optional[str] = None, file_paths: Optional[List[str]] = None, labels: Optional[MultiLabel] = None, documents: Optional[List[Document]] = None, meta: Optional[dict] = None, params: Optional[dict] = None, debug: Optional[bool] = None)
 ```
 
 Runs the pipeline, one node at a time.
@@ -155,11 +155,8 @@ Runs the pipeline, one node at a time.
                {"Retriever": {"top_k": 10}, "Reader": {"top_k": 3, "debug": True}}
 - `debug`: Whether the pipeline should instruct nodes to collect debug information
   about their execution. By default these include the input parameters
-  they received, the output they generated, and eventual logs (of any severity)
-  emitted. All debug information can then be found in the dict returned
-  by this method under the key "_debug"
-- `debug_logs`: Whether all the logs of the node should be printed in the console,
-  regardless of their severity and of the existing logger's settings.
+  they received and the output they generated. All debug information can
+  then be found in the dict returned by this method under the key "_debug"
 
 <a name="base.Pipeline.get_nodes_by_class"></a>
 #### get\_nodes\_by\_class
@@ -509,7 +506,7 @@ Pipeline for Extractive Question Answering.
 #### run
 
 ```python
- | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None, debug_logs: Optional[bool] = None)
+ | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None)
 ```
 
 **Arguments**:
@@ -519,11 +516,9 @@ Pipeline for Extractive Question Answering.
                params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}}
 - `debug`: Whether the pipeline should instruct nodes to collect debug information
   about their execution. By default these include the input parameters
-  they received, the output they generated, and eventual logs (of any severity)
-  emitted. All debug information can then be found in the dict returned
+  they received and the output they generated.
+  All debug information can then be found in the dict returned
   by this method under the key "_debug"
-- `debug_logs`: Whether all the logs of the node should be printed in the console,
-  regardless of their severity and of the existing logger's settings.
 
 <a name="standard_pipelines.DocumentSearchPipeline"></a>
 ## DocumentSearchPipeline Objects
@@ -549,7 +544,7 @@ Pipeline for semantic document search.
 #### run
 
 ```python
- | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None, debug_logs: Optional[bool] = None)
+ | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None)
 ```
 
 **Arguments**:
@@ -558,11 +553,9 @@ Pipeline for semantic document search.
 - `params`: params for the `retriever` and `reader`. For instance, params={"retriever": {"top_k": 10}}
 - `debug`: Whether the pipeline should instruct nodes to collect debug information
   about their execution. By default these include the input parameters
-  they received, the output they generated, and eventual logs (of any severity)
-  emitted. All debug information can then be found in the dict returned
+  they received and the output they generated.
+  All debug information can then be found in the dict returned
   by this method under the key "_debug"
-- `debug_logs`: Whether all the logs of the node should be printed in the console,
-  regardless of their severity and of the existing logger's settings.
 
 <a name="standard_pipelines.GenerativeQAPipeline"></a>
 ## GenerativeQAPipeline Objects
@@ -589,7 +582,7 @@ Pipeline for Generative Question Answering.
 #### run
 
 ```python
- | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None, debug_logs: Optional[bool] = None)
+ | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None)
 ```
 
 **Arguments**:
@@ -599,11 +592,9 @@ Pipeline for Generative Question Answering.
                params={"Retriever": {"top_k": 10}, "Generator": {"top_k": 5}}
 - `debug`: Whether the pipeline should instruct nodes to collect debug information
   about their execution. By default these include the input parameters
-  they received, the output they generated, and eventual logs (of any severity)
-  emitted. All debug information can then be found in the dict returned
+  they received and the output they generated.
+  All debug information can then be found in the dict returned
   by this method under the key "_debug"
-- `debug_logs`: Whether all the logs of the node should be printed in the console,
-  regardless of their severity and of the existing logger's settings.
 
 <a name="standard_pipelines.SearchSummarizationPipeline"></a>
 ## SearchSummarizationPipeline Objects
@@ -633,7 +624,7 @@ Pipeline that retrieves documents for a query and then summarizes those documents.
 #### run
 
 ```python
- | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None, debug_logs: Optional[bool] = None)
+ | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None)
 ```
 
 **Arguments**:
@@ -643,11 +634,9 @@ Pipeline that retrieves documents for a query and then summarizes those documents.
                params={"retriever": {"top_k": 10}, "summarizer": {"generate_single_summary": True}}
 - `debug`: Whether the pipeline should instruct nodes to collect debug information
   about their execution. By default these include the input parameters
-  they received, the output they generated, and eventual logs (of any severity)
-  emitted. All debug information can then be found in the dict returned
+  they received and the output they generated.
+  All debug information can then be found in the dict returned
   by this method under the key "_debug"
-- `debug_logs`: Whether all the logs of the node should be printed in the console,
-  regardless of their severity and of the existing logger's settings.
 
 <a name="standard_pipelines.FAQPipeline"></a>
 ## FAQPipeline Objects
@@ -673,7 +662,7 @@ Pipeline for finding similar FAQs using semantic document search.
 #### run
 
 ```python
- | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None, debug_logs: Optional[bool] = None)
+ | run(query: str, params: Optional[dict] = None, debug: Optional[bool] = None)
 ```
 
 **Arguments**:
@@ -682,11 +671,9 @@ Pipeline for finding similar FAQs using semantic document search.
 - `params`: params for the `retriever`. For instance, params={"retriever": {"top_k": 10}}
 - `debug`: Whether the pipeline should instruct nodes to collect debug information
   about their execution. By default these include the input parameters
-  they received, the output they generated, and eventual logs (of any severity)
-  emitted. All debug information can then be found in the dict returned
+  they received and the output they generated.
+  All debug information can then be found in the dict returned
   by this method under the key "_debug"
-- `debug_logs`: Whether all the logs of the node should be printed in the console,
-  regardless of their severity and of the existing logger's settings.
 
 <a name="standard_pipelines.TranslationWrapperPipeline"></a>
 ## TranslationWrapperPipeline Objects
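For orientation, the surviving `debug` flag behaves exactly as the updated docstrings above describe. A minimal usage sketch, assuming `reader` and `retriever` are already-constructed Haystack nodes (they are not part of this diff):

```python
# Minimal sketch: collect per-node debug information from a pipeline run.
from haystack.pipelines import ExtractiveQAPipeline

pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever)

# `debug=True` asks every node to record the inputs it received and the
# outputs it generated.
prediction = pipeline.run(
    query="Who lives in Berlin?",
    params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 3}},
    debug=True,
)

# All of it comes back under the "_debug" key, one entry per node.
for node_name, info in prediction["_debug"].items():
    print(node_name, "input:", info["input"])
    print(node_name, "output keys:", list(info["output"].keys()))
```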
@@ -1,24 +1,10 @@
 import logging
 
+# This configuration must be done before any import to apply to all submodules
+logging.basicConfig(format="%(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.WARNING)
+logging.getLogger("haystack").setLevel(logging.INFO)
+
+from haystack import pipelines
-# Configure the root logger t0 DEBUG to allow the "debug" flag to receive the logs
-root_logger = logging.getLogger()
-root_logger.setLevel(logging.DEBUG)
-
-# Then reconfigure the StreamHandler not to display anything below WARNING as default
-stream_handler = logging.StreamHandler()
-stream_handler.setLevel(logging.INFO)
-root_logger.addHandler(stream_handler)
-
-# Change log-levels before modules are loaded to avoid verbose log messages.
-logging.getLogger('haystack.modeling').setLevel(logging.WARNING)
-logging.getLogger('haystack.modeling.utils').setLevel(logging.INFO)
-logging.getLogger('haystack.modeling.infer').setLevel(logging.INFO)
-logging.getLogger('transformers').setLevel(logging.WARNING)
-logging.getLogger('haystack.modeling.evaluation.eval').setLevel(logging.INFO)
-logging.getLogger('haystack.modeling.model.optimization').setLevel(logging.INFO)
-logging.getLogger('faiss.loader').setLevel(logging.WARNING)
-
 from haystack.schema import Document, Answer, Label, MultiLabel, Span
 from haystack.nodes import BaseComponent
 from haystack.pipelines import Pipeline
@@ -27,13 +13,13 @@ from haystack._version import __version__
 import pandas as pd
 pd.options.display.max_colwidth = 80
 
-logger = logging.getLogger(__name__)
-
 
 # ###########################################
 # Enable old style imports (temporary)
 import sys
 
+logger = logging.getLogger(__name__)
+
 # Wrapper emitting a warning on import
 def DeprecatedModule(mod, deprecated_attributes=None, is_module_deprecated=True):
     """
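With the new `__init__.py` above, logging is configured once at import time: the root logger via `logging.basicConfig` at WARNING, and the `haystack` logger raised to INFO. A small sketch of how a user can override these defaults, using only the standard `logging` module and the configuration shown in the hunk:

```python
import logging

import haystack  # importing applies the basicConfig + "haystack" defaults above

# Turn the haystack logger up to DEBUG to see messages that this PR
# demoted from INFO to DEBUG (e.g. "Starting Elasticsearch ...").
logging.getLogger("haystack").setLevel(logging.DEBUG)

# Or quiet haystack down to errors only:
logging.getLogger("haystack").setLevel(logging.ERROR)

# Third-party loggers are no longer touched by haystack's defaults and
# can be managed the same way:
logging.getLogger("transformers").setLevel(logging.WARNING)
```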
@@ -325,7 +325,7 @@ class BaseDocumentStore(BaseComponent):
 
         for document in documents:
             if document.id in _hash_ids:
-                logger.warning(f"Duplicate Documents: Document with id '{document.id}' already exists in index "
+                logger.info(f"Duplicate Documents: Document with id '{document.id}' already exists in index "
                                f"'{self.index}'")
                 continue
             _documents.append(document)
@@ -328,14 +328,14 @@ class Processor(ABC):
         return True
 
     def _log_samples(self, n_samples: int, baskets: List[SampleBasket]):
-        logger.info("*** Show {} random examples ***".format(n_samples))
+        logger.debug("*** Show {} random examples ***".format(n_samples))
         if len(baskets) == 0:
-            logger.info("*** No samples to show because there are no baskets ***")
+            logger.debug("*** No samples to show because there are no baskets ***")
             return
         for i in range(n_samples):
             random_basket = random.choice(baskets)
             random_sample = random.choice(random_basket.samples)  # type: ignore
-            logger.info(random_sample)
+            logger.debug(random_sample)
 
     def _log_params(self):
         params = {
@@ -1727,7 +1727,7 @@ class TextClassificationProcessor(Processor):
         self.header = header
         self.max_samples = max_samples
         self.dev_stratification = dev_stratification
-        logger.warning(f"Currently no support in Processor for returning problematic ids")
+        logger.debug(f"Currently no support in Processor for returning problematic ids")
 
         super(TextClassificationProcessor, self).__init__(
             tokenizer=tokenizer,
@@ -119,6 +119,7 @@ class Inferencer:
         use_fast: bool = True,
         tokenizer_args: Dict = None,
         multithreading_rust: bool = True,
+        devices: Optional[List[Union[int, str, torch.device]]] = None,
         **kwargs
     ):
         """
@@ -158,12 +159,15 @@ class Inferencer:
         :param multithreading_rust: Whether to allow multithreading in Rust, e.g. for FastTokenizers.
                                     Note: Enabling multithreading in Rust AND multiprocessing in python might cause
                                     deadlocks.
+        :param devices: List of devices to perform inference on. (Currently, only the first device in the list is used.)
         :return: An instance of the Inferencer.
         """
         if tokenizer_args is None:
             tokenizer_args = {}
 
-        devices, n_gpu = initialize_device_settings(use_cuda=gpu, multi_gpu=False)
+        if devices is None:
+            devices, n_gpu = initialize_device_settings(use_cuda=gpu, multi_gpu=False)
 
         name = os.path.basename(model_name_or_path)
 
         # a) either from local dir
@@ -183,7 +187,7 @@ class Inferencer:
 
         model = AdaptiveModel.convert_from_transformers(model_name_or_path,
                                                         revision=revision,
-                                                        device=devices[0],
+                                                        device=devices[0],  # type: ignore
                                                         task_type=task_type,
                                                         **kwargs)
         processor = Processor.convert_from_transformers(model_name_or_path,
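A hedged sketch of the new `devices` parameter in use. Judging from the surrounding hunks this signature belongs to `Inferencer.load`, and the docstring notes that only the first device in the list is currently used; the model name below is just an illustrative example, not taken from this diff:

```python
import torch
from haystack.modeling.infer import Inferencer

# Pin inference to an explicit device; when `devices` is left as None,
# the previous automatic initialize_device_settings() path is taken.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
inferencer = Inferencer.load(
    "deepset/roberta-base-squad2",   # example model name
    task_type="question_answering",
    devices=[device],
)
```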
@@ -112,7 +112,7 @@ class MLFlowLogger(BaseMLLogger):
 
     @classmethod
     def disable(cls):
-        logger.warning("ML Logging is turned off. No parameters, metrics or artifacts will be logged to MLFlow.")
+        logger.info("ML Logging is turned off. No parameters, metrics or artifacts will be logged to MLFlow.")
         cls.disable_logging = True
@@ -24,6 +24,7 @@ import json
 import logging
 import os
 from pathlib import Path
+from functools import wraps
 import numpy as np
 import torch
 from torch import nn
@@ -46,6 +47,29 @@ from transformers.modeling_utils import SequenceSummary
 logger = logging.getLogger(__name__)
 
 
+def silence_transformers_logs(from_pretrained_func):
+    """
+    Wrapper that raises the log level of Transformers to
+    ERROR to hide some unnecessary warnings
+    """
+    @wraps(from_pretrained_func)
+    def quiet_from_pretrained_func(cls, *args, **kwargs):
+
+        # Raise the log level of Transformers
+        t_logger = logging.getLogger("transformers")
+        original_log_level = t_logger.level
+        t_logger.setLevel(logging.ERROR)
+
+        result = from_pretrained_func(cls, *args, **kwargs)
+
+        # Restore the log level
+        t_logger.setLevel(original_log_level)
+
+        return result
+
+    return quiet_from_pretrained_func
+
+
 # These are the names of the attributes in various model configs which refer to the number of dimensions
 # in the output vectors
 OUTPUT_DIM_NAMES = ["dim", "hidden_size", "d_model"]
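Since `silence_transformers_logs` is a plain decorator, the technique is easy to verify in isolation. A self-contained sketch of the same idea (the decorated function here is a made-up stand-in for a `from_pretrained`-style call, and the `try/finally` is a small hardening the original does without):

```python
import logging
from functools import wraps

logging.basicConfig(level=logging.INFO)


def silence_logger(logger_name: str):
    """Build a decorator that raises `logger_name` to ERROR for the duration of a call."""
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            target = logging.getLogger(logger_name)
            original_level = target.level
            target.setLevel(logging.ERROR)
            try:
                return func(*args, **kwargs)
            finally:
                # Restore the previous level even if the wrapped call raises
                target.setLevel(original_level)
        return wrapper
    return decorator


@silence_logger("transformers")
def noisy_load():
    # Stand-in for a from_pretrained-style call that warns a lot
    logging.getLogger("transformers").warning("Some weights were newly initialized")
    return "model"


noisy_load()  # the warning above is suppressed
```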
@@ -122,7 +146,6 @@ class LanguageModel(nn.Module):
         n_added_tokens = kwargs.pop("n_added_tokens", 0)
         language_model_class = kwargs.pop("language_model_class", None)
         kwargs["revision"] = kwargs.get("revision", None)
-        logger.info("")
         logger.info("LOADING MODEL")
         logger.info("=============")
         config_file = Path(pretrained_model_name_or_path) / "language_model_config.json"
@@ -426,6 +449,7 @@ class Bert(LanguageModel):
         return bert
 
     @classmethod
+    @silence_transformers_logs
     def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs):
         """
         Load a pretrained model by supplying
@@ -503,6 +527,7 @@ class Albert(LanguageModel):
         self.name = "albert"
 
     @classmethod
+    @silence_transformers_logs
     def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs):
         """
         Load a language model either by supplying
@@ -584,6 +609,7 @@ class Roberta(LanguageModel):
         self.name = "roberta"
 
     @classmethod
+    @silence_transformers_logs
     def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs):
         """
         Load a language model either by supplying
@@ -665,6 +691,7 @@ class XLMRoberta(LanguageModel):
         self.name = "xlm_roberta"
 
     @classmethod
+    @silence_transformers_logs
     def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs):
         """
         Load a language model either by supplying
@@ -753,6 +780,7 @@ class DistilBert(LanguageModel):
         self.pooler = None
 
     @classmethod
+    @silence_transformers_logs
     def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs):
         """
         Load a pretrained model by supplying
@@ -840,6 +868,7 @@ class XLNet(LanguageModel):
         self.pooler = None
 
     @classmethod
+    @silence_transformers_logs
     def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs):
         """
         Load a language model either by supplying
@@ -946,6 +975,7 @@ class Electra(LanguageModel):
         self.pooler = None
 
     @classmethod
+    @silence_transformers_logs
     def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs):
         """
         Load a pretrained model by supplying
@@ -1037,6 +1067,7 @@ class Camembert(Roberta):
         self.name = "camembert"
 
     @classmethod
+    @silence_transformers_logs
     def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs):
         """
         Load a language model either by supplying
@@ -1080,6 +1111,7 @@ class DPRQuestionEncoder(LanguageModel):
         self.name = "dpr_question_encoder"
 
     @classmethod
+    @silence_transformers_logs
     def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs):
         """
         Load a pretrained model by supplying
@@ -1212,6 +1244,7 @@ class DPRContextEncoder(LanguageModel):
         self.name = "dpr_context_encoder"
 
     @classmethod
+    @silence_transformers_logs
     def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs):
         """
         Load a pretrained model by supplying
@@ -1364,6 +1397,7 @@ class BigBird(LanguageModel):
         return big_bird
 
     @classmethod
+    @silence_transformers_logs
     def load(cls, pretrained_model_name_or_path: Union[Path, str], language: str = None, **kwargs):
         """
         Load a pretrained model by supplying
@@ -22,7 +22,7 @@ logger = logging.getLogger(__name__)
 try:
     from apex.normalization.fused_layer_norm import FusedLayerNorm as BertLayerNorm
 except (ImportError, AttributeError) as e:
-    logger.info("Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .")
+    logger.debug("Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .")
     BertLayerNorm = torch.nn.LayerNorm
@@ -255,7 +255,7 @@ class QuestionAnsweringHead(PredictionHead):
         self.layer_dims = layer_dims
         assert self.layer_dims[-1] == 2
         self.feed_forward = FeedForwardBlock(self.layer_dims)
-        logger.info(f"Prediction head initialized with size {self.layer_dims}")
+        logger.debug(f"Prediction head initialized with size {self.layer_dims}")
         self.num_labels = self.layer_dims[-1]
         self.ph_output_type = "per_token_squad"
         self.model_type = ("span_classification")  # predicts start and end token of answer
@@ -75,7 +75,7 @@ class Tokenizer:
         if tokenizer_class is None:
             tokenizer_class = cls._infer_tokenizer_class(pretrained_model_name_or_path)
 
-        logger.info(f"Loading tokenizer of type '{tokenizer_class}'")
+        logger.debug(f"Loading tokenizer of type '{tokenizer_class}'")
         # return appropriate tokenizer object
         ret = None
         if "AlbertTokenizer" in tokenizer_class:
@@ -92,15 +92,12 @@ WATERING_CAN = """
 
 WORKER_M = """     0
                   /|\\
-                  /'\\
-       """
+                  /'\\"""
 
 WORKER_F = """     0
                   /w\\
-                  / \\
-       """
+                  / \\"""
 
 WORKER_X = """     0
                   /w\\
-                  /'\\
-       """
+                  /'\\"""
@@ -14,71 +14,6 @@ from haystack.schema import Document, MultiLabel
 logger = logging.getLogger(__name__)
 
 
-class InMemoryLogger(io.TextIOBase):
-    """
-    Implementation of a logger that keeps track
-    of the log lines in a list called `logs`,
-    from where they can be accessed freely.
-    """
-    def __init__(self, *args):
-        io.TextIOBase.__init__(self, *args)
-        self.logs = []
-
-    def write(self, x):
-        self.logs.append(x)
-
-
-def record_debug_logs(func: Callable, node_name: str, logs: bool) -> Callable:
-    """
-    Captures the debug logs of the wrapped function and
-    saves them in the `_debug` key of the output dictionary.
-    If `logs` is True, dumps the same logs to the console as well.
-
-    Used in `BaseComponent.__getattribute__()` to wrap `run()` functions.
-    This makes sure that every implementation of `run()` by a subclass will
-    be automagically decorated with this method when requested.
-
-    :param func: the function to decorate (must be an implementation of
-                 `BaseComponent.run()`).
-    :param logs: whether the captured logs should also be displayed
-                 in the console during the execution of the pipeline.
-    """
-    @wraps(func)
-    def inner(*args, **kwargs) -> Tuple[Dict[str, Any], str]:
-
-        with InMemoryLogger() as logs_container:
-            logger = logging.getLogger()
-
-            # Adds a handler that stores the logs in a variable
-            handler = logging.StreamHandler(logs_container)
-            handler.setLevel(logger.level or logging.DEBUG)
-            logger.addHandler(handler)
-
-            # Add a handler that prints log messages in the console
-            # to the specified level for the node
-            if logs:
-                handler_console = logging.StreamHandler()
-                handler_console.setLevel(logging.DEBUG)
-                formatter = logging.Formatter(f'[{node_name} logs] %(message)s')
-                handler_console.setFormatter(formatter)
-                logger.addHandler(handler_console)
-
-            output, stream = func(*args, **kwargs)
-
-            if not "_debug" in output.keys():
-                output["_debug"] = {}
-            output["_debug"]["logs"] = logs_container.logs
-
-            # Remove both handlers
-            logger.removeHandler(handler)
-            if logs:
-                logger.removeHandler(handler_console)
-
-            return output, stream
-
-    return inner
-
-
 class BaseComponent:
     """
     A base class for implementing nodes in a Pipeline.
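For readers tracing what was just removed: the capture feature worked by pointing a `logging.StreamHandler` at an in-memory text buffer for the duration of a `run()` call. The core of that technique, reduced to a standalone sketch that runs on the standard library alone:

```python
import io
import logging


class InMemoryLogger(io.TextIOBase):
    """Keeps every line written to it in a list, as the removed class did."""

    def __init__(self):
        super().__init__()
        self.logs = []

    def write(self, x):
        self.logs.append(x)


demo_logger = logging.getLogger("capture-demo")
demo_logger.setLevel(logging.DEBUG)

# Attach a handler that writes into the in-memory container
container = InMemoryLogger()
handler = logging.StreamHandler(container)
demo_logger.addHandler(handler)

demo_logger.debug("captured in memory, not printed")

demo_logger.removeHandler(handler)
print(container.logs)  # ['captured in memory, not printed\n']
```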
@@ -96,37 +31,6 @@ class BaseComponent:
         super().__init_subclass__(**kwargs)
         cls.subclasses[cls.__name__] = cls
 
-    def __getattribute__(self, name):
-        """
-        This modified `__getattribute__` method automagically decorates
-        every `BaseComponent.run()` implementation with the
-        `record_debug_logs` decorator defined above.
-
-        This decorator makes the function collect its debug logs into a
-        `_debug` key of the output dictionary.
-
-        The logs collection is not always performed. Before applying the decorator,
-        it checks for an instance attribute called `debug` to know
-        whether it should or not. The decorator is applied if the attribute is
-        defined and True.
-
-        In addition, the value of the instance attribute `debug_logs` is
-        passed to the decorator. If it's True, it will print the
-        logs in the console as well.
-        """
-        if name == "run" and self.debug:
-            func = getattr(type(self), "run")
-            return record_debug_logs(func=func, node_name=self.__class__.__name__, logs=self.debug_logs).__get__(self)
-        return object.__getattribute__(self, name)
-
-    def __getattr__(self, name):
-        """
-        Ensures that `debug` and `debug_logs` are always defined.
-        """
-        if name in ["debug", "debug_logs"]:
-            return None
-        raise AttributeError(name)
-
     @classmethod
     def get_subclass(cls, component_type: str):
         if component_type not in cls.subclasses.keys():
@@ -196,7 +100,7 @@ class BaseComponent:
 
         It takes care of the following:
           - inspect run() signature to validate if all necessary arguments are available
-          - pop `debug` and `debug_logs` and sets them on the instance to control debug output
+          - pop `debug` and sets them on the instance to control debug output
          - call run() with the corresponding arguments and gather output
          - collate `_debug` information if present
          - merge component output with the preceding output and pass it on to the subsequent Component in the Pipeline
@@ -214,8 +118,6 @@ class BaseComponent:
             # Extract debug attributes
             if "debug" in value.keys():
                 self.debug = value.pop("debug")
-            if "debug_logs" in value.keys():
-                self.debug_logs = value.pop("debug_logs")
 
             for _k, _v in value.items():
                 if _k not in run_signature_args:
@@ -234,13 +136,10 @@ class BaseComponent:
 
         # Collect debug information
         current_debug = output.get("_debug", {})
-        if self.debug:
+        if getattr(self, "debug", None):
             current_debug["input"] = {**run_inputs, **run_params}
-            if self.debug:
-                current_debug["input"]["debug"] = self.debug
-            if self.debug_logs:
-                current_debug["input"]["debug_logs"] = self.debug_logs
-            filtered_output = {key: value for key, value in output.items() if key != "_debug"}  # Exclude _debug to avoid recursion
+            current_debug["input"]["debug"] = self.debug
+            filtered_output = {key: value for key, value in output.items() if key != "_debug"}  # Exclude _debug to avoid recursion
             current_debug["output"] = filtered_output
 
         # append _debug information from nodes
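Putting the three hunks above together, the `debug` handling in `_dispatch_run` now reduces to roughly the following shape. This is a simplified paraphrase for readability, not the verbatim method; `self.name` stands in for however the node looks itself up in `params`:

```python
def _dispatch_run_sketch(self, **kwargs):
    params = kwargs.pop("params", None) or {}
    value = params.get(self.name, {})

    # Only `debug` is popped now; `debug_logs` is gone
    if "debug" in value:
        self.debug = value.pop("debug")

    output, stream = self.run(**kwargs)

    # getattr with a default, because `debug` may never have been set
    current_debug = output.get("_debug", {})
    if getattr(self, "debug", None):
        current_debug["input"] = dict(kwargs)
        current_debug["input"]["debug"] = self.debug
        # Exclude _debug itself to avoid recursion
        current_debug["output"] = {k: v for k, v in output.items() if k != "_debug"}
        output["_debug"] = current_debug

    return output, stream
```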
@@ -131,6 +131,7 @@ class FARMReader(BaseReader):
                                       proxies=proxies,
                                       local_files_only=local_files_only,
                                       force_download=force_download,
+                                      devices=self.devices,
                                       **kwargs)
         self.inferencer.model.prediction_heads[0].context_window_size = context_window_size
         self.inferencer.model.prediction_heads[0].no_ans_boost = no_ans_boost
@@ -443,7 +444,7 @@ class FARMReader(BaseReader):
         :type device: str
         """
         if device is None:
-            device = self.device
+            device = self.devices[0]
         eval_processor = SquadProcessor(
             tokenizer=self.inferencer.processor.tokenizer,
             max_seq_len=self.inferencer.processor.max_seq_len,
@@ -493,7 +494,7 @@ class FARMReader(BaseReader):
         :param calibrate_conf_scores: Whether to calibrate the temperature for temperature scaling of the confidence scores
         """
         if device is None:
-            device = self.device
+            device = self.devices[0]
         if self.top_k_per_candidate != 4:
             logger.info(f"Performing Evaluation using top_k_per_candidate = {self.top_k_per_candidate} \n"
                         f"and consequently, QuestionAnsweringPredictionHead.n_best = {self.top_k_per_candidate + 1}. \n"
@@ -661,7 +662,7 @@ class FARMReader(BaseReader):
         :param label_origin: Field name where the gold labels are stored
         """
         if device is None:
-            device = self.device
+            device = self.devices[0]
         self.eval(document_store=document_store,
                   device=device,
                   label_index=label_index,
@@ -261,8 +261,7 @@ class Pipeline(BasePipeline):
             documents: Optional[List[Document]] = None,
             meta: Optional[dict] = None,
             params: Optional[dict] = None,
-            debug: Optional[bool] = None,
-            debug_logs: Optional[bool] = None
+            debug: Optional[bool] = None
         ):
         """
         Runs the pipeline, one node at a time.
@@ -278,11 +277,8 @@ class Pipeline(BasePipeline):
                        {"Retriever": {"top_k": 10}, "Reader": {"top_k": 3, "debug": True}}
         :param debug: Whether the pipeline should instruct nodes to collect debug information
                       about their execution. By default these include the input parameters
-                      they received, the output they generated, and eventual logs (of any severity)
-                      emitted. All debug information can then be found in the dict returned
-                      by this method under the key "_debug"
-        :param debug_logs: Whether all the logs of the node should be printed in the console,
-                           regardless of their severity and of the existing logger's settings.
+                      they received and the output they generated. All debug information can
+                      then be found in the dict returned by this method under the key "_debug"
         """
         # validate the node names
         if params:
@@ -327,8 +323,6 @@ class Pipeline(BasePipeline):
                 if node_id not in node_input["params"].keys():
                     node_input["params"][node_id] = {}
                 node_input["params"][node_id]["debug"] = debug
-                if debug_logs is not None:
-                    node_input["params"][node_id]["debug_logs"] = debug_logs
 
             predecessors = set(nx.ancestors(self.graph, node_id))
             if predecessors.isdisjoint(set(queue.keys())):  # only execute if predecessor nodes are executed
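The loop above is what forwards a global `debug=True` into every node's params, which is also why the per-node and override behaviours tested further down in this diff work. In usage terms (query and node names borrowed from those tests):

```python
# Global: every node collects debug info
pipeline.run(query="Who lives in Berlin?", debug=True)

# Per node: only the retriever collects debug info
pipeline.run(
    query="Who lives in Berlin?",
    params={"ESRetriever": {"top_k": 10, "debug": True}, "Reader": {"top_k": 3}},
)

# The global flag overrides per-node flags
# (see test_global_debug_attributes_override_node_ones below)
pipeline.run(
    query="Who lives in Berlin?",
    params={"ESRetriever": {"debug": True}, "Reader": {"debug": True}},
    debug=False,
)
```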
@@ -87,21 +87,18 @@ class ExtractiveQAPipeline(BaseStandardPipeline):
     def run(self,
             query: str,
             params: Optional[dict] = None,
-            debug: Optional[bool] = None,
-            debug_logs: Optional[bool] = None):
+            debug: Optional[bool] = None):
         """
         :param query: The search query string.
         :param params: Params for the `retriever` and `reader`. For instance,
                        params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}}
         :param debug: Whether the pipeline should instruct nodes to collect debug information
                       about their execution. By default these include the input parameters
-                      they received, the output they generated, and eventual logs (of any severity)
-                      emitted. All debug information can then be found in the dict returned
+                      they received and the output they generated.
+                      All debug information can then be found in the dict returned
                       by this method under the key "_debug"
-        :param debug_logs: Whether all the logs of the node should be printed in the console,
-                           regardless of their severity and of the existing logger's settings.
         """
-        output = self.pipeline.run(query=query, params=params, debug=debug, debug_logs=debug_logs)
+        output = self.pipeline.run(query=query, params=params, debug=debug)
         return output
 
 
@@ -119,20 +116,17 @@ class DocumentSearchPipeline(BaseStandardPipeline):
     def run(self,
             query: str,
             params: Optional[dict] = None,
-            debug: Optional[bool] = None,
-            debug_logs: Optional[bool] = None):
+            debug: Optional[bool] = None):
         """
         :param query: the query string.
         :param params: params for the `retriever` and `reader`. For instance, params={"retriever": {"top_k": 10}}
         :param debug: Whether the pipeline should instruct nodes to collect debug information
                       about their execution. By default these include the input parameters
-                      they received, the output they generated, and eventual logs (of any severity)
-                      emitted. All debug information can then be found in the dict returned
+                      they received and the output they generated.
+                      All debug information can then be found in the dict returned
                       by this method under the key "_debug"
-        :param debug_logs: Whether all the logs of the node should be printed in the console,
-                           regardless of their severity and of the existing logger's settings.
         """
-        output = self.pipeline.run(query=query, params=params, debug=debug, debug_logs=debug_logs)
+        output = self.pipeline.run(query=query, params=params, debug=debug)
         return output
 
 
@@ -152,21 +146,18 @@ class GenerativeQAPipeline(BaseStandardPipeline):
     def run(self,
             query: str,
             params: Optional[dict] = None,
-            debug: Optional[bool] = None,
-            debug_logs: Optional[bool] = None):
+            debug: Optional[bool] = None):
         """
         :param query: the query string.
         :param params: params for the `retriever` and `generator`. For instance,
                        params={"Retriever": {"top_k": 10}, "Generator": {"top_k": 5}}
         :param debug: Whether the pipeline should instruct nodes to collect debug information
                       about their execution. By default these include the input parameters
-                      they received, the output they generated, and eventual logs (of any severity)
-                      emitted. All debug information can then be found in the dict returned
+                      they received and the output they generated.
+                      All debug information can then be found in the dict returned
                       by this method under the key "_debug"
-        :param debug_logs: Whether all the logs of the node should be printed in the console,
-                           regardless of their severity and of the existing logger's settings.
         """
-        output = self.pipeline.run(query=query, params=params, debug=debug, debug_logs=debug_logs)
+        output = self.pipeline.run(query=query, params=params, debug=debug)
         return output
 
 
@@ -190,21 +181,18 @@ class SearchSummarizationPipeline(BaseStandardPipeline):
     def run(self,
             query: str,
             params: Optional[dict] = None,
-            debug: Optional[bool] = None,
-            debug_logs: Optional[bool] = None):
+            debug: Optional[bool] = None):
         """
         :param query: the query string.
         :param params: params for the `retriever` and `summarizer`. For instance,
                        params={"retriever": {"top_k": 10}, "summarizer": {"generate_single_summary": True}}
         :param debug: Whether the pipeline should instruct nodes to collect debug information
                       about their execution. By default these include the input parameters
-                      they received, the output they generated, and eventual logs (of any severity)
-                      emitted. All debug information can then be found in the dict returned
+                      they received and the output they generated.
+                      All debug information can then be found in the dict returned
                       by this method under the key "_debug"
-        :param debug_logs: Whether all the logs of the node should be printed in the console,
-                           regardless of their severity and of the existing logger's settings.
-        """
-        output = self.pipeline.run(query=query, params=params, debug=debug, debug_logs=debug_logs)
+        """
+        output = self.pipeline.run(query=query, params=params, debug=debug)
 
         # Convert to answer format to allow "drop-in replacement" for other QA pipelines
         if self.return_in_answer_format:
@@ -243,20 +231,17 @@ class FAQPipeline(BaseStandardPipeline):
     def run(self,
             query: str,
             params: Optional[dict] = None,
-            debug: Optional[bool] = None,
-            debug_logs: Optional[bool] = None):
+            debug: Optional[bool] = None):
         """
         :param query: the query string.
         :param params: params for the `retriever`. For instance, params={"retriever": {"top_k": 10}}
         :param debug: Whether the pipeline should instruct nodes to collect debug information
                       about their execution. By default these include the input parameters
-                      they received, the output they generated, and eventual logs (of any severity)
-                      emitted. All debug information can then be found in the dict returned
+                      they received and the output they generated.
+                      All debug information can then be found in the dict returned
                       by this method under the key "_debug"
-        :param debug_logs: Whether all the logs of the node should be printed in the console,
-                           regardless of their severity and of the existing logger's settings.
         """
-        output = self.pipeline.run(query=query, params=params, debug=debug, debug_logs=debug_logs)
+        output = self.pipeline.run(query=query, params=params, debug=debug)
         return output
 
 
@@ -316,10 +301,8 @@ class QuestionGenerationPipeline(BaseStandardPipeline):
     def run(self,
             documents,
             params: Optional[dict] = None,
-            debug: Optional[bool] = None,
-            debug_logs: Optional[bool] = None
-            ):
-        output = self.pipeline.run(documents=documents, params=params, debug=debug, debug_logs=debug_logs)
+            debug: Optional[bool] = None):
+        output = self.pipeline.run(documents=documents, params=params, debug=debug)
         return output
 
 
@@ -336,9 +319,8 @@ class RetrieverQuestionGenerationPipeline(BaseStandardPipeline):
     def run(self,
             query: str,
             params: Optional[dict] = None,
-            debug: Optional[bool] = None,
-            debug_logs: Optional[bool] = None):
-        output = self.pipeline.run(query=query, params=params, debug=debug, debug_logs=debug_logs)
+            debug: Optional[bool] = None):
+        output = self.pipeline.run(query=query, params=params, debug=debug)
         return output
 
 
@@ -372,9 +354,8 @@ class QuestionAnswerGenerationPipeline(BaseStandardPipeline):
     def run(self,
             documents: List[Document],  # type: ignore
             params: Optional[dict] = None,
-            debug: Optional[bool] = None,
-            debug_logs: Optional[bool] = None):
-        output = self.pipeline.run(documents=documents, params=params, debug=debug, debug_logs=debug_logs)
+            debug: Optional[bool] = None):
+        output = self.pipeline.run(documents=documents, params=params, debug=debug)
         return output
@@ -9,7 +9,7 @@ logger = logging.getLogger(__name__)
 def launch_es(sleep=15):
     # Start an Elasticsearch server via Docker
 
-    logger.info("Starting Elasticsearch ...")
+    logger.debug("Starting Elasticsearch ...")
     status = subprocess.run(
         ['docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.9.2'], shell=True
     )
@@ -22,7 +22,7 @@ def launch_es(sleep=15):
 def launch_open_distro_es(sleep=15):
     # Start an Open Distro for Elasticsearch server via Docker
 
-    logger.info("Starting Open Distro for Elasticsearch ...")
+    logger.debug("Starting Open Distro for Elasticsearch ...")
     status = subprocess.run(
         ['docker run -d -p 9200:9200 -p 9600:9600 -e "discovery.type=single-node" amazon/opendistro-for-elasticsearch:1.13.2'], shell=True
     )
@@ -35,7 +35,7 @@ def launch_open_distro_es(sleep=15):
 def launch_opensearch(sleep=15):
     # Start an OpenSearch server via docker
 
-    logger.info("Starting OpenSearch...")
+    logger.debug("Starting OpenSearch...")
     # This line is needed since it is not possible to start a new docker container with the name opensearch if there is a stopped image with the same name
     # docker rm only succeeds if the container is stopped, not if it is running
     _ = subprocess.run(['docker rm opensearch'], shell=True, stdout=subprocess.DEVNULL)
@@ -53,7 +53,7 @@ def launch_opensearch(sleep=15):
 def launch_weaviate(sleep=15):
     # Start a Weaviate server via Docker
 
-    logger.info("Starting Weaviate ...")
+    logger.debug("Starting Weaviate ...")
     status = subprocess.run(
         ["docker run -d -p 8080:8080 --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' semitechnologies/weaviate:1.7.2"], shell=True
     )
@@ -65,7 +65,7 @@ def launch_weaviate(sleep=15):
 
 
 def stop_opensearch():
-    logger.info("Stopping OpenSearch...")
+    logger.debug("Stopping OpenSearch...")
     status = subprocess.run(['docker stop opensearch'], shell=True)
     if status.returncode:
         logger.warning("Tried to stop OpenSearch but this failed. "
@@ -84,7 +84,7 @@ def stop_service(document_store):
 def launch_milvus(sleep=15):
     # Start a Milvus server via docker
 
-    logger.info("Starting Milvus ...")
+    logger.debug("Starting Milvus ...")
     logger.warning("Automatic Milvus config creation not yet implemented. "
                    "If you are starting Milvus using launch_milvus(), "
                    "make sure you have a properly populated milvus/conf folder. "
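Because the launcher messages above dropped from INFO to DEBUG, they no longer appear with the new defaults. A quick sketch of turning them back on when diagnosing Docker startup, assuming `launch_es` is importable from `haystack.utils` as in the tutorials:

```python
import logging

from haystack.utils import launch_es

# "Starting Elasticsearch ..." is DEBUG now, so raise the haystack logger
logging.getLogger("haystack").setLevel(logging.DEBUG)

launch_es(sleep=15)
```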
@@ -131,8 +131,7 @@ def test_node_names_validation(document_store_with_docs, tmp_path):
                 "top_k": 5,
                 "non-existing-global_param": "wrong",
             },
-            debug=True,
-            debug_logs=True
+            debug=True
         )
     exception_raised = str(exc_info.value)
     assert "non-existing-node" in exception_raised
@@ -155,8 +154,7 @@ def test_debug_attributes_global(document_store_with_docs, tmp_path):
     prediction = pipeline.run(
         query="Who lives in Berlin?",
         params={"ESRetriever": {"top_k": 10}, "Reader": {"top_k": 3}},
-        debug=True,
-        debug_logs=True
+        debug=True
     )
     assert "_debug" in prediction.keys()
     assert "ESRetriever" in prediction["_debug"].keys()
@@ -187,7 +185,7 @@ def test_debug_attributes_per_node(document_store_with_docs, tmp_path):
     prediction = pipeline.run(
         query="Who lives in Berlin?",
         params={
-            "ESRetriever": {"top_k": 10, "debug": True, "debug_logs": True},
+            "ESRetriever": {"top_k": 10, "debug": True},
             "Reader": {"top_k": 3}
         },
     )
@@ -217,7 +215,7 @@ def test_global_debug_attributes_override_node_ones(document_store_with_docs, tmp_path):
     prediction = pipeline.run(
         query="Who lives in Berlin?",
         params={
-            "ESRetriever": {"top_k": 10, "debug": True, "debug_logs": True},
+            "ESRetriever": {"top_k": 10, "debug": True},
             "Reader": {"top_k": 3, "debug": True}
         },
         debug=False