Mirror of https://github.com/deepset-ai/haystack.git (synced 2025-06-26 22:00:13 +00:00)
chore: make Haystack warnings consistent (#9083)
* chore: make Haystack warnings consistent
* more structured logging
* small fixes
This commit is contained in:
parent 3e435439d9 · commit 1c1030efc6
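The recurring change across these diffs is swapping Python's `warnings.warn` (and f-string log messages) for Haystack's structured logger, which keeps the message template literal and passes values as keyword arguments. A minimal sketch of the pattern, assuming only what the hunks below show about `haystack.logging` (the `source` value is illustrative):

```python
# Sketch of the structured-logging pattern this commit standardizes on.
# Assumption: haystack.logging exposes getLogger(), and its warning() accepts
# named placeholders plus matching keyword arguments, as the diffs below show.
from haystack import logging

logger = logging.getLogger(__name__)

source = "specs/openapi.yaml"  # illustrative value, not from the commit

# Structured: the template stays constant and `source` travels as a separate
# field, so log aggregators can group and filter on it.
logger.warning("OpenAPI specification file not found: {source}", source=source)

# Replaced style: eager f-string formatting bakes the value into the string.
# logger.warning(f"OpenAPI specification file not found: {source}")
```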
```diff
@@ -84,7 +84,7 @@ class OpenAPIServiceToFunctions:
                     "IO error reading OpenAPI specification file: {source}. Error: {e}", source=source, e=e
                 )
             else:
-                logger.warning(f"OpenAPI specification file not found: {source}")
+                logger.warning("OpenAPI specification file not found: {source}", source=source)
         elif isinstance(source, ByteStream):
             openapi_spec_content = source.data.decode("utf-8")
             if not openapi_spec_content:
```
```diff
@@ -5,7 +5,6 @@
 import ast
 import contextlib
 from typing import Any, Callable, Dict, Optional, Set
-from warnings import warn

 import jinja2.runtime
 from jinja2 import Environment, TemplateSyntaxError, meta
@@ -13,9 +12,11 @@ from jinja2.nativetypes import NativeEnvironment
 from jinja2.sandbox import SandboxedEnvironment
 from typing_extensions import TypeAlias

-from haystack import component, default_from_dict, default_to_dict
+from haystack import component, default_from_dict, default_to_dict, logging
 from haystack.utils import deserialize_callable, deserialize_type, serialize_callable, serialize_type

+logger = logging.getLogger(__name__)
+

 class OutputAdaptationException(Exception):
     """Exception raised when there is an error during output adaptation."""
@@ -76,7 +77,7 @@ class OutputAdapter:
                 "Unsafe mode is enabled. This allows execution of arbitrary code in the Jinja template. "
                 "Use this only if you trust the source of the template."
             )
-            warn(msg)
+            logger.warning(msg)
         self._env = (
             NativeEnvironment() if self._unsafe else SandboxedEnvironment(undefined=jinja2.runtime.StrictUndefined)
         )
```
```diff
@@ -2,12 +2,11 @@
 #
 # SPDX-License-Identifier: Apache-2.0

-import warnings
 from typing import Any, Dict, List, Optional, Union

 from tqdm import tqdm

-from haystack import component, default_from_dict, default_to_dict
+from haystack import component, default_from_dict, default_to_dict, logging
 from haystack.dataclasses import Document
 from haystack.lazy_imports import LazyImport
 from haystack.utils import Secret, deserialize_secrets_inplace
@@ -17,6 +16,8 @@ from haystack.utils.url_validation import is_valid_http_url
 with LazyImport(message="Run 'pip install \"huggingface_hub>=0.27.0\"'") as huggingface_hub_import:
     from huggingface_hub import InferenceClient

+logger = logging.getLogger(__name__)
+

 @component
 class HuggingFaceAPIDocumentEmbedder:
@@ -241,11 +242,11 @@ class HuggingFaceAPIDocumentEmbedder:
         if self.api_type == HFEmbeddingAPIType.SERVERLESS_INFERENCE_API:
             if truncate is not None:
                 msg = "`truncate` parameter is not supported for Serverless Inference API. It will be ignored."
-                warnings.warn(msg)
+                logger.warning(msg)
                 truncate = None
             if normalize is not None:
                 msg = "`normalize` parameter is not supported for Serverless Inference API. It will be ignored."
-                warnings.warn(msg)
+                logger.warning(msg)
                 normalize = None

         all_embeddings = []
```
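The embedder hunks above (and the text-embedder hunks below) follow a warn-and-reset shape for parameters the Serverless Inference API does not support. A self-contained sketch of that guard, using the standard library logger for brevity; the function name is hypothetical, not Haystack API:

```python
import logging
from typing import Optional, Tuple

logger = logging.getLogger(__name__)

def drop_unsupported_params(
    truncate: Optional[bool], normalize: Optional[bool]
) -> Tuple[Optional[bool], Optional[bool]]:
    # Warn once per explicitly-set parameter, then neutralize it so the
    # downstream API call never receives an unsupported argument.
    if truncate is not None:
        logger.warning("`truncate` parameter is not supported for Serverless Inference API. It will be ignored.")
        truncate = None
    if normalize is not None:
        logger.warning("`normalize` parameter is not supported for Serverless Inference API. It will be ignored.")
        normalize = None
    return truncate, normalize
```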
```diff
@@ -2,10 +2,9 @@
 #
 # SPDX-License-Identifier: Apache-2.0

-import warnings
 from typing import Any, Dict, List, Optional, Union

-from haystack import component, default_from_dict, default_to_dict
+from haystack import component, default_from_dict, default_to_dict, logging
 from haystack.lazy_imports import LazyImport
 from haystack.utils import Secret, deserialize_secrets_inplace
 from haystack.utils.hf import HFEmbeddingAPIType, HFModelType, check_valid_model
@@ -14,6 +13,8 @@ from haystack.utils.url_validation import is_valid_http_url
 with LazyImport(message="Run 'pip install \"huggingface_hub>=0.27.0\"'") as huggingface_hub_import:
     from huggingface_hub import InferenceClient

+logger = logging.getLogger(__name__)
+

 @component
 class HuggingFaceAPITextEmbedder:
@@ -200,11 +201,11 @@ class HuggingFaceAPITextEmbedder:
         if self.api_type == HFEmbeddingAPIType.SERVERLESS_INFERENCE_API:
             if truncate is not None:
                 msg = "`truncate` parameter is not supported for Serverless Inference API. It will be ignored."
-                warnings.warn(msg)
+                logger.warning(msg)
                 truncate = None
             if normalize is not None:
                 msg = "`normalize` parameter is not supported for Serverless Inference API. It will be ignored."
-                warnings.warn(msg)
+                logger.warning(msg)
                 normalize = None

         text_to_embed = self.prefix + text + self.suffix
```
```diff
@@ -4,15 +4,16 @@

 import json
 from typing import Any, Dict, List, Optional, Tuple, Type
-from warnings import warn

 from tqdm import tqdm

-from haystack import component, default_from_dict, default_to_dict
+from haystack import component, default_from_dict, default_to_dict, logging
 from haystack.components.builders import PromptBuilder
 from haystack.components.generators import OpenAIGenerator
 from haystack.utils import Secret, deserialize_secrets_inplace, deserialize_type, serialize_type

+logger = logging.getLogger(__name__)
+

 @component
 class LLMEvaluator:
@@ -206,10 +207,9 @@ class LLMEvaluator:
             try:
                 result = self.generator.run(prompt=prompt["prompt"])
             except Exception as e:
-                msg = f"Error while generating response for prompt: {prompt}. Error: {e}"
                 if self.raise_on_failure:
-                    raise ValueError(msg)
-                warn(msg)
+                    raise ValueError(f"Error while generating response for prompt: {prompt}. Error: {e}")
+                logger.warning("Error while generating response for prompt: {prompt}. Error: {e}", prompt=prompt, e=e)
                 results.append(None)
                 errors += 1
                 continue
@@ -225,8 +225,11 @@ class LLMEvaluator:
             metadata = result["meta"]

         if errors > 0:
-            msg = f"LLM evaluator failed for {errors} out of {len(list_of_input_names_to_values)} inputs."
-            warn(msg)
+            logger.warning(
+                "LLM evaluator failed for {errors} out of {len(list_of_input_names_to_values)} inputs.",
+                errors=errors,
+                len=len(list_of_input_names_to_values),
+            )

         return {"results": results, "meta": metadata}
@@ -374,14 +377,19 @@ class LLMEvaluator:
             msg = "Response from LLM evaluator is not a valid JSON."
             if self.raise_on_failure:
                 raise ValueError(msg)
-            warn(msg)
+            logger.warning(msg)
             return False

         if not all(output in parsed_output for output in expected):
-            msg = f"Expected response from LLM evaluator to be JSON with keys {expected}, got {received}."
             if self.raise_on_failure:
-                raise ValueError(msg)
-            warn(msg)
+                raise ValueError(
+                    f"Expected response from LLM evaluator to be JSON with keys {expected}, got {received}."
+                )
+            logger.warning(
+                "Expected response from LLM evaluator to be JSON with keys {expected}, got {received}.",
+                expected=expected,
+                received=received,
+            )
             return False

         return True
```
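The LLMEvaluator hunks converge on one error-handling shape: raise when `raise_on_failure` is set, otherwise log a warning, record a placeholder result, and report a summary at the end. A hedged, standalone sketch of that shape; the class, its methods, and the stdlib %-style messages are stand-ins, not the Haystack API:

```python
import logging

logger = logging.getLogger(__name__)

class FlakyBatchRunner:
    """Illustrative stand-in for a component that tolerates per-item failures."""

    def __init__(self, raise_on_failure: bool = True):
        self.raise_on_failure = raise_on_failure

    def run(self, items):
        results, errors = [], 0
        for item in items:
            try:
                results.append(self._process(item))
            except Exception as e:
                # Fail fast when configured to; otherwise warn and move on,
                # appending None so the output stays aligned with the input.
                if self.raise_on_failure:
                    raise ValueError(f"Error while processing {item!r}. Error: {e}")
                logger.warning("Error while processing %r. Error: %s", item, e)
                results.append(None)
                errors += 1
        if errors > 0:
            logger.warning("Runner failed for %d out of %d inputs.", errors, len(items))
        return results

    def _process(self, item):
        if item is None:
            raise RuntimeError("empty input")
        return str(item).upper()
```

For example, `FlakyBatchRunner(raise_on_failure=False).run(["a", None, "b"])` returns `["A", None, "B"]` and emits one per-item warning plus the summary warning.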
```diff
@@ -3,7 +3,6 @@
 # SPDX-License-Identifier: Apache-2.0

 import math
-import warnings
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple, Union

@@ -215,9 +214,10 @@ class ExtractiveReader:
         document_contents = []
         for i, doc in enumerate(documents):
             if doc.content is None:
-                warnings.warn(
-                    f"Document with id {doc.id} was passed to ExtractiveReader. The Document doesn't "
-                    f"contain any text and it will be ignored."
+                logger.warning(
+                    "Document with id {doc_id} was passed to ExtractiveReader. The Document doesn't "
+                    "contain any text and it will be ignored.",
+                    doc_id=doc.id,
                 )
                 continue
             texts.append(doc.content)
```
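Note the placeholder rename from `{doc.id}` to `{doc_id}`: as I read it, the structured style fills placeholders from keyword arguments, and `doc.id=...` is not legal Python syntax for a keyword, so the attribute is passed under a flat name. A tiny sketch of the constraint using plain `str.format`:

```python
from dataclasses import dataclass

@dataclass
class Doc:
    id: str

doc = Doc(id="abc123")

# `doc.id=...` cannot appear as a keyword argument, so the placeholder gets a
# flat identifier and the attribute value is passed explicitly under it.
msg = "Document with id {doc_id} was passed to ExtractiveReader.".format(doc_id=doc.id)
print(msg)
```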
```diff
@@ -5,7 +5,6 @@
 import ast
 import contextlib
 from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Set, Union, get_args, get_origin
-from warnings import warn

 from jinja2 import Environment, TemplateSyntaxError, meta
 from jinja2.nativetypes import NativeEnvironment
@@ -192,7 +191,7 @@ class ConditionalRouter:
                 "Unsafe mode is enabled. This allows execution of arbitrary code in the Jinja template. "
                 "Use this only if you trust the source of the template."
             )
-            warn(msg)
+            logger.warning(msg)

         self._env = NativeEnvironment() if self._unsafe else SandboxedEnvironment()
         self._env.filters.update(self.custom_filters)
@@ -216,13 +215,11 @@ class ConditionalRouter:
         # warn about unused optional variables
         unused_optional_vars = set(self.optional_variables) - input_types if self.optional_variables else None
         if unused_optional_vars:
-            msg = (
-                f"The following optional variables are specified but not used in any route: {unused_optional_vars}. "
-                "Check if there's a typo in variable names."
+            logger.warning(
+                "The following optional variables are specified but not used in any route: {unused_optional_vars}. "
+                "Check if there's a typo in variable names.",
+                unused_optional_vars=unused_optional_vars,
             )
-            # intentionally using both warn and logger
-            warn(msg, UserWarning)
-            logger.warning(msg)

         # add mandatory input types
         component.set_input_types(self, **dict.fromkeys(mandatory_input_types, Any))
```
```diff
@@ -5,13 +5,15 @@
 import csv
 from copy import deepcopy
 from typing import Any, Dict, List, Literal, Optional, Union
-from warnings import warn

+from haystack import logging
 from haystack.lazy_imports import LazyImport

 with LazyImport("Run 'pip install pandas'") as pandas_import:
     from pandas import DataFrame

+logger = logging.getLogger(__name__)
+

 class EvaluationRunResult:
     """
@@ -188,10 +190,15 @@ class EvaluationRunResult:
             raise ValueError("The 'other' parameter must have 'run_name', 'inputs', and 'results' attributes.")

         if self.run_name == other.run_name:
-            warn(f"The run names of the two evaluation results are the same ('{self.run_name}')")
+            logger.warning(
+                "The run names of the two evaluation results are the same ('{run_name}')", run_name=self.run_name
+            )

         if self.inputs.keys() != other.inputs.keys():
-            warn(f"The input columns differ between the results; using the input columns of '{self.run_name}'.")
+            logger.warning(
+                "The input columns differ between the results; using the input columns of '{run_name}'",
+                run_name=self.run_name,
+            )

         # got both detailed reports
         detailed_a = self.detailed_report(output_format="json")
```
```diff
@@ -203,7 +203,7 @@ class TestHuggingFaceAPIDocumentEmbedder:
             "my_prefix document number 4 my_suffix",
         ]

-    def test_embed_batch(self, mock_check_valid_model, recwarn):
+    def test_embed_batch(self, mock_check_valid_model, caplog):
         texts = ["text 1", "text 2", "text 3", "text 4", "text 5"]

         with patch("huggingface_hub.InferenceClient.feature_extraction") as mock_embedding_patch:
@@ -225,10 +225,10 @@ class TestHuggingFaceAPIDocumentEmbedder:
             assert len(embedding) == 384
             assert all(isinstance(x, float) for x in embedding)

-        # Check that warnings about ignoring truncate and normalize are raised
-        assert len(recwarn) == 2
-        assert "truncate" in str(recwarn[0].message)
-        assert "normalize" in str(recwarn[1].message)
+        # Check that logger warnings about ignoring truncate and normalize are raised
+        assert len(caplog.records) == 2
+        assert "truncate" in caplog.records[0].message
+        assert "normalize" in caplog.records[1].message

     def test_embed_batch_wrong_embedding_shape(self, mock_check_valid_model):
         texts = ["text 1", "text 2", "text 3", "text 4", "text 5"]
```
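The fixture swap in these test hunks follows directly from the source change: `recwarn` (like `pytest.warns`) only captures the `warnings` module, while `caplog` captures logging records, so once `warnings.warn(...)` becomes `logger.warning(...)` only `caplog` sees the messages. A hedged, runnable sketch of the two capture mechanisms (the warning text is illustrative):

```python
import logging
import warnings

logger = logging.getLogger(__name__)

def emit_python_warning():
    warnings.warn("truncate will be ignored", UserWarning)

def emit_log_warning():
    logger.warning("truncate will be ignored")

def test_recwarn_captures_warnings_module(recwarn):
    # recwarn records warnings.warn calls, not log output.
    emit_python_warning()
    assert len(recwarn) == 1
    assert "truncate" in str(recwarn[0].message)

def test_caplog_captures_logger_output(caplog):
    # caplog records logging calls at or above the given level.
    with caplog.at_level(logging.WARNING):
        emit_log_warning()
    assert len(caplog.records) == 1
    assert "truncate" in caplog.records[0].message
```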
```diff
@@ -136,7 +136,7 @@ class TestHuggingFaceAPITextEmbedder:
         with pytest.raises(TypeError):
             embedder.run(text=list_integers_input)

-    def test_run(self, mock_check_valid_model, recwarn):
+    def test_run(self, mock_check_valid_model, caplog):
         with patch("huggingface_hub.InferenceClient.feature_extraction") as mock_embedding_patch:
             mock_embedding_patch.return_value = array([[random.random() for _ in range(384)]])

@@ -158,9 +158,9 @@ class TestHuggingFaceAPITextEmbedder:
         assert all(isinstance(x, float) for x in result["embedding"])

         # Check that warnings about ignoring truncate and normalize are raised
-        assert len(recwarn) == 2
-        assert "truncate" in str(recwarn[0].message)
-        assert "normalize" in str(recwarn[1].message)
+        assert len(caplog.records) == 2
+        assert "truncate" in caplog.records[0].message
+        assert "normalize" in caplog.records[1].message

     def test_run_wrong_embedding_shape(self, mock_check_valid_model):
         # embedding ndim > 2
```
```diff
@@ -525,7 +525,7 @@ class TestRouter:
         result = pipe.run(data={"router": {"question": "What?", "mode": "chat", "language": "en", "source": "doc"}})
         assert result["router"] == {"en_doc_chat": "What?"}, "Pipeline should handle all parameters"

-    def test_warns_on_unused_optional_variables(self):
+    def test_warns_on_unused_optional_variables(self, caplog):
         """
         Test that a warning is raised when optional_variables contains variables
         that are not used in any route conditions or outputs.
@@ -536,8 +536,8 @@ class TestRouter:
         ]

         # Initialize with unused optional variables and capture warning
-        with pytest.warns(UserWarning, match="optional variables"):
-            router = ConditionalRouter(routes=routes, optional_variables=["unused_var1", "unused_var2"])
+        router = ConditionalRouter(routes=routes, optional_variables=["unused_var1", "unused_var2"])
+        assert "optional variables" in caplog.records[0].message

         # Verify router still works normally
         result = router.run(question="What?", mode="chat")
```