diff --git a/haystack/components/converters/azure.py b/haystack/components/converters/azure.py
index d761e2cbb..4c34b6478 100644
--- a/haystack/components/converters/azure.py
+++ b/haystack/components/converters/azure.py
@@ -95,7 +95,7 @@ class AzureOCRDocumentConverter:
 
         self.document_analysis_client = DocumentAnalysisClient(
             endpoint=endpoint, credential=AzureKeyCredential(api_key.resolve_value() or "")
-        )  # type: ignore
+        )
         self.endpoint = endpoint
         self.model_id = model_id
         self.api_key = api_key
@@ -383,10 +383,10 @@ class AzureOCRDocumentConverter:
             if all(line.polygon is not None for line in lines):
                 for i in range(len(lines)):  # pylint: disable=consider-using-enumerate
                     # left_upi, right_upi, right_lowi, left_lowi = lines[i].polygon
-                    left_upi, _, _, _ = lines[i].polygon  # type: ignore
+                    left_upi, _, _, _ = lines[i].polygon
                     pairs_by_page[page_idx].append([i, i])
                     for j in range(i + 1, len(lines)):  # pylint: disable=invalid-name
-                        left_upj, _, _, _ = lines[j].polygon  # type: ignore
+                        left_upj, _, _, _ = lines[j].polygon
                         close_on_y_axis = abs(left_upi[1] - left_upj[1]) < threshold_y
                         if close_on_y_axis:
                             pairs_by_page[page_idx].append([i, j])
@@ -422,13 +422,13 @@ class AzureOCRDocumentConverter:
         for page_idx, _ in enumerate(result.pages):
             sorted_rows = []
             for row_of_lines in merged_lines_by_page[page_idx]:
-                sorted_rows.append(sorted(row_of_lines, key=lambda x: x.polygon[0][0]))  # type: ignore
+                sorted_rows.append(sorted(row_of_lines, key=lambda x: x.polygon[0][0]))
             x_sorted_lines_by_page[page_idx] = sorted_rows
 
         # Sort each row within the page by the y-value of the upper left bounding box coordinate
         y_sorted_lines_by_page = {}
         for page_idx, _ in enumerate(result.pages):
-            sorted_rows = sorted(x_sorted_lines_by_page[page_idx], key=lambda x: x[0].polygon[0][1])  # type: ignore
+            sorted_rows = sorted(x_sorted_lines_by_page[page_idx], key=lambda x: x[0].polygon[0][1])
             y_sorted_lines_by_page[page_idx] = sorted_rows
 
         # Construct the text to write
diff --git a/haystack/components/generators/chat/hugging_face_api.py b/haystack/components/generators/chat/hugging_face_api.py
index 272e84d47..c12a5192b 100644
--- a/haystack/components/generators/chat/hugging_face_api.py
+++ b/haystack/components/generators/chat/hugging_face_api.py
@@ -602,7 +602,7 @@ class HuggingFaceAPIChatGenerator:
                 chunk=chunk, previous_chunks=streaming_chunks, component_info=component_info
             )
             streaming_chunks.append(stream_chunk)
-            await streaming_callback(stream_chunk)  # type: ignore
+            await streaming_callback(stream_chunk)
 
         message = _convert_streaming_chunks_to_chat_message(chunks=streaming_chunks)
         if message.meta.get("usage") is None:
diff --git a/haystack/components/generators/chat/hugging_face_local.py b/haystack/components/generators/chat/hugging_face_local.py
index 7e69b36c7..c256afa67 100644
--- a/haystack/components/generators/chat/hugging_face_local.py
+++ b/haystack/components/generators/chat/hugging_face_local.py
@@ -8,10 +8,10 @@ import re
 import sys
 from concurrent.futures import ThreadPoolExecutor
 from contextlib import asynccontextmanager, suppress
-from typing import Any, Callable, Literal, Optional, Union, cast
+from typing import Any, Callable, Literal, Optional, Union
 
 from haystack import component, default_from_dict, default_to_dict, logging
-from haystack.dataclasses import AsyncStreamingCallbackT, ChatMessage, ComponentInfo, StreamingCallbackT, ToolCall
+from haystack.dataclasses import ChatMessage, ComponentInfo, StreamingCallbackT, ToolCall
 from haystack.dataclasses.streaming_chunk import select_streaming_callback
 from haystack.lazy_imports import LazyImport
 from haystack.tools import (
@@ -473,8 +473,7 @@ class HuggingFaceLocalChatGenerator:
         if streaming_callback:
             async_handler = AsyncHFTokenStreamingHandler(
                 tokenizer=prepared_inputs["tokenizer"],
-                # Cast to AsyncStreamingCallbackT since we know streaming_callback is async
-                stream_handler=cast(AsyncStreamingCallbackT, streaming_callback),
+                stream_handler=streaming_callback,
                 stop_words=prepared_inputs["stop_words"],
                 component_info=ComponentInfo.from_component(self),
             )
diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py
index 679c1a846..12ed1688c 100644
--- a/haystack/components/generators/chat/openai.py
+++ b/haystack/components/generators/chat/openai.py
@@ -416,7 +416,7 @@ class OpenAIChatGenerator:
 
         return {
             "model": self.model,
-            "messages": openai_formatted_messages,  # type: ignore[arg-type] # openai expects list of specific message types
+            "messages": openai_formatted_messages,
             "stream": streaming_callback is not None,
             "n": num_responses,
             **openai_tools,
diff --git a/haystack/components/generators/hugging_face_local.py b/haystack/components/generators/hugging_face_local.py
index 01cc4a23b..2a4bb7336 100644
--- a/haystack/components/generators/hugging_face_local.py
+++ b/haystack/components/generators/hugging_face_local.py
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-from typing import Any, Literal, Optional, cast
+from typing import Any, Literal, Optional
 
 from haystack import component, default_from_dict, default_to_dict, logging
 from haystack.dataclasses import ComponentInfo, StreamingCallbackT, select_streaming_callback
@@ -153,7 +153,7 @@ class HuggingFaceLocalGenerator:
             return
 
         if self.pipeline is None:
-            self.pipeline = cast(HfPipeline, pipeline(**self.huggingface_pipeline_kwargs))
+            self.pipeline = pipeline(**self.huggingface_pipeline_kwargs)
 
         if self.stop_words:
             # text-generation and text2text-generation pipelines always have a non-None tokenizer
diff --git a/haystack/components/generators/openai_dalle.py b/haystack/components/generators/openai_dalle.py
index 8d6e52bb3..7a907e2ec 100644
--- a/haystack/components/generators/openai_dalle.py
+++ b/haystack/components/generators/openai_dalle.py
@@ -143,7 +143,7 @@ class DALLEImageGenerator:
         :returns:
             The serialized component as a dictionary.
         """
-        return default_to_dict(  # type: ignore
+        return default_to_dict(
             self,
             model=self.model,
             quality=self.quality,
@@ -167,4 +167,4 @@ class DALLEImageGenerator:
         """
         init_params = data.get("init_parameters", {})
         deserialize_secrets_inplace(init_params, keys=["api_key"])
-        return default_from_dict(cls, data)  # type: ignore
+        return default_from_dict(cls, data)
diff --git a/haystack/components/preprocessors/csv_document_cleaner.py b/haystack/components/preprocessors/csv_document_cleaner.py
index 981f87b9b..d5b2a53f1 100644
--- a/haystack/components/preprocessors/csv_document_cleaner.py
+++ b/haystack/components/preprocessors/csv_document_cleaner.py
@@ -80,7 +80,7 @@ class CSVDocumentCleaner:
         cleaned_documents = []
         for document in documents:
             try:
-                df = pd.read_csv(StringIO(document.content), header=None, dtype=object)  # type: ignore
+                df = pd.read_csv(StringIO(document.content), header=None, dtype=object)
             except Exception as e:
                 logger.error(
                     "Error processing document {id}. Keeping it, but skipping cleaning. Error: {error}",
diff --git a/haystack/components/preprocessors/csv_document_splitter.py b/haystack/components/preprocessors/csv_document_splitter.py
index 5cbd6ddbb..4968506d7 100644
--- a/haystack/components/preprocessors/csv_document_splitter.py
+++ b/haystack/components/preprocessors/csv_document_splitter.py
@@ -107,7 +107,7 @@ class CSVDocumentSplitter:
         split_dfs = []
         for document in documents:
             try:
-                df = pd.read_csv(StringIO(document.content), **resolved_read_csv_kwargs)  # type: ignore
+                df = pd.read_csv(StringIO(document.content), **resolved_read_csv_kwargs)
             except Exception as e:
                 logger.error(f"Error processing document {document.id}. Keeping it, but skipping splitting. Error: {e}")
                 split_documents.append(document)
diff --git a/haystack/components/preprocessors/recursive_splitter.py b/haystack/components/preprocessors/recursive_splitter.py
index 2511d5263..c4f56e96c 100644
--- a/haystack/components/preprocessors/recursive_splitter.py
+++ b/haystack/components/preprocessors/recursive_splitter.py
@@ -289,7 +289,7 @@ class RecursiveDocumentSplitter:
         if self._chunk_length(text) <= self.split_length:
             return [text]
 
-        for curr_separator in self.separators:  # type: ignore # the caller already checked that separators is not None
+        for curr_separator in self.separators:
             if curr_separator == "sentence":
                 # re. ignore: correct SentenceSplitter initialization is checked at the initialization of the component
                 sentence_with_spans = self.nltk_tokenizer.split_sentences(text)  # type: ignore
diff --git a/haystack/components/rankers/meta_field_grouping_ranker.py b/haystack/components/rankers/meta_field_grouping_ranker.py
index 6a286726f..eb3265871 100644
--- a/haystack/components/rankers/meta_field_grouping_ranker.py
+++ b/haystack/components/rankers/meta_field_grouping_ranker.py
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from collections import defaultdict
-from typing import Any, Optional, cast
+from typing import Any, Optional
 
 from haystack import Document, component
 
@@ -107,7 +107,7 @@ class MetaFieldGroupingRanker:
         for subgroups in document_groups.values():
             for docs in subgroups.values():
                 if self.sort_docs_by:
-                    docs.sort(key=lambda d: d.meta.get(cast(str, self.sort_docs_by), float("inf")))
+                    docs.sort(key=lambda d: d.meta.get(self.sort_docs_by or "", float("inf")))
                 ordered_docs.extend(docs)
 
         ordered_docs.extend(no_group_docs)
diff --git a/haystack/components/rankers/transformers_similarity.py b/haystack/components/rankers/transformers_similarity.py
index 23ed954c4..77958b411 100644
--- a/haystack/components/rankers/transformers_similarity.py
+++ b/haystack/components/rankers/transformers_similarity.py
@@ -292,7 +292,7 @@ class TransformersSimilarityRanker:
             def __getitem__(self, item):
                 return {key: self.batch_encoding.data[key][item] for key in self.batch_encoding.data.keys()}
 
-        batch_enc = self.tokenizer(query_doc_pairs, padding=True, truncation=True, return_tensors="pt").to(  # type: ignore
+        batch_enc = self.tokenizer(query_doc_pairs, padding=True, truncation=True, return_tensors="pt").to(
             self.device.first_device.to_torch()
         )
         dataset = _Dataset(batch_enc)
@@ -301,7 +301,7 @@ class TransformersSimilarityRanker:
         similarity_scores = []
         with torch.inference_mode():
             for features in inp_dataloader:
-                model_preds = self.model(**features).logits.squeeze(dim=1)  # type: ignore
+                model_preds = self.model(**features).logits.squeeze(dim=1)
                 similarity_scores.extend(model_preds)
 
         similarity_scores = torch.stack(similarity_scores)
@@ -310,7 +310,7 @@ class TransformersSimilarityRanker:
 
         _, sorted_indices = torch.sort(similarity_scores, descending=True)
-        sorted_indices = sorted_indices.cpu().tolist()  # type: ignore
+        sorted_indices = sorted_indices.cpu().tolist()
         similarity_scores = similarity_scores.cpu().tolist()
 
         ranked_docs = []
         for sorted_index in sorted_indices:
diff --git a/haystack/components/tools/tool_invoker.py b/haystack/components/tools/tool_invoker.py
index 91646b3d1..558f9dd60 100644
--- a/haystack/components/tools/tool_invoker.py
+++ b/haystack/components/tools/tool_invoker.py
@@ -529,7 +529,7 @@ class ToolInvoker:
         with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
             futures = []
             for params in tool_call_params:
-                future = executor.submit(self._execute_single_tool_call, **params)  # type: ignore[arg-type]
+                future = executor.submit(self._execute_single_tool_call, **params)
                 futures.append(future)
 
             # 3) Process results in the order they are submitted
diff --git a/haystack/components/websearch/serper_dev.py b/haystack/components/websearch/serper_dev.py
index 930a9af34..be679cf10 100644
--- a/haystack/components/websearch/serper_dev.py
+++ b/haystack/components/websearch/serper_dev.py
@@ -110,7 +110,7 @@ class SerperDevWebSearch:
         headers = {"X-API-KEY": self.api_key.resolve_value(), "Content-Type": "application/json"}
 
         try:
-            response = requests.post(SERPERDEV_BASE_URL, headers=headers, data=payload, timeout=30)  # type: ignore
+            response = requests.post(SERPERDEV_BASE_URL, headers=headers, data=payload, timeout=30)
             response.raise_for_status()  # Will raise an HTTPError for bad responses
         except requests.Timeout as error:
             raise TimeoutError(f"Request to {self.__class__.__name__} timed out.") from error
diff --git a/haystack/core/pipeline/base.py b/haystack/core/pipeline/base.py
index 1cd01339f..27c3c0c1c 100644
--- a/haystack/core/pipeline/base.py
+++ b/haystack/core/pipeline/base.py
@@ -10,7 +10,7 @@ from enum import IntEnum
 from pathlib import Path
 from typing import Any, ContextManager, Iterator, Mapping, Optional, TextIO, TypeVar, Union
 
-import networkx  # type:ignore
+import networkx
 
 from haystack import logging, tracing
 from haystack.core.component import Component, InputSocket, OutputSocket, component
@@ -123,7 +123,7 @@ class PipelineBase:  # noqa: PLW1641
             res += f"  - {k}: {v}\n"
 
         res += "🚅 Components\n"
-        for name, instance in self.graph.nodes(data="instance"):  # type: ignore # type wrongly defined in networkx
+        for name, instance in self.graph.nodes(data="instance"):
             res += f"  - {name}: {instance.__class__.__name__}\n"
 
         res += "🛤️ Connections\n"
@@ -144,7 +144,7 @@ class PipelineBase:  # noqa: PLW1641
             Dictionary with serialized data.
         """
         components = {}
-        for name, instance in self.graph.nodes(data="instance"):  # type:ignore
+        for name, instance in self.graph.nodes(data="instance"):
             components[name] = component_to_dict(instance, name)
 
         connections = []
@@ -619,7 +619,7 @@ class PipelineBase:  # noqa: PLW1641
         :returns:
             The name of the Component instance.
         """
-        for name, inst in self.graph.nodes(data="instance"):  # type: ignore # type wrongly defined in networkx
+        for name, inst in self.graph.nodes(data="instance"):
             if inst == instance:
                 return name
         return ""
@@ -717,7 +717,7 @@ class PipelineBase:  # noqa: PLW1641
         """
         if is_in_jupyter():
-            from IPython.display import Image, display  # type: ignore
+            from IPython.display import Image, display
 
             if super_component_expansion:
                 graph, super_component_mapping = self._merge_super_component_pipelines()
 
@@ -811,7 +811,7 @@ class PipelineBase:  # noqa: PLW1641
         :returns:
             An iterator of tuples of component name and component instance.
         """
-        for component_name, instance in self.graph.nodes(data="instance"):  # type: ignore # type is wrong in networkx
+        for component_name, instance in self.graph.nodes(data="instance"):
             yield component_name, instance
 
     def warm_up(self) -> None:
diff --git a/haystack/core/pipeline/descriptions.py b/haystack/core/pipeline/descriptions.py
index b229f3082..a167aa4e9 100644
--- a/haystack/core/pipeline/descriptions.py
+++ b/haystack/core/pipeline/descriptions.py
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
-import networkx  # type:ignore
+import networkx
 
 from haystack.core.component.types import InputSocket, InputSocketTypeDescriptor, OutputSocket
 from haystack.core.type_utils import _type_name
 
diff --git a/haystack/core/pipeline/draw.py b/haystack/core/pipeline/draw.py
index 9703ba771..39a685ee3 100644
--- a/haystack/core/pipeline/draw.py
+++ b/haystack/core/pipeline/draw.py
@@ -9,7 +9,7 @@ import random
 import zlib
 from typing import Any, Optional
 
-import networkx  # type:ignore
+import networkx
 import requests
 
 from haystack import logging
diff --git a/haystack/testing/sample_components/accumulate.py b/haystack/testing/sample_components/accumulate.py
index e9e30e792..670ed8017 100644
--- a/haystack/testing/sample_components/accumulate.py
+++ b/haystack/testing/sample_components/accumulate.py
@@ -37,7 +37,7 @@ class Accumulate:
             import it at need. This is also a parameter.
         """
         self.state = 0
-        self.function: Callable = _default_function if function is None else function  # type: ignore
+        self.function: Callable = _default_function if function is None else function
 
     def to_dict(self) -> dict[str, Any]:
         """Converts the component to a dictionary"""
diff --git a/haystack/tools/parameters_schema_utils.py b/haystack/tools/parameters_schema_utils.py
index 9feea0005..707671d14 100644
--- a/haystack/tools/parameters_schema_utils.py
+++ b/haystack/tools/parameters_schema_utils.py
@@ -161,7 +161,7 @@ def _resolve_type(_type: Any) -> Any:
         return Sequence[_resolve_type(args[0]) if args else Any]  # type: ignore[misc]
 
     if origin is Union:
-        return Union[tuple(_resolve_type(a) for a in args)]  # type: ignore[misc]
+        return Union[tuple(_resolve_type(a) for a in args)]
 
     if origin is dict:
         return dict[args[0] if args else Any, _resolve_type(args[1]) if args else Any]  # type: ignore[misc]
diff --git a/pyproject.toml b/pyproject.toml
index 4d8c50adb..5c3c21cd4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -276,10 +276,12 @@ asyncio_default_fixture_loop_scope = "class"
 
 [tool.mypy]
 python_version = "3.9"
-disallow_incomplete_defs = true
-warn_return_any = false
-ignore_missing_imports = true
 check_untyped_defs = true
+disallow_incomplete_defs = true
+ignore_missing_imports = true
+warn_unused_ignores = true
+warn_redundant_casts = true
+warn_return_any = false
 
 [[tool.mypy.overrides]]
 # TODO: Fix component typings