From 72fe43a7cca34729bbaf2fe658f31cfb8042c1bb Mon Sep 17 00:00:00 2001
From: Julian Risch <julian.risch@deepset.ai>
Date: Mon, 12 Jun 2023 12:23:32 +0200
Subject: [PATCH] build: Move Azure's Form Recognizer dependency to extras
 (#5096)

* build: Move Azure's Form Recognizer dependency to extras

* try catch imports for AzureConverter

* assign None to failed imports

* use lazy import

* use forward reference in type hints
---
 haystack/nodes/file_converter/azure.py | 19 +++++++++++++------
 pyproject.toml                         |  2 +-
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/haystack/nodes/file_converter/azure.py b/haystack/nodes/file_converter/azure.py
index dda04b91e..8ddbcd049 100644
--- a/haystack/nodes/file_converter/azure.py
+++ b/haystack/nodes/file_converter/azure.py
@@ -5,17 +5,21 @@ from collections import defaultdict
 import json
 import copy
 
-from azure.ai.formrecognizer import DocumentAnalysisClient, AnalyzeResult
-from azure.core.credentials import AzureKeyCredential
 import pandas as pd
 
+from haystack.lazy_imports import LazyImport
 from haystack.nodes.file_converter.base import BaseConverter
 from haystack.errors import HaystackError
 from haystack.schema import Document
 
-
 logger = logging.getLogger(__name__)
 
+with LazyImport(
+    message="Run 'pip install farm-haystack[file-conversion]' or 'pip install azure-ai-formrecognizer>=3.2.0b2'"
+) as azure_import:  # optional dependency: AzureConverter.__init__ calls azure_import.check() before use
+    from azure.ai.formrecognizer import DocumentAnalysisClient, AnalyzeResult
+    from azure.core.credentials import AzureKeyCredential
+
 
 class AzureConverter(BaseConverter):
     """
@@ -70,6 +74,9 @@ class AzureConverter(BaseConverter):
         :param add_page_number: Adds the number of the page a table occurs in to the Document's meta field
                                 `"page"`.
         """
+        # ensure the required dependencies were actually imported
+        azure_import.check()
+
         super().__init__(valid_languages=valid_languages, id_hash_keys=id_hash_keys)
 
         self.document_analysis_client = DocumentAnalysisClient(
@@ -179,7 +186,7 @@ class AzureConverter(BaseConverter):
 
     def _convert_tables_and_text(
         self,
-        result: AnalyzeResult,
+        result: "AnalyzeResult",
         meta: Optional[Dict[str, Any]],
         valid_languages: Optional[List[str]],
         file_path: Path,
@@ -209,7 +216,7 @@ class AzureConverter(BaseConverter):
         return docs
 
     def _convert_tables(
-        self, result: AnalyzeResult, meta: Optional[Dict[str, Any]], id_hash_keys: Optional[List[str]] = None
+        self, result: "AnalyzeResult", meta: Optional[Dict[str, Any]], id_hash_keys: Optional[List[str]] = None
     ) -> List[Document]:
         converted_tables: List[Document] = []
 
@@ -310,7 +317,7 @@ class AzureConverter(BaseConverter):
         return converted_tables
 
     def _convert_text(
-        self, result: AnalyzeResult, meta: Optional[Dict[str, str]], id_hash_keys: Optional[List[str]] = None
+        self, result: "AnalyzeResult", meta: Optional[Dict[str, str]], id_hash_keys: Optional[List[str]] = None
     ) -> Document:
         text = ""
         table_spans_by_page = defaultdict(list)
diff --git a/pyproject.toml b/pyproject.toml
index 11fb76e61..7ca3082f2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -61,7 +61,6 @@ dependencies = [
   "networkx",  # graphs library
   "quantulum3",  # quantities extraction from text
   "posthog",  # telemetry
-  "azure-ai-formrecognizer>=3.2.0b2",  # forms reader
   # audio's espnet-model-zoo requires huggingface-hub version <0.8 while we need >=0.5 to be able to use create_repo in FARMReader
   "huggingface-hub>=0.5.0",
   "tenacity",  # retry decorator
@@ -143,6 +142,7 @@ preprocessing = [
   "langdetect",  # for language classification
 ]
 file-conversion = [
+  "azure-ai-formrecognizer>=3.2.0b2",  # Microsoft Azure's Form Recognizer service (text and table extractor)
   "python-docx",
   "tika",  # Apache Tika (text & metadata extractor)
   "beautifulsoup4",