Add Azure Cognitive Search Reader (#169)

Co-authored-by: Jerry Liu <jerryjliu98@gmail.com>
2025-12-28 07:29:57 +00:00 · 2023-04-08 07:12:21 +02:00 · 2023-04-08 07:12:21 +02:00 · f7a38ac33a
commit f7a38ac33a
parent 61de0c1648
5 changed files with 148 additions and 9 deletions
--- a/loader_hub/azcognitive_search/README.md
+++ b/loader_hub/azcognitive_search/README.md
@ -0,0 +1,63 @@
+# Azure Cognitive Search Loader
+
+The AzCognitiveSearchReader Loader returns a set of texts corresponding to documents retrieved from a specific index of Azure Cognitive Search.
+The user initializes the loader with credentials (service name and key) and the index name. 
+
+## Usage
+
+Here's an example usage of the AzCognitiveSearchReader.
+
+```python
+from llama_index import download_loader
+
+AzCognitiveSearchReader = download_loader("AzCognitiveSearchReader")
+
+reader = AzCognitiveSearchReader(
+    "<Azure_Cognitive_Search_NAME>",
+    "<Azure_Cognitive_Search_KEY>",
+    "<Index_name>"
+)
+
+
+query_sample = ""
+documents = reader.load_data(
+    query="<search_term>", content_field="<content_field_name>", filter="<azure_search_filter>"
+)
+```
+
+## Usage in combination with langchain
+
+```python
+
+    from llama_index import GPTSimpleVectorIndex, download_loader
+    from langchain.chains.conversation.memory import ConversationBufferMemory
+    from langchain.agents import Tool, AgentExecutor, load_tools, initialize_agent
+
+    AzCognitiveSearchReader = download_loader("AzCognitiveSearchReader")
+
+    az_loader = AzCognitiveSearchReader(
+            COGNITIVE_SEARCH_SERVICE_NAME,
+            COGNITIVE_SEARCH_KEY,
+            INDEX_NAME)
+
+    documents = az_loader.load_data(query, field_name)
+
+    index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)
+
+    tools = [
+        Tool(
+            name="Azure cognitive search index",
+            func=lambda q: index.query(q),
+            description=f"Useful when you want to answer questions about the text on azure cognitive search.",
+        ),
+    ]
+    memory = ConversationBufferMemory(memory_key="chat_history")
+    agent_chain = initialize_agent(
+        tools, llm, agent="zero-shot-react-description", memory=memory
+    )
+
+    result = agent_chain.run(input="How can I contact with my health insurance?")
+```
+
+
+This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples.
--- a/loader_hub/azcognitive_search/__init__.py
+++ b/loader_hub/azcognitive_search/__init__.py
@ -0,0 +1 @@
+"""Init file."""
--- a/loader_hub/azcognitive_search/base.py
+++ b/loader_hub/azcognitive_search/base.py
@ -0,0 +1,64 @@
+"""Azure Cognitive Search reader.
+A loader that fetches documents from a specific index.
+
+"""
+
+from typing import List, Optional
+
+from llama_index.readers.base import BaseReader
+from llama_index.readers.schema.base import Document
+
+
+class AzCognitiveSearchReader(BaseReader):
+    """General reader for any Azure Cognitive Search index reader.
+
+    Args:
+        service_name (str): the name of azure cognitive search service.
+        search_key (str): provide azure search access key directly.
+        index (str): index name
+    
+    """
+
+    def __init__(self, service_name: str, searck_key: str, index: str) -> None:
+        """Initialize Azure cognitive search service using the search key."""
+        import logging
+
+        from azure.core.credentials import AzureKeyCredential
+        from azure.search.documents import SearchClient
+
+        logger = logging.getLogger("azure.core.pipeline.policies.http_logging_policy")
+        logger.setLevel(logging.WARNING)
+
+        azure_credential = AzureKeyCredential(searck_key)
+
+        self.search_client = SearchClient(
+            endpoint=f"https://{service_name}.search.windows.net",
+            index_name=index,
+            credential=azure_credential,
+        )
+
+    def load_data(
+        self, query: str, content_field: str, filter: Optional[str] = None
+    ) -> List[Document]:
+        """Read data from azure cognitive search index.
+
+        Args:
+            query (str): search term in Azure Search index
+            content_field (str): field name of the document content.
+            filter (str): Filter expression. For example : 'sourcepage eq
+                'employee_handbook-3.pdf' and sourcefile eq 'employee_handbook.pdf''
+
+        Returns:
+            List[Document]: A list of documents.
+
+        """
+
+        search_result = self.search_client.search(query, filter=filter)
+
+        return [
+            Document(
+                text=result[content_field],
+                extra_info={"id": result["id"], "score": result["@search.score"]},
+            )
+            for result in search_result
+        ]
--- a/loader_hub/azcognitive_search/requirements.txt
+++ b/loader_hub/azcognitive_search/requirements.txt
@ -0,0 +1,2 @@
+azure-search-documents
+azure-identity
--- a/loader_hub/library.json
+++ b/loader_hub/library.json
@ -3,6 +3,10 @@
    "id": "asana",
    "author": "daveey"
  },
+  "AzCognitiveSearchReader": {
+    "id": "azcognitive_search",
+    "author": "mrcabellom"
+  },
  "GoogleDocsReader": {
    "id": "google_docs",
    "author": "jerryjliu"
@ -426,14 +430,16 @@
  "JiraReader": {
    "id": "jira",
    "author": "bearguy",
-    "keywords": ["jira"]
+    "keywords": [
+      "jira"
+    ]
  },
  "UnstructuredURLLoader": {
    "id": "web/unstructured_web",
    "author": "kravetsmic",
    "keywords": [
-       "unstructured.io",
-       "url"
+      "unstructured.io",
+      "url"
    ]
  },
  "GoogleSheetsReader": {
@ -448,14 +454,17 @@
      "rss"
    ]
  },
-   "FlatPdfReader": {
+  "FlatPdfReader": {
    "id": "file/flat_pdf",
    "author": "emmanuel-oliveira",
-    "keywords": ["pdf", "flat", "flattened"]
-   },
-   "MilvusReader": {
+    "keywords": [
+      "pdf",
+      "flat",
+      "flattened"
+    ]
+  },
+  "MilvusReader": {
    "id": "milvus",
    "author": "filip-halt"
-   }
-
+  }
 }