Add Azure Cognitive Search Reader (#169)

Co-authored-by: Jerry Liu <jerryjliu98@gmail.com>
This commit is contained in:
mrcabellom 2023-04-08 07:12:21 +02:00 committed by GitHub
parent 61de0c1648
commit f7a38ac33a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 148 additions and 9 deletions

View File

@ -0,0 +1,63 @@
# Azure Cognitive Search Loader
The AzCognitiveSearchReader Loader returns a set of texts corresponding to documents retrieved from a specific index of Azure Cognitive Search.
The user initializes the loader with credentials (service name and key) and the index name.
## Usage
Here's an example usage of the AzCognitiveSearchReader.
```python
from llama_index import download_loader
AzCognitiveSearchReader = download_loader("AzCognitiveSearchReader")
reader = AzCognitiveSearchReader(
"<Azure_Cognitive_Search_NAME>",
"<Azure_Cognitive_Search_KEY>",
"<Index_name>"
)
query_sample = ""
documents = reader.load_data(
query="<search_term>", content_field="<content_field_name>", filter="<azure_search_filter>"
)
```
## Usage in combination with langchain
```python
from llama_index import GPTSimpleVectorIndex, download_loader
from langchain.chains.conversation.memory import ConversationBufferMemory
from langchain.agents import Tool, AgentExecutor, load_tools, initialize_agent
AzCognitiveSearchReader = download_loader("AzCognitiveSearchReader")
az_loader = AzCognitiveSearchReader(
COGNITIVE_SEARCH_SERVICE_NAME,
COGNITIVE_SEARCH_KEY,
INDEX_NAME)
documents = az_loader.load_data(query, field_name)
index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)
tools = [
Tool(
name="Azure cognitive search index",
func=lambda q: index.query(q),
description="Useful when you want to answer questions about the text on azure cognitive search.",
),
]
memory = ConversationBufferMemory(memory_key="chat_history")
agent_chain = initialize_agent(
tools, llm, agent="zero-shot-react-description", memory=memory
)
result = agent_chain.run(input="How can I contact my health insurance?")
```
This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples.

View File

@ -0,0 +1 @@
"""Init file."""

View File

@ -0,0 +1,64 @@
"""Azure Cognitive Search reader.
A loader that fetches documents from a specific index.
"""
from typing import List, Optional
from llama_index.readers.base import BaseReader
from llama_index.readers.schema.base import Document
class AzCognitiveSearchReader(BaseReader):
    """Reader that loads documents from one Azure Cognitive Search index.

    Args:
        service_name (str): name of the Azure Cognitive Search service; it is
            interpolated into ``https://<service_name>.search.windows.net``.
        searck_key (str): access key for the search service. The parameter
            name is spelled exactly as in the signature so that existing
            keyword callers keep working.
        index (str): name of the index to query.
    """

    def __init__(self, service_name: str, searck_key: str, index: str) -> None:
        """Build the search client from the service name, key and index."""
        import logging

        from azure.core.credentials import AzureKeyCredential
        from azure.search.documents import SearchClient

        # Raise the threshold of the Azure HTTP logging policy logger so that
        # only WARNING and above are emitted by it.
        logging.getLogger(
            "azure.core.pipeline.policies.http_logging_policy"
        ).setLevel(logging.WARNING)

        credential = AzureKeyCredential(searck_key)
        endpoint = f"https://{service_name}.search.windows.net"
        self.search_client = SearchClient(
            endpoint=endpoint,
            index_name=index,
            credential=credential,
        )

    def load_data(
        self, query: str, content_field: str, filter: Optional[str] = None
    ) -> List[Document]:
        """Run a search against the index and wrap each hit in a Document.

        Args:
            query (str): search term passed to the search client.
            content_field (str): key of each result whose value becomes the
                document text.
            filter (Optional[str]): filter expression forwarded to the search
                call, for example ``sourcefile eq 'employee_handbook.pdf'``.
                Defaults to None.

        Returns:
            List[Document]: one Document per search result, whose extra_info
            holds the result's ``id`` and ``@search.score`` values.
        """
        hits = self.search_client.search(query, filter=filter)

        documents: List[Document] = []
        for hit in hits:
            # Read the content first, then the metadata, so lookups happen in
            # the same order as the fields are passed to Document.
            text = hit[content_field]
            metadata = {"id": hit["id"], "score": hit["@search.score"]}
            documents.append(Document(text=text, extra_info=metadata))
        return documents

View File

@ -0,0 +1,2 @@
azure-search-documents
azure-identity

View File

@ -3,6 +3,10 @@
"id": "asana",
"author": "daveey"
},
"AzCognitiveSearchReader": {
"id": "azcognitive_search",
"author": "mrcabellom"
},
"GoogleDocsReader": {
"id": "google_docs",
"author": "jerryjliu"
@ -426,14 +430,16 @@
"JiraReader": {
"id": "jira",
"author": "bearguy",
"keywords": ["jira"]
"keywords": [
"jira"
]
},
"UnstructuredURLLoader": {
"id": "web/unstructured_web",
"author": "kravetsmic",
"keywords": [
"unstructured.io",
"url"
"unstructured.io",
"url"
]
},
"GoogleSheetsReader": {
@ -448,14 +454,17 @@
"rss"
]
},
"FlatPdfReader": {
"FlatPdfReader": {
"id": "file/flat_pdf",
"author": "emmanuel-oliveira",
"keywords": ["pdf", "flat", "flattened"]
},
"MilvusReader": {
"keywords": [
"pdf",
"flat",
"flattened"
]
},
"MilvusReader": {
"id": "milvus",
"author": "filip-halt"
}
}
}