mirror of
https://github.com/run-llama/llama-hub.git
synced 2025-12-28 07:29:57 +00:00
Add Azure Cognitive Search Reader (#169)
Co-authored-by: Jerry Liu <jerryjliu98@gmail.com>
This commit is contained in:
parent
61de0c1648
commit
f7a38ac33a
63
loader_hub/azcognitive_search/README.md
Normal file
63
loader_hub/azcognitive_search/README.md
Normal file
@ -0,0 +1,63 @@
|
||||
# Azure Cognitive Search Loader
|
||||
|
||||
The AzCognitiveSearchReader Loader returns a set of texts corresponding to documents retrieved from a specific index of Azure Cognitive Search.
|
||||
The user initializes the loader with credentials (service name and key) and the index name.
|
||||
|
||||
## Usage
|
||||
|
||||
Here's an example usage of the AzCognitiveSearchReader.
|
||||
|
||||
```python
|
||||
from llama_index import download_loader
|
||||
|
||||
AzCognitiveSearchReader = download_loader("AzCognitiveSearchReader")
|
||||
|
||||
reader = AzCognitiveSearchReader(
|
||||
"<Azure_Cognitive_Search_NAME>",
|
||||
"<Azure_Cognitive_Search_KEY>",
|
||||
"<Index_name>"
|
||||
)
|
||||
|
||||
|
||||
query_sample = ""
|
||||
documents = reader.load_data(
|
||||
query="<search_term>", content_field="<content_field_name>", filter="<azure_search_filter>"
|
||||
)
|
||||
```
|
||||
|
||||
## Usage in combination with langchain
|
||||
|
||||
```python
|
||||
|
||||
from llama_index import GPTSimpleVectorIndex, download_loader
|
||||
from langchain.chains.conversation.memory import ConversationBufferMemory
|
||||
from langchain.agents import Tool, AgentExecutor, load_tools, initialize_agent
|
||||
|
||||
AzCognitiveSearchReader = download_loader("AzCognitiveSearchReader")
|
||||
|
||||
az_loader = AzCognitiveSearchReader(
|
||||
COGNITIVE_SEARCH_SERVICE_NAME,
|
||||
COGNITIVE_SEARCH_KEY,
|
||||
INDEX_NAME)
|
||||
|
||||
documents = az_loader.load_data(query, field_name)
|
||||
|
||||
index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)
|
||||
|
||||
tools = [
|
||||
Tool(
|
||||
name="Azure cognitive search index",
|
||||
func=lambda q: index.query(q),
|
||||
description="Useful when you want to answer questions about the text on Azure Cognitive Search.",
|
||||
),
|
||||
]
|
||||
memory = ConversationBufferMemory(memory_key="chat_history")
|
||||
agent_chain = initialize_agent(
|
||||
tools, llm, agent="zero-shot-react-description", memory=memory
|
||||
)
|
||||
|
||||
result = agent_chain.run(input="How can I contact my health insurance?")
|
||||
```
|
||||
|
||||
|
||||
This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples.
|
||||
1
loader_hub/azcognitive_search/__init__.py
Normal file
1
loader_hub/azcognitive_search/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
"""Init file."""
|
||||
64
loader_hub/azcognitive_search/base.py
Normal file
64
loader_hub/azcognitive_search/base.py
Normal file
@ -0,0 +1,64 @@
|
||||
"""Azure Cognitive Search reader.
|
||||
A loader that fetches documents from specific index.
|
||||
|
||||
"""
|
||||
|
||||
from typing import List, Optional
|
||||
|
||||
from llama_index.readers.base import BaseReader
|
||||
from llama_index.readers.schema.base import Document
|
||||
|
||||
|
||||
class AzCognitiveSearchReader(BaseReader):
    """Reader that loads documents from an Azure Cognitive Search index.

    Args:
        service_name (str): name of the Azure Cognitive Search service. It is
            used to build the endpoint
            ``https://<service_name>.search.windows.net``.
        searck_key (str): Azure Cognitive Search access key. The parameter
            name is misspelled ("searck"); it is kept unchanged so that
            callers passing it by keyword keep working.
        index (str): name of the index to query.

    """

    def __init__(self, service_name: str, searck_key: str, index: str) -> None:
        """Initialize the Azure Cognitive Search client using the search key."""
        import logging

        from azure.core.credentials import AzureKeyCredential
        from azure.search.documents import SearchClient

        # Restrict the Azure HTTP logging policy logger to WARNING and above.
        logger = logging.getLogger("azure.core.pipeline.policies.http_logging_policy")
        logger.setLevel(logging.WARNING)

        azure_credential = AzureKeyCredential(searck_key)

        self.search_client = SearchClient(
            endpoint=f"https://{service_name}.search.windows.net",
            index_name=index,
            credential=azure_credential,
        )

    def load_data(
        self, query: str, content_field: str, filter: Optional[str] = None
    ) -> List[Document]:
        """Read data from an Azure Cognitive Search index.

        Args:
            query (str): search term to look up in the index.
            content_field (str): name of the result field holding the
                document content; it becomes the text of each Document.
            filter (Optional[str]): filter expression passed through to the
                search client, for example
                ``sourcepage eq 'employee_handbook-3.pdf' and sourcefile eq
                'employee_handbook.pdf'``. Defaults to None.

        Returns:
            List[Document]: one Document per search hit, with the hit's
                ``id`` (None when the result has no such field) and
                ``@search.score`` stored in ``extra_info``.

        Raises:
            KeyError: if a search hit does not contain ``content_field``.

        """
        search_result = self.search_client.search(query, filter=filter)

        return [
            Document(
                text=result[content_field],
                extra_info={
                    # The key field of an index is not necessarily called
                    # "id", so do not fail when it is absent.
                    # NOTE(review): assumes each hit is a dict-like mapping,
                    # as documented for azure-search-documents -- confirm.
                    "id": result.get("id"),
                    "score": result["@search.score"],
                },
            )
            for result in search_result
        ]
|
||||
2
loader_hub/azcognitive_search/requirements.txt
Normal file
2
loader_hub/azcognitive_search/requirements.txt
Normal file
@ -0,0 +1,2 @@
|
||||
azure-search-documents
|
||||
azure-identity
|
||||
@ -3,6 +3,10 @@
|
||||
"id": "asana",
|
||||
"author": "daveey"
|
||||
},
|
||||
"AzCognitiveSearchReader": {
|
||||
"id": "azcognitive_search",
|
||||
"author": "mrcabellom"
|
||||
},
|
||||
"GoogleDocsReader": {
|
||||
"id": "google_docs",
|
||||
"author": "jerryjliu"
|
||||
@ -426,14 +430,16 @@
|
||||
"JiraReader": {
|
||||
"id": "jira",
|
||||
"author": "bearguy",
|
||||
"keywords": ["jira"]
|
||||
"keywords": [
|
||||
"jira"
|
||||
]
|
||||
},
|
||||
"UnstructuredURLLoader": {
|
||||
"id": "web/unstructured_web",
|
||||
"author": "kravetsmic",
|
||||
"keywords": [
|
||||
"unstructured.io",
|
||||
"url"
|
||||
"unstructured.io",
|
||||
"url"
|
||||
]
|
||||
},
|
||||
"GoogleSheetsReader": {
|
||||
@ -448,14 +454,17 @@
|
||||
"rss"
|
||||
]
|
||||
},
|
||||
"FlatPdfReader": {
|
||||
"FlatPdfReader": {
|
||||
"id": "file/flat_pdf",
|
||||
"author": "emmanuel-oliveira",
|
||||
"keywords": ["pdf", "flat", "flattened"]
|
||||
},
|
||||
"MilvusReader": {
|
||||
"keywords": [
|
||||
"pdf",
|
||||
"flat",
|
||||
"flattened"
|
||||
]
|
||||
},
|
||||
"MilvusReader": {
|
||||
"id": "milvus",
|
||||
"author": "filip-halt"
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user