mirror of
https://github.com/run-llama/llama-hub.git
synced 2025-08-13 19:21:15 +00:00
65 lines
2.0 KiB
Python
65 lines
2.0 KiB
Python
"""Azure Cognitive Search reader.
|
|
A loader that fetches documents from specific index.
|
|
|
|
"""
|
|
|
|
from typing import List, Optional
|
|
|
|
from llama_index.readers.base import BaseReader
|
|
from llama_index.readers.schema.base import Document
|
|
|
|
|
|
class AzCognitiveSearchReader(BaseReader):
|
|
"""General reader for any Azure Cognitive Search index reader.
|
|
|
|
Args:
|
|
service_name (str): the name of azure cognitive search service.
|
|
search_key (str): provide azure search access key directly.
|
|
index (str): index name
|
|
|
|
"""
|
|
|
|
def __init__(self, service_name: str, searck_key: str, index: str) -> None:
|
|
"""Initialize Azure cognitive search service using the search key."""
|
|
import logging
|
|
|
|
from azure.core.credentials import AzureKeyCredential
|
|
from azure.search.documents import SearchClient
|
|
|
|
logger = logging.getLogger("azure.core.pipeline.policies.http_logging_policy")
|
|
logger.setLevel(logging.WARNING)
|
|
|
|
azure_credential = AzureKeyCredential(searck_key)
|
|
|
|
self.search_client = SearchClient(
|
|
endpoint=f"https://{service_name}.search.windows.net",
|
|
index_name=index,
|
|
credential=azure_credential,
|
|
)
|
|
|
|
def load_data(
|
|
self, query: str, content_field: str, filter: Optional[str] = None
|
|
) -> List[Document]:
|
|
"""Read data from azure cognitive search index.
|
|
|
|
Args:
|
|
query (str): search term in Azure Search index
|
|
content_field (str): field name of the document content.
|
|
filter (str): Filter expression. For example : 'sourcepage eq
|
|
'employee_handbook-3.pdf' and sourcefile eq 'employee_handbook.pdf''
|
|
|
|
Returns:
|
|
List[Document]: A list of documents.
|
|
|
|
"""
|
|
|
|
search_result = self.search_client.search(query, filter=filter)
|
|
|
|
return [
|
|
Document(
|
|
text=result[content_field],
|
|
extra_info={"id": result["id"], "score": result["@search.score"]},
|
|
)
|
|
for result in search_result
|
|
]
|