# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import os
import traceback
from pathlib import Path

from fastapi import (
    APIRouter,
    Depends,
    HTTPException,
)
from graphrag.api.query import drift_search as graphrag_drift_search
from graphrag.api.query import global_search as graphrag_global_search
from graphrag.api.query import local_search as graphrag_local_search

from graphrag_app.logger.load_logger import load_pipeline_logger
from graphrag_app.typing.models import (
    GraphDriftRequest,
    GraphGlobalRequest,
    GraphLocalRequest,
    GraphResponse,
)
from graphrag_app.typing.pipeline import PipelineJobState
from graphrag_app.utils.common import (
    get_data_tables,
    sanitize_name,
    subscription_key_check,
    update_multi_index_context_data,
)
from graphrag_app.utils.pipeline import PipelineJob

query_route = APIRouter(
    prefix="/query",
    tags=["Query Operations"],
)

# When running inside Kubernetes, require a subscription key check on every
# query request (the check itself is implemented in subscription_key_check).
if os.getenv("KUBERNETES_SERVICE_HOST"):
    query_route.dependencies.append(Depends(subscription_key_check))


@query_route.post(
    "/global",
    summary="Perform a global search across the knowledge graph index",
    description="The global query method generates answers by searching over all AI-generated community reports in a map-reduce fashion. This is a resource-intensive method, but often gives good responses for questions that require an understanding of the dataset as a whole.",
    response_model=GraphResponse,
    responses={200: {"model": GraphResponse}},
)
async def global_search(request: GraphGlobalRequest):
    logger = load_pipeline_logger()
    if isinstance(request.index_name, list):
        raise HTTPException(
            status_code=501,
            detail="Multi-index query is not implemented.",
        )
    # make sure all referenced indexes have completed
    index_name_map = {
        "index_name": request.index_name,
        "sanitized_name": sanitize_name(request.index_name),
    }
    if not _is_index_complete(index_name_map["sanitized_name"]):
        raise HTTPException(
            status_code=500,
            detail=f"{index_name_map['index_name']} not ready for querying.",
        )
    try:
        data_tables = get_data_tables(
            index_name_map,
            community_level=request.community_level,
            include_local_context=False,
        )
        # perform async search
        result = await graphrag_global_search(
            config=data_tables.config,
            communities=data_tables.communities,
            entities=data_tables.entities,
            community_reports=data_tables.community_reports,
            community_level=data_tables.community_level,
            dynamic_community_selection=request.dynamic_community_selection,
            response_type=request.response_type,
            query=request.query,
        )
        context = update_multi_index_context_data(
            result[1],
            index_name=index_name_map["index_name"],
            index_id=index_name_map["sanitized_name"],
        )
        return GraphResponse(result=result[0], context_data=context)
    except Exception as e:
        logger.error(
            message="Could not perform global search.",
            cause=e,
            stack=traceback.format_exc(),
        )
        raise HTTPException(status_code=500, detail=None)
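
# Example (illustrative sketch only): a client call to the /query/global
# endpoint. Field names are inferred from the handler above and from the
# GraphGlobalRequest model; required fields, defaults, and the host/port are
# assumptions and may differ in a real deployment.
#
#   import httpx
#
#   payload = {
#       "index_name": "my-index",  # hypothetical index name
#       "query": "What are the main themes across the dataset?",
#       "community_level": 1,
#       "dynamic_community_selection": False,
#       "response_type": "Multiple Paragraphs",
#   }
#   response = httpx.post("http://localhost:8080/query/global", json=payload)
#   print(response.json()["result"])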


@query_route.post(
    "/local",
    summary="Perform a local search across the knowledge graph index.",
    description="The local query method generates answers by combining relevant data from the AI-extracted knowledge-graph with text chunks of the raw documents. This method is suitable for questions that require an understanding of specific entities mentioned in the documents (e.g. What are the healing properties of chamomile?).",
    response_model=GraphResponse,
    responses={200: {"model": GraphResponse}},
)
async def local_search(request: GraphLocalRequest):
    logger = load_pipeline_logger()
    if isinstance(request.index_name, list):
        raise HTTPException(
            status_code=501,
            detail="Multi-index query is not implemented.",
        )
    # make sure all referenced indexes have completed
    index_name_map = {
        "index_name": request.index_name,
        "sanitized_name": sanitize_name(request.index_name),
    }
    if not _is_index_complete(index_name_map["sanitized_name"]):
        raise HTTPException(
            status_code=500,
            detail=f"{index_name_map['index_name']} not ready for querying.",
        )
    try:
        data_tables = get_data_tables(
            index_name_map,
            community_level=request.community_level,
            include_local_context=True,
        )
        # perform async search
        result = await graphrag_local_search(
            config=data_tables.config,
            entities=data_tables.entities,
            community_reports=data_tables.community_reports,
            communities=data_tables.communities,
            text_units=data_tables.text_units,
            relationships=data_tables.relationships,
            covariates=data_tables.covariates,
            community_level=data_tables.community_level,
            response_type=request.response_type,
            query=request.query,
        )
        context = update_multi_index_context_data(
            result[1],
            index_name=index_name_map["index_name"],
            index_id=index_name_map["sanitized_name"],
        )
        return GraphResponse(result=result[0], context_data=context)
    except Exception as e:
        logger.error(
            message="Could not perform local search.",
            cause=e,
            stack=traceback.format_exc(),
        )
        raise HTTPException(status_code=500, detail=None)
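
# Note on the response shape (inferred from the GraphResponse usage above):
# every query endpoint returns the generated answer in `result` and the
# supporting context records in `context_data`. A minimal sketch of consuming
# it, assuming an httpx/requests-style JSON response named `response`; the
# exact structure of `context_data` depends on the search type and is an
# assumption here:
#
#   body = response.json()
#   answer = body["result"]         # LLM-generated answer text
#   context = body["context_data"]  # context records used to build the answer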


@query_route.post(
    "/drift",
    summary="Perform a drift (Dynamic Reasoning and Inference with Flexible Traversal) search across the knowledge graph index",
    description="DRIFT search offers a new approach to local search queries by incorporating community information, greatly expanding the range of facts retrieved for the final answer. This approach extends the GraphRAG query engine by adding a more comprehensive local search option that leverages community insights to refine queries into detailed follow-up questions. While resource-intensive, DRIFT search typically delivers the most accurate responses for queries that demand both a broad understanding of the entire dataset and deeper semantic knowledge about specific details.",
    response_model=GraphResponse,
    responses={200: {"model": GraphResponse}},
)
async def drift_search(request: GraphDriftRequest):
    logger = load_pipeline_logger()
    if isinstance(request.index_name, list):
        raise HTTPException(
            status_code=501,
            detail="Multi-index query is not implemented.",
        )
    # make sure all referenced indexes have completed
    index_name_map = {
        "index_name": request.index_name,
        "sanitized_name": sanitize_name(request.index_name),
    }
    if not _is_index_complete(index_name_map["sanitized_name"]):
        raise HTTPException(
            status_code=500,
            detail=f"{index_name_map['index_name']} not ready for querying.",
        )
    try:
        data_tables = get_data_tables(
            index_name_map,
            community_level=request.community_level,
            include_local_context=True,
        )
        # perform async search
        result = await graphrag_drift_search(
            config=data_tables.config,
            entities=data_tables.entities,
            community_reports=data_tables.community_reports,
            communities=data_tables.communities,
            text_units=data_tables.text_units,
            relationships=data_tables.relationships,
            community_level=data_tables.community_level,
            response_type=request.response_type,
            query=request.query,
        )
        context = update_multi_index_context_data(
            result[1],
            index_name=index_name_map["index_name"],
            index_id=index_name_map["sanitized_name"],
        )
        return GraphResponse(result=result[0], context_data=context)
    except Exception as e:
        logger.error(
            message="Could not perform drift search.",
            cause=e,
            stack=traceback.format_exc(),
        )
        raise HTTPException(status_code=500, detail=None)
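
# Example (illustrative sketch only): a DRIFT query. Per the handler above, the
# request shape mirrors the local search request (index_name, community_level,
# response_type, query); the values and host below are hypothetical.
#
#   import httpx
#
#   payload = {
#       "index_name": "my-index",
#       "query": "How is chamomile used and by whom?",
#       "response_type": "Multiple Paragraphs",
#   }
#   response = httpx.post("http://localhost:8080/query/drift", json=payload)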


def _is_index_complete(index_name: str) -> bool:
    """
    Check if an index is ready for querying.

    An index is ready for use only if it exists in the jobs table in cosmos db and
    the indexing build job has finished (i.e. 100 percent). Otherwise it is not ready.

    Args:
    -----
    index_name (str)
        Name of the index to check.

    Returns: bool
        True if the index is ready for use, False otherwise.
    """
    if PipelineJob.item_exist(index_name):
        pipeline_job = PipelineJob.load_item(index_name)
        if PipelineJobState(pipeline_job.status) == PipelineJobState.COMPLETE:
            return True
    return False