2024-06-26 15:45:06 -04:00
|
|
|
# Copyright (c) Microsoft Corporation.
|
|
|
|
# Licensed under the MIT License.
|
|
|
|
|
2025-04-02 17:15:26 -04:00
|
|
|
import os
|
2025-01-30 13:59:51 -05:00
|
|
|
import traceback
|
2025-04-02 21:51:28 +00:00
|
|
|
from io import BytesIO
|
2025-01-30 13:59:51 -05:00
|
|
|
|
2025-04-02 21:51:28 +00:00
|
|
|
import networkx as nx
|
2024-06-26 15:45:06 -04:00
|
|
|
from fastapi import (
|
|
|
|
APIRouter,
|
2025-01-30 13:59:51 -05:00
|
|
|
Depends,
|
2024-06-26 15:45:06 -04:00
|
|
|
HTTPException,
|
|
|
|
)
|
|
|
|
from fastapi.responses import StreamingResponse
|
|
|
|
|
2025-01-30 13:59:51 -05:00
|
|
|
from graphrag_app.logger.load_logger import load_pipeline_logger
|
2025-04-02 21:51:28 +00:00
|
|
|
from graphrag_app.typing.models import GraphDataResponse
|
2025-01-30 13:59:51 -05:00
|
|
|
from graphrag_app.utils.azure_clients import AzureClientManager
|
|
|
|
from graphrag_app.utils.common import (
|
2024-06-26 15:45:06 -04:00
|
|
|
sanitize_name,
|
2025-04-02 17:15:26 -04:00
|
|
|
subscription_key_check,
|
2024-06-26 15:45:06 -04:00
|
|
|
validate_index_file_exist,
|
|
|
|
)
|
|
|
|
|
|
|
|
# Router that groups the knowledge-graph endpoints under the "/graph" prefix.
graph_route = APIRouter(prefix="/graph", tags=["Graph Operations"])

# When KUBERNETES_SERVICE_HOST is set (presumably only when running inside a
# Kubernetes cluster -- confirm against the deployment), register
# subscription_key_check as a router-level dependency. This must run before
# the route decorators below so the dependency is in place when they register.
if os.environ.get("KUBERNETES_SERVICE_HOST"):
    graph_route.dependencies.append(Depends(subscription_key_check))
|
2024-06-26 15:45:06 -04:00
|
|
|
|
|
|
|
|
|
|
|
@graph_route.get(
    "/graphml/{container_name}",
    summary="Retrieve a GraphML file of the knowledge graph",
    response_description="GraphML file successfully downloaded",
)
async def get_graphml_file(
    container_name, sanitized_container_name: str = Depends(sanitize_name)
):
    """Stream the GraphML file of an index back to the caller as an attachment.

    Args:
        container_name: Name taken from the URL path; only used verbatim in
            the error detail returned to the client.
        sanitized_container_name: Result of the ``sanitize_name`` dependency;
            used as the blob container to read from.

    Returns:
        A ``StreamingResponse`` that yields the blob's chunks with an
        ``application/octet-stream`` media type and download headers.

    Raises:
        HTTPException: With status 500 when anything fails while locating the
            blob or building the response. ``validate_index_file_exist`` runs
            outside the ``try`` and may raise on its own (not visible here).
    """
    logger = load_pipeline_logger()

    client_manager = AzureClientManager()
    download_name = "graph.graphml"
    # expected file location of the graph based on the workflow
    blob_path = f"output/{download_name}"
    validate_index_file_exist(sanitized_container_name, blob_path)

    try:
        blob_service = client_manager.get_blob_service_client()
        graphml_blob = blob_service.get_blob_client(
            container=sanitized_container_name, blob=blob_path
        )
        chunk_iter = graphml_blob.download_blob().chunks()
        response_headers = {
            "Content-Disposition": f"attachment; filename={download_name}",
            "filename": download_name,
        }
        return StreamingResponse(
            chunk_iter,
            media_type="application/octet-stream",
            headers=response_headers,
        )
    except Exception as e:
        logger.error(
            message="Could not fetch graphml file",
            cause=e,
            stack=traceback.format_exc(),
        )
        raise HTTPException(
            status_code=500,
            detail=f"Could not fetch graphml file for '{container_name}'.",
        )
|
2025-04-02 21:51:28 +00:00
|
|
|
|
|
|
|
|
|
|
|
@graph_route.get(
    "/stats/{index_name}",
    summary="Retrieve basic graph statistics, number of nodes and edges",
    response_model=GraphDataResponse,
    responses={200: {"model": GraphDataResponse}},
    response_description="Retrieve the number of nodes and edges from the index graph",
)
async def retrieve_graph_stats(index_name: str):
    """Return the node and edge counts of an index's knowledge graph.

    The GraphML blob is downloaded fully into memory and parsed with networkx
    to obtain the counts.

    Args:
        index_name: Index name taken from the URL path. It is passed through
            ``sanitize_name`` to obtain the blob container name.

    Returns:
        A ``GraphDataResponse`` whose ``nodes`` and ``edges`` fields hold the
        number of nodes and edges in the parsed graph.

    Raises:
        HTTPException: With status 500 when downloading or parsing the GraphML
            file fails. ``sanitize_name`` and ``validate_index_file_exist`` run
            outside the ``try`` and may raise on their own (not visible here).
    """
    logger = load_pipeline_logger()

    # validate index_name and graphml file existence
    sanitized_index_name = sanitize_name(index_name)
    graphml_filename = "graph.graphml"
    # expected file location of the graph based on the workflow
    graphml_filepath = f"output/{graphml_filename}"
    validate_index_file_exist(sanitized_index_name, graphml_filepath)

    try:
        azure_client_manager = AzureClientManager()
        storage_client = azure_client_manager.get_blob_service_client().get_blob_client(
            container=sanitized_index_name, blob=graphml_filepath
        )
        blob_data = storage_client.download_blob().readall()
        bytes_io = BytesIO(blob_data)
        g = nx.read_graphml(bytes_io)
        return GraphDataResponse(nodes=len(g.nodes), edges=len(g.edges))
    except Exception as e:
        # Record the underlying cause and traceback (same form as
        # get_graphml_file) so a failure can be diagnosed from the logs;
        # the client still only receives the generic 500 detail below.
        logger.error(
            message="Could not retrieve graph data file",
            cause=e,
            stack=traceback.format_exc(),
        )
        raise HTTPException(
            status_code=500,
            detail=f"Could not retrieve graph statistics for index '{index_name}'.",
        )
|