| 
									
										
										
										
											2024-06-26 15:45:06 -04:00
										 |  |  | # Copyright (c) Microsoft Corporation. | 
					
						
							|  |  |  | # Licensed under the MIT License. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-04-02 17:15:26 -04:00
										 |  |  | import os | 
					
						
							| 
									
										
										
										
											2025-01-30 13:59:51 -05:00
										 |  |  | import traceback | 
					
						
							| 
									
										
										
										
											2025-04-02 21:51:28 +00:00
										 |  |  | from io import BytesIO | 
					
						
							| 
									
										
										
										
											2025-01-30 13:59:51 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-04-02 21:51:28 +00:00
										 |  |  | import networkx as nx | 
					
						
							| 
									
										
										
										
											2024-06-26 15:45:06 -04:00
										 |  |  | from fastapi import ( | 
					
						
							|  |  |  |     APIRouter, | 
					
						
							| 
									
										
										
										
											2025-01-30 13:59:51 -05:00
										 |  |  |     Depends, | 
					
						
							| 
									
										
										
										
											2024-06-26 15:45:06 -04:00
										 |  |  |     HTTPException, | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | from fastapi.responses import StreamingResponse | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-01-30 13:59:51 -05:00
										 |  |  | from graphrag_app.logger.load_logger import load_pipeline_logger | 
					
						
							| 
									
										
										
										
											2025-04-02 21:51:28 +00:00
										 |  |  | from graphrag_app.typing.models import GraphDataResponse | 
					
						
							| 
									
										
										
										
											2025-01-30 13:59:51 -05:00
										 |  |  | from graphrag_app.utils.azure_clients import AzureClientManager | 
					
						
							|  |  |  | from graphrag_app.utils.common import ( | 
					
						
							| 
									
										
										
										
											2024-06-26 15:45:06 -04:00
										 |  |  |     sanitize_name, | 
					
						
							| 
									
										
										
										
											2025-04-02 17:15:26 -04:00
										 |  |  |     subscription_key_check, | 
					
						
							| 
									
										
										
										
											2024-06-26 15:45:06 -04:00
										 |  |  |     validate_index_file_exist, | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | graph_route = APIRouter( | 
					
						
							|  |  |  |     prefix="/graph", | 
					
						
							|  |  |  |     tags=["Graph Operations"], | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2025-04-02 17:15:26 -04:00
										 |  |  | if os.getenv("KUBERNETES_SERVICE_HOST"): | 
					
						
							|  |  |  |     graph_route.dependencies.append(Depends(subscription_key_check)) | 
					
						
							| 
									
										
										
										
											2024-06-26 15:45:06 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @graph_route.get( | 
					
						
							| 
									
										
										
										
											2025-01-30 13:59:51 -05:00
										 |  |  |     "/graphml/{container_name}", | 
					
						
							| 
									
										
										
										
											2024-06-26 15:45:06 -04:00
										 |  |  |     summary="Retrieve a GraphML file of the knowledge graph", | 
					
						
							|  |  |  |     response_description="GraphML file successfully downloaded", | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2025-01-30 13:59:51 -05:00
										 |  |  | async def get_graphml_file( | 
					
						
							|  |  |  |     container_name, sanitized_container_name: str = Depends(sanitize_name) | 
					
						
							|  |  |  | ): | 
					
						
							| 
									
										
										
										
											2025-04-02 21:51:28 +00:00
										 |  |  |     logger = load_pipeline_logger() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-01-30 13:59:51 -05:00
										 |  |  |     # validate graphml file existence | 
					
						
							| 
									
										
										
										
											2024-12-30 01:59:08 -05:00
										 |  |  |     azure_client_manager = AzureClientManager() | 
					
						
							| 
									
										
										
										
											2025-01-30 13:59:51 -05:00
										 |  |  |     graphml_filename = "graph.graphml" | 
					
						
							| 
									
										
										
										
											2024-06-26 15:45:06 -04:00
										 |  |  |     blob_filepath = f"output/{graphml_filename}"  # expected file location of the graph based on the workflow | 
					
						
							| 
									
										
										
										
											2025-01-30 13:59:51 -05:00
										 |  |  |     validate_index_file_exist(sanitized_container_name, blob_filepath) | 
					
						
							| 
									
										
										
										
											2024-06-26 15:45:06 -04:00
										 |  |  |     try: | 
					
						
							| 
									
										
										
										
											2024-12-30 01:59:08 -05:00
										 |  |  |         blob_client = azure_client_manager.get_blob_service_client().get_blob_client( | 
					
						
							| 
									
										
										
										
											2025-01-30 13:59:51 -05:00
										 |  |  |             container=sanitized_container_name, blob=blob_filepath | 
					
						
							| 
									
										
										
										
											2024-06-26 15:45:06 -04:00
										 |  |  |         ) | 
					
						
							|  |  |  |         blob_stream = blob_client.download_blob().chunks() | 
					
						
							|  |  |  |         return StreamingResponse( | 
					
						
							|  |  |  |             blob_stream, | 
					
						
							|  |  |  |             media_type="application/octet-stream", | 
					
						
							| 
									
										
										
										
											2025-04-02 21:51:28 +00:00
										 |  |  |             headers={ | 
					
						
							|  |  |  |                 "Content-Disposition": f"attachment; filename={graphml_filename}", | 
					
						
							|  |  |  |                 "filename": graphml_filename, | 
					
						
							|  |  |  |             }, | 
					
						
							| 
									
										
										
										
											2024-06-26 15:45:06 -04:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2025-01-30 13:59:51 -05:00
										 |  |  |     except Exception as e: | 
					
						
							|  |  |  |         logger.error( | 
					
						
							|  |  |  |             message="Could not fetch graphml file", | 
					
						
							|  |  |  |             cause=e, | 
					
						
							|  |  |  |             stack=traceback.format_exc(), | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2024-06-26 15:45:06 -04:00
										 |  |  |         raise HTTPException( | 
					
						
							|  |  |  |             status_code=500, | 
					
						
							| 
									
										
										
										
											2025-01-30 13:59:51 -05:00
										 |  |  |             detail=f"Could not fetch graphml file for '{container_name}'.", | 
					
						
							| 
									
										
										
										
											2024-06-26 15:45:06 -04:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2025-04-02 21:51:28 +00:00
										 |  |  |      | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @graph_route.get( | 
					
						
							|  |  |  |     "/stats/{index_name}", | 
					
						
							|  |  |  |     summary="Retrieve basic graph statistics, number of nodes and edges", | 
					
						
							|  |  |  |     response_model=GraphDataResponse, | 
					
						
							|  |  |  |     responses={200: {"model": GraphDataResponse}}, | 
					
						
							|  |  |  |     response_description="Retrieve the number of nodes and edges from the index graph", | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | async def retrieve_graph_stats(index_name: str): | 
					
						
							|  |  |  |     logger = load_pipeline_logger() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # validate index_name and graphml file existence | 
					
						
							|  |  |  |     sanitized_index_name = sanitize_name(index_name) | 
					
						
							|  |  |  |     graphml_filename = "graph.graphml" | 
					
						
							|  |  |  |     graphml_filepath = f"output/{graphml_filename}"  # expected file location of the graph based on the workflow | 
					
						
							|  |  |  |     validate_index_file_exist(sanitized_index_name, graphml_filepath) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         azure_client_manager = AzureClientManager() | 
					
						
							|  |  |  |         storage_client = azure_client_manager.get_blob_service_client().get_blob_client( | 
					
						
							|  |  |  |             container=sanitized_index_name, blob=graphml_filepath | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         blob_data = storage_client.download_blob().readall() | 
					
						
							|  |  |  |         bytes_io = BytesIO(blob_data) | 
					
						
							|  |  |  |         g = nx.read_graphml(bytes_io) | 
					
						
							|  |  |  |         return GraphDataResponse(nodes=len(g.nodes), edges=len(g.edges)) | 
					
						
							|  |  |  |     except Exception: | 
					
						
							|  |  |  |         logger.error("Could not retrieve graph data file") | 
					
						
							|  |  |  |         raise HTTPException( | 
					
						
							|  |  |  |             status_code=500, | 
					
						
							|  |  |  |             detail=f"Could not retrieve graph statistics for index '{index_name}'.", | 
					
						
							|  |  |  |         ) |