diff --git a/backend/graphrag_app/api/data.py b/backend/graphrag_app/api/data.py
index 14c6230..f0bac4e 100644
--- a/backend/graphrag_app/api/data.py
+++ b/backend/graphrag_app/api/data.py
@@ -6,7 +6,7 @@ import re
 from math import ceil
 from typing import List
 
-from azure.storage.blob import ContainerClient
+from azure.storage.blob.aio import ContainerClient
 from fastapi import (
     APIRouter,
     Depends,
@@ -135,10 +135,12 @@ async def upload_files(
         files = [UploadFile(Cleaner(f.file), filename=f.filename) for f in files]
 
         # upload files in batches of 1000 to avoid exceeding Azure Storage API limits
-        blob_container_client = get_blob_container_client(sanitized_container_name)
+        blob_container_client = await get_blob_container_client(
+            sanitized_container_name
+        )
         batch_size = 1000
-        batches = ceil(len(files) / batch_size)
-        for i in range(batches):
+        num_batches = ceil(len(files) / batch_size)
+        for i in range(num_batches):
             batch_files = files[i * batch_size : (i + 1) * batch_size]
             tasks = [
                 upload_file_async(file, blob_container_client, overwrite)
@@ -164,16 +166,17 @@
 
 
 @data_route.delete(
-    "/{storage_name}",
+    "/{container_name}",
     summary="Delete a data storage container",
     response_model=BaseResponse,
     responses={200: {"model": BaseResponse}},
 )
-async def delete_files(container_name: str):
+async def delete_files(sanitized_container_name: str = Depends(sanitize_name)):
     """
     Delete a specified data storage container.
     """
-    sanitized_container_name = sanitize_name(container_name)
+    # sanitized_container_name = sanitize_name(container_name)
+    original_container_name = desanitize_name(sanitized_container_name)
     try:
         # delete container in Azure Storage
         delete_blob_container(sanitized_container_name)
@@ -182,11 +185,11 @@
     except Exception:
         logger = load_pipeline_logger()
         logger.error(
-            f"Error deleting container {container_name}.",
-            details={"Container": container_name},
+            f"Error deleting container {original_container_name}.",
+            details={"Container": original_container_name},
         )
         raise HTTPException(
             status_code=500,
-            detail=f"Error deleting container '{container_name}'.",
+            detail=f"Error deleting container '{original_container_name}'.",
         )
     return BaseResponse(status="Success")
diff --git a/backend/graphrag_app/api/graph.py b/backend/graphrag_app/api/graph.py
index ffc0744..c605de0 100644
--- a/backend/graphrag_app/api/graph.py
+++ b/backend/graphrag_app/api/graph.py
@@ -23,7 +23,7 @@ graph_route = APIRouter(
 
 
 @graph_route.get(
-    "/graphml/{index_name}",
+    "/graphml/{container_name}",
     summary="Retrieve a GraphML file of the knowledge graph",
     response_description="GraphML file successfully downloaded",
 )
diff --git a/backend/graphrag_app/api/index.py b/backend/graphrag_app/api/index.py
index 337fd21..9953106 100644
--- a/backend/graphrag_app/api/index.py
+++ b/backend/graphrag_app/api/index.py
@@ -209,7 +209,7 @@ def _delete_k8s_job(job_name: str, namespace: str) -> None:
 
 @index_route.delete(
-    "/{index_name}",
+    "/{container_name}",
     summary="Delete a specified index",
     response_model=BaseResponse,
     responses={200: {"model": BaseResponse}},
 )
@@ -259,7 +259,7 @@ async def delete_index(
 
 
 @index_route.get(
-    "/status/{index_name}",
+    "/status/{container_name}",
     summary="Track the status of an indexing job",
     response_model=IndexStatusResponse,
 )
diff --git a/backend/graphrag_app/utils/common.py b/backend/graphrag_app/utils/common.py
index 93d53a1..ab5ea58 100644
--- a/backend/graphrag_app/utils/common.py
+++ b/backend/graphrag_app/utils/common.py
@@ -7,8 +7,9 @@ import re
 
 import pandas as pd
 from azure.core.exceptions import ResourceNotFoundError
-from azure.cosmos import exceptions
+from azure.cosmos import ContainerProxy, exceptions
 from azure.identity import DefaultAzureCredential
+from azure.storage.blob.aio import ContainerClient
 from fastapi import HTTPException
 
 from graphrag_app.logger.load_logger import load_pipeline_logger
@@ -157,7 +158,7 @@ def validate_blob_container_name(container_name: str):
         )
 
 
-def get_cosmos_container_store_client():
+def get_cosmos_container_store_client() -> ContainerProxy:
     try:
         azure_client_manager = AzureClientManager()
         return azure_client_manager.get_cosmos_container_client(
@@ -169,7 +170,7 @@
         raise HTTPException(status_code=500, detail="Error fetching cosmosdb client.")
 
 
-async def get_blob_container_client(name: str):
+async def get_blob_container_client(name: str) -> ContainerClient:
     try:
         azure_client_manager = AzureClientManager()
         blob_service_client = azure_client_manager.get_blob_service_client_async()
@@ -183,7 +184,7 @@
         raise HTTPException(status_code=500, detail="Error fetching storage client.")
 
 
-def sanitize_name(container_name: str | None) -> str | None:
+def sanitize_name(container_name: str) -> str:
     """
     Sanitize a user-provided string to be used as an Azure Storage container name.
     Convert the string to a SHA256 hash, then truncate to 128 bit length to ensure
@@ -199,8 +200,6 @@
     Returns: str
         The sanitized name.
     """
-    if not container_name:
-        return None
     container_name = container_name.encode()
     hashed_name = hashlib.sha256(container_name)
     truncated_hash = hashed_name.digest()[:16]  # get the first 16 bytes (128 bits)
diff --git a/backend/tests/integration/test_api_data.py b/backend/tests/integration/test_api_data.py
index 054c51f..8b72c41 100644
--- a/backend/tests/integration/test_api_data.py
+++ b/backend/tests/integration/test_api_data.py
@@ -19,7 +19,7 @@ def test_upload_files(cosmos_client: CosmosClient, client):
         response = client.post(
             "/data",
             files={"files": ("test.txt", f)},
-            params={"storage_name": "testContainer"},
+            params={"container_name": "testContainer"},
         )
     # check the response
     assert response.status_code == 200
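Note: the handlers touched above converge on a single pattern. The raw container_name path parameter is hashed through sanitize_name via FastAPI's Depends, and blob access goes through the awaited async ContainerClient returned by get_blob_container_client. Below is a minimal standalone sketch of that pattern, assuming connection-string configuration and omitting the AzureClientManager wiring and error handling that the real modules use; the placeholder connection string and simplified router are illustrative, not part of this diff.

import hashlib

from azure.storage.blob.aio import BlobServiceClient, ContainerClient
from fastapi import APIRouter, Depends

data_route = APIRouter(prefix="/data")


def sanitize_name(container_name: str) -> str:
    # same scheme as the diff: SHA256-hash the user-supplied name and keep the
    # first 16 bytes (128 bits) so it fits Azure's container-name limits
    return hashlib.sha256(container_name.encode()).digest()[:16].hex()


async def get_blob_container_client(name: str) -> ContainerClient:
    # assumption: a connection string from config; the real code obtains an async
    # BlobServiceClient through AzureClientManager.get_blob_service_client_async()
    service = BlobServiceClient.from_connection_string("<storage-connection-string>")
    container_client = service.get_container_client(name)
    if not await container_client.exists():
        await container_client.create_container()
    return container_client


@data_route.delete("/{container_name}")
async def delete_files(sanitized_container_name: str = Depends(sanitize_name)):
    # FastAPI resolves sanitize_name's container_name argument from the
    # {container_name} path parameter, so the handler only ever sees the hash
    container_client = await get_blob_container_client(sanitized_container_name)
    await container_client.delete_container()
    return {"status": "Success"}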