diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 9b4c8d5..89dfa6b 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -28,10 +28,8 @@ RUN apt-get update && apt-get install -y \ software-properties-common # install Azure CLI RUN curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash -RUN az bicep install -# install kubectl -RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" \ - && install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl +# install bicep and kubectl +RUN az bicep install && az aks install-cli # install helm RUN curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 \ && chmod 700 get_helm.sh \ diff --git a/docker/Dockerfile-backend b/docker/Dockerfile-backend index 938473b..8b1e7bb 100644 --- a/docker/Dockerfile-backend +++ b/docker/Dockerfile-backend @@ -20,11 +20,6 @@ RUN cd backend \ # download all nltk data that graphrag requires RUN python -m nltk.downloader punkt averaged_perceptron_tagger maxent_ne_chunker words wordnet -# Note: we temporarily patch the adlfs library to enable use of managed identity. A PR has been submitted to the adlfs library. 
-# See https://github.com/fsspec/adlfs/pull/480 -# TODO: remove this once PR has been merged and a new version released -RUN sed -i '/self.credential = credential/a\ \ \ \ \ \ \ \ if kwargs.get("account_host"): self.account_host = kwargs.get("account_host")' /usr/local/lib/python3.10/site-packages/adlfs/spec.py - WORKDIR /backend EXPOSE 80 CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "80"] diff --git a/docs/DEPLOYMENT-GUIDE.md b/docs/DEPLOYMENT-GUIDE.md index bedbd01..aa263c5 100644 --- a/docs/DEPLOYMENT-GUIDE.md +++ b/docs/DEPLOYMENT-GUIDE.md @@ -78,16 +78,16 @@ In the `deploy.parameters.json` file, provide values for the following required | Variable | Expected Value | Required | Description | :--- | :--- | --- | ---: | -`RESOURCE_GROUP` | | Yes | The resource group that GraphRAG will be deployed in. Will get created automatically if the resource group does not exist. -`LOCATION` | | Yes | The azure cloud region to deploy GraphRAG resources in. -`CONTAINER_REGISTRY_NAME` | | No | Name of an Azure Container Registry where the `graphrag` backend docker image will be hosted. Leave off `.azurecr.io` from the name. If not provided, a unique name will be generated (recommended). -`GRAPHRAG_IMAGE` | graphrag:backend | No | The name and tag of the graphrag docker image in the container registry. Will default to `graphrag:backend` and be hosted at `my_container_registry_name>.azurecr.io/graphrag:backend`. `GRAPHRAG_API_BASE` | https://.openai.azure.com | Yes | Azure OpenAI service endpoint. `GRAPHRAG_API_VERSION` | 2023-03-15-preview | Yes | Azure OpenAI API version. -`GRAPHRAG_LLM_MODEL` | gpt-4 | Yes | Name of the gpt-4 turbo model. -`GRAPHRAG_LLM_DEPLOYMENT_NAME` | | Yes | Deployment name of the gpt-4 turbo model. -`GRAPHRAG_EMBEDDING_MODEL` | text-embedding-ada-002 | Yes | Name of the Azure OpenAI embedding model. `GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME` | | Yes | Deployment name of the Azure OpenAI embedding model. 
+`GRAPHRAG_EMBEDDING_MODEL` | text-embedding-ada-002 | Yes | Name of the Azure OpenAI embedding model. +`GRAPHRAG_LLM_DEPLOYMENT_NAME` | | Yes | Deployment name of the gpt-4 turbo model. +`GRAPHRAG_LLM_MODEL` | gpt-4 | Yes | Name of the gpt-4 turbo model. +`LOCATION` | | Yes | The Azure cloud region to deploy GraphRAG resources to (can be different from the location of your Azure OpenAI (AOAI) instance). Please use the [compressed form](https://azuretracks.com/2021/04/current-azure-region-names-reference) of a cloud region name (e.g. `eastus2`). +`RESOURCE_GROUP` | | Yes | The resource group that GraphRAG will be deployed in. Will get created automatically if the resource group does not exist. +`GRAPHRAG_IMAGE` | graphrag:backend | No | The name and tag of the graphrag docker image in the container registry. Will default to `graphrag:backend` and be hosted at `<my_container_registry_name>.azurecr.io/graphrag:backend`. +`CONTAINER_REGISTRY_NAME` | | No | Name of an Azure Container Registry where the `graphrag` backend docker image will be hosted. Leave off `.azurecr.io` from the name. If not provided, a unique name will be generated (recommended). `GRAPHRAG_COGNITIVE_SERVICES_ENDPOINT` | | No | Endpoint for cognitive services identity authorization. Will default to `https://cognitiveservices.azure.com/.default` for Azure Commercial cloud but should be defined for deployments in other Azure clouds. `APIM_NAME` | | No | Hostname of the API. Must be a globally unique name. The API will be accessible at `https://.azure-api.net`. If not provided a unique name will be generated. `APIM_TIER` | | No | The [APIM tier](https://azure.microsoft.com/en-us/pricing/details/api-management) to use. Must be either `Developer` or `StandardV2`. Will default to `Developer` for cost savings. 
diff --git a/infra/core/aks/aks.bicep b/infra/core/aks/aks.bicep index 76cfc25..b47fec5 100644 --- a/infra/core/aks/aks.bicep +++ b/infra/core/aks/aks.bicep @@ -30,7 +30,7 @@ param systemOsDiskSizeGB int = 128 param systemNodeCount int = 1 @description('The size of the system Virtual Machine.') -param systemVMSize string = 'standard_d4s_v5' +param systemVMSize string = 'standard_d4s_v5' // 4 vcpu, 16 GB memory @description('The number of nodes for the graphrag node pool.') @minValue(1) @@ -62,6 +62,8 @@ param ingressRoleAssignments array = [] @description('Array of objects with fields principalType, roleDefinitionId') param systemRoleAssignments array = [] +@description('Array of object ids that will have admin role of the cluster') +param clusterAdmins array = [] resource privateDnsZone 'Microsoft.Network/privateDnsZones@2020-06-01' existing = { name: privateDnsZoneName @@ -76,6 +78,11 @@ resource aks 'Microsoft.ContainerService/managedClusters@2024-02-01' = { properties: { enableRBAC: true dnsPrefix: !empty(dnsPrefix) ? dnsPrefix : toLower(clusterName) + aadProfile: { + managed: true + enableAzureRBAC: true + adminGroupObjectIDs: clusterAdmins + } addonProfiles: { omsagent: { enabled: true diff --git a/infra/deploy.sh b/infra/deploy.sh index 408dce8..888c758 100755 --- a/infra/deploy.sh +++ b/infra/deploy.sh @@ -297,6 +297,16 @@ getAksCredentials () { printf "Getting AKS credentials... " az aks get-credentials -g $rg -n $aks --overwrite-existing 2>&1 exitIfCommandFailed $? "Error getting AKS credentials, exiting..." + kubelogin convert-kubeconfig -l azurecli + exitIfCommandFailed $? "Error logging into AKS, exiting..." 
+ # get principal/object id of the signed in user + local principalId=$(az ad signed-in-user show --output json | jq -r .id) + exitIfValueEmpty $principalId "Principal ID of deployer not found" + # assign "Azure Kubernetes Service RBAC Cluster Admin" role to deployer + local scope=$(az aks show --resource-group $rg --name $aks --query "id" -o tsv) + exitIfValueEmpty "$scope" "Unable to get AKS scope, exiting..." + az role assignment create --role "Azure Kubernetes Service RBAC Cluster Admin" --assignee-object-id $principalId --scope $scope + exitIfCommandFailed $? "Error assigning 'Azure Kubernetes Service RBAC Cluster Admin' role to deployer, exiting..." kubectl config set-context $aks --namespace=$aksNamespace printf "Done\n" } @@ -326,6 +336,9 @@ deployAzureResources () { echo "Deploying Azure resources..." local SSH_PUBLICKEY=$(jq -r .publicKey <<< $SSHKEY_DETAILS) exitIfValueEmpty "$SSH_PUBLICKEY" "Unable to read ssh publickey, exiting..." + # get principal/object id of the signed in user + local deployerPrincipalId=$(az ad signed-in-user show --output json | jq -r .id) + exitIfValueEmpty $deployerPrincipalId "Principal ID of deployer not found" local datetime="`date +%Y-%m-%d_%H-%M-%S`" local deployName="graphrag-deploy-$datetime" echo "Deployment name: $deployName" @@ -342,6 +355,7 @@ deployAzureResources () { --parameters "publisherEmail=$PUBLISHER_EMAIL" \ --parameters "enablePrivateEndpoints=$ENABLE_PRIVATE_ENDPOINTS" \ --parameters "acrName=$CONTAINER_REGISTRY_NAME" \ + --parameters "deployerPrincipalId=$deployerPrincipalId" \ --output json) # errors in deployment may not be caught by exitIfCommandFailed function so we also check the output for errors exitIfCommandFailed $? "Error deploying Azure resources..." 
@@ -390,14 +404,14 @@ checkSKUQuotas() { local dsv5_limit=$(jq -r .limit <<< $dsv5_usage_report) local dsv5_currVal=$(jq -r .currentValue <<< $dsv5_usage_report) local dsv5_reqVal=$(expr $dsv5_currVal + 12) - exitIfThresholdExceeded $dsv5_reqVal $dsv5_limit "Not enough Standard DSv5 Family vCPU quota for deployment." + exitIfThresholdExceeded $dsv5_reqVal $dsv5_limit "Not enough Standard DSv5 Family vCPU quota for deployment. At least 12 vCPU is required." # Check quota for Standard ESv5 Family vCPUs local esv5_usage_report=$(jq -c '.[] | select(.localName | contains("Standard ESv5 Family vCPUs"))' <<< $vm_usage_report) local esv5_limit=$(jq -r .limit <<< $esv5_usage_report) local esv5_currVal=$(jq -r .currentValue <<< $esv5_usage_report) local esv5_reqVal=$(expr $esv5_currVal + 8) - exitIfThresholdExceeded $esv5_reqVal $esv5_limit "Not enough Standard ESv5 Family vCPU quota for deployment." + exitIfThresholdExceeded $esv5_reqVal $esv5_limit "Not enough Standard ESv5 Family vCPU quota for deployment. At least 8 vCPU is required." printf "Done.\n" } @@ -579,7 +593,7 @@ grantDevAccessToAzureResources() { # get principal/object id of the signed in user local principalId=$(az ad signed-in-user show --output json | jq -r .id) - exitIfValueEmpty $principalId "Principal ID not found" + exitIfValueEmpty $principalId "Principal ID of deployer not found" # assign storage account roles local storageAccountName=$(az storage account list --resource-group $RESOURCE_GROUP --output json | jq -r .[0].name) diff --git a/infra/main.bicep b/infra/main.bicep index 9f192f3..5f0c019 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -29,6 +29,9 @@ param graphRagName string @description('Cloud region for all resources') param location string = resourceGroup().location +@description('Principal/Object ID of the deployer. 
Will be used to assign admin roles to the AKS cluster.') +param deployerPrincipalId string + @minLength(1) @description('Name of the publisher of the API Management instance.') param publisherName string @@ -188,6 +191,7 @@ module aks 'core/aks/aks.bicep' = { location: location graphragVMSize: 'standard_d8s_v5' // 8 vcpu, 32 GB memory graphragIndexingVMSize: 'standard_e8s_v5' // 8 vcpus, 64 GB memory + clusterAdmins: ['${deployerPrincipalId}'] sshRSAPublicKey: aksSshRsaPublicKey logAnalyticsWorkspaceId: log.outputs.id subnetId: vnet.properties.subnets[1].id // aks subnet