Update security compliance (#194)

This commit is contained in:
Josh Bradley 2024-10-30 15:51:02 -04:00 committed by GitHub
parent ba3be1b312
commit 825750bea3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 38 additions and 20 deletions

View File

@ -28,10 +28,8 @@ RUN apt-get update && apt-get install -y \
software-properties-common
# install Azure CLI
RUN curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
RUN az bicep install
# install kubectl
RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" \
&& install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
# install bicep and kubectl
RUN az bicep install && az aks install-cli
# install helm
RUN curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 \
&& chmod 700 get_helm.sh \

View File

@ -20,11 +20,6 @@ RUN cd backend \
# download all nltk data that graphrag requires
RUN python -m nltk.downloader punkt averaged_perceptron_tagger maxent_ne_chunker words wordnet
# Note: we temporarily patch the adlfs library to enable use of managed identity. A PR has been submitted to the adlfs library.
# See https://github.com/fsspec/adlfs/pull/480
# TODO: remove this once PR has been merged and a new version released
RUN sed -i '/self.credential = credential/a\ \ \ \ \ \ \ \ if kwargs.get("account_host"): self.account_host = kwargs.get("account_host")' /usr/local/lib/python3.10/site-packages/adlfs/spec.py
WORKDIR /backend
EXPOSE 80
CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "80"]

View File

@ -78,16 +78,16 @@ In the `deploy.parameters.json` file, provide values for the following required
| Variable | Expected Value | Required | Description
| :--- | :--- | --- | ---: |
`RESOURCE_GROUP` | <my_resource_group> | Yes | The resource group that GraphRAG will be deployed in. Will get created automatically if the resource group does not exist.
`LOCATION` | <my_location> | Yes | The azure cloud region to deploy GraphRAG resources in.
`CONTAINER_REGISTRY_NAME` | <my_container_registry_name> | No | Name of an Azure Container Registry where the `graphrag` backend docker image will be hosted. Leave off `.azurecr.io` from the name. If not provided, a unique name will be generated (recommended).
`GRAPHRAG_IMAGE` | graphrag:backend | No | The name and tag of the graphrag docker image in the container registry. Will default to `graphrag:backend` and be hosted at `my_container_registry_name>.azurecr.io/graphrag:backend`.
`GRAPHRAG_API_BASE` | https://<my_openai_name>.openai.azure.com | Yes | Azure OpenAI service endpoint.
`GRAPHRAG_API_VERSION` | 2023-03-15-preview | Yes | Azure OpenAI API version.
`GRAPHRAG_LLM_MODEL` | gpt-4 | Yes | Name of the gpt-4 turbo model.
`GRAPHRAG_LLM_DEPLOYMENT_NAME` | | Yes | Deployment name of the gpt-4 turbo model.
`GRAPHRAG_EMBEDDING_MODEL` | text-embedding-ada-002 | Yes | Name of the Azure OpenAI embedding model.
`GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME` | | Yes | Deployment name of the Azure OpenAI embedding model.
`GRAPHRAG_EMBEDDING_MODEL` | text-embedding-ada-002 | Yes | Name of the Azure OpenAI embedding model.
`GRAPHRAG_LLM_DEPLOYMENT_NAME` | | Yes | Deployment name of the gpt-4 turbo model.
`GRAPHRAG_LLM_MODEL` | gpt-4 | Yes | Name of the gpt-4 turbo model.
`LOCATION` | <my_location> | Yes | The Azure cloud region to deploy GraphRAG resources to (can be different from the location of your AOAI instance). Please use the [compressed form](https://azuretracks.com/2021/04/current-azure-region-names-reference) of a cloud region name (e.g. `eastus2`).
`RESOURCE_GROUP` | <my_resource_group> | Yes | The resource group that GraphRAG will be deployed in. Will get created automatically if the resource group does not exist.
`GRAPHRAG_IMAGE` | graphrag:backend | No | The name and tag of the graphrag docker image in the container registry. Will default to `graphrag:backend` and be hosted at `<my_container_registry_name>.azurecr.io/graphrag:backend`.
`CONTAINER_REGISTRY_NAME` | <my_container_registry_name> | No | Name of an Azure Container Registry where the `graphrag` backend docker image will be hosted. Leave off `.azurecr.io` from the name. If not provided, a unique name will be generated (recommended).
`GRAPHRAG_COGNITIVE_SERVICES_ENDPOINT` | | No | Endpoint for cognitive services identity authorization. Will default to `https://cognitiveservices.azure.com/.default` for Azure Commercial cloud but should be defined for deployments in other Azure clouds.
`APIM_NAME` | | No | Hostname of the API. Must be a globally unique name. The API will be accessible at `https://<APIM_NAME>.azure-api.net`. If not provided, a unique name will be generated.
`APIM_TIER` | | No | The [APIM tier](https://azure.microsoft.com/en-us/pricing/details/api-management) to use. Must be either `Developer` or `StandardV2`. Will default to `Developer` for cost savings.

View File

@ -30,7 +30,7 @@ param systemOsDiskSizeGB int = 128
param systemNodeCount int = 1
@description('The size of the system Virtual Machine.')
param systemVMSize string = 'standard_d4s_v5'
param systemVMSize string = 'standard_d4s_v5' // 4 vcpu, 16 GB memory
@description('The number of nodes for the graphrag node pool.')
@minValue(1)
@ -62,6 +62,8 @@ param ingressRoleAssignments array = []
@description('Array of objects with fields principalType, roleDefinitionId')
param systemRoleAssignments array = []
@description('Array of object ids that will have admin role of the cluster')
param clusterAdmins array = []
resource privateDnsZone 'Microsoft.Network/privateDnsZones@2020-06-01' existing = {
name: privateDnsZoneName
@ -76,6 +78,11 @@ resource aks 'Microsoft.ContainerService/managedClusters@2024-02-01' = {
properties: {
enableRBAC: true
dnsPrefix: !empty(dnsPrefix) ? dnsPrefix : toLower(clusterName)
aadProfile: {
managed: true
enableAzureRBAC: true
adminGroupObjectIDs: clusterAdmins
}
addonProfiles: {
omsagent: {
enabled: true

View File

@ -297,6 +297,16 @@ getAksCredentials () {
printf "Getting AKS credentials... "
az aks get-credentials -g $rg -n $aks --overwrite-existing 2>&1
exitIfCommandFailed $? "Error getting AKS credentials, exiting..."
kubelogin convert-kubeconfig -l azurecli
exitIfCommandFailed $? "Error logging into AKS, exiting..."
# get principal/object id of the signed in user
local principalId=$(az ad signed-in-user show --output json | jq -r .id)
exitIfValueEmpty $principalId "Principal ID of deployer not found"
# assign "Azure Kubernetes Service RBAC Cluster Admin" role to deployer
local scope=$(az aks show --resource-group $rg --name $aks --query "id" -o tsv)
exitIfValueEmpty "$scope" "Unable to get AKS scope, exiting..."
az role assignment create --role "Azure Kubernetes Service RBAC Cluster Admin" --assignee-object-id $principalId --scope $scope
exitIfCommandFailed $? "Error assigning 'Azure Kubernetes Service RBAC Cluster Admin' role to deployer, exiting..."
kubectl config set-context $aks --namespace=$aksNamespace
printf "Done\n"
}
@ -326,6 +336,9 @@ deployAzureResources () {
echo "Deploying Azure resources..."
local SSH_PUBLICKEY=$(jq -r .publicKey <<< $SSHKEY_DETAILS)
exitIfValueEmpty "$SSH_PUBLICKEY" "Unable to read ssh publickey, exiting..."
# get principal/object id of the signed in user
local deployerPrincipalId=$(az ad signed-in-user show --output json | jq -r .id)
exitIfValueEmpty $deployerPrincipalId "Principal ID of deployer not found"
local datetime="`date +%Y-%m-%d_%H-%M-%S`"
local deployName="graphrag-deploy-$datetime"
echo "Deployment name: $deployName"
@ -342,6 +355,7 @@ deployAzureResources () {
--parameters "publisherEmail=$PUBLISHER_EMAIL" \
--parameters "enablePrivateEndpoints=$ENABLE_PRIVATE_ENDPOINTS" \
--parameters "acrName=$CONTAINER_REGISTRY_NAME" \
--parameters "deployerPrincipalId=$deployerPrincipalId" \
--output json)
# errors in deployment may not be caught by exitIfCommandFailed function so we also check the output for errors
exitIfCommandFailed $? "Error deploying Azure resources..."
@ -390,14 +404,14 @@ checkSKUQuotas() {
local dsv5_limit=$(jq -r .limit <<< $dsv5_usage_report)
local dsv5_currVal=$(jq -r .currentValue <<< $dsv5_usage_report)
local dsv5_reqVal=$(expr $dsv5_currVal + 12)
exitIfThresholdExceeded $dsv5_reqVal $dsv5_limit "Not enough Standard DSv5 Family vCPU quota for deployment."
exitIfThresholdExceeded $dsv5_reqVal $dsv5_limit "Not enough Standard DSv5 Family vCPU quota for deployment. At least 12 vCPU is required."
# Check quota for Standard ESv5 Family vCPUs
local esv5_usage_report=$(jq -c '.[] | select(.localName | contains("Standard ESv5 Family vCPUs"))' <<< $vm_usage_report)
local esv5_limit=$(jq -r .limit <<< $esv5_usage_report)
local esv5_currVal=$(jq -r .currentValue <<< $esv5_usage_report)
local esv5_reqVal=$(expr $esv5_currVal + 8)
exitIfThresholdExceeded $esv5_reqVal $esv5_limit "Not enough Standard ESv5 Family vCPU quota for deployment."
exitIfThresholdExceeded $esv5_reqVal $esv5_limit "Not enough Standard ESv5 Family vCPU quota for deployment. At least 8 vCPU is required."
printf "Done.\n"
}
@ -579,7 +593,7 @@ grantDevAccessToAzureResources() {
# get principal/object id of the signed in user
local principalId=$(az ad signed-in-user show --output json | jq -r .id)
exitIfValueEmpty $principalId "Principal ID not found"
exitIfValueEmpty $principalId "Principal ID of deployer not found"
# assign storage account roles
local storageAccountName=$(az storage account list --resource-group $RESOURCE_GROUP --output json | jq -r .[0].name)

View File

@ -29,6 +29,9 @@ param graphRagName string
@description('Cloud region for all resources')
param location string = resourceGroup().location
@description('Principal/Object ID of the deployer. Will be used to assign admin roles to the AKS cluster.')
param deployerPrincipalId string
@minLength(1)
@description('Name of the publisher of the API Management instance.')
param publisherName string
@ -188,6 +191,7 @@ module aks 'core/aks/aks.bicep' = {
location: location
graphragVMSize: 'standard_d8s_v5' // 8 vcpu, 32 GB memory
graphragIndexingVMSize: 'standard_e8s_v5' // 8 vcpus, 64 GB memory
clusterAdmins: ['${deployerPrincipalId}']
sshRSAPublicKey: aksSshRsaPublicKey
logAnalyticsWorkspaceId: log.outputs.id
subnetId: vnet.properties.subnets[1].id // aks subnet