mirror of
https://github.com/Azure-Samples/graphrag-accelerator.git
synced 2025-06-27 04:39:57 +00:00
Update security compliance (#194)
This commit is contained in:
parent
ba3be1b312
commit
825750bea3
@ -28,10 +28,8 @@ RUN apt-get update && apt-get install -y \
|
||||
software-properties-common
|
||||
# install Azure CLI
|
||||
RUN curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
|
||||
RUN az bicep install
|
||||
# install kubectl
|
||||
RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" \
|
||||
&& install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
|
||||
# install bicep and kubectl
|
||||
RUN az bicep install && az aks install-cli
|
||||
# install helm
|
||||
RUN curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 \
|
||||
&& chmod 700 get_helm.sh \
|
||||
|
@ -20,11 +20,6 @@ RUN cd backend \
|
||||
# download all nltk data that graphrag requires
|
||||
RUN python -m nltk.downloader punkt averaged_perceptron_tagger maxent_ne_chunker words wordnet
|
||||
|
||||
# Note: we temporarily patch the adlfs library to enable use of managed identity. A PR has been submitted to the adlfs library.
|
||||
# See https://github.com/fsspec/adlfs/pull/480
|
||||
# TODO: remove this once PR has been merged and a new version released
|
||||
RUN sed -i '/self.credential = credential/a\ \ \ \ \ \ \ \ if kwargs.get("account_host"): self.account_host = kwargs.get("account_host")' /usr/local/lib/python3.10/site-packages/adlfs/spec.py
|
||||
|
||||
WORKDIR /backend
|
||||
EXPOSE 80
|
||||
CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "80"]
|
||||
|
@ -78,16 +78,16 @@ In the `deploy.parameters.json` file, provide values for the following required
|
||||
|
||||
| Variable | Expected Value | Required | Description
|
||||
| :--- | :--- | --- | ---: |
|
||||
`RESOURCE_GROUP` | <my_resource_group> | Yes | The resource group that GraphRAG will be deployed in. Will get created automatically if the resource group does not exist.
|
||||
`LOCATION` | <my_location> | Yes | The azure cloud region to deploy GraphRAG resources in.
|
||||
`CONTAINER_REGISTRY_NAME` | <my_container_registry_name> | No | Name of an Azure Container Registry where the `graphrag` backend docker image will be hosted. Leave off `.azurecr.io` from the name. If not provided, a unique name will be generated (recommended).
|
||||
`GRAPHRAG_IMAGE` | graphrag:backend | No | The name and tag of the graphrag docker image in the container registry. Will default to `graphrag:backend` and be hosted at `<my_container_registry_name>.azurecr.io/graphrag:backend`.
|
||||
`GRAPHRAG_API_BASE` | https://<my_openai_name>.openai.azure.com | Yes | Azure OpenAI service endpoint.
|
||||
`GRAPHRAG_API_VERSION` | 2023-03-15-preview | Yes | Azure OpenAI API version.
|
||||
`GRAPHRAG_LLM_MODEL` | gpt-4 | Yes | Name of the gpt-4 turbo model.
|
||||
`GRAPHRAG_LLM_DEPLOYMENT_NAME` | | Yes | Deployment name of the gpt-4 turbo model.
|
||||
`GRAPHRAG_EMBEDDING_MODEL` | text-embedding-ada-002 | Yes | Name of the Azure OpenAI embedding model.
|
||||
`GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME` | | Yes | Deployment name of the Azure OpenAI embedding model.
|
||||
`GRAPHRAG_EMBEDDING_MODEL` | text-embedding-ada-002 | Yes | Name of the Azure OpenAI embedding model.
|
||||
`GRAPHRAG_LLM_DEPLOYMENT_NAME` | | Yes | Deployment name of the gpt-4 turbo model.
|
||||
`GRAPHRAG_LLM_MODEL` | gpt-4 | Yes | Name of the gpt-4 turbo model.
|
||||
`LOCATION` | <my_location> | Yes | The Azure cloud region to deploy GraphRAG resources to (can be different from the location of your AOAI instance). Please use the [compressed form](https://azuretracks.com/2021/04/current-azure-region-names-reference) of a cloud region name (e.g. `eastus2`).
|
||||
`RESOURCE_GROUP` | <my_resource_group> | Yes | The resource group that GraphRAG will be deployed in. Will get created automatically if the resource group does not exist.
|
||||
`GRAPHRAG_IMAGE` | graphrag:backend | No | The name and tag of the graphrag docker image in the container registry. Will default to `graphrag:backend` and be hosted at `<my_container_registry_name>.azurecr.io/graphrag:backend`.
|
||||
`CONTAINER_REGISTRY_NAME` | <my_container_registry_name> | No | Name of an Azure Container Registry where the `graphrag` backend docker image will be hosted. Leave off `.azurecr.io` from the name. If not provided, a unique name will be generated (recommended).
|
||||
`GRAPHRAG_COGNITIVE_SERVICES_ENDPOINT` | | No | Endpoint for cognitive services identity authorization. Will default to `https://cognitiveservices.azure.com/.default` for Azure Commercial cloud but should be defined for deployments in other Azure clouds.
|
||||
`APIM_NAME` | | No | Hostname of the API. Must be a globally unique name. The API will be accessible at `https://<APIM_NAME>.azure-api.net`. If not provided a unique name will be generated.
|
||||
`APIM_TIER` | | No | The [APIM tier](https://azure.microsoft.com/en-us/pricing/details/api-management) to use. Must be either `Developer` or `StandardV2`. Will default to `Developer` for cost savings.
|
||||
|
@ -30,7 +30,7 @@ param systemOsDiskSizeGB int = 128
|
||||
param systemNodeCount int = 1
|
||||
|
||||
@description('The size of the system Virtual Machine.')
|
||||
param systemVMSize string = 'standard_d4s_v5'
|
||||
param systemVMSize string = 'standard_d4s_v5' // 4 vcpu, 16 GB memory
|
||||
|
||||
@description('The number of nodes for the graphrag node pool.')
|
||||
@minValue(1)
|
||||
@ -62,6 +62,8 @@ param ingressRoleAssignments array = []
|
||||
@description('Array of objects with fields principalType, roleDefinitionId')
|
||||
param systemRoleAssignments array = []
|
||||
|
||||
@description('Array of object ids that will have admin role of the cluster')
|
||||
param clusterAdmins array = []
|
||||
|
||||
resource privateDnsZone 'Microsoft.Network/privateDnsZones@2020-06-01' existing = {
|
||||
name: privateDnsZoneName
|
||||
@ -76,6 +78,11 @@ resource aks 'Microsoft.ContainerService/managedClusters@2024-02-01' = {
|
||||
properties: {
|
||||
enableRBAC: true
|
||||
dnsPrefix: !empty(dnsPrefix) ? dnsPrefix : toLower(clusterName)
|
||||
aadProfile: {
|
||||
managed: true
|
||||
enableAzureRBAC: true
|
||||
adminGroupObjectIDs: clusterAdmins
|
||||
}
|
||||
addonProfiles: {
|
||||
omsagent: {
|
||||
enabled: true
|
||||
|
@ -297,6 +297,16 @@ getAksCredentials () {
|
||||
printf "Getting AKS credentials... "
|
||||
az aks get-credentials -g $rg -n $aks --overwrite-existing 2>&1
|
||||
exitIfCommandFailed $? "Error getting AKS credentials, exiting..."
|
||||
kubelogin convert-kubeconfig -l azurecli
|
||||
exitIfCommandFailed $? "Error logging into AKS, exiting..."
|
||||
# get principal/object id of the signed in user
|
||||
local principalId=$(az ad signed-in-user show --output json | jq -r .id)
|
||||
exitIfValueEmpty $principalId "Principal ID of deployer not found"
|
||||
# assign "Azure Kubernetes Service RBAC Cluster Admin" role to deployer
|
||||
local scope=$(az aks show --resource-group $rg --name $aks --query "id" -o tsv)
|
||||
exitIfValueEmpty "$scope" "Unable to get AKS scope, exiting..."
|
||||
az role assignment create --role "Azure Kubernetes Service RBAC Cluster Admin" --assignee-object-id $principalId --scope $scope
|
||||
exitIfCommandFailed $? "Error assigning 'Azure Kubernetes Service RBAC Cluster Admin' role to deployer, exiting..."
|
||||
kubectl config set-context $aks --namespace=$aksNamespace
|
||||
printf "Done\n"
|
||||
}
|
||||
@ -326,6 +336,9 @@ deployAzureResources () {
|
||||
echo "Deploying Azure resources..."
|
||||
local SSH_PUBLICKEY=$(jq -r .publicKey <<< $SSHKEY_DETAILS)
|
||||
exitIfValueEmpty "$SSH_PUBLICKEY" "Unable to read ssh publickey, exiting..."
|
||||
# get principal/object id of the signed in user
|
||||
local deployerPrincipalId=$(az ad signed-in-user show --output json | jq -r .id)
|
||||
exitIfValueEmpty $deployerPrincipalId "Principal ID of deployer not found"
|
||||
local datetime="`date +%Y-%m-%d_%H-%M-%S`"
|
||||
local deployName="graphrag-deploy-$datetime"
|
||||
echo "Deployment name: $deployName"
|
||||
@ -342,6 +355,7 @@ deployAzureResources () {
|
||||
--parameters "publisherEmail=$PUBLISHER_EMAIL" \
|
||||
--parameters "enablePrivateEndpoints=$ENABLE_PRIVATE_ENDPOINTS" \
|
||||
--parameters "acrName=$CONTAINER_REGISTRY_NAME" \
|
||||
--parameters "deployerPrincipalId=$deployerPrincipalId" \
|
||||
--output json)
|
||||
# errors in deployment may not be caught by exitIfCommandFailed function so we also check the output for errors
|
||||
exitIfCommandFailed $? "Error deploying Azure resources..."
|
||||
@ -390,14 +404,14 @@ checkSKUQuotas() {
|
||||
local dsv5_limit=$(jq -r .limit <<< $dsv5_usage_report)
|
||||
local dsv5_currVal=$(jq -r .currentValue <<< $dsv5_usage_report)
|
||||
local dsv5_reqVal=$(expr $dsv5_currVal + 12)
|
||||
exitIfThresholdExceeded $dsv5_reqVal $dsv5_limit "Not enough Standard DSv5 Family vCPU quota for deployment."
|
||||
exitIfThresholdExceeded $dsv5_reqVal $dsv5_limit "Not enough Standard DSv5 Family vCPU quota for deployment. At least 12 vCPU is required."
|
||||
|
||||
# Check quota for Standard ESv5 Family vCPUs
|
||||
local esv5_usage_report=$(jq -c '.[] | select(.localName | contains("Standard ESv5 Family vCPUs"))' <<< $vm_usage_report)
|
||||
local esv5_limit=$(jq -r .limit <<< $esv5_usage_report)
|
||||
local esv5_currVal=$(jq -r .currentValue <<< $esv5_usage_report)
|
||||
local esv5_reqVal=$(expr $esv5_currVal + 8)
|
||||
exitIfThresholdExceeded $esv5_reqVal $esv5_limit "Not enough Standard ESv5 Family vCPU quota for deployment."
|
||||
exitIfThresholdExceeded $esv5_reqVal $esv5_limit "Not enough Standard ESv5 Family vCPU quota for deployment. At least 8 vCPU is required."
|
||||
printf "Done.\n"
|
||||
}
|
||||
|
||||
@ -579,7 +593,7 @@ grantDevAccessToAzureResources() {
|
||||
|
||||
# get principal/object id of the signed in user
|
||||
local principalId=$(az ad signed-in-user show --output json | jq -r .id)
|
||||
exitIfValueEmpty $principalId "Principal ID not found"
|
||||
exitIfValueEmpty $principalId "Principal ID of deployer not found"
|
||||
|
||||
# assign storage account roles
|
||||
local storageAccountName=$(az storage account list --resource-group $RESOURCE_GROUP --output json | jq -r .[0].name)
|
||||
|
@ -29,6 +29,9 @@ param graphRagName string
|
||||
@description('Cloud region for all resources')
|
||||
param location string = resourceGroup().location
|
||||
|
||||
@description('Principal/Object ID of the deployer. Will be used to assign admin roles to the AKS cluster.')
|
||||
param deployerPrincipalId string
|
||||
|
||||
@minLength(1)
|
||||
@description('Name of the publisher of the API Management instance.')
|
||||
param publisherName string
|
||||
@ -188,6 +191,7 @@ module aks 'core/aks/aks.bicep' = {
|
||||
location: location
|
||||
graphragVMSize: 'standard_d8s_v5' // 8 vcpu, 32 GB memory
|
||||
graphragIndexingVMSize: 'standard_e8s_v5' // 8 vcpus, 64 GB memory
|
||||
clusterAdmins: ['${deployerPrincipalId}']
|
||||
sshRSAPublicKey: aksSshRsaPublicKey
|
||||
logAnalyticsWorkspaceId: log.outputs.id
|
||||
subnetId: vnet.properties.subnets[1].id // aks subnet
|
||||
|
Loading…
x
Reference in New Issue
Block a user