diff --git a/backend/src/api/prompt_tuning.py b/backend/src/api/prompt_tuning.py index 6c8c90c..8c784f6 100644 --- a/backend/src/api/prompt_tuning.py +++ b/backend/src/api/prompt_tuning.py @@ -44,7 +44,7 @@ async def generate_prompts(storage_name: str, limit: int = 5): this_directory = os.path.dirname( os.path.abspath(inspect.getfile(inspect.currentframe())) ) - data = yaml.safe_load(open(f"{this_directory}/pipeline-settings.yaml")) + data = yaml.safe_load(open(f"{this_directory}/../indexer/settings.yaml")) data["input"]["container_name"] = sanitized_storage_name graphrag_config = create_graphrag_config(values=data, root_dir=".") diff --git a/backend/src/main.py b/backend/src/main.py index 6e215b9..a2c1faa 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -6,6 +6,7 @@ import traceback from contextlib import asynccontextmanager import yaml +from azure.cosmos import PartitionKey, ThroughputProperties from fastapi import ( FastAPI, Request, @@ -49,10 +50,18 @@ def intialize_cosmosdb_setup(): """Initialise CosmosDB (if necessary) by setting up a database and containers that are expected at startup time.""" azure_client_manager = AzureClientManager() client = azure_client_manager.get_cosmos_client() - client.create_database_if_not_exists("graphrag") - client.get_database_client("graphrag").create_container_if_not_exists("jobs", "/id") - client.get_database_client("graphrag").create_container_if_not_exists( - "container-store", "/id" + db_client = client.create_database_if_not_exists("graphrag") + # create containers with default settings + throughput = ThroughputProperties( + auto_scale_max_throughput=1000, auto_scale_increment_percent=1 + ) + db_client.create_container_if_not_exists( + id="jobs", partition_key=PartitionKey(path="/id"), offer_throughput=throughput + ) + db_client.create_container_if_not_exists( + id="container-store", + partition_key=PartitionKey(path="/id"), + offer_throughput=throughput, ) diff --git a/infra/core/acr/acr.bicep b/infra/core/acr/acr.bicep index c5d9a50..41ec2c6 100644 --- a/infra/core/acr/acr.bicep +++ b/infra/core/acr/acr.bicep @@ -1,12 +1,12 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + @description('The name of the Container Registry resource. Will be automatically generated if not provided.') param registryName string @description('The location of the Container Registry resource.') param location string = resourceGroup().location -@description('Array of objects with fields principalId, principalType, roleDefinitionId') -param roleAssignments array = [] - resource registry 'Microsoft.ContainerRegistry/registries@2023-11-01-preview' = { name: registryName location: location @@ -27,14 +27,6 @@ resource registry 'Microsoft.ContainerRegistry/registries@2023-11-01-preview' = } } -resource roleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ - for role in roleAssignments: { - name: guid('${role.principalId}-${role.principalType}-${role.roleDefinitionId}') - scope: resourceGroup() - properties: role - } -] - output name string = registry.name output id string = registry.id output loginServer string = registry.properties.loginServer diff --git a/infra/core/aks/aks.bicep b/infra/core/aks/aks.bicep index 68903d4..8d349b7 100644 --- a/infra/core/aks/aks.bicep +++ b/infra/core/aks/aks.bicep @@ -50,12 +50,6 @@ param subnetId string param privateDnsZoneName string -@description('Array of objects with fields principalType, roleDefinitionId') -param ingressRoleAssignments array = [] - -@description('Array of objects with fields principalType, roleDefinitionId') -param systemRoleAssignments array = [] - @description('Array of object ids that will have admin role of the cluster') param clusterAdmins array = [] @@ -221,35 +215,11 @@ resource aksManagedNodeOSUpgradeSchedule 'Microsoft.ContainerService/managedClus } } -// role assignment to ingress identity -resource webAppRoutingPrivateDnsContributor 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ - for role in ingressRoleAssignments: { - name: guid(subscription().subscriptionId, resourceGroup().name, role.roleDefinitionId, privateDnsZone.id) - scope: resourceGroup() - properties: { - principalId: aks.properties.ingressProfile.webAppRouting.identity.objectId - principalType: role.principalType - roleDefinitionId: role.roleDefinitionId - } - } -] - -// role assignment to AKS system identity -resource systemRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ - for role in systemRoleAssignments: { - name: guid(subscription().subscriptionId, resourceGroup().name, role.roleDefinitionId, aks.id) - scope: resourceGroup() - properties: { - principalId: aks.identity.principalId - principalType: role.principalType - roleDefinitionId: role.roleDefinitionId - } - } -] - output name string = aks.name output id string = aks.id output managedResourceGroup string = aks.properties.nodeResourceGroup output controlPlaneFqdn string = aks.properties.fqdn output kubeletPrincipalId string = aks.properties.identityProfile.kubeletidentity.objectId +output ingressWebAppIdentity string = aks.properties.ingressProfile.webAppRouting.identity.objectId +output systemIdentity string = aks.identity.principalId output issuer string = aks.properties.oidcIssuerProfile.issuerURL diff --git a/infra/core/cosmosdb/cosmosdb.bicep b/infra/core/cosmosdb/cosmosdb.bicep index 1aecdb4..e6ebc21 100644 --- a/infra/core/cosmosdb/cosmosdb.bicep +++ b/infra/core/cosmosdb/cosmosdb.bicep @@ -7,22 +7,9 @@ param cosmosDbName string @description('The location of the CosmosDB resource.') param location string = resourceGroup().location -@allowed([ 'Enabled', 'Disabled' ]) +@allowed(['Enabled', 'Disabled']) param publicNetworkAccess string = 'Disabled' -@description('Role definition id to assign to the principal. Learn more: https://learn.microsoft.com/en-us/azure/cosmos-db/how-to-setup-rbac') -@allowed([ - '00000000-0000-0000-0000-000000000001' // 'Cosmos DB Built-in Data Reader' role - '00000000-0000-0000-0000-000000000002' // 'Cosmos DB Built-in Data Contributor' role -]) -param roleDefinitionId array = [ - '00000000-0000-0000-0000-000000000001' - '00000000-0000-0000-0000-000000000002' -] - -param principalId string - - resource cosmosDb 'Microsoft.DocumentDB/databaseAccounts@2024-11-15' = { name: cosmosDbName location: location @@ -82,104 +69,6 @@ resource cosmosDb 'Microsoft.DocumentDB/databaseAccounts@2024-11-15' = { } } -resource graphragDatabase 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases@2024-11-15' = { - parent: cosmosDb - name: 'graphrag' - properties: { - resource: { - id: 'graphrag' - } - } -} - -resource jobsContainer 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers@2024-11-15' = { - parent: graphragDatabase - name: 'jobs' - properties: { - resource: { - id: 'jobs' - indexingPolicy: { - indexingMode: 'consistent' - automatic: true - includedPaths: [ - { - path: '/*' - } - ] - excludedPaths: [ - { - path: '/"_etag"/?' - } - ] - } - partitionKey: { - paths: [ - '/id' - ] - kind: 'Hash' - version: 2 - } - uniqueKeyPolicy: { - uniqueKeys: [] - } - conflictResolutionPolicy: { - mode: 'LastWriterWins' - conflictResolutionPath: '/_ts' - } - } - } -} - -resource containerStoreContainer 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers@2024-11-15' = { - parent: graphragDatabase - name: 'container-store' - properties: { - resource: { - id: 'container-store' - indexingPolicy: { - indexingMode: 'consistent' - automatic: true - includedPaths: [ - { - path: '/*' - } - ] - excludedPaths: [ - { - path: '/"_etag"/?' - } - ] - } - partitionKey: { - paths: [ - '/id' - ] - kind: 'Hash' - version: 2 - } - uniqueKeyPolicy: { - uniqueKeys: [] - } - conflictResolutionPolicy: { - mode: 'LastWriterWins' - conflictResolutionPath: '/_ts' - } - } - } -} - -resource sqlRoleAssignment 'Microsoft.DocumentDB/databaseAccounts/sqlRoleAssignments@2024-11-15' = [ - for roleId in roleDefinitionId: { - name: guid('${roleId}-${principalId}-${cosmosDb.id}') - parent: cosmosDb - properties: { - roleDefinitionId: '${resourceGroup().id}/providers/Microsoft.DocumentDB/databaseAccounts/${cosmosDb.name}/sqlRoleDefinitions/${roleId}' - principalId: principalId - scope: cosmosDb.id - } - } -] - output name string = cosmosDb.name output id string = cosmosDb.id output endpoint string = cosmosDb.properties.documentEndpoint diff --git a/infra/core/rbac/aks-rbac.bicep b/infra/core/rbac/aks-rbac.bicep new file mode 100644 index 0000000..9124c4f --- /dev/null +++ b/infra/core/rbac/aks-rbac.bicep @@ -0,0 +1,20 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +@description('Array of objects with fields principalId, principalType, roleDefinitionId') +param roleAssignments array = [] + +resource roleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ + for role in roleAssignments: { + // note: the guid must be globally unique and deterministic (reproducible) across Azure + name: guid( + subscription().subscriptionId, + resourceGroup().name, + role.principalId, + role.principalType, + role.roleDefinitionId + ) + scope: resourceGroup() + properties: role + } +] diff --git a/infra/core/rbac/workload-identity-rbac.bicep b/infra/core/rbac/workload-identity-rbac.bicep new file mode 100644 index 0000000..92feaef --- /dev/null +++ b/infra/core/rbac/workload-identity-rbac.bicep @@ -0,0 +1,92 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +@description('ID of the service principal to assign the RBAC roles to.') +param principalId string + +@description('Type of principal to assign the RBAC roles to.') +@allowed(['ServicePrincipal', 'User', 'Group', 'Device', 'ForeignGroup']) +param principalType string + +@description('Name of an existing CosmosDB resource.') +param cosmosDbName string + +@description('Role definitions for various roles that will be assigned at deployment time. Learn more: https://learn.microsoft.com/en-us/azure/role-based-access-control/built-in-roles') +var roleDefinitions = [ + { + id: 'ba92f5b4-2d11-453d-a403-e96b0029c9fe' // Storage Blob Data Contributor Role + } + { + id: 'b24988ac-6180-42a0-ab88-20f7382dd24c' // AI Search Contributor Role + } + { + id: '8ebe5a00-799e-43f5-93ac-243d3dce84a7' // AI Search Index Data Contributor Role + } + { + id: '1407120a-92aa-4202-b7e9-c0e197c71c8f' // AI Search Index Data Reader Role + } + { + id: 'a001fd3d-188f-4b5d-821b-7da978bf7442' // Cognitive Services OpenAI Contributor + } + { + id: '3913510d-42f4-4e42-8a64-420c390055eb' // Monitoring Metrics Publisher Role + } +] + +resource roleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ + for roleDef in roleDefinitions: { + // note: the guid must be globally unique and deterministic (reproducible) across Azure + name: guid(subscription().subscriptionId, resourceGroup().name, principalId, principalType, roleDef.id) + scope: resourceGroup() + properties: { + principalId: principalId + principalType: principalType + roleDefinitionId: resourceId('Microsoft.Authorization/roleDefinitions', roleDef.id) + } + } +] + +resource cosmosDb 'Microsoft.DocumentDB/databaseAccounts@2024-12-01-preview' existing = { + name: cosmosDbName +} + +var customRoleName = 'Custom cosmosDB role for graphrag - adds read/write permissions at the database and container level' +resource customCosmosRoleDefinition 'Microsoft.DocumentDB/databaseAccounts/sqlRoleDefinitions@2024-12-01-preview' = { + // note: the guid must be globally unique and deterministic (reproducible) across Azure + name: guid(subscription().subscriptionId, resourceGroup().name, cosmosDb.id, customRoleName) // guid is used to ensure uniqueness + parent: cosmosDb + properties: { + roleName: customRoleName + type: 'CustomRole' + assignableScopes: [ + cosmosDb.id + ] + permissions: [ + { + dataActions: [ + 'Microsoft.DocumentDB/databaseAccounts/readMetadata' + 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers/*' + 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers/items/*' + 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/write' + ] + } + ] + } +} + +resource assignment 'Microsoft.DocumentDB/databaseAccounts/sqlRoleAssignments@2024-12-01-preview' = { + // note: the guid must be globally unique and deterministic (reproducible) across Azure + name: guid( + subscription().subscriptionId, + resourceGroup().name, + cosmosDb.id, + customCosmosRoleDefinition.id, + principalId + ) + parent: cosmosDb + properties: { + principalId: principalId + roleDefinitionId: customCosmosRoleDefinition.id + scope: cosmosDb.id + } +} diff --git a/infra/core/workload-rbac.bicep b/infra/core/workload-rbac.bicep deleted file mode 100644 index 80e36b7..0000000 --- a/infra/core/workload-rbac.bicep +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -@description('ID of the service principal to assign the RBAC roles to.') -param principalId string - -@description('Type of principal to assign the RBAC roles to.') -@allowed(['ServicePrincipal', 'User', 'Group', 'Device', 'ForeignGroup']) -param principalType string - -@description('Role definitions for various roles that will be assigned at deployment time. Learn more: https://learn.microsoft.com/en-us/azure/role-based-access-control/built-in-roles') -var roleDefinitions = [ - { - id: 'ba92f5b4-2d11-453d-a403-e96b0029c9fe' // Storage Blob Data Contributor Role - } - { - id: 'b24988ac-6180-42a0-ab88-20f7382dd24c' // AI Search Contributor Role - } - { - id: '8ebe5a00-799e-43f5-93ac-243d3dce84a7' // AI Search Index Data Contributor Role - } - { - id: '1407120a-92aa-4202-b7e9-c0e197c71c8f' // AI Search Index Data Reader Role - } - { - id: 'a001fd3d-188f-4b5d-821b-7da978bf7442' // Cognitive Services OpenAI Contributor - } - { - id: '3913510d-42f4-4e42-8a64-420c390055eb' // Monitoring Metrics Publisher Role - } -] - -resource roleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ - for roleDef in roleDefinitions: { - name: guid(subscription().subscriptionId, resourceGroup().name, principalId, principalType, roleDef.id) - scope: resourceGroup() - properties: { - principalId: principalId - principalType: principalType - roleDefinitionId: resourceId('Microsoft.Authorization/roleDefinitions', roleDef.id) - } - } -] diff --git a/infra/main.bicep b/infra/main.bicep index 67043dd..d0e9cf3 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -70,7 +70,7 @@ var dnsDomain = 'graphrag.io' var appHostname = 'graphrag.${dnsDomain}' var appUrl = 'http://${appHostname}' -@description('Role definitions for various roles that will be assigned at deployment time. Learn more: https://learn.microsoft.com/en-us/azure/role-based-access-control/built-in-roles') +@description('Role definitions for various RBAC roles that will be assigned at deployment time. Learn more: https://learn.microsoft.com/en-us/azure/role-based-access-control/built-in-roles') var roles = { privateDnsZoneContributor: resourceId( 'Microsoft.Authorization/roleDefinitions', @@ -84,17 +84,39 @@ var roles = { 'Microsoft.Authorization/roleDefinitions', '7f951dda-4ed3-4680-a7ca-43fe172d538d' // ACR Pull Role ) - monitoringMetricsPublisher: resourceId( - 'Microsoft.Authorization/roleDefinitions', - '3913510d-42f4-4e42-8a64-420c390055eb' // Monitoring Metrics Publisher Role - ) } -module workloadIdentityRBACAssignments 'core/workload-rbac.bicep' = { - name: 'workload-rbac-assignments' +// apply RBAC role assignments to the AKS workload identity +module aksWorkloadIdentityRBAC 'core/rbac/workload-identity-rbac.bicep' = { + name: 'aks-workload-identity-rbac-assignments' params: { principalId: workloadIdentity.outputs.principalId principalType: 'ServicePrincipal' + cosmosDbName: cosmosdb.outputs.name + } +} + +// apply necessary RBAC role assignments to the AKS service +module aksRBAC 'core/rbac/aks-rbac.bicep' = { + name: 'aks-rbac-assignments' + params: { + roleAssignments: [ + { + principalId: aks.outputs.kubeletPrincipalId + principalType: 'ServicePrincipal' + roleDefinitionId: roles.acrPull + } + { + principalId: aks.outputs.ingressWebAppIdentity + principalType: 'ServicePrincipal' + roleDefinitionId: roles.privateDnsZoneContributor + } + { + principalId: aks.outputs.systemIdentity + principalType: 'ServicePrincipal' + roleDefinitionId: roles.networkContributor + } + ] } } @@ -170,13 +192,6 @@ module acr 'core/acr/acr.bicep' = { params: { registryName: !empty(acrName) ? acrName : '${abbrs.containerRegistryRegistries}${resourceBaseNameFinal}' location: location - roleAssignments: [ - { - principalId: aks.outputs.kubeletPrincipalId - principalType: 'ServicePrincipal' - roleDefinitionId: roles.acrPull - } - ] } } @@ -191,18 +206,6 @@ module aks 'core/aks/aks.bicep' = { logAnalyticsWorkspaceId: log.outputs.id subnetId: vnet.properties.subnets[1].id // aks subnet privateDnsZoneName: privateDnsZone.outputs.name - ingressRoleAssignments: [ - { - principalType: 'ServicePrincipal' - roleDefinitionId: roles.privateDnsZoneContributor - } - ] - systemRoleAssignments: [ - { - principalType: 'ServicePrincipal' - roleDefinitionId: roles.networkContributor - } - ] } } @@ -212,7 +215,6 @@ module cosmosdb 'core/cosmosdb/cosmosdb.bicep' = { cosmosDbName: !empty(cosmosDbName) ? cosmosDbName : '${abbrs.documentDBDatabaseAccounts}${resourceBaseNameFinal}' location: location publicNetworkAccess: enablePrivateEndpoints ? 'Disabled' : 'Enabled' - principalId: workloadIdentity.outputs.principalId } }