// Azure OpenAI (Bicep): provisions an OpenAI account plus sequential
// LLM and text-embedding model deployments.
@description('Name of the Azure OpenAI instance')
param openAiName string = 'openai${uniqueString(resourceGroup().id)}'

@description('Location for the Azure OpenAI instance')
param location string = resourceGroup().location

@description('LLM model name')
param llmModelName string = 'gpt-4o'

@description('LLM Model API version')
param llmModelVersion string

@description('Embedding model name')
param embeddingModelName string = 'text-embedding-ada-002'

@description('Embedding Model API version')
param embeddingModelVersion string

// Quotas are expressed in units of 1,000 tokens-per-minute; a deployment
// with zero capacity is never valid, so guard against it at compile time.
@description('TPM quota for llm model deployment (x1000)')
@minValue(1)
param llmTpmQuota int = 1

@description('TPM quota for embedding model deployment (x1000)')
@minValue(1)
param embeddingTpmQuota int = 1
// Azure OpenAI account. Key-based (local) auth is disabled, so all access
// goes through Entra ID RBAC.
resource aoai 'Microsoft.CognitiveServices/accounts@2024-10-01' = {
  name: openAiName
  location: location
  sku: {
    name: 'S0'
  }
  kind: 'OpenAI'
  properties: {
    // Entra ID token authentication requires a custom-subdomain endpoint;
    // without one, disableLocalAuth would leave no usable auth path.
    customSubDomainName: openAiName
    publicNetworkAccess: 'Enabled'
    disableLocalAuth: true
  }
}
// LLM model deployment (e.g. gpt-4o). Deployment name mirrors the model name.
resource llmDeployment 'Microsoft.CognitiveServices/accounts/deployments@2024-10-01' = {
  parent: aoai
  name: llmModelName
  sku: {
    // Capacity is in units of 1,000 tokens-per-minute.
    name: 'GlobalStandard'
    capacity: llmTpmQuota
  }
  properties: {
    model: {
      format: 'OpenAI'
      name: llmModelName
      version: llmModelVersion
    }
    // NOTE(review): sku.capacity is the effective throughput setting;
    // confirm currentCapacity is honored here and not read-only in this API version.
    currentCapacity: llmTpmQuota
  }
}
// Embedding model deployment (e.g. text-embedding-ada-002).
resource embeddingDeployment 'Microsoft.CognitiveServices/accounts/deployments@2024-10-01' = {
  parent: aoai
  name: embeddingModelName
  // NOTE: simultaneous model deployments are not supported at this time. As a
  // workaround, use dependsOn to force the models to be deployed sequentially.
  dependsOn: [llmDeployment]
  sku: {
    // Capacity is in units of 1,000 tokens-per-minute.
    name: 'Standard'
    capacity: embeddingTpmQuota
  }
  properties: {
    model: {
      format: 'OpenAI'
      name: embeddingModelName
      version: embeddingModelVersion
    }
    // NOTE(review): sku.capacity is the effective throughput setting;
    // confirm currentCapacity is honored here and not read-only in this API version.
    currentCapacity: embeddingTpmQuota
  }
}
output openAiEndpoint string = aoai.properties.endpoint
output llmModel string = llmDeployment.properties.model.name
output llmModelDeploymentName string = llmDeployment.name
// NOTE(review): `.apiVersion` is the ARM resource API version (2024-10-01),
// not a model/inference API version — confirm consumers expect this value.
output llmModelApiVersion string = llmDeployment.apiVersion
output textEmbeddingModel string = embeddingDeployment.properties.model.name
output textEmbeddingModelDeploymentName string = embeddingDeployment.name
output textEmbeddingModelApiVersion string = embeddingDeployment.apiVersion