# Source: haystack/.github/workflows/demo/ec2-autoscaling-group.yaml
# (317 lines, 11 KiB, YAML)

# yaml-language-server: $schema=https://raw.githubusercontent.com/awslabs/goformation/v5.2.11/schema/cloudformation.schema.json
# Stack inputs. Parameters that declare no Default must be supplied at deploy time.
Parameters:
  # Naming: Project and Environment are combined into resource names, tags,
  # the SSM parameter path and the CloudWatch log group name.
  Project:
    Type: String
    Default: haystack-demo
    Description: A project name that is used for resource names
  Environment:
    Type: String
    Default: production
    Description: An environment name that is suffixed to resource names

  # Networking: prefix of the exports this template imports
  # (-PublicSubnet1, -PublicSubnet2, -DefaultTargetGroup, -InstanceSecurityGroup).
  VPCStack:
    Type: String
    Default: haystack-demo-production-vpc
    Description: VPC stack name

  # Source code: the instance user data clones this repository/branch into
  # /opt/haystack; the commit hash is used for tagging and for the launch
  # template name.
  GitRepositoryURL:
    Type: String
    Description: Git repository to clone
  GitBranchName:
    Type: String
    Description: Name of the branch from the git repository to checkout on instance
  GitCommitHash:
    Type: String
    Description: Git commit hash that triggered this deployment

  # Instance settings forwarded to the launch template.
  InstanceType:
    Type: String
    Default: p3.2xlarge
    Description: EC2 instance type
  ImageId:
    Type: String
    Description: AMI to use for the EC2 instance
  # NOTE(review): consumed as the `Arn` of the launch template's instance
  # profile, so this presumably has to be a profile ARN - confirm with callers.
  IamInstanceProfile:
    Type: String
    Description: IAM instance profile to attach to the EC2 instance
  KeyName:
    Type: String
    Description: EC2 key pair to add to the EC2 instance
Resources:
  # Auto Scaling group running the demo instance(s) behind the target group
  # exported by the VPC stack.
  AutoScalingGroup:
    Type: AWS::AutoScaling::AutoScalingGroup
    # Creation only succeeds once one instance has reported back through
    # cfn-signal (sent by the user data script below) within 45 minutes.
    CreationPolicy:
      ResourceSignal:
        Count: "1"
        Timeout: PT45M
    # Rolling replacement: one instance at a time while keeping one in
    # service, waiting up to 45 minutes for each new instance's signal.
    # The listed scaling processes are suspended for the duration of the update.
    UpdatePolicy:
      AutoScalingRollingUpdate:
        MinInstancesInService: "1"
        MaxBatchSize: "1"
        PauseTime: PT45M
        WaitOnResourceSignals: true
        SuspendProcesses:
          - HealthCheck
          - ReplaceUnhealthy
          - AZRebalance
          - AlarmNotification
          - ScheduledActions
    Properties:
      LaunchTemplate:
        LaunchTemplateId: !Ref InstanceLaunchTemplate
        Version: !GetAtt InstanceLaunchTemplate.LatestVersionNumber
      # Subnets and target group are imported from the stack named by VPCStack.
      VPCZoneIdentifier:
        - !ImportValue
          "Fn::Sub": "${VPCStack}-PublicSubnet1"
        - !ImportValue
          "Fn::Sub": "${VPCStack}-PublicSubnet2"
      # MaxSize is one above DesiredCapacity so a rolling update can start the
      # replacement instance while the old one is still in service.
      MaxSize: "2"
      DesiredCapacity: "1"
      MinSize: "1"
      TargetGroupARNs:
        - !ImportValue
          "Fn::Sub": "${VPCStack}-DefaultTargetGroup"
      Tags:
        - Key: Name
          Value: !Sub ${Project}-${Environment}
          PropagateAtLaunch: true
        - Key: Project
          Value: !Ref Project
          PropagateAtLaunch: true
        - Key: Environment
          Value: !Ref Environment
          PropagateAtLaunch: true
        - Key: GitCommitHash
          Value: !Ref GitCommitHash
          PropagateAtLaunch: true

  # CloudWatch agent configuration stored in SSM; the instance user data
  # fetches it by name with `amazon-cloudwatch-agent-ctl -c ssm:<name>`.
  ParameterAmazonAgentConfig:
    Type: AWS::SSM::Parameter
    Properties:
      Name: !Sub /deepset/${Project}/${Environment}/ec2/amazon-cloudwatch-agent-config
      Type: String
      Tags:
        Name: !Sub /deepset/${Project}/${Environment}/ec2/amazon-cloudwatch-agent-config
        Project: !Ref Project
        Environment: !Ref Environment
        GitCommitHash: !Ref GitCommitHash
      # "${!...}" is the Fn::Sub escape that leaves a literal "${...}" in the
      # value for the agent to resolve. The `disk` section filters by mount
      # point (not by device), hence "/" there; `diskio` filters by device.
      Value: !Sub |
        {
          "agent": {
            "metrics_collection_interval": 60,
            "logfile": "/var/log/amazon-cloudwatch-agent.log",
            "debug": true
          },
          "logs": {
            "logs_collected": {
              "files": {
                "collect_list": [
                  {
                    "file_path": "/var/log/cloud-init-output.log",
                    "log_group_name": "/deepset/${Project}/${Environment}/instance",
                    "log_stream_name": "{instance_id}_/var/log/cloud-init-output.log"
                  },
                  {
                    "file_path": "/var/log/cloud-init.log",
                    "log_group_name": "/deepset/${Project}/${Environment}/instance",
                    "log_stream_name": "{instance_id}_/var/log/cloud-init.log"
                  },
                  {
                    "file_path": "/var/log/syslog",
                    "log_group_name": "/deepset/${Project}/${Environment}/instance",
                    "log_stream_name": "{instance_id}_/var/log/syslog"
                  }
                ]
              }
            }
          },
          "metrics": {
            "append_dimensions": {
              "AutoScalingGroupName": "${!aws:AutoScalingGroupName}",
              "InstanceId": "${!aws:InstanceId}"
            },
            "aggregation_dimensions": [
              [
                "AutoScalingGroupName"
              ]
            ],
            "metrics_collected": {
              "collectd": {
                "metrics_aggregation_interval": 60
              },
              "statsd": {
                "metrics_aggregation_interval": 60,
                "metrics_collection_interval": 1,
                "service_address": ":8125"
              },
              "cpu": {
                "measurement": [
                  "cpu_usage_idle",
                  "cpu_usage_iowait",
                  "cpu_usage_user",
                  "cpu_usage_system"
                ],
                "totalcpu": true
              },
              "disk": {
                "measurement": [
                  "used_percent",
                  "inodes_free"
                ],
                "resources": [
                  "/"
                ]
              },
              "diskio": {
                "measurement": [
                  "io_time",
                  "write_bytes",
                  "read_bytes",
                  "writes",
                  "reads"
                ],
                "resources": [
                  "/dev/xvda"
                ]
              },
              "mem": {
                "measurement": [
                  "mem_used_percent"
                ]
              }
            }
          }
        }

  # Launch template for the demo instance. Its name embeds the commit hash,
  # so each deployed commit gets a distinctly named template.
  InstanceLaunchTemplate:
    Type: AWS::EC2::LaunchTemplate
    Properties:
      LaunchTemplateName: !Sub ${Project}-${Environment}-${GitCommitHash}
      LaunchTemplateData:
        InstanceType: !Ref InstanceType
        ImageId: !Ref ImageId
        IamInstanceProfile:
          Arn: !Ref IamInstanceProfile
        KeyName: !Ref KeyName
        SecurityGroupIds:
          - !ImportValue
            "Fn::Sub": "${VPCStack}-InstanceSecurityGroup"
        Monitoring:
          Enabled: true
        EbsOptimized: true
        BlockDeviceMappings:
          - DeviceName: /dev/sda1
            Ebs:
              VolumeSize: 200
        # The same four tags are applied to the instance, its volumes and its
        # network interfaces.
        TagSpecifications:
          - ResourceType: instance
            Tags:
              - Key: Name
                Value: !Sub ${Project}-${Environment}
              - Key: Project
                Value: !Ref Project
              - Key: Environment
                Value: !Ref Environment
              - Key: GitCommitHash
                Value: !Ref GitCommitHash
          - ResourceType: volume
            Tags:
              - Key: Name
                Value: !Sub ${Project}-${Environment}
              - Key: Project
                Value: !Ref Project
              - Key: Environment
                Value: !Ref Environment
              - Key: GitCommitHash
                Value: !Ref GitCommitHash
          - ResourceType: network-interface
            Tags:
              - Key: Name
                Value: !Sub ${Project}-${Environment}
              - Key: Project
                Value: !Ref Project
              - Key: Environment
                Value: !Ref Environment
              - Key: GitCommitHash
                Value: !Ref GitCommitHash
        # Boot script. Fn::Sub replaces only "${...}" references (template
        # parameters, pseudo parameters and the SSM parameter name); plain
        # "$VAR" and "$(...)" are left for bash.
        # Steps: CloudWatch agent -> cfn-bootstrap -> Docker, docker-compose and
        # the NVIDIA runtime -> clone and start Haystack -> signal the stack.
        UserData:
          Fn::Base64: !Sub |
            #!/bin/bash -x
            echo 'APT::Periodic::Update-Package-Lists "0";
            APT::Periodic::Unattended-Upgrade "0";' > /etc/apt/apt.conf.d/20auto-upgrades
            export DEBIAN_FRONTEND=noninteractive
            apt-get update -q
            apt-get install -yq collectd
            curl https://s3.amazonaws.com/amazoncloudwatch-agent/ubuntu/amd64/latest/amazon-cloudwatch-agent.deb -O
            dpkg -i -E ./amazon-cloudwatch-agent.deb
            /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -s -c ssm:${ParameterAmazonAgentConfig}
            systemctl enable amazon-cloudwatch-agent.service
            systemctl restart amazon-cloudwatch-agent
            mkdir -p /opt/aws/bin
            wget https://s3.amazonaws.com/cloudformation-examples/aws-cfn-bootstrap-py3-latest.tar.gz
            python3 -m easy_install --script-dir /opt/aws/bin aws-cfn-bootstrap-py3-latest.tar.gz
            # From here on any failing command, including one inside a pipeline,
            # reports failure to CloudFormation and aborts the script.
            set -eo pipefail
            trap '/opt/aws/bin/cfn-signal --exit-code 1 --stack ${AWS::StackId} --resource AutoScalingGroup --region ${AWS::Region}' ERR
            echo "Deploying Haystack demo, commit ${GitCommitHash}"
            apt-get install -yq curl git ca-certificates gnupg lsb-release
            curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
            echo \
              "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu \
              $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null
            apt-get update -q
            apt-get install -yq docker-ce docker-ce-cli containerd.io
            # Install Docker compose (-f: fail on HTTP errors instead of saving the error page)
            curl -fL "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/bin/docker-compose
            chmod +x /usr/bin/docker-compose
            # Install Nvidia container runtime
            curl -fsL https://nvidia.github.io/nvidia-container-runtime/gpgkey | \
              apt-key add -
            distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
            curl -fsL https://nvidia.github.io/nvidia-container-runtime/$distribution/nvidia-container-runtime.list | \
              tee /etc/apt/sources.list.d/nvidia-container-runtime.list
            apt-get update -q
            apt-get install -yq nvidia-container-runtime
            # Setup and start Docker
            groupadd docker || true
            usermod -aG docker $USER || true
            newgrp docker || true
            systemctl unmask docker
            systemctl restart docker
            # Check that containers can see the GPUs
            docker run --rm --gpus all ubuntu nvidia-smi
            # Clone and start Haystack
            git clone --branch "${GitBranchName}" "${GitRepositoryURL}" /opt/haystack
            cd /opt/haystack
            export COMPOSE_FILE=docker-compose-gpu.yml:.github/workflows/demo/docker-compose.demo.yml
            export COMPOSE_HTTP_TIMEOUT=300
            docker-compose pull
            docker-compose up -d
            /opt/aws/bin/cfn-signal --exit-code $? --stack ${AWS::StackId} --resource AutoScalingGroup --region ${AWS::Region}
# Stack outputs.
Outputs:
  # Same log group name that the CloudWatch agent configuration in this
  # template uses for the collected instance log files.
  LogGroupName:
    Value: !Sub "/deepset/${Project}/${Environment}/instance"
    Description: CloudWatch log group to send instance logs