AB
Complete your AI application deployment with comprehensive CI/CD pipelines, monitoring, and production-ready operations. Part 2 covers GitHub Actions, Docker optimization, and monitoring setup.
Welcome back! In Part 1, we built the foundational AWS infrastructure using Terraform. Now we’ll complete the deployment pipeline with CI/CD automation, monitoring, and production-ready operations.
First, let’s complete our ECS module. Create `modules/ecs/main.tf`:
# ECS cluster with Container Insights enabled, which publishes the
# per-service CPU/memory metrics consumed by the dashboard and alarms.
resource "aws_ecs_cluster" "main" {
  name = "${var.project_name}-${var.environment}-cluster"

  setting {
    name  = "containerInsights"
    value = "enabled"
  }

  tags = {
    Name = "${var.project_name}-${var.environment}-cluster"
  }
}

# Internet-facing Application Load Balancer shared by both services.
# NOTE(review): deletion protection is disabled to keep teardown simple;
# consider enabling it for the prod environment.
resource "aws_lb" "main" {
  name               = "${var.project_name}-${var.environment}-alb"
  internal           = false
  load_balancer_type = "application"
  security_groups    = [var.alb_security_group_id]
  subnets            = var.public_subnets

  enable_deletion_protection = false

  tags = {
    Name = "${var.project_name}-${var.environment}-alb"
  }
}
# Target groups for the two ECS services. target_type = "ip" is required
# for Fargate tasks using awsvpc networking.
#
# Fix: ALB target group names are limited to 32 characters. With the
# project name "caption-generator" used by this tutorial, the previous
# "...-frontend-tg" name was 33 characters and would fail to create, so
# short "fe"/"be" suffixes are used instead.
resource "aws_lb_target_group" "frontend" {
  name        = "${var.project_name}-${var.environment}-fe-tg"
  port        = 3000
  protocol    = "HTTP"
  vpc_id      = var.vpc_id
  target_type = "ip"

  health_check {
    enabled             = true
    healthy_threshold   = 2
    interval            = 30
    matcher             = "200"
    path                = "/"
    port                = "traffic-port"
    protocol            = "HTTP"
    timeout             = 5
    unhealthy_threshold = 2
  }

  tags = {
    Name = "${var.project_name}-${var.environment}-frontend-tg"
  }
}

resource "aws_lb_target_group" "backend" {
  name        = "${var.project_name}-${var.environment}-be-tg"
  port        = 5000
  protocol    = "HTTP"
  vpc_id      = var.vpc_id
  target_type = "ip"

  health_check {
    enabled             = true
    healthy_threshold   = 2
    interval            = 30
    matcher             = "200"
    # The backend exposes a dedicated /health endpoint.
    path                = "/health"
    port                = "traffic-port"
    protocol            = "HTTP"
    timeout             = 5
    unhealthy_threshold = 2
  }

  tags = {
    Name = "${var.project_name}-${var.environment}-backend-tg"
  }
}
# HTTP listener: all traffic defaults to the frontend target group.
resource "aws_lb_listener" "main" {
  load_balancer_arn = aws_lb.main.arn
  port              = "80"
  protocol          = "HTTP"

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.frontend.arn
  }
}

# Path-based routing: API, caption-generation, and health-check requests
# are forwarded to the backend instead of the frontend default.
resource "aws_lb_listener_rule" "backend" {
  listener_arn = aws_lb_listener.main.arn
  priority     = 100

  action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.backend.arn
  }

  condition {
    path_pattern {
      values = ["/api/*", "/generate-captions", "/health"]
    }
  }
}
# ECR repositories with vulnerability scanning on push. Tags are mutable
# so the CI pipeline can repoint ":latest" at each new build.
resource "aws_ecr_repository" "frontend" {
  name                 = "${var.project_name}-frontend"
  image_tag_mutability = "MUTABLE"

  image_scanning_configuration {
    scan_on_push = true
  }
}

resource "aws_ecr_repository" "backend" {
  name                 = "${var.project_name}-backend"
  image_tag_mutability = "MUTABLE"

  image_scanning_configuration {
    scan_on_push = true
  }
}
# Execution role: used by the ECS agent to pull images from ECR and ship
# container logs to CloudWatch (via the AWS-managed policy below).
resource "aws_iam_role" "ecs_task_execution_role" {
  name = "${var.project_name}-${var.environment}-ecs-task-execution-role"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Principal = {
          Service = "ecs-tasks.amazonaws.com"
        }
      }
    ]
  })
}

resource "aws_iam_role_policy_attachment" "ecs_task_execution_role" {
  role       = aws_iam_role.ecs_task_execution_role.name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
}

# Task role: assumed by the application containers themselves, granting
# the S3 and SSM Parameter Store access defined in this module's policy.
resource "aws_iam_role" "ecs_task_role" {
  name = "${var.project_name}-${var.environment}-ecs-task-role"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Principal = {
          Service = "ecs-tasks.amazonaws.com"
        }
      }
    ]
  })
}
# Least-privilege policy for the task role: read-only SSM parameters under
# the project/environment prefix, plus object CRUD in the upload bucket.
resource "aws_iam_policy" "ecs_task_policy" {
  name        = "${var.project_name}-${var.environment}-ecs-task-policy"
  description = "IAM policy for ECS tasks"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = [
          "ssm:GetParameter",
          "ssm:GetParameters",
          "ssm:GetParametersByPath"
        ]
        Resource = [
          "arn:aws:ssm:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:parameter/${var.project_name}/${var.environment}/*"
        ]
      },
      {
        Effect = "Allow"
        Action = [
          "s3:GetObject",
          "s3:PutObject",
          "s3:DeleteObject"
        ]
        Resource = [
          "${var.upload_bucket_arn}/*"
        ]
      }
    ]
  })
}

resource "aws_iam_role_policy_attachment" "ecs_task_role" {
  role       = aws_iam_role.ecs_task_role.name
  policy_arn = aws_iam_policy.ecs_task_policy.arn
}

# Current region and account id, used to build ARNs without hard-coding.
data "aws_region" "current" {}
data "aws_caller_identity" "current" {}
# Task definition for the Next.js frontend (Fargate, 0.25 vCPU / 512 MB).
resource "aws_ecs_task_definition" "frontend" {
  family                   = "${var.project_name}-${var.environment}-frontend"
  network_mode             = "awsvpc"
  requires_compatibilities = ["FARGATE"]
  cpu                      = 256
  memory                   = 512
  execution_role_arn       = aws_iam_role.ecs_task_execution_role.arn
  task_role_arn            = aws_iam_role.ecs_task_role.arn

  container_definitions = jsonencode([
    {
      name  = "frontend"
      image = "${aws_ecr_repository.frontend.repository_url}:latest"

      portMappings = [
        {
          containerPort = 3000
          protocol      = "tcp"
        }
      ]

      environment = [
        {
          name  = "NODE_ENV"
          value = var.environment == "prod" ? "production" : "development"
        },
        {
          # The browser reaches the backend through the shared ALB.
          name  = "NEXT_PUBLIC_API_URL"
          value = "http://${aws_lb.main.dns_name}"
        }
      ]

      logConfiguration = {
        logDriver = "awslogs"
        options = {
          awslogs-group         = aws_cloudwatch_log_group.frontend.name
          awslogs-region        = data.aws_region.current.name
          awslogs-stream-prefix = "ecs"
        }
      }

      essential = true
    }
  ])
}
# Task definition for the Flask backend (Fargate, 1 vCPU / 2 GB — sized
# larger than the frontend because it performs AI caption generation).
resource "aws_ecs_task_definition" "backend" {
  family                   = "${var.project_name}-${var.environment}-backend"
  network_mode             = "awsvpc"
  requires_compatibilities = ["FARGATE"]
  cpu                      = 1024
  memory                   = 2048
  execution_role_arn       = aws_iam_role.ecs_task_execution_role.arn
  task_role_arn            = aws_iam_role.ecs_task_role.arn

  container_definitions = jsonencode([
    {
      name  = "backend"
      image = "${aws_ecr_repository.backend.repository_url}:latest"

      portMappings = [
        {
          containerPort = 5000
          protocol      = "tcp"
        }
      ]

      # The API key is injected at runtime from SSM Parameter Store, so it
      # never appears in the task definition or the image.
      secrets = [
        {
          name      = "GOOGLE_API_KEY"
          valueFrom = "arn:aws:ssm:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:parameter/${var.project_name}/${var.environment}/google-api-key"
        }
      ]

      environment = [
        {
          name  = "FLASK_ENV"
          value = var.environment == "prod" ? "production" : "development"
        },
        {
          name  = "S3_BUCKET_NAME"
          value = var.upload_bucket_name
        }
      ]

      logConfiguration = {
        logDriver = "awslogs"
        options = {
          awslogs-group         = aws_cloudwatch_log_group.backend.name
          awslogs-region        = data.aws_region.current.name
          awslogs-stream-prefix = "ecs"
        }
      }

      essential = true
    }
  ])
}
# Log groups for the awslogs driver; 7-day retention keeps costs low.
resource "aws_cloudwatch_log_group" "frontend" {
  name              = "/ecs/${var.project_name}-${var.environment}-frontend"
  retention_in_days = 7
}

resource "aws_cloudwatch_log_group" "backend" {
  name              = "/ecs/${var.project_name}-${var.environment}-backend"
  retention_in_days = 7
}
# ECS services. Both run in private subnets without public IPs; all
# inbound traffic arrives through the ALB.
resource "aws_ecs_service" "frontend" {
  name            = "${var.project_name}-${var.environment}-frontend"
  cluster         = aws_ecs_cluster.main.id
  task_definition = aws_ecs_task_definition.frontend.arn
  desired_count   = var.environment == "prod" ? 2 : 1
  launch_type     = "FARGATE"

  network_configuration {
    security_groups  = [var.ecs_security_group_id]
    subnets          = var.private_subnets
    assign_public_ip = false
  }

  load_balancer {
    target_group_arn = aws_lb_target_group.frontend.arn
    container_name   = "frontend"
    container_port   = 3000
  }

  # The listener must exist before the service can register targets.
  depends_on = [aws_lb_listener.main]
}

resource "aws_ecs_service" "backend" {
  name            = "${var.project_name}-${var.environment}-backend"
  cluster         = aws_ecs_cluster.main.id
  task_definition = aws_ecs_task_definition.backend.arn
  desired_count   = var.environment == "prod" ? 2 : 1
  launch_type     = "FARGATE"

  network_configuration {
    security_groups  = [var.ecs_security_group_id]
    subnets          = var.private_subnets
    assign_public_ip = false
  }

  load_balancer {
    target_group_arn = aws_lb_target_group.backend.arn
    container_name   = "backend"
    container_port   = 5000
  }

  # Backend traffic is routed by the path-based listener rule.
  depends_on = [aws_lb_listener_rule.backend]
}
## CI/CD Pipeline with GitHub Actions

### Step 1: Repository Setup
Create the following directory structure in your application repository:
```bash
.github/
└── workflows/
├── ci.yml
├── deploy-dev.yml
├── deploy-staging.yml
└── deploy-prod.yml
```

Create `.github/workflows/ci.yml`:
name: Continuous Integration

on:
  pull_request:
    branches: [main, develop]
  push:
    branches: [develop]

env:
  AWS_REGION: us-east-1

jobs:
  test-backend:
    name: Test Backend
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python 3.9
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML does not coerce the version to a float.
          python-version: "3.9"

      - name: Cache pip dependencies
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('backend/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          cd backend
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install pytest pytest-cov black flake8 bandit safety

      - name: Code formatting check (Black)
        run: |
          cd backend
          black --check --diff .

      - name: Linting (Flake8)
        run: |
          cd backend
          flake8 . --max-line-length=88 --extend-ignore=E203,W503

      # Report-only scans: "|| true" keeps the job green while still
      # producing the JSON artifacts for review.
      - name: Security scan (Bandit)
        run: |
          cd backend
          bandit -r . -f json -o bandit-report.json || true

      - name: Dependency vulnerability scan
        run: |
          cd backend
          safety check --json --output safety-report.json || true

      - name: Run unit tests
        run: |
          cd backend
          echo "No tests for backend"
        env:
          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY_TEST }}

      # NOTE(review): no coverage.xml is produced while the test step is a
      # placeholder; this upload is a no-op until real tests exist.
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        with:
          file: backend/coverage.xml
          flags: backend
          name: backend-coverage

  test-frontend:
    name: Test Frontend
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: 20
          cache: npm
          cache-dependency-path: frontend/package-lock.json

      - name: Install dependencies
        run: |
          cd frontend
          npm ci

      - name: ESLint check
        run: |
          cd frontend
          npm run lint

      - name: Type checking
        run: |
          cd frontend
          npx tsc --noEmit

      - name: Run unit tests
        run: |
          cd frontend
          echo "No tests for frontend"

      - name: Build application
        run: |
          cd frontend
          npm run build
        env:
          NEXT_PUBLIC_API_URL: http://localhost:5000

      # NOTE(review): placeholder like the backend upload above.
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        with:
          file: frontend/coverage/lcov.info
          flags: frontend
          name: frontend-coverage

  sonarqube-analysis:
    name: SonarQube Analysis
    runs-on: ubuntu-latest
    needs: [test-backend, test-frontend]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Full history so SonarQube can attribute new code accurately.
          fetch-depth: 0

      # Fix: "sonarqube-quality-gate-action@master" is not a valid action
      # reference (missing owner) and checks a quality gate instead of
      # running an analysis. The official scanner action performs the scan
      # (and, with sonar.qualitygate.wait=true, enforces the gate too).
      - name: SonarQube Scan
        uses: sonarsource/sonarqube-scan-action@master
        env:
          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
          SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }}

  security-scan:
    name: Security Scanning
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        with:
          scan-type: "fs"
          scan-ref: "."
          format: "sarif"
          output: "trivy-results.sarif"

      # Fix: codeql-action v2 is deprecated; v3 is a drop-in replacement.
      - name: Upload Trivy scan results
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: "trivy-results.sarif"
Create `.github/workflows/deploy-dev.yml`:
name: Deploy to Development

on:
  push:
    branches: [develop]
  workflow_dispatch:

env:
  AWS_REGION: us-east-1
  ECR_REPOSITORY_BACKEND: caption-generator-backend
  ECR_REPOSITORY_FRONTEND: caption-generator-frontend
  ECS_CLUSTER: caption-generator-dev-cluster
  ECS_SERVICE_BACKEND: caption-generator-dev-backend
  ECS_SERVICE_FRONTEND: caption-generator-dev-frontend

jobs:
  deploy:
    name: Deploy to Development
    runs-on: ubuntu-latest
    environment: development
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}

      - name: Login to Amazon ECR
        id: login-ecr
        uses: aws-actions/amazon-ecr-login@v1

      # Fix: the GitHub Actions cache backend (cache-from/cache-to:
      # type=gha) only works with the docker-container driver, so Buildx
      # must be set up explicitly before docker/build-push-action runs.
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Extract metadata for backend
        id: meta-backend
        uses: docker/metadata-action@v4
        with:
          images: ${{ steps.login-ecr.outputs.registry }}/${{ env.ECR_REPOSITORY_BACKEND }}
          tags: |
            type=ref,event=branch
            type=sha,prefix={{branch}}-
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Extract metadata for frontend
        id: meta-frontend
        uses: docker/metadata-action@v4
        with:
          images: ${{ steps.login-ecr.outputs.registry }}/${{ env.ECR_REPOSITORY_FRONTEND }}
          tags: |
            type=ref,event=branch
            type=sha,prefix={{branch}}-
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Build and push backend image
        uses: docker/build-push-action@v4
        with:
          context: ./backend
          push: true
          tags: ${{ steps.meta-backend.outputs.tags }}
          labels: ${{ steps.meta-backend.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Build and push frontend image
        uses: docker/build-push-action@v4
        with:
          context: ./frontend
          push: true
          tags: ${{ steps.meta-frontend.outputs.tags }}
          labels: ${{ steps.meta-frontend.outputs.labels }}
          build-args: |
            NEXT_PUBLIC_API_URL=${{ secrets.DEV_API_URL }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

      # The task definitions reference ":latest", so forcing a new
      # deployment is enough to pick up the freshly pushed images.
      - name: Deploy to ECS
        run: |
          # Update backend service
          aws ecs update-service \
            --cluster ${{ env.ECS_CLUSTER }} \
            --service ${{ env.ECS_SERVICE_BACKEND }} \
            --force-new-deployment \
            --region ${{ env.AWS_REGION }}

          # Update frontend service
          aws ecs update-service \
            --cluster ${{ env.ECS_CLUSTER }} \
            --service ${{ env.ECS_SERVICE_FRONTEND }} \
            --force-new-deployment \
            --region ${{ env.AWS_REGION }}

      - name: Wait for deployment to complete
        run: |
          echo "Waiting for backend deployment to complete..."
          aws ecs wait services-stable \
            --cluster ${{ env.ECS_CLUSTER }} \
            --services ${{ env.ECS_SERVICE_BACKEND }} \
            --region ${{ env.AWS_REGION }}

          echo "Waiting for frontend deployment to complete..."
          aws ecs wait services-stable \
            --cluster ${{ env.ECS_CLUSTER }} \
            --services ${{ env.ECS_SERVICE_FRONTEND }} \
            --region ${{ env.AWS_REGION }}

      - name: Verify deployment
        run: |
          # Get ALB DNS name
          ALB_DNS=$(aws elbv2 describe-load-balancers \
            --names caption-generator-dev-alb \
            --query 'LoadBalancers[0].DNSName' \
            --output text \
            --region ${{ env.AWS_REGION }})

          # Test frontend
          echo "Testing frontend at http://$ALB_DNS"
          curl -f "http://$ALB_DNS" || exit 1

          # Test backend health
          echo "Testing backend health at http://$ALB_DNS/health"
          curl -f "http://$ALB_DNS/health" || exit 1

      - name: Notify deployment status
        if: always()
        run: |
          # Fix: quote the interpolated status so the test is well-formed
          # shell even if the value were empty.
          if [ "${{ job.status }}" = "success" ]; then
            echo "✅ Development deployment successful!"
          else
            echo "❌ Development deployment failed!"
            exit 1
          fi
Update your `backend/Dockerfile`:
# Multi-stage build for the Python Flask backend.
FROM python:3.9-slim AS base

# Install system dependencies.
# Fix: curl is required by the HEALTHCHECK below but is not part of
# python:3.9-slim, so it must be installed explicitly.
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    g++ \
    ffmpeg \
    curl \
    libpq-dev \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Create non-root user for security
RUN groupadd -r appuser && useradd -r -g appuser appuser

# Copy requirements first for better layer caching
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Production stage
FROM base AS production

# Copy application code
COPY . .

# Change ownership to the non-root user
RUN chown -R appuser:appuser /app
USER appuser

# Health check against the Flask /health endpoint
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:5000/health || exit 1

# Expose port
EXPOSE 5000

# Run application
CMD ["python", "app.py"]
Update your `frontend/Dockerfile`:
# Multi-stage build for the Next.js frontend.
FROM node:20-alpine AS base

# Install dependencies for node-gyp
RUN apk add --no-cache libc6-compat python3 make g++

WORKDIR /app

# Copy package files
COPY package*.json ./

# Dependency stage.
# Fix: the build stage needs devDependencies (TypeScript, linters, build
# tooling), so a full `npm ci` is required here — the previous
# `npm ci --only=production` broke `npm run build`. The runner stage only
# copies the standalone output, so devDependencies never reach the final
# image anyway.
FROM base AS deps
RUN npm ci && npm cache clean --force

# Build stage
FROM base AS builder

# Copy dependencies
COPY --from=deps /app/node_modules ./node_modules
COPY . .

# Set build-time environment variable (baked into the client bundle)
ARG NEXT_PUBLIC_API_URL
ENV NEXT_PUBLIC_API_URL=$NEXT_PUBLIC_API_URL

# Build application
RUN npm run build

# Production stage
FROM node:20-alpine AS runner
WORKDIR /app

# Create non-root user
RUN addgroup -g 1001 -S nodejs && adduser -S nextjs -u 1001

# Copy built application (Next.js standalone output)
COPY --from=builder /app/public ./public
COPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./
COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static

USER nextjs

EXPOSE 3000

ENV PORT=3000
ENV HOSTNAME="0.0.0.0"

# Health check.
# Fix: curl is not installed in alpine images; use BusyBox wget instead.
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
    CMD wget -q --spider http://localhost:3000/api/health || exit 1

CMD ["node", "server.js"]
Create `monitoring/cloudwatch-dashboard.tf`:
# CloudWatch dashboard: one widget for ECS service CPU/memory (from
# Container Insights) and one for ALB traffic/latency/error counts.
# NOTE(review): the widget region is hard-coded to us-east-1; parameterize
# it if this stack is ever deployed to another region.
resource "aws_cloudwatch_dashboard" "main" {
  dashboard_name = "${var.project_name}-${var.environment}-dashboard"

  dashboard_body = jsonencode({
    widgets = [
      {
        type   = "metric"
        x      = 0
        y      = 0
        width  = 12
        height = 6
        properties = {
          metrics = [
            ["AWS/ECS", "CPUUtilization", "ServiceName", "${var.project_name}-${var.environment}-frontend", "ClusterName", "${var.project_name}-${var.environment}-cluster"],
            ["AWS/ECS", "CPUUtilization", "ServiceName", "${var.project_name}-${var.environment}-backend", "ClusterName", "${var.project_name}-${var.environment}-cluster"],
            ["AWS/ECS", "MemoryUtilization", "ServiceName", "${var.project_name}-${var.environment}-frontend", "ClusterName", "${var.project_name}-${var.environment}-cluster"],
            ["AWS/ECS", "MemoryUtilization", "ServiceName", "${var.project_name}-${var.environment}-backend", "ClusterName", "${var.project_name}-${var.environment}-cluster"]
          ]
          period = 300
          stat   = "Average"
          region = "us-east-1"
          title  = "ECS Service Metrics"
        }
      },
      {
        type   = "metric"
        x      = 0
        y      = 6
        width  = 12
        height = 6
        properties = {
          metrics = [
            ["AWS/ApplicationELB", "RequestCount", "LoadBalancer", aws_lb.main.arn_suffix],
            ["AWS/ApplicationELB", "TargetResponseTime", "LoadBalancer", aws_lb.main.arn_suffix],
            ["AWS/ApplicationELB", "HTTPCode_Target_2XX_Count", "LoadBalancer", aws_lb.main.arn_suffix],
            ["AWS/ApplicationELB", "HTTPCode_Target_4XX_Count", "LoadBalancer", aws_lb.main.arn_suffix],
            ["AWS/ApplicationELB", "HTTPCode_Target_5XX_Count", "LoadBalancer", aws_lb.main.arn_suffix]
          ]
          period = 300
          stat   = "Sum"
          region = "us-east-1"
          title  = "Load Balancer Metrics"
        }
      }
    ]
  })
}
# Alarm: backend CPU above 80% for two consecutive 5-minute periods.
resource "aws_cloudwatch_metric_alarm" "high_cpu_backend" {
  alarm_name          = "${var.project_name}-${var.environment}-backend-high-cpu"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = "2"
  metric_name         = "CPUUtilization"
  namespace           = "AWS/ECS"
  period              = "300"
  statistic           = "Average"
  threshold           = "80"
  alarm_description   = "This metric monitors ECS backend CPU utilization"

  dimensions = {
    ServiceName = "${var.project_name}-${var.environment}-backend"
    ClusterName = "${var.project_name}-${var.environment}-cluster"
  }

  alarm_actions = [aws_sns_topic.alerts.arn]
}

# Alarm: more than ten target 5XX responses across two 5-minute periods.
resource "aws_cloudwatch_metric_alarm" "high_error_rate" {
  alarm_name          = "${var.project_name}-${var.environment}-high-error-rate"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = "2"
  metric_name         = "HTTPCode_Target_5XX_Count"
  namespace           = "AWS/ApplicationELB"
  period              = "300"
  statistic           = "Sum"
  threshold           = "10"
  alarm_description   = "This metric monitors ALB 5XX error rate"

  dimensions = {
    LoadBalancer = aws_lb.main.arn_suffix
  }

  alarm_actions = [aws_sns_topic.alerts.arn]
}

# SNS topic the alarms publish to; subscriptions (e-mail, chat webhook,
# etc.) are expected to be added outside this file.
resource "aws_sns_topic" "alerts" {
  name = "${var.project_name}-${var.environment}-alerts"
}
Create `sonar-project.properties` in the project root:
# Project identification
sonar.projectKey=caption-generator
sonar.organization=your-org
sonar.projectName=Caption Generator
sonar.projectVersion=1.0
# Source configuration
sonar.sources=backend,frontend/src
sonar.tests=backend/tests,frontend/src/__tests__
# Exclusions
sonar.exclusions=**/node_modules/**,**/venv/**,**/.next/**,**/build/**,**/dist/**
sonar.test.exclusions=**/*.test.ts,**/*.test.tsx,**/*.test.py
# Coverage reports (paths are relative to the project root)
sonar.python.coverage.reportPaths=backend/coverage.xml
sonar.typescript.lcov.reportPaths=frontend/coverage/lcov.info
# Quality gate settings
# With wait=true the scanner blocks until the server computes the quality
# gate and fails the build when the gate fails.
sonar.qualitygate.wait=true
Create `backend/sonar-project.properties`:
# Backend-only SonarQube project configuration.
sonar.projectKey=caption-generator-backend
sonar.sources=.
sonar.exclusions=venv/**,tests/**,__pycache__/**
sonar.python.coverage.reportPaths=coverage.xml
# NOTE(review): xunit.reportPath expects JUnit-style XML, e.g. produced by
# `pytest --junitxml=test-reports/results.xml` — confirm the test step
# generates it.
sonar.python.xunit.reportPath=test-reports/*.xml
# Language-specific settings
sonar.python.version=3.9
Create `frontend/sonar-project.properties`:
# Frontend-only SonarQube project configuration.
sonar.projectKey=caption-generator-frontend
sonar.sources=src
sonar.exclusions=**/*.test.ts,**/*.test.tsx,**/node_modules/**,**/.next/**
sonar.typescript.lcov.reportPaths=coverage/lcov.info
# Language-specific settings
# Memory ceiling (MB) for the TypeScript analyzer's Node process.
sonar.typescript.node.maxspace=4096
Add to your ECS module:
# Target-tracking autoscaling for the backend service: ECS adjusts the
# desired count to hold average CPU near 70%, within the min/max bounds.
resource "aws_appautoscaling_target" "backend" {
  max_capacity       = var.environment == "prod" ? 10 : 3
  min_capacity       = var.environment == "prod" ? 2 : 1
  resource_id        = "service/${aws_ecs_cluster.main.name}/${aws_ecs_service.backend.name}"
  scalable_dimension = "ecs:service:DesiredCount"
  service_namespace  = "ecs"
}

resource "aws_appautoscaling_policy" "backend_cpu" {
  name               = "${var.project_name}-${var.environment}-backend-cpu-scaling"
  policy_type        = "TargetTrackingScaling"
  resource_id        = aws_appautoscaling_target.backend.resource_id
  scalable_dimension = aws_appautoscaling_target.backend.scalable_dimension
  service_namespace  = aws_appautoscaling_target.backend.service_namespace

  target_tracking_scaling_policy_configuration {
    predefined_metric_specification {
      predefined_metric_type = "ECSServiceAverageCPUUtilization"
    }
    # 5-minute cooldowns damp scale oscillation in both directions.
    target_value       = 70.0
    scale_in_cooldown  = 300
    scale_out_cooldown = 300
  }
}
# Dev-only backend replica on Fargate Spot for cost savings (count = 0 in
# every other environment).
# NOTE(review): the FARGATE_SPOT capacity provider must be enabled on the
# cluster (aws_ecs_cluster_capacity_providers) for this strategy to be
# valid — confirm that resource exists elsewhere in the module.
resource "aws_ecs_service" "backend_spot" {
  count           = var.environment == "dev" ? 1 : 0
  name            = "${var.project_name}-${var.environment}-backend-spot"
  cluster         = aws_ecs_cluster.main.id
  task_definition = aws_ecs_task_definition.backend.arn
  desired_count   = 1

  capacity_provider_strategy {
    capacity_provider = "FARGATE_SPOT"
    weight            = 100
  }

  network_configuration {
    security_groups  = [var.ecs_security_group_id]
    subnets          = var.private_subnets
    assign_public_ip = false
  }
}
# Monthly cost budget with e-mail alerts at 80% of actual spend and at
# 100% of forecasted spend.
resource "aws_budgets_budget" "monthly_cost" {
  name         = "${var.project_name}-${var.environment}-monthly-budget"
  budget_type  = "COST"
  limit_amount = var.environment == "prod" ? "500" : "100"
  limit_unit   = "USD"
  time_unit    = "MONTHLY"

  # Fix: the legacy `cost_filters` map has no "Tag" key, so the original
  # `cost_filters = { Tag = [...] }` was rejected at plan time. Tag-based
  # filtering uses a cost_filter block with the "TagKeyValue" dimension,
  # whose values are formatted as "user:<tag-key>$<tag-value>".
  cost_filter {
    name   = "TagKeyValue"
    values = [format("user:Project$%s", var.project_name)]
  }

  notification {
    comparison_operator        = "GREATER_THAN"
    threshold                  = 80
    threshold_type             = "PERCENTAGE"
    notification_type          = "ACTUAL"
    subscriber_email_addresses = [var.alert_email]
  }

  notification {
    comparison_operator        = "GREATER_THAN"
    threshold                  = 100
    threshold_type             = "PERCENTAGE"
    notification_type          = "FORECASTED"
    subscriber_email_addresses = [var.alert_email]
  }
}
Create environment-specific Terraform configurations:
environments/dev/terraform.tfvars:
# Development environment: smallest Fargate sizes, minimal redundancy.
environment = "dev"
vpc_cidr = "10.0.0.0/16"
# Smaller instances for dev
backend_cpu = 512
backend_memory = 1024
frontend_cpu = 256
frontend_memory = 512
# Minimal redundancy
min_capacity = 1
max_capacity = 3
environments/prod/terraform.tfvars:
# Production environment. The CIDR block deliberately does not overlap
# with dev (10.0.0.0/16), keeping future VPC peering possible.
environment = "prod"
vpc_cidr = "10.1.0.0/16"
# Production-sized instances
backend_cpu = 1024
backend_memory = 2048
frontend_cpu = 512
frontend_memory = 1024
# High availability
min_capacity = 2
max_capacity = 10
For production deployments, implement blue/green strategy:
# In your production deployment workflow
- name: Blue/Green Deployment
  run: |
    # Strip the read-only fields from the current task definition and
    # repoint container 0 at the freshly built image.
    NEW_TASK_DEF=$(aws ecs describe-task-definition \
      --task-definition ${{ env.ECS_TASK_DEFINITION }} \
      --query 'taskDefinition' \
      --output json | \
      jq --arg IMAGE "${{ env.IMAGE_URI }}" \
        '.containerDefinitions[0].image = $IMAGE | del(.taskDefinitionArn, .revision, .status, .requiresAttributes, .placementConstraints, .compatibilities, .registeredAt, .registeredBy)')

    # Register the new task definition revision
    aws ecs register-task-definition \
      --cli-input-json "$NEW_TASK_DEF"

    # Referencing the family without a revision resolves to the revision
    # just registered, so the service rolls over to the new image.
    aws ecs update-service \
      --cluster ${{ env.ECS_CLUSTER }} \
      --service ${{ env.ECS_SERVICE }} \
      --task-definition ${{ env.ECS_TASK_DEFINITION }}

    # Wait for the deployment to complete
    aws ecs wait services-stable \
      --cluster ${{ env.ECS_CLUSTER }} \
      --services ${{ env.ECS_SERVICE }}
You now have a complete, production-ready deployment pipeline for AI applications on AWS, grounded in real-world AWS usage patterns: automated CI with linting, security scanning, and quality gates; containerized builds pushed to ECR; ECS Fargate services behind a load balancer; CloudWatch dashboards and alarms; autoscaling; and cost controls. This architecture can handle everything from a startup MVP to enterprise-scale AI applications, with costs scaling appropriately with usage.