Master tRPC deployment and production operations: Docker containerization, CI/CD pipelines, real-time monitoring, and scalability for enterprise SaaS.

Zero Downtime: continuous deployment without interruptions keeps user trust intact and revenue flowing.

Scalability: infrastructure that scales automatically supports exponential business growth.
```dockerfile
# 📁 Dockerfile
FROM node:18-alpine AS base

# 🔧 Install dependencies only when needed
FROM base AS deps
RUN apk add --no-cache libc6-compat
WORKDIR /app

# 📦 Install dependencies based on the package manager in use
COPY package.json yarn.lock* package-lock.json* pnpm-lock.yaml* ./
RUN \
  if [ -f yarn.lock ]; then yarn --frozen-lockfile; \
  elif [ -f package-lock.json ]; then npm ci; \
  elif [ -f pnpm-lock.yaml ]; then npm install -g pnpm && pnpm i --frozen-lockfile; \
  else echo "Lockfile not found." && exit 1; \
  fi

# 🏗️ Rebuild only when needed
FROM base AS builder
WORKDIR /app
COPY --from=deps /app/node_modules ./node_modules
COPY . .

# 🎯 Build-time environment variables
ENV NEXT_TELEMETRY_DISABLED=1
ENV NODE_ENV=production

# 🔧 Generate the Prisma client
RUN npx prisma generate

# 📦 Build the application
RUN npm run build

# 🚀 Production image
FROM base AS runner
WORKDIR /app
ENV NODE_ENV=production
ENV NEXT_TELEMETRY_DISABLED=1

# 👤 Create a non-root user
RUN addgroup --system --gid 1001 nodejs
RUN adduser --system --uid 1001 nextjs

# 📁 Copy the required files
COPY --from=builder /app/public ./public
COPY --from=builder /app/package.json ./package.json

# 🔧 Copy the application build (requires standalone output, see below)
COPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./
COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static

# 📊 Set up the logs directory
RUN mkdir -p /app/logs && chown nextjs:nodejs /app/logs

# 🚀 Set user and port
USER nextjs
EXPOSE 3000
ENV PORT=3000

# 🎯 Startup command
CMD ["node", "server.js"]
```
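One prerequisite worth calling out: the `COPY --from=builder /app/.next/standalone` step only works if Next.js is configured to emit a standalone server bundle. A minimal sketch, assuming a `next.config.mjs` at the project root:

```js
// next.config.mjs — enables the standalone bundle the Dockerfile copies
/** @type {import('next').NextConfig} */
const nextConfig = {
  output: 'standalone', // emits .next/standalone with a self-contained server.js
};

export default nextConfig;
```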
```yaml
# 📁 docker-compose.yml
version: '3.8'

services:
  # 🚀 Next.js application
  app:
    build: .
    ports:
      - '3000:3000'
    environment:
      - NODE_ENV=production
      - DATABASE_URL=postgresql://postgres:password@db:5432/myapp
      - REDIS_URL=redis://redis:6379
      - NEXTAUTH_SECRET=your-secret-key
      - NEXTAUTH_URL=http://localhost:3000
    depends_on:
      - db
      - redis
    volumes:
      - ./logs:/app/logs
    restart: unless-stopped
    healthcheck:
      # node:alpine images ship BusyBox wget rather than curl
      test: ['CMD', 'wget', '--no-verbose', '--tries=1', '--spider', 'http://localhost:3000/api/health']
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

  # 🐘 PostgreSQL database
  db:
    image: postgres:15-alpine
    environment:
      - POSTGRES_DB=myapp
      - POSTGRES_USER=postgres
      - POSTGRES_PASSWORD=password
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./init-db.sql:/docker-entrypoint-initdb.d/init-db.sql
    ports:
      - '5432:5432'
    restart: unless-stopped
    healthcheck:
      test: ['CMD-SHELL', 'pg_isready -U postgres']
      interval: 30s
      timeout: 10s
      retries: 5

  # 📊 Redis cache
  redis:
    image: redis:7-alpine
    command: redis-server --appendonly yes --maxmemory 1gb --maxmemory-policy allkeys-lru
    ports:
      - '6379:6379'
    volumes:
      - redis_data:/data
    restart: unless-stopped
    healthcheck:
      test: ['CMD', 'redis-cli', 'ping']
      interval: 30s
      timeout: 10s
      retries: 3

  # 📊 Nginx load balancer
  nginx:
    image: nginx:alpine
    ports:
      - '80:80'
      - '443:443'
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf
      - ./certs:/etc/nginx/certs
    depends_on:
      - app
    restart: unless-stopped

  # 📈 Prometheus monitoring
  prometheus:
    image: prom/prometheus:latest
    ports:
      - '9090:9090'
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    restart: unless-stopped

  # 📊 Grafana dashboard
  grafana:
    image: grafana/grafana:latest
    ports:
      - '3001:3000'
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=admin
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/dashboards:/var/lib/grafana/dashboards
      - ./grafana/provisioning:/etc/grafana/provisioning
    restart: unless-stopped

volumes:
  postgres_data:
  redis_data:
  prometheus_data:
  grafana_data:
```
```yaml
# 📁 .github/workflows/deploy.yml
name: Deploy to Production

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  # 🧪 Tests
  test:
    runs-on: ubuntu-latest
    services:
      postgres:
        image: postgres:15
        env:
          POSTGRES_PASSWORD: postgres
          POSTGRES_DB: test
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - 5432:5432
      redis:
        image: redis:7
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - 6379:6379
    steps:
      - name: 📥 Checkout code
        uses: actions/checkout@v4
      - name: 🔧 Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18'
          cache: 'npm'
      - name: 📦 Install dependencies
        run: npm ci
      - name: 🎯 Generate Prisma client
        run: npx prisma generate
      - name: 🗃️ Run database migrations
        run: npx prisma migrate deploy
        env:
          DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test
      - name: 🧪 Run tests
        run: npm run test:ci
        env:
          NODE_ENV: test
          DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test
          REDIS_URL: redis://localhost:6379
      - name: 📊 Upload coverage reports
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage/lcov.info
          flags: unittests
          name: codecov-umbrella

  # 🔍 Quality analysis
  quality:
    runs-on: ubuntu-latest
    steps:
      - name: 📥 Checkout code
        uses: actions/checkout@v4
      - name: 🔧 Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18'
          cache: 'npm'
      - name: 📦 Install dependencies
        run: npm ci
      - name: 🎯 Type checking
        run: npm run type-check
      - name: 📝 Lint code
        run: npm run lint
      - name: 🎨 Format check
        run: npm run format:check
      - name: 🔒 Security audit
        run: npm audit --audit-level=high
      - name: 📊 SonarCloud Scan
        uses: SonarSource/sonarcloud-github-action@master
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}

  # 🏗️ Build and push the image
  build:
    runs-on: ubuntu-latest
    needs: [test, quality]
    if: github.ref == 'refs/heads/main'
    permissions:
      contents: read
      packages: write
    steps:
      - name: 📥 Checkout code
        uses: actions/checkout@v4
      - name: 🔧 Setup Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: 🔑 Login to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: 🏷️ Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=sha,prefix={{branch}}-
            type=raw,value=latest,enable={{is_default_branch}}
      - name: 🏗️ Build and push
        uses: docker/build-push-action@v5
        with:
          context: .
          platforms: linux/amd64,linux/arm64
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

  # 🚀 Deploy to production
  deploy:
    runs-on: ubuntu-latest
    needs: [build]
    if: github.ref == 'refs/heads/main'
    environment:
      name: production
      url: https://myapp.com
    steps:
      - name: 📥 Checkout code
        uses: actions/checkout@v4
      - name: 🔧 Setup kubectl
        uses: azure/setup-kubectl@v3
        with:
          version: 'v1.28.0'
      - name: 🔑 Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: us-east-1
      - name: 📊 Update kubeconfig
        run: |
          aws eks update-kubeconfig --name production-cluster --region us-east-1
      - name: 🔄 Deploy to Kubernetes
        run: |
          # Substitute the image tag in the deployment manifest
          sed -i "s|IMAGE_TAG|${{ github.sha }}|g" k8s/deployment.yaml
          # Apply the manifests
          kubectl apply -f k8s/
          # Wait for the rollout to finish
          kubectl rollout status deployment/app-deployment
          # Verify the pods
          kubectl get pods -l app=myapp
      # The smoke tests need the project dependencies installed
      - name: 🔧 Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18'
          cache: 'npm'
      - name: 📦 Install dependencies
        run: npm ci
      - name: 🧪 Run smoke tests
        run: |
          npm run test:smoke
        env:
          TEST_URL: https://myapp.com
      - name: 📱 Notify deployment
        uses: 8398a7/action-slack@v3
        if: always()
        with:
          status: ${{ job.status }}
          channel: '#deployments'
          username: 'GitHub Actions'
          text: |
            🚀 *Deployment to Production*
            *Status:* ${{ job.status }}
            *Commit:* ${{ github.sha }}
            *Branch:* ${{ github.ref }}
            *Author:* ${{ github.actor }}
            <https://myapp.com|View Application>
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

  # 🔄 Automatic rollback on failure
  rollback:
    runs-on: ubuntu-latest
    needs: [deploy]
    if: failure() && github.ref == 'refs/heads/main'
    steps:
      - name: 🔧 Setup kubectl
        uses: azure/setup-kubectl@v3
        with:
          version: 'v1.28.0'
      - name: 🔑 Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: us-east-1
      - name: 📊 Update kubeconfig
        run: |
          aws eks update-kubeconfig --name production-cluster --region us-east-1
      - name: 🔄 Rollback deployment
        run: |
          kubectl rollout undo deployment/app-deployment
          kubectl rollout status deployment/app-deployment
      - name: 🚨 Notify rollback
        uses: 8398a7/action-slack@v3
        with:
          status: 'warning'
          channel: '#alerts'
          username: 'GitHub Actions'
          text: |
            🚨 *Automatic Rollback Executed*
            *Reason:* Deployment failed
            *Commit:* ${{ github.sha }}
            *Branch:* ${{ github.ref }}
            Previous version has been restored.
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
```
```yaml
# 📁 k8s/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: app-deployment
  labels:
    app: myapp
spec:
  replicas: 3
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  selector:
    matchLabels:
      app: myapp
  template:
    metadata:
      labels:
        app: myapp
    spec:
      containers:
        - name: app
          image: ghcr.io/myorg/myapp:IMAGE_TAG
          ports:
            - containerPort: 3000
          env:
            - name: NODE_ENV
              value: "production"
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: app-secrets
                  key: database-url
            - name: REDIS_URL
              valueFrom:
                secretKeyRef:
                  name: app-secrets
                  key: redis-url
            - name: NEXTAUTH_SECRET
              valueFrom:
                secretKeyRef:
                  name: app-secrets
                  key: nextauth-secret
          # 🔍 Health checks
          livenessProbe:
            httpGet:
              path: /api/health
              port: 3000
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /api/ready
              port: 3000
            initialDelaySeconds: 5
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
          # 📊 Resource limits
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "1Gi"
              cpu: "500m"
          # 🔧 Volume mounts
          volumeMounts:
            - name: logs
              mountPath: /app/logs
            - name: config
              mountPath: /app/config
              readOnly: true
      volumes:
        - name: logs
          emptyDir: {}
        - name: config
          configMap:
            name: app-config
---
# 🌐 Service
apiVersion: v1
kind: Service
metadata:
  name: app-service
spec:
  selector:
    app: myapp
  ports:
    - protocol: TCP
      port: 80
      targetPort: 3000
  type: ClusterIP
---
# 📊 Horizontal Pod Autoscaler
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: app-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: app-deployment
  minReplicas: 3
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
    - type: Pods
      pods:
        metric:
          name: requests_per_second
        target:
          type: AverageValue
          averageValue: "100"
---
# 🔧 ConfigMap
apiVersion: v1
kind: ConfigMap
metadata:
  name: app-config
data:
  config.json: |
    {
      "app": {
        "name": "MyApp",
        "version": "1.0.0",
        "environment": "production"
      },
      "features": {
        "analytics": true,
        "monitoring": true,
        "cache": true
      },
      "limits": {
        "requestsPerMinute": 1000,
        "maxFileSize": "10MB"
      }
    }
---
# 🔑 Secret (apply separately)
apiVersion: v1
kind: Secret
metadata:
  name: app-secrets
type: Opaque
data:
  database-url: <base64-encoded-database-url>
  redis-url: <base64-encoded-redis-url>
  nextauth-secret: <base64-encoded-nextauth-secret>
```
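The Secret's `data` values must be base64-encoded (the placeholders above are intentionally left unfilled). A quick helper sketch in Node, using the local docker-compose connection strings purely as example inputs:

```ts
// encode-secrets.ts — base64-encode values for the Secret manifest above.
// The inputs below are the local docker-compose examples, not real credentials.
const encode = (value: string): string =>
  Buffer.from(value, 'utf-8').toString('base64');

console.log('database-url:   ', encode('postgresql://postgres:password@db:5432/myapp'));
console.log('redis-url:      ', encode('redis://redis:6379'));
console.log('nextauth-secret:', encode('your-secret-key'));
```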
```yaml
# 📁 k8s/ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: app-ingress
  annotations:
    kubernetes.io/ingress.class: "nginx"
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
    # Rate limiting: 100 requests per minute per client IP
    nginx.ingress.kubernetes.io/limit-rpm: "100"
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
    nginx.ingress.kubernetes.io/proxy-body-size: "10m"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "300"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "300"
    nginx.ingress.kubernetes.io/enable-cors: "true"
    nginx.ingress.kubernetes.io/cors-allow-origin: "https://myapp.com"
    nginx.ingress.kubernetes.io/cors-allow-methods: "GET, POST, PUT, DELETE, OPTIONS"
    nginx.ingress.kubernetes.io/cors-allow-headers: "DNT,X-CustomHeader,Keep-Alive,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Authorization"
spec:
  tls:
    - hosts:
        - myapp.com
        - api.myapp.com
      secretName: app-tls
  rules:
    - host: myapp.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: app-service
                port:
                  number: 80
    - host: api.myapp.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: app-service
                port:
                  number: 80
---
# 🔧 NetworkPolicy
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: app-network-policy
spec:
  podSelector:
    matchLabels:
      app: myapp
  policyTypes:
    - Ingress
    - Egress
  ingress:
    - from:
        - namespaceSelector:
            matchLabels:
              name: ingress-nginx
        - podSelector:
            matchLabels:
              app: monitoring
      ports:
        - protocol: TCP
          port: 3000
  egress:
    - to:
        - namespaceSelector:
            matchLabels:
              name: database
      ports:
        - protocol: TCP
          port: 5432
    - to:
        - namespaceSelector:
            matchLabels:
              name: redis
      ports:
        - protocol: TCP
          port: 6379
    - to: []
      ports:
        - protocol: TCP
          port: 443
        - protocol: TCP
          port: 80
```
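One caveat with a NetworkPolicy this strict: once any egress rule exists, everything not listed is blocked, including DNS, so in-cluster hostnames stop resolving. A sketch of the extra egress entry to add, assuming cluster DNS listens on the default port 53 (as kube-dns/CoreDNS does):

```yaml
# Extra egress rule for app-network-policy: allow DNS lookups, otherwise
# hostnames such as the database and Redis services will not resolve.
- to:
    - namespaceSelector: {}
  ports:
    - protocol: UDP
      port: 53
    - protocol: TCP
      port: 53
```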
```typescript
// 📁 src/lib/monitoring/health.ts
import { prisma } from '@/lib/prisma';
import { redis } from '@/lib/redis';

// 🔍 Health check interfaces
interface HealthCheck {
  status: 'healthy' | 'unhealthy' | 'degraded';
  timestamp: number;
  version: string;
  uptime: number;
  checks: {
    database: HealthCheckResult;
    redis: HealthCheckResult;
    disk: HealthCheckResult;
    memory: HealthCheckResult;
  };
}

interface HealthCheckResult {
  status: 'healthy' | 'unhealthy';
  responseTime: number;
  error?: string;
}

// 🚀 Main health check
export async function healthCheck(): Promise<HealthCheck> {
  const startTime = Date.now();

  // 🔍 Run the checks in parallel
  const [database, redisCheck, disk, memory] = await Promise.allSettled([
    checkDatabase(),
    checkRedis(),
    checkDisk(),
    checkMemory(),
  ]);

  const checks = {
    database: database.status === 'fulfilled' ? database.value : {
      status: 'unhealthy' as const,
      responseTime: Date.now() - startTime,
      error: database.reason?.message || 'Unknown error',
    },
    redis: redisCheck.status === 'fulfilled' ? redisCheck.value : {
      status: 'unhealthy' as const,
      responseTime: Date.now() - startTime,
      error: redisCheck.reason?.message || 'Unknown error',
    },
    disk: disk.status === 'fulfilled' ? disk.value : {
      status: 'unhealthy' as const,
      responseTime: Date.now() - startTime,
      error: disk.reason?.message || 'Unknown error',
    },
    memory: memory.status === 'fulfilled' ? memory.value : {
      status: 'unhealthy' as const,
      responseTime: Date.now() - startTime,
      error: memory.reason?.message || 'Unknown error',
    },
  };

  // 🎯 Determine the overall status: one failing check degrades the app,
  // more than one marks it unhealthy
  const unhealthyChecks = Object.values(checks).filter(c => c.status === 'unhealthy');
  const status = unhealthyChecks.length === 0 ? 'healthy' :
    unhealthyChecks.length <= 1 ? 'degraded' : 'unhealthy';

  return {
    status,
    timestamp: Date.now(),
    version: process.env.npm_package_version || '1.0.0',
    uptime: process.uptime(),
    checks,
  };
}

// 🗃️ Database check
async function checkDatabase(): Promise<HealthCheckResult> {
  const startTime = Date.now();
  try {
    await prisma.$queryRaw`SELECT 1`;
    return {
      status: 'healthy',
      responseTime: Date.now() - startTime,
    };
  } catch (error) {
    return {
      status: 'unhealthy',
      responseTime: Date.now() - startTime,
      error: error instanceof Error ? error.message : 'Database connection failed',
    };
  }
}

// 📊 Redis check
async function checkRedis(): Promise<HealthCheckResult> {
  const startTime = Date.now();
  try {
    await redis.ping();
    return {
      status: 'healthy',
      responseTime: Date.now() - startTime,
    };
  } catch (error) {
    return {
      status: 'unhealthy',
      responseTime: Date.now() - startTime,
      error: error instanceof Error ? error.message : 'Redis connection failed',
    };
  }
}

// 💾 Disk check (fs.statfs requires Node >= 18.15)
async function checkDisk(): Promise<HealthCheckResult> {
  const startTime = Date.now();
  try {
    const fs = await import('fs/promises');
    const stats = await fs.statfs('./');

    // 🚨 Require at least 1GB of free space
    const freeSpace = stats.bavail * stats.bsize;
    const minFreeSpace = 1024 * 1024 * 1024; // 1GB
    if (freeSpace < minFreeSpace) {
      return {
        status: 'unhealthy',
        responseTime: Date.now() - startTime,
        error: `Low disk space: ${Math.round(freeSpace / 1024 / 1024)}MB available`,
      };
    }

    return {
      status: 'healthy',
      responseTime: Date.now() - startTime,
    };
  } catch (error) {
    return {
      status: 'unhealthy',
      responseTime: Date.now() - startTime,
      error: error instanceof Error ? error.message : 'Disk check failed',
    };
  }
}

// 🧠 Memory check
async function checkMemory(): Promise<HealthCheckResult> {
  const startTime = Date.now();
  try {
    // rss already includes the heap, so it is the figure to compare
    const usage = process.memoryUsage();
    const maxMemory = 1024 * 1024 * 1024; // 1GB
    if (usage.rss > maxMemory) {
      return {
        status: 'unhealthy',
        responseTime: Date.now() - startTime,
        error: `High memory usage: ${Math.round(usage.rss / 1024 / 1024)}MB`,
      };
    }

    return {
      status: 'healthy',
      responseTime: Date.now() - startTime,
    };
  } catch (error) {
    return {
      status: 'unhealthy',
      responseTime: Date.now() - startTime,
      error: error instanceof Error ? error.message : 'Memory check failed',
    };
  }
}

// 🎯 Readiness check (stricter: only critical dependencies gate traffic)
export async function readinessCheck(): Promise<boolean> {
  try {
    const health = await healthCheck();

    // 🔍 Database and Redis must be healthy before the pod receives traffic
    const criticalChecks = [health.checks.database, health.checks.redis];
    const unhealthyCritical = criticalChecks.filter(c => c.status === 'unhealthy');
    return unhealthyCritical.length === 0;
  } catch {
    return false;
  }
}

// 📊 Application metrics
export async function getMetrics(): Promise<Record<string, any>> {
  const [
    activeConnections,
    requestsPerSecond,
    errorRate,
    responseTime,
    cacheHitRate,
  ] = await Promise.allSettled([
    getActiveConnections(),
    getRequestsPerSecond(),
    getErrorRate(),
    getAverageResponseTime(),
    getCacheHitRate(),
  ]);

  return {
    activeConnections: activeConnections.status === 'fulfilled' ? activeConnections.value : 0,
    requestsPerSecond: requestsPerSecond.status === 'fulfilled' ? requestsPerSecond.value : 0,
    errorRate: errorRate.status === 'fulfilled' ? errorRate.value : 0,
    responseTime: responseTime.status === 'fulfilled' ? responseTime.value : 0,
    cacheHitRate: cacheHitRate.status === 'fulfilled' ? cacheHitRate.value : 0,
    uptime: process.uptime(),
    memory: process.memoryUsage(),
    cpu: process.cpuUsage(),
  };
}

// 🔧 Metric helper functions
async function getActiveConnections(): Promise<number> {
  try {
    const result = await prisma.$queryRaw`
      SELECT count(*) as count
      FROM pg_stat_activity
      WHERE state = 'active'
    ` as any[];
    return parseInt(result[0]?.count || '0');
  } catch {
    return 0;
  }
}

async function getRequestsPerSecond(): Promise<number> {
  try {
    const key = 'metrics:requests_per_second';
    const count = await redis.get(key);
    return parseInt(count || '0');
  } catch {
    return 0;
  }
}

async function getErrorRate(): Promise<number> {
  try {
    const totalKey = 'metrics:total_requests';
    const errorKey = 'metrics:error_requests';
    const [total, errors] = await Promise.all([
      redis.get(totalKey),
      redis.get(errorKey),
    ]);
    const totalCount = parseInt(total || '0');
    const errorCount = parseInt(errors || '0');
    return totalCount > 0 ? (errorCount / totalCount) * 100 : 0;
  } catch {
    return 0;
  }
}

async function getAverageResponseTime(): Promise<number> {
  try {
    const key = 'metrics:response_times';
    const times = await redis.lrange(key, 0, 99); // last 100 requests
    if (times.length === 0) return 0;
    const sum = times.reduce((acc, time) => acc + parseInt(time), 0);
    return sum / times.length;
  } catch {
    return 0;
  }
}

async function getCacheHitRate(): Promise<number> {
  try {
    const info = await redis.info('stats');
    const hitsMatch = info.match(/keyspace_hits:(\d+)/);
    const missesMatch = info.match(/keyspace_misses:(\d+)/);
    if (!hitsMatch || !missesMatch) return 0;
    const hits = parseInt(hitsMatch[1]);
    const misses = parseInt(missesMatch[1]);
    const total = hits + misses;
    return total > 0 ? (hits / total) * 100 : 0;
  } catch {
    return 0;
  }
}

// 🚨 Alerting system
export class AlertManager {
  private alerts: Array<{
    id: string;
    type: 'error' | 'warning' | 'info';
    message: string;
    timestamp: number;
    resolved: boolean;
  }> = [];

  async checkAndAlert(): Promise<void> {
    const health = await healthCheck();
    const metrics = await getMetrics();

    // 🔍 Evaluate the alert conditions
    if (health.status === 'unhealthy') {
      this.createAlert('error', 'Application is unhealthy', health);
    }
    if (metrics.errorRate > 5) {
      this.createAlert('warning', `High error rate: ${metrics.errorRate}%`, metrics);
    }
    if (metrics.responseTime > 2000) {
      this.createAlert('warning', `High response time: ${metrics.responseTime}ms`, metrics);
    }
    if (metrics.cacheHitRate < 50) {
      this.createAlert('info', `Low cache hit rate: ${metrics.cacheHitRate}%`, metrics);
    }
  }

  private createAlert(type: 'error' | 'warning' | 'info', message: string, data: any): void {
    const alert = {
      id: `alert-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,
      type,
      message,
      timestamp: Date.now(),
      resolved: false,
    };
    this.alerts.push(alert);

    // 📱 Send a notification
    this.sendNotification(alert, data);
  }

  private async sendNotification(alert: any, data: any): Promise<void> {
    // 🔔 Hook up Slack, Discord, PagerDuty, etc. here
    console.log('ALERT:', alert, data);
  }
}

// 🎯 Global alert manager instance
export const alertManager = new AlertManager();

// 🔄 Evaluate alerts periodically (run this in a long-lived server process,
// not in a serverless function)
setInterval(async () => {
  await alertManager.checkAndAlert();
}, 60 * 1000); // every minute
```
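Note that nothing above actually writes the `metrics:total_requests`, `metrics:error_requests`, and `metrics:response_times` keys that the helpers read. A hypothetical sketch of a tRPC middleware that populates them, assuming an ioredis-style client exported from `@/lib/redis`:

```typescript
// 📁 src/server/middleware/metrics.ts (hypothetical) — records per-request
// counters into the Redis keys that getMetrics() reads above.
import { initTRPC } from '@trpc/server';
import { redis } from '@/lib/redis';

// In a real app, reuse the same initTRPC instance as your routers
const t = initTRPC.create();

export const metricsMiddleware = t.middleware(async ({ next }) => {
  const start = Date.now();
  const result = await next();
  const duration = Date.now() - start;

  await Promise.all([
    redis.incr('metrics:total_requests'),
    result.ok ? Promise.resolve(0) : redis.incr('metrics:error_requests'),
    redis.lpush('metrics:response_times', duration.toString()),
    redis.ltrim('metrics:response_times', 0, 99), // keep the last 100 samples
  ]);

  return result;
});
```

Chaining it into your procedures (`t.procedure.use(metricsMiddleware)`) keeps the counters fresh without touching individual resolvers.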
```typescript
// 📁 src/app/api/health/route.ts
import { NextResponse } from 'next/server';
import { healthCheck } from '@/lib/monitoring/health';

export async function GET() {
  try {
    const health = await healthCheck();
    const statusCode = health.status === 'healthy' ? 200 :
      health.status === 'degraded' ? 200 : 503;
    return NextResponse.json(health, { status: statusCode });
  } catch (error) {
    return NextResponse.json(
      {
        status: 'unhealthy',
        error: error instanceof Error ? error.message : 'Unknown error',
        timestamp: Date.now(),
      },
      { status: 503 }
    );
  }
}
```
```typescript
// 📁 src/app/api/ready/route.ts
import { NextResponse } from 'next/server';
import { readinessCheck } from '@/lib/monitoring/health';

export async function GET() {
  try {
    const isReady = await readinessCheck();
    if (isReady) {
      return NextResponse.json({ status: 'ready' });
    } else {
      return NextResponse.json(
        { status: 'not ready' },
        { status: 503 }
      );
    }
  } catch (error) {
    return NextResponse.json(
      { status: 'error', error: error instanceof Error ? error.message : 'Unknown error' },
      { status: 503 }
    );
  }
}
```
```typescript
// 📁 src/app/api/metrics/route.ts
import { NextResponse } from 'next/server';
import { getMetrics } from '@/lib/monitoring/health';

export async function GET() {
  try {
    const metrics = await getMetrics();

    // 📊 Prometheus exposition format (lines inside the template must stay flush-left)
    const prometheusFormat = `
# HELP nodejs_active_connections Number of active database connections
# TYPE nodejs_active_connections gauge
nodejs_active_connections ${metrics.activeConnections}

# HELP nodejs_requests_per_second Current requests per second
# TYPE nodejs_requests_per_second gauge
nodejs_requests_per_second ${metrics.requestsPerSecond}

# HELP nodejs_error_rate Current error rate percentage
# TYPE nodejs_error_rate gauge
nodejs_error_rate ${metrics.errorRate}

# HELP nodejs_response_time_ms Average response time in milliseconds
# TYPE nodejs_response_time_ms gauge
nodejs_response_time_ms ${metrics.responseTime}

# HELP nodejs_cache_hit_rate Cache hit rate percentage
# TYPE nodejs_cache_hit_rate gauge
nodejs_cache_hit_rate ${metrics.cacheHitRate}

# HELP nodejs_uptime_seconds Process uptime in seconds
# TYPE nodejs_uptime_seconds gauge
nodejs_uptime_seconds ${metrics.uptime}

# HELP nodejs_memory_usage_bytes Memory usage in bytes
# TYPE nodejs_memory_usage_bytes gauge
nodejs_memory_usage_bytes{type="rss"} ${metrics.memory.rss}
nodejs_memory_usage_bytes{type="heapUsed"} ${metrics.memory.heapUsed}
nodejs_memory_usage_bytes{type="heapTotal"} ${metrics.memory.heapTotal}
nodejs_memory_usage_bytes{type="external"} ${metrics.memory.external}

# HELP nodejs_cpu_usage_microseconds CPU usage in microseconds
# TYPE nodejs_cpu_usage_microseconds gauge
nodejs_cpu_usage_microseconds{type="user"} ${metrics.cpu.user}
nodejs_cpu_usage_microseconds{type="system"} ${metrics.cpu.system}
`.trim();

    return new Response(prometheusFormat, {
      headers: {
        'Content-Type': 'text/plain; version=0.0.4; charset=utf-8',
      },
    });
  } catch (error) {
    return NextResponse.json(
      { error: error instanceof Error ? error.message : 'Unknown error' },
      { status: 500 }
    );
  }
}
```
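Hand-writing the exposition format works for a handful of gauges, but it gets error-prone as the metric set grows. A hedged alternative sketch using the `prom-client` package (an assumption; it is not part of the code above):

```typescript
// metrics-registry.ts — a sketch using prom-client instead of string templates
import client from 'prom-client';

const register = new client.Registry();

// Collects default Node.js metrics (memory, CPU, event loop lag, GC, ...)
client.collectDefaultMetrics({ register });

export const httpRequestDuration = new client.Histogram({
  name: 'http_request_duration_ms',
  help: 'HTTP request duration in milliseconds',
  buckets: [50, 100, 250, 500, 1000, 2000],
  registers: [register],
});

// In a route handler: serve the registry in Prometheus exposition format
export async function renderMetrics(): Promise<Response> {
  return new Response(await register.metrics(), {
    headers: { 'Content-Type': register.contentType },
  });
}
```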
```typescript
// 📁 src/app/api/version/route.ts
import { NextResponse } from 'next/server';
import { readFile } from 'fs/promises';
import { join } from 'path';

export async function GET() {
  try {
    const packageJsonPath = join(process.cwd(), 'package.json');
    const packageJson = JSON.parse(await readFile(packageJsonPath, 'utf-8'));

    return NextResponse.json({
      name: packageJson.name,
      version: packageJson.version,
      buildDate: process.env.BUILD_DATE || new Date().toISOString(),
      commit: process.env.GITHUB_SHA || 'unknown',
      node: process.version,
      platform: process.platform,
      arch: process.arch,
    });
  } catch (error) {
    return NextResponse.json(
      { error: error instanceof Error ? error.message : 'Unknown error' },
      { status: 500 }
    );
  }
}
```
```yaml
# 📁 prometheus.yml
global:
  scrape_interval: 15s
  evaluation_interval: 15s

# 🚨 Alerting configuration
rule_files:
  - "alert_rules.yml"

alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - alertmanager:9093

# 🎯 Scrape jobs
scrape_configs:
  # 📊 Application metrics
  - job_name: 'myapp'
    static_configs:
      - targets: ['app:3000']
    metrics_path: '/api/metrics'
    scrape_interval: 30s
    scrape_timeout: 10s
    honor_timestamps: true

  # 🐳 Docker metrics
  - job_name: 'docker'
    static_configs:
      - targets: ['docker-exporter:9323']

  # 🐘 PostgreSQL metrics
  - job_name: 'postgres'
    static_configs:
      - targets: ['postgres-exporter:9187']

  # 📊 Redis metrics
  - job_name: 'redis'
    static_configs:
      - targets: ['redis-exporter:9121']

  # ☸️ Kubernetes metrics
  - job_name: 'kubernetes-pods'
    kubernetes_sd_configs:
      - role: pod
    relabel_configs:
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
        action: replace
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
        target_label: __address__
      - action: labelmap
        regex: __meta_kubernetes_pod_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_pod_name]
        action: replace
        target_label: kubernetes_pod_name

  # 🔍 Nginx metrics
  - job_name: 'nginx'
    static_configs:
      - targets: ['nginx-exporter:9113']
```
```yaml
# 📁 alert_rules.yml
groups:
  - name: application_alerts
    rules:
      # 🚨 Application down
      - alert: ApplicationDown
        expr: up{job="myapp"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Application is down"
          description: "Application has been down for more than 1 minute"

      # 🔥 High error rate
      - alert: HighErrorRate
        expr: nodejs_error_rate > 5
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "High error rate detected"
          description: "Error rate is greater than 5% for more than 2 minutes"

      # 🐌 High response time
      - alert: HighResponseTime
        expr: nodejs_response_time_ms > 2000
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High response time detected"
          description: "Response time is greater than 2000ms for more than 5 minutes"

      # 🧠 High memory usage
      - alert: HighMemoryUsage
        expr: nodejs_memory_usage_bytes{type="rss"} > 1073741824
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High memory usage detected"
          description: "Memory usage is greater than 1GB for more than 5 minutes"

      # 📊 Low cache hit rate
      - alert: LowCacheHitRate
        expr: nodejs_cache_hit_rate < 50
        for: 10m
        labels:
          severity: info
        annotations:
          summary: "Low cache hit rate detected"
          description: "Cache hit rate is less than 50% for more than 10 minutes"

      # 🗃️ Too many database connections
      - alert: TooManyDatabaseConnections
        expr: nodejs_active_connections > 50
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Too many database connections"
          description: "There are more than 50 active database connections"

  - name: infrastructure_alerts
    rules:
      # 💾 Low disk space
      - alert: LowDiskSpace
        expr: node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"} < 0.1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Low disk space"
          description: "Disk space is below 10% on instance"

      # 🔥 High CPU
      - alert: HighCPUUsage
        expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High CPU usage"
          description: "CPU usage is greater than 80% on instance"

      # 📊 Low memory
      - alert: LowMemory
        expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes < 0.1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Low memory"
          description: "Available memory is below 10% on instance"
```
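One hedged note on the `infrastructure_alerts` group: the `node_*` series it queries come from node-exporter, which is not among the scrape jobs in `prometheus.yml` above. A minimal job to add, assuming a node-exporter instance reachable under that name on its conventional port:

```yaml
# Assumed scrape job for node-exporter, required by the node_* expressions
# used in infrastructure_alerts
- job_name: 'node'
  static_configs:
    - targets: ['node-exporter:9100']
```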
Zero Downtime: use rolling updates and health checks for interruption-free deployments.

360° Monitoring: monitor the application, the infrastructure, and the user experience.

Smart Alerts: base alerts on SLIs and avoid alert fatigue.

Backup and Recovery: keep backup and recovery plans, and test them regularly.
You have successfully completed the full tRPC course! You now have the advanced skills to build type-safe, performant, and scalable APIs.