Master tRPC deployment and production operations: Docker containerization, CI/CD pipelines, real-time monitoring, and scalability for enterprise SaaS.

Zero Downtime: continuous deployment without interruptions keeps user trust intact and revenue flowing.

Scalability: infrastructure that scales automatically supports exponential business growth.
```dockerfile
# 📁 Dockerfile
FROM node:18-alpine AS base

# 🔧 Install dependencies only when needed
FROM base AS deps
RUN apk add --no-cache libc6-compat
WORKDIR /app

# 📦 Install dependencies based on the package manager in use
COPY package.json yarn.lock* package-lock.json* pnpm-lock.yaml* ./
RUN \
  if [ -f yarn.lock ]; then yarn --frozen-lockfile; \
  elif [ -f package-lock.json ]; then npm ci; \
  elif [ -f pnpm-lock.yaml ]; then npm install -g pnpm && pnpm i --frozen-lockfile; \
  else echo "Lockfile not found." && exit 1; \
  fi

# 🏗️ Rebuild only when needed
FROM base AS builder
WORKDIR /app
COPY --from=deps /app/node_modules ./node_modules
COPY . .

# 🎯 Build-time environment variables
ENV NEXT_TELEMETRY_DISABLED=1
ENV NODE_ENV=production

# 🔧 Generate the Prisma client
RUN npx prisma generate

# 📦 Build the application
RUN npm run build

# 🚀 Production image
FROM base AS runner
WORKDIR /app
ENV NODE_ENV=production
ENV NEXT_TELEMETRY_DISABLED=1

# 👤 Create a non-root user
RUN addgroup --system --gid 1001 nodejs
RUN adduser --system --uid 1001 nextjs

# 📁 Copy the required files
COPY --from=builder /app/public ./public
COPY --from=builder /app/package.json ./package.json

# 🔧 Copy the application build (requires standalone output, see below)
COPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./
COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static

# 📊 Set up the logs directory
RUN mkdir -p /app/logs && chown nextjs:nodejs /app/logs

# 🚀 Set user and port
USER nextjs
EXPOSE 3000
ENV PORT=3000

# 🎯 Startup command
CMD ["node", "server.js"]
```
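One prerequisite worth calling out: the `COPY --from=builder /app/.next/standalone` step only works if Next.js is configured to emit a standalone server bundle. A minimal sketch, assuming a `next.config.mjs` at the project root:

```js
// next.config.mjs — enables the standalone bundle the Dockerfile copies
/** @type {import('next').NextConfig} */
const nextConfig = {
  output: 'standalone', // emits .next/standalone with a self-contained server.js
};

export default nextConfig;
```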
```yaml
# 📁 docker-compose.yml
version: '3.8'

services:
  # 🚀 Next.js application
  app:
    build: .
    ports:
      - '3000:3000'
    environment:
      - NODE_ENV=production
      - DATABASE_URL=postgresql://postgres:password@db:5432/myapp
      - REDIS_URL=redis://redis:6379
      - NEXTAUTH_SECRET=your-secret-key
      - NEXTAUTH_URL=http://localhost:3000
    depends_on:
      - db
      - redis
    volumes:
      - ./logs:/app/logs
    restart: unless-stopped
    healthcheck:
      # node:alpine images ship BusyBox wget rather than curl
      test: ['CMD', 'wget', '--no-verbose', '--tries=1', '--spider', 'http://localhost:3000/api/health']
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

  # 🐘 PostgreSQL database
  db:
    image: postgres:15-alpine
    environment:
      - POSTGRES_DB=myapp
      - POSTGRES_USER=postgres
      - POSTGRES_PASSWORD=password
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./init-db.sql:/docker-entrypoint-initdb.d/init-db.sql
    ports:
      - '5432:5432'
    restart: unless-stopped
    healthcheck:
      test: ['CMD-SHELL', 'pg_isready -U postgres']
      interval: 30s
      timeout: 10s
      retries: 5

  # 📊 Redis cache
  redis:
    image: redis:7-alpine
    command: redis-server --appendonly yes --maxmemory 1gb --maxmemory-policy allkeys-lru
    ports:
      - '6379:6379'
    volumes:
      - redis_data:/data
    restart: unless-stopped
    healthcheck:
      test: ['CMD', 'redis-cli', 'ping']
      interval: 30s
      timeout: 10s
      retries: 3

  # 📊 Nginx load balancer
  nginx:
    image: nginx:alpine
    ports:
      - '80:80'
      - '443:443'
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf
      - ./certs:/etc/nginx/certs
    depends_on:
      - app
    restart: unless-stopped

  # 📈 Prometheus monitoring
  prometheus:
    image: prom/prometheus:latest
    ports:
      - '9090:9090'
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    restart: unless-stopped

  # 📊 Grafana dashboard
  grafana:
    image: grafana/grafana:latest
    ports:
      - '3001:3000'
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=admin
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/dashboards:/var/lib/grafana/dashboards
      - ./grafana/provisioning:/etc/grafana/provisioning
    restart: unless-stopped

volumes:
  postgres_data:
  redis_data:
  prometheus_data:
  grafana_data:
```
```yaml
# 📁 .github/workflows/deploy.yml
name: Deploy to Production

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  # 🧪 Tests
  test:
    runs-on: ubuntu-latest
    services:
      postgres:
        image: postgres:15
        env:
          POSTGRES_PASSWORD: postgres
          POSTGRES_DB: test
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - 5432:5432
      redis:
        image: redis:7
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - 6379:6379
    steps:
      - name: 📥 Checkout code
        uses: actions/checkout@v4
      - name: 🔧 Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18'
          cache: 'npm'
      - name: 📦 Install dependencies
        run: npm ci
      - name: 🎯 Generate Prisma client
        run: npx prisma generate
      - name: 🗃️ Run database migrations
        run: npx prisma migrate deploy
        env:
          DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test
      - name: 🧪 Run tests
        run: npm run test:ci
        env:
          NODE_ENV: test
          DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test
          REDIS_URL: redis://localhost:6379
      - name: 📊 Upload coverage reports
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage/lcov.info
          flags: unittests
          name: codecov-umbrella

  # 🔍 Quality analysis
  quality:
    runs-on: ubuntu-latest
    steps:
      - name: 📥 Checkout code
        uses: actions/checkout@v4
      - name: 🔧 Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18'
          cache: 'npm'
      - name: 📦 Install dependencies
        run: npm ci
      - name: 🎯 Type checking
        run: npm run type-check
      - name: 📝 Lint code
        run: npm run lint
      - name: 🎨 Format check
        run: npm run format:check
      - name: 🔒 Security audit
        run: npm audit --audit-level=high
      - name: 📊 SonarCloud Scan
        uses: SonarSource/sonarcloud-github-action@master
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}

  # 🏗️ Build and push the image
  build:
    runs-on: ubuntu-latest
    needs: [test, quality]
    if: github.ref == 'refs/heads/main'
    permissions:
      contents: read
      packages: write
    steps:
      - name: 📥 Checkout code
        uses: actions/checkout@v4
      - name: 🔧 Setup Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: 🔑 Login to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: 🏷️ Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=sha,prefix={{branch}}-
            type=raw,value=latest,enable={{is_default_branch}}
      - name: 🏗️ Build and push
        uses: docker/build-push-action@v5
        with:
          context: .
          platforms: linux/amd64,linux/arm64
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

  # 🚀 Deploy to production
  deploy:
    runs-on: ubuntu-latest
    needs: [build]
    if: github.ref == 'refs/heads/main'
    environment:
      name: production
      url: https://myapp.com
    steps:
      - name: 📥 Checkout code
        uses: actions/checkout@v4
      - name: 🔧 Setup kubectl
        uses: azure/setup-kubectl@v3
        with:
          version: 'v1.28.0'
      - name: 🔑 Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: us-east-1
      - name: 📊 Update kubeconfig
        run: |
          aws eks update-kubeconfig --name production-cluster --region us-east-1
      - name: 🔄 Deploy to Kubernetes
        run: |
          # Substitute the image tag in the deployment manifest
          sed -i "s|IMAGE_TAG|${{ github.sha }}|g" k8s/deployment.yaml
          # Apply the manifests
          kubectl apply -f k8s/
          # Wait for the rollout to finish
          kubectl rollout status deployment/app-deployment
          # Verify the pods
          kubectl get pods -l app=myapp
      # The smoke tests need the project dependencies installed
      - name: 🔧 Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18'
          cache: 'npm'
      - name: 📦 Install dependencies
        run: npm ci
      - name: 🧪 Run smoke tests
        run: |
          npm run test:smoke
        env:
          TEST_URL: https://myapp.com
      - name: 📱 Notify deployment
        uses: 8398a7/action-slack@v3
        if: always()
        with:
          status: ${{ job.status }}
          channel: '#deployments'
          username: 'GitHub Actions'
          text: |
            🚀 *Deployment to Production*
            *Status:* ${{ job.status }}
            *Commit:* ${{ github.sha }}
            *Branch:* ${{ github.ref }}
            *Author:* ${{ github.actor }}
            <https://myapp.com|View Application>
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

  # 🔄 Automatic rollback on failure
  rollback:
    runs-on: ubuntu-latest
    needs: [deploy]
    if: failure() && github.ref == 'refs/heads/main'
    steps:
      - name: 🔧 Setup kubectl
        uses: azure/setup-kubectl@v3
        with:
          version: 'v1.28.0'
      - name: 🔑 Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: us-east-1
      - name: 📊 Update kubeconfig
        run: |
          aws eks update-kubeconfig --name production-cluster --region us-east-1
      - name: 🔄 Rollback deployment
        run: |
          kubectl rollout undo deployment/app-deployment
          kubectl rollout status deployment/app-deployment
      - name: 🚨 Notify rollback
        uses: 8398a7/action-slack@v3
        with:
          status: 'warning'
          channel: '#alerts'
          username: 'GitHub Actions'
          text: |
            🚨 *Automatic Rollback Executed*
            *Reason:* Deployment failed
            *Commit:* ${{ github.sha }}
            *Branch:* ${{ github.ref }}
            Previous version has been restored.
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
```
```yaml
# 📁 k8s/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: app-deployment
  labels:
    app: myapp
spec:
  replicas: 3
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  selector:
    matchLabels:
      app: myapp
  template:
    metadata:
      labels:
        app: myapp
    spec:
      containers:
        - name: app
          image: ghcr.io/myorg/myapp:IMAGE_TAG
          ports:
            - containerPort: 3000
          env:
            - name: NODE_ENV
              value: "production"
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: app-secrets
                  key: database-url
            - name: REDIS_URL
              valueFrom:
                secretKeyRef:
                  name: app-secrets
                  key: redis-url
            - name: NEXTAUTH_SECRET
              valueFrom:
                secretKeyRef:
                  name: app-secrets
                  key: nextauth-secret
          # 🔍 Health checks
          livenessProbe:
            httpGet:
              path: /api/health
              port: 3000
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /api/ready
              port: 3000
            initialDelaySeconds: 5
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
          # 📊 Resource limits
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "1Gi"
              cpu: "500m"
          # 🔧 Volume mounts
          volumeMounts:
            - name: logs
              mountPath: /app/logs
            - name: config
              mountPath: /app/config
              readOnly: true
      volumes:
        - name: logs
          emptyDir: {}
        - name: config
          configMap:
            name: app-config
---
# 🌐 Service
apiVersion: v1
kind: Service
metadata:
  name: app-service
spec:
  selector:
    app: myapp
  ports:
    - protocol: TCP
      port: 80
      targetPort: 3000
  type: ClusterIP
---
# 📊 Horizontal Pod Autoscaler
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: app-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: app-deployment
  minReplicas: 3
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
    - type: Pods
      pods:
        metric:
          name: requests_per_second
        target:
          type: AverageValue
          averageValue: "100"
---
# 🔧 ConfigMap
apiVersion: v1
kind: ConfigMap
metadata:
  name: app-config
data:
  config.json: |
    {
      "app": {
        "name": "MyApp",
        "version": "1.0.0",
        "environment": "production"
      },
      "features": {
        "analytics": true,
        "monitoring": true,
        "cache": true
      },
      "limits": {
        "requestsPerMinute": 1000,
        "maxFileSize": "10MB"
      }
    }
---
# 🔑 Secret (apply separately)
apiVersion: v1
kind: Secret
metadata:
  name: app-secrets
type: Opaque
data:
  database-url: <base64-encoded-database-url>
  redis-url: <base64-encoded-redis-url>
  nextauth-secret: <base64-encoded-nextauth-secret>
```
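The Secret's `data` values must be base64-encoded (the placeholders above are intentionally left unfilled). A quick helper sketch in Node, using the local docker-compose connection strings purely as example inputs:

```ts
// encode-secrets.ts — base64-encode values for the Secret manifest above.
// The inputs below are the local docker-compose examples, not real credentials.
const encode = (value: string): string =>
  Buffer.from(value, 'utf-8').toString('base64');

console.log('database-url:   ', encode('postgresql://postgres:password@db:5432/myapp'));
console.log('redis-url:      ', encode('redis://redis:6379'));
console.log('nextauth-secret:', encode('your-secret-key'));
```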
```yaml
# 📁 k8s/ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: app-ingress
  annotations:
    kubernetes.io/ingress.class: "nginx"
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
    # Rate limiting: 100 requests per minute per client IP
    nginx.ingress.kubernetes.io/limit-rpm: "100"
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
    nginx.ingress.kubernetes.io/proxy-body-size: "10m"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "300"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "300"
    nginx.ingress.kubernetes.io/enable-cors: "true"
    nginx.ingress.kubernetes.io/cors-allow-origin: "https://myapp.com"
    nginx.ingress.kubernetes.io/cors-allow-methods: "GET, POST, PUT, DELETE, OPTIONS"
    nginx.ingress.kubernetes.io/cors-allow-headers: "DNT,X-CustomHeader,Keep-Alive,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Authorization"
spec:
  tls:
    - hosts:
        - myapp.com
        - api.myapp.com
      secretName: app-tls
  rules:
    - host: myapp.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: app-service
                port:
                  number: 80
    - host: api.myapp.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: app-service
                port:
                  number: 80
---
# 🔧 NetworkPolicy
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: app-network-policy
spec:
  podSelector:
    matchLabels:
      app: myapp
  policyTypes:
    - Ingress
    - Egress
  ingress:
    - from:
        - namespaceSelector:
            matchLabels:
              name: ingress-nginx
        - podSelector:
            matchLabels:
              app: monitoring
      ports:
        - protocol: TCP
          port: 3000
  egress:
    - to:
        - namespaceSelector:
            matchLabels:
              name: database
      ports:
        - protocol: TCP
          port: 5432
    - to:
        - namespaceSelector:
            matchLabels:
              name: redis
      ports:
        - protocol: TCP
          port: 6379
    - to: []
      ports:
        - protocol: TCP
          port: 443
        - protocol: TCP
          port: 80
```
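One caveat with a NetworkPolicy this strict: once any egress rule exists, everything not listed is blocked, including DNS, so in-cluster hostnames stop resolving. A sketch of the extra egress entry to add, assuming cluster DNS listens on the default port 53 (as kube-dns/CoreDNS does):

```yaml
# Extra egress rule for app-network-policy: allow DNS lookups, otherwise
# hostnames such as the database and Redis services will not resolve.
- to:
    - namespaceSelector: {}
  ports:
    - protocol: UDP
      port: 53
    - protocol: TCP
      port: 53
```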
```typescript
// 📁 src/lib/monitoring/health.ts
import { prisma } from '@/lib/prisma';
import { redis } from '@/lib/redis';

// 🔍 Health check interfaces
interface HealthCheck {
  status: 'healthy' | 'unhealthy' | 'degraded';
  timestamp: number;
  version: string;
  uptime: number;
  checks: {
    database: HealthCheckResult;
    redis: HealthCheckResult;
    disk: HealthCheckResult;
    memory: HealthCheckResult;
  };
}

interface HealthCheckResult {
  status: 'healthy' | 'unhealthy';
  responseTime: number;
  error?: string;
}

// 🚀 Main health check
export async function healthCheck(): Promise<HealthCheck> {
  const startTime = Date.now();

  // 🔍 Run the checks in parallel
  const [database, redisCheck, disk, memory] = await Promise.allSettled([
    checkDatabase(),
    checkRedis(),
    checkDisk(),
    checkMemory(),
  ]);

  const checks = {
    database: database.status === 'fulfilled' ? database.value : {
      status: 'unhealthy' as const,
      responseTime: Date.now() - startTime,
      error: database.reason?.message || 'Unknown error',
    },
    redis: redisCheck.status === 'fulfilled' ? redisCheck.value : {
      status: 'unhealthy' as const,
      responseTime: Date.now() - startTime,
      error: redisCheck.reason?.message || 'Unknown error',
    },
    disk: disk.status === 'fulfilled' ? disk.value : {
      status: 'unhealthy' as const,
      responseTime: Date.now() - startTime,
      error: disk.reason?.message || 'Unknown error',
    },
    memory: memory.status === 'fulfilled' ? memory.value : {
      status: 'unhealthy' as const,
      responseTime: Date.now() - startTime,
      error: memory.reason?.message || 'Unknown error',
    },
  };

  // 🎯 Determine the overall status: one failing check degrades the app,
  // more than one marks it unhealthy
  const unhealthyChecks = Object.values(checks).filter(c => c.status === 'unhealthy');
  const status = unhealthyChecks.length === 0 ? 'healthy' :
    unhealthyChecks.length <= 1 ? 'degraded' : 'unhealthy';

  return {
    status,
    timestamp: Date.now(),
    version: process.env.npm_package_version || '1.0.0',
    uptime: process.uptime(),
    checks,
  };
}

// 🗃️ Database check
async function checkDatabase(): Promise<HealthCheckResult> {
  const startTime = Date.now();
  try {
    await prisma.$queryRaw`SELECT 1`;
    return {
      status: 'healthy',
      responseTime: Date.now() - startTime,
    };
  } catch (error) {
    return {
      status: 'unhealthy',
      responseTime: Date.now() - startTime,
      error: error instanceof Error ? error.message : 'Database connection failed',
    };
  }
}

// 📊 Redis check
async function checkRedis(): Promise<HealthCheckResult> {
  const startTime = Date.now();
  try {
    await redis.ping();
    return {
      status: 'healthy',
      responseTime: Date.now() - startTime,
    };
  } catch (error) {
    return {
      status: 'unhealthy',
      responseTime: Date.now() - startTime,
      error: error instanceof Error ? error.message : 'Redis connection failed',
    };
  }
}

// 💾 Disk check (fs.statfs requires Node >= 18.15)
async function checkDisk(): Promise<HealthCheckResult> {
  const startTime = Date.now();
  try {
    const fs = await import('fs/promises');
    const stats = await fs.statfs('./');

    // 🚨 Require at least 1GB of free space
    const freeSpace = stats.bavail * stats.bsize;
    const minFreeSpace = 1024 * 1024 * 1024; // 1GB
    if (freeSpace < minFreeSpace) {
      return {
        status: 'unhealthy',
        responseTime: Date.now() - startTime,
        error: `Low disk space: ${Math.round(freeSpace / 1024 / 1024)}MB available`,
      };
    }

    return {
      status: 'healthy',
      responseTime: Date.now() - startTime,
    };
  } catch (error) {
    return {
      status: 'unhealthy',
      responseTime: Date.now() - startTime,
      error: error instanceof Error ? error.message : 'Disk check failed',
    };
  }
}

// 🧠 Memory check
async function checkMemory(): Promise<HealthCheckResult> {
  const startTime = Date.now();
  try {
    // rss already includes the heap, so it is the figure to compare
    const usage = process.memoryUsage();
    const maxMemory = 1024 * 1024 * 1024; // 1GB
    if (usage.rss > maxMemory) {
      return {
        status: 'unhealthy',
        responseTime: Date.now() - startTime,
        error: `High memory usage: ${Math.round(usage.rss / 1024 / 1024)}MB`,
      };
    }

    return {
      status: 'healthy',
      responseTime: Date.now() - startTime,
    };
  } catch (error) {
    return {
      status: 'unhealthy',
      responseTime: Date.now() - startTime,
      error: error instanceof Error ? error.message : 'Memory check failed',
    };
  }
}

// 🎯 Readiness check (stricter: only critical dependencies gate traffic)
export async function readinessCheck(): Promise<boolean> {
  try {
    const health = await healthCheck();

    // 🔍 Database and Redis must be healthy before the pod receives traffic
    const criticalChecks = [health.checks.database, health.checks.redis];
    const unhealthyCritical = criticalChecks.filter(c => c.status === 'unhealthy');
    return unhealthyCritical.length === 0;
  } catch {
    return false;
  }
}

// 📊 Application metrics
export async function getMetrics(): Promise<Record<string, any>> {
  const [
    activeConnections,
    requestsPerSecond,
    errorRate,
    responseTime,
    cacheHitRate,
  ] = await Promise.allSettled([
    getActiveConnections(),
    getRequestsPerSecond(),
    getErrorRate(),
    getAverageResponseTime(),
    getCacheHitRate(),
  ]);

  return {
    activeConnections: activeConnections.status === 'fulfilled' ? activeConnections.value : 0,
    requestsPerSecond: requestsPerSecond.status === 'fulfilled' ? requestsPerSecond.value : 0,
    errorRate: errorRate.status === 'fulfilled' ? errorRate.value : 0,
    responseTime: responseTime.status === 'fulfilled' ? responseTime.value : 0,
    cacheHitRate: cacheHitRate.status === 'fulfilled' ? cacheHitRate.value : 0,
    uptime: process.uptime(),
    memory: process.memoryUsage(),
    cpu: process.cpuUsage(),
  };
}

// 🔧 Metric helper functions
async function getActiveConnections(): Promise<number> {
  try {
    const result = await prisma.$queryRaw`
      SELECT count(*) as count
      FROM pg_stat_activity
      WHERE state = 'active'
    ` as any[];
    return parseInt(result[0]?.count || '0');
  } catch {
    return 0;
  }
}

async function getRequestsPerSecond(): Promise<number> {
  try {
    const key = 'metrics:requests_per_second';
    const count = await redis.get(key);
    return parseInt(count || '0');
  } catch {
    return 0;
  }
}

async function getErrorRate(): Promise<number> {
  try {
    const totalKey = 'metrics:total_requests';
    const errorKey = 'metrics:error_requests';
    const [total, errors] = await Promise.all([
      redis.get(totalKey),
      redis.get(errorKey),
    ]);
    const totalCount = parseInt(total || '0');
    const errorCount = parseInt(errors || '0');
    return totalCount > 0 ? (errorCount / totalCount) * 100 : 0;
  } catch {
    return 0;
  }
}

async function getAverageResponseTime(): Promise<number> {
  try {
    const key = 'metrics:response_times';
    const times = await redis.lrange(key, 0, 99); // last 100 requests
    if (times.length === 0) return 0;
    const sum = times.reduce((acc, time) => acc + parseInt(time), 0);
    return sum / times.length;
  } catch {
    return 0;
  }
}

async function getCacheHitRate(): Promise<number> {
  try {
    const info = await redis.info('stats');
    const hitsMatch = info.match(/keyspace_hits:(\d+)/);
    const missesMatch = info.match(/keyspace_misses:(\d+)/);
    if (!hitsMatch || !missesMatch) return 0;
    const hits = parseInt(hitsMatch[1]);
    const misses = parseInt(missesMatch[1]);
    const total = hits + misses;
    return total > 0 ? (hits / total) * 100 : 0;
  } catch {
    return 0;
  }
}

// 🚨 Alerting system
export class AlertManager {
  private alerts: Array<{
    id: string;
    type: 'error' | 'warning' | 'info';
    message: string;
    timestamp: number;
    resolved: boolean;
  }> = [];

  async checkAndAlert(): Promise<void> {
    const health = await healthCheck();
    const metrics = await getMetrics();

    // 🔍 Evaluate the alert conditions
    if (health.status === 'unhealthy') {
      this.createAlert('error', 'Application is unhealthy', health);
    }
    if (metrics.errorRate > 5) {
      this.createAlert('warning', `High error rate: ${metrics.errorRate}%`, metrics);
    }
    if (metrics.responseTime > 2000) {
      this.createAlert('warning', `High response time: ${metrics.responseTime}ms`, metrics);
    }
    if (metrics.cacheHitRate < 50) {
      this.createAlert('info', `Low cache hit rate: ${metrics.cacheHitRate}%`, metrics);
    }
  }

  private createAlert(type: 'error' | 'warning' | 'info', message: string, data: any): void {
    const alert = {
      id: `alert-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,
      type,
      message,
      timestamp: Date.now(),
      resolved: false,
    };
    this.alerts.push(alert);

    // 📱 Send a notification
    this.sendNotification(alert, data);
  }

  private async sendNotification(alert: any, data: any): Promise<void> {
    // 🔔 Hook up Slack, Discord, PagerDuty, etc. here
    console.log('ALERT:', alert, data);
  }
}

// 🎯 Global alert manager instance
export const alertManager = new AlertManager();

// 🔄 Evaluate alerts periodically (run this in a long-lived server process,
// not in a serverless function)
setInterval(async () => {
  await alertManager.checkAndAlert();
}, 60 * 1000); // every minute
```
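Note that nothing above actually writes the `metrics:total_requests`, `metrics:error_requests`, and `metrics:response_times` keys that the helpers read. A hypothetical sketch of a tRPC middleware that populates them, assuming an ioredis-style client exported from `@/lib/redis`:

```typescript
// 📁 src/server/middleware/metrics.ts (hypothetical) — records per-request
// counters into the Redis keys that getMetrics() reads above.
import { initTRPC } from '@trpc/server';
import { redis } from '@/lib/redis';

// In a real app, reuse the same initTRPC instance as your routers
const t = initTRPC.create();

export const metricsMiddleware = t.middleware(async ({ next }) => {
  const start = Date.now();
  const result = await next();
  const duration = Date.now() - start;

  await Promise.all([
    redis.incr('metrics:total_requests'),
    result.ok ? Promise.resolve(0) : redis.incr('metrics:error_requests'),
    redis.lpush('metrics:response_times', duration.toString()),
    redis.ltrim('metrics:response_times', 0, 99), // keep the last 100 samples
  ]);

  return result;
});
```

Chaining it into your procedures (`t.procedure.use(metricsMiddleware)`) keeps the counters fresh without touching individual resolvers.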
```typescript
// 📁 src/app/api/health/route.ts
import { NextResponse } from 'next/server';
import { healthCheck } from '@/lib/monitoring/health';

export async function GET() {
  try {
    const health = await healthCheck();
    const statusCode = health.status === 'healthy' ? 200 :
      health.status === 'degraded' ? 200 : 503;
    return NextResponse.json(health, { status: statusCode });
  } catch (error) {
    return NextResponse.json(
      {
        status: 'unhealthy',
        error: error instanceof Error ? error.message : 'Unknown error',
        timestamp: Date.now(),
      },
      { status: 503 }
    );
  }
}
```
```typescript
// 📁 src/app/api/ready/route.ts
import { NextResponse } from 'next/server';
import { readinessCheck } from '@/lib/monitoring/health';

export async function GET() {
  try {
    const isReady = await readinessCheck();
    if (isReady) {
      return NextResponse.json({ status: 'ready' });
    } else {
      return NextResponse.json(
        { status: 'not ready' },
        { status: 503 }
      );
    }
  } catch (error) {
    return NextResponse.json(
      { status: 'error', error: error instanceof Error ? error.message : 'Unknown error' },
      { status: 503 }
    );
  }
}
```
```typescript
// 📁 src/app/api/metrics/route.ts
import { NextResponse } from 'next/server';
import { getMetrics } from '@/lib/monitoring/health';

export async function GET() {
  try {
    const metrics = await getMetrics();

    // 📊 Prometheus exposition format (lines inside the template must stay flush-left)
    const prometheusFormat = `
# HELP nodejs_active_connections Number of active database connections
# TYPE nodejs_active_connections gauge
nodejs_active_connections ${metrics.activeConnections}

# HELP nodejs_requests_per_second Current requests per second
# TYPE nodejs_requests_per_second gauge
nodejs_requests_per_second ${metrics.requestsPerSecond}

# HELP nodejs_error_rate Current error rate percentage
# TYPE nodejs_error_rate gauge
nodejs_error_rate ${metrics.errorRate}

# HELP nodejs_response_time_ms Average response time in milliseconds
# TYPE nodejs_response_time_ms gauge
nodejs_response_time_ms ${metrics.responseTime}

# HELP nodejs_cache_hit_rate Cache hit rate percentage
# TYPE nodejs_cache_hit_rate gauge
nodejs_cache_hit_rate ${metrics.cacheHitRate}

# HELP nodejs_uptime_seconds Process uptime in seconds
# TYPE nodejs_uptime_seconds gauge
nodejs_uptime_seconds ${metrics.uptime}

# HELP nodejs_memory_usage_bytes Memory usage in bytes
# TYPE nodejs_memory_usage_bytes gauge
nodejs_memory_usage_bytes{type="rss"} ${metrics.memory.rss}
nodejs_memory_usage_bytes{type="heapUsed"} ${metrics.memory.heapUsed}
nodejs_memory_usage_bytes{type="heapTotal"} ${metrics.memory.heapTotal}
nodejs_memory_usage_bytes{type="external"} ${metrics.memory.external}

# HELP nodejs_cpu_usage_microseconds CPU usage in microseconds
# TYPE nodejs_cpu_usage_microseconds gauge
nodejs_cpu_usage_microseconds{type="user"} ${metrics.cpu.user}
nodejs_cpu_usage_microseconds{type="system"} ${metrics.cpu.system}
`.trim();

    return new Response(prometheusFormat, {
      headers: {
        'Content-Type': 'text/plain; version=0.0.4; charset=utf-8',
      },
    });
  } catch (error) {
    return NextResponse.json(
      { error: error instanceof Error ? error.message : 'Unknown error' },
      { status: 500 }
    );
  }
}
```
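Hand-writing the exposition format works for a handful of gauges, but it gets error-prone as the metric set grows. A hedged alternative sketch using the `prom-client` package (an assumption; it is not part of the code above):

```typescript
// metrics-registry.ts — a sketch using prom-client instead of string templates
import client from 'prom-client';

const register = new client.Registry();

// Collects default Node.js metrics (memory, CPU, event loop lag, GC, ...)
client.collectDefaultMetrics({ register });

export const httpRequestDuration = new client.Histogram({
  name: 'http_request_duration_ms',
  help: 'HTTP request duration in milliseconds',
  buckets: [50, 100, 250, 500, 1000, 2000],
  registers: [register],
});

// In a route handler: serve the registry in Prometheus exposition format
export async function renderMetrics(): Promise<Response> {
  return new Response(await register.metrics(), {
    headers: { 'Content-Type': register.contentType },
  });
}
```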
```typescript
// 📁 src/app/api/version/route.ts
import { NextResponse } from 'next/server';
import { readFile } from 'fs/promises';
import { join } from 'path';

export async function GET() {
  try {
    const packageJsonPath = join(process.cwd(), 'package.json');
    const packageJson = JSON.parse(await readFile(packageJsonPath, 'utf-8'));

    return NextResponse.json({
      name: packageJson.name,
      version: packageJson.version,
      buildDate: process.env.BUILD_DATE || new Date().toISOString(),
      commit: process.env.GITHUB_SHA || 'unknown',
      node: process.version,
      platform: process.platform,
      arch: process.arch,
    });
  } catch (error) {
    return NextResponse.json(
      { error: error instanceof Error ? error.message : 'Unknown error' },
      { status: 500 }
    );
  }
}
```
```yaml
# 📁 prometheus.yml
global:
  scrape_interval: 15s
  evaluation_interval: 15s

# 🚨 Alerting configuration
rule_files:
  - "alert_rules.yml"

alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - alertmanager:9093

# 🎯 Scrape jobs
scrape_configs:
  # 📊 Application metrics
  - job_name: 'myapp'
    static_configs:
      - targets: ['app:3000']
    metrics_path: '/api/metrics'
    scrape_interval: 30s
    scrape_timeout: 10s
    honor_timestamps: true

  # 🐳 Docker metrics
  - job_name: 'docker'
    static_configs:
      - targets: ['docker-exporter:9323']

  # 🐘 PostgreSQL metrics
  - job_name: 'postgres'
    static_configs:
      - targets: ['postgres-exporter:9187']

  # 📊 Redis metrics
  - job_name: 'redis'
    static_configs:
      - targets: ['redis-exporter:9121']

  # ☸️ Kubernetes metrics
  - job_name: 'kubernetes-pods'
    kubernetes_sd_configs:
      - role: pod
    relabel_configs:
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
        action: replace
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
        target_label: __address__
      - action: labelmap
        regex: __meta_kubernetes_pod_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_pod_name]
        action: replace
        target_label: kubernetes_pod_name

  # 🔍 Nginx metrics
  - job_name: 'nginx'
    static_configs:
      - targets: ['nginx-exporter:9113']
```
```yaml
# 📁 alert_rules.yml
groups:
  - name: application_alerts
    rules:
      # 🚨 Application down
      - alert: ApplicationDown
        expr: up{job="myapp"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Application is down"
          description: "Application has been down for more than 1 minute"

      # 🔥 High error rate
      - alert: HighErrorRate
        expr: nodejs_error_rate > 5
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "High error rate detected"
          description: "Error rate is greater than 5% for more than 2 minutes"

      # 🐌 High response time
      - alert: HighResponseTime
        expr: nodejs_response_time_ms > 2000
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High response time detected"
          description: "Response time is greater than 2000ms for more than 5 minutes"

      # 🧠 High memory usage
      - alert: HighMemoryUsage
        expr: nodejs_memory_usage_bytes{type="rss"} > 1073741824
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High memory usage detected"
          description: "Memory usage is greater than 1GB for more than 5 minutes"

      # 📊 Low cache hit rate
      - alert: LowCacheHitRate
        expr: nodejs_cache_hit_rate < 50
        for: 10m
        labels:
          severity: info
        annotations:
          summary: "Low cache hit rate detected"
          description: "Cache hit rate is less than 50% for more than 10 minutes"

      # 🗃️ Too many database connections
      - alert: TooManyDatabaseConnections
        expr: nodejs_active_connections > 50
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Too many database connections"
          description: "There are more than 50 active database connections"

  - name: infrastructure_alerts
    rules:
      # 💾 Low disk space
      - alert: LowDiskSpace
        expr: node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"} < 0.1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Low disk space"
          description: "Disk space is below 10% on instance"

      # 🔥 High CPU
      - alert: HighCPUUsage
        expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High CPU usage"
          description: "CPU usage is greater than 80% on instance"

      # 📊 Low memory
      - alert: LowMemory
        expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes < 0.1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Low memory"
          description: "Available memory is below 10% on instance"
```
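One hedged note on the `infrastructure_alerts` group: the `node_*` series it queries come from node-exporter, which is not among the scrape jobs in `prometheus.yml` above. A minimal job to add, assuming a node-exporter instance reachable under that name on its conventional port:

```yaml
# Assumed scrape job for node-exporter, required by the node_* expressions
# used in infrastructure_alerts
- job_name: 'node'
  static_configs:
    - targets: ['node-exporter:9100']
```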
Zero Downtime: use rolling updates and health checks for interruption-free deployments.

360° Monitoring: monitor the application, the infrastructure, and the user experience.

Smart Alerts: base alerts on SLIs and avoid alert fatigue.

Backup and Recovery: keep backup and recovery plans, and test them regularly.
You have successfully completed the full tRPC course! You now have the advanced skills to build type-safe, performant, and scalable APIs.