Grafana and Prometheus

This commit is contained in:
JurTI-BR
2025-04-03 16:43:30 -03:00
parent 8ba6f345c7
commit b7e5cb64aa
35 changed files with 962 additions and 942 deletions

View File

@@ -0,0 +1,53 @@
global:
  resolve_timeout: 5m
  # Replace these values with real credentials
  smtp_smarthost: 'smtp.example.com:587'
  smtp_from: 'alertmanager@example.com'
  smtp_auth_username: 'alertmanager@example.com'
  smtp_auth_password: 'password'
  # Optional: Slack configuration
  slack_api_url: 'https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX'

route:
  group_by: ['alertname', 'severity']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 1h
  receiver: 'team-emails'
  routes:
    - match:
        severity: critical
      receiver: 'team-pager'
      continue: true
    - match:
        severity: warning
      receiver: 'team-emails'

receivers:
  - name: 'team-emails'
    email_configs:
      - to: 'team@example.com'
        send_resolved: true
        html: '{{ template "email.html" . }}'
        headers:
          Subject: '{{ template "email.subject" . }}'
  - name: 'team-pager'
    slack_configs:
      - channel: '#alerts'
        send_resolved: true
        icon_emoji: ':warning:'
        title: '{{ template "slack.title" . }}'
        text: '{{ template "slack.text" . }}'
    # Add other notification systems for critical alerts here
    # For example: webhook, PagerDuty, etc.

templates:
  - '/etc/alertmanager/templates/*.tmpl'

inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname']

View File

@@ -0,0 +1,11 @@
apiVersion: 1

providers:
  - name: 'Prometheus'
    orgId: 1
    folder: ''
    type: file
    disableDeletion: false
    editable: true
    options:
      path: /etc/grafana/provisioning/dashboards

View File

@@ -0,0 +1,9 @@
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    isDefault: true
    editable: false

View File

@@ -0,0 +1,28 @@
global:
  scrape_interval: 15s
  evaluation_interval: 15s
  scrape_timeout: 10s

alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - alertmanager:9093

rule_files:
  - "rules/*.yml"

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  - job_name: 'api-portaljuru'
    metrics_path: '/metrics'
    scrape_interval: 10s
    static_configs:
      - targets: ['host.docker.internal:8066']

  - job_name: 'node-exporter'
    static_configs:
      - targets: ['node-exporter:9100']
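The target hostnames above (alertmanager:9093, node-exporter:9100) and the Grafana provisioning paths in the earlier files assume the monitoring stack runs on a shared Docker network. A minimal docker-compose sketch consistent with those names follows; the service names, images, ports, and volume paths are assumptions for illustration and are not taken from this commit.

# Hypothetical docker-compose.yml matching the hostnames and ports referenced
# by the configs in this commit; images and volume paths are assumptions.
services:
  prometheus:
    image: prom/prometheus:latest
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - ./rules:/etc/prometheus/rules
    ports:
      - "9090:9090"
    extra_hosts:
      # Lets the container reach the API running on the host at port 8066.
      - "host.docker.internal:host-gateway"

  alertmanager:
    image: prom/alertmanager:latest
    volumes:
      - ./alertmanager.yml:/etc/alertmanager/alertmanager.yml
      - ./templates:/etc/alertmanager/templates
    ports:
      - "9093:9093"

  node-exporter:
    image: prom/node-exporter:latest
    ports:
      - "9100:9100"

  grafana:
    image: grafana/grafana:latest
    volumes:
      - ./grafana/provisioning:/etc/grafana/provisioning
    ports:
      - "3000:3000"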

View File

@@ -0,0 +1,52 @@
groups:
  - name: portaljuru_api
    rules:
      # Alert if the API is unreachable for more than 1 minute
      - alert: PortalJuruApiDown
        expr: up{job="api-portaljuru"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Portal Jurunense API is down"
          description: "The Portal Jurunense API has been unreachable for at least 1 minute."

      # Alert if the HTTP error rate exceeds 5% over 5 minutes
      - alert: HighErrorRate
        expr: sum(rate(http_request_total{job="api-portaljuru", statusCode=~"5.."}[5m])) / sum(rate(http_request_total{job="api-portaljuru"}[5m])) > 0.05
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "High error rate on the API"
          description: "The HTTP 5xx error rate has been above 5% over the last 5 minutes."

      # Alert if response time is too high
      - alert: SlowResponseTime
        expr: histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{job="api-portaljuru"}[5m])) by (le)) > 1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High response time on the API"
          description: "95% of requests are taking more than 1 second to complete."

      # Alert for high memory usage
      - alert: HighMemoryUsage
        expr: process_resident_memory_bytes{job="api-portaljuru"} > 350000000
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High memory usage"
          description: "The API has been using more than 350MB of memory for more than 5 minutes."

      # Alert for a nearly exhausted database connection pool
      - alert: DatabaseConnectionPoolNearlyFull
        expr: api_db_connection_pool_used{job="api-portaljuru"} / api_db_connection_pool_total{job="api-portaljuru"} > 0.8
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "Connection pool nearly full"
          description: "The connection pool is using more than 80% of its maximum capacity."