feat: update set-up

This commit is contained in:
Eason Zhao 2025-10-20 21:42:16 -04:00
parent 9ece101d8c
commit 90a9337f1a

View File

@@ -109,3 +109,30 @@ centralStorage:
controlledResources:
- cpu
- memory
# Alerting rules for the central storage service: the rule object's
# name/namespace/labels followed by the alert definitions it carries.
prometheusRule:
  name: freeleaps-prod-central-storage
  enabled: true
  namespace: freeleaps-monitoring-system
  labels:
    # NOTE(review): presumably has to match the rule selector of the
    # kube-prometheus-stack Prometheus instance -- confirm against the chart.
    release: kube-prometheus-stack
  rules:
    # Availability: the scrape target has reported up == 0 for a full minute.
    - alert: FreeleapsCentralStorageServiceDown
      expr: 'up{job="central-storage-service"} == 0'
      for: 1m
      labels:
        severity: critical
        service: central-storage-service
      annotations:
        summary: 'Freeleaps Central Storage service is down (instance {{ $labels.instance }})'
        description: 'Freeleaps Central Storage service has been down for more than 1 minute.'
        runbook_url: 'https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7'
    # Error rate: 5xx responses per second, summed over every series of one
    # instance so the alert fires once per instance (not once per status code
    # or other label combination) and {{ $value }} is the instance-wide rate.
    - alert: FreeleapsCentralStorageServiceHighErrorRate
      expr: 'sum by (instance) (rate(http_requests_total{job="central-storage-service",status=~"5.."}[5m])) > 0.1'
      for: 5m
      labels:
        severity: warning
        service: central-storage-service
      annotations:
        summary: 'High error rate in freeleaps central storage service (instance {{ $labels.instance }})'
        description: 'Freeleaps Central Storage service error rate is {{ $value }} errors per second.'
        runbook_url: 'https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7'