diff --git a/freeleaps/helm-pkg/authentication/values.prod.yaml b/freeleaps/helm-pkg/authentication/values.prod.yaml
index 20ed13a1..5f0dc318 100644
--- a/freeleaps/helm-pkg/authentication/values.prod.yaml
+++ b/freeleaps/helm-pkg/authentication/values.prod.yaml
@@ -106,3 +106,45 @@ authentication:
     controlledResources:
       - cpu
       - memory
+  # Prometheus alerting rules for the authentication service.
+  # NOTE(review): indentation was reconstructed from a whitespace-collapsed
+  # diff; confirm this block nests directly under `authentication`.
+  prometheusRule:
+    # Fixed the "freepeals" typo. "alpha" was replaced with "prod" to match
+    # this values.prod.yaml file -- NOTE(review): confirm naming convention.
+    name: freeleaps-prod-authentication
+    enabled: false
+    namespace: freeleaps-monitoring-system
+    labels:
+      release: kube-prometheus-stack
+    rules:
+      # Fires when `up` for job "authentication-service" has been 0 for 1m.
+      - alert: FreeleapsAuthenticationServiceDown
+        expr: 'up{job="authentication-service"} == 0'
+        for: 1m
+        labels:
+          severity: critical
+          service: authentication-service
+        annotations:
+          summary: >-
+            Freeleaps Authentication service is down
+            (instance {{ $labels.instance }})
+          description: >-
+            Freeleaps Authentication service has been down for more than
+            1 minute.
+          runbook_url: 'https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7'
+      # Fires when the per-second rate of 5xx responses (5m window) has
+      # stayed above 0.1 for 5m.
+      - alert: FreeleapsAuthenticationServiceHighErrorRate
+        expr: 'rate(http_requests_total{job="authentication-service",status=~"5.."}[5m]) > 0.1'
+        for: 5m
+        labels:
+          severity: warning
+          service: authentication-service
+        annotations:
+          summary: >-
+            High error rate in freeleaps authentication service
+            (instance {{ $labels.instance }})
+          description: >-
+            Freeleaps Authentication service error rate is {{ $value }} errors
+            per second.
+          runbook_url: 'https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7'