diff --git a/freeleaps/helm-pkg/authentication/templates/authentication/prometheusrule.yaml b/freeleaps/helm-pkg/authentication/templates/authentication/prometheusrule.yaml
new file mode 100644
index 00000000..deff1b9b
--- /dev/null
+++ b/freeleaps/helm-pkg/authentication/templates/authentication/prometheusrule.yaml
@@ -0,0 +1,44 @@
+{{- /*
+Copyright Broadcom, Inc. All Rights Reserved.
+SPDX-License-Identifier: APACHE-2.0
+*/}}
+
+{{- /*
+Renders a PrometheusRule when authentication.prometheusRule.enabled is true.
+Every entry of authentication.prometheusRule.rules lands in one group named
+after the rule object. Scalar fields are piped through quote so they stay
+strings in the rendered YAML; labels and annotations go through toYaml and
+are not evaluated by Helm, so Prometheus placeholders in them pass through.
+*/}}
+{{- if .Values.authentication.prometheusRule.enabled }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: {{ .Values.authentication.prometheusRule.name | quote }}
+  namespace: {{ .Values.authentication.prometheusRule.namespace | quote }}
+  {{- with .Values.authentication.prometheusRule.labels }}
+  labels:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  groups:
+    {{- with .Values.authentication.prometheusRule.rules }}
+    - name: {{ $.Values.authentication.prometheusRule.name | quote }}
+      rules:
+        {{- range . }}
+        - alert: {{ .alert | quote }}
+          expr: {{ .expr | quote }}
+          {{- if .for }}
+          for: {{ .for | quote }}
+          {{- end }}
+          {{- if .labels }}
+          labels:
+            {{- toYaml .labels | nindent 12 }}
+          {{- end }}
+          {{- if .annotations }}
+          annotations:
+            {{- toYaml .annotations | nindent 12 }}
+          {{- end }}
+        {{- end }}
+    {{- end }}
+{{- end }}
diff --git a/freeleaps/helm-pkg/authentication/values.alpha.yaml b/freeleaps/helm-pkg/authentication/values.alpha.yaml
index 35a35f83..fa7ac887 100644
--- a/freeleaps/helm-pkg/authentication/values.alpha.yaml
+++ b/freeleaps/helm-pkg/authentication/values.alpha.yaml
@@ -115,3 +115,33 @@ authentication:
     controlledResources:
       - cpu
       - memory
+  # Alerting rules rendered into a PrometheusRule by templates/authentication/prometheusrule.yaml.
+  prometheusRule:
+    name: freeleaps-alpha-authentication
+    enabled: false
+    namespace: freeleaps-monitoring-system
+    labels:
+      release: kube-prometheus-stack
+    rules:
+      # Fires once the authentication-service scrape target has reported up == 0 for 1m.
+      - alert: FreeleapsAuthenticationServiceDown
+        expr: 'up{job="authentication-service"} == 0'
+        for: 1m
+        labels:
+          severity: critical
+          service: authentication-service
+        annotations:
+          summary: 'Freeleaps Authentication service is down (instance {{ $labels.instance }})'
+          description: Freeleaps Authentication service has been down for more than 1 minute.
+          runbook_url: https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7
+      # Fires when a series' 5xx request rate stays above 0.1 per second for 5m.
+      - alert: FreeleapsAuthenticationServiceHighErrorRate
+        expr: 'rate(http_requests_total{job="authentication-service",status=~"5.."}[5m]) > 0.1'
+        for: 5m
+        labels:
+          severity: warning
+          service: authentication-service
+        annotations:
+          summary: 'High error rate in freeleaps authentication service (instance {{ $labels.instance }})'
+          description: 'Freeleaps Authentication service error rate is {{ $value }} errors per second.'
+          runbook_url: https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7