Merge branch 'master' of https://gitea.freeleaps.mathmast.com/freeleaps/freeleaps-ops
This commit is contained in:
commit
d9209de99a
@ -1,32 +1,91 @@
|
|||||||
# Prometheus Alter Rule Config
|
# 1. Prometheus Alert Rule Configuration
|
||||||
|
|
||||||
Add `prometheusrule.yaml` to `<helm-pkg>/templates`.
|
## 1.1. Add `prometheusrule.yaml` to `<helm-pkg>/templates`.
|
||||||
see
|
|
||||||
```
|
Example:
|
||||||
|
|
||||||
|
> Replace `metrics` with your service name; see freeleaps-ops/freeleaps/helm-pkg/metrics for a reference.
|
||||||
|
```yaml
|
||||||
{{- /*
|
{{- /*
|
||||||
Copyright Broadcom, Inc. All Rights Reserved.
|
Copyright Broadcom, Inc. All Rights Reserved.
|
||||||
SPDX-License-Identifier: APACHE-2.0
|
SPDX-License-Identifier: APACHE-2.0
|
||||||
*/}}
|
*/}}
|
||||||
|
|
||||||
{{- if and .Values.metrics.enabled .Values.metrics.prometheusRule.enabled }}
|
{{- if .Values.metrics.prometheusRule.enabled }}
|
||||||
apiVersion: monitoring.coreos.com/v1
|
apiVersion: monitoring.coreos.com/v1
|
||||||
kind: PrometheusRule
|
kind: PrometheusRule
|
||||||
metadata:
|
metadata:
|
||||||
name: {{ include "common.names.fullname" . }}
|
name: {{ .Values.metrics.prometheusRule.name }}
|
||||||
namespace: {{ default (include "common.names.namespace" .) .Values.metrics.prometheusRule.namespace | quote}}
|
namespace: {{ .Values.metrics.prometheusRule.namespace | quote }}
|
||||||
labels: {{- include "common.labels.standard" ( dict "customLabels" .Values.commonLabels "context" $ ) | nindent 4 }}
|
{{- with .Values.metrics.prometheusRule.labels }}
|
||||||
{{- if .Values.metrics.prometheusRule.additionalLabels }}
|
labels:
|
||||||
{{- include "common.tplvalues.render" (dict "value" .Values.metrics.prometheusRule.additionalLabels "context" $) | nindent 4 }}
|
{{- toYaml . | nindent 4 }}
|
||||||
{{- end }}
|
|
||||||
{{- if .Values.commonAnnotations }}
|
|
||||||
annotations: {{- include "common.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }}
|
|
||||||
{{- end }}
|
{{- end }}
|
||||||
spec:
|
spec:
|
||||||
groups:
|
groups:
|
||||||
{{- with .Values.metrics.prometheusRule.rules }}
|
{{- with .Values.metrics.prometheusRule.rules }}
|
||||||
- name: {{ template "common.names.name" $ }}
|
- name: {{ $.Values.metrics.prometheusRule.name }}
|
||||||
rules: {{- include "common.tplvalues.render" (dict "value" . "context" $) | nindent 8 }}
|
rules:
|
||||||
|
{{- range . }}
|
||||||
|
- alert: {{ .alert }}
|
||||||
|
expr: {{ .expr | quote }}
|
||||||
|
{{- if .for }}
|
||||||
|
for: {{ .for }}
|
||||||
|
{{- end }}
|
||||||
|
{{- if .labels }}
|
||||||
|
labels:
|
||||||
|
{{- toYaml .labels | nindent 12 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- if .annotations }}
|
||||||
|
annotations:
|
||||||
|
{{- toYaml .annotations | nindent 12 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## 1.2. Add prometheusrule configuration to values.{alpha/prod}.yaml
|
||||||
|
Example:
|
||||||
|
|
||||||
|
> See freeleaps-ops/freeleaps/helm-pkg/metrics
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
prometheusRule:
|
||||||
|
name: freepeals-metrics
|
||||||
|
enabled: true
|
||||||
|
namespace: "freeleaps-monitoring-system"
|
||||||
|
labels:
|
||||||
|
release: kube-prometheus-stack
|
||||||
|
rules:
|
||||||
|
- alert: FreeleapsMetricsServiceDown
|
||||||
|
expr: up{job="metrics-service"} == 0
|
||||||
|
for: 1m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
service: metrics-service
|
||||||
|
annotations:
|
||||||
|
summary: "Freeleaps Metrics service is down (instance {{ $labels.instance }})"
|
||||||
|
description: "Freeleaps Metrics service has been down for more than 1 minutes."
|
||||||
|
runbook_url: "https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7"
|
||||||
|
|
||||||
|
- alert: FreeleapsMetricsServiceHighErrorRate
|
||||||
|
expr: rate(http_requests_total{job="metrics-service",status=~"5.."}[5m]) > 0.1
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
service: metrics-service
|
||||||
|
annotations:
|
||||||
|
summary: "High error rate in freeleaps metrics service (instance {{ $labels.instance }})"
|
||||||
|
description: "Freeleaps Metrics service error rate is {{ $value }} errors per second."
|
||||||
|
runbook_url: "https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7"
|
||||||
|
```
|
||||||
|
|
||||||
|
## 1.3. Verify Alert Rule Configuration is Effective
|
||||||
|
|
||||||
|
> Redirect to local
|
||||||
|

|
||||||
|
|
||||||
|
> The newly added rules should appear in the rule list, which indicates that they are in effect.
|
||||||
|
|
||||||
|

|
||||||
BIN
docs/asserts/image4.png
Normal file
BIN
docs/asserts/image4.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 569 KiB |
BIN
docs/asserts/image5.png
Normal file
BIN
docs/asserts/image5.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 341 KiB |
@ -25,6 +25,7 @@ auth:
|
|||||||
securePassword: true
|
securePassword: true
|
||||||
updatePassword: false
|
updatePassword: false
|
||||||
enableLoopbackUser: false
|
enableLoopbackUser: false
|
||||||
|
erlangCookie: "iGF4ZVjeaXgHW2xELZTxJl8a6aFY7nes"
|
||||||
tls:
|
tls:
|
||||||
enabled: false
|
enabled: false
|
||||||
logs: "-"
|
logs: "-"
|
||||||
|
|||||||
@ -162,6 +162,7 @@ auth:
|
|||||||
## ref: https://github.com/bitnami/containers/tree/main/bitnami/rabbitmq#environment-variables
|
## ref: https://github.com/bitnami/containers/tree/main/bitnami/rabbitmq#environment-variables
|
||||||
##
|
##
|
||||||
updatePassword: false
|
updatePassword: false
|
||||||
|
|
||||||
## @param auth.existingPasswordSecret Existing secret with RabbitMQ credentials (existing secret must contain a value for `rabbitmq-password` key or override with setting auth.existingSecretPasswordKey)
|
## @param auth.existingPasswordSecret Existing secret with RabbitMQ credentials (existing secret must contain a value for `rabbitmq-password` key or override with setting auth.existingSecretPasswordKey)
|
||||||
## e.g:
|
## e.g:
|
||||||
## existingPasswordSecret: name-of-existing-secret
|
## existingPasswordSecret: name-of-existing-secret
|
||||||
@ -177,7 +178,7 @@ auth:
|
|||||||
## @param auth.erlangCookie Erlang cookie to determine whether different nodes are allowed to communicate with each other
|
## @param auth.erlangCookie Erlang cookie to determine whether different nodes are allowed to communicate with each other
|
||||||
## ref: https://github.com/bitnami/containers/tree/main/bitnami/rabbitmq#environment-variables
|
## ref: https://github.com/bitnami/containers/tree/main/bitnami/rabbitmq#environment-variables
|
||||||
##
|
##
|
||||||
erlangCookie: ""
|
erlangCookie: "iGF4ZVjeaXgHW2xELZTxJl8a6aFY7nes"
|
||||||
## @param auth.existingErlangSecret Existing secret with RabbitMQ Erlang cookie (must contain a value for `rabbitmq-erlang-cookie` key or override with auth.existingSecretErlangKey)
|
## @param auth.existingErlangSecret Existing secret with RabbitMQ Erlang cookie (must contain a value for `rabbitmq-erlang-cookie` key or override with auth.existingSecretErlangKey)
|
||||||
## e.g:
|
## e.g:
|
||||||
## existingErlangSecret: name-of-existing-secret
|
## existingErlangSecret: name-of-existing-secret
|
||||||
|
|||||||
@ -1,7 +1,3 @@
|
|||||||
global:
|
|
||||||
registry: docker.io
|
|
||||||
repository: freeleaps
|
|
||||||
nodeSelector: {}
|
|
||||||
dashboard:
|
dashboard:
|
||||||
enabled: true
|
enabled: true
|
||||||
name: freeleaps-prod-authentication-dashboard
|
name: freeleaps-prod-authentication-dashboard
|
||||||
@ -20,7 +16,7 @@ authentication:
|
|||||||
registry: docker.io
|
registry: docker.io
|
||||||
repository: null
|
repository: null
|
||||||
name: authentication
|
name: authentication
|
||||||
tag: 1.9.0
|
tag: snapshot-ee519ca
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
ports:
|
ports:
|
||||||
- name: http
|
- name: http
|
||||||
|
|||||||
@ -18,7 +18,7 @@ content:
|
|||||||
registry: docker.io
|
registry: docker.io
|
||||||
repository: null
|
repository: null
|
||||||
name: content
|
name: content
|
||||||
tag: 1.9.0
|
tag: snapshot-ee519ca
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
ports:
|
ports:
|
||||||
- name: http
|
- name: http
|
||||||
|
|||||||
@ -10,7 +10,7 @@ devops:
|
|||||||
registry: docker.io
|
registry: docker.io
|
||||||
repository: null
|
repository: null
|
||||||
name: devops
|
name: devops
|
||||||
tag: snapshot-96f2f52
|
tag: snapshot-3cba9e4
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
ports:
|
ports:
|
||||||
- name: http
|
- name: http
|
||||||
|
|||||||
@ -84,7 +84,9 @@ metrics:
|
|||||||
prometheusRule:
|
prometheusRule:
|
||||||
name: freepeals-alpha-metrics
|
name: freepeals-alpha-metrics
|
||||||
enabled: false
|
enabled: false
|
||||||
namespace: freeleaps-monitoring-system
|
namespace: "freeleaps-monitoring-system"
|
||||||
|
labels:
|
||||||
|
release: kube-prometheus-stack
|
||||||
rules:
|
rules:
|
||||||
- alert: FreeleapsMetricsServiceDown
|
- alert: FreeleapsMetricsServiceDown
|
||||||
expr: up{job="metrics-service"} == 0
|
expr: up{job="metrics-service"} == 0
|
||||||
|
|||||||
@ -15,7 +15,7 @@ metrics:
|
|||||||
registry: docker.io
|
registry: docker.io
|
||||||
repository: null
|
repository: null
|
||||||
name: metrics
|
name: metrics
|
||||||
tag: snapshot-38ff0ae
|
tag: snapshot-3cba9e4
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
ports:
|
ports:
|
||||||
- name: http
|
- name: http
|
||||||
@ -85,6 +85,8 @@ metrics:
|
|||||||
name: freepeals-prod-metrics
|
name: freepeals-prod-metrics
|
||||||
enabled: true
|
enabled: true
|
||||||
namespace: "freeleaps-monitoring-system"
|
namespace: "freeleaps-monitoring-system"
|
||||||
|
labels:
|
||||||
|
release: kube-prometheus-stack
|
||||||
rules:
|
rules:
|
||||||
- alert: FreeleapsMetricsServiceDown
|
- alert: FreeleapsMetricsServiceDown
|
||||||
expr: up{job="metrics-service"} == 0
|
expr: up{job="metrics-service"} == 0
|
||||||
@ -93,10 +95,9 @@ metrics:
|
|||||||
severity: critical
|
severity: critical
|
||||||
service: metrics-service
|
service: metrics-service
|
||||||
annotations:
|
annotations:
|
||||||
summary: "Freeleaps Metrics service is down (instance {{ $labels.instance }})"
|
summary: Freeleaps Metrics service is down (instance {{ $labels.instance }})
|
||||||
description: "Freeleaps Metrics service has been down for more than 1 minutes."
|
description: Freeleaps Metrics service has been down for more than 1 minutes.
|
||||||
runbook_url: "https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7"
|
runbook_url: https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7
|
||||||
|
|
||||||
- alert: FreeleapsMetricsServiceHighErrorRate
|
- alert: FreeleapsMetricsServiceHighErrorRate
|
||||||
expr: rate(http_requests_total{job="metrics-service",status=~"5.."}[5m]) > 0.1
|
expr: rate(http_requests_total{job="metrics-service",status=~"5.."}[5m]) > 0.1
|
||||||
for: 5m
|
for: 5m
|
||||||
@ -104,6 +105,6 @@ metrics:
|
|||||||
severity: warning
|
severity: warning
|
||||||
service: metrics-service
|
service: metrics-service
|
||||||
annotations:
|
annotations:
|
||||||
summary: "High error rate in freeleaps metrics service (instance {{ $labels.instance }})"
|
summary: High error rate in freeleaps metrics service (instance {{ $labels.instance }})
|
||||||
description: "Freeleaps Metrics service error rate is {{ $value }} errors per second."
|
description: Freeleaps Metrics service error rate is {{ $value }} errors per second.
|
||||||
runbook_url: "https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7"
|
runbook_url: https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7
|
||||||
|
|||||||
@ -84,6 +84,8 @@ metrics:
|
|||||||
name: freepeals-metrics
|
name: freepeals-metrics
|
||||||
enabled: true
|
enabled: true
|
||||||
namespace: "freeleaps-monitoring-system"
|
namespace: "freeleaps-monitoring-system"
|
||||||
|
labels:
|
||||||
|
release: kube-prometheus-stack
|
||||||
rules:
|
rules:
|
||||||
- alert: FreeleapsMetricsServiceDown
|
- alert: FreeleapsMetricsServiceDown
|
||||||
expr: up{job="metrics-service"} == 0
|
expr: up{job="metrics-service"} == 0
|
||||||
|
|||||||
@ -32,23 +32,23 @@ notification:
|
|||||||
readiness:
|
readiness:
|
||||||
type: httpGet
|
type: httpGet
|
||||||
config:
|
config:
|
||||||
path: /api/_/readyz
|
path: /api/_/livez
|
||||||
port: 8003
|
port: 8003
|
||||||
initialDelaySeconds: 30
|
initialDelaySeconds: 60
|
||||||
periodSeconds: 30
|
periodSeconds: 30
|
||||||
timeoutSeconds: 3
|
timeoutSeconds: 3
|
||||||
successThreshold: 1
|
successThreshold: 1
|
||||||
failureThreshold: 3
|
failureThreshold: 5
|
||||||
liveness:
|
liveness:
|
||||||
type: httpGet
|
type: httpGet
|
||||||
config:
|
config:
|
||||||
path: /api/_/livez
|
path: /api/_/livez
|
||||||
port: 8003
|
port: 8003
|
||||||
initialDelaySeconds: 30
|
initialDelaySeconds: 60
|
||||||
periodSeconds: 15
|
periodSeconds: 30
|
||||||
timeoutSeconds: 3
|
timeoutSeconds: 3
|
||||||
successThreshold: 1
|
successThreshold: 1
|
||||||
failureThreshold: 3
|
failureThreshold: 5
|
||||||
terminationGracePeriodSeconds: 30
|
terminationGracePeriodSeconds: 30
|
||||||
services:
|
services:
|
||||||
- name: notification-service
|
- name: notification-service
|
||||||
|
|||||||
@ -13,7 +13,7 @@ notification:
|
|||||||
registry: docker.io
|
registry: docker.io
|
||||||
repository: null
|
repository: null
|
||||||
name: notification
|
name: notification
|
||||||
tag: 1.9.0
|
tag: snapshot-ee519ca
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
ports:
|
ports:
|
||||||
- name: http
|
- name: http
|
||||||
@ -30,23 +30,23 @@ notification:
|
|||||||
readiness:
|
readiness:
|
||||||
type: httpGet
|
type: httpGet
|
||||||
config:
|
config:
|
||||||
path: /api/_/readyz
|
path: /api/_/livez
|
||||||
port: 8003
|
port: 8003
|
||||||
initialDelaySeconds: 30
|
initialDelaySeconds: 60
|
||||||
periodSeconds: 30
|
periodSeconds: 30
|
||||||
timeoutSeconds: 3
|
timeoutSeconds: 3
|
||||||
successThreshold: 1
|
successThreshold: 1
|
||||||
failureThreshold: 3
|
failureThreshold: 5
|
||||||
liveness:
|
liveness:
|
||||||
type: httpGet
|
type: httpGet
|
||||||
config:
|
config:
|
||||||
path: /api/_/livez
|
path: /api/_/livez
|
||||||
port: 8003
|
port: 8003
|
||||||
initialDelaySeconds: 30
|
initialDelaySeconds: 60
|
||||||
periodSeconds: 15
|
periodSeconds: 30
|
||||||
timeoutSeconds: 3
|
timeoutSeconds: 3
|
||||||
successThreshold: 1
|
successThreshold: 1
|
||||||
failureThreshold: 3
|
failureThreshold: 5
|
||||||
terminationGracePeriodSeconds: 30
|
terminationGracePeriodSeconds: 30
|
||||||
services:
|
services:
|
||||||
- name: notification-service
|
- name: notification-service
|
||||||
|
|||||||
@ -23,7 +23,7 @@ executeFreeleapsPipeline {
|
|||||||
sastEnabled: false,
|
sastEnabled: false,
|
||||||
imageRegistry: 'docker.io',
|
imageRegistry: 'docker.io',
|
||||||
imageRepository: 'freeleaps',
|
imageRepository: 'freeleaps',
|
||||||
imageName: 'devops',
|
imageName: 'metrics',
|
||||||
imageBuilder: 'dind',
|
imageBuilder: 'dind',
|
||||||
dockerfilePath: 'Dockerfile',
|
dockerfilePath: 'Dockerfile',
|
||||||
imageBuildRoot: '.',
|
imageBuildRoot: '.',
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user