# Configuration values for the Freeleaps metrics service (PROD environment).
---
# Chart-wide settings.
# NOTE(review): presumably the default image registry/repository for workloads
# that do not set their own — confirm against the chart templates.
global:
  registry: docker.io
  repository: freeleaps
  # Empty mapping: no node-selector labels are set in this file.
  nodeSelector: {}
# Log ingestion toggle; switched off in this values file.
logIngest:
  enabled: false
# Metrics dashboard settings; switched on in this values file.
dashboard:
  enabled: true
  name: freeleaps-prod-metrics-dashboard
  title: Freeleaps Metrics Dashboard (PROD)
  # NOTE(review): presumably the prefix of the metric names the dashboard
  # queries — confirm against the dashboard template.
  metricsPrefix: freeleaps_metrics
# Workload settings for the metrics service: image, ports, resources, probes,
# service exposure, scraping, runtime configuration and VPA bounds.
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file — confirm it against the chart's templates before applying.
metrics:
  replicas: 1
  image:
    registry: docker.io
    # NOTE(review): null here; presumably the chart falls back to
    # global.repository — confirm.
    repository: null
    name: metrics
    tag: snapshot-d4c027e
  imagePullPolicy: IfNotPresent
  ports:
    - name: http
      containerPort: 8009
      protocol: TCP
  resources:
    requests:
      cpu: '0.1'
      memory: 64Mi
    limits:
      cpu: '0.2'
      memory: 128Mi
  # Both probes are HTTP GETs against port 8009, the same port the container
  # and the service expose.
  probes:
    liveness:
      type: httpGet
      config:
        path: /api/_/livez
        port: 8009
      initialDelaySeconds: 30
      periodSeconds: 10
      timeoutSeconds: 10
      successThreshold: 1
      failureThreshold: 5
      terminationGracePeriodSeconds: 30
    readiness:
      type: httpGet
      config:
        path: /api/_/readyz
        port: 8009
      initialDelaySeconds: 30
      periodSeconds: 10
      timeoutSeconds: 10
      successThreshold: 1
      failureThreshold: 5
  services:
    - name: metrics-service
      type: ClusterIP
      port: 8009
      targetPort: 8009
  serviceMonitor:
    enabled: true
    labels:
      release: kube-prometheus-stack
    namespace: freeleaps-monitoring-system
    # NOTE(review): "internal" looks like a typo for "interval" (the scrape
    # interval). Left unchanged because the chart template may read this exact
    # key — confirm and rename both sides together if so.
    internal: 30s
    scrapeTimeout: ''
  configs:
    starrocksHost: freeleaps-starrocks-cluster-fe-service.freeleaps-data-platform.svc
    starrocksPort: 9030
    starrocksUser: freeleaps_dw_admin
    # SECURITY NOTE(review): plaintext database credential committed in a
    # values file. Rotate it and source it from a secret store / Kubernetes
    # Secret instead. Value kept as-is (quoted so tooling cannot retype it).
    starrocksPassword: 'r7Y@QTb*7BQN@hDGsN'
    starrocksDatabase: freeleaps
    prometheusEndpoint: http://kube-prometheus-stack-prometheus.freeleaps-monitoring-system:9090
  vpa:
    minAllowed:
      enabled: false
      cpu: 100m
      memory: 64Mi
    maxAllowed:
      enabled: true
      cpu: 100m
      memory: 128Mi
    controlledResources:
      - cpu
      - memory
# Alerting rules for the metrics service, labelled for the
# kube-prometheus-stack release.
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file — confirm it against the chart's templates before applying.
prometheusRule:
  # NOTE(review): "freepeals" looks like a misspelling of "freeleaps". Left
  # unchanged because renaming alters the deployed resource's name — confirm
  # before correcting.
  name: freepeals-prod-metrics
  enabled: true
  namespace: freeleaps-monitoring-system
  labels:
    release: kube-prometheus-stack
  rules:
    # Fires when the scrape target has been unreachable for one minute.
    - alert: FreeleapsMetricsServiceDown
      # `up` is 1 for a successful scrape and 0 for a failed one, so "down"
      # must compare against 0. The previous `== 1` fired while the service
      # was healthy and stayed silent during an outage.
      expr: 'up{job="metrics-service"} == 0'
      for: 1m
      labels:
        severity: critical
        service: metrics-service
        namespace: freeleaps-monitoring-system
      annotations:
        summary: 'Freeleaps Metrics service is down (instance {{ $labels.instance }})'
        description: Freeleaps Metrics service has been down for more than 1 minutes.
        runbook_url: https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7
    # Fires when the per-second rate of 5xx responses, averaged over 5 minutes,
    # stays above 0.1 for five minutes.
    - alert: FreeleapsMetricsServiceHighErrorRate
      expr: 'rate(http_requests_total{job="metrics-service",status=~"5.."}[5m]) > 0.1'
      for: 5m
      labels:
        severity: warning
        service: metrics-service
        namespace: freeleaps-monitoring-system
      annotations:
        summary: 'High error rate in freeleaps metrics service (instance {{ $labels.instance }})'
        description: 'Freeleaps Metrics service error rate is {{ $value }} errors per second.'
        runbook_url: https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7