freeleaps-ops/freeleaps/helm-pkg/metrics/values.prod.yaml
2025-09-24 08:49:28 +00:00

113 lines
3.2 KiB
YAML

# Helm `global` values: image registry/repository and node-selection settings.
global:
  registry: docker.io
  repository: freeleaps
  # Empty mapping: no node-selector constraints are set in this file.
  nodeSelector: {}
# Log ingestion toggle; switched off in this values file.
logIngest:
  enabled: false
# Dashboard settings for the PROD metrics service.
dashboard:
  enabled: true
  # Identifier and human-readable title of the dashboard.
  name: freeleaps-prod-metrics-dashboard
  title: Freeleaps Metrics Dashboard (PROD)
  # Presumably the prefix of the metric names shown on the dashboard — confirm in the chart.
  metricsPrefix: freeleaps_metrics
# Settings for the `metrics` workload: image, ports, resources, probes,
# services, ServiceMonitor and application configuration.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation was lost — confirm it against the chart templates before applying.
metrics:
  replicas: 1

  image:
    registry: docker.io
    # Explicit null; presumably the chart falls back to global.repository — confirm.
    repository: null
    name: metrics
    tag: snapshot-d4c027e
    imagePullPolicy: IfNotPresent

  ports:
    - name: http
      containerPort: 8009
      protocol: TCP

  # CPU values are quoted so they stay strings ('0.1' is not parsed as a float).
  resources:
    requests:
      cpu: '0.1'
      memory: 64Mi
    limits:
      cpu: '0.2'
      memory: 128Mi

  # Both probes issue HTTP GETs against port 8009, the same port listed under `ports`.
  probes:
    liveness:
      type: httpGet
      config:
        path: /api/_/livez
        port: 8009
      initialDelaySeconds: 30
      periodSeconds: 10
      timeoutSeconds: 10
      successThreshold: 1
      failureThreshold: 5
      terminationGracePeriodSeconds: 30
    readiness:
      type: httpGet
      config:
        path: /api/_/readyz
        port: 8009
      initialDelaySeconds: 30
      periodSeconds: 10
      timeoutSeconds: 10
      successThreshold: 1
      failureThreshold: 5

  services:
    - name: metrics-service
      type: ClusterIP
      port: 8009
      targetPort: 8009

  serviceMonitor:
    enabled: true
    labels:
      release: kube-prometheus-stack
    namespace: freeleaps-monitoring-system
    # NOTE(review): this key is spelled `internal`; ServiceMonitor endpoints use
    # `interval`. Confirm which key the chart template reads before renaming it.
    internal: 30s
    # Empty string: no explicit scrape timeout is set here.
    scrapeTimeout: ''

  configs:
    starrocksHost: freeleaps-starrocks-cluster-fe-service.freeleaps-data-platform.svc
    starrocksPort: 9030
    starrocksUser: freeleaps_dw_admin
    # NOTE(review): credential committed in plain text. Rotate it and supply it
    # from a Kubernetes Secret or external secret store instead of this file.
    starrocksPassword: r7Y@QTb*7BQN@hDGsN
    starrocksDatabase: freeleaps
    prometheusEndpoint: http://kube-prometheus-stack-prometheus.freeleaps-monitoring-system:9090
# Vertical Pod Autoscaler bounds.
# NOTE(review): indentation was lost in the copy this was reconstructed from;
# confirm whether `vpa` is a top-level key or belongs under `metrics`.
vpa:
  # Lower bound values are present but switched off via `enabled: false`.
  minAllowed:
    enabled: false
    cpu: 100m
    memory: 64Mi
  # Upper bound is switched on.
  maxAllowed:
    enabled: true
    cpu: 100m
    memory: 128Mi
  controlledResources:
    - cpu
    - memory
# Alerting rules for the metrics service, labelled `release: kube-prometheus-stack`.
# NOTE(review): indentation was lost in the copy this was reconstructed from;
# confirm whether `prometheusRule` is a top-level key or belongs under `metrics`.
prometheusRule:
  # Uses the same `freeleaps-prod-metrics` stem as the dashboard name.
  name: freeleaps-prod-metrics
  enabled: true
  namespace: freeleaps-monitoring-system
  labels:
    release: kube-prometheus-stack
  rules:
    # Fires when the `metrics-service` scrape target has reported `up == 0`
    # for the full `for` window (1m, matching the description text).
    - alert: FreeleapsMetricsServiceDown
      expr: up{job="metrics-service"} == 0
      for: 1m
      labels:
        severity: critical
        service: metrics-service
        namespace: freeleaps-monitoring-system
      annotations:
        summary: Freeleaps Metrics service is down (instance {{ $labels.instance }})
        description: Freeleaps Metrics service has been down for more than 1 minute.
        runbook_url: https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7
    # Fires when the 5-minute rate of 5xx responses stays above 0.1 per second
    # for 5 minutes; the threshold is an absolute rate, not a ratio of traffic.
    - alert: FreeleapsMetricsServiceHighErrorRate
      expr: rate(http_requests_total{job="metrics-service",status=~"5.."}[5m]) > 0.1
      for: 5m
      labels:
        severity: warning
        service: metrics-service
        namespace: freeleaps-monitoring-system
      annotations:
        summary: High error rate in freeleaps metrics service (instance {{ $labels.instance }})
        description: Freeleaps Metrics service error rate is {{ $value }} errors per second.
        runbook_url: https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7