---
# Values for the Freeleaps metrics service (PROD, per the dashboard title and
# alert namespace below). Presumably consumed as Helm chart values.
#
# NOTE(review): this file was reconstructed from a whitespace-collapsed copy.
# Key order and every value are preserved exactly, but the nesting is inferred
# from key order. Confirm against the chart templates that `logIngest`,
# `dashboard`, `vpa` and `prometheusRule` are top-level keys, and that the
# probe timing fields are siblings of `config`.

# Defaults shared across workloads.
global:
  registry: docker.io
  repository: freeleaps
  nodeSelector: {}

logIngest:
  enabled: false

dashboard:
  enabled: true
  name: freeleaps-prod-metrics-dashboard
  title: Freeleaps Metrics Dashboard (PROD)
  metricsPrefix: freeleaps_metrics

# Workload settings for the metrics service itself.
metrics:
  replicas: 1
  image:
    registry: docker.io
    # Explicit null; presumably falls back to global.repository - TODO confirm.
    repository: null
    name: metrics
    # Quoted so the version can never be re-typed by a parser or formatter.
    tag: '1.14.1'
  imagePullPolicy: IfNotPresent
  ports:
    - name: http
      containerPort: 8009
      protocol: TCP
  resources:
    requests:
      cpu: '0.1'
      memory: 64Mi
    limits:
      cpu: '0.2'
      memory: 128Mi
  probes:
    liveness:
      type: httpGet
      config:
        path: /api/_/livez
        port: 8009
      initialDelaySeconds: 30
      periodSeconds: 10
      timeoutSeconds: 10
      successThreshold: 1
      failureThreshold: 5
      # Appears only between the liveness and readiness settings in the
      # source, so it is kept on the liveness probe.
      terminationGracePeriodSeconds: 30
    readiness:
      type: httpGet
      config:
        path: /api/_/readyz
        port: 8009
      initialDelaySeconds: 30
      periodSeconds: 10
      timeoutSeconds: 10
      successThreshold: 1
      failureThreshold: 5
  services:
    - name: metrics-service
      type: ClusterIP
      port: 8009
      targetPort: 8009
  serviceMonitor:
    enabled: true
    labels:
      release: kube-prometheus-stack
    namespace: freeleaps-monitoring-system
    # NOTE(review): `internal` looks like a typo for `interval` (the value is
    # a duration). Left unchanged because the chart template may read this
    # exact key - verify before renaming.
    internal: 30s
    scrapeTimeout: ''
  configs:
    starrocksHost: freeleaps-starrocks-cluster-fe-service.freeleaps-data-platform.svc
    starrocksPort: 9030
    starrocksUser: freeleaps_dw_admin
    # NOTE(review): plaintext credential committed in a values file. Move it
    # to a secret store / Kubernetes Secret reference and rotate it. Quoted so
    # the special characters (@, *) are never re-interpreted by tooling.
    starrocksPassword: 'r7Y@QTb*7BQN@hDGsN'
    starrocksDatabase: freeleaps
    prometheusEndpoint: 'http://kube-prometheus-stack-prometheus.freeleaps-monitoring-system:9090'

vpa:
  minAllowed:
    enabled: false
    cpu: 100m
    memory: 64Mi
  # NOTE(review): maxAllowed.cpu (100m) is below the container CPU limit
  # ('0.2') set under metrics.resources - confirm this ceiling is intended.
  maxAllowed:
    enabled: true
    cpu: 100m
    memory: 128Mi
  controlledResources:
    - cpu
    - memory

prometheusRule:
  # NOTE(review): "freepeals" looks like a misspelling of "freeleaps". Left
  # unchanged because renaming would change the name of the deployed rule
  # object - fix deliberately.
  name: freepeals-prod-metrics
  enabled: true
  namespace: freeleaps-monitoring-system
  labels:
    release: kube-prometheus-stack
  rules:
    # Fires when the scrape target for job "metrics-service" reports down.
    - alert: FreeleapsMetricsServiceDown
      expr: 'up{job="metrics-service"} == 0'
      for: 1m
      labels:
        severity: critical
        service: metrics-service
        namespace: freeleaps-prod
      annotations:
        # The {{ ... }} placeholders are kept verbatim as string data.
        summary: 'Freeleaps Metrics service is down (instance {{ $labels.instance }})'
        description: Freeleaps Metrics service has been down for more than 1 minutes.
        runbook_url: 'https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7'
    # Fires when the per-second rate of 5xx responses stays above 0.1.
    - alert: FreeleapsMetricsServiceHighErrorRate
      expr: 'rate(http_requests_total{job="metrics-service",status=~"5.."}[5m]) > 0.1'
      for: 5m
      labels:
        severity: warning
        service: metrics-service
        namespace: freeleaps-prod
      annotations:
        summary: 'High error rate in freeleaps metrics service (instance {{ $labels.instance }})'
        description: 'Freeleaps Metrics service error rate is {{ $value }} errors per second.'
        runbook_url: 'https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7'