feat(storage): update resource requests and limits for MongoDB and add Azure Disk CSI storage classes

Signed-off-by: zhenyus <zhenyus@mathmast.com>
This commit is contained in:
zhenyus 2025-03-05 16:47:18 +08:00
parent 34ac69a8fe
commit ec3b743f7a
9 changed files with 2026 additions and 31 deletions

View File

@ -1397,31 +1397,31 @@ kubelet:
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
## ##
cAdvisorMetricRelabelings: cAdvisorMetricRelabelings: []
# Drop less useful container CPU metrics. # # Drop less useful container CPU metrics.
- sourceLabels: [__name__] # - sourceLabels: [__name__]
action: drop # action: drop
regex: 'container_cpu_(cfs_throttled_seconds_total|load_average_10s|system_seconds_total|user_seconds_total)' # regex: 'container_cpu_(cfs_throttled_seconds_total|load_average_10s|system_seconds_total|user_seconds_total)'
# Drop less useful container / always zero filesystem metrics. # # Drop less useful container / always zero filesystem metrics.
- sourceLabels: [__name__] # - sourceLabels: [__name__]
action: drop # action: drop
regex: 'container_fs_(io_current|io_time_seconds_total|io_time_weighted_seconds_total|reads_merged_total|sector_reads_total|sector_writes_total|writes_merged_total)' # regex: 'container_fs_(io_current|io_time_seconds_total|io_time_weighted_seconds_total|reads_merged_total|sector_reads_total|sector_writes_total|writes_merged_total)'
# Drop less useful / always zero container memory metrics. # # Drop less useful / always zero container memory metrics.
- sourceLabels: [__name__] # - sourceLabels: [__name__]
action: drop # action: drop
regex: 'container_memory_(mapped_file|swap)' # regex: 'container_memory_(mapped_file|swap)'
# Drop less useful container process metrics. # # Drop less useful container process metrics.
- sourceLabels: [__name__] # - sourceLabels: [__name__]
action: drop # action: drop
regex: 'container_(file_descriptors|tasks_state|threads_max)' # regex: 'container_(file_descriptors|tasks_state|threads_max)'
# Drop container spec metrics that overlap with kube-state-metrics. # # Drop container spec metrics that overlap with kube-state-metrics.
- sourceLabels: [__name__] # - sourceLabels: [__name__]
action: drop # action: drop
regex: 'container_spec.*' # regex: 'container_spec.*'
# Drop cgroup metrics with no pod. # # Drop cgroup metrics with no pod.
- sourceLabels: [id, pod] # - sourceLabels: [id, pod]
action: drop # action: drop
regex: '.+;' # regex: '.+;'
# - sourceLabels: [__name__, image] # - sourceLabels: [__name__, image]
# separator: ; # separator: ;
# regex: container_([a-z_]+); # regex: container_([a-z_]+);
@ -3868,7 +3868,7 @@ prometheus:
## How long to retain metrics ## How long to retain metrics
## ##
retention: 10d retention: 7d
## Maximum size of metrics ## Maximum size of metrics
## ##

View File

@ -34,3 +34,30 @@ mountOptions:
- -o negative_timeout=120 - -o negative_timeout=120
- --log-level=LOG_WARNING # LOG_WARNING, LOG_INFO, LOG_DEBUG - --log-level=LOG_WARNING # LOG_WARNING, LOG_INFO, LOG_DEBUG
- --cache-size-mb=1000 # Default will be 80% of available memory, eviction will happen beyond that. - --cache-size-mb=1000 # Default will be 80% of available memory, eviction will happen beyond that.
---
# StorageClass backed by the Azure Blob CSI driver (blob.csi.azure.com),
# using the fuse protocol with the Standard_LRS SKU.
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: azure-blob-fuse-std-lrs
provisioner: blob.csi.azure.com
reclaimPolicy: Delete
volumeBindingMode: Immediate
allowVolumeExpansion: true
parameters:
  skuName: Standard_LRS
  protocol: fuse
  location: westus2
  resourceGroup: k8s
  containerNamePrefix: fs-storage-sys-
  # Secret (name + namespace) handed to the driver for this class.
  secretName: freeleaps-azure-blob-storage-secret
  secretNamespace: freeleaps-storage-system
# Options are passed in this order; keep the sequence intact.
mountOptions:
  - '-o allow_other'
  - '--file-cache-timeout-in-seconds=120'
  - '--use-attr-cache=true'
  # prevent billing charges on mounting
  - '--cancel-list-on-mount-seconds=10'
  - '-o attr_timeout=120'
  - '-o entry_timeout=120'
  - '-o negative_timeout=120'
  # LOG_WARNING, LOG_INFO, LOG_DEBUG
  - '--log-level=LOG_WARNING'
  # Default will be 80% of available memory, eviction will happen beyond that.
  - '--cache-size-mb=1000'

View File

@ -0,0 +1,26 @@
---
# StorageClass for Azure Disk CSI volumes on the Standard_LRS SKU.
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: azure-disk-std-lrs
provisioner: disk.csi.azure.com
reclaimPolicy: Delete
# Binding is deferred until a pod consuming the claim exists.
volumeBindingMode: WaitForFirstConsumer
allowVolumeExpansion: true
parameters:
  skuName: Standard_LRS
  location: westus2
  resourceGroup: k8s
---
# StorageClass for Azure Disk CSI volumes on the StandardSSD_LRS SKU.
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: azure-disk-std-ssd-lrs
provisioner: disk.csi.azure.com
reclaimPolicy: Delete
# Binding is deferred until a pod consuming the claim exists.
volumeBindingMode: WaitForFirstConsumer
allowVolumeExpansion: true
parameters:
  skuName: StandardSSD_LRS
  location: westus2
  resourceGroup: k8s

View File

@ -0,0 +1,42 @@
---
# StatefulSet with one pod that appends the current date to
# /mnt/blob/outfile once per second, on a 1Gi volume claimed from the
# azure-disk-std-lrs storage class.
# NOTE(review): the resource name and mount path say "blob" while the claim
# uses an Azure Disk storage class — confirm the naming is intentional.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: statefulset-blob
  labels:
    app: nginx
spec:
  serviceName: statefulset-blob
  replicas: 1
  # Must match the pod template labels below.
  selector:
    matchLabels:
      app: nginx
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: nginx
    spec:
      nodeSelector:
        kubernetes.io/os: linux
      containers:
        - name: statefulset-blob
          image: mcr.microsoft.com/mirror/docker/library/nginx:1.23
          # Endless writer loop; the container never serves traffic itself.
          command:
            - '/bin/bash'
            - '-c'
            - 'set -euo pipefail; while true; do echo $(date) >> /mnt/blob/outfile; sleep 1; done'
          volumeMounts:
            - name: persistent-storage
              mountPath: /mnt/blob
              readOnly: false
  volumeClaimTemplates:
    - metadata:
        # Referenced by volumeMounts[].name in the pod template.
        name: persistent-storage
      spec:
        storageClassName: azure-disk-std-lrs
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 1Gi

View File

@ -0,0 +1,248 @@
# Container images for the Azure Disk CSI driver and its sidecars.
# NOTE(review): repository paths start with "/" and are presumably appended to
# baseRepo by the chart templates — confirm before changing either side.
image:
  baseRepo: mcr.microsoft.com
  azuredisk:
    repository: /oss/kubernetes-csi/azuredisk-csi
    tag: 'v1.31.2'
    pullPolicy: IfNotPresent
  csiProvisioner:
    repository: /oss/kubernetes-csi/csi-provisioner
    tag: 'v5.1.0'
    pullPolicy: IfNotPresent
  csiAttacher:
    repository: /oss/kubernetes-csi/csi-attacher
    tag: 'v4.7.0'
    pullPolicy: IfNotPresent
  csiResizer:
    repository: /oss/kubernetes-csi/csi-resizer
    tag: 'v1.12.0'
    pullPolicy: IfNotPresent
  livenessProbe:
    repository: /oss/kubernetes-csi/livenessprobe
    tag: 'v2.14.0'
    pullPolicy: IfNotPresent
  nodeDriverRegistrar:
    repository: /oss/kubernetes-csi/csi-node-driver-registrar
    tag: 'v2.12.0'
    pullPolicy: IfNotPresent
# Service accounts used by the driver components.
serviceAccount:
  # When true, service accounts will be created for you.
  # Set to false if you want to use your own.
  create: true
  # Names of the service accounts to be created or used.
  controller: csi-azuredisk-controller-sa
  node: csi-azuredisk-node-sa
  snapshotController: csi-snapshot-controller-sa
# RBAC settings for the chart.
rbac:
  create: true
  name: azuredisk
# Settings for the CSI controller component.
controller:
  name: csi-azuredisk-controller
  # Cloud-config secret location; node.cloudConfigSecret* uses the same values.
  cloudConfigSecretName: azure-cloud-provider
  cloudConfigSecretNamespace: freeleaps-storage-system
  allowEmptyCloudConfig: false
  enableTrafficManager: false
  trafficManagerPort: 7788
  replicas: 1
  metricsPort: 29604
  livenessProbe:
    healthPort: 29602
  runOnMaster: false
  runOnControlPlane: true
  disableAvailabilitySetNodes: false
  vmType: ''
  provisionerWorkerThreads: 100
  attacherWorkerThreads: 1000
  vmssCacheTTLInSeconds: -1
  logLevel: 5
  extraArgs: []
  otelTracing:
    enabled: false
    otelServiceName: csi-azuredisk-controller
    otelExporterEndpoint: 'http://localhost:4317'
  # Tolerate the master / control-plane taint variants and CriticalAddonsOnly.
  tolerations:
    - key: node-role.kubernetes.io/master
      operator: Exists
      effect: NoSchedule
    - key: node-role.kubernetes.io/controlplane
      operator: Exists
      effect: NoSchedule
    - key: node-role.kubernetes.io/control-plane
      operator: Exists
      effect: NoSchedule
    - key: CriticalAddonsOnly
      operator: Exists
      effect: NoSchedule
  # this setting could be disabled if controller does not depend on MSI setting
  hostNetwork: true
  labels: {}
  annotations: {}
  podLabels: {}
  podAnnotations: {}
  nodeSelector: {}
  affinity: {}
  # Per-container resources: memory limit only, small CPU/memory requests.
  resources:
    csiProvisioner:
      limits:
        memory: 500Mi
      requests:
        cpu: 10m
        memory: 20Mi
    csiAttacher:
      limits:
        memory: 500Mi
      requests:
        cpu: 10m
        memory: 20Mi
    csiResizer:
      limits:
        memory: 500Mi
      requests:
        cpu: 10m
        memory: 20Mi
    csiSnapshotter:
      limits:
        memory: 400Mi
      requests:
        cpu: 10m
        memory: 20Mi
    livenessProbe:
      limits:
        memory: 100Mi
      requests:
        cpu: 10m
        memory: 20Mi
    azuredisk:
      limits:
        memory: 500Mi
      requests:
        cpu: 10m
        memory: 20Mi
# Settings shared by the node-side driver.
node:
  # Cloud-config secret location; controller.cloudConfigSecret* uses the same values.
  cloudConfigSecretName: azure-cloud-provider
  cloudConfigSecretNamespace: freeleaps-storage-system
  # reserved data disk slot number per node, driver.volumeAttachLimit must be < 0
  reservedDataDiskSlotNum: 0
  supportZone: true
  allowEmptyCloudConfig: true
  getNodeIDFromIMDS: false
  maxUnavailable: 1
  logLevel: 5
  livenessProbe:
    healthPort: 29603
# Volume snapshot support: snapshotter sidecar, snapshot controller and an
# optional VolumeSnapshotClass (disabled here).
snapshot:
  enabled: true
  name: csi-snapshot-controller
  image:
    csiSnapshotter:
      repository: /oss/kubernetes-csi/csi-snapshotter
      tag: 'v8.2.0'
      pullPolicy: IfNotPresent
    csiSnapshotController:
      repository: /oss/kubernetes-csi/snapshot-controller
      tag: 'v8.2.0'
      pullPolicy: IfNotPresent
  snapshotController:
    name: csi-snapshot-controller
    replicas: 1
    labels: {}
    annotations: {}
    podLabels: {}
    podAnnotations: {}
    resources:
      limits:
        memory: 500Mi
      requests:
        cpu: 10m
        memory: 20Mi
  VolumeSnapshotClass:
    enabled: false
    name: csi-azuredisk-vsc
    deletionPolicy: Delete
    parameters:
      # available values: "true", "false" ("true" by default for Azure Public
      # Cloud, and "false" by default for Azure Stack Cloud).
      # The inner double quotes are part of the value; keep them.
      incremental: '"true"'
      # available values: EXISTING RESOURCE GROUP (If not specified, snapshot
      # will be stored in the same resource group as source Azure disk)
      resourceGroup: ''
      # tag format: 'key1=val1,key2=val2'
      tags: ''
    additionalLabels: {}
# Feature toggles.
feature:
  enableFSGroupPolicy: true
# Driver-wide settings.
driver:
  # Must match the `provisioner` of StorageClasses that use this driver.
  name: disk.csi.azure.com
  # maximum number of attachable volumes per node,
  # maximum number is defined according to node instance type by default(-1)
  volumeAttachLimit: -1
  customUserAgent: ''
  userAgentSuffix: 'OSS-helm'
  # available values: ""(no logs), DEBUG, INFO, WARNING, ERROR
  azureGoSDKLogLevel: ''
  httpsProxy: ''
  httpProxy: ''
  noProxy: ''
# Linux node daemonset settings.
linux:
  enabled: true
  # daemonset name
  dsName: csi-azuredisk-node
  kubelet: /var/lib/kubelet
  # available values: debian, fedora
  distro: debian
  enablePerfOptimization: true
  otelTracing:
    enabled: false
    otelServiceName: csi-azuredisk-node
    otelExporterEndpoint: 'http://localhost:4317'
  # A key-less "Exists" toleration: tolerates every taint.
  tolerations:
    - operator: Exists
  # this setting could be disabled if perfProfile is `none`
  hostNetwork: true
  # get node info from node labels instead of IMDS
  getNodeInfoFromLabels: false
  labels: {}
  annotations: {}
  podLabels: {}
  podAnnotations: {}
  nodeSelector: {}
  affinity: {}
  # Exclude nodes labelled type=virtual-kubelet.
  nodeAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
      nodeSelectorTerms:
        - matchExpressions:
            - key: type
              operator: NotIn
              values:
                - virtual-kubelet
  # Per-container resources: memory limit only, small CPU/memory requests.
  resources:
    livenessProbe:
      limits:
        memory: 100Mi
      requests:
        cpu: 10m
        memory: 20Mi
    nodeDriverRegistrar:
      limits:
        memory: 100Mi
      requests:
        cpu: 10m
        memory: 20Mi
    azuredisk:
      limits:
        memory: 1000Mi
      requests:
        cpu: 10m
        memory: 20Mi
# Windows node support is switched off.
windows:
  enabled: false
# Target Azure cloud environment.
cloud: AzurePublicCloud

## Reference to one or more secrets to be used when pulling images
## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
##
imagePullSecrets: []
# - name: "image-pull-secret"

# Workload identity settings; both IDs are left empty here.
workloadIdentity:
  clientID: ''
  # [optional] If the AAD application or user-assigned managed identity is not
  # in the same tenant as the cluster then set tenantID with the application or
  # user-assigned managed identity tenant ID
  tenantID: ''

azureCredentialFileConfigMap: azure-cred-file

View File

@ -7,3 +7,4 @@ jenkins-ci,https://charts.jenkins.io,force-update
openebs,https://openebs.github.io/openebs,force-update openebs,https://openebs.github.io/openebs,force-update
azure-blob-csi-driver,https://raw.githubusercontent.com/kubernetes-sigs/blob-csi-driver/master/charts,force-update azure-blob-csi-driver,https://raw.githubusercontent.com/kubernetes-sigs/blob-csi-driver/master/charts,force-update
godaddy-webhook,https://snowdrop.github.io/godaddy-webhook,force-update godaddy-webhook,https://snowdrop.github.io/godaddy-webhook,force-update
azure-disk-csi-driver,https://raw.githubusercontent.com/kubernetes-sigs/azuredisk-csi-driver/master/charts,force-update

View File

@ -20,11 +20,11 @@ updateStrategy:
type: RollingUpdate type: RollingUpdate
resources: resources:
requests: requests:
cpu: 1 cpu: 500m
memory: 512Mi memory: 256Mi
limits: limits:
cpu: 2 cpu: 1000m
memory: 1024Mi memory: 512Mi
containerPorts: containerPorts:
mongodb: 27017 mongodb: 27017
livenessProbe: livenessProbe:

File diff suppressed because it is too large Load Diff