diff --git a/cluster/manifests/freeleaps-data-platform/star-rocks/README.md b/cluster/manifests/freeleaps-data-platform/star-rocks/README.md new file mode 100644 index 00000000..65176c30 --- /dev/null +++ b/cluster/manifests/freeleaps-data-platform/star-rocks/README.md @@ -0,0 +1,128 @@ +# StarRocks 高可用集群部署 + +## 概述 +本项目使用 StarRocks Kubernetes Operator 直接部署高可用 StarRocks 集群。 + +## 组件架构 +- **Frontend (FE)**: 3个副本,负责元数据管理和查询协调 +- **Backend (BE)**: 3个副本,负责数据存储和计算 +- **Compute Node (CN)**: 2个副本,提供弹性计算能力 + +## 文件说明 + +### 1. ha-starrocks-cluster.yaml +高可用 StarRocks 集群的主要配置文件,包含: +- FE、BE、CN 组件的副本数和镜像配置 +- Pod 反亲和性配置,确保高可用 +- 服务配置和环境变量 + +### 2. starrocks-configmaps.yaml +各组件的配置文件: +- **FE配置**: 日志、端口、JVM参数等 +- **BE配置**: 存储、端口、日志级别等 +- **CN配置**: 计算节点端口和日志配置 + +### 3. ha-starrocks-values.yaml +Helm values 配置文件(用于参考) + +### 4. values.yaml +Helm Chart 的默认值配置(用于现有集群) + +### 5. vpa.yaml +垂直 Pod 自动扩缩容配置 + +## 部署步骤 + +### 1. 应用配置 +```bash +# 首先应用 ConfigMap 配置 +kubectl apply -f starrocks-configmaps.yaml + +# 然后应用 StarRocks 集群配置 +kubectl apply -f ha-starrocks-cluster.yaml +``` + +### 2. 检查部署状态 +```bash +# 检查 StarRocks 集群状态 +kubectl get starrockscluster -n freeleaps-data-platform + +# 检查 Pod 状态 +kubectl get pods -n freeleaps-data-platform -l app=starrocks + +# 检查服务状态 +kubectl get svc -n freeleaps-data-platform -l app=starrocks +``` + +### 3. 
查看日志 +```bash +# 查看 FE 日志 +kubectl logs -n freeleaps-data-platform -l app.kubernetes.io/component=fe + +# 查看 BE 日志 +kubectl logs -n freeleaps-data-platform -l app.kubernetes.io/component=be + +# 查看 CN 日志 +kubectl logs -n freeleaps-data-platform -l app.kubernetes.io/component=cn +``` + +## 高可用特性 +- **FE高可用**: 3个FE节点,支持自动故障转移 +- **BE高可用**: 3个BE节点,数据多副本存储 +- **CN弹性**: 支持水平扩展和自动扩缩容 +- **Pod反亲和性**: 确保组件分布在不同节点上 +- **自动故障恢复**: Operator自动管理集群状态 + +## 网络配置 +- **内部服务**: 使用ClusterIP类型 +- **端口映射**: +  - FE: 8030(HTTP), 9010(编辑日志), 9020(RPC), 9030(查询) +  - BE: 8040(HTTP), 9050(心跳), 8060(BRPC), 9060(Thrift) +  - CN: 8040(HTTP), 9050(心跳), 8060(BRPC), 9060(Thrift) + +## 存储配置 +- **FE元数据**: 使用现有存储配置 +- **BE数据**: 使用现有存储配置 +- **CN缓存**: 使用现有存储配置 +- **数据持久化**: 支持PVC自动创建和绑定 + +## 监控和运维 +- **健康检查**: 内置就绪和存活探针 +- **日志收集**: 结构化日志输出 +- **资源监控**: 支持Prometheus指标收集 +- **自动扩缩容**: VPA支持垂直资源调整 +- **Operator管理**: 自动集群生命周期管理 + +## 升级和回滚 +```bash +# 升级集群配置 +kubectl apply -f ha-starrocks-cluster.yaml + +# 回滚 Operator Deployment 到上一个版本(注意:这不会回滚 StarRocksCluster 配置;如需回滚集群,请重新 apply 上一版本的 ha-starrocks-cluster.yaml) +kubectl rollout undo deployment/starrocks-operator -n freeleaps-data-platform + +# 查看集群状态 +kubectl describe starrockscluster freeleaps-starrocks -n freeleaps-data-platform +``` + +## 故障排除 +```bash +# 检查集群状态 +kubectl describe starrockscluster freeleaps-starrocks -n freeleaps-data-platform + +# 检查 Pod 事件 +kubectl get events -n freeleaps-data-platform --sort-by='.lastTimestamp' + +# 检查 Operator 日志 +kubectl logs -n freeleaps-data-platform -l app=starrocks-operator + +# 检查存储状态 +kubectl get pvc -n freeleaps-data-platform +kubectl get pv +``` + +## 注意事项 +1. **现有集群**: 此配置将升级现有的 `freeleaps-starrocks` 集群 +2. **数据安全**: 仅当各组件已配置持久化存储(PVC)时,升级过程中数据才不会丢失;升级前请确认存储配置(当前 `ha-starrocks-cluster.yaml` 未声明 storageVolumes)并做好备份,否则 Pod 重建时数据可能丢失 +3. **滚动更新**: Operator 会自动进行滚动更新,确保服务连续性 +4. 
**资源需求**: 确保集群有足够的资源支持增加的副本数 diff --git a/cluster/manifests/freeleaps-data-platform/star-rocks/ha-starrocks-cluster.yaml b/cluster/manifests/freeleaps-data-platform/star-rocks/ha-starrocks-cluster.yaml new file mode 100644 index 00000000..f650c3b6 --- /dev/null +++ b/cluster/manifests/freeleaps-data-platform/star-rocks/ha-starrocks-cluster.yaml @@ -0,0 +1,105 @@ +apiVersion: starrocks.com/v1 +kind: StarRocksCluster +metadata: + name: freeleaps-starrocks + namespace: freeleaps-data-platform + labels: + app: starrocks + component: analytics + cluster-type: ha +spec: + starRocksFeSpec: + replicas: 3 + image: starrocks/fe-ubuntu:3.3-latest + service: + type: ClusterIP + feEnvVars: + - name: TZ + value: UTC + configMapInfo: + configMapName: ha-starrocks-fe-config + resolveKey: fe.conf + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app.kubernetes.io/component + operator: In + values: + - fe + topologyKey: "kubernetes.io/hostname" + tolerations: + - key: "node-role.kubernetes.io/control-plane" + operator: "Exists" + effect: "NoSchedule" + - key: "node-role.kubernetes.io/devops" + operator: "Exists" + effect: "NoSchedule" + - key: "node-role.kubernetes.io/disk-pressure" + operator: "Exists" + effect: "NoSchedule" + + starRocksBeSpec: + replicas: 3 + image: starrocks/be-ubuntu:3.3-latest + service: + type: ClusterIP + beEnvVars: + - name: TZ + value: UTC + configMapInfo: + configMapName: ha-starrocks-be-config + resolveKey: be.conf + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app.kubernetes.io/component + operator: In + values: + - be + topologyKey: "kubernetes.io/hostname" + tolerations: + - key: "node-role.kubernetes.io/control-plane" + operator: "Exists" + effect: "NoSchedule" + - key: "node-role.kubernetes.io/devops" + operator: "Exists" + effect: "NoSchedule" + - key: 
"node-role.kubernetes.io/disk-pressure" + operator: "Exists" + effect: "NoSchedule" + + starRocksCnSpec: + replicas: 2 + image: starrocks/cn-ubuntu:3.3-latest + service: + type: ClusterIP + cnEnvVars: + - name: TZ + value: UTC + configMapInfo: + configMapName: ha-starrocks-cn-config + resolveKey: cn.conf + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app.kubernetes.io/component + operator: In + values: + - cn + topologyKey: "kubernetes.io/hostname" + tolerations: + - key: "node-role.kubernetes.io/control-plane" + operator: "Exists" + effect: "NoSchedule" + - key: "node-role.kubernetes.io/devops" + operator: "Exists" + effect: "NoSchedule" + - key: "node-role.kubernetes.io/disk-pressure" + operator: "Exists" + effect: "NoSchedule" diff --git a/cluster/manifests/freeleaps-data-platform/star-rocks/ha-starrocks-values.yaml b/cluster/manifests/freeleaps-data-platform/star-rocks/ha-starrocks-values.yaml new file mode 100644 index 00000000..c49b6556 --- /dev/null +++ b/cluster/manifests/freeleaps-data-platform/star-rocks/ha-starrocks-values.yaml @@ -0,0 +1,181 @@ +# 高可用 StarRocks 集群配置 +# 基于现有的 values.yaml,配置高可用特性 + +# 设置时区 +timeZone: UTC + +# 部署 StarRocks 集群 +starrocksCluster: + # 集群名称 + name: "freeleaps-starrocks" + # 命名空间 + namespace: "freeleaps-data-platform" + # 启用 BE 和 CN 组件 + enabledBe: true + enabledCn: true + +# 全局组件配置 +componentValues: + # 镜像版本 + image: + tag: "3.3-latest" + # 时区 + timeZone: UTC + +# FE 组件配置 +starrocksFESpec: + # 副本数 - 高可用需要至少3个 + replicas: 3 + # 镜像 + image: + repository: starrocks/fe-ubuntu + tag: "3.3-latest" + # 资源配置 + resources: + requests: + cpu: 2 + memory: 4Gi + limits: + cpu: 4 + memory: 8Gi + # 存储配置 + storageSpec: + name: "fe" + storageClassName: "azure-disk-std-ssd-lrs" + storageSize: 20Gi + logStorageClassName: "azure-disk-std-ssd-lrs" + logStorageSize: 10Gi + # 服务配置 + service: + type: "ClusterIP" + # 环境变量 + feEnvVars: + - name: TZ + value: UTC + # 配置文件 + 
config: | + LOG_DIR = ${STARROCKS_HOME}/log + DATE = "$(date +%Y%m%d-%H%M%S)" + JAVA_OPTS="-Dlog4j2.formatMsgNoLookups=true -Xmx4g -XX:+UseG1GC -Xlog:gc*:${LOG_DIR}/fe.gc.log.$DATE:time" + http_port = 8030 + rpc_port = 9020 + query_port = 9030 + edit_log_port = 9010 + mysql_service_nio_enabled = true + sys_log_level = INFO + # Pod 反亲和性 - 确保高可用 + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app.kubernetes.io/component + operator: In + values: + - fe + topologyKey: "kubernetes.io/hostname" + +# BE 组件配置 +starrocksBeSpec: + # 副本数 - 高可用需要至少3个 + replicas: 3 + # 镜像 + image: + repository: starrocks/be-ubuntu + tag: "3.3-latest" + # 资源配置 + resources: + requests: + cpu: 4 + memory: 8Gi + limits: + cpu: 8 + memory: 16Gi + # 存储配置 + storageSpec: + name: "be" + storageClassName: "azure-disk-std-ssd-lrs" + storageSize: 50Gi + logStorageClassName: "azure-disk-std-ssd-lrs" + logStorageSize: 20Gi + # 服务配置 + service: + type: "ClusterIP" + # 环境变量 + beEnvVars: + - name: TZ + value: UTC + # 配置文件 + config: | + be_port = 9060 + webserver_port = 8040 + heartbeat_service_port = 9050 + brpc_port = 8060 + sys_log_level = INFO + default_rowset_type = beta + # Pod 反亲和性 - 确保高可用 + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app.kubernetes.io/component + operator: In + values: + - be + topologyKey: "kubernetes.io/hostname" + +# CN 组件配置 +starrocksCnSpec: + # 副本数 - 弹性计算节点 + replicas: 2 + # 镜像 + image: + repository: starrocks/cn-ubuntu + tag: "3.3-latest" + # 资源配置 + resources: + requests: + cpu: 2 + memory: 4Gi + limits: + cpu: 4 + memory: 8Gi + # 存储配置 + storageSpec: + name: "cn" + storageClassName: "azure-disk-std-ssd-lrs" + storageSize: 20Gi + logStorageClassName: "azure-disk-std-ssd-lrs" + logStorageSize: 10Gi + # 服务配置 + service: + type: "ClusterIP" + # 环境变量 + cnEnvVars: + - name: TZ + value: UTC + # 配置文件 + config: | + sys_log_level = 
INFO + thrift_port = 9060 + webserver_port = 8040 + heartbeat_service_port = 9050 + brpc_port = 8060 + # Pod 反亲和性 - 确保高可用 + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app.kubernetes.io/component + operator: In + values: + - cn + topologyKey: "kubernetes.io/hostname" + +# 监控配置 +metrics: + serviceMonitor: + enabled: true + interval: 15s diff --git a/cluster/manifests/freeleaps-data-platform/star-rocks/starrocks-configmaps.yaml b/cluster/manifests/freeleaps-data-platform/star-rocks/starrocks-configmaps.yaml new file mode 100644 index 00000000..f6dd5138 --- /dev/null +++ b/cluster/manifests/freeleaps-data-platform/star-rocks/starrocks-configmaps.yaml @@ -0,0 +1,43 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: ha-starrocks-fe-config + namespace: freeleaps-data-platform +data: + fe.conf: | + LOG_DIR = ${STARROCKS_HOME}/log + DATE = "$(date +%Y%m%d-%H%M%S)" + JAVA_OPTS="-Dlog4j2.formatMsgNoLookups=true -Xmx4g -XX:+UseG1GC -Xlog:gc*:${LOG_DIR}/fe.gc.log.$DATE:time" + http_port = 8030 + rpc_port = 9020 + query_port = 9030 + edit_log_port = 9010 + mysql_service_nio_enabled = true + sys_log_level = INFO +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: ha-starrocks-be-config + namespace: freeleaps-data-platform +data: + be.conf: | + be_port = 9060 + webserver_port = 8040 + heartbeat_service_port = 9050 + brpc_port = 8060 + sys_log_level = INFO + default_rowset_type = beta +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: ha-starrocks-cn-config + namespace: freeleaps-data-platform +data: + cn.conf: | + sys_log_level = INFO + thrift_port = 9060 + webserver_port = 8040 + heartbeat_service_port = 9050 + brpc_port = 8060