#!/bin/bash

# Freeleaps Kubernetes Cluster Bootstrap Script
# This script bootstraps a complete Kubernetes cluster from Azure VMs
|
||
|
|
|
||
|
|
# Exit immediately when a command returns non-zero outside a conditional context.
set -e
|
||
|
|
|
||
|
|
# ANSI color escape sequences for log output.
# The values are kept in their literal backslash form; the print_* helpers
# expand them when a message is written.
NC='\033[0m' # No Color
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
|
||
|
|
|
||
|
|
# Configuration
# All paths are derived from the directory that contains this script, so the
# script can be invoked from any working directory.
# `cd` output is discarded because bash prints the resolved directory when the
# target is found through CDPATH, which would corrupt the captured value.
# `--` protects against a script path that begins with a dash.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" > /dev/null && pwd)"
FREELEAPS_OPS_DIR="$(dirname -- "$SCRIPT_DIR")"
INVENTORY_FILE="$FREELEAPS_OPS_DIR/cluster/ansible/manifests/inventory.ini"
KUBESPRAY_DIR="$FREELEAPS_OPS_DIR/3rd/kubespray"
MANIFESTS_DIR="$FREELEAPS_OPS_DIR/cluster/manifests"
BIN_DIR="$FREELEAPS_OPS_DIR/cluster/bin"
|
||
|
|
|
||
|
|
# Print an informational message to stdout, prefixed with a blue [INFO] tag.
# Globals:   BLUE, NC (read) - color escape sequences
# Arguments: $1 - message text, printed verbatim
print_status() {
  # %b expands the escape sequences held in the color variables; %s keeps the
  # message literal so backslashes in paths or command output are not mangled.
  printf '%b[INFO]%b %s\n' "${BLUE}" "${NC}" "${1-}"
}
|
||
|
|
|
||
|
|
# Print a success message to stdout, prefixed with a green [SUCCESS] tag.
# Globals:   GREEN, NC (read) - color escape sequences
# Arguments: $1 - message text, printed verbatim
print_success() {
  # Only the color codes go through %b; the message is emitted unmodified.
  printf '%b[SUCCESS]%b %s\n' "${GREEN}" "${NC}" "${1-}"
}
|
||
|
|
|
||
|
|
# Print a warning message to stdout, prefixed with a yellow [WARNING] tag.
# Globals:   YELLOW, NC (read) - color escape sequences
# Arguments: $1 - message text, printed verbatim
print_warning() {
  # Only the color codes go through %b; the message is emitted unmodified.
  printf '%b[WARNING]%b %s\n' "${YELLOW}" "${NC}" "${1-}"
}
|
||
|
|
|
||
|
|
# Print an error message to stdout, prefixed with a red [ERROR] tag.
# Globals:   RED, NC (read) - color escape sequences
# Arguments: $1 - message text, printed verbatim
print_error() {
  # Only the color codes go through %b; the message is emitted unmodified.
  printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "${1-}"
}
|
||
|
|
|
||
|
|
# Validate that the files, directories and command-line tools needed by the
# bootstrap are present.
# Globals:   INVENTORY_FILE, KUBESPRAY_DIR (read)
# Outputs:   progress and error messages via the print_* helpers
# Exits:     1 when the inventory file, the Kubespray directory or any
#            required tool is missing
check_prerequisites() {
  print_status "Checking prerequisites..."

  # Inventory file must exist
  if ! [[ -f "$INVENTORY_FILE" ]]; then
    print_error "Inventory file not found: $INVENTORY_FILE"
    print_error "Please run this script from the freeleaps-ops/docs directory"
    exit 1
  fi

  # Kubespray checkout must exist
  [[ -d "$KUBESPRAY_DIR" ]] || {
    print_error "Kubespray directory not found: $KUBESPRAY_DIR"
    exit 1
  }

  # Each entry is "<command>:<name reported to the user>"
  local -a absent=()
  local entry
  for entry in "ansible:ansible" "az:azure-cli" "kubectl:kubectl"; do
    command -v "${entry%%:*}" &> /dev/null || absent+=("${entry#*:}")
  done

  if (( ${#absent[@]} > 0 )); then
    print_error "Missing required tools: ${absent[*]}"
    print_warning "Please install missing tools before proceeding"
    exit 1
  fi

  print_success "All prerequisites are met"
}
|
||
|
|
|
||
|
|
# Check that every host listed in the Ansible inventory is a running Azure VM,
# offering to start any VM that is not running.
# Globals:   INVENTORY_FILE (read)
#            AZURE_RESOURCE_GROUP (read, optional) - resource group holding the
#            VMs; defaults to "k8s"
# Inputs:    reads a single-key y/N answer from stdin for each stopped VM
# Outputs:   progress messages via the print_* helpers
# Exits:     1 when a VM the user asked to start cannot be started
verify_azure_vms() {
  local resource_group="${AZURE_RESOURCE_GROUP:-k8s}"

  print_status "Verifying Azure VMs..."

  # Collect unique host names from the inventory.
  local -a vms=()
  local line vm_name section=""
  local section_re='^\[([^]]+)\]'
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ $line =~ $section_re ]]; then
      section="${BASH_REMATCH[1]}"
      continue
    fi
    # [group:vars] holds key=value pairs and [group:children] holds group
    # names; neither lists hosts.
    if [[ $section == *:* ]]; then
      continue
    fi
    if [[ ! $line =~ ^[a-zA-Z0-9-]+ ]]; then
      continue
    fi
    vm_name="${line%%[[:space:]]*}"
    # A host normally appears in several groups; report and check it once.
    if [[ " ${vms[*]} " == *" ${vm_name} "* ]]; then
      continue
    fi
    vms+=("$vm_name")
  done < "$INVENTORY_FILE"

  print_status "Found VMs in inventory: ${vms[*]}"

  # Check VM status in Azure
  local vm power_state answer
  for vm in "${vms[@]}"; do
    # powerState is part of the instance view, which `az vm show` only
    # returns when --show-details is given. A failed lookup is treated as
    # "state unknown" instead of aborting the whole script.
    power_state=$(az vm show --show-details --resource-group "$resource_group" \
      --name "$vm" --query "powerState" -o tsv 2> /dev/null) || power_state=""

    if [[ "$power_state" == "VM running" ]]; then
      print_success "VM $vm is running"
      continue
    fi

    print_warning "VM $vm is not running (state: ${power_state:-unknown})"
    # read fails at EOF (non-interactive run); treat that as "no".
    read -r -n 1 -p "Do you want to start VM $vm? (y/N): " answer || answer=""
    echo
    if [[ $answer =~ ^[Yy]$ ]]; then
      print_status "Starting VM $vm..."
      if ! az vm start --resource-group "$resource_group" --name "$vm"; then
        print_error "Failed to start VM $vm"
        exit 1
      fi
      # Pause after the start command returns, before moving on
      sleep 30
    fi
  done
}
|
||
|
|
|
||
|
|
# Ping every inventory host through Ansible to confirm SSH access.
# Globals:   INVENTORY_FILE (read)
# Side effects: changes the working directory to the inventory's directory;
#               ansible prompts for the SSH and become passwords (-kK)
# Exits:     1 when the ping fails
test_connectivity() {
  print_status "Testing connectivity to all VMs..."

  local inventory_dir
  inventory_dir="$(dirname "$INVENTORY_FILE")"
  cd "$inventory_dir"

  if ! ansible -i inventory.ini all -m ping -kK; then
    print_error "Connectivity test failed"
    print_warning "Please check:"
    print_warning "1. VMs are running"
    print_warning "2. Network security groups allow SSH (port 22)"
    print_warning "3. SSH credentials are correct"
    exit 1
  fi

  print_success "Connectivity to all VMs verified"
}
|
||
|
|
|
||
|
|
# Install Kubernetes on the inventory hosts by running Kubespray's cluster.yml.
# Globals:   KUBESPRAY_DIR, INVENTORY_FILE (read)
# Side effects: changes the working directory to KUBESPRAY_DIR; ansible-playbook
#               prompts for the SSH and become passwords (-kK) and runs with
#               privilege escalation (-b)
# Exits:     1 when the Kubespray directory cannot be entered or the playbook
#            fails
bootstrap_cluster() {
  print_status "Bootstrapping Kubernetes cluster..."

  if ! cd "$KUBESPRAY_DIR"; then
    print_error "Cannot enter Kubespray directory: $KUBESPRAY_DIR"
    exit 1
  fi

  print_status "Running Kubespray cluster installation..."
  print_warning "This process may take 15-30 minutes..."

  # Use the same inventory path as the rest of the script rather than a second,
  # hand-maintained relative copy that can drift from INVENTORY_FILE.
  if ansible-playbook -i "$INVENTORY_FILE" ./cluster.yml -kK -b; then
    print_success "Kubernetes cluster bootstrapped successfully"
  else
    print_error "Cluster bootstrap failed"
    print_warning "Check the Ansible output for errors"
    exit 1
  fi
}
|
||
|
|
|
||
|
|
# Download the cluster admin kubeconfig from the first control-plane node and
# install it as ~/.kube/config.
# Globals:   INVENTORY_FILE (read), HOME (read)
#            KUBECONFIG_SSH_USER (read, optional) - SSH login name; defaults to
#            "wwwadmin@mathmast.com"
# Side effects: creates ~/.kube and replaces ~/.kube/config on success; an
#               existing config is left untouched when the download fails
# Exits:     1 when no control-plane address is found or the download fails
get_kubeconfig() {
  local ssh_user="${KUBECONFIG_SSH_USER:-wwwadmin@mathmast.com}"
  local kube_dir="$HOME/.kube"

  print_status "Retrieving kubeconfig..."

  # Resolve the address of the first host in [kube_control_plane]. The
  # ansible_host value may sit on the group entry itself or on the host's line
  # in another section (for example [all]), in any field after the host name.
  local master_ip
  master_ip=$(awk '
    /^[ \t]*([#;]|$)/ { next }
    /^\[/ {
      in_cp = ($0 ~ /^\[kube_control_plane\]/)
      in_meta = ($0 ~ /^\[[^]]*:/)
      next
    }
    in_meta { next }
    {
      for (i = 2; i <= NF; i++) {
        if ($i ~ /^ansible_host=/ && !($1 in addr)) addr[$1] = substr($i, 14)
      }
      if (in_cp) cp[++n] = $1
    }
    END {
      for (i = 1; i <= n; i++) {
        if (cp[i] in addr) { print addr[cp[i]]; exit }
      }
    }
  ' "$INVENTORY_FILE") || master_ip=""

  if [[ -z "$master_ip" ]]; then
    print_error "Could not find master node IP in inventory"
    exit 1
  fi

  print_status "Getting kubeconfig from master node: $master_ip"

  # Create .kube directory if it doesn't exist
  mkdir -p "$kube_dir"

  # Download into a temporary file first: redirecting straight into the final
  # path would truncate a working kubeconfig before ssh has produced anything.
  local tmp_config
  if ! tmp_config=$(mktemp "$kube_dir/config.XXXXXX"); then
    print_error "Failed to retrieve kubeconfig"
    exit 1
  fi

  if ssh "${ssh_user}@${master_ip}" "sudo cat /etc/kubernetes/admin.conf" > "$tmp_config" \
    && [[ -s "$tmp_config" ]]; then
    # The file carries cluster-admin credentials; keep it private.
    chmod 600 "$tmp_config"
    mv -f "$tmp_config" "$kube_dir/config"
    print_success "Kubeconfig retrieved successfully"
  else
    rm -f "$tmp_config"
    print_error "Failed to retrieve kubeconfig"
    exit 1
  fi
}
|
||
|
|
|
||
|
|
# Wait until the API server answers, then report node readiness and the state
# of the control-plane pods.
# Outputs:   kubectl listings plus progress messages via the print_* helpers
# Exits:     1 when `kubectl get nodes` still fails after 30 attempts spaced
#            30 seconds apart
verify_cluster() {
  print_status "Verifying cluster installation..."

  # Wait for cluster to be ready
  local max_attempts=30
  local attempt=1
  local reachable=false

  while (( attempt <= max_attempts )); do
    if kubectl get nodes &> /dev/null; then
      print_success "Cluster is accessible"
      reachable=true
      break
    fi

    print_status "Waiting for cluster to be ready... (attempt $attempt/$max_attempts)"
    sleep 30
    attempt=$((attempt + 1))
  done

  if [[ "$reachable" != true ]]; then
    print_error "Cluster verification failed"
    print_warning "Troubleshooting steps:"
    print_warning "1. Check VM resources (CPU, memory)"
    print_warning "2. Check network connectivity between nodes"
    print_warning "3. Check Ansible logs for errors"
    print_warning "4. Verify inventory file configuration"
    exit 1
  fi

  # Check node status
  print_status "Checking node status..."
  kubectl get nodes

  # Count ready nodes (a single snapshot, no waiting). The STATUS column is
  # compared exactly because a substring match on "Ready" also counts
  # "NotReady" nodes.
  local node_rows ready_nodes total_nodes
  node_rows=$(kubectl get nodes --no-headers) || node_rows=""
  total_nodes=$(awk 'NF { n++ } END { print n + 0 }' <<< "$node_rows")
  ready_nodes=$(awk '$2 ~ /^Ready(,|$)/ { n++ } END { print n + 0 }' <<< "$node_rows")

  if (( total_nodes > 0 && ready_nodes == total_nodes )); then
    print_success "All $total_nodes nodes are ready"
  else
    print_warning "Only $ready_nodes/$total_nodes nodes are ready"
    kubectl get nodes
  fi

  # Check system pods
  print_status "Checking system pods..."
  kubectl get pods -n kube-system

  # Wait for critical system pods
  print_status "Waiting for critical system pods..."
  local critical_pods=("kube-apiserver" "kube-controller-manager" "kube-scheduler" "etcd")
  local max_pod_attempts=20
  local pod_prefix pod_attempt pod_rows

  for pod_prefix in "${critical_pods[@]}"; do
    pod_attempt=1

    while (( pod_attempt <= max_pod_attempts )); do
      pod_rows=$(kubectl get pods -n kube-system) || pod_rows=""

      if grep -q -- "$pod_prefix.*Running" <<< "$pod_rows"; then
        print_success "$pod_prefix is running"
        break
      fi

      if (( pod_attempt == max_pod_attempts )); then
        print_warning "$pod_prefix is not running"
        # Diagnostic only: grep returns 1 when no such pod exists, which must
        # not abort the script under `set -e`.
        grep -- "$pod_prefix" <<< "$pod_rows" || true
        break
      fi

      sleep 10
      pod_attempt=$((pod_attempt + 1))
    done
  done

  # Check cluster info
  print_status "Checking cluster info..."
  kubectl cluster-info
}
|
||
|
|
|
||
|
|
# Apply the infrastructure manifests, one component directory at a time, in a
# fixed order.
# Globals:   MANIFESTS_DIR (read)
# Side effects: changes the working directory to MANIFESTS_DIR; runs
#               `kubectl apply` for each component directory that exists
# Exits:     1 when MANIFESTS_DIR cannot be entered or an apply fails
deploy_infrastructure() {
  print_status "Deploying infrastructure components..."

  if ! cd "$MANIFESTS_DIR"; then
    print_error "Manifests directory not found: $MANIFESTS_DIR"
    exit 1
  fi

  # Deploy in order
  local components=(
    "freeleaps-controls-system"
    "freeleaps-devops-system"
    "freeleaps-monitoring-system"
    "freeleaps-logging-system"
    "freeleaps-data-platform"
  )

  local component
  for component in "${components[@]}"; do
    if [[ ! -d "$component" ]]; then
      print_warning "Component directory not found: $component"
      continue
    fi

    print_status "Deploying $component..."
    # Report which component failed instead of letting `set -e` end the run
    # without any explanation.
    if ! kubectl apply -f "$component/"; then
      print_error "Failed to deploy $component"
      exit 1
    fi

    # Fixed pause between components
    print_status "Waiting for $component to stabilize..."
    sleep 30
  done

  print_success "Infrastructure deployment completed"
}
|
||
|
|
|
||
|
|
# Run the cluster authenticator from BIN_DIR when it is present; otherwise tell
# the operator to run it manually.
# Globals:   BIN_DIR (read)
# Side effects: changes the working directory to BIN_DIR
setup_authentication() {
  print_status "Setting up authentication..."

  cd "$BIN_DIR"

  local authenticator="freeleaps-cluster-authenticator"
  if ! [[ -f "$authenticator" ]]; then
    print_warning "Authentication script not found"
    print_warning "Please run authentication setup manually"
    return
  fi

  print_status "Running authentication setup..."
  "./$authenticator" auth
}
|
||
|
|
|
||
|
|
# Print the closing summary: nodes, system pods, infrastructure pods and the
# recommended next steps.
# Globals:   BIN_DIR (read) - shown in the authentication hint
# Outputs:   kubectl listings and plain text on stdout
display_final_status() {
  print_success "Kubernetes cluster bootstrap completed!"
  echo
  echo "=== Cluster Status ==="
  # The listings are informational; a failing query must not stop the summary.
  kubectl get nodes || print_warning "Unable to list nodes"
  echo
  echo "=== System Pods ==="
  kubectl get pods -n kube-system || print_warning "Unable to list system pods"
  echo
  echo "=== Infrastructure Status ==="
  # grep exits 1 when nothing matches, which under `set -e` would end the
  # script here and hide the "Next Steps" section.
  kubectl get pods --all-namespaces \
    | grep -E "(argocd|cert-manager|prometheus|grafana)" \
    || print_warning "No argocd, cert-manager, prometheus or grafana pods found"
  echo
  echo "=== Next Steps ==="
  echo "1. Verify all components are running: kubectl get pods --all-namespaces"
  echo "2. Access ArgoCD: kubectl port-forward svc/argocd-server -n freeleaps-devops-system 8080:80"
  echo "3. Access Grafana: kubectl port-forward svc/kube-prometheus-stack-grafana -n freeleaps-monitoring-system 3000:80"
  echo "4. Setup authentication: cd $BIN_DIR && ./freeleaps-cluster-authenticator auth"
  echo "5. Deploy applications via ArgoCD"
}
|
||
|
|
|
||
|
|
# Run the complete bootstrap: print the banner, then execute every stage in
# order. Each stage terminates the script itself when it cannot continue.
main() {
  local banner="=========================================="
  printf '%s\n' "$banner" "Freeleaps Kubernetes Cluster Bootstrap" "$banner" ""

  # Stages, in execution order
  local step
  for step in \
    check_prerequisites \
    verify_azure_vms \
    test_connectivity \
    bootstrap_cluster \
    get_kubeconfig \
    verify_cluster \
    deploy_infrastructure \
    setup_authentication \
    display_final_status; do
    "$step"
  done
}
|
||
|
|
|
||
|
|
# Entry point: with no arguments run the full bootstrap; otherwise dispatch on
# the first argument.
if (( $# == 0 )); then
  main
else
  case "$1" in
    -h|--help)
      cat <<EOF
Usage: $0 [OPTIONS]

Options:
 --help, -h Show this help message
 --verify Only verify prerequisites and connectivity
 --bootstrap Only bootstrap the cluster (skip infrastructure)

This script bootstraps a complete Kubernetes cluster from Azure VMs.
EOF
      exit 0
      ;;
    --verify)
      # Checks only; nothing is installed
      check_prerequisites
      verify_azure_vms
      test_connectivity
      print_success "Verification completed successfully"
      ;;
    --bootstrap)
      # Cluster installation without the infrastructure components
      check_prerequisites
      verify_azure_vms
      test_connectivity
      bootstrap_cluster
      get_kubeconfig
      verify_cluster
      print_success "Cluster bootstrap completed"
      ;;
    *)
      print_error "Unknown option: $1"
      echo "Use --help for usage information"
      exit 1
      ;;
  esac
fi
|