#!/bin/bash

# Azure Kubernetes Node Addition Script
# This script automates the process of adding new Azure VMs to an existing Kubernetes cluster

set -e # Exit on any error

# Colors for output (ANSI escape sequences, expanded by the print_* helpers)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Configuration
# INVENTORY_FILE, KUBESPRAY_DIR and ANSIBLE_USER may be overridden from the
# environment; when unset or empty they fall back to the defaults below.
# The default paths are relative, so they resolve against the directory the
# script is started from.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
INVENTORY_FILE="${INVENTORY_FILE:-freeleaps-ops/cluster/ansible/manifests/inventory.ini}"
KUBESPRAY_DIR="${KUBESPRAY_DIR:-freeleaps-ops/3rd/kubespray}"
ANSIBLE_USER="${ANSIBLE_USER:-wwwadmin@mathmast.com}"

# Print an informational message on stdout, prefixed with a blue [INFO] tag.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text (backslash escapes are expanded, as echo -e did)
print_status() {
  printf '%b\n' "${BLUE}[INFO]${NC} $1"
}

# Print a success message on stdout, prefixed with a green [SUCCESS] tag.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text (backslash escapes are expanded, as echo -e did)
print_success() {
  printf '%b\n' "${GREEN}[SUCCESS]${NC} $1"
}

# Print a warning, prefixed with a yellow [WARNING] tag.
# The message goes to stderr so that it stays visible (and out of the captured
# value) when the calling function's stdout is captured with $(...).
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text (backslash escapes are expanded, as echo -e did)
print_warning() {
  printf '%b\n' "${YELLOW}[WARNING]${NC} $1" >&2
}

# Print an error, prefixed with a red [ERROR] tag.
# The message goes to stderr so that it stays visible (and out of the captured
# value) when the calling function's stdout is captured with $(...).
# Globals:   RED, NC (read)
# Arguments: $1 - message text (backslash escapes are expanded, as echo -e did)
print_error() {
  printf '%b\n' "${RED}[ERROR]${NC} $1" >&2
}

# Validate that a user-supplied value is not blank.
# Arguments: $1 - value to check
# Returns:   0 if the value contains at least one non-whitespace character,
#            1 (after reporting via print_error) if it is empty or blank.
validate_input() {
  # Strip all whitespace first so that "   " is rejected like "".
  local stripped="${1//[[:space:]]/}"

  if [[ -z "$stripped" ]]; then
    print_error "Input cannot be empty"
    return 1
  fi
  return 0
}

# Check that every external tool and path the script relies on is available.
# Globals:   INVENTORY_FILE, KUBESPRAY_DIR (read)
# Outputs:   progress and error messages via the print_* helpers
# Exits:     with status 1 on the first missing prerequisite
check_prerequisites() {
  print_status "Checking prerequisites..."

  # "command:label" pairs; the label is what the error message names.
  # kubectl, ansible and az come first so the first reported error is the same
  # as before; the remaining tools are invoked later in this script
  # (inventory check, scale playbook, SSH probe) and were never verified.
  local -a required_tools=(
    "kubectl:kubectl"
    "ansible:ansible"
    "az:Azure CLI"
    "ansible-inventory:ansible-inventory"
    "ansible-playbook:ansible-playbook"
    "ssh:ssh"
    "timeout:timeout"
  )
  local entry tool label

  for entry in "${required_tools[@]}"; do
    tool="${entry%%:*}"
    label="${entry#*:}"
    if ! command -v "$tool" &> /dev/null; then
      print_error "$label is not installed"
      exit 1
    fi
  done

  # Check if inventory file exists
  if [[ ! -f "$INVENTORY_FILE" ]]; then
    print_error "Inventory file not found: $INVENTORY_FILE"
    exit 1
  fi

  # Check if kubespray directory exists
  if [[ ! -d "$KUBESPRAY_DIR" ]]; then
    print_error "Kubespray directory not found: $KUBESPRAY_DIR"
    exit 1
  fi

  print_success "All prerequisites are met"
}

# Look up a VM in Azure and print its private IP address.
# Arguments: $1 - VM name
#            $2 - resource group
# Outputs:   stdout carries ONLY the private IP, because the caller captures it
#            with $(...); every status, warning and error message, and the
#            output of `az vm start`, is sent to stderr.
# Returns:   0 on success; 1 if the IP cannot be determined, the VM is not
#            running and the user declines to start it, or the start fails.
get_vm_details() {
  local vm_name="$1"
  local resource_group="$2"
  # Declared separately from the assignments so `local` cannot mask a failure.
  local private_ip power_state

  print_status "Getting VM details from Azure..." >&2

  # privateIps and powerState are only present in the instance-view output that
  # --show-details adds; without it the JMESPath query yields nothing.
  private_ip=$(az vm show --show-details --resource-group "$resource_group" --name "$vm_name" --query "privateIps" -o tsv 2>/dev/null) || private_ip=""
  # privateIps may list several comma-separated addresses; use the first one.
  private_ip="${private_ip%%,*}"
  if [[ -z "$private_ip" ]]; then
    print_error "Failed to get private IP for VM: $vm_name" >&2
    return 1
  fi

  # Get VM power state
  power_state=$(az vm show --show-details --resource-group "$resource_group" --name "$vm_name" --query "powerState" -o tsv 2>/dev/null) || power_state=""
  if [[ "$power_state" != "VM running" ]]; then
    print_warning "VM is not running. Current state: $power_state" >&2
    read -p "Do you want to start the VM? (y/N): " -n 1 -r
    echo >&2
    if [[ $REPLY =~ ^[Yy]$ ]]; then
      if ! az vm start --resource-group "$resource_group" --name "$vm_name" >&2; then
        print_error "Failed to start VM: $vm_name" >&2
        return 1
      fi
      print_status "Waiting for VM to start..." >&2
      sleep 30
    else
      print_error "VM must be running to proceed" >&2
      return 1
    fi
  fi

  echo "$private_ip"
}

# Probe SSH access to a host by running a trivial remote command.
# Globals:   ANSIBLE_USER (read) - login name used for the connection
# Arguments: $1 - IP address of the host
# Outputs:   the remote command's stdout, plus messages via the print_* helpers;
#            ssh's own stderr is discarded
# Returns:   0 if the remote command ran, 1 otherwise
test_ssh_connectivity() {
  local ip_address="$1"
  local target="${ANSIBLE_USER}@${ip_address}"

  print_status "Testing SSH connectivity to $ip_address..."

  # Two bounds: ssh gives up connecting after 5s, and `timeout` kills the
  # whole attempt after 10s.
  if ! timeout 10 ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "$target" "echo 'SSH connection successful'" 2>/dev/null; then
    print_error "SSH connection failed to $ip_address"
    print_warning "Please ensure:"
    print_warning "1. VM is running"
    print_warning "2. Network security group allows SSH (port 22)"
    print_warning "3. SSH service is running on the VM"
    return 1
  fi

  print_success "SSH connectivity verified"
  return 0
}

# Append a host entry for the new node to the Ansible inventory file.
# Globals:   INVENTORY_FILE (read; the file is modified), ANSIBLE_USER (read)
# Arguments: $1 - VM / node name
#            $2 - IP address used as ansible_host
#            $3 - node type: "worker" or "master"
# Returns:   0 on success; 1 for an invalid node type or if the backup fails
# NOTE(review): the entry is appended at the end of the file, so it joins
# whichever INI section happens to be last - confirm that is the intended group.
update_inventory() {
  local vm_name="$1"
  local ip_address="$2"
  local node_type="$3"
  local entry

  print_status "Updating inventory file..."

  # Build the entry first so an invalid type is rejected before any file
  # (including the backup) is written.
  case "$node_type" in
    worker)
      entry="$vm_name ansible_host=$ip_address ansible_user=$ANSIBLE_USER host_name=$vm_name"
      ;;
    master)
      entry="$vm_name ansible_host=$ip_address ansible_user=$ANSIBLE_USER etcd_member_name=${vm_name}-etcd host_name=$vm_name"
      ;;
    *)
      print_error "Invalid node type: $node_type"
      return 1
      ;;
  esac

  # Create backup of inventory file
  if ! cp "$INVENTORY_FILE" "${INVENTORY_FILE}.backup.$(date +%Y%m%d_%H%M%S)"; then
    print_error "Failed to back up inventory file: $INVENTORY_FILE"
    return 1
  fi

  # If the file does not end with a newline, add one so the new entry is not
  # glued onto the existing last line.
  if [[ -s "$INVENTORY_FILE" && -n "$(tail -c 1 "$INVENTORY_FILE")" ]]; then
    echo >> "$INVENTORY_FILE"
  fi

  printf '%s\n' "$entry" >> "$INVENTORY_FILE"
  print_success "Added $node_type node to inventory"
}

# Check that the inventory parses and that every host in it answers a ping.
# Globals:   INVENTORY_FILE (read)
# Outputs:   messages via the print_* helpers, plus the ansible ping output
# Returns:   0 if both checks pass, 1 on the first failure
verify_inventory() {
  print_status "Verifying inventory configuration..."

  # Test inventory syntax; only the exit status matters, so output is dropped.
  if ! ansible-inventory -i "$INVENTORY_FILE" --list > /dev/null 2>&1; then
    print_error "Inventory syntax is invalid"
    return 1
  fi
  print_success "Inventory syntax is valid"

  # Test connectivity to all nodes. -kK makes ansible prompt for the SSH and
  # privilege-escalation passwords, so this step is interactive.
  print_status "Testing connectivity to all nodes..."
  if ! ansible -i "$INVENTORY_FILE" all -m ping -kK; then
    print_error "Connectivity test failed"
    return 1
  fi
  print_success "Connectivity to all nodes verified"
}

# Run Kubespray's scale.yml playbook against the inventory.
# Globals:   INVENTORY_FILE, KUBESPRAY_DIR (read)
# Outputs:   messages via the print_* helpers, plus the playbook output
# Returns:   0 if the playbook succeeds, 1 otherwise
run_scale_playbook() {
  local inventory_dir inventory_name kubespray_abs

  print_status "Running Kubespray scale playbook..."

  inventory_dir="$(dirname "$INVENTORY_FILE")"
  inventory_name="$(basename "$INVENTORY_FILE")"

  # KUBESPRAY_DIR may be relative to the current directory, so resolve it to
  # an absolute path BEFORE changing into the inventory directory; otherwise
  # "$KUBESPRAY_DIR/scale.yml" would be looked up relative to the new cwd.
  if ! kubespray_abs="$(cd -- "$KUBESPRAY_DIR" > /dev/null 2>&1 && pwd)"; then
    print_error "Kubespray directory not found: $KUBESPRAY_DIR"
    return 1
  fi

  # The playbook runs from the inventory directory, inside a subshell so the
  # caller's working directory is left untouched. -kK prompts for the SSH and
  # privilege-escalation passwords; -b enables privilege escalation.
  if (cd -- "$inventory_dir" && ansible-playbook -i "$inventory_name" "$kubespray_abs/scale.yml" -kK -b); then
    print_success "Scale playbook completed successfully"
  else
    print_error "Scale playbook failed"
    return 1
  fi
}

# Wait until the new node is registered in the cluster and reports Ready.
# Arguments: $1 - node name
# Outputs:   progress messages via the print_* helpers; on a readiness timeout
#            also the output of `kubectl describe node`
# Returns:   0 once the node is Ready; 1 if it does not appear, or does not
#            become Ready, within 30 polls spaced 10 seconds apart
verify_node_addition() {
  local vm_name="$1"
  local max_attempts=30
  local attempt node_status
  local found=0 ready=0

  print_status "Verifying node addition..."

  # Wait for node to appear. The node is looked up by exact name: grepping the
  # full node list would also match names that merely contain $vm_name
  # (e.g. "node1" inside "node10").
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    if kubectl get node "$vm_name" > /dev/null 2>&1; then
      print_success "Node $vm_name found in cluster"
      found=1
      break
    fi

    print_status "Waiting for node to appear... (attempt $attempt/$max_attempts)"
    sleep 10
  done

  if [[ $found -eq 0 ]]; then
    print_error "Node $vm_name did not appear in cluster"
    return 1
  fi

  # Wait for node to be ready: poll the status of its "Ready" condition.
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    node_status=$(kubectl get nodes "$vm_name" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null) || node_status=""
    if [[ "$node_status" == "True" ]]; then
      print_success "Node $vm_name is ready"
      ready=1
      break
    fi

    print_status "Waiting for node to be ready... (attempt $attempt/$max_attempts)"
    sleep 10
  done

  if [[ $ready -eq 0 ]]; then
    print_error "Node $vm_name is not ready"
    kubectl describe node "$vm_name"
    return 1
  fi
}

# Verify that a pod can be scheduled and run on the new node.
# Starts a throw-away nginx pod pinned to the node through a
# kubernetes.io/hostname nodeSelector, waits for it to reach phase Running,
# then deletes it.
# Arguments: $1 - node name
# Outputs:   kubectl output and messages via the print_* helpers
# Returns:   0 if the pod reached Running; 1 if it could not be created or did
#            not reach Running within 30 polls spaced 10 seconds apart
test_pod_scheduling() {
  local vm_name="$1"
  local max_attempts=30
  local attempt pod_status test_pod_name
  local running=0

  print_status "Testing pod scheduling on new node..."

  # Create a test pod; the epoch suffix keeps the name unique between runs.
  test_pod_name="test-pod-$(date +%s)"
  if ! kubectl run "$test_pod_name" --image=nginx --restart=Never --overrides="{\"spec\":{\"nodeSelector\":{\"kubernetes.io/hostname\":\"$vm_name\"}}}"; then
    print_error "Failed to create test pod $test_pod_name"
    return 1
  fi

  # Wait for pod to be scheduled
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    pod_status=$(kubectl get pod "$test_pod_name" -o jsonpath='{.status.phase}' 2>/dev/null) || pod_status=""
    if [[ "$pod_status" == "Running" ]]; then
      print_success "Test pod is running on node $vm_name"
      running=1
      break
    fi

    print_status "Waiting for test pod to be ready... (attempt $attempt/$max_attempts)"
    sleep 10
  done

  if [[ $running -eq 0 ]]; then
    print_error "Test pod failed to run on node $vm_name"
    # Describe BEFORE deleting: once the pod is gone there is nothing left to
    # inspect.
    kubectl describe pod "$test_pod_name"
    kubectl delete pod "$test_pod_name"
    return 1
  fi

  # Clean up test pod
  kubectl delete pod "$test_pod_name"
}

# Print a summary of the newly added node followed by suggested next steps.
# Arguments: $1 - node name
# Outputs:   the summary on stdout; node details are read with kubectl jsonpath
#            queries, and a failed query simply leaves its field empty
display_final_status() {
  local vm_name="$1"
  local ready_status internal_ip cpu_capacity memory_capacity

  # `|| true` keeps a failing kubectl call from aborting the script under
  # `set -e`, matching the earlier inline $(...) behaviour.
  ready_status=$(kubectl get nodes "$vm_name" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}') || true
  internal_ip=$(kubectl get nodes "$vm_name" -o jsonpath='{.status.addresses[?(@.type=="InternalIP")].address}') || true
  cpu_capacity=$(kubectl get nodes "$vm_name" -o jsonpath='{.status.capacity.cpu}') || true
  memory_capacity=$(kubectl get nodes "$vm_name" -o jsonpath='{.status.capacity.memory}') || true

  print_success "Node addition completed successfully!"
  echo
  echo "=== Final Status ==="
  echo "Node Name: $vm_name"
  echo "Node Status: $ready_status"
  echo "Node IP: $internal_ip"
  echo "Node Capacity: $cpu_capacity CPU, $memory_capacity Memory"
  echo
  echo "=== Next Steps ==="
  echo "1. Monitor the node for any issues"
  echo "2. Update monitoring and alerting if needed"
  echo "3. Update documentation"
  echo "4. Consider running node maintenance tasks"
}

# Interactive entry point: collect the VM name, resource group and node type,
# ask for confirmation, then run each step of the node-addition workflow in
# order, stopping at the first failure.
# Inputs:  answers are read from stdin
# Exits:   0 when the user cancels at the confirmation prompt; 1 on invalid
#          input or when any workflow step fails
main() {
  # Kept local so the answers do not leak into the global scope.
  local vm_name resource_group node_type ip_address

  echo "=========================================="
  echo "Azure Kubernetes Node Addition Script"
  echo "=========================================="
  echo

  # Check prerequisites
  check_prerequisites

  # Get user input (-r keeps backslashes in the answers literal)
  echo "Please provide the following information:"
  echo

  read -r -p "VM Name: " vm_name
  validate_input "$vm_name" || exit 1

  read -r -p "Resource Group: " resource_group
  validate_input "$resource_group" || exit 1

  read -r -p "Node Type (worker/master): " node_type
  case "$node_type" in
    worker | master) ;;
    *)
      print_error "Node type must be 'worker' or 'master'"
      exit 1
      ;;
  esac

  echo
  print_status "Summary:"
  echo " VM Name: $vm_name"
  echo " Resource Group: $resource_group"
  echo " Node Type: $node_type"
  echo

  read -p "Proceed with node addition? (y/N): " -n 1 -r
  echo
  if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    print_status "Operation cancelled"
    exit 0
  fi

  # Get VM details. The status is tested on the assignment itself: under
  # `set -e` a separate `$?` check on the next line would never be reached.
  if ! ip_address=$(get_vm_details "$vm_name" "$resource_group"); then
    exit 1
  fi

  print_success "VM IP Address: $ip_address"

  # Test SSH connectivity
  test_ssh_connectivity "$ip_address" || exit 1

  # Update inventory
  update_inventory "$vm_name" "$ip_address" "$node_type" || exit 1

  # Verify inventory
  verify_inventory || exit 1

  # Run scale playbook
  run_scale_playbook || exit 1

  # Verify node addition
  verify_node_addition "$vm_name" || exit 1

  # Test pod scheduling
  test_pod_scheduling "$vm_name" || exit 1

  # Display final status
  display_final_status "$vm_name"
}

# Print the usage/help text on stdout.
show_usage() {
  echo "Usage: $0 [OPTIONS]"
  echo
  echo "Options:"
  echo " --help, -h Show this help message"
  echo " --version, -v Show version information"
  echo
  echo "This script automates the process of adding new Azure VMs to an existing Kubernetes cluster."
  echo "It will prompt for necessary information and guide you through the process."
}

# Handle script arguments.
# With no arguments the interactive workflow (main) runs; otherwise only the
# first argument is examined.
# Arguments: the script's command-line arguments
# Exits:     0 after --help/-h or --version/-v, 1 for an unknown option
dispatch_cli() {
  if [[ $# -eq 0 ]]; then
    main
    return
  fi

  case "$1" in
    --help | -h)
      show_usage
      exit 0
      ;;
    --version | -v)
      echo "Azure Kubernetes Node Addition Script v1.0"
      exit 0
      ;;
    *)
      print_error "Unknown option: $1"
      echo "Use --help for usage information"
      exit 1
      ;;
  esac
}

dispatch_cli "$@"