#!/bin/bash
# filepath: repmgr-split-brain-recovery.sh

set -e

# ---------------------------------------------------------------------------
# Cluster coordinates and credentials used by every step of this script.
#
# NOTE(review): REPMGR_PASSWORD and POSTGRES_PASSWORD are hard-coded here.
# Anyone who can read this file can read them; they should be rotated and
# loaded from a Kubernetes Secret or the environment instead. Flagged rather
# than changed so that running the script keeps working exactly as before.
# ---------------------------------------------------------------------------
NAMESPACE="freeleaps-prod"
STATEFULSET="freeleaps-prod-gitea-postgresql-ha-postgresql"
HEADLESS_SVC="${STATEFULSET}-headless.${NAMESPACE}.svc.freeleaps.cluster"
REPMGR_USER="repmgr"
REPMGR_PASSWORD="WGZ47gbUTLvo"
POSTGRES_PASSWORD="X9H2*9M2ZWYmuZ"
REPMGR_DB="repmgr"
POSTGRES_USER="postgres"

# A single timestamp is taken once and shared by the in-pod and the local
# backup directory, so both always carry the same stamp.
BACKUP_STAMP="$(date +%Y%m%d_%H%M%S)"
BACKUP_DIR="/tmp/pg_backup_${BACKUP_STAMP}"
LOCAL_BACKUP_DIR="./pg_backups_${BACKUP_STAMP}"

echo "===== PostgreSQL Repmgr Split-Brain Recovery ====="
echo "This script will attempt to fix the repmgr split-brain issue"
echo ""

# Create local backup directory
mkdir -p -- "$LOCAL_BACKUP_DIR"

# Run a shell command line inside a pod.
# Globals:   NAMESPACE (read)
# Arguments: $1 - pod name
#            $2 - command line, handed to `bash -c` inside the pod
# Outputs:   whatever the remote command writes to stdout/stderr
# Returns:   exit status of `kubectl exec`
run_in_pod() {
  local pod=$1
  local cmd=$2
  # Quoted so that namespace and pod name always arrive as exactly one
  # argument each, whatever characters they contain.
  kubectl exec -n "$NAMESPACE" "$pod" -- bash -c "$cmd"
}

# Print the current WAL write position of the PostgreSQL server in a pod.
# Globals:   REPMGR_PASSWORD, REPMGR_USER, REPMGR_DB (read)
# Arguments: $1 - pod name
# Outputs:   the raw `psql -t` output of SELECT pg_current_wal_lsn()
# Returns:   exit status of run_in_pod
# NOTE(review): PostgreSQL documents pg_current_wal_lsn() as unavailable
# during recovery, so this presumably fails on a node that is a standby -
# confirm whether callers rely on that.
get_wal_position() {
  local pod=$1
  local quoted_password
  # The password is spliced into a command line parsed by the pod's bash;
  # %q escapes it so spaces, '$' or quotes cannot break that line.
  printf -v quoted_password '%q' "$REPMGR_PASSWORD"
  run_in_pod "$pod" "PGPASSWORD=${quoted_password} psql -U $REPMGR_USER -d $REPMGR_DB -t -c \"SELECT pg_current_wal_lsn();\""
}

# Tell whether the PostgreSQL server in a pod is running as a primary.
# Globals:   REPMGR_PASSWORD, REPMGR_USER, REPMGR_DB (read)
# Arguments: $1 - pod name
# Outputs:   a warning on stderr when the pod could not be queried
# Returns:   0 if pg_is_in_recovery() answered exactly "f" (primary),
#            1 otherwise (standby, unexpected answer, or query failure)
is_primary() {
  local pod=$1
  local quoted_password result

  # Escape the password for the command line parsed by the pod's bash.
  printf -v quoted_password '%q' "$REPMGR_PASSWORD"

  # Declaration and assignment are separate so a failing query is noticed
  # instead of being hidden behind the exit status of `local`.
  if ! result=$(run_in_pod "$pod" "PGPASSWORD=${quoted_password} psql -U $REPMGR_USER -d $REPMGR_DB -t -c \"SELECT pg_is_in_recovery();\""); then
    printf 'warning: could not query recovery state of %s\n' "$pod" >&2
    return 1
  fi

  # Compare the whole answer, not a substring: any other text that merely
  # contains the letter "f" must not be mistaken for "not in recovery".
  result=${result//[[:space:]]/}
  [[ "$result" == "f" ]]
}

# Dump every user database of a pod, plus globals and configuration files,
# pack the result into a tarball and copy it to the local machine.
# Globals:   BACKUP_DIR, LOCAL_BACKUP_DIR, NAMESPACE,
#            POSTGRES_USER, POSTGRES_PASSWORD (all read)
# Arguments: $1 - pod name
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   0 on success; non-zero as soon as a mandatory step fails
#            (copying the three config files is best-effort)
backup_databases() {
  local pod=$1
  local backup_path="$BACKUP_DIR/$pod"
  local local_archive="$LOCAL_BACKUP_DIR/${pod}_backup.tar.gz"
  local quoted_password databases db quoted_db

  # Escape the password for the command lines parsed by the pod's bash.
  printf -v quoted_password '%q' "$POSTGRES_PASSWORD"

  echo "Creating backup directory in the pod..."
  run_in_pod "$pod" "mkdir -p $backup_path" || return

  echo "Getting list of databases..."
  # pipefail inside the pod makes a psql failure visible through `tr`;
  # assigning separately from `local` keeps that exit status observable.
  if ! databases=$(run_in_pod "$pod" "set -o pipefail; PGPASSWORD=$quoted_password psql -U $POSTGRES_USER -t -c \"SELECT datname FROM pg_database WHERE datname NOT IN ('template0', 'template1', 'postgres')\" | tr -d ' '"); then
    echo "ERROR: could not list databases on $pod" >&2
    return 1
  fi

  echo "Backing up databases: $databases"
  # Read names line by line on fd 3 so the loop body keeps its own stdin
  # and names are neither word-split nor glob-expanded.
  while IFS= read -r db <&3; do
    [[ -n "$db" ]] || continue
    echo "Backing up database: $db"
    printf -v quoted_db '%q' "$db"
    run_in_pod "$pod" "PGPASSWORD=$quoted_password pg_dump -U $POSTGRES_USER -Fc $quoted_db > $backup_path/${quoted_db}.dump" || return
  done 3<<< "$databases"

  # Also backup global objects (roles, tablespaces)
  echo "Backing up global objects..."
  run_in_pod "$pod" "PGPASSWORD=$quoted_password pg_dumpall -U $POSTGRES_USER --globals-only > $backup_path/globals.sql" || return

  # Backup PostgreSQL configuration (best-effort: a missing file is ignored)
  echo "Backing up PostgreSQL configuration..."
  run_in_pod "$pod" "cp /bitnami/postgresql/conf/postgresql.conf $backup_path/ 2>/dev/null || true" || return
  run_in_pod "$pod" "cp /bitnami/postgresql/conf/pg_hba.conf $backup_path/ 2>/dev/null || true" || return

  # Copy repmgr configuration (best-effort as well)
  echo "Backing up repmgr configuration..."
  run_in_pod "$pod" "cp /etc/repmgr.conf $backup_path/ 2>/dev/null || true" || return

  # Tar the backup files, relative to the parent directory
  echo "Creating archive of the backup..."
  run_in_pod "$pod" "tar -czf ${backup_path}.tar.gz -C ${backup_path%/*} ${backup_path##*/}" || return

  # Copy backup to local machine
  echo "Copying backup to local machine..."
  kubectl cp "$NAMESPACE/$pod:${backup_path}.tar.gz" "$local_archive" || return
  # The in-pod copy is deleted next, so make sure the archive really arrived.
  if [[ ! -s "$local_archive" ]]; then
    echo "ERROR: backup archive $local_archive is missing or empty" >&2
    return 1
  fi

  # Cleanup backup in the pod
  echo "Cleaning up backup files in the pod..."
  run_in_pod "$pod" "rm -rf $backup_path ${backup_path}.tar.gz"
}

# Step 0: print, for each of the three nodes, whether it runs as primary or
# standby and its WAL position. Each position that could be read is kept in
# a global WAL_POS_<index> variable for the later comparison.
echo "Step 0: Checking current status of the cluster..."
for i in 0 1 2; do
  POD="${STATEFULSET}-${i}"
  echo -n "Node ${i} ($POD): "

  # is_primary returns non-zero for anything that is not a confirmed
  # primary, so the else branch is taken whenever it does not return 0.
  if is_primary "$POD"; then
    PRIMARY_STATE="running as primary"
    echo "$PRIMARY_STATE"
  else
    echo "running as standby"
  fi

  # Only trust the output when the call itself succeeded; a failed call is
  # recorded as "N/A" and leaves WAL_POS_<index> unset.
  if WAL_POS=$(get_wal_position "$POD" 2>/dev/null); then
    # Drop surrounding whitespace so the stored value is the bare position.
    WAL_POS="${WAL_POS//[[:space:]]/}"
    if [[ -n "$WAL_POS" ]]; then
      echo "  - WAL position: $WAL_POS"
      # Store WAL positions for comparison
      printf -v "WAL_POS_${i}" '%s' "$WAL_POS"
    fi
  else
    WAL_POS="N/A"
  fi
done

# Step 1: take a full backup of nodes 0, 1 and 2, one after the other,
# then report where the archives were stored locally.
printf '\n'
printf '%s\n' "Step 1: Backing up all databases from each node..."
for i in {0..2}; do
  POD="${STATEFULSET}-${i}"
  printf '%s\n' "Backing up data from node $i ($POD)..."
  backup_databases "$POD"
done

printf '%s\n' "All backups completed and stored in: $LOCAL_BACKUP_DIR"
printf '\n'

# Print the third '|'-separated column of a psql result row, with all
# whitespace removed. The query below selects node_id, node_name, type,
# active - so for its rows this is the repmgr node type.
# Arguments: $1 - one row as printed by `psql -t`
# Outputs:   the type column on stdout (empty for an empty row)
repmgr_row_type() {
  local row=$1
  local node_type
  IFS='|' read -r _ _ node_type _ <<< "$row"
  printf '%s\n' "${node_type//[[:space:]]/}"
}

echo "Determining most advanced node based on WAL position..."

# Escape the password for the command line parsed by the pod's bash.
printf -v node_query_password '%q' "$REPMGR_PASSWORD"

# Get the primary nodes from each pod - there might be more than one in split-brain
for i in 0 1 2; do
  POD="${STATEFULSET}-${i}"
  # Get node information; a failed query is treated as "no information".
  NODE_INFO=$(run_in_pod "$POD" "PGPASSWORD=${node_query_password} psql -U $REPMGR_USER -d $REPMGR_DB -t -c \"SELECT node_id, node_name, type, active FROM repmgr.nodes WHERE node_name = '$POD';\"" 2>/dev/null || echo "")

  if [ -n "$NODE_INFO" ]; then
    echo "Node ${i} info: $NODE_INFO"

    # Store if this node thinks it's a primary. Only the type column is
    # examined, so a node *name* containing "primary" cannot match.
    if [[ "$(repmgr_row_type "$NODE_INFO")" == "primary" ]]; then
      echo "Node ${i} is configured as a primary"
      printf -v "NODE_${i}_IS_PRIMARY" '%s' true
    else
      printf -v "NODE_${i}_IS_PRIMARY" '%s' false
    fi

    # Check if node is actually running as primary using pg_is_in_recovery()
    if is_primary "$POD"; then
      echo "Node ${i} is running as primary (pg_is_in_recovery=false)"
      printf -v "NODE_${i}_RUNNING_AS_PRIMARY" '%s' true
    else
      printf -v "NODE_${i}_RUNNING_AS_PRIMARY" '%s' false
    fi
  else
    echo "Could not get info for node ${i}"
    printf -v "NODE_${i}_IS_PRIMARY" '%s' false
    printf -v "NODE_${i}_RUNNING_AS_PRIMARY" '%s' false
  fi
done

# Print the index of the node with the highest WAL position.
# Globals:   WAL_POS_<index> (read) - position in "HEX/HEX" form; may be
#            unset, empty, or padded with whitespace
# Arguments: node indices to consider, in order
# Outputs:   the winning index on stdout, or nothing when no node has a
#            usable position; a warning on stderr for malformed positions
# Positions are compared numerically (high word first, then low word), not
# as text: as text '0/A000000' sorts after '0/10000000' although it is the
# smaller position. On equal positions the later index wins.
pick_most_advanced_node() {
  local lsn_re='^([0-9A-Fa-f]{1,8})/([0-9A-Fa-f]{1,8})$'
  local idx var lsn hi lo
  local best="" best_hi=0 best_lo=0

  for idx in "$@"; do
    var="WAL_POS_${idx}"
    lsn="${!var:-}"
    lsn="${lsn//[[:space:]]/}"
    [[ -n "$lsn" ]] || continue
    if [[ ! "$lsn" =~ $lsn_re ]]; then
      echo "warning: ignoring malformed WAL position '$lsn' of node $idx" >&2
      continue
    fi
    hi=$(( 16#${BASH_REMATCH[1]} ))
    lo=$(( 16#${BASH_REMATCH[2]} ))
    if [[ -z "$best" ]] || (( hi > best_hi || (hi == best_hi && lo >= best_lo) )); then
      best=$idx
      best_hi=$hi
      best_lo=$lo
    fi
  done

  printf '%s' "$best"
}

echo ""
echo "Analyzing WAL positions to determine the most advanced node..."

# Compare WAL positions
NEW_PRIMARY=$(pick_most_advanced_node 0 1 2)
if [[ -z "$NEW_PRIMARY" ]]; then
  echo "Could not determine most advanced node. Using node 0 as default primary."
  NEW_PRIMARY=0
fi

echo "Selected node ${NEW_PRIMARY} as the new primary based on WAL position."
# Look up WAL_POS_<NEW_PRIMARY> through indirect expansion
selected_wal_var="WAL_POS_${NEW_PRIMARY}"
WAL_POS_VALUE="${!selected_wal_var:-}"
if [ -n "$WAL_POS_VALUE" ]; then
  echo "WAL position: $WAL_POS_VALUE"
fi
echo ""

# Confirm with user. Only an exact "y" continues; anything else cancels.
# -r keeps backslashes literal. A failed read (for example end of input) is
# treated as "no answer", so the cancellation message below is still shown
# instead of the script ending silently under `set -e`.
CONFIRM=""
read -r -p "Backups completed. Do you want to proceed with fixing the split-brain issue? (y/n): " CONFIRM || CONFIRM=""
if [[ "$CONFIRM" != "y" ]]; then
  echo "Operation cancelled. Backups are still available at $LOCAL_BACKUP_DIR"
  exit 1
fi

# Write /tmp/register_primary.sh into a pod and run it there. The generated
# script exports the repmgr connection settings, looks for a repmgr binary
# under /opt/bitnami and runs `primary register --force` with it.
# Globals:   REPMGR_USER, REPMGR_PASSWORD, REPMGR_DB (read, expanded locally)
# Arguments: $1 - pod name
# Returns:   non-zero if writing or running the script fails
register_primary_node() {
  local pod=$1

  # The here-doc delimiter is quoted ('EOF') so the pod's shell writes
  # $PATH, $(find ...) and $repmgr_bin into the file literally; they must
  # be evaluated when the generated script runs, not while it is written.
  run_in_pod "$pod" "cat > /tmp/register_primary.sh << 'EOF'
#!/bin/bash
export PGUSER='$REPMGR_USER'
export PGPASSWORD='$REPMGR_PASSWORD'
export PGDATABASE='$REPMGR_DB'
export PATH=\$PATH:/opt/bitnami/repmgr/bin:/opt/bitnami/postgresql/bin

# Try to find repmgr
repmgr_bin=\$(find /opt/bitnami -name repmgr -type f | head -1)
if [ -z \"\$repmgr_bin\" ]; then
    echo \"Could not find repmgr binary\"
    exit 1
fi

\$repmgr_bin -f /etc/repmgr.conf primary register --force
EOF
chmod +x /tmp/register_primary.sh" || return

  # Run the script directly
  run_in_pod "$pod" "bash /tmp/register_primary.sh"
}

echo ""
echo "Step 2: Registering node ${NEW_PRIMARY} as primary..."
PRIMARY_POD="${STATEFULSET}-${NEW_PRIMARY}"

register_primary_node "$PRIMARY_POD"

# Write a small repmgr driver script into a pod and run it there.
# The generated script exports the repmgr connection settings, looks for a
# repmgr binary under /opt/bitnami (exiting 1 if none is found) and then
# executes the caller-supplied body, which may refer to $repmgr_bin.
# Globals:   REPMGR_USER, REPMGR_PASSWORD, REPMGR_DB (read, expanded locally)
# Arguments: $1 - pod name
#            $2 - path of the script inside the pod
#            $3 - shell lines appended after the repmgr lookup
# Returns:   non-zero if writing or running the script fails
run_repmgr_script_in_pod() {
  local pod=$1
  local script_path=$2
  local script_body=$3

  # The here-doc delimiter is quoted ('EOF') so the pod's shell writes
  # $PATH, $(find ...) and $repmgr_bin into the file literally; they must
  # be evaluated when the generated script runs, not while it is written.
  run_in_pod "$pod" "cat > ${script_path} << 'EOF'
#!/bin/bash
export PGUSER='$REPMGR_USER'
export PGPASSWORD='$REPMGR_PASSWORD'
export PGDATABASE='$REPMGR_DB'
export PATH=\$PATH:/opt/bitnami/repmgr/bin:/opt/bitnami/postgresql/bin

# Try to find repmgr
repmgr_bin=\$(find /opt/bitnami -name repmgr -type f | head -1)
if [ -z \"\$repmgr_bin\" ]; then
    echo \"Could not find repmgr binary\"
    exit 1
fi

${script_body}
EOF
chmod +x ${script_path}" || return

  run_in_pod "$pod" "bash ${script_path}"
}

# Stop PostgreSQL on other nodes, re-clone them from the new primary and
# register them as standbys. The new primary itself is skipped.
for i in 0 1 2; do
  if (( i == NEW_PRIMARY )); then
    continue
  fi

  STANDBY_POD="${STATEFULSET}-${i}"
  echo "Step 3: Stopping PostgreSQL on standby node ${i}..."
  run_in_pod "$STANDBY_POD" "/opt/bitnami/scripts/postgresql-repmgr/stop.sh"

  echo "Step 4: Cloning primary data to standby node ${i}..."
  # The data directory is wiped only after the generated script has found
  # repmgr, so a missing binary cannot leave the node without data.
  run_repmgr_script_in_pod "$STANDBY_POD" /tmp/clone_standby.sh \
    "# Remove existing data
rm -rf /bitnami/postgresql/data/*

\$repmgr_bin -h ${PRIMARY_POD}.${HEADLESS_SVC} -p 5432 standby clone --force"

  echo "Step 5: Starting PostgreSQL on standby node ${i}..."
  run_in_pod "$STANDBY_POD" "/opt/bitnami/scripts/postgresql-repmgr/start.sh"

  echo "Step 6: Registering node ${i} as standby..."
  run_repmgr_script_in_pod "$STANDBY_POD" /tmp/register_standby.sh \
    "\$repmgr_bin -f /etc/repmgr.conf standby register --force"
done

# Write /tmp/cluster_status.sh into a pod and run it there. The generated
# script exports the repmgr connection settings, looks for a repmgr binary
# under /opt/bitnami and runs `cluster show` with it.
# Globals:   REPMGR_USER, REPMGR_PASSWORD, REPMGR_DB (read, expanded locally)
# Arguments: $1 - pod name
# Outputs:   the output of the generated script
# Returns:   non-zero if writing or running the script fails
show_cluster_status() {
  local pod=$1

  # The here-doc delimiter is quoted ('EOF') so the pod's shell writes
  # $PATH, $(find ...) and $repmgr_bin into the file literally; they must
  # be evaluated when the generated script runs, not while it is written.
  run_in_pod "$pod" "cat > /tmp/cluster_status.sh << 'EOF'
#!/bin/bash
export PGUSER='$REPMGR_USER'
export PGPASSWORD='$REPMGR_PASSWORD'
export PGDATABASE='$REPMGR_DB'
export PATH=\$PATH:/opt/bitnami/repmgr/bin:/opt/bitnami/postgresql/bin

# Try to find repmgr
repmgr_bin=\$(find /opt/bitnami -name repmgr -type f | head -1)
if [ -z \"\$repmgr_bin\" ]; then
    echo \"Could not find repmgr binary\"
    exit 1
fi

\$repmgr_bin -f /etc/repmgr.conf cluster show
EOF
chmod +x /tmp/cluster_status.sh" || return

  # Run the cluster status script directly
  run_in_pod "$pod" "bash /tmp/cluster_status.sh"
}

echo ""
echo "Step 7: Checking final cluster status..."

FINAL_STATUS=$(show_cluster_status "$PRIMARY_POD")
echo "$FINAL_STATUS"

# Clean up temporary scripts (best-effort: a failure on one pod is ignored)
for i in 0 1 2; do
  POD="${STATEFULSET}-${i}"
  run_in_pod "$POD" "rm -f /tmp/register_primary.sh /tmp/clone_standby.sh /tmp/register_standby.sh /tmp/cluster_status.sh" || true
done

echo ""
echo "Split-brain recovery completed."
echo "Your database backups are available at: $LOCAL_BACKUP_DIR"
echo "Please verify that the cluster is now in a consistent state."