observability.svc.plus/files/postgres/pg-pitr
2026-02-01 20:53:55 +08:00

379 lines
14 KiB
Bash
Executable File

#!/bin/bash
set -uo pipefail
#==============================================================#
# File : pg-pitr
# Desc : Point-in-time recovery with pgbackrest
# Ctime : 2022-12-31
# Mtime : 2026-01-19
# Path : /pg/bin/pg-pitr
# Deps : pgbackrest, /etc/pgbackrest/pgbackrest.conf
# License : Apache-2.0 @ https://pigsty.io/docs/about/license/
# Copyright : 2018-2026 Ruohang Feng / Vonng (rh@vonng.com)
#==============================================================#
# This script restores a PostgreSQL data directory to a specific
# point in time using pgbackrest. It's a single-purpose tool that:
# - Uses the backup repository configured on current node
# - Restores to specified data directory
# - Supports various recovery targets (time, lsn, xid, name, etc.)
#
# Usage: https://pgbackrest.org/command.html#command-restore
#==============================================================#
# Script name and absolute directory, both derived from $0. Every expansion is
# quoted so an install path containing whitespace or glob characters resolves
# correctly; CDPATH is cleared so `cd` cannot print a directory name into the
# captured output.
PROG_NAME="$(basename -- "$0")"
PROG_DIR="$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)"
#--------------------------------------------------------------#
# Usage
#--------------------------------------------------------------#
# usage [exit_code]
# Print the built-in manual page to stdout, then terminate the script with
# exit_code (0 when omitted).
usage() {
    local -r exit_code="${1:-0}"
    cat <<'EOF'
NAME
pg-pitr -- Point-in-time recovery with pgbackrest
SYNOPSIS
pg-pitr [options] [--time=<time>|--lsn=<lsn>|--xid=<xid>|--name=<name>]
RECOVERY TARGET (choose one):
-d, --default Recover to end of WAL archive stream (latest status)
-i, --immediate Recover only until database becomes consistent
-t, --time <timestamp> Recover to specific time (e.g., "2025-01-01 12:00:00+08")
-n, --name <restore_point> Recover to named restore point
-l, --lsn <lsn> Recover to specific LSN (e.g., "0/7C82CB8")
-x, --xid <xid> Recover to specific transaction ID
-b, --backup <label> Recover to specific backup set (check: pgbackrest info)
OPTIONS:
-D, --data <path> Data directory to restore (default: /pg/data)
-s, --stanza <name> pgbackrest stanza name (auto-detect from config)
-X, --exclusive Stop RIGHT BEFORE target (exclusive), not at it
-P, --promote Promote after reaching target (default: pause)
-c, --check Dry-run mode: print command without executing
-y, --yes Skip confirmation and countdown
-h, --help Show this help message
EXAMPLES:
pg-pitr -d # Restore to latest (end of WAL stream)
pg-pitr -i # Restore to backup completion time
pg-pitr -t "2025-01-01 12:00:00+08" # Restore to specific time
pg-pitr -t "2025-01-01 04:00:00+00" # Same time in UTC
pg-pitr -n my-savepoint # Restore to named restore point
pg-pitr -l "0/7C82CB8" # Restore to specific LSN
pg-pitr -x 12345678 -X # Restore to right before transaction
pg-pitr -b 20251225-120000F # Restore to specific backup
pg-pitr -D /tmp/data2 -c # Dry run to custom directory
NOTES:
- Run as postgres (dbsu) user
- PostgreSQL must be stopped before restore
- Time format: YYYY-MM-DD HH:MM:SS[.ssssss][+/-TZ]
- After restore, manually start PG, validate, then promote
EOF
    exit "$exit_code"
}
#--------------------------------------------------------------#
# Log Util
#--------------------------------------------------------------#
# Color palette used by the log helpers. The values are backslash escape
# sequences that printf expands when they appear in its format string. They
# stay empty unless stdout is a terminal, so redirected output is plain text.
__CN='' __CK='' __CR='' __CG='' __CY='' __CB='' __CM='' __CC='' __CW=''
if [[ -t 1 ]]; then
    __CN='\033[0m'       # reset
    __CK='\033[0;30m'    # black
    __CR='\033[0;31m'    # red
    __CG='\033[0;32m'    # green
    __CY='\033[0;33m'    # yellow
    __CB='\033[0;34m'    # blue
    __CM='\033[0;35m'    # magenta
    __CC='\033[0;36m'    # cyan
    __CW='\033[0;37m'    # white
fi
# Log helpers. Each prints exactly one line to stdout: an optional bracketed
# tag followed by the message (all arguments joined by single spaces), wrapped
# in the color variables.
#
# The message is handed to printf as a %s argument instead of being spliced
# into the format string, so '%' signs and backslashes coming from user input
# (e.g. an invalid --time value echoed back in an error) are printed literally
# rather than being interpreted as format directives or escapes.
function log_info() { printf "[${__CG} OK ${__CN}] ${__CG}%s${__CN}\n" "$*"; }
function log_warn() { printf "[${__CY}WARN${__CN}] ${__CY}%s${__CN}\n" "$*"; }
function log_error() { printf "[${__CR}FAIL${__CN}] ${__CR}%s${__CN}\n" "$*"; }
function log_debug() { printf "[${__CB}HINT${__CN}] ${__CB}%s${__CN}\n" "$*"; }
# log_title <tag> <message>: like log_info but with a caller-supplied tag.
function log_title() { printf "[${__CG}%s${__CN}] ${__CG}%s${__CN}\n" "${1:-}" "${2:-}"; }
# log_hint: untagged line (an empty message prints a blank line).
function log_hint() { printf "${__CB}%s${__CN}\n" "$*"; }
# log_line: section divider with the message between "=====" rules.
function log_line() { printf "${__CM}===== %s =====${__CN}\n" "$*"; }
#--------------------------------------------------------------#
# Default Parameters
#--------------------------------------------------------------#
# Recovery target. METHOD is one of default|immediate|time|name|lsn|xid|set;
# TARGET holds the accompanying value for the methods that take one.
METHOD='default'
TARGET=''

# Where to restore, and which backup stanza to restore from.
DATA_DIR='/pg/data'
STANZA=''                   # empty: auto-detected from the pgbackrest config

# Behavior switches, changed by -P, -X, -c and -y respectively.
TARGET_ACTION='pause'
TARGET_EXCLUSIVE='false'
DRY_RUN='false'
SKIP_CONFIRM='false'
#--------------------------------------------------------------#
# Argument Parsing
#--------------------------------------------------------------#
# Print help if no arguments provided
[[ $# -eq 0 ]] && usage 0

# require_value <option> <argc>
# Abort with a readable error when a value-taking option is the last word on
# the command line. <argc> is the number of words still to parse, counting the
# option itself, so anything below 2 means the value is missing. Without this
# check the script would die on an "unbound variable" error under `set -u`.
require_value() {
    if [[ "$2" -lt 2 ]]; then
        log_error "Option $1 requires an argument"
        exit 1
    fi
}

# select_method <method>
# Record the recovery target type in METHOD. Only one target option may
# follow the implicit default: once METHOD has left "default", any further
# target option -- including an explicit -d/--default, which would otherwise
# silently discard the earlier target and restore to latest -- is an error.
select_method() {
    if [[ "$METHOD" != "default" ]]; then
        log_error "Multiple recovery targets specified"
        exit 1
    fi
    METHOD="$1"
}

# Options that take a value accept both "--opt value" and "--opt=value";
# the two-word form consumes one extra positional via the inner `shift`.
while [[ $# -gt 0 ]]; do
    case "$1" in
        -h|--help)
            usage 0 ;;
        -D|--data|--data=*)
            if [[ "$1" == *=* ]]; then DATA_DIR="${1#*=}"; else require_value "$1" $#; DATA_DIR="$2"; shift; fi ;;
        -s|--stanza|--stanza=*)
            if [[ "$1" == *=* ]]; then STANZA="${1#*=}"; else require_value "$1" $#; STANZA="$2"; shift; fi ;;
        -d|--default)
            select_method "default" ;;
        -i|--immediate)
            select_method "immediate" ;;
        -t|--time|--time=*)
            select_method "time"
            if [[ "$1" == *=* ]]; then TARGET="${1#*=}"; else require_value "$1" $#; TARGET="$2"; shift; fi ;;
        -n|--name|--name=*)
            select_method "name"
            if [[ "$1" == *=* ]]; then TARGET="${1#*=}"; else require_value "$1" $#; TARGET="$2"; shift; fi ;;
        -l|--lsn|--lsn=*)
            select_method "lsn"
            if [[ "$1" == *=* ]]; then TARGET="${1#*=}"; else require_value "$1" $#; TARGET="$2"; shift; fi ;;
        -x|--xid|--xid=*)
            select_method "xid"
            if [[ "$1" == *=* ]]; then TARGET="${1#*=}"; else require_value "$1" $#; TARGET="$2"; shift; fi ;;
        -b|--backup|--backup=*)
            select_method "set"
            if [[ "$1" == *=* ]]; then TARGET="${1#*=}"; else require_value "$1" $#; TARGET="$2"; shift; fi ;;
        -X|--exclusive|--target-exclusive)
            TARGET_EXCLUSIVE=true ;;
        -P|--promote|--target-action=promote)
            TARGET_ACTION="promote" ;;
        -c|--check|--dry-run)
            DRY_RUN=true ;;
        -y|--yes)
            SKIP_CONFIRM=true ;;
        --)
            shift; break ;;
        -*)
            log_error "Unknown option: $1"; exit 1 ;;
        *)
            # First non-option word ends option parsing; the remaining
            # positionals are not used by this script.
            break ;;
    esac
    shift
done
#--------------------------------------------------------------#
# Validation
#--------------------------------------------------------------#
# Validation: exit 1 with an explanatory message as soon as the tooling, the
# configuration, or the requested recovery target is unusable. Reads STANZA,
# DATA_DIR, METHOD and TARGET; may rewrite STANZA (auto-detection) and TARGET
# (LSN upper-cased, XID normalized to plain decimal).

# Check pgbackrest
if ! command -v pgbackrest &>/dev/null; then
    log_error "pgbackrest not found in PATH"
    exit 1
fi

# Check config file
PGBACKREST_CONF="/etc/pgbackrest/pgbackrest.conf"
if [[ ! -f "$PGBACKREST_CONF" ]]; then
    log_error "pgbackrest config not found: $PGBACKREST_CONF"
    exit 1
fi

# Auto-detect stanza if not specified: take the first "[name]" section header
# of the config. Headers containing ':' (e.g. "[global:backup]") never match
# the pattern, and the shared "[global]" section is filtered out explicitly so
# it cannot be mistaken for a stanza when it appears first in the file.
if [[ -z "$STANZA" ]]; then
    STANZA=$(grep -oP '^\[\K[^\]:]+(?=\])' "$PGBACKREST_CONF" | grep -vxF 'global' | head -n1)
    if [[ -z "$STANZA" ]]; then
        log_error "Cannot auto-detect stanza from config"
        exit 1
    fi
fi

# Validate data directory path
if [[ "$DATA_DIR" != /* ]]; then
    log_error "Data directory must be absolute path: $DATA_DIR"
    exit 1
fi

# Validate recovery target based on method
case "$METHOD" in
    time)
        # Accept: YYYY-MM-DD HH:MM:SS with optional .microseconds and optional timezone
        # Examples: "2025-01-01 12:00:00", "2025-01-01 12:00:00+08", "2025-01-01 12:00:00.123456+08:00"
        if [[ ! "$TARGET" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}[\ T][0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+)?([+-][0-9]{2}(:[0-9]{2})?)?$ ]]; then
            log_error "Invalid time format: $TARGET"
            log_hint "Expected: YYYY-MM-DD HH:MM:SS[.usec][+/-TZ]"
            log_hint "Examples: 2025-01-01 12:00:00+08, 2025-01-01 04:00:00+00"
            exit 1
        fi
        ;;
    lsn)
        # LSN format: hex/hex (case insensitive). printf instead of echo so a
        # value such as "-n" is upper-cased and reported, not eaten as a flag.
        TARGET=$(printf '%s' "$TARGET" | tr '[:lower:]' '[:upper:]')
        if [[ ! "$TARGET" =~ ^[0-9A-F]{1,8}/[0-9A-F]{1,8}$ ]]; then
            log_error "Invalid LSN format: $TARGET"
            log_hint "Expected: X/X where X is 1-8 hex digits"
            log_hint "Example: 0/7C82CB8"
            exit 1
        fi
        ;;
    xid)
        # Digits only, with at most 10 significant digits so the arithmetic
        # below cannot overflow. The 10# prefix forces base 10: without it a
        # value with leading zeros (e.g. "08") is parsed by bash as octal.
        # The regex is tested first, so only pure digit strings ever reach
        # the arithmetic evaluation.
        if [[ ! "$TARGET" =~ ^0*[0-9]{1,10}$ ]] || (( 10#$TARGET < 1 || 10#$TARGET > 4294967295 )); then
            log_error "Invalid XID: $TARGET"
            log_hint "Expected: positive 32-bit integer (1 to 4294967295)"
            exit 1
        fi
        # Hand a canonical decimal value (no leading zeros) to later stages.
        TARGET=$(( 10#$TARGET ))
        ;;
    name)
        if [[ -z "$TARGET" ]]; then
            log_error "Restore point name cannot be empty"
            exit 1
        fi
        ;;
    set)
        # Backup label: 'latest' or YYYYMMDD-HHMMSSF format, optionally
        # followed by _YYYYMMDD-HHMMSS and a D or I suffix
        if [[ "$TARGET" != "latest" ]] && [[ ! "$TARGET" =~ ^[0-9]{8}-[0-9]{6}F(_[0-9]{8}-[0-9]{6}(D|I))?$ ]]; then
            log_error "Invalid backup label: $TARGET"
            log_hint "Expected: 'latest' or backup label like 20251225-120000F"
            log_hint "Check available backups with: pgbackrest info --stanza=$STANZA"
            exit 1
        fi
        ;;
esac
#--------------------------------------------------------------#
# Build Command
#--------------------------------------------------------------#
# shell_quote_join <word>...
# Print the words joined by single spaces, each escaped with printf %q, so the
# resulting line can be pasted into a shell and yields the same argument list.
shell_quote_join() {
    local joined=""
    local word
    for word in "$@"; do
        printf -v word '%q' "$word"
        joined+="${joined:+ }${word}"
    done
    printf '%s\n' "$joined"
}

# Options passed on every restore: the stanza plus --delta and --force.
CMD_ARGS=("--stanza=$STANZA" "--delta" "--force")

# Add data directory if not default
if [[ "$DATA_DIR" != "/pg/data" ]]; then
    CMD_ARGS+=("--pg1-path=$DATA_DIR")
fi

# Add recovery target
case "$METHOD" in
    default)
        ;; # No additional args needed
    immediate)
        CMD_ARGS+=("--type=immediate")
        ;;
    time|name|lsn|xid)
        # These four share the same shape: the method name is the --type.
        CMD_ARGS+=("--type=$METHOD" "--target=$TARGET")
        ;;
    set)
        CMD_ARGS+=("--set=$TARGET")
        ;;
esac

# Add exclusive option
# NOTE(review): this is passed for every METHOD, unlike --target-action below;
# confirm against the pgbackrest docs whether it should be gated the same way.
if [[ "$TARGET_EXCLUSIVE" == true ]]; then
    CMD_ARGS+=("--target-exclusive")
fi

# Add target action (only for specific recovery types)
if [[ "$TARGET_ACTION" == "promote" ]] && [[ "$METHOD" =~ ^(time|name|lsn|xid|immediate)$ ]]; then
    CMD_ARGS+=("--target-action=promote")
fi

# Final command, for display only (execution uses the CMD_ARGS array). It is
# shell-quoted so that a target containing spaces, such as a timestamp, can be
# copied from the dry-run output and run as-is.
FULL_CMD="$(shell_quote_join pgbackrest "${CMD_ARGS[@]}" restore)"
#--------------------------------------------------------------#
# Pre-flight Checks
#--------------------------------------------------------------#
# postmaster_alive <data_dir>
# Return 0 when <data_dir>/postmaster.pid exists and the PID on its first line
# names a process this user can signal; return non-zero otherwise. The PID
# that was read (possibly empty) is left in the global PG_PID for messages.
postmaster_alive() {
    local pid_file="$1/postmaster.pid"
    PG_PID=""
    [[ -f "$pid_file" ]] || return 1
    PG_PID=$(head -n 1 "$pid_file" 2>/dev/null)
    [[ -n "$PG_PID" ]] || return 1
    # Signal 0 delivers nothing; it only tests whether the process exists.
    kill -0 "$PG_PID" 2>/dev/null
}

# Summary of what is about to happen
log_info "Recovery Target: $METHOD${TARGET:+ ($TARGET)}"
log_info "Data Directory: $DATA_DIR"
log_info "Stanza: $STANZA"
log_info "Options: exclusive=$TARGET_EXCLUSIVE action=$TARGET_ACTION"
log_hint ""
log_hint "Command:"
log_hint " $FULL_CMD"
log_hint ""

# Dry run - just print and exit
if [[ "$DRY_RUN" == true ]]; then
    log_warn "[CHECK] Command printed above. Run without -c to execute."
    exit 0
fi

# Check if current user is in postgres group (warning only). The group list
# is matched as whole space-separated names, so e.g. "postgres-admin" does
# not count as "postgres".
if [[ " $(id -nG) " != *" postgres "* ]]; then
    log_warn "Current user $(whoami) is not in postgres group"
fi

# Check if PostgreSQL is running in the target directory. This is fatal: the
# script must not go on to the countdown and restore against a live instance.
if postmaster_alive "$DATA_DIR"; then
    log_error "PostgreSQL is still running (PID: $PG_PID)
Stop PostgreSQL before restore: pg_ctl -D $DATA_DIR stop -m fast"
    exit 1
fi
#--------------------------------------------------------------#
# Confirmation & Execution
#--------------------------------------------------------------#
# Final warning banner: everything below this point overwrites DATA_DIR.
for _banner_line in \
    "===========================================" \
    "WARNING: Point-In-Time Recovery Operation" \
    "This will OVERWRITE data in: $DATA_DIR" \
    "==========================================="
do
    log_warn "$_banner_line"
done
unset _banner_line
#--------------------------------------------------------------#
# Countdown
#--------------------------------------------------------------#
# countdown [seconds]
# Countdown timer for interactive terminals: rewrites one status line once per
# second (default 5 seconds) so the operator has time to press Ctrl+C. When
# stdin or stdout is not a terminal, nothing is waited for and a log line
# records that the countdown was skipped.
countdown() {
    local seconds=${1:-5}
    local i    # loop counter stays out of the global scope
    if [ -t 0 ] && [ -t 1 ]; then
        for ((i=seconds; i>0; i--)); do
            # \r returns to column 0 so each tick overwrites the previous one
            printf "\r${__CY}Starting in %d seconds... (Ctrl+C to abort)${__CN}" "$i"
            sleep 1
        done
        printf "\n"
    else
        log_info "Non-interactive mode, skipping countdown..."
    fi
}
# Give an interactive operator a last chance to abort, unless -y/--yes was
# given. Non-interactive runs proceed straight to the restore.
if [[ "$SKIP_CONFIRM" != true ]] && [ -t 0 ] && [ -t 1 ]; then
    log_hint "Press Ctrl+C to abort"
    countdown 5
fi

log_info "Starting restore..."
# Echo the command with each argument shell-quoted (printf %q), so arguments
# containing spaces -- such as a timestamp target -- are shown as they must be
# typed to reproduce the run.
log_hint "$ pgbackrest $(printf '%q ' "${CMD_ARGS[@]}")restore"

# Execute the restore command (using array to avoid eval security risks)
pgbackrest "${CMD_ARGS[@]}" restore
RC=$?

if [[ $RC -eq 0 ]]; then
    log_info "==========================================="
    log_info "Restore completed successfully!"
    log_info "==========================================="
    log_hint ""
    log_hint "Next steps:"
    log_hint " 1. Start PostgreSQL: pg_ctl -D $DATA_DIR start"
    log_hint " 2. Validate your data"
    log_hint " 3. Promote if satisfied: pg_ctl -D $DATA_DIR promote"
    log_hint " 4. Enable archive_mode: psql -c \"ALTER SYSTEM SET archive_mode = on;\""
    log_hint " 5. Restart PostgreSQL: pg_ctl -D $DATA_DIR restart"
else
    log_error "Restore failed with exit code: $RC"
fi

# Propagate pgbackrest's exit status to the caller
exit "$RC"