observability.svc.plus/files/postgres/pg-repack
2026-02-01 20:53:55 +08:00

316 lines
11 KiB
Bash
Executable File

#!/bin/bash
set -uo pipefail
#==============================================================#
# File : pg-repack
# Desc : Repack bloated tables and indexes
# Ctime : 2018-05-18
# Mtime : 2025-12-30
# Path : /pg/bin/pg-repack
# Deps : pg_repack, psql, pg-role
# License : Apache-2.0 @ https://pigsty.io/docs/about/license/
# Copyright : 2018-2026 Ruohang Feng / Vonng (rh@vonng.com)
#==============================================================#
PROG_DIR="$(cd "$(dirname "$0")" && pwd)"
#--------------------------------------------------------------#
# Usage
#--------------------------------------------------------------#
# pg-repack [options] [database...]
#
# Repack bloated tables and indexes in PostgreSQL databases.
# This script must run on primary instance with dbsu (postgres).
#
# Options:
# -h, --help Show this help message
# -n, --dry-run Show what would be done without executing
# -t, --table Repack tables only
# -i, --index Repack indexes only
# -T, --timeout SEC Lock wait timeout in seconds (default: 10)
# -j, --jobs NUM Number of parallel jobs (default: 2)
#
# Examples:
# pg-repack # repack all databases
# pg-repack mydb # repack specific database
# pg-repack -n mydb # dry-run mode
# pg-repack -t mydb # repack tables only
# pg-repack -i mydb # repack indexes only
# pg-repack -T 30 -j 4 mydb # custom timeout and jobs
#--------------------------------------------------------------#
#--------------------------------------------------------------#
# Log Util
#--------------------------------------------------------------#
if [[ -t 1 ]]; then
__CN='\033[0m';__CR='\033[0;31m';__CG='\033[0;32m';__CY='\033[0;33m';__CB='\033[0;34m';__CM='\033[0;35m'
else
__CN='';__CR='';__CG='';__CY='';__CB='';__CM=''
fi
log_info() { printf "[${__CG} OK ${__CN}] ${__CG}%s${__CN}\n" "$*"; }
log_warn() { printf "[${__CY}WARN${__CN}] ${__CY}%s${__CN}\n" "$*"; }
log_error() { printf "[${__CR}FAIL${__CN}] ${__CR}%s${__CN}\n" "$*"; }
log_hint() { printf "[${__CB}HINT${__CN}] ${__CB}%s${__CN}\n" "$*"; }
#--------------------------------------------------------------#
# Utils
#--------------------------------------------------------------#
# kill vacuum/analyze queries to avoid lock contention
kill_queries() {
local db=$1
local kill_count
kill_count=$(psql "${db}" -AXtwqc "SELECT count(pg_cancel_backend(pid)) FROM pg_stat_activity WHERE state <> 'idle' AND pid <> pg_backend_pid() AND (query ~* 'vacuum' OR query ~* 'analyze');" 2>/dev/null)
if [[ -n "${kill_count}" ]] && [[ "${kill_count}" -gt 0 ]]; then
log_warn "killed ${kill_count} vacuum/analyze queries in ${db}"
fi
}
# repack single table
repack_table() {
local db=$1 relname=$2
if [[ "${DRY_RUN}" == "true" ]]; then
log_info "[dry-run] pg_repack ${db} -T ${LOCK_TIMEOUT} -j ${PARALLEL_JOBS} -t ${relname}"
else
kill_queries "${db}"
if pg_repack "${db}" -T "${LOCK_TIMEOUT}" -j "${PARALLEL_JOBS}" -t "${relname}"; then
log_info "repack table ${db}.${relname} done"
else
log_error "repack table ${db}.${relname} failed"
fi
fi
}
# repack single index
repack_index() {
local db=$1 idxname=$2
if [[ "${DRY_RUN}" == "true" ]]; then
log_info "[dry-run] pg_repack ${db} -T ${LOCK_TIMEOUT} -i ${idxname}"
else
kill_queries "${db}"
if pg_repack "${db}" -T "${LOCK_TIMEOUT}" -i "${idxname}"; then
log_info "repack index ${db}.${idxname} done"
else
log_error "repack index ${db}.${idxname} failed"
fi
fi
}
#--------------------------------------------------------------#
# Repack Tables
#--------------------------------------------------------------#
repack_tables() {
local db=$1
shift
if [[ $# -eq 0 ]]; then
log_info "repack ${db}: no bloat tables found"
return 0
fi
log_info "repack ${db} tables begin, count: $#"
for relname in "$@"; do
repack_table "${db}" "${relname}"
done
log_info "repack ${db} tables done"
}
#--------------------------------------------------------------#
# Get Bloat Tables
#--------------------------------------------------------------#
get_bloat_tables() {
psql "$1" -AXtwq <<-'EOF'
WITH bloat_tables AS (
SELECT nspname || '.' || relname AS relname,
size / 1048576 AS actual_mb,
ratio AS bloat_pct
FROM monitor.pg_table_bloat
WHERE nspname NOT IN ('pg_catalog', 'information_schema', 'monitor')
)
-- 64 small + 16 medium + 4 large (ratio is 0-1 float, 0.4 = 40%)
(SELECT relname FROM bloat_tables WHERE actual_mb < 256 AND bloat_pct > 0.4 ORDER BY bloat_pct DESC LIMIT 64) UNION
(SELECT relname FROM bloat_tables WHERE actual_mb BETWEEN 256 AND 2048 AND bloat_pct > 0.3 ORDER BY bloat_pct DESC LIMIT 16) UNION
(SELECT relname FROM bloat_tables WHERE actual_mb BETWEEN 2048 AND 8192 AND bloat_pct > 0.2 ORDER BY bloat_pct DESC LIMIT 4) UNION
(SELECT relname FROM bloat_tables WHERE actual_mb BETWEEN 8192 AND 65536 AND bloat_pct > 0.15 ORDER BY bloat_pct DESC LIMIT 1);
EOF
}
# warn about huge tables (>64GB) that need manual handling
warn_huge_tables() {
local db=$1
local huge_tables
huge_tables=$(psql "${db}" -AXtwq <<-'EOF'
SELECT nspname || '.' || relname || ' (' || pg_size_pretty(size) || ', ' || round(ratio::numeric * 100, 1) || '% bloat)'
FROM monitor.pg_table_bloat
WHERE size > 68719476736 AND ratio > 0.1
ORDER BY size DESC LIMIT 10;
EOF
)
if [[ -n "${huge_tables}" ]]; then
log_hint "huge tables (>64GB) skipped, use explicit repack:"
echo "${huge_tables}" | while read -r line; do log_hint " ${line}"; done
fi
}
#--------------------------------------------------------------#
# Repack Indexes
#--------------------------------------------------------------#
repack_indexes() {
local db=$1
shift
if [[ $# -eq 0 ]]; then
log_info "repack ${db}: no bloat indexes found"
return 0
fi
log_info "repack ${db} indexes begin, count: $#"
for idxname in "$@"; do
repack_index "${db}" "${idxname}"
done
log_info "repack ${db} indexes done"
}
#--------------------------------------------------------------#
# Get Bloat Indexes
#--------------------------------------------------------------#
get_bloat_indexes() {
psql "$1" -AXtwq <<-'EOF'
WITH bloat_indexes AS (
SELECT nspname || '.' || relname AS idx_name,
size / 1048576 AS actual_mb,
ratio AS bloat_pct
FROM monitor.pg_index_bloat
WHERE nspname NOT IN ('pg_catalog', 'information_schema', 'monitor')
)
-- 64 small + 16 medium + 4 large + 2 xlarge (ratio is 0-1 float)
(SELECT idx_name FROM bloat_indexes WHERE actual_mb < 128 AND bloat_pct > 0.4 ORDER BY bloat_pct DESC LIMIT 64) UNION
(SELECT idx_name FROM bloat_indexes WHERE actual_mb BETWEEN 128 AND 1024 AND bloat_pct > 0.35 ORDER BY bloat_pct DESC LIMIT 16) UNION
(SELECT idx_name FROM bloat_indexes WHERE actual_mb BETWEEN 1024 AND 8192 AND bloat_pct > 0.3 ORDER BY bloat_pct DESC LIMIT 4) UNION
(SELECT idx_name FROM bloat_indexes WHERE actual_mb BETWEEN 8192 AND 65536 AND bloat_pct > 0.2 ORDER BY bloat_pct DESC LIMIT 1);
EOF
}
# warn about huge indexes (>64GB) that need manual handling
warn_huge_indexes() {
local db=$1
local huge_indexes
huge_indexes=$(psql "${db}" -AXtwq <<-'EOF'
SELECT nspname || '.' || relname || ' (' || pg_size_pretty(size) || ', ' || round(ratio::numeric * 100, 1) || '% bloat)'
FROM monitor.pg_index_bloat
WHERE size > 68719476736 AND ratio > 0.1
ORDER BY size DESC LIMIT 10;
EOF
)
if [[ -n "${huge_indexes}" ]]; then
log_hint "huge indexes (>64GB) skipped, use explicit repack:"
echo "${huge_indexes}" | while read -r line; do log_hint " ${line}"; done
fi
}
#--------------------------------------------------------------#
# Repack Database
#--------------------------------------------------------------#
repack_database() {
local db=$1
log_info "repack database ${db} begin"
# check if monitor schema exists
if ! psql "${db}" -AXtwqc "SELECT 1 FROM pg_namespace WHERE nspname = 'monitor'" 2>/dev/null | grep -q 1; then
log_warn "monitor schema not found in ${db}, skip (run pg_init_template first)"
return 0
fi
if [[ "${REPACK_INDEX_ONLY}" != "true" ]]; then
repack_tables "${db}" $(get_bloat_tables "${db}")
warn_huge_tables "${db}"
fi
if [[ "${REPACK_TABLE_ONLY}" != "true" ]]; then
repack_indexes "${db}" $(get_bloat_indexes "${db}")
warn_huge_indexes "${db}"
fi
log_info "repack database ${db} done"
}
#--------------------------------------------------------------#
# Repack All Databases
#--------------------------------------------------------------#
repack_all() {
local databases
if [[ ${#TARGET_DATABASES[@]} -gt 0 ]]; then
databases="${TARGET_DATABASES[*]}"
else
databases=$(psql -AXtwqc "SELECT datname FROM pg_database WHERE datname NOT IN ('template0','template1','postgres') AND datallowconn;")
fi
log_info "repack databases: ${databases}"
for database in ${databases}; do
repack_database "${database}"
done
}
#--------------------------------------------------------------#
# Main
#--------------------------------------------------------------#
main() {
local lock_path="/tmp/pg-repack.lock"
# pre-flight check
local role=$("${PROG_DIR}/pg-role" 2>/dev/null)
if [[ "${role}" != "primary" ]]; then
log_error "this script must run on primary (current: ${role:-unknown})"
exit 1
fi
if ! command -v pg_repack &>/dev/null; then
log_error "pg_repack command not found in PATH"
exit 2
fi
# acquire lock
if [[ -e "${lock_path}" ]] && kill -0 "$(cat "${lock_path}")" 2>/dev/null; then
log_error "pg-repack already running: pid=$(cat "${lock_path}")"
exit 3
fi
trap "rm -f ${lock_path}; exit" INT TERM EXIT
echo $$ > "${lock_path}"
log_info "pg-repack begin, pid: $$"
repack_all
log_info "pg-repack done"
}
#--------------------------------------------------------------#
# Parse Arguments
#--------------------------------------------------------------#
DRY_RUN="false"
REPACK_TABLE_ONLY="false"
REPACK_INDEX_ONLY="false"
LOCK_TIMEOUT=10
PARALLEL_JOBS=2
TARGET_DATABASES=()
usage() {
grep '^#' "$0" | grep -v '#!/bin/bash' | sed 's/^#//g' | head -40
exit 0
}
while [[ $# -gt 0 ]]; do
case "$1" in
-h|--help) usage ;;
-n|--dry-run) DRY_RUN="true"; shift ;;
-t|--table) REPACK_TABLE_ONLY="true"; shift ;;
-i|--index) REPACK_INDEX_ONLY="true"; shift ;;
-T|--timeout) LOCK_TIMEOUT="$2"; shift 2 ;;
-j|--jobs) PARALLEL_JOBS="$2"; shift 2 ;;
-*) log_error "unknown option: $1"; usage ;;
*) TARGET_DATABASES+=("$1"); shift ;;
esac
done
main