316 lines
11 KiB
Bash
Executable File
316 lines
11 KiB
Bash
Executable File
#!/bin/bash
|
|
set -uo pipefail
|
|
#==============================================================#
|
|
# File : pg-repack
|
|
# Desc : Repack bloated tables and indexes
|
|
# Ctime : 2018-05-18
|
|
# Mtime : 2025-12-30
|
|
# Path : /pg/bin/pg-repack
|
|
# Deps : pg_repack, psql, pg-role
|
|
# License : Apache-2.0 @ https://pigsty.io/docs/about/license/
|
|
# Copyright : 2018-2026 Ruohang Feng / Vonng (rh@vonng.com)
|
|
#==============================================================#
|
|
PROG_DIR="$(cd "$(dirname "$0")" && pwd)"
|
|
|
|
|
|
#--------------------------------------------------------------#
|
|
# Usage
|
|
#--------------------------------------------------------------#
|
|
# pg-repack [options] [database...]
|
|
#
|
|
# Repack bloated tables and indexes in PostgreSQL databases.
|
|
# This script must run on primary instance with dbsu (postgres).
|
|
#
|
|
# Options:
|
|
# -h, --help Show this help message
|
|
# -n, --dry-run Show what would be done without executing
|
|
# -t, --table Repack tables only
|
|
# -i, --index Repack indexes only
|
|
# -T, --timeout SEC Lock wait timeout in seconds (default: 10)
|
|
# -j, --jobs NUM Number of parallel jobs (default: 2)
|
|
#
|
|
# Examples:
|
|
# pg-repack # repack all databases
|
|
# pg-repack mydb # repack specific database
|
|
# pg-repack -n mydb # dry-run mode
|
|
# pg-repack -t mydb # repack tables only
|
|
# pg-repack -i mydb # repack indexes only
|
|
# pg-repack -T 30 -j 4 mydb # custom timeout and jobs
|
|
#--------------------------------------------------------------#
|
|
|
|
|
|
#--------------------------------------------------------------#
|
|
# Log Util
|
|
#--------------------------------------------------------------#
|
|
if [[ -t 1 ]]; then
|
|
__CN='\033[0m';__CR='\033[0;31m';__CG='\033[0;32m';__CY='\033[0;33m';__CB='\033[0;34m';__CM='\033[0;35m'
|
|
else
|
|
__CN='';__CR='';__CG='';__CY='';__CB='';__CM=''
|
|
fi
|
|
log_info() { printf "[${__CG} OK ${__CN}] ${__CG}%s${__CN}\n" "$*"; }
|
|
log_warn() { printf "[${__CY}WARN${__CN}] ${__CY}%s${__CN}\n" "$*"; }
|
|
log_error() { printf "[${__CR}FAIL${__CN}] ${__CR}%s${__CN}\n" "$*"; }
|
|
log_hint() { printf "[${__CB}HINT${__CN}] ${__CB}%s${__CN}\n" "$*"; }
|
|
|
|
|
|
#--------------------------------------------------------------#
|
|
# Utils
|
|
#--------------------------------------------------------------#
|
|
# kill vacuum/analyze queries to avoid lock contention
|
|
kill_queries() {
|
|
local db=$1
|
|
local kill_count
|
|
kill_count=$(psql "${db}" -AXtwqc "SELECT count(pg_cancel_backend(pid)) FROM pg_stat_activity WHERE state <> 'idle' AND pid <> pg_backend_pid() AND (query ~* 'vacuum' OR query ~* 'analyze');" 2>/dev/null)
|
|
if [[ -n "${kill_count}" ]] && [[ "${kill_count}" -gt 0 ]]; then
|
|
log_warn "killed ${kill_count} vacuum/analyze queries in ${db}"
|
|
fi
|
|
}
|
|
|
|
# repack single table
|
|
repack_table() {
|
|
local db=$1 relname=$2
|
|
if [[ "${DRY_RUN}" == "true" ]]; then
|
|
log_info "[dry-run] pg_repack ${db} -T ${LOCK_TIMEOUT} -j ${PARALLEL_JOBS} -t ${relname}"
|
|
else
|
|
kill_queries "${db}"
|
|
if pg_repack "${db}" -T "${LOCK_TIMEOUT}" -j "${PARALLEL_JOBS}" -t "${relname}"; then
|
|
log_info "repack table ${db}.${relname} done"
|
|
else
|
|
log_error "repack table ${db}.${relname} failed"
|
|
fi
|
|
fi
|
|
}
|
|
|
|
# repack single index
|
|
repack_index() {
|
|
local db=$1 idxname=$2
|
|
if [[ "${DRY_RUN}" == "true" ]]; then
|
|
log_info "[dry-run] pg_repack ${db} -T ${LOCK_TIMEOUT} -i ${idxname}"
|
|
else
|
|
kill_queries "${db}"
|
|
if pg_repack "${db}" -T "${LOCK_TIMEOUT}" -i "${idxname}"; then
|
|
log_info "repack index ${db}.${idxname} done"
|
|
else
|
|
log_error "repack index ${db}.${idxname} failed"
|
|
fi
|
|
fi
|
|
}
|
|
|
|
|
|
#--------------------------------------------------------------#
|
|
# Repack Tables
|
|
#--------------------------------------------------------------#
|
|
repack_tables() {
|
|
local db=$1
|
|
shift
|
|
if [[ $# -eq 0 ]]; then
|
|
log_info "repack ${db}: no bloat tables found"
|
|
return 0
|
|
fi
|
|
log_info "repack ${db} tables begin, count: $#"
|
|
for relname in "$@"; do
|
|
repack_table "${db}" "${relname}"
|
|
done
|
|
log_info "repack ${db} tables done"
|
|
}
|
|
|
|
|
|
#--------------------------------------------------------------#
|
|
# Get Bloat Tables
|
|
#--------------------------------------------------------------#
|
|
get_bloat_tables() {
|
|
psql "$1" -AXtwq <<-'EOF'
|
|
WITH bloat_tables AS (
|
|
SELECT nspname || '.' || relname AS relname,
|
|
size / 1048576 AS actual_mb,
|
|
ratio AS bloat_pct
|
|
FROM monitor.pg_table_bloat
|
|
WHERE nspname NOT IN ('pg_catalog', 'information_schema', 'monitor')
|
|
)
|
|
-- 64 small + 16 medium + 4 large (ratio is 0-1 float, 0.4 = 40%)
|
|
(SELECT relname FROM bloat_tables WHERE actual_mb < 256 AND bloat_pct > 0.4 ORDER BY bloat_pct DESC LIMIT 64) UNION
|
|
(SELECT relname FROM bloat_tables WHERE actual_mb BETWEEN 256 AND 2048 AND bloat_pct > 0.3 ORDER BY bloat_pct DESC LIMIT 16) UNION
|
|
(SELECT relname FROM bloat_tables WHERE actual_mb BETWEEN 2048 AND 8192 AND bloat_pct > 0.2 ORDER BY bloat_pct DESC LIMIT 4) UNION
|
|
(SELECT relname FROM bloat_tables WHERE actual_mb BETWEEN 8192 AND 65536 AND bloat_pct > 0.15 ORDER BY bloat_pct DESC LIMIT 1);
|
|
EOF
|
|
}
|
|
|
|
# warn about huge tables (>64GB) that need manual handling
|
|
warn_huge_tables() {
|
|
local db=$1
|
|
local huge_tables
|
|
huge_tables=$(psql "${db}" -AXtwq <<-'EOF'
|
|
SELECT nspname || '.' || relname || ' (' || pg_size_pretty(size) || ', ' || round(ratio::numeric * 100, 1) || '% bloat)'
|
|
FROM monitor.pg_table_bloat
|
|
WHERE size > 68719476736 AND ratio > 0.1
|
|
ORDER BY size DESC LIMIT 10;
|
|
EOF
|
|
)
|
|
if [[ -n "${huge_tables}" ]]; then
|
|
log_hint "huge tables (>64GB) skipped, use explicit repack:"
|
|
echo "${huge_tables}" | while read -r line; do log_hint " ${line}"; done
|
|
fi
|
|
}
|
|
|
|
|
|
#--------------------------------------------------------------#
|
|
# Repack Indexes
|
|
#--------------------------------------------------------------#
|
|
repack_indexes() {
|
|
local db=$1
|
|
shift
|
|
if [[ $# -eq 0 ]]; then
|
|
log_info "repack ${db}: no bloat indexes found"
|
|
return 0
|
|
fi
|
|
log_info "repack ${db} indexes begin, count: $#"
|
|
for idxname in "$@"; do
|
|
repack_index "${db}" "${idxname}"
|
|
done
|
|
log_info "repack ${db} indexes done"
|
|
}
|
|
|
|
|
|
#--------------------------------------------------------------#
|
|
# Get Bloat Indexes
|
|
#--------------------------------------------------------------#
|
|
get_bloat_indexes() {
|
|
psql "$1" -AXtwq <<-'EOF'
|
|
WITH bloat_indexes AS (
|
|
SELECT nspname || '.' || relname AS idx_name,
|
|
size / 1048576 AS actual_mb,
|
|
ratio AS bloat_pct
|
|
FROM monitor.pg_index_bloat
|
|
WHERE nspname NOT IN ('pg_catalog', 'information_schema', 'monitor')
|
|
)
|
|
-- 64 small + 16 medium + 4 large + 2 xlarge (ratio is 0-1 float)
|
|
(SELECT idx_name FROM bloat_indexes WHERE actual_mb < 128 AND bloat_pct > 0.4 ORDER BY bloat_pct DESC LIMIT 64) UNION
|
|
(SELECT idx_name FROM bloat_indexes WHERE actual_mb BETWEEN 128 AND 1024 AND bloat_pct > 0.35 ORDER BY bloat_pct DESC LIMIT 16) UNION
|
|
(SELECT idx_name FROM bloat_indexes WHERE actual_mb BETWEEN 1024 AND 8192 AND bloat_pct > 0.3 ORDER BY bloat_pct DESC LIMIT 4) UNION
|
|
(SELECT idx_name FROM bloat_indexes WHERE actual_mb BETWEEN 8192 AND 65536 AND bloat_pct > 0.2 ORDER BY bloat_pct DESC LIMIT 1);
|
|
EOF
|
|
}
|
|
|
|
# warn about huge indexes (>64GB) that need manual handling
|
|
warn_huge_indexes() {
|
|
local db=$1
|
|
local huge_indexes
|
|
huge_indexes=$(psql "${db}" -AXtwq <<-'EOF'
|
|
SELECT nspname || '.' || relname || ' (' || pg_size_pretty(size) || ', ' || round(ratio::numeric * 100, 1) || '% bloat)'
|
|
FROM monitor.pg_index_bloat
|
|
WHERE size > 68719476736 AND ratio > 0.1
|
|
ORDER BY size DESC LIMIT 10;
|
|
EOF
|
|
)
|
|
if [[ -n "${huge_indexes}" ]]; then
|
|
log_hint "huge indexes (>64GB) skipped, use explicit repack:"
|
|
echo "${huge_indexes}" | while read -r line; do log_hint " ${line}"; done
|
|
fi
|
|
}
|
|
|
|
|
|
#--------------------------------------------------------------#
|
|
# Repack Database
|
|
#--------------------------------------------------------------#
|
|
repack_database() {
|
|
local db=$1
|
|
log_info "repack database ${db} begin"
|
|
|
|
# check if monitor schema exists
|
|
if ! psql "${db}" -AXtwqc "SELECT 1 FROM pg_namespace WHERE nspname = 'monitor'" 2>/dev/null | grep -q 1; then
|
|
log_warn "monitor schema not found in ${db}, skip (run pg_init_template first)"
|
|
return 0
|
|
fi
|
|
|
|
if [[ "${REPACK_INDEX_ONLY}" != "true" ]]; then
|
|
repack_tables "${db}" $(get_bloat_tables "${db}")
|
|
warn_huge_tables "${db}"
|
|
fi
|
|
if [[ "${REPACK_TABLE_ONLY}" != "true" ]]; then
|
|
repack_indexes "${db}" $(get_bloat_indexes "${db}")
|
|
warn_huge_indexes "${db}"
|
|
fi
|
|
|
|
log_info "repack database ${db} done"
|
|
}
|
|
|
|
|
|
#--------------------------------------------------------------#
|
|
# Repack All Databases
|
|
#--------------------------------------------------------------#
|
|
repack_all() {
|
|
local databases
|
|
if [[ ${#TARGET_DATABASES[@]} -gt 0 ]]; then
|
|
databases="${TARGET_DATABASES[*]}"
|
|
else
|
|
databases=$(psql -AXtwqc "SELECT datname FROM pg_database WHERE datname NOT IN ('template0','template1','postgres') AND datallowconn;")
|
|
fi
|
|
|
|
log_info "repack databases: ${databases}"
|
|
for database in ${databases}; do
|
|
repack_database "${database}"
|
|
done
|
|
}
|
|
|
|
|
|
#--------------------------------------------------------------#
|
|
# Main
|
|
#--------------------------------------------------------------#
|
|
main() {
|
|
local lock_path="/tmp/pg-repack.lock"
|
|
|
|
# pre-flight check
|
|
local role=$("${PROG_DIR}/pg-role" 2>/dev/null)
|
|
if [[ "${role}" != "primary" ]]; then
|
|
log_error "this script must run on primary (current: ${role:-unknown})"
|
|
exit 1
|
|
fi
|
|
if ! command -v pg_repack &>/dev/null; then
|
|
log_error "pg_repack command not found in PATH"
|
|
exit 2
|
|
fi
|
|
|
|
# acquire lock
|
|
if [[ -e "${lock_path}" ]] && kill -0 "$(cat "${lock_path}")" 2>/dev/null; then
|
|
log_error "pg-repack already running: pid=$(cat "${lock_path}")"
|
|
exit 3
|
|
fi
|
|
trap "rm -f ${lock_path}; exit" INT TERM EXIT
|
|
echo $$ > "${lock_path}"
|
|
|
|
log_info "pg-repack begin, pid: $$"
|
|
repack_all
|
|
log_info "pg-repack done"
|
|
}
|
|
|
|
|
|
#--------------------------------------------------------------#
|
|
# Parse Arguments
|
|
#--------------------------------------------------------------#
|
|
DRY_RUN="false"
|
|
REPACK_TABLE_ONLY="false"
|
|
REPACK_INDEX_ONLY="false"
|
|
LOCK_TIMEOUT=10
|
|
PARALLEL_JOBS=2
|
|
TARGET_DATABASES=()
|
|
|
|
usage() {
|
|
grep '^#' "$0" | grep -v '#!/bin/bash' | sed 's/^#//g' | head -40
|
|
exit 0
|
|
}
|
|
|
|
while [[ $# -gt 0 ]]; do
|
|
case "$1" in
|
|
-h|--help) usage ;;
|
|
-n|--dry-run) DRY_RUN="true"; shift ;;
|
|
-t|--table) REPACK_TABLE_ONLY="true"; shift ;;
|
|
-i|--index) REPACK_INDEX_ONLY="true"; shift ;;
|
|
-T|--timeout) LOCK_TIMEOUT="$2"; shift 2 ;;
|
|
-j|--jobs) PARALLEL_JOBS="$2"; shift 2 ;;
|
|
-*) log_error "unknown option: $1"; usage ;;
|
|
*) TARGET_DATABASES+=("$1"); shift ;;
|
|
esac
|
|
done
|
|
|
|
main
|