Files
securityonion/salt/postgres/tools/sbin/so-postgres-backup
T
Mike Reeves 244a73b7a2 Make so-postgres-backup fail-safe against silent corruption
The dump pipeline returned gzip's exit status, so a pg_dumpall that
died mid-stream still produced a valid .gz holding a truncated dump,
written straight to the final filename. The idempotency check then
blocked retries for the day and the corrupt file counted toward
retention, evicting a good backup each day until none remained.

- set -o pipefail so a failed pg_dumpall fails the pipeline
- dump to a .tmp file and atomically rename only after success, so
  the final filename appears only for a complete backup
- gzip -t integrity check before publishing
- trap-based cleanup of the temp file; sweep stale temps at startup
- run retention only after a successful backup, with a glob
  restricted to finished backups
- log timestamped OK/ERROR outcomes to /opt/so/log/postgres/backup.log
2026-05-15 08:48:54 -04:00

74 lines
2.4 KiB
Bash

#!/bin/bash
#
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
. /usr/sbin/so-common
# Without pipefail, a pipeline's exit status is gzip's. A failed pg_dumpall would
# otherwise be masked by a successful gzip, silently producing a valid .gz that
# holds a truncated dump.
set -o pipefail
# Backups contain role password hashes and full chat data; keep them 0600.
umask 0077
TODAY=$(date '+%Y_%m_%d')
BACKUPDIR=/nsm/backup
BACKUPFILE="$BACKUPDIR/so-postgres-backup-$TODAY.sql.gz"
TMPFILE="$BACKUPFILE.tmp"
MAXBACKUPS=7
LOGFILE=/opt/so/log/postgres/backup.log
log() {
echo "$(date '+%Y-%m-%d %H:%M:%S') $*" >> "$LOGFILE"
}
mkdir -p "$BACKUPDIR"
# Remove any temp files left behind by a previously crashed run
rm -f "$BACKUPDIR"/so-postgres-backup-*.sql.gz.tmp
# Skip if already backed up today
if [ -f "$BACKUPFILE" ]; then
exit 0
fi
# Skip if container isn't running
if ! docker ps --format '{{.Names}}' | grep -q '^so-postgres$'; then
exit 0
fi
# Always clean up the temp file on exit; the success path clears this trap
# after the atomic rename so the finished backup is not deleted.
trap 'rm -f "$TMPFILE"' EXIT
# Dump all databases and roles, compress. Write to a temp file so the final
# filename only ever appears for a complete, verified backup.
if ! docker exec so-postgres pg_dumpall -U postgres | gzip > "$TMPFILE"; then
log "ERROR: pg_dumpall/gzip failed; backup aborted"
exit 1
fi
# Verify the compressed stream is intact before publishing it
if ! gzip -t "$TMPFILE"; then
log "ERROR: backup failed gzip integrity check; backup aborted"
exit 1
fi
# Atomically publish the verified backup
mv "$TMPFILE" "$BACKUPFILE"
trap - EXIT
log "OK: wrote $BACKUPFILE"
# Retention cleanup (only reached after a successful backup). The glob is
# restricted to finished backups so an in-progress .tmp can never be counted.
NUMBACKUPS=$(find "$BACKUPDIR" -type f -name "so-postgres-backup-*.sql.gz" | wc -l)
while [ "$NUMBACKUPS" -gt "$MAXBACKUPS" ]; do
OLDEST=$(find "$BACKUPDIR" -type f -name "so-postgres-backup-*.sql.gz" -printf '%T+ %p\n' | sort | head -n 1 | awk -F" " '{print $2}')
rm -f "$OLDEST"
NUMBACKUPS=$(find "$BACKUPDIR" -type f -name "so-postgres-backup-*.sql.gz" | wc -l)
done