mirror of
https://github.com/Security-Onion-Solutions/securityonion.git
synced 2026-05-09 21:02:36 +02:00
470b3bd4da
Per-minion schemas cause table count to explode (N minions * M metrics) and the per-minion revocation story isn't worth it when retention is short. Move all minions to a shared 'telegraf' schema while keeping per-minion login credentials for audit. - New so_telegraf NOLOGIN group role owns the telegraf schema; each per-minion role is a member and inherits insert/select via role inheritance - Telegraf connection string uses options='-c role=so_telegraf' so tables auto-created on first write belong to the group role - so-telegraf-trim walks the flat telegraf.* table set instead of per-minion schemas - so-stats-show filters by host tag; CLI arg is now the hostname as tagged by Telegraf rather than a sanitized schema suffix - Also renames so-show-stats -> so-stats-show
104 lines
3.1 KiB
Bash
104 lines
3.1 KiB
Bash
#!/bin/bash
|
|
|
|
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
|
|
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
|
|
# https://securityonion.net/license; you may not use this file except in compliance with the
|
|
# Elastic License 2.0.
|
|
|
|
# Deletes Telegraf metric rows older than the configured retention window from
|
|
# every minion schema in the so_telegraf database. Intended to run daily from
|
|
# cron. Retention comes from pillar (postgres.telegraf.retention_days),
|
|
# defaulting to 14 days. An explicit --days argument overrides the pillar.
|
|
|
|
. /usr/sbin/so-common
|
|
|
|
usage() {
|
|
cat <<EOF
|
|
Usage: $0 [--days N] [--dry-run]
|
|
|
|
--days N Override retention in days (default: pillar
|
|
postgres.telegraf.retention_days, fallback 14)
|
|
--dry-run Report what would be deleted without modifying anything
|
|
EOF
|
|
exit 1
|
|
}
|
|
|
|
if [ "$(id -u)" -ne 0 ]; then
|
|
echo "This script must be run using sudo!"
|
|
exit 1
|
|
fi
|
|
|
|
DAYS=""
|
|
DRY_RUN=0
|
|
while [ $# -gt 0 ]; do
|
|
case "$1" in
|
|
--days) DAYS="$2"; shift 2 ;;
|
|
--dry-run) DRY_RUN=1; shift ;;
|
|
-h|--help) usage ;;
|
|
*) usage ;;
|
|
esac
|
|
done
|
|
|
|
if [ -z "$DAYS" ]; then
|
|
DAYS=$(salt-call --local --out=newline_values_only pillar.get postgres:telegraf:retention_days 2>/dev/null)
|
|
fi
|
|
if ! [[ "$DAYS" =~ ^[0-9]+$ ]] || [ "$DAYS" -lt 1 ]; then
|
|
DAYS=14
|
|
fi
|
|
|
|
log() {
|
|
echo "$(date '+%Y-%m-%d %H:%M:%S') so-telegraf-trim: $*"
|
|
}
|
|
|
|
so_psql() {
|
|
docker exec so-postgres psql -U postgres -d so_telegraf -At -F $'\t' "$@"
|
|
}
|
|
|
|
if ! docker exec so-postgres psql -U postgres -lqt 2>/dev/null | cut -d\| -f1 | grep -qw so_telegraf; then
|
|
log "Database so_telegraf not present; nothing to trim."
|
|
exit 0
|
|
fi
|
|
|
|
log "Trimming rows older than ${DAYS} days (dry_run=${DRY_RUN})."
|
|
|
|
TOTAL_DELETED=0
|
|
|
|
# Every metric table in the shared telegraf schema has a 'time' column.
|
|
# Tag tables (<metric>_tag) don't, so filtering on the column presence is
|
|
# enough to scope the trim to metric tables only.
|
|
ROWS=$(so_psql -c "
|
|
SELECT table_schema || '.' || table_name
|
|
FROM information_schema.columns
|
|
WHERE column_name = 'time'
|
|
AND data_type IN ('timestamp with time zone', 'timestamp without time zone')
|
|
AND table_schema = 'telegraf'
|
|
ORDER BY 1;")
|
|
|
|
if [ -z "$ROWS" ]; then
|
|
log "No telegraf metric tables found."
|
|
exit 0
|
|
fi
|
|
|
|
for qualified in $ROWS; do
|
|
if [ "$DRY_RUN" -eq 1 ]; then
|
|
count=$(so_psql -c "SELECT count(*) FROM \"${qualified%.*}\".\"${qualified#*.}\" WHERE time < now() - interval '${DAYS} days';")
|
|
log "would delete ${count:-0} rows from ${qualified}"
|
|
else
|
|
# RETURNING count via a CTE so we can log how much was trimmed per table
|
|
deleted=$(so_psql -c "
|
|
WITH d AS (
|
|
DELETE FROM \"${qualified%.*}\".\"${qualified#*.}\"
|
|
WHERE time < now() - interval '${DAYS} days'
|
|
RETURNING 1
|
|
)
|
|
SELECT count(*) FROM d;")
|
|
deleted=${deleted:-0}
|
|
TOTAL_DELETED=$((TOTAL_DELETED + deleted))
|
|
[ "$deleted" -gt 0 ] && log "deleted ${deleted} rows from ${qualified}"
|
|
fi
|
|
done
|
|
|
|
if [ "$DRY_RUN" -eq 0 ]; then
|
|
log "Trim complete. Total rows deleted: ${TOTAL_DELETED}."
|
|
fi
|