From f3181b204a1c02a42ebc76e74d0db9fb6ed75f8c Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 17 Apr 2026 19:06:16 -0400 Subject: [PATCH] Remove so-telegraf-trim and update retention description pg_partman drops old partitions hourly; row-DELETE retention is obsolete and a confusing emergency fallback on partitioned tables. --- salt/postgres/soc_postgres.yaml | 2 +- salt/postgres/tools/sbin/so-telegraf-trim | 103 ---------------------- 2 files changed, 1 insertion(+), 104 deletions(-) delete mode 100644 salt/postgres/tools/sbin/so-telegraf-trim diff --git a/salt/postgres/soc_postgres.yaml b/salt/postgres/soc_postgres.yaml index 167772e3f..8b4e22921 100644 --- a/salt/postgres/soc_postgres.yaml +++ b/salt/postgres/soc_postgres.yaml @@ -1,7 +1,7 @@ postgres: telegraf: retention_days: - description: Number of days of Telegraf metrics to keep in the so_telegraf database. Older rows are deleted nightly by so-telegraf-trim. + description: Number of days of Telegraf metrics to keep in the so_telegraf database. Older partitions are dropped hourly by pg_partman. forcedType: int advanced: True helpLink: influxdb diff --git a/salt/postgres/tools/sbin/so-telegraf-trim b/salt/postgres/tools/sbin/so-telegraf-trim deleted file mode 100644 index 664469d0c..000000000 --- a/salt/postgres/tools/sbin/so-telegraf-trim +++ /dev/null @@ -1,103 +0,0 @@ -#!/bin/bash - -# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one -# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at -# https://securityonion.net/license; you may not use this file except in compliance with the -# Elastic License 2.0. - -# Deletes Telegraf metric rows older than the configured retention window from -# every minion schema in the so_telegraf database. Intended to run daily from -# cron. Retention comes from pillar (postgres.telegraf.retention_days), -# defaulting to 14 days. An explicit --days argument overrides the pillar. - -. /usr/sbin/so-common - -usage() { - cat </dev/null) -fi -if ! [[ "$DAYS" =~ ^[0-9]+$ ]] || [ "$DAYS" -lt 1 ]; then - DAYS=14 -fi - -log() { - echo "$(date '+%Y-%m-%d %H:%M:%S') so-telegraf-trim: $*" -} - -so_psql() { - docker exec so-postgres psql -U postgres -d so_telegraf -At -F $'\t' "$@" -} - -if ! docker exec so-postgres psql -U postgres -lqt 2>/dev/null | cut -d\| -f1 | grep -qw so_telegraf; then - log "Database so_telegraf not present; nothing to trim." - exit 0 -fi - -log "Trimming rows older than ${DAYS} days (dry_run=${DRY_RUN})." - -TOTAL_DELETED=0 - -# Every metric table in the shared telegraf schema has a 'time' column. -# Tag tables (_tag) don't, so filtering on the column presence is -# enough to scope the trim to metric tables only. -ROWS=$(so_psql -c " - SELECT table_schema || '.' || table_name - FROM information_schema.columns - WHERE column_name = 'time' - AND data_type IN ('timestamp with time zone', 'timestamp without time zone') - AND table_schema = 'telegraf' - ORDER BY 1;") - -if [ -z "$ROWS" ]; then - log "No telegraf metric tables found." - exit 0 -fi - -for qualified in $ROWS; do - if [ "$DRY_RUN" -eq 1 ]; then - count=$(so_psql -c "SELECT count(*) FROM \"${qualified%.*}\".\"${qualified#*.}\" WHERE time < now() - interval '${DAYS} days';") - log "would delete ${count:-0} rows from ${qualified}" - else - # RETURNING count via a CTE so we can log how much was trimmed per table - deleted=$(so_psql -c " - WITH d AS ( - DELETE FROM \"${qualified%.*}\".\"${qualified#*.}\" - WHERE time < now() - interval '${DAYS} days' - RETURNING 1 - ) - SELECT count(*) FROM d;") - deleted=${deleted:-0} - TOTAL_DELETED=$((TOTAL_DELETED + deleted)) - [ "$deleted" -gt 0 ] && log "deleted ${deleted} rows from ${qualified}" - fi -done - -if [ "$DRY_RUN" -eq 0 ]; then - log "Trim complete. Total rows deleted: ${TOTAL_DELETED}." -fi