Files
securityonion/salt/postgres/tools/sbin/so-stats-show
T
Mike Reeves 8225d41661 Harden postgres secrets, TLS enforcement, and admin tooling
- Deliver postgres super and app passwords via mounted 0600 secret files
  (POSTGRES_PASSWORD_FILE, SO_POSTGRES_PASS_FILE) instead of plaintext env
  vars visible in docker inspect output
- Mount a managed pg_hba.conf that only allows local trust and hostssl
  scram-sha-256 so TCP clients cannot negotiate cleartext sessions
- Restrict postgres.key to 0400 and ensure owner/group 939
- Set umask 0077 on so-postgres-backup output
- Validate host values in so-stats-show against [A-Za-z0-9._-] before SQL
  interpolation so a compromised minion cannot inject SQL via a tag value
- Coerce postgres:telegraf:retention_days to int before rendering into SQL
- Escape single quotes when rendering pillar values into postgresql.conf
- Own postgres tooling in /usr/sbin as root:root so a container escape
  cannot rewrite admin scripts
- Gate ES migration TLS verification on esVerifyCert (default false,
  matching the elastic module's existing pattern)
2026-04-20 12:36:17 -04:00

158 lines
5.1 KiB
Bash

#!/bin/bash
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
# Point-in-time host metrics from the Telegraf Postgres backend.
# Sanity-check tool for verifying metrics are landing before the grid
# dashboards consume them.
#
# Assumes Telegraf's postgresql output is configured with
# tags_as_foreign_keys = true, tags_as_jsonb = true, fields_as_jsonb = true,
# so metric tables are (time, tag_id, fields jsonb) and tag tables are
# (tag_id, tags jsonb).
. /usr/sbin/so-common
usage() {
cat <<EOF
Usage: $0 [host]
Shows the most recent CPU, memory, disk, and load metrics for each host
from the so_telegraf Postgres database. Without an argument, reports on
every host that has data. With a host, limits output to that one.
Requires: sudo, so-postgres running, global.telegraf_output set to
POSTGRES or BOTH.
EOF
exit 1
}
if [ "$(id -u)" -ne 0 ]; then
echo "This script must be run using sudo!"
exit 1
fi
case "${1:-}" in
-h|--help) usage ;;
esac
FILTER_HOST="${1:-}"
SCHEMA="telegraf"
# Host values are interpolated into SQL below. Hostnames are [A-Za-z0-9._-];
# any other character in a tag value or CLI arg is rejected to prevent a
# stored-tag (or CLI) → SQL injection via a compromised Telegraf writer.
HOST_RE='^[A-Za-z0-9._-]+$'
if [ -n "$FILTER_HOST" ] && ! [[ "$FILTER_HOST" =~ $HOST_RE ]]; then
echo "Invalid host filter: $FILTER_HOST" >&2
exit 1
fi
so_psql() {
docker exec so-postgres psql -U postgres -d so_telegraf -At -F $'\t' "$@"
}
if ! docker exec so-postgres psql -U postgres -lqt 2>/dev/null | cut -d\| -f1 | grep -qw so_telegraf; then
echo "Database so_telegraf not found. Is global.telegraf_output set to POSTGRES or BOTH?"
exit 2
fi
table_exists() {
local table="$1"
[ -n "$(so_psql -c "SELECT 1 FROM information_schema.tables WHERE table_schema='${SCHEMA}' AND table_name='${table}' LIMIT 1;")" ]
}
# Discover hosts from cpu_tag (every minion reports cpu).
if ! table_exists "cpu_tag"; then
echo "${SCHEMA}.cpu_tag not found. Has Telegraf written any rows yet?"
exit 0
fi
HOSTS=$(so_psql -c "
SELECT DISTINCT tags->>'host'
FROM \"${SCHEMA}\".cpu_tag
WHERE tags ? 'host'
ORDER BY 1;")
if [ -z "$HOSTS" ]; then
echo "No hosts found in ${SCHEMA}. Is Telegraf configured to write to Postgres?"
exit 0
fi
print_metric() {
so_psql -c "$1"
}
for host in $HOSTS; do
if ! [[ "$host" =~ $HOST_RE ]]; then
echo "Skipping host with invalid characters in tag value: $host" >&2
continue
fi
if [ -n "$FILTER_HOST" ] && [ "$host" != "$FILTER_HOST" ]; then
continue
fi
echo "===================================================================="
echo " Host: $host"
echo "===================================================================="
if table_exists "cpu"; then
print_metric "
SELECT 'cpu ' AS metric,
to_char(c.time, 'YYYY-MM-DD HH24:MI:SS') AS ts,
round((100 - (c.fields->>'usage_idle')::numeric), 1) || '% used'
FROM \"${SCHEMA}\".cpu c
JOIN \"${SCHEMA}\".cpu_tag t USING (tag_id)
WHERE t.tags->>'host' = '${host}' AND t.tags->>'cpu' = 'cpu-total'
ORDER BY c.time DESC LIMIT 1;"
fi
if table_exists "mem"; then
print_metric "
SELECT 'memory ' AS metric,
to_char(m.time, 'YYYY-MM-DD HH24:MI:SS') AS ts,
round((m.fields->>'used_percent')::numeric, 1) || '% used (' ||
pg_size_pretty((m.fields->>'used')::bigint) || ' of ' ||
pg_size_pretty((m.fields->>'total')::bigint) || ')'
FROM \"${SCHEMA}\".mem m
JOIN \"${SCHEMA}\".mem_tag t USING (tag_id)
WHERE t.tags->>'host' = '${host}'
ORDER BY m.time DESC LIMIT 1;"
fi
if table_exists "disk"; then
print_metric "
SELECT 'disk ' || rpad(t.tags->>'path', 12) AS metric,
to_char(d.time, 'YYYY-MM-DD HH24:MI:SS') AS ts,
round((d.fields->>'used_percent')::numeric, 1) || '% used (' ||
pg_size_pretty((d.fields->>'used')::bigint) || ' of ' ||
pg_size_pretty((d.fields->>'total')::bigint) || ')'
FROM \"${SCHEMA}\".disk d
JOIN \"${SCHEMA}\".disk_tag t USING (tag_id)
WHERE t.tags->>'host' = '${host}'
AND d.time = (SELECT max(d2.time)
FROM \"${SCHEMA}\".disk d2
JOIN \"${SCHEMA}\".disk_tag t2 USING (tag_id)
WHERE t2.tags->>'host' = '${host}')
ORDER BY t.tags->>'path';"
fi
if table_exists "system"; then
print_metric "
SELECT 'load ' AS metric,
to_char(s.time, 'YYYY-MM-DD HH24:MI:SS') AS ts,
(s.fields->>'load1') || ' / ' ||
(s.fields->>'load5') || ' / ' ||
(s.fields->>'load15') || ' (1/5/15m)'
FROM \"${SCHEMA}\".system s
JOIN \"${SCHEMA}\".system_tag t USING (tag_id)
WHERE t.tags->>'host' = '${host}'
ORDER BY s.time DESC LIMIT 1;"
fi
echo ""
done