diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls
index d510af9e5..7d62ee7f0 100644
--- a/salt/postgres/telegraf_users.sls
+++ b/salt/postgres/telegraf_users.sls
@@ -10,6 +10,28 @@
 {% set TG_OUT = (GLOBALS.telegraf_output | default('INFLUXDB')) | upper %}
 {% if TG_OUT in ['POSTGRES', 'BOTH'] %}

+# Provision the shared NOLOGIN group role and the "telegraf" schema once. Every
+# per-minion role is granted so_telegraf, and each Telegraf connection does SET
+# ROLE so_telegraf (options='-c role=so_telegraf' in its connection string), so
+# tables created on first write are owned by the group role and every member can INSERT/SELECT.
+postgres_telegraf_group_role:
+  cmd.run:
+    - name: |
+        docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d so_telegraf <<'EOSQL'
+        DO $$
+        BEGIN
+          IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'so_telegraf') THEN
+            CREATE ROLE so_telegraf NOLOGIN;
+          END IF;
+        END
+        $$;
+        GRANT CONNECT ON DATABASE so_telegraf TO so_telegraf;
+        CREATE SCHEMA IF NOT EXISTS telegraf AUTHORIZATION so_telegraf;
+        GRANT USAGE, CREATE ON SCHEMA telegraf TO so_telegraf;
+        EOSQL
+    - require:
+      - docker_container: so-postgres
+
 {% set users = salt['pillar.get']('postgres:auth:users', {}) %}
 {% for key, entry in users.items() %}
 {% if key.startswith('telegraf_') and entry.get('user') and entry.get('pass') %}
@@ -30,10 +52,10 @@ postgres_telegraf_role_{{ u }}:
         END
         $$;
         GRANT CONNECT ON DATABASE so_telegraf TO "{{ u }}";
-        CREATE SCHEMA IF NOT EXISTS "{{ u }}" AUTHORIZATION "{{ u }}";
+        GRANT so_telegraf TO "{{ u }}";
         EOSQL
     - require:
-      - docker_container: so-postgres
+      - cmd: postgres_telegraf_group_role
 {% endif %}
 {% endfor %}
diff --git a/salt/postgres/tools/sbin/so-show-stats b/salt/postgres/tools/sbin/so-show-stats
deleted file mode 100644
index 68fd52d00..000000000
--- a/salt/postgres/tools/sbin/so-show-stats
+++ /dev/null
@@ -1,145 +0,0 @@
-#!/bin/bash
-
-# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
-# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
-# https://securityonion.net/license; you may not use this file except in compliance with the
-# Elastic License 2.0.
-
-# Point-in-time host metrics from the Telegraf Postgres backend.
-# Sanity-check tool for verifying metrics are landing before the grid
-# dashboards consume them.
-
-. /usr/sbin/so-common
-
-usage() {
-  cat </dev/null | cut -d\| -f1 | grep -qw so_telegraf; then
-  echo "Database so_telegraf not found. Is global.telegraf_output set to POSTGRES or BOTH?"
-  exit 2
-fi
-
-# List telegraf schemas (role-per-minion naming convention: so_telegraf_)
-SCHEMAS=$(so_psql -c "SELECT schema_name FROM information_schema.schemata WHERE schema_name LIKE 'so_telegraf_%' ORDER BY schema_name;")
-
-if [ -z "$SCHEMAS" ]; then
-  echo "No minion schemas found in so_telegraf."
-  exit 0
-fi
-
-print_metric() {
-  local schema="$1" table="$2" query="$3"
-  # Confirm table exists in this schema before querying
-  local exists
-  exists=$(so_psql -c "SELECT 1 FROM information_schema.tables WHERE table_schema='${schema}' AND table_name='${table}' LIMIT 1;")
-  [ -z "$exists" ] && return 0
-  so_psql -c "$query"
-}
-
-# Telegraf's postgresql output stores tag values either as individual columns
-# on the _tag table or as a single JSONB "tags" column, depending on
-# plugin version. Returns a SQL expression that extracts the named tag
-# regardless of layout. Empty string if the tag table doesn't exist.
-tag_expr() {
-  local schema="$1" table="$2" tag="$3" alias="$4"
-  local has_col
-  has_col=$(so_psql -c "
-    SELECT 1 FROM information_schema.columns
-    WHERE table_schema='${schema}' AND table_name='${table}_tag' AND column_name='${tag}'
-    LIMIT 1;")
-  if [ -n "$has_col" ]; then
-    echo "${alias}.${tag}"
-    return
-  fi
-  local has_tags
-  has_tags=$(so_psql -c "
-    SELECT 1 FROM information_schema.columns
-    WHERE table_schema='${schema}' AND table_name='${table}_tag' AND column_name='tags'
-    LIMIT 1;")
-  if [ -n "$has_tags" ]; then
-    echo "(${alias}.tags->>'${tag}')"
-    return
-  fi
-  echo ""
-}
-
-for schema in $SCHEMAS; do
-  minion="${schema#so_telegraf_}"
-  if [ -n "$FILTER_MINION" ]; then
-    # Compare against the sanitized form used in schema names
-    want=$(echo "$FILTER_MINION" | tr '.-' '_' | tr '[:upper:]' '[:lower:]')
-    [ "$minion" != "$want" ] && continue
-  fi
-
-  echo "===================================================================="
-  echo " Minion: $minion"
-  echo "===================================================================="
-
-  cpu_tag=$(tag_expr "$schema" "cpu" "cpu" "t")
-  if [ -n "$cpu_tag" ]; then
-    print_metric "$schema" "cpu" "
-      SELECT 'cpu ' AS metric,
-             to_char(c.time, 'YYYY-MM-DD HH24:MI:SS') AS ts,
-             round((100 - c.usage_idle)::numeric, 1) || '% used'
-      FROM \"${schema}\".cpu c
-      JOIN \"${schema}\".cpu_tag t USING (tag_id)
-      WHERE ${cpu_tag} = 'cpu-total'
-      ORDER BY c.time DESC LIMIT 1;"
-  fi
-
-  print_metric "$schema" "mem" "
-    SELECT 'memory ' AS metric,
-           to_char(m.time, 'YYYY-MM-DD HH24:MI:SS') AS ts,
-           round(m.used_percent::numeric, 1) || '% used (' ||
-           pg_size_pretty(m.used) || ' of ' || pg_size_pretty(m.total) || ')'
-    FROM \"${schema}\".mem m
-    ORDER BY m.time DESC LIMIT 1;"
-
-  disk_path=$(tag_expr "$schema" "disk" "path" "t")
-  if [ -n "$disk_path" ]; then
-    print_metric "$schema" "disk" "
-      SELECT 'disk ' || rpad(${disk_path}, 12) AS metric,
-             to_char(d.time, 'YYYY-MM-DD HH24:MI:SS') AS ts,
-             round(d.used_percent::numeric, 1) || '% used (' ||
-             pg_size_pretty(d.used) || ' of ' || pg_size_pretty(d.total) || ')'
-      FROM \"${schema}\".disk d
-      JOIN \"${schema}\".disk_tag t USING (tag_id)
-      WHERE d.time = (SELECT max(time) FROM \"${schema}\".disk)
-      ORDER BY ${disk_path};"
-  fi
-
-  print_metric "$schema" "system" "
-    SELECT 'load ' AS metric,
-           to_char(s.time, 'YYYY-MM-DD HH24:MI:SS') AS ts,
-           s.load1 || ' / ' || s.load5 || ' / ' || s.load15 || ' (1/5/15m)'
-    FROM \"${schema}\".system s
-    ORDER BY s.time DESC LIMIT 1;"
-
-  echo ""
-done
diff --git a/salt/postgres/tools/sbin/so-stats-show b/salt/postgres/tools/sbin/so-stats-show
new file mode 100644
index 000000000..fd8dff39f
--- /dev/null
+++ b/salt/postgres/tools/sbin/so-stats-show
@@ -0,0 +1,170 @@
+#!/bin/bash
+
+# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
+# https://securityonion.net/license; you may not use this file except in compliance with the
+# Elastic License 2.0.
+
+# Point-in-time host metrics from the Telegraf Postgres backend.
+# Sanity-check tool for verifying metrics are landing before the grid
+# dashboards consume them.
+
+. /usr/sbin/so-common
+
+usage() {
+  cat </dev/null | cut -d\| -f1 | grep -qw so_telegraf; then
+  echo "Database so_telegraf not found. Is global.telegraf_output set to POSTGRES or BOTH?"
+  exit 2
+fi
+
+# Telegraf's postgresql output stores tag values either as individual columns
+# on the <table>_tag table or as a single JSONB "tags" column, depending on
+# plugin version. Returns a SQL expression that extracts the named tag
+# regardless of layout. Empty string if <table>_tag has neither column.
+# Usage: tag_expr TABLE TAG ALIAS
+# Prints "ALIAS.TAG" when ${SCHEMA}.TABLE_tag has a column named TAG,
+# "(ALIAS.tags->>'TAG')" when it only has a "tags" column, else "".
+tag_expr() {
+  local table="$1" tag="$2" alias="$3"
+  local has_col
+  has_col=$(so_psql -c "
+    SELECT 1 FROM information_schema.columns
+    WHERE table_schema='${SCHEMA}' AND table_name='${table}_tag' AND column_name='${tag}'
+    LIMIT 1;")
+  if [ -n "$has_col" ]; then
+    echo "${alias}.${tag}"
+    return
+  fi
+  local has_tags
+  has_tags=$(so_psql -c "
+    SELECT 1 FROM information_schema.columns
+    WHERE table_schema='${SCHEMA}' AND table_name='${table}_tag' AND column_name='tags'
+    LIMIT 1;")
+  if [ -n "$has_tags" ]; then
+    echo "(${alias}.tags->>'${tag}')"
+    return
+  fi
+  echo ""
+}
+
+# Usage: table_exists TABLE
+# Returns success when information_schema.tables lists ${SCHEMA}.TABLE.
+table_exists() {
+  local table="$1"
+  [ -n "$(so_psql -c "SELECT 1 FROM information_schema.tables WHERE table_schema='${SCHEMA}' AND table_name='${table}' LIMIT 1;")" ]
+}
+
+# Discover hosts from cpu_tag (every minion reports cpu).
+host_expr=$(tag_expr "cpu" "host" "t")
+if [ -z "$host_expr" ]; then
+  echo "Unable to determine host tag column on ${SCHEMA}.cpu_tag. Has Telegraf written any rows yet?"
+  exit 0
+fi
+
+HOSTS=$(so_psql -c "
+  SELECT DISTINCT ${host_expr}
+  FROM \"${SCHEMA}\".cpu_tag t
+  WHERE ${host_expr} IS NOT NULL
+  ORDER BY 1;")
+
+if [ -z "$HOSTS" ]; then
+  echo "No hosts found in ${SCHEMA}. Is Telegraf configured to write to Postgres?"
+  exit 0
+fi
+
+# Usage: print_metric QUERY -- runs QUERY through so_psql and prints the result.
+print_metric() {
+  local query="$1"
+  so_psql -c "$query"
+}
+
+# All hosts share one set of tables, so the tag expressions and table checks
+# do not depend on the host: resolve them once here instead of once per host.
+# The cpu query reuses host_expr, which is already the cpu_tag host expression.
+cpu_tag=$(tag_expr "cpu" "cpu" "t")
+mem_host=$(tag_expr "mem" "host" "t")
+disk_host=$(tag_expr "disk" "host" "t")
+disk_path=$(tag_expr "disk" "path" "t")
+sys_host=$(tag_expr "system" "host" "t")
+# Same host expression re-pointed at the t2 alias used by the disk subquery.
+disk_host_t2=${disk_host/t./t2.}
+
+show_mem=""; show_disk=""; show_sys=""
+[ -n "$mem_host" ] && table_exists "mem" && show_mem=1
+[ -n "$disk_host" ] && [ -n "$disk_path" ] && table_exists "disk" && show_disk=1
+[ -n "$sys_host" ] && table_exists "system" && show_sys=1
+
+sq="'"
+
+# NOTE: kept as a word-splitting for-loop rather than a read loop so that
+# so_psql calls inside the body cannot consume the loop's stdin.
+for host in $HOSTS; do
+  if [ -n "$FILTER_HOST" ] && [ "$host" != "$FILTER_HOST" ]; then
+    continue
+  fi
+
+  # Host names are read back from stored tag data; double any single quote so
+  # the value cannot terminate the SQL string literal it is placed in.
+  host_sql=${host//$sq/$sq$sq}
+
+  echo "===================================================================="
+  echo " Host: $host"
+  echo "===================================================================="
+
+  if [ -n "$cpu_tag" ]; then
+    print_metric "
+      SELECT 'cpu ' AS metric,
+             to_char(c.time, 'YYYY-MM-DD HH24:MI:SS') AS ts,
+             round((100 - c.usage_idle)::numeric, 1) || '% used'
+      FROM \"${SCHEMA}\".cpu c
+      JOIN \"${SCHEMA}\".cpu_tag t USING (tag_id)
+      WHERE ${host_expr} = '${host_sql}' AND ${cpu_tag} = 'cpu-total'
+      ORDER BY c.time DESC LIMIT 1;"
+  fi
+
+  if [ -n "$show_mem" ]; then
+    print_metric "
+      SELECT 'memory ' AS metric,
+             to_char(m.time, 'YYYY-MM-DD HH24:MI:SS') AS ts,
+             round(m.used_percent::numeric, 1) || '% used (' ||
+             pg_size_pretty(m.used) || ' of ' || pg_size_pretty(m.total) || ')'
+      FROM \"${SCHEMA}\".mem m
+      JOIN \"${SCHEMA}\".mem_tag t USING (tag_id)
+      WHERE ${mem_host} = '${host_sql}'
+      ORDER BY m.time DESC LIMIT 1;"
+  fi
+
+  if [ -n "$show_disk" ]; then
+    print_metric "
+      SELECT 'disk ' || rpad(${disk_path}, 12) AS metric,
+             to_char(d.time, 'YYYY-MM-DD HH24:MI:SS') AS ts,
+             round(d.used_percent::numeric, 1) || '% used (' ||
+             pg_size_pretty(d.used) || ' of ' || pg_size_pretty(d.total) || ')'
+      FROM \"${SCHEMA}\".disk d
+      JOIN \"${SCHEMA}\".disk_tag t USING (tag_id)
+      WHERE ${disk_host} = '${host_sql}'
+        AND d.time = (SELECT max(d2.time)
+                      FROM \"${SCHEMA}\".disk d2
+                      JOIN \"${SCHEMA}\".disk_tag t2 USING (tag_id)
+                      WHERE ${disk_host_t2} = '${host_sql}')
+      ORDER BY ${disk_path};"
+  fi
+
+  if [ -n "$show_sys" ]; then
+    print_metric "
+      SELECT 'load ' AS metric,
+             to_char(s.time, 'YYYY-MM-DD HH24:MI:SS') AS ts,
+             s.load1 || ' / ' || s.load5 || ' / ' || s.load15 || ' (1/5/15m)'
+      FROM \"${SCHEMA}\".system s
+      JOIN \"${SCHEMA}\".system_tag t USING (tag_id)
+      WHERE ${sys_host} = '${host_sql}'
+      ORDER BY s.time DESC LIMIT 1;"
+  fi
+
+  echo ""
+done
diff --git a/salt/postgres/tools/sbin/so-telegraf-trim b/salt/postgres/tools/sbin/so-telegraf-trim
index 0bf53c1d8..664469d0c 100644
--- a/salt/postgres/tools/sbin/so-telegraf-trim
+++ b/salt/postgres/tools/sbin/so-telegraf-trim
@@ -63,15 +63,15 @@ log "Trimming rows older than ${DAYS} days (dry_run=${DRY_RUN})."

 TOTAL_DELETED=0

-# One row per (schema, table) we might want to trim.
-# Column name is 'time' for all telegraf output plugin tables; skip metadata
-# tables (tag_* used for tags_as_foreign_keys).
+# Every metric table in the shared telegraf schema has a 'time' column.
+# Tag tables (<metric>_tag) don't, so filtering on the column presence is
+# enough to scope the trim to metric tables only.
 ROWS=$(so_psql -c "
     SELECT table_schema || '.' || table_name
     FROM information_schema.columns
     WHERE column_name = 'time'
       AND data_type IN ('timestamp with time zone', 'timestamp without time zone')
-      AND table_schema LIKE 'so_telegraf_%'
+      AND table_schema = 'telegraf'
     ORDER BY 1;")

 if [ -z "$ROWS" ]; then
diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf
index 4cdd81f20..4f0c279cc 100644
--- a/salt/telegraf/etc/telegraf.conf
+++ b/salt/telegraf/etc/telegraf.conf
@@ -94,10 +94,13 @@
 {%- endif %}

 {%- if TG_OUT in ['POSTGRES', 'BOTH'] %}
-# Configuration for sending metrics to PostgreSQL
+# Configuration for sending metrics to PostgreSQL.
+# options='-c role=so_telegraf' makes every connection SET ROLE to so_telegraf,
+# the shared group role, so tables created on first write are owned by it and
+# all per-minion member roles can INSERT/SELECT them via role inheritance.
 [[outputs.postgresql]]
-  connection = "host={{ PG_HOST }} port=5432 user={{ PG_USER }} password={{ PG_PASS }} dbname=so_telegraf sslmode=verify-full sslrootcert=/etc/telegraf/ca.crt"
-  schema = "{{ PG_USER }}"
+  connection = "host={{ PG_HOST }} port=5432 user={{ PG_USER }} password={{ PG_PASS }} dbname=so_telegraf sslmode=verify-full sslrootcert=/etc/telegraf/ca.crt options='-c role=so_telegraf'"
+  schema = "telegraf"
   tags_as_foreign_keys = true
 {%- endif %}