#!/bin/bash # Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one # or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at # https://securityonion.net/license; you may not use this file except in compliance with the # Elastic License 2.0. # Point-in-time host metrics from the Telegraf Postgres backend. # Sanity-check tool for verifying metrics are landing before the grid # dashboards consume them. # # Assumes Telegraf's postgresql output is configured with # tags_as_foreign_keys = true, tags_as_jsonb = true, fields_as_jsonb = true, # so metric tables are (time, tag_id, fields jsonb) and tag tables are # (tag_id, tags jsonb). . /usr/sbin/so-common usage() { cat <&2 exit 1 fi so_psql() { docker exec so-postgres psql -U postgres -d so_telegraf -At -F $'\t' "$@" } if ! docker exec so-postgres psql -U postgres -lqt 2>/dev/null | cut -d\| -f1 | grep -qw so_telegraf; then echo "Database so_telegraf not found. Is telegraf.output set to POSTGRES or BOTH?" exit 2 fi table_exists() { local table="$1" [ -n "$(so_psql -c "SELECT 1 FROM information_schema.tables WHERE table_schema='${SCHEMA}' AND table_name='${table}' LIMIT 1;")" ] } # Discover hosts from cpu_tag (every minion reports cpu). if ! table_exists "cpu_tag"; then echo "${SCHEMA}.cpu_tag not found. Has Telegraf written any rows yet?" exit 0 fi HOSTS=$(so_psql -c " SELECT DISTINCT tags->>'host' FROM \"${SCHEMA}\".cpu_tag WHERE tags ? 'host' ORDER BY 1;") if [ -z "$HOSTS" ]; then echo "No hosts found in ${SCHEMA}. Is Telegraf configured to write to Postgres?" exit 0 fi print_metric() { so_psql -c "$1" } for host in $HOSTS; do if ! [[ "$host" =~ $HOST_RE ]]; then echo "Skipping host with invalid characters in tag value: $host" >&2 continue fi if [ -n "$FILTER_HOST" ] && [ "$host" != "$FILTER_HOST" ]; then continue fi echo "====================================================================" echo " Host: $host" echo "====================================================================" if table_exists "cpu"; then print_metric " SELECT 'cpu ' AS metric, to_char(c.time, 'YYYY-MM-DD HH24:MI:SS') AS ts, round((100 - (c.fields->>'usage_idle')::numeric), 1) || '% used' FROM \"${SCHEMA}\".cpu c JOIN \"${SCHEMA}\".cpu_tag t USING (tag_id) WHERE t.tags->>'host' = '${host}' AND t.tags->>'cpu' = 'cpu-total' ORDER BY c.time DESC LIMIT 1;" fi if table_exists "mem"; then print_metric " SELECT 'memory ' AS metric, to_char(m.time, 'YYYY-MM-DD HH24:MI:SS') AS ts, round((m.fields->>'used_percent')::numeric, 1) || '% used (' || pg_size_pretty((m.fields->>'used')::bigint) || ' of ' || pg_size_pretty((m.fields->>'total')::bigint) || ')' FROM \"${SCHEMA}\".mem m JOIN \"${SCHEMA}\".mem_tag t USING (tag_id) WHERE t.tags->>'host' = '${host}' ORDER BY m.time DESC LIMIT 1;" fi if table_exists "disk"; then print_metric " SELECT 'disk ' || rpad(t.tags->>'path', 12) AS metric, to_char(d.time, 'YYYY-MM-DD HH24:MI:SS') AS ts, round((d.fields->>'used_percent')::numeric, 1) || '% used (' || pg_size_pretty((d.fields->>'used')::bigint) || ' of ' || pg_size_pretty((d.fields->>'total')::bigint) || ')' FROM \"${SCHEMA}\".disk d JOIN \"${SCHEMA}\".disk_tag t USING (tag_id) WHERE t.tags->>'host' = '${host}' AND d.time = (SELECT max(d2.time) FROM \"${SCHEMA}\".disk d2 JOIN \"${SCHEMA}\".disk_tag t2 USING (tag_id) WHERE t2.tags->>'host' = '${host}') ORDER BY t.tags->>'path';" fi if table_exists "system"; then print_metric " SELECT 'load ' AS metric, to_char(s.time, 'YYYY-MM-DD HH24:MI:SS') AS ts, (s.fields->>'load1') || ' / ' || (s.fields->>'load5') || ' / ' || (s.fields->>'load15') || ' (1/5/15m)' FROM \"${SCHEMA}\".system s JOIN \"${SCHEMA}\".system_tag t USING (tag_id) WHERE t.tags->>'host' = '${host}' ORDER BY s.time DESC LIMIT 1;" fi echo "" done