mirror of
https://github.com/Security-Onion-Solutions/securityonion.git
synced 2026-05-06 03:17:53 +02:00
614f32c5e0
The old flow had two writers for each per-minion Telegraf password
(so-minion wrote the minion pillar; postgres.auth regenerated any
missing aggregate entries). They drifted on first-boot and there was
no trigger to create DB roles when a new minion joined.
Split responsibilities:
- pillar/postgres/auth.sls (manager-scoped) keeps only the so_postgres
admin cred.
- pillar/telegraf/creds.sls (grid-wide) holds a {minion_id: {user,
pass}} map, shadowed per-install by the local-pillar copy.
- salt/manager/tools/sbin/so-telegraf-cred is the single writer:
flock, atomic YAML write, PyYAML safe_dump so passwords never
round-trip through so-yaml.py's type coercion. Idempotent add, quiet
remove.
- so-minion's add/remove hooks now shell out to so-telegraf-cred
instead of editing pillar files directly.
- postgres.telegraf_users iterates the new pillar key and CREATE/ALTERs
roles from it; telegraf.conf reads its own entry via grains.id.
- orch.deploy_newnode runs postgres.telegraf_users on the manager and
refreshes the new minion's pillar before the new node highstates,
so the DB role is in place the first time telegraf tries to connect.
- soup's post_to_3.1.0 backfills the creds pillar from accepted salt
keys (idempotent) and runs postgres.telegraf_users once to reconcile
the DB.
158 lines
6.6 KiB
Plaintext
158 lines
6.6 KiB
Plaintext
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
|
|
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
|
|
# https://securityonion.net/license; you may not use this file except in compliance with the
|
|
# Elastic License 2.0.
|
|
|
|
{% from 'allowed_states.map.jinja' import allowed_states %}
|
|
{% if sls.split('.')[0] in allowed_states %}
|
|
{% from 'vars/globals.map.jinja' import GLOBALS %}
|
|
{% from 'telegraf/map.jinja' import TELEGRAFMERGED %}
|
|
|
|
{# This SLS can be applied on its own (e.g. `state.apply postgres.telegraf_users`
   from orch.deploy_newnode). postgres_wait_ready below requires
   `docker_container: so-postgres`, an ID that is declared in postgres.enabled,
   so that SLS is pulled in here to keep the requisite resolvable. Salt
   de-duplicates the circular include. #}
include:
  - postgres.enabled
|
|
|
|
{% set TG_OUT = TELEGRAFMERGED.output | upper %}
|
|
{% if TG_OUT in ['POSTGRES', 'BOTH'] %}
|
|
|
|
# Block until the long-lived postgres instance accepts TCP connections.
#
# docker_container.running is satisfied as soon as the container starts, but
# on first init docker-entrypoint.sh launches a temporary postgres with
# `listen_addresses=''` to run the /docker-entrypoint-initdb.d scripts and
# shuts it down again before exec'ing the real CMD. A default pg_isready probe
# (Unix socket) succeeds during that ephemeral phase and then races the
# shutdown ("the database system is shutting down"). Probing 127.0.0.1 over
# TCP only succeeds once the final postgres has bound the port.
#
# Up to 60 probes, 2 seconds apart (120s total), then fail the state.
postgres_wait_ready:
  cmd.run:
    - name: |
        tries=0
        while [ "$tries" -lt 60 ]; do
          docker exec so-postgres pg_isready -h 127.0.0.1 -U postgres -q 2>/dev/null && exit 0
          sleep 2
          tries=$((tries + 1))
        done
        echo "so-postgres did not accept TCP connections within 120s" >&2
        exit 1
    - require:
      - docker_container: so-postgres
|
|
|
|
# Ensure the shared Telegraf database exists. init-users.sh only runs on a
# fresh data dir, so hosts upgraded onto an existing /nsm/postgres volume
# would otherwise never get so_telegraf.
#
# The existence probe lives in `unless` rather than inside the command, so the
# state is skipped and reports no change once the database is present instead
# of showing up as "changed" on every run. If the probe itself fails, the
# CREATE DATABASE is still attempted and ON_ERROR_STOP surfaces the error.
postgres_create_telegraf_db:
  cmd.run:
    - name: 'docker exec so-postgres psql -v ON_ERROR_STOP=1 -U postgres -c "CREATE DATABASE so_telegraf"'
    - unless: >-
        docker exec so-postgres psql -U postgres -tAc
        "SELECT 1 FROM pg_database WHERE datname='so_telegraf'" | grep -q 1
    - require:
      - cmd: postgres_wait_ready
|
|
|
|
# One-time (re-runnable) provisioning of the shared group role, the telegraf
# schema, and the partman/pg_cron extensions inside so_telegraf.
#
# Every per-minion role is a member of so_telegraf, and each Telegraf
# connection does SET ROLE so_telegraf (via options='-c role=so_telegraf' in
# the connection string), so tables created on first write are owned by the
# group role and every member can INSERT/SELECT.
postgres_telegraf_group_role:
  cmd.run:
    - name: |
        docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d so_telegraf <<'EOSQL'
        -- CREATE ROLE has no IF NOT EXISTS form, so guard it by hand.
        DO $group_role$
        BEGIN
          IF NOT EXISTS (SELECT 1 FROM pg_catalog.pg_roles WHERE rolname = 'so_telegraf') THEN
            CREATE ROLE so_telegraf NOLOGIN;
          END IF;
        END
        $group_role$;

        GRANT CONNECT ON DATABASE so_telegraf TO so_telegraf;

        CREATE SCHEMA IF NOT EXISTS telegraf AUTHORIZATION so_telegraf;
        GRANT USAGE, CREATE ON SCHEMA telegraf TO so_telegraf;

        CREATE SCHEMA IF NOT EXISTS partman;
        CREATE EXTENSION IF NOT EXISTS pg_partman SCHEMA partman;
        CREATE EXTENSION IF NOT EXISTS pg_cron;

        -- Telegraf (running as so_telegraf) calls partman.create_parent() on
        -- the first write of each metric. That needs USAGE on the partman
        -- schema, EXECUTE on its functions/procedures, and write access to
        -- partman.part_config so new partitioned parents can be registered.
        GRANT USAGE, CREATE ON SCHEMA partman TO so_telegraf;
        GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA partman TO so_telegraf;
        GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA partman TO so_telegraf;
        GRANT EXECUTE ON ALL PROCEDURES IN SCHEMA partman TO so_telegraf;

        -- partman creates per-parent template tables (partman.template_*) at
        -- runtime; default privileges extend DML/sequence access to them.
        ALTER DEFAULT PRIVILEGES IN SCHEMA partman
          GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO so_telegraf;
        ALTER DEFAULT PRIVILEGES IN SCHEMA partman
          GRANT USAGE, SELECT, UPDATE ON SEQUENCES TO so_telegraf;

        -- Hourly partman maintenance. cron.schedule is idempotent by jobname.
        SELECT cron.schedule(
          'telegraf-partman-maintenance',
          '17 * * * *',
          'CALL partman.run_maintenance_proc()'
        );
        EOSQL
    - require:
      - cmd: postgres_create_telegraf_db
|
|
|
|
{# Create (or re-password) one LOGIN role per entry of the
   telegraf:postgres_creds pillar map and make it a member of so_telegraf.
   Entries that are not mappings, or that lack `user` or `pass`, are skipped
   instead of aborting the render.

   Escaping:
     - u_lit / p : single quotes doubled, for use inside '...' SQL literals.
     - u_ident   : double quotes doubled, for use inside "..." identifiers.
   The DO body uses a distinctive dollar-quote tag rather than a bare $$ so a
   password that happens to contain "$$" cannot terminate the block early. #}
{% set creds = salt['pillar.get']('telegraf:postgres_creds', {}) %}
{% for mid, entry in creds.items() %}
{% if entry is mapping and entry.get('user') and entry.get('pass') %}
{% set u = entry.user %}
{% set u_lit = u | replace("'", "''") %}
{% set u_ident = u | replace('"', '""') %}
{% set p = entry.pass | replace("'", "''") %}

postgres_telegraf_role_{{ u }}:
  cmd.run:
    - name: |
        docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d so_telegraf <<'EOSQL'
        DO $so_telegraf_role$
        BEGIN
          IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{{ u_lit }}') THEN
            EXECUTE format('CREATE ROLE %I WITH LOGIN PASSWORD %L', '{{ u_lit }}', '{{ p }}');
          ELSE
            EXECUTE format('ALTER ROLE %I WITH PASSWORD %L', '{{ u_lit }}', '{{ p }}');
          END IF;
        END
        $so_telegraf_role$;
        GRANT CONNECT ON DATABASE so_telegraf TO "{{ u_ident }}";
        GRANT so_telegraf TO "{{ u_ident }}";
        EOSQL
    # The command text embeds the password; keep it out of the minion log.
    - output_loglevel: quiet
    - require:
      - cmd: postgres_telegraf_group_role

{% endif %}
{% endfor %}
|
|
|
|
# Reconcile partman retention from pillar. Runs after role/schema setup so
# any partitioned parents Telegraf has already created get their retention
# refreshed whenever postgres.telegraf.retention_days changes.
#
# The pillar value is validated before it reaches SQL: a null, non-numeric, or
# sub-1 value falls back to the 14-day default. Without this, Jinja's `int`
# filter would turn such a value into 0 and the UPDATE below would write
# retention = '0 days' with retention_keep_table = false.
{% set retention_raw = salt['pillar.get']('postgres:telegraf:retention_days', 14) | int(0) %}
{% set retention = retention_raw if retention_raw >= 1 else 14 %}
postgres_telegraf_retention_reconcile:
  cmd.run:
    - name: |
        docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d so_telegraf <<'EOSQL'
        DO $$
        BEGIN
          -- Only touch part_config when pg_partman is installed in this database.
          IF EXISTS (SELECT 1 FROM pg_catalog.pg_extension WHERE extname = 'pg_partman') THEN
            UPDATE partman.part_config
               SET retention = '{{ retention }} days',
                   retention_keep_table = false
             WHERE parent_table LIKE 'telegraf.%';
          END IF;
        END
        $$;
        EOSQL
    - require:
      - cmd: postgres_telegraf_group_role
|
|
|
|
{% endif %}
|
|
|
|
{% else %}
|
|
|
|
{# Fallback branch: the first component of this SLS name is not listed in
   allowed_states, so emit a single always-failing state that names the SLS. #}
{{ sls }}_state_not_allowed:
  test.fail_without_changes:
    - name: {{ sls }}_state_not_allowed
|
|
|
|
{% endif %}
|