Move so_telegraf retention from the so-telegraf-trim cron job to pg_partman,
with hourly partition maintenance scheduled through pg_cron.

Review changes folded into this patch:
  * telegraf_users.sls: create the partman schema and install pg_partman into
    it, because every statement in this patch addresses the extension's
    objects as partman.*.
  * telegraf_users.sls: coerce postgres:telegraf:retention_days to an integer
    (fallback 14, minimum 1) before it is interpolated into SQL.
  * defaults.yaml: quote the new values like the neighbouring settings.

The index lines are carried over from the original patch and do not reflect
the adjusted post-images.

diff --git a/salt/postgres/defaults.yaml b/salt/postgres/defaults.yaml
index dd7994044..30523cda9 100644
--- a/salt/postgres/defaults.yaml
+++ b/salt/postgres/defaults.yaml
@@ -14,3 +14,6 @@ postgres:
     log_destination: 'stderr'
     logging_collector: 'off'
     log_min_messages: 'warning'
+    # pg_cron has to be preloaded; its job metadata lives in the database named below.
+    shared_preload_libraries: 'pg_cron'
+    cron.database_name: 'so_telegraf'
diff --git a/salt/postgres/enabled.sls b/salt/postgres/enabled.sls
index 24e348365..b6a51580f 100644
--- a/salt/postgres/enabled.sls
+++ b/salt/postgres/enabled.sls
@@ -80,20 +80,14 @@ delete_so-postgres_so-status.disabled:
     - name: /opt/so/conf/so-status/so-status.conf
     - regex: ^so-postgres$
 
+# Retention is now handled by pg_partman (hourly maintenance via pg_cron
+# scheduled from postgres/telegraf_users.sls). The so-telegraf-trim script
+# stays on disk for manual/emergency use but is no longer scheduled.
 so_telegraf_trim:
-{% if GLOBALS.telegraf_output in ['POSTGRES', 'BOTH'] %}
-  cron.present:
-{% else %}
   cron.absent:
-{% endif %}
     - name: /usr/sbin/so-telegraf-trim >> /opt/so/log/postgres/telegraf-trim.log 2>&1
     - identifier: so_telegraf_trim
     - user: root
-    - minute: '17'
-    - hour: '3'
-    - daymonth: '*'
-    - month: '*'
-    - dayweek: '*'
 
 {% else %}
 
diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls
index 7d62ee7f0..5a3ea73e9 100644
--- a/salt/postgres/telegraf_users.sls
+++ b/salt/postgres/telegraf_users.sls
@@ -28,6 +28,17 @@ postgres_telegraf_group_role:
         GRANT CONNECT ON DATABASE so_telegraf TO so_telegraf;
         CREATE SCHEMA IF NOT EXISTS telegraf AUTHORIZATION so_telegraf;
         GRANT USAGE, CREATE ON SCHEMA telegraf TO so_telegraf;
+        -- Pin pg_partman to the partman schema: the maintenance call below and
+        -- the retention reconcile state both address it as partman.*.
+        CREATE SCHEMA IF NOT EXISTS partman;
+        CREATE EXTENSION IF NOT EXISTS pg_partman SCHEMA partman;
+        CREATE EXTENSION IF NOT EXISTS pg_cron;
+        -- Hourly partman maintenance. cron.schedule is idempotent by jobname.
+        SELECT cron.schedule(
+          'telegraf-partman-maintenance',
+          '17 * * * *',
+          'CALL partman.run_maintenance_proc()'
+        );
         EOSQL
     - require:
       - docker_container: so-postgres
@@ -60,6 +71,30 @@ postgres_telegraf_role_{{ u }}:
 {% endif %}
 {% endfor %}
 
+# Reconcile partman retention from pillar. Runs after role/schema setup so
+# any partitioned parents Telegraf has already created get their retention
+# refreshed whenever postgres.telegraf.retention_days changes.
+# The pillar value is coerced to an integer (falling back to 14) and clamped
+# to at least 1 day before it is interpolated into the SQL below.
+{% set retention = [salt['pillar.get']('postgres:telegraf:retention_days', 14) | int(14), 1] | max %}
+postgres_telegraf_retention_reconcile:
+  cmd.run:
+    - name: |
+        docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d so_telegraf <<'EOSQL'
+        DO $$
+        BEGIN
+          IF EXISTS (SELECT 1 FROM pg_catalog.pg_extension WHERE extname = 'pg_partman') THEN
+            UPDATE partman.part_config
+               SET retention = '{{ retention }} days',
+                   retention_keep_table = false
+             WHERE parent_table LIKE 'telegraf.%';
+          END IF;
+        END
+        $$;
+        EOSQL
+    - require:
+      - cmd: postgres_telegraf_group_role
+
 {% endif %}
 
 {% else %}
diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf
index aa5f2a007..ea3e11c51 100644
--- a/salt/telegraf/etc/telegraf.conf
+++ b/salt/telegraf/etc/telegraf.conf
@@ -107,6 +107,15 @@
   tags_as_foreign_keys = true
   tags_as_jsonb = true
   fields_as_jsonb = true
+  # Every metric table is a daily time-range partitioned parent managed by
+  # pg_partman. Retention drops old partitions instead of row-by-row DELETEs.
+  # NOTE(review): confirm against the deployed versions that the {TABLE}-style
+  # placeholders are what this plugin substitutes, and that pg_partman still
+  # accepts p_type 'native' (5.x documents 'range'/'list' instead).
+  create_templates = [
+    '''CREATE TABLE {TABLE} ({COLUMNS}) PARTITION BY RANGE ("time")''',
+    '''SELECT partman.create_parent(p_parent_table := {TABLELITERAL}, p_control := 'time', p_type := 'native', p_interval := '1 day', p_premake := 3)'''
+  ]
 {%- endif %}
 
 ###############################################################################