Adopt pg_partman + pg_cron for Telegraf metric tables

Every telegraf.* metric table is now a daily time-range partitioned
parent managed by pg_partman. Retention drops old partitions instead
of the row-by-row DELETE that so-telegraf-trim used to run nightly,
and dashboards will benefit from partition pruning at query time.

- Load pg_cron at server start via shared_preload_libraries and point
  cron.database_name at so_telegraf so job metadata lives alongside
  the metrics
- Telegraf create_templates override makes every new metric table a
  PARTITION BY RANGE (time) parent and registers it with
  partman.create_parent in the same transaction (1 day interval,
  3 premade partitions)
- postgres_telegraf_group_role now also creates pg_partman and pg_cron
  extensions and schedules hourly partman.run_maintenance_proc
- New retention reconcile state updates partman.part_config.retention
  from postgres.telegraf.retention_days on every apply
- so_telegraf_trim cron is now unconditionally absent; script stays on
  disk as a manual fallback
This commit is contained in:
Mike Reeves
2026-04-16 17:27:15 -04:00
parent 9fe53d9ccc
commit d9a9029ce5
4 changed files with 41 additions and 9 deletions
+2
View File
@@ -14,3 +14,5 @@ postgres:
log_destination: 'stderr'
logging_collector: 'off'
log_min_messages: 'warning'
shared_preload_libraries: pg_cron
cron.database_name: so_telegraf
+3 -9
View File
@@ -80,20 +80,14 @@ delete_so-postgres_so-status.disabled:
- name: /opt/so/conf/so-status/so-status.conf
- regex: ^so-postgres$
# Retention is now handled by pg_partman (hourly maintenance via pg_cron
# scheduled from postgres/telegraf_users.sls). The so-telegraf-trim script
# stays on disk for manual/emergency use but is no longer scheduled.
so_telegraf_trim:
{% if GLOBALS.telegraf_output in ['POSTGRES', 'BOTH'] %}
cron.present:
{% else %}
cron.absent:
{% endif %}
- name: /usr/sbin/so-telegraf-trim >> /opt/so/log/postgres/telegraf-trim.log 2>&1
- identifier: so_telegraf_trim
- user: root
- minute: '17'
- hour: '3'
- daymonth: '*'
- month: '*'
- dayweek: '*'
{% else %}
+30
View File
@@ -28,6 +28,14 @@ postgres_telegraf_group_role:
GRANT CONNECT ON DATABASE so_telegraf TO so_telegraf;
CREATE SCHEMA IF NOT EXISTS telegraf AUTHORIZATION so_telegraf;
GRANT USAGE, CREATE ON SCHEMA telegraf TO so_telegraf;
-- pg_partman does not pick its own schema: a bare CREATE EXTENSION puts its
-- objects in the current default creation schema (normally public). The
-- maintenance job below addresses pg_partman as partman.*, so create that
-- schema and pin the extension to it.
-- NOTE(review): IF NOT EXISTS will not relocate a pg_partman that an earlier
-- run already installed in another schema; such hosts need a one-off
-- DROP EXTENSION / CREATE EXTENSION.
CREATE SCHEMA IF NOT EXISTS partman;
CREATE EXTENSION IF NOT EXISTS pg_partman SCHEMA partman;
CREATE EXTENSION IF NOT EXISTS pg_cron;
-- Hourly partman maintenance, at minute 17 of every hour. cron.schedule
-- with a job name is idempotent: re-running it updates the existing job
-- instead of adding a duplicate (presumes a pg_cron release with named-job
-- support; confirm against the installed version).
SELECT cron.schedule(
    'telegraf-partman-maintenance',
    '17 * * * *',
    'CALL partman.run_maintenance_proc()'
);
EOSQL
- require:
- docker_container: so-postgres
@@ -60,6 +68,28 @@ postgres_telegraf_role_{{ u }}:
{% endif %}
{% endfor %}
# Reconcile partman retention from pillar. Runs after role/schema setup so
# any partitioned parents Telegraf has already created get their retention
# refreshed whenever postgres.telegraf.retention_days changes.
#
# The pillar value is interpolated into a SQL interval literal, so coerce it
# to an integer first. Anything that is not a positive whole number of days
# falls back to the default of 14: a non-numeric value would store an
# unparseable retention, and a zero or negative one would make every
# partition eligible for removal.
{% set retention = salt['pillar.get']('postgres:telegraf:retention_days', 14) | int(14) %}
{% if retention < 1 %}
{% set retention = 14 %}
{% endif %}
postgres_telegraf_retention_reconcile:
  cmd.run:
    - name: |
        docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d so_telegraf <<'EOSQL'
        DO $$
        BEGIN
          -- Skip quietly when pg_partman is not installed in this database,
          -- since partman.part_config would not exist.
          IF EXISTS (SELECT 1 FROM pg_catalog.pg_extension WHERE extname = 'pg_partman') THEN
            -- Only parents in the telegraf schema are touched.
            -- retention_keep_table = false makes expired partitions get
            -- dropped rather than merely detached.
            UPDATE partman.part_config
               SET retention = '{{ retention }} days',
                   retention_keep_table = false
             WHERE parent_table LIKE 'telegraf.%';
          END IF;
        END
        $$;
        EOSQL
    - require:
      - cmd: postgres_telegraf_group_role
{% endif %}
{% else %}