mirror of
https://github.com/Security-Onion-Solutions/securityonion.git
synced 2026-05-10 21:30:30 +02:00
Adopt pg_partman + pg_cron for Telegraf metric tables
Every telegraf.* metric table is now a daily, time-range-partitioned parent managed by pg_partman. Retention drops old partitions instead of running the row-by-row DELETE that so-telegraf-trim used to run nightly, and dashboards benefit from partition pruning at query time.

- Load pg_cron at server start via shared_preload_libraries and point cron.database_name at so_telegraf so that job metadata lives alongside the metrics.
- The Telegraf create_templates override makes every new metric table a PARTITION BY RANGE (time) parent and registers it with partman.create_parent in the same transaction (1-day interval, 3 premade partitions).
- postgres_telegraf_group_role now also creates the pg_partman and pg_cron extensions and schedules an hourly partman.run_maintenance_proc job.
- A new retention reconcile state updates partman.part_config.retention from postgres.telegraf.retention_days on every apply.
- The so_telegraf_trim cron entry is now unconditionally absent; the script stays on disk as a manual fallback.
This commit is contained in:
@@ -14,3 +14,5 @@ postgres:
|
||||
log_destination: 'stderr'
|
||||
logging_collector: 'off'
|
||||
log_min_messages: 'warning'
|
||||
shared_preload_libraries: pg_cron
|
||||
cron.database_name: so_telegraf
|
||||
|
||||
@@ -80,20 +80,14 @@ delete_so-postgres_so-status.disabled:
|
||||
- name: /opt/so/conf/so-status/so-status.conf
|
||||
- regex: ^so-postgres$
|
||||
|
||||
# Retention is now handled by pg_partman (hourly maintenance via pg_cron
|
||||
# scheduled from postgres/telegraf_users.sls). The so-telegraf-trim script
|
||||
# stays on disk for manual/emergency use but is no longer scheduled.
|
||||
so_telegraf_trim:
|
||||
{% if GLOBALS.telegraf_output in ['POSTGRES', 'BOTH'] %}
|
||||
cron.present:
|
||||
{% else %}
|
||||
cron.absent:
|
||||
{% endif %}
|
||||
- name: /usr/sbin/so-telegraf-trim >> /opt/so/log/postgres/telegraf-trim.log 2>&1
|
||||
- identifier: so_telegraf_trim
|
||||
- user: root
|
||||
- minute: '17'
|
||||
- hour: '3'
|
||||
- daymonth: '*'
|
||||
- month: '*'
|
||||
- dayweek: '*'
|
||||
|
||||
{% else %}
|
||||
|
||||
|
||||
@@ -28,6 +28,14 @@ postgres_telegraf_group_role:
|
||||
GRANT CONNECT ON DATABASE so_telegraf TO so_telegraf;
|
||||
CREATE SCHEMA IF NOT EXISTS telegraf AUTHORIZATION so_telegraf;
|
||||
GRANT USAGE, CREATE ON SCHEMA telegraf TO so_telegraf;
|
||||
CREATE EXTENSION IF NOT EXISTS pg_partman;
|
||||
CREATE EXTENSION IF NOT EXISTS pg_cron;
|
||||
-- Hourly partman maintenance. cron.schedule is idempotent by jobname.
|
||||
SELECT cron.schedule(
|
||||
'telegraf-partman-maintenance',
|
||||
'17 * * * *',
|
||||
'CALL partman.run_maintenance_proc()'
|
||||
);
|
||||
EOSQL
|
||||
- require:
|
||||
- docker_container: so-postgres
|
||||
@@ -60,6 +68,28 @@ postgres_telegraf_role_{{ u }}:
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
|
||||
# Reconcile partman retention from pillar. Runs after role/schema setup so
|
||||
# any partitioned parents Telegraf has already created get their retention
|
||||
# refreshed whenever postgres.telegraf.retention_days changes.
|
||||
{% set retention = salt['pillar.get']('postgres:telegraf:retention_days', 14) %}
|
||||
postgres_telegraf_retention_reconcile:
|
||||
cmd.run:
|
||||
- name: |
|
||||
docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d so_telegraf <<'EOSQL'
|
||||
DO $$
|
||||
BEGIN
|
||||
IF EXISTS (SELECT 1 FROM pg_catalog.pg_extension WHERE extname = 'pg_partman') THEN
|
||||
UPDATE partman.part_config
|
||||
SET retention = '{{ retention }} days',
|
||||
retention_keep_table = false
|
||||
WHERE parent_table LIKE 'telegraf.%';
|
||||
END IF;
|
||||
END
|
||||
$$;
|
||||
EOSQL
|
||||
- require:
|
||||
- cmd: postgres_telegraf_group_role
|
||||
|
||||
{% endif %}
|
||||
|
||||
{% else %}
|
||||
|
||||
Reference in New Issue
Block a user