From b3fbd5c7a46f99e02081aca011b48a63dd097431 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 17 Apr 2026 14:55:13 -0400 Subject: [PATCH 1/7] Use Go-template placeholders and shell-guarded CREATE DATABASE - Telegraf's outputs.postgresql plugin uses Go text/template syntax, not uppercase tokens. The {TABLE}/{COLUMNS}/{TABLELITERAL} strings were passed through to Postgres literally, producing syntax errors on every metric's first write. Switch to {{ .table }}, {{ .columns }}, and {{ .table|quoteLiteral }} so partitioned parents and the partman create_parent() call succeed. - Replace the \gexec "CREATE DATABASE ... WHERE NOT EXISTS" idiom in both init-users.sh and telegraf_users.sls with an explicit shell conditional. The prior idiom occasionally fired CREATE DATABASE even when so_telegraf already existed, producing duplicate-key failures. --- salt/postgres/files/init-users.sh | 7 +++---- salt/postgres/telegraf_users.sls | 7 +++---- salt/telegraf/etc/telegraf.conf | 4 ++-- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/salt/postgres/files/init-users.sh b/salt/postgres/files/init-users.sh index b07dfcdb0..e1be5df19 100644 --- a/salt/postgres/files/init-users.sh +++ b/salt/postgres/files/init-users.sh @@ -20,7 +20,6 @@ EOSQL # Bootstrap the Telegraf metrics database. Per-minion roles + schemas are # reconciled on every state.apply by postgres/telegraf_users.sls; this block # only ensures the shared database exists on first initialization. -psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL - SELECT 'CREATE DATABASE so_telegraf' - WHERE NOT EXISTS (SELECT FROM pg_database WHERE datname = 'so_telegraf')\gexec -EOSQL +if ! psql -U "$POSTGRES_USER" -tAc "SELECT 1 FROM pg_database WHERE datname='so_telegraf'" | grep -q 1; then + psql -v ON_ERROR_STOP=1 -U "$POSTGRES_USER" -c "CREATE DATABASE so_telegraf" +fi diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index 920367fab..8972ce510 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -34,10 +34,9 @@ postgres_wait_ready: postgres_create_telegraf_db: cmd.run: - name: | - docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d postgres <<'EOSQL' - SELECT 'CREATE DATABASE so_telegraf' - WHERE NOT EXISTS (SELECT FROM pg_database WHERE datname = 'so_telegraf')\gexec - EOSQL + if ! docker exec so-postgres psql -U postgres -tAc "SELECT 1 FROM pg_database WHERE datname='so_telegraf'" | grep -q 1; then + docker exec so-postgres psql -v ON_ERROR_STOP=1 -U postgres -c "CREATE DATABASE so_telegraf" + fi - require: - cmd: postgres_wait_ready diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index 334b62888..fd614a68b 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -110,8 +110,8 @@ # Every metric table is a daily time-range partitioned parent managed by # pg_partman. Retention drops old partitions instead of row-by-row DELETEs. create_templates = [ - '''CREATE TABLE {TABLE} ({COLUMNS}) PARTITION BY RANGE ("time")''', - '''SELECT partman.create_parent(p_parent_table := {TABLELITERAL}, p_control := 'time', p_type := 'range', p_interval := '1 day', p_premake := 3)''' + '''CREATE TABLE {{ .table }} ({{ .columns }}) PARTITION BY RANGE ("time")''', + '''SELECT partman.create_parent(p_parent_table := {{ .table|quoteLiteral }}, p_control := 'time', p_type := 'range', p_interval := '1 day', p_premake := 3)''' ] {%- endif %} From af9330a9dddee39db104c467cefe1c7fd6d842ef Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 17 Apr 2026 15:04:37 -0400 Subject: [PATCH 2/7] Escape Go-template placeholders from Jinja in telegraf.conf --- salt/telegraf/etc/telegraf.conf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index fd614a68b..e18205ad1 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -109,10 +109,12 @@ fields_as_jsonb = true # Every metric table is a daily time-range partitioned parent managed by # pg_partman. Retention drops old partitions instead of row-by-row DELETEs. + {% raw %} create_templates = [ '''CREATE TABLE {{ .table }} ({{ .columns }}) PARTITION BY RANGE ("time")''', '''SELECT partman.create_parent(p_parent_table := {{ .table|quoteLiteral }}, p_control := 'time', p_type := 'range', p_interval := '1 day', p_premake := 3)''' ] + {% endraw %} {%- endif %} ############################################################################### From 927eba566cfecfd1a94f3b44fc470ffc9e038bc0 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 17 Apr 2026 15:13:08 -0400 Subject: [PATCH 3/7] Grant so_telegraf access to partman schema Telegraf calls partman.create_parent() on first write of each metric, which needs USAGE on the partman schema, EXECUTE on its functions and procedures, and DML on partman.part_config. --- salt/postgres/telegraf_users.sls | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index 8972ce510..cbbd60249 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -61,6 +61,14 @@ postgres_telegraf_group_role: CREATE SCHEMA IF NOT EXISTS partman; CREATE EXTENSION IF NOT EXISTS pg_partman SCHEMA partman; CREATE EXTENSION IF NOT EXISTS pg_cron; + -- Telegraf (running as so_telegraf) calls partman.create_parent() + -- on first write of each metric, which needs USAGE on the partman + -- schema, EXECUTE on its functions/procedures, and write access to + -- partman.part_config so it can register new partitioned parents. + GRANT USAGE ON SCHEMA partman TO so_telegraf; + GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA partman TO so_telegraf; + GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA partman TO so_telegraf; + GRANT EXECUTE ON ALL PROCEDURES IN SCHEMA partman TO so_telegraf; -- Hourly partman maintenance. cron.schedule is idempotent by jobname. SELECT cron.schedule( 'telegraf-partman-maintenance', From 0fddcd8fe70a6032089fbe31c3afc4a9116c0db6 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 17 Apr 2026 15:22:57 -0400 Subject: [PATCH 4/7] Pass unquoted schema.name to partman.create_parent pg_partman 5.x splits p_parent_table on '.' and looks up the parts as raw identifiers, so the literal must be 'schema.name' rather than the double-quoted form quoteLiteral emits for .table. --- salt/telegraf/etc/telegraf.conf | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index e18205ad1..8a48186af 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -110,9 +110,14 @@ # Every metric table is a daily time-range partitioned parent managed by # pg_partman. Retention drops old partitions instead of row-by-row DELETEs. {% raw %} + # pg_partman.create_parent() splits p_parent_table on '.' and looks the + # parts up as raw identifiers, so the string literal must be + # 'schema.name', NOT '"schema"."name"'. {{ .table|quoteLiteral }} would + # emit the double-quoted form and partman fails with "Unable to find + # given parent table in system catalogs." create_templates = [ '''CREATE TABLE {{ .table }} ({{ .columns }}) PARTITION BY RANGE ("time")''', - '''SELECT partman.create_parent(p_parent_table := {{ .table|quoteLiteral }}, p_control := 'time', p_type := 'range', p_interval := '1 day', p_premake := 3)''' + '''SELECT partman.create_parent(p_parent_table := {{ printf "%s.%s" .table.Schema .table.Name | quoteLiteral }}, p_control := 'time', p_type := 'range', p_interval := '1 day', p_premake := 3)''' ] {% endraw %} {%- endif %} From f11e9da83a574a0038fd028417506bbc339b176b Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 17 Apr 2026 15:27:06 -0400 Subject: [PATCH 5/7] Mark time column NOT NULL before partman.create_parent pg_partman 5.x requires the control column to be NOT NULL; Telegraf's generated columns are nullable by default. --- salt/telegraf/etc/telegraf.conf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index 8a48186af..6d46095f8 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -110,13 +110,13 @@ # Every metric table is a daily time-range partitioned parent managed by # pg_partman. Retention drops old partitions instead of row-by-row DELETEs. {% raw %} - # pg_partman.create_parent() splits p_parent_table on '.' and looks the - # parts up as raw identifiers, so the string literal must be - # 'schema.name', NOT '"schema"."name"'. {{ .table|quoteLiteral }} would - # emit the double-quoted form and partman fails with "Unable to find - # given parent table in system catalogs." + # pg_partman 5.x requires the control column (time) to be NOT NULL, so + # ALTER it before create_parent(). And create_parent() splits + # p_parent_table on '.' to look up raw identifiers, so the literal must + # be 'schema.name' (not '"schema"."name"' as .table|quoteLiteral emits). create_templates = [ '''CREATE TABLE {{ .table }} ({{ .columns }}) PARTITION BY RANGE ("time")''', + '''ALTER TABLE {{ .table }} ALTER COLUMN "time" SET NOT NULL''', '''SELECT partman.create_parent(p_parent_table := {{ printf "%s.%s" .table.Schema .table.Name | quoteLiteral }}, p_control := 'time', p_type := 'range', p_interval := '1 day', p_premake := 3)''' ] {% endraw %} From 21076af01ed9aa0f396b3c51ffbea7c9045757b0 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 17 Apr 2026 15:34:19 -0400 Subject: [PATCH 6/7] Grant so_telegraf CREATE on partman schema pg_partman 5.x's create_partition() creates a per-parent template table inside the partman schema at runtime, which requires CREATE on that schema. Also extend ALTER DEFAULT PRIVILEGES so the runtime- created template tables are accessible to so_telegraf. --- salt/postgres/telegraf_users.sls | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index cbbd60249..804065bae 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -65,10 +65,16 @@ postgres_telegraf_group_role: -- on first write of each metric, which needs USAGE on the partman -- schema, EXECUTE on its functions/procedures, and write access to -- partman.part_config so it can register new partitioned parents. - GRANT USAGE ON SCHEMA partman TO so_telegraf; + GRANT USAGE, CREATE ON SCHEMA partman TO so_telegraf; GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA partman TO so_telegraf; GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA partman TO so_telegraf; GRANT EXECUTE ON ALL PROCEDURES IN SCHEMA partman TO so_telegraf; + -- partman creates per-parent template tables (partman.template_*) at + -- runtime; default privileges extend DML/sequence access to them. + ALTER DEFAULT PRIVILEGES IN SCHEMA partman + GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO so_telegraf; + ALTER DEFAULT PRIVILEGES IN SCHEMA partman + GRANT USAGE, SELECT, UPDATE ON SEQUENCES TO so_telegraf; -- Hourly partman maintenance. cron.schedule is idempotent by jobname. SELECT cron.schedule( 'telegraf-partman-maintenance', From 31383bd9d0fff009f7fbdd5d10b8bfeba4f3f7a4 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 17 Apr 2026 15:43:50 -0400 Subject: [PATCH 7/7] Make Telegraf Postgres templates idempotent Use CREATE TABLE IF NOT EXISTS and a WHERE-guarded create_parent() so a Telegraf restart can re-run the templates safely after manual DB surgery. Add an explicit tag_table_create_templates mirroring the plugin default with IF NOT EXISTS for the same reason. --- salt/telegraf/etc/telegraf.conf | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index 6d46095f8..d28dc7f96 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -114,10 +114,15 @@ # ALTER it before create_parent(). And create_parent() splits # p_parent_table on '.' to look up raw identifiers, so the literal must # be 'schema.name' (not '"schema"."name"' as .table|quoteLiteral emits). + # IF NOT EXISTS keeps the three templates idempotent so a Telegraf + # restart after any DB-side surgery re-runs them safely. create_templates = [ - '''CREATE TABLE {{ .table }} ({{ .columns }}) PARTITION BY RANGE ("time")''', + '''CREATE TABLE IF NOT EXISTS {{ .table }} ({{ .columns }}) PARTITION BY RANGE ("time")''', '''ALTER TABLE {{ .table }} ALTER COLUMN "time" SET NOT NULL''', - '''SELECT partman.create_parent(p_parent_table := {{ printf "%s.%s" .table.Schema .table.Name | quoteLiteral }}, p_control := 'time', p_type := 'range', p_interval := '1 day', p_premake := 3)''' + '''SELECT partman.create_parent(p_parent_table := {{ printf "%s.%s" .table.Schema .table.Name | quoteLiteral }}, p_control := 'time', p_type := 'range', p_interval := '1 day', p_premake := 3) WHERE NOT EXISTS (SELECT 1 FROM partman.part_config WHERE parent_table = {{ printf "%s.%s" .table.Schema .table.Name | quoteLiteral }})''' + ] + tag_table_create_templates = [ + '''CREATE TABLE IF NOT EXISTS {{ .table }} ({{ .columns }}, PRIMARY KEY (tag_id))''' ] {% endraw %} {%- endif %}