Fix Telegraf→Postgres table creation and state.apply race

- Telegraf's partman template passed p_type:='native', which pg_partman 5.x (the version shipped by postgresql-17-partman on Debian) rejects. Switched to 'range' so partman.create_parent() actually creates partitions and Telegraf's INSERTs succeed.
- Added a postgres_wait_ready gate in telegraf_users.sls so psql execs don't race the init-time restart that docker-entrypoint.sh performs.
- so-verify now ignores the literal "-v ON_ERROR_STOP=1" token in the setup log. Dropped the matching entry from so-log-check, which scans container stdout where that token never appears.
2026-05-10 13:20:30 +02:00 · 2026-04-17 13:00:12 -04:00
parent 7d07f3c8fe
commit 5228668be0
4 changed files with 22 additions and 4 deletions
@@ -10,6 +10,24 @@
 {% set TG_OUT = (GLOBALS.telegraf_output | default('INFLUXDB')) | upper %}
 {% if TG_OUT in ['POSTGRES', 'BOTH'] %}

+# docker_container.running returns once the container starts, but on first
+# init docker-entrypoint.sh runs the init scripts and then restarts postgres,
+# so the next docker exec can hit "the database system is shutting down".
+# Poll pg_isready (60 tries, 2s apart, ~120s max) before any psql work.
+postgres_wait_ready:
+  cmd.run:
+    - name: |
+        for i in $(seq 1 60); do
+          if docker exec so-postgres pg_isready -U postgres -q 2>/dev/null; then
+            exit 0
+          fi
+          sleep 2
+        done
+        echo "so-postgres did not become ready within 120s" >&2
+        exit 1
+    - require:
+      - docker_container: so-postgres
+
 # Ensure the shared Telegraf database exists. init-users.sh only runs on a
 # fresh data dir, so hosts upgraded onto an existing /nsm/postgres volume
 # would otherwise never get so_telegraf.
@@ -21,7 +39,7 @@ postgres_create_telegraf_db:
        WHERE NOT EXISTS (SELECT FROM pg_database WHERE datname = 'so_telegraf')\gexec
        EOSQL
    - require:
-      - docker_container: so-postgres
+      - cmd: postgres_wait_ready

 # Provision the shared group role and schema once. Every per-minion role is a
 # member of so_telegraf, and each Telegraf connection does SET ROLE so_telegraf