reduce highstate frequency with active push for rules and pillars

- schedule highstate every 2 hours (was 15 minutes); interval lives in
  global:push:highstate_interval_hours so the SOC admin UI can tune it and
  so-salt-minion-check derives its threshold as (interval + 1) * 3600
- add inotify beacon on the manager + master reactor + orch.push_batch that
  writes per-app intent files, with a so-push-drainer schedule on the manager
  that debounces, dedupes, and dispatches a single orchestration
- pillar_push_map.yaml allowlists the apps whose pillar changes trigger an
  immediate targeted state.apply (targets verified against salt/top.sls);
  edits under pillar/minions/ trigger a state.highstate on that one minion
- host-batch every push orchestration (batch: 25%, batch_wait: 15) so rule
  changes don't thundering-herd large fleets
- new global:push:enabled kill-switch tears down the beacon, reactor config,
  and drainer schedule on the next highstate for operators who want to keep
  highstate-only behavior
- set restart_policy: unless-stopped on 23 container states so docker
  recovers crashes without waiting for the next highstate; leave registry
  (always), strelka/backend (on-failure), kratos, and hydra alone with
  inline comments explaining why
This commit is contained in:
Mike Reeves
2026-04-10 15:43:16 -04:00
parent 81afbd32d4
commit a0cf0489d6
42 changed files with 927 additions and 10 deletions
+20 -2
View File
@@ -1,3 +1,5 @@
{% from 'vars/globals.map.jinja' import GLOBALS %}
{% from 'global/map.jinja' import GLOBALMERGED %}
{% set CHECKS = salt['pillar.get']('healthcheck:checks', {}) %}
{% set ENABLED = salt['pillar.get']('healthcheck:enabled', False) %}
{% set SCHEDULE = salt['pillar.get']('healthcheck:schedule', 30) %}
@@ -14,12 +16,28 @@ salt_beacons:
- defaults:
CHECKS: {{ CHECKS }}
SCHEDULE: {{ SCHEDULE }}
- watch_in:
- watch_in:
- service: salt_minion_service
{% else %}
salt_beacons:
file.absent:
- name: /etc/salt/minion.d/beacons.conf
- watch_in:
- watch_in:
- service: salt_minion_service
{% endif %}
{% if GLOBALS.is_manager and GLOBALMERGED.push.enabled %}
salt_beacons_pushstate:
file.managed:
- name: /etc/salt/minion.d/beacons_pushstate.conf
- source: salt://salt/files/beacons_pushstate.conf.jinja
- template: jinja
- watch_in:
- service: salt_minion_service
{% else %}
salt_beacons_pushstate:
file.absent:
- name: /etc/salt/minion.d/beacons_pushstate.conf
- watch_in:
- service: salt_minion_service
{% endif %}
@@ -0,0 +1,26 @@
beacons:
inotify:
- disable_during_state_run: True
- coalesce: True
- files:
/opt/so/saltstack/local/salt/suricata/rules/:
mask:
- close_write
- moved_to
- delete
recurse: True
auto_add: True
/opt/so/saltstack/local/salt/strelka/rules/compiled/:
mask:
- close_write
- moved_to
- delete
recurse: True
auto_add: True
/opt/so/saltstack/local/pillar/:
mask:
- close_write
- moved_to
- delete
recurse: True
auto_add: True
+7
View File
@@ -0,0 +1,7 @@
reactor:
- 'salt/beacon/*/inotify//opt/so/saltstack/local/salt/suricata/rules/':
- salt://reactor/push_suricata.sls
- 'salt/beacon/*/inotify//opt/so/saltstack/local/salt/strelka/rules/compiled/':
- salt://reactor/push_strelka.sls
- 'salt/beacon/*/inotify//opt/so/saltstack/local/pillar/':
- salt://reactor/push_pillar.sls
+17
View File
@@ -10,6 +10,7 @@
# software that is protected by the license key."
{% from 'allowed_states.map.jinja' import allowed_states %}
{% from 'global/map.jinja' import GLOBALMERGED %}
{% if sls in allowed_states %}
include:
@@ -62,6 +63,22 @@ engines_config:
- name: /etc/salt/master.d/engines.conf
- source: salt://salt/files/engines.conf
{% if GLOBALMERGED.push.enabled %}
reactor_pushstate_config:
file.managed:
- name: /etc/salt/master.d/reactor_pushstate.conf
- source: salt://salt/files/reactor_pushstate.conf
- watch_in:
- service: salt_master_service
- order: last
{% else %}
reactor_pushstate_config:
file.absent:
- name: /etc/salt/master.d/reactor_pushstate.conf
- watch_in:
- service: salt_master_service
{% endif %}
# update the bootstrap script when used for salt-cloud
salt_bootstrap_cloud:
file.managed:
-1
View File
@@ -2,4 +2,3 @@
salt:
minion:
version: '3006.19'
check_threshold: 3600 # in seconds, threshold used for so-salt-minion-check. any value less than 600 seconds may cause a lot of salt-minion restarts since the job to touch the file occurs every 5-8 minutes by default