#!/bin/bash
#
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.

# Runs once per boot on managers (via so-boot-mine-update.service), before
# so-boot-highstate.service. Waits for the responsive minion set to settle, pushes
# mine.update, then waits until every up minion has actually reported to the mine
# so mine-backed pillars (node IPs, ES/Redis/Logstash discovery) are complete
# before the boot highstate renders them -- otherwise a not-yet-reported node gets
# dropped from those pillars and torn out of the configs they build.

# Validates a wait budget given in seconds.
#   $1 - candidate value (e.g. the MINE_UPDATE_MAX_WAIT override)
#   $2 - fallback used when $1 is not a plain non-negative integer
# Prints the accepted value on stdout as a base-10 integer (leading zeros
# stripped); on rejection prints the fallback and a diagnostic on stderr.
# Without this check a non-numeric override makes the `[ ... -lt "$MAX_WAIT" ]`
# loop conditions fail with "integer expression expected", so the wait loops are
# skipped without any explanation in the log.
sanitize_max_wait() {
  case "$1" in
    '' | *[!0-9]*)
      echo "so-boot-mine-update: ignoring invalid MINE_UPDATE_MAX_WAIT='$1'; using ${2}s" >&2
      printf '%s\n' "$2"
      ;;
    *)
      # 10# forces decimal so a value such as 090 is not read as (invalid) octal.
      printf '%s\n' "$((10#$1))"
      ;;
  esac
}

MAX_WAIT=$(sanitize_max_wait "${MINE_UPDATE_MAX_WAIT:-180}" 180)  # hard backstop only
INTERVAL=10       # seconds slept between polls
STABLE_CHECKS=3   # up-count must hold steady this many polls
elapsed=0         # seconds of sleep accumulated so far, shared by both wait loops
prev=-1           # up-count seen on the previous poll (-1 = no poll yet)
stable=0          # consecutive polls on which the up-count was unchanged
up=0              # most recent up-count

# Wait for the *reachable* minion set to settle rather than for every accepted
# key to report up: an operator may accept a minion's key and then intentionally
# power off that host, so requiring up >= accepted would never be satisfied and
# we'd always burn the full MAX_WAIT. Once the responsive count stops growing we
# stop waiting and run mine.update against whoever is up.
# Phase 1: poll `salt-run manage.up` until the number of responsive minions is
# non-zero and has been identical on STABLE_CHECKS consecutive polls, or until
# elapsed reaches MAX_WAIT. Note that elapsed only grows by INTERVAL per sleep;
# time spent inside the salt commands themselves is not counted.
settled=0
while [ "$elapsed" -lt "$MAX_WAIT" ]; do
  # If salt-run fails or its output is not JSON the python helper prints
  # nothing, and the poll is treated as "0 minions up".
  up=$(/usr/bin/salt-run manage.up --out=json 2>/dev/null \
    | python3 -c 'import sys,json; print(len(json.load(sys.stdin)))' 2>/dev/null)
  up=${up:-0}

  if [ "$up" -gt 0 ] && [ "$up" -eq "$prev" ]; then
    stable=$((stable + 1))
    if [ "$stable" -ge "$STABLE_CHECKS" ]; then
      settled=1
      break
    fi
  else
    # Count changed (or nothing is up yet): restart the stability window.
    stable=0
  fi
  prev=$up

  sleep "$INTERVAL"
  elapsed=$((elapsed + INTERVAL))
done

# Only claim "settled" when the stability criterion was really met; leaving the
# loop because the backstop expired is reported as such so the log is not misleading.
if [ "$settled" -eq 1 ]; then
  echo "so-boot-mine-update: ${up} minions up (settled after ${elapsed}s); running mine.update"
else
  echo "so-boot-mine-update: ${up} minions up (not settled; ${MAX_WAIT}s backstop hit after ${elapsed}s); running mine.update"
fi
/usr/bin/salt '*' mine.update --out=txt

# A node that is up but has not yet re-reported network.ip_addrs to the mine is
# silently dropped from mine-backed pillars (elasticsearch:nodes, node_data, ...)
# when highstate recompiles them -- which e.g. removes it from so-elasticsearch
# ExtraHosts and forces a container recreate. After the broad mine.update above,
# wait until every up minion actually has network.ip_addrs in the mine, re-pushing
# mine.update to stragglers, before releasing the boot highstate. Bounded by the
# same MAX_WAIT backstop so a slow/down node never blocks boot indefinitely.
# Prints, one per line, every minion id from the manage.up JSON list ($1) that
# has no non-empty entry in the mine.get JSON object ($2).
# Returns non-zero when the up list cannot be parsed (or python3 fails), so the
# caller can tell "nothing missing" apart from "could not check". An empty or
# unparsable mine document is treated as an empty mine, i.e. every up minion is
# reported missing -- the conservative direction for this script.
mine_missing_minions() {
  printf '%s' "$1" | python3 -c '
import json
import sys

up = set(json.load(sys.stdin) or [])
try:
    mine = json.loads(sys.argv[1]) or {}
except ValueError:
    mine = {}
present = {k for k, v in mine.items() if v}
for minion in sorted(up - present):
    print(minion)
' "$2" 2>/dev/null
}

# Phase 2: compare the up set with the mine contents until nothing is missing or
# the shared elapsed counter reaches MAX_WAIT. The comparison runs at the top of
# every pass and the budget is tested afterwards, so:
#   * at least one check is made even if the budget is already spent, and
#   * the final warning reflects the state *after* the last mine.update retry.
# elapsed only grows by INTERVAL per sleep; salt command run time is not counted.
missing=""
verified=0   # 1 once the most recent comparison could actually be computed
while :; do
  up_json=$(/usr/bin/salt-run manage.up --out=json 2>/dev/null)
  mine_json=$(/usr/bin/salt-run mine.get '*' network.ip_addrs tgt_type=glob --out=json 2>/dev/null)

  if missing=$(mine_missing_minions "$up_json" "$mine_json"); then
    verified=1
    if [ -z "$missing" ]; then
      echo "so-boot-mine-update: mine complete for all up minions after ${elapsed}s"
      break
    fi
  else
    # Do not mistake a failed query for "nothing missing".
    verified=0
    missing=""
  fi

  # Budget spent (or MAX_WAIT unusable): stop retrying and fall through to the warning.
  [ "$elapsed" -lt "$MAX_WAIT" ] || break

  if [ "$verified" -eq 1 ]; then
    mapfile -t stragglers <<<"$missing"
    echo "so-boot-mine-update: mine missing up minion(s): ${stragglers[*]}; re-running mine.update"
    # One list-targeted call instead of one salt invocation per straggler.
    /usr/bin/salt -L "$(IFS=,; printf '%s' "${stragglers[*]}")" mine.update --out=txt
  else
    echo "so-boot-mine-update: could not read the up-minion list from salt-run; retrying"
  fi

  sleep "$INTERVAL"
  elapsed=$((elapsed + INTERVAL))
done

if [ -n "$missing" ]; then
  echo "so-boot-mine-update: WARNING ${MAX_WAIT}s backstop hit; up minion(s) still absent from mine: ${missing//$'\n'/ }; highstate may drop them from configs"
elif [ "$verified" -ne 1 ]; then
  echo "so-boot-mine-update: WARNING ${MAX_WAIT}s backstop hit; could not verify mine contents; highstate may drop nodes from configs"
fi

# Log what node_data renders so the boot-time pillar state is inspectable.
/usr/bin/salt-call saltutil.refresh_pillar >/dev/null 2>&1
# NOTE(review): the pause presumably gives the pillar refresh time to complete
# before it is read back -- confirm against saltutil.refresh_pillar semantics.
sleep 2
# salt-call wraps its result under the "local" key; print just that value, or
# "null" when the call or the JSON parsing failed.
rendered=$(/usr/bin/salt-call --out=json pillar.get node_data 2>/dev/null \
  | python3 -c 'import sys,json; print(json.dumps(json.load(sys.stdin).get("local"), indent=2, sort_keys=True))' 2>/dev/null)
echo "so-boot-mine-update: node_data rendered as:"
printf '%s\n' "${rendered:-null}"

# Always succeed: this unit is best-effort and must never block the boot highstate.
exit 0