From 52574e21c67bdb7b3bdf75f627df47003158f65a Mon Sep 17 00:00:00 2001
From: Josh Patterson
Date: Tue, 30 Jun 2026 09:40:23 -0400
Subject: [PATCH 1/4] suricata: treat in-progress rule reload as success

so-suricata-reload-rules failed the surirulereload state when a rule
reload was already running: suricatasc returns
{"message":"Reload already in progress","return":"NOK"}, which never
matched the expected output, so retry looped all 60 attempts (~3 min)
and called fail.

Wrap the suricatasc calls so an in-progress reload is treated as success
(the in-flight reload picks up the new rules) while genuine
container-not-ready conditions still retry and ultimately fail.
---
# NOTE(review): reviewer commentary, not part of the commit.
# - reload_suricata_rules echoes the suricatasc output, then returns 0 when
#   the output contains "Reload already in progress" or the exact done/OK
#   JSON, and 1 otherwise.
# - `retry` and `fail` are not defined in this diff; presumably they come
#   from the sourced /usr/sbin/so-common -- confirm that `retry` can run a
#   shell function passed to it as a string.
# - PATCH 4/4 of this series removes reload_suricata_rules again.
 .../tools/sbin/so-suricata-reload-rules | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/salt/suricata/tools/sbin/so-suricata-reload-rules b/salt/suricata/tools/sbin/so-suricata-reload-rules
index e21e28e2f..aec6bc966 100644
--- a/salt/suricata/tools/sbin/so-suricata-reload-rules
+++ b/salt/suricata/tools/sbin/so-suricata-reload-rules
@@ -7,5 +7,19 @@
 
 . /usr/sbin/so-common
 
-retry 60 3 'docker exec so-suricata /opt/suricata/bin/suricatasc -c reload-rules /var/run/suricata/suricata-command.socket' '{"message":"done","return":"OK"}' || fail "The Suricata container was not ready in time."
-retry 60 3 'docker exec so-suricata /opt/suricata/bin/suricatasc -c ruleset-reload-nonblocking /var/run/suricata/suricata-command.socket' '{"message":"done","return":"OK"}' || fail "The Suricata container was not ready in time."
+reload_suricata_rules() {
+    # $1 = suricatasc command (reload-rules | ruleset-reload-nonblocking)
+    local output
+    output=$(docker exec so-suricata /opt/suricata/bin/suricatasc -c "$1" /var/run/suricata/suricata-command.socket)
+    echo "$output"
+    # A reload already running is fine — the new rules get picked up by it.
+    if [[ "$output" =~ "Reload already in progress" ]]; then
+        echo "A rule reload is already in progress; treating as success."
+        return 0
+    fi
+    [[ "$output" =~ '{"message":"done","return":"OK"}' ]] && return 0
+    return 1
+}
+
+retry 60 3 'reload_suricata_rules reload-rules' || fail "The Suricata container was not ready in time."
+retry 60 3 'reload_suricata_rules ruleset-reload-nonblocking' || fail "The Suricata container was not ready in time."
From 3b8459c6ec474c20b4e60c07b0577b6dd1fa98c2 Mon Sep 17 00:00:00 2001
From: reyesj2 <94730068+reyesj2@users.noreply.github.com>
Date: Tue, 30 Jun 2026 12:43:42 -0500
Subject: [PATCH 2/4] soup upgrade kafka cluster metadata v4

---
# NOTE(review): reviewer commentary, not part of the commit.
# - update_kafka_metadata only acts when global.pipeline read from
#   soc_global.sls equals "KAFKA"; otherwise it prints "Nothing to do!".
# - The so-kafka-cli upgrade call ends in `2>/dev/null || true` and is
#   followed by `return 0`, so its stderr and exit status are discarded and a
#   failed upgrade is not reported. The FINAL_MESSAGE_QUEUE warning is queued
#   only when the kafka:nodes pillar lookup does not yield a non-empty object.
# - bootstrap_servers joins "<ip>:9092" for every node whose role contains
#   "broker"; it is passed to --bootstrap-server without an emptiness check.
# - The function assigns its variables without `local`.
# - The queued warning text spells "at least" as "atleast".
 salt/manager/tools/sbin/soup | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup
index 2b8680191..be99292d8 100755
--- a/salt/manager/tools/sbin/soup
+++ b/salt/manager/tools/sbin/soup
@@ -850,6 +850,28 @@ kibana_backport_streams_index_template() {
 
 }
 
+# Runs kafka-features.sh upgrade --release-version $1
+# Upgrades Kafka KRaft cluster metadata
+update_kafka_metadata() {
+  metadata_version="$1"
+  global_pillar="/opt/so/saltstack/local/pillar/global/soc_global.sls"
+  if PIPELINE=$(so-yaml.py get -r "$global_pillar" global.pipeline 2> /dev/null) && [[ "$PIPELINE" == "KAFKA" ]]; then
+    kafka_nodes_raw=$(salt-call pillar.get kafka:nodes --out=json)
+    if kafka_nodes=$(jq -er '.local | select(type == "object" and length > 0)' <<< "$kafka_nodes_raw"); then
+      bootstrap_servers=$(jq -r '[to_entries[] | select(.value.role | contains("broker")) | "\(.value.ip):9092"] | join(",")' <<< "$kafka_nodes")
+      echo "Upgrading Kafka KRaft cluster version"
+      so-kafka-cli kafka-features.sh --bootstrap-server "$bootstrap_servers" --command-config /opt/kafka/config/kraft/client.properties upgrade --release-version "$metadata_version" 2>/dev/null || true
+
+      return 0
+    else
+      FINAL_MESSAGE_QUEUE+=("WARNING: Unable to automatically perform Kafka Kraft cluster metadata update. This step can be performed manually using the following command (replacing \$BROKER_IP with the ip of atleast 1 available Kafka broker):")
+      FINAL_MESSAGE_QUEUE+=(" - so-kafka-cli kafka-features.sh --bootstrap-server \$BROKER_IP:9092 --command-config /opt/kafka/config/kraft/client.properties upgrade --release-version $metadata_version")
+    fi
+  else
+    echo "Nothing to do!"
+  fi
+}
+
 up_to_3.2.0() {
   fix_logstash_0013_lumberjack_pipeline_name
 
@@ -867,6 +889,8 @@ post_to_3.2.0() {
 
   kibana_backport_streams_index_template
 
+  update_kafka_metadata "4.3"
+
   POSTVERSION=3.2.0
 }
 
From 670d2b2757a7cd808d22aa05f6605c59a8b994a6 Mon Sep 17 00:00:00 2001
From: reyesj2 <94730068+reyesj2@users.noreply.github.com>
Date: Tue, 30 Jun 2026 12:57:56 -0500
Subject: [PATCH 3/4] casing

---
# NOTE(review): reviewer commentary, not part of the commit.
# - The only change is "Kraft" -> "KRaft" in the first FINAL_MESSAGE_QUEUE
#   warning string of update_kafka_metadata.
 salt/manager/tools/sbin/soup | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup
index be99292d8..3c4cbe7c4 100755
--- a/salt/manager/tools/sbin/soup
+++ b/salt/manager/tools/sbin/soup
@@ -864,7 +864,7 @@ update_kafka_metadata() {
 
       return 0
     else
-      FINAL_MESSAGE_QUEUE+=("WARNING: Unable to automatically perform Kafka Kraft cluster metadata update. This step can be performed manually using the following command (replacing \$BROKER_IP with the ip of atleast 1 available Kafka broker):")
+      FINAL_MESSAGE_QUEUE+=("WARNING: Unable to automatically perform Kafka KRaft cluster metadata update. This step can be performed manually using the following command (replacing \$BROKER_IP with the ip of atleast 1 available Kafka broker):")
       FINAL_MESSAGE_QUEUE+=(" - so-kafka-cli kafka-features.sh --bootstrap-server \$BROKER_IP:9092 --command-config /opt/kafka/config/kraft/client.properties upgrade --release-version $metadata_version")
     fi
   else
From ee36f5f84c7ea9c6d3ab512377f732883fe6c3b4 Mon Sep 17 00:00:00 2001
From: Josh Patterson
Date: Wed, 1 Jul 2026 09:00:36 -0400
Subject: [PATCH 4/4] suricata: verify reloaded ruleset is newer than the rules file

Treating an in-progress reload as instant success could report success
while Suricata was still running a stale ruleset (the in-flight reload
may have started before the new all-rulesets.rules was written).

Make success conditional on Suricata actually having loaded the current
ruleset: capture the rules-file mtime up front, trigger a blocking
reload-rules, then query ruleset-reload-time and only succeed when
last_reload >= mtime. An in-progress reload now retries (waits for it to
clear so our own fresh reload runs) instead of short-circuiting, and a
ruleset that never catches up within the retry window fails via fail().

Also drop the redundant ruleset-reload-nonblocking call (the verified
blocking reload is authoritative and the async call was what left a
reload running) and log human-readable timestamps.
---
# NOTE(review): reviewer commentary, not part of the commit.
# - target_mtime is read once, before the retry call; every attempt of
#   reload_and_verify sends a new reload-rules command.
# - Success is tested with -ge on whole-second epochs (stat %Y, date +%s),
#   so a reload timestamp equal to the file mtime passes although the log
#   line says "newer than".
# - $SURICATASC holds the whole docker command in one string and is expanded
#   unquoted, relying on word splitting.
# - suricata_reload_epoch parses the suricatasc reply with jq and returns
#   non-zero when no last_reload value is found or `date -d` cannot parse it.
# - `retry` and `fail` are not defined in this diff; presumably they come
#   from the sourced /usr/sbin/so-common -- confirm.
 .../tools/sbin/so-suricata-reload-rules | 53 +++++++++++++++----
 1 file changed, 42 insertions(+), 11 deletions(-)

diff --git a/salt/suricata/tools/sbin/so-suricata-reload-rules b/salt/suricata/tools/sbin/so-suricata-reload-rules
index aec6bc966..b966e4bc0 100644
--- a/salt/suricata/tools/sbin/so-suricata-reload-rules
+++ b/salt/suricata/tools/sbin/so-suricata-reload-rules
@@ -7,19 +7,50 @@
 
 . /usr/sbin/so-common
 
-reload_suricata_rules() {
-    # $1 = suricatasc command (reload-rules | ruleset-reload-nonblocking)
-    local output
-    output=$(docker exec so-suricata /opt/suricata/bin/suricatasc -c "$1" /var/run/suricata/suricata-command.socket)
-    echo "$output"
-    # A reload already running is fine — the new rules get picked up by it.
-    if [[ "$output" =~ "Reload already in progress" ]]; then
-        echo "A rule reload is already in progress; treating as success."
+RULES_FILE="/opt/so/rules/suricata/all-rulesets.rules"
+SOCKET="/var/run/suricata/suricata-command.socket"
+SURICATASC="docker exec so-suricata /opt/suricata/bin/suricatasc"
+
+# Epoch mtime of the ruleset we need Suricata to have loaded. Captured once so a
+# file update mid-reload does not move the goalpost.
+target_mtime=$(stat -c %Y "$RULES_FILE") || fail "Could not stat the Suricata rules file: $RULES_FILE"
+
+# Format an epoch as a human-readable local timestamp for log messages.
+fmt_time() { date -d "@$1" '+%Y-%m-%d %H:%M:%S %Z' 2>/dev/null; }
+
+# Epoch of Suricata's last *completed* ruleset reload; non-zero return on failure.
+suricata_reload_epoch() {
+    local out ts
+    out=$($SURICATASC -c ruleset-reload-time "$SOCKET" 2>/dev/null)
+    ts=$(echo "$out" | jq -r '.message[0].last_reload // empty' 2>/dev/null)
+    [ -n "$ts" ] || return 1
+    date -d "$ts" +%s 2>/dev/null
+}
+
+# Trigger a fresh reload and confirm Suricata is running a ruleset at least as new
+# as the rules file. Returns 0 only when both hold, so retry keeps going until an
+# in-progress reload clears and our own reload completes.
+reload_and_verify() {
+    local out reload_epoch
+    out=$($SURICATASC -c reload-rules "$SOCKET")
+    echo "reload-rules: $out"
+
+    if [[ "$out" =~ "Reload already in progress" ]]; then
+        echo "A reload is already in progress; waiting for it to clear so a fresh reload can load the current ruleset."
+        return 1
+    fi
+    if [[ ! "$out" =~ '{"message":"done","return":"OK"}' ]]; then
+        echo "Suricata not ready or unexpected reload output; will retry."
+        return 1
+    fi
+
+    reload_epoch=$(suricata_reload_epoch) || { echo "Could not read ruleset-reload-time; will retry."; return 1; }
+    if [ "$reload_epoch" -ge "$target_mtime" ]; then
+        echo "Loaded ruleset is current: last reload ($(fmt_time "$reload_epoch")) is newer than rules file ($(fmt_time "$target_mtime"))."
         return 0
     fi
-    [[ "$output" =~ '{"message":"done","return":"OK"}' ]] && return 0
+    echo "Loaded ruleset is stale: last reload ($(fmt_time "$reload_epoch")) is older than rules file ($(fmt_time "$target_mtime")); retrying."
     return 1
 }
 
-retry 60 3 'reload_suricata_rules reload-rules' || fail "The Suricata container was not ready in time."
-retry 60 3 'reload_suricata_rules ruleset-reload-nonblocking' || fail "The Suricata container was not ready in time."
+retry 60 3 'reload_and_verify' || fail "Suricata did not load the current ruleset in time."