suricata: only reload rules once the ruleset file exists

On a fresh install the surirulesync file.recurse creates .gitkeep before SOC has generated all-rulesets.rules. That change satisfied the surirulereload onchanges requisite, so the reload ran with no ruleset present, failed to stat the file, and reported the state (and install) as failed. Add an onlyif guard so the reload only runs when all-rulesets.rules exists. A .gitkeep-only sync now leaves the state a clean success (onlyif condition false); once SOC writes the ruleset, the reload fires normally.
suricata: timestamp each line of reload log output
2026-07-04 08:08:27 +02:00 · 2026-07-01 15:12:54 -04:00 · 2026-07-01 15:12:53 -04:00 · 2026-07-01 09:00:36 -04:00 · 2026-06-30 09:40:23 -04:00 · 2026-06-26 13:47:09 -05:00
7 changed files with 179 additions and 81 deletions
@@ -291,6 +291,20 @@ download_and_verify() {
 	fi
 }

+# check if container with name is running and optionally stop it
+docker_check_running() {
+    # show running containers, only names
+    if docker ps --format '{{.Names}}' | grep -q "^so-${1}$"; then
+        if [[ "$2" == "--stop" ]]; then
+            docker stop "so-${1}"
+        fi
+
+        return 0
+    else
+        return 1
+    fi
+}
+
 elastic_license() {

 read -r -d '' message <<- EOM
@@ -5,27 +5,41 @@
 # https://securityonion.net/license; you may not use this file except in compliance with the
 # Elastic License 2.0.

-
-
-# Usage: so-restart  kibana | playbook 
-
 . /usr/sbin/so-common

-if [ $# -ge 1 ]; then
+usage() {
+    echo "Usage: $0 <component> [args]"
+    echo ""
+    echo "Supported args:"
+    echo "  --force | -f          Force stop all Salt jobs before starting component."
+    echo ""
+    echo "Examples:"
+    echo "  $0 kibana             Restart Kibana"
+    echo "  $0 kibana --force     Force stop all Salt jobs before restarting Kibana"
+    exit 1
+}

-        echo $banner
-        printf "Restarting $1...\n\nThis could take a while if another Salt job is running. \nRun this command with --force to stop all Salt jobs before proceeding.\n"
-        echo $banner
-
-        if [ "$2" = "--force" ]; then
-                printf "\nForce-stopping all Salt jobs before proceeding\n\n"
-                salt-call saltutil.kill_all_jobs
-        fi
-
-        case $1 in
-                "elastic-fleet") docker stop so-elastic-fleet && docker rm so-elastic-fleet && salt-call state.apply elasticfleet queue=True;;
-                *)  docker stop so-$1 ; docker rm so-$1 ; salt-call state.apply $1 queue=True;;
-        esac
-else
-        echo -e "\nPlease provide an argument by running like so-restart $component, or by using the component-specific script.\nEx. so-restart logstash, or so-logstash-restart\n"
+if [[ $# -lt 1 ]]; then
+    usage
 fi
+
+#shellcheck disable=SC2154
+echo "$banner"
+printf "Restarting %s...\n\nThis could take a while if another Salt job is running. \nRun this command with --force to stop all Salt jobs before proceeding.\n" "$1"
+echo "$banner"
+if [[ "$2" = "--force" ]] || [[ "$2" = "-f" ]]; then
+    printf "\nForce-stopping all Salt jobs before proceeding\n\n"
+    salt-call saltutil.kill_all_jobs
+fi
+case $1 in
+    "elastic-fleet"|"elasticfleet")
+        docker_check_running "elastic-fleet" "--stop"
+        docker rm "so-elastic-fleet" 2> /dev/null
+        salt-call state.apply elasticfleet queue=True
+        ;;
+    *)
+        docker_check_running "$1" "--stop"
+        docker rm "so-${1}" 2> /dev/null
+        salt-call state.apply "$1" queue=True
+        ;;
+esac
@@ -5,27 +5,54 @@
 # https://securityonion.net/license; you may not use this file except in compliance with the
 # Elastic License 2.0.

-
-
-# Usage: so-start  all | kibana | playbook 
-
+# shellcheck disable=SC1091
 . /usr/sbin/so-common

-if [ $# -ge 1 ]; then
-	echo $banner
-	printf "Starting $1...\n\nThis could take a while if another Salt job is running. \nRun this command with --force to stop all Salt jobs before proceeding.\n"
-	echo $banner
+usage() {
+    echo "Usage: $0 <component> [args]"
+    echo ""
+    echo "Supported args:"
+    echo "  --force | -f          Force stop all Salt jobs before starting component."
+    echo ""
+    echo "Examples:"
+    echo "  $0 kibana             Start Kibana"
+    echo "  $0 kibana --force     Force stop all Salt jobs before starting Kibana"
+    exit 1
+}

-	if [ "$2" = "--force" ]; then
-   		printf "\nForce-stopping all Salt jobs before proceeding\n\n"
-   		salt-call saltutil.kill_all_jobs
-	fi
-
-	case $1 in
-   		"all") salt-call state.highstate queue=True;;
-   		"elastic-fleet") if docker ps | grep -q so-$1; then printf "\n$1 is already running!\n\n"; else docker rm so-$1 >/dev/null 2>&1 ; salt-call state.apply elasticfleet queue=True; fi ;; 
-   		*) if docker ps | grep -E -q '^so-$1$'; then printf "\n$1 is already running\n\n"; else docker rm so-$1 >/dev/null 2>&1 ; salt-call state.apply $1 queue=True; fi ;; 
-	esac
-else
-	echo -e "\nPlease provide an argument by running like so-start $component, or by using the component-specific script.\nEx. so-start logstash, or so-logstash-start\n"	
+if [[ $# -lt 1 ]]; then
+    usage
 fi
+
+#shellcheck disable=SC2154
+echo "$banner"
+printf "Starting %s...\n\nThis could take a while if another Salt job is running. \nRun this command with --force to stop all Salt jobs before proceeding.\n" "$1"
+echo "$banner"
+if [[ "$2" = "--force" ]] || [[ "$2" == "-f" ]]; then
+	printf "\nForce-stopping all Salt jobs before proceeding\n\n"
+	salt-call saltutil.kill_all_jobs
+fi
+
+case "$1" in
+    "all")
+        salt-call state.highstate queue=True
+        ;;
+   	"elastic-fleet"|"elasticfleet")
+        if docker_check_running "elastic-fleet"; then
+            printf "\nso-%s is already running!\n\n" "elastic-fleet"
+            /usr/sbin/so-status
+        else
+            docker rm "so-elastic-fleet" 2> /dev/null
+            salt-call state.apply elasticfleet queue=True
+        fi
+        ;;
+   	*)
+        if docker_check_running "$1"; then
+            printf "\nso-%s is already running\n\n" "$1"
+            /usr/sbin/so-status
+        else
+            docker rm "so-${1}" 2> /dev/null
+            salt-call state.apply "$1" queue=True
+        fi
+        ;;
+esac
@@ -5,21 +5,33 @@
 # https://securityonion.net/license; you may not use this file except in compliance with the
 # Elastic License 2.0.

-
-
-# Usage: so-stop kibana | playbook | thehive
-
+# shellcheck disable=SC1091
 . /usr/sbin/so-common

-if [ $# -ge 1 ]; then
-	echo $banner
-	printf "Stopping $1...\n"
-	echo $banner
+usage() {
+    echo "Usage: $0 <component>"
+    echo ""
+    echo "Examples:"
+    echo "  $0 kibana             Stop Kibana"
+    exit 1
+}

-	case $1 in
-    		*) docker stop so-$1 ; docker rm so-$1 ;;
-	esac
-else
-	echo -e "\nPlease provide an argument by running like so-stop $component, or by using the component-specific script.\nEx. so-stop logstash, or so-logstash-stop\n"	
+if [[ $# -lt 1 ]]; then
+    usage
 fi

+
+#shellcheck disable=SC2154
+echo "$banner"
+printf "Stopping %s...\n" "$1"
+echo "$banner"
+case $1 in
+	"elasticfleet"|"elastic-fleet")
+        docker_check_running "elastic-fleet" "--stop"
+        docker rm "so-elastic-fleet" 2> /dev/null
+        ;;
+    *)
+        docker_check_running "$1" "--stop"
+        docker rm "so-${1}" 2> /dev/null
+    ;;
+esac
@@ -850,28 +850,6 @@ kibana_backport_streams_index_template() {

 }

-# Runs kafka-features.sh upgrade --release-version $1
-#   Upgrades Kafka KRaft cluster metadata
-update_kafka_metadata() {
-    metadata_version="$1"
-    global_pillar="/opt/so/saltstack/local/pillar/global/soc_global.sls"
-    if PIPELINE=$(so-yaml.py get -r "$global_pillar" global.pipeline 2> /dev/null) && [[ "$PIPELINE" == "KAFKA" ]]; then
-        kafka_nodes_raw=$(salt-call pillar.get kafka:nodes --out=json)
-        if kafka_nodes=$(jq -er '.local | select(type == "object" and length > 0)' <<< "$kafka_nodes_raw"); then
-            bootstrap_servers=$(jq -r '[to_entries[] | select(.value.role | contains("broker")) | "\(.value.ip):9092"] | join(",")' <<< "$kafka_nodes")
-            echo "Upgrading Kafka KRaft cluster version"
-            so-kafka-cli kafka-features.sh --bootstrap-server "$bootstrap_servers" --command-config /opt/kafka/config/kraft/client.properties upgrade --release-version "$metadata_version" 2>/dev/null || true
-
-            return 0
-        else
-            FINAL_MESSAGE_QUEUE+=("WARNING: Unable to automatically perform Kafka KRaft cluster metadata update. This step can be performed manually using the following command (replacing \$BROKER_IP with the ip of atleast 1 available Kafka broker):")
-            FINAL_MESSAGE_QUEUE+=("    - so-kafka-cli kafka-features.sh --bootstrap-server \$BROKER_IP:9092 --command-config /opt/kafka/config/kraft/client.properties upgrade --release-version $metadata_version")
-        fi
-    else
-        echo "Nothing to do!"
-    fi
-}
-
 up_to_3.2.0() {
  fix_logstash_0013_lumberjack_pipeline_name

@@ -889,8 +867,6 @@ post_to_3.2.0() {

  kibana_backport_streams_index_template

-  update_kafka_metadata "4.3"
-
  POSTVERSION=3.2.0
 }

@@ -65,10 +65,11 @@ so-suricata:
      - file: suriclassifications

 surirulereload:
-  cmd.run: 
+  cmd.run:
    - name: /usr/sbin/so-suricata-reload-rules >> /opt/so/log/suricata/reload.log 2>&1
-    - onchanges: 
+    - onchanges:
        - file: surirulesync
+    - onlyif: test -f /opt/so/rules/suricata/all-rulesets.rules
    - require:
        - docker_container: so-suricata
    
@@ -7,5 +7,59 @@

 . /usr/sbin/so-common

-retry 60 3 'docker exec so-suricata /opt/suricata/bin/suricatasc -c reload-rules /var/run/suricata/suricata-command.socket' '{"message":"done","return":"OK"}' || fail "The Suricata container was not ready in time."
-retry 60 3 'docker exec so-suricata /opt/suricata/bin/suricatasc -c ruleset-reload-nonblocking /var/run/suricata/suricata-command.socket' '{"message":"done","return":"OK"}' || fail "The Suricata container was not ready in time."
+RULES_FILE="/opt/so/rules/suricata/all-rulesets.rules"
+SOCKET="/var/run/suricata/suricata-command.socket"
+SURICATASC="docker exec so-suricata /opt/suricata/bin/suricatasc"
+
+# Format an epoch as a human-readable local timestamp for log messages.
+fmt_time() { date -d "@$1" '+%Y-%m-%d %H:%M:%S %Z' 2>/dev/null; }
+
+# Prefix each input line with the current timestamp, including a final line that lacks a trailing newline.
+timestamp_lines() { while IFS= read -r line || [[ -n "$line" ]]; do printf '%s %s\n' "$(date '+%Y-%m-%d %H:%M:%S %Z')" "$line"; done; }
+
+# Epoch of Suricata's last *completed* ruleset reload; non-zero return on failure.
+suricata_reload_epoch() {
+  local out ts
+  out=$($SURICATASC -c ruleset-reload-time "$SOCKET" 2>/dev/null)
+  ts=$(echo "$out" | jq -r '.message[0].last_reload // empty' 2>/dev/null)
+  [ -n "$ts" ] || return 1
+  date -d "$ts" +%s 2>/dev/null
+}
+
+# Trigger a reload via $SURICATASC and confirm the last completed reload is at least as
+# new as the rules file mtime held in the global target_mtime. Returns 0 only when both
+# hold, so retry keeps going until an in-progress reload clears and our own completes.
+reload_and_verify() {
+  local out reload_epoch
+  out=$($SURICATASC -c reload-rules "$SOCKET")
+  echo "reload-rules: $out"
+
+  if [[ "$out" =~ "Reload already in progress" ]]; then
+    echo "A reload is already in progress; waiting for it to clear so a fresh reload can load the current ruleset."
+    return 1
+  fi
+  if [[ ! "$out" =~ '{"message":"done","return":"OK"}' ]]; then
+    echo "Suricata not ready or unexpected reload output; will retry."
+    return 1
+  fi
+
+  reload_epoch=$(suricata_reload_epoch) || { echo "Could not read ruleset-reload-time; will retry."; return 1; }
+  if [ "$reload_epoch" -ge "$target_mtime" ]; then
+    echo "Loaded ruleset is current: last reload ($(fmt_time "$reload_epoch")) is not older than rules file ($(fmt_time "$target_mtime"))."
+    return 0
+  fi
+  echo "Loaded ruleset is stale: last reload ($(fmt_time "$reload_epoch")) is older than rules file ($(fmt_time "$target_mtime")); retrying."
+  return 1
+}
+
+# Run the reload/verify, timestamping every line of output (ours and the
+# retry/fail helpers') so reload.log shows when each step ran. The pipeline is
+# synchronous, so the log is fully flushed and ordered before we exit; the
+# script's real exit code is preserved via PIPESTATUS.
+{
+  # Epoch mtime of the ruleset we need Suricata to have loaded. Captured once so
+  # a file update mid-reload does not move the goalpost.
+  target_mtime=$(stat -c %Y "$RULES_FILE") || fail "Could not stat the Suricata rules file: $RULES_FILE"
+  retry 60 3 'reload_and_verify' || fail "Suricata did not load the current ruleset in time."
+} 2>&1 | timestamp_lines
+exit "${PIPESTATUS[0]}"
Author	SHA1	Message	Date
Josh Patterson	795aa898a3	suricata: only reload rules once the ruleset file exists On a fresh install the surirulesync file.recurse creates .gitkeep before SOC has generated all-rulesets.rules. That change satisfied the surirulereload onchanges requisite, so the reload ran with no ruleset present, failed to stat the file, and reported the state (and install) as failed. Add an onlyif guard so the reload only runs when all-rulesets.rules exists. A .gitkeep-only sync now leaves the state a clean success (onlyif condition false); once SOC writes the ruleset, the reload fires normally.	2026-07-01 15:12:54 -04:00
Josh Patterson	69d77382f1	suricata: timestamp each line of reload log output Route the reload/verify output (ours plus so-common's retry/fail lines) through a synchronous timestamping pipeline so every line in reload.log is prefixed with a date/time, and preserve the real exit code via PIPESTATUS.	2026-07-01 15:12:53 -04:00
Josh Patterson	ee36f5f84c	suricata: verify reloaded ruleset is newer than the rules file Treating an in-progress reload as instant success could report success while Suricata was still running a stale ruleset (the in-flight reload may have started before the new all-rulesets.rules was written). Make success conditional on Suricata actually having loaded the current ruleset: capture the rules-file mtime up front, trigger a blocking reload-rules, then query ruleset-reload-time and only succeed when last_reload >= mtime. An in-progress reload now retries (waits for it to clear so our own fresh reload runs) instead of short-circuiting, and a ruleset that never catches up within the retry window fails via fail(). Also drop the redundant ruleset-reload-nonblocking call (the verified blocking reload is authoritative and the async call was what left a reload running) and log human-readable timestamps.	2026-07-01 09:00:36 -04:00
Josh Patterson	52574e21c6	suricata: treat in-progress rule reload as success so-suricata-reload-rules failed the surirulereload state when a rule reload was already running: suricatasc returns {"message":"Reload already in progress","return":"NOK"}, which never matched the expected output, so retry looped all 60 attempts (~3 min) and called fail. Wrap the suricatasc calls so an in-progress reload is treated as success (the in-flight reload picks up the new rules) while genuine container-not-ready conditions still retry and ultimately fail.	2026-06-30 09:40:23 -04:00
Jorge Reyes	576c7bfedd	Merge pull request #16013 from Security-Onion-Solutions/reyesj2/so-start update so-stop \| so-start \| so-restart scripts	2026-06-26 13:47:09 -05:00
reyesj2	b3b7ecdded	update so-stop \| so-start \| so-restart scripts	2026-06-26 13:19:18 -05:00