Compare commits


1 Commit

16 changed files with 109 additions and 100 deletions

View File

@@ -395,7 +395,7 @@ is_manager_node() {
}
is_sensor_node() {
# Check to see if this is a sensor node
# Check to see if this is a sensor (forward) node
is_single_node_grid && return 0
grep "role: so-" /etc/salt/grains | grep -E "sensor|heavynode" &> /dev/null
}
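As an aside, the role detection above just greps the Salt grains file for a sensor-class role; a minimal illustrative sketch of the same check, using a temporary file instead of /etc/salt/grains:

    # Illustrative grains file; the real check reads /etc/salt/grains.
    printf 'role: so-sensor\n' > /tmp/grains.example
    # Same grep chain as is_sensor_node(); heavy nodes also match.
    if grep "role: so-" /tmp/grains.example | grep -qE "sensor|heavynode"; then
        echo "treated as a sensor (forward) node"
    fi
    rm -f /tmp/grains.example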

View File

@@ -62,6 +62,8 @@ container_list() {
"so-soc"
"so-steno"
"so-strelka-backend"
"so-strelka-filestream"
"so-strelka-frontend"
"so-strelka-manager"
"so-suricata"
"so-telegraf"

View File

@@ -32,6 +32,16 @@ so-elastic-fleet-auto-configure-logstash-outputs:
- retry:
attempts: 4
interval: 30
{# Separate from above in order to catch elasticfleet-logstash.crt changes and force update to fleet output policy #}
so-elastic-fleet-auto-configure-logstash-outputs-force:
cmd.run:
- name: /usr/sbin/so-elastic-fleet-outputs-update --force --certs
- retry:
attempts: 4
interval: 30
- onchanges:
- x509: etc_elasticfleet_logstash_crt
{% endif %}
# If enabled, automatically update Fleet Server URLs & ES Connection
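In practice, the forced state re-runs the outputs update with fresh certs whenever the Logstash certificate changes; a hedged shell equivalent of the cmd.run plus retry behavior above (4 attempts, 30 seconds apart), not a replacement for the state itself:

    # Sketch only: mirrors the retry settings (attempts: 4, interval: 30) in the state above.
    for attempt in 1 2 3 4; do
        /usr/sbin/so-elastic-fleet-outputs-update --force --certs && break
        sleep 30
    done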

View File

@@ -2,10 +2,8 @@
# or more contributor license agreements. Licensed under the Elastic License 2.0; you may not use
# this file except in compliance with the Elastic License 2.0.
{% set GRIDNODETOKEN = salt['pillar.get']('global:fleet_grid_enrollment_token_general') -%}
{% if grains.role == 'so-heavynode' %}
{% set GRIDNODETOKEN = salt['pillar.get']('global:fleet_grid_enrollment_token_heavy') -%}
{% endif %}
{%- set GRIDNODETOKENGENERAL = salt['pillar.get']('global:fleet_grid_enrollment_token_general') -%}
{%- set GRIDNODETOKENHEAVY = salt['pillar.get']('global:fleet_grid_enrollment_token_heavy') -%}
{% set AGENT_STATUS = salt['service.available']('elastic-agent') %}
{% if not AGENT_STATUS %}
@@ -17,13 +15,19 @@ pull_agent_installer:
- mode: 755
- makedirs: True
{% if grains.role not in ['so-heavynode'] %}
run_installer:
cmd.run:
- name: ./so-elastic-agent_linux_amd64 -token={{ GRIDNODETOKEN }}
- name: ./so-elastic-agent_linux_amd64 -token={{ GRIDNODETOKENGENERAL }}
- cwd: /opt/so
- retry:
attempts: 3
interval: 20
- retry: True
{% else %}
run_installer:
cmd.run:
- name: ./so-elastic-agent_linux_amd64 -token={{ GRIDNODETOKENHEAVY }}
- cwd: /opt/so
- retry: True
{% endif %}
cleanup_agent_installer:
file.absent:
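The net effect of the template change is that heavy nodes enroll with their own token while every other role uses the general one. An illustrative shell rendering of that branch, with placeholder role and token values (the real values come from the pillar keys shown above):

    ROLE="so-sensor"                            # placeholder; comes from Salt grains
    TOKEN_GENERAL="<general enrollment token>"  # pillar: global:fleet_grid_enrollment_token_general
    TOKEN_HEAVY="<heavy enrollment token>"      # pillar: global:fleet_grid_enrollment_token_heavy
    if [ "$ROLE" = "so-heavynode" ]; then
        echo "./so-elastic-agent_linux_amd64 -token=$TOKEN_HEAVY"
    else
        echo "./so-elastic-agent_linux_amd64 -token=$TOKEN_GENERAL"
    fi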

View File

@@ -8,6 +8,27 @@
. /usr/sbin/so-common
FORCE_UPDATE=false
UPDATE_CERTS=false
while [[ $# -gt 0 ]]; do
case $1 in
-f|--force)
FORCE_UPDATE=true
shift
;;
-c| --certs)
UPDATE_CERTS=true
shift
;;
*)
echo "Unknown option $1"
echo "Usage: $0 [-f|--force] [-c|--certs]"
exit 1
;;
esac
done
# Only run on Managers
if ! is_manager_node; then
printf "Not a Manager Node... Exiting"
@@ -17,17 +38,42 @@ fi
function update_logstash_outputs() {
if logstash_policy=$(curl -K /opt/so/conf/elasticsearch/curl.config -L "http://localhost:5601/api/fleet/outputs/so-manager_logstash" --retry 3 --retry-delay 10 --fail 2>/dev/null); then
SSL_CONFIG=$(echo "$logstash_policy" | jq -r '.item.ssl')
LOGSTASHKEY=$(openssl rsa -in /etc/pki/elasticfleet-logstash.key)
LOGSTASHCRT=$(openssl x509 -in /etc/pki/elasticfleet-logstash.crt)
LOGSTASHCA=$(openssl x509 -in /etc/pki/tls/certs/intca.crt)
if SECRETS=$(echo "$logstash_policy" | jq -er '.item.secrets' 2>/dev/null); then
if [[ "$UPDATE_CERTS" != "true" ]]; then
# Reuse existing secret
JSON_STRING=$(jq -n \
--arg UPDATEDLIST "$NEW_LIST_JSON" \
--argjson SECRETS "$SECRETS" \
--argjson SSL_CONFIG "$SSL_CONFIG" \
'{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"","ssl": $SSL_CONFIG,"secrets": $SECRETS}')
else
# Update certs, creating new secret
JSON_STRING=$(jq -n \
--arg UPDATEDLIST "$NEW_LIST_JSON" \
--arg LOGSTASHKEY "$LOGSTASHKEY" \
--arg LOGSTASHCRT "$LOGSTASHCRT" \
--arg LOGSTASHCA "$LOGSTASHCA" \
'{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"","ssl": {"certificate": $LOGSTASHCRT,"certificate_authorities":[ $LOGSTASHCA ]},"secrets": {"ssl":{"key": $LOGSTASHKEY }}}')
fi
else
if [[ "$UPDATE_CERTS" != "true" ]]; then
# Reuse existing ssl config
JSON_STRING=$(jq -n \
--arg UPDATEDLIST "$NEW_LIST_JSON" \
--argjson SSL_CONFIG "$SSL_CONFIG" \
'{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"","ssl": $SSL_CONFIG}')
else
# Update ssl config
JSON_STRING=$(jq -n \
--arg UPDATEDLIST "$NEW_LIST_JSON" \
--arg LOGSTASHKEY "$LOGSTASHKEY" \
--arg LOGSTASHCRT "$LOGSTASHCRT" \
--arg LOGSTASHCA "$LOGSTASHCA" \
'{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"","ssl": {"certificate": $LOGSTASHCRT,"key": $LOGSTASHKEY,"certificate_authorities":[ $LOGSTASHCA ]}}')
fi
fi
fi
@@ -151,7 +197,7 @@ NEW_LIST_JSON=$(jq --compact-output --null-input '$ARGS.positional' --args -- "$
NEW_HASH=$(sha1sum <<< "$NEW_LIST_JSON" | awk '{print $1}')
# Compare the current & new list of outputs - if different, update the Logstash outputs
if [ "$NEW_HASH" = "$CURRENT_HASH" ]; then
if [[ "$NEW_HASH" = "$CURRENT_HASH" ]] && [[ "$FORCE_UPDATE" != "true" ]]; then
printf "\nHashes match - no update needed.\n"
printf "Current List: $CURRENT_LIST\nNew List: $NEW_LIST_JSON\n"

View File

@@ -41,13 +41,13 @@ create_temp_file() {
}
log_title() {
if [ "$1" == "LOG" ]; then
if [ $1 == "LOG" ]; then
echo -e "\n${BOLD}================ $2 ================${NC}\n"
elif [ "$1" == "OK" ]; then
elif [ $1 == "OK" ]; then
echo -e "${GREEN} $2 ${NC}"
elif [ "$1" == "WARN" ]; then
elif [ $1 == "WARN" ]; then
echo -e "${YELLOW} $2 ${NC}"
elif [ "$1" == "ERROR" ]; then
elif [ $1 == "ERROR" ]; then
echo -e "${RED} $2 ${NC}"
fi
}
@@ -756,7 +756,7 @@ if [ "$should_trigger_recommendations" = true ]; then
ilm_output=$(so-elasticsearch-query "${index}/_ilm/explain" --fail 2>/dev/null) || true
if [ -n "$ilm_output" ]; then
policy=$(echo "$ilm_output" | jq --arg idx "$index" -r ".indices[$idx].policy // empty" 2>/dev/null)
policy=$(echo "$ilm_output" | jq -r ".indices.\"$index\".policy // empty" 2>/dev/null)
fi
if [ -n "$policy" ] && [ -n "${policy_ages[$policy]:-}" ]; then
delete_min_age=${policy_ages[$policy]}
@@ -1134,9 +1134,8 @@ else
for i in "${!scheduled_indices_names[@]}"; do
sorted_indices+=("${scheduled_indices_days[$i]}|${scheduled_indices_names[$i]}|${scheduled_indices_sizes[$i]}")
done
OLD_IFS="$IFS"
IFS=$'\n' sorted_indices=($(sort -t'|' -k1 -n <<<"${sorted_indices[*]}"))
IFS="$OLD_IFS"
unset IFS
for entry in "${sorted_indices[@]}"; do
IFS='|' read -r days_until index_name size_bytes <<< "$entry"
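For the ILM lookup, note that the jq filter is double-quoted, so the shell expands variables inside it before jq runs; embedding the quoted index name directly makes the lookup target exactly one index. A standalone illustration with sample data:

    ilm_output='{"indices":{"so-logs-zeek-2024.01.01":{"policy":"so-zeek-logs"}}}'
    index="so-logs-zeek-2024.01.01"
    echo "$ilm_output" | jq -r ".indices.\"$index\".policy // empty"
    # prints: so-zeek-logs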

View File

@@ -23,7 +23,6 @@ TOPFILE=/opt/so/saltstack/default/salt/top.sls
BACKUPTOPFILE=/opt/so/saltstack/default/salt/top.sls.backup
SALTUPGRADED=false
SALT_CLOUD_INSTALLED=false
SALT_CLOUD_CONFIGURED=false
# used to display messages to the user at the end of soup
declare -a FINAL_MESSAGE_QUEUE=()
@@ -1271,10 +1270,6 @@ upgrade_salt() {
if rpm -q salt-cloud &>/dev/null; then
SALT_CLOUD_INSTALLED=true
fi
# Check if salt-cloud is configured
if [[ -f /etc/salt/cloud.profiles.d/socloud.conf ]]; then
SALT_CLOUD_CONFIGURED=true
fi
echo "Removing yum versionlock for Salt."
echo ""
@@ -1592,7 +1587,7 @@ main() {
# ensure the mine is updated and populated before highstates run, following the salt-master restart
update_salt_mine
if [[ $SALT_CLOUD_CONFIGURED == true && $SALTUPGRADED == true ]]; then
if [[ $SALT_CLOUD_INSTALLED == true && $SALTUPGRADED == true ]]; then
echo "Updating salt-cloud config to use the new Salt version"
salt-call state.apply salt.cloud.config concurrent=True
fi
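The net effect of dropping the separate "configured" flag is that the cloud config state now runs whenever the salt-cloud package is installed and Salt itself was upgraded. A condensed sketch of the resulting check:

    SALT_CLOUD_INSTALLED=false
    SALTUPGRADED=true    # placeholder; soup sets this when Salt is upgraded
    if rpm -q salt-cloud &>/dev/null; then
        SALT_CLOUD_INSTALLED=true
    fi
    if [[ $SALT_CLOUD_INSTALLED == true && $SALTUPGRADED == true ]]; then
        echo "would run: salt-call state.apply salt.cloud.config concurrent=True"
    fi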
@@ -1679,7 +1674,7 @@ This appears to be a distributed deployment. Other nodes should update themselve
Each minion is on a random 15 minute check-in period and things like network bandwidth can be a factor in how long the actual upgrade takes. If you have a heavy node on a slow link, it is going to take a while to get the containers to it. Depending on what changes happened between the versions, Elasticsearch might not be able to talk to said heavy node until the update is complete.
If it looks like you're missing data after the upgrade, please avoid restarting services and instead make sure at least one search node has completed its upgrade. The best way to do this is to run 'sudo salt-call state.highstate' from a search node and make sure there are no errors. Typically if it works on one node it will work on the rest. Sensor nodes are less complex and will update as they check in so you can monitor those from the Grid section of SOC.
If it looks like you're missing data after the upgrade, please avoid restarting services and instead make sure at least one search node has completed its upgrade. The best way to do this is to run 'sudo salt-call state.highstate' from a search node and make sure there are no errors. Typically if it works on one node it will work on the rest. Forward nodes are less complex and will update as they check in so you can monitor those from the Grid section of SOC.
For more information, please see $DOC_BASE_URL/soup.html#distributed-deployments.

View File

@@ -7,7 +7,7 @@ pcap:
description: By default, Stenographer limits the number of files in the pcap directory to 30000 to avoid limitations with the ext3 filesystem. However, if you're using the ext4 or xfs filesystems, then it is safe to increase this value. So if you have a large amount of storage and find that you only have 3 weeks worth of PCAP on disk while still having plenty of free space, then you may want to increase this default setting.
helpLink: stenographer.html
diskfreepercentage:
description: Stenographer will purge old PCAP on a regular basis to keep the disk free percentage at this level. If you have a distributed deployment with dedicated Sensor nodes, then the default value of 10 should be reasonable since Stenographer should be the main consumer of disk space in the /nsm partition. However, if you have systems that run both Stenographer and Elasticsearch at the same time (like eval and standalone installations), then you'll want to make sure that this value is no lower than 21 so that you avoid Elasticsearch hitting its watermark setting at 80% disk usage. If you have an older standalone installation, then you may need to manually change this value to 21.
description: Stenographer will purge old PCAP on a regular basis to keep the disk free percentage at this level. If you have a distributed deployment with dedicated forward nodes, then the default value of 10 should be reasonable since Stenographer should be the main consumer of disk space in the /nsm partition. However, if you have systems that run both Stenographer and Elasticsearch at the same time (like eval and standalone installations), then you'll want to make sure that this value is no lower than 21 so that you avoid Elasticsearch hitting its watermark setting at 80% disk usage. If you have an older standalone installation, then you may need to manually change this value to 21.
helpLink: stenographer.html
blocks:
description: The number of 1MB packet blocks used by Stenographer and AF_PACKET to store packets in memory, per thread. You shouldn't need to change this.
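The arithmetic behind the diskfreepercentage guidance: keeping at least 21% of the partition free caps usage at 79%, just under the 80% Elasticsearch watermark referenced in the description, while the sensor-only default of 10 would allow usage to reach 90%. A quick check:

    WATERMARK=80    # watermark value referenced in the description above
    for diskfreepercentage in 10 21; do
        max_used=$((100 - diskfreepercentage))
        if [ "$max_used" -lt "$WATERMARK" ]; then
            echo "diskfreepercentage=${diskfreepercentage}: usage tops out at ${max_used}%, below the ${WATERMARK}% watermark"
        else
            echo "diskfreepercentage=${diskfreepercentage}: usage can reach ${max_used}%, at or above the ${WATERMARK}% watermark"
        fi
    done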

View File

@@ -36,11 +36,6 @@ cloud_profiles:
SALTVERSION: {{ SALTVERSION }}
- template: jinja
- makedirs: True
{% else %}
no_hypervisors_configured:
test.succeed_without_changes:
- name: no_hypervisors_configured
- comment: No hypervisors are configured
{% endif %}
{% else %}

View File

@@ -2552,27 +2552,9 @@ soc:
assistant:
enabled: false
investigationPrompt: Investigate Alert ID {socId}
contextLimitSmall: 200000
contextLimitLarge: 1000000
thresholdColorRatioLow: 0.5
thresholdColorRatioMed: 0.75
thresholdColorRatioMax: 1
availableModels:
- id: sonnet-4
displayName: Claude Sonnet 4
contextLimitSmall: 200000
contextLimitLarge: 1000000
lowBalanceColorAlert: 500000
- id: sonnet-4.5
displayName: Claude Sonnet 4.5
contextLimitSmall: 200000
contextLimitLarge: 1000000
lowBalanceColorAlert: 500000
- id: gptoss-120b
displayName: GPT-OSS 120B
contextLimitSmall: 128000
contextLimitLarge: 128000
lowBalanceColorAlert: 500000
- id: qwen-235b
displayName: QWEN 235B
contextLimitSmall: 256000
contextLimitLarge: 256000
lowBalanceColorAlert: 500000

View File

@@ -606,6 +606,14 @@ soc:
investigationPrompt:
description: Prompt given to Onion AI when beginning an investigation.
global: True
contextLimitSmall:
description: Smaller context limit for Onion AI.
global: True
advanced: True
contextLimitLarge:
description: Larger context limit for Onion AI.
global: True
advanced: True
thresholdColorRatioLow:
description: Lower visual context color change threshold.
global: True
@@ -622,32 +630,6 @@ soc:
description: Onion AI credit amount at which balance turns red.
global: True
advanced: True
availableModels:
description: List of AI models available for use in SOC as well as model specific warning thresholds.
global: True
advanced: True
forcedType: "[]{}"
helpLink: assistant.html
syntax: json
uiElements:
- field: id
label: Model ID
required: True
- field: displayName
label: Display Name
required: True
- field: contextLimitSmall
label: Context Limit (Small)
forcedType: int
required: True
- field: contextLimitLarge
label: Context Limit (Large)
forcedType: int
required: True
- field: lowBalanceColorAlert
label: Low Balance Color Alert
forcedType: int
required: True
apiTimeoutMs:
description: Duration (in milliseconds) to wait for a response from the SOC server API before giving up and showing an error on the SOC UI.
global: True

View File

@@ -14,7 +14,7 @@ include:
strelka_filestream:
docker_container.running:
- image: {{ GLOBALS.registry_host }}:5000/{{ GLOBALS.image_repo }}/so-strelka-manager:{{ GLOBALS.so_version }}
- image: {{ GLOBALS.registry_host }}:5000/{{ GLOBALS.image_repo }}/so-strelka-filestream:{{ GLOBALS.so_version }}
- binds:
- /opt/so/conf/strelka/filestream/:/etc/strelka/:ro
- /nsm/strelka:/nsm/strelka

View File

@@ -14,7 +14,7 @@ include:
strelka_frontend:
docker_container.running:
- image: {{ GLOBALS.registry_host }}:5000/{{ GLOBALS.image_repo }}/so-strelka-manager:{{ GLOBALS.so_version }}
- image: {{ GLOBALS.registry_host }}:5000/{{ GLOBALS.image_repo }}/so-strelka-frontend:{{ GLOBALS.so_version }}
- binds:
- /opt/so/conf/strelka/frontend/:/etc/strelka/:ro
- /nsm/strelka/log/:/var/log/strelka/:rw
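Both Strelka fixes are the same copy/paste correction: the filestream and frontend containers were pulling the so-strelka-manager image. A hedged check that would flag this kind of mismatch, assuming the usual salt/strelka/<service>/enabled.sls layout (paths are illustrative only):

    for svc in filestream frontend; do
        f="salt/strelka/$svc/enabled.sls"    # assumed path, adjust to the actual repo layout
        [ -f "$f" ] || continue
        # Flag any image line that does not match the service name.
        grep -H "image:" "$f" | grep -v "so-strelka-$svc" && echo "possible image mismatch in $svc"
    done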

View File

@@ -1646,12 +1646,6 @@ reserve_ports() {
fi
}
clear_previous_setup_results() {
# Disregard previous setup outcomes.
rm -f /root/failure
rm -f /root/success
}
reinstall_init() {
info "Putting system in state to run setup again"
@@ -1663,6 +1657,10 @@ reinstall_init() {
local service_retry_count=20
# Disregard previous install outcomes
rm -f /root/failure
rm -f /root/success
{
# remove all of root's cronjobs
logCmd "crontab -r -u root"

View File

@@ -132,10 +132,6 @@ if [[ -f /root/accept_changes ]]; then
reset_proxy
fi
# Previous setup attempts, even if setup doesn't actually start the installation,
# can leave behind results that may interfere with the current setup attempt.
clear_previous_setup_results
title "Parsing Username for Install"
parse_install_username

View File

@@ -676,8 +676,8 @@ whiptail_install_type_dist_existing() {
EOM
install_type=$(whiptail --title "$whiptail_title" --menu "$node_msg" 19 75 7 \
"SENSOR" "Add a Sensor Node for monitoring network traffic " \
"SEARCHNODE" "Add a Search Node with parsing " \
"SENSOR" "Create a forward only sensor " \
"SEARCHNODE" "Add a search node with parsing " \
"FLEET" "Dedicated Elastic Fleet Node " \
"HEAVYNODE" "Sensor + Search Node " \
"IDH" "Intrusion Detection Honeypot Node " \