diff --git a/pillar/data/addtotab.sh b/pillar/data/addtotab.sh deleted file mode 100644 index 65f9446dd..000000000 --- a/pillar/data/addtotab.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash - -# This script adds sensors/nodes/etc to the nodes tab -default_salt_dir=/opt/so/saltstack/default -local_salt_dir=/opt/so/saltstack/local -TYPE=$1 -NAME=$2 -IPADDRESS=$3 -CPUS=$4 -GUID=$5 -MANINT=$6 -ROOTFS=$7 -NSM=$8 -MONINT=$9 -#NODETYPE=$10 -#HOTNAME=$11 - -echo "Seeing if this host is already in here. If so delete it" -if grep -q $NAME "$local_salt_dir/pillar/data/$TYPE.sls"; then - echo "Node Already Present - Let's re-add it" - awk -v blah=" $NAME:" 'BEGIN{ print_flag=1 } -{ - if( $0 ~ blah ) - { - print_flag=0; - next - } - if( $0 ~ /^ [a-zA-Z0-9]+:$/ ) - { - print_flag=1; - } - if ( print_flag == 1 ) - print $0 - -} ' $local_salt_dir/pillar/data/$TYPE.sls > $local_salt_dir/pillar/data/tmp.$TYPE.sls -mv $local_salt_dir/pillar/data/tmp.$TYPE.sls $local_salt_dir/pillar/data/$TYPE.sls -echo "Deleted $NAME from the tab. Now adding it in again with updated info" -fi -echo " $NAME:" >> $local_salt_dir/pillar/data/$TYPE.sls -echo " ip: $IPADDRESS" >> $local_salt_dir/pillar/data/$TYPE.sls -echo " manint: $MANINT" >> $local_salt_dir/pillar/data/$TYPE.sls -echo " totalcpus: $CPUS" >> $local_salt_dir/pillar/data/$TYPE.sls -echo " guid: $GUID" >> $local_salt_dir/pillar/data/$TYPE.sls -echo " rootfs: $ROOTFS" >> $local_salt_dir/pillar/data/$TYPE.sls -echo " nsmfs: $NSM" >> $local_salt_dir/pillar/data/$TYPE.sls -if [ $TYPE == 'sensorstab' ]; then - echo " monint: bond0" >> $local_salt_dir/pillar/data/$TYPE.sls -fi -if [ $TYPE == 'evaltab' ] || [ $TYPE == 'standalonetab' ]; then - echo " monint: bond0" >> $local_salt_dir/pillar/data/$TYPE.sls - if [ ! $10 ]; then - salt-call state.apply utility queue=True - fi -fi -if [ $TYPE == 'nodestab' ]; then - salt-call state.apply elasticsearch queue=True -# echo " nodetype: $NODETYPE" >> $local_salt_dir/pillar/data/$TYPE.sls -# echo " hotname: $HOTNAME" >> $local_salt_dir/pillar/data/$TYPE.sls -fi diff --git a/salt/allowed_states.map.jinja b/salt/allowed_states.map.jinja index c831b45fe..72e4bbe82 100644 --- a/salt/allowed_states.map.jinja +++ b/salt/allowed_states.map.jinja @@ -37,8 +37,7 @@ 'elasticfleet', 'elasticfleet.manager', 'elasticsearch.cluster', - 'elastic-fleet-package-registry', - 'utility' + 'elastic-fleet-package-registry' ] %} {% set sensor_states = [ diff --git a/salt/common/tools/sbin/so-common b/salt/common/tools/sbin/so-common index 812c1bb10..4e6580ae1 100755 --- a/salt/common/tools/sbin/so-common +++ b/salt/common/tools/sbin/so-common @@ -291,6 +291,20 @@ download_and_verify() { fi } +# check if container with name is running and optionally stop it +docker_check_running() { + # show running containers, only names + if docker ps --format '{{.Names}}' | grep -q "^so-${1}$"; then + if [[ "$2" == "--stop" ]]; then + docker stop "so-${1}" + fi + + return 0 + else + return 1 + fi +} + elastic_license() { read -r -d '' message <<- EOM diff --git a/salt/common/tools/sbin/so-kernel-upgrade b/salt/common/tools/sbin/so-kernel-upgrade new file mode 100755 index 000000000..46d471051 --- /dev/null +++ b/salt/common/tools/sbin/so-kernel-upgrade @@ -0,0 +1,57 @@ +#!/bin/bash +# +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. +# +# so-kernel-upgrade — switch the boot default to the installed UEK8 (6.x) kernel. +# +# Security Onion is moving off the EL9 stock kernel / UEK7 (5.x) onto UEK8 (6.x). +# Installing the kernel-uek-core package adds a UEK8 boot entry but does NOT make it the +# default: kernel-install/grubby only auto-promote a new kernel within the running +# kernel's flavor lineage, and we're crossing from a 5.x kernel to the new 6.x UEK flavor. +# So even with UPDATEDEFAULT=yes and DEFAULTKERNEL=kernel-uek-core the box keeps booting +# the old kernel. This tool finds the newest installed 6.x UEK kernel and makes it the +# GRUB default via grubby so the next boot comes up on UEK8. +# +# Idempotent: if the UEK8 kernel is already the default it does nothing. It only sets the +# boot default; it does NOT reboot — the admin reboots the node on their own schedule. + +log() { echo "[so-kernel-upgrade] $*"; } + +[ "$(id -u)" -eq 0 ] || { log "must run as root"; exit 1; } +command -v grubby >/dev/null 2>&1 || { log "grubby not found"; exit 1; } + +# Newest installed UEK8 (6.x) kernel known to the bootloader. UEK8 vmlinuz paths look like +# /boot/vmlinuz-6.12.0-203.76.7.5.el9uek.x86_64; the 5.x UEK7 and 5.14 RHCK won't match. +target="$(grubby --info=ALL 2>/dev/null \ + | sed -n 's/^kernel="\(.*\)"$/\1/p' \ + | grep -E '/vmlinuz-6\.[0-9]+.*uek' \ + | sort -V | tail -1)" + +if [ -z "$target" ]; then + log "no installed 6.x UEK (UEK8) kernel found — confirm the kernel repo is assigned and" + log "'dnf update' has installed kernel-uek-core. Nothing to do." + exit 0 +fi + +current="$(grubby --default-kernel 2>/dev/null)" +if [ "$current" = "$target" ]; then + log "UEK8 kernel is already the boot default: $target" + exit 0 +fi + +log "current default kernel: ${current:-unknown}" +log "switching boot default to UEK8 kernel: $target" +grubby --set-default="$target" || { log "ERROR: grubby --set-default failed for $target"; exit 1; } + +# Verify the change actually took before claiming success. +now="$(grubby --default-kernel 2>/dev/null)" +if [ "$now" != "$target" ]; then + log "ERROR: default kernel is still '${now:-unknown}' after set-default" + exit 1 +fi + +log "boot default is now $target" +log "REBOOT REQUIRED to start using the UEK8 kernel (currently running $(uname -r))." diff --git a/salt/common/tools/sbin/so-restart b/salt/common/tools/sbin/so-restart index 7345078b8..14747d134 100755 --- a/salt/common/tools/sbin/so-restart +++ b/salt/common/tools/sbin/so-restart @@ -5,27 +5,41 @@ # https://securityonion.net/license; you may not use this file except in compliance with the # Elastic License 2.0. - - -# Usage: so-restart kibana | playbook - . /usr/sbin/so-common -if [ $# -ge 1 ]; then +usage() { + echo "Usage: $0 [args]" + echo "" + echo "Supported args:" + echo " --force | -f Force stop all Salt jobs before starting component." + echo "" + echo "Examples:" + echo " $0 kibana Restart Kibana" + echo " $0 kibana --force Force stop all Salt jobs before restarting Kibana" + exit 1 +} - echo $banner - printf "Restarting $1...\n\nThis could take a while if another Salt job is running. \nRun this command with --force to stop all Salt jobs before proceeding.\n" - echo $banner - - if [ "$2" = "--force" ]; then - printf "\nForce-stopping all Salt jobs before proceeding\n\n" - salt-call saltutil.kill_all_jobs - fi - - case $1 in - "elastic-fleet") docker stop so-elastic-fleet && docker rm so-elastic-fleet && salt-call state.apply elasticfleet queue=True;; - *) docker stop so-$1 ; docker rm so-$1 ; salt-call state.apply $1 queue=True;; - esac -else - echo -e "\nPlease provide an argument by running like so-restart $component, or by using the component-specific script.\nEx. so-restart logstash, or so-logstash-restart\n" +if [[ $# -lt 1 ]]; then + usage fi + +#shellcheck disable=SC2154 +echo "$banner" +printf "Restarting %s...\n\nThis could take a while if another Salt job is running. \nRun this command with --force to stop all Salt jobs before proceeding.\n" "$1" +echo "$banner" +if [[ "$2" = "--force" ]] || [[ "$2" = "-f" ]]; then + printf "\nForce-stopping all Salt jobs before proceeding\n\n" + salt-call saltutil.kill_all_jobs +fi +case $1 in + "elastic-fleet"|"elasticfleet") + docker_check_running "elastic-fleet" "--stop" + docker rm "so-elastic-fleet" 2> /dev/null + salt-call state.apply elasticfleet queue=True + ;; + *) + docker_check_running "$1" "--stop" + docker rm "so-${1}" 2> /dev/null + salt-call state.apply "$1" queue=True + ;; +esac diff --git a/salt/common/tools/sbin/so-start b/salt/common/tools/sbin/so-start index 1a312a94d..a5c66ffe7 100755 --- a/salt/common/tools/sbin/so-start +++ b/salt/common/tools/sbin/so-start @@ -5,27 +5,54 @@ # https://securityonion.net/license; you may not use this file except in compliance with the # Elastic License 2.0. - - -# Usage: so-start all | kibana | playbook - +# shellcheck disable=SC1091 . /usr/sbin/so-common -if [ $# -ge 1 ]; then - echo $banner - printf "Starting $1...\n\nThis could take a while if another Salt job is running. \nRun this command with --force to stop all Salt jobs before proceeding.\n" - echo $banner +usage() { + echo "Usage: $0 [args]" + echo "" + echo "Supported args:" + echo " --force | -f Force stop all Salt jobs before starting component." + echo "" + echo "Examples:" + echo " $0 kibana Start Kibana" + echo " $0 kibana --force Force stop all Salt jobs before starting Kibana" + exit 1 +} - if [ "$2" = "--force" ]; then - printf "\nForce-stopping all Salt jobs before proceeding\n\n" - salt-call saltutil.kill_all_jobs - fi - - case $1 in - "all") salt-call state.highstate queue=True;; - "elastic-fleet") if docker ps | grep -q so-$1; then printf "\n$1 is already running!\n\n"; else docker rm so-$1 >/dev/null 2>&1 ; salt-call state.apply elasticfleet queue=True; fi ;; - *) if docker ps | grep -E -q '^so-$1$'; then printf "\n$1 is already running\n\n"; else docker rm so-$1 >/dev/null 2>&1 ; salt-call state.apply $1 queue=True; fi ;; - esac -else - echo -e "\nPlease provide an argument by running like so-start $component, or by using the component-specific script.\nEx. so-start logstash, or so-logstash-start\n" +if [[ $# -lt 1 ]]; then + usage fi + +#shellcheck disable=SC2154 +echo "$banner" +printf "Starting %s...\n\nThis could take a while if another Salt job is running. \nRun this command with --force to stop all Salt jobs before proceeding.\n" "$1" +echo "$banner" +if [[ "$2" = "--force" ]] || [[ "$2" == "-f" ]]; then + printf "\nForce-stopping all Salt jobs before proceeding\n\n" + salt-call saltutil.kill_all_jobs +fi + +case "$1" in + "all") + salt-call state.highstate queue=True + ;; + "elastic-fleet"|"elasticfleet") + if docker_check_running "elastic-fleet"; then + printf "\nso-%s is already running!\n\n" "elastic-fleet" + /usr/sbin/so-status + else + docker rm "so-elastic-fleet" 2> /dev/null + salt-call state.apply elasticfleet queue=True + fi + ;; + *) + if docker_check_running "$1"; then + printf "\nso-%s is already running\n\n" "$1" + /usr/sbin/so-status + else + docker rm "so-${1}" 2> /dev/null + salt-call state.apply "$1" queue=True + fi + ;; +esac diff --git a/salt/common/tools/sbin/so-stop b/salt/common/tools/sbin/so-stop index 32e24f83a..d036a7b63 100755 --- a/salt/common/tools/sbin/so-stop +++ b/salt/common/tools/sbin/so-stop @@ -5,21 +5,33 @@ # https://securityonion.net/license; you may not use this file except in compliance with the # Elastic License 2.0. - - -# Usage: so-stop kibana | playbook | thehive - +# shellcheck disable=SC1091 . /usr/sbin/so-common -if [ $# -ge 1 ]; then - echo $banner - printf "Stopping $1...\n" - echo $banner +usage() { + echo "Usage: $0 " + echo "" + echo "Examples:" + echo " $0 kibana Stop Kibana" + exit 1 +} - case $1 in - *) docker stop so-$1 ; docker rm so-$1 ;; - esac -else - echo -e "\nPlease provide an argument by running like so-stop $component, or by using the component-specific script.\nEx. so-stop logstash, or so-logstash-stop\n" +if [[ $# -lt 1 ]]; then + usage fi + +#shellcheck disable=SC2154 +echo "$banner" +printf "Stopping %s...\n" "$1" +echo "$banner" +case $1 in + "elasticfleet"|"elastic-fleet") + docker_check_running "elastic-fleet" "--stop" + docker rm "so-elastic-fleet" 2> /dev/null + ;; + *) + docker_check_running "$1" "--stop" + docker rm "so-${1}" 2> /dev/null + ;; +esac diff --git a/salt/elasticsearch/files/ingest/zeek.ssl b/salt/elasticsearch/files/ingest/zeek.ssl index 0bd6fedb2..80a7b12da 100644 --- a/salt/elasticsearch/files/ingest/zeek.ssl +++ b/salt/elasticsearch/files/ingest/zeek.ssl @@ -5,6 +5,7 @@ { "remove": { "field": ["host"], "ignore_failure": true } }, { "json": { "field": "message", "target_field": "message2", "ignore_failure": true } }, { "rename": { "field": "message2.version", "target_field": "ssl.version", "ignore_missing": true } }, + { "set": { "description": "Set transport for the community_id processor", "if": "ctx.ssl?.version == null || !ctx.ssl.version.startsWith('DTLS')", "field": "network.transport", "value": "tcp", "ignore_failure": true } }, { "rename": { "field": "message2.cipher", "target_field": "ssl.cipher", "ignore_missing": true } }, { "rename": { "field": "message2.curve", "target_field": "ssl.curve", "ignore_missing": true } }, { "rename": { "field": "message2.server_name", "target_field": "ssl.server_name", "ignore_missing": true } }, diff --git a/salt/kibana/enabled.sls b/salt/kibana/enabled.sls index 1257f66c6..e8b561754 100644 --- a/salt/kibana/enabled.sls +++ b/salt/kibana/enabled.sls @@ -69,7 +69,7 @@ wait_for_so-kibana: - ssl: True - verify_ssl: False - status: 200 - - wait_for: 300 + - wait_for: 600 - request_interval: 15 - require: - docker_container: so-kibana diff --git a/salt/manager/files/mirror-kernel.txt b/salt/manager/files/mirror-kernel.txt new file mode 100644 index 000000000..1d9ce75d2 --- /dev/null +++ b/salt/manager/files/mirror-kernel.txt @@ -0,0 +1,2 @@ +https://repo.securityonion.net/file/so-repo/prod/3/oracle/9-uek8 +https://repo-alt.securityonion.net/prod/3/oracle/9-uek8 diff --git a/salt/manager/files/repodownload.conf b/salt/manager/files/repodownload.conf index 3c156a9db..9c9cb5109 100644 --- a/salt/manager/files/repodownload.conf +++ b/salt/manager/files/repodownload.conf @@ -10,4 +10,9 @@ keepcache=0 name=Security Onion Repo repo mirrorlist=file:///opt/so/conf/reposync/mirror.txt enabled=1 -gpgcheck=1 \ No newline at end of file +gpgcheck=1 +[securityonionkernelsync] +name=Security Onion Kernel Repo repo +mirrorlist=file:///opt/so/conf/reposync/mirror-kernel.txt +enabled=1 +gpgcheck=1 diff --git a/salt/manager/init.sls b/salt/manager/init.sls index 2353bb64b..e77b1a601 100644 --- a/salt/manager/init.sls +++ b/salt/manager/init.sls @@ -86,6 +86,28 @@ repo_dir: - group - show_changes: False +kernelrepo_dir: + file.directory: + - name: /nsm/kernelrepo + - user: socore + - group: socore + - recurse: + - user + - group + - show_changes: False + +# Ensure /nsm/kernelrepo is always a valid (if empty) repo before it is ever assigned to +# a client. Without repodata/repomd.xml an enabled file:///nsm/kernelrepo repo makes every +# dnf operation fail; so-repo-sync only populates it after the highstate, so seed an empty +# repo here. Only runs when repodata is missing, so it won't clobber a synced repo. +kernelrepo_init_empty: + cmd.run: + - name: createrepo /nsm/kernelrepo + - unless: 'test -e /nsm/kernelrepo/repodata/repomd.xml' + - require: + - file: kernelrepo_dir + - pkg: install_createrepo + manager_sbin: file.recurse: - name: /usr/sbin @@ -122,6 +144,13 @@ so-repo-mirrorlist: - user: socore - group: socore +so-repo-kernel-mirrorlist: + file.managed: + - name: /opt/so/conf/reposync/mirror-kernel.txt + - source: salt://manager/files/mirror-kernel.txt + - user: socore + - group: socore + so-repo-sync: {% if MANAGERMERGED.reposync.enabled %} cron.present: diff --git a/salt/manager/tools/sbin/so-repo-sync b/salt/manager/tools/sbin/so-repo-sync index a0393a36b..d6a290c25 100755 --- a/salt/manager/tools/sbin/so-repo-sync +++ b/salt/manager/tools/sbin/so-repo-sync @@ -10,5 +10,16 @@ NOROOT=1 set -e curl --retry 5 --retry-delay 60 -A "reposync/$(sync_options)" https://sigs.securityonion.net/checkup --output /tmp/checkup + dnf reposync --norepopath -g --delete -m -c /opt/so/conf/reposync/repodownload.conf --repoid=securityonionsync --download-metadata -p /nsm/repo/ createrepo /nsm/repo + +# The kernel repo section is deployed to repodownload.conf by the manager highstate, which +# runs AFTER this script during soup. On the first upgrade to a kernel-aware version the +# on-disk config still predates the section, so guard on its presence to avoid dnf's +# "Unknown repo: 'securityonionkernelsync'" aborting the sync (set -e). The next sync after the +# highstate deploys the section will pick it up. +if grep -q '^\[securityonionkernelsync\]' /opt/so/conf/reposync/repodownload.conf; then + dnf reposync --norepopath -g --delete -m -c /opt/so/conf/reposync/repodownload.conf --repoid=securityonionkernelsync --download-metadata -p /nsm/kernelrepo/ + createrepo /nsm/kernelrepo +fi diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 2b8680191..6725cc95c 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -245,6 +245,7 @@ check_airgap() { UPDATE_DIR=/tmp/soagupdate/SecurityOnion AGDOCKER=/tmp/soagupdate/docker AGREPO=/tmp/soagupdate/minimal/Packages + AGUEKREPO=/tmp/soagupdate/uek/Packages else is_airgap=1 fi @@ -850,6 +851,28 @@ kibana_backport_streams_index_template() { } +# Runs kafka-features.sh upgrade --release-version $1 +# Upgrades Kafka KRaft cluster metadata +update_kafka_metadata() { + metadata_version="$1" + global_pillar="/opt/so/saltstack/local/pillar/global/soc_global.sls" + if PIPELINE=$(so-yaml.py get -r "$global_pillar" global.pipeline 2> /dev/null) && [[ "$PIPELINE" == "KAFKA" ]]; then + kafka_nodes_raw=$(salt-call pillar.get kafka:nodes --out=json) + if kafka_nodes=$(jq -er '.local | select(type == "object" and length > 0)' <<< "$kafka_nodes_raw"); then + bootstrap_servers=$(jq -r '[to_entries[] | select(.value.role | contains("broker")) | "\(.value.ip):9092"] | join(",")' <<< "$kafka_nodes") + echo "Upgrading Kafka KRaft cluster version" + so-kafka-cli kafka-features.sh --bootstrap-server "$bootstrap_servers" --command-config /opt/kafka/config/kraft/client.properties upgrade --release-version "$metadata_version" 2>/dev/null || true + + return 0 + else + FINAL_MESSAGE_QUEUE+=("WARNING: Unable to automatically perform Kafka KRaft cluster metadata update. This step can be performed manually using the following command (replacing \$BROKER_IP with the ip of atleast 1 available Kafka broker):") + FINAL_MESSAGE_QUEUE+=(" - so-kafka-cli kafka-features.sh --bootstrap-server \$BROKER_IP:9092 --command-config /opt/kafka/config/kraft/client.properties upgrade --release-version $metadata_version") + fi + else + echo "Nothing to do!" + fi +} + up_to_3.2.0() { fix_logstash_0013_lumberjack_pipeline_name @@ -867,6 +890,8 @@ post_to_3.2.0() { kibana_backport_streams_index_template + update_kafka_metadata "4.3" + POSTVERSION=3.2.0 } @@ -980,13 +1005,19 @@ update_airgap_rules() { rsync -a $UPDATE_DIR/agrules/securityonion-resources/* /nsm/securityonion-resources/ } -update_airgap_repo() { +update_airgap_repos() { # Update the files in the repo - echo "Syncing new updates to /nsm/repo" - rsync -a $AGREPO/* /nsm/repo/ - echo "Creating repo" + echo "Syncing new updates to /nsm/repo & /nsm/kernelrepo" + # Airgap soup copies new files into the local repo, but doesn't remove old packages. Retaining the ability to rollback package updates + rsync -a "$AGREPO"/ /nsm/repo/ + rsync -a "$AGUEKREPO"/ /nsm/kernelrepo/ + dnf -y install yum-utils createrepo_c + + echo "Running createrepo for /nsm/repo" createrepo /nsm/repo + echo "Running createrepo for /nsm/kernelrepo" + createrepo /nsm/kernelrepo } update_salt_mine() { @@ -1742,7 +1773,7 @@ main() { set -e if [[ $is_airgap -eq 0 ]]; then - update_airgap_repo + update_airgap_repos dnf clean all check_os_updates elif [[ $OS == 'oracle' ]]; then diff --git a/salt/nginx/enabled.sls b/salt/nginx/enabled.sls index 2e4c9631c..40fde5b0e 100644 --- a/salt/nginx/enabled.sls +++ b/salt/nginx/enabled.sls @@ -59,6 +59,7 @@ so-nginx: - /opt/so/conf/navigator/layers/:/opt/socore/html/navigator/assets/so:ro - /opt/so/conf/navigator/config.json:/opt/socore/html/navigator/assets/config.json:ro - /nsm/repo:/opt/socore/html/repo:ro + - /nsm/kernelrepo:/opt/socore/html/kernelrepo:ro - /nsm/rules:/nsm/rules:ro {% if NGINXMERGED.external_suricata %} - /opt/so/rules/nids/suri:/surirules:ro diff --git a/salt/nginx/etc/nginx.conf b/salt/nginx/etc/nginx.conf index 8150265f5..b7a70da2b 100644 --- a/salt/nginx/etc/nginx.conf +++ b/salt/nginx/etc/nginx.conf @@ -323,6 +323,16 @@ http { autoindex_localtime on; } + location /kernelrepo/ { + allow all; + sendfile on; + sendfile_max_chunk 1m; + autoindex on; + autoindex_exact_size off; + autoindex_format html; + autoindex_localtime on; + } + location /influxdb/ { auth_request /auth/sessions/whoami; rewrite /influxdb/api/(.*) /api/$1 break; diff --git a/salt/repo/client/oracle.sls b/salt/repo/client/oracle.sls index 70f529830..bf0a02751 100644 --- a/salt/repo/client/oracle.sls +++ b/salt/repo/client/oracle.sls @@ -6,6 +6,10 @@ {% from 'repo/client/map.jinja' import REPOPATH with context %} {% from 'vars/globals.map.jinja' import GLOBALS %} +{% import_yaml 'salt/minion.defaults.yaml' as saltversion %} +{% set saltversion = saltversion.salt.minion.version %} +{% set INSTALLEDSALTVERSION = grains.saltversion %} + {% set role = grains.id.split('_') | last %} {% set MANAGER = salt['grains.get']('master') %} {% if grains['os'] == 'OEL' %} @@ -57,6 +61,32 @@ so_repo: - enabled: 1 - gpgcheck: 1 +# Only assign the kernel repo once this node's running salt matches the version this +# SO release ships. During a soup the grid is mid-salt-upgrade; gating here keeps the +# UEK8 kernel repo (and the kernel update it enables) from activating until the node is +# fully on the target salt, the same way other states defer across the upgrade window. +{% if saltversion | string == INSTALLEDSALTVERSION | string %} +so_kernel_repo: + pkgrepo.managed: + - name: securityonionkernel + - humanname: Security Onion Kernel Repo + {% if GLOBALS.is_manager %} + - baseurl: file:///nsm/kernelrepo/ + {% else %} + - baseurl: https://{{ GLOBALS.repo_host }}/kernelrepo + {% endif %} + - enabled: 1 + - gpgcheck: 1 + # Supplementary kernel repo: tolerate it being empty/unreachable (e.g. before the + # manager has populated /nsm/kernelrepo) so a missing repomd.xml can't make every + # dnf/pkg operation on the grid fail. + - skip_if_unavailable: 1 + # Only assign the kernel repo once physical NIC names are pinned by MAC, so the + # UEK8 kernel update can't renumber interfaces SO binds by name (see pin_nic_names + # in salt/common/init.sls, which drops this marker via /usr/sbin/so-nic-pin). + - onlyif: 'test -e /opt/so/state/nic_names_pinned' +{% endif %} + {% endif %} # TODO: Add a pillar entry for custom repos diff --git a/salt/suricata/enabled.sls b/salt/suricata/enabled.sls index d9d7f32ae..bb31b2c78 100644 --- a/salt/suricata/enabled.sls +++ b/salt/suricata/enabled.sls @@ -65,10 +65,11 @@ so-suricata: - file: suriclassifications surirulereload: - cmd.run: + cmd.run: - name: /usr/sbin/so-suricata-reload-rules >> /opt/so/log/suricata/reload.log 2>&1 - - onchanges: + - onchanges: - file: surirulesync + - onlyif: test -f /opt/so/rules/suricata/all-rulesets.rules - require: - docker_container: so-suricata diff --git a/salt/suricata/tools/sbin/so-suricata-reload-rules b/salt/suricata/tools/sbin/so-suricata-reload-rules index e21e28e2f..6db519413 100644 --- a/salt/suricata/tools/sbin/so-suricata-reload-rules +++ b/salt/suricata/tools/sbin/so-suricata-reload-rules @@ -7,5 +7,59 @@ . /usr/sbin/so-common -retry 60 3 'docker exec so-suricata /opt/suricata/bin/suricatasc -c reload-rules /var/run/suricata/suricata-command.socket' '{"message":"done","return":"OK"}' || fail "The Suricata container was not ready in time." -retry 60 3 'docker exec so-suricata /opt/suricata/bin/suricatasc -c ruleset-reload-nonblocking /var/run/suricata/suricata-command.socket' '{"message":"done","return":"OK"}' || fail "The Suricata container was not ready in time." +RULES_FILE="/opt/so/rules/suricata/all-rulesets.rules" +SOCKET="/var/run/suricata/suricata-command.socket" +SURICATASC="docker exec so-suricata /opt/suricata/bin/suricatasc" + +# Format an epoch as a human-readable local timestamp for log messages. +fmt_time() { date -d "@$1" '+%Y-%m-%d %H:%M:%S %Z' 2>/dev/null; } + +# Prefix each input line with the current timestamp. +timestamp_lines() { while IFS= read -r line; do printf '%s %s\n' "$(date '+%Y-%m-%d %H:%M:%S %Z')" "$line"; done; } + +# Epoch of Suricata's last *completed* ruleset reload; non-zero return on failure. +suricata_reload_epoch() { + local out ts + out=$($SURICATASC -c ruleset-reload-time "$SOCKET" 2>/dev/null) + ts=$(echo "$out" | jq -r '.message[0].last_reload // empty' 2>/dev/null) + [ -n "$ts" ] || return 1 + date -d "$ts" +%s 2>/dev/null +} + +# Trigger a fresh reload and confirm Suricata is running a ruleset at least as new +# as the rules file. Returns 0 only when both hold, so retry keeps going until an +# in-progress reload clears and our own reload completes. +reload_and_verify() { + local out reload_epoch + out=$($SURICATASC -c reload-rules "$SOCKET") + echo "reload-rules: $out" + + if [[ "$out" =~ "Reload already in progress" ]]; then + echo "A reload is already in progress; waiting for it to clear so a fresh reload can load the current ruleset." + return 1 + fi + if [[ ! "$out" =~ '{"message":"done","return":"OK"}' ]]; then + echo "Suricata not ready or unexpected reload output; will retry." + return 1 + fi + + reload_epoch=$(suricata_reload_epoch) || { echo "Could not read ruleset-reload-time; will retry."; return 1; } + if [ "$reload_epoch" -ge "$target_mtime" ]; then + echo "Loaded ruleset is current: last reload ($(fmt_time "$reload_epoch")) is newer than rules file ($(fmt_time "$target_mtime"))." + return 0 + fi + echo "Loaded ruleset is stale: last reload ($(fmt_time "$reload_epoch")) is older than rules file ($(fmt_time "$target_mtime")); retrying." + return 1 +} + +# Run the reload/verify, timestamping every line of output (ours and the +# retry/fail helpers') so reload.log shows when each step ran. The pipeline is +# synchronous, so the log is fully flushed and ordered before we exit; the +# script's real exit code is preserved via PIPESTATUS. +{ + # Epoch mtime of the ruleset we need Suricata to have loaded. Captured once so + # a file update mid-reload does not move the goalpost. + target_mtime=$(stat -c %Y "$RULES_FILE") || fail "Could not stat the Suricata rules file: $RULES_FILE" + retry 60 3 'reload_and_verify' || fail "Suricata did not load the current ruleset in time." +} 2>&1 | timestamp_lines +exit "${PIPESTATUS[0]}" diff --git a/salt/top.sls b/salt/top.sls index cf743edd1..ffa43864c 100644 --- a/salt/top.sls +++ b/salt/top.sls @@ -83,7 +83,6 @@ base: - zeek - strelka - elastalert - - utility - elasticfleet - pcap.cleanup @@ -113,7 +112,6 @@ base: - zeek - strelka - elastalert - - utility - elasticfleet - stig - kafka @@ -141,7 +139,6 @@ base: - elastic-fleet-package-registry - kibana - elastalert - - utility - elasticfleet - stig - kafka @@ -168,7 +165,6 @@ base: - elastic-fleet-package-registry - kibana - elastalert - - utility - elasticfleet - kafka @@ -198,7 +194,6 @@ base: - elastic-fleet-package-registry - kibana - elastalert - - utility - elasticfleet - stig - kafka @@ -222,7 +217,6 @@ base: - elasticsearch - elastic-fleet-package-registry - kibana - - utility - suricata - zeek - elasticfleet diff --git a/salt/utility/bin/eval b/salt/utility/bin/eval deleted file mode 100644 index f30f0f421..000000000 --- a/salt/utility/bin/eval +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -# Wait for ElasticSearch to come up, so that we can query for version infromation -echo -n "Waiting for ElasticSearch..." -COUNT=0 -ELASTICSEARCH_CONNECTED="no" -while [[ "$COUNT" -le 30 ]]; do - curl -K /opt/so/conf/elasticsearch/curl.config -k --output /dev/null --silent --head --fail -L https://{{ GLOBALS.manager_ip }}:9200 - if [ $? -eq 0 ]; then - ELASTICSEARCH_CONNECTED="yes" - echo "connected!" - break - else - ((COUNT+=1)) - sleep 1 - echo -n "." - fi -done -if [ "$ELASTICSEARCH_CONNECTED" == "no" ]; then - echo - echo -e "Connection attempt timed out. Unable to connect to ElasticSearch. \nPlease try: \n -checking log(s) in /var/log/elasticsearch/\n -running 'docker ps' \n -running 'sudo so-elastic-restart'" - echo - - exit -fi - -echo "Applying cross cluster search config..." - curl -K /opt/so/conf/elasticsearch/curl.config -s -k -XPUT -L https://{{ GLOBALS.manager_ip }}:9200/_cluster/settings \ - -H 'Content-Type: application/json' \ - -d "{\"persistent\": {\"search\": {\"remote\": {\"{{ grains.host }}\": {\"seeds\": [\"127.0.0.1:9300\"]}}}}}" diff --git a/salt/utility/init.sls b/salt/utility/init.sls deleted file mode 100644 index 49bb2cb0c..000000000 --- a/salt/utility/init.sls +++ /dev/null @@ -1,22 +0,0 @@ -{% from 'allowed_states.map.jinja' import allowed_states %} -{% from 'vars/globals.map.jinja' import GLOBALS %} - -{% if sls in allowed_states %} - {% if grains['role'] in ['so-eval', 'so-import'] %} -fixsearch: - cmd.script: - - shell: /bin/bash - - cwd: /opt/so - - source: salt://utility/bin/eval - - template: jinja - - defaults: - GLOBALS: {{ GLOBALS }} - {% endif %} - -{% else %} - -{{sls}}_state_not_allowed: - test.fail_without_changes: - - name: {{sls}}_state_not_allowed - -{% endif %} diff --git a/setup/so-functions b/setup/so-functions index c1f8e11f8..bb7d6922b 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -889,6 +889,7 @@ create_repo() { title "Create the repo directory" logCmd "dnf -y install yum-utils createrepo_c" logCmd "createrepo /nsm/repo" + logCmd "createrepo /nsm/kernelrepo" } @@ -1813,6 +1814,16 @@ securityonion_repo() { echo "mirrorlist=file:///etc/yum/mirror.txt" >> /etc/yum.repos.d/securityonion.repo echo "enabled=1" >> /etc/yum.repos.d/securityonion.repo echo "gpgcheck=1" >> /etc/yum.repos.d/securityonion.repo + echo "https://repo.securityonion.net/file/so-repo/prod/3/oracle/9-uek8" > /etc/yum/mirror-kernel.txt + echo "https://so-repo-east.s3.us-east-005.backblazeb2.com/prod/3/oracle/9-uek8" >> /etc/yum/mirror-kernel.txt + echo "[securityonionkernel]" > /etc/yum.repos.d/securityonionkernel.repo + echo "name=Security Onion Kernel Repo repo" >> /etc/yum.repos.d/securityonionkernel.repo + echo "mirrorlist=file:///etc/yum/mirror-kernel.txt" >> /etc/yum.repos.d/securityonionkernel.repo + echo "enabled=1" >> /etc/yum.repos.d/securityonionkernel.repo + echo "gpgcheck=1" >> /etc/yum.repos.d/securityonionkernel.repo + # Supplementary kernel repo: tolerate it being empty/unreachable so a missing + # repomd.xml can't make every dnf operation fail before the repo is populated. + echo "skip_if_unavailable=1" >> /etc/yum.repos.d/securityonionkernel.repo logCmd "dnf repolist" else echo "[securityonion]" > /etc/yum.repos.d/securityonion.repo @@ -1821,6 +1832,13 @@ securityonion_repo() { echo "enabled=1" >> /etc/yum.repos.d/securityonion.repo echo "gpgcheck=1" >> /etc/yum.repos.d/securityonion.repo echo "sslverify=0" >> /etc/yum.repos.d/securityonion.repo + echo "[securityonionkernel]" > /etc/yum.repos.d/securityonionkernel.repo + echo "name=Security Onion Kernel Repo" >> /etc/yum.repos.d/securityonionkernel.repo + echo "baseurl=https://$MSRV/kernelrepo" >> /etc/yum.repos.d/securityonionkernel.repo + echo "enabled=1" >> /etc/yum.repos.d/securityonionkernel.repo + echo "gpgcheck=1" >> /etc/yum.repos.d/securityonionkernel.repo + echo "sslverify=0" >> /etc/yum.repos.d/securityonionkernel.repo + echo "skip_if_unavailable=1" >> /etc/yum.repos.d/securityonionkernel.repo logCmd "dnf repolist" fi elif [[ ! $waitforstate ]]; then @@ -1830,12 +1848,25 @@ securityonion_repo() { echo "enabled=1" >> /etc/yum.repos.d/securityonion.repo echo "gpgcheck=1" >> /etc/yum.repos.d/securityonion.repo echo "sslverify=0" >> /etc/yum.repos.d/securityonion.repo + echo "[securityonionkernel]" > /etc/yum.repos.d/securityonionkernel.repo + echo "name=Security Onion Kernel Repo" >> /etc/yum.repos.d/securityonionkernel.repo + echo "baseurl=https://$MSRV/kernelrepo" >> /etc/yum.repos.d/securityonionkernel.repo + echo "enabled=1" >> /etc/yum.repos.d/securityonionkernel.repo + echo "gpgcheck=1" >> /etc/yum.repos.d/securityonionkernel.repo + echo "sslverify=0" >> /etc/yum.repos.d/securityonionkernel.repo + echo "skip_if_unavailable=1" >> /etc/yum.repos.d/securityonionkernel.repo elif [[ $waitforstate ]]; then echo "[securityonion]" > /etc/yum.repos.d/securityonion.repo echo "name=Security Onion Repo" >> /etc/yum.repos.d/securityonion.repo echo "baseurl=file:///nsm/repo/" >> /etc/yum.repos.d/securityonion.repo echo "enabled=1" >> /etc/yum.repos.d/securityonion.repo echo "gpgcheck=1" >> /etc/yum.repos.d/securityonion.repo + echo "[securityonionkernel]" > /etc/yum.repos.d/securityonionkernel.repo + echo "name=Security Onion Kernel Repo" >> /etc/yum.repos.d/securityonionkernel.repo + echo "baseurl=file:///nsm/kernelrepo/" >> /etc/yum.repos.d/securityonionkernel.repo + echo "enabled=1" >> /etc/yum.repos.d/securityonionkernel.repo + echo "gpgcheck=1" >> /etc/yum.repos.d/securityonionkernel.repo + echo "skip_if_unavailable=1" >> /etc/yum.repos.d/securityonionkernel.repo fi logCmd "dnf repolist all" if [[ $waitforstate ]]; then @@ -1851,9 +1882,12 @@ repo_sync_local() { # Sync the repo from the SO repo locally. info "Adding Repo Download Configuration" mkdir -p /nsm/repo + mkdir -p /nsm/kernelrepo mkdir -p /opt/so/conf/reposync/cache echo "https://repo.securityonion.net/file/so-repo/prod/3/oracle/9" > /opt/so/conf/reposync/mirror.txt echo "https://repo-alt.securityonion.net/prod/3/oracle/9" >> /opt/so/conf/reposync/mirror.txt + echo "https://repo.securityonion.net/file/so-repo/prod/3/oracle/9-uek8" > /opt/so/conf/reposync/mirror-kernel.txt + echo "https://repo-alt.securityonion.net/prod/3/oracle/9-uek8" >> /opt/so/conf/reposync/mirror-kernel.txt echo "[main]" > /opt/so/conf/reposync/repodownload.conf echo "gpgcheck=1" >> /opt/so/conf/reposync/repodownload.conf echo "installonly_limit=3" >> /opt/so/conf/reposync/repodownload.conf @@ -1867,12 +1901,18 @@ repo_sync_local() { echo "mirrorlist=file:///opt/so/conf/reposync/mirror.txt" >> /opt/so/conf/reposync/repodownload.conf echo "enabled=1" >> /opt/so/conf/reposync/repodownload.conf echo "gpgcheck=1" >> /opt/so/conf/reposync/repodownload.conf + echo "[securityonionkernel]" >> /opt/so/conf/reposync/repodownload.conf + echo "name=Security Onion Kernel Repo repo" >> /opt/so/conf/reposync/repodownload.conf + echo "mirrorlist=file:///opt/so/conf/reposync/mirror-kernel.txt" >> /opt/so/conf/reposync/repodownload.conf + echo "enabled=1" >> /opt/so/conf/reposync/repodownload.conf + echo "gpgcheck=1" >> /opt/so/conf/reposync/repodownload.conf logCmd "dnf repolist" if [[ ! $is_airgap ]]; then curl --retry 5 --retry-delay 60 -A "netinstall/$SOVERSION/$OS/$(uname -r)/1" https://sigs.securityonion.net/checkup --output /tmp/install - retry 5 60 "dnf reposync --norepopath -g --delete -m -c /opt/so/conf/reposync/repodownload.conf --repoid=securityonionsync --download-metadata -p /nsm/repo/" >> "$setup_log" 2>&1 || fail_setup "Repo sync failed" + retry 5 60 "dnf reposync --norepopath -g --delete -m -c /opt/so/conf/reposync/repodownload.conf --repoid=securityonionsync --download-metadata -p /nsm/repo/" >> "$setup_log" 2>&1 || fail_setup "Failed to sync repos" + retry 5 60 "dnf reposync --norepopath -g --delete -m -c /opt/so/conf/reposync/repodownload.conf --repoid=securityonionkernel --download-metadata -p /nsm/kernelrepo/" >> "$setup_log" 2>&1 || fail_setup "Failed to sync kernel repos" # After the download is complete run createrepo create_repo fi @@ -2229,6 +2269,13 @@ update_sudoers_for_testing() { } update_packages() { + # Pin physical NIC names by MAC BEFORE pulling packages, so the UEK8 kernel that + # the update below installs can't renumber the interfaces SO binds by name. Doing + # it here (instead of waiting for the common highstate) also drops the + # /opt/so/state/nic_names_pinned marker that gates the kernel repo, so the kernel + # repo is assigned on the very first highstate and the kernel isn't downgraded and + # then re-upgraded. Run-once: so-nic-pin no-ops if the marker already exists. + logCmd "bash ../salt/common/tools/sbin/so-nic-pin" logCmd "dnf repolist" logCmd "dnf -y update --allowerasing --exclude=salt*,docker*,containerd*" RMREPOFILES=("oracle-linux-ol9.repo" "uek-ol9.repo" "virt-ol9.repo") diff --git a/setup/so-setup b/setup/so-setup index e4b74716b..a44934088 100755 --- a/setup/so-setup +++ b/setup/so-setup @@ -9,14 +9,17 @@ # Make sure you are root before doing anything uid="$(id -u)" if [ "$uid" -ne 0 ]; then - echo "This script must be run using sudo!" - fail_setup + echo "This script must be run using sudo!" >&2 + exit 1 fi # Save the original argument array since we modify it original_args=("$@") -cd "$(dirname "$0")" || fail_setup +cd "$(dirname "$0")" || { + echo "Unable to change to setup directory" >&2 + exit 1 +} echo "Getting started..."