Compare commits


20 Commits

Author SHA1 Message Date
Josh Patterson 199c2746f1 stop salt-minion and salt-master regardless of install type. display reinstall on console and save to logfile 2026-04-24 15:24:11 -04:00
Josh Patterson 8eca465ef6 uninstall elastic-agent before stopping dockers on reinstall 2026-04-24 14:35:11 -04:00
Josh Patterson 02381fbbe9 stop salt-cloud, belt-and-suspenders against a broken/incomplete salt RPM 2026-04-24 11:33:21 -04:00
Josh Patterson 0722b681b1 redo service stop on reinstall 2026-04-24 11:04:46 -04:00
Josh Patterson 564815e836 redo how services are stopped during reinstall 2026-04-24 10:46:29 -04:00
Jason Ertel ba55468da8 Merge pull request #15822 from Security-Onion-Solutions/jertel/wip
numeric test description
2026-04-24 08:26:55 -04:00
Jason Ertel cdd217283d numeric test description 2026-04-24 08:13:36 -04:00
Jorge Reyes 810a582717 Merge pull request #15813 from Security-Onion-Solutions/reyesj2-es933
split up Elastic Fleet state
2026-04-23 14:51:32 -05:00
Mike Reeves 5f35554fdc Merge pull request #15712 from Security-Onion-Solutions/soupfix
Fix soup
2026-04-23 12:39:50 -04:00
reyesj2 fdfca469cc prevent non-manager nodes from running elasticsearch.cluster state manually 2026-04-23 09:53:07 -05:00
reyesj2 5f2ec76ba8 prevent fleetnode from being able to run elasticfleet.manager state manually 2026-04-23 09:50:45 -05:00
reyesj2 b015c8ff14 remove docker import 2026-04-23 09:31:30 -05:00
reyesj2 7e70870a9e remove globals import 2026-04-23 09:25:36 -05:00
reyesj2 22b32a16dd include elasticfleet.config 2026-04-23 08:30:47 -05:00
Josh Patterson cd6707a566 Merge pull request #15800 from Security-Onion-Solutions/feature/vm-raid-status
monitor raid for vms
2026-04-22 09:42:44 -04:00
Josh Patterson edd207a9d5 soup update socloud.conf 2026-04-22 09:20:53 -04:00
Jorge Reyes 01bd3b6e06 Merge pull request #15807 from Security-Onion-Solutions/reyesj2-es933
urlencode elasticsearch version
2026-04-21 14:11:04 -05:00
Josh Patterson 7f93110d68 Merge remote-tracking branch 'origin/3/dev' into feature/vm-raid-status 2026-04-21 10:10:38 -04:00
Josh Patterson ee437265fc monitor raid for vms 2026-04-20 12:00:02 -04:00
Mike Reeves 664f3fd18a Fix soup 2026-04-01 14:47:05 -04:00
9 changed files with 75 additions and 49 deletions
+2
@@ -33,6 +33,8 @@
   'kratos',
   'hydra',
   'elasticfleet',
+  'elasticfleet.manager',
+  'elasticsearch.cluster',
   'elastic-fleet-package-registry',
   'utility'
 ] %}
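
These two additions pair with the gating change in the elasticfleet and elasticsearch states later in this diff: once a state checks the full sls name against allowed_states instead of only its first segment, each sub-state has to be listed here explicitly before it can be applied by hand. A minimal sketch of checking the effect from a minion, using stock salt-call commands (the role behavior is per the commit messages, not shown in this hunk):

    # Renders on a manager; renders empty on excluded roles, so a manual
    # run becomes a no-op instead of a misconfiguration.
    salt-call state.show_sls elasticsearch.cluster
    salt-call state.apply elasticfleet.manager test=True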
+9 -2
@@ -9,7 +9,7 @@
 . /usr/sbin/so-common
-software_raid=("SOSMN" "SOSMN-DE02" "SOSSNNV" "SOSSNNV-DE02" "SOS10k-DE02" "SOS10KNV" "SOS10KNV-DE02" "SOS10KNV-DE02" "SOS2000-DE02" "SOS-GOFAST-LT-DE02" "SOS-GOFAST-MD-DE02" "SOS-GOFAST-HV-DE02")
+software_raid=("SOSMN" "SOSMN-DE02" "SOSSNNV" "SOSSNNV-DE02" "SOS10k-DE02" "SOS10KNV" "SOS10KNV-DE02" "SOS10KNV-DE02" "SOS2000-DE02" "SOS-GOFAST-LT-DE02" "SOS-GOFAST-MD-DE02" "SOS-GOFAST-HV-DE02" "HVGUEST")
 hardware_raid=("SOS1000" "SOS1000F" "SOSSN7200" "SOS5000" "SOS4000")
 {%- if salt['grains.get']('sosmodel', '') %}
@@ -87,6 +87,11 @@ check_boss_raid() {
 }
 check_software_raid() {
+  if [[ ! -f /proc/mdstat ]]; then
+    SWRAID=0
+    return
+  fi
   SWRC=$(grep "_" /proc/mdstat)
   if [[ -n $SWRC ]]; then
     # RAID is failed in some way
@@ -107,7 +112,9 @@ if [[ "$is_hwraid" == "true" ]]; then
 fi
 if [[ "$is_softwareraid" == "true" ]]; then
   check_software_raid
-  check_boss_raid
+  if [ "$model" != "HVGUEST" ]; then
+    check_boss_raid
+  fi
 fi
 sum=$(($SWRAID + $BOSSRAID + $HWRAID))
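
check_software_raid reads /proc/mdstat, where a degraded md array marks a failed or missing member with an underscore in its status column; the new early return covers hosts such as HVGUEST guests that may have no md devices (and thus no /proc/mdstat) at all. A quick illustration of what the grep keys on, with made-up example output:

    cat /proc/mdstat
    # md0 : active raid1 sdb1[1] sda1[0]
    #       1953382464 blocks [2/1] [U_]   <- "_" means a failed/missing member
    grep "_" /proc/mdstat && echo "software RAID degraded"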
+1 -1
@@ -21,7 +21,7 @@ include:
   - elasticfleet.manager
 {%- endif %}
-{% if GLOBALS.role not in ['so-fleet'] %}
+{% if GLOBALS.role != "so-fleet" %}
 # Wait for Elasticsearch to be ready - no reason to try running Elastic Fleet server if ES is not ready
 wait_for_elasticsearch_elasticfleet:
   cmd.run:
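
The cmd.run block is truncated in this view, but its stated purpose is a bounded wait for Elasticsearch before the Fleet server starts. As a rough standalone sketch of that idea (endpoint, flags, and retry budget are assumptions, not taken from the diff):

    # Hypothetical readiness poll: give up after ~5 minutes.
    for i in $(seq 1 30); do
      curl -sk --output /dev/null "https://localhost:9200" && break
      sleep 10
    done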
+4 -3
@@ -4,11 +4,12 @@
 # Elastic License 2.0.
 {% from 'allowed_states.map.jinja' import allowed_states %}
-{% if sls.split('.')[0] in allowed_states %}
-{% from 'vars/globals.map.jinja' import GLOBALS %}
-{% from 'docker/docker.map.jinja' import DOCKERMERGED %}
+{% if sls in allowed_states %}
 {% from 'elasticfleet/map.jinja' import ELASTICFLEETMERGED %}
+include:
+  - elasticfleet.config
 # If enabled, automatically update Fleet Logstash Outputs
 {% if ELASTICFLEETMERGED.config.server.enable_auto_configuration and grains.role not in ['so-import', 'so-eval'] %}
 so-elastic-fleet-auto-configure-logstash-outputs:
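
Because elasticfleet.manager now includes elasticfleet.config, applying the manager state by hand can no longer skip the configuration prerequisites. One way to confirm the rendered include chain on a manager, as a sketch:

    # show_sls renders the state (followed includes and all) without applying it
    salt-call state.show_sls elasticfleet.manager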
+1 -1
@@ -4,7 +4,7 @@
 # Elastic License 2.0.
 {% from 'allowed_states.map.jinja' import allowed_states %}
-{% if sls.split('.')[0] in allowed_states %}
+{% if sls in allowed_states %}
 {% from 'vars/globals.map.jinja' import GLOBALS %}
 {% from 'elasticsearch/config.map.jinja' import ELASTICSEARCHMERGED %}
 {% from 'elasticsearch/template.map.jinja' import ES_INDEX_SETTINGS, SO_MANAGED_INDICES %}
+12 -9
@@ -24,6 +24,14 @@ BACKUPTOPFILE=/opt/so/saltstack/default/salt/top.sls.backup
 SALTUPGRADED=false
 SALT_CLOUD_INSTALLED=false
 SALT_CLOUD_CONFIGURED=false
+# Check if salt-cloud is installed
+if rpm -q salt-cloud &>/dev/null; then
+  SALT_CLOUD_INSTALLED=true
+fi
+# Check if salt-cloud is configured
+if [[ -f /etc/salt/cloud.profiles.d/socloud.conf ]]; then
+  SALT_CLOUD_CONFIGURED=true
+fi
 # used to display messages to the user at the end of soup
 declare -a FINAL_MESSAGE_QUEUE=()
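
These checks previously lived inside upgrade_salt (removed in the third hunk below); hoisting them to the top of soup means SALT_CLOUD_INSTALLED and SALT_CLOUD_CONFIGURED are populated even on runs that never upgrade Salt, which post_to_3.1.0 depends on. The probe works because rpm -q exits nonzero when the package is absent:

    rpm -q salt-cloud; echo "exit=$?"   # 0 when installed, nonzero otherwise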
@@ -489,6 +497,10 @@ up_to_3.1.0() {
 post_to_3.1.0() {
   /usr/sbin/so-kibana-space-defaults
+  # ensure manager has new version of socloud.conf
+  if [[ $SALT_CLOUD_CONFIGURED == true ]]; then
+    salt-call state.apply salt.cloud.config concurrent=True
+  fi
   POSTVERSION=3.1.0
 }
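
The concurrent=True flag matters here because soup commonly runs while another Salt job holds the state-run lock; without it, salt-call would refuse to start a second state run. Illustrative shape of the failure it sidesteps (PID and wording vary by Salt version):

    salt-call state.apply salt.cloud.config
    # local: The function "state.apply" is running as PID 1234 ...
    salt-call state.apply salt.cloud.config concurrent=True   # bypasses the lock check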
@@ -663,15 +675,6 @@ upgrade_check_salt() {
 upgrade_salt() {
   echo "Performing upgrade of Salt from $INSTALLEDSALTVERSION to $NEWSALTVERSION."
   echo ""
-  # Check if salt-cloud is installed
-  if rpm -q salt-cloud &>/dev/null; then
-    SALT_CLOUD_INSTALLED=true
-  fi
-  # Check if salt-cloud is configured
-  if [[ -f /etc/salt/cloud.profiles.d/socloud.conf ]]; then
-    SALT_CLOUD_CONFIGURED=true
-  fi
   echo "Removing yum versionlock for Salt."
   echo ""
   yum versionlock delete "salt"
+1
@@ -27,6 +27,7 @@ sool9_{{host}}:
     log_file: /opt/so/log/salt/minion
     grains:
       hypervisor_host: {{host ~ "_" ~ role}}
+      sosmodel: HVGUEST
   preflight_cmds:
     - |
 {%- set hostnames = [MANAGERHOSTNAME] %}
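
This new grain is what the so-raid-status changes above key off: HVGUEST lands in the software_raid array (where the new /proc/mdstat guard exits cleanly if no md devices exist) and is excluded from check_boss_raid, since a VM guest has no BOSS controller to query. Verifying on a guest, as a sketch:

    salt-call grains.get sosmodel   # expected: HVGUEST on hypervisor guests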
+44 -32
@@ -202,10 +202,10 @@ check_service_status() {
   systemctl status $service_name > /dev/null 2>&1
   local status=$?
   if [ $status -gt 0 ]; then
-    info " $service_name is not running"
+    info "$service_name is not running"
     return 1;
   else
-    info " $service_name is running"
+    info "$service_name is running"
     return 0;
   fi
@@ -1541,13 +1541,8 @@ clear_previous_setup_results() {
 reinstall_init() {
   info "Putting system in state to run setup again"
-  if [[ $install_type =~ ^(MANAGER|EVAL|MANAGERSEARCH|MANAGERHYPE|STANDALONE|FLEET|IMPORT)$ ]]; then
-    local salt_services=( "salt-master" "salt-minion" )
-  else
-    local salt_services=( "salt-minion" )
-  fi
-  local service_retry_count=20
+  # Always include both services. check_service_status skips units that aren't present.
+  local salt_services=( "salt-master" "salt-minion" )
   {
     # remove all of root's cronjobs
@@ -1563,31 +1558,51 @@
     salt-call state.apply ca.remove -linfo --local --file-root=../salt
-    # Kill any salt processes (safely)
+    # Stop salt services and force-kill any lingering salt processes (including orphans
+    # from an earlier reinstall attempt where the unit file is gone but processes survive)
+    # so dnf remove salt can run cleanly
     for service in "${salt_services[@]}"; do
-      # Stop the service in the background so we can exit after a certain amount of time
       if check_service_status "$service"; then
-        systemctl stop "$service" &
+        info "Stopping $service via systemctl"
+        systemctl stop "$service"
       fi
-      local pid=$!
-      local count=0
-      while check_service_status "$service"; do
-        if [[ $count -gt $service_retry_count ]]; then
-          echo "Could not stop $service after 1 minute, exiting setup."
-          # Stop the systemctl process trying to kill the service, show user a message, then exit setup
-          kill -9 $pid
-          fail_setup
-        fi
-        sleep 5
-        ((count++))
-      done
     done
+    # Unconditionally force-kill any remaining salt binaries — these may be orphaned
+    # from a prior aborted reinstall (no unit file, so systemctl can't see them).
+    for salt_bin in salt-master salt-minion salt-call salt-cloud; do
+      if pgrep -f "/usr/bin/${salt_bin}" > /dev/null 2>&1; then
+        info "Force-killing lingering $salt_bin processes"
+        pkill -9 -ef "/usr/bin/${salt_bin}" 2>/dev/null
+      fi
+    done
+    # Catch stray `salt` CLI children from saltutil.kill_all_jobs / state.apply invocations
+    pkill -9 -ef "/usr/bin/python3 /bin/salt" 2>/dev/null
+    # Give the kernel a moment to reap the killed processes before dnf removes the binaries
+    local kill_wait=0
+    while pgrep -f "/usr/bin/salt-" > /dev/null 2>&1; do
+      if [[ $kill_wait -gt 10 ]]; then
+        info "Salt processes still present after SIGKILL + 10s wait; proceeding anyway"
+        pgrep -af "/usr/bin/salt-" | while read -r line; do info "  lingering: $line"; done
+        break
+      fi
+      sleep 1
+      ((kill_wait++))
+    done
+    # Clear the 'failed' state SIGKILL left on the units before removing the package
+    systemctl reset-failed salt-master.service salt-minion.service 2>/dev/null || true
     # Remove all salt configs
-    rm -rf /etc/salt/engines/* /etc/salt/grains /etc/salt/master /etc/salt/master.d/* /etc/salt/minion /etc/salt/minion.d/* /etc/salt/pki/* /etc/salt/proxy /etc/salt/proxy.d/* /var/cache/salt/
     dnf -y remove salt
+    rm -rf /etc/salt/ /var/cache/salt/
+    # Drop systemd's in-memory references to the now-removed units
+    systemctl daemon-reload
+    # Uninstall local Elastic Agent, if installed
+    elastic-agent uninstall -f
     if command -v docker &> /dev/null; then
       # Stop and remove all so-* containers so files can be changed with more safety
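
A note on the process-cleanup idiom in the hunk above: -f makes pgrep and pkill match against the full command line rather than the truncated comm name, which is how daemons with no surviving unit file are still found, and pkill -e echoes each process it kills into the (now tee'd) setup log. A small sketch of the pattern:

    pgrep -af "/usr/bin/salt-"            # -a prints pid plus full command line
    pkill -9 -ef "/usr/bin/salt-minion"   # -e reports each process it kills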
@@ -1611,10 +1626,7 @@ reinstall_init() {
     backup_dir /nsm/hydra "$date_string"
     backup_dir /nsm/influxdb "$date_string"
-    # Uninstall local Elastic Agent, if installed
-    elastic-agent uninstall -f
-  } >> "$setup_log" 2>&1
+  } 2>&1 | tee -a "$setup_log"
   info "System reinstall init has been completed."
 }
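
The redirection swap at the close of the block is what puts reinstall output on the console: the old form wrote stdout and stderr to the logfile only, while "2>&1 | tee -a" duplicates the combined stream to the terminal and appends it to the log. A minimal sketch of the construct:

    {
      echo "visible on the console and in the log"
    } 2>&1 | tee -a "$setup_log"   # -a appends rather than truncating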
+1 -1
@@ -219,7 +219,7 @@ if [ -n "$test_profile" ]; then
   WEBUSER=onionuser@somewhere.invalid
   WEBPASSWD1=0n10nus3r
   WEBPASSWD2=0n10nus3r
-  NODE_DESCRIPTION="${HOSTNAME} - ${install_type} - ${MAINIP}"
+  NODE_DESCRIPTION="${HOSTNAME} - ${install_type} - ${MSRVIP_OFFSET}"
   update_sudoers_for_testing
 fi