Compare commits

...

13 Commits

Author SHA1 Message Date
Josh Patterson 21aeb68188 fix sominion_setup reactor 2026-04-27 14:30:41 -04:00
Josh Patterson 81e60ec5bf Merge pull request #15829 from Security-Onion-Solutions/fix/reinstall2
fix reinstall
2026-04-24 16:20:53 -04:00
Josh Patterson 199c2746f1 stop salt-minion and salt-master regardless of install type. display reinstall on console and save to logfile 2026-04-24 15:24:11 -04:00
Josh Patterson 8eca465ef6 uninstall elastic-agent before stopping dockers on reinstall 2026-04-24 14:35:11 -04:00
Jorge Reyes a45e59239f Merge pull request #15826 from Security-Onion-Solutions/reyesj2-es933
heavynode should run es cluster state
2026-04-24 13:07:48 -05:00
Josh Patterson 2ad0bcab7c Merge pull request #15828 from Security-Onion-Solutions/fix/annotations
readonly soc and kratos enabled
2026-04-24 14:00:02 -04:00
reyesj2 90ecbe90d8 allow heavynodes to run elasticsearch/cluster state 2026-04-24 12:56:27 -05:00
Josh Patterson 813fa03dc3 Merge pull request #15824 from Security-Onion-Solutions/fix/reinstall2
fix reinstall issue with salt
2026-04-24 12:22:54 -04:00
Josh Patterson 02381fbbe9 stop salt-cloud , belt-and-suspenders against a broken/incomplete salt RPM 2026-04-24 11:33:21 -04:00
Josh Patterson 0722b681b1 redo service stop on reinstall 2026-04-24 11:04:46 -04:00
Josh Patterson 564815e836 redo how services are stopped during reinstall 2026-04-24 10:46:29 -04:00
Jorge Reyes 88b30adf7f Merge pull request #15823 from Security-Onion-Solutions/reyesj2-es933
typo
2026-04-24 09:27:08 -05:00
reyesj2 b6acf3b522 typo 2026-04-24 09:24:58 -05:00
4 changed files with 99 additions and 52 deletions
+1 -1
View File
@@ -79,7 +79,7 @@
),
'so-heavynode': (
sensor_states +
['elasticagent', 'elasticsearch', 'logstash', 'redis', 'nginx']
['elasticagent', 'elasticsearch', 'elasticsearch.cluster', 'logstash', 'redis', 'nginx']
),
'so-idh': (
['idh']
+1 -1
View File
@@ -17,7 +17,7 @@ include:
- elasticsearch.ssl
- elasticsearch.config
- elasticsearch.sostatus
{%- if GLOBALS.role != "so-searchode" %}
{%- if GLOBALS.role != "so-searchnode" %}
- elasticsearch.cluster
{%- endif%}
+53 -18
View File
@@ -6,39 +6,74 @@
# Elastic License 2.0.
import logging
from subprocess import call
import yaml
import os
import re
import shlex
import subprocess
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Absolute path of the so-minion executable; used as argv[0] when run() invokes it.
SO_MINION = '/usr/sbin/so-minion'
# Validation patterns for event fields before they are passed to so-minion.
# NOTE(review): every pattern ends in `$`, which in Python also matches just
# before a single trailing newline; use fullmatch() (or `\Z`) when strictness
# against a trailing "\n" matters.
# NODETYPE: one uppercase letter followed by up to 31 uppercase letters, digits or underscores.
_NODETYPE_RE = re.compile(r'^[A-Z][A-Z0-9_]{0,31}$')
# Minion id: 1-253 characters drawn from letters, digits, dot, underscore and hyphen.
_MINIONID_RE = re.compile(r'^[A-Za-z0-9._-]{1,253}$')
# Same character set and length as the minion id; run() applies it to MNIC, INTERFACE and LSHOSTNAME.
_HOSTPART_RE = re.compile(r'^[A-Za-z0-9._-]{1,253}$')
# Dotted-quad IPv4 address: four octets in the range 0-255 (leading zeros are accepted).
_IPV4_RE = re.compile(
r'^(?:(?:25[0-5]|2[0-4]\d|[01]?\d?\d)\.){3}'
r'(?:25[0-5]|2[0-4]\d|[01]?\d?\d)$'
)
# Heap size: 1-6 digits with an optional k/m/g suffix in either case.
_HEAP_RE = re.compile(r'^\d{1,6}[kKmMgG]?$')
def _check(name, value, pattern):
    """Return ``str(value)`` if the whole string matches *pattern*, else raise.

    Args:
        name: Field name; used only to build the error message.
        value: Value to validate. It is converted with ``str()`` first.
        pattern: Compiled regular expression the entire string must match.

    Returns:
        The stringified value.

    Raises:
        ValueError: If the stringified value does not fully match *pattern*.
    """
    s = str(value)
    # fullmatch() instead of match(): with match(), a pattern that ends in `$`
    # also accepts a value carrying one trailing newline (e.g. 'eth0\n').
    if pattern.fullmatch(s) is None:
        raise ValueError("sominion_setup_reactor: refusing unsafe %s=%r" % (name, value))
    return s
def run():
    """Invoke ``so-minion -o=addVM`` for the minion described by the event.

    Reads the module-level name ``data``, which is not defined in the visible
    code (presumably injected by Salt's reactor renderer -- TODO confirm).
    ``data['id']`` supplies the minion id and ``data['data']`` the setup
    fields. Required fields: HYPERVISOR_HOST, NODETYPE, MNIC, MAINIP,
    CPUCORES, NODE_DESCRIPTION. Optional fields: CORECOUNT, INTERFACE,
    ES_HEAP_SIZE, LS_HEAP_SIZE, LSHOSTNAME.

    The command is built as an argument list and executed without a shell;
    NODETYPE is handed to the child through its environment.

    Raises:
        KeyError: A required field is missing from the event.
        ValueError: A field fails its validation pattern, or a numeric field
            cannot be converted with ``int()`` (which may also raise
            ``TypeError`` for non-numeric types).
    """
    log.info('sominion_setup_reactor: Running')
    minionid = data['id']
    DATA = data['data']
    # Not used below; the lookup still raises KeyError when the key is absent.
    hv_name = DATA['HYPERVISOR_HOST']
    log.info('sominion_setup_reactor: DATA: %s', DATA)

    nodetype = _check('NODETYPE', DATA['NODETYPE'], _NODETYPE_RE)

    # Build the base command as an argv list so that no value is ever
    # interpreted by a shell.
    argv = [
        SO_MINION,
        '-o=addVM',
        '-m=' + _check('minionid', minionid, _MINIONID_RE),
        '-n=' + _check('MNIC', DATA['MNIC'], _HOSTPART_RE),
        '-i=' + _check('MAINIP', DATA['MAINIP'], _IPV4_RE),
        '-c=' + str(int(DATA['CPUCORES'])),
        # Free-form text: passed as a single argv element, so it needs no quoting.
        '-d=' + str(DATA['NODE_DESCRIPTION']),
    ]

    # Add optional arguments only if they exist in DATA
    if 'CORECOUNT' in DATA:
        argv.append('-C=' + str(int(DATA['CORECOUNT'])))
    if 'INTERFACE' in DATA:
        argv.append('-a=' + _check('INTERFACE', DATA['INTERFACE'], _HOSTPART_RE))
    if 'ES_HEAP_SIZE' in DATA:
        argv.append('-e=' + _check('ES_HEAP_SIZE', DATA['ES_HEAP_SIZE'], _HEAP_RE))
    if 'LS_HEAP_SIZE' in DATA:
        argv.append('-l=' + _check('LS_HEAP_SIZE', DATA['LS_HEAP_SIZE'], _HEAP_RE))
    if 'LSHOSTNAME' in DATA:
        argv.append('-L=' + _check('LSHOSTNAME', DATA['LSHOSTNAME'], _HOSTPART_RE))

    # NODETYPE travels in the child's environment rather than on the command line.
    env = os.environ.copy()
    env['NODETYPE'] = nodetype

    # shlex.quote is used only to make the logged command line unambiguous.
    log.info(
        'sominion_setup_reactor: argv: %s (NODETYPE=%s)',
        ' '.join(shlex.quote(a) for a in argv),
        shlex.quote(nodetype),
    )

    rc = subprocess.call(argv, shell=False, env=env)
    log.info('sominion_setup_reactor: rc: %s', rc)
+44 -32
View File
@@ -202,10 +202,10 @@ check_service_status() {
systemctl status $service_name > /dev/null 2>&1
local status=$?
if [ $status -gt 0 ]; then
info " $service_name is not running"
info "$service_name is not running"
return 1;
else
info " $service_name is running"
info "$service_name is running"
return 0;
fi
@@ -1541,13 +1541,8 @@ clear_previous_setup_results() {
reinstall_init() {
info "Putting system in state to run setup again"
if [[ $install_type =~ ^(MANAGER|EVAL|MANAGERSEARCH|MANAGERHYPE|STANDALONE|FLEET|IMPORT)$ ]]; then
local salt_services=( "salt-master" "salt-minion" )
else
local salt_services=( "salt-minion" )
fi
local service_retry_count=20
# Always include both services. check_service_status skips units that aren't present.
local salt_services=( "salt-master" "salt-minion" )
{
# remove all of root's cronjobs
@@ -1563,31 +1558,51 @@ reinstall_init() {
salt-call state.apply ca.remove -linfo --local --file-root=../salt
# Kill any salt processes (safely)
# Stop salt services and force-kill any lingering salt processes (including orphans
# from an earlier reinstall attempt where the unit file is gone but processes survive)
# so dnf remove salt can run cleanly
for service in "${salt_services[@]}"; do
# Stop the service in the background so we can exit after a certain amount of time
if check_service_status "$service"; then
systemctl stop "$service" &
info "Stopping $service via systemctl"
systemctl stop "$service"
fi
local pid=$!
local count=0
while check_service_status "$service"; do
if [[ $count -gt $service_retry_count ]]; then
echo "Could not stop $service after 1 minute, exiting setup."
# Stop the systemctl process trying to kill the service, show user a message, then exit setup
kill -9 $pid
fail_setup
fi
sleep 5
((count++))
done
done
# Unconditionally force-kill any remaining salt binaries — these may be orphaned
# from a prior aborted reinstall (no unit file, so systemctl can't see them).
for salt_bin in salt-master salt-minion salt-call salt-cloud; do
if pgrep -f "/usr/bin/${salt_bin}" > /dev/null 2>&1; then
info "Force-killing lingering $salt_bin processes"
pkill -9 -ef "/usr/bin/${salt_bin}" 2>/dev/null
fi
done
# Catch stray `salt` CLI children from saltutil.kill_all_jobs / state.apply invocations
pkill -9 -ef "/usr/bin/python3 /bin/salt" 2>/dev/null
# Give the kernel a moment to reap the killed processes before dnf removes the binaries
local kill_wait=0
while pgrep -f "/usr/bin/salt-" > /dev/null 2>&1; do
if [[ $kill_wait -gt 10 ]]; then
info "Salt processes still present after SIGKILL + 10s wait; proceeding anyway"
pgrep -af "/usr/bin/salt-" | while read -r line; do info " lingering: $line"; done
break
fi
sleep 1
((kill_wait++))
done
# Clear the 'failed' state SIGKILL left on the units before removing the package
systemctl reset-failed salt-master.service salt-minion.service 2>/dev/null || true
# Remove all salt configs
rm -rf /etc/salt/engines/* /etc/salt/grains /etc/salt/master /etc/salt/master.d/* /etc/salt/minion /etc/salt/minion.d/* /etc/salt/pki/* /etc/salt/proxy /etc/salt/proxy.d/* /var/cache/salt/
dnf -y remove salt
rm -rf /etc/salt/ /var/cache/salt/
# Drop systemd's in-memory references to the now-removed units
systemctl daemon-reload
# Uninstall local Elastic Agent, if installed
elastic-agent uninstall -f
if command -v docker &> /dev/null; then
# Stop and remove all so-* containers so files can be changed with more safety
@@ -1611,10 +1626,7 @@ reinstall_init() {
backup_dir /nsm/hydra "$date_string"
backup_dir /nsm/influxdb "$date_string"
# Uninstall local Elastic Agent, if installed
elastic-agent uninstall -f
} >> "$setup_log" 2>&1
} 2>&1 | tee -a "$setup_log"
info "System reinstall init has been completed."
}