Merge pull request #15107 from Security-Onion-Solutions/2.4/dev

2.4/dev
Jorge Reyes
2025-10-06 12:42:47 -05:00
committed by GitHub
6 changed files with 212 additions and 18 deletions

View File

@@ -15,8 +15,21 @@ if ! is_manager_node; then
fi
function update_logstash_outputs() {
# Generate updated JSON payload
JSON_STRING=$(jq -n --arg UPDATEDLIST $NEW_LIST_JSON '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":""}')
if logstash_policy=$(curl -K /opt/so/conf/elasticsearch/curl.config -L "http://localhost:5601/api/fleet/outputs/so-manager_logstash" --retry 3 --retry-delay 10 --fail 2>/dev/null); then
SSL_CONFIG=$(echo "$logstash_policy" | jq -r '.item.ssl')
if SECRETS=$(echo "$logstash_policy" | jq -er '.item.secrets' 2>/dev/null); then
JSON_STRING=$(jq -n \
--arg UPDATEDLIST "$NEW_LIST_JSON" \
--argjson SECRETS "$SECRETS" \
--argjson SSL_CONFIG "$SSL_CONFIG" \
'{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"","ssl": $SSL_CONFIG,"secrets": $SECRETS}')
else
JSON_STRING=$(jq -n \
--arg UPDATEDLIST "$NEW_LIST_JSON" \
--argjson SSL_CONFIG "$SSL_CONFIG" \
'{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"","ssl": $SSL_CONFIG}')
fi
fi
# Update Logstash Outputs
curl -K /opt/so/conf/elasticsearch/curl.config -L -X PUT "localhost:5601/api/fleet/outputs/so-manager_logstash" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" | jq
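
For reference, the branching above carries forward whatever ssl/secrets blocks the existing output already has. A standalone sketch of that jq pattern follows; the saved policy response and its field values are placeholders for illustration, not output from a real grid:

# Hypothetical GET response for /api/fleet/outputs/so-manager_logstash (values are placeholders)
logstash_policy='{"item":{"ssl":{"certificate":"CERT","certificate_authorities":["CA"]},"secrets":{"ssl":{"key":{"id":"abc123"}}}}}'
SSL_CONFIG=$(echo "$logstash_policy" | jq -r '.item.ssl')
if SECRETS=$(echo "$logstash_policy" | jq -er '.item.secrets' 2>/dev/null); then
  # jq -e fails when .item.secrets is null, so this branch only runs when a key secret reference exists
  jq -n --argjson SSL_CONFIG "$SSL_CONFIG" --argjson SECRETS "$SECRETS" \
    '{"ssl": $SSL_CONFIG, "secrets": $SECRETS}'
else
  # Older outputs with no secrets object fall back to sending only the ssl block
  jq -n --argjson SSL_CONFIG "$SSL_CONFIG" \
    '{"ssl": $SSL_CONFIG}'
fi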

View File

@@ -127,7 +127,7 @@ JSON_STRING=$( jq -n \
--arg LOGSTASHCRT "$LOGSTASHCRT" \
--arg LOGSTASHKEY "$LOGSTASHKEY" \
--arg LOGSTASHCA "$LOGSTASHCA" \
'{"name":"grid-logstash","is_default":true,"is_default_monitoring":true,"id":"so-manager_logstash","type":"logstash","hosts":["{{ GLOBALS.manager_ip }}:5055", "{{ GLOBALS.manager }}:5055"],"config_yaml":"","ssl":{"certificate": $LOGSTASHCRT,"key": $LOGSTASHKEY,"certificate_authorities":[ $LOGSTASHCA ]},"proxy_id":null}'
'{"name":"grid-logstash","is_default":true,"is_default_monitoring":true,"id":"so-manager_logstash","type":"logstash","hosts":["{{ GLOBALS.manager_ip }}:5055", "{{ GLOBALS.manager }}:5055"],"config_yaml":"","ssl":{"certificate": $LOGSTASHCRT,"certificate_authorities":[ $LOGSTASHCA ]},"secrets":{"ssl":{"key": $LOGSTASHKEY }},"proxy_id":null}'
)
if ! fleet_api "outputs" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then
echo -e "\nFailed to create logstash fleet output"

View File

@@ -13,6 +13,7 @@
{# Import defaults.yaml for model hardware capabilities #}
{% import_yaml 'hypervisor/defaults.yaml' as DEFAULTS %}
{% set HYPERVISORMERGED = salt['pillar.get']('hypervisor', default=DEFAULTS.hypervisor, merge=True) %}
{# Get hypervisor nodes from pillar #}
{% set NODES = salt['pillar.get']('hypervisor:nodes', {}) %}
@@ -30,9 +31,10 @@
{% set model = '' %}
{% if grains %}
{% set minion_id = grains.keys() | first %}
{% set model = grains[minion_id].get('sosmodel', '') %}
{% set model = grains[minion_id].get('sosmodel', grains[minion_id].get('byodmodel', '')) %}
{% endif %}
{% set model_config = DEFAULTS.hypervisor.model.get(model, {}) %}
{% set model_config = HYPERVISORMERGED.model.get(model, {}) %}
{# Get VM list from VMs file #}
{% set vms = {} %}
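
The net effect of the two changed lines above is a fallback from the sosmodel grain to byodmodel, with the model configuration now read from the pillar-merged hypervisor data rather than defaults.yaml alone. A rough command-line sketch of that grain fallback, run on the hypervisor itself (the salt-call invocation is an assumption for illustration, not part of this change):

MODEL=$(salt-call --out=newline_values_only grains.get sosmodel 2>/dev/null)
if [ -z "$MODEL" ]; then
  # BYOD hypervisors carry byodmodel instead of sosmodel
  MODEL=$(salt-call --out=newline_values_only grains.get byodmodel 2>/dev/null)
fi
echo "hypervisor model: ${MODEL:-<none>}"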

View File

@@ -30,7 +30,9 @@
#
# WARNING: This script will DESTROY all data on the target drives!
#
# USAGE: sudo ./so-nvme-raid1.sh
# USAGE:
# sudo ./so-nvme-raid1.sh # Normal operation
# sudo ./so-nvme-raid1.sh --force-cleanup # Force cleanup of existing RAID
#
#################################################################
@@ -41,6 +43,19 @@ set -e
RAID_ARRAY_NAME="md0"
RAID_DEVICE="/dev/${RAID_ARRAY_NAME}"
MOUNT_POINT="/nsm"
FORCE_CLEANUP=false
# Parse command line arguments
for arg in "$@"; do
case $arg in
--force-cleanup)
FORCE_CLEANUP=true
shift
;;
*)
;;
esac
done
# Function to log messages
log() {
@@ -55,6 +70,91 @@ check_root() {
fi
}
# Function to force cleanup all RAID components
force_cleanup_raid() {
log "=== FORCE CLEANUP MODE ==="
log "This will destroy all RAID configurations and data on target drives!"
# Stop all MD arrays
log "Stopping all MD arrays"
mdadm --stop --scan 2>/dev/null || true
# Wait for arrays to stop
sleep 2
# Remove any running md devices
for md in /dev/md*; do
if [ -b "$md" ]; then
log "Stopping $md"
mdadm --stop "$md" 2>/dev/null || true
fi
done
# Force cleanup both NVMe drives
for device in "/dev/nvme0n1" "/dev/nvme1n1"; do
log "Force cleaning $device"
# Kill any processes using the device
fuser -k "${device}"* 2>/dev/null || true
# Unmount any mounted partitions
for part in "${device}"*; do
if [ -b "$part" ]; then
umount -f "$part" 2>/dev/null || true
fi
done
# Force zero RAID superblocks on partitions
for part in "${device}"p*; do
if [ -b "$part" ]; then
log "Zeroing RAID superblock on $part"
mdadm --zero-superblock --force "$part" 2>/dev/null || true
fi
done
# Zero superblock on the device itself
log "Zeroing RAID superblock on $device"
mdadm --zero-superblock --force "$device" 2>/dev/null || true
# Remove LVM physical volumes
pvremove -ff -y "$device" 2>/dev/null || true
# Wipe all filesystem and partition signatures
log "Wiping all signatures from $device"
wipefs -af "$device" 2>/dev/null || true
# Overwrite the beginning of the drive (partition table area)
log "Clearing partition table on $device"
dd if=/dev/zero of="$device" bs=1M count=10 2>/dev/null || true
# Clear the end of the drive (backup partition table area)
local device_size=$(blockdev --getsz "$device" 2>/dev/null || echo "0")
if [ "$device_size" -gt 0 ]; then
dd if=/dev/zero of="$device" bs=512 seek=$(( device_size - 2048 )) count=2048 2>/dev/null || true
fi
# Force kernel to re-read partition table
blockdev --rereadpt "$device" 2>/dev/null || true
partprobe -s "$device" 2>/dev/null || true
done
# Clear mdadm configuration
log "Clearing mdadm configuration"
echo "DEVICE partitions" > /etc/mdadm.conf
# Remove any fstab entries for the RAID device or mount point
log "Cleaning fstab entries"
sed -i "\|${RAID_DEVICE}|d" /etc/fstab
sed -i "\|${MOUNT_POINT}|d" /etc/fstab
# Wait for system to settle
udevadm settle
sleep 5
log "Force cleanup complete!"
log "Proceeding with RAID setup..."
}
# Function to find MD arrays using specific devices
find_md_arrays_using_devices() {
local target_devices=("$@")
@@ -205,10 +305,15 @@ check_existing_raid() {
fi
log "Error: $device appears to be part of an existing RAID array"
log "To reuse this device, you must first:"
log "1. Unmount any filesystems"
log "2. Stop the RAID array: mdadm --stop $array_name"
log "3. Zero the superblock: mdadm --zero-superblock ${device}p1"
log "Old RAID metadata detected but array is not running."
log ""
log "To fix this, run the script with --force-cleanup:"
log " sudo $0 --force-cleanup"
log ""
log "Or manually clean up with:"
log "1. Stop any arrays: mdadm --stop --scan"
log "2. Zero superblocks: mdadm --zero-superblock --force ${device}p1"
log "3. Wipe signatures: wipefs -af $device"
exit 1
fi
done
@@ -238,7 +343,7 @@ ensure_devices_free() {
done
# Clear MD superblock
mdadm --zero-superblock "${device}"* 2>/dev/null || true
mdadm --zero-superblock --force "${device}"* 2>/dev/null || true
# Remove LVM PV if exists
pvremove -ff -y "$device" 2>/dev/null || true
@@ -263,6 +368,11 @@ main() {
# Check if running as root
check_root
# If force cleanup flag is set, do aggressive cleanup first
if [ "$FORCE_CLEANUP" = true ]; then
force_cleanup_raid
fi
# Check for existing RAID setup
check_existing_raid
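
Taken together, a usage sketch for the new flag, with post-setup checks that are suggestions rather than part of the script:

# Force-clean stale RAID metadata on both NVMe drives, then let the script rebuild the mirror
sudo ./so-nvme-raid1.sh --force-cleanup

# Afterwards, a few ways to confirm the array came up cleanly
cat /proc/mdstat                   # md0 should appear, showing [UU] once the resync finishes
lsblk /dev/nvme0n1 /dev/nvme1n1    # both drives should carry an md0 member partition
df -h /nsm                         # /nsm should be mounted from /dev/md0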

View File

@@ -422,6 +422,7 @@ preupgrade_changes() {
[[ "$INSTALLEDVERSION" == 2.4.150 ]] && up_to_2.4.160
[[ "$INSTALLEDVERSION" == 2.4.160 ]] && up_to_2.4.170
[[ "$INSTALLEDVERSION" == 2.4.170 ]] && up_to_2.4.180
[[ "$INSTALLEDVERSION" == 2.4.180 ]] && up_to_2.4.190
true
}
@@ -617,6 +618,16 @@ post_to_2.4.190() {
update_import_fleet_output
fi
# Check if expected default policy is logstash (global.pipeline is REDIS or "")
pipeline=$(lookup_pillar "pipeline" "global")
if [[ -z "$pipeline" ]] || [[ "$pipeline" == "REDIS" ]]; then
# Check if this grid is currently affected by a corrupt fleet output policy
if elastic-agent status | grep "config: key file not configured" > /dev/null 2>&1; then
echo "Elastic Agent shows an SSL error connecting to the logstash output. Updating output policy..."
update_default_logstash_output
fi
fi
POSTVERSION=2.4.190
}
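
A rough manual equivalent of the detection above (lookup_pillar is internal to soup; the salt-call lookup shown here is an assumption):

pipeline=$(salt-call --out=newline_values_only pillar.get global:pipeline 2>/dev/null)
if [ -z "$pipeline" ] || [ "$pipeline" = "REDIS" ]; then
  # The broken output manifests as a missing key file in the agent's logstash config
  if elastic-agent status 2>&1 | grep -q "config: key file not configured"; then
    echo "grid-logstash output is missing its key reference"
  fi
fi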
@@ -1173,6 +1184,31 @@ update_import_fleet_output() {
fi
}
update_default_logstash_output() {
echo "Updating fleet logstash output policy grid-logstash"
if logstash_policy=$(curl -K /opt/so/conf/elasticsearch/curl.config -L "http://localhost:5601/api/fleet/outputs/so-manager_logstash" --retry 3 --retry-delay 10 --fail 2>/dev/null); then
# Keep the already-configured hosts for this update; subsequent host updates come from so-elastic-fleet-outputs-update
HOSTS=$(echo "$logstash_policy" | jq -r '.item.hosts')
DEFAULT_ENABLED=$(echo "$logstash_policy" | jq -r '.item.is_default')
DEFAULT_MONITORING_ENABLED=$(echo "$logstash_policy" | jq -r '.item.is_default_monitoring')
LOGSTASHKEY=$(openssl rsa -in /etc/pki/elasticfleet-logstash.key)
LOGSTASHCRT=$(openssl x509 -in /etc/pki/elasticfleet-logstash.crt)
LOGSTASHCA=$(openssl x509 -in /etc/pki/tls/certs/intca.crt)
JSON_STRING=$(jq -n \
--argjson HOSTS "$HOSTS" \
--arg DEFAULT_ENABLED "$DEFAULT_ENABLED" \
--arg DEFAULT_MONITORING_ENABLED "$DEFAULT_MONITORING_ENABLED" \
--arg LOGSTASHKEY "$LOGSTASHKEY" \
--arg LOGSTASHCRT "$LOGSTASHCRT" \
--arg LOGSTASHCA "$LOGSTASHCA" \
'{"name":"grid-logstash","type":"logstash","hosts": $HOSTS,"is_default": $DEFAULT_ENABLED,"is_default_monitoring": $DEFAULT_MONITORING_ENABLED,"config_yaml":"","ssl":{"certificate": $LOGSTASHCRT,"certificate_authorities":[ $LOGSTASHCA ]},"secrets":{"ssl":{"key": $LOGSTASHKEY }}}')
fi
if curl -K /opt/so/conf/elasticsearch/curl.config -L -X PUT "localhost:5601/api/fleet/outputs/so-manager_logstash" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" --retry 3 --retry-delay 10 --fail; then
echo "Successfully updated grid-logstash fleet output policy"
fi
}
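
An optional sanity check after the update, using the same curl config as the script; the jq paths follow the Fleet outputs API response shape, and the expectation of a secrets.ssl.key reference is an assumption based on the payload built above:

curl -K /opt/so/conf/elasticsearch/curl.config -L "http://localhost:5601/api/fleet/outputs/so-manager_logstash" |
  jq '{hosts: .item.hosts, has_ssl_cert: (.item.ssl.certificate != null), has_key_secret: (.item.secrets.ssl.key != null)}'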
update_salt_mine() {
echo "Populating the mine with mine_functions for each host."
set +e

View File

@@ -161,6 +161,7 @@ DEFAULT_BASE_PATH = '/opt/so/saltstack/local/salt/hypervisor/hosts'
VALID_ROLES = ['sensor', 'searchnode', 'idh', 'receiver', 'heavynode', 'fleet']
LICENSE_PATH = '/opt/so/saltstack/local/pillar/soc/license.sls'
DEFAULTS_PATH = '/opt/so/saltstack/default/salt/hypervisor/defaults.yaml'
HYPERVISOR_PILLAR_PATH = '/opt/so/saltstack/local/pillar/hypervisor/soc_hypervisor.sls'
# Define the retention period for destroyed VMs (in hours)
DESTROYED_VM_RETENTION_HOURS = 48
@@ -271,7 +272,7 @@ def parse_hardware_indices(hw_value: Any) -> List[int]:
return indices
def get_hypervisor_model(hypervisor: str) -> str:
"""Get sosmodel from hypervisor grains."""
"""Get sosmodel or byodmodel from hypervisor grains."""
try:
# Get cached grains using Salt runner
grains = runner.cmd(
@@ -283,9 +284,9 @@ def get_hypervisor_model(hypervisor: str) -> str:
# Get the first minion ID that matches our hypervisor
minion_id = next(iter(grains.keys()))
model = grains[minion_id].get('sosmodel')
model = grains[minion_id].get('sosmodel', grains[minion_id].get('byodmodel', ''))
if not model:
raise ValueError(f"No sosmodel grain found for hypervisor {hypervisor}")
raise ValueError(f"No sosmodel or byodmodel grain found for hypervisor {hypervisor}")
log.debug("Found model %s for hypervisor %s", model, hypervisor)
return model
@@ -295,16 +296,48 @@ def get_hypervisor_model(hypervisor: str) -> str:
raise
def load_hardware_defaults(model: str) -> dict:
"""Load hardware configuration from defaults.yaml."""
"""Load hardware configuration from defaults.yaml and optionally override with pillar configuration."""
config = None
config_source = None
try:
# First, try to load from defaults.yaml
log.debug("Checking for model %s in %s", model, DEFAULTS_PATH)
defaults = read_yaml_file(DEFAULTS_PATH)
if not defaults or 'hypervisor' not in defaults:
raise ValueError("Invalid defaults.yaml structure")
if 'model' not in defaults['hypervisor']:
raise ValueError("No model configurations found in defaults.yaml")
if model not in defaults['hypervisor']['model']:
raise ValueError(f"Model {model} not found in defaults.yaml")
return defaults['hypervisor']['model'][model]
# Check if model exists in defaults
if model in defaults['hypervisor']['model']:
config = defaults['hypervisor']['model'][model]
config_source = DEFAULTS_PATH
log.debug("Found model %s in %s", model, DEFAULTS_PATH)
# Then, try to load from pillar file (if it exists)
try:
log.debug("Checking for model %s in %s", model, HYPERVISOR_PILLAR_PATH)
pillar_config = read_yaml_file(HYPERVISOR_PILLAR_PATH)
if pillar_config and 'hypervisor' in pillar_config:
if 'model' in pillar_config['hypervisor']:
if model in pillar_config['hypervisor']['model']:
# Override with pillar configuration
config = pillar_config['hypervisor']['model'][model]
config_source = HYPERVISOR_PILLAR_PATH
log.debug("Found model %s in %s (overriding defaults)", model, HYPERVISOR_PILLAR_PATH)
except FileNotFoundError:
log.debug("Pillar file %s not found, using defaults only", HYPERVISOR_PILLAR_PATH)
except Exception as e:
log.warning("Failed to read pillar file %s: %s (using defaults)", HYPERVISOR_PILLAR_PATH, str(e))
# If model was not found in either file, raise an error
if config is None:
raise ValueError(f"Model {model} not found in {DEFAULTS_PATH} or {HYPERVISOR_PILLAR_PATH}")
log.debug("Using hardware configuration for model %s from %s", model, config_source)
return config
except Exception as e:
log.error("Failed to load hardware defaults: %s", str(e))
raise
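
For a quick check of which file a given model would be loaded from under this precedence, a shell approximation follows; the model name is hypothetical and the grep-based lookup only approximates the YAML parsing the function performs:

MODEL="SOSMN-EXAMPLE"
PILLAR="/opt/so/saltstack/local/pillar/hypervisor/soc_hypervisor.sls"
DEFAULTS="/opt/so/saltstack/default/salt/hypervisor/defaults.yaml"
if [ -f "$PILLAR" ] && grep -qE "^[[:space:]]+${MODEL}:" "$PILLAR"; then
  # A model entry in the pillar file overrides defaults.yaml
  echo "model ${MODEL}: pillar override in ${PILLAR} wins"
elif grep -qE "^[[:space:]]+${MODEL}:" "$DEFAULTS"; then
  echo "model ${MODEL}: defaults from ${DEFAULTS}"
else
  echo "model ${MODEL}: not found in either file" >&2
fi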