Merge pull request #15107 from Security-Onion-Solutions/2.4/dev

2.4/dev
Jorge Reyes
2025-10-06 12:42:47 -05:00
committed by GitHub
6 changed files with 212 additions and 18 deletions

View File

@@ -15,8 +15,21 @@ if ! is_manager_node; then
fi
function update_logstash_outputs() {
# Generate updated JSON payload
JSON_STRING=$(jq -n --arg UPDATEDLIST $NEW_LIST_JSON '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":""}')
if logstash_policy=$(curl -K /opt/so/conf/elasticsearch/curl.config -L "http://localhost:5601/api/fleet/outputs/so-manager_logstash" --retry 3 --retry-delay 10 --fail 2>/dev/null); then
SSL_CONFIG=$(echo "$logstash_policy" | jq -r '.item.ssl')
if SECRETS=$(echo "$logstash_policy" | jq -er '.item.secrets' 2>/dev/null); then
JSON_STRING=$(jq -n \
--arg UPDATEDLIST "$NEW_LIST_JSON" \
--argjson SECRETS "$SECRETS" \
--argjson SSL_CONFIG "$SSL_CONFIG" \
'{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"","ssl": $SSL_CONFIG,"secrets": $SECRETS}')
else
JSON_STRING=$(jq -n \
--arg UPDATEDLIST "$NEW_LIST_JSON" \
--argjson SSL_CONFIG "$SSL_CONFIG" \
'{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"","ssl": $SSL_CONFIG}')
fi
fi
# Update Logstash Outputs
curl -K /opt/so/conf/elasticsearch/curl.config -L -X PUT "localhost:5601/api/fleet/outputs/so-manager_logstash" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" | jq
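
For reference, the branching above carries forward whatever ssl/secrets blocks the existing output already has. A standalone sketch of that jq pattern follows; the saved policy response and its field values are placeholders for illustration, not output from a real grid:

# Hypothetical GET response for /api/fleet/outputs/so-manager_logstash (values are placeholders)
logstash_policy='{"item":{"ssl":{"certificate":"CERT","certificate_authorities":["CA"]},"secrets":{"ssl":{"key":{"id":"abc123"}}}}}'
SSL_CONFIG=$(echo "$logstash_policy" | jq -r '.item.ssl')
if SECRETS=$(echo "$logstash_policy" | jq -er '.item.secrets' 2>/dev/null); then
  # jq -e fails when .item.secrets is null, so this branch only runs when a key secret reference exists
  jq -n --argjson SSL_CONFIG "$SSL_CONFIG" --argjson SECRETS "$SECRETS" \
    '{"ssl": $SSL_CONFIG, "secrets": $SECRETS}'
else
  # Older outputs with no secrets object fall back to sending only the ssl block
  jq -n --argjson SSL_CONFIG "$SSL_CONFIG" \
    '{"ssl": $SSL_CONFIG}'
fi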

View File

@@ -127,7 +127,7 @@ JSON_STRING=$( jq -n \
--arg LOGSTASHCRT "$LOGSTASHCRT" \
--arg LOGSTASHKEY "$LOGSTASHKEY" \
--arg LOGSTASHCA "$LOGSTASHCA" \
'{"name":"grid-logstash","is_default":true,"is_default_monitoring":true,"id":"so-manager_logstash","type":"logstash","hosts":["{{ GLOBALS.manager_ip }}:5055", "{{ GLOBALS.manager }}:5055"],"config_yaml":"","ssl":{"certificate": $LOGSTASHCRT,"key": $LOGSTASHKEY,"certificate_authorities":[ $LOGSTASHCA ]},"proxy_id":null}'
'{"name":"grid-logstash","is_default":true,"is_default_monitoring":true,"id":"so-manager_logstash","type":"logstash","hosts":["{{ GLOBALS.manager_ip }}:5055", "{{ GLOBALS.manager }}:5055"],"config_yaml":"","ssl":{"certificate": $LOGSTASHCRT,"certificate_authorities":[ $LOGSTASHCA ]},"secrets":{"ssl":{"key": $LOGSTASHKEY }},"proxy_id":null}'
)
if ! fleet_api "outputs" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then
echo -e "\nFailed to create logstash fleet output"

View File

@@ -13,6 +13,7 @@
{# Import defaults.yaml for model hardware capabilities #}
{% import_yaml 'hypervisor/defaults.yaml' as DEFAULTS %}
{% set HYPERVISORMERGED = salt['pillar.get']('hypervisor', default=DEFAULTS.hypervisor, merge=True) %}
{# Get hypervisor nodes from pillar #}
{% set NODES = salt['pillar.get']('hypervisor:nodes', {}) %}
@@ -30,9 +31,10 @@
{% set model = '' %}
{% if grains %}
{% set minion_id = grains.keys() | first %}
{% set model = grains[minion_id].get('sosmodel', '') %}
{% set model = grains[minion_id].get('sosmodel', grains[minion_id].get('byodmodel', '')) %}
{% endif %}
{% set model_config = DEFAULTS.hypervisor.model.get(model, {}) %}
{% set model_config = HYPERVISORMERGED.model.get(model, {}) %}
{# Get VM list from VMs file #}
{% set vms = {} %}
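
The net effect of the two changed lines above is a fallback from the sosmodel grain to byodmodel, with the model configuration now read from the pillar-merged hypervisor data rather than defaults.yaml alone. A rough command-line sketch of that grain fallback, run on the hypervisor itself (the salt-call invocation is an assumption for illustration, not part of this change):

MODEL=$(salt-call --out=newline_values_only grains.get sosmodel 2>/dev/null)
if [ -z "$MODEL" ]; then
  # BYOD hypervisors carry byodmodel instead of sosmodel
  MODEL=$(salt-call --out=newline_values_only grains.get byodmodel 2>/dev/null)
fi
echo "hypervisor model: ${MODEL:-<none>}"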

View File

@@ -30,7 +30,9 @@
#
# WARNING: This script will DESTROY all data on the target drives!
#
# USAGE: sudo ./so-nvme-raid1.sh
# USAGE:
# sudo ./so-nvme-raid1.sh # Normal operation
# sudo ./so-nvme-raid1.sh --force-cleanup # Force cleanup of existing RAID
#
#################################################################
@@ -41,6 +43,19 @@ set -e
RAID_ARRAY_NAME="md0"
RAID_DEVICE="/dev/${RAID_ARRAY_NAME}"
MOUNT_POINT="/nsm"
FORCE_CLEANUP=false
# Parse command line arguments
for arg in "$@"; do
case $arg in
--force-cleanup)
FORCE_CLEANUP=true
shift
;;
*)
;;
esac
done
# Function to log messages
log() {
@@ -55,6 +70,91 @@ check_root() {
fi
}
# Function to force cleanup all RAID components
force_cleanup_raid() {
log "=== FORCE CLEANUP MODE ==="
log "This will destroy all RAID configurations and data on target drives!"
# Stop all MD arrays
log "Stopping all MD arrays"
mdadm --stop --scan 2>/dev/null || true
# Wait for arrays to stop
sleep 2
# Remove any running md devices
for md in /dev/md*; do
if [ -b "$md" ]; then
log "Stopping $md"
mdadm --stop "$md" 2>/dev/null || true
fi
done
# Force cleanup both NVMe drives
for device in "/dev/nvme0n1" "/dev/nvme1n1"; do
log "Force cleaning $device"
# Kill any processes using the device
fuser -k "${device}"* 2>/dev/null || true
# Unmount any mounted partitions
for part in "${device}"*; do
if [ -b "$part" ]; then
umount -f "$part" 2>/dev/null || true
fi
done
# Force zero RAID superblocks on partitions
for part in "${device}"p*; do
if [ -b "$part" ]; then
log "Zeroing RAID superblock on $part"
mdadm --zero-superblock --force "$part" 2>/dev/null || true
fi
done
# Zero superblock on the device itself
log "Zeroing RAID superblock on $device"
mdadm --zero-superblock --force "$device" 2>/dev/null || true
# Remove LVM physical volumes
pvremove -ff -y "$device" 2>/dev/null || true
# Wipe all filesystem and partition signatures
log "Wiping all signatures from $device"
wipefs -af "$device" 2>/dev/null || true
# Overwrite the beginning of the drive (partition table area)
log "Clearing partition table on $device"
dd if=/dev/zero of="$device" bs=1M count=10 2>/dev/null || true
# Clear the end of the drive (backup partition table area)
local device_size=$(blockdev --getsz "$device" 2>/dev/null || echo "0")
if [ "$device_size" -gt 0 ]; then
dd if=/dev/zero of="$device" bs=512 seek=$(( device_size - 2048 )) count=2048 2>/dev/null || true
fi
# Force kernel to re-read partition table
blockdev --rereadpt "$device" 2>/dev/null || true
partprobe -s "$device" 2>/dev/null || true
done
# Clear mdadm configuration
log "Clearing mdadm configuration"
echo "DEVICE partitions" > /etc/mdadm.conf
# Remove any fstab entries for the RAID device or mount point
log "Cleaning fstab entries"
sed -i "\|${RAID_DEVICE}|d" /etc/fstab
sed -i "\|${MOUNT_POINT}|d" /etc/fstab
# Wait for system to settle
udevadm settle
sleep 5
log "Force cleanup complete!"
log "Proceeding with RAID setup..."
}
# Function to find MD arrays using specific devices
find_md_arrays_using_devices() {
local target_devices=("$@")
@@ -205,10 +305,15 @@ check_existing_raid() {
fi
log "Error: $device appears to be part of an existing RAID array"
log "To reuse this device, you must first:"
log "1. Unmount any filesystems"
log "2. Stop the RAID array: mdadm --stop $array_name"
log "3. Zero the superblock: mdadm --zero-superblock ${device}p1"
log "Old RAID metadata detected but array is not running."
log ""
log "To fix this, run the script with --force-cleanup:"
log " sudo $0 --force-cleanup"
log ""
log "Or manually clean up with:"
log "1. Stop any arrays: mdadm --stop --scan"
log "2. Zero superblocks: mdadm --zero-superblock --force ${device}p1"
log "3. Wipe signatures: wipefs -af $device"
exit 1
fi
done
@@ -238,7 +343,7 @@ ensure_devices_free() {
done
# Clear MD superblock
mdadm --zero-superblock "${device}"* 2>/dev/null || true
mdadm --zero-superblock --force "${device}"* 2>/dev/null || true
# Remove LVM PV if exists
pvremove -ff -y "$device" 2>/dev/null || true
@@ -263,6 +368,11 @@ main() {
# Check if running as root
check_root
# If force cleanup flag is set, do aggressive cleanup first
if [ "$FORCE_CLEANUP" = true ]; then
force_cleanup_raid
fi
# Check for existing RAID setup
check_existing_raid
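
Taken together, a usage sketch for the new flag, with post-setup checks that are suggestions rather than part of the script:

# Force-clean stale RAID metadata on both NVMe drives, then let the script rebuild the mirror
sudo ./so-nvme-raid1.sh --force-cleanup

# Afterwards, a few ways to confirm the array came up cleanly
cat /proc/mdstat                   # md0 should appear, showing [UU] once the resync finishes
lsblk /dev/nvme0n1 /dev/nvme1n1    # both drives should carry an md0 member partition
df -h /nsm                         # /nsm should be mounted from /dev/md0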

View File

@@ -422,6 +422,7 @@ preupgrade_changes() {
[[ "$INSTALLEDVERSION" == 2.4.150 ]] && up_to_2.4.160
[[ "$INSTALLEDVERSION" == 2.4.160 ]] && up_to_2.4.170
[[ "$INSTALLEDVERSION" == 2.4.170 ]] && up_to_2.4.180
[[ "$INSTALLEDVERSION" == 2.4.180 ]] && up_to_2.4.190
true
}
@@ -617,6 +618,16 @@ post_to_2.4.190() {
update_import_fleet_output
fi
# Check if expected default policy is logstash (global.pipeline is REDIS or "")
pipeline=$(lookup_pillar "pipeline" "global")
if [[ -z "$pipeline" ]] || [[ "$pipeline" == "REDIS" ]]; then
# Check if this grid is currently affected by a corrupt fleet output policy
if elastic-agent status | grep "config: key file not configured" > /dev/null 2>&1; then
echo "Elastic Agent shows an SSL error connecting to the logstash output. Updating output policy..."
update_default_logstash_output
fi
fi
POSTVERSION=2.4.190
}
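
A rough manual equivalent of the detection above (lookup_pillar is internal to soup; the salt-call lookup shown here is an assumption):

pipeline=$(salt-call --out=newline_values_only pillar.get global:pipeline 2>/dev/null)
if [ -z "$pipeline" ] || [ "$pipeline" = "REDIS" ]; then
  # The broken output manifests as a missing key file in the agent's logstash config
  if elastic-agent status 2>&1 | grep -q "config: key file not configured"; then
    echo "grid-logstash output is missing its key reference"
  fi
fi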
@@ -1173,6 +1184,31 @@ update_import_fleet_output() {
fi
}
update_default_logstash_output() {
echo "Updating fleet logstash output policy grid-logstash"
if logstash_policy=$(curl -K /opt/so/conf/elasticsearch/curl.config -L "http://localhost:5601/api/fleet/outputs/so-manager_logstash" --retry 3 --retry-delay 10 --fail 2>/dev/null); then
# Keep the already-configured hosts for this update; subsequent host updates come from so-elastic-fleet-outputs-update
HOSTS=$(echo "$logstash_policy" | jq -r '.item.hosts')
DEFAULT_ENABLED=$(echo "$logstash_policy" | jq -r '.item.is_default')
DEFAULT_MONITORING_ENABLED=$(echo "$logstash_policy" | jq -r '.item.is_default_monitoring')
LOGSTASHKEY=$(openssl rsa -in /etc/pki/elasticfleet-logstash.key)
LOGSTASHCRT=$(openssl x509 -in /etc/pki/elasticfleet-logstash.crt)
LOGSTASHCA=$(openssl x509 -in /etc/pki/tls/certs/intca.crt)
JSON_STRING=$(jq -n \
--argjson HOSTS "$HOSTS" \
--arg DEFAULT_ENABLED "$DEFAULT_ENABLED" \
--arg DEFAULT_MONITORING_ENABLED "$DEFAULT_MONITORING_ENABLED" \
--arg LOGSTASHKEY "$LOGSTASHKEY" \
--arg LOGSTASHCRT "$LOGSTASHCRT" \
--arg LOGSTASHCA "$LOGSTASHCA" \
'{"name":"grid-logstash","type":"logstash","hosts": $HOSTS,"is_default": $DEFAULT_ENABLED,"is_default_monitoring": $DEFAULT_MONITORING_ENABLED,"config_yaml":"","ssl":{"certificate": $LOGSTASHCRT,"certificate_authorities":[ $LOGSTASHCA ]},"secrets":{"ssl":{"key": $LOGSTASHKEY }}}')
fi
if curl -K /opt/so/conf/elasticsearch/curl.config -L -X PUT "localhost:5601/api/fleet/outputs/so-manager_logstash" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" --retry 3 --retry-delay 10 --fail; then
echo "Successfully updated grid-logstash fleet output policy"
fi
}
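
An optional sanity check after the update, using the same curl config as the script; the jq paths follow the Fleet outputs API response shape, and the expectation of a secrets.ssl.key reference is an assumption based on the payload built above:

curl -K /opt/so/conf/elasticsearch/curl.config -L "http://localhost:5601/api/fleet/outputs/so-manager_logstash" |
  jq '{hosts: .item.hosts, has_ssl_cert: (.item.ssl.certificate != null), has_key_secret: (.item.secrets.ssl.key != null)}'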
update_salt_mine() {
echo "Populating the mine with mine_functions for each host."
set +e

View File

@@ -161,6 +161,7 @@ DEFAULT_BASE_PATH = '/opt/so/saltstack/local/salt/hypervisor/hosts'
VALID_ROLES = ['sensor', 'searchnode', 'idh', 'receiver', 'heavynode', 'fleet']
LICENSE_PATH = '/opt/so/saltstack/local/pillar/soc/license.sls'
DEFAULTS_PATH = '/opt/so/saltstack/default/salt/hypervisor/defaults.yaml'
HYPERVISOR_PILLAR_PATH = '/opt/so/saltstack/local/pillar/hypervisor/soc_hypervisor.sls'
# Define the retention period for destroyed VMs (in hours)
DESTROYED_VM_RETENTION_HOURS = 48
@@ -271,7 +272,7 @@ def parse_hardware_indices(hw_value: Any) -> List[int]:
return indices
def get_hypervisor_model(hypervisor: str) -> str:
"""Get sosmodel from hypervisor grains."""
"""Get sosmodel or byodmodel from hypervisor grains."""
try:
# Get cached grains using Salt runner
grains = runner.cmd(
@@ -283,9 +284,9 @@ def get_hypervisor_model(hypervisor: str) -> str:
# Get the first minion ID that matches our hypervisor
minion_id = next(iter(grains.keys()))
model = grains[minion_id].get('sosmodel')
model = grains[minion_id].get('sosmodel', grains[minion_id].get('byodmodel', ''))
if not model:
raise ValueError(f"No sosmodel grain found for hypervisor {hypervisor}")
raise ValueError(f"No sosmodel or byodmodel grain found for hypervisor {hypervisor}")
log.debug("Found model %s for hypervisor %s", model, hypervisor)
return model
@@ -295,16 +296,48 @@ def get_hypervisor_model(hypervisor: str) -> str:
raise
def load_hardware_defaults(model: str) -> dict:
"""Load hardware configuration from defaults.yaml."""
"""Load hardware configuration from defaults.yaml and optionally override with pillar configuration."""
config = None
config_source = None
try:
# First, try to load from defaults.yaml
log.debug("Checking for model %s in %s", model, DEFAULTS_PATH)
defaults = read_yaml_file(DEFAULTS_PATH)
if not defaults or 'hypervisor' not in defaults:
raise ValueError("Invalid defaults.yaml structure")
if 'model' not in defaults['hypervisor']:
raise ValueError("No model configurations found in defaults.yaml")
if model not in defaults['hypervisor']['model']:
raise ValueError(f"Model {model} not found in defaults.yaml")
return defaults['hypervisor']['model'][model]
# Check if model exists in defaults
if model in defaults['hypervisor']['model']:
config = defaults['hypervisor']['model'][model]
config_source = DEFAULTS_PATH
log.debug("Found model %s in %s", model, DEFAULTS_PATH)
# Then, try to load from pillar file (if it exists)
try:
log.debug("Checking for model %s in %s", model, HYPERVISOR_PILLAR_PATH)
pillar_config = read_yaml_file(HYPERVISOR_PILLAR_PATH)
if pillar_config and 'hypervisor' in pillar_config:
if 'model' in pillar_config['hypervisor']:
if model in pillar_config['hypervisor']['model']:
# Override with pillar configuration
config = pillar_config['hypervisor']['model'][model]
config_source = HYPERVISOR_PILLAR_PATH
log.debug("Found model %s in %s (overriding defaults)", model, HYPERVISOR_PILLAR_PATH)
except FileNotFoundError:
log.debug("Pillar file %s not found, using defaults only", HYPERVISOR_PILLAR_PATH)
except Exception as e:
log.warning("Failed to read pillar file %s: %s (using defaults)", HYPERVISOR_PILLAR_PATH, str(e))
# If model was not found in either file, raise an error
if config is None:
raise ValueError(f"Model {model} not found in {DEFAULTS_PATH} or {HYPERVISOR_PILLAR_PATH}")
log.debug("Using hardware configuration for model %s from %s", model, config_source)
return config
except Exception as e:
log.error("Failed to load hardware defaults: %s", str(e))
raise
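
For a quick check of which file a given model would be loaded from under this precedence, a shell approximation follows; the model name is hypothetical and the grep-based lookup only approximates the YAML parsing the function performs:

MODEL="SOSMN-EXAMPLE"
PILLAR="/opt/so/saltstack/local/pillar/hypervisor/soc_hypervisor.sls"
DEFAULTS="/opt/so/saltstack/default/salt/hypervisor/defaults.yaml"
if [ -f "$PILLAR" ] && grep -qE "^[[:space:]]+${MODEL}:" "$PILLAR"; then
  # A model entry in the pillar file overrides defaults.yaml
  echo "model ${MODEL}: pillar override in ${PILLAR} wins"
elif grep -qE "^[[:space:]]+${MODEL}:" "$DEFAULTS"; then
  echo "model ${MODEL}: defaults from ${DEFAULTS}"
else
  echo "model ${MODEL}: not found in either file" >&2
fi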