From b0a515f2c33f715e3ca233ad2ac941b8d8fcd1fa Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 16 Jul 2025 12:09:01 -0400 Subject: [PATCH 01/22] update base cloud image location --- salt/_runners/setup_hypervisor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/_runners/setup_hypervisor.py b/salt/_runners/setup_hypervisor.py index 6ddd571c9..9d7116d59 100644 --- a/salt/_runners/setup_hypervisor.py +++ b/salt/_runners/setup_hypervisor.py @@ -165,7 +165,7 @@ def _validate_image_checksum(path, expected_sha256): return True # Constants -IMAGE_URL = "https://yum.oracle.com/templates/OracleLinux/OL9/u5/x86_64/OL9U5_x86_64-kvm-b253.qcow2" +IMAGE_URL = "https://download.securityonion.net/file/securityonion/OL9U5_x86_64-kvm-b253.qcow2" IMAGE_SHA256 = "3b00bbbefc8e78dd28d9f538834fb9e2a03d5ccdc2cadf2ffd0036c0a8f02021" IMAGE_PATH = "/nsm/libvirt/boot/OL9U5_x86_64-kvm-b253.qcow2" MANAGER_HOSTNAME = socket.gethostname() From 58ffe576d7230d237e6afb5a056e69d306e39679 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 16 Jul 2025 12:09:39 -0400 Subject: [PATCH 02/22] add pci mappings for sos hw --- salt/hypervisor/defaults.yaml | 122 +++++++++++++++++++++++++--------- 1 file changed, 92 insertions(+), 30 deletions(-) diff --git a/salt/hypervisor/defaults.yaml b/salt/hypervisor/defaults.yaml index 06509828c..8cf754193 100644 --- a/salt/hypervisor/defaults.yaml +++ b/salt/hypervisor/defaults.yaml @@ -17,42 +17,104 @@ hypervisor: 6: pci_0000_02_00_1 7: pci_0000_41_00_0 8: pci_0000_41_00_1 - model1: + SOSSNNV: hardware: cpu: 128 - memory: 128 + memory: 256 disk: - 1: pci_0000_c7_00_0 - 2: pci_0000_c8_00_0 + 1: pci_0000_42_00_0 + 2: pci_0000_43_00_0 + 3: pci_0000_44_00_0 + 4: pci_0000_45_00_0 copper: - 1: pci_0000_c4_00_0 - 2: pci_0000_c4_00_1 - 3: pci_0000_c4_00_2 - 4: pci_0000_c4_00_3 + sfp: + 1: pci_0000_02_00_0 + 2: pci_0000_02_00_1 + 3: pci_0000_41_00_0 + 4: pci_0000_41_00_1 + SOSSNNV-DE02: + cpu: 128 + memory: 384 + disk: + 1: pci_0000_41_00_0 + 2: pci_0000_42_00_0 + 3: pci_0000_81_00_0 + 4: pci_0000_82_00_0 + 5: pci_0000_83_00_0 + 6: pci_0000_84_00_0 + copper: + 1: pci_0000_85_00_0 + 2: pci_0000_85_00_1 + 3: pci_0000_85_00_2 + 4: pci_0000_85_00_3 + sfp: + 5: pci_0000_c4_00_0 + 6: pci_0000_c4_00_1 + 7: pci_0000_c5_00_0 + 8: pci_0000_c5_00_1 + 9: pci_0000_c5_00_2 + 10: pci_0000_c5_00_3 + SOSSN7200: + cpu: 128 + memory: 256 + copper: + 1: pci_0000_03_00_0 + 2: pci_0000_03_00_1 + 3: pci_0000_03_00_2 + 4: pci_0000_03_00_3 sfp: 5: pci_0000_02_00_0 6: pci_0000_02_00_1 - 7: pci_0000_41_00_0 - 8: pci_0000_41_00_1 - model2: - cpu: 256 - memory: 256 - disk: - 1: pci_0000_c7_00_0 - 2: pci_0000_c8_00_0 - 3: pci_0000_c9_00_0 - 4: pci_0000_c10_00_0 + 7: pci_0000_81_00_0 + 8: pci_0000_81_00_1 + 9: pci_0000_81_00_2 + 10: pci_0000_81_00_3 + SOSSN7200-DE02: + cpu: 128 + memory: 384 copper: - 1: pci_0000_c4_00_0 - 2: pci_0000_c4_00_1 - 3: pci_0000_c4_00_2 - 4: pci_0000_c4_00_3 - 5: pci_0000_c5_00_0 - 6: pci_0000_c5_00_1 - 7: pci_0000_c5_00_2 - 8: pci_0000_c5_00_3 + 1: pci_0000_82_00_0 + 2: pci_0000_82_00_1 + 3: pci_0000_82_00_2 + 4: pci_0000_82_00_3 sfp: - 9: pci_0000_02_00_0 - 10: pci_0000_02_00_1 - 11: pci_0000_41_00_0 - 12: pci_0000_41_00_1 \ No newline at end of file + 5: pci_0000_c4_00_0 + 6: pci_0000_c4_00_1 + 7: pci_0000_c5_00_0 + 8: pci_0000_c5_00_1 + 9: pci_0000_c6_00_0 + 10: pci_0000_c6_00_1 + 11: pci_0000_c6_00_2 + 12: pci_0000_c6_00_3 + SOS4000: + cpu: 128 + memory: 256 + copper: + 1: pci_0000_03_00_0 + 2: pci_0000_03_00_1 + 3: pci_0000_03_00_2 + 4: pci_0000_03_00_3 + sfp: + 5: pci_0000_02_00_0 + 6: pci_0000_02_00_1 + 7: pci_0000_81_00_0 + 8: pci_0000_81_00_1 + 9: pci_0000_81_00_2 + 10: pci_0000_81_00_3 + SOS5000-DE02: + cpu: 128 + memory: 384 + copper: + 1: pci_0000_82_00_0 + 2: pci_0000_82_00_1 + 3: pci_0000_82_00_2 + 4: pci_0000_82_00_3 + sfp: + 5: pci_0000_c4_00_0 + 6: pci_0000_c4_00_1 + 7: pci_0000_c5_00_0 + 8: pci_0000_c5_00_1 + 9: pci_0000_c6_00_0 + 10: pci_0000_c6_00_1 + 11: pci_0000_c6_00_2 + 12: pci_0000_c6_00_3 From 5a67b89a808e89167014aa6fc0d1c03e8a8b409d Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 24 Sep 2025 09:49:02 -0400 Subject: [PATCH 03/22] Update so-saltstack-update add -v -vv and test / dry run mode --- salt/manager/tools/sbin/so-saltstack-update | 160 +++++++++++++++++++- 1 file changed, 153 insertions(+), 7 deletions(-) diff --git a/salt/manager/tools/sbin/so-saltstack-update b/salt/manager/tools/sbin/so-saltstack-update index 4be8f095c..2f385ab89 100755 --- a/salt/manager/tools/sbin/so-saltstack-update +++ b/salt/manager/tools/sbin/so-saltstack-update @@ -5,10 +5,12 @@ # https://securityonion.net/license; you may not use this file except in compliance with the # Elastic License 2.0. - default_salt_dir=/opt/so/saltstack/default -clone_to_tmp() { +VERBOSE=0 +VERY_VERBOSE=0 +TEST_MODE=0 +clone_to_tmp() { # TODO Need to add a air gap option # Make a temp location for the files mkdir /tmp/sogh @@ -16,19 +18,110 @@ clone_to_tmp() { #git clone -b dev https://github.com/Security-Onion-Solutions/securityonion.git git clone https://github.com/Security-Onion-Solutions/securityonion.git cd /tmp +} +show_file_changes() { + local source_dir="$1" + local dest_dir="$2" + local dir_type="$3" # "salt" or "pillar" + + if [ $VERBOSE -eq 0 ]; then + return + fi + + echo "=== Changes for $dir_type directory ===" + + # Find all files in source directory + if [ -d "$source_dir" ]; then + find "$source_dir" -type f | while read -r source_file; do + # Get relative path + rel_path="${source_file#$source_dir/}" + dest_file="$dest_dir/$rel_path" + + if [ ! -f "$dest_file" ]; then + echo "ADDED: $dest_file" + if [ $VERY_VERBOSE -eq 1 ]; then + echo " (New file - showing first 20 lines)" + head -n 20 "$source_file" | sed 's/^/ + /' + echo "" + fi + elif ! cmp -s "$source_file" "$dest_file"; then + echo "MODIFIED: $dest_file" + if [ $VERY_VERBOSE -eq 1 ]; then + echo " (Changes:)" + diff -u "$dest_file" "$source_file" | sed 's/^/ /' + echo "" + fi + fi + done + fi + + # Find deleted files (exist in dest but not in source) + if [ -d "$dest_dir" ]; then + find "$dest_dir" -type f | while read -r dest_file; do + # Get relative path + rel_path="${dest_file#$dest_dir/}" + source_file="$source_dir/$rel_path" + + if [ ! -f "$source_file" ]; then + echo "DELETED: $dest_file" + if [ $VERY_VERBOSE -eq 1 ]; then + echo " (File was deleted)" + echo "" + fi + fi + done + fi + + echo "" } copy_new_files() { - # Copy new files over to the salt dir cd /tmp/sogh/securityonion git checkout $BRANCH VERSION=$(cat VERSION) + + if [ $TEST_MODE -eq 1 ]; then + echo "=== TEST MODE: Showing what would change without making changes ===" + echo "Branch: $BRANCH" + echo "Version: $VERSION" + echo "" + fi + + # Show changes before copying if verbose mode is enabled OR if in test mode + if [ $VERBOSE -eq 1 ] || [ $TEST_MODE -eq 1 ]; then + if [ $TEST_MODE -eq 1 ]; then + # In test mode, force at least basic verbose output + local old_verbose=$VERBOSE + if [ $VERBOSE -eq 0 ]; then + VERBOSE=1 + fi + fi + + echo "Analyzing file changes..." + show_file_changes "$(pwd)/salt" "$default_salt_dir/salt" "salt" + show_file_changes "$(pwd)/pillar" "$default_salt_dir/pillar" "pillar" + + if [ $TEST_MODE -eq 1 ] && [ $old_verbose -eq 0 ]; then + # Restore original verbose setting + VERBOSE=$old_verbose + fi + fi + + # If in test mode, don't copy files + if [ $TEST_MODE -eq 1 ]; then + echo "=== TEST MODE: No files were modified ===" + echo "To apply these changes, run without --test option" + rm -rf /tmp/sogh + return + fi + # We need to overwrite if there is a repo file if [ -d /opt/so/repo ]; then tar -czf /opt/so/repo/"$VERSION".tar.gz -C "$(pwd)/.." . fi + rsync -a salt $default_salt_dir/ rsync -a pillar $default_salt_dir/ chown -R socore:socore $default_salt_dir/salt @@ -45,11 +138,64 @@ got_root(){ fi } -got_root -if [ $# -ne 1 ] ; then +show_usage() { + echo "Usage: $0 [-v] [-vv] [--test] [branch]" + echo " -v Show verbose output (files changed/added/deleted)" + echo " -vv Show very verbose output (includes file diffs)" + echo " --test Test mode - show what would change without making changes" + echo " branch Git branch to checkout (default: 2.4/main)" + echo "" + echo "Examples:" + echo " $0 # Normal operation" + echo " $0 -v # Show which files change" + echo " $0 -vv # Show files and their diffs" + echo " $0 --test # See what would change (dry run)" + echo " $0 --test -vv # Test mode with detailed diffs" + echo " $0 -v dev-branch # Use specific branch with verbose output" + exit 1 +} + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + -v) + VERBOSE=1 + shift + ;; + -vv) + VERBOSE=1 + VERY_VERBOSE=1 + shift + ;; + --test) + TEST_MODE=1 + shift + ;; + -h|--help) + show_usage + ;; + -*) + echo "Unknown option $1" + show_usage + ;; + *) + # This should be the branch name + if [ -z "$BRANCH" ]; then + BRANCH="$1" + else + echo "Too many arguments" + show_usage + fi + shift + ;; + esac +done + +# Set default branch if not provided +if [ -z "$BRANCH" ]; then BRANCH=2.4/main -else - BRANCH=$1 fi + +got_root clone_to_tmp copy_new_files From ac0d6c57e1dfcd5255a467b473d2823525174d29 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Mon, 6 Oct 2025 11:52:35 -0400 Subject: [PATCH 04/22] create common.grains state and nsm_total grain --- salt/common/grains.sls | 21 +++++++++++++++++++++ salt/common/init.sls | 1 + 2 files changed, 22 insertions(+) create mode 100644 salt/common/grains.sls diff --git a/salt/common/grains.sls b/salt/common/grains.sls new file mode 100644 index 000000000..b8d3a4c90 --- /dev/null +++ b/salt/common/grains.sls @@ -0,0 +1,21 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +{% set nsm_exists = salt['file.directory_exists']('/nsm') %} +{% if nsm_exists %} +{% set nsm_total = salt['cmd.shell']('df -BG /nsm | tail -1 | awk \'{print $2}\'') %} + +nsm_total: + grains.present: + - name: nsm_total + - value: {{ nsm_total }} + +{% else %} + +nsm_missing: + test.succeed_without_changes: + - name: /nsm does not exist, skipping grain assignment + +{% endif %} diff --git a/salt/common/init.sls b/salt/common/init.sls index 7137ff11f..eba18f651 100644 --- a/salt/common/init.sls +++ b/salt/common/init.sls @@ -4,6 +4,7 @@ {% from 'vars/globals.map.jinja' import GLOBALS %} include: + - common.grains - common.packages {% if GLOBALS.role in GLOBALS.manager_roles %} - manager.elasticsearch # needed for elastic_curl_config state From 60cccb21b4d40822040977122afec4d6f727cad6 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Tue, 7 Oct 2025 12:20:42 -0400 Subject: [PATCH 05/22] create volume --- salt/_modules/qcow2.py | 95 +++- .../tools/sbin_jinja/so-kvm-create-volume | 533 ++++++++++++++++++ salt/manager/tools/sbin_jinja/so-salt-cloud | 96 +++- .../engines/master/virtual_node_manager.py | 34 ++ salt/soc/dyanno/hypervisor/hypervisor.yaml | 6 +- 5 files changed, 758 insertions(+), 6 deletions(-) create mode 100644 salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume diff --git a/salt/_modules/qcow2.py b/salt/_modules/qcow2.py index 6e71dc459..10c4d185b 100644 --- a/salt/_modules/qcow2.py +++ b/salt/_modules/qcow2.py @@ -7,12 +7,14 @@ """ Salt module for managing QCOW2 image configurations and VM hardware settings. This module provides functions -for modifying network configurations within QCOW2 images and adjusting virtual machine hardware settings. -It serves as a Salt interface to the so-qcow2-modify-network and so-kvm-modify-hardware scripts. +for modifying network configurations within QCOW2 images, adjusting virtual machine hardware settings, and +creating virtual storage volumes. It serves as a Salt interface to the so-qcow2-modify-network, +so-kvm-modify-hardware, and so-kvm-create-volume scripts. -The module offers two main capabilities: +The module offers three main capabilities: 1. Network Configuration: Modify network settings (DHCP/static IP) within QCOW2 images 2. Hardware Configuration: Adjust VM hardware settings (CPU, memory, PCI passthrough) +3. Volume Management: Create and attach virtual storage volumes for NSM data This module is intended to work with Security Onion's virtualization infrastructure and is typically used in conjunction with salt-cloud for VM provisioning and management. @@ -244,3 +246,90 @@ def modify_hardware_config(vm_name, cpu=None, memory=None, pci=None, start=False except Exception as e: log.error('qcow2 module: An error occurred while executing the script: {}'.format(e)) raise + +def create_volume_config(vm_name, size_gb, start=False): + ''' + Usage: + salt '*' qcow2.create_volume_config vm_name= size_gb= [start=] + + Options: + vm_name + Name of the virtual machine to attach the volume to + size_gb + Volume size in GB (positive integer) + This determines the capacity of the virtual storage volume + start + Boolean flag to start the VM after volume creation + Optional - defaults to False + + Examples: + 1. **Create 500GB Volume:** + ```bash + salt '*' qcow2.create_volume_config vm_name='sensor1_sensor' size_gb=500 + ``` + This creates a 500GB virtual volume for NSM storage + + 2. **Create 1TB Volume and Start VM:** + ```bash + salt '*' qcow2.create_volume_config vm_name='sensor1_sensor' size_gb=1000 start=True + ``` + This creates a 1TB volume and starts the VM after attachment + + Notes: + - VM must be stopped before volume creation + - Volume is created as a qcow2 image and attached to the VM + - This is an alternative to disk passthrough via modify_hardware_config + - Volume is automatically attached to the VM's libvirt configuration + - Requires so-kvm-create-volume script to be installed + - Volume files are stored in the hypervisor's VM storage directory + + Description: + This function creates and attaches a virtual storage volume to a KVM virtual machine + using the so-kvm-create-volume script. It creates a qcow2 disk image of the specified + size and attaches it to the VM for NSM (Network Security Monitoring) storage purposes. + This provides an alternative to physical disk passthrough, allowing flexible storage + allocation without requiring dedicated hardware. The VM can optionally be started + after the volume is successfully created and attached. + + Exit Codes: + 0: Success + 1: Invalid parameters + 2: VM state error (running when should be stopped) + 3: Volume creation error + 4: System command error + 255: Unexpected error + + Logging: + - All operations are logged to the salt minion log + - Log entries are prefixed with 'qcow2 module:' + - Volume creation and attachment operations are logged + - Errors include detailed messages and stack traces + - Final status of volume creation is logged + ''' + + # Validate size_gb parameter + if not isinstance(size_gb, int) or size_gb <= 0: + raise ValueError('size_gb must be a positive integer.') + + cmd = ['/usr/sbin/so-kvm-create-volume', '-v', vm_name, '-s', str(size_gb)] + + if start: + cmd.append('-S') + + log.info('qcow2 module: Executing command: {}'.format(' '.join(shlex.quote(arg) for arg in cmd))) + + try: + result = subprocess.run(cmd, capture_output=True, text=True, check=False) + ret = { + 'retcode': result.returncode, + 'stdout': result.stdout, + 'stderr': result.stderr + } + if result.returncode != 0: + log.error('qcow2 module: Script execution failed with return code {}: {}'.format(result.returncode, result.stderr)) + else: + log.info('qcow2 module: Script executed successfully.') + return ret + except Exception as e: + log.error('qcow2 module: An error occurred while executing the script: {}'.format(e)) + raise diff --git a/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume new file mode 100644 index 000000000..8b7cd8a23 --- /dev/null +++ b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume @@ -0,0 +1,533 @@ +#!/usr/bin/python3 + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. +# +# Note: Per the Elastic License 2.0, the second limitation states: +# +# "You may not move, change, disable, or circumvent the license key functionality +# in the software, and you may not remove or obscure any functionality in the +# software that is protected by the license key." + +{% if 'vrt' in salt['pillar.get']('features', []) %} + +""" +Script for creating and attaching virtual volumes to KVM virtual machines for NSM storage. +This script provides functionality to create pre-allocated raw disk images and attach them +to VMs as virtio-blk devices for high-performance network security monitoring data storage. + +The script handles the complete volume lifecycle: +1. Volume Creation: Creates pre-allocated raw disk images using qemu-img +2. Volume Attachment: Attaches volumes to VMs as virtio-blk devices +3. VM Management: Stops/starts VMs as needed during the process + +This script is designed to work with Security Onion's virtualization infrastructure and is typically +used during VM provisioning to add dedicated NSM storage volumes. + +**Usage:** + so-kvm-create-volume -v -s [-S] + +**Options:** + -v, --vm Name of the virtual machine to attach the volume to (required). + -s, --size Size of the volume in GB (required, must be a positive integer). + -S, --start Start the VM after volume creation and attachment (optional). + +**Examples:** + +1. **Create and Attach 500GB Volume:** + + ```bash + so-kvm-create-volume -v vm1_sensor -s 500 + ``` + + This command creates and attaches a volume with the following settings: + - VM Name: `vm1_sensor` + - Volume Size: `500` GB + - Volume Path: `/nsm/libvirt/volumes/vm1_sensor-nsm.img` + - Device: `/dev/vdb` (virtio-blk) + - VM remains stopped after attachment + +2. **Create Volume and Start VM:** + + ```bash + so-kvm-create-volume -v vm2_sensor -s 1000 -S + ``` + + This command creates a volume and starts the VM: + - VM Name: `vm2_sensor` + - Volume Size: `1000` GB (1 TB) + - VM is started after volume attachment due to the `-S` flag + +3. **Create Large Volume for Heavy Traffic:** + + ```bash + so-kvm-create-volume -v vm3_sensor -s 2000 -S + ``` + + This command creates a large volume for high-traffic environments: + - VM Name: `vm3_sensor` + - Volume Size: `2000` GB (2 TB) + - VM is started after attachment + +**Notes:** + +- The script automatically stops the VM if it's running before creating and attaching the volume. +- Volumes are created with full pre-allocation for optimal performance. +- Volume files are stored in `/nsm/libvirt/volumes/` with naming pattern `-nsm.img`. +- Volumes are attached as `/dev/vdb` using virtio-blk for high performance. +- The script checks available disk space before creating the volume. +- Ownership is set to `socore:socore` with permissions `644`. +- Without the `-S` flag, the VM remains stopped after volume attachment. + +**Description:** + +The `so-kvm-create-volume` script creates and attaches NSM storage volumes using the following process: + +1. **Pre-flight Checks:** + - Validates input parameters (VM name, size) + - Checks available disk space in `/nsm/libvirt/volumes/` + - Ensures sufficient space for the requested volume size + +2. **VM State Management:** + - Connects to the local libvirt daemon + - Stops the VM if it's currently running + - Retrieves current VM configuration + +3. **Volume Creation:** + - Creates volume directory if it doesn't exist + - Uses `qemu-img create` with full pre-allocation + - Sets proper ownership (socore:socore) and permissions (644) + - Validates volume creation success + +4. **Volume Attachment:** + - Modifies VM's libvirt XML configuration + - Adds disk element with virtio-blk driver + - Configures cache='none' and io='native' for performance + - Attaches volume as `/dev/vdb` + +5. **VM Redefinition:** + - Applies the new configuration by redefining the VM + - Optionally starts the VM if requested + - Emits deployment status events for monitoring + +6. **Error Handling:** + - Validates all input parameters + - Checks disk space before creation + - Handles volume creation failures + - Handles volume attachment failures + - Provides detailed error messages for troubleshooting + +**Exit Codes:** + +- `0`: Success +- `1`: An error occurred during execution + +**Logging:** + +- Logs are written to `/opt/so/log/hypervisor/so-kvm-create-volume.log` +- Both file and console logging are enabled for real-time monitoring +- Log entries include timestamps and severity levels +- Log prefixes: VOLUME:, VM:, HARDWARE:, SPACE: +- Detailed error messages are logged for troubleshooting +""" + +import argparse +import sys +import os +import libvirt +import logging +import socket +import subprocess +import pwd +import grp +import xml.etree.ElementTree as ET +from io import StringIO +from so_vm_utils import start_vm, stop_vm +from so_logging_utils import setup_logging + +# Get hypervisor name from local hostname +HYPERVISOR = socket.gethostname() + +# Volume storage directory +VOLUME_DIR = '/nsm/libvirt/volumes' + +# Custom exception classes +class InsufficientSpaceError(Exception): + """Raised when there is insufficient disk space for volume creation.""" + pass + +class VolumeCreationError(Exception): + """Raised when volume creation fails.""" + pass + +class VolumeAttachmentError(Exception): + """Raised when volume attachment fails.""" + pass + +# Custom log handler to capture output +class StringIOHandler(logging.Handler): + def __init__(self): + super().__init__() + self.strio = StringIO() + + def emit(self, record): + msg = self.format(record) + self.strio.write(msg + '\n') + + def get_value(self): + return self.strio.getvalue() + +def parse_arguments(): + """Parse command-line arguments.""" + parser = argparse.ArgumentParser(description='Create and attach a virtual volume to a KVM virtual machine for NSM storage.') + parser.add_argument('-v', '--vm', required=True, help='Name of the virtual machine to attach the volume to.') + parser.add_argument('-s', '--size', type=int, required=True, help='Size of the volume in GB (must be a positive integer).') + parser.add_argument('-S', '--start', action='store_true', help='Start the VM after volume creation and attachment.') + args = parser.parse_args() + + # Validate size is positive + if args.size <= 0: + parser.error("Volume size must be a positive integer.") + + return args + +def check_disk_space(size_gb, logger): + """ + Check if there is sufficient disk space available for volume creation. + + Args: + size_gb: Size of the volume in GB + logger: Logger instance + + Raises: + InsufficientSpaceError: If there is not enough disk space + """ + try: + stat = os.statvfs(VOLUME_DIR) + # Available space in bytes + available_bytes = stat.f_bavail * stat.f_frsize + # Required space in bytes (add 10% buffer) + required_bytes = size_gb * 1024 * 1024 * 1024 * 1.1 + + available_gb = available_bytes / (1024 * 1024 * 1024) + required_gb = required_bytes / (1024 * 1024 * 1024) + + logger.info(f"SPACE: Available: {available_gb:.2f} GB, Required: {required_gb:.2f} GB") + + if available_bytes < required_bytes: + raise InsufficientSpaceError( + f"Insufficient disk space. Available: {available_gb:.2f} GB, Required: {required_gb:.2f} GB" + ) + + logger.info(f"SPACE: Sufficient disk space available for {size_gb} GB volume") + + except OSError as e: + logger.error(f"SPACE: Failed to check disk space: {e}") + raise + +def create_volume_file(vm_name, size_gb, logger): + """ + Create a pre-allocated raw disk image for the VM. + + Args: + vm_name: Name of the VM + size_gb: Size of the volume in GB + logger: Logger instance + + Returns: + Path to the created volume file + + Raises: + VolumeCreationError: If volume creation fails + """ + # Create volume directory if it doesn't exist + try: + if not os.path.exists(VOLUME_DIR): + os.makedirs(VOLUME_DIR, mode=0o755) + logger.info(f"VOLUME: Created volume directory: {VOLUME_DIR}") + except OSError as e: + logger.error(f"VOLUME: Failed to create volume directory: {e}") + raise VolumeCreationError(f"Failed to create volume directory: {e}") + + # Define volume path + volume_path = os.path.join(VOLUME_DIR, f"{vm_name}-nsm.img") + + # Check if volume already exists + if os.path.exists(volume_path): + logger.error(f"VOLUME: Volume already exists: {volume_path}") + raise VolumeCreationError(f"Volume already exists: {volume_path}") + + logger.info(f"VOLUME: Creating {size_gb} GB volume at {volume_path}") + + # Create volume using qemu-img with full pre-allocation + try: + cmd = [ + 'qemu-img', 'create', + '-f', 'raw', + '-o', 'preallocation=full', + volume_path, + f"{size_gb}G" + ] + + result = subprocess.run( + cmd, + capture_output=True, + text=True, + check=True + ) + + logger.info(f"VOLUME: Volume created successfully") + if result.stdout: + logger.debug(f"VOLUME: qemu-img output: {result.stdout.strip()}") + + except subprocess.CalledProcessError as e: + logger.error(f"VOLUME: Failed to create volume: {e}") + if e.stderr: + logger.error(f"VOLUME: qemu-img error: {e.stderr.strip()}") + raise VolumeCreationError(f"Failed to create volume: {e}") + + # Set ownership to socore:socore + try: + socore_uid = pwd.getpwnam('socore').pw_uid + socore_gid = grp.getgrnam('socore').gr_gid + os.chown(volume_path, socore_uid, socore_gid) + logger.info(f"VOLUME: Set ownership to socore:socore") + except (KeyError, OSError) as e: + logger.error(f"VOLUME: Failed to set ownership: {e}") + raise VolumeCreationError(f"Failed to set ownership: {e}") + + # Set permissions to 644 + try: + os.chmod(volume_path, 0o644) + logger.info(f"VOLUME: Set permissions to 644") + except OSError as e: + logger.error(f"VOLUME: Failed to set permissions: {e}") + raise VolumeCreationError(f"Failed to set permissions: {e}") + + # Verify volume was created + if not os.path.exists(volume_path): + logger.error(f"VOLUME: Volume file not found after creation: {volume_path}") + raise VolumeCreationError(f"Volume file not found after creation: {volume_path}") + + volume_size = os.path.getsize(volume_path) + logger.info(f"VOLUME: Volume created: {volume_path} ({volume_size} bytes)") + + return volume_path + +def attach_volume_to_vm(conn, vm_name, volume_path, logger): + """ + Attach the volume to the VM's libvirt XML configuration. + + Args: + conn: Libvirt connection + vm_name: Name of the VM + volume_path: Path to the volume file + logger: Logger instance + + Raises: + VolumeAttachmentError: If volume attachment fails + """ + try: + # Get the VM domain + dom = conn.lookupByName(vm_name) + + # Get the XML description of the VM + xml_desc = dom.XMLDesc() + root = ET.fromstring(xml_desc) + + # Find the devices element + devices_elem = root.find('./devices') + if devices_elem is None: + logger.error("VM: Could not find element in XML") + raise VolumeAttachmentError("Could not find element in VM XML") + + # Check if vdb already exists + for disk in devices_elem.findall('./disk'): + target = disk.find('./target') + if target is not None and target.get('dev') == 'vdb': + logger.error("VM: Device vdb already exists in VM configuration") + raise VolumeAttachmentError("Device vdb already exists in VM configuration") + + logger.info(f"VM: Attaching volume to {vm_name} as /dev/vdb") + + # Create disk element + disk_elem = ET.SubElement(devices_elem, 'disk', attrib={ + 'type': 'file', + 'device': 'disk' + }) + + # Add driver element + ET.SubElement(disk_elem, 'driver', attrib={ + 'name': 'qemu', + 'type': 'raw', + 'cache': 'none', + 'io': 'native' + }) + + # Add source element + ET.SubElement(disk_elem, 'source', attrib={ + 'file': volume_path + }) + + # Add target element + ET.SubElement(disk_elem, 'target', attrib={ + 'dev': 'vdb', + 'bus': 'virtio' + }) + + # Add address element + ET.SubElement(disk_elem, 'address', attrib={ + 'type': 'pci', + 'domain': '0x0000', + 'bus': '0x00', + 'slot': '0x07', + 'function': '0x0' + }) + + logger.info(f"HARDWARE: Added disk configuration for vdb") + + # Convert XML back to string + new_xml_desc = ET.tostring(root, encoding='unicode') + + # Redefine the VM with the new XML + conn.defineXML(new_xml_desc) + logger.info(f"VM: VM redefined with volume attached") + + except libvirt.libvirtError as e: + logger.error(f"VM: Failed to attach volume: {e}") + raise VolumeAttachmentError(f"Failed to attach volume: {e}") + except Exception as e: + logger.error(f"VM: Failed to attach volume: {e}") + raise VolumeAttachmentError(f"Failed to attach volume: {e}") + +def emit_status_event(vm_name, status): + """ + Emit a deployment status event. + + Args: + vm_name: Name of the VM + status: Status message + """ + try: + subprocess.run([ + 'so-salt-emit-vm-deployment-status-event', + '-v', vm_name, + '-H', HYPERVISOR, + '-s', status + ], check=True) + except subprocess.CalledProcessError as e: + # Don't fail the entire operation if status event fails + pass + +def main(): + """Main function to orchestrate volume creation and attachment.""" + # Set up logging using the so_logging_utils library + string_handler = StringIOHandler() + string_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) + logger = setup_logging( + logger_name='so-kvm-create-volume', + log_file_path='/opt/so/log/hypervisor/so-kvm-create-volume.log', + log_level=logging.INFO, + format_str='%(asctime)s - %(levelname)s - %(message)s' + ) + logger.addHandler(string_handler) + + vm_name = None + + try: + # Parse arguments + args = parse_arguments() + + vm_name = args.vm + size_gb = args.size + start_vm_flag = args.start + + logger.info(f"VOLUME: Starting volume creation for VM '{vm_name}' with size {size_gb} GB") + + # Emit start status event + emit_status_event(vm_name, 'Volume Creation') + + # Check disk space + check_disk_space(size_gb, logger) + + # Connect to libvirt + try: + conn = libvirt.open(None) + logger.info("VM: Connected to libvirt") + except libvirt.libvirtError as e: + logger.error(f"VM: Failed to open connection to libvirt: {e}") + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + + # Stop VM if running + dom = stop_vm(conn, vm_name, logger) + + # Create volume file + volume_path = create_volume_file(vm_name, size_gb, logger) + + # Attach volume to VM + attach_volume_to_vm(conn, vm_name, volume_path, logger) + + # Start VM if -S or --start argument is provided + if start_vm_flag: + dom = conn.lookupByName(vm_name) + start_vm(dom, logger) + logger.info(f"VM: VM '{vm_name}' started successfully") + else: + logger.info("VM: Start flag not provided; VM will remain stopped") + + # Close connection + conn.close() + + # Emit success status event + emit_status_event(vm_name, 'Volume Configuration') + + logger.info(f"VOLUME: Volume creation and attachment completed successfully for VM '{vm_name}'") + + except KeyboardInterrupt: + error_msg = "Operation cancelled by user" + logger.error(error_msg) + if vm_name: + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + + except InsufficientSpaceError as e: + error_msg = f"SPACE: {str(e)}" + logger.error(error_msg) + if vm_name: + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + + except VolumeCreationError as e: + error_msg = f"VOLUME: {str(e)}" + logger.error(error_msg) + if vm_name: + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + + except VolumeAttachmentError as e: + error_msg = f"VM: {str(e)}" + logger.error(error_msg) + if vm_name: + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + + except Exception as e: + error_msg = f"An error occurred: {str(e)}" + logger.error(error_msg) + if vm_name: + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + +if __name__ == '__main__': + main() + +{%- else -%} + +echo "Hypervisor nodes are a feature supported only for customers with a valid license. \ + Contact Security Onion Solutions, LLC via our website at https://securityonionsolutions.com \ + for more information about purchasing a license to enable this feature." + +{% endif -%} diff --git a/salt/manager/tools/sbin_jinja/so-salt-cloud b/salt/manager/tools/sbin_jinja/so-salt-cloud index daa92fa67..e8674cc2c 100644 --- a/salt/manager/tools/sbin_jinja/so-salt-cloud +++ b/salt/manager/tools/sbin_jinja/so-salt-cloud @@ -533,6 +533,64 @@ def run_qcow2_modify_hardware_config(profile, vm_name, cpu=None, memory=None, pc except Exception as e: logger.error(f"An error occurred while running qcow2.modify_hardware_config: {e}") +def run_qcow2_create_volume_config(profile, vm_name, size_gb, cpu=None, memory=None, start=False): + """Create a volume for the VM and optionally configure CPU/memory. + + Args: + profile (str): The cloud profile name + vm_name (str): The name of the VM + size_gb (int): Size of the volume in GB + cpu (int, optional): Number of CPUs to assign + memory (int, optional): Amount of memory in MiB + start (bool): Whether to start the VM after configuration + """ + hv_name = profile.split('-')[1] + target = hv_name + "_*" + + try: + # Step 1: Create the volume + logger.info(f"Creating {size_gb}GB volume for VM {vm_name}") + volume_result = local.cmd( + target, + 'qcow2.create_volume_config', + kwarg={ + 'vm_name': vm_name, + 'size_gb': size_gb, + 'start': False # Don't start yet if we need to configure CPU/memory + } + ) + format_qcow2_output('Volume creation', volume_result) + + # Step 2: Configure CPU and memory if specified + if cpu or memory: + logger.info(f"Configuring hardware for VM {vm_name}: CPU={cpu}, Memory={memory}MiB") + hw_result = local.cmd( + target, + 'qcow2.modify_hardware_config', + kwarg={ + 'vm_name': vm_name, + 'cpu': cpu, + 'memory': memory, + 'start': start + } + ) + format_qcow2_output('Hardware configuration', hw_result) + elif start: + # If no CPU/memory config needed but we need to start the VM + logger.info(f"Starting VM {vm_name}") + start_result = local.cmd( + target, + 'qcow2.modify_hardware_config', + kwarg={ + 'vm_name': vm_name, + 'start': True + } + ) + format_qcow2_output('VM startup', start_result) + + except Exception as e: + logger.error(f"An error occurred while creating volume and configuring hardware: {e}") + def run_qcow2_modify_network_config(profile, vm_name, mode, ip=None, gateway=None, dns=None, search_domain=None): hv_name = profile.split('-')[1] target = hv_name + "_*" @@ -586,6 +644,7 @@ def parse_arguments(): network_group.add_argument('-c', '--cpu', type=int, help='Number of virtual CPUs to assign.') network_group.add_argument('-m', '--memory', type=int, help='Amount of memory to assign in MiB.') network_group.add_argument('-P', '--pci', action='append', help='PCI hardware ID(s) to passthrough to the VM (e.g., 0000:c7:00.0). Can be specified multiple times.') + network_group.add_argument('--nsm-size', type=int, help='Size in GB for NSM volume creation. If both --pci and --nsm-size are specified, --pci takes precedence.') args = parser.parse_args() @@ -621,6 +680,8 @@ def main(): hw_config.append(f"{args.memory}MB RAM") if args.pci: hw_config.append(f"PCI devices: {', '.join(args.pci)}") + if args.nsm_size: + hw_config.append(f"NSM volume: {args.nsm_size}GB") hw_string = f" and hardware config: {', '.join(hw_config)}" if hw_config else "" logger.info(f"Received request to create VM '{args.vm_name}' using profile '{args.profile}' {network_config}{hw_string}") @@ -643,8 +704,39 @@ def main(): # Step 2: Provision the VM (without starting it) call_salt_cloud(args.profile, args.vm_name) - # Step 3: Modify hardware configuration - run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=args.cpu, memory=args.memory, pci_list=args.pci, start=True) + # Step 3: Determine storage configuration approach + # Priority: disk passthrough (--pci) > volume creation (--nsm-size) + use_disk_passthrough = False + use_volume_creation = False + + if args.pci: + use_disk_passthrough = True + logger.info("Using disk passthrough (--pci parameter specified)") + if args.nsm_size: + logger.warning(f"Both --pci and --nsm-size specified. Using --pci (disk passthrough) and ignoring --nsm-size={args.nsm_size}GB") + elif args.nsm_size: + use_volume_creation = True + # Validate nsm_size + if args.nsm_size <= 0: + logger.error(f"Invalid nsm_size value: {args.nsm_size}. Must be a positive integer.") + sys.exit(1) + logger.info(f"Using volume creation with size {args.nsm_size}GB (--nsm-size parameter specified)") + + # Step 4: Configure hardware based on storage approach + if use_disk_passthrough: + # Use existing disk passthrough logic via modify_hardware_config + run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=args.cpu, memory=args.memory, pci_list=args.pci, start=True) + elif use_volume_creation: + # Use new volume creation logic + run_qcow2_create_volume_config(args.profile, args.vm_name, size_gb=args.nsm_size, cpu=args.cpu, memory=args.memory, start=True) + else: + # No storage configuration, just configure CPU/memory if specified + if args.cpu or args.memory: + run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=args.cpu, memory=args.memory, pci_list=None, start=True) + else: + # No hardware configuration needed, just start the VM + logger.info(f"No hardware configuration specified, starting VM {args.vm_name}") + run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=None, memory=None, pci_list=None, start=True) except KeyboardInterrupt: logger.error("so-salt-cloud: Operation cancelled by user.") diff --git a/salt/salt/engines/master/virtual_node_manager.py b/salt/salt/engines/master/virtual_node_manager.py index bc098d075..1e13c6022 100644 --- a/salt/salt/engines/master/virtual_node_manager.py +++ b/salt/salt/engines/master/virtual_node_manager.py @@ -633,6 +633,35 @@ def process_vm_creation(hypervisor_path: str, vm_config: dict) -> None: except subprocess.CalledProcessError as e: logger.error(f"Failed to emit success status event: {e}") + # Validate nsm_size if present + if 'nsm_size' in vm_config: + try: + size = int(vm_config['nsm_size']) + if size <= 0: + log.error("VM: %s - nsm_size must be a positive integer, got: %d", vm_name, size) + mark_invalid_hardware(hypervisor_path, vm_name, vm_config, + {'nsm_size': 'Invalid nsm_size: must be positive integer'}) + return + if size > 10000: # 10TB reasonable maximum + log.error("VM: %s - nsm_size %dGB exceeds reasonable maximum (10000GB)", vm_name, size) + mark_invalid_hardware(hypervisor_path, vm_name, vm_config, + {'nsm_size': f'Invalid nsm_size: {size}GB exceeds maximum (10000GB)'}) + return + log.debug("VM: %s - nsm_size validated: %dGB", vm_name, size) + except (ValueError, TypeError) as e: + log.error("VM: %s - nsm_size must be a valid integer, got: %s", vm_name, vm_config.get('nsm_size')) + mark_invalid_hardware(hypervisor_path, vm_name, vm_config, + {'nsm_size': 'Invalid nsm_size: must be valid integer'}) + return + + # Check for conflicting storage configurations + has_disk = 'disk' in vm_config and vm_config['disk'] + has_nsm_size = 'nsm_size' in vm_config and vm_config['nsm_size'] + + if has_disk and has_nsm_size: + log.warning("VM: %s - Both disk and nsm_size specified. disk takes precedence, nsm_size will be ignored.", + vm_name) + # Initial hardware validation against model is_valid, errors = validate_hardware_request(model_config, vm_config) if not is_valid: @@ -668,6 +697,11 @@ def process_vm_creation(hypervisor_path: str, vm_config: dict) -> None: if 'memory' in vm_config: memory_mib = int(vm_config['memory']) * 1024 cmd.extend(['-m', str(memory_mib)]) + + # Add nsm_size if specified and disk is not specified + if 'nsm_size' in vm_config and vm_config['nsm_size'] and not ('disk' in vm_config and vm_config['disk']): + cmd.extend(['--nsm-size', str(vm_config['nsm_size'])]) + log.debug("VM: %s - Adding nsm_size parameter: %s", vm_name, vm_config['nsm_size']) # Add PCI devices for hw_type in ['disk', 'copper', 'sfp']: diff --git a/salt/soc/dyanno/hypervisor/hypervisor.yaml b/salt/soc/dyanno/hypervisor/hypervisor.yaml index d13c928ec..7ae0631cb 100644 --- a/salt/soc/dyanno/hypervisor/hypervisor.yaml +++ b/salt/soc/dyanno/hypervisor/hypervisor.yaml @@ -63,8 +63,12 @@ hypervisor: required: true readonly: true forcedType: int + - field: nsm_size + label: "Size of /nsm, in GB. Only used if there is not a passthrough disk." + forcedType: int + readonly: true - field: disk - label: "Disk(s) for passthrough. Free: FREE | Total: TOTAL" + label: "Disk(s) for passthrough to /nsm. Free: FREE | Total: TOTAL" readonly: true options: [] forcedType: '[]int' From a1a8f75409f62157184f888a92ad556fd298a1c5 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Tue, 7 Oct 2025 16:36:23 -0400 Subject: [PATCH 06/22] create and mount volume. being mounted as vda --- .../tools/sbin_jinja/so-kvm-create-volume | 23 +++++++++++-------- salt/manager/tools/sbin_jinja/so-salt-cloud | 2 +- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume index 8b7cd8a23..dc9b28c21 100644 --- a/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume +++ b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume @@ -242,16 +242,7 @@ def create_volume_file(vm_name, size_gb, logger): Raises: VolumeCreationError: If volume creation fails """ - # Create volume directory if it doesn't exist - try: - if not os.path.exists(VOLUME_DIR): - os.makedirs(VOLUME_DIR, mode=0o755) - logger.info(f"VOLUME: Created volume directory: {VOLUME_DIR}") - except OSError as e: - logger.error(f"VOLUME: Failed to create volume directory: {e}") - raise VolumeCreationError(f"Failed to create volume directory: {e}") - - # Define volume path + # Define volume path (directory already created in main()) volume_path = os.path.join(VOLUME_DIR, f"{vm_name}-nsm.img") # Check if volume already exists @@ -449,6 +440,18 @@ def main(): # Emit start status event emit_status_event(vm_name, 'Volume Creation') + # Ensure volume directory exists before checking disk space + try: + os.makedirs(VOLUME_DIR, mode=0o755, exist_ok=True) + socore_uid = pwd.getpwnam('socore').pw_uid + socore_gid = grp.getgrnam('socore').gr_gid + os.chown(VOLUME_DIR, socore_uid, socore_gid) + logger.debug(f"VOLUME: Ensured volume directory exists: {VOLUME_DIR}") + except Exception as e: + logger.error(f"VOLUME: Failed to create volume directory: {e}") + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + # Check disk space check_disk_space(size_gb, logger) diff --git a/salt/manager/tools/sbin_jinja/so-salt-cloud b/salt/manager/tools/sbin_jinja/so-salt-cloud index b5e2d0ab1..2f99f769b 100644 --- a/salt/manager/tools/sbin_jinja/so-salt-cloud +++ b/salt/manager/tools/sbin_jinja/so-salt-cloud @@ -544,7 +544,7 @@ def run_qcow2_create_volume_config(profile, vm_name, size_gb, cpu=None, memory=N memory (int, optional): Amount of memory in MiB start (bool): Whether to start the VM after configuration """ - hv_name = profile.split('-')[1] + hv_name = profile.split('_')[1] target = hv_name + "_*" try: From c7edaac42a5ef21eb6011eba1e45f79e829fe179 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Tue, 7 Oct 2025 17:20:11 -0400 Subject: [PATCH 07/22] nsm volume as vdb, os vda by ordering pci slots --- .../tools/sbin_jinja/so-kvm-create-volume | 54 ++++++++++++++++++- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume index dc9b28c21..57309ec8e 100644 --- a/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume +++ b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume @@ -334,6 +334,34 @@ def attach_volume_to_vm(conn, vm_name, volume_path, logger): logger.error("VM: Could not find element in XML") raise VolumeAttachmentError("Could not find element in VM XML") + # Log ALL devices with PCI addresses to find conflicts + logger.info("DISK_DEBUG: Examining ALL devices with PCI addresses") + for device in devices_elem: + address = device.find('./address') + if address is not None and address.get('type') == 'pci': + bus = address.get('bus', 'unknown') + slot = address.get('slot', 'unknown') + function = address.get('function', 'unknown') + logger.info(f"DISK_DEBUG: Device {device.tag}: bus={bus}, slot={slot}, function={function}") + + # Log existing disk configuration for debugging + logger.info("DISK_DEBUG: Examining existing disk configuration") + existing_disks = devices_elem.findall('./disk') + for idx, disk in enumerate(existing_disks): + target = disk.find('./target') + source = disk.find('./source') + address = disk.find('./address') + + dev_name = target.get('dev') if target is not None else 'unknown' + source_file = source.get('file') if source is not None else 'unknown' + + if address is not None: + slot = address.get('slot', 'unknown') + bus = address.get('bus', 'unknown') + logger.info(f"DISK_DEBUG: Disk {idx}: dev={dev_name}, source={source_file}, slot={slot}, bus={bus}") + else: + logger.info(f"DISK_DEBUG: Disk {idx}: dev={dev_name}, source={source_file}, no address element") + # Check if vdb already exists for disk in devices_elem.findall('./disk'): target = disk.find('./target') @@ -369,16 +397,38 @@ def attach_volume_to_vm(conn, vm_name, volume_path, logger): }) # Add address element + # Use bus 0x07 with slot 0x00 to ensure NSM volume appears after OS disk (which is on bus 0x04) + # Bus 0x05 is used by memballoon, bus 0x06 is used by rng device + # Libvirt requires slot <= 0 for non-zero buses + # This ensures vda = OS disk, vdb = NSM volume ET.SubElement(disk_elem, 'address', attrib={ 'type': 'pci', 'domain': '0x0000', - 'bus': '0x00', - 'slot': '0x07', + 'bus': '0x07', + 'slot': '0x00', 'function': '0x0' }) logger.info(f"HARDWARE: Added disk configuration for vdb") + # Log disk ordering after adding new disk + logger.info("DISK_DEBUG: Disk configuration after adding NSM volume") + all_disks = devices_elem.findall('./disk') + for idx, disk in enumerate(all_disks): + target = disk.find('./target') + source = disk.find('./source') + address = disk.find('./address') + + dev_name = target.get('dev') if target is not None else 'unknown' + source_file = source.get('file') if source is not None else 'unknown' + + if address is not None: + slot = address.get('slot', 'unknown') + bus = address.get('bus', 'unknown') + logger.info(f"DISK_DEBUG: Disk {idx}: dev={dev_name}, source={source_file}, slot={slot}, bus={bus}") + else: + logger.info(f"DISK_DEBUG: Disk {idx}: dev={dev_name}, source={source_file}, no address element") + # Convert XML back to string new_xml_desc = ET.tostring(root, encoding='unicode') From 659c039ba8035bbbdebc2ed8f9fe8c1256bd2fe7 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 8 Oct 2025 10:51:04 -0400 Subject: [PATCH 08/22] handle nsm volume size and non disk passthrough --- salt/manager/tools/sbin_jinja/so-salt-cloud | 61 ++++++++++++++------- 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/salt/manager/tools/sbin_jinja/so-salt-cloud b/salt/manager/tools/sbin_jinja/so-salt-cloud index 2f99f769b..6d1d2e529 100644 --- a/salt/manager/tools/sbin_jinja/so-salt-cloud +++ b/salt/manager/tools/sbin_jinja/so-salt-cloud @@ -516,17 +516,21 @@ def run_qcow2_modify_hardware_config(profile, vm_name, cpu=None, memory=None, pc target = hv_name + "_*" try: - args_list = [ - 'vm_name=' + vm_name, - 'cpu=' + str(cpu) if cpu else '', - 'memory=' + str(memory) if memory else '', - 'start=' + str(start) - ] - + args_list = ['vm_name=' + vm_name] + + # Only add parameters that are actually specified + if cpu is not None: + args_list.append('cpu=' + str(cpu)) + if memory is not None: + args_list.append('memory=' + str(memory)) + # Add PCI devices if provided if pci_list: # Pass all PCI devices as a comma-separated list args_list.append('pci=' + ','.join(pci_list)) + + # Always add start parameter + args_list.append('start=' + str(start)) result = local.cmd(target, 'qcow2.modify_hardware_config', args_list) format_qcow2_output('Hardware configuration', result) @@ -644,7 +648,7 @@ def parse_arguments(): network_group.add_argument('-c', '--cpu', type=int, help='Number of virtual CPUs to assign.') network_group.add_argument('-m', '--memory', type=int, help='Amount of memory to assign in MiB.') network_group.add_argument('-P', '--pci', action='append', help='PCI hardware ID(s) to passthrough to the VM (e.g., 0000:c7:00.0). Can be specified multiple times.') - network_group.add_argument('--nsm-size', type=int, help='Size in GB for NSM volume creation. If both --pci and --nsm-size are specified, --pci takes precedence.') + network_group.add_argument('--nsm-size', type=int, help='Size in GB for NSM volume creation. Can be used with copper/sfp NICs (--pci). Only disk passthrough (without --nsm-size) prevents volume creation.') args = parser.parse_args() @@ -705,30 +709,47 @@ def main(): call_salt_cloud(args.profile, args.vm_name) # Step 3: Determine storage configuration approach - # Priority: disk passthrough (--pci) > volume creation (--nsm-size) + # Priority: disk passthrough > volume creation (but volume can coexist with copper/sfp NICs) + # Note: virtual_node_manager.py already filters out --nsm-size when disk is present, + # so if both --pci and --nsm-size are present here, the PCI devices are copper/sfp NICs use_disk_passthrough = False use_volume_creation = False + has_nic_passthrough = False - if args.pci: - use_disk_passthrough = True - logger.info("Using disk passthrough (--pci parameter specified)") - if args.nsm_size: - logger.warning(f"Both --pci and --nsm-size specified. Using --pci (disk passthrough) and ignoring --nsm-size={args.nsm_size}GB") - elif args.nsm_size: - use_volume_creation = True + if args.nsm_size: # Validate nsm_size if args.nsm_size <= 0: logger.error(f"Invalid nsm_size value: {args.nsm_size}. Must be a positive integer.") sys.exit(1) + use_volume_creation = True logger.info(f"Using volume creation with size {args.nsm_size}GB (--nsm-size parameter specified)") + + if args.pci: + # If both nsm_size and PCI are present, PCI devices are copper/sfp NICs + # (virtual_node_manager.py filters out nsm_size when disk is present) + has_nic_passthrough = True + logger.info(f"PCI devices (copper/sfp NICs) will be passed through along with volume: {', '.join(args.pci)}") + elif args.pci: + # Only PCI devices, no nsm_size - this is disk passthrough + use_disk_passthrough = True + logger.info(f"Using disk passthrough (--pci parameter specified without --nsm-size)") # Step 4: Configure hardware based on storage approach - if use_disk_passthrough: + if use_volume_creation: + # Create volume first + run_qcow2_create_volume_config(args.profile, args.vm_name, size_gb=args.nsm_size, cpu=args.cpu, memory=args.memory, start=False) + + # Then configure NICs if present + if has_nic_passthrough: + logger.info(f"Configuring NIC passthrough for VM {args.vm_name}") + run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=None, memory=None, pci_list=args.pci, start=True) + else: + # No NICs, just start the VM + logger.info(f"Starting VM {args.vm_name}") + run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=None, memory=None, pci_list=None, start=True) + elif use_disk_passthrough: # Use existing disk passthrough logic via modify_hardware_config run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=args.cpu, memory=args.memory, pci_list=args.pci, start=True) - elif use_volume_creation: - # Use new volume creation logic - run_qcow2_create_volume_config(args.profile, args.vm_name, size_gb=args.nsm_size, cpu=args.cpu, memory=args.memory, start=True) else: # No storage configuration, just configure CPU/memory if specified if args.cpu or args.memory: From e45b0bf871278c33ecff4f0fb4f777c64c2df08e Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 8 Oct 2025 11:51:35 -0400 Subject: [PATCH 09/22] var and comment update --- salt/manager/tools/sbin_jinja/so-salt-cloud | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/salt/manager/tools/sbin_jinja/so-salt-cloud b/salt/manager/tools/sbin_jinja/so-salt-cloud index 6d1d2e529..a1f99712a 100644 --- a/salt/manager/tools/sbin_jinja/so-salt-cloud +++ b/salt/manager/tools/sbin_jinja/so-salt-cloud @@ -712,7 +712,7 @@ def main(): # Priority: disk passthrough > volume creation (but volume can coexist with copper/sfp NICs) # Note: virtual_node_manager.py already filters out --nsm-size when disk is present, # so if both --pci and --nsm-size are present here, the PCI devices are copper/sfp NICs - use_disk_passthrough = False + use_passthrough = False use_volume_creation = False has_nic_passthrough = False @@ -730,9 +730,11 @@ def main(): has_nic_passthrough = True logger.info(f"PCI devices (copper/sfp NICs) will be passed through along with volume: {', '.join(args.pci)}") elif args.pci: - # Only PCI devices, no nsm_size - this is disk passthrough - use_disk_passthrough = True - logger.info(f"Using disk passthrough (--pci parameter specified without --nsm-size)") + # Only PCI devices, no nsm_size - could be disk or NICs + # this script is called by virtual_node_manager and that strips any possibility that nsm_size and the disk pci slot is sent to this script + # we might have not specified a disk passthrough or nsm_size, but pass another pci slot and we end up here + use_passthrough = True + logger.info(f"Configuring PCI device passthrough.(--pci parameter specified without --nsm-size)") # Step 4: Configure hardware based on storage approach if use_volume_creation: @@ -747,8 +749,8 @@ def main(): # No NICs, just start the VM logger.info(f"Starting VM {args.vm_name}") run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=None, memory=None, pci_list=None, start=True) - elif use_disk_passthrough: - # Use existing disk passthrough logic via modify_hardware_config + elif use_passthrough: + # Use existing passthrough logic via modify_hardware_config run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=args.cpu, memory=args.memory, pci_list=args.pci, start=True) else: # No storage configuration, just configure CPU/memory if specified From 7827e05c2458cbf51f5e77a7c31a24140674c0a8 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 8 Oct 2025 12:18:34 -0400 Subject: [PATCH 10/22] handle mounting vdb as nsm when nsm set in soc grid config --- salt/storage/init.sls | 11 +- .../{nsm_mount.sls => nsm_mount_nvme.sls} | 6 +- salt/storage/nsm_mount_virtio.sls | 39 ++++ .../sbin/{so-nsm-mount => so-nsm-mount-nvme} | 2 +- salt/storage/tools/sbin/so-nsm-mount-virtio | 171 ++++++++++++++++++ 5 files changed, 223 insertions(+), 6 deletions(-) rename salt/storage/{nsm_mount.sls => nsm_mount_nvme.sls} (87%) create mode 100644 salt/storage/nsm_mount_virtio.sls rename salt/storage/tools/sbin/{so-nsm-mount => so-nsm-mount-nvme} (99%) create mode 100644 salt/storage/tools/sbin/so-nsm-mount-virtio diff --git a/salt/storage/init.sls b/salt/storage/init.sls index 533366fd0..ab5926bf5 100644 --- a/salt/storage/init.sls +++ b/salt/storage/init.sls @@ -4,10 +4,17 @@ # Elastic License 2.0. -{% set nvme_devices = salt['cmd.shell']("find /dev -name 'nvme*n1' 2>/dev/null") %} +{% set nvme_devices = salt['cmd.shell']("ls /dev/nvme*n1 2>/dev/null || echo ''") %} +{% set virtio_devices = salt['cmd.shell']("test -b /dev/vdb && echo '/dev/vdb' || echo ''") %} + {% if nvme_devices %} include: - - storage.nsm_mount + - storage.nsm_mount_nvme + +{% elif virtio_devices %} + +include: + - storage.nsm_mount_virtio {% endif %} diff --git a/salt/storage/nsm_mount.sls b/salt/storage/nsm_mount_nvme.sls similarity index 87% rename from salt/storage/nsm_mount.sls rename to salt/storage/nsm_mount_nvme.sls index ed9e97c33..a0d317014 100644 --- a/salt/storage/nsm_mount.sls +++ b/salt/storage/nsm_mount_nvme.sls @@ -22,8 +22,8 @@ storage_nsm_mount_logdir: # Install the NSM mount script storage_nsm_mount_script: file.managed: - - name: /usr/sbin/so-nsm-mount - - source: salt://storage/tools/sbin/so-nsm-mount + - name: /usr/sbin/so-nsm-mount-nvme + - source: salt://storage/tools/sbin/so-nsm-mount-nvme - mode: 755 - user: root - group: root @@ -34,7 +34,7 @@ storage_nsm_mount_script: # Execute the mount script if not already mounted storage_nsm_mount_execute: cmd.run: - - name: /usr/sbin/so-nsm-mount + - name: /usr/sbin/so-nsm-mount-nvme - unless: mountpoint -q /nsm - require: - file: storage_nsm_mount_script diff --git a/salt/storage/nsm_mount_virtio.sls b/salt/storage/nsm_mount_virtio.sls new file mode 100644 index 000000000..34ca8a883 --- /dev/null +++ b/salt/storage/nsm_mount_virtio.sls @@ -0,0 +1,39 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +# Install required packages +storage_nsm_mount_virtio_packages: + pkg.installed: + - pkgs: + - xfsprogs + +# Ensure log directory exists +storage_nsm_mount_virtio_logdir: + file.directory: + - name: /opt/so/log + - makedirs: True + - user: root + - group: root + - mode: 755 + +# Install the NSM mount script +storage_nsm_mount_virtio_script: + file.managed: + - name: /usr/sbin/so-nsm-mount-virtio + - source: salt://storage/tools/sbin/so-nsm-mount-virtio + - mode: 755 + - user: root + - group: root + - require: + - pkg: storage_nsm_mount_virtio_packages + - file: storage_nsm_mount_virtio_logdir + +# Execute the mount script if not already mounted +storage_nsm_mount_virtio_execute: + cmd.run: + - name: /usr/sbin/so-nsm-mount-virtio + - unless: mountpoint -q /nsm + - require: + - file: storage_nsm_mount_virtio_script diff --git a/salt/storage/tools/sbin/so-nsm-mount b/salt/storage/tools/sbin/so-nsm-mount-nvme similarity index 99% rename from salt/storage/tools/sbin/so-nsm-mount rename to salt/storage/tools/sbin/so-nsm-mount-nvme index 24125fc40..f612c9915 100644 --- a/salt/storage/tools/sbin/so-nsm-mount +++ b/salt/storage/tools/sbin/so-nsm-mount-nvme @@ -81,7 +81,7 @@ set -e -LOG_FILE="/opt/so/log/so-nsm-mount.log" +LOG_FILE="/opt/so/log/so-nsm-mount-nvme.log" VG_NAME="" LV_NAME="nsm" MOUNT_POINT="/nsm" diff --git a/salt/storage/tools/sbin/so-nsm-mount-virtio b/salt/storage/tools/sbin/so-nsm-mount-virtio new file mode 100644 index 000000000..8385d7c21 --- /dev/null +++ b/salt/storage/tools/sbin/so-nsm-mount-virtio @@ -0,0 +1,171 @@ +#!/bin/bash + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +# Usage: +# so-nsm-mount-virtio +# +# Options: +# None - script automatically configures /dev/vdb +# +# Examples: +# 1. Configure and mount virtio-blk device: +# ```bash +# sudo so-nsm-mount-virtio +# ``` +# +# Notes: +# - Requires root privileges +# - Mounts /dev/vdb as /nsm +# - Creates XFS filesystem if needed +# - Configures persistent mount via /etc/fstab +# - Safe to run multiple times +# +# Description: +# This script automates the configuration and mounting of virtio-blk devices +# as /nsm in Security Onion virtual machines. It performs these steps: +# +# Dependencies: +# - xfsprogs: Required for XFS filesystem operations +# +# 1. Safety Checks: +# - Verifies root privileges +# - Checks if /nsm is already mounted +# - Verifies /dev/vdb exists +# +# 2. Filesystem Creation: +# - Creates XFS filesystem on /dev/vdb if not already formatted +# +# 3. Mount Configuration: +# - Creates /nsm directory if needed +# - Adds entry to /etc/fstab for persistence +# - Mounts the filesystem as /nsm +# +# Exit Codes: +# 0: Success conditions: +# - Device configured and mounted +# - Already properly mounted +# 1: Error conditions: +# - Must be run as root +# - Device /dev/vdb not found +# - Filesystem creation failed +# - Mount operation failed +# +# Logging: +# - All operations logged to /opt/so/log/so-nsm-mount-virtio.log + +set -e + +LOG_FILE="/opt/so/log/so-nsm-mount-virtio.log" +DEVICE="/dev/vdb" +MOUNT_POINT="/nsm" + +# Function to log messages +log() { + echo "$(date '+%Y-%m-%d %H:%M:%S') $1" | tee -a "$LOG_FILE" +} + +# Function to log errors +log_error() { + echo "$(date '+%Y-%m-%d %H:%M:%S') ERROR: $1" | tee -a "$LOG_FILE" >&2 +} + +# Function to check if running as root +check_root() { + if [ "$EUID" -ne 0 ]; then + log_error "Must be run as root" + exit 1 + fi +} + +# Main execution +main() { + log "==========================================" + log "Starting virtio-blk NSM mount process" + log "==========================================" + + # Check root privileges + check_root + + # Check if already mounted + if mountpoint -q "$MOUNT_POINT"; then + log "$MOUNT_POINT is already mounted" + log "==========================================" + exit 0 + fi + + # Check if device exists + if [ ! -b "$DEVICE" ]; then + log_error "Device $DEVICE not found" + log "==========================================" + exit 1 + fi + + log "Found device: $DEVICE" + + # Get device size + local size=$(lsblk -dbn -o SIZE "$DEVICE" 2>/dev/null | numfmt --to=iec) + log "Device size: $size" + + # Check if device has filesystem + if ! blkid "$DEVICE" | grep -q 'TYPE="xfs"'; then + log "Creating XFS filesystem on $DEVICE" + if ! mkfs.xfs -f "$DEVICE" 2>&1 | tee -a "$LOG_FILE"; then + log_error "Failed to create filesystem" + log "==========================================" + exit 1 + fi + log "Filesystem created successfully" + else + log "Device already has XFS filesystem" + fi + + # Create mount point + if [ ! -d "$MOUNT_POINT" ]; then + log "Creating mount point $MOUNT_POINT" + mkdir -p "$MOUNT_POINT" + fi + + # Add to fstab if not present + if ! grep -q "$DEVICE.*$MOUNT_POINT" /etc/fstab; then + log "Adding entry to /etc/fstab" + echo "$DEVICE $MOUNT_POINT xfs defaults 0 0" >> /etc/fstab + log "Entry added to /etc/fstab" + else + log "Entry already exists in /etc/fstab" + fi + + # Mount the filesystem + log "Mounting $DEVICE to $MOUNT_POINT" + if mount "$MOUNT_POINT" 2>&1 | tee -a "$LOG_FILE"; then + log "Successfully mounted $DEVICE to $MOUNT_POINT" + + # Verify mount + if mountpoint -q "$MOUNT_POINT"; then + log "Mount verified successfully" + + # Display mount information + log "Mount details:" + df -h "$MOUNT_POINT" | tail -n 1 | tee -a "$LOG_FILE" + else + log_error "Mount verification failed" + log "==========================================" + exit 1 + fi + else + log_error "Failed to mount $DEVICE" + log "==========================================" + exit 1 + fi + + log "==========================================" + log "Virtio-blk NSM mount process completed successfully" + log "==========================================" + exit 0 +} + +# Run main function +main From af42c31740441863edad62aa9d68a338ee91ec0e Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 8 Oct 2025 13:24:54 -0400 Subject: [PATCH 11/22] update yaml for annotation --- salt/soc/dyanno/hypervisor/hypervisor.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/salt/soc/dyanno/hypervisor/hypervisor.yaml b/salt/soc/dyanno/hypervisor/hypervisor.yaml index 7ae0631cb..b3b802d8e 100644 --- a/salt/soc/dyanno/hypervisor/hypervisor.yaml +++ b/salt/soc/dyanno/hypervisor/hypervisor.yaml @@ -64,21 +64,21 @@ hypervisor: readonly: true forcedType: int - field: nsm_size - label: "Size of /nsm, in GB. Only used if there is not a passthrough disk." + label: "Size of virtual disk to create and use for /nsm, in GB. Only applicable if no passthrough disk." forcedType: int readonly: true - field: disk - label: "Disk(s) for passthrough to /nsm. Free: FREE | Total: TOTAL" + label: "Disk(s) to passthrough for /nsm. Free: FREE | Total: TOTAL" readonly: true options: [] forcedType: '[]int' - field: copper - label: "Copper port(s) for passthrough. Free: FREE | Total: TOTAL" + label: "Copper port(s) to passthrough. Free: FREE | Total: TOTAL" readonly: true options: [] forcedType: '[]int' - field: sfp - label: "SFP port(s) for passthrough. Free: FREE | Total: TOTAL" + label: "SFP port(s) to passthrough. Free: FREE | Total: TOTAL" readonly: true options: [] forcedType: '[]int' From a3e7649a3c0ca1ecb2263e4c4a51baddbc0af4e5 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 8 Oct 2025 13:52:34 -0400 Subject: [PATCH 12/22] minor hypervisor annotation --- salt/soc/dyanno/hypervisor/hypervisor.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/salt/soc/dyanno/hypervisor/hypervisor.yaml b/salt/soc/dyanno/hypervisor/hypervisor.yaml index b3b802d8e..143a2f5cb 100644 --- a/salt/soc/dyanno/hypervisor/hypervisor.yaml +++ b/salt/soc/dyanno/hypervisor/hypervisor.yaml @@ -64,21 +64,21 @@ hypervisor: readonly: true forcedType: int - field: nsm_size - label: "Size of virtual disk to create and use for /nsm, in GB. Only applicable if no passthrough disk." + label: "Size of virtual disk to create and use for /nsm, in GB. Only applicable if no pass-through disk." forcedType: int readonly: true - field: disk - label: "Disk(s) to passthrough for /nsm. Free: FREE | Total: TOTAL" + label: "Disk(s) to pass through for /nsm. Free: FREE | Total: TOTAL" readonly: true options: [] forcedType: '[]int' - field: copper - label: "Copper port(s) to passthrough. Free: FREE | Total: TOTAL" + label: "Copper port(s) to pass through. Free: FREE | Total: TOTAL" readonly: true options: [] forcedType: '[]int' - field: sfp - label: "SFP port(s) to passthrough. Free: FREE | Total: TOTAL" + label: "SFP port(s) to pass through. Free: FREE | Total: TOTAL" readonly: true options: [] forcedType: '[]int' From e551c6e037579cc8c7cfa8f17874b612130a3a33 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Thu, 9 Oct 2025 10:19:25 -0400 Subject: [PATCH 13/22] owner and perms of volumes --- .../tools/sbin_jinja/so-kvm-create-volume | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume index 57309ec8e..2322c3a94 100644 --- a/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume +++ b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume @@ -78,7 +78,7 @@ used during VM provisioning to add dedicated NSM storage volumes. - Volume files are stored in `/nsm/libvirt/volumes/` with naming pattern `-nsm.img`. - Volumes are attached as `/dev/vdb` using virtio-blk for high performance. - The script checks available disk space before creating the volume. -- Ownership is set to `socore:socore` with permissions `644`. +- Ownership is set to `qemu:qemu` with permissions `640`. - Without the `-S` flag, the VM remains stopped after volume attachment. **Description:** @@ -98,7 +98,7 @@ The `so-kvm-create-volume` script creates and attaches NSM storage volumes using 3. **Volume Creation:** - Creates volume directory if it doesn't exist - Uses `qemu-img create` with full pre-allocation - - Sets proper ownership (socore:socore) and permissions (644) + - Sets proper ownership (qemu:qemu) and permissions (640) - Validates volume creation success 4. **Volume Attachment:** @@ -279,20 +279,20 @@ def create_volume_file(vm_name, size_gb, logger): logger.error(f"VOLUME: qemu-img error: {e.stderr.strip()}") raise VolumeCreationError(f"Failed to create volume: {e}") - # Set ownership to socore:socore + # Set ownership to qemu:qemu try: - socore_uid = pwd.getpwnam('socore').pw_uid - socore_gid = grp.getgrnam('socore').gr_gid - os.chown(volume_path, socore_uid, socore_gid) - logger.info(f"VOLUME: Set ownership to socore:socore") + qemu_uid = pwd.getpwnam('qemu').pw_uid + qemu_gid = grp.getgrnam('qemu').gr_gid + os.chown(volume_path, qemu_uid, qemu_gid) + logger.info(f"VOLUME: Set ownership to qemu:qemu") except (KeyError, OSError) as e: logger.error(f"VOLUME: Failed to set ownership: {e}") raise VolumeCreationError(f"Failed to set ownership: {e}") - # Set permissions to 644 + # Set permissions to 640 try: - os.chmod(volume_path, 0o644) - logger.info(f"VOLUME: Set permissions to 644") + os.chmod(volume_path, 0o640) + logger.info(f"VOLUME: Set permissions to 640") except OSError as e: logger.error(f"VOLUME: Failed to set permissions: {e}") raise VolumeCreationError(f"Failed to set permissions: {e}") @@ -492,10 +492,10 @@ def main(): # Ensure volume directory exists before checking disk space try: - os.makedirs(VOLUME_DIR, mode=0o755, exist_ok=True) - socore_uid = pwd.getpwnam('socore').pw_uid - socore_gid = grp.getgrnam('socore').gr_gid - os.chown(VOLUME_DIR, socore_uid, socore_gid) + os.makedirs(VOLUME_DIR, mode=0o754, exist_ok=True) + qemu_uid = pwd.getpwnam('qemu').pw_uid + qemu_gid = grp.getgrnam('qemu').gr_gid + os.chown(VOLUME_DIR, qemu_uid, qemu_gid) logger.debug(f"VOLUME: Ensured volume directory exists: {VOLUME_DIR}") except Exception as e: logger.error(f"VOLUME: Failed to create volume directory: {e}") From 09d699432a29e66163781703fb8a6b63a7fae733 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Fri, 10 Oct 2025 17:07:02 -0400 Subject: [PATCH 14/22] ui notification of nsm volume creation failure and cleanup of vm inventory in soc grid config for hypervisor --- salt/_modules/hypervisor.py | 91 ++++++++ salt/hypervisor/map.jinja | 22 +- .../engines/master/virtual_node_manager.py | 204 +++++++++++++++++- salt/soc/dyanno/hypervisor/map.jinja | 1 + .../dyanno/hypervisor/remove_failed_vm.sls | 51 +++++ 5 files changed, 355 insertions(+), 14 deletions(-) create mode 100644 salt/_modules/hypervisor.py create mode 100644 salt/soc/dyanno/hypervisor/remove_failed_vm.sls diff --git a/salt/_modules/hypervisor.py b/salt/_modules/hypervisor.py new file mode 100644 index 000000000..7119c8507 --- /dev/null +++ b/salt/_modules/hypervisor.py @@ -0,0 +1,91 @@ +#!/opt/saltstack/salt/bin/python3 + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. +# +# Note: Per the Elastic License 2.0, the second limitation states: +# +# "You may not move, change, disable, or circumvent the license key functionality +# in the software, and you may not remove or obscure any functionality in the +# software that is protected by the license key." + +""" +Salt execution module for hypervisor operations. + +This module provides functions for managing hypervisor configurations, +including VM file management. +""" + +import json +import logging +import os + +log = logging.getLogger(__name__) + +__virtualname__ = 'hypervisor' + + +def __virtual__(): + """ + Only load this module if we're on a system that can manage hypervisors. + """ + return __virtualname__ + + +def remove_vm_from_vms_file(vms_file_path, vm_hostname, vm_role): + """ + Remove a VM entry from the hypervisorVMs file. + + Args: + vms_file_path (str): Path to the hypervisorVMs file + vm_hostname (str): Hostname of the VM to remove (without role suffix) + vm_role (str): Role of the VM + + Returns: + dict: Result dictionary with success status and message + + CLI Example: + salt '*' hypervisor.remove_vm_from_vms_file /opt/so/saltstack/local/salt/hypervisor/hosts/hypervisor1VMs node1 nsm + """ + try: + # Check if file exists + if not os.path.exists(vms_file_path): + msg = f"VMs file not found: {vms_file_path}" + log.error(msg) + return {'result': False, 'comment': msg} + + # Read current VMs + with open(vms_file_path, 'r') as f: + content = f.read().strip() + vms = json.loads(content) if content else [] + + # Find and remove the VM entry + original_count = len(vms) + vms = [vm for vm in vms if not (vm.get('hostname') == vm_hostname and vm.get('role') == vm_role)] + + if len(vms) < original_count: + # VM was found and removed, write back to file + with open(vms_file_path, 'w') as f: + json.dump(vms, f, indent=2) + + # Set socore:socore ownership (939:939) + os.chown(vms_file_path, 939, 939) + + msg = f"Removed VM {vm_hostname}_{vm_role} from {vms_file_path}" + log.info(msg) + return {'result': True, 'comment': msg} + else: + msg = f"VM {vm_hostname}_{vm_role} not found in {vms_file_path}" + log.warning(msg) + return {'result': False, 'comment': msg} + + except json.JSONDecodeError as e: + msg = f"Failed to parse JSON in {vms_file_path}: {str(e)}" + log.error(msg) + return {'result': False, 'comment': msg} + except Exception as e: + msg = f"Failed to remove VM {vm_hostname}_{vm_role} from {vms_file_path}: {str(e)}" + log.error(msg) + return {'result': False, 'comment': msg} diff --git a/salt/hypervisor/map.jinja b/salt/hypervisor/map.jinja index 3519f6078..087fd7bf7 100644 --- a/salt/hypervisor/map.jinja +++ b/salt/hypervisor/map.jinja @@ -58,10 +58,26 @@ {% set role = vm.get('role', '') %} {% do salt.log.debug('salt/hypervisor/map.jinja: Processing VM - hostname: ' ~ hostname ~ ', role: ' ~ role) %} - {# Load VM configuration from config file #} + {# Try to load VM configuration from config file first, then .error file if config doesn't exist #} {% set vm_file = 'hypervisor/hosts/' ~ hypervisor ~ '/' ~ hostname ~ '_' ~ role %} + {% set vm_error_file = vm_file ~ '.error' %} {% do salt.log.debug('salt/hypervisor/map.jinja: VM config file: ' ~ vm_file) %} - {% import_json vm_file as vm_state %} + + {# Check if base config file exists #} + {% set config_exists = salt['file.file_exists']('/opt/so/saltstack/local/salt/' ~ vm_file) %} + {% set error_exists = salt['file.file_exists']('/opt/so/saltstack/local/salt/' ~ vm_error_file) %} + + {% set vm_state = none %} + {% if config_exists %} + {% import_json vm_file as vm_state %} + {% do salt.log.debug('salt/hypervisor/map.jinja: Loaded VM config from base file') %} + {% elif error_exists %} + {% import_json vm_error_file as vm_state %} + {% do salt.log.debug('salt/hypervisor/map.jinja: Loaded VM config from .error file') %} + {% else %} + {% do salt.log.warning('salt/hypervisor/map.jinja: No config or error file found for VM ' ~ hostname ~ '_' ~ role) %} + {% endif %} + {% if vm_state %} {% do salt.log.debug('salt/hypervisor/map.jinja: VM config content: ' ~ vm_state | tojson) %} {% set vm_data = {'config': vm_state.config} %} @@ -85,7 +101,7 @@ {% endif %} {% do vms.update({hostname ~ '_' ~ role: vm_data}) %} {% else %} - {% do salt.log.debug('salt/hypervisor/map.jinja: Config file empty: ' ~ vm_file) %} + {% do salt.log.debug('salt/hypervisor/map.jinja: Skipping VM ' ~ hostname ~ '_' ~ role ~ ' - no config available') %} {% endif %} {% endfor %} diff --git a/salt/salt/engines/master/virtual_node_manager.py b/salt/salt/engines/master/virtual_node_manager.py index 1c4eae7ea..270a93c11 100644 --- a/salt/salt/engines/master/virtual_node_manager.py +++ b/salt/salt/engines/master/virtual_node_manager.py @@ -117,7 +117,7 @@ Exit Codes: 4: VM provisioning failure (so-salt-cloud execution failed) Logging: - Log files are written to /opt/so/log/salt/engines/virtual_node_manager.log + Log files are written to /opt/so/log/salt/engines/virtual_node_manager Comprehensive logging includes: - Hardware validation details - PCI ID conversion process @@ -138,23 +138,49 @@ import pwd import grp import salt.config import salt.runner +import salt.client from typing import Dict, List, Optional, Tuple, Any from datetime import datetime, timedelta from threading import Lock -# Get socore uid/gid -SOCORE_UID = pwd.getpwnam('socore').pw_uid -SOCORE_GID = grp.getgrnam('socore').gr_gid - -# Initialize Salt runner once +# Initialize Salt runner and local client once opts = salt.config.master_config('/etc/salt/master') opts['output'] = 'json' runner = salt.runner.RunnerClient(opts) +local = salt.client.LocalClient() + +# Get socore uid/gid for file ownership +SOCORE_UID = pwd.getpwnam('socore').pw_uid +SOCORE_GID = grp.getgrnam('socore').gr_gid # Configure logging log = logging.getLogger(__name__) log.setLevel(logging.DEBUG) +# Prevent propagation to parent loggers to avoid duplicate log entries +log.propagate = False + +# Add file handler for dedicated log file +log_dir = '/opt/so/log/salt' +log_file = os.path.join(log_dir, 'virtual_node_manager') + +# Create log directory if it doesn't exist +os.makedirs(log_dir, exist_ok=True) + +# Create file handler +file_handler = logging.FileHandler(log_file) +file_handler.setLevel(logging.DEBUG) + +# Create formatter +formatter = logging.Formatter( + '%(asctime)s [%(name)s:%(lineno)d][%(levelname)-8s][%(process)d] %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' +) +file_handler.setFormatter(formatter) + +# Add handler to logger +log.addHandler(file_handler) + # Constants DEFAULT_INTERVAL = 30 DEFAULT_BASE_PATH = '/opt/so/saltstack/local/salt/hypervisor/hosts' @@ -203,6 +229,39 @@ def write_json_file(file_path: str, data: Any) -> None: except Exception as e: log.error("Failed to write JSON file %s: %s", file_path, str(e)) raise +def remove_vm_from_vms_file(vms_file_path: str, vm_hostname: str, vm_role: str) -> bool: + """ + Remove a VM entry from the hypervisorVMs file. + + Args: + vms_file_path: Path to the hypervisorVMs file + vm_hostname: Hostname of the VM to remove (without role suffix) + vm_role: Role of the VM + + Returns: + bool: True if VM was removed, False otherwise + """ + try: + # Read current VMs + vms = read_json_file(vms_file_path) + + # Find and remove the VM entry + original_count = len(vms) + vms = [vm for vm in vms if not (vm.get('hostname') == vm_hostname and vm.get('role') == vm_role)] + + if len(vms) < original_count: + # VM was found and removed, write back to file + write_json_file(vms_file_path, vms) + log.info("Removed VM %s_%s from %s", vm_hostname, vm_role, vms_file_path) + return True + else: + log.warning("VM %s_%s not found in %s", vm_hostname, vm_role, vms_file_path) + return False + + except Exception as e: + log.error("Failed to remove VM %s_%s from %s: %s", vm_hostname, vm_role, vms_file_path, str(e)) + return False + def read_yaml_file(file_path: str) -> dict: """Read and parse a YAML file.""" @@ -558,6 +617,13 @@ def mark_vm_failed(vm_file: str, error_code: int, message: str) -> None: # Remove the original file since we'll create an error file os.remove(vm_file) + # Clear hardware resource claims so failed VMs don't consume resources + # Keep nsm_size for reference but clear cpu, memory, sfp, copper + config.pop('cpu', None) + config.pop('memory', None) + config.pop('sfp', None) + config.pop('copper', None) + # Create error file error_file = f"{vm_file}.error" data = { @@ -586,8 +652,16 @@ def mark_invalid_hardware(hypervisor_path: str, vm_name: str, config: dict, erro # Join all messages with proper sentence structure full_message = "Hardware validation failure: " + " ".join(error_messages) + # Clear hardware resource claims so failed VMs don't consume resources + # Keep nsm_size for reference but clear cpu, memory, sfp, copper + config_copy = config.copy() + config_copy.pop('cpu', None) + config_copy.pop('memory', None) + config_copy.pop('sfp', None) + config_copy.pop('copper', None) + data = { - 'config': config, + 'config': config_copy, 'status': 'error', 'timestamp': datetime.now().isoformat(), 'error_details': { @@ -634,6 +708,61 @@ def validate_vrt_license() -> bool: log.error("Error reading license file: %s", str(e)) return False +def check_hypervisor_disk_space(hypervisor: str, size_gb: int) -> Tuple[bool, Optional[str]]: + """ + Check if hypervisor has sufficient disk space for volume creation. + + Args: + hypervisor: Hypervisor hostname + size_gb: Required size in GB + + Returns: + Tuple of (has_space, error_message) + """ + try: + # Get hypervisor minion ID + hypervisor_minion = f"{hypervisor}_hypervisor" + + # Check disk space on /nsm/libvirt/volumes using LocalClient + result = local.cmd( + hypervisor_minion, + 'cmd.run', + ["df -BG /nsm/libvirt/volumes | tail -1 | awk '{print $4}' | sed 's/G//'"] + ) + + if not result or hypervisor_minion not in result: + log.error("Failed to check disk space on hypervisor %s", hypervisor) + return False, "Failed to check disk space on hypervisor" + + available_gb_str = result[hypervisor_minion].strip() + if not available_gb_str: + log.error("Empty disk space response from hypervisor %s", hypervisor) + return False, "Failed to get disk space information" + + try: + available_gb = float(available_gb_str) + except ValueError: + log.error("Invalid disk space value from hypervisor %s: %s", hypervisor, available_gb_str) + return False, f"Invalid disk space value: {available_gb_str}" + + # Add 10% buffer for filesystem overhead + required_gb = size_gb * 1.1 + + log.debug("Hypervisor %s disk space check: Available=%.2fGB, Required=%.2fGB", + hypervisor, available_gb, required_gb) + + if available_gb < required_gb: + error_msg = f"Insufficient disk space on hypervisor {hypervisor}. Available: {available_gb:.2f}GB, Required: {required_gb:.2f}GB (including 10% overhead)" + log.error(error_msg) + return False, error_msg + + log.info("Hypervisor %s has sufficient disk space for %dGB volume", hypervisor, size_gb) + return True, None + + except Exception as e: + log.error("Error checking disk space on hypervisor %s: %s", hypervisor, str(e)) + return False, f"Error checking disk space: {str(e)}" + def process_vm_creation(hypervisor_path: str, vm_config: dict) -> None: """ Process a single VM creation request. @@ -695,6 +824,33 @@ def process_vm_creation(hypervisor_path: str, vm_config: dict) -> None: log.warning("VM: %s - Both disk and nsm_size specified. disk takes precedence, nsm_size will be ignored.", vm_name) + # Check disk space BEFORE creating VM if nsm_size is specified + if has_nsm_size and not has_disk: + size_gb = int(vm_config['nsm_size']) + has_space, space_error = check_hypervisor_disk_space(hypervisor, size_gb) + if not has_space: + log.error("VM: %s - %s", vm_name, space_error) + + # Send Volume nsm Create Failed status event + try: + subprocess.run([ + 'so-salt-emit-vm-deployment-status-event', + '-v', vm_name, + '-H', hypervisor, + '-s', 'Volume nsm Create Failed' + ], check=True) + except subprocess.CalledProcessError as e: + log.error("Failed to emit volume create failed event for %s: %s", vm_name, str(e)) + + mark_invalid_hardware( + hypervisor_path, + vm_name, + vm_config, + {'disk_space': f"Insufficient disk space for {size_gb}GB volume: {space_error}"} + ) + return + log.debug("VM: %s - Hypervisor has sufficient space for %dGB volume", vm_name, size_gb) + # Initial hardware validation against model is_valid, errors = validate_hardware_request(model_config, vm_config) if not is_valid: @@ -967,12 +1123,21 @@ def process_hypervisor(hypervisor_path: str) -> None: if not nodes_config: log.debug("Empty VMs configuration in %s", vms_file) - # Get existing VMs + # Get existing VMs and track failed VMs separately existing_vms = set() + failed_vms = set() # VMs with .error files for file_path in glob.glob(os.path.join(hypervisor_path, '*_*')): basename = os.path.basename(file_path) - # Skip error and status files - if not basename.endswith('.error') and not basename.endswith('.status'): + # Skip status files + if basename.endswith('.status'): + continue + # Track VMs with .error files separately + if basename.endswith('.error'): + vm_name = basename[:-6] # Remove '.error' suffix + failed_vms.add(vm_name) + existing_vms.add(vm_name) # Also add to existing to prevent recreation + log.debug(f"Found failed VM with .error file: {vm_name}") + else: existing_vms.add(basename) # Process new VMs @@ -989,12 +1154,29 @@ def process_hypervisor(hypervisor_path: str) -> None: # process_vm_creation handles its own locking process_vm_creation(hypervisor_path, vm_config) - # Process VM deletions + # Process VM deletions (but skip failed VMs that only have .error files) vms_to_delete = existing_vms - configured_vms log.debug(f"Existing VMs: {existing_vms}") log.debug(f"Configured VMs: {configured_vms}") + log.debug(f"Failed VMs: {failed_vms}") log.debug(f"VMs to delete: {vms_to_delete}") for vm_name in vms_to_delete: + # Skip deletion if VM only has .error file (no actual VM to delete) + if vm_name in failed_vms: + error_file = os.path.join(hypervisor_path, f"{vm_name}.error") + base_file = os.path.join(hypervisor_path, vm_name) + # Only skip if there's no base file (VM never successfully created) + if not os.path.exists(base_file): + log.info(f"Skipping deletion of failed VM {vm_name} (VM never successfully created)") + # Clean up the .error and .status files since VM is no longer configured + if os.path.exists(error_file): + os.remove(error_file) + log.info(f"Removed .error file for unconfigured VM: {vm_name}") + status_file = os.path.join(hypervisor_path, f"{vm_name}.status") + if os.path.exists(status_file): + os.remove(status_file) + log.info(f"Removed .status file for unconfigured VM: {vm_name}") + continue log.info(f"Initiating deletion process for VM: {vm_name}") process_vm_deletion(hypervisor_path, vm_name) diff --git a/salt/soc/dyanno/hypervisor/map.jinja b/salt/soc/dyanno/hypervisor/map.jinja index 4a5107371..139003f17 100644 --- a/salt/soc/dyanno/hypervisor/map.jinja +++ b/salt/soc/dyanno/hypervisor/map.jinja @@ -3,6 +3,7 @@ {# Define the list of process steps in order (case-sensitive) #} {% set PROCESS_STEPS = [ 'Processing', + 'Volume nsm Create Failed', 'IP Configuration', 'Starting Create', 'Executing Deploy Script', diff --git a/salt/soc/dyanno/hypervisor/remove_failed_vm.sls b/salt/soc/dyanno/hypervisor/remove_failed_vm.sls new file mode 100644 index 000000000..a47eff595 --- /dev/null +++ b/salt/soc/dyanno/hypervisor/remove_failed_vm.sls @@ -0,0 +1,51 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. +# +# Note: Per the Elastic License 2.0, the second limitation states: +# +# "You may not move, change, disable, or circumvent the license key functionality +# in the software, and you may not remove or obscure any functionality in the +# software that is protected by the license key." + +{% if 'vrt' in salt['pillar.get']('features', []) %} + +{% do salt.log.info('soc/dyanno/hypervisor/remove_failed_vm: Running') %} +{% set vm_name = pillar.get('vm_name') %} +{% set hypervisor = pillar.get('hypervisor') %} + +{% if vm_name and hypervisor %} +{% set vm_parts = vm_name.split('_') %} +{% if vm_parts | length >= 2 %} +{% set vm_role = vm_parts[-1] %} +{% set vm_hostname = '_'.join(vm_parts[:-1]) %} +{% set vms_file = '/opt/so/saltstack/local/salt/hypervisor/hosts/' ~ hypervisor ~ 'VMs' %} + +{% do salt.log.info('soc/dyanno/hypervisor/remove_failed_vm: Removing VM ' ~ vm_name ~ ' from ' ~ vms_file) %} + +remove_vm_{{ vm_name }}_from_vms_file: + module.run: + - name: hypervisor.remove_vm_from_vms_file + - vms_file_path: {{ vms_file }} + - vm_hostname: {{ vm_hostname }} + - vm_role: {{ vm_role }} + +{% else %} +{% do salt.log.error('soc/dyanno/hypervisor/remove_failed_vm: Invalid vm_name format: ' ~ vm_name) %} +{% endif %} +{% else %} +{% do salt.log.error('soc/dyanno/hypervisor/remove_failed_vm: Missing required pillar data (vm_name or hypervisor)') %} +{% endif %} + +{% do salt.log.info('soc/dyanno/hypervisor/remove_failed_vm: Completed') %} + +{% else %} + +{% do salt.log.error( + 'Hypervisor nodes are a feature supported only for customers with a valid license. ' + 'Contact Security Onion Solutions, LLC via our website at https://securityonionsolutions.com ' + 'for more information about purchasing a license to enable this feature.' +) %} + +{% endif %} From fe3caf66a112e032107c1cc8f480713c502615ac Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Fri, 10 Oct 2025 17:21:09 -0400 Subject: [PATCH 15/22] update failure description --- salt/salt/engines/master/virtual_node_manager.py | 4 ++-- salt/soc/dyanno/hypervisor/map.jinja | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/salt/salt/engines/master/virtual_node_manager.py b/salt/salt/engines/master/virtual_node_manager.py index 270a93c11..cc3a3fd81 100644 --- a/salt/salt/engines/master/virtual_node_manager.py +++ b/salt/salt/engines/master/virtual_node_manager.py @@ -831,13 +831,13 @@ def process_vm_creation(hypervisor_path: str, vm_config: dict) -> None: if not has_space: log.error("VM: %s - %s", vm_name, space_error) - # Send Volume nsm Create Failed status event + # Send Hypervisor NSM Disk Full status event try: subprocess.run([ 'so-salt-emit-vm-deployment-status-event', '-v', vm_name, '-H', hypervisor, - '-s', 'Volume nsm Create Failed' + '-s', 'Hypervisor NSM Disk Full' ], check=True) except subprocess.CalledProcessError as e: log.error("Failed to emit volume create failed event for %s: %s", vm_name, str(e)) diff --git a/salt/soc/dyanno/hypervisor/map.jinja b/salt/soc/dyanno/hypervisor/map.jinja index 139003f17..8fa54c146 100644 --- a/salt/soc/dyanno/hypervisor/map.jinja +++ b/salt/soc/dyanno/hypervisor/map.jinja @@ -3,7 +3,7 @@ {# Define the list of process steps in order (case-sensitive) #} {% set PROCESS_STEPS = [ 'Processing', - 'Volume nsm Create Failed', + 'Hypervisor NSM Disk Full', 'IP Configuration', 'Starting Create', 'Executing Deploy Script', From 254e782da67c02436e089dfdafa73fe3c88af937 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Fri, 10 Oct 2025 22:15:20 -0400 Subject: [PATCH 16/22] add volume creation and configuration process steps --- salt/soc/dyanno/hypervisor/map.jinja | 2 ++ 1 file changed, 2 insertions(+) diff --git a/salt/soc/dyanno/hypervisor/map.jinja b/salt/soc/dyanno/hypervisor/map.jinja index 8fa54c146..cb0810959 100644 --- a/salt/soc/dyanno/hypervisor/map.jinja +++ b/salt/soc/dyanno/hypervisor/map.jinja @@ -9,6 +9,8 @@ 'Executing Deploy Script', 'Initialize Minion Pillars', 'Created Instance', + 'Volume Creation', + 'Volume Configuration', 'Hardware Configuration', 'Highstate Initiated', 'Destroyed Instance' From f9c5aa3fefa50af99a4722005adfbb5c9f1a959e Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Tue, 14 Oct 2025 09:36:05 -0400 Subject: [PATCH 17/22] remove PROCESS_STEPS from hypervisor annotation --- salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja | 3 --- 1 file changed, 3 deletions(-) diff --git a/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja b/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja index 926263b9d..8e49b60b5 100644 --- a/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja +++ b/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja @@ -13,7 +13,6 @@ {%- import_yaml 'soc/dyanno/hypervisor/hypervisor.yaml' as ANNOTATION -%} {%- from 'hypervisor/map.jinja' import HYPERVISORS -%} -{%- from 'soc/dyanno/hypervisor/map.jinja' import PROCESS_STEPS -%} {%- set TEMPLATE = ANNOTATION.hypervisor.hosts.pop('defaultHost') -%} @@ -27,7 +26,6 @@ {%- if baseDomainStatus == 'Initialized' %} {%- if vm_list %} #### Virtual Machines -Status values: {% for step in PROCESS_STEPS %}{{ step }}{% if not loop.last %}, {% endif %}{% endfor %}. "Last Updated" shows when status changed. After "Highstate Initiated", only "Destroyed Instance" updates the timestamp. | Name | Status | CPU Cores | Memory (GB)| Disk | Copper | SFP | Last Updated | |--------------------|--------------------|-----------|------------|------|--------|------|---------------------| @@ -42,7 +40,6 @@ Status values: {% for step in PROCESS_STEPS %}{{ step }}{% if not loop.last %}, {%- endfor %} {%- else %} #### Virtual Machines -Status values: {% for step in PROCESS_STEPS %}{{ step }}{% if not loop.last %}, {% endif %}{% endfor %}. "Last Updated" shows when status changed. After "Highstate Initiated", only "Destroyed Instance" updates the timestamp. No Virtual Machines Found {%- endif %} From 793e98f75ce3c4e939606d3b80ce69da31d8b8d2 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Tue, 14 Oct 2025 10:37:16 -0400 Subject: [PATCH 18/22] update annotation after failed vm removal from VMs file --- salt/salt/engines/master/virtual_node_manager.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/salt/salt/engines/master/virtual_node_manager.py b/salt/salt/engines/master/virtual_node_manager.py index cc3a3fd81..6d88bd688 100644 --- a/salt/salt/engines/master/virtual_node_manager.py +++ b/salt/salt/engines/master/virtual_node_manager.py @@ -1176,6 +1176,14 @@ def process_hypervisor(hypervisor_path: str) -> None: if os.path.exists(status_file): os.remove(status_file) log.info(f"Removed .status file for unconfigured VM: {vm_name}") + + # Trigger hypervisor annotation update to reflect the removal + try: + log.info(f"Triggering hypervisor annotation update after removing failed VM: {vm_name}") + runner.cmd('state.orch', ['orch.dyanno_hypervisor']) + except Exception as e: + log.error(f"Failed to trigger hypervisor annotation update for {vm_name}: {str(e)}") + continue log.info(f"Initiating deletion process for VM: {vm_name}") process_vm_deletion(hypervisor_path, vm_name) From d56af4acab4a13a3f484f76980900483c6634432 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Tue, 14 Oct 2025 10:58:57 -0400 Subject: [PATCH 19/22] remove .log extension --- salt/manager/tools/sbin_jinja/so-salt-cloud | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/salt/manager/tools/sbin_jinja/so-salt-cloud b/salt/manager/tools/sbin_jinja/so-salt-cloud index a1f99712a..c8177e1bc 100644 --- a/salt/manager/tools/sbin_jinja/so-salt-cloud +++ b/salt/manager/tools/sbin_jinja/so-salt-cloud @@ -211,7 +211,7 @@ Exit Codes: Logging: -- Logs are written to /opt/so/log/salt/so-salt-cloud.log. +- Logs are written to /opt/so/log/salt/so-salt-cloud. - Both file and console logging are enabled for real-time monitoring. """ @@ -233,7 +233,7 @@ local = salt.client.LocalClient() logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -file_handler = logging.FileHandler('/opt/so/log/salt/so-salt-cloud.log') +file_handler = logging.FileHandler('/opt/so/log/salt/so-salt-cloud') console_handler = logging.StreamHandler() formatter = logging.Formatter('%(asctime)s %(message)s') From 860710f5f922cad8ab82ce4dcc3495d18fd3529c Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Tue, 14 Oct 2025 11:03:00 -0400 Subject: [PATCH 20/22] remove .log extension --- salt/storage/tools/sbin/so-nsm-mount-nvme | 2 +- salt/storage/tools/sbin/so-nsm-mount-virtio | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/salt/storage/tools/sbin/so-nsm-mount-nvme b/salt/storage/tools/sbin/so-nsm-mount-nvme index f612c9915..fdde0c2e9 100644 --- a/salt/storage/tools/sbin/so-nsm-mount-nvme +++ b/salt/storage/tools/sbin/so-nsm-mount-nvme @@ -81,7 +81,7 @@ set -e -LOG_FILE="/opt/so/log/so-nsm-mount-nvme.log" +LOG_FILE="/opt/so/log/so-nsm-mount-nvme" VG_NAME="" LV_NAME="nsm" MOUNT_POINT="/nsm" diff --git a/salt/storage/tools/sbin/so-nsm-mount-virtio b/salt/storage/tools/sbin/so-nsm-mount-virtio index 8385d7c21..03476e378 100644 --- a/salt/storage/tools/sbin/so-nsm-mount-virtio +++ b/salt/storage/tools/sbin/so-nsm-mount-virtio @@ -55,11 +55,11 @@ # - Mount operation failed # # Logging: -# - All operations logged to /opt/so/log/so-nsm-mount-virtio.log +# - All operations logged to /opt/so/log/so-nsm-mount-virtio set -e -LOG_FILE="/opt/so/log/so-nsm-mount-virtio.log" +LOG_FILE="/opt/so/log/so-nsm-mount-virtio" DEVICE="/dev/vdb" MOUNT_POINT="/nsm" From c8aad2b03b9e668086696464aa24b202b4462bc7 Mon Sep 17 00:00:00 2001 From: Corey Ogburn Date: Tue, 14 Oct 2025 13:24:43 -0600 Subject: [PATCH 21/22] New Config Entries --- salt/soc/defaults.yaml | 2 ++ salt/soc/soc_soc.yaml | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/salt/soc/defaults.yaml b/salt/soc/defaults.yaml index d93b405b1..0a6285d34 100644 --- a/salt/soc/defaults.yaml +++ b/salt/soc/defaults.yaml @@ -1494,6 +1494,8 @@ soc: assistant: apiUrl: https://onionai.securityonion.net healthTimeoutSeconds: 3 + systemPromptAddendum: "" + systemPromptAddendumMaxLength: 50000 salt: queueDir: /opt/sensoroni/queue timeoutMs: 45000 diff --git a/salt/soc/soc_soc.yaml b/salt/soc/soc_soc.yaml index 3fa914227..589b995ef 100644 --- a/salt/soc/soc_soc.yaml +++ b/salt/soc/soc_soc.yaml @@ -589,6 +589,14 @@ soc: description: Timeout in seconds for the Onion AI health check. global: True advanced: True + systemPromptAddendum: + description: Additional context to provide to the AI assistant about this SOC deployment. This can include information about your environment, policies, or any other relevant details that can help the AI provide more accurate and tailored assistance. Long prompts may be shortened. + global: True + advanced: False + systemPromptAddendumMaxLength: + description: Maximum length of the system prompt addendum. Longer prompts will be truncated. + global: True + advanced: True client: assistant: enabled: From 3e22043ea6b6f7a691d986f5c4fb7176149ed1db Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 14 Oct 2025 15:08:51 -0500 Subject: [PATCH 22/22] es logging retention --- salt/elasticsearch/files/log4j2.properties | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/salt/elasticsearch/files/log4j2.properties b/salt/elasticsearch/files/log4j2.properties index 0a337e751..305069882 100644 --- a/salt/elasticsearch/files/log4j2.properties +++ b/salt/elasticsearch/files/log4j2.properties @@ -20,7 +20,7 @@ appender.rolling.strategy.type = DefaultRolloverStrategy appender.rolling.strategy.action.type = Delete appender.rolling.strategy.action.basepath = /var/log/elasticsearch appender.rolling.strategy.action.condition.type = IfFileName -appender.rolling.strategy.action.condition.glob = *.gz +appender.rolling.strategy.action.condition.glob = *.log.gz appender.rolling.strategy.action.condition.nested_condition.type = IfLastModified appender.rolling.strategy.action.condition.nested_condition.age = 7D @@ -29,19 +29,13 @@ appender.rolling_json.name = rolling_json appender.rolling_json.fileName = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}.json appender.rolling_json.layout.type = ECSJsonLayout appender.rolling_json.layout.dataset = elasticsearch.server -appender.rolling_json.filePattern = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}-%d{yyyy-MM-dd}-%i.json.gz +appender.rolling_json.filePattern = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}-%d{yyyy-MM-dd}.json.gz appender.rolling_json.policies.type = Policies appender.rolling_json.policies.time.type = TimeBasedTriggeringPolicy appender.rolling_json.policies.time.interval = 1 appender.rolling_json.policies.time.modulate = true appender.rolling_json.strategy.type = DefaultRolloverStrategy -appender.rolling_json.strategy.action.type = Delete -appender.rolling_json.strategy.action.basepath = /var/log/elasticsearch -appender.rolling_json.strategy.action.condition.type = IfFileName -appender.rolling_json.strategy.action.condition.glob = *.gz -appender.rolling_json.strategy.action.condition.nested_condition.type = IfLastModified -appender.rolling_json.strategy.action.condition.nested_condition.age = 7D - +appender.rolling_json.strategy.max = 1 rootLogger.level = info rootLogger.appenderRef.rolling.ref = rolling