diff --git a/salt/_modules/hypervisor.py b/salt/_modules/hypervisor.py new file mode 100644 index 000000000..7119c8507 --- /dev/null +++ b/salt/_modules/hypervisor.py @@ -0,0 +1,91 @@ +#!/opt/saltstack/salt/bin/python3 + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. +# +# Note: Per the Elastic License 2.0, the second limitation states: +# +# "You may not move, change, disable, or circumvent the license key functionality +# in the software, and you may not remove or obscure any functionality in the +# software that is protected by the license key." + +""" +Salt execution module for hypervisor operations. + +This module provides functions for managing hypervisor configurations, +including VM file management. +""" + +import json +import logging +import os + +log = logging.getLogger(__name__) + +__virtualname__ = 'hypervisor' + + +def __virtual__(): + """ + Only load this module if we're on a system that can manage hypervisors. + """ + return __virtualname__ + + +def remove_vm_from_vms_file(vms_file_path, vm_hostname, vm_role): + """ + Remove a VM entry from the hypervisorVMs file. + + Args: + vms_file_path (str): Path to the hypervisorVMs file + vm_hostname (str): Hostname of the VM to remove (without role suffix) + vm_role (str): Role of the VM + + Returns: + dict: Result dictionary with success status and message + + CLI Example: + salt '*' hypervisor.remove_vm_from_vms_file /opt/so/saltstack/local/salt/hypervisor/hosts/hypervisor1VMs node1 nsm + """ + try: + # Check if file exists + if not os.path.exists(vms_file_path): + msg = f"VMs file not found: {vms_file_path}" + log.error(msg) + return {'result': False, 'comment': msg} + + # Read current VMs + with open(vms_file_path, 'r') as f: + content = f.read().strip() + vms = json.loads(content) if content else [] + + # Find and remove the VM entry + original_count = len(vms) + vms = [vm for vm in vms if not (vm.get('hostname') == vm_hostname and vm.get('role') == vm_role)] + + if len(vms) < original_count: + # VM was found and removed, write back to file + with open(vms_file_path, 'w') as f: + json.dump(vms, f, indent=2) + + # Set socore:socore ownership (939:939) + os.chown(vms_file_path, 939, 939) + + msg = f"Removed VM {vm_hostname}_{vm_role} from {vms_file_path}" + log.info(msg) + return {'result': True, 'comment': msg} + else: + msg = f"VM {vm_hostname}_{vm_role} not found in {vms_file_path}" + log.warning(msg) + return {'result': False, 'comment': msg} + + except json.JSONDecodeError as e: + msg = f"Failed to parse JSON in {vms_file_path}: {str(e)}" + log.error(msg) + return {'result': False, 'comment': msg} + except Exception as e: + msg = f"Failed to remove VM {vm_hostname}_{vm_role} from {vms_file_path}: {str(e)}" + log.error(msg) + return {'result': False, 'comment': msg} diff --git a/salt/_modules/qcow2.py b/salt/_modules/qcow2.py index 6e71dc459..10c4d185b 100644 --- a/salt/_modules/qcow2.py +++ b/salt/_modules/qcow2.py @@ -7,12 +7,14 @@ """ Salt module for managing QCOW2 image configurations and VM hardware settings. This module provides functions -for modifying network configurations within QCOW2 images and adjusting virtual machine hardware settings. -It serves as a Salt interface to the so-qcow2-modify-network and so-kvm-modify-hardware scripts. 
+for modifying network configurations within QCOW2 images, adjusting virtual machine hardware settings, and
+creating virtual storage volumes. It serves as a Salt interface to the so-qcow2-modify-network,
+so-kvm-modify-hardware, and so-kvm-create-volume scripts.
 
-The module offers two main capabilities:
+The module offers three main capabilities:
 1. Network Configuration: Modify network settings (DHCP/static IP) within QCOW2 images
 2. Hardware Configuration: Adjust VM hardware settings (CPU, memory, PCI passthrough)
+3. Volume Management: Create and attach virtual storage volumes for NSM data
 
 This module is intended to work with Security Onion's virtualization infrastructure and is typically
 used in conjunction with salt-cloud for VM provisioning and management.
@@ -244,3 +246,90 @@ def modify_hardware_config(vm_name, cpu=None, memory=None, pci=None, start=False
     except Exception as e:
         log.error('qcow2 module: An error occurred while executing the script: {}'.format(e))
         raise
+
+def create_volume_config(vm_name, size_gb, start=False):
+    '''
+    Usage:
+        salt '*' qcow2.create_volume_config vm_name=<vm_name> size_gb=<size_gb> [start=<True|False>]
+
+    Options:
+        vm_name
+            Name of the virtual machine to attach the volume to
+        size_gb
+            Volume size in GB (positive integer)
+            This determines the capacity of the virtual storage volume
+        start
+            Boolean flag to start the VM after volume creation
+            Optional - defaults to False
+
+    Examples:
+        1. **Create 500GB Volume:**
+           ```bash
+           salt '*' qcow2.create_volume_config vm_name='sensor1_sensor' size_gb=500
+           ```
+           This creates a 500GB virtual volume for NSM storage
+
+        2. **Create 1TB Volume and Start VM:**
+           ```bash
+           salt '*' qcow2.create_volume_config vm_name='sensor1_sensor' size_gb=1000 start=True
+           ```
+           This creates a 1TB volume and starts the VM after attachment
+
+    Notes:
+        - VM must be stopped before volume creation
+        - Volume is created as a raw, fully pre-allocated disk image and attached to the VM
+        - This is an alternative to disk passthrough via modify_hardware_config
+        - Volume is automatically attached to the VM's libvirt configuration
+        - Requires so-kvm-create-volume script to be installed
+        - Volume files are stored in the hypervisor's VM storage directory
+
+    Description:
+        This function creates and attaches a virtual storage volume to a KVM virtual machine
+        using the so-kvm-create-volume script. It creates a raw disk image of the specified
+        size and attaches it to the VM for NSM (Network Security Monitoring) storage purposes.
+        This provides an alternative to physical disk passthrough, allowing flexible storage
+        allocation without requiring dedicated hardware. The VM can optionally be started
+        after the volume is successfully created and attached.
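+
+    Return Value:
+        A dict with 'retcode', 'stdout', and 'stderr' captured from the
+        underlying script invocation, e.g. {'retcode': 0, 'stdout': '...', 'stderr': ''}.
+        A non-zero retcode means the script reported a failure; see Exit Codes below.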
+
+    Exit Codes:
+        0: Success
+        1: An error occurred during execution (invalid parameters, insufficient
+           disk space, or a volume creation/attachment failure)
+
+    Logging:
+        - All operations are logged to the salt minion log
+        - Log entries are prefixed with 'qcow2 module:'
+        - Volume creation and attachment operations are logged
+        - Errors include detailed messages and stack traces
+        - Final status of volume creation is logged
+    '''
+
+    # Validate size_gb parameter
+    if not isinstance(size_gb, int) or size_gb <= 0:
+        raise ValueError('size_gb must be a positive integer.')
+
+    cmd = ['/usr/sbin/so-kvm-create-volume', '-v', vm_name, '-s', str(size_gb)]
+
+    if start:
+        cmd.append('-S')
+
+    log.info('qcow2 module: Executing command: {}'.format(' '.join(shlex.quote(arg) for arg in cmd)))
+
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True, check=False)
+        ret = {
+            'retcode': result.returncode,
+            'stdout': result.stdout,
+            'stderr': result.stderr
+        }
+        if result.returncode != 0:
+            log.error('qcow2 module: Script execution failed with return code {}: {}'.format(result.returncode, result.stderr))
+        else:
+            log.info('qcow2 module: Script executed successfully.')
+        return ret
+    except Exception as e:
+        log.error('qcow2 module: An error occurred while executing the script: {}'.format(e))
+        raise
diff --git a/salt/common/grains.sls b/salt/common/grains.sls
new file mode 100644
index 000000000..b8d3a4c90
--- /dev/null
+++ b/salt/common/grains.sls
@@ -0,0 +1,21 @@
+# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
+# https://securityonion.net/license; you may not use this file except in compliance with the
+# Elastic License 2.0.
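+
+# Records the total size of the /nsm filesystem (as reported by df, e.g. "500G")
+# in an nsm_total grain. Illustrative query once the grain is set:
+#   salt '*' grains.get nsm_total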
+ +{% set nsm_exists = salt['file.directory_exists']('/nsm') %} +{% if nsm_exists %} +{% set nsm_total = salt['cmd.shell']('df -BG /nsm | tail -1 | awk \'{print $2}\'') %} + +nsm_total: + grains.present: + - name: nsm_total + - value: {{ nsm_total }} + +{% else %} + +nsm_missing: + test.succeed_without_changes: + - name: /nsm does not exist, skipping grain assignment + +{% endif %} diff --git a/salt/common/init.sls b/salt/common/init.sls index 7137ff11f..eba18f651 100644 --- a/salt/common/init.sls +++ b/salt/common/init.sls @@ -4,6 +4,7 @@ {% from 'vars/globals.map.jinja' import GLOBALS %} include: + - common.grains - common.packages {% if GLOBALS.role in GLOBALS.manager_roles %} - manager.elasticsearch # needed for elastic_curl_config state diff --git a/salt/hypervisor/map.jinja b/salt/hypervisor/map.jinja index 3519f6078..087fd7bf7 100644 --- a/salt/hypervisor/map.jinja +++ b/salt/hypervisor/map.jinja @@ -58,10 +58,26 @@ {% set role = vm.get('role', '') %} {% do salt.log.debug('salt/hypervisor/map.jinja: Processing VM - hostname: ' ~ hostname ~ ', role: ' ~ role) %} - {# Load VM configuration from config file #} + {# Try to load VM configuration from config file first, then .error file if config doesn't exist #} {% set vm_file = 'hypervisor/hosts/' ~ hypervisor ~ '/' ~ hostname ~ '_' ~ role %} + {% set vm_error_file = vm_file ~ '.error' %} {% do salt.log.debug('salt/hypervisor/map.jinja: VM config file: ' ~ vm_file) %} - {% import_json vm_file as vm_state %} + + {# Check if base config file exists #} + {% set config_exists = salt['file.file_exists']('/opt/so/saltstack/local/salt/' ~ vm_file) %} + {% set error_exists = salt['file.file_exists']('/opt/so/saltstack/local/salt/' ~ vm_error_file) %} + + {% set vm_state = none %} + {% if config_exists %} + {% import_json vm_file as vm_state %} + {% do salt.log.debug('salt/hypervisor/map.jinja: Loaded VM config from base file') %} + {% elif error_exists %} + {% import_json vm_error_file as vm_state %} + {% do salt.log.debug('salt/hypervisor/map.jinja: Loaded VM config from .error file') %} + {% else %} + {% do salt.log.warning('salt/hypervisor/map.jinja: No config or error file found for VM ' ~ hostname ~ '_' ~ role) %} + {% endif %} + {% if vm_state %} {% do salt.log.debug('salt/hypervisor/map.jinja: VM config content: ' ~ vm_state | tojson) %} {% set vm_data = {'config': vm_state.config} %} @@ -85,7 +101,7 @@ {% endif %} {% do vms.update({hostname ~ '_' ~ role: vm_data}) %} {% else %} - {% do salt.log.debug('salt/hypervisor/map.jinja: Config file empty: ' ~ vm_file) %} + {% do salt.log.debug('salt/hypervisor/map.jinja: Skipping VM ' ~ hostname ~ '_' ~ role ~ ' - no config available') %} {% endif %} {% endfor %} diff --git a/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume new file mode 100644 index 000000000..2322c3a94 --- /dev/null +++ b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume @@ -0,0 +1,586 @@ +#!/usr/bin/python3 + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. 
+#
+# Note: Per the Elastic License 2.0, the second limitation states:
+#
+# "You may not move, change, disable, or circumvent the license key functionality
+# in the software, and you may not remove or obscure any functionality in the
+# software that is protected by the license key."
+
+{% if 'vrt' in salt['pillar.get']('features', []) %}
+
+"""
+Script for creating and attaching virtual volumes to KVM virtual machines for NSM storage.
+This script provides functionality to create pre-allocated raw disk images and attach them
+to VMs as virtio-blk devices for high-performance network security monitoring data storage.
+
+The script handles the complete volume lifecycle:
+1. Volume Creation: Creates pre-allocated raw disk images using qemu-img
+2. Volume Attachment: Attaches volumes to VMs as virtio-blk devices
+3. VM Management: Stops/starts VMs as needed during the process
+
+This script is designed to work with Security Onion's virtualization infrastructure and is typically
+used during VM provisioning to add dedicated NSM storage volumes.
+
+**Usage:**
+    so-kvm-create-volume -v <vm_name> -s <size_gb> [-S]
+
+**Options:**
+    -v, --vm     Name of the virtual machine to attach the volume to (required).
+    -s, --size   Size of the volume in GB (required, must be a positive integer).
+    -S, --start  Start the VM after volume creation and attachment (optional).
+
+**Examples:**
+
+1. **Create and Attach 500GB Volume:**
+
+   ```bash
+   so-kvm-create-volume -v vm1_sensor -s 500
+   ```
+
+   This command creates and attaches a volume with the following settings:
+   - VM Name: `vm1_sensor`
+   - Volume Size: `500` GB
+   - Volume Path: `/nsm/libvirt/volumes/vm1_sensor-nsm.img`
+   - Device: `/dev/vdb` (virtio-blk)
+   - VM remains stopped after attachment
+
+2. **Create Volume and Start VM:**
+
+   ```bash
+   so-kvm-create-volume -v vm2_sensor -s 1000 -S
+   ```
+
+   This command creates a volume and starts the VM:
+   - VM Name: `vm2_sensor`
+   - Volume Size: `1000` GB (1 TB)
+   - VM is started after volume attachment due to the `-S` flag
+
+3. **Create Large Volume for Heavy Traffic:**
+
+   ```bash
+   so-kvm-create-volume -v vm3_sensor -s 2000 -S
+   ```
+
+   This command creates a large volume for high-traffic environments:
+   - VM Name: `vm3_sensor`
+   - Volume Size: `2000` GB (2 TB)
+   - VM is started after attachment
+
+**Notes:**
+
+- The script automatically stops the VM if it's running before creating and attaching the volume.
+- Volumes are created with full pre-allocation for optimal performance.
+- Volume files are stored in `/nsm/libvirt/volumes/` with naming pattern `<vm_name>-nsm.img`.
+- Volumes are attached as `/dev/vdb` using virtio-blk for high performance.
+- The script checks available disk space before creating the volume.
+- Ownership is set to `qemu:qemu` with permissions `640`.
+- Without the `-S` flag, the VM remains stopped after volume attachment.
+
+**Description:**
+
+The `so-kvm-create-volume` script creates and attaches NSM storage volumes using the following process:
+
+1. **Pre-flight Checks:**
+   - Validates input parameters (VM name, size)
+   - Checks available disk space in `/nsm/libvirt/volumes/`
+   - Ensures sufficient space for the requested volume size
+
+2. **VM State Management:**
+   - Connects to the local libvirt daemon
+   - Stops the VM if it's currently running
+   - Retrieves current VM configuration
+
+3.
**Volume Creation:** + - Creates volume directory if it doesn't exist + - Uses `qemu-img create` with full pre-allocation + - Sets proper ownership (qemu:qemu) and permissions (640) + - Validates volume creation success + +4. **Volume Attachment:** + - Modifies VM's libvirt XML configuration + - Adds disk element with virtio-blk driver + - Configures cache='none' and io='native' for performance + - Attaches volume as `/dev/vdb` + +5. **VM Redefinition:** + - Applies the new configuration by redefining the VM + - Optionally starts the VM if requested + - Emits deployment status events for monitoring + +6. **Error Handling:** + - Validates all input parameters + - Checks disk space before creation + - Handles volume creation failures + - Handles volume attachment failures + - Provides detailed error messages for troubleshooting + +**Exit Codes:** + +- `0`: Success +- `1`: An error occurred during execution + +**Logging:** + +- Logs are written to `/opt/so/log/hypervisor/so-kvm-create-volume.log` +- Both file and console logging are enabled for real-time monitoring +- Log entries include timestamps and severity levels +- Log prefixes: VOLUME:, VM:, HARDWARE:, SPACE: +- Detailed error messages are logged for troubleshooting +""" + +import argparse +import sys +import os +import libvirt +import logging +import socket +import subprocess +import pwd +import grp +import xml.etree.ElementTree as ET +from io import StringIO +from so_vm_utils import start_vm, stop_vm +from so_logging_utils import setup_logging + +# Get hypervisor name from local hostname +HYPERVISOR = socket.gethostname() + +# Volume storage directory +VOLUME_DIR = '/nsm/libvirt/volumes' + +# Custom exception classes +class InsufficientSpaceError(Exception): + """Raised when there is insufficient disk space for volume creation.""" + pass + +class VolumeCreationError(Exception): + """Raised when volume creation fails.""" + pass + +class VolumeAttachmentError(Exception): + """Raised when volume attachment fails.""" + pass + +# Custom log handler to capture output +class StringIOHandler(logging.Handler): + def __init__(self): + super().__init__() + self.strio = StringIO() + + def emit(self, record): + msg = self.format(record) + self.strio.write(msg + '\n') + + def get_value(self): + return self.strio.getvalue() + +def parse_arguments(): + """Parse command-line arguments.""" + parser = argparse.ArgumentParser(description='Create and attach a virtual volume to a KVM virtual machine for NSM storage.') + parser.add_argument('-v', '--vm', required=True, help='Name of the virtual machine to attach the volume to.') + parser.add_argument('-s', '--size', type=int, required=True, help='Size of the volume in GB (must be a positive integer).') + parser.add_argument('-S', '--start', action='store_true', help='Start the VM after volume creation and attachment.') + args = parser.parse_args() + + # Validate size is positive + if args.size <= 0: + parser.error("Volume size must be a positive integer.") + + return args + +def check_disk_space(size_gb, logger): + """ + Check if there is sufficient disk space available for volume creation. 
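+
+    Compares the free space reported by os.statvfs() on /nsm/libvirt/volumes
+    against the requested size plus a 10% buffer, so e.g. a 500 GB request
+    needs roughly 550 GB free.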
+ + Args: + size_gb: Size of the volume in GB + logger: Logger instance + + Raises: + InsufficientSpaceError: If there is not enough disk space + """ + try: + stat = os.statvfs(VOLUME_DIR) + # Available space in bytes + available_bytes = stat.f_bavail * stat.f_frsize + # Required space in bytes (add 10% buffer) + required_bytes = size_gb * 1024 * 1024 * 1024 * 1.1 + + available_gb = available_bytes / (1024 * 1024 * 1024) + required_gb = required_bytes / (1024 * 1024 * 1024) + + logger.info(f"SPACE: Available: {available_gb:.2f} GB, Required: {required_gb:.2f} GB") + + if available_bytes < required_bytes: + raise InsufficientSpaceError( + f"Insufficient disk space. Available: {available_gb:.2f} GB, Required: {required_gb:.2f} GB" + ) + + logger.info(f"SPACE: Sufficient disk space available for {size_gb} GB volume") + + except OSError as e: + logger.error(f"SPACE: Failed to check disk space: {e}") + raise + +def create_volume_file(vm_name, size_gb, logger): + """ + Create a pre-allocated raw disk image for the VM. + + Args: + vm_name: Name of the VM + size_gb: Size of the volume in GB + logger: Logger instance + + Returns: + Path to the created volume file + + Raises: + VolumeCreationError: If volume creation fails + """ + # Define volume path (directory already created in main()) + volume_path = os.path.join(VOLUME_DIR, f"{vm_name}-nsm.img") + + # Check if volume already exists + if os.path.exists(volume_path): + logger.error(f"VOLUME: Volume already exists: {volume_path}") + raise VolumeCreationError(f"Volume already exists: {volume_path}") + + logger.info(f"VOLUME: Creating {size_gb} GB volume at {volume_path}") + + # Create volume using qemu-img with full pre-allocation + try: + cmd = [ + 'qemu-img', 'create', + '-f', 'raw', + '-o', 'preallocation=full', + volume_path, + f"{size_gb}G" + ] + + result = subprocess.run( + cmd, + capture_output=True, + text=True, + check=True + ) + + logger.info(f"VOLUME: Volume created successfully") + if result.stdout: + logger.debug(f"VOLUME: qemu-img output: {result.stdout.strip()}") + + except subprocess.CalledProcessError as e: + logger.error(f"VOLUME: Failed to create volume: {e}") + if e.stderr: + logger.error(f"VOLUME: qemu-img error: {e.stderr.strip()}") + raise VolumeCreationError(f"Failed to create volume: {e}") + + # Set ownership to qemu:qemu + try: + qemu_uid = pwd.getpwnam('qemu').pw_uid + qemu_gid = grp.getgrnam('qemu').gr_gid + os.chown(volume_path, qemu_uid, qemu_gid) + logger.info(f"VOLUME: Set ownership to qemu:qemu") + except (KeyError, OSError) as e: + logger.error(f"VOLUME: Failed to set ownership: {e}") + raise VolumeCreationError(f"Failed to set ownership: {e}") + + # Set permissions to 640 + try: + os.chmod(volume_path, 0o640) + logger.info(f"VOLUME: Set permissions to 640") + except OSError as e: + logger.error(f"VOLUME: Failed to set permissions: {e}") + raise VolumeCreationError(f"Failed to set permissions: {e}") + + # Verify volume was created + if not os.path.exists(volume_path): + logger.error(f"VOLUME: Volume file not found after creation: {volume_path}") + raise VolumeCreationError(f"Volume file not found after creation: {volume_path}") + + volume_size = os.path.getsize(volume_path) + logger.info(f"VOLUME: Volume created: {volume_path} ({volume_size} bytes)") + + return volume_path + +def attach_volume_to_vm(conn, vm_name, volume_path, logger): + """ + Attach the volume to the VM's libvirt XML configuration. 
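+
+    The volume is added as a <disk> element targeting /dev/vdb on the virtio
+    bus, and the domain is then redefined via conn.defineXML() so the change
+    persists in the stored VM configuration.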
+
+    Args:
+        conn: Libvirt connection
+        vm_name: Name of the VM
+        volume_path: Path to the volume file
+        logger: Logger instance
+
+    Raises:
+        VolumeAttachmentError: If volume attachment fails
+    """
+    try:
+        # Get the VM domain
+        dom = conn.lookupByName(vm_name)
+
+        # Get the XML description of the VM
+        xml_desc = dom.XMLDesc()
+        root = ET.fromstring(xml_desc)
+
+        # Find the devices element
+        devices_elem = root.find('./devices')
+        if devices_elem is None:
+            logger.error("VM: Could not find <devices> element in XML")
+            raise VolumeAttachmentError("Could not find <devices> element in VM XML")
+
+        # Log ALL devices with PCI addresses to find conflicts
+        logger.info("DISK_DEBUG: Examining ALL devices with PCI addresses")
+        for device in devices_elem:
+            address = device.find('./address')
+            if address is not None and address.get('type') == 'pci':
+                bus = address.get('bus', 'unknown')
+                slot = address.get('slot', 'unknown')
+                function = address.get('function', 'unknown')
+                logger.info(f"DISK_DEBUG: Device {device.tag}: bus={bus}, slot={slot}, function={function}")
+
+        # Log existing disk configuration for debugging
+        logger.info("DISK_DEBUG: Examining existing disk configuration")
+        existing_disks = devices_elem.findall('./disk')
+        for idx, disk in enumerate(existing_disks):
+            target = disk.find('./target')
+            source = disk.find('./source')
+            address = disk.find('./address')
+
+            dev_name = target.get('dev') if target is not None else 'unknown'
+            source_file = source.get('file') if source is not None else 'unknown'
+
+            if address is not None:
+                slot = address.get('slot', 'unknown')
+                bus = address.get('bus', 'unknown')
+                logger.info(f"DISK_DEBUG: Disk {idx}: dev={dev_name}, source={source_file}, slot={slot}, bus={bus}")
+            else:
+                logger.info(f"DISK_DEBUG: Disk {idx}: dev={dev_name}, source={source_file}, no address element")
+
+        # Check if vdb already exists
+        for disk in devices_elem.findall('./disk'):
+            target = disk.find('./target')
+            if target is not None and target.get('dev') == 'vdb':
+                logger.error("VM: Device vdb already exists in VM configuration")
+                raise VolumeAttachmentError("Device vdb already exists in VM configuration")
+
+        logger.info(f"VM: Attaching volume to {vm_name} as /dev/vdb")
+
+        # Create disk element
+        disk_elem = ET.SubElement(devices_elem, 'disk', attrib={
+            'type': 'file',
+            'device': 'disk'
+        })
+
+        # Add driver element
+        ET.SubElement(disk_elem, 'driver', attrib={
+            'name': 'qemu',
+            'type': 'raw',
+            'cache': 'none',
+            'io': 'native'
+        })
+
+        # Add source element
+        ET.SubElement(disk_elem, 'source', attrib={
+            'file': volume_path
+        })
+
+        # Add target element
+        ET.SubElement(disk_elem, 'target', attrib={
+            'dev': 'vdb',
+            'bus': 'virtio'
+        })
+
+        # Add address element
+        # Use bus 0x07 with slot 0x00 to ensure NSM volume appears after OS disk (which is on bus 0x04)
+        # Bus 0x05 is used by memballoon, bus 0x06 is used by rng device
+        # Libvirt requires slot 0x00 on non-zero buses (each pcie-root-port accepts a single device at slot 0)
+        # This ensures vda = OS disk, vdb = NSM volume
+        ET.SubElement(disk_elem, 'address', attrib={
+            'type': 'pci',
+            'domain': '0x0000',
+            'bus': '0x07',
+            'slot': '0x00',
+            'function': '0x0'
+        })
+
+        logger.info(f"HARDWARE: Added disk configuration for vdb")
+
+        # Log disk ordering after adding new disk
+        logger.info("DISK_DEBUG: Disk configuration after adding NSM volume")
+        all_disks = devices_elem.findall('./disk')
+        for idx, disk in enumerate(all_disks):
+            target = disk.find('./target')
+            source = disk.find('./source')
+            address = disk.find('./address')
+
+            dev_name = target.get('dev') if target is not None else 'unknown'
+            source_file = source.get('file') if source is not None else 'unknown'
+
+            if address is not None:
+                slot = address.get('slot', 'unknown')
+                bus = address.get('bus', 'unknown')
+                logger.info(f"DISK_DEBUG: Disk {idx}: dev={dev_name}, source={source_file}, slot={slot}, bus={bus}")
+            else:
+                logger.info(f"DISK_DEBUG: Disk {idx}: dev={dev_name}, source={source_file}, no address element")
+
+        # Convert XML back to string
+        new_xml_desc = ET.tostring(root, encoding='unicode')
+
+        # Redefine the VM with the new XML
+        conn.defineXML(new_xml_desc)
+        logger.info(f"VM: VM redefined with volume attached")
+
+    except libvirt.libvirtError as e:
+        logger.error(f"VM: Failed to attach volume: {e}")
+        raise VolumeAttachmentError(f"Failed to attach volume: {e}")
+    except Exception as e:
+        logger.error(f"VM: Failed to attach volume: {e}")
+        raise VolumeAttachmentError(f"Failed to attach volume: {e}")
+
+def emit_status_event(vm_name, status):
+    """
+    Emit a deployment status event.
+
+    Args:
+        vm_name: Name of the VM
+        status: Status message
+    """
+    try:
+        subprocess.run([
+            'so-salt-emit-vm-deployment-status-event',
+            '-v', vm_name,
+            '-H', HYPERVISOR,
+            '-s', status
+        ], check=True)
+    except subprocess.CalledProcessError:
+        # Don't fail the entire operation if status event fails
+        pass
+
+def main():
+    """Main function to orchestrate volume creation and attachment."""
+    # Set up logging using the so_logging_utils library
+    string_handler = StringIOHandler()
+    string_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
+    logger = setup_logging(
+        logger_name='so-kvm-create-volume',
+        log_file_path='/opt/so/log/hypervisor/so-kvm-create-volume.log',
+        log_level=logging.INFO,
+        format_str='%(asctime)s - %(levelname)s - %(message)s'
+    )
+    logger.addHandler(string_handler)
+
+    vm_name = None
+
+    try:
+        # Parse arguments
+        args = parse_arguments()
+
+        vm_name = args.vm
+        size_gb = args.size
+        start_vm_flag = args.start
+
+        logger.info(f"VOLUME: Starting volume creation for VM '{vm_name}' with size {size_gb} GB")
+
+        # Emit start status event
+        emit_status_event(vm_name, 'Volume Creation')
+
+        # Ensure volume directory exists before checking disk space
+        try:
+            os.makedirs(VOLUME_DIR, mode=0o754, exist_ok=True)
+            qemu_uid = pwd.getpwnam('qemu').pw_uid
+            qemu_gid = grp.getgrnam('qemu').gr_gid
+            os.chown(VOLUME_DIR, qemu_uid, qemu_gid)
+            logger.debug(f"VOLUME: Ensured volume directory exists: {VOLUME_DIR}")
+        except Exception as e:
+            logger.error(f"VOLUME: Failed to create volume directory: {e}")
+            emit_status_event(vm_name, 'Volume Configuration Failed')
+            sys.exit(1)
+
+        # Check disk space
+        check_disk_space(size_gb, logger)
+
+        # Connect to libvirt
+        try:
+            conn = libvirt.open(None)
+            logger.info("VM: Connected to libvirt")
+        except libvirt.libvirtError as e:
+            logger.error(f"VM: Failed to open connection to libvirt: {e}")
+            emit_status_event(vm_name, 'Volume Configuration Failed')
+            sys.exit(1)
+
+        # Stop VM if running
+        dom = stop_vm(conn, vm_name, logger)
+
+        # Create volume file
+        volume_path = create_volume_file(vm_name, size_gb, logger)
+
+        # Attach volume to VM
+        attach_volume_to_vm(conn, vm_name, volume_path, logger)
+
+        # Start VM if -S or --start argument is provided
+        if start_vm_flag:
+            dom = conn.lookupByName(vm_name)
+            start_vm(dom, logger)
+            logger.info(f"VM: VM '{vm_name}' started successfully")
+        else:
+            logger.info("VM: Start flag not provided; VM will remain stopped")
+
+        # Close connection
+        conn.close()
+
+        # Emit success status event
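+        # 'Volume Configuration' is the success step tracked by the SOC hypervisor
+        # status view (see PROCESS_STEPS in salt/soc/dyanno/hypervisor/map.jinja);
+        # failure paths emit 'Volume Configuration Failed' instead.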
emit_status_event(vm_name, 'Volume Configuration') + + logger.info(f"VOLUME: Volume creation and attachment completed successfully for VM '{vm_name}'") + + except KeyboardInterrupt: + error_msg = "Operation cancelled by user" + logger.error(error_msg) + if vm_name: + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + + except InsufficientSpaceError as e: + error_msg = f"SPACE: {str(e)}" + logger.error(error_msg) + if vm_name: + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + + except VolumeCreationError as e: + error_msg = f"VOLUME: {str(e)}" + logger.error(error_msg) + if vm_name: + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + + except VolumeAttachmentError as e: + error_msg = f"VM: {str(e)}" + logger.error(error_msg) + if vm_name: + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + + except Exception as e: + error_msg = f"An error occurred: {str(e)}" + logger.error(error_msg) + if vm_name: + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + +if __name__ == '__main__': + main() + +{%- else -%} + +echo "Hypervisor nodes are a feature supported only for customers with a valid license. \ + Contact Security Onion Solutions, LLC via our website at https://securityonionsolutions.com \ + for more information about purchasing a license to enable this feature." + +{% endif -%} diff --git a/salt/manager/tools/sbin_jinja/so-salt-cloud b/salt/manager/tools/sbin_jinja/so-salt-cloud index 0c98750f4..c8177e1bc 100644 --- a/salt/manager/tools/sbin_jinja/so-salt-cloud +++ b/salt/manager/tools/sbin_jinja/so-salt-cloud @@ -211,7 +211,7 @@ Exit Codes: Logging: -- Logs are written to /opt/so/log/salt/so-salt-cloud.log. +- Logs are written to /opt/so/log/salt/so-salt-cloud. - Both file and console logging are enabled for real-time monitoring. """ @@ -233,7 +233,7 @@ local = salt.client.LocalClient() logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -file_handler = logging.FileHandler('/opt/so/log/salt/so-salt-cloud.log') +file_handler = logging.FileHandler('/opt/so/log/salt/so-salt-cloud') console_handler = logging.StreamHandler() formatter = logging.Formatter('%(asctime)s %(message)s') @@ -516,23 +516,85 @@ def run_qcow2_modify_hardware_config(profile, vm_name, cpu=None, memory=None, pc target = hv_name + "_*" try: - args_list = [ - 'vm_name=' + vm_name, - 'cpu=' + str(cpu) if cpu else '', - 'memory=' + str(memory) if memory else '', - 'start=' + str(start) - ] - + args_list = ['vm_name=' + vm_name] + + # Only add parameters that are actually specified + if cpu is not None: + args_list.append('cpu=' + str(cpu)) + if memory is not None: + args_list.append('memory=' + str(memory)) + # Add PCI devices if provided if pci_list: # Pass all PCI devices as a comma-separated list args_list.append('pci=' + ','.join(pci_list)) + + # Always add start parameter + args_list.append('start=' + str(start)) result = local.cmd(target, 'qcow2.modify_hardware_config', args_list) format_qcow2_output('Hardware configuration', result) except Exception as e: logger.error(f"An error occurred while running qcow2.modify_hardware_config: {e}") +def run_qcow2_create_volume_config(profile, vm_name, size_gb, cpu=None, memory=None, start=False): + """Create a volume for the VM and optionally configure CPU/memory. 
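+
+    The volume is created first with start=False so that any CPU/memory changes
+    can be applied while the VM is still stopped; the VM is only started, when
+    requested, by the final hardware-configuration step.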
+ + Args: + profile (str): The cloud profile name + vm_name (str): The name of the VM + size_gb (int): Size of the volume in GB + cpu (int, optional): Number of CPUs to assign + memory (int, optional): Amount of memory in MiB + start (bool): Whether to start the VM after configuration + """ + hv_name = profile.split('_')[1] + target = hv_name + "_*" + + try: + # Step 1: Create the volume + logger.info(f"Creating {size_gb}GB volume for VM {vm_name}") + volume_result = local.cmd( + target, + 'qcow2.create_volume_config', + kwarg={ + 'vm_name': vm_name, + 'size_gb': size_gb, + 'start': False # Don't start yet if we need to configure CPU/memory + } + ) + format_qcow2_output('Volume creation', volume_result) + + # Step 2: Configure CPU and memory if specified + if cpu or memory: + logger.info(f"Configuring hardware for VM {vm_name}: CPU={cpu}, Memory={memory}MiB") + hw_result = local.cmd( + target, + 'qcow2.modify_hardware_config', + kwarg={ + 'vm_name': vm_name, + 'cpu': cpu, + 'memory': memory, + 'start': start + } + ) + format_qcow2_output('Hardware configuration', hw_result) + elif start: + # If no CPU/memory config needed but we need to start the VM + logger.info(f"Starting VM {vm_name}") + start_result = local.cmd( + target, + 'qcow2.modify_hardware_config', + kwarg={ + 'vm_name': vm_name, + 'start': True + } + ) + format_qcow2_output('VM startup', start_result) + + except Exception as e: + logger.error(f"An error occurred while creating volume and configuring hardware: {e}") + def run_qcow2_modify_network_config(profile, vm_name, mode, ip=None, gateway=None, dns=None, search_domain=None): hv_name = profile.split('_')[1] target = hv_name + "_*" @@ -586,6 +648,7 @@ def parse_arguments(): network_group.add_argument('-c', '--cpu', type=int, help='Number of virtual CPUs to assign.') network_group.add_argument('-m', '--memory', type=int, help='Amount of memory to assign in MiB.') network_group.add_argument('-P', '--pci', action='append', help='PCI hardware ID(s) to passthrough to the VM (e.g., 0000:c7:00.0). Can be specified multiple times.') + network_group.add_argument('--nsm-size', type=int, help='Size in GB for NSM volume creation. Can be used with copper/sfp NICs (--pci). 
Only disk passthrough (without --nsm-size) prevents volume creation.')
 
     args = parser.parse_args()
 
@@ -621,6 +684,8 @@ def main():
         hw_config.append(f"{args.memory}MB RAM")
     if args.pci:
         hw_config.append(f"PCI devices: {', '.join(args.pci)}")
+    if args.nsm_size:
+        hw_config.append(f"NSM volume: {args.nsm_size}GB")
     hw_string = f" and hardware config: {', '.join(hw_config)}" if hw_config else ""
     logger.info(f"Received request to create VM '{args.vm_name}' using profile '{args.profile}' {network_config}{hw_string}")
@@ -643,8 +708,58 @@ def main():
         # Step 2: Provision the VM (without starting it)
         call_salt_cloud(args.profile, args.vm_name)
 
-        # Step 3: Modify hardware configuration
-        run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=args.cpu, memory=args.memory, pci_list=args.pci, start=True)
+        # Step 3: Determine storage configuration approach
+        # Priority: disk passthrough > volume creation (but volume can coexist with copper/sfp NICs)
+        # Note: virtual_node_manager.py already filters out --nsm-size when disk is present,
+        # so if both --pci and --nsm-size are present here, the PCI devices are copper/sfp NICs
+        use_passthrough = False
+        use_volume_creation = False
+        has_nic_passthrough = False
+
+        if args.nsm_size:
+            # Validate nsm_size
+            if args.nsm_size <= 0:
+                logger.error(f"Invalid nsm_size value: {args.nsm_size}. Must be a positive integer.")
+                sys.exit(1)
+            use_volume_creation = True
+            logger.info(f"Using volume creation with size {args.nsm_size}GB (--nsm-size parameter specified)")
+
+            if args.pci:
+                # If both nsm_size and PCI are present, PCI devices are copper/sfp NICs
+                # (virtual_node_manager.py filters out nsm_size when disk is present)
+                has_nic_passthrough = True
+                logger.info(f"PCI devices (copper/sfp NICs) will be passed through along with volume: {', '.join(args.pci)}")
+        elif args.pci:
+            # Only PCI devices, no nsm_size - could be a disk or NICs.
+            # virtual_node_manager.py never sends both nsm_size and a disk PCI slot to this
+            # script, so reaching this branch means a disk passthrough and/or NIC-only request.
+            use_passthrough = True
+            logger.info(f"Configuring PCI device passthrough (--pci parameter specified without --nsm-size)")
+
+        # Step 4: Configure hardware based on storage approach
+        if use_volume_creation:
+            # Create volume first
+            run_qcow2_create_volume_config(args.profile, args.vm_name, size_gb=args.nsm_size, cpu=args.cpu, memory=args.memory, start=False)
+
+            # Then configure NICs if present
+            if has_nic_passthrough:
+                logger.info(f"Configuring NIC passthrough for VM {args.vm_name}")
+                run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=None, memory=None, pci_list=args.pci, start=True)
+            else:
+                # No NICs, just start the VM
+                logger.info(f"Starting VM {args.vm_name}")
+                run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=None, memory=None, pci_list=None, start=True)
+        elif use_passthrough:
+            # Use existing passthrough logic via modify_hardware_config
+            run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=args.cpu, memory=args.memory, pci_list=args.pci, start=True)
+        else:
+            # No storage configuration, just configure CPU/memory if specified
+            if args.cpu or args.memory:
+                run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=args.cpu, memory=args.memory, pci_list=None, start=True)
+            else:
+                # No hardware configuration needed, just start the VM
+                logger.info(f"No hardware configuration specified, starting VM {args.vm_name}")
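+                # With cpu/memory/pci all None, modify_hardware_config changes no
+                # hardware; start=True simply boots the freshly provisioned VM.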
+ run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=None, memory=None, pci_list=None, start=True) except KeyboardInterrupt: logger.error("so-salt-cloud: Operation cancelled by user.") diff --git a/salt/salt/engines/master/virtual_node_manager.py b/salt/salt/engines/master/virtual_node_manager.py index 7783e7c35..6d88bd688 100644 --- a/salt/salt/engines/master/virtual_node_manager.py +++ b/salt/salt/engines/master/virtual_node_manager.py @@ -117,7 +117,7 @@ Exit Codes: 4: VM provisioning failure (so-salt-cloud execution failed) Logging: - Log files are written to /opt/so/log/salt/engines/virtual_node_manager.log + Log files are written to /opt/so/log/salt/engines/virtual_node_manager Comprehensive logging includes: - Hardware validation details - PCI ID conversion process @@ -138,23 +138,49 @@ import pwd import grp import salt.config import salt.runner +import salt.client from typing import Dict, List, Optional, Tuple, Any from datetime import datetime, timedelta from threading import Lock -# Get socore uid/gid -SOCORE_UID = pwd.getpwnam('socore').pw_uid -SOCORE_GID = grp.getgrnam('socore').gr_gid - -# Initialize Salt runner once +# Initialize Salt runner and local client once opts = salt.config.master_config('/etc/salt/master') opts['output'] = 'json' runner = salt.runner.RunnerClient(opts) +local = salt.client.LocalClient() + +# Get socore uid/gid for file ownership +SOCORE_UID = pwd.getpwnam('socore').pw_uid +SOCORE_GID = grp.getgrnam('socore').gr_gid # Configure logging log = logging.getLogger(__name__) log.setLevel(logging.DEBUG) +# Prevent propagation to parent loggers to avoid duplicate log entries +log.propagate = False + +# Add file handler for dedicated log file +log_dir = '/opt/so/log/salt' +log_file = os.path.join(log_dir, 'virtual_node_manager') + +# Create log directory if it doesn't exist +os.makedirs(log_dir, exist_ok=True) + +# Create file handler +file_handler = logging.FileHandler(log_file) +file_handler.setLevel(logging.DEBUG) + +# Create formatter +formatter = logging.Formatter( + '%(asctime)s [%(name)s:%(lineno)d][%(levelname)-8s][%(process)d] %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' +) +file_handler.setFormatter(formatter) + +# Add handler to logger +log.addHandler(file_handler) + # Constants DEFAULT_INTERVAL = 30 DEFAULT_BASE_PATH = '/opt/so/saltstack/local/salt/hypervisor/hosts' @@ -203,6 +229,39 @@ def write_json_file(file_path: str, data: Any) -> None: except Exception as e: log.error("Failed to write JSON file %s: %s", file_path, str(e)) raise +def remove_vm_from_vms_file(vms_file_path: str, vm_hostname: str, vm_role: str) -> bool: + """ + Remove a VM entry from the hypervisorVMs file. 
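+
+    The VMs file is a JSON list of entries like {"hostname": ..., "role": ...};
+    an entry is removed only when both fields match, mirroring the
+    hypervisor.remove_vm_from_vms_file execution module.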
+ + Args: + vms_file_path: Path to the hypervisorVMs file + vm_hostname: Hostname of the VM to remove (without role suffix) + vm_role: Role of the VM + + Returns: + bool: True if VM was removed, False otherwise + """ + try: + # Read current VMs + vms = read_json_file(vms_file_path) + + # Find and remove the VM entry + original_count = len(vms) + vms = [vm for vm in vms if not (vm.get('hostname') == vm_hostname and vm.get('role') == vm_role)] + + if len(vms) < original_count: + # VM was found and removed, write back to file + write_json_file(vms_file_path, vms) + log.info("Removed VM %s_%s from %s", vm_hostname, vm_role, vms_file_path) + return True + else: + log.warning("VM %s_%s not found in %s", vm_hostname, vm_role, vms_file_path) + return False + + except Exception as e: + log.error("Failed to remove VM %s_%s from %s: %s", vm_hostname, vm_role, vms_file_path, str(e)) + return False + def read_yaml_file(file_path: str) -> dict: """Read and parse a YAML file.""" @@ -558,6 +617,13 @@ def mark_vm_failed(vm_file: str, error_code: int, message: str) -> None: # Remove the original file since we'll create an error file os.remove(vm_file) + # Clear hardware resource claims so failed VMs don't consume resources + # Keep nsm_size for reference but clear cpu, memory, sfp, copper + config.pop('cpu', None) + config.pop('memory', None) + config.pop('sfp', None) + config.pop('copper', None) + # Create error file error_file = f"{vm_file}.error" data = { @@ -586,8 +652,16 @@ def mark_invalid_hardware(hypervisor_path: str, vm_name: str, config: dict, erro # Join all messages with proper sentence structure full_message = "Hardware validation failure: " + " ".join(error_messages) + # Clear hardware resource claims so failed VMs don't consume resources + # Keep nsm_size for reference but clear cpu, memory, sfp, copper + config_copy = config.copy() + config_copy.pop('cpu', None) + config_copy.pop('memory', None) + config_copy.pop('sfp', None) + config_copy.pop('copper', None) + data = { - 'config': config, + 'config': config_copy, 'status': 'error', 'timestamp': datetime.now().isoformat(), 'error_details': { @@ -634,6 +708,61 @@ def validate_vrt_license() -> bool: log.error("Error reading license file: %s", str(e)) return False +def check_hypervisor_disk_space(hypervisor: str, size_gb: int) -> Tuple[bool, Optional[str]]: + """ + Check if hypervisor has sufficient disk space for volume creation. 
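+
+    Runs df against /nsm/libvirt/volumes on the target hypervisor minion and
+    requires the requested size plus a 10% overhead buffer to be available.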
+ + Args: + hypervisor: Hypervisor hostname + size_gb: Required size in GB + + Returns: + Tuple of (has_space, error_message) + """ + try: + # Get hypervisor minion ID + hypervisor_minion = f"{hypervisor}_hypervisor" + + # Check disk space on /nsm/libvirt/volumes using LocalClient + result = local.cmd( + hypervisor_minion, + 'cmd.run', + ["df -BG /nsm/libvirt/volumes | tail -1 | awk '{print $4}' | sed 's/G//'"] + ) + + if not result or hypervisor_minion not in result: + log.error("Failed to check disk space on hypervisor %s", hypervisor) + return False, "Failed to check disk space on hypervisor" + + available_gb_str = result[hypervisor_minion].strip() + if not available_gb_str: + log.error("Empty disk space response from hypervisor %s", hypervisor) + return False, "Failed to get disk space information" + + try: + available_gb = float(available_gb_str) + except ValueError: + log.error("Invalid disk space value from hypervisor %s: %s", hypervisor, available_gb_str) + return False, f"Invalid disk space value: {available_gb_str}" + + # Add 10% buffer for filesystem overhead + required_gb = size_gb * 1.1 + + log.debug("Hypervisor %s disk space check: Available=%.2fGB, Required=%.2fGB", + hypervisor, available_gb, required_gb) + + if available_gb < required_gb: + error_msg = f"Insufficient disk space on hypervisor {hypervisor}. Available: {available_gb:.2f}GB, Required: {required_gb:.2f}GB (including 10% overhead)" + log.error(error_msg) + return False, error_msg + + log.info("Hypervisor %s has sufficient disk space for %dGB volume", hypervisor, size_gb) + return True, None + + except Exception as e: + log.error("Error checking disk space on hypervisor %s: %s", hypervisor, str(e)) + return False, f"Error checking disk space: {str(e)}" + def process_vm_creation(hypervisor_path: str, vm_config: dict) -> None: """ Process a single VM creation request. @@ -666,6 +795,62 @@ def process_vm_creation(hypervisor_path: str, vm_config: dict) -> None: except subprocess.CalledProcessError as e: logger.error(f"Failed to emit success status event: {e}") + # Validate nsm_size if present + if 'nsm_size' in vm_config: + try: + size = int(vm_config['nsm_size']) + if size <= 0: + log.error("VM: %s - nsm_size must be a positive integer, got: %d", vm_name, size) + mark_invalid_hardware(hypervisor_path, vm_name, vm_config, + {'nsm_size': 'Invalid nsm_size: must be positive integer'}) + return + if size > 10000: # 10TB reasonable maximum + log.error("VM: %s - nsm_size %dGB exceeds reasonable maximum (10000GB)", vm_name, size) + mark_invalid_hardware(hypervisor_path, vm_name, vm_config, + {'nsm_size': f'Invalid nsm_size: {size}GB exceeds maximum (10000GB)'}) + return + log.debug("VM: %s - nsm_size validated: %dGB", vm_name, size) + except (ValueError, TypeError) as e: + log.error("VM: %s - nsm_size must be a valid integer, got: %s", vm_name, vm_config.get('nsm_size')) + mark_invalid_hardware(hypervisor_path, vm_name, vm_config, + {'nsm_size': 'Invalid nsm_size: must be valid integer'}) + return + + # Check for conflicting storage configurations + has_disk = 'disk' in vm_config and vm_config['disk'] + has_nsm_size = 'nsm_size' in vm_config and vm_config['nsm_size'] + + if has_disk and has_nsm_size: + log.warning("VM: %s - Both disk and nsm_size specified. 
disk takes precedence, nsm_size will be ignored.", + vm_name) + + # Check disk space BEFORE creating VM if nsm_size is specified + if has_nsm_size and not has_disk: + size_gb = int(vm_config['nsm_size']) + has_space, space_error = check_hypervisor_disk_space(hypervisor, size_gb) + if not has_space: + log.error("VM: %s - %s", vm_name, space_error) + + # Send Hypervisor NSM Disk Full status event + try: + subprocess.run([ + 'so-salt-emit-vm-deployment-status-event', + '-v', vm_name, + '-H', hypervisor, + '-s', 'Hypervisor NSM Disk Full' + ], check=True) + except subprocess.CalledProcessError as e: + log.error("Failed to emit volume create failed event for %s: %s", vm_name, str(e)) + + mark_invalid_hardware( + hypervisor_path, + vm_name, + vm_config, + {'disk_space': f"Insufficient disk space for {size_gb}GB volume: {space_error}"} + ) + return + log.debug("VM: %s - Hypervisor has sufficient space for %dGB volume", vm_name, size_gb) + # Initial hardware validation against model is_valid, errors = validate_hardware_request(model_config, vm_config) if not is_valid: @@ -701,6 +886,11 @@ def process_vm_creation(hypervisor_path: str, vm_config: dict) -> None: if 'memory' in vm_config: memory_mib = int(vm_config['memory']) * 1024 cmd.extend(['-m', str(memory_mib)]) + + # Add nsm_size if specified and disk is not specified + if 'nsm_size' in vm_config and vm_config['nsm_size'] and not ('disk' in vm_config and vm_config['disk']): + cmd.extend(['--nsm-size', str(vm_config['nsm_size'])]) + log.debug("VM: %s - Adding nsm_size parameter: %s", vm_name, vm_config['nsm_size']) # Add PCI devices for hw_type in ['disk', 'copper', 'sfp']: @@ -933,12 +1123,21 @@ def process_hypervisor(hypervisor_path: str) -> None: if not nodes_config: log.debug("Empty VMs configuration in %s", vms_file) - # Get existing VMs + # Get existing VMs and track failed VMs separately existing_vms = set() + failed_vms = set() # VMs with .error files for file_path in glob.glob(os.path.join(hypervisor_path, '*_*')): basename = os.path.basename(file_path) - # Skip error and status files - if not basename.endswith('.error') and not basename.endswith('.status'): + # Skip status files + if basename.endswith('.status'): + continue + # Track VMs with .error files separately + if basename.endswith('.error'): + vm_name = basename[:-6] # Remove '.error' suffix + failed_vms.add(vm_name) + existing_vms.add(vm_name) # Also add to existing to prevent recreation + log.debug(f"Found failed VM with .error file: {vm_name}") + else: existing_vms.add(basename) # Process new VMs @@ -955,12 +1154,37 @@ def process_hypervisor(hypervisor_path: str) -> None: # process_vm_creation handles its own locking process_vm_creation(hypervisor_path, vm_config) - # Process VM deletions + # Process VM deletions (but skip failed VMs that only have .error files) vms_to_delete = existing_vms - configured_vms log.debug(f"Existing VMs: {existing_vms}") log.debug(f"Configured VMs: {configured_vms}") + log.debug(f"Failed VMs: {failed_vms}") log.debug(f"VMs to delete: {vms_to_delete}") for vm_name in vms_to_delete: + # Skip deletion if VM only has .error file (no actual VM to delete) + if vm_name in failed_vms: + error_file = os.path.join(hypervisor_path, f"{vm_name}.error") + base_file = os.path.join(hypervisor_path, vm_name) + # Only skip if there's no base file (VM never successfully created) + if not os.path.exists(base_file): + log.info(f"Skipping deletion of failed VM {vm_name} (VM never successfully created)") + # Clean up the .error and .status files since VM is no longer 
configured + if os.path.exists(error_file): + os.remove(error_file) + log.info(f"Removed .error file for unconfigured VM: {vm_name}") + status_file = os.path.join(hypervisor_path, f"{vm_name}.status") + if os.path.exists(status_file): + os.remove(status_file) + log.info(f"Removed .status file for unconfigured VM: {vm_name}") + + # Trigger hypervisor annotation update to reflect the removal + try: + log.info(f"Triggering hypervisor annotation update after removing failed VM: {vm_name}") + runner.cmd('state.orch', ['orch.dyanno_hypervisor']) + except Exception as e: + log.error(f"Failed to trigger hypervisor annotation update for {vm_name}: {str(e)}") + + continue log.info(f"Initiating deletion process for VM: {vm_name}") process_vm_deletion(hypervisor_path, vm_name) diff --git a/salt/soc/dyanno/hypervisor/hypervisor.yaml b/salt/soc/dyanno/hypervisor/hypervisor.yaml index d13c928ec..143a2f5cb 100644 --- a/salt/soc/dyanno/hypervisor/hypervisor.yaml +++ b/salt/soc/dyanno/hypervisor/hypervisor.yaml @@ -63,18 +63,22 @@ hypervisor: required: true readonly: true forcedType: int + - field: nsm_size + label: "Size of virtual disk to create and use for /nsm, in GB. Only applicable if no pass-through disk." + forcedType: int + readonly: true - field: disk - label: "Disk(s) for passthrough. Free: FREE | Total: TOTAL" + label: "Disk(s) to pass through for /nsm. Free: FREE | Total: TOTAL" readonly: true options: [] forcedType: '[]int' - field: copper - label: "Copper port(s) for passthrough. Free: FREE | Total: TOTAL" + label: "Copper port(s) to pass through. Free: FREE | Total: TOTAL" readonly: true options: [] forcedType: '[]int' - field: sfp - label: "SFP port(s) for passthrough. Free: FREE | Total: TOTAL" + label: "SFP port(s) to pass through. Free: FREE | Total: TOTAL" readonly: true options: [] forcedType: '[]int' diff --git a/salt/soc/dyanno/hypervisor/map.jinja b/salt/soc/dyanno/hypervisor/map.jinja index 4a5107371..cb0810959 100644 --- a/salt/soc/dyanno/hypervisor/map.jinja +++ b/salt/soc/dyanno/hypervisor/map.jinja @@ -3,11 +3,14 @@ {# Define the list of process steps in order (case-sensitive) #} {% set PROCESS_STEPS = [ 'Processing', + 'Hypervisor NSM Disk Full', 'IP Configuration', 'Starting Create', 'Executing Deploy Script', 'Initialize Minion Pillars', 'Created Instance', + 'Volume Creation', + 'Volume Configuration', 'Hardware Configuration', 'Highstate Initiated', 'Destroyed Instance' diff --git a/salt/soc/dyanno/hypervisor/remove_failed_vm.sls b/salt/soc/dyanno/hypervisor/remove_failed_vm.sls new file mode 100644 index 000000000..a47eff595 --- /dev/null +++ b/salt/soc/dyanno/hypervisor/remove_failed_vm.sls @@ -0,0 +1,51 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. +# +# Note: Per the Elastic License 2.0, the second limitation states: +# +# "You may not move, change, disable, or circumvent the license key functionality +# in the software, and you may not remove or obscure any functionality in the +# software that is protected by the license key." 
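+
+# Removes a failed VM's entry from the hypervisor's VMs file so it is no longer
+# tracked. Illustrative invocation (pillar keys as consumed below):
+#   salt-run state.orch soc.dyanno.hypervisor.remove_failed_vm \
+#     pillar='{"vm_name": "node1_sensor", "hypervisor": "hypervisor1"}'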
+ +{% if 'vrt' in salt['pillar.get']('features', []) %} + +{% do salt.log.info('soc/dyanno/hypervisor/remove_failed_vm: Running') %} +{% set vm_name = pillar.get('vm_name') %} +{% set hypervisor = pillar.get('hypervisor') %} + +{% if vm_name and hypervisor %} +{% set vm_parts = vm_name.split('_') %} +{% if vm_parts | length >= 2 %} +{% set vm_role = vm_parts[-1] %} +{% set vm_hostname = '_'.join(vm_parts[:-1]) %} +{% set vms_file = '/opt/so/saltstack/local/salt/hypervisor/hosts/' ~ hypervisor ~ 'VMs' %} + +{% do salt.log.info('soc/dyanno/hypervisor/remove_failed_vm: Removing VM ' ~ vm_name ~ ' from ' ~ vms_file) %} + +remove_vm_{{ vm_name }}_from_vms_file: + module.run: + - name: hypervisor.remove_vm_from_vms_file + - vms_file_path: {{ vms_file }} + - vm_hostname: {{ vm_hostname }} + - vm_role: {{ vm_role }} + +{% else %} +{% do salt.log.error('soc/dyanno/hypervisor/remove_failed_vm: Invalid vm_name format: ' ~ vm_name) %} +{% endif %} +{% else %} +{% do salt.log.error('soc/dyanno/hypervisor/remove_failed_vm: Missing required pillar data (vm_name or hypervisor)') %} +{% endif %} + +{% do salt.log.info('soc/dyanno/hypervisor/remove_failed_vm: Completed') %} + +{% else %} + +{% do salt.log.error( + 'Hypervisor nodes are a feature supported only for customers with a valid license. ' + 'Contact Security Onion Solutions, LLC via our website at https://securityonionsolutions.com ' + 'for more information about purchasing a license to enable this feature.' +) %} + +{% endif %} diff --git a/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja b/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja index 926263b9d..8e49b60b5 100644 --- a/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja +++ b/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja @@ -13,7 +13,6 @@ {%- import_yaml 'soc/dyanno/hypervisor/hypervisor.yaml' as ANNOTATION -%} {%- from 'hypervisor/map.jinja' import HYPERVISORS -%} -{%- from 'soc/dyanno/hypervisor/map.jinja' import PROCESS_STEPS -%} {%- set TEMPLATE = ANNOTATION.hypervisor.hosts.pop('defaultHost') -%} @@ -27,7 +26,6 @@ {%- if baseDomainStatus == 'Initialized' %} {%- if vm_list %} #### Virtual Machines -Status values: {% for step in PROCESS_STEPS %}{{ step }}{% if not loop.last %}, {% endif %}{% endfor %}. "Last Updated" shows when status changed. After "Highstate Initiated", only "Destroyed Instance" updates the timestamp. | Name | Status | CPU Cores | Memory (GB)| Disk | Copper | SFP | Last Updated | |--------------------|--------------------|-----------|------------|------|--------|------|---------------------| @@ -42,7 +40,6 @@ Status values: {% for step in PROCESS_STEPS %}{{ step }}{% if not loop.last %}, {%- endfor %} {%- else %} #### Virtual Machines -Status values: {% for step in PROCESS_STEPS %}{{ step }}{% if not loop.last %}, {% endif %}{% endfor %}. "Last Updated" shows when status changed. After "Highstate Initiated", only "Destroyed Instance" updates the timestamp. No Virtual Machines Found {%- endif %} diff --git a/salt/storage/init.sls b/salt/storage/init.sls index 533366fd0..ab5926bf5 100644 --- a/salt/storage/init.sls +++ b/salt/storage/init.sls @@ -4,10 +4,17 @@ # Elastic License 2.0. 
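+
+# Prefer an NVMe device (disk passthrough) for /nsm; otherwise fall back to a
+# virtio volume presented as /dev/vdb by so-kvm-create-volume.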
-{% set nvme_devices = salt['cmd.shell']("find /dev -name 'nvme*n1' 2>/dev/null") %} +{% set nvme_devices = salt['cmd.shell']("ls /dev/nvme*n1 2>/dev/null || echo ''") %} +{% set virtio_devices = salt['cmd.shell']("test -b /dev/vdb && echo '/dev/vdb' || echo ''") %} + {% if nvme_devices %} include: - - storage.nsm_mount + - storage.nsm_mount_nvme + +{% elif virtio_devices %} + +include: + - storage.nsm_mount_virtio {% endif %} diff --git a/salt/storage/nsm_mount.sls b/salt/storage/nsm_mount_nvme.sls similarity index 87% rename from salt/storage/nsm_mount.sls rename to salt/storage/nsm_mount_nvme.sls index ed9e97c33..a0d317014 100644 --- a/salt/storage/nsm_mount.sls +++ b/salt/storage/nsm_mount_nvme.sls @@ -22,8 +22,8 @@ storage_nsm_mount_logdir: # Install the NSM mount script storage_nsm_mount_script: file.managed: - - name: /usr/sbin/so-nsm-mount - - source: salt://storage/tools/sbin/so-nsm-mount + - name: /usr/sbin/so-nsm-mount-nvme + - source: salt://storage/tools/sbin/so-nsm-mount-nvme - mode: 755 - user: root - group: root @@ -34,7 +34,7 @@ storage_nsm_mount_script: # Execute the mount script if not already mounted storage_nsm_mount_execute: cmd.run: - - name: /usr/sbin/so-nsm-mount + - name: /usr/sbin/so-nsm-mount-nvme - unless: mountpoint -q /nsm - require: - file: storage_nsm_mount_script diff --git a/salt/storage/nsm_mount_virtio.sls b/salt/storage/nsm_mount_virtio.sls new file mode 100644 index 000000000..34ca8a883 --- /dev/null +++ b/salt/storage/nsm_mount_virtio.sls @@ -0,0 +1,39 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +# Install required packages +storage_nsm_mount_virtio_packages: + pkg.installed: + - pkgs: + - xfsprogs + +# Ensure log directory exists +storage_nsm_mount_virtio_logdir: + file.directory: + - name: /opt/so/log + - makedirs: True + - user: root + - group: root + - mode: 755 + +# Install the NSM mount script +storage_nsm_mount_virtio_script: + file.managed: + - name: /usr/sbin/so-nsm-mount-virtio + - source: salt://storage/tools/sbin/so-nsm-mount-virtio + - mode: 755 + - user: root + - group: root + - require: + - pkg: storage_nsm_mount_virtio_packages + - file: storage_nsm_mount_virtio_logdir + +# Execute the mount script if not already mounted +storage_nsm_mount_virtio_execute: + cmd.run: + - name: /usr/sbin/so-nsm-mount-virtio + - unless: mountpoint -q /nsm + - require: + - file: storage_nsm_mount_virtio_script diff --git a/salt/storage/tools/sbin/so-nsm-mount b/salt/storage/tools/sbin/so-nsm-mount-nvme similarity index 99% rename from salt/storage/tools/sbin/so-nsm-mount rename to salt/storage/tools/sbin/so-nsm-mount-nvme index 24125fc40..fdde0c2e9 100644 --- a/salt/storage/tools/sbin/so-nsm-mount +++ b/salt/storage/tools/sbin/so-nsm-mount-nvme @@ -81,7 +81,7 @@ set -e -LOG_FILE="/opt/so/log/so-nsm-mount.log" +LOG_FILE="/opt/so/log/so-nsm-mount-nvme" VG_NAME="" LV_NAME="nsm" MOUNT_POINT="/nsm" diff --git a/salt/storage/tools/sbin/so-nsm-mount-virtio b/salt/storage/tools/sbin/so-nsm-mount-virtio new file mode 100644 index 000000000..03476e378 --- /dev/null +++ b/salt/storage/tools/sbin/so-nsm-mount-virtio @@ -0,0 +1,171 @@ +#!/bin/bash + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. 
Licensed under the Elastic License 2.0 as shown at
+# https://securityonion.net/license; you may not use this file except in compliance with the
+# Elastic License 2.0.
+
+# Usage:
+#   so-nsm-mount-virtio
+#
+# Options:
+#   None - script automatically configures /dev/vdb
+#
+# Examples:
+#   1. Configure and mount virtio-blk device:
+#      ```bash
+#      sudo so-nsm-mount-virtio
+#      ```
+#
+# Notes:
+#   - Requires root privileges
+#   - Mounts /dev/vdb as /nsm
+#   - Creates XFS filesystem if needed
+#   - Configures persistent mount via /etc/fstab
+#   - Safe to run multiple times
+#
+# Description:
+#   This script automates the configuration and mounting of virtio-blk devices
+#   as /nsm in Security Onion virtual machines. It performs these steps:
+#
+# Dependencies:
+#   - xfsprogs: Required for XFS filesystem operations
+#
+#   1. Safety Checks:
+#      - Verifies root privileges
+#      - Checks if /nsm is already mounted
+#      - Verifies /dev/vdb exists
+#
+#   2. Filesystem Creation:
+#      - Creates XFS filesystem on /dev/vdb if not already formatted
+#
+#   3. Mount Configuration:
+#      - Creates /nsm directory if needed
+#      - Adds entry to /etc/fstab for persistence
+#      - Mounts the filesystem as /nsm
+#
+# Exit Codes:
+#   0: Success conditions:
+#      - Device configured and mounted
+#      - Already properly mounted
+#   1: Error conditions:
+#      - Must be run as root
+#      - Device /dev/vdb not found
+#      - Filesystem creation failed
+#      - Mount operation failed
+#
+# Logging:
+#   - All operations logged to /opt/so/log/so-nsm-mount-virtio
+
+set -e
+
+LOG_FILE="/opt/so/log/so-nsm-mount-virtio"
+DEVICE="/dev/vdb"
+MOUNT_POINT="/nsm"
+
+# Function to log messages
+log() {
+    echo "$(date '+%Y-%m-%d %H:%M:%S') $1" | tee -a "$LOG_FILE"
+}
+
+# Function to log errors
+log_error() {
+    echo "$(date '+%Y-%m-%d %H:%M:%S') ERROR: $1" | tee -a "$LOG_FILE" >&2
+}
+
+# Function to check if running as root
+check_root() {
+    if [ "$EUID" -ne 0 ]; then
+        log_error "Must be run as root"
+        exit 1
+    fi
+}
+
+# Main execution
+main() {
+    log "=========================================="
+    log "Starting virtio-blk NSM mount process"
+    log "=========================================="
+
+    # Check root privileges
+    check_root
+
+    # Check if already mounted
+    if mountpoint -q "$MOUNT_POINT"; then
+        log "$MOUNT_POINT is already mounted"
+        log "=========================================="
+        exit 0
+    fi
+
+    # Check if device exists
+    if [ ! -b "$DEVICE" ]; then
+        log_error "Device $DEVICE not found"
+        log "=========================================="
+        exit 1
+    fi
+
+    log "Found device: $DEVICE"
+
+    # Get device size
+    local size=$(lsblk -dbn -o SIZE "$DEVICE" 2>/dev/null | numfmt --to=iec)
+    log "Device size: $size"
+
+    # Check if device has filesystem
+    if ! blkid "$DEVICE" | grep -q 'TYPE="xfs"'; then
+        log "Creating XFS filesystem on $DEVICE"
+        mkfs.xfs -f "$DEVICE" 2>&1 | tee -a "$LOG_FILE"
+        # tee masks mkfs's exit status, so check mkfs itself via PIPESTATUS
+        if [ "${PIPESTATUS[0]}" -ne 0 ]; then
+            log_error "Failed to create filesystem"
+            log "=========================================="
+            exit 1
+        fi
+        log "Filesystem created successfully"
+    else
+        log "Device already has XFS filesystem"
+    fi
+
+    # Create mount point
+    if [ ! -d "$MOUNT_POINT" ]; then
+        log "Creating mount point $MOUNT_POINT"
+        mkdir -p "$MOUNT_POINT"
+    fi
+
+    # Add to fstab if not present
+    if ! grep -q "$DEVICE.*$MOUNT_POINT" /etc/fstab; then
+        log "Adding entry to /etc/fstab"
+        echo "$DEVICE $MOUNT_POINT xfs defaults 0 0" >> /etc/fstab
+        log "Entry added to /etc/fstab"
+    else
+        log "Entry already exists in /etc/fstab"
+    fi
+
+    # Mount the filesystem
+    log "Mounting $DEVICE to $MOUNT_POINT"
+    mount "$MOUNT_POINT" 2>&1 | tee -a "$LOG_FILE"
+    # Check mount's own exit status; the pipeline's status would be tee's
+    if [ "${PIPESTATUS[0]}" -eq 0 ]; then
+        log "Successfully mounted $DEVICE to $MOUNT_POINT"
+
+        # Verify mount
+        if mountpoint -q "$MOUNT_POINT"; then
+            log "Mount verified successfully"
+
+            # Display mount information
+            log "Mount details:"
+            df -h "$MOUNT_POINT" | tail -n 1 | tee -a "$LOG_FILE"
+        else
+            log_error "Mount verification failed"
+            log "=========================================="
+            exit 1
+        fi
+    else
+        log_error "Failed to mount $DEVICE"
+        log "=========================================="
+        exit 1
+    fi
+
+    log "=========================================="
+    log "Virtio-blk NSM mount process completed successfully"
+    log "=========================================="
+    exit 0
+}
+
+# Run main function
+main