From b68f561e6fa462ad4c88e1594306c3ea338c7982 Mon Sep 17 00:00:00 2001
From: Josh Patterson
Date: Fri, 21 Feb 2025 09:50:01 -0500
Subject: [PATCH] progress and hw tracking for soc hypervisor dynamic annotations

---
 salt/_modules/qcow2.py                        |   8 +-
 .../so-salt-emit-vm-deployment-status-event   | 116 +++++++++++++++++
 salt/hypervisor/map.jinja                     |  52 +++++++-
 .../tools/sbin/so-kvm-modify-hardware         |  79 +++++++++++-
 .../tools/sbin/so-qcow2-modify-network        |  61 ++++++++-
 salt/manager/tools/sbin/so-salt-cloud         |   5 +-
 salt/orch/dyanno_hypervisor.sls               |  97 +++++++++++++++
 salt/reactor/createEmptyPillar.sls            |  24 +++-
 salt/reactor/vm_status.sls                    | 117 ++++++++++++++++++
 salt/salt/cloud/reactor_config_hypervisor.sls |  10 +-
 .../engines/master/virtual_node_manager.py    |  30 +++--
 salt/soc/dyanno/hypervisor/init.sls           |   2 +
 .../hypervisor/soc_hypervisor.yaml.jinja      |  26 +++-
 salt/soc/dyanno/hypervisor/write_status.sls   |  69 +++++++++++
 salt/top.sls                                  |   1 +
 salt/vm_status/init.sls                       |  10 ++
 16 files changed, 674 insertions(+), 33 deletions(-)
 create mode 100644 salt/common/tools/sbin/so-salt-emit-vm-deployment-status-event
 create mode 100644 salt/orch/dyanno_hypervisor.sls
 create mode 100644 salt/reactor/vm_status.sls
 create mode 100644 salt/soc/dyanno/hypervisor/write_status.sls
 create mode 100644 salt/vm_status/init.sls

diff --git a/salt/_modules/qcow2.py b/salt/_modules/qcow2.py
index 81a00ca66..6e71dc459 100644
--- a/salt/_modules/qcow2.py
+++ b/salt/_modules/qcow2.py
@@ -29,10 +29,10 @@ __virtualname__ = 'qcow2'

 def __virtual__():
     return __virtualname__

-def modify_network_config(image, interface, mode, ip4=None, gw4=None, dns4=None, search4=None):
+def modify_network_config(image, interface, mode, vm_name, ip4=None, gw4=None, dns4=None, search4=None):
     '''
     Usage:
-        salt '*' qcow2.modify_network_config image=<image> interface=<interface> mode=<mode> [ip4=<ip4>] [gw4=<gw4>] [dns4=<dns4>] [search4=<search4>]
+        salt '*' qcow2.modify_network_config image=<image> interface=<interface> mode=<mode> vm_name=<vm_name> [ip4=<ip4>] [gw4=<gw4>] [dns4=<dns4>] [search4=<search4>]

     Options:
         image
@@ -41,6 +41,8 @@ def modify_network_config(image, interface, mode, ip4=None, gw4=None, dns4=None,
             Network interface name to configure (e.g., 'enp1s0')
         mode
             Network configuration mode, either 'dhcp4' or 'static4'
+        vm_name
+            Full name of the VM (hostname_role)
         ip4
             IPv4 address with CIDR notation (e.g., '192.168.1.10/24')
             Required when mode='static4'
@@ -94,7 +96,7 @@ def modify_network_config(image, interface, mode, ip4=None, gw4=None, dns4=None,
         - Success/failure status is logged for verification
     '''

-    cmd = ['/usr/sbin/so-qcow2-modify-network', '-I', image, '-i', interface]
+    cmd = ['/usr/sbin/so-qcow2-modify-network', '-I', image, '-i', interface, '-n', vm_name]

     if mode.lower() == 'dhcp4':
         cmd.append('--dhcp4')
diff --git a/salt/common/tools/sbin/so-salt-emit-vm-deployment-status-event b/salt/common/tools/sbin/so-salt-emit-vm-deployment-status-event
new file mode 100644
index 000000000..454e1ba9b
--- /dev/null
+++ b/salt/common/tools/sbin/so-salt-emit-vm-deployment-status-event
@@ -0,0 +1,116 @@
+#!/opt/saltstack/salt/bin/python3

# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.

"""
Script for emitting VM deployment status events to the Salt event bus.
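
The emitted event lands on the master event bus with a tag derived from the
lowercased status. Illustrative event shape, using the same placeholder names
as the Example below:

    Tag:  soc/dyanno/hypervisor/creating
    Data: {'vm_name': 'sensor1_sensor', 'hypervisor': 'hypervisor1', 'status': 'Creating'}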

This script provides functionality to emit status events for VM deployment operations,
used by various Security Onion VM management tools.

Usage:
    so-salt-emit-vm-deployment-status-event -v <vm_name> -H <hypervisor> -s <status>

Arguments:
    -v, --vm-name     Name of the VM (hostname_role)
    -H, --hypervisor  Name of the hypervisor
    -s, --status      Current deployment status of the VM

Example:
    so-salt-emit-vm-deployment-status-event -v sensor1_sensor -H hypervisor1 -s "Creating"
"""

import sys
import argparse
import logging
import salt.client

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
log = logging.getLogger(__name__)

def emit_event(vm_name: str, hypervisor: str, status: str) -> bool:
    """
    Emit a VM deployment status event to the salt event bus.

    Args:
        vm_name: Name of the VM (hostname_role)
        hypervisor: Name of the hypervisor
        status: Current deployment status of the VM

    Returns:
        bool: True if the event was sent successfully, False otherwise
    """
    log.info("Attempting to emit deployment event...")

    try:
        caller = salt.client.Caller()
        event_data = {
            'vm_name': vm_name,
            'hypervisor': hypervisor,
            'status': status
        }

        # Use consistent event tag structure
        event_tag = f'soc/dyanno/hypervisor/{status.lower()}'

        ret = caller.cmd(
            'event.send',
            event_tag,
            event_data
        )

        if not ret:
            log.error("Failed to emit VM deployment status event: %s", event_data)
            return False

        log.info("Successfully emitted VM deployment status event: %s", event_data)
        return True

    except Exception as e:
        log.error("Error emitting VM deployment status event: %s", str(e))
        return False

def parse_args():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser(
        description='Emit VM deployment status events to the Salt event bus.'
+ ) + parser.add_argument('-v', '--vm-name', required=True, + help='Name of the VM (hostname_role)') + parser.add_argument('-H', '--hypervisor', required=True, + help='Name of the hypervisor') + parser.add_argument('-s', '--status', required=True, + help='Current deployment status of the VM') + return parser.parse_args() + +def main(): + """Main entry point for the script.""" + try: + args = parse_args() + + success = emit_event( + vm_name=args.vm_name, + hypervisor=args.hypervisor, + status=args.status + ) + + if not success: + sys.exit(1) + + except Exception as e: + log.error("Failed to emit status event: %s", str(e)) + sys.exit(1) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/salt/hypervisor/map.jinja b/salt/hypervisor/map.jinja index b187bdfb1..cf323f51d 100644 --- a/salt/hypervisor/map.jinja +++ b/salt/hypervisor/map.jinja @@ -6,9 +6,12 @@ {# Build enhanced HYPERVISORS structure #} {% set HYPERVISORS = {} %} +{% do salt.log.info('salt/hypervisor/map.jinja: NODES content: ' ~ NODES | tojson) %} {% for role, hypervisors in NODES.items() %} + {% do salt.log.info('salt/hypervisor/map.jinja: Processing role: ' ~ role) %} {% do HYPERVISORS.update({role: {}}) %} {% for hypervisor, config in hypervisors.items() %} + {% do salt.log.info('salt/hypervisor/map.jinja: Processing hypervisor: ' ~ hypervisor ~ ' with config: ' ~ config | tojson) %} {# Get model from cached grains using Salt runner #} {% set grains = salt.saltutil.runner('cache.grains', tgt=hypervisor ~ '_*', tgt_type='glob') %} {% set model = '' %} @@ -18,17 +21,54 @@ {% endif %} {% set model_config = DEFAULTS.hypervisor.model.get(model, {}) %} - {# Get VM list and states #} + {# Get VM list from VMs file #} {% set vms = {} %} - {% import_json 'hypervisor/hosts/' ~ hypervisor ~ 'VMs' as vm_list %} + {% set vm_list = [] %} + {% set vm_list_file = 'hypervisor/hosts/' ~ hypervisor ~ 'VMs' %} + {% do salt.log.info('salt/hypervisor/map.jinja: VM list file: ' ~ vm_list_file) %} + {% import_json vm_list_file as vm_list %} + {% if vm_list %} + {% do salt.log.info('salt/hypervisor/map.jinja: VM list content: ' ~ vm_list | tojson) %} + {% else %} + {# we won't get here if the vm_list_file doesn't exist because we will get TemplateNotFound on the import_json #} + {% do salt.log.info('salt/hypervisor/map.jinja: VM list empty: ' ~ vm_list_file) %} + {% endif %} - {# Load state for each VM #} + {# Load status and configuration for each VM #} {% for vm in vm_list %} + {# Get VM details from list entry #} {% set hostname = vm.get('hostname', '') %} {% set role = vm.get('role', '') %} - {% if hostname and role %} - {% import_json 'hypervisor/hosts/' ~ hypervisor ~ '/' ~ hostname ~ '_' ~ role as vm_state %} - {% do vms.update({hostname: vm_state}) %} + {% do salt.log.info('salt/hypervisor/map.jinja: Processing VM - hostname: ' ~ hostname ~ ', role: ' ~ role) %} + + {# Load VM configuration from config file #} + {% set vm_file = 'hypervisor/hosts/' ~ hypervisor ~ '/' ~ hostname ~ '_' ~ role %} + {% do salt.log.info('salt/hypervisor/map.jinja: VM config file: ' ~ vm_file) %} + {% import_json vm_file as vm_state %} + {% if vm_state %} + {% do salt.log.info('salt/hypervisor/map.jinja: VM config content: ' ~ vm_state | tojson) %} + {% set vm_data = {'config': vm_state.config} %} + + {# Load VM status from status file #} + {% set status_file = vm_file ~ '.status' %} + {% do salt.log.info('salt/hypervisor/map.jinja: VM status file: ' ~ status_file) %} + {% import_json status_file as status_data %} + {% if status_data %} + 
{% do salt.log.info('salt/hypervisor/map.jinja: VM status content: ' ~ status_data | tojson) %}
+        {% do vm_data.update({'status': status_data}) %}
+      {% else %}
+        {% do salt.log.info('salt/hypervisor/map.jinja: Status file empty: ' ~ status_file) %}
+        {% do vm_data.update({
+          'status': {
+            'status': '',
+            'details': none,
+            'timestamp': ''
+          }
+        }) %}
+      {% endif %}
+      {% do vms.update({hostname: vm_data}) %}
+    {% else %}
+      {% do salt.log.info('salt/hypervisor/map.jinja: Config file empty: ' ~ vm_file) %}
     {% endif %}
   {% endfor %}
diff --git a/salt/hypervisor/tools/sbin/so-kvm-modify-hardware b/salt/hypervisor/tools/sbin/so-kvm-modify-hardware
index a6becfb95..3d80ebaeb 100644
--- a/salt/hypervisor/tools/sbin/so-kvm-modify-hardware
+++ b/salt/hypervisor/tools/sbin/so-kvm-modify-hardware
@@ -25,7 +25,7 @@ used during VM provisioning and hardware reconfiguration tasks.
 -v, --vm Name of the virtual machine to modify.
 -c, --cpu Number of virtual CPUs to assign.
 -m, --memory Amount of memory to assign in MiB.
--p, --pci PCI hardware ID(s) to passthrough to the VM (e.g., 0000:c7:00.0). Can be specified multiple times.
+-p, --pci PCI hardware ID(s) to passthrough to the VM (e.g., 0000:00:1f.2). Can be specified multiple times.
     Format: domain:bus:device.function
 -s, --start Start the VM after modification.
@@ -124,16 +124,34 @@ The `so-kvm-modify-hardware` script modifies hardware parameters of KVM virtual
 - Both file and console logging are enabled for real-time monitoring
 - Log entries include timestamps and severity levels
 - Detailed error messages are logged for troubleshooting
-
 """

 import argparse
 import sys
 import libvirt
 import logging
+import socket
 import xml.etree.ElementTree as ET
+from io import StringIO
 from so_vm_utils import start_vm, stop_vm
 from so_logging_utils import setup_logging
+import subprocess
+
+# Get hypervisor name from local hostname
+HYPERVISOR = socket.gethostname()
+
+# Custom log handler to capture output
+class StringIOHandler(logging.Handler):
+    def __init__(self):
+        super().__init__()
+        self.strio = StringIO()
+
+    def emit(self, record):
+        msg = self.format(record)
+        self.strio.write(msg + '\n')
+
+    def get_value(self):
+        return self.strio.getvalue()

 def parse_arguments():
     parser = argparse.ArgumentParser(description='Modify hardware parameters of a KVM virtual machine.')
@@ -226,12 +244,15 @@ def redefine_vm(conn, new_xml_desc, logger):

 def main():
     # Set up logging using the so_logging_utils library
+    string_handler = StringIOHandler()
+    string_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
     logger = setup_logging(
         logger_name='so-kvm-modify-hardware',
         log_file_path='/opt/so/log/hypervisor/so-kvm-modify-hardware.log',
         log_level=logging.INFO,
         format_str='%(asctime)s - %(levelname)s - %(message)s'
     )
+    logger.addHandler(string_handler)

     try:
         args = parse_arguments()
@@ -247,6 +268,15 @@ def main():
             conn = libvirt.open(None)
         except libvirt.libvirtError as e:
             logger.error(f"Failed to open connection to libvirt: {e}")
+            try:
+                subprocess.run([
+                    'so-salt-emit-vm-deployment-status-event',
+                    '-v', vm_name,
+                    '-H', HYPERVISOR,
+                    '-s', 'Hardware Configuration Failed'
+                ], check=True)
+            except subprocess.CalledProcessError as e:
+                logger.error(f"Failed to emit failure status event: {e}")
             sys.exit(1)

         # Stop VM if running
@@ -262,16 +292,57 @@ def main():
         if start_vm_flag:
             dom = conn.lookupByName(vm_name)
             start_vm(dom, logger)
+            logger.info(f"VM '{vm_name}' started successfully.")
         else:
             logger.info("VM start flag not provided; VM will remain stopped.")

         # Close connection
         conn.close()
+
+        # Send success status event
+        try:
+            subprocess.run([
+                'so-salt-emit-vm-deployment-status-event',
+                '-v', vm_name,
+                '-H', HYPERVISOR,
+                '-s', 'Hardware Configuration'
+            ], check=True)
+        except subprocess.CalledProcessError as e:
+            logger.error(f"Failed to emit success status event: {e}")
+
     except KeyboardInterrupt:
-        logger.error("Operation cancelled by user.")
+        error_msg = "Operation cancelled by user"
+        logger.error(error_msg)
+        try:
+            subprocess.run([
+                'so-salt-emit-vm-deployment-status-event',
+                '-v', vm_name,
+                '-H', HYPERVISOR,
+                '-s', 'Hardware Configuration Failed'
+            ], check=True)
+        except subprocess.CalledProcessError as e:
+            logger.error(f"Failed to emit failure status event: {e}")
        sys.exit(1)
    except Exception as e:
-        logger.error(f"An error occurred: {e}")
+        error_msg = str(e)
+        if "Failed to open connection to libvirt" in error_msg:
+            error_msg = f"Failed to connect to libvirt: {error_msg}"
+        elif "Failed to redefine VM" in error_msg:
+            error_msg = f"Failed to apply hardware changes: {error_msg}"
+        elif "Failed to modify VM XML" in error_msg:
+            error_msg = f"Failed to update hardware configuration: {error_msg}"
+        else:
+            error_msg = f"An error occurred: {error_msg}"
+        logger.error(error_msg)
+        try:
+            subprocess.run([
+                'so-salt-emit-vm-deployment-status-event',
+                '-v', vm_name,
+                '-H', HYPERVISOR,
+                '-s', 'Hardware Configuration Failed'
+            ], check=True)
+        except subprocess.CalledProcessError as e:
+            logger.error(f"Failed to emit failure status event: {e}")
        sys.exit(1)

 if __name__ == '__main__':
     main()
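
Illustrative invocation of the modified script (example values; the options are
the ones documented in the header above):

    so-kvm-modify-hardware -v sensor1_sensor -c 8 -m 16384 -p 0000:00:1f.2 -s

On success the script now emits a 'Hardware Configuration' status event through
so-salt-emit-vm-deployment-status-event; every failure path emits
'Hardware Configuration Failed' instead.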
required=True, help="Full name of the VM (hostname_role).") group = parser.add_mutually_exclusive_group(required=True) group.add_argument("--dhcp4", action="store_true", help="Configure interface for DHCP (IPv4).") group.add_argument("--static4", action="store_true", help="Configure interface for static IPv4 settings.") @@ -448,15 +469,47 @@ def main(): modify_network_config(args.image, args.interface, mode, args.ip4, args.gw4, args.dns4, args.search4) logger.info("Network configuration update completed successfully") + # Send success status event + try: + subprocess.run([ + 'so-salt-emit-vm-deployment-status-event', + '-v', args.vm_name, + '-H', HYPERVISOR, + '-s', 'IP Configuration' + ], check=True) + except subprocess.CalledProcessError as e: + logger.error(f"Failed to emit success status event: {e}") + except KeyboardInterrupt: - logger.error("Operation cancelled by user.") + error_msg = "Operation cancelled by user" + logger.error(error_msg) + try: + subprocess.run([ + 'so-salt-emit-vm-deployment-status-event', + '-v', args.vm_name, + '-H', HYPERVISOR, + '-s', 'IP Configuration Failed' + ], check=True) + except subprocess.CalledProcessError as e: + logger.error(f"Failed to emit failure status event: {e}") sys.exit(1) except Exception as e: - if "base domain is running" in str(e): + error_msg = str(e) + if "base domain is running" in error_msg: logger.error("Cannot proceed: Base domain must not be running when modifying network configuration") + error_msg = "Base domain must not be running when modifying network configuration" else: logger.error(f"An error occurred: {e}") + try: + subprocess.run([ + 'so-salt-emit-vm-deployment-status-event', + '-v', args.vm_name, + '-H', HYPERVISOR, + '-s', 'IP Configuration Failed' + ], check=True) + except subprocess.CalledProcessError as e: + logger.error(f"Failed to emit failure status event: {e}") sys.exit(1) -if __name__ == "__main__": +if __name__ == '__main__': main() diff --git a/salt/manager/tools/sbin/so-salt-cloud b/salt/manager/tools/sbin/so-salt-cloud index 1145c1b65..0d5f5c81c 100644 --- a/salt/manager/tools/sbin/so-salt-cloud +++ b/salt/manager/tools/sbin/so-salt-cloud @@ -489,7 +489,7 @@ def run_qcow2_modify_hardware_config(profile, vm_name, cpu=None, memory=None, pc except Exception as e: logger.error(f"An error occurred while running qcow2.modify_hardware_config: {e}") -def run_qcow2_modify_network_config(profile, mode, ip=None, gateway=None, dns=None, search_domain=None): +def run_qcow2_modify_network_config(profile, vm_name, mode, ip=None, gateway=None, dns=None, search_domain=None): hv_name = profile.split('-')[1] target = hv_name + "_*" image = '/nsm/libvirt/images/sool9/sool9.qcow2' @@ -500,6 +500,7 @@ def run_qcow2_modify_network_config(profile, mode, ip=None, gateway=None, dns=No 'image=' + image, 'interface=' + interface, 'mode=' + mode, + 'vm_name=' + vm_name, 'ip4=' + ip if ip else '', 'gw4=' + gateway if gateway else '', 'dns4=' + dns if dns else '', @@ -583,7 +584,7 @@ def main(): mode = "dhcp4" # Default to DHCP if not specified # Step 1: Modify network configuration - run_qcow2_modify_network_config(args.profile, mode, args.ip4, args.gw4, args.dns4, args.search4) + run_qcow2_modify_network_config(args.profile, args.vm_name, mode, args.ip4, args.gw4, args.dns4, args.search4) # Step 2: Provision the VM (without starting it) call_salt_cloud(args.profile, args.vm_name) diff --git a/salt/orch/dyanno_hypervisor.sls b/salt/orch/dyanno_hypervisor.sls new file mode 100644 index 000000000..0e2d91e7c --- /dev/null +++ 
b/salt/orch/dyanno_hypervisor.sls @@ -0,0 +1,97 @@ +{% do salt.log.info('dyanno_hypervisor_orch: Running') %} +{% set event_data = pillar.get('event_data', {}) %} +{% set event_tag = pillar.get('event_tag', '') %} +{% set timestamp = event_data.get('_stamp') %} +{% do salt.log.debug('dyanno_hypervisor_orch: tag: ' ~ event_tag) %} + + +{# Our custom tag #} +{% if event_tag.startswith('soc/dyanno/hypervisor') %} +{% set status_data = event_data.get('data')%} +{% do salt.log.debug('dyanno_hypervisor_orch: Received data: ' ~ status_data|json|string) %} +{% do salt.log.debug('dyanno_hypervisor_orch: Setting vm_name, hypervisor and status') %} +{% set vm_name = status_data.get('vm_name') %} +{% set hypervisor = status_data.get('hypervisor') %} +{% set status = status_data.get('status') %} +{% set details = status_data.get('details', '') %} +{% do salt.log.info('dyanno_hypervisor_orch: vm_name: ' ~ vm_name ~ ' hypervisor: ' ~ hypervisor ~ ' status: ' ~ status) %} +{% endif %} + +{# salt-cloud tag #} +{% if 'salt/cloud/' in event_tag and event_tag.endswith('/destroyed') %} +{% set status_data = event_data %} +{% do salt.log.debug('dyanno_hypervisor_orch: Setting vm_name, hypervisor and status') %} +{% do salt.log.debug('dyanno_hypervisor_orch: Received data: ' ~ status_data|json|string) %} +{% set vm_name = status_data.get('name') %} +{% set hypervisor = None %} +{% set status = status_data.get('event') %} +{% do salt.log.info('dyanno_hypervisor_orch: vm_name: ' ~ vm_name ~ ' hypervisor: ' ~ hypervisor ~ ' status: ' ~ status) %} +{% endif %} + +{# +{% if event_tag.startswith('soc/dyanno/hypervisor') %} +{% if vm_name and status and hypervisor %} +{% do salt.log.info('dyanno_hypervisor_orch: soc.dyanno.hypervisor.write_status state running - vm_name: ' ~ vm_name ~ ' hypervisor: ' ~ hypervisor ~ ' status: ' ~ status) %} +# Write status file + +write_vm_status: + salt.runner: + - name: state.orchestrate + - mods: soc.dyanno.hypervisor.write_status + - pillar: + vm_name: {{ vm_name }} + hypervisor: {{ hypervisor }} + status_data: + timestamp: {{ timestamp }} + status: {{ status }} + details: {{ details }} + event_tag: {{ event_tag }} + +write_vm_status: + salt.state: + - tgt: 'G@role:so-manager or G@role:so-managersearch or G@role:so-standalone or G@role:so-eval' + - tgt_type: compound + - sls: + - soc.dyanno.hypervisor.write_status + - concurrent: True + - pillar: + vm_name: {{ vm_name }} + hypervisor: {{ hypervisor }} + status_data: + timestamp: {{ timestamp }} + status: {{ status }} + details: {{ details }} + event_tag: {{ event_tag }} + + +{% else %} +{% do salt.log.error('dyanno_hypervisor_orch: Missing required fields - vm_name: ' ~ vm_name ~ ' hypervisor: ' ~ hypervisor ~ ' status: ' ~ status) %} +{% endif %} +{% endif %} +#} + +{# +update_hypervisor_status: + salt.runner: + - name: state.orchestrate + - mods: soc.dyanno.hypervisor +{% if event_tag.startswith('soc/dyanno/hypervisor') %} + - require: + - salt: write_vm_status +{% endif %} +#} + +# Update hypervisor status +update_hypervisor_annotation: + salt.state: + - tgt: 'G@role:so-manager or G@role:so-managersearch or G@role:so-standalone or G@role:so-eval' + - tgt_type: compound + - sls: + - soc.dyanno.hypervisor + - concurrent: True +{#% if event_tag.startswith('soc/dyanno/hypervisor') %} + - require: + - salt: write_vm_status +{% endif %#} + +{% do salt.log.info('dyanno_hypervisor_orch: Completed') %} \ No newline at end of file diff --git a/salt/reactor/createEmptyPillar.sls b/salt/reactor/createEmptyPillar.sls index 
a182e1338..dccf25f89 100644
--- a/salt/reactor/createEmptyPillar.sls
+++ b/salt/reactor/createEmptyPillar.sls
@@ -7,14 +7,32 @@

 import logging
 import os
+import pwd
+import grp

 def run():
     vm_name = data['kwargs']['name']
     logging.error("createEmptyPillar reactor: vm_name: %s" % vm_name)
     pillar_root = '/opt/so/saltstack/local/pillar/minions/'
     pillar_files = ['adv_' + vm_name + '.sls', vm_name + '.sls']
-    for f in pillar_files:
-        if not os.path.exists(pillar_root + f):
-            os.mknod(pillar_root + f)
+
+    try:
+        # Get socore user and group IDs
+        socore_uid = pwd.getpwnam('socore').pw_uid
+        socore_gid = grp.getgrnam('socore').gr_gid
+
+        for f in pillar_files:
+            full_path = pillar_root + f
+            if not os.path.exists(full_path):
+                # Create empty file
+                os.mknod(full_path)
+                # Set ownership to socore:socore
+                os.chown(full_path, socore_uid, socore_gid)
+                # Set mode to 644 (rw-r--r--)
+                os.chmod(full_path, 0o644)
+                logging.error("createEmptyPillar reactor: created %s with socore:socore ownership and mode 644" % f)
+
+    except (KeyError, OSError) as e:
+        logging.error("createEmptyPillar reactor: Error setting ownership/permissions: %s" % str(e))

     return {}
diff --git a/salt/reactor/vm_status.sls b/salt/reactor/vm_status.sls
new file mode 100644
index 000000000..5dc0c0f6d
--- /dev/null
+++ b/salt/reactor/vm_status.sls
@@ -0,0 +1,117 @@
+{% do salt.log.debug('vm_status_reactor: Running') %}
{% do salt.log.debug('vm_status_reactor: tag: ' ~ tag | string) %}

{# Remove all the nasty characters that exist in this data #}
{% if tag.startswith('salt/cloud/') and tag.endswith('/deploying') %}
{% set data = {
  "_stamp": data._stamp,
  "event": data.event,
  "kwargs": {
    "cloud_grains": data.kwargs.cloud_grains
  }
} %}
{% endif %}

{% do salt.log.debug('vm_status_reactor: Received data: ' ~ data|json|string) %}

{#
update_hypervisor:
  runner.state.orchestrate:
    - args:
      - mods: orch.dyanno_hypervisor
      - pillar:
          event_tag: {{ tag }}
          event_data: {{ data }}
#}

{# Our custom tag #}
{% if tag.startswith('soc/dyanno/hypervisor') %}
{% set status_data = data.get('data') %}
{% do salt.log.debug('vm_status_reactor: Received data: ' ~ status_data|json|string) %}
{% do salt.log.debug('vm_status_reactor: Setting vm_name, hypervisor and status') %}
{% set vm_name = status_data.get('vm_name') %}
{% set hypervisor = status_data.get('hypervisor') %}
{% set status = status_data.get('status') %}
{% set details = status_data.get('details', '') %}
{% endif %}

{# setup/so-minion tag #}
{% if tag == 'setup/so-minion' %}
{% set status_data = data.get('data') %}
{% do salt.log.debug('vm_status_reactor: Received data: ' ~ status_data|json|string) %}
{% do salt.log.debug('vm_status_reactor: Setting vm_name, hypervisor and status') %}
{% set vm_name = data.get('id') %}

{% set grains = salt.saltutil.runner('cache.grains', tgt=vm_name).get(vm_name) %}
{% if grains %}
{% do salt.log.debug('vm_status_reactor: Got cache.grains ' ~ grains|string) %}
{% if grains.get('salt-cloud').get('profile') %}
{% do salt.log.debug('vm_status_reactor: Found salt-cloud:profile grain: ' ~ grains.get('salt-cloud').get('profile')|string) %}
{% set hypervisor = grains.get('salt-cloud').get('profile').split('-')[1] %}
{% do salt.log.debug('vm_status_reactor: Got hypervisor: ' ~ hypervisor) %}
{% endif %}
{% else %}
{% do salt.log.debug('vm_status_reactor: Did not get cache.grains.') %}
{% endif %}

{% set status = 'Initialize Minion Pillars' %}
{% set details = (status_data or {}).get('details', '') %}
{% endif %}
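
{# Illustrative salt-cloud event handled by the branch below (example values;
   the 'libvirt-<hypervisor>-...' profile naming is an assumption inferred from
   the split('-')[1] usage):
   tag:  salt/cloud/sensor1_sensor/created
   data: {"event": "created instance", "profile": "libvirt-hypervisor1-nsm", "_stamp": "..."}
   which resolves to vm_name=sensor1_sensor, hypervisor=hypervisor1,
   status='Created Instance' #}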
{# salt-cloud tag #}
{% if tag.startswith('salt/cloud/') and (tag.endswith('/creating') or tag.endswith('/deploying') or tag.endswith('/created') or tag.endswith('/destroyed')) %}
{% do salt.log.debug('vm_status_reactor: Received data: ' ~ data|json|string) %}
{% do salt.log.debug('vm_status_reactor: Setting vm_name, hypervisor and status') %}
{% set vm_name = tag.split('/')[2] %}
{% do salt.log.debug('vm_status_reactor: Got vm_name from tag: ' ~ vm_name) %}

{% if tag.endswith('/deploying') %}
{% set hypervisor = data.get('kwargs').get('cloud_grains').get('profile').split('-')[1] %}
{% endif %}

{% if data.get('profile', False) %}
{% do salt.log.debug('vm_status_reactor: Got profile from event data.') %}
{% set hypervisor = data.profile.split('-')[1] %}
{% do salt.log.debug('vm_status_reactor: Got hypervisor from data: ' ~ hypervisor) %}
{% else %}
{% set grains = salt.saltutil.runner('cache.grains', tgt=vm_name).get(vm_name) %}
{% if grains %}
{% do salt.log.debug('vm_status_reactor: Got cache.grains: ' ~ grains|string) %}
{% if grains.get('salt-cloud').get('profile') %}
{% do salt.log.debug('vm_status_reactor: Found salt-cloud:profile grain: ' ~ grains.get('salt-cloud').get('profile')|string) %}
{% set hypervisor = grains.get('salt-cloud').get('profile').split('-')[1] %}
{% do salt.log.debug('vm_status_reactor: Got hypervisor: ' ~ hypervisor) %}
{% endif %}
{% endif %}
{% endif %}

{% set status = data.get('event').title() %}
{% set details = data.get('details', '') %}
{% endif %}

{% do salt.log.info('vm_status_reactor: vm_name: ' ~ vm_name ~ ' hypervisor: ' ~ hypervisor ~ ' status: ' ~ status) %}

{% set timestamp = data.get('_stamp') %}
write_vm_status:
  runner.state.orchestrate:
    - args:
      - mods: soc.dyanno.hypervisor.write_status
      - pillar:
          vm_name: {{ vm_name }}
          hypervisor: {{ hypervisor }}
          status_data:
            timestamp: {{ timestamp }}
            status: {{ status }}
            details: {{ details }}
          event_tag: {{ tag }}

update_hypervisor:
  runner.state.orchestrate:
    - args:
      - mods: orch.dyanno_hypervisor
      - pillar:
          event_tag: {{ tag }}
          event_data: {{ data }}

{% do salt.log.debug('vm_status_reactor: Completed') %}
\ No newline at end of file
diff --git a/salt/salt/cloud/reactor_config_hypervisor.sls b/salt/salt/cloud/reactor_config_hypervisor.sls
index 433e01f31..fcf1a5dfe 100644
--- a/salt/salt/cloud/reactor_config_hypervisor.sls
+++ b/salt/salt/cloud/reactor_config_hypervisor.sls
@@ -19,13 +19,21 @@ reactor_config_hypervisor:
     reactor:
       - 'salt/key':
         - salt://reactor/check_hypervisor.sls
+      - 'salt/cloud/*/creating':
+        - /opt/so/saltstack/default/salt/reactor/vm_status.sls
       - 'salt/cloud/*/deploying':
         - /opt/so/saltstack/default/salt/reactor/createEmptyPillar.sls
+        - /opt/so/saltstack/default/salt/reactor/vm_status.sls
       - 'setup/so-minion':
         - /opt/so/saltstack/default/salt/reactor/sominion_setup.sls
+        - /opt/so/saltstack/default/salt/reactor/vm_status.sls
+      - 'salt/cloud/*/created':
+        - /opt/so/saltstack/default/salt/reactor/vm_status.sls
+      - 'soc/dyanno/hypervisor/*':
+        - /opt/so/saltstack/default/salt/reactor/vm_status.sls
       - 'salt/cloud/*/destroyed':
-        - /opt/so/saltstack/default/salt/reactor/virtReleaseHardware.sls
         - /opt/so/saltstack/default/salt/reactor/deleteKey.sls
+        - /opt/so/saltstack/default/salt/reactor/vm_status.sls
     - user: root
     - group: root
     - mode: 644
diff --git a/salt/salt/engines/master/virtual_node_manager.py b/salt/salt/engines/master/virtual_node_manager.py
index ef04032ce..9e59b3d9a 100644
--- a/salt/salt/engines/master/virtual_node_manager.py
+++ b/salt/salt/engines/master/virtual_node_manager.py
@@ -462,7 +462,8 @@ def create_vm_tracking_file(hypervisor_path: str, vm_name: str, config: dict) ->

     data = {
         'config': config,
-        'status': 'creating'
+        'status': 'creating',
+        'timestamp': datetime.now().isoformat()
     }
     # Write file and set ownership
     write_json_file(file_path, data)
@@ -487,9 +488,10 @@ def mark_vm_failed(vm_file: str, error_code: int, message: str) -> None:
     data = {
         'config': config,
         'status': 'error',
+        'timestamp': datetime.now().isoformat(),
         'error_details': {
-            'message': message,
-            'timestamp': datetime.now().isoformat()
+            'code': error_code,
+            'message': message
         }
     }
     write_json_file(error_file, data)
@@ -512,9 +514,10 @@ def mark_invalid_hardware(hypervisor_path: str, vm_name: str, config: dict, erro
     data = {
         'config': config,
         'status': 'error',
+        'timestamp': datetime.now().isoformat(),
         'error_details': {
-            'message': full_message,
-            'timestamp': datetime.now().isoformat()
+            'code': 3,  # Hardware validation failure code
+            'message': full_message
         }
     }
     write_json_file(file_path, data)
@@ -577,6 +580,17 @@ def process_vm_creation(hypervisor_path: str, vm_config: dict) -> None:
     model = get_hypervisor_model(hypervisor)
     model_config = load_hardware_defaults(model)

+    # Send Processing status event
+    try:
+        subprocess.run([
+            'so-salt-emit-vm-deployment-status-event',
+            '-v', vm_name,
+            '-H', hypervisor,
+            '-s', 'Processing'
+        ], check=True)
+    except subprocess.CalledProcessError as e:
+        logger.error(f"Failed to emit Processing status event: {e}")
+
     # Initial hardware validation against model
     is_valid, errors = validate_hardware_request(model_config, vm_config)
     if not is_valid:
@@ -626,10 +640,11 @@ def process_vm_creation(hypervisor_path: str, vm_config: dict) -> None:
         # Execute command
         result = subprocess.run(cmd, capture_output=True, text=True, check=True)

-        # Update tracking file status
+        # Update tracking file status with timestamp
         tracking_file = os.path.join(hypervisor_path, vm_name)
         data = read_json_file(tracking_file)
         data['status'] = 'running'
+        data['timestamp'] = datetime.now().isoformat()
         write_json_file(tracking_file, data)

     except subprocess.CalledProcessError as e:
@@ -721,7 +736,8 @@ def process_hypervisor(hypervisor_path: str) -> None:
     existing_vms = set()
     for file_path in glob.glob(os.path.join(hypervisor_path, '*_*')):
         basename = os.path.basename(file_path)
-        if not basename.endswith('.error'):
+        # Skip error and status files
+        if not basename.endswith('.error') and not basename.endswith('.status'):
             existing_vms.add(basename)

     # Process new VMs
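
An illustrative tracking file as written by the engine after these changes
(assumed example; the exact path layout and config keys come from the VM
request, not from this diff):

    /opt/so/saltstack/local/salt/hypervisor/hosts/hypervisor1/sensor1_sensor
    {
      "config": {"hostname": "sensor1", "role": "sensor"},
      "status": "running",
      "timestamp": "2025-02-21T09:50:01.000000"
    }

Failed requests instead carry "status": "error" plus an "error_details" object
with the numeric "code" and "message" fields added above.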
diff --git a/salt/soc/dyanno/hypervisor/init.sls b/salt/soc/dyanno/hypervisor/init.sls
index 5645a8f63..5914cdf5b 100644
--- a/salt/soc/dyanno/hypervisor/init.sls
+++ b/salt/soc/dyanno/hypervisor/init.sls
@@ -5,6 +5,8 @@ hypervisor_annotation:
     - name: /opt/so/saltstack/default/salt/hypervisor/soc_hypervisor.yaml
     - source: salt://soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja
     - template: jinja
+    - user: socore
+    - group: socore
     - defaults:
         HYPERVISORS: {{ HYPERVISORS }}
diff --git a/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja b/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja
index f5ce34563..f24ca69da 100644
--- a/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja
+++ b/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja
@@ -3,8 +3,27 @@

 {%- set TEMPLATE = ANNOTATION.hypervisor.hosts.pop('defaultHost') -%}

-{%- macro update_description(description, cpu_free, mem_free, disk_free, copper_free, sfp_free) -%}
-{{- description -}}
+{%- macro update_description(description, cpu_free, mem_free, disk_free, copper_free, sfp_free, vm_list) -%}
+{{- description }}
+
+Resource Summary:
+- CPU: {{ cpu_free }} cores available
+- Memory: {{ mem_free }}GB available
+- Disk Slots: {{ disk_free if disk_free else 'None' }} available
+- Copper Ports: {{ copper_free if copper_free else 'None' }} available
+- SFP Ports: {{ sfp_free if sfp_free else 'None' }} available
+
+{%- if vm_list %}
+Virtual Machines:
+{%- for hostname, vm_data in vm_list.items() %}
+- {{ hostname }}:
+    Status: {{ vm_data.get('status', {}).get('status', 'Unknown') }}
+    Details: {{ vm_data.get('status', {}).get('details', 'No details available') }}
+    Last Updated: {{ vm_data.get('status', {}).get('timestamp', 'Never') }}
+{%- endfor %}
+{%- else %}
+No Virtual Machines Found
+{%- endif %}
 {%- endmacro -%}

 {%- macro update_label(label, total, free) -%}
@@ -91,7 +110,8 @@
         mem_free,
         disk_free,
         copper_free,
-        sfp_free
+        sfp_free,
+        vms
       ) }) -%}

 {%- do ANNOTATION.hypervisor.hosts.update({hypervisor ~ 'VMs': updated_template}) -%}
diff --git a/salt/soc/dyanno/hypervisor/write_status.sls b/salt/soc/dyanno/hypervisor/write_status.sls
new file mode 100644
index 000000000..96dcf7e9f
--- /dev/null
+++ b/salt/soc/dyanno/hypervisor/write_status.sls
@@ -0,0 +1,69 @@
+{% do salt.log.info('soc/dyanno/hypervisor/write_status: Running') %}
{% set vm_name = pillar.get('vm_name') %}
{% set hypervisor = pillar.get('hypervisor') %}
{% set status_data = pillar.get('status_data', {}) %}
{% set event_tag = pillar.get('event_tag') %}
{% do salt.log.debug('soc/dyanno/hypervisor/write_status: tag: ' ~ event_tag) %}
{% set base_path = '/opt/so/saltstack/local/salt/hypervisor/hosts' %}
{% set status_dir = base_path ~ '/' ~ hypervisor %}
{% set status_file = status_dir ~ '/' ~ vm_name ~ '.status' %}

# Define the list of process steps in order (case-sensitive)
{% set process_steps = ['Processing', 'IP Configuration', 'Starting Create', 'Executing Deploy Script', 'Initialize Minion Pillars', 'Created Instance', 'Hardware Configuration', 'Highstate Triggered', 'Destroyed Instance'] %}
{# Failure statuses (e.g., 'IP Configuration Failed') are not part of the ordered
   pipeline; map unlisted statuses past the end of the list so they are always
   written instead of raising a ValueError from .index() #}
{% if status_data.get('status') in process_steps %}
{% set new_index = process_steps.index(status_data.get('status')) %}
{% else %}
{% set new_index = process_steps | length %}
{% endif %}
{% do salt.log.debug('soc/dyanno/hypervisor/write_status: new_index: ' ~ new_index|string) %}

# Macro to read and parse the current JSON status file
{% macro get_current_status(status_file) %}
{% do salt.log.debug('soc/dyanno/hypervisor/write_status: getting current status from file: ' ~ status_file) %}

{% set rel_path_status_file = 'hypervisor/hosts' ~ '/' ~ hypervisor ~ '/' ~ vm_name ~ '.status' %}
{# If the status file doesn't exist, then we are just now Processing, so return -1 #}
{% if salt['file.file_exists'](status_file) %}
{% import_json rel_path_status_file as current_status %}
{% do salt.log.debug('soc/dyanno/hypervisor/write_status: current status: ' ~ current_status) %}
{% do salt.log.debug('soc/dyanno/hypervisor/write_status: current status: ' ~ current_status.get('status')) %}
{% if current_status.get('status') in process_steps %}
{% set current_index = process_steps.index(current_status.get('status')) %}
{% do salt.log.debug('soc/dyanno/hypervisor/write_status: current_index: ' ~ current_index|string) %}
{%- set return_value = current_index -%}
{% else %}
{%- set return_value = -1 -%}
{% endif %}
{% else %}
{% set return_value = -1 %}
{% endif %}
{{- return_value -}}
{% endmacro %}
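
{# Example of the ordering guard used below (illustrative): if the file already
   holds 'Created Instance' (index 5) and a late 'IP Configuration' event
   (index 1) arrives, the write is skipped; unlisted failure statuses map past
   the end of the list, so they are always recorded. #}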
{% set current_index = get_current_status(status_file)|int %}
{% do salt.log.debug('soc/dyanno/hypervisor/write_status: ' ~ status_file ~ ' current status index: ' ~ current_index|string) %}

ensure_status_dir:
  file.directory:
    - name: {{ status_dir }}
    - user: 939
    - group: 939
    - mode: 755
    - makedirs: True


{# Some of the status updates trigger within a second of each other and can cause, for example, the IP Configuration orchestration to process before Processing. #}
{# This check has been put in place to ensure a status earlier in the process can't overwrite this file once a status later in the process has written to it. #}
{# The final step is Destroyed Instance, so we allow Processing to overwrite it in case someone creates a new VM with the same name as one that was previously destroyed. #}
{% if new_index > current_index or current_index == process_steps | length - 1 %}
write_status_file:
  file.serialize:
    - name: {{ status_file }}
    - dataset: {{ status_data|json }}
    - formatter: json
    - user: 939
    - group: 939
    - mode: 600
    - indent: 2
    - require:
      - file: ensure_status_dir
{% else %}
{% do salt.log.debug('soc/dyanno/hypervisor/write_status: File not written. ' ~ status_data.get('status') ~ ' cannot overwrite ' ~ process_steps[current_index] ~ '.') %}
{% endif %}
{% do salt.log.info('soc/dyanno/hypervisor/write_status: Completed') %}
diff --git a/salt/top.sls b/salt/top.sls
index 82f074626..0d22bd782 100644
--- a/salt/top.sls
+++ b/salt/top.sls
@@ -11,6 +11,7 @@ base:
   'salt-cloud:driver:libvirt':
     - match: grain
     - storage
+    - vm_status

   '*':
     - cron.running
diff --git a/salt/vm_status/init.sls b/salt/vm_status/init.sls
new file mode 100644
index 000000000..ba2b21968
--- /dev/null
+++ b/salt/vm_status/init.sls
@@ -0,0 +1,10 @@
+# Send highstate trigger event for VM deployment status tracking
# so-salt-emit-vm-deployment-status-event sets event_tag = f'soc/dyanno/hypervisor/{status.lower()}'
vm_highstate_trigger:
  event.send:
    - name: soc/dyanno/hypervisor/highstate triggered
    - data:
        status: Highstate Triggered
        vm_name: {{ grains.id }}
        hypervisor: {{ salt['grains.get']('salt-cloud:profile', '').split('-')[1] }}
    - order: 1  # Ensure this runs early in the highstate process
\ No newline at end of file
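
Illustrative end-to-end trace of the status pipeline this patch introduces
(example names; the 'libvirt-<hypervisor>-...' profile format is an assumption
inferred from the split('-')[1] calls above):

    1. virtual_node_manager.py emits 'Processing' for sensor1_sensor on hypervisor1
    2. so-qcow2-modify-network emits 'IP Configuration'
    3. salt/cloud/sensor1_sensor/creating  -> 'Starting Create'
    4. salt/cloud/sensor1_sensor/deploying -> 'Executing Deploy Script'
    5. setup/so-minion                     -> 'Initialize Minion Pillars'
    6. salt/cloud/sensor1_sensor/created   -> 'Created Instance'
    7. so-kvm-modify-hardware emits 'Hardware Configuration'
    8. vm_status/init.sls emits 'Highstate Triggered' on the first highstate

Each step is serialized to
/opt/so/saltstack/local/salt/hypervisor/hosts/hypervisor1/sensor1_sensor.status
and re-rendered into the hypervisor's SOC annotation by orch/dyanno_hypervisor.sls.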