Mirror of https://github.com/Security-Onion-Solutions/securityonion.git
Allow destroyed VMs to be displayed in the UI; VNM cleans up destroyed-VM status files after 48h
@@ -85,6 +85,60 @@
 {% endif %}
 {% endfor %}
 
+{# Find and add destroyed VMs from status files #}
+{% set processed_vms = [] %}
+{% for vm_name, vm_data in vms.items() %}
+{% do processed_vms.append(vm_name) %}
+{% endfor %}
+
+{# Find all status files for this hypervisor #}
+{% set relative_path = 'hypervisor/hosts/' ~ hypervisor %}
+{% set absolute_path = '/opt/so/saltstack/local/salt/' ~ relative_path %}
+{% do salt.log.info('salt/hypervisor/map.jinja: Scanning for status files in: ' ~ absolute_path) %}
+
+{# Try to find status files using file.find with absolute path #}
+{% set status_files = salt['file.find'](absolute_path, name='*_*.status', type='f') %}
+{% do salt.log.info('salt/hypervisor/map.jinja: Found status files: ' ~ status_files | tojson) %}
+
+{# Convert absolute paths back to relative paths for processing #}
+{% set relative_status_files = [] %}
+{% for status_file in status_files %}
+{% set relative_file = status_file | replace('/opt/so/saltstack/local/salt/', '') %}
+{% do relative_status_files.append(relative_file) %}
+{% endfor %}
+{% set status_files = relative_status_files %}
+
+{% do salt.log.info('salt/hypervisor/map.jinja: Converted to relative paths: ' ~ status_files | tojson) %}
+
+{% for status_file in status_files %}
+{# Extract the VM name from the filename #}
+{% set basename = status_file.split('/')[-1] %}
+{% set vm_name = basename.replace('.status', '') %}
+{% set hostname = vm_name.split('_')[0] %}
+
+{# Skip already processed VMs #}
+{% if hostname in processed_vms %}
+{% continue %}
+{% endif %}
+
+{# Read the status file #}
+{% do salt.log.info('salt/hypervisor/map.jinja: Processing potential destroyed VM status file: ' ~ status_file) %}
+{% import_json status_file as status_data %}
+
+{# Only process files with "Destroyed Instance" status #}
+{% if status_data and status_data.status == 'Destroyed Instance' %}
+{% do salt.log.info('salt/hypervisor/map.jinja: Found VM with Destroyed Instance status: ' ~ hostname) %}
+
+{# Add to vms with minimal config #}
+{% do vms.update({
+    hostname: {
+        'status': status_data,
+        'config': {}
+    }
+}) %}
+{% endif %}
+{% endfor %}
+
 {# Merge node config with model capabilities and VM states #}
 {% do HYPERVISORS[role].update({
 hypervisor: {
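The scan above keys off the status-file naming convention, apparently `<hostname>_<role>.status`. A minimal Python sketch of the same extraction the Jinja performs (the filename is a hypothetical example, not taken from the diff):

```python
# Mirrors the Jinja parsing above: "<hostname>_<role>.status" -> hostname.
basename = "sensor01_sensor.status"          # hypothetical example filename
vm_name = basename.replace(".status", "")    # -> "sensor01_sensor"
hostname = vm_name.split("_")[0]             # -> "sensor01"
print(hostname)
```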
@@ -73,7 +73,7 @@ Notes:
 Description:
 The engine operates in the following phases:
 
-1. Engine Lock Acquisition
+1. Lock Acquisition
    - Acquires single engine-wide lock
    - Prevents multiple instances from running
    - Lock remains until clean shutdown or error
@@ -138,7 +138,7 @@ import grp
 import salt.config
 import salt.runner
 from typing import Dict, List, Optional, Tuple, Any
-from datetime import datetime
+from datetime import datetime, timedelta
 from threading import Lock
 
 # Get socore uid/gid
@@ -160,6 +160,8 @@ DEFAULT_BASE_PATH = '/opt/so/saltstack/local/salt/hypervisor/hosts'
 VALID_ROLES = ['sensor', 'searchnode', 'idh', 'receiver', 'heavynode', 'fleet']
 LICENSE_PATH = '/opt/so/saltstack/local/pillar/soc/license.sls'
 DEFAULTS_PATH = '/opt/so/saltstack/default/salt/hypervisor/defaults.yaml'
+# Define the retention period for destroyed VMs (in hours)
+DESTROYED_VM_RETENTION_HOURS = 48
 
 # Single engine-wide lock for virtual node manager
 engine_lock = Lock()
@@ -667,6 +669,50 @@ def process_vm_creation(hypervisor_path: str, vm_config: dict) -> None:
         mark_vm_failed(os.path.join(hypervisor_path, f"{vm_name}_failed"), 4, error_msg)
         raise
 
+def cleanup_destroyed_vm_status_files(hypervisor_path: str) -> None:
+    """
+    Clean up status files for destroyed VMs that are older than the retention period.
+
+    Args:
+        hypervisor_path: Path to the hypervisor directory
+    """
+    try:
+        log.debug(f"Using destroyed VM retention period of {DESTROYED_VM_RETENTION_HOURS} hours")
+
+        # Calculate the retention cutoff time
+        cutoff_time = datetime.now() - timedelta(hours=DESTROYED_VM_RETENTION_HOURS)
+
+        # Find all status files for destroyed VMs
+        status_files = glob.glob(os.path.join(hypervisor_path, '*_*.status'))
+        log.debug(f"Found {len(status_files)} status files to check for expired destroyed VMs")
+
+        for status_file in status_files:
+            try:
+                # Read the status file
+                status_data = read_json_file(status_file)
+
+                # Check if this is a destroyed VM
+                if status_data.get('status') == 'Destroyed Instance':
+                    # Parse the timestamp
+                    timestamp_str = status_data.get('timestamp', '')
+                    if timestamp_str:
+                        timestamp = datetime.fromisoformat(timestamp_str)
+                        vm_name = os.path.basename(status_file).replace('.status', '')
+                        age_hours = (datetime.now() - timestamp).total_seconds() / 3600
+
+                        # If older than retention period, delete the file
+                        if timestamp < cutoff_time:
+                            log.info(f"Removing expired status file for VM {vm_name} (age: {age_hours:.1f} hours > retention: {DESTROYED_VM_RETENTION_HOURS} hours)")
+                            os.remove(status_file)
+                        else:
+                            log.debug(f"Keeping status file for VM {vm_name} (age: {age_hours:.1f} hours < retention: {DESTROYED_VM_RETENTION_HOURS} hours)")
+            except Exception as e:
+                log.error(f"Error processing status file {status_file}: {e}")
+
+    except Exception as e:
+        log.error(f"Failed to clean up destroyed VM status files: {e}")
+
+
 def process_vm_deletion(hypervisor_path: str, vm_name: str) -> None:
     """
     Process a single VM deletion request.
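For context, the cleanup assumes each `.status` file is JSON carrying at least `status` and `timestamp` fields (the fields the function reads; the full schema written by the write_status state is not shown in this commit). A minimal sketch of the age check with a hypothetical payload:

```python
from datetime import datetime, timedelta

# Hypothetical status payload; field names match what the function reads.
status_data = {"status": "Destroyed Instance",
               "timestamp": "2025-01-10T08:30:00.123456"}

RETENTION_HOURS = 48  # mirrors DESTROYED_VM_RETENTION_HOURS
cutoff = datetime.now() - timedelta(hours=RETENTION_HOURS)
ts = datetime.fromisoformat(status_data["timestamp"])  # parses ISO-8601
print("expired" if ts < cutoff else "retained")
```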
@@ -731,6 +777,9 @@ def process_hypervisor(hypervisor_path: str) -> None:
     vms_file = os.path.join(os.path.dirname(hypervisor_path), f"{hypervisor}VMs")
     if not os.path.exists(vms_file):
         log.debug("No VMs file found at %s", vms_file)
+
+        # Even if no VMs file exists, we should still clean up any expired status files
+        cleanup_destroyed_vm_status_files(hypervisor_path)
         return
 
     nodes_config = read_json_file(vms_file)
@@ -768,6 +817,9 @@ def process_hypervisor(hypervisor_path: str) -> None:
             log.info(f"Initiating deletion process for VM: {vm_name}")
             process_vm_deletion(hypervisor_path, vm_name)
 
+        # Clean up expired status files for destroyed VMs
+        cleanup_destroyed_vm_status_files(hypervisor_path)
+
     except Exception as e:
         log.error("Failed to process hypervisor %s: %s", hypervisor_path, str(e))
         raise
@@ -797,12 +849,12 @@ def start(interval: int = DEFAULT_INTERVAL,
     if not validate_hvn_license():
         return
 
-    # Attempt to acquire engine lock
+    # Attempt to acquire lock
     if not engine_lock.acquire(blocking=False):
         log.error("Another virtual node manager is already running")
         return
 
-    log.debug("Virtual node manager acquired engine lock")
+    log.debug("Virtual node manager acquired lock")
 
     try:
         # Process each hypervisor directory
@@ -811,7 +863,7 @@ def start(interval: int = DEFAULT_INTERVAL,
             process_hypervisor(hypervisor_path)
 
     # Clean shutdown - release lock
-    log.debug("Virtual node manager releasing engine lock")
+    log.debug("Virtual node manager releasing lock")
     engine_lock.release()
     log.info("Virtual node manager completed successfully")
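The two hunks above only rename log/comment text; the underlying pattern is a standard non-blocking `threading.Lock` guard. A condensed sketch, simplified and omitting the engine's logging and hypervisor iteration:

```python
from threading import Lock

engine_lock = Lock()

def start() -> None:
    # Bail out if another instance already holds the engine-wide lock.
    if not engine_lock.acquire(blocking=False):
        return
    try:
        pass  # process each hypervisor directory here
    finally:
        # The diff releases on the clean path; a finally also covers errors.
        engine_lock.release()
```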
@@ -4,6 +4,8 @@ hypervisor:
   title: defaultHost
   description: "Hypervisor Configuration"
   syntax: json
+  file: true
+  global: true
   uiElements:
     - field: hostname
       label: "Hostname"
@@ -62,21 +64,16 @@ hypervisor:
       forcedType: int
     - field: disk
       label: "Disk(s) for passthrough. Line-delimited list. Free: FREE | Total: TOTAL"
       required: true
       readonly: true
       forcedType: '[]int'
       multiline: true
     - field: copper
       label: "Copper port(s) for passthrough. Line-delimited list. Free: FREE | Total: TOTAL"
       required: true
       readonly: true
       forcedType: '[]int'
       multiline: true
    - field: sfp
       label: "SFP port(s) for passthrough. Line-delimited list. Free: FREE | Total: TOTAL"
       required: true
       readonly: true
       forcedType: '[]int'
       multiline: true
-  file: true
-  global: true
@@ -1 +1,14 @@
 {% set HYPERVISORS = salt['pillar.get']('hypervisor:nodes', {}) %}
+
+{# Define the list of process steps in order (case-sensitive) #}
+{% set PROCESS_STEPS = [
+    'Processing',
+    'IP Configuration',
+    'Starting Create',
+    'Executing Deploy Script',
+    'Initialize Minion Pillars',
+    'Created Instance',
+    'Hardware Configuration',
+    'Highstate Triggered',
+    'Destroyed Instance'
+] %}
@@ -13,6 +13,7 @@
 
 {%- import_yaml 'soc/dyanno/hypervisor/hypervisor.yaml' as ANNOTATION -%}
 {%- from 'hypervisor/map.jinja' import HYPERVISORS -%}
+{%- from 'soc/dyanno/hypervisor/map.jinja' import PROCESS_STEPS -%}
 
 {%- set TEMPLATE = ANNOTATION.hypervisor.hosts.pop('defaultHost') -%}
@@ -20,23 +21,53 @@
 # Hypervisor Configuration: {{ description }}
 
 ## Resource Summary
-| Resource | Available | Total |
-|-------------|-----------|-----------|
-| CPU Cores | {{ cpu_free }} | {{ cpu_total }} |
-| Memory (GB) | {{ mem_free }} | {{ mem_total }} |
-| Disk | {{ disk_free | replace('\n', ',') if disk_free else 'None' }} | {{ disk_total | replace('\n', ',') }} |
-| Copper | {{ copper_free | replace('\n', ',') if copper_free else 'None' }} | {{ copper_total | replace('\n', ',') }} |
-| SFP | {{ sfp_free | replace('\n', ',') if sfp_free else 'None' }} | {{ sfp_total | replace('\n', ',') }} |
+| | CPU Cores | Memory (GB) | Disk | Copper | SFP |
+|-----------|-----------|-------------|-------------|-------------|-------------|
+| Available | {{ cpu_free }} | {{ mem_free }} | {{ disk_free | replace('\n', ',') if disk_free else 'None' }} | {{ copper_free | replace('\n', ',') if copper_free else 'None' }} | {{ sfp_free | replace('\n', ',') if sfp_free else 'None' }} |
+| Total | {{ cpu_total }} | {{ mem_total }} | {{ disk_total | replace('\n', ',') }} | {{ copper_total | replace('\n', ',') }} | {{ sfp_total | replace('\n', ',') }} |
 
 {%- if vm_list %}
 ## Virtual Machines
+VMs can have the following status values: {% for step in PROCESS_STEPS %}{{ step }}{% if not loop.last %}, {% endif %}{% endfor %}. The "Last Updated" timestamp shows when the VM status was last changed. After reaching "Highstate Triggered" status, additional highstate runs will not update the timestamp. Only changing to "Destroyed Instance" status will update the timestamp again.
 
 | Name | Status | CPU Cores | Memory (GB)| Disk | Copper | SFP | Last Updated |
 |--------------------|--------------------|-----------|------------|------|--------|------|---------------------|
 {%- for hostname, vm_data in vm_list.items() %}
-| {{ hostname }}_{{ vm_data.get('config', {}).get('role', 'unknown') }} | {{ vm_data.get('status', {}).get('status', 'Unknown') }} | {{ vm_data.get('config', {}).get('cpu', 'N/A') }} | {{ vm_data.get('config', {}).get('memory', 'N/A') }} | {{ vm_data.get('config', {}).get('disk', '-') | replace('\n', ',') if vm_data.get('config', {}).get('disk') else '-' }} | {{ vm_data.get('config', {}).get('copper', '-') | replace('\n', ',') if vm_data.get('config', {}).get('copper') else '-' }} | {{ vm_data.get('config', {}).get('sfp', '-') | replace('\n', ',') if vm_data.get('config', {}).get('sfp') else '-' }} | {{ vm_data.get('status', {}).get('timestamp', 'Never') | replace('T', ' ') | regex_replace('\\.[0-9]+', '') }} |
+{%- set vm_status = vm_data.get('status', {}).get('status', 'Unknown') %}
+{%- set is_destroyed = vm_status == 'Destroyed Instance' %}
+{%- set vm_role = vm_data.get('config', {}).get('role', 'unknown') %}
+{%- set name = hostname ~ (('_' ~ vm_role) if not is_destroyed and vm_role != 'unknown' else '') %}
+| {{ name }} | {{ vm_status }} |
+{%- if is_destroyed -%}
+-
+{%- else -%}
+{{ vm_data.get('config', {}).get('cpu', 'N/A') }}
+{%- endif %} |
+{%- if is_destroyed -%}
+-
+{%- else -%}
+{{ vm_data.get('config', {}).get('memory', 'N/A') }}
+{%- endif %} |
+{%- if is_destroyed -%}
+-
+{%- else -%}
+{{ vm_data.get('config', {}).get('disk', '-') | replace('\n', ',') if vm_data.get('config', {}).get('disk') else '-' }}
+{%- endif %} |
+{%- if is_destroyed -%}
+-
+{%- else -%}
+{{ vm_data.get('config', {}).get('copper', '-') | replace('\n', ',') if vm_data.get('config', {}).get('copper') else '-' }}
+{%- endif %} |
+{%- if is_destroyed -%}
+-
+{%- else -%}
+{{ vm_data.get('config', {}).get('sfp', '-') | replace('\n', ',') if vm_data.get('config', {}).get('sfp') else '-' }}
+{%- endif %} | {{ vm_data.get('status', {}).get('timestamp', 'Never') | replace('T', ' ') | regex_replace('\\.[0-9]+', '') }} |
 {%- endfor %}
 {%- else %}
 ## Virtual Machines
+VMs can have the following status values: {% for step in PROCESS_STEPS %}{{ step }}{% if not loop.last %}, {% endif %}{% endfor %}. The "Last Updated" timestamp shows when the VM status was last changed. After reaching "Highstate Triggered" status, additional highstate runs will not update the timestamp. Only changing to "Destroyed Instance" status will update the timestamp again.
 
 No Virtual Machines Found
 {%- endif %}
 {%- endmacro -%}
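For illustration, the transposed summary renders along these lines (all values hypothetical):

```
|           | CPU Cores | Memory (GB) | Disk  | Copper | SFP |
|-----------|-----------|-------------|-------|--------|-----|
| Available | 12        | 48          | 2,3   | None   | 1   |
| Total     | 16        | 64          | 1,2,3 | 1,2    | 1,2 |
```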
@@ -66,9 +97,12 @@ No Virtual Machines Found
 {%- set used_memory = 0 -%}
 {%- set ns = namespace(used_cpu=0, used_memory=0) -%}
 {%- for hostname, vm_data in vms.items() -%}
+{%- set vm_status = vm_data.get('status', {}).get('status', '') -%}
+{%- if vm_status != 'Destroyed Instance' -%}
 {%- set vm_config = vm_data.config -%}
-{%- set ns.used_cpu = ns.used_cpu + vm_config.cpu | int -%}
-{%- set ns.used_memory = ns.used_memory + vm_config.memory | int -%}
+{%- set ns.used_cpu = ns.used_cpu + vm_config.get('cpu', 0) | int -%}
+{%- set ns.used_memory = ns.used_memory + vm_config.get('memory', 0) | int -%}
+{%- endif -%}
 {%- endfor -%}
 
 {# Calculate available resources #}
@@ -80,10 +114,13 @@ No Virtual Machines Found
 {%- set used_copper = [] -%}
 {%- set used_sfp = [] -%}
 {%- for hostname, vm in vms.items() -%}
+{%- set vm_status = vm.get('status', {}).get('status', '') -%}
+{%- if vm_status != 'Destroyed Instance' -%}
 {%- set config = vm.get('config', {}) -%}
 {%- do used_disk.extend((config.get('disk', '') | string).split('\n') | map('trim') | list) -%}
 {%- do used_copper.extend((config.get('copper', '') | string).split('\n') | map('trim') | list) -%}
 {%- do used_sfp.extend((config.get('sfp', '') | string).split('\n') | map('trim') | list) -%}
+{%- endif -%}
 {%- endfor -%}
 
 {# Get available PCI indices #}
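A Python analogue of the two loops above, showing how destroyed VMs are now excluded from both the CPU/memory totals and the passthrough-device lists (the `vms` mapping here is a hypothetical example shaped like the Jinja input):

```python
# Hypothetical input shaped like the `vms` mapping the Jinja iterates over.
vms = {
    "sensor01": {"status": {"status": "Created Instance"},
                 "config": {"cpu": 4, "memory": 16, "disk": "2\n3"}},
    "old01": {"status": {"status": "Destroyed Instance"}, "config": {}},
}

used_cpu, used_disk = 0, []
for vm in vms.values():
    if vm.get("status", {}).get("status") == "Destroyed Instance":
        continue  # destroyed VMs no longer consume resources
    config = vm.get("config", {})
    used_cpu += int(config.get("cpu", 0))
    used_disk.extend(str(config.get("disk", "")).split("\n"))

print(used_cpu, used_disk)  # -> 4 ['2', '3']
```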
@@ -11,6 +11,9 @@
 
 {% if 'hvn' in salt['pillar.get']('features', []) %}
 
+{# Import the process steps from map.jinja #}
+{% from 'soc/dyanno/hypervisor/map.jinja' import PROCESS_STEPS %}
+
 {% do salt.log.info('soc/dyanno/hypervisor/write_status: Running') %}
 {% set vm_name = pillar.get('vm_name') %}
 {% set hypervisor = pillar.get('hypervisor') %}
@@ -21,19 +24,7 @@
 {% set status_dir = base_path ~ '/' ~ hypervisor %}
 {% set status_file = status_dir ~ '/' ~ vm_name ~ '.status' %}
 
-# Define the list of process steps in order (case-sensitive)
-{% set process_steps = [
-    'Processing',
-    'IP Configuration',
-    'Starting Create',
-    'Executing Deploy Script',
-    'Initialize Minion Pillars',
-    'Created Instance',
-    'Hardware Configuration',
-    'Highstate Triggered',
-    'Destroyed Instance'
-] %}
-{% set new_index = process_steps.index(status_data.get('status')) %}
+{% set new_index = PROCESS_STEPS.index(status_data.get('status')) %}
 {% do salt.log.debug('soc/dyanno/hypervisor/write_status: new_index: ' ~ new_index|string) %}
 
 # Function to read and parse current JSON status file
@@ -46,8 +37,8 @@
 {% import_json rel_path_status_file as current_status %}
-{% do salt.log.debug('soc/dyanno/hypervisor/write_status: current status: ' ~ current_status) %}
+{% do salt.log.debug('soc/dyanno/hypervisor/write_status: current status: ' ~ current_status.get('status')) %}
-{% if current_status.get('status') in process_steps %}
-{% set current_index = process_steps.index(current_status.get('status')) %}
+{% if current_status.get('status') in PROCESS_STEPS %}
+{% set current_index = PROCESS_STEPS.index(current_status.get('status')) %}
 {% do salt.log.debug('soc/dyanno/hypervisor/write_status: current_index: ' ~ current_index|string) %}
 {%- set return_value = current_index -%}
 {% else %}
@@ -74,7 +65,7 @@ ensure_status_dir:
 {# Some of the status updates trigger within a second of each other and can cause, for example, IP Configuration orchestration to process before the Processing #}
 {# This check has been put in place to ensure a status sooner in the process can't overwrite this file if a status later in the process wrote to it first. #}
 {# The final step is Destroyed, so we allow Processing to overwrite that in case someone creates a new VM with the same name that was previously destroyed. #}
-{% if new_index > current_index or (current_index == process_steps | length - 1 and new_index == 0) %}
+{% if new_index > current_index or (current_index == PROCESS_STEPS | length - 1 and new_index == 0) %}
 write_status_file:
   file.serialize:
     - name: {{ status_file }}
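The overwrite guard above reduces to an index comparison over PROCESS_STEPS. A minimal Python rendering of the same rule (the function name is illustrative; the real logic lives in the Jinja condition):

```python
PROCESS_STEPS = [
    "Processing", "IP Configuration", "Starting Create",
    "Executing Deploy Script", "Initialize Minion Pillars",
    "Created Instance", "Hardware Configuration",
    "Highstate Triggered", "Destroyed Instance",
]

def may_overwrite(current: str, new: str) -> bool:
    cur_i = PROCESS_STEPS.index(current)
    new_i = PROCESS_STEPS.index(new)
    # A later step always wins; "Processing" may restart a VM whose
    # previous status was the final step, "Destroyed Instance".
    return new_i > cur_i or (cur_i == len(PROCESS_STEPS) - 1 and new_i == 0)

assert may_overwrite("Destroyed Instance", "Processing")
assert not may_overwrite("Highstate Triggered", "Created Instance")
```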
@@ -88,7 +79,7 @@ write_status_file:
     - file: ensure_status_dir
 {% else %}
 
-{% do salt.log.debug('soc/dyanno/hypervisor/write_status: File not written. ' ~ process_steps[new_index] ~ ' cannot overwrite ' ~ process_steps[current_index] ~ '.' ) %}
+{% do salt.log.debug('soc/dyanno/hypervisor/write_status: File not written. ' ~ PROCESS_STEPS[new_index] ~ ' cannot overwrite ' ~ PROCESS_STEPS[current_index] ~ '.' ) %}
 
 {% endif %}