mirror of
https://github.com/Security-Onion-Solutions/securityonion.git
synced 2025-12-06 17:22:49 +01:00
watch for files and create a vm
This commit is contained in:
507
salt/salt/engines/master/virtual_node_manager.py
Normal file
507
salt/salt/engines/master/virtual_node_manager.py
Normal file
@@ -0,0 +1,507 @@
|
||||
#!/opt/saltstack/salt/bin/python3
|
||||
|
||||
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
|
||||
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
|
||||
# https://securityonion.net/license; you may not use this file except in compliance with the
|
||||
# Elastic License 2.0.
|
||||
|
||||
"""
|
||||
Salt Engine for Virtual Node Management
|
||||
|
||||
This engine manages the automated provisioning of virtual machines in Security Onion's
|
||||
virtualization infrastructure. It monitors directories for VM creation requests and
|
||||
handles the entire provisioning process including hardware allocation.
|
||||
|
||||
Usage:
|
||||
engines:
|
||||
- virtual_node_manager:
|
||||
interval: 30
|
||||
base_path: /opt/so/saltstack/local/salt/hypervisor/hosts
|
||||
|
||||
Options:
|
||||
interval: Time in seconds between directory scans (default: 30)
|
||||
base_path: Base directory to monitor for VM requests (default: /opt/so/saltstack/local/salt/hypervisor/hosts)
|
||||
|
||||
Memory values in VM configuration YAML files should be specified in GB. These values
|
||||
will automatically be converted to MiB when passed to so-salt-cloud.
|
||||
|
||||
Examples:
|
||||
1. Basic Configuration:
|
||||
engines:
|
||||
- virtual_node_manager: {}
|
||||
|
||||
Uses default settings to monitor for VM requests.
|
||||
|
||||
2. Custom Scan Interval:
|
||||
engines:
|
||||
- virtual_node_manager:
|
||||
interval: 60
|
||||
|
||||
Scans for new requests every 60 seconds.
|
||||
|
||||
Notes:
|
||||
- Requires 'hvn' feature license
|
||||
- Monitors for files named: add_sensor, add_searchnode, add_idh, add_receiver, add_heavynode, add_fleet
|
||||
- Hardware allocation is tracked in hypervisor YAML files
|
||||
- Creates VM-specific hardware tracking files
|
||||
|
||||
Description:
|
||||
The engine operates in the following phases:
|
||||
|
||||
1. License Validation
|
||||
- Verifies 'hvn' feature is licensed
|
||||
- Prevents operation if license is invalid
|
||||
|
||||
2. File Monitoring
|
||||
- Scans configured directory for add_* files
|
||||
- Parses VM configuration from YAML
|
||||
- Validates configuration parameters
|
||||
|
||||
3. Hardware Allocation
|
||||
- Reads hypervisor hardware inventory
|
||||
- Claims requested PCI devices
|
||||
- Updates hardware tracking
|
||||
- Creates VM hardware record
|
||||
|
||||
4. VM Provisioning
|
||||
- Executes so-salt-cloud with configuration
|
||||
- Handles network setup
|
||||
- Configures hardware passthrough
|
||||
- Manages VM startup
|
||||
|
||||
Exit Codes:
|
||||
0: Success
|
||||
1: Invalid license
|
||||
2: Configuration error
|
||||
3: Hardware allocation failure
|
||||
4: VM provisioning failure
|
||||
|
||||
Logging:
|
||||
Log files are written to /opt/so/log/salt/engines/virtual_node_manager.log
|
||||
Critical operations, errors, and hardware allocation events are logged
|
||||
"""
|
||||
|
||||
import os
|
||||
import glob
|
||||
import yaml
|
||||
import time
|
||||
import logging
|
||||
import subprocess
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
# Configure the module-level logger; handlers/formatting are expected to be
# set up by the Salt master's logging configuration.
log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)

# Constants
DEFAULT_INTERVAL = 30  # Default seconds between directory scans
DEFAULT_BASE_PATH = '/opt/so/saltstack/local/salt/hypervisor/hosts'  # Default directory monitored for add_* request files
VALID_ROLES = ['sensor', 'searchnode', 'idh', 'receiver', 'heavynode', 'fleet']  # Roles accepted in add_<role> filenames
MAX_RETRIES = 3  # Attempts per add_* file before giving up on it
RETRY_DELAY = 5  # Seconds to wait between retry attempts
LICENSE_PATH = '/opt/so/saltstack/local/pillar/soc/license.sls'  # Pillar file checked for the 'hvn' feature license
|
||||
|
||||
def ensure_claimed_section(hw_config: dict) -> dict:
    """
    Ensure the claimed section exists and is a dict.

    Replaces a missing, null (YAML ``claimed:`` with no value), or
    non-dict 'claimed' entry with an empty dict so callers can safely
    index into it.

    Args:
        hw_config: Hardware configuration section containing a 'claimed' key

    Returns:
        The claimed section as a dict (hw_config is mutated in place)
    """
    # .get() tolerates a missing key as well as an explicit YAML null;
    # the previous direct indexing raised KeyError when the key was absent.
    if not isinstance(hw_config.get('claimed'), dict):
        hw_config['claimed'] = {}
    return hw_config['claimed']
|
||||
|
||||
def convert_pci_id(pci_id: str) -> str:
    """
    Convert PCI ID from pci_0000_c7_00_0 format to 0000:c7:00.0 format.

    Args:
        pci_id: PCI ID in underscore format (e.g., pci_0000_c7_00_0)

    Returns:
        PCI ID in domain:bus:slot.function format (e.g., 0000:c7:00.0)

    Raises:
        ValueError: If the ID does not contain exactly four underscore-
            separated components after the 'pci_' prefix.

    Example:
        >>> convert_pci_id('pci_0000_c7_00_0')
        '0000:c7:00.0'
    """
    original = pci_id
    # Strip only a leading 'pci_' prefix; str.replace() would also mangle
    # a 'pci_' substring appearing anywhere later in the string.
    if pci_id.startswith('pci_'):
        pci_id = pci_id[len('pci_'):]

    # Split into components
    parts = pci_id.split('_')
    if len(parts) != 4:
        # Report the caller-supplied value, not the stripped one, so the
        # error is traceable back to the source data.
        raise ValueError(f"Invalid PCI ID format: {original}. Expected format: pci_domain_bus_slot_function")

    # Reconstruct with proper format (using period for function)
    domain, bus, slot, function = parts
    return f"{domain}:{bus}:{slot}.{function}"
|
||||
|
||||
class HardwareManager:
    """
    Manages hardware allocation and tracking for virtual machines.

    Handles reading hypervisor configuration, claiming PCI devices,
    and maintaining hardware tracking files.
    """

    def __init__(self, hypervisor: str, base_path: str):
        """Initialize the hardware manager for a specific hypervisor.

        Args:
            hypervisor: Hypervisor hostname (also its directory name under base_path)
            base_path: Base directory containing per-hypervisor configuration
        """
        self.hypervisor = hypervisor
        self.base_path = base_path
        # Per-hypervisor hardware inventory lives at <base_path>/<hv>/<hv>.yaml
        self.hypervisor_file = os.path.join(base_path, hypervisor, f"{hypervisor}.yaml")

    def read_hypervisor_config(self) -> dict:
        """
        Read and initialize the hypervisor's hardware configuration file.

        Ensures all hardware sections have a valid claimed section, and
        writes the file back if any section had to be initialized.

        Returns:
            Parsed configuration dict.

        Raises:
            Exception: Any I/O or YAML parse error is logged and re-raised.
        """
        try:
            with open(self.hypervisor_file, 'r') as f:
                config = yaml.safe_load(f)

            # Initialize any null claimed sections. Delegating to
            # ensure_claimed_section keeps this consistent with the rest of
            # the module (it also repairs non-dict values, which the old
            # inline `is None` check missed).
            modified = False
            for hw_type in ['disk', 'copper', 'sfp']:
                hw_config = config['hypervisor']['hardware'][hw_type]
                if not isinstance(hw_config.get('claimed'), dict):
                    log.debug("Initializing null claimed section for %s", hw_type)
                    ensure_claimed_section(hw_config)
                    modified = True

            # Save if we made any changes
            if modified:
                log.info("Updating hypervisor config with initialized claimed sections")
                self.write_hypervisor_config(config)

            return config
        except Exception as e:
            log.error("Failed to read hypervisor configuration: %s", str(e))
            raise

    def write_hypervisor_config(self, config: dict) -> None:
        """Write updated configuration back to the hypervisor file.

        Args:
            config: Full configuration dict to serialize as YAML.

        Raises:
            Exception: Any I/O error is logged and re-raised.
        """
        try:
            with open(self.hypervisor_file, 'w') as f:
                yaml.dump(config, f, default_flow_style=False)
        except Exception as e:
            log.error("Failed to write hypervisor configuration: %s", str(e))
            raise

    def get_pci_ids(self, hw_type: str, indices: List[int]) -> List[str]:
        """
        Look up PCI IDs for requested hardware indices.

        Args:
            hw_type: Type of hardware (disk, copper, sfp)
            indices: List of hardware indices to look up

        Returns:
            List of PCI IDs in domain:bus:slot.function format

        Raises:
            ValueError: If hw_type is unknown, or an index is already
                claimed or does not exist.
        """
        config = self.read_hypervisor_config()
        log.debug("Full config structure: %s", config)
        log.debug("Looking up indices %s for hardware type %s", indices, hw_type)
        pci_ids = []

        try:
            hardware_section = config['hypervisor']['hardware']
            log.debug("Hardware section: %s", hardware_section)
            if hw_type not in hardware_section:
                raise ValueError(f"Hardware type {hw_type} not found in configuration")

            hw_config = hardware_section[hw_type]
            # A YAML-null 'free' section would make the membership tests
            # below raise TypeError; normalize it to an empty dict.
            free_hw = hw_config.get('free') or {}
            claimed_hw = ensure_claimed_section(hw_config)

            log.debug("Free hardware section for %s: %s", hw_type, free_hw)
            log.debug("Claimed hardware section for %s: %s", hw_type, claimed_hw)

            for idx in indices:
                if idx in free_hw:
                    pci_id = convert_pci_id(free_hw[idx])
                    log.debug("Converting PCI ID from %s to %s", free_hw[idx], pci_id)
                    pci_ids.append(pci_id)
                elif idx in claimed_hw:
                    raise ValueError(f"Hardware index {idx} for {hw_type} is already claimed")
                else:
                    raise ValueError(f"Hardware index {idx} for {hw_type} does not exist")
        except KeyError as e:
            log.error("Invalid hardware configuration structure: %s", str(e))
            raise

        return pci_ids

    def claim_hardware(self, hw_type: str, indices: List[int]) -> None:
        """
        Move hardware from free to claimed in the hypervisor configuration.

        Args:
            hw_type: Type of hardware (disk, copper, sfp)
            indices: List of hardware indices to claim

        Raises:
            ValueError: If any index is not currently free.
        """
        config = self.read_hypervisor_config()

        try:
            hw_config = config['hypervisor']['hardware'][hw_type]
            claimed_hw = ensure_claimed_section(hw_config)
            # Normalize a YAML-null 'free' section so lookups cannot raise
            # TypeError; claiming from an empty section fails cleanly below.
            free_hw = hw_config.get('free') or {}

            for idx in indices:
                if idx not in free_hw:
                    raise ValueError(f"{hw_type} hardware index {idx} not available")
                # Move the device record from free to claimed.
                claimed_hw[idx] = free_hw.pop(idx)

            self.write_hypervisor_config(config)
            log.info("Successfully claimed %s hardware indices: %s", hw_type, indices)
        except Exception as e:
            log.error("Failed to claim hardware: %s", str(e))
            raise

    def create_vm_hardware_file(self, hostname: str, role: str, hardware: dict) -> None:
        """
        Create a YAML file tracking hardware allocated to a VM.

        Args:
            hostname: Name of the VM
            role: VM role (sensor, searchnode, etc.)
            hardware: Dictionary of allocated hardware

        Raises:
            Exception: Any I/O error is logged and re-raised.
        """
        vm_file = os.path.join(self.base_path, self.hypervisor, f"{hostname}_{role}.yaml")
        try:
            with open(vm_file, 'w') as f:
                yaml.dump({'hardware': hardware}, f, default_flow_style=False)
            log.info("Created hardware tracking file for %s_%s", hostname, role)
        except Exception as e:
            log.error("Failed to create VM hardware file: %s", str(e))
            raise
|
||||
|
||||
def validate_hvn_license() -> bool:
    """Check if the license file exists and contains required values.

    Returns:
        True when the license file contains a license_id and the 'hvn'
        feature; False otherwise. All failure modes are logged rather
        than raised so the engine can refuse to start cleanly.
    """
    if not os.path.exists(LICENSE_PATH):
        log.error("LICENSE: License file not found at %s", LICENSE_PATH)
        return False

    try:
        with open(LICENSE_PATH, 'r') as f:
            license_data = yaml.safe_load(f)

        if not license_data:
            log.error("LICENSE: Empty or invalid license file")
            return False

        license_id = license_data.get('license_id')
        # A YAML 'features:' key with no value yields None; normalize so
        # the membership test below cannot raise TypeError.
        features = license_data.get('features') or []

        if not license_id:
            log.error("LICENSE: No license_id found in license file")
            return False

        if 'hvn' not in features:
            log.error("Hypervisor nodes are a feature supported only for customers with a valid license.\n"
                      "Contact Security Onion Solutions, LLC via our website at https://securityonionsolutions.com\n"
                      "for more information about purchasing a license to enable this feature.")
            return False

        log.info("LICENSE: License validation successful")
        return True

    except Exception as e:
        log.error("LICENSE: Error reading license file: %s", str(e))
        return False
|
||||
|
||||
def parse_add_file(file_path: str) -> Tuple[str, str, dict]:
    """
    Parse an add_* file to extract VM configuration.

    The hypervisor name is taken from the file's parent directory and the
    role from the file name (everything after the leading 'add_' prefix).

    Args:
        file_path: Path to the add_<role> request file.

    Returns:
        Tuple of (hypervisor, role, config)

    Raises:
        ValueError: On an unknown role or a missing required field.
    """
    try:
        # Extract hypervisor and role from path
        path_parts = file_path.split(os.path.sep)
        hypervisor = path_parts[-2]
        filename = path_parts[-1]
        # Strip only the leading 'add_' prefix; str.replace() would also
        # corrupt a name containing 'add_' elsewhere in the string.
        if filename.startswith('add_'):
            role = filename[len('add_'):]
        else:
            role = filename

        if role not in VALID_ROLES:
            raise ValueError(f"Invalid role: {role}")

        # Read and parse YAML configuration
        with open(file_path, 'r') as f:
            config = yaml.safe_load(f)

        required_fields = ['hostname', 'network_mode']
        # Static IPv4 additionally requires an address and a gateway.
        if config.get('network_mode') == 'static4':
            required_fields.extend(['ip4', 'gw4'])

        for field in required_fields:
            if field not in config:
                raise ValueError(f"Missing required field: {field}")

        return hypervisor, role, config
    except Exception as e:
        log.error("Failed to parse add file %s: %s", file_path, str(e))
        raise
|
||||
|
||||
def convert_gb_to_mib(gb: int) -> int:
    """
    Convert memory value from GB to MiB.

    Args:
        gb: Memory value in gigabytes

    Returns:
        Memory value in mebibytes (1 GB = 1024 MiB)
    """
    mib_per_gb = 1024
    return mib_per_gb * gb
|
||||
|
||||
def execute_salt_cloud(profile: str, hostname: str, role: str, config: dict, pci_ids: List[str]) -> None:
    """
    Execute so-salt-cloud to create the VM.

    Args:
        profile: Salt cloud profile name
        hostname: VM hostname
        role: VM role
        config: VM configuration
        pci_ids: List of PCI IDs for hardware passthrough

    Raises:
        subprocess.CalledProcessError: When so-salt-cloud exits non-zero.
        Exception: Any other failure, logged and re-raised.
    """
    try:
        command = ['so-salt-cloud', '-p', f'sool9-{profile}', f'{hostname}_{role}']

        # Network setup: static IPv4 (with optional DNS and search domain)
        # or plain DHCP.
        if config['network_mode'] == 'static4':
            command += ['--static4', '--ip4', config['ip4'], '--gw4', config['gw4']]
            if 'dns4' in config:
                command += ['--dns4', ','.join(config['dns4'])]
            if 'search4' in config:
                command += ['--search4', config['search4']]
        else:
            command += ['--dhcp4']

        # Optional CPU and memory sizing.
        if 'cpu' in config:
            command += ['-c', str(config['cpu'])]
        if 'memory' in config:
            # Request files specify GB; so-salt-cloud expects MiB.
            command += ['-m', str(convert_gb_to_mib(config['memory']))]

        # One -P flag per PCI passthrough device.
        for device in pci_ids:
            command += ['-P', device]

        log.info("Executing: %s", ' '.join(command))
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        log.error("so-salt-cloud execution failed: %s", str(e))
        raise
    except Exception as e:
        log.error("Failed to execute so-salt-cloud: %s", str(e))
        raise
|
||||
|
||||
def process_add_file(file_path: str, base_path: str) -> None:
    """
    Process a single add_* file for VM creation.

    Parses the request, resolves and claims the requested hardware,
    provisions the VM via so-salt-cloud, records the allocation in a
    tracking file, and finally removes the request file on success.

    Args:
        file_path: Path to the add_* file
        base_path: Base path for hypervisor configuration

    Raises:
        Exception: Any failure is logged and re-raised to the caller.
    """
    try:
        hypervisor, role, config = parse_add_file(file_path)
        log.debug("Parsed config from add file: %s", config)
        hw_manager = HardwareManager(hypervisor, base_path)

        allocated_pci = []
        tracking = {
            'cpu': config.get('cpu'),
            'memory': config.get('memory'),
        }

        # Resolve and claim each requested hardware category.
        # NOTE(review): hardware claimed here is not released if a later
        # step fails — confirm whether rollback is handled elsewhere.
        for hw_type in ('disk', 'copper', 'sfp'):
            if hw_type not in config:
                continue
            indices = config[hw_type]
            resolved = hw_manager.get_pci_ids(hw_type, indices)
            allocated_pci += resolved

            # Record index-to-PCI pairing for the tracking file.
            tracking[hw_type] = [
                {'id': idx, 'pci': pci}
                for idx, pci in zip(indices, resolved)
            ]

            hw_manager.claim_hardware(hw_type, indices)

        # Provision the VM with the resolved passthrough devices.
        execute_salt_cloud(hypervisor, config['hostname'], role, config, allocated_pci)

        # Persist what was allocated to this VM.
        hw_manager.create_vm_hardware_file(config['hostname'], role, tracking)

        # Request fully handled; remove the trigger file so it is not
        # picked up again on the next scan.
        os.remove(file_path)
        log.info("Successfully processed VM creation request: %s_%s", config['hostname'], role)

    except Exception as e:
        log.error("Failed to process add file %s: %s", file_path, str(e))
        raise
|
||||
|
||||
def monitor_add_files(base_path: str) -> None:
    """
    Monitor directories for add_* files and process them.

    Each file is attempted up to MAX_RETRIES times. A file that still
    fails after all retries is logged and skipped so that one bad request
    cannot starve the remaining requests in the same scan; since the file
    is only removed on success, it will be retried on the next scan.

    Args:
        base_path: Base path to monitor for add_* files
    """
    pattern = os.path.join(base_path, '*', 'add_*')

    try:
        for file_path in glob.glob(pattern):
            log.info("Found new VM request file: %s", file_path)

            for attempt in range(MAX_RETRIES):
                try:
                    process_add_file(file_path, base_path)
                    break
                except Exception as e:
                    if attempt < MAX_RETRIES - 1:
                        log.warning("Attempt %d failed, retrying in %d seconds: %s",
                                  attempt + 1, RETRY_DELAY, str(e))
                        time.sleep(RETRY_DELAY)
                    else:
                        # Previously this re-raised, which aborted the scan
                        # and let one permanently-failing file block every
                        # file after it in glob order. Log and move on.
                        log.error("All attempts failed for file %s", file_path)
    except Exception as e:
        log.error("Error monitoring add files: %s", str(e))
        raise
|
||||
|
||||
def start(interval: int = DEFAULT_INTERVAL,
          base_path: str = DEFAULT_BASE_PATH) -> None:
    """
    Main engine loop.

    Validates the 'hvn' license once at startup, then scans base_path for
    VM creation requests every `interval` seconds until the process stops.

    Args:
        interval: Time in seconds between directory scans
        base_path: Base path to monitor for VM requests
    """
    log.info("Starting virtual node manager engine")

    # Refuse to run at all without a valid 'hvn' license.
    if not validate_hvn_license():
        return

    while True:
        try:
            monitor_add_files(base_path)
        except Exception as exc:
            # Keep the engine alive; the next scan may succeed.
            log.error("Error in main engine loop: %s", str(exc))
        time.sleep(interval)
|
||||
Reference in New Issue
Block a user