diff --git a/salt/_runners/setup_hypervisor.py b/salt/_runners/setup_hypervisor.py
index 929801783..182a9b2c8 100644
--- a/salt/_runners/setup_hypervisor.py
+++ b/salt/_runners/setup_hypervisor.py
@@ -172,7 +172,15 @@ MANAGER_HOSTNAME = socket.gethostname()
 
 def _download_image():
     """
-    Download and validate the Oracle Linux KVM image.
+    Download and validate the Oracle Linux KVM image with retry logic and progress monitoring.
+
+    Features:
+    - Detects stalled downloads (no progress for 30 seconds)
+    - Retries up to 3 times on failure
+    - Connection timeout of 30 seconds
+    - Read timeout of 60 seconds
+    - Cleans up partial downloads on failure
+
     Returns:
         bool: True if successful or file exists with valid checksum, False on error
     """
@@ -185,45 +193,107 @@ def _download_image():
             os.unlink(IMAGE_PATH)
 
     log.info("Starting image download process")
+
+    # Retry configuration
+    max_attempts = 3
+    retry_delay = 5  # seconds to wait between retry attempts
+    stall_timeout = 30  # seconds without progress before considering the download stalled
+    connection_timeout = 30  # seconds to establish a connection
+    read_timeout = 60  # seconds to wait for data chunks
+
+    for attempt in range(1, max_attempts + 1):
+        log.info("Download attempt %d of %d", attempt, max_attempts)
+
+        try:
+            # Download file with timeouts
+            log.info("Downloading Oracle Linux KVM image from %s to %s", IMAGE_URL, IMAGE_PATH)
+            response = requests.get(
+                IMAGE_URL,
+                stream=True,
+                timeout=(connection_timeout, read_timeout)
+            )
+            response.raise_for_status()
-    try:
-        # Download file
-        log.info("Downloading Oracle Linux KVM image from %s to %s", IMAGE_URL, IMAGE_PATH)
-        response = requests.get(IMAGE_URL, stream=True)
-        response.raise_for_status()
+            # Get total file size for progress tracking
+            total_size = int(response.headers.get('content-length', 0))
+            downloaded_size = 0
+            last_log_time = 0
+            last_progress_time = time.time()
+            last_downloaded_size = 0
-        # Get total file size for progress tracking
-        total_size = int(response.headers.get('content-length', 0))
-        downloaded_size = 0
-        last_log_time = 0
+            # Save file with progress logging and stall detection
+            with salt.utils.files.fopen(IMAGE_PATH, 'wb') as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    if chunk:  # filter out keep-alive new chunks
+                        f.write(chunk)
+                        downloaded_size += len(chunk)
+                    current_time = time.time()
+
+                    # Check for stalled download
+                    if downloaded_size > last_downloaded_size:
+                        # Progress made, reset stall timer
+                        last_progress_time = current_time
+                        last_downloaded_size = downloaded_size
+                    elif current_time - last_progress_time > stall_timeout:
+                        # No progress for stall_timeout seconds
+                        raise Exception(
+                            f"Download stalled: no progress for {stall_timeout} seconds "
+                            f"at {downloaded_size}/{total_size} bytes"
+                        )
+
+                    # Log progress every second
+                    if current_time - last_log_time >= 1:
+                        progress = (downloaded_size / total_size) * 100 if total_size > 0 else 0
+                        log.info("Progress - %.1f%% (%d/%d bytes)",
+                                 progress, downloaded_size, total_size)
+                        last_log_time = current_time
-        # Save file with progress logging
-        with salt.utils.files.fopen(IMAGE_PATH, 'wb') as f:
-            for chunk in response.iter_content(chunk_size=8192):
-                f.write(chunk)
-                downloaded_size += len(chunk)
+            # Validate downloaded file
+            log.info("Download complete, validating checksum...")
+            if not _validate_image_checksum(IMAGE_PATH, IMAGE_SHA256):
+                log.error("Checksum validation failed on attempt %d", attempt)
+                os.unlink(IMAGE_PATH)
+                if attempt < max_attempts:
+                    log.info("Will retry download...")
+                    continue
+                else:
+                    log.error("All download attempts failed due to checksum mismatch")
+                    return False
+
+            log.info("Successfully downloaded and validated Oracle Linux KVM image")
+            return True
+
+        except requests.exceptions.Timeout as e:
+            log.error("Download attempt %d failed: Timeout - %s", attempt, str(e))
+            if os.path.exists(IMAGE_PATH):
+                os.unlink(IMAGE_PATH)
+            if attempt < max_attempts:
+                log.info("Will retry download in %d seconds...", retry_delay)
+                time.sleep(retry_delay)
+            else:
+                log.error("All download attempts failed due to timeout")
-        # Log progress every second
-        current_time = time.time()
-        if current_time - last_log_time >= 1:
-            progress = (downloaded_size / total_size) * 100 if total_size > 0 else 0
-            log.info("Progress - %.1f%% (%d/%d bytes)",
-                progress, downloaded_size, total_size)
-            last_log_time = current_time
-
-        # Validate downloaded file
-        if not _validate_image_checksum(IMAGE_PATH, IMAGE_SHA256):
-            os.unlink(IMAGE_PATH)
-            return False
-
-        log.info("Successfully downloaded and validated Oracle Linux KVM image")
-        return True
-
-    except Exception as e:
-        log.error("Error downloading hypervisor image: %s", str(e))
-        if os.path.exists(IMAGE_PATH):
-            os.unlink(IMAGE_PATH)
-        return False
+        except requests.exceptions.RequestException as e:
+            log.error("Download attempt %d failed: Network error - %s", attempt, str(e))
+            if os.path.exists(IMAGE_PATH):
+                os.unlink(IMAGE_PATH)
+            if attempt < max_attempts:
+                log.info("Will retry download in %d seconds...", retry_delay)
+                time.sleep(retry_delay)
+            else:
+                log.error("All download attempts failed due to network errors")
+
+        except Exception as e:
+            log.error("Download attempt %d failed: %s", attempt, str(e))
+            if os.path.exists(IMAGE_PATH):
+                os.unlink(IMAGE_PATH)
+            if attempt < max_attempts:
+                log.info("Will retry download in %d seconds...", retry_delay)
+                time.sleep(retry_delay)
+            else:
+                log.error("All download attempts failed")
+
+    return False
 
 def _check_ssh_keys_exist():
     """
@@ -419,25 +489,28 @@ def _ensure_hypervisor_host_dir(minion_id: str = None):
         log.error(f"Error creating hypervisor host directory: {str(e)}")
         return False
 
-def _apply_dyanno_hypervisor_state():
+def _apply_dyanno_hypervisor_state(status):
     """
     Apply the soc.dyanno.hypervisor state on the salt master.
 
     This function applies the soc.dyanno.hypervisor state on the salt master
     to update the hypervisor annotation and ensure all hypervisor host
     directories exist.
 
+    Args:
+        status: Status passed to the hypervisor annotation state
+
     Returns:
         bool: True if state was applied successfully, False otherwise
     """
     try:
-        log.info("Applying soc.dyanno.hypervisor state on salt master")
+        log.info(f"Applying soc.dyanno.hypervisor state on salt master with status: {status}")
 
         # Initialize the LocalClient
         local = salt.client.LocalClient()
 
         # Target the salt master to apply the soc.dyanno.hypervisor state
         target = MANAGER_HOSTNAME + '_*'
-        state_result = local.cmd(target, 'state.apply', ['soc.dyanno.hypervisor', "pillar={'baseDomain': {'status': 'PreInit'}}", 'concurrent=True'], tgt_type='glob')
+        state_result = local.cmd(target, 'state.apply', ['soc.dyanno.hypervisor', f"pillar={{'baseDomain': {{'status': '{status}'}}}}", 'concurrent=True'], tgt_type='glob')
         log.debug(f"state_result: {state_result}")
 
         # Check if state was applied successfully
         if state_result:
@@ -454,17 +527,17 @@
                         success = False
 
             if success:
-                log.info("Successfully applied soc.dyanno.hypervisor state")
+                log.info(f"Successfully applied soc.dyanno.hypervisor state with status: {status}")
                 return True
             else:
-                log.error("Failed to apply soc.dyanno.hypervisor state")
+                log.error(f"Failed to apply soc.dyanno.hypervisor state with status: {status}")
                 return False
         else:
-            log.error("No response from salt master when applying soc.dyanno.hypervisor state")
+            log.error(f"No response from salt master when applying soc.dyanno.hypervisor state with status: {status}")
             return False
 
     except Exception as e:
-        log.error(f"Error applying soc.dyanno.hypervisor state: {str(e)}")
+        log.error(f"Error applying soc.dyanno.hypervisor state with status {status}: {str(e)}")
         return False
 
 def _apply_cloud_config_state():
@@ -598,11 +671,6 @@ def setup_environment(vm_name: str = 'sool9', disk_size: str = '220G', minion_id
             log.warning("Failed to apply salt.cloud.config state, continuing with setup")
             # We don't return an error here as we want to continue with the setup process
 
-        # Apply the soc.dyanno.hypervisor state on the salt master
-        if not _apply_dyanno_hypervisor_state():
-            log.warning("Failed to apply soc.dyanno.hypervisor state, continuing with setup")
-            # We don't return an error here as we want to continue with the setup process
-
         log.info("Starting setup_environment in setup_hypervisor runner")
 
         # Check if environment is already set up
@@ -616,9 +684,12 @@ def setup_environment(vm_name: str = 'sool9', disk_size: str = '220G', minion_id
 
         # Handle image setup if needed
         if not image_valid:
+            _apply_dyanno_hypervisor_state('ImageDownloadStart')
             log.info("Starting image download/validation process")
             if not _download_image():
                 log.error("Image download failed")
+                # Update hypervisor annotation with failure status
+                _apply_dyanno_hypervisor_state('ImageDownloadFailed')
                 return {
                     'success': False,
                     'error': 'Image download failed',
@@ -631,6 +702,8 @@ def setup_environment(vm_name: str = 'sool9', disk_size: str = '220G', minion_id
         log.info("Setting up SSH keys")
         if not _setup_ssh_keys():
             log.error("SSH key setup failed")
+            # Update hypervisor annotation with failure status
+            _apply_dyanno_hypervisor_state('SSHKeySetupFailed')
             return {
                 'success': False,
                 'error': 'SSH key setup failed',
@@ -655,6 +728,12 @@ def setup_environment(vm_name: str = 'sool9', disk_size: str = '220G', minion_id
     success = vm_result.get('success', False)
     log.info("Setup environment completed with status: %s", "SUCCESS" if success else "FAILED")
 
+    # Update hypervisor annotation with success status
+    if success:
+        _apply_dyanno_hypervisor_state('PreInit')
+    else:
+        _apply_dyanno_hypervisor_state('SetupFailed')
+
     # If setup was successful and we have a minion_id, run highstate
     if success and minion_id:
         log.info("Running highstate on hypervisor %s", minion_id)
diff --git a/salt/elasticfleet/enabled.sls b/salt/elasticfleet/enabled.sls
index cef47168f..ec8c8337e 100644
--- a/salt/elasticfleet/enabled.sls
+++ b/salt/elasticfleet/enabled.sls
@@ -32,6 +32,16 @@ so-elastic-fleet-auto-configure-logstash-outputs:
     - retry:
         attempts: 4
         interval: 30
+
+{# Separate from the state above in order to catch elasticfleet-logstash.crt changes and force an update to the fleet output policy #}
+so-elastic-fleet-auto-configure-logstash-outputs-force:
+  cmd.run:
+    - name: /usr/sbin/so-elastic-fleet-outputs-update --force --certs
+    - retry:
+        attempts: 4
+        interval: 30
+    - onchanges:
+      - x509: etc_elasticfleet_logstash_crt
 {% endif %}
 
 # If enabled, automatically update Fleet Server URLs & ES Connection
diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update
index de9b5f93f..c64d022a4 100644
--- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update
+++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update
@@ -10,6 +10,26 @@
 . /usr/sbin/so-common
 
 FORCE_UPDATE=false
+UPDATE_CERTS=false
+
+while [[ $# -gt 0 ]]; do
+  case $1 in
+    -f|--force)
+      FORCE_UPDATE=true
+      shift
+      ;;
+    -c|--certs)
+      UPDATE_CERTS=true
+      shift
+      ;;
+    *)
+      echo "Unknown option $1"
+      echo "Usage: $0 [-f|--force] [-c|--certs]"
+      exit 1
+      ;;
+  esac
+done
 
 # Only run on Managers
 if ! is_manager_node; then
   printf "Not a Manager Node... Exiting"
@@ -19,17 +39,42 @@ fi
 
 function update_logstash_outputs() {
   if logstash_policy=$(curl -K /opt/so/conf/elasticsearch/curl.config -L "http://localhost:5601/api/fleet/outputs/so-manager_logstash" --retry 3 --retry-delay 10 --fail 2>/dev/null); then
    SSL_CONFIG=$(echo "$logstash_policy" | jq -r '.item.ssl')
+    LOGSTASHKEY=$(openssl rsa -in /etc/pki/elasticfleet-logstash.key)
+    LOGSTASHCRT=$(openssl x509 -in /etc/pki/elasticfleet-logstash.crt)
+    LOGSTASHCA=$(openssl x509 -in /etc/pki/tls/certs/intca.crt)
     if SECRETS=$(echo "$logstash_policy" | jq -er '.item.secrets' 2>/dev/null); then
-      JSON_STRING=$(jq -n \
-        --arg UPDATEDLIST "$NEW_LIST_JSON" \
-        --argjson SECRETS "$SECRETS" \
-        --argjson SSL_CONFIG "$SSL_CONFIG" \
-        '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"{{ LOGSTASH_CONFIG_YAML }}","ssl": $SSL_CONFIG,"secrets": $SECRETS}')
+      if [[ "$UPDATE_CERTS" != "true" ]]; then
+        # Reuse existing secret
+        JSON_STRING=$(jq -n \
+          --arg UPDATEDLIST "$NEW_LIST_JSON" \
+          --argjson SECRETS "$SECRETS" \
+          --argjson SSL_CONFIG "$SSL_CONFIG" \
+          '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"{{ LOGSTASH_CONFIG_YAML }}","ssl": $SSL_CONFIG,"secrets": $SECRETS}')
+      else
+        # Update certs, creating a new secret
+        JSON_STRING=$(jq -n \
+          --arg UPDATEDLIST "$NEW_LIST_JSON" \
+          --arg LOGSTASHKEY "$LOGSTASHKEY" \
+          --arg LOGSTASHCRT "$LOGSTASHCRT" \
+          --arg LOGSTASHCA "$LOGSTASHCA" \
+          '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"{{ LOGSTASH_CONFIG_YAML }}","ssl": {"certificate": $LOGSTASHCRT,"certificate_authorities":[ $LOGSTASHCA ]},"secrets": {"ssl":{"key": $LOGSTASHKEY }}}')
+      fi
     else
-      JSON_STRING=$(jq -n \
-        --arg UPDATEDLIST "$NEW_LIST_JSON" \
-        --argjson SSL_CONFIG "$SSL_CONFIG" \
-        '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"","ssl": $SSL_CONFIG}')
+      if [[ "$UPDATE_CERTS" != "true" ]]; then
+        # Reuse existing ssl config
+        JSON_STRING=$(jq -n \
+          --arg UPDATEDLIST "$NEW_LIST_JSON" \
+          --argjson SSL_CONFIG "$SSL_CONFIG" \
+          '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"{{ LOGSTASH_CONFIG_YAML }}","ssl": $SSL_CONFIG}')
+      else
+        # Update ssl config
+        JSON_STRING=$(jq -n \
+          --arg UPDATEDLIST "$NEW_LIST_JSON" \
+          --arg LOGSTASHKEY "$LOGSTASHKEY" \
+          --arg LOGSTASHCRT "$LOGSTASHCRT" \
+          --arg LOGSTASHCA "$LOGSTASHCA" \
+          '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"{{ LOGSTASH_CONFIG_YAML }}","ssl": {"certificate": $LOGSTASHCRT,"key": $LOGSTASHKEY,"certificate_authorities":[ $LOGSTASHCA ]}}')
+      fi
     fi
   fi
diff --git a/salt/elasticsearch/files/ingest/suricata.common b/salt/elasticsearch/files/ingest/suricata.common
index 102b5dac8..7b2dc7eeb 100644
--- a/salt/elasticsearch/files/ingest/suricata.common
+++ b/salt/elasticsearch/files/ingest/suricata.common
@@ -1,30 +1,155 @@
 {
-  "description" : "suricata.common",
-  "processors" : [
-    { "json": { "field": "message", "target_field": "message2", "ignore_failure": true } },
-    { "rename": { "field": "message2.pkt_src", "target_field": "network.packet_source","ignore_failure": true } },
-    { "rename": { "field": "message2.proto", "target_field": "network.transport", "ignore_failure": true } },
-    { "rename": { "field": "message2.in_iface", "target_field": "observer.ingress.interface.name", "ignore_failure": true } },
-    { "rename": { "field": "message2.flow_id", "target_field": "log.id.uid", "ignore_failure": true } },
-    { "rename": { "field": "message2.src_ip", "target_field": "source.ip", "ignore_failure": true } },
-    { "rename": { "field": "message2.src_port", "target_field": "source.port", "ignore_failure": true } },
-    { "rename": { "field": "message2.dest_ip", "target_field": "destination.ip", "ignore_failure": true } },
-    { "rename": { "field": "message2.dest_port", "target_field": "destination.port", "ignore_failure": true } },
-    { "rename": { "field": "message2.vlan", "target_field": "network.vlan.id", "ignore_failure": true } },
-    { "rename": { "field": "message2.community_id", "target_field": "network.community_id", "ignore_missing": true } },
-    { "rename": { "field": "message2.xff", "target_field": "xff.ip", "ignore_missing": true } },
-    { "set": { "field": "event.dataset", "value": "{{ message2.event_type }}" } },
-    { "set": { "field": "observer.name", "value": "{{agent.name}}" } },
-    { "set": { "field": "event.ingested", "value": "{{@timestamp}}" } },
-    { "date": { "field": "message2.timestamp", "target_field": "@timestamp", "formats": ["ISO8601", "UNIX"], "timezone": "UTC", "ignore_failure": true } },
-    { "remove":{ "field": "agent", "ignore_failure": true } },
-    {"append":{"field":"related.ip","value":["{{source.ip}}","{{destination.ip}}"],"allow_duplicates":false,"ignore_failure":true}},
-    {
-      "script": {
-        "source": "boolean isPrivate(def ip) { if (ip == null) return false; int dot1 = ip.indexOf('.'); if (dot1 == -1) return false; int dot2 = ip.indexOf('.', dot1 + 1); if (dot2 == -1) return false; int first = Integer.parseInt(ip.substring(0, dot1)); if (first == 10) return true; if (first == 192 && ip.startsWith('168.', dot1 + 1)) return true; if (first == 172) { int second = Integer.parseInt(ip.substring(dot1 + 1, dot2)); return second >= 16 && second <= 31; } return false; } String[] fields = new String[] {\"source\", \"destination\"}; for (int i = 0; i < fields.length; i++) { def field = fields[i]; def ip = ctx[field]?.ip; if (ip != null) { if (ctx.network == null) ctx.network = new HashMap(); if (isPrivate(ip)) { if (ctx.network.private_ip == null) ctx.network.private_ip = new ArrayList(); if (!ctx.network.private_ip.contains(ip)) ctx.network.private_ip.add(ip); } else { if (ctx.network.public_ip == null) ctx.network.public_ip = new ArrayList(); if (!ctx.network.public_ip.contains(ip)) ctx.network.public_ip.add(ip); } } }",
-        "ignore_failure": false
-      }
-    },
-    { "pipeline": { "if": "ctx?.event?.dataset != null", "name": "suricata.{{event.dataset}}" } }
-  ]
-}
+  "description": "suricata.common",
+  "processors": [
+    {
+      "json": {
+        "field": "message",
+        "target_field": "message2",
+        "ignore_failure": true
+      }
+    },
+    {
+      "rename": {
+        "field": "message2.pkt_src",
+        "target_field": "network.packet_source",
+        "ignore_failure": true
+      }
+    },
+    {
+      "rename": {
+        "field": "message2.proto",
+        "target_field": "network.transport",
+        "ignore_failure": true
+      }
+    },
+    {
+      "rename": {
+        "field": "message2.in_iface",
+        "target_field": "observer.ingress.interface.name",
+        "ignore_failure": true
+      }
+    },
+    {
+      "rename": {
+        "field": "message2.flow_id",
+        "target_field": "log.id.uid",
+        "ignore_failure": true
+      }
+    },
+    {
+      "rename": {
+        "field": "message2.src_ip",
+        "target_field": "source.ip",
+        "ignore_failure": true
+      }
+    },
+    {
+      "rename": {
+        "field": "message2.src_port",
+        "target_field": "source.port",
+        "ignore_failure": true
+      }
+    },
+    {
+      "rename": {
+        "field": "message2.dest_ip",
+        "target_field": "destination.ip",
+        "ignore_failure": true
+      }
+    },
+    {
+      "rename": {
+        "field": "message2.dest_port",
+        "target_field": "destination.port",
+        "ignore_failure": true
+      }
+    },
+    {
+      "rename": {
+        "field": "message2.vlan",
+        "target_field": "network.vlan.id",
+        "ignore_failure": true
+      }
+    },
+    {
+      "rename": {
+        "field": "message2.community_id",
+        "target_field": "network.community_id",
+        "ignore_missing": true
+      }
+    },
+    {
+      "rename": {
+        "field": "message2.xff",
+        "target_field": "xff.ip",
+        "ignore_missing": true
+      }
+    },
+    {
+      "set": {
+        "field": "event.dataset",
+        "value": "{{ message2.event_type }}"
+      }
+    },
+    {
+      "set": {
+        "field": "observer.name",
+        "value": "{{agent.name}}"
+      }
+    },
+    {
+      "set": {
+        "field": "event.ingested",
+        "value": "{{@timestamp}}"
+      }
+    },
+    {
+      "date": {
+        "field": "message2.timestamp",
+        "target_field": "@timestamp",
+        "formats": [
+          "ISO8601",
+          "UNIX"
+        ],
+        "timezone": "UTC",
+        "ignore_failure": true
+      }
+    },
+    {
+      "remove": {
+        "field": "agent",
+        "ignore_failure": true
+      }
+    },
+    {
+      "append": {
+        "field": "related.ip",
+        "value": [
+          "{{source.ip}}",
+          "{{destination.ip}}"
+        ],
+        "allow_duplicates": false,
+        "ignore_failure": true
+      }
+    },
+    {
+      "script": {
String[] {\"source\", \"destination\"}; for (int i = 0; i < fields.length; i++) { def field = fields[i]; def ip = ctx[field]?.ip; if (ip != null) { if (ctx.network == null) ctx.network = new HashMap(); if (isPrivate(ip)) { if (ctx.network.private_ip == null) ctx.network.private_ip = new ArrayList(); if (!ctx.network.private_ip.contains(ip)) ctx.network.private_ip.add(ip); } else { if (ctx.network.public_ip == null) ctx.network.public_ip = new ArrayList(); if (!ctx.network.public_ip.contains(ip)) ctx.network.public_ip.add(ip); } } }", + "ignore_failure": false + } + }, + { + "rename": { + "field": "message2.capture_file", + "target_field": "suricata.capture_file", + "ignore_missing": true + } + }, + { + "pipeline": { + "if": "ctx?.event?.dataset != null", + "name": "suricata.{{event.dataset}}" + } + } + ] +} \ No newline at end of file diff --git a/salt/elasticsearch/templates/component/ecs/suricata.json b/salt/elasticsearch/templates/component/ecs/suricata.json index 1eb06d266..3f393ff6a 100644 --- a/salt/elasticsearch/templates/component/ecs/suricata.json +++ b/salt/elasticsearch/templates/component/ecs/suricata.json @@ -841,6 +841,10 @@ "type": "long" } } + }, + "capture_file": { + "type": "keyword", + "ignore_above": 1024 } } } diff --git a/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume index 2322c3a94..601de643f 100644 --- a/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume +++ b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume @@ -45,7 +45,7 @@ used during VM provisioning to add dedicated NSM storage volumes. This command creates and attaches a volume with the following settings: - VM Name: `vm1_sensor` - Volume Size: `500` GB - - Volume Path: `/nsm/libvirt/volumes/vm1_sensor-nsm.img` + - Volume Path: `/nsm/libvirt/volumes/vm1_sensor-nsm-.img` - Device: `/dev/vdb` (virtio-blk) - VM remains stopped after attachment @@ -75,7 +75,8 @@ used during VM provisioning to add dedicated NSM storage volumes. - The script automatically stops the VM if it's running before creating and attaching the volume. - Volumes are created with full pre-allocation for optimal performance. -- Volume files are stored in `/nsm/libvirt/volumes/` with naming pattern `-nsm.img`. +- Volume files are stored in `/nsm/libvirt/volumes/` with naming pattern `-nsm-.img`. +- The epoch timestamp ensures unique volume names and prevents conflicts. - Volumes are attached as `/dev/vdb` using virtio-blk for high performance. - The script checks available disk space before creating the volume. - Ownership is set to `qemu:qemu` with permissions `640`. 
@@ -142,6 +143,7 @@ import socket
 import subprocess
 import pwd
 import grp
+import time
 import xml.etree.ElementTree as ET
 from io import StringIO
 from so_vm_utils import start_vm, stop_vm
@@ -242,10 +244,13 @@ def create_volume_file(vm_name, size_gb, logger):
     Raises:
         VolumeCreationError: If volume creation fails
     """
-    # Define volume path (directory already created in main())
-    volume_path = os.path.join(VOLUME_DIR, f"{vm_name}-nsm.img")
+    # Generate epoch timestamp for unique volume naming
+    epoch_timestamp = int(time.time())
 
-    # Check if volume already exists
+    # Define volume path with epoch timestamp for uniqueness
+    volume_path = os.path.join(VOLUME_DIR, f"{vm_name}-nsm-{epoch_timestamp}.img")
+
+    # Check if volume already exists (shouldn't be possible with timestamp)
     if os.path.exists(volume_path):
         logger.error(f"VOLUME: Volume already exists: {volume_path}")
         raise VolumeCreationError(f"Volume already exists: {volume_path}")
diff --git a/salt/salt/engines/master/virtual_node_manager.py b/salt/salt/engines/master/virtual_node_manager.py
index 6d88bd688..ccc063d64 100644
--- a/salt/salt/engines/master/virtual_node_manager.py
+++ b/salt/salt/engines/master/virtual_node_manager.py
@@ -727,7 +727,8 @@ def check_hypervisor_disk_space(hypervisor: str, size_gb: int) -> Tuple[bool, Op
         result = local.cmd(
             hypervisor_minion,
             'cmd.run',
-            ["df -BG /nsm/libvirt/volumes | tail -1 | awk '{print $4}' | sed 's/G//'"]
+            ["df -BG /nsm/libvirt/volumes | tail -1 | awk '{print $4}' | sed 's/G//'"],
+            kwarg={'python_shell': True}
         )
 
         if not result or hypervisor_minion not in result:
diff --git a/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja b/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja
index ac2fd6fea..f23fdb5d9 100644
--- a/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja
+++ b/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja
@@ -43,10 +43,26 @@
 No Virtual Machines Found
 
 {%- endif %}
-{%- else %}
+{%- elif baseDomainStatus == 'ImageDownloadStart' %}
+#### INFO
+
+Base domain image download started.
+{%- elif baseDomainStatus == 'ImageDownloadFailed' %}
+#### ERROR
+
+Base domain image download failed. Please check the salt-master log for details and verify network connectivity.
+{%- elif baseDomainStatus == 'SSHKeySetupFailed' %}
+#### ERROR
+
+SSH key setup failed. Please check the salt-master log for details.
+{%- elif baseDomainStatus == 'SetupFailed' %}
 #### WARNING
 
-Base domain has not been initialized.
+Setup failed. Please check the salt-master log for details.
+{%- elif baseDomainStatus == 'PreInit' %}
+#### WARNING
+
+Base domain has not been initialized. Waiting for the hypervisor to complete its highstate.
 {%- endif %}
 {%- endmacro -%}
diff --git a/salt/zeek/files/config.zeek.ja4 b/salt/zeek/files/config.zeek.ja4
index e3dd08a48..3d0035481 100644
--- a/salt/zeek/files/config.zeek.ja4
+++ b/salt/zeek/files/config.zeek.ja4
@@ -11,6 +11,8 @@ export {
     option JA4S_enabled: bool = F;
     option JA4S_raw: bool = F;
 
+    option JA4D_enabled: bool = F;
+
     option JA4H_enabled: bool = F;
     option JA4H_raw: bool = F;
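
For reference, here is a minimal standalone sketch of the retry/stall-detection pattern that _download_image() follows in the first file above, useful for exercising the logic outside of Salt. The URL, destination path, and timing values are placeholder assumptions (they are not the runner's IMAGE_URL or IMAGE_PATH), and the checksum validation and Salt logging steps are omitted:

# download_retry_sketch.py - standalone sketch of the retry/stall-detection
# download pattern; URL, DEST, and all timing values are illustrative placeholders.
import os
import time

import requests

URL = "https://example.com/image.qcow2"  # placeholder, not the runner's IMAGE_URL
DEST = "/tmp/image.qcow2"                # placeholder, not the runner's IMAGE_PATH


def download(url=URL, dest=DEST, max_attempts=3, retry_delay=5,
             stall_timeout=30, timeouts=(30, 60)):
    """Return True on success, False once all attempts are exhausted."""
    for attempt in range(1, max_attempts + 1):
        try:
            # The (connect, read) tuple bounds connection setup and each chunk read
            resp = requests.get(url, stream=True, timeout=timeouts)
            resp.raise_for_status()
            total = int(resp.headers.get("content-length", 0))
            done = last_done = 0
            last_progress = time.time()
            with open(dest, "wb") as f:
                for chunk in resp.iter_content(chunk_size=8192):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)
                        done += len(chunk)
                    now = time.time()
                    if done > last_done:
                        last_progress, last_done = now, done  # progress resets the timer
                    elif now - last_progress > stall_timeout:
                        raise RuntimeError(f"stalled at {done}/{total} bytes")
            return True
        except (requests.exceptions.RequestException, RuntimeError) as exc:
            print(f"attempt {attempt} failed: {exc}")
            if os.path.exists(dest):
                os.unlink(dest)  # never leave a partial file behind
            if attempt < max_attempts:
                time.sleep(retry_delay)
    return False


if __name__ == "__main__":
    print("ok" if download() else "failed")

The per-chunk read timeout already aborts a connection that goes completely silent; the additional wall-clock stall check guards the case where the connection stays open but no payload bytes accumulate within the stall window.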
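The Painless isPrivate() routine in the suricata.common pipeline above can also be sanity-checked outside Elasticsearch. The following is an illustrative Python re-expression of the same RFC 1918 classification, not the pipeline code itself, with hypothetical test addresses:

# rfc1918_sketch.py - Python port of the Painless isPrivate() logic used by
# the suricata.common script processor (illustrative only).
def is_private(ip):
    """True for 10.0.0.0/8, 192.168.0.0/16, and 172.16.0.0/12 addresses."""
    if ip is None:
        return False
    parts = ip.split(".")
    if len(parts) < 3:  # the Painless version requires at least two dots
        return False
    first = int(parts[0])
    if first == 10:
        return True
    if first == 192 and parts[1] == "168":
        return True
    if first == 172:
        return 16 <= int(parts[1]) <= 31
    return False


if __name__ == "__main__":
    assert is_private("10.1.2.3")
    assert is_private("192.168.0.1")
    assert is_private("172.31.255.255")
    assert not is_private("172.32.0.1")
    assert not is_private("8.8.8.8")
    print("all checks passed")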