mirror of
https://github.com/Security-Onion-Solutions/securityonion.git
synced 2025-12-06 09:12:45 +01:00
fix failed or hung qcow2 image download
This commit is contained in:
@@ -172,7 +172,15 @@ MANAGER_HOSTNAME = socket.gethostname()
|
|||||||
|
|
||||||
def _download_image():
|
def _download_image():
|
||||||
"""
|
"""
|
||||||
Download and validate the Oracle Linux KVM image.
|
Download and validate the Oracle Linux KVM image with retry logic and progress monitoring.
|
||||||
|
|
||||||
|
Features:
|
||||||
|
- Detects stalled downloads (no progress for 30 seconds)
|
||||||
|
- Retries up to 3 times on failure
|
||||||
|
- Connection timeout of 30 seconds
|
||||||
|
- Read timeout of 60 seconds
|
||||||
|
- Cleans up partial downloads on failure
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
bool: True if successful or file exists with valid checksum, False on error
|
bool: True if successful or file exists with valid checksum, False on error
|
||||||
"""
|
"""
|
||||||
@@ -186,25 +194,53 @@ def _download_image():
|
|||||||
|
|
||||||
log.info("Starting image download process")
|
log.info("Starting image download process")
|
||||||
|
|
||||||
|
# Retry configuration
|
||||||
|
max_attempts = 3
|
||||||
|
stall_timeout = 30 # seconds without progress before considering download stalled
|
||||||
|
connection_timeout = 30 # seconds to establish connection
|
||||||
|
read_timeout = 60 # seconds to wait for data chunks
|
||||||
|
|
||||||
|
for attempt in range(1, max_attempts + 1):
|
||||||
|
log.info("Download attempt %d of %d", attempt, max_attempts)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Download file
|
# Download file with timeouts
|
||||||
log.info("Downloading Oracle Linux KVM image from %s to %s", IMAGE_URL, IMAGE_PATH)
|
log.info("Downloading Oracle Linux KVM image from %s to %s", IMAGE_URL, IMAGE_PATH)
|
||||||
response = requests.get(IMAGE_URL, stream=True)
|
response = requests.get(
|
||||||
|
IMAGE_URL,
|
||||||
|
stream=True,
|
||||||
|
timeout=(connection_timeout, read_timeout)
|
||||||
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
# Get total file size for progress tracking
|
# Get total file size for progress tracking
|
||||||
total_size = int(response.headers.get('content-length', 0))
|
total_size = int(response.headers.get('content-length', 0))
|
||||||
downloaded_size = 0
|
downloaded_size = 0
|
||||||
last_log_time = 0
|
last_log_time = 0
|
||||||
|
last_progress_time = time.time()
|
||||||
|
last_downloaded_size = 0
|
||||||
|
|
||||||
# Save file with progress logging
|
# Save file with progress logging and stall detection
|
||||||
with salt.utils.files.fopen(IMAGE_PATH, 'wb') as f:
|
with salt.utils.files.fopen(IMAGE_PATH, 'wb') as f:
|
||||||
for chunk in response.iter_content(chunk_size=8192):
|
for chunk in response.iter_content(chunk_size=8192):
|
||||||
|
if chunk: # filter out keep-alive new chunks
|
||||||
f.write(chunk)
|
f.write(chunk)
|
||||||
downloaded_size += len(chunk)
|
downloaded_size += len(chunk)
|
||||||
|
current_time = time.time()
|
||||||
|
|
||||||
|
# Check for stalled download
|
||||||
|
if downloaded_size > last_downloaded_size:
|
||||||
|
# Progress made, reset stall timer
|
||||||
|
last_progress_time = current_time
|
||||||
|
last_downloaded_size = downloaded_size
|
||||||
|
elif current_time - last_progress_time > stall_timeout:
|
||||||
|
# No progress for stall_timeout seconds
|
||||||
|
raise Exception(
|
||||||
|
f"Download stalled: no progress for {stall_timeout} seconds "
|
||||||
|
f"at {downloaded_size}/{total_size} bytes"
|
||||||
|
)
|
||||||
|
|
||||||
# Log progress every second
|
# Log progress every second
|
||||||
current_time = time.time()
|
|
||||||
if current_time - last_log_time >= 1:
|
if current_time - last_log_time >= 1:
|
||||||
progress = (downloaded_size / total_size) * 100 if total_size > 0 else 0
|
progress = (downloaded_size / total_size) * 100 if total_size > 0 else 0
|
||||||
log.info("Progress - %.1f%% (%d/%d bytes)",
|
log.info("Progress - %.1f%% (%d/%d bytes)",
|
||||||
@@ -212,17 +248,47 @@ def _download_image():
|
|||||||
last_log_time = current_time
|
last_log_time = current_time
|
||||||
|
|
||||||
# Validate downloaded file
|
# Validate downloaded file
|
||||||
|
log.info("Download complete, validating checksum...")
|
||||||
if not _validate_image_checksum(IMAGE_PATH, IMAGE_SHA256):
|
if not _validate_image_checksum(IMAGE_PATH, IMAGE_SHA256):
|
||||||
|
log.error("Checksum validation failed on attempt %d", attempt)
|
||||||
os.unlink(IMAGE_PATH)
|
os.unlink(IMAGE_PATH)
|
||||||
|
if attempt < max_attempts:
|
||||||
|
log.info("Will retry download...")
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
log.error("All download attempts failed due to checksum mismatch")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
log.info("Successfully downloaded and validated Oracle Linux KVM image")
|
log.info("Successfully downloaded and validated Oracle Linux KVM image")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
except Exception as e:
|
except requests.exceptions.Timeout as e:
|
||||||
log.error("Error downloading hypervisor image: %s", str(e))
|
log.error("Download attempt %d failed: Timeout - %s", attempt, str(e))
|
||||||
if os.path.exists(IMAGE_PATH):
|
if os.path.exists(IMAGE_PATH):
|
||||||
os.unlink(IMAGE_PATH)
|
os.unlink(IMAGE_PATH)
|
||||||
|
if attempt < max_attempts:
|
||||||
|
log.info("Will retry download...")
|
||||||
|
else:
|
||||||
|
log.error("All download attempts failed due to timeout")
|
||||||
|
|
||||||
|
except requests.exceptions.RequestException as e:
|
||||||
|
log.error("Download attempt %d failed: Network error - %s", attempt, str(e))
|
||||||
|
if os.path.exists(IMAGE_PATH):
|
||||||
|
os.unlink(IMAGE_PATH)
|
||||||
|
if attempt < max_attempts:
|
||||||
|
log.info("Will retry download...")
|
||||||
|
else:
|
||||||
|
log.error("All download attempts failed due to network errors")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
log.error("Download attempt %d failed: %s", attempt, str(e))
|
||||||
|
if os.path.exists(IMAGE_PATH):
|
||||||
|
os.unlink(IMAGE_PATH)
|
||||||
|
if attempt < max_attempts:
|
||||||
|
log.info("Will retry download...")
|
||||||
|
else:
|
||||||
|
log.error("All download attempts failed")
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _check_ssh_keys_exist():
|
def _check_ssh_keys_exist():
|
||||||
|
|||||||
Reference in New Issue
Block a user