#!/bin/bash
#
# Elasticsearch troubleshooting report.
#
# Checks, in order: API reachability and the _health_report indicators,
# per-node disk usage against the disk watermarks (listing indices by age
# when a node is at/above the high or flood watermark), and unassigned shards.
#
# Relies on the external commands so-elasticsearch-query and so-status, plus
# jq, bc, GNU sed and GNU date.

. /usr/sbin/so-common

RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
BOLD='\033[1;37m'
NC='\033[0m'

#######################################
# Print a coloured message.
# Arguments:
#   $1 - level: LOG (section title), OK, WARN or ERROR
#   $2 - message text (backslash escapes are interpreted by echo -e)
# Outputs:
#   Writes the formatted message to stdout; unknown levels print nothing.
#######################################
log_title() {
  # "$1" is quoted so an empty or multi-word level cannot break the test.
  case "$1" in
    LOG) echo -e "\n${BOLD}================ $2 ================${NC}\n" ;;
    OK) echo -e "${GREEN} $2 ${NC}" ;;
    WARN) echo -e "${YELLOW} $2 ${NC}" ;;
    ERROR) echo -e "${RED} $2 ${NC}" ;;
  esac
}

#######################################
# Query _health_report and print every indicator whose status is not green,
# together with its diagnoses and (up to 10) affected indices.
# Returns:
#   1 if the report cannot be retrieved or parsed.
#######################################
health_report() {
  local health_report_output non_green_count
  local indicator_name indicator status symptom display_name
  local diagnosis cause action affected_indices total_indices remaining index

  if ! health_report_output=$(so-elasticsearch-query '_health_report?format=json' --fail 2>/dev/null); then
    log_title "ERROR" "Failed to retrieve health report from Elasticsearch"
    return 1
  fi

  # Without this check an unparsable response leaves the count empty, the
  # numeric test errors out, and the "all green" branch would be reported.
  if ! non_green_count=$(jq '[.indicators | to_entries[] | select(.value.status != "green")] | length' \
    <<<"$health_report_output" 2>/dev/null) || [[ ! "$non_green_count" =~ ^[0-9]+$ ]]; then
    log_title "ERROR" "Failed to parse health report from Elasticsearch"
    return 1
  fi

  if [ "$non_green_count" -gt 0 ]; then
    while IFS= read -r indicator_name; do
      # Pass the name as a jq variable instead of splicing it into the program.
      indicator=$(jq --arg name "$indicator_name" '.indicators[$name]' <<<"$health_report_output")
      status=$(jq -r '.status' <<<"$indicator")
      symptom=$(jq -r '.symptom // "No symptom available"' <<<"$indicator")

      # Reformat indicator name: underscores to spaces, capitalise each word
      display_name=$(printf '%s\n' "$indicator_name" | tr '_' ' ' | sed 's/\b\(.\)/\u\1/g')

      if [ "$status" = "yellow" ]; then
        log_title "WARN" "$display_name: $symptom"
      else
        log_title "ERROR" "$display_name: $symptom"
      fi

      # Diagnosis, if available
      while IFS= read -r diagnosis; do
        cause=$(jq -r '.cause // "Unknown"' <<<"$diagnosis")
        action=$(jq -r '.action // "No action specified"' <<<"$diagnosis")
        echo -e " ${BOLD}Cause:${NC} $cause\n"
        echo -e " ${BOLD}Action:${NC} $action\n"

        # Check for affected indices
        affected_indices=$(jq -r '.affected_resources.indices[]? // empty' <<<"$diagnosis")
        if [ -n "$affected_indices" ]; then
          echo -e " ${BOLD}Affected indices:${NC}"
          total_indices=$(wc -l <<<"$affected_indices")
          while IFS= read -r index; do
            echo " - $index"
          done < <(head -10 <<<"$affected_indices")
          if [ "$total_indices" -gt 10 ]; then
            remaining=$((total_indices - 10))
            echo " ... and $remaining more indices (truncated for readability)"
          fi
        fi
        echo
      done < <(jq -c '.diagnosis[]? // empty' <<<"$indicator")
    done < <(jq -r '.indicators | to_entries[] | select(.value.status != "green") | .key' <<<"$health_report_output")
  else
    log_title "OK" "All health indicators are green"
  fi
}

#######################################
# Verify the Elasticsearch API answers, then print the health report.
# If the API is not accessible, prints so-status output and exits the
# script with status 1.
#######################################
elasticsearch_status() {
  log_title "LOG" "Elasticsearch Status"
  if so-elasticsearch-query / --fail --output /dev/null; then
    health_report
  else
    log_title "ERROR" "Elasticsearch API is not accessible"
    so-status
    log_title "ERROR" "Make sure Elasticsearch is running. Addtionally, check for startup errors in /opt/so/log/elasticsearch/securityonion.log${NC}\n"
    exit 1
  fi
}

#######################################
# List indices larger than 1KB ordered by creation date (oldest first),
# excluding names starting with .internal, so-detection or so-case.
# Returns:
#   1 if the index list cannot be retrieved.
#######################################
indices_by_age() {
  local indices_output creation_date formatted_date index_name size_bytes size_human

  log_title "LOG" "Indices by Creation Date - Size > 1KB"
  log_title "WARN" "Since high/flood watermark has been reached consider updating ILM policies.\n"

  if ! indices_output=$(so-elasticsearch-query '_cat/indices?v&s=creation.date:asc&h=creation.date.string,index,status,health,docs.count,pri.store.size&bytes=b&format=json' --fail 2>/dev/null); then
    log_title "ERROR" "Failed to retrieve indices list from Elasticsearch"
    return 1
  fi

  # Header uses the same column widths as the rows printed below.
  printf '%b%-19s %-76s %10s%b\n' "$BOLD" "Creation Date" "Name" "Size" "$NC"
  echo -e "${BOLD}--------------------------------------------------------------------------------------------------------------${NC}"

  # Filter for indices with size > 1KB (1024 bytes), excluding .internal,
  # so-detection* and so-case*. A null size is treated as 0 so that one
  # such entry does not abort the whole jq stream in tonumber. Fields are
  # joined with a bare '|' so no padding leaks into the values read below.
  while IFS='|' read -r creation_date index_name size_bytes; do
    # Convert bytes to GB / MB
    if [ "$size_bytes" -gt 1073741824 ]; then
      size_human="$(bc <<<"scale=2; $size_bytes / 1073741824")GB"
    else
      size_human="$(bc <<<"scale=2; $size_bytes / 1048576")MB"
    fi

    # -u keeps the timestamp in UTC so the literal 'Z' suffix is truthful;
    # if the date cannot be parsed, fall back to the raw value.
    formatted_date=$(date -u -d "$creation_date" '+%Y-%m-%dT%H:%MZ' 2>/dev/null) \
      || formatted_date=$creation_date

    # Format output with spacing
    printf "%-19s %-76s %10s\n" "$formatted_date" "$index_name" "$size_human"
  done < <(jq -r '.[]
    | select((."pri.store.size" // "0" | tonumber) > 1024)
    | select(.index | (startswith(".internal") or startswith("so-detection") or startswith("so-case")) | not)
    | "\(."creation.date.string")|\(.index)|\(."pri.store.size")"' <<<"$indices_output")
}

#######################################
# Compare each node's disk usage with the cluster disk watermarks and, when
# any node is at/above the high or flood watermark, list indices by age.
# Returns:
#   1 if settings or node data cannot be retrieved, or if a watermark is
#   not expressed as a percentage.
#######################################
watermark_settings() {
  local watermark_output disk_allocation_output
  local flood high low flood_num high_num value
  local node_name disk_used
  local watermark_reached=0
  local percent_re='^[0-9]+(\.[0-9]+)?%$'
  local number_re='^[0-9]+(\.[0-9]+)?$'
  # Explicitly configured values (transient, then persistent) win over the
  # defaults section.
  local watermark_filter='[(.transient, .persistent, .defaults) | .cluster.routing.allocation.disk.watermark[$key]? | select(. != null)] | .[0] // empty'

  if ! watermark_output=$(so-elasticsearch-query '_cluster/settings?include_defaults=true&filter_path=*.cluster.routing.allocation.disk.*' --fail 2>/dev/null); then
    log_title "ERROR" "Failed to retrieve watermark settings from Elasticsearch"
    return 1
  fi

  if ! disk_allocation_output=$(so-elasticsearch-query '_cat/nodes?v&h=name,ip,disk.used_percent,disk.avail,disk.total&format=json' --fail 2>/dev/null); then
    log_title "ERROR" "Failed to retrieve disk allocation data from Elasticsearch"
    return 1
  fi

  flood=$(jq -r --arg key flood_stage "$watermark_filter" <<<"$watermark_output")
  high=$(jq -r --arg key high "$watermark_filter" <<<"$watermark_output")
  low=$(jq -r --arg key low "$watermark_filter" <<<"$watermark_output")

  # Check each node's disk usage
  log_title "LOG" "Disk Usage Check"
  echo -e "${BOLD}LOW:${GREEN}$low${NC}${BOLD} HIGH:${YELLOW}${high}${NC}${BOLD} FLOOD:${RED}${flood}${NC}\n"

  # Only percentage watermarks are comparable with disk.used_percent; any
  # other form would make the numeric comparison fail and report "OK".
  for value in "$flood" "$high" "$low"; do
    if [[ ! "$value" =~ $percent_re ]]; then
      log_title "ERROR" "Unsupported disk watermark value '${value}': only percentage watermarks can be compared against disk usage"
      return 1
    fi
  done

  # Strip percentage signs for comparison
  flood_num=${flood%\%}
  high_num=${high%\%}

  # The loop is fed by process substitution so it runs in the current shell
  # and can set watermark_reached directly (no flag file in /tmp).
  while IFS='|' read -r node_name disk_used; do
    if [[ ! "$disk_used" =~ $number_re ]]; then
      log_title "WARN" "$node_name disk usage is unavailable"
      continue
    fi
    if (($(bc -l <<<"$disk_used >= $flood_num"))); then
      log_title "ERROR" "$node_name is at or above the flood watermark ($flood)! Disk usage: ${disk_used}%"
      watermark_reached=1
    elif (($(bc -l <<<"$disk_used >= $high_num"))); then
      log_title "ERROR" "$node_name is at or above the high watermark ($high)! Disk usage: ${disk_used}%"
      watermark_reached=1
    else
      log_title "OK" "$node_name disk usage: ${disk_used}%"
    fi
  done < <(jq -r '.[] | "\(.name)|\(.["disk.used_percent"])"' <<<"$disk_allocation_output")

  # Show indices by age when any node reached a watermark
  if ((watermark_reached)); then
    indices_by_age
  fi
}

#######################################
# Report unassigned shards: replicas as warnings, primaries as errors.
# Returns:
#   1 if shard data cannot be retrieved or parsed.
#######################################
unassigned_shards() {
  local unassigned_shards_output unassigned_count
  local index shard prirep reason

  if ! unassigned_shards_output=$(so-elasticsearch-query '_cat/shards?v&h=index,shard,prirep,state,unassigned.reason,unassigned.details&s=state&format=json' --fail 2>/dev/null); then
    log_title "ERROR" "Failed to retrieve shard data from Elasticsearch"
    return 1
  fi

  log_title "LOG" "Unassigned Shards Check"

  # Count UNASSIGNED shards; a parse failure must not be reported as
  # "all shards are assigned".
  if ! unassigned_count=$(jq '[.[] | select(.state == "UNASSIGNED")] | length' \
    <<<"$unassigned_shards_output" 2>/dev/null) || [[ ! "$unassigned_count" =~ ^[0-9]+$ ]]; then
    log_title "ERROR" "Failed to parse shard data from Elasticsearch"
    return 1
  fi

  if [ "$unassigned_count" -gt 0 ]; then
    while IFS='|' read -r index shard prirep reason; do
      if [ "$prirep" = "r" ]; then
        log_title "WARN" "Replica shard for index $index is unassigned. Reason: $reason"
      elif [ "$prirep" = "p" ]; then
        log_title "ERROR" "Primary shard for index $index is unassigned. Reason: $reason"
      fi
    done < <(jq -r '.[] | select(.state == "UNASSIGNED") | "\(.index)|\(.shard)|\(.prirep)|\(."unassigned.reason")"' <<<"$unassigned_shards_output")
  else
    log_title "OK" "All shards are assigned"
  fi
}

# Run all checks in order.
main() {
  elasticsearch_status
  watermark_settings
  unassigned_shards
}

main