mirror of
https://github.com/Security-Onion-Solutions/securityonion.git
synced 2025-12-06 17:22:49 +01:00
195 lines
7.5 KiB
Bash
195 lines
7.5 KiB
Bash
#!/bin/bash
#
# Elasticsearch troubleshooting helper.
#
# Reports, in order: whether the Elasticsearch API is reachable (plus the
# _health_report indicators), per-node disk usage against the disk
# watermarks, and any unassigned shards.

# Shared helpers; the contents of this file are not visible from here.
. /usr/sbin/so-common

# ANSI colour escape sequences. They are stored with a literal backslash and
# only become real escapes when printed through `echo -e` / `printf '%b'`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
BOLD='\033[1;37m'
NC='\033[0m' # "no colour": resets terminal attributes

#######################################
# Print a colourised message to stdout.
# Globals:   BOLD, GREEN, YELLOW, RED, NC (read)
# Arguments: $1 - level: LOG (section banner), OK, WARN or ERROR.
#                 Any other value (including empty) prints nothing.
#            $2 - message text. It is printed with `echo -e`, so backslash
#                 escapes such as \n inside the message are interpreted;
#                 callers in this file rely on that.
# Returns:   0
#######################################
log_title() {
  local level=$1
  local message=$2

  # `case` on a quoted word: an empty or multi-word level simply matches
  # nothing instead of breaking a `[ ... ]` test.
  case "$level" in
    LOG)
      echo -e "\n${BOLD}================ $message ================${NC}\n"
      ;;
    OK)
      echo -e "${GREEN} $message ${NC}"
      ;;
    WARN)
      echo -e "${YELLOW} $message ${NC}"
      ;;
    ERROR)
      echo -e "${RED} $message ${NC}"
      ;;
  esac
}

#######################################
# Query the Elasticsearch _health_report API and print every indicator whose
# status is not "green", together with its diagnoses and (up to ten) affected
# indices per diagnosis.
# Globals:   BOLD, NC (read)
# Arguments: none
# Outputs:   report on stdout
# Returns:   0 when the report was retrieved and parsed (whatever the
#            indicator states), 1 when it could not be retrieved or parsed.
#######################################
health_report() {
  local report non_green_count indicator diagnosis
  local name status symptom display_name word sep
  local cause action total_indices index
  local -a words indices

  # The URL is quoted so the `?` can never be treated as a glob.
  if ! report=$(so-elasticsearch-query '_health_report?format=json' --fail 2>/dev/null); then
    log_title "ERROR" "Failed to retrieve health report from Elasticsearch"
    return 1
  fi

  # If jq cannot parse the payload the count is empty; treat that as a
  # failure instead of falling through to the "all green" message.
  non_green_count=$(jq '[.indicators | to_entries[] | select(.value.status != "green")] | length' \
    <<<"$report" 2>/dev/null)
  if ! [[ "$non_green_count" =~ ^[0-9]+$ ]]; then
    log_title "ERROR" "Failed to parse health report from Elasticsearch"
    return 1
  fi

  if ((non_green_count == 0)); then
    log_title "OK" "All health indicators are green"
    return 0
  fi

  # One compact JSON object ({key, value}) per non-green indicator.
  while IFS= read -r indicator; do
    name=$(jq -r '.key' <<<"$indicator")
    status=$(jq -r '.value.status' <<<"$indicator")
    symptom=$(jq -r '.value.symptom // "No symptom available"' <<<"$indicator")

    # Reformat the indicator name: "shards_availability" -> "Shards Availability".
    display_name=""
    sep=""
    IFS='_' read -r -a words <<<"$name"
    for word in "${words[@]}"; do
      display_name+="${sep}${word^}"
      sep=" "
    done

    if [[ "$status" == "yellow" ]]; then
      log_title "WARN" "$display_name: $symptom"
    else
      log_title "ERROR" "$display_name: $symptom"
    fi

    # Diagnoses, if the indicator carries any.
    while IFS= read -r diagnosis; do
      cause=$(jq -r '.cause // "Unknown"' <<<"$diagnosis")
      action=$(jq -r '.action // "No action specified"' <<<"$diagnosis")

      # %b expands the colour escapes; %s prints the API text verbatim.
      printf ' %b %s\n\n' "${BOLD}Cause:${NC}" "$cause"
      printf ' %b %s\n\n' "${BOLD}Action:${NC}" "$action"

      mapfile -t indices < <(jq -r '.affected_resources.indices[]? // empty' <<<"$diagnosis")
      total_indices=${#indices[@]}
      if ((total_indices > 0)); then
        echo -e " ${BOLD}Affected indices:${NC}"
        for index in "${indices[@]:0:10}"; do
          echo " - $index"
        done
        if ((total_indices > 10)); then
          echo " ... and $((total_indices - 10)) more indices (truncated for readability)"
        fi
      fi
      echo
    done < <(jq -c '.value.diagnosis[]? // empty' <<<"$indicator")
  done < <(jq -c '.indicators | to_entries[] | select(.value.status != "green")' <<<"$report")

  return 0
}

#######################################
# Check that the Elasticsearch API answers, then print the health report.
# If the API is unreachable, print `so-status` output plus a hint and
# terminate the whole script.
# Globals:   NC (read)
# Arguments: none
# Returns:   the status of health_report when the API is reachable;
#            otherwise the function does not return (exit 1).
#######################################
elasticsearch_status() {
  log_title "LOG" "Elasticsearch Status"

  if ! so-elasticsearch-query / --fail --output /dev/null; then
    log_title "ERROR" "Elasticsearch API is not accessible"
    so-status
    log_title "ERROR" "Make sure Elasticsearch is running. Additionally, check for startup errors in /opt/so/log/elasticsearch/securityonion.log${NC}\n"

    # None of the later checks can work without the API.
    exit 1
  fi

  health_report
}

#######################################
# List indices whose primary store size is larger than 1 KiB, in the order
# returned by Elasticsearch (the query sorts by creation date, oldest first).
# Indices whose name starts with ".internal", "so-detection" or "so-case"
# are left out.
# Globals:   BOLD, NC (read)
# Arguments: none
# Outputs:   a three-column table (creation date in UTC, name, size)
# Returns:   0 on success, 1 when the list cannot be retrieved or parsed.
#######################################
indices_by_age() {
  local indices_output rows creation_date index_name size_bytes
  local unit suffix scaled size_human formatted_date rule

  log_title "LOG" "Indices by Creation Date - Size > 1KB"
  log_title "WARN" "Since high/flood watermark has been reached consider updating ILM policies.\n"

  if ! indices_output=$(so-elasticsearch-query '_cat/indices?v&s=creation.date:asc&h=creation.date.string,index,status,health,docs.count,pri.store.size&bytes=b&format=json' --fail 2>/dev/null); then
    log_title "ERROR" "Failed to retrieve indices list from Elasticsearch"
    return 1
  fi

  # Keep indices > 1024 bytes and drop the excluded prefixes. A null size is
  # treated as 0 so that `tonumber` cannot abort the listing. Fields are
  # joined with a bare "|" so no padding ends up in the values read below.
  if ! rows=$(jq -r '
      .[]
      | select(((."pri.store.size" // "0") | tonumber) > 1024)
      | select(.index | (startswith(".internal") or startswith("so-detection") or startswith("so-case")) | not)
      | "\(."creation.date.string")|\(.index)|\(."pri.store.size")"
    ' <<<"$indices_output" 2>/dev/null); then
    log_title "ERROR" "Failed to parse indices list from Elasticsearch"
    return 1
  fi

  # Header and rule use the same column widths as the rows: 19 + 1 + 76 + 1 + 10.
  printf '%b%-19s %-76s %10s%b\n' "$BOLD" "Creation Date" "Name" "Size" "$NC"
  printf -v rule '%*s' 107 ''
  printf '%b%s%b\n' "$BOLD" "${rule// /-}" "$NC"

  while IFS='|' read -r creation_date index_name size_bytes; do
    # Also skips the single empty line produced when there are no rows.
    [[ "$size_bytes" =~ ^[0-9]+$ ]] || continue

    # Pick a unit, then format with two decimals using integer arithmetic
    # (truncating, and always with a leading digit).
    if ((size_bytes >= 1073741824)); then
      unit=1073741824
      suffix=GB
    elif ((size_bytes >= 1048576)); then
      unit=1048576
      suffix=MB
    else
      unit=1024
      suffix=KB
    fi
    scaled=$((size_bytes * 100 / unit))
    printf -v size_human '%d.%02d%s' "$((scaled / 100))" "$((scaled % 100))" "$suffix"

    # -u: the output carries a literal "Z", so it has to be rendered in UTC.
    # If `date` cannot parse the value, show it unchanged.
    formatted_date=$(date -u -d "$creation_date" '+%Y-%m-%dT%H:%MZ' 2>/dev/null) ||
      formatted_date=$creation_date

    printf '%-19s %-76s %10s\n' "$formatted_date" "$index_name" "$size_human"
  done <<<"$rows"

  return 0
}

#######################################
# Compare every node's disk usage with the cluster's high and flood-stage
# disk watermarks and, if any node has reached one of them, list the indices
# by age.
# Globals:   BOLD, GREEN, YELLOW, RED, NC (read)
# Arguments: none
# Outputs:   thresholds and one line per node on stdout
# Returns:   1 when settings or node data cannot be retrieved; the status of
#            indices_by_age when a watermark was reached; 0 otherwise.
#######################################
watermark_settings() {
  local watermark_output disk_allocation_output
  local flood high low flood_num high_num
  local node_name disk_used
  local watermark_reached=0
  # Effective value of one watermark: an explicit transient setting wins over
  # a persistent one, which wins over the built-in default. Yields "null"
  # when the setting is absent everywhere.
  local -r pick_watermark='[.transient, .persistent, .defaults]
    | map(.cluster.routing.allocation.disk.watermark[$level]? // empty)
    | .[0] // "null"'
  local -r percent_re='^[0-9]+(\.[0-9]+)?%$'
  local -r number_re='^[0-9]+(\.[0-9]+)?$'

  # URLs are quoted so `?`, `*` and `&` reach the helper untouched.
  if ! watermark_output=$(so-elasticsearch-query '_cluster/settings?include_defaults=true&filter_path=*.cluster.routing.allocation.disk.*' --fail 2>/dev/null); then
    log_title "ERROR" "Failed to retrieve watermark settings from Elasticsearch"
    return 1
  fi

  if ! disk_allocation_output=$(so-elasticsearch-query '_cat/nodes?v&h=name,ip,disk.used_percent,disk.avail,disk.total&format=json' --fail 2>/dev/null); then
    log_title "ERROR" "Failed to retrieve disk allocation data from Elasticsearch"
    return 1
  fi

  flood=$(jq -r --arg level flood_stage "$pick_watermark" <<<"$watermark_output")
  high=$(jq -r --arg level high "$pick_watermark" <<<"$watermark_output")
  low=$(jq -r --arg level low "$pick_watermark" <<<"$watermark_output")

  log_title "LOG" "Disk Usage Check"
  echo -e "${BOLD}LOW:${GREEN}$low${NC}${BOLD} HIGH:${YELLOW}${high}${NC}${BOLD} FLOOD:${RED}${flood}${NC}\n"

  # The comparison below only understands percentages such as "90%".
  if ! [[ "$flood" =~ $percent_re && "$high" =~ $percent_re ]]; then
    log_title "WARN" "High/flood watermarks could not be read as percentages (high: $high, flood: $flood); skipping node disk usage comparison"
    return 0
  fi

  # Strip percentage signs for comparison.
  flood_num=${flood%\%}
  high_num=${high%\%}

  # Check each node's disk usage. The loop reads from a process substitution
  # so it runs in this shell and can set watermark_reached directly.
  while IFS='|' read -r node_name disk_used; do
    if ! [[ "$disk_used" =~ $number_re ]]; then
      log_title "WARN" "$node_name disk usage is unavailable"
      continue
    fi

    # awk does the floating-point comparison; exit status 0 means "reached".
    if awk -v used="$disk_used" -v limit="$flood_num" \
      'BEGIN { exit !(used + 0 >= limit + 0) }' </dev/null; then
      log_title "ERROR" "$node_name is at or above the flood watermark ($flood)! Disk usage: ${disk_used}%"
      watermark_reached=1
    elif awk -v used="$disk_used" -v limit="$high_num" \
      'BEGIN { exit !(used + 0 >= limit + 0) }' </dev/null; then
      log_title "ERROR" "$node_name is at or above the high watermark ($high)! Disk usage: ${disk_used}%"
      watermark_reached=1
    else
      log_title "OK" "$node_name disk usage: ${disk_used}%"
    fi
  done < <(jq -r '.[] | "\(.name)|\(.["disk.used_percent"])"' <<<"$disk_allocation_output")

  # Show indices by age only when some node reached a watermark.
  if ((watermark_reached)); then
    indices_by_age
  fi
}

#######################################
# Report every shard whose state is UNASSIGNED: replicas as warnings,
# primaries as errors, each with the reason given by Elasticsearch.
# Arguments: none
# Outputs:   one line per unassigned shard, or a single OK line
# Returns:   0 when shard data was retrieved and parsed (whether or not
#            unassigned shards exist), 1 otherwise.
#######################################
unassigned_shards() {
  local shards_output unassigned_count
  local index shard prirep reason

  # Quoted so that `?` and `&` are passed through literally.
  if ! shards_output=$(so-elasticsearch-query '_cat/shards?v&h=index,shard,prirep,state,unassigned.reason,unassigned.details&s=state&format=json' --fail 2>/dev/null); then
    log_title "ERROR" "Failed to retrieve shard data from Elasticsearch"
    return 1
  fi

  log_title "LOG" "Unassigned Shards Check"

  # An unparseable payload leaves the count empty; report that as a failure
  # rather than claiming that all shards are assigned.
  unassigned_count=$(jq '[.[] | select(.state == "UNASSIGNED")] | length' \
    <<<"$shards_output" 2>/dev/null)
  if ! [[ "$unassigned_count" =~ ^[0-9]+$ ]]; then
    log_title "ERROR" "Failed to parse shard data from Elasticsearch"
    return 1
  fi

  if ((unassigned_count == 0)); then
    log_title "OK" "All shards are assigned"
    return 0
  fi

  # `shard` is read only to keep the remaining fields in position.
  while IFS='|' read -r index shard prirep reason; do
    case "$prirep" in
      r)
        log_title "WARN" "Replica shard for index $index is unassigned. Reason: $reason"
        ;;
      p)
        log_title "ERROR" "Primary shard for index $index is unassigned. Reason: $reason"
        ;;
    esac
  done < <(jq -r '.[] | select(.state == "UNASSIGNED") | "\(.index)|\(.shard)|\(.prirep)|\(."unassigned.reason")"' <<<"$shards_output")

  return 0
}

#######################################
# Run all checks in order: Elasticsearch status, disk watermarks,
# unassigned shards. Every check runs even if an earlier one returned a
# failure, so the operator still sees the remaining output.
# Arguments: none
# Returns:   0 when every check returned 0, 1 when at least one did not.
#######################################
main() {
  local rc=0

  elasticsearch_status || rc=1
  watermark_settings || rc=1
  unassigned_shards || rc=1

  return "$rc"
}

# Script entry point. main takes no arguments; "$@" is forwarded by convention.
main "$@"