#!/bin/bash
# ============================================
# System Health Check - quick overview
# Checks CPU load, RAM, disk, Docker, systemd services and updates
# ============================================
# Usage:   bash system-health.sh
# Source:  sgit.space/downloads
# Exit:    0 = no warnings counted, 1 = at least one warning counted
# ============================================

# Strict mode: stop on command errors, on unset variables and on a failing
# stage anywhere in a pipeline.
set -o errexit -o nounset -o pipefail

# --- Report state ---
# Number of warnings counted so far; warn() increments it and the final
# summary derives the exit status from it.
WARNINGS=0

# --- ANSI colour escapes (stored literally, expanded later by `echo -e`) ---
NC='\033[0m'
BOLD='\033[1m'
CYAN='\033[0;36m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
GREEN='\033[0;32m'

# --- Thresholds ---
LOAD_WARN_FACTOR=2   # load limit = factor x number of cores
RAM_WARN=85          # percent
DISK_WARN=80         # percent

# Print one indented status line: coloured tag, colon, message.
#   $1 - colour escape for the tag
#   $2 - tag text
#   $3 - message (backslash escapes are interpreted by `echo -e`)
_status_line() {
    echo -e "  ${1}${2}${NC}: ${3}"
}

# Report a problem ($1) and count it in the global WARNINGS tally.
warn() {
    _status_line "$RED" "WARNUNG" "$1"
    WARNINGS=$((WARNINGS + 1))
}

# Report a passed check ($1).
ok() {
    _status_line "$GREEN" "OK" "$1"
}

# Report a neutral piece of information ($1).
info() {
    _status_line "$CYAN" "INFO" "$1"
}

# === Report banner ===
echo -e "\n${BOLD}===== System Health Check =====${NC}"
echo -e "Host: $(hostname) | $(date '+%Y-%m-%d %H:%M')\n"

# === Operating system ===

# Print the distribution's display name taken from an os-release file.
#   $1 - path of the os-release file (default: /etc/os-release)
# Outputs: the value of PRETTY_NAME, or "Linux" when the file does not set it
#          (PRETTY_NAME is optional; "Linux" is the default named by
#          os-release(5)).
# Returns: non-zero when the file is unreadable or cannot be sourced.
os_pretty_name() {
    local file=${1:-/etc/os-release}
    [ -r "$file" ] || return 1
    # Source inside a subshell so that keys such as NAME, ID or VERSION never
    # leak into (or overwrite) variables of this script.
    (
        . "$file" > /dev/null 2>&1 || exit 1
        # The default keeps `set -u` from aborting when the key is absent.
        printf '%s\n' "${PRETTY_NAME:-Linux}"
    )
}

echo -e "${BOLD}--- System ---${NC}"
if OS_NAME=$(os_pretty_name); then
    info "$OS_NAME"
fi
# `uptime -p` (pretty format) is not available everywhere; fall back to the
# plain output.
UPTIME=$(uptime -p 2>/dev/null || uptime)
info "Uptime: $UPTIME"

# === CPU / Load ===

# Succeed (status 0) when decimal number $1 is strictly greater than $2.
# awk is used because the shell has no floating-point arithmetic and bc is
# frequently not installed; LC_ALL=C pins "." as the decimal separator.
float_gt() {
    LC_ALL=C awk -v a="$1" -v b="$2" 'BEGIN { exit !((a + 0) > (b + 0)) }'
}

echo -e "\n${BOLD}--- CPU & Load ---${NC}"
CORES=$(nproc)
# The first two fields of /proc/loadavg are used as the 1- and 5-minute load.
read -r LOAD_1 LOAD_5 _rest < /proc/loadavg
# Computed with awk so that a fractional LOAD_WARN_FACTOR keeps working.
LOAD_THRESHOLD=$(LC_ALL=C awk -v c="$CORES" -v f="$LOAD_WARN_FACTOR" 'BEGIN { print c * f }')

info "$CORES Cores | Load: $LOAD_1 (1m) / $LOAD_5 (5m)"

if float_gt "$LOAD_5" "$LOAD_THRESHOLD"; then
    warn "Load 5m ($LOAD_5) ueber Schwellwert ($LOAD_THRESHOLD)"
fi

# === RAM ===

# Print $1 as an integer percentage of $2; prints 0 when $2 is not a positive
# integer, so callers never divide by zero.
percent_of() {
    local used=$1 total=$2
    if [ "$total" -gt 0 ] 2> /dev/null; then
        echo $((used * 100 / total))
    else
        echo 0
    fi
}

# Report RAM and swap usage as printed by `free -m`.
# Globals: RAM_WARN (read); WARNINGS (written through warn()).
# Warns when RAM usage exceeds RAM_WARN percent, when swap usage exceeds
# 50 percent, or when no memory figures could be obtained at all.
check_memory() {
    local ram_total ram_used swap_total swap_used ram_pct swap_pct

    echo -e "\n${BOLD}--- RAM ---${NC}"
    # A single `free` call keeps the four figures consistent with each other.
    # LC_ALL=C is required: translated builds of free rename the "Mem:" row
    # label, which would leave every value empty. awk's "+ 0" turns a missing
    # value into 0 instead of an empty string.
    read -r ram_total ram_used swap_total swap_used < <(
        LC_ALL=C free -m 2>/dev/null | awk '
            /^Mem:/  { mt = $2; mu = $3 }
            /^Swap:/ { st = $2; su = $3 }
            END      { print mt + 0, mu + 0, st + 0, su + 0 }
        '
    ) || true

    if [ "${ram_total:-0}" -le 0 ]; then
        warn "RAM: Speicherwerte konnten nicht ermittelt werden"
        return 0
    fi

    ram_pct=$(percent_of "$ram_used" "$ram_total")
    if [ "$ram_pct" -gt "$RAM_WARN" ]; then
        warn "RAM: ${ram_used}MB / ${ram_total}MB (${ram_pct}%)"
    else
        ok "RAM: ${ram_used}MB / ${ram_total}MB (${ram_pct}%)"
    fi

    # Swap is only reported when it exists and is actually in use.
    if [ "$swap_total" -gt 0 ] && [ "$swap_used" -gt 0 ]; then
        swap_pct=$(percent_of "$swap_used" "$swap_total")
        if [ "$swap_pct" -gt 50 ]; then
            warn "Swap: ${swap_used}MB / ${swap_total}MB (${swap_pct}%)"
        else
            info "Swap: ${swap_used}MB / ${swap_total}MB (${swap_pct}%)"
        fi
    fi
}

check_memory

# === Disk ===

# Classify one line of `df` output and print it with a status prefix.
#   $1 - a df output line whose last column is the usage, e.g. "42%"
# Globals: DISK_WARN (read); WARNINGS (written through warn()).
# Lines above DISK_WARN percent are reported through warn() so that they are
# counted, other data lines through ok(); lines without a numeric percentage
# in the last column (such as the column header) are printed unchanged.
report_disk_line() {
    local line=$1
    # Anchored at the end of the line: a "%" inside a mount point name must
    # not be mistaken for the usage column.
    local usage_re='(^|[[:space:]])([0-9]+)%[[:space:]]*$'

    if [[ "$line" =~ $usage_re ]]; then
        if [ "${BASH_REMATCH[2]}" -gt "$DISK_WARN" ]; then
            warn "$line"
        else
            ok "$line"
        fi
    else
        echo -e "  $line"
    fi
}

# Report the usage of every mounted filesystem except tmpfs, devtmpfs and
# squashfs.
check_disks() {
    local line

    echo -e "\n${BOLD}--- Disk ---${NC}"
    # Process substitution instead of a pipe: the loop stays in the current
    # shell, so warn() really increments WARNINGS, and a non-zero df exit
    # status cannot abort the script under `set -e -o pipefail`.
    while IFS= read -r line; do
        report_disk_line "$line"
    done < <(df -h --output=target,size,used,avail,pcent -x tmpfs -x devtmpfs -x squashfs 2>/dev/null)
}

check_disks

# === Docker ===

# Report container/image counts, unhealthy containers and disk usage.
# Globals: WARNINGS (written through warn()).
# Prints an info line when docker is not installed and a warning when the
# daemon cannot be queried; neither case aborts the script.
check_docker() {
    local running_ids running stopped images unhealthy size reclaimable

    echo -e "\n${BOLD}--- Docker ---${NC}"
    if ! command -v docker &> /dev/null; then
        info "Docker nicht installiert"
        return 0
    fi

    # The first query doubles as a reachability probe. Without it a stopped
    # daemon (or missing permission on the socket) makes every docker call
    # fail, which under `set -e -o pipefail` would kill the whole script.
    if ! running_ids=$(docker ps -q 2>/dev/null); then
        warn "Docker-Daemon nicht erreichbar (gestoppt oder keine Berechtigung)"
        return 0
    fi

    # grep -c exits 1 when it counts zero lines; `|| true` keeps that harmless.
    running=$(grep -c . <<< "$running_ids" || true)
    stopped=$(docker ps -aq --filter status=exited 2>/dev/null | wc -l || true)
    images=$(docker images -q 2>/dev/null | wc -l || true)
    ok "Docker: $running laufend, $stopped gestoppt, $images Images"

    # Unhealthy containers: names are joined so the warning stays on one line.
    unhealthy=$(docker ps --filter health=unhealthy --format "{{.Names}}" 2>/dev/null || true)
    if [ -n "$unhealthy" ]; then
        warn "Unhealthy Container: ${unhealthy//$'\n'/, }"
    fi

    # Docker disk usage: only the first row of the listing is shown.
    size=$(docker system df --format "{{.Size}}" 2>/dev/null | head -n 1 || true)
    reclaimable=$(docker system df --format "{{.Reclaimable}}" 2>/dev/null | head -n 1 || true)
    info "Docker Disk: $size (rueckgewinnbar: $reclaimable)"
}

check_docker

# === Systemd services ===

# List systemd units that are in the failed state.
# Globals: WARNINGS (written through warn()).
# Prints an info line and skips the check when systemctl is missing or cannot
# be queried, instead of aborting the script under `set -e -o pipefail`.
check_failed_services() {
    local failed_list failed_count line

    echo -e "\n${BOLD}--- Fehlgeschlagene Services ---${NC}"
    # Query once and reuse the captured output for the count and the listing.
    if ! command -v systemctl &> /dev/null ||
        ! failed_list=$(systemctl --failed --no-legend --no-pager 2>/dev/null); then
        info "systemd nicht verfuegbar - Service-Check uebersprungen"
        return 0
    fi

    if [ -z "$failed_list" ]; then
        ok "Keine fehlgeschlagenen Services"
        return 0
    fi

    failed_count=$(wc -l <<< "$failed_list")
    warn "$failed_count fehlgeschlagene Service(s):"
    # read without IFS= trims the leading/trailing blanks of each unit line.
    while read -r line; do
        echo -e "    $line"
    done <<< "$failed_list"
}

check_failed_services

# === Updates ===

# Report pending apt package upgrades, with security upgrades called out.
# Globals: WARNINGS (written through warn()).
# Does nothing beyond the section header when apt is not installed.
# `apt-get update` is deliberately not run (too slow); only the package lists
# already cached on the host are inspected.
check_updates() {
    local upgradable total=0 security=0

    echo -e "\n${BOLD}--- Updates ---${NC}"
    command -v apt &> /dev/null || return 0

    # One apt call feeds both counters. LC_ALL=C keeps the "[upgradable from:"
    # marker untranslated; matching the marker (not the bare word) skips the
    # "Listing..." line. `|| true` absorbs grep's exit status 1 on "no match".
    upgradable=$(LC_ALL=C apt list --upgradable 2>/dev/null | grep -F '[upgradable' || true)
    if [ -n "$upgradable" ]; then
        total=$(grep -c '' <<< "$upgradable")
        # Only the suite list after "package/" is searched, so a package whose
        # NAME contains "security" is not counted. grep -c already prints 0 on
        # no match, so nothing may be echoed in addition.
        security=$(grep -Ec '^[^/ ]+/[^ ]*security' <<< "$upgradable" || true)
    fi

    if [ "$total" -gt 10 ]; then
        warn "$total Updates ausstehend"
    elif [ "$total" -gt 0 ]; then
        info "$total Updates ausstehend"
    else
        ok "System aktuell"
    fi

    if [ "$security" -gt 0 ]; then
        warn "$security Security-Updates!"
    fi
}

check_updates

# === Summary ===
# Exit status: 1 when at least one warning was counted, otherwise 0.
echo -e "\n${BOLD}===== Ergebnis =====${NC}"
if ! [ "$WARNINGS" -gt 0 ]; then
    echo -e "${GREEN}Alles OK.${NC}"
    exit 0
fi

echo -e "${RED}$WARNINGS Warnung(en) gefunden!${NC}"
exit 1
