diff --git a/HorizonBench.sh b/HorizonBench.sh index ca11e97..7906187 100644 --- a/HorizonBench.sh +++ b/HorizonBench.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # ============================================================================= -# mtu_test.sh — MTU diagnostic & load test (AlmaLinux / Debian) v2.1 +# HorizonBench.sh — MTU diagnostic & load test (AlmaLinux / Debian) v2.1 # # READ-ONLY / NON-DESTRUCTIVE: This script makes NO changes to the system. # It reads kernel state, sends ICMP/TCP probes, and writes a log to /tmp. @@ -14,16 +14,16 @@ # - Reports retransmits + RX/TX error deltas # - Falls back to flood-ping if iperf3 is not installed # -# Usage: sudo ./mtu_test.sh [TARGET_IP] [INTERFACE] [--no-load] +# Usage: sudo ./HorizonBench.sh [TARGET_IP] [INTERFACE] [--no-load] # # TARGET_IP IP or hostname to probe (default: 8.8.8.8) # INTERFACE Network interface to inspect (default: auto-detected) # --no-load Skip the load test entirely (zero extra traffic) # # Examples: -# sudo ./mtu_test.sh -# sudo ./mtu_test.sh 1.2.3.4 eth0 -# sudo ./mtu_test.sh 1.2.3.4 eth0 --no-load +# sudo ./HorizonBench.sh +# sudo ./HorizonBench.sh 1.2.3.4 eth0 +# sudo ./HorizonBench.sh 1.2.3.4 eth0 --no-load # ============================================================================= # Do NOT use set -e / set -euo pipefail — this is a diagnostic script and @@ -80,7 +80,7 @@ for arg in "$@"; do done # ── Runtime state ───────────────────────────────────────────────────────────── -LOG_FILE="/tmp/mtu_test_$(date +%Y%m%d_%H%M%S).log" +LOG_FILE="/tmp/horizonbench_$(date +%Y%m%d_%H%M%S).log" LOAD_DURATION=15 LOAD_PARALLEL=4 TMPDIR_PINGS="/tmp/mtu_pings_$$" @@ -502,6 +502,7 @@ section_interface() { local iface_mtu iface_mtu=$(safe_read ip link show "$IFACE" | awk '/mtu/{for(i=1;i<=NF;i++) if($i=="mtu") print $(i+1)}') + RPT_IFACE_MTU="$iface_mtu" if [[ -z "$iface_mtu" ]]; then fail "Could not read MTU for '$IFACE' — does it exist?" 
@@ -546,6 +547,7 @@ section_interface() { done local pmtu_probe; pmtu_probe=$(safe_read sysctl -n net.ipv4.tcp_mtu_probing) + RPT_PMTUD_PROBE="${pmtu_probe:-0}" if [[ "${pmtu_probe:-0}" -ge 1 ]]; then pass "tcp_mtu_probing=${pmtu_probe} (PMTU discovery active)" else @@ -591,6 +593,7 @@ section_ping_mtu() { log "" info "Largest successful ICMP size (step-down): ${BLD}${max_ok} bytes${RST}" + RPT_PATH_MTU="$max_ok" local iface_mtu iface_mtu=$(safe_read ip link show "$IFACE" | awk '/mtu/{for(i=1;i<=NF;i++) if($i=="mtu") print $(i+1)}') @@ -657,6 +660,7 @@ section_pmtu_bisect() { fi info "Exact path MTU: ${BLD}${best} bytes${RST}" + RPT_EXACT_MTU="$best" local baseline=$(( EXPECTED_MTU > 0 ? EXPECTED_MTU : 1500 )) local tolerance=10 @@ -712,6 +716,7 @@ section_tcp_mss() { local dominant_mss dominant_mss=$(echo "$mss_data" | awk -F: '{print $2}' | sort | uniq -c | sort -rn | awk 'NR==1{print $2}') + RPT_DOMINANT_MSS="$dominant_mss" log "" # Expected MSS = MTU - 40 (20 IP + 20 TCP headers) local expected_mss=$(( EXPECTED_MTU > 0 ? 
EXPECTED_MTU - 40 : 1460 )) @@ -758,7 +763,9 @@ section_tcp_mss() { if [[ -n "$tcpmss_rules" ]]; then echo "$tcpmss_rules" | sed 's/^/ /' | tee -a "$LOG_FILE" pass "MSS clamping rule(s) present" + RPT_CLAMPING="present" else + RPT_CLAMPING="none" case "$ROLE" in wg-router|wg-bird-router) warn "No TCPMSS clamping rule in mangle FORWARD — required on WireGuard router to prevent MTU black holes" @@ -814,6 +821,8 @@ section_iface_errors() { [[ "${tx_errs:-0}" -gt 0 ]] && { fail "TX errors: ${tx_errs}"; all_ok=0; } [[ "${tx_drop:-0}" -gt 0 ]] && { warn "TX drops: ${tx_drop}"; all_ok=0; } [[ $all_ok -eq 1 ]] && pass "No RX/TX errors or drops on ${IFACE}" + RPT_RX_ERRS="${rx_errs:-0}"; RPT_RX_DROP="${rx_drop:-0}" + RPT_TX_ERRS="${tx_errs:-0}"; RPT_TX_DROP="${tx_drop:-0}" log "" info "ip -s link output (read-only):" @@ -891,6 +900,20 @@ IPERF3_SERVERS=( IPERF3_BEST_HOST="" IPERF3_BEST_PORT="" +# ── Report tracking — populated during test sections ───────────────────────── +RPT_IFACE_MTU="" +RPT_PATH_MTU="" +RPT_EXACT_MTU="" +RPT_DOMINANT_MSS="" +RPT_RX_ERRS=0; RPT_RX_DROP=0; RPT_TX_ERRS=0; RPT_TX_DROP=0 +RPT_IPERF_SERVER="" +RPT_IPERF_THROUGHPUT="" +RPT_IPERF_RETRANSMITS="" +RPT_IPERF_VERDICT="" +RPT_LOAD_ERRS=0; RPT_LOAD_DROPS=0 +RPT_CLAMPING="" +RPT_PMTUD_PROBE="" + # ── Helper: ping a host, write "avg host port" to a file ───────────────────── # Usage: _ping_server "host" "port" "count" "outfile" _ping_server() { @@ -1083,6 +1106,7 @@ select_iperf3_server() { log "" pass "Selected: ${BLD}${IPERF3_BEST_HOST}:${IPERF3_BEST_PORT}${RST} (avg RTT ${best_rtt} ms, ${best_country})" + RPT_IPERF_SERVER="${IPERF3_BEST_HOST}:${IPERF3_BEST_PORT} (${best_country}, RTT ${best_rtt} ms)" } # ── Section 6: Load test ────────────────────────────────────────────────────── @@ -1158,12 +1182,23 @@ section_load_test() { fi if [[ -n "$iperf_host" ]]; then - info "Running ${LOAD_PARALLEL}x parallel TCP streams, MSS 1460, ${LOAD_DURATION}s..." 
+ # Determine the correct MSS to use: + # Priority: 1) dominant MSS from section 4 2) expected_mtu-40 3) 1460 + local iperf_mss=1460 + if [[ -n "$RPT_DOMINANT_MSS" && "$RPT_DOMINANT_MSS" =~ ^[0-9]+$ && "$RPT_DOMINANT_MSS" -lt 1460 ]]; then + iperf_mss="$RPT_DOMINANT_MSS" + elif [[ $EXPECTED_MTU -gt 0 ]]; then + iperf_mss=$(( EXPECTED_MTU - 40 )) + fi + + info "Running ${LOAD_PARALLEL}x parallel TCP streams, MSS ${iperf_mss}, ${LOAD_DURATION}s..." + [[ $iperf_mss -lt 1460 ]] && \ + info " Using MSS ${iperf_mss} (derived from ${RPT_DOMINANT_MSS:+detected dominant MSS}${RPT_DOMINANT_MSS:-expected MTU ${EXPECTED_MTU}}) — matches path MTU" iperf3 -c "$iperf_host" -p "$iperf_port" \ -t "$LOAD_DURATION" \ -P "$LOAD_PARALLEL" \ - -M 1460 \ + -M "$iperf_mss" \ --logfile "${TMPDIR_PINGS}/iperf_client.txt" \ > /dev/null 2>&1 & local iperf_client_pid=$! @@ -1209,7 +1244,7 @@ section_load_test() { # ── Parse iperf3 output ─────────────────────────────────────────────── log "" - info "iperf3 results (${iperf_mode}, MSS 1460, ${LOAD_PARALLEL} streams):" + info "iperf3 results (${iperf_mode}, MSS ${iperf_mss}, ${LOAD_PARALLEL} streams):" if [[ -f "${TMPDIR_PINGS}/iperf_client.txt" ]]; then grep -E '\[SUM\]|\[ ID\]|sender|receiver|error|connect' "${TMPDIR_PINGS}/iperf_client.txt" \ | sed 's/^/ /' | tee -a "$LOG_FILE" || true @@ -1221,6 +1256,13 @@ section_load_test() { [[ "$retransmits" == "?" 
]] && \ retransmits=$(grep -oP '\d+(?= sender)' "${TMPDIR_PINGS}/iperf_client.txt" | tail -1 || echo "?") + # Capture throughput for report + local throughput + throughput=$(grep -E 'SUM.*sender' "${TMPDIR_PINGS}/iperf_client.txt" \ + | awk '{for(i=1;i<=NF;i++) if($i~/bits/) print $(i-1)" "$i}' | tail -1 || echo "") + RPT_IPERF_RETRANSMITS="${retransmits:-?}" + RPT_IPERF_THROUGHPUT="${throughput:-unknown}" + log "" if [[ "$iperf_mode" == "external" ]]; then # External retransmits are only meaningful when corroborated by @@ -1304,19 +1346,28 @@ section_load_test() { if [[ "$retr" == "0" ]]; then pass "iperf3 retransmits: 0 — clean TCP over real network path" + RPT_IPERF_VERDICT="clean (0 retransmits)" elif [[ $icmp_ok -eq 1 ]]; then - # ICMP clean at expected MTU → retransmits are PMTUD warmup: - # kernel starts SYN at MSS 1460, path clamps to MSS=(path_mtu-40) - # in first RTTs, those early segments retransmit. Expected behaviour. - local settled_mss=$(( EXPECTED_MTU > 0 ? EXPECTED_MTU - 40 : 1330 )) - info "iperf3 retransmits: ${retr} — ICMP path clean at/below expected MTU ${EXPECTED_MTU:-1500}" - info " TCP PMTUD warmup: kernel opens at MSS 1460, path reduces to MSS ${settled_mss}" - info " in the first 1-2 seconds → burst of retransmits, then settles. Normal." - pass "Retransmit correlation: consistent with PMTUD warmup, not an MTU problem ✓" + local settled_mss=$(( EXPECTED_MTU > 0 ? EXPECTED_MTU - 40 : 1460 )) + if [[ "$iperf_mss" -lt 1460 ]]; then + # Correct MSS was used from the start — retransmits are + # pure TCP congestion control on the real network path, + # not MSS negotiation. Normal for a shared public iperf3 server. 
# ── Shareable summary report ──────────────────────────────────────────────────
# print_report — print a condensed, shareable summary of the whole run.
#
# Writes a coloured report to stdout and appends a plain-text copy (no colour
# codes) to $LOG_FILE.
#
# Globals read:
#   ISSUES_FOUND, EXPECTED_MTU, ROLE, IFACE, TARGET, LOG_FILE
#   RPT_IFACE_MTU, RPT_PATH_MTU, RPT_EXACT_MTU, RPT_DOMINANT_MSS,
#   RPT_CLAMPING, RPT_PMTUD_PROBE, RPT_RX_ERRS, RPT_RX_DROP, RPT_TX_ERRS,
#   RPT_TX_DROP, RPT_LOAD_ERRS, RPT_LOAD_DROPS, RPT_IPERF_SERVER,
#   RPT_IPERF_THROUGHPUT, RPT_IPERF_RETRANSMITS, RPT_IPERF_VERDICT
#   WG_IFACES, WG_MTU, WG_PEERS, WG_PUBSUBNETS, BIRD_RUNNING, BIRD_PROTOCOLS
#   GRN, YEL, RED, CYN, BLD, RST (colour escape sequences; may be empty)
# Arguments: none
# Outputs:   report on stdout; plain-text copy appended to $LOG_FILE
# Returns:   0
print_report() {
    local ts host
    ts=$(date '+%Y-%m-%d %H:%M')
    # Fall back through the short hostname and bash's $HOSTNAME so a missing
    # or failing hostname(1) neither prints an error nor leaves "Host" blank.
    host=$(hostname -f 2>/dev/null || hostname 2>/dev/null)
    host=${host:-${HOSTNAME:-unknown}}

    local issues="${ISSUES_FOUND:-0}"
    # Anything that is not a plain non-negative integer means "no expectation".
    local expected="${EXPECTED_MTU:-0}"
    [[ "$expected" =~ ^[0-9]+$ ]] || expected=0

    # Overall verdict — computed once and reused for console AND log so the
    # two copies of the report can never disagree.
    local overall_verdict overall_col
    if [[ $issues -eq 0 ]]; then
        overall_verdict="PASS — no issues detected"
        overall_col="$GRN"
    elif [[ $issues -le 3 ]]; then
        overall_verdict="WARN — ${issues} issue(s) found"
        overall_col="$YEL"
    else
        overall_verdict="FAIL — ${issues} issue(s) found"
        overall_col="$RED"
    fi

    # Interface counter summary (values captured by section_iface_errors)
    local iface_status="${RED}errors/drops present${RST}"
    if [[ "${RPT_RX_ERRS:-0}" -eq 0 && "${RPT_RX_DROP:-0}" -eq 0 && \
          "${RPT_TX_ERRS:-0}" -eq 0 && "${RPT_TX_DROP:-0}" -eq 0 ]]; then
        iface_status="${GRN}clean${RST}"
    fi

    # Load test counter summary (deltas captured by section_load_test)
    local load_status="${RED}new errors/drops under load${RST}"
    if [[ "${RPT_LOAD_ERRS:-0}" -eq 0 && "${RPT_LOAD_DROPS:-0}" -eq 0 ]]; then
        load_status="${GRN}clean${RST}"
    fi

    # Path MTU colour: yellow when the bisected MTU is more than 10 bytes away
    # from the expected MTU, or below 1490 when no expectation was given.
    # Only evaluated for a purely numeric value — a non-numeric string would
    # otherwise raise an arithmetic error and abort the whole report.
    local path_mtu_str="${RPT_EXACT_MTU:-unknown}"
    local path_mtu_col="$GRN"
    if [[ "${RPT_EXACT_MTU:-}" =~ ^[0-9]+$ ]]; then
        if [[ $expected -gt 0 ]]; then
            local delta=$(( RPT_EXACT_MTU - expected ))
            if [[ ${delta#-} -gt 10 ]]; then
                path_mtu_col="$YEL"
            fi
        elif [[ $RPT_EXACT_MTU -lt 1490 ]]; then
            path_mtu_col="$YEL"
        fi
    fi

    # MSS clamping: a missing rule is only an error on router roles.
    local clamp_str
    case "${RPT_CLAMPING:-none}" in
        present)
            clamp_str="${GRN}present${RST}"
            ;;
        none)
            case "$ROLE" in
                wg-router|wg-bird-router) clamp_str="${RED}MISSING${RST}" ;;
                vps|wg-client)            clamp_str="${CYN}not needed (VPS)${RST}" ;;
                *)                        clamp_str="${YEL}not found${RST}" ;;
            esac
            ;;
        *)
            clamp_str="${YEL}unknown${RST}"
            ;;
    esac

    # iperf3 results exist whenever ANY iperf field was populated, not only
    # when an external server was selected — otherwise a run without a
    # selected server would be misreported as "flood-ping fallback".
    local have_iperf=0
    if [[ -n "${RPT_IPERF_SERVER:-}" || -n "${RPT_IPERF_THROUGHPUT:-}" || \
          -n "${RPT_IPERF_VERDICT:-}" ]]; then
        have_iperf=1
    fi
    local iperf_server="${RPT_IPERF_SERVER:-n/a (no external server selected)}"
    local no_iperf_msg="no iperf3 results (flood-ping fallback or load test skipped)"
    # Prefer the analysed verdict; fall back to the raw retransmit count.
    local retrans_str="${RPT_IPERF_VERDICT:-}"
    if [[ -z "$retrans_str" ]]; then
        retrans_str="${RPT_IPERF_RETRANSMITS:-unknown}"
    fi

    # Box geometry: 66 inner columns = 18 pad + 30-char title + 18 pad.
    local rule pad
    printf -v rule '%*s' 66 ''
    rule=${rule// /═}
    printf -v pad '%*s' 18 ''

    # ── Console version (coloured) ────────────────────────────────────────
    echo ""
    echo -e "${BLD}${CYN}╔${rule}╗${RST}"
    echo -e "${BLD}${CYN}║${pad}HorizonBench — MTU Test Report${pad}║${RST}"
    echo -e "${BLD}${CYN}╚${rule}╝${RST}"
    echo ""
    printf "  %-20s %s\n" "Host:"        "$host"
    printf "  %-20s %s\n" "Date:"        "$ts"
    printf "  %-20s %s\n" "Role:"        "$ROLE"
    printf "  %-20s %s\n" "Interface:"   "${IFACE} (iface MTU ${RPT_IFACE_MTU:-?})"
    printf "  %-20s %s\n" "Test target:" "$TARGET"
    if [[ $expected -gt 0 ]]; then
        printf "  %-20s %s\n" "Expected MTU:" "$expected"
    fi
    echo ""
    echo -e "  ${BLD}── Path MTU ──────────────────────────────────────────────────────${RST}"
    printf "  %-20s %b\n" "Step-down probe:" "${path_mtu_col}${RPT_PATH_MTU:-unknown} bytes${RST}  (closest lower step in probe list)"
    printf "  %-20s %b\n" "Exact (bisect):"  "${path_mtu_col}${path_mtu_str} bytes${RST}"
    if [[ $expected -gt 0 ]]; then
        printf "  %-20s %b\n" "vs expected:" "${path_mtu_col}${expected} bytes${RST}"
    fi
    echo ""
    echo -e "  ${BLD}── TCP ───────────────────────────────────────────────────────────${RST}"
    printf "  %-20s %s\n" "Dominant MSS:"  "${RPT_DOMINANT_MSS:-unknown}"
    printf "  %-20s %b\n" "MSS clamping:"  "$clamp_str"
    printf "  %-20s %s\n" "PMTUD probing:" "${RPT_PMTUD_PROBE:-0} (tcp_mtu_probing sysctl)"
    echo ""
    echo -e "  ${BLD}── Interface counters ────────────────────────────────────────────${RST}"
    printf "  %-20s %b\n" "Idle counters:" "$iface_status"
    printf "  %-20s %b\n" "Under load:"    "$load_status"
    echo ""
    echo -e "  ${BLD}── Load test ─────────────────────────────────────────────────────${RST}"
    if [[ $have_iperf -eq 1 ]]; then
        printf "  %-20s %s\n" "iperf3 server:" "$iperf_server"
        printf "  %-20s %s\n" "Throughput:"    "${RPT_IPERF_THROUGHPUT:-unknown}"
        printf "  %-20s %s\n" "Retransmits:"   "$retrans_str"
    else
        printf "  %-20s %s\n" "Load test:" "$no_iperf_msg"
    fi
    if [[ ${#WG_IFACES[@]} -gt 0 ]]; then
        echo ""
        echo -e "  ${BLD}── WireGuard ─────────────────────────────────────────────────────${RST}"
        printf "  %-20s %s\n" "Interfaces:" "${WG_IFACES[*]}"
        printf "  %-20s %s\n" "Tunnel MTU:" "${WG_MTU:-unknown}"
        printf "  %-20s %s\n" "Peers:"      "${WG_PEERS:-0}"
        if [[ ${#WG_PUBSUBNETS[@]} -gt 0 ]]; then
            printf "  %-20s %s\n" "Public subnets:" "${WG_PUBSUBNETS[*]}"
        fi
    fi
    if [[ "${BIRD_RUNNING:-0}" -eq 1 ]]; then
        echo ""
        echo -e "  ${BLD}── BIRD ──────────────────────────────────────────────────────────${RST}"
        printf "  %-20s %s\n" "Daemon:" "running"
        if [[ ${#BIRD_PROTOCOLS[@]} -gt 0 ]]; then
            printf "  %-20s %s\n" "BGP sessions:" "${BIRD_PROTOCOLS[*]}"
        fi
    fi
    echo ""
    echo -e "  ${BLD}── Overall ───────────────────────────────────────────────────────${RST}"
    echo -e "  ${overall_col}${BLD}  ${overall_verdict}${RST}"
    echo ""
    echo -e "  Full log: ${BLD}${LOG_FILE}${RST}"
    echo -e "${BLD}${CYN}═${rule}═${RST}"
    echo ""

    # ── Plain-text version for the log (no colour codes) ──────────────────
    # Skipped when LOG_FILE is empty: redirecting to "" would only produce a
    # shell error after the report has already been shown.
    if [[ -n "${LOG_FILE:-}" ]]; then
        {
            echo ""
            echo "═${rule}═"
            echo "  HorizonBench — MTU Test Report"
            echo "═${rule}═"
            printf "  %-17s: %s\n" "Host"        "$host"
            printf "  %-17s: %s\n" "Date"        "$ts"
            printf "  %-17s: %s\n" "Role"        "$ROLE"
            printf "  %-17s: %s\n" "Interface"   "${IFACE} (iface MTU ${RPT_IFACE_MTU:-?})"
            printf "  %-17s: %s\n" "Test target" "$TARGET"
            if [[ $expected -gt 0 ]]; then
                printf "  %-17s: %s\n" "Expected MTU" "$expected"
            fi
            echo ""
            echo "  Path MTU"
            printf "    %-15s: %s\n" "Step-down"      "${RPT_PATH_MTU:-unknown} bytes"
            printf "    %-15s: %s\n" "Exact (bisect)" "${path_mtu_str} bytes"
            if [[ $expected -gt 0 ]]; then
                printf "    %-15s: %s\n" "vs expected" "${expected} bytes"
            fi
            echo ""
            echo "  TCP"
            printf "    %-15s: %s\n" "Dominant MSS"  "${RPT_DOMINANT_MSS:-unknown}"
            printf "    %-15s: %s\n" "MSS clamping"  "${RPT_CLAMPING:-none}"
            printf "    %-15s: %s\n" "PMTUD probing" "${RPT_PMTUD_PROBE:-0}"
            echo ""
            echo "  Interface counters"
            printf "    %-15s: %s\n" "Idle" \
                "RX errs=${RPT_RX_ERRS:-0} drops=${RPT_RX_DROP:-0}  TX errs=${RPT_TX_ERRS:-0} drops=${RPT_TX_DROP:-0}"
            printf "    %-15s: %s\n" "Under load" \
                "new errs=${RPT_LOAD_ERRS:-0} drops=${RPT_LOAD_DROPS:-0}"
            echo ""
            echo "  Load test"
            if [[ $have_iperf -eq 1 ]]; then
                printf "    %-15s: %s\n" "Server"      "$iperf_server"
                printf "    %-15s: %s\n" "Throughput"  "${RPT_IPERF_THROUGHPUT:-unknown}"
                printf "    %-15s: %s\n" "Retransmits" "$retrans_str"
            else
                printf "    %-15s: %s\n" "Method" "$no_iperf_msg"
            fi
            if [[ ${#WG_IFACES[@]} -gt 0 ]]; then
                echo ""
                echo "  WireGuard"
                printf "    %-15s: %s\n" "Interfaces" "${WG_IFACES[*]}"
                printf "    %-15s: %s\n" "Tunnel MTU" "${WG_MTU:-unknown}"
                printf "    %-15s: %s\n" "Peers"      "${WG_PEERS:-0}"
                if [[ ${#WG_PUBSUBNETS[@]} -gt 0 ]]; then
                    printf "    %-15s: %s\n" "Public subnets" "${WG_PUBSUBNETS[*]}"
                fi
            fi
            if [[ "${BIRD_RUNNING:-0}" -eq 1 ]]; then
                echo ""
                echo "  BIRD"
                printf "    %-15s: %s\n" "Daemon" "running"
                if [[ ${#BIRD_PROTOCOLS[@]} -gt 0 ]]; then
                    printf "    %-15s: %s\n" "BGP sessions" "${BIRD_PROTOCOLS[*]}"
                fi
            fi
            echo ""
            printf "  %-17s: %s\n" "Overall" "$overall_verdict"
            echo "═${rule}═"
        } >> "$LOG_FILE"
    fi
    return 0
}