Update HorizonBench

This commit is contained in:
2026-03-16 23:50:16 +00:00
parent 04e8039a20
commit 0f12a0c5f0

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env bash
# =============================================================================
# mtu_test.sh — MTU diagnostic & load test (AlmaLinux / Debian) v2.1
# HorizonBench.sh — MTU diagnostic & load test (AlmaLinux / Debian) v2.1
#
# READ-ONLY / NON-DESTRUCTIVE: This script makes NO changes to the system.
# It reads kernel state, sends ICMP/TCP probes, and writes a log to /tmp.
@@ -14,16 +14,16 @@
# - Reports retransmits + RX/TX error deltas
# - Falls back to flood-ping if iperf3 is not installed
#
# Usage: sudo ./mtu_test.sh [TARGET_IP] [INTERFACE] [--no-load]
# Usage: sudo ./HorizonBench.sh [TARGET_IP] [INTERFACE] [--no-load]
#
# TARGET_IP IP or hostname to probe (default: 8.8.8.8)
# INTERFACE Network interface to inspect (default: auto-detected)
# --no-load Skip the load test entirely (zero extra traffic)
#
# Examples:
# sudo ./mtu_test.sh
# sudo ./mtu_test.sh 1.2.3.4 eth0
# sudo ./mtu_test.sh 1.2.3.4 eth0 --no-load
# sudo ./HorizonBench.sh
# sudo ./HorizonBench.sh 1.2.3.4 eth0
# sudo ./HorizonBench.sh 1.2.3.4 eth0 --no-load
# =============================================================================
# Do NOT use set -e / set -euo pipefail — this is a diagnostic script and
@@ -80,7 +80,7 @@ for arg in "$@"; do
done
# ── Runtime state ─────────────────────────────────────────────────────────────
LOG_FILE="/tmp/mtu_test_$(date +%Y%m%d_%H%M%S).log"
LOG_FILE="/tmp/horizonbench_$(date +%Y%m%d_%H%M%S).log"
LOAD_DURATION=15
LOAD_PARALLEL=4
TMPDIR_PINGS="/tmp/mtu_pings_$$"
@@ -502,6 +502,7 @@ section_interface() {
local iface_mtu
iface_mtu=$(safe_read ip link show "$IFACE" | awk '/mtu/{for(i=1;i<=NF;i++) if($i=="mtu") print $(i+1)}')
RPT_IFACE_MTU="$iface_mtu"
if [[ -z "$iface_mtu" ]]; then
fail "Could not read MTU for '$IFACE' — does it exist?"
@@ -546,6 +547,7 @@ section_interface() {
done
local pmtu_probe; pmtu_probe=$(safe_read sysctl -n net.ipv4.tcp_mtu_probing)
RPT_PMTUD_PROBE="${pmtu_probe:-0}"
if [[ "${pmtu_probe:-0}" -ge 1 ]]; then
pass "tcp_mtu_probing=${pmtu_probe} (PMTU discovery active)"
else
@@ -591,6 +593,7 @@ section_ping_mtu() {
log ""
info "Largest successful ICMP size (step-down): ${BLD}${max_ok} bytes${RST}"
RPT_PATH_MTU="$max_ok"
local iface_mtu
iface_mtu=$(safe_read ip link show "$IFACE" | awk '/mtu/{for(i=1;i<=NF;i++) if($i=="mtu") print $(i+1)}')
@@ -657,6 +660,7 @@ section_pmtu_bisect() {
fi
info "Exact path MTU: ${BLD}${best} bytes${RST}"
RPT_EXACT_MTU="$best"
local baseline=$(( EXPECTED_MTU > 0 ? EXPECTED_MTU : 1500 ))
local tolerance=10
@@ -712,6 +716,7 @@ section_tcp_mss() {
local dominant_mss
dominant_mss=$(echo "$mss_data" | awk -F: '{print $2}' | sort | uniq -c | sort -rn | awk 'NR==1{print $2}')
RPT_DOMINANT_MSS="$dominant_mss"
log ""
# Expected MSS = MTU - 40 (20 IP + 20 TCP headers)
local expected_mss=$(( EXPECTED_MTU > 0 ? EXPECTED_MTU - 40 : 1460 ))
@@ -758,7 +763,9 @@ section_tcp_mss() {
if [[ -n "$tcpmss_rules" ]]; then
echo "$tcpmss_rules" | sed 's/^/ /' | tee -a "$LOG_FILE"
pass "MSS clamping rule(s) present"
RPT_CLAMPING="present"
else
RPT_CLAMPING="none"
case "$ROLE" in
wg-router|wg-bird-router)
warn "No TCPMSS clamping rule in mangle FORWARD — required on WireGuard router to prevent MTU black holes"
@@ -814,6 +821,8 @@ section_iface_errors() {
[[ "${tx_errs:-0}" -gt 0 ]] && { fail "TX errors: ${tx_errs}"; all_ok=0; }
[[ "${tx_drop:-0}" -gt 0 ]] && { warn "TX drops: ${tx_drop}"; all_ok=0; }
[[ $all_ok -eq 1 ]] && pass "No RX/TX errors or drops on ${IFACE}"
RPT_RX_ERRS="${rx_errs:-0}"; RPT_RX_DROP="${rx_drop:-0}"
RPT_TX_ERRS="${tx_errs:-0}"; RPT_TX_DROP="${tx_drop:-0}"
log ""
info "ip -s link output (read-only):"
@@ -891,6 +900,20 @@ IPERF3_SERVERS=(
IPERF3_BEST_HOST=""
IPERF3_BEST_PORT=""
# ── Report tracking — populated during test sections ─────────────────────────
# Defaults for every value print_report reads. Text fields start empty so the
# report can fall back to "unknown"/"?"; counters start at zero.

# MTU / MSS findings (section_interface, section_ping_mtu,
# section_pmtu_bisect, section_tcp_mss)
RPT_IFACE_MTU=''
RPT_PATH_MTU=''
RPT_EXACT_MTU=''
RPT_DOMINANT_MSS=''
RPT_CLAMPING=''
RPT_PMTUD_PROBE=''

# Interface error/drop counters at idle (section_iface_errors)
RPT_RX_ERRS=0
RPT_RX_DROP=0
RPT_TX_ERRS=0
RPT_TX_DROP=0

# iperf3 server choice and results (select_iperf3_server, section_load_test)
RPT_IPERF_SERVER=''
RPT_IPERF_THROUGHPUT=''
RPT_IPERF_RETRANSMITS=''
RPT_IPERF_VERDICT=''

# New errors/drops observed while the load test ran (section_load_test)
RPT_LOAD_ERRS=0
RPT_LOAD_DROPS=0
# ── Helper: ping a host, write "avg host port" to a file ─────────────────────
# Usage: _ping_server "host" "port" "count" "outfile"
_ping_server() {
@@ -1083,6 +1106,7 @@ select_iperf3_server() {
log ""
pass "Selected: ${BLD}${IPERF3_BEST_HOST}:${IPERF3_BEST_PORT}${RST} (avg RTT ${best_rtt} ms, ${best_country})"
RPT_IPERF_SERVER="${IPERF3_BEST_HOST}:${IPERF3_BEST_PORT} (${best_country}, RTT ${best_rtt} ms)"
}
# ── Section 6: Load test ──────────────────────────────────────────────────────
@@ -1158,12 +1182,23 @@ section_load_test() {
fi
if [[ -n "$iperf_host" ]]; then
info "Running ${LOAD_PARALLEL}x parallel TCP streams, MSS 1460, ${LOAD_DURATION}s..."
# Determine the correct MSS to use:
# Priority: 1) dominant MSS from section 4 2) expected_mtu-40 3) 1460
local iperf_mss=1460
if [[ -n "$RPT_DOMINANT_MSS" && "$RPT_DOMINANT_MSS" =~ ^[0-9]+$ && "$RPT_DOMINANT_MSS" -lt 1460 ]]; then
iperf_mss="$RPT_DOMINANT_MSS"
elif [[ $EXPECTED_MTU -gt 0 ]]; then
iperf_mss=$(( EXPECTED_MTU - 40 ))
fi
info "Running ${LOAD_PARALLEL}x parallel TCP streams, MSS ${iperf_mss}, ${LOAD_DURATION}s..."
[[ $iperf_mss -lt 1460 ]] && \
info " Using MSS ${iperf_mss} (derived from ${RPT_DOMINANT_MSS:+detected dominant MSS}${RPT_DOMINANT_MSS:-expected MTU ${EXPECTED_MTU}}) — matches path MTU"
iperf3 -c "$iperf_host" -p "$iperf_port" \
-t "$LOAD_DURATION" \
-P "$LOAD_PARALLEL" \
-M 1460 \
-M "$iperf_mss" \
--logfile "${TMPDIR_PINGS}/iperf_client.txt" \
> /dev/null 2>&1 &
local iperf_client_pid=$!
@@ -1209,7 +1244,7 @@ section_load_test() {
# ── Parse iperf3 output ───────────────────────────────────────────────
log ""
info "iperf3 results (${iperf_mode}, MSS 1460, ${LOAD_PARALLEL} streams):"
info "iperf3 results (${iperf_mode}, MSS ${iperf_mss}, ${LOAD_PARALLEL} streams):"
if [[ -f "${TMPDIR_PINGS}/iperf_client.txt" ]]; then
grep -E '\[SUM\]|\[ ID\]|sender|receiver|error|connect' "${TMPDIR_PINGS}/iperf_client.txt" \
| sed 's/^/ /' | tee -a "$LOG_FILE" || true
@@ -1221,6 +1256,13 @@ section_load_test() {
[[ "$retransmits" == "?" ]] && \
retransmits=$(grep -oP '\d+(?= sender)' "${TMPDIR_PINGS}/iperf_client.txt" | tail -1 || echo "?")
# Capture throughput for report
local throughput
throughput=$(grep -E 'SUM.*sender' "${TMPDIR_PINGS}/iperf_client.txt" \
| awk '{for(i=1;i<=NF;i++) if($i~/bits/) print $(i-1)" "$i}' | tail -1 || echo "")
RPT_IPERF_RETRANSMITS="${retransmits:-?}"
RPT_IPERF_THROUGHPUT="${throughput:-unknown}"
log ""
if [[ "$iperf_mode" == "external" ]]; then
# External retransmits are only meaningful when corroborated by
@@ -1304,19 +1346,28 @@ section_load_test() {
if [[ "$retr" == "0" ]]; then
pass "iperf3 retransmits: 0 — clean TCP over real network path"
RPT_IPERF_VERDICT="clean (0 retransmits)"
elif [[ $icmp_ok -eq 1 ]]; then
# ICMP clean at expected MTU → retransmits are PMTUD warmup:
# kernel starts SYN at MSS 1460, path clamps to MSS=(path_mtu-40)
# in first RTTs, those early segments retransmit. Expected behaviour.
local settled_mss=$(( EXPECTED_MTU > 0 ? EXPECTED_MTU - 40 : 1330 ))
info "iperf3 retransmits: ${retr} — ICMP path clean at/below expected MTU ${EXPECTED_MTU:-1500}"
info " TCP PMTUD warmup: kernel opens at MSS 1460, path reduces to MSS ${settled_mss}"
info " in the first 1-2 seconds → burst of retransmits, then settles. Normal."
pass "Retransmit correlation: consistent with PMTUD warmup, not an MTU problem ✓"
local settled_mss=$(( EXPECTED_MTU > 0 ? EXPECTED_MTU - 40 : 1460 ))
if [[ "$iperf_mss" -lt 1460 ]]; then
# Correct MSS was used from the start — retransmits are
# pure TCP congestion control on the real network path,
# not MSS negotiation. Normal for a shared public iperf3 server.
info "iperf3 retransmits: ${retr} — ICMP path clean, MSS ${iperf_mss} was correct from the start"
info " Retransmits are TCP congestion control on the real path — normal for shared public servers"
else
info "iperf3 retransmits: ${retr} — ICMP path clean at/below expected MTU ${EXPECTED_MTU:-1500}"
info " TCP PMTUD warmup: kernel opens at MSS 1460, path reduces to MSS ${settled_mss}"
info " in the first 1-2 seconds → burst of retransmits, then settles. Normal."
fi
pass "Retransmit correlation: ICMP clean, no MTU problem ✓"
RPT_IPERF_VERDICT="TCP congestion control (${retr} retransmits, normal)"
elif [[ "${retr}" =~ ^[0-9]+$ ]] && [[ "$retr" -lt 50 ]]; then
warn "iperf3 retransmits: ${retr} — minor, correlate with ICMP failures above"
RPT_IPERF_VERDICT="minor retransmits (${retr})"
else
fail "iperf3 retransmits: ${retr} — corroborated by ICMP failures ≤expected MTU, real MTU problem"
RPT_IPERF_VERDICT="HIGH retransmits (${retr}) — MTU problem"
fi
elif [[ "$iperf_mode" == "loopback" ]] && \
[[ "${_iperf_retransmits:-?}" =~ ^[0-9]+$ ]] && [[ "${_iperf_retransmits}" -gt 0 ]] && \
@@ -1401,8 +1452,11 @@ section_load_test() {
if [[ $delta_rx_errs -eq 0 && $delta_rx_drop -eq 0 && $delta_tx_errs -eq 0 && $delta_tx_drop -eq 0 ]]; then
pass "No new RX/TX errors or drops during load test"
RPT_LOAD_ERRS=0; RPT_LOAD_DROPS=0
else
fail "New errors/drops during load — check MTU mismatch, ring buffer size, or NIC driver"
RPT_LOAD_ERRS=$(( delta_rx_errs + delta_tx_errs ))
RPT_LOAD_DROPS=$(( delta_rx_drop + delta_tx_drop ))
fi
}
@@ -1507,6 +1561,179 @@ section_summary() {
log " Full log: ${BLD}${LOG_FILE}${RST}"
}
# ── Shareable summary report ──────────────────────────────────────────────────
#######################################
# Print the end-of-run summary report to the terminal (with colour) and
# append a colour-free copy of the same report to the log file.
#
# Globals (read only):
#   ISSUES_FOUND, EXPECTED_MTU, ROLE, IFACE, TARGET, LOG_FILE
#   RPT_IFACE_MTU, RPT_PATH_MTU, RPT_EXACT_MTU, RPT_DOMINANT_MSS,
#   RPT_CLAMPING, RPT_PMTUD_PROBE, RPT_RX_ERRS, RPT_RX_DROP, RPT_TX_ERRS,
#   RPT_TX_DROP, RPT_LOAD_ERRS, RPT_LOAD_DROPS, RPT_IPERF_SERVER,
#   RPT_IPERF_THROUGHPUT, RPT_IPERF_VERDICT
#   WG_IFACES, WG_MTU, WG_PEERS, WG_PUBSUBNETS, BIRD_RUNNING, BIRD_PROTOCOLS
#   GRN, YEL, RED, CYN, BLD, RST (colour escape sequences)
# Arguments: none
# Outputs:   coloured report on stdout; plain-text report appended to LOG_FILE
#######################################
print_report() {
  local ts; ts=$(date '+%Y-%m-%d %H:%M')
  local hostname; hostname=$(hostname -f 2>/dev/null || hostname)

  # Overall verdict: 0 issues → PASS, 1-3 → WARN, more → FAIL.
  # The same string is used for the terminal and for the log copy so the two
  # can never disagree.
  local overall_verdict overall_col
  if [[ $ISSUES_FOUND -eq 0 ]]; then
    overall_verdict="PASS — no issues detected"
    overall_col="$GRN"
  elif [[ $ISSUES_FOUND -le 3 ]]; then
    overall_verdict="WARN — ${ISSUES_FOUND} issue(s) found"
    overall_col="$YEL"
  else
    overall_verdict="FAIL — ${ISSUES_FOUND} issue(s) found"
    overall_col="$RED"
  fi

  # Interface counter summary (idle counters)
  local iface_status
  if [[ "${RPT_RX_ERRS:-0}" -eq 0 && "${RPT_RX_DROP:-0}" -eq 0 && \
        "${RPT_TX_ERRS:-0}" -eq 0 && "${RPT_TX_DROP:-0}" -eq 0 ]]; then
    iface_status="${GRN}clean${RST}"
  else
    iface_status="${RED}errors/drops present${RST}"
  fi

  # Load test counter summary (new errors/drops seen during the load test)
  local load_status
  if [[ "${RPT_LOAD_ERRS:-0}" -eq 0 && "${RPT_LOAD_DROPS:-0}" -eq 0 ]]; then
    load_status="${GRN}clean${RST}"
  else
    load_status="${RED}new errors/drops under load${RST}"
  fi

  # Path MTU colour: yellow when the bisected MTU is more than 10 bytes away
  # from the expected MTU, or — with no expected MTU given — when it is
  # below 1490. Green otherwise.
  local path_mtu_str="${RPT_EXACT_MTU:-unknown}"
  local path_mtu_col="$GRN"
  if [[ -n "$RPT_EXACT_MTU" && $EXPECTED_MTU -gt 0 ]]; then
    local mtu_delta
    mtu_delta=$(( RPT_EXACT_MTU - EXPECTED_MTU ))
    # ${mtu_delta#-} strips a leading minus sign → absolute difference
    [[ ${mtu_delta#-} -gt 10 ]] && path_mtu_col="$YEL"
  elif [[ -n "$RPT_EXACT_MTU" && "${RPT_EXACT_MTU}" -lt 1490 && $EXPECTED_MTU -eq 0 ]]; then
    path_mtu_col="$YEL"
  fi

  # MSS clamping: a missing rule is only an error on router roles
  local clamp_str
  case "${RPT_CLAMPING:-none}" in
    present) clamp_str="${GRN}present${RST}" ;;
    none)
      case "$ROLE" in
        wg-router|wg-bird-router) clamp_str="${RED}MISSING${RST}" ;;
        vps|wg-client) clamp_str="${CYN}not needed (VPS)${RST}" ;;
        *) clamp_str="${YEL}not found${RST}" ;;
      esac ;;
    *) clamp_str="${YEL}unknown${RST}" ;;
  esac

  # ── Coloured report on stdout ────────────────────────────────────────────
  echo ""
  echo -e "${BLD}${CYN}╔══════════════════════════════════════════════════════════════════╗${RST}"
  echo -e "${BLD}${CYN}║ HorizonBench — MTU Test Report ║${RST}"
  echo -e "${BLD}${CYN}╚══════════════════════════════════════════════════════════════════╝${RST}"
  echo ""
  printf " %-20s %s\n" "Host:" "$hostname"
  printf " %-20s %s\n" "Date:" "$ts"
  printf " %-20s %s\n" "Role:" "$ROLE"
  printf " %-20s %s\n" "Interface:" "${IFACE} (iface MTU ${RPT_IFACE_MTU:-?})"
  printf " %-20s %s\n" "Test target:" "$TARGET"
  if [[ $EXPECTED_MTU -gt 0 ]]; then
    printf " %-20s %s\n" "Expected MTU:" "$EXPECTED_MTU"
  fi
  echo ""
  echo -e " ${BLD}── Path MTU ──────────────────────────────────────────────────────${RST}"
  printf " %-20s %b\n" "Step-down probe:" "${path_mtu_col}${RPT_PATH_MTU:-unknown} bytes${RST} (closest lower step in probe list)"
  printf " %-20s %b\n" "Exact (bisect):" "${path_mtu_col}${path_mtu_str} bytes${RST}"
  if [[ $EXPECTED_MTU -gt 0 ]]; then
    printf " %-20s %b\n" "vs expected:" "${path_mtu_col}${EXPECTED_MTU} bytes${RST}"
  fi
  echo ""
  echo -e " ${BLD}── TCP ──────────────────────────────────────────────────────────${RST}"
  printf " %-20s %s\n" "Dominant MSS:" "${RPT_DOMINANT_MSS:-unknown}"
  printf " %-20s %b\n" "MSS clamping:" "$clamp_str"
  printf " %-20s %s\n" "PMTUD probing:" "${RPT_PMTUD_PROBE:-0} (tcp_mtu_probing sysctl)"
  echo ""
  echo -e " ${BLD}── Interface counters ───────────────────────────────────────────${RST}"
  printf " %-20s %b\n" "Idle counters:" "$iface_status"
  printf " %-20s %b\n" "Under load:" "$load_status"
  echo ""
  echo -e " ${BLD}── Load test ────────────────────────────────────────────────────${RST}"
  # NOTE(review): an empty RPT_IPERF_SERVER is reported as "flood-ping
  # fallback". It may also be empty when the load test was skipped
  # (--no-load) or ran without server selection — confirm against
  # section_load_test before trusting this line in those cases.
  if [[ -n "$RPT_IPERF_SERVER" ]]; then
    printf " %-20s %s\n" "iperf3 server:" "$RPT_IPERF_SERVER"
    printf " %-20s %s\n" "Throughput:" "${RPT_IPERF_THROUGHPUT:-unknown}"
    printf " %-20s %s\n" "Retransmits:" "${RPT_IPERF_VERDICT:-unknown}"
  else
    printf " %-20s %s\n" "Load test:" "flood-ping fallback (iperf3 not installed)"
  fi
  if [[ ${#WG_IFACES[@]} -gt 0 ]]; then
    echo ""
    echo -e " ${BLD}── WireGuard ─────────────────────────────────────────────────────${RST}"
    printf " %-20s %s\n" "Interfaces:" "${WG_IFACES[*]}"
    printf " %-20s %s\n" "Tunnel MTU:" "${WG_MTU:-unknown}"
    printf " %-20s %s\n" "Peers:" "${WG_PEERS:-0}"
    if [[ ${#WG_PUBSUBNETS[@]} -gt 0 ]]; then
      printf " %-20s %s\n" "Public subnets:" "${WG_PUBSUBNETS[*]}"
    fi
  fi
  if [[ $BIRD_RUNNING -eq 1 ]]; then
    echo ""
    echo -e " ${BLD}── BIRD ──────────────────────────────────────────────────────────${RST}"
    printf " %-20s %s\n" "Daemon:" "running"
    if [[ ${#BIRD_PROTOCOLS[@]} -gt 0 ]]; then
      printf " %-20s %s\n" "BGP sessions:" "${BIRD_PROTOCOLS[*]}"
    fi
  fi
  echo ""
  echo -e " ${BLD}── Overall ──────────────────────────────────────────────────────${RST}"
  echo -e " ${overall_col}${BLD} ${overall_verdict}${RST}"
  echo ""
  echo -e " Full log: ${BLD}${LOG_FILE}${RST}"
  echo -e "${BLD}${CYN}══════════════════════════════════════════════════════════════════${RST}"
  echo ""

  # ── Plain-text copy appended to the log (no colour codes) ────────────────
  {
    echo ""
    echo "════════════════════════════════════════════════════════════════════"
    echo " HorizonBench — MTU Test Report"
    echo "════════════════════════════════════════════════════════════════════"
    echo " Host : $hostname"
    echo " Date : $ts"
    echo " Role : $ROLE"
    echo " Interface : ${IFACE} (iface MTU ${RPT_IFACE_MTU:-?})"
    echo " Test target : $TARGET"
    if [[ $EXPECTED_MTU -gt 0 ]]; then
      echo " Expected MTU : $EXPECTED_MTU"
    fi
    echo ""
    echo " Path MTU"
    echo " Step-down : ${RPT_PATH_MTU:-unknown} bytes"
    echo " Exact (bisect) : ${path_mtu_str} bytes"
    if [[ $EXPECTED_MTU -gt 0 ]]; then
      echo " vs expected : ${EXPECTED_MTU} bytes"
    fi
    echo ""
    echo " TCP"
    echo " Dominant MSS : ${RPT_DOMINANT_MSS:-unknown}"
    echo " MSS clamping : ${RPT_CLAMPING:-none}"
    echo " PMTUD probing : ${RPT_PMTUD_PROBE:-0}"
    echo ""
    echo " Interface counters"
    echo " Idle : RX errs=${RPT_RX_ERRS:-0} drops=${RPT_RX_DROP:-0} TX errs=${RPT_TX_ERRS:-0} drops=${RPT_TX_DROP:-0}"
    echo " Under load : new errs=${RPT_LOAD_ERRS:-0} drops=${RPT_LOAD_DROPS:-0}"
    echo ""
    echo " Load test"
    if [[ -n "$RPT_IPERF_SERVER" ]]; then
      echo " Server : $RPT_IPERF_SERVER"
      echo " Throughput : ${RPT_IPERF_THROUGHPUT:-unknown}"
      echo " Retransmits : ${RPT_IPERF_VERDICT:-unknown}"
    else
      echo " Method : flood-ping fallback"
    fi
    if [[ ${#WG_IFACES[@]} -gt 0 ]]; then
      echo ""
      echo " WireGuard"
      echo " Interfaces : ${WG_IFACES[*]}"
      echo " Tunnel MTU : ${WG_MTU:-unknown}"
      echo " Peers : ${WG_PEERS:-0}"
      if [[ ${#WG_PUBSUBNETS[@]} -gt 0 ]]; then
        echo " Public subnets : ${WG_PUBSUBNETS[*]}"
      fi
    fi
    if [[ $BIRD_RUNNING -eq 1 ]]; then
      echo ""
      echo " BIRD"
      echo " Daemon : running"
      if [[ ${#BIRD_PROTOCOLS[@]} -gt 0 ]]; then
        echo " BGP sessions : ${BIRD_PROTOCOLS[*]}"
      fi
    fi
    echo ""
    # Reuse the verdict computed above: the previous inline form glued the
    # verdict to the count ("PASS0 issue(s)") and merged WARN with FAIL.
    echo " Overall : ${overall_verdict}"
    echo "════════════════════════════════════════════════════════════════════"
  } >> "$LOG_FILE"
}
# ── Main ──────────────────────────────────────────────────────────────────────
main() {
echo -e "${BLD}${CYN}"
@@ -1546,8 +1773,7 @@ main() {
else fail "${ISSUES_FOUND} issue(s) found — MTU configuration needs attention"
fi
log ""
log " Log: ${BLD}${LOG_FILE}${RST}"
print_report
}
main "$@"