Files
HorizonBench/HorizonBench.sh
2026-03-16 23:50:16 +00:00

1779 lines
86 KiB
Bash
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
# =============================================================================
# HorizonBench.sh — MTU diagnostic & load test (AlmaLinux / Debian) v2.1
#
# READ-ONLY / NON-DESTRUCTIVE: This script makes NO changes to the system.
# It reads kernel state, sends ICMP/TCP probes, and writes a log to /tmp.
# All temp files (iperf3 output, ping logs) are cleaned up via trap on exit.
#
# Load test (Section 6):
# - Starts iperf3 server on 127.0.0.1:15201 (loopback, no remote needed)
# - Runs 4x parallel TCP streams MSS 1460 for LOAD_DURATION seconds
# - Concurrently fires ICMP MTU step-down probes (DF-bit) at TARGET
# so you see which sizes fail *under real TCP load* vs idle
# - Reports retransmits + RX/TX error deltas
# - Falls back to flood-ping if iperf3 is not installed
#
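# Usage: sudo ./HorizonBench.sh [TARGET_IP] [INTERFACE] [--no-load] [--expected-mtu N]
#
# TARGET_IP IP or hostname to probe (default: 8.8.8.8)
# INTERFACE Network interface to inspect (default: chosen interactively from a list)
# --no-load Skip the load test entirely (zero extra traffic)
# --expected-mtu N Expected path MTU, 576-9000; also accepted as --expected-mtu=N
#                  (default: WireGuard interface MTU if one is detected, else 1500)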
#
# Examples:
# sudo ./HorizonBench.sh
# sudo ./HorizonBench.sh 1.2.3.4 eth0
# sudo ./HorizonBench.sh 1.2.3.4 eth0 --no-load
# =============================================================================
# Do NOT use set -e / set -euo pipefail — this is a diagnostic script and
# individual test failures (e.g. ping returning non-zero) must not abort the
# run. Each command handles its own errors explicitly.
set -uo pipefail
# ── Colours ──────────────────────────────────────────────────────────────────
# ANSI colour sequences, stored as literal backslash strings. They are turned
# into real escapes later by `echo -e` and by printf format strings.
readonly \
  RED='\033[0;31m' \
  YEL='\033[1;33m' \
  GRN='\033[0;32m' \
  CYN='\033[0;36m' \
  BLD='\033[1m' \
  RST='\033[0m'
# ── Argument parsing ──────────────────────────────────────────────────────────
# Positional words: the first becomes TARGET, the second IFACE; any further
# positional words are ignored. Options: --no-load, --expected-mtu N,
# --expected-mtu=N. Unknown --options abort with a usage message.
TARGET="8.8.8.8"
IFACE=""
NO_LOAD=0
EXPECTED_MTU=0 # 0 = auto (assume 1500), >0 = user-specified tunnel MTU
_TARGET_SET=""
_IFACE_SET=""
_prev_arg=""

# Validate an --expected-mtu value and store it in EXPECTED_MTU.
# Arguments: $1 - candidate value (may be empty)
# Returns:   0 and sets EXPECTED_MTU when $1 is a decimal integer in 576..9000;
#            otherwise prints the error message and returns 1.
_set_expected_mtu() {
  local raw="${1-}"
  local value
  # Digits only, bounded length so the arithmetic below cannot overflow.
  if [[ "$raw" =~ ^[0-9]{1,9}$ ]]; then
    # Force base 10: a leading zero would otherwise be parsed as octal.
    value=$(( 10#$raw ))
    if (( value >= 576 && value <= 9000 )); then
      EXPECTED_MTU=$value
      return 0
    fi
  fi
  echo -e "${RED} --expected-mtu must be a number between 576 and 9000${RST}"
  return 1
}

for arg in "$@"; do
  # A bare --expected-mtu consumes the very next word as its value, whatever
  # that word looks like; an invalid value is an error rather than ignored.
  if [[ "$_prev_arg" == "--expected-mtu" ]]; then
    _set_expected_mtu "$arg" || exit 1
    _prev_arg=""
    continue
  fi
  case "$arg" in
    --no-load)
      NO_LOAD=1 ;;
    --expected-mtu)
      _prev_arg="--expected-mtu" ;;
    --expected-mtu=*)
      _set_expected_mtu "${arg#--expected-mtu=}" || exit 1 ;;
    --*)
      echo -e "${RED}Unknown option: $arg${RST}"
      echo -e " Usage: $0 [TARGET_IP] [INTERFACE] [--no-load] [--expected-mtu N]"
      exit 1 ;;
    *)
      if [[ -z "$_TARGET_SET" ]]; then
        TARGET="$arg"; _TARGET_SET=1
      elif [[ -z "$_IFACE_SET" ]]; then
        IFACE="$arg"; _IFACE_SET=1
      fi
      ;;
  esac
done

# --expected-mtu given as the last word, with no value following it.
if [[ "$_prev_arg" == "--expected-mtu" ]]; then
  _set_expected_mtu "" || exit 1
fi
# ── Runtime state ─────────────────────────────────────────────────────────────
LOG_FILE="/tmp/horizonbench_$(date +%Y%m%d_%H%M%S).log"
LOAD_DURATION=15
LOAD_PARALLEL=4
# Scratch directory for ping result files. Created with mktemp so the name is
# not predictable in world-writable /tmp (the script runs as root). If mktemp
# is unavailable, fall back to the PID-based name, which is created on demand.
TMPDIR_PINGS=$(mktemp -d "/tmp/mtu_pings_XXXXXX" 2>/dev/null) \
  || TMPDIR_PINGS="/tmp/mtu_pings_$$"
# Optional-tool flags, set to 1 by check_deps when the tool is on PATH
HAS_IPERF3=0
HAS_ETHTOOL=0
HAS_TC=0
HAS_BIRD=0
ISSUES_FOUND=0 # incremented by warn() and fail()
# Role detection results (set by detect_role)
ROLE="unknown" # vps | wg-client | wg-router | bird-router | wg-bird-router | router
WG_IFACES=() # detected WireGuard interfaces
WG_MTU=0 # MTU of first WG interface found
WG_PEERS=0 # total WireGuard peer count
WG_PUBSUBNETS=() # routed public subnets found in wg allowed-ips
BIRD_RUNNING=0 # 1 if BIRD2 daemon is active
BIRD_PROTOCOLS=() # active BGP/OSPF protocol names
IP_FORWARD=0 # kernel ip_forward value
# ── Cleanup trap — only removes files this script created ────────────────────
# Remove the scratch directory named by TMPDIR_PINGS. Safe to call repeatedly
# and when the directory was never created.
cleanup() {
  [[ -n "${TMPDIR_PINGS:-}" ]] || return 0
  rm -rf -- "$TMPDIR_PINGS" 2>/dev/null || true
}
# cleanup runs from the EXIT trap only. INT/TERM must call exit themselves:
# a signal handler that merely returns lets bash carry on with the next
# command, so Ctrl-C would not stop the run. exit then fires the EXIT trap.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
# ── Logging helpers ───────────────────────────────────────────────────────────
# log: echo the arguments (escapes interpreted) to stdout and append the same
# text to LOG_FILE.
log() {
  echo -e "$*" | tee -a "$LOG_FILE"
}

# Tagged one-line messages. warn and fail also bump ISSUES_FOUND.
pass() {
  log "${GRN} [PASS]${RST} $*"
}

warn() {
  log "${YEL} [WARN]${RST} $*"
  : "$(( ISSUES_FOUND += 1 ))"
}

fail() {
  log "${RED} [FAIL]${RST} $*"
  : "$(( ISSUES_FOUND += 1 ))"
}

info() {
  log "${CYN} [INFO]${RST} $*"
}

skip() {
  log "${YEL} [SKIP]${RST} $*"
}

# sect: blank line, then the title framed by two horizontal rules.
sect() {
  local rule="${BLD}${CYN}══════════════════════════════════════════════${RST}"
  log ""
  log "$rule"
  log "${BLD}${CYN} $*${RST}"
  log "$rule"
}

# Safe read-only command wrapper — never aborts on failure
# Runs the given command with stderr discarded and always reports success.
safe_read() {
  "$@" 2>/dev/null
  return 0
}
# ── Pre-flight ────────────────────────────────────────────────────────────────
# Abort with exit status 1 unless the effective UID is 0.
require_root() {
  (( EUID == 0 )) && return 0
  echo -e "${RED} Run as root: sudo $0${RST}"
  exit 1
}
# Verify the mandatory tools are on PATH and record which optional ones exist.
# Globals:  HAS_IPERF3, HAS_ETHTOOL, HAS_TC, HAS_BIRD (set to 1 when found)
# Exits:    status 1, after printing install hints, if a mandatory tool is
#           missing.
check_deps() {
  local cmd
  local missing=()
  for cmd in ping ip ss awk grep tee sysctl; do
    command -v "$cmd" &>/dev/null || missing+=("$cmd")
  done

  # Optional tools: presence only toggles a flag, absence is never fatal.
  # ('wg' is optional too; detect_role probes for it at the point of use.)
  if command -v iperf3 &>/dev/null; then HAS_IPERF3=1; fi
  if command -v ethtool &>/dev/null; then HAS_ETHTOOL=1; fi
  if command -v tc &>/dev/null; then HAS_TC=1; fi
  if command -v birdc &>/dev/null; then HAS_BIRD=1; fi

  if (( ${#missing[@]} > 0 )); then
    echo -e "${RED} Missing: ${missing[*]}${RST}"
    echo -e "${RED} Debian : apt install iproute2 iputils-ping${RST}"
    echo -e "${RED} Alma : dnf install iproute iputils${RST}"
    exit 1
  fi
}
# ── Role detection ────────────────────────────────────────────────────────────
# Read `ip -d link show` output on stdin and print, one per line, the name of
# every interface whose detail section reports link type "wireguard".
# The name is taken from the numbered header line ("5: name: <...>"), with a
# trailing ':' and any '@parent' suffix removed.
_wg_ifaces_from_ip_detail() {
  awk '
    /^[0-9]+:/ { name = $2; sub(/:$/, "", name); sub(/@.*$/, "", name); next }
    $1 == "wireguard" { print name }
  '
}

# Inspects the machine and determines what kind of node it is.
# Sets: ROLE, WG_IFACES, WG_MTU, WG_PEERS, WG_PUBSUBNETS,
# BIRD_RUNNING, BIRD_PROTOCOLS, IP_FORWARD
# Also auto-sets EXPECTED_MTU if not already provided by --expected-mtu
detect_role() {
  sect "0. Machine Role Detection [read-only]"

  # ── ip_forward ────────────────────────────────────────────────────────────
  IP_FORWARD=$(safe_read sysctl -n net.ipv4.ip_forward)
  info "ip_forward = ${IP_FORWARD:-0}"

  # ── WireGuard interfaces ──────────────────────────────────────────────────
  # Pass 1: anything named wg*
  local line
  while IFS= read -r line; do
    if [[ "$line" =~ ^[0-9]+:\ (wg[^:]+): ]]; then
      WG_IFACES+=("${BASH_REMATCH[1]}")
    fi
  done < <(ip link show 2>/dev/null)

  # Pass 2: WireGuard interfaces under any other name, identified by the link
  # type in the detailed listing. Names already found in pass 1 are skipped.
  local iname known already
  while IFS= read -r iname; do
    [[ -n "$iname" ]] || continue
    already=0
    for known in "${WG_IFACES[@]:-}"; do
      [[ "$known" == "$iname" ]] && already=1
    done
    [[ $already -eq 0 ]] && WG_IFACES+=("$iname")
  done < <(ip -d link show 2>/dev/null | _wg_ifaces_from_ip_detail)

  if [[ ${#WG_IFACES[@]} -gt 0 ]]; then
    info "WireGuard interfaces found: ${WG_IFACES[*]}"
    # MTU of first WG interface
    WG_MTU=$(safe_read ip link show "${WG_IFACES[0]}" | \
      awk '/mtu/{for(i=1;i<=NF;i++) if($i=="mtu") print $(i+1)}')
    info "WireGuard MTU (${WG_IFACES[0]}): ${BLD}${WG_MTU}${RST}"

    # Peer count + routed public subnets via 'wg show' (read-only)
    if command -v wg &>/dev/null; then
      local wif peer_count aip first_octet second_octet prefix s
      for wif in "${WG_IFACES[@]}"; do
        peer_count=$(safe_read wg show "$wif" peers | wc -l)
        WG_PEERS=$(( WG_PEERS + peer_count ))
        # Collect allowed-ips that are public IPv4 blocks. The awk below drops
        # the peer public key (field 1) and emits one allowed-ip per line.
        while IFS= read -r aip; do
          # Only well-formed IPv4 CIDR tokens; IPv6 entries and "(none)" are
          # ignored here.
          [[ "$aip" =~ ^([0-9]{1,3})\.([0-9]{1,3})\.[0-9]{1,3}\.[0-9]{1,3}/([0-9]{1,2})$ ]] || continue
          first_octet=$(( 10#${BASH_REMATCH[1]} ))
          second_octet=$(( 10#${BASH_REMATCH[2]} ))
          prefix=$(( 10#${BASH_REMATCH[3]} ))
          # Skip 0.0.0.0/x, RFC1918, loopback and link-local
          case "$first_octet" in
            0|10|127) continue ;;
            169) (( second_octet == 254 )) && continue ;;
            172) (( second_octet >= 16 && second_octet <= 31 )) && continue ;;
            192) (( second_octet == 168 )) && continue ;;
          esac
          # Skip /31 and /32 (single peers / point-to-point) — keep /30 and
          # larger blocks
          (( prefix > 30 )) && continue
          WG_PUBSUBNETS+=("$aip (via $wif)")
        done < <(safe_read wg show "$wif" allowed-ips | awk '{for(i=2;i<=NF;i++) print $i}')
      done
      info "WireGuard total peers : ${BLD}${WG_PEERS}${RST}"
      if [[ ${#WG_PUBSUBNETS[@]} -gt 0 ]]; then
        info "Routed public subnets :"
        for s in "${WG_PUBSUBNETS[@]}"; do
          log " ${GRN}${RST} $s"
        done
      else
        info "Routed public subnets : none (point-to-point or private only)"
      fi
    else
      info " (install 'wireguard-tools' for peer/subnet detail)"
    fi

    # Auto-set EXPECTED_MTU from WG MTU only if user didn't already specify it
    if [[ $EXPECTED_MTU -eq 0 && ${WG_MTU:-0} -gt 0 ]]; then
      EXPECTED_MTU=$WG_MTU
      info "Auto-set --expected-mtu=${EXPECTED_MTU} from WireGuard interface MTU"
    elif [[ $EXPECTED_MTU -gt 0 && ${WG_MTU:-0} -gt 0 && $EXPECTED_MTU -ne $WG_MTU ]]; then
      info "Using --expected-mtu=${EXPECTED_MTU} (override; WG interface MTU is ${WG_MTU})"
    fi
  else
    info "No WireGuard interfaces detected"
  fi

  # ── BIRD routing daemon ───────────────────────────────────────────────────
  log ""
  if systemctl is-active --quiet bird 2>/dev/null || \
     systemctl is-active --quiet bird2 2>/dev/null || \
     pgrep -x bird &>/dev/null || pgrep -x bird2 &>/dev/null; then
    BIRD_RUNNING=1
    info "BIRD routing daemon: ${GRN}running${RST}"
    if [[ $HAS_BIRD -eq 1 ]]; then
      # Read active protocols (BGP, OSPF, etc.) — birdc is read-only
      local proto_out
      proto_out=$(safe_read birdc show protocols | grep -E 'BGP|OSPF|BFD|Static|Babel' || true)
      if [[ -n "$proto_out" ]]; then
        info "Active routing protocols:"
        echo "$proto_out" | sed 's/^/ /' | tee -a "$LOG_FILE"
        # Record the name (column 1) of every protocol whose column 4 reads
        # "Established" or "up"
        local pline pname pstate
        while IFS= read -r pline; do
          pname=$(echo "$pline" | awk '{print $1}')
          pstate=$(echo "$pline" | awk '{print $4}')
          [[ "$pstate" == "Established" || "$pstate" == "up" ]] && \
            BIRD_PROTOCOLS+=("$pname")
        done <<< "$proto_out"
      fi
      # Route table summary; explicit fallback so the result does not depend
      # on the pipefail setting
      local route_count
      route_count=$(safe_read birdc show route count | grep -oP '\d+ of \d+' | head -1)
      [[ -n "$route_count" ]] || route_count="unknown"
      info "Route table: ${route_count} routes"
    else
      info " (install 'bird2' package for protocol detail via birdc)"
    fi
  else
    info "BIRD routing daemon: not running"
  fi

  # ── Determine role ────────────────────────────────────────────────────────
  log ""
  local is_wg=$(( ${#WG_IFACES[@]} > 0 ? 1 : 0 ))
  local is_router=$(( IP_FORWARD == 1 ? 1 : 0 ))
  local is_bird=$BIRD_RUNNING
  local has_pubsubnets=$(( ${#WG_PUBSUBNETS[@]} > 0 ? 1 : 0 ))
  if [[ $is_bird -eq 1 && $is_wg -eq 1 ]]; then ROLE="wg-bird-router"
  elif [[ $is_bird -eq 1 ]]; then ROLE="bird-router"
  elif [[ $is_wg -eq 1 && $is_router -eq 1 ]]; then ROLE="wg-router"
  elif [[ $is_wg -eq 1 ]]; then ROLE="wg-client"
  elif [[ $is_router -eq 1 ]]; then ROLE="router"
  else ROLE="vps"
  fi

  # Role label + description
  local role_label="" role_desc=""
  case "$ROLE" in
    vps)
      role_label="VPS / plain server"
      role_desc="No routing, no tunnels detected" ;;
    wg-client)
      role_label="WireGuard client"
      role_desc="WireGuard present, ip_forward off" ;;
    wg-router)
      role_label="WireGuard gateway / router"
      role_desc="WireGuard + ip_forward=1" ;;
    bird-router)
      role_label="BGP/OSPF router (BIRD)"
      role_desc="BIRD running, no WireGuard" ;;
    wg-bird-router)
      role_label="WireGuard + BGP router (BIRD)"
      role_desc="Full routing stack: WireGuard tunnel + BIRD BGP/OSPF" ;;
    router)
      role_label="Generic router"
      role_desc="ip_forward=1, no WireGuard or BIRD" ;;
  esac

  log ""
  log " ${BLD}${CYN}┌─────────────────────────────────────────────┐${RST}"
  log " ${BLD}${CYN}│ Detected role : ${BLD}${role_label}${RST}"
  log " ${BLD}${CYN}${role_desc}${RST}"
  [[ ${WG_MTU:-0} -gt 0 ]] && \
    log " ${BLD}${CYN}│ WireGuard MTU : ${WG_MTU} → expected path MTU set to ${EXPECTED_MTU}${RST}"
  [[ $has_pubsubnets -eq 1 ]] && \
    log " ${BLD}${CYN}│ Public subnets: ${#WG_PUBSUBNETS[@]} routed block(s) in WireGuard${RST}"
  [[ ${#BIRD_PROTOCOLS[@]} -gt 0 ]] && \
    log " ${BLD}${CYN}│ BGP sessions : ${BIRD_PROTOCOLS[*]}${RST}"
  log " ${BLD}${CYN}└─────────────────────────────────────────────┘${RST}"
  log ""
  pass "Role detected: ${role_label}"

  # Role-specific hints that feed into the rest of the test
  case "$ROLE" in
    wg-router|wg-bird-router)
      info "Role context: testing as WireGuard gateway"
      info " → MTU verdicts will use WG MTU ${EXPECTED_MTU} as baseline"
      info " → MSS clamping check applies to FORWARD chain"
      if [[ $has_pubsubnets -eq 1 ]]; then
        info " → Routed public subnets detected — checking tunnel carries full subnet range"
      fi ;;
    bird-router)
      info "Role context: testing as BGP/OSPF router"
      info " → Checking for MTU consistency across routing interfaces" ;;
    vps|wg-client)
      info "Role context: testing as end-host / VPS"
      info " → Standard MTU path check, no routing concerns" ;;
  esac
}
# Decide which interface to test and store it in IFACE.
# If IFACE was given on the command line it is only validated; otherwise an
# arrow-key selector listing every non-loopback interface is shown on /dev/tty.
# Either way _print_iface_summary is called at the end.
# Exits with status 1 if the named interface does not exist, no interfaces are
# found, or no terminal is available for the selector.
detect_iface() {
  # ── If interface was passed as CLI arg, skip TUI ─────────────────────────
  if [[ -n "$IFACE" ]]; then
    if ! ip link show "$IFACE" &>/dev/null; then
      echo -e "${RED} Interface '$IFACE' not found${RST}"
      exit 1
    fi
    _print_iface_summary
    return
  fi

  # ── Collect interfaces from ip link ──────────────────────────────────────
  local -a iface_names=() iface_mtus=() iface_states=() iface_ips=() iface_types=()
  local line name flags mtu state type ip
  while IFS= read -r line; do
    # Line like: "2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 ..."
    [[ "$line" =~ ^[0-9]+:\ ([^:]+):\ \<([^>]+)\>.*mtu\ ([0-9]+) ]] || continue
    name="${BASH_REMATCH[1]// /}" # trim spaces
    flags="${BASH_REMATCH[2]}"
    mtu="${BASH_REMATCH[3]}"
    # ip prints stacked devices as "name@parent"; only "name" is usable as an
    # interface argument later on
    name="${name%%@*}"
    # Skip loopback
    [[ "$name" == "lo" ]] && continue

    # State: UP when the flag list contains UP (LOWER_UP included)
    state="DOWN"
    [[ "$flags" == *"UP"* ]] && state="UP"

    # Type hint, guessed from the name only; later matches override earlier
    type="ethernet"
    [[ "$name" == wg* ]] && type="wireguard"
    [[ "$name" == tun* ]] && type="tun"
    [[ "$name" == tap* ]] && type="tap"
    [[ "$name" == veth* ]] && type="veth"
    [[ "$name" == br* ]] && type="bridge"
    [[ "$name" == bond* ]] && type="bond"
    [[ "$name" == vlan* || "$name" == *"."* ]] && type="vlan"
    [[ "$name" == dummy* ]] && type="dummy"

    # First IPv4 address of the interface, if any
    ip=$(ip addr show "$name" 2>/dev/null \
      | awk '/inet /{print $2}' | head -1)
    [[ -z "$ip" ]] && ip="(no IP)"

    iface_names+=("$name")
    iface_mtus+=("$mtu")
    iface_states+=("$state")
    iface_ips+=("$ip")
    iface_types+=("$type")
  done < <(ip link show 2>/dev/null)

  if [[ ${#iface_names[@]} -eq 0 ]]; then
    echo -e "${RED} No network interfaces found${RST}"
    exit 1
  fi

  # The selector reads keys from /dev/tty; without a terminal there is nothing
  # to select with, so stop with a clear message instead of failing mid-read.
  if ! { : </dev/tty; } 2>/dev/null; then
    echo -e "${RED} No terminal available for interface selection — pass INTERFACE as an argument${RST}"
    exit 1
  fi

  # ── Draw TUI ─────────────────────────────────────────────────────────────
  local selected=0
  local total=${#iface_names[@]}
  local _tui_drawn=0 # seen by _draw_tui through dynamic scoping
  _draw_tui() {
    # On every redraw after the first, move the cursor back up over the
    # previous frame: total rows + 5 (blank, 2 header lines, blank, trailing blank)
    if [[ "${_tui_drawn:-0}" -eq 1 ]]; then
      printf '\033[%dA' $(( total + 5 ))
    fi
    _tui_drawn=1
    echo -e ""
    echo -e " ${BLD}${CYN}Select network interface to test:${RST}"
    echo -e " ${CYN}Use ↑/↓ arrow keys, Enter to confirm${RST}"
    echo -e ""
    local i
    for (( i=0; i<total; i++ )); do
      local name="${iface_names[$i]}"
      local mtu="${iface_mtus[$i]}"
      local state="${iface_states[$i]}"
      local ip="${iface_ips[$i]}"
      local type="${iface_types[$i]}"
      # State colour
      local state_col="$RED"
      [[ "$state" == "UP" ]] && state_col="$GRN"
      # Highlight selected row
      if [[ $i -eq $selected ]]; then
        printf " ${BLD}${CYN}▶ %-14s${RST} ${state_col}%-5s${RST} MTU ${BLD}%-6s${RST} %-18s ${CYN}%s${RST}\n" \
          "$name" "$state" "$mtu" "$ip" "$type"
      else
        printf " %-14s ${state_col}%-5s${RST} MTU %-6s %-18s %s\n" \
          "$name" "$state" "$mtu" "$ip" "$type"
      fi
    done
    echo ""
  }

  # Hide cursor
  printf '\033[?25l'
  # Restore the cursor (and clean up) on any exit. INT/TERM exit explicitly so
  # Ctrl-C aborts the run instead of interrupting the read and being taken
  # for Enter.
  trap 'printf "\033[?25h"; cleanup' EXIT
  trap 'exit 130' INT
  trap 'exit 143' TERM
  _draw_tui

  # ── Key input loop ────────────────────────────────────────────────────────
  local key esc bracket
  while true; do
    key=""; esc=""; bracket=""
    # Read one char at a time; handle escape sequences for arrow keys
    if ! IFS= read -r -s -n1 key </dev/tty; then
      printf '\033[?25h'
      echo -e "${RED} Could not read from terminal — pass INTERFACE as an argument${RST}"
      exit 1
    fi
    if [[ "$key" == $'\x1b' ]]; then
      # Arrow keys arrive as ESC [ A / ESC [ B; the short timeout keeps a
      # lone ESC from blocking
      IFS= read -r -s -n1 -t 0.1 esc </dev/tty || true
      IFS= read -r -s -n1 -t 0.1 bracket </dev/tty || true
      if [[ "$esc" == "[" ]]; then
        case "$bracket" in
          A) # Up (wraps to the last row)
            selected=$(( (selected - 1 + total) % total ))
            _draw_tui ;;
          B) # Down (wraps to the first row)
            selected=$(( (selected + 1) % total ))
            _draw_tui ;;
        esac
      fi
    elif [[ "$key" == "" || "$key" == $'\n' || "$key" == $'\r' ]]; then
      # Enter — confirm selection
      break
    fi
  done

  # Restore cursor
  printf '\033[?25h'
  IFACE="${iface_names[$selected]}"
  echo -e " ${GRN}✔ Selected: ${BLD}${IFACE}${RST} (MTU ${iface_mtus[$selected]}, ${iface_states[$selected]})"
  echo ""
  _print_iface_summary
}
# Print the run parameters (interface, target, log file, expected MTU when
# set, and whether the load test will run) through info().
_print_iface_summary() {
  info "Interface : ${BLD}${IFACE}${RST}"
  info "Target : ${BLD}${TARGET}${RST}"
  info "Log file : ${BLD}${LOG_FILE}${RST}"

  # The expected-MTU line appears only when a value is set
  (( EXPECTED_MTU > 0 )) && \
    info "Expected MTU : ${BLD}${EXPECTED_MTU}${RST} ${YEL}(tunnel mode — verdicts relative to this value)${RST}"

  if (( NO_LOAD == 1 )); then
    info "Load test : ${YEL}SKIPPED (--no-load)${RST}"
    return
  fi
  info "Load test : enabled (${LOAD_DURATION}s, ${LOAD_PARALLEL} streams)"
}
# ── Section 1: Interface & kernel read ───────────────────────────────────────
# Report the MTU of IFACE, the MTU of every interface, and a handful of
# PMTU-related sysctls. Records RPT_IFACE_MTU and RPT_PMTUD_PROBE.
section_interface() {
  sect "1. Interface & Kernel MTU Settings [read-only]"

  local link_mtu
  link_mtu=$(safe_read ip link show "$IFACE" | awk '/mtu/{for(i=1;i<=NF;i++) if($i=="mtu") print $(i+1)}')
  RPT_IFACE_MTU="$link_mtu"
  if [[ -z "$link_mtu" ]]; then
    fail "Could not read MTU for '$IFACE' — does it exist?"
    return
  fi
  info "Interface MTU (${IFACE}): ${BLD}${link_mtu}${RST}"

  # Verdict: relative to EXPECTED_MTU when one is set, otherwise to 1500
  if (( EXPECTED_MTU > 0 )); then
    if (( link_mtu == EXPECTED_MTU )); then
      pass "Interface MTU=${link_mtu} matches expected MTU"
    elif (( link_mtu > EXPECTED_MTU )); then
      info "Interface MTU=${link_mtu} (higher than expected ${EXPECTED_MTU} — path will be the bottleneck)"
    else
      warn "Interface MTU=${link_mtu} is below expected ${EXPECTED_MTU}"
    fi
  else
    if (( link_mtu == 1500 )); then
      pass "Standard MTU 1500"
    elif (( link_mtu > 1500 )); then
      warn "Jumbo frames MTU=$link_mtu — verify all path hops support this"
    else
      # Below 1500 with no expected value: fine on WireGuard roles, suspicious
      # elsewhere
      case "$ROLE" in
        wg-router|wg-bird-router|wg-client)
          info "Interface MTU=${link_mtu} — expected for WireGuard interface" ;;
        *)
          warn "Non-standard MTU=${link_mtu} — tunnel overhead likely (use --expected-mtu if intentional)" ;;
      esac
    fi
  fi

  log ""
  info "All interface MTUs:"
  safe_read ip link show \
    | awk '/^[0-9]+:/{iface=$2} /mtu/{for(i=1;i<=NF;i++) if($i=="mtu") printf " %-22s MTU %s\n", iface, $(i+1)}' \
    | tee -a "$LOG_FILE"

  log ""
  info "Kernel PMTU / fragmentation sysctls (read-only):"
  local -a sysctl_keys=(
    net.ipv4.ip_no_pmtu_disc
    net.ipv4.tcp_mtu_probing
    net.ipv4.route.min_pmtu
    net.ipv4.ip_forward
    net.ipv4.conf.all.rp_filter
  )
  local val
  for k in "${sysctl_keys[@]}"; do
    val=$(safe_read sysctl -n "$k")
    log " ${k} = ${val:-N/A}"
  done

  local pmtu_probe
  pmtu_probe=$(safe_read sysctl -n net.ipv4.tcp_mtu_probing)
  RPT_PMTUD_PROBE="${pmtu_probe:-0}"
  if (( ${pmtu_probe:-0} >= 1 )); then
    pass "tcp_mtu_probing=${pmtu_probe} (PMTU discovery active)"
    return
  fi

  # Probing is off: how serious that is depends on the detected role
  case "$ROLE" in
    vps|wg-client|generic)
      if (( EXPECTED_MTU > 0 )); then
        # Standard PMTUD is already working — upstream sends ICMP frag-needed
        # and kernel adapts MSS correctly (proven by dominant MSS in section 4)
        info "tcp_mtu_probing=0 — standard PMTUD working via upstream ICMP (MSS adapts automatically)"
      else
        warn "tcp_mtu_probing=0 — kernel won't auto-adapt TCP MSS on black-hole paths"
      fi ;;
    wg-router|wg-bird-router)
      info "tcp_mtu_probing=0 — acceptable on WireGuard router if MSS clamping is active" ;;
    bird-router|router)
      warn "tcp_mtu_probing=0 — recommended to enable on routing nodes" ;;
  esac
}
# ── Section 2: ICMP step-down probe ──────────────────────────────────────────
# Ping TARGET with the Don't-Fragment bit at a fixed list of packet sizes and
# report the largest one that gets through. Records RPT_PATH_MTU.
# The verdict is relative to EXPECTED_MTU when set, otherwise to a full
# 1500-byte path.
section_ping_mtu() {
  sect "2. ICMP MTU Step-Down Probe (Outbound, DF-bit set)"
  local -a sizes=(1500 1492 1480 1472 1450 1420 1400 1300 1200 1000 576)
  local max_ok=0
  local size payload
  info "Probing packet sizes to ${TARGET} with Don't-Fragment bit..."
  log ""
  printf " %-8s %s\n" "Size" "Result" | tee -a "$LOG_FILE"
  printf " %-8s %s\n" "────" "──────" | tee -a "$LOG_FILE"
  for size in "${sizes[@]}"; do
    # ICMP payload = packet size - 28 (20 IP + 8 ICMP header bytes)
    payload=$(( size - 28 ))
    (( payload < 0 )) && continue
    if ping -c 2 -W 2 -M do -s "$payload" "$TARGET" &>/dev/null; then
      printf " %-8s ${GRN}OK${RST}\n" "$size" | tee -a "$LOG_FILE"
      (( size > max_ok )) && max_ok=$size
    else
      printf " %-8s ${RED}FAIL${RST}\n" "$size" | tee -a "$LOG_FILE"
    fi
  done
  log ""
  info "Largest successful ICMP size (step-down): ${BLD}${max_ok} bytes${RST}"
  RPT_PATH_MTU="$max_ok"

  local iface_mtu
  iface_mtu=$(safe_read ip link show "$IFACE" | awk '/mtu/{for(i=1;i<=NF;i++) if($i=="mtu") print $(i+1)}')

  # Nothing got through at all (the smallest probe is 576)
  if (( max_ok < 576 )); then
    fail "No ICMP sizes succeeded — target unreachable or ICMP fully blocked"
    return
  fi

  if (( EXPECTED_MTU > 0 )); then
    # Step-down list gaps are up to ~100 bytes (e.g. 1400→1300).
    # The exact path MTU is determined by binary bisect in section 3.
    # Here we only need to confirm: is the largest OK size plausibly
    # consistent with the expected path MTU?
    local tolerance=110
    # Was EXPECTED_MTU itself one of the probed sizes?
    local expected_probed=0
    for size in "${sizes[@]}"; do
      (( size == EXPECTED_MTU )) && expected_probed=1
    done
    if (( max_ok >= EXPECTED_MTU - tolerance )); then
      pass "Step-down largest OK=${max_ok} — consistent with expected path MTU ${EXPECTED_MTU}"
      # The gap note is only true when the expected size was not probed
      if (( expected_probed == 0 && max_ok < EXPECTED_MTU )); then
        info " Note: step-down list has no probe at ${EXPECTED_MTU}, so ${max_ok} is the closest lower step"
      fi
      info " Section 3 binary bisect gives the exact value"
    elif (( max_ok >= 1400 )); then
      warn "Step-down largest OK=${max_ok} is well below expected ${EXPECTED_MTU} — check Section 3 for exact path MTU"
    else
      case "$ROLE" in
        vps|wg-client)
          info "Path MTU appears reduced (${max_ok}) — likely upstream tunnel, use --expected-mtu if intentional" ;;
        *)
          fail "Path MTU severely reduced (${max_ok}) — check firewall DF-blocking or misconfigured tunnel" ;;
      esac
    fi
    return
  fi

  # No expected MTU: the list contains a probe at exactly 1500, so a full
  # path is confirmed only if that probe succeeded.
  if (( max_ok >= 1500 )); then
    pass "Full 1500-byte path confirmed — no fragmentation detected"
  elif (( max_ok >= 1400 )); then
    warn "Path MTU around ${max_ok} (below interface MTU ${iface_mtu}) — tunnel overhead suspected"
    info " Hint: WireGuard → 1420 | PPPoE → 1492 | VXLAN/GRE → 1450"
  else
    case "$ROLE" in
      vps|wg-client)
        info "Path MTU appears reduced (${max_ok}) — likely upstream tunnel, use --expected-mtu if intentional" ;;
      *)
        fail "Path MTU severely reduced (${max_ok}) — check firewall DF-blocking or misconfigured tunnel" ;;
    esac
  fi
}
# ── Section 3: Binary bisect ──────────────────────────────────────────────────
# Binary-search the largest DF-bit ICMP packet size that reaches TARGET.
# The search runs from 576 up to 1500, or up to EXPECTED_MTU when that is
# larger, so jumbo expectations can actually be confirmed.
# Records RPT_EXACT_MTU when at least one size succeeds.
section_pmtu_bisect() {
  sect "3. Binary-Search: Exact Path MTU"
  local lo=576 hi=1500 mid best=0 payload
  (( EXPECTED_MTU > hi )) && hi=$EXPECTED_MTU
  info "Bisecting between ${lo} and ${hi} bytes..."
  while (( lo <= hi )); do
    mid=$(( (lo + hi) / 2 ))
    # ICMP payload = packet size - 28 (20 IP + 8 ICMP header bytes)
    payload=$(( mid - 28 ))
    if ping -c 2 -W 2 -M do -s "$payload" "$TARGET" &>/dev/null; then
      best=$mid; lo=$(( mid + 1 ))
    else
      hi=$(( mid - 1 ))
    fi
  done

  if (( best == 0 )); then
    fail "Bisect failed — ICMP may be filtered by target or firewall"
    return
  fi
  info "Exact path MTU: ${BLD}${best} bytes${RST}"
  RPT_EXACT_MTU="$best"

  local tolerance=10
  # "Matches" means within ±tolerance of an explicit expectation. Without one
  # the baseline is a full 1500-byte path, and since the bisect result is
  # exact, anything below 1500 is a reduced path rather than a match.
  local matches=0
  if (( EXPECTED_MTU > 0 )); then
    (( best >= EXPECTED_MTU - tolerance && best <= EXPECTED_MTU + tolerance )) && matches=1
  else
    (( best >= 1500 )) && matches=1
  fi

  if (( matches == 1 )); then
    if (( EXPECTED_MTU > 0 )); then
      pass "Path MTU=${best} — matches expected tunnel MTU ${EXPECTED_MTU}"
    else
      pass "Full 1500 path MTU — no overhead"
    fi
  elif (( best >= 1400 )); then
    if (( EXPECTED_MTU > 0 )); then
      warn "Path MTU=${best} differs from expected ${EXPECTED_MTU} — check tunnel config"
    else
      case "$ROLE" in
        vps|wg-client)
          info "Path MTU=${best} — reduced by upstream tunnel, use --expected-mtu ${best} to suppress this" ;;
        *)
          warn "Path MTU=${best} — apply MSS clamping or adjust tunnel MTU" ;;
      esac
    fi
  else
    if (( EXPECTED_MTU > 0 && best >= EXPECTED_MTU - tolerance )); then
      pass "Path MTU=${best} — matches expected tunnel MTU ${EXPECTED_MTU}"
    else
      case "$ROLE" in
        vps|wg-client)
          if (( EXPECTED_MTU == 0 )); then
            info "Path MTU=${best} — reduced by upstream infrastructure, use --expected-mtu ${best} if intentional"
          else
            fail "Path MTU=${best} is below expected ${EXPECTED_MTU} — upstream tunnel MTU worse than configured"
          fi ;;
        *)
          fail "Path MTU=${best} — significant restriction, investigate middleboxes" ;;
      esac
    fi
  fi
}
# ── Section 4: TCP MSS inspection ─────────────────────────────────────────────
# Tally the MSS values reported by `ss -tin`, judge the most common one against
# the expected MSS, and list any TCPMSS clamping rules in the mangle FORWARD
# chain. Records RPT_DOMINANT_MSS and RPT_CLAMPING. Makes no changes.
section_tcp_mss() {
  sect "4. TCP MSS & Active Socket Inspection [read-only]"
  info "MSS distribution across active TCP connections (ss -tin):"
  local mss_data
  mss_data=$(safe_read ss -tin | grep -Eo 'mss:[0-9]+')
  if [[ -z "$mss_data" ]]; then
    info " No active TCP connections with MSS data found"
  else
    echo "$mss_data" | sort | uniq -c | sort -rn | \
      awk '{printf " %-6s connections at %s\n", $1, $2}' | tee -a "$LOG_FILE"
    local dominant_mss
    dominant_mss=$(echo "$mss_data" | awk -F: '{print $2}' | sort | uniq -c | sort -rn | awk 'NR==1{print $2}')
    RPT_DOMINANT_MSS="$dominant_mss"
    log ""
    # Expected MSS = MTU - 40 (20 IP + 20 TCP headers)
    local expected_mss=$(( EXPECTED_MTU > 0 ? EXPECTED_MTU - 40 : 1460 ))
    local mss_tolerance=10
    local dom=${dominant_mss:-0}
    if (( dom >= expected_mss - mss_tolerance && dom <= expected_mss + mss_tolerance )); then
      if (( EXPECTED_MTU > 0 )); then
        pass "Dominant MSS=${dominant_mss} — matches expected tunnel MSS ${expected_mss}"
      else
        # Report the measured value, which may differ from 1460 by up to the
        # tolerance
        pass "Dominant MSS=${dominant_mss} — standard 1500-byte path"
      fi
    elif (( dom > expected_mss + mss_tolerance )); then
      # ss -tin is not filtered, so loopback or jumbo-frame sockets can
      # dominate the tally; a larger-than-expected MSS is not a reduction.
      info "Dominant MSS=${dominant_mss} — above expected ${expected_mss}; likely loopback or jumbo-frame sockets, not an MTU restriction"
    elif (( dom >= 1400 )); then
      case "$ROLE" in
        vps|wg-client)
          info "Dominant MSS=${dominant_mss} — slightly reduced, consistent with upstream tunnel clamping" ;;
        *)
          warn "Dominant MSS=${dominant_mss} — slightly below expected ${expected_mss}" ;;
      esac
    elif (( dom > 0 )); then
      if (( EXPECTED_MTU > 0 )); then
        case "$ROLE" in
          vps|wg-client)
            info "Dominant MSS=${dominant_mss} — reduced by upstream WireGuard router clamping, expected for this setup" ;;
          *)
            warn "Low dominant MSS=${dominant_mss} — expected ~${expected_mss} for tunnel MTU ${EXPECTED_MTU}" ;;
        esac
      else
        case "$ROLE" in
          vps|wg-client)
            info "Dominant MSS=${dominant_mss} — may be reduced by upstream router or tunnel" ;;
          *)
            warn "Low dominant MSS=${dominant_mss} — clamping or tunnel in path" ;;
        esac
      fi
    fi
  fi

  if command -v iptables &>/dev/null; then
    log ""
    info "iptables TCPMSS clamping rules (list only, no changes):"
    local tcpmss_rules
    tcpmss_rules=$(safe_read iptables -t mangle -L FORWARD -n --line-numbers | grep -i "TCPMSS")
    if [[ -n "$tcpmss_rules" ]]; then
      echo "$tcpmss_rules" | sed 's/^/ /' | tee -a "$LOG_FILE"
      pass "MSS clamping rule(s) present"
      RPT_CLAMPING="present"
    else
      RPT_CLAMPING="none"
      # Whether a missing rule matters depends on the detected role
      case "$ROLE" in
        wg-router|wg-bird-router)
          warn "No TCPMSS clamping rule in mangle FORWARD — required on WireGuard router to prevent MTU black holes"
          info " iptables -t mangle -A FORWARD -p tcp --tcp-flags SYN,RST SYN -j TCPMSS --clamp-mss-to-pmtu"
          info " (script does NOT add it — recommendation only)" ;;
        bird-router|router)
          warn "No TCPMSS clamping rule — recommended on routing nodes with tunnel interfaces" ;;
        vps|wg-client)
          info "No TCPMSS clamping rule — not needed on a VPS/client (ip_forward is off, no packet forwarding)" ;;
        *)
          if (( EXPECTED_MTU > 0 )); then
            info "No TCPMSS clamping rule — may be intentional if clamping is done upstream"
          else
            warn "No TCPMSS clamping rule in mangle FORWARD — recommended when using tunnels"
          fi ;;
      esac
    fi
  fi
}
# ── Section 5: Interface error counters ───────────────────────────────────────
# Show the RX/TX byte, packet, error and drop counters of IFACE as read from
# /proc/net/dev, flag any non-zero error or drop counter, and append the
# `ip -s link` (and, when available, filtered `ethtool -S`) output.
# Records RPT_RX_ERRS, RPT_RX_DROP, RPT_TX_ERRS, RPT_TX_DROP.
section_iface_errors() {
  sect "5. Interface Error Counters — Inbound & Outbound [read-only]"

  local dev_row
  dev_row=$(grep -E "^\s*${IFACE}:" /proc/net/dev 2>/dev/null || true)
  if [[ -z "$dev_row" ]]; then
    fail "Interface '${IFACE}' not found in /proc/net/dev"
    return
  fi

  # Split the row on ':' and spaces, then drop empty pieces.
  # Resulting layout: [0]=name, [1..8]=RX columns, [9..16]=TX columns
  local -a raw=()
  IFS=': ' read -ra raw <<< "$dev_row"
  local -a cols=()
  for f in "${raw[@]}"; do
    if [[ -n "$f" ]]; then
      cols+=("$f")
    fi
  done

  local rx_bytes rx_pkts rx_errs rx_drop
  local tx_bytes tx_pkts tx_errs tx_drop
  rx_bytes="${cols[1]:-0}"
  rx_pkts="${cols[2]:-0}"
  rx_errs="${cols[3]:-0}"
  rx_drop="${cols[4]:-0}"
  tx_bytes="${cols[9]:-0}"
  tx_pkts="${cols[10]:-0}"
  tx_errs="${cols[11]:-0}"
  tx_drop="${cols[12]:-0}"

  log ""
  log " Direction Bytes Packets Errors Drops"
  log " ───────── ────────────── ─────────── ────── ─────"
  printf " %-12s %-16s %-13s %-8s %s\n" "RX (in)" "$rx_bytes" "$rx_pkts" "$rx_errs" "$rx_drop" | tee -a "$LOG_FILE"
  printf " %-12s %-16s %-13s %-8s %s\n" "TX (out)" "$tx_bytes" "$tx_pkts" "$tx_errs" "$tx_drop" | tee -a "$LOG_FILE"
  log ""

  # Errors are failures; drops are only warnings
  local counters_clean=1
  if (( ${rx_errs:-0} > 0 )); then
    fail "RX errors: ${rx_errs}"
    counters_clean=0
  fi
  if (( ${rx_drop:-0} > 0 )); then
    warn "RX drops: ${rx_drop} (may be firewall drops, not necessarily MTU)"
    counters_clean=0
  fi
  if (( ${tx_errs:-0} > 0 )); then
    fail "TX errors: ${tx_errs}"
    counters_clean=0
  fi
  if (( ${tx_drop:-0} > 0 )); then
    warn "TX drops: ${tx_drop}"
    counters_clean=0
  fi
  if (( counters_clean == 1 )); then
    pass "No RX/TX errors or drops on ${IFACE}"
  fi

  RPT_RX_ERRS="${rx_errs:-0}"
  RPT_RX_DROP="${rx_drop:-0}"
  RPT_TX_ERRS="${tx_errs:-0}"
  RPT_TX_DROP="${tx_drop:-0}"

  log ""
  info "ip -s link output (read-only):"
  safe_read ip -s link show "$IFACE" | sed 's/^/ /' | tee -a "$LOG_FILE"

  if (( HAS_ETHTOOL == 1 )); then
    log ""
    info "ethtool NIC stats — fragmentation-relevant (read-only):"
    local eth_stats
    eth_stats=$(safe_read ethtool -S "$IFACE" | grep -iE 'frag|oversize|giant|jabber|mtu|error|drop')
    if [[ -z "$eth_stats" ]]; then
      info " No fragmentation-related ethtool stats found"
    else
      echo "$eth_stats" | sed 's/^/ /' | tee -a "$LOG_FILE"
    fi
  fi
}
# ── Public iperf3 server list — organised by region → country ────────────────
# Format per entry: "host port region country"
# Regions: EU, NA, ASIA, OCE
# Phase 1: one representative per region pinged in parallel → pick best region(s)
# Phase 2: all servers in winning region(s) pinged 2 at a time → pick best country
# Phase 3: top 3 servers of best country retested 10× → winner selected

# One representative host per region
declare -A IPERF3_REGION_REPR=()
IPERF3_REGION_REPR[EU]="speedtest.ams1.nl.leaseweb.net"
IPERF3_REGION_REPR[NA]="nyc.speedtest.clouvider.net"
IPERF3_REGION_REPR[ASIA]="speedtest.tyo11.jp.leaseweb.net"
IPERF3_REGION_REPR[OCE]="speedtest.syd12.au.leaseweb.net"

# Full server table, appended region by region in a fixed order
IPERF3_SERVERS=()

# EU
IPERF3_SERVERS+=(
  # NL
  "iperf-ams-nl.eranium.net 5201 EU NL"
  "speedtest.ams1.nl.leaseweb.net 5201 EU NL"
  "speedtest.ams2.nl.leaseweb.net 5201 EU NL"
  "ams.speedtest.clouvider.net 5200 EU NL"
  "speedtest.ams1.novogara.net 5200 EU NL"
  "ping-ams1.online.net 5200 EU NL"
  "speedtest.netone.nl 5201 EU NL"
  "iperf.worldstream.nl 5201 EU NL"
  # DE
  "fra.speedtest.clouvider.net 5200 EU DE"
  "speedtest.fra1.de.leaseweb.net 5201 EU DE"
  "speedtest.wtnet.de 5200 EU DE"
  "a205.speedtest.wobcom.de 5201 EU DE"
  # GB
  "lon.speedtest.clouvider.net 5200 EU GB"
  "speedtest.lon1.uk.leaseweb.net 5201 EU GB"
  # CH
  "speedtest.init7.net 5201 EU CH"
  # FR
  "iperf.online.net 5200 EU FR"
  "ping.online.net 5200 EU FR"
  # SE
  "speedtest.keff.org 9201 EU SE"
)

# NA
IPERF3_SERVERS+=(
  # US
  "nyc.speedtest.clouvider.net 5201 NA US"
  "speedtest.nyc1.us.leaseweb.net 5201 NA US"
  "dal.speedtest.clouvider.net 5200 NA US"
  "speedtest.dal13.us.leaseweb.net 5201 NA US"
  "la.speedtest.clouvider.net 5200 NA US"
  # CA
  "speedtest.mtl2.ca.leaseweb.net 5201 NA CA"
)

# ASIA
IPERF3_SERVERS+=(
  # JP
  "speedtest.tyo11.jp.leaseweb.net 5201 ASIA JP"
  # SG
  "speedtest.sin1.sg.leaseweb.net 5201 ASIA SG"
  "sgp.proof.ovh.net 5201 ASIA SG"
)

# OCE
IPERF3_SERVERS+=(
  # AU
  "speedtest.syd12.au.leaseweb.net 5201 OCE AU"
  "syd.proof.ovh.net 5201 OCE AU"
)

# Filled in once a server has been chosen
IPERF3_BEST_HOST=""
IPERF3_BEST_PORT=""
# ── Report tracking — populated during test sections ─────────────────────────
# MTU / MSS findings
RPT_IFACE_MTU=""
RPT_PATH_MTU=""
RPT_EXACT_MTU=""
RPT_DOMINANT_MSS=""
# Interface error and drop counters
RPT_RX_ERRS=0
RPT_RX_DROP=0
RPT_TX_ERRS=0
RPT_TX_DROP=0
# iperf3 results
RPT_IPERF_SERVER=""
RPT_IPERF_THROUGHPUT=""
RPT_IPERF_RETRANSMITS=""
RPT_IPERF_VERDICT=""
# Error / drop totals for the load test
RPT_LOAD_ERRS=0
RPT_LOAD_DROPS=0
# MSS clamping and tcp_mtu_probing state
RPT_CLAMPING=""
RPT_PMTUD_PROBE=""
# ── Helper: ping a host, append "avg host port loss" to a file ───────────────
# Usage: _ping_server "host" "port" "count" "outfile"
# Pings host `count` times and, when an average RTT was reported and packet
# loss is below 50%, appends one line "avg host port loss" to outfile.
# Nothing is written for unreachable or lossy hosts. `loss` is the integer
# part of the reported percentage.
_ping_server() {
  local host="$1" port="$2" count="$3" outfile="$4"
  local out avg="" loss=100
  out=$(ping -c "$count" -W 2 -q "$host" 2>/dev/null || true)

  # Summary line: "rtt min/avg/max/mdev = 10.1/20.2/30.3/1.0 ms" — take avg
  if [[ "$out" =~ rtt[^=]*=[[:space:]]*[0-9.]+/([0-9.]+) ]]; then
    avg="${BASH_REMATCH[1]}"
  fi
  # Loss may be printed with a fraction ("33.3333% packet loss"); keep only
  # the integer part. With no loss figure at all, assume 100%.
  if [[ "$out" =~ ([0-9]+)(\.[0-9]+)?%\ packet\ loss ]]; then
    loss="${BASH_REMATCH[1]}"
  fi

  if [[ -n "$avg" ]] && (( 10#$loss < 50 )); then
    echo "$avg $host $port $loss" >> "$outfile"
  fi
}
# Private helper for select_iperf3_server.
# Usage: _iperf3_ping_probe "host" "count"
# Prints one line "<loss_pct> <avg_rtt_ms>". loss_pct is always an integer
# (100 when ping produced no summary); avg_rtt_ms is empty when ping printed
# no rtt line.
_iperf3_ping_probe() {
    local host="$1" count="$2"
    local out avg loss
    out=$(ping -c "$count" -W 2 -q "$host" 2>/dev/null || true)
    avg=$(grep -oP 'rtt.*=\s*[\d.]+/\K[\d.]+' <<< "$out" || true)
    # ping may print fractional loss ("33.3333% packet loss"); keep only the
    # integer part so threshold comparisons see 33, not the digits "3333".
    loss=$(grep -oP '[\d.]+(?=% packet loss)' <<< "$out" || true)
    if [[ -z "$loss" ]]; then
        loss=100
    else
        loss="${loss%%.*}"
        loss="${loss:-0}"
    fi
    printf '%s %s\n' "$loss" "$avg"
}

# Picks the lowest-latency public iperf3 server in three ping phases and
# stores it in IPERF3_BEST_HOST / IPERF3_BEST_PORT (plus RPT_IPERF_SERVER for
# the report). Every early return leaves those globals untouched, which
# section_load_test treats as "no public server — use loopback".
#
# Globals read:    HAS_IPERF3, TMPDIR_PINGS, LOG_FILE, IPERF3_REGION_REPR,
#                  IPERF3_SERVERS, colour variables
# Globals written: IPERF3_BEST_HOST, IPERF3_BEST_PORT, RPT_IPERF_SERVER
select_iperf3_server() {
    sect "6a. Public iperf3 Server Selection [parallel ping, read-only]"
    if [[ $HAS_IPERF3 -eq 0 ]]; then
        skip "iperf3 not installed — skipping server selection"
        return
    fi
    mkdir -p "$TMPDIR_PINGS"

    # Declared up front so loop variables do not leak into the global scope.
    local region repr entry wr pid line host port country rtt avg loss
    local -a pids=() winning_regions=() region_servers=() batch_pids=()

    # ═══════════════════════════════════════════════════════════════════════
    # PHASE 1 — ping one representative per region in parallel (3 pings each)
    # Goal: eliminate distant regions fast
    # ═══════════════════════════════════════════════════════════════════════
    info "Phase 1: Pinging region representatives in parallel (3 pings each)..."
    log ""
    local region_file="${TMPDIR_PINGS}/phase1_regions.txt"
    : > "$region_file"
    for region in "${!IPERF3_REGION_REPR[@]}"; do
        repr="${IPERF3_REGION_REPR[$region]}"
        (
            read -r loss avg <<< "$(_iperf3_ping_probe "$repr" 3)"
            if [[ -n "$avg" && "$loss" -lt 60 ]]; then
                echo "$avg $region" >> "$region_file"
                printf " %-6s %-44s ${GRN}avg %7s ms${RST}\n" \
                    "[$region]" "$repr" "$avg" | tee -a "$LOG_FILE"
            else
                printf " %-6s %-44s ${YEL}unreachable${RST}\n" \
                    "[$region]" "$repr" | tee -a "$LOG_FILE"
            fi
        ) &
        pids+=($!)
    done
    if [[ ${#pids[@]} -gt 0 ]]; then
        for pid in "${pids[@]}"; do wait "$pid" 2>/dev/null || true; done
    fi
    if [[ ! -s "$region_file" ]]; then
        warn "No regions reachable — falling back to loopback"
        return
    fi

    # Keep every region whose RTT is within (2 × best RTT + 20 ms); RTTs are
    # truncated to whole milliseconds for the integer comparison.
    local best_region_rtt rtt_cutoff
    best_region_rtt=$(sort -n "$region_file" | head -1 | awk '{print $1}' | cut -d. -f1)
    rtt_cutoff=$(( best_region_rtt * 2 + 20 ))
    while read -r rtt region; do
        rtt="${rtt%%.*}"
        if [[ $rtt -le $rtt_cutoff ]]; then
            winning_regions+=("$region")
        fi
    done < <(sort -n "$region_file")
    log ""
    info "Winning region(s): ${BLD}${winning_regions[*]}${RST} (cutoff ${rtt_cutoff} ms)"

    # ═══════════════════════════════════════════════════════════════════════
    # PHASE 2 — ping all servers in winning regions, 2 at a time (4 pings each)
    # Goal: find the best country within the winning region(s)
    # ═══════════════════════════════════════════════════════════════════════
    log ""
    info "Phase 2: Pinging all servers in winning region(s), 2 at a time (4 pings)..."
    log ""
    local country_file="${TMPDIR_PINGS}/phase2_countries.txt"
    : > "$country_file"

    # Collect "host port country" for every server in a winning region.
    # IPERF3_SERVERS entries are "host port region country".
    for entry in "${IPERF3_SERVERS[@]}"; do
        read -r host port region country <<< "$entry"
        for wr in "${winning_regions[@]}"; do
            if [[ "$region" == "$wr" ]]; then
                region_servers+=("$host $port $country")
                break
            fi
        done
    done

    # Probe in parallel batches of 2.
    local batch_count=0
    if [[ ${#region_servers[@]} -gt 0 ]]; then
        for entry in "${region_servers[@]}"; do
            read -r host port country <<< "$entry"
            (
                read -r loss avg <<< "$(_iperf3_ping_probe "$host" 4)"
                if [[ -n "$avg" && "$loss" -lt 30 ]]; then
                    echo "$avg $host $port $country" >> "$country_file"
                    printf " [%2s] %-44s ${GRN}avg %7s ms loss %s%%${RST}\n" \
                        "$country" "$host" "$avg" "$loss" | tee -a "$LOG_FILE"
                else
                    printf " [%2s] %-44s ${YEL}skip (loss %s%%)${RST}\n" \
                        "$country" "$host" "$loss" | tee -a "$LOG_FILE"
                fi
            ) &
            batch_pids+=($!)
            batch_count=$(( batch_count + 1 ))
            if [[ $batch_count -ge 2 ]]; then
                for pid in "${batch_pids[@]}"; do wait "$pid" 2>/dev/null || true; done
                batch_pids=()
                batch_count=0
            fi
        done
    fi
    # Flush a trailing, incomplete batch (the array is empty when the server
    # count was even, hence the guard).
    if [[ ${#batch_pids[@]} -gt 0 ]]; then
        for pid in "${batch_pids[@]}"; do wait "$pid" 2>/dev/null || true; done
    fi
    if [[ ! -s "$country_file" ]]; then
        warn "Phase 2: no servers responded — falling back to loopback"
        return
    fi

    # Best country = most frequent country among the 3 fastest servers.
    local best_country
    best_country=$(sort -n "$country_file" | head -3 | awk '{print $4}' | sort | uniq -c | sort -rn | awk 'NR==1{print $2}')
    log ""
    info "Best country: ${BLD}${best_country}${RST}"

    # ═══════════════════════════════════════════════════════════════════════
    # PHASE 3 — retest top-3 servers of best country with 10 pings each (serial)
    # Goal: accurate final selection
    # ═══════════════════════════════════════════════════════════════════════
    log ""
    info "Phase 3: Retesting top 3 servers in ${best_country} with 10 pings each..."
    log ""
    local final_file="${TMPDIR_PINGS}/phase3_final.txt"
    : > "$final_file"
    local top3
    top3=$(sort -n "$country_file" | awk -v c="$best_country" '$4==c' | head -3)
    while IFS= read -r line; do
        read -r rtt host port country <<< "$line"
        [[ -n "$host" ]] || continue      # here-string of an empty list yields one blank line
        printf " %-44s " "$host" | tee -a "$LOG_FILE"
        read -r loss avg <<< "$(_iperf3_ping_probe "$host" 10)"
        if [[ -n "$avg" && "$loss" -lt 20 ]]; then
            printf "${GRN}avg %7s ms loss %s%%${RST}\n" "$avg" "$loss" | tee -a "$LOG_FILE"
            echo "$avg $host $port" >> "$final_file"
        else
            printf "${RED}skip (loss %s%%)${RST}\n" "$loss" | tee -a "$LOG_FILE"
        fi
    done <<< "$top3"
    if [[ ! -s "$final_file" ]]; then
        warn "Phase 3: all top servers failed retest — falling back to loopback"
        return
    fi

    # Lowest average RTT wins; final_file lines are "avg host port".
    local winner best_rtt
    winner=$(sort -n "$final_file" | head -1)
    read -r best_rtt IPERF3_BEST_HOST IPERF3_BEST_PORT <<< "$winner"
    log ""
    pass "Selected: ${BLD}${IPERF3_BEST_HOST}:${IPERF3_BEST_PORT}${RST} (avg RTT ${best_rtt} ms, ${best_country})"
    RPT_IPERF_SERVER="${IPERF3_BEST_HOST}:${IPERF3_BEST_PORT} (${best_country}, RTT ${best_rtt} ms)"
}
# ── Section 6: Load test ──────────────────────────────────────────────────────
#
# Strategy:
#   Phase A — iperf3 TCP load
#       Uses the public server chosen by select_iperf3_server
#       (IPERF3_BEST_HOST / IPERF3_BEST_PORT) when one is set; otherwise
#       starts iperf3 -s on 127.0.0.1:15201 as a loopback fallback. Runs
#       LOAD_PARALLEL client streams for LOAD_DURATION seconds. The MSS is the
#       detected dominant MSS (if below 1460), else EXPECTED_MTU - 40, else 1460.
#
#   Phase B — Concurrent MTU ICMP probes
#       While the iperf3 client runs in the background, step-down ICMP probes
#       (DF-bit) are sent to the real TARGET.
#
#   Phase C — Counter delta
#       /proc/net/dev RX/TX error + drop deltas before vs after.
#
# Falls back to flood-ping when no iperf3 run could be started.
#
# Globals read:    NO_LOAD, HAS_IPERF3, IFACE, TARGET, TMPDIR_PINGS, LOG_FILE,
#                  LOAD_PARALLEL, LOAD_DURATION, EXPECTED_MTU, RPT_DOMINANT_MSS,
#                  IPERF3_BEST_HOST, IPERF3_BEST_PORT, colour variables
# Globals written: RPT_IPERF_SERVER (loopback only), RPT_IPERF_RETRANSMITS,
#                  RPT_IPERF_THROUGHPUT, RPT_IPERF_VERDICT, RPT_LOAD_ERRS,
#                  RPT_LOAD_DROPS
# =============================================================================
section_load_test() {
    sect "6b. MTU Under Load [iperf3 real-path + concurrent ICMP probes]"
    if [[ $NO_LOAD -eq 1 ]]; then
        skip "Load test skipped (--no-load)"
        return
    fi
    mkdir -p "$TMPDIR_PINGS"

    # Loop/scratch variables, declared once so nothing leaks to global scope.
    local f size b i pid res payload
    local elapsed pct bar_filled bar

    # ── Snapshot /proc counters before ───────────────────────────────────────
    # After splitting on ':' and spaces and dropping empty fields, index 0 is
    # the interface name; 3/4 are read as RX errs/drop and 11/12 as TX errs/drop.
    local pre_line; pre_line=$(grep -E "^\s*${IFACE}:" /proc/net/dev 2>/dev/null || true)
    local pre=(); IFS=': ' read -ra pre <<< "$pre_line"
    local pre_clean=()
    for f in "${pre[@]}"; do
        if [[ -n "$f" ]]; then pre_clean+=("$f"); fi
    done
    local pre_rx_errs="${pre_clean[3]:-0}" pre_rx_drop="${pre_clean[4]:-0}"
    local pre_tx_errs="${pre_clean[11]:-0}" pre_tx_drop="${pre_clean[12]:-0}"

    # ── Decide: external server or loopback ──────────────────────────────────
    local iperf_host="" iperf_port="" iperf_mode=""
    local iperf_server_pid="" iperf_used=0
    local _iperf_retransmits=""   # set during parse, consumed in deferred verdict block
    if [[ $HAS_IPERF3 -eq 1 ]]; then
        if [[ -n "$IPERF3_BEST_HOST" && -n "$IPERF3_BEST_PORT" ]]; then
            iperf_host="$IPERF3_BEST_HOST"
            iperf_port="$IPERF3_BEST_PORT"
            iperf_mode="external"
            info "Using external iperf3 server: ${BLD}${iperf_host}:${iperf_port}${RST}"
            info "Traffic will traverse your real NIC path — this is the most realistic test."
        else
            # Loopback fallback
            local IPERF_PORT=15201
            if ss -tlnp 2>/dev/null | grep -q ":${IPERF_PORT} "; then
                warn "Port ${IPERF_PORT} in use — cannot start loopback server"
            else
                iperf3 -s -B 127.0.0.1 -p "$IPERF_PORT" --one-off \
                    > "${TMPDIR_PINGS}/iperf_server.txt" 2>&1 &
                iperf_server_pid=$!
                sleep 0.8    # give the server a moment to bind before checking it
                if kill -0 "$iperf_server_pid" 2>/dev/null; then
                    iperf_host="127.0.0.1"
                    iperf_port="$IPERF_PORT"
                    iperf_mode="loopback"
                    # Record the loopback run so the final report does not
                    # describe it as the flood-ping fallback.
                    RPT_IPERF_SERVER="127.0.0.1:${IPERF_PORT} (loopback fallback)"
                    warn "No public server available — using loopback fallback (less realistic)"
                    info " Retransmits on loopback at high throughput are normal, not an MTU indicator."
                else
                    warn "iperf3 loopback server failed to start"
                    iperf_server_pid=""
                fi
            fi
        fi
    fi

    if [[ -n "$iperf_host" ]]; then
        # Determine the correct MSS to use:
        # Priority: 1) dominant MSS from section 4  2) expected_mtu-40  3) 1460
        local iperf_mss=1460 mss_source=""
        if [[ -n "$RPT_DOMINANT_MSS" && "$RPT_DOMINANT_MSS" =~ ^[0-9]+$ && "$RPT_DOMINANT_MSS" -lt 1460 ]]; then
            iperf_mss="$RPT_DOMINANT_MSS"
            mss_source="detected dominant MSS ${RPT_DOMINANT_MSS}"
        elif [[ $EXPECTED_MTU -gt 0 ]]; then
            iperf_mss=$(( EXPECTED_MTU - 40 ))
            mss_source="expected MTU ${EXPECTED_MTU}"
        fi
        info "Running ${LOAD_PARALLEL}x parallel TCP streams, MSS ${iperf_mss}, ${LOAD_DURATION}s..."
        if [[ $iperf_mss -lt 1460 ]]; then
            info " Using MSS ${iperf_mss} (derived from ${mss_source}) — matches path MTU"
        fi
        iperf3 -c "$iperf_host" -p "$iperf_port" \
            -t "$LOAD_DURATION" \
            -P "$LOAD_PARALLEL" \
            -M "$iperf_mss" \
            --logfile "${TMPDIR_PINGS}/iperf_client.txt" \
            > /dev/null 2>&1 &
        local iperf_client_pid=$!
        iperf_used=1

        # ── Phase B: concurrent ICMP MTU probes while TCP load is running ────
        info "Firing concurrent ICMP MTU probes (DF-bit) while TCP load is active..."
        local -a probe_sizes=(1500 1472 1450 1420 1400 1300 1000)
        local -A probe_results_load=()
        for size in "${probe_sizes[@]}"; do
            payload=$(( size - 28 ))    # probe size minus 28 bytes of IP + ICMP header
            if ping -c 3 -W 2 -M do -s "$payload" "$TARGET" &>/dev/null; then
                probe_results_load[$size]="OK"
            else
                probe_results_load[$size]="FAIL"
            fi
        done

        # Progress bar — polls the client once a second, for at most
        # LOAD_DURATION + 5 iterations.
        elapsed=0
        while kill -0 "$iperf_client_pid" 2>/dev/null && [[ $elapsed -lt $(( LOAD_DURATION + 5 )) ]]; do
            pct=$(( elapsed * 100 / LOAD_DURATION ))
            [[ $pct -gt 100 ]] && pct=100
            bar_filled=$(( pct / 5 ))
            bar=""
            for (( b=0; b<20; b++ )); do
                if [[ $b -lt $bar_filled ]]; then bar+="█"; else bar+="░"; fi
            done
            printf "\r ${CYN}[%s] %3d%% iperf3 [%s] running...${RST}" "$bar" "$pct" "$iperf_mode"
            sleep 1
            elapsed=$(( elapsed + 1 ))
        done
        # The loop above gives up after its timeout; stop a client that is
        # still alive so the wait below cannot block indefinitely.
        if kill -0 "$iperf_client_pid" 2>/dev/null; then
            kill "$iperf_client_pid" 2>/dev/null || true
        fi
        printf "\r ${GRN}[████████████████████] 100%% iperf3 complete ${RST}\n"
        wait "$iperf_client_pid" 2>/dev/null || true

        # Kill loopback server if we started one
        if [[ -n "$iperf_server_pid" ]]; then
            kill "$iperf_server_pid" 2>/dev/null || true
            wait "$iperf_server_pid" 2>/dev/null || true
        fi

        # ── Parse iperf3 output ───────────────────────────────────────────────
        log ""
        info "iperf3 results (${iperf_mode}, MSS ${iperf_mss}, ${LOAD_PARALLEL} streams):"
        if [[ -f "${TMPDIR_PINGS}/iperf_client.txt" ]]; then
            grep -E '\[SUM\]|\[ ID\]|sender|receiver|error|connect' "${TMPDIR_PINGS}/iperf_client.txt" \
                | sed 's/^/ /' | tee -a "$LOG_FILE" || true
            # Retransmit count: the field just before "sender" on the [SUM] line.
            local retransmits
            retransmits=$(grep -E 'SUM.*sender' "${TMPDIR_PINGS}/iperf_client.txt" \
                | awk '{print $(NF-1)}' 2>/dev/null || echo "?")
            # fallback parse
            [[ "$retransmits" == "?" ]] && \
                retransmits=$(grep -oP '\d+(?= sender)' "${TMPDIR_PINGS}/iperf_client.txt" | tail -1 || echo "?")
            # Capture throughput for report
            local throughput
            throughput=$(grep -E 'SUM.*sender' "${TMPDIR_PINGS}/iperf_client.txt" \
                | awk '{for(i=1;i<=NF;i++) if($i~/bits/) print $(i-1)" "$i}' | tail -1 || echo "")
            RPT_IPERF_RETRANSMITS="${retransmits:-?}"
            RPT_IPERF_THROUGHPUT="${throughput:-unknown}"
            # Kept for both modes: the external verdict and the loopback
            # correlation check further down both read it.
            _iperf_retransmits="$retransmits"
            log ""
            if [[ "$iperf_mode" == "external" ]]; then
                # External retransmits are only meaningful when corroborated by
                # ICMP failures. If --expected-mtu is set and ICMP ≤expected MTU
                # is clean, retransmits are caused by PMTUD MSS renegotiation
                # during TCP warmup (kernel starts at MSS 1460, path reduces it
                # to MSS = expected_mtu - 40 in the first few seconds), not an
                # actual MTU problem. The verdict is deferred until after the
                # ICMP results.
                :
            else
                # Loopback: retransmits are congestion control noise, not MTU signal
                if [[ "${retransmits:-0}" == "0" ]]; then
                    pass "iperf3 retransmits: 0 (loopback)"
                    RPT_IPERF_VERDICT="clean (0 retransmits, loopback)"
                elif [[ "${retransmits:-?}" =~ ^[0-9]+$ ]]; then
                    info "iperf3 retransmits: ${retransmits} (loopback congestion control — not an MTU indicator)"
                    RPT_IPERF_VERDICT="${retransmits} retransmits (loopback — not an MTU indicator)"
                else
                    info "iperf3 retransmits: could not parse"
                    RPT_IPERF_VERDICT="unknown (could not parse iperf3 output)"
                fi
            fi
        else
            warn "iperf3 client output not found — server may have refused connection"
        fi

        # ── Report concurrent ICMP results ────────────────────────────────────
        log ""
        info "Concurrent ICMP probe results (fired during TCP load on real path):"
        log " Size Result"
        log " ──── ──────"
        local concurrent_fail=0
        for size in "${probe_sizes[@]}"; do
            res="${probe_results_load[$size]:-SKIP}"
            if [[ "$res" == "OK" ]]; then
                printf " %-8s ${GRN}OK${RST}\n" "$size" | tee -a "$LOG_FILE"
            else
                printf " %-8s ${RED}FAIL${RST}\n" "$size" | tee -a "$LOG_FILE"
                concurrent_fail=$(( concurrent_fail + 1 ))
            fi
        done
        log ""
        if [[ $concurrent_fail -eq 0 ]]; then
            pass "All ICMP sizes passed under TCP load — no MTU regression under load"
        else
            # If expected MTU is set, check if only sizes above it failed
            if [[ $EXPECTED_MTU -gt 0 ]]; then
                local unexpected_fail=0
                for size in "${probe_sizes[@]}"; do
                    res="${probe_results_load[$size]:-SKIP}"
                    if [[ "$res" == "FAIL" && $size -le $EXPECTED_MTU ]]; then
                        unexpected_fail=$(( unexpected_fail + 1 ))
                    fi
                done
                if [[ $unexpected_fail -eq 0 ]]; then
                    pass "ICMP sizes ≤${EXPECTED_MTU} all passed — failures above expected tunnel MTU are normal ✓"
                else
                    fail "${unexpected_fail} ICMP size(s) ≤${EXPECTED_MTU} failed under load — MTU instability below expected tunnel MTU"
                fi
            else
                fail "${concurrent_fail} ICMP size(s) failed under load — MTU instability under real traffic"
            fi
        fi

        # ── Deferred retransmit verdict ───────────────────────────────────────
        # External mode: the verdict waited for the ICMP result.
        # Loopback mode: a simple correlation pass.
        local retr="${_iperf_retransmits:-}"
        if [[ "$iperf_mode" == "external" && -n "$retr" ]]; then
            log ""
            # Determine if ICMP was clean at/below the expected MTU baseline
            local icmp_ok=1
            if [[ $EXPECTED_MTU -gt 0 ]]; then
                # NOTE(review): this uses EXPECTED_MTU + 10 while the check
                # above uses EXPECTED_MTU exactly — confirm which is intended.
                for size in "${probe_sizes[@]}"; do
                    res="${probe_results_load[$size]:-SKIP}"
                    [[ "$res" == "FAIL" && $size -le $(( EXPECTED_MTU + 10 )) ]] && icmp_ok=0
                done
            else
                [[ $concurrent_fail -gt 0 ]] && icmp_ok=0
            fi
            if [[ "$retr" == "0" ]]; then
                pass "iperf3 retransmits: 0 — clean TCP over real network path"
                RPT_IPERF_VERDICT="clean (0 retransmits)"
            elif [[ ! "$retr" =~ ^[0-9]+$ ]]; then
                # No numeric count (e.g. the run failed) — do not present it
                # as normal congestion control.
                info "iperf3 retransmits: could not parse — the iperf3 run may have failed (see output above)"
                RPT_IPERF_VERDICT="unknown (could not parse iperf3 output)"
            elif [[ $icmp_ok -eq 1 ]]; then
                local settled_mss=$(( EXPECTED_MTU > 0 ? EXPECTED_MTU - 40 : 1460 ))
                if [[ "$iperf_mss" -lt 1460 ]]; then
                    # Correct MSS was used from the start — retransmits are
                    # pure TCP congestion control on the real network path,
                    # not MSS negotiation. Normal for a shared public iperf3 server.
                    info "iperf3 retransmits: ${retr} — ICMP path clean, MSS ${iperf_mss} was correct from the start"
                    info " Retransmits are TCP congestion control on the real path — normal for shared public servers"
                else
                    info "iperf3 retransmits: ${retr} — ICMP path clean at/below expected MTU ${EXPECTED_MTU:-1500}"
                    info " TCP PMTUD warmup: kernel opens at MSS 1460, path reduces to MSS ${settled_mss}"
                    info " in the first 1-2 seconds → burst of retransmits, then settles. Normal."
                fi
                pass "Retransmit correlation: ICMP clean, no MTU problem ✓"
                RPT_IPERF_VERDICT="TCP congestion control (${retr} retransmits, normal)"
            elif [[ "$retr" -lt 50 ]]; then
                warn "iperf3 retransmits: ${retr} — minor, correlate with ICMP failures above"
                RPT_IPERF_VERDICT="minor retransmits (${retr})"
            else
                fail "iperf3 retransmits: ${retr} — corroborated by ICMP failures ≤expected MTU, real MTU problem"
                RPT_IPERF_VERDICT="HIGH retransmits (${retr}) — MTU problem"
            fi
        elif [[ "$iperf_mode" == "loopback" ]] && \
             [[ "${_iperf_retransmits:-?}" =~ ^[0-9]+$ ]] && [[ "${_iperf_retransmits}" -gt 0 ]] && \
             [[ $concurrent_fail -eq 0 ]]; then
            pass "Retransmit correlation: ICMP probes clean — loopback retransmits are not MTU-related"
        fi
    fi

    # ── Fallback: flood-ping if no iperf3 run was started ────────────────────
    if [[ $iperf_used -eq 0 ]]; then
        warn "iperf3 not available — using flood-ping fallback"
        info " Install: apt install iperf3 / dnf install iperf3"
        log ""
        info "Launching ${LOAD_PARALLEL} parallel ping streams (payload=1472, DF-bit, ${LOAD_DURATION}s)..."
        local pids=()
        local count=$(( LOAD_DURATION * 20 ))    # 20 packets per second at -i 0.05
        for (( i=0; i<LOAD_PARALLEL; i++ )); do
            ping -c "$count" -i 0.05 -W 2 -M do -s 1472 -q "$TARGET" \
                > "${TMPDIR_PINGS}/ping_${i}.txt" 2>&1 &
            pids+=($!)
        done
        elapsed=0
        while [[ $elapsed -lt $LOAD_DURATION ]]; do
            pct=$(( elapsed * 100 / LOAD_DURATION ))
            bar_filled=$(( pct / 5 ))
            bar=""
            for (( b=0; b<20; b++ )); do
                if [[ $b -lt $bar_filled ]]; then bar+="█"; else bar+="░"; fi
            done
            printf "\r ${CYN}[%s] %3d%% %ds / %ds${RST}" "$bar" "$pct" "$elapsed" "$LOAD_DURATION"
            sleep 1
            elapsed=$(( elapsed + 1 ))
        done
        printf "\r ${GRN}[████████████████████] 100%% Complete ${RST}\n"
        if [[ ${#pids[@]} -gt 0 ]]; then
            for pid in "${pids[@]}"; do wait "$pid" 2>/dev/null || true; done
        fi

        local total_sent=0 total_recv=0 max_loss=0
        local stream_file sent recv loss loss_frac
        for (( i=0; i<LOAD_PARALLEL; i++ )); do
            stream_file="${TMPDIR_PINGS}/ping_${i}.txt"
            [[ -f "$stream_file" ]] || continue
            sent=$(grep -oP '\d+(?= packets transmitted)' "$stream_file" 2>/dev/null || echo 0)
            recv=$(grep -oP '\d+(?= received)' "$stream_file" 2>/dev/null || echo 0)
            # ping may print fractional loss ("0.5% packet loss"). Round it up
            # to a whole percent so any loss counts as non-zero; matching
            # digits only would turn "0.5" into "5".
            loss=$(grep -oP '[\d.]+(?=% packet loss)' "$stream_file" 2>/dev/null || echo 0)
            if [[ "$loss" == *.* ]]; then
                loss_frac="${loss#*.}"
                loss="${loss%%.*}"
                loss="${loss:-0}"
                if [[ "$loss_frac" =~ [1-9] ]]; then loss=$(( loss + 1 )); fi
            fi
            total_sent=$(( total_sent + sent ))
            total_recv=$(( total_recv + recv ))
            if [[ ${loss:-0} -gt $max_loss ]]; then max_loss=${loss:-0}; fi
        done
        log ""
        log " Packets sent : $total_sent"
        log " Packets received : $total_recv"
        log " Max stream loss : ${max_loss}%"
        log ""
        if [[ $total_sent -eq 0 ]]; then
            # No statistics were parsed at all; reporting "zero loss" here
            # would be a false pass.
            warn "Flood-ping produced no statistics — ping failed to run or target could not be resolved"
        elif [[ $max_loss -eq 0 ]]; then pass "Zero packet loss under load at 1472-byte payload"
        elif [[ $max_loss -le 2 ]]; then warn "Minor loss (${max_loss}%) — may be rate-limiting at target"
        else fail "Loss ${max_loss}% under load — MTU fragmentation or buffer issue"
        fi
    fi

    # ── Phase C: counter delta ────────────────────────────────────────────────
    local post_line; post_line=$(grep -E "^\s*${IFACE}:" /proc/net/dev 2>/dev/null || true)
    local post=(); IFS=': ' read -ra post <<< "$post_line"
    local post_clean=()
    for f in "${post[@]}"; do
        if [[ -n "$f" ]]; then post_clean+=("$f"); fi
    done
    local delta_rx_errs=$(( ${post_clean[3]:-0} - pre_rx_errs ))
    local delta_rx_drop=$(( ${post_clean[4]:-0} - pre_rx_drop ))
    local delta_tx_errs=$(( ${post_clean[11]:-0} - pre_tx_errs ))
    local delta_tx_drop=$(( ${post_clean[12]:-0} - pre_tx_drop ))
    log ""
    info "Interface counter delta (entire load test period):"
    log " New RX errors : $delta_rx_errs"
    log " New RX drops : $delta_rx_drop"
    log " New TX errors : $delta_tx_errs"
    log " New TX drops : $delta_tx_drop"
    if [[ $delta_rx_errs -eq 0 && $delta_rx_drop -eq 0 && $delta_tx_errs -eq 0 && $delta_tx_drop -eq 0 ]]; then
        pass "No new RX/TX errors or drops during load test"
        RPT_LOAD_ERRS=0; RPT_LOAD_DROPS=0
    else
        fail "New errors/drops during load — check MTU mismatch, ring buffer size, or NIC driver"
        RPT_LOAD_ERRS=$(( delta_rx_errs + delta_tx_errs ))
        RPT_LOAD_DROPS=$(( delta_rx_drop + delta_tx_drop ))
    fi
}
# ── Section 7: tc / QoS read ──────────────────────────────────────────────────
# Prints the qdiscs and filters attached to IFACE and checks every "mpu" value
# found in the qdisc listing (≤ 64 passes, larger warns). Does nothing when
# HAS_TC is 0.
# safe_read is defined elsewhere in this file; presumably it runs the given
# read-only command and prints its output — verify against its definition.
section_tc() {
    [[ $HAS_TC -eq 0 ]] && return
    sect "7. Traffic Control (tc) QoS Inspection [read-only]"

    info "qdiscs on ${IFACE}:"
    local qdiscs
    qdiscs=$(safe_read tc qdisc show dev "$IFACE")
    if [[ -n "$qdiscs" ]]; then
        sed 's/^/ /' <<< "$qdiscs" | tee -a "$LOG_FILE"
    fi

    # Several qdiscs may each carry an mpu; evaluate them one by one instead
    # of feeding a multi-line string into a numeric test.
    local -a mpus=()
    local mpu
    mapfile -t mpus < <(grep -oP 'mpu \K\d+' <<< "$qdiscs" || true)
    if [[ ${#mpus[@]} -gt 0 ]]; then
        for mpu in "${mpus[@]}"; do
            info "Minimum Packet Unit (mpu): $mpu"
            if [[ "$mpu" -le 64 ]]; then
                pass "mpu=$mpu looks reasonable"
            else
                warn "mpu=$mpu is large — may affect small-packet handling"
            fi
        done
    else
        info " No mpu value set in qdisc (kernel default)"
    fi

    info ""
    info "tc filters on ${IFACE}:"
    # Capture first: a trailing "|| info" after a tee pipeline never fires
    # for an empty listing, because tee itself succeeds.
    local filters
    filters=$(safe_read tc filter show dev "$IFACE")
    if [[ -n "$filters" ]]; then
        sed 's/^/ /' <<< "$filters" | tee -a "$LOG_FILE"
    else
        info " (none)"
    fi
}
# ── Section 8: Summary ────────────────────────────────────────────────────────
# Logs a short recap (role, interface + MTU, WireGuard/BIRD details when
# present, target, issue count) followed by a role-specific box of suggested
# manual fixes. Only prints; nothing is applied to the system.
#
# Globals read: ROLE, IFACE, WG_MTU, WG_IFACES, WG_PUBSUBNETS, BIRD_RUNNING,
#               BIRD_PROTOCOLS, TARGET, EXPECTED_MTU, ISSUES_FOUND, LOG_FILE
section_summary() {
    sect "8. Summary & Recommendations"
    local iface_mtu
    iface_mtu=$(safe_read ip link show "$IFACE" | awk '/mtu/{for(i=1;i<=NF;i++) if($i=="mtu") print $(i+1)}')
    log " Role : ${BLD}${ROLE}${RST}"
    log " Interface : ${IFACE} (MTU ${iface_mtu})"
    [[ ${WG_MTU:-0} -gt 0 ]] && log " WG MTU : ${WG_MTU} (${#WG_IFACES[@]} interface(s): ${WG_IFACES[*]})"
    [[ ${#WG_PUBSUBNETS[@]} -gt 0 ]] && log " Pub subnets: ${WG_PUBSUBNETS[*]}"
    [[ $BIRD_RUNNING -eq 1 ]] && log " BIRD : running sessions: ${BIRD_PROTOCOLS[*]:-none}"
    log " Target : ${TARGET}"
    [[ $EXPECTED_MTU -gt 0 ]] && log " Exp. MTU : ${EXPECTED_MTU} (path MTU baseline)"
    log " Issues : ${ISSUES_FOUND} warning(s)/failure(s)"
    log ""

    # Box pieces shared by every role.
    local box_top=" ┌──────────────────────────────────────────────────────────────────┐"
    local box_sep=" ├──────────────────────────────────────────────────────────────────┤"
    local box_end=" └──────────────────────────────────────────────────────────────────┘"
    local -a pmtud_rows=(
        " │ PMTUD on sysctl -w net.ipv4.tcp_mtu_probing=1 │"
        " │ Persist echo 'net.ipv4.tcp_mtu_probing=1' │"
        " │ >> /etc/sysctl.d/99-mtu.conf │"
    )
    local -a clamp_pmtu_rows=(
        " │ MSS clamp iptables -t mangle -A FORWARD │"
        " │ -p tcp --tcp-flags SYN,RST SYN │"
        " │ -j TCPMSS --clamp-mss-to-pmtu │"
    )
    local row

    # ── Role-aware fixes box ──────────────────────────────────────────────────
    case "$ROLE" in
        vps|wg-client)
            log "$box_top"
            log " │ Relevant fixes for this VPS / client │"
            log "$box_sep"
            for row in "${pmtud_rows[@]}"; do log "$row"; done
            if [[ $EXPECTED_MTU -gt 0 ]]; then
                log " │ Path MTU ${EXPECTED_MTU} is set by upstream router — no local fix needed │"
            fi
            log " │ If issues persist: contact your hoster about path MTU │"
            log " │ tracepath 8.8.8.8 ← shows which hop reduces MTU │"
            log "$box_end" ;;
        wg-router|wg-bird-router)
            # ${WG_MTU:-1420} only covers unset/empty. A value of 0 or any
            # other non-usable value would otherwise end up in the suggested
            # commands as "mtu 0" / "--set-mss -40", so fall back to 1420.
            local wg_mtu_display="${WG_MTU:-1420}"
            if [[ ! "$wg_mtu_display" =~ ^[0-9]+$ ]] || [[ "$wg_mtu_display" -le 40 ]]; then
                wg_mtu_display=1420
            fi
            local wg_mss=$(( wg_mtu_display - 40 ))
            log "$box_top"
            log " │ Relevant fixes for this WireGuard router │"
            log "$box_sep"
            log " │ WG MTU ip link set ${WG_IFACES[0]:-wg0} mtu ${wg_mtu_display}"
            log " │ MSS clamp iptables -t mangle -A FORWARD │"
            log " │ -p tcp --tcp-flags SYN,RST SYN │"
            log " │ -j TCPMSS --set-mss ${wg_mss}"
            log " │ Also clamp iptables -t mangle -A OUTPUT │"
            log " │ -p tcp --tcp-flags SYN,RST SYN │"
            log " │ -j TCPMSS --set-mss ${wg_mss}"
            for row in "${pmtud_rows[@]}"; do log "$row"; done
            log "$box_end" ;;
        bird-router|router)
            log "$box_top"
            log " │ Relevant fixes for this router │"
            log "$box_sep"
            for row in "${clamp_pmtu_rows[@]}"; do log "$row"; done
            for row in "${pmtud_rows[@]}"; do log "$row"; done
            log "$box_end" ;;
        *)
            log "$box_top"
            log " │ General fixes (apply manually — this script makes NO changes) │"
            log "$box_sep"
            log " │ WireGuard ip link set wg0 mtu 1420 │"
            log " │ PPPoE ip link set ppp0 mtu 1492 │"
            log " │ VXLAN/GRE ip link set vxlan0 mtu 1450 │"
            for row in "${clamp_pmtu_rows[@]}"; do log "$row"; done
            for row in "${pmtud_rows[@]}"; do log "$row"; done
            log "$box_end" ;;
    esac
    log ""
    log " Full log: ${BLD}${LOG_FILE}${RST}"
}
# ── Shareable summary report ──────────────────────────────────────────────────
# Prints a coloured report to stdout and appends a plain-text version (no
# colour codes) to LOG_FILE. All content comes from the RPT_* globals filled
# in by the earlier sections plus the role / WireGuard / BIRD globals.
#
# Overall verdict: 0 issues → PASS, 1-3 → WARN, more → FAIL.
# Load-test section: shows "skipped" for --no-load, the iperf3 result when an
# iperf3 run was recorded (public server, or loopback identified by a recorded
# throughput), and the flood-ping fallback otherwise.
print_report() {
    local ts; ts=$(date '+%Y-%m-%d %H:%M')
    local host_name; host_name=$(hostname -f 2>/dev/null || hostname)

    # Overall verdict string
    local overall_verdict overall_col
    if [[ $ISSUES_FOUND -eq 0 ]]; then
        overall_verdict="PASS — no issues detected"
        overall_col="$GRN"
    elif [[ $ISSUES_FOUND -le 3 ]]; then
        overall_verdict="WARN — ${ISSUES_FOUND} issue(s) found"
        overall_col="$YEL"
    else
        overall_verdict="FAIL — ${ISSUES_FOUND} issue(s) found"
        overall_col="$RED"
    fi

    # Interface counter summary
    local iface_status
    if [[ "${RPT_RX_ERRS:-0}" -eq 0 && "${RPT_RX_DROP:-0}" -eq 0 && \
          "${RPT_TX_ERRS:-0}" -eq 0 && "${RPT_TX_DROP:-0}" -eq 0 ]]; then
        iface_status="${GRN}clean${RST}"
    else
        iface_status="${RED}errors/drops present${RST}"
    fi

    # Load test counter summary
    local load_status
    if [[ "${RPT_LOAD_ERRS:-0}" -eq 0 && "${RPT_LOAD_DROPS:-0}" -eq 0 ]]; then
        load_status="${GRN}clean${RST}"
    else
        load_status="${RED}new errors/drops under load${RST}"
    fi

    # Path MTU verdict: yellow when the exact MTU is more than 10 bytes away
    # from the expected one, or below 1490 when no expectation was given.
    local path_mtu_str="${RPT_EXACT_MTU:-unknown}"
    local path_mtu_col="$GRN"
    if [[ -n "$RPT_EXACT_MTU" && $EXPECTED_MTU -gt 0 ]]; then
        local diff=$(( RPT_EXACT_MTU - EXPECTED_MTU ))
        [[ ${diff#-} -gt 10 ]] && path_mtu_col="$YEL"
    elif [[ -n "$RPT_EXACT_MTU" && "${RPT_EXACT_MTU}" -lt 1490 && $EXPECTED_MTU -eq 0 ]]; then
        path_mtu_col="$YEL"
    fi

    # Clamping
    local clamp_str
    case "${RPT_CLAMPING:-none}" in
        present) clamp_str="${GRN}present${RST}" ;;
        none)
            case "$ROLE" in
                wg-router|wg-bird-router) clamp_str="${RED}MISSING${RST}" ;;
                vps|wg-client) clamp_str="${CYN}not needed (VPS)${RST}" ;;
                *) clamp_str="${YEL}not found${RST}" ;;
            esac ;;
        *) clamp_str="${YEL}unknown${RST}" ;;
    esac

    # Which load test actually ran. --no-load is checked first because server
    # selection may have recorded a server even though no load was generated.
    local load_mode="flood" iperf_server_str="$RPT_IPERF_SERVER"
    if [[ "${NO_LOAD:-0}" -eq 1 ]]; then
        load_mode="skipped"
    elif [[ -n "$RPT_IPERF_SERVER" ]]; then
        load_mode="iperf"
    elif [[ -n "$RPT_IPERF_THROUGHPUT" ]]; then
        # A throughput without a recorded server means the loopback server was used.
        load_mode="iperf"
        iperf_server_str="127.0.0.1 (loopback fallback)"
    fi
    local retransmit_str="${RPT_IPERF_VERDICT:-unknown}"

    echo ""
    echo -e "${BLD}${CYN}╔══════════════════════════════════════════════════════════════════╗${RST}"
    echo -e "${BLD}${CYN}║ HorizonBench — MTU Test Report ║${RST}"
    echo -e "${BLD}${CYN}╚══════════════════════════════════════════════════════════════════╝${RST}"
    echo ""
    printf " %-20s %s\n" "Host:" "$host_name"
    printf " %-20s %s\n" "Date:" "$ts"
    printf " %-20s %s\n" "Role:" "$ROLE"
    printf " %-20s %s\n" "Interface:" "${IFACE} (iface MTU ${RPT_IFACE_MTU:-?})"
    printf " %-20s %s\n" "Test target:" "$TARGET"
    if [[ $EXPECTED_MTU -gt 0 ]]; then
        printf " %-20s %s\n" "Expected MTU:" "$EXPECTED_MTU"
    fi
    echo ""
    echo -e " ${BLD}── Path MTU ──────────────────────────────────────────────────────${RST}"
    printf " %-20s %b\n" "Step-down probe:" "${path_mtu_col}${RPT_PATH_MTU:-unknown} bytes${RST} (closest lower step in probe list)"
    printf " %-20s %b\n" "Exact (bisect):" "${path_mtu_col}${path_mtu_str} bytes${RST}"
    if [[ $EXPECTED_MTU -gt 0 ]]; then
        printf " %-20s %b\n" "vs expected:" "${path_mtu_col}${EXPECTED_MTU} bytes${RST}"
    fi
    echo ""
    echo -e " ${BLD}── TCP ──────────────────────────────────────────────────────────${RST}"
    printf " %-20s %s\n" "Dominant MSS:" "${RPT_DOMINANT_MSS:-unknown}"
    printf " %-20s %b\n" "MSS clamping:" "$clamp_str"
    printf " %-20s %s\n" "PMTUD probing:" "${RPT_PMTUD_PROBE:-0} (tcp_mtu_probing sysctl)"
    echo ""
    echo -e " ${BLD}── Interface counters ───────────────────────────────────────────${RST}"
    printf " %-20s %b\n" "Idle counters:" "$iface_status"
    printf " %-20s %b\n" "Under load:" "$load_status"
    echo ""
    echo -e " ${BLD}── Load test ────────────────────────────────────────────────────${RST}"
    case "$load_mode" in
        skipped)
            printf " %-20s %s\n" "Load test:" "skipped (--no-load)" ;;
        iperf)
            printf " %-20s %s\n" "iperf3 server:" "$iperf_server_str"
            printf " %-20s %s\n" "Throughput:" "${RPT_IPERF_THROUGHPUT:-unknown}"
            printf " %-20s %s\n" "Retransmits:" "$retransmit_str" ;;
        *)
            printf " %-20s %s\n" "Load test:" "flood-ping fallback (iperf3 not installed)" ;;
    esac
    if [[ ${#WG_IFACES[@]} -gt 0 ]]; then
        echo ""
        echo -e " ${BLD}── WireGuard ─────────────────────────────────────────────────────${RST}"
        printf " %-20s %s\n" "Interfaces:" "${WG_IFACES[*]}"
        printf " %-20s %s\n" "Tunnel MTU:" "${WG_MTU:-unknown}"
        printf " %-20s %s\n" "Peers:" "${WG_PEERS:-0}"
        if [[ ${#WG_PUBSUBNETS[@]} -gt 0 ]]; then
            printf " %-20s %s\n" "Public subnets:" "${WG_PUBSUBNETS[*]}"
        fi
    fi
    if [[ $BIRD_RUNNING -eq 1 ]]; then
        echo ""
        echo -e " ${BLD}── BIRD ──────────────────────────────────────────────────────────${RST}"
        printf " %-20s %s\n" "Daemon:" "running"
        if [[ ${#BIRD_PROTOCOLS[@]} -gt 0 ]]; then
            printf " %-20s %s\n" "BGP sessions:" "${BIRD_PROTOCOLS[*]}"
        fi
    fi
    echo ""
    echo -e " ${BLD}── Overall ──────────────────────────────────────────────────────${RST}"
    echo -e " ${overall_col}${BLD} ${overall_verdict}${RST}"
    echo ""
    echo -e " Full log: ${BLD}${LOG_FILE}${RST}"
    echo -e "${BLD}${CYN}══════════════════════════════════════════════════════════════════${RST}"
    echo ""

    # Also write plain-text version to log (no colour codes)
    {
        echo ""
        echo "════════════════════════════════════════════════════════════════════"
        echo " HorizonBench — MTU Test Report"
        echo "════════════════════════════════════════════════════════════════════"
        echo " Host : $host_name"
        echo " Date : $ts"
        echo " Role : $ROLE"
        echo " Interface : ${IFACE} (iface MTU ${RPT_IFACE_MTU:-?})"
        echo " Test target : $TARGET"
        if [[ $EXPECTED_MTU -gt 0 ]]; then echo " Expected MTU : $EXPECTED_MTU"; fi
        echo ""
        echo " Path MTU"
        echo " Step-down : ${RPT_PATH_MTU:-unknown} bytes"
        echo " Exact (bisect) : ${path_mtu_str} bytes"
        if [[ $EXPECTED_MTU -gt 0 ]]; then echo " vs expected : ${EXPECTED_MTU} bytes"; fi
        echo ""
        echo " TCP"
        echo " Dominant MSS : ${RPT_DOMINANT_MSS:-unknown}"
        echo " MSS clamping : ${RPT_CLAMPING:-none}"
        echo " PMTUD probing : ${RPT_PMTUD_PROBE:-0}"
        echo ""
        echo " Interface counters"
        echo " Idle : RX errs=${RPT_RX_ERRS:-0} drops=${RPT_RX_DROP:-0} TX errs=${RPT_TX_ERRS:-0} drops=${RPT_TX_DROP:-0}"
        echo " Under load : new errs=${RPT_LOAD_ERRS:-0} drops=${RPT_LOAD_DROPS:-0}"
        echo ""
        echo " Load test"
        case "$load_mode" in
            skipped)
                echo " Method : skipped (--no-load)" ;;
            iperf)
                echo " Server : $iperf_server_str"
                echo " Throughput : ${RPT_IPERF_THROUGHPUT:-unknown}"
                echo " Retransmits : $retransmit_str" ;;
            *)
                echo " Method : flood-ping fallback" ;;
        esac
        if [[ ${#WG_IFACES[@]} -gt 0 ]]; then
            echo ""
            echo " WireGuard"
            echo " Interfaces : ${WG_IFACES[*]}"
            echo " Tunnel MTU : ${WG_MTU:-unknown}"
            echo " Peers : ${WG_PEERS:-0}"
            if [[ ${#WG_PUBSUBNETS[@]} -gt 0 ]]; then echo " Public subnets : ${WG_PUBSUBNETS[*]}"; fi
        fi
        if [[ $BIRD_RUNNING -eq 1 ]]; then
            echo ""
            echo " BIRD"
            echo " Daemon : running"
            if [[ ${#BIRD_PROTOCOLS[@]} -gt 0 ]]; then echo " BGP sessions : ${BIRD_PROTOCOLS[*]}"; fi
        fi
        echo ""
        # Same verdict text as the coloured report (PASS / WARN / FAIL with count).
        echo " Overall : ${overall_verdict}"
        echo "════════════════════════════════════════════════════════════════════"
    } >> "$LOG_FILE"
}
# ── Main ──────────────────────────────────────────────────────────────────────
# Prints the banner, starts a fresh LOG_FILE, runs every diagnostic section in
# order, then prints the final verdict and the shareable report.
# Positional arguments are accepted but not read inside this function.
main() {
    # One version string for both the banner and the log header, so the two
    # cannot drift apart.
    local -r tool_version="v2.6"

    echo -e "${BLD}${CYN}"
    echo -e " ╔═══════════════════════════════════════════════╗"
    echo -e " ║ MTU DIAGNOSTIC TOOL ${tool_version} ║"
    echo -e " ║ READ-ONLY — no system changes are made ║"
    echo -e " ╚═══════════════════════════════════════════════╝${RST}"
    echo ""
    echo -e " ${CYN}Started : $(date)${RST}"
    echo ""

    # '>' truncates: each run starts with a fresh log file.
    {
        echo "MTU Diagnostic Tool ${tool_version}"
        echo "READ-ONLY — no system changes made"
        echo "Started : $(date)"
        echo "---"
    } > "$LOG_FILE"

    # Environment checks and detection
    require_root
    check_deps
    detect_role
    detect_iface

    # Diagnostic sections, in report order
    section_interface
    section_ping_mtu
    section_pmtu_bisect
    section_tcp_mss
    section_iface_errors
    select_iperf3_server
    section_load_test
    section_tc
    section_summary

    sect "Complete"
    if [[ $ISSUES_FOUND -eq 0 ]]; then
        pass "All checks passed — no MTU issues detected"
    elif [[ $ISSUES_FOUND -le 3 ]]; then
        warn "${ISSUES_FOUND} issue(s) found — review WARNs/FAILs above"
    else
        fail "${ISSUES_FOUND} issue(s) found — MTU configuration needs attention"
    fi
    print_report
}
# Script entry point: hand every command-line argument to main unchanged.
main "$@"