Modifying control register in kernel module

2019-09-17 06:41发布

The following simple kernel module sets bit 13 of the CR4 register (CR4.VMXE) when it is loaded and clears that bit again on exit.

vmx.c

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>

MODULE_LICENSE("GPL");

/*
 * Read the CR4 control register of the CPU that executes this code.
 *
 * Returns the raw register contents as a 64-bit value.
 */
static inline uint64_t getcr4(void) {
    uint64_t value;

    /* Output-only operand: the instruction fills in `value`. */
    asm volatile ("movq %%cr4, %0\n" : "=r"(value));

    return value;
}

/*
 * Write `val` into the CR4 control register of the CPU that executes this
 * code. The whole register is replaced, so callers that want to change a
 * single bit must first read the current value with getcr4().
 */
static inline void setcr4(register uint64_t val) {
    asm volatile ("movq %0, %%cr4\n" : /* no outputs */ : "r"(val));
}

/*
 * Module load hook: log CR4, set bit 13 (CR4.VMXE), then log CR4 again.
 *
 * NOTE: getcr4()/setcr4() are called directly, so only the CR4 of the CPU
 * that happens to run this function is read and modified; no other CPU is
 * asked to update its register.
 *
 * Always returns 0.
 */
static int __init init_routine(void) {
    uint64_t before = getcr4();
    uint64_t after;

    printk(KERN_INFO "VTX Test loaded: %llu (%u).\n", before, (unsigned char)((before >> 13) & 1));

    /* Read-modify-write: keep every other CR4 bit as it was. */
    setcr4(before | ((uint64_t)1 << 13));

    after = getcr4();
    printk(KERN_INFO "cr4: %llu (%u).\n", after, (unsigned char)((after >> 13) & 1));

    return 0;
}

/*
 * Module unload hook: log CR4, clear bit 13 (CR4.VMXE), then log CR4 again.
 *
 * NOTE: like the load hook, this only touches the CR4 of the CPU that runs
 * it, which need not be the CPU on which the bit was set.
 */
static void __exit exit_routine(void) {
    uint64_t before = getcr4();
    uint64_t after;

    printk(KERN_INFO "cr4: %llu (%u).\n", before, (unsigned char)((before >> 13) & 1));

    /* Mask out only the VMXE bit; all other CR4 bits are preserved. */
    setcr4(before & ~((uint64_t)1 << 13));

    after = getcr4();
    printk(KERN_INFO "VTX Test exited: %llu (%u).\n", after, (unsigned char)((after >> 13) & 1));
}

/* Register init_routine as the load handler and exit_routine as the unload handler. */
module_init(init_routine);
module_exit(exit_routine);

Makefile

# Build vmx.c as an out-of-tree kernel module (vmx.ko).
obj-m += vmx.o

# Neither target produces a file with its own name.
.PHONY: all clean

# Delegate to the kbuild tree of the running kernel. $(MAKE) rather than a
# bare "make" keeps command-line flags and the jobserver working in the
# sub-make. Recipe lines must start with a tab character.
all:
	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules

clean:
	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean

To run the module I use make clean && make && sudo insmod vmx.ko && sudo rmmod vmx && sudo dmesg -c. This sometimes gives me the following (expected) output

[ 2295.121537] VTX Test loaded: 1312736 (0).
[ 2295.121540] cr4: 1320928 (1).
[ 2295.123975] cr4: 1320928 (1).
[ 2295.123977] VTX Test exited: 1312736 (0).

And sometimes also the following:

[ 2296.256982] VTX Test loaded: 1320928 (1).
[ 2296.256984] cr4: 1320928 (1).
[ 2296.259481] cr4: 1312736 (0).
[ 2296.259483] VTX Test exited: 1312736 (0).

The second and third lines in the second output seem strange to me, because it looks as if the modified control register CR4 has been reset after leaving init_routine. Additionally, it is strange that in the first line the VMXE bit already seems to be set, which doesn't really make any sense. Is this behavior normal? How can it be explained? Could there be another kernel module running which modifies CR4? This seems rather strange, because I've seen several VTX implementations and they all set the VMXE bit in their initialization routine and clear the bit in their exit routine in the same fashion as this module.

2条回答
叛逆
2楼-- · 2019-09-17 07:08

As it turns out, the problem is that the register is not modified on all CPU cores. To ensure that the modifications happen on all cores it seems to be enough to invoke on_each_cpu. Fixed code below, Makefile unchanged.

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>

MODULE_LICENSE("GPL");

/*
 * Return the current contents of CR4 on the executing CPU.
 *
 * Each CPU has its own CR4, so the result depends on which CPU the caller
 * is running on.
 */
static inline uint64_t getcr4(void) {
    uint64_t cr4_value;

    /* "=r": the instruction writes the register value into cr4_value. */
    asm volatile ("movq %%cr4, %0\n" : "=r"(cr4_value));

    return cr4_value;
}

/*
 * Load `val` into CR4 on the executing CPU, replacing the entire register.
 * Callers changing a single bit should start from the value returned by
 * getcr4().
 */
static inline void setcr4(register uint64_t val) {
    asm volatile ("movq %0, %%cr4\n" : /* no outputs */ : "r"(val));
}

/*
 * Callback passed to on_each_cpu(): set bit 13 (CR4.VMXE) in the CR4 of the
 * CPU it runs on, leaving all other bits untouched. `info` is unused.
 */
static void setvmxe(void* info) {
    setcr4(getcr4() | ((uint64_t)1 << 13));
}

/*
 * Callback passed to on_each_cpu(): clear bit 13 (CR4.VMXE) in the CR4 of
 * the CPU it runs on, leaving all other bits untouched. `info` is unused.
 */
static void clearvmxe(void* info) {
    setcr4(getcr4() & ~((uint64_t)1 << 13));
}

/*
 * Module load hook: log this CPU's CR4, set bit 13 (CR4.VMXE) on every
 * online CPU, then log this CPU's CR4 again.
 *
 * The logged values are those of whichever CPU runs this function.
 *
 * Always returns 0.
 */
static int __init init_routine(void) {
    uint64_t cr4 = getcr4();

    printk(KERN_INFO "VTX Test loaded: %llu (%u).\n", cr4, (unsigned char)((cr4 >> 13) & 1));
    /*
     * wait=1: block until setvmxe has finished on all CPUs, so the bit is
     * guaranteed to be set everywhere by the time the module reports itself
     * as loaded. With wait=0 the remote CPUs may still be pending.
     */
    on_each_cpu(setvmxe, NULL, 1);
    cr4 = getcr4();
    printk(KERN_INFO "cr4: %llu (%u).\n", cr4, (unsigned char)((cr4 >> 13) & 1));

    return 0;
}

/*
 * Module unload hook: log this CPU's CR4, clear bit 13 (CR4.VMXE) on every
 * online CPU, then log this CPU's CR4 again.
 *
 * The logged values are those of whichever CPU runs this function.
 */
static void __exit exit_routine(void) {
    uint64_t cr4 = getcr4();

    printk(KERN_INFO "cr4: %llu (%u).\n", cr4, (unsigned char)((cr4 >> 13) & 1));
    /*
     * wait=1 is required here: clearvmxe lives in this module's text, which
     * is freed once unloading completes. Returning before every CPU has
     * finished the callback (wait=0) could leave a remote CPU executing
     * code that no longer exists.
     */
    on_each_cpu(clearvmxe, NULL, 1);
    cr4 = getcr4();
    printk(KERN_INFO "VTX Test exited: %llu (%u).\n", cr4, (unsigned char)((cr4 >> 13) & 1));
}

/* Register init_routine as the load handler and exit_routine as the unload handler. */
module_init(init_routine);
module_exit(exit_routine);
查看更多
Ridiculous、
3楼-- · 2019-09-17 07:25

You probably have the kvm module (and the associated kvm-intel or kvm-amd module) loaded. These modules already manage the processor's VT state, and are likely to be very confused if you start modifying it elsewhere.

Take a look at the arch/x86/kvm directory in the kernel source to get a sense of what already exists.

查看更多
登录 后发表回答