Reading block_device from kernel (3.7) module: seg

2019-04-14 04:57发布

问题:

Hello stackoverflow.

Wondering what I got wrong trying to access a block-device directly from a kernel module. (Kernel 3.7 on amd64)

I get the (struct gendisk*) of the device using get_gendisk(). Next, create a bio with bio_map_kern(), add the block_device to it using bdget_disk(), and send it using submit_bio(). (see code below)

When doing this on 'sdb', it works fine. When doing it on 'loop0' or a ramdisk device, it fails with segfault. The fault boils down to generic_make_request_checks() calls inline-function bdev_get_queue(), which is trying to access the 'bd_disk' field in the block_device struct.

RIP: 0010:[] [] generic_make_request_checks+0x3e/0x2b1

When getting the block_device from 'sdb', the bd_disk is linked back to the device's gendisk struct (on any partition of it). When trying the same on the 'loop0' device, this pointer is zero. But loop0 is properly set up as I can mkfs, mount or dd with it.

Any hints on how to set up a simple data read? The clean and proper way? adding the gendisk pointer to the block_device cannot be a nice solution as I don't 'own' the structures.

Maybe this approach is all wrong and there is some easy read() function I missed... :-)

callstack (kernel 3.7, amd64):

[<ffffffff812796f5>] generic_make_request_checks+0x3e/0x2b1
[<ffffffff8103c2b8>] ? console_trylock+0xf/0x47
[<ffffffff8103dad0>] ? vprintk_emit+0x3aa/0x3d0
[<ffffffff81279976>] ? generic_make_request+0xe/0xd5
[<ffffffff8127a75c>] ? submit_bio+0x10a/0x13b
[<ffffffffa02ad191>] ? init_tryKM2+0x16e/0x221 [tryKM2]
[<ffffffffa02ad023>] ? endFunc_tryKM2+0x23/0x23 [tryKM2]
[<ffffffff810020b6>] ? do_one_initcall+0x75/0x12b
[<ffffffff8107c9a3>] ? sys_init_module+0x105/0x251
[<ffffffff8145f1e9>] ? system_call_fastpath+0x16/0x1b

My test code for trying this:

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <linux/bio.h>
#include <linux/completion.h>

typedef struct _drvDat
{
    dev_t di;
    struct gendisk *gd;
} drvDat;

#define printf(...) printk(KERN_ALERT "tryKM2: " __VA_ARGS__)
#define DEVNAME "loop0"
#define PARTITIONNO 0

static int init_tryKM2(void)
{
  int dummy;
    // init self
    memset(&self,0,sizeof(self));

    self.di = blk_lookup_devt(DEVNAME,0);    // use partition zero here. gendisk handle is same anyway.
    if(self.di==0)
        return -1;
    self.gd = get_gendisk(self.di,&dummy);        // get_gendisk always gets the 'disk', even if dev_t points to a partition.
    if(self.gd==0)
        return -1;

    // check if have partitions  (this array always has at least one entry. That is same as &(self.gd->part0)
    if(self.gd->part_tbl->len<1+PARTITIONNO)
        {put_disk(self.gd);self.gd=0;return -1;}

    readbytes_tryKM2(&self);

    return 0;
}

static void endFunc_tryKM2(struct bio *bb, int err)
{
    if(bb->bi_private)
        complete( (struct completion*)(bb->bi_private) );
}

static void readbytes_tryKM2(drvDat *self)
{
  struct bio *bb;
  DECLARE_COMPLETION_ONSTACK(waithandle);
  unsigned char *buf;
  unsigned int i,j;

    printf("readbytes_tryKM2\n");
    buf = (unsigned char*)vmalloc(0x800);
    memset( buf , 0xFE , 0x800 );


    bb = bio_map_kern( self->gd->queue , buf , 0x400 , GFP_KERNEL );
    if( IS_ERR(bb) )
        {vfree(buf);return;}

    bb->bi_sector = 0;

    bb->bi_bdev = bdget_disk(self->gd,PARTITIONNO);
    printf("   bi_bdev = %016lX\n",(unsigned long)(bb->bi_bdev));
    printf("   bi_bdev->bd_disk = %016lX\n",(unsigned long)(bb->bi_bdev->bd_disk));

    bb->bi_end_io = endFunc_tryKM2;
    bb->bi_private = &waithandle;

    printf("  send...\n");
    submit_bio( 0 , bb );
    printf("  wait...\n");
    wait_for_completion(&waithandle);

    printf("  done. flags=0x%X\n",(unsigned int)(bb->bi_flags));

    if(!( bb->bi_flags & (1<<BIO_UPTODATE) ))
        {bio_put(bb);vfree(buf);return;}

    // just dump data... (misusing upper half of buffer)
    for(i=0;i<0x400;i+=0x20)
    {
        for(j=0;j<0x20;j++)
            snprintf( (char*)(buf+0x400+3*j) , 8 , "%02X " , (unsigned int)buf[i+j] );
        buf[3*0x20-1]=0;
        printf("   %s\n",buf+0x400);
    }

    bio_put(bb);

    vfree(buf);
}

module_init(init_tryKM2);
module_exit(cleanup_tryKM2);

回答1:

Instead of bb->bi_bdev = bdget_disk(self->gd,PARTITIONNO); Try this bb->bi_bdev = blkdev_get_by_dev(self->di, FMODE_READ|FMODE_WRITE|FMODE_EXCL, NULL);



回答2:

I found that I would first lookup the bdev by its path:

struct block_device *bdev;
bdev = lookup_bdev("/dev/loop0");

And then open it:

bb->bi_bdev = blkdev_get_by_dev(bdev->bd_dev, FMODE_READ|FMODE_WRITE, NULL);

OR

bb->bi_bdev = blkdev_get_by_path("/dev/loop0", FMODE_READ|FMODE_WRITE, NULL);

Without FMODE_EXCL. But when you are done, you need to close the device:

blkdev_put(bb->bi_bdev, FMODE_READ|FMODE_WRITE);