最近在从msm_A/kernel3.18 port自定义mmc driver到msm_B/kernel4.9时,出现了kernel crash,log如下:

[   18.739259] Unable to handle kernel paging request at virtual address ffffffc08b8c4f40
[   18.743483] pgd = ffffffc0a6b6e000
[   18.751370] [ffffffc08b8c4f40] *pgd=0000000000000000, *pud=0000000000000000
...
[   18.829858] PC is at __inval_cache_range+0x24/0x60
[   18.835663] LR is at __swiotlb_map_sg_attrs+0x9c/0xc8
...
[   19.133929] [<ffffff8088ea3124>] __inval_cache_range+0x24/0x60
[   19.138620] [<ffffff808999b3d4>] sdhci_pre_dma_transfer.isra.25+0xa4/0x104
[   19.144523] [<ffffff808999dd88>] sdhci_send_command+0x8b0/0x1224
[   19.151379] [<ffffff80899a0924>] sdhci_request+0x14c/0x378
[   19.157542] [<ffffff80899750d0>] __mmc_start_request+0x78/0x1b4
[   19.162836] [<ffffff8089977ca4>] mmc_start_request+0x15c/0x2d4
[   19.168652] [<ffffff8089978348>] __mmc_start_req+0x94/0xf4
[   19.174553] [<ffffff80899794dc>] mmc_wait_for_req+0x30/0x5c
[   19.180024] [<ffffff8089b56f0c>] custom_transfer.constprop.7+0x1c8/0x404

custom_transfer定义如下:

/*
 * Pseudo-code of the driver's transfer path ("..." elides detail).
 * buf is wrapped into a single scatterlist entry and handed to the MMC
 * core for a synchronous request — i.e. buf becomes a DMA target.
 * NOTE(review): sg_init_one()/sg_virt() assume buf lies in the kernel
 * linear mapping (e.g. kmalloc'd memory), not arbitrary static data.
 */
int custom_transfer(char *buf, ...)
{
    sg_init_one(&sg, buf, buf_size);   /* buf must be DMA-capable memory */
    ...
    mmc_wait_for_req()                 /* blocks until the request completes */
    ...
}

同样的代码在msm_A/kernel 3.18上是正常的,奇怪,先看下死机位置。

cache.S里会定义__inval_cache_range

/*
 *      __inval_cache_range(start, end)
 *      - start   - start address of region
 *      - end     - end address of region
 *
 *      Invalidates the D-cache over [start, end). Partial lines at either
 *      edge are clean+invalidated (civac) so neighbouring data is written
 *      back rather than discarded; fully covered lines are invalidated
 *      (ivac) directly.
 *      NOTE(review): the "dc" ops take virtual addresses and fault on an
 *      unmapped address — exactly the abort in the log above.
 */
ENTRY(__inval_cache_range)
        dcache_line_size x2, x3                 // x2 = D-cache line size (x3 scratch)
        sub     x3, x2, #1                      // x3 = line-offset mask
        tst     x1, x3                          // end cache line aligned?
        bic     x1, x1, x3                      // round end down to line boundary
        b.eq    1f  
        dc      civac, x1                       // clean & invalidate D / U line
1:      tst     x0, x3                          // start cache line aligned?
        bic     x0, x0, x3                      // round start down to line boundary
        b.eq    2f  
        dc      civac, x0                       // clean & invalidate D / U line
        b       3f  
2:      dc      ivac, x0                        // invalidate D / U line
3:      add     x0, x0, x2                      // advance one cache line
        cmp     x0, x1
        b.lo    2b                              // loop until end is reached
        dsb     sy                              // complete maintenance before returning
        ret 
ENDPIPROC(__inval_cache_range)

好家伙,死在cache里。

看下临近的sdhci_pre_dma_transfer:

/*
 * Map data->sg for DMA (quoted from drivers/mmc/host/sdhci.c, v4.9-era).
 * Returns the number of mapped sg entries, the cached count if the
 * request was already pre-mapped, or -ENOSPC if nothing could be mapped.
 * dma_map_sg() is where the arch cache maintenance on the buffer runs,
 * which is why this frame sits directly above __inval_cache_range in the
 * backtrace above.
 */
static int sdhci_pre_dma_transfer(struct sdhci_host *host,
                                  struct mmc_data *data, int cookie)
{
        int sg_count;

        /*   
         * If the data buffers are already mapped, return the previous
         * dma_map_sg() result.
         */
        if (data->host_cookie == COOKIE_PRE_MAPPED)
                return data->sg_count;

        /* Map for the device; DMA direction follows the transfer direction. */
        sg_count = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
                                data->flags & MMC_DATA_WRITE ?
                                DMA_TO_DEVICE : DMA_FROM_DEVICE);

        if (sg_count == 0)
                return -ENOSPC;

        /* Remember the mapping so a later call can reuse it. */
        data->sg_count = sg_count;
        data->host_cookie = cookie;

        return sg_count;
}

dma_map_sg会call dma_map_sg_attrs

/*
 * Generic DMA scatterlist mapping (include/linux/dma-mapping.h, v4.9).
 * Delegates to the bus dma_map_ops — swiotlb here, per the LR in the
 * crash log — whose ->map_sg performs the arch cache maintenance.
 * NOTE(review): sg_virt(s) below, and the swiotlb map path, both treat
 * each sg entry as a linear-mapping address; a buffer outside the
 * linear map makes this go wrong.
 */
static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
                                   int nents, enum dma_data_direction dir,
                                   unsigned long attrs)
{
        const struct dma_map_ops *ops = get_dma_ops(dev);
        int i, ents;
        struct scatterlist *s; 

        /* kmemcheck bookkeeping: touches each buffer via its virtual address */
        for_each_sg(sg, s, nents, i)
                kmemcheck_mark_initialized(sg_virt(s), s->length);
        BUG_ON(!valid_dma_direction(dir));
        ents = ops->map_sg(dev, sg, nents, dir, attrs);
        BUG_ON(ents < 0); 
        debug_dma_map_sg(dev, sg, nents, ents, dir);

        return ents;
}

cache line 对齐、dma_map_sg 做 cache 维护…… 这些都指向传入的 buffer 本身:怀疑这块 buffer 的地址并不适合做 DMA 映射。

看下buffer是如何调用的:

/*
 * The failing call site: a large static 2-D array used as the DMA buffer.
 * (Fixed the typo "custm_transfer" -> custom_transfer, matching the
 * definition above and the symbol in the backtrace.)
 * NOTE(review): static/global driver data is not guaranteed to lie in the
 * kernel linear mapping on arm64 4.9, so sg_virt()/virt_to_page() on
 * cust_buf produce a bogus address and the cache invalidation faults —
 * allocate DMA buffers with kmalloc() instead.
 */
static char cust_buf[COUNT][BUFF_SIZE];
custom_transfer(cust_buf[block]);

静态二维数组?把 buffer 改成 kmalloc 动态分配后,问题立刻消失了,激动啊。原因在于:从 kernel 4.6 起,arm64 把 kernel image 移出了 linear mapping(移到 vmalloc 区域),静态/全局变量不再位于 linear map 中,sg_virt()/virt_to_page() 据此换算出的地址是错的,cache invalidate 时直接触发 paging fault;而 kmalloc 返回的内存一定在 linear mapping 里,DMA 映射才是合法的。3.18 上 kernel image 还在 linear 区域,所以同样的代码没有问题。

从这个案例可以得出的真正结论是:DMA buffer 必须来自 linear mapping(kmalloc/alloc_pages 等),禁止把静态/全局大数组(无论几维)直接交给 sg_init_one()/dma_map_sg()。

btw: 这份代码的质量确实有待提高;跨大版本移植前,最好先把所有 DMA buffer 的来源梳理一遍。