最近在从msm_A/kernel3.18 port自定义mmc driver到msm_B/kernel4.9时,出现了kernel crash,log如下:

[   18.739259] Unable to handle kernel paging request at virtual address ffffffc08b8c4f40
[ 18.743483] pgd = ffffffc0a6b6e000
[ 18.751370] [ffffffc08b8c4f40] *pgd=0000000000000000, *pud=0000000000000000
...
[ 18.829858] PC is at __inval_cache_range+0x24/0x60
[ 18.835663] LR is at __swiotlb_map_sg_attrs+0x9c/0xc8
...
[ 19.133929] [<ffffff8088ea3124>] __inval_cache_range+0x24/0x60
[ 19.138620] [<ffffff808999b3d4>] sdhci_pre_dma_transfer.isra.25+0xa4/0x104
[ 19.144523] [<ffffff808999dd88>] sdhci_send_command+0x8b0/0x1224
[ 19.151379] [<ffffff80899a0924>] sdhci_request+0x14c/0x378
[ 19.157542] [<ffffff80899750d0>] __mmc_start_request+0x78/0x1b4
[ 19.162836] [<ffffff8089977ca4>] mmc_start_request+0x15c/0x2d4
[ 19.168652] [<ffffff8089978348>] __mmc_start_req+0x94/0xf4
[ 19.174553] [<ffffff80899794dc>] mmc_wait_for_req+0x30/0x5c
[ 19.180024] [<ffffff8089b56f0c>] custom_transfer.constprop.7+0x1c8/0x404

custom_transfer定义如下:

/*
 * Transfers `buf` to/from the MMC device via a single-entry scatterlist
 * and a blocking request (snippet abridged by the article's author).
 *
 * NOTE(review): sg_init_one() ultimately calls virt_to_page(buf), which
 * is only valid for addresses in the kernel linear mapping (e.g. memory
 * from kmalloc). Passing a vmalloc'd or module-static buffer here yields
 * a bogus page/address for the later DMA cache maintenance — presumably
 * the root cause of the crash discussed below; confirm against the
 * caller's buffer allocation.
 */
int custom_transfer(char *buf, ...)
{
sg_init_one(&sg, buf, buf_size);
...
mmc_wait_for_req()
...
}

同样的代码在msm_A/kernel 3.18上是正常的,奇怪,先看下死机位置。

cache.S里会定义__inval_cache_range

/*
* __inval_cache_range(start, end)
* - start - start address of region
* - end - end address of region
*/
/*
 * __inval_cache_range(start, end)
 * - start - start address of region
 * - end - end address of region
 *
 * Invalidates the data cache for [start, end). Partially-covered cache
 * lines at either unaligned edge are cleaned-and-invalidated (dc civac)
 * rather than just invalidated, so that valid neighboring data sharing
 * the line is written back instead of discarded.
 *
 * NOTE(review): dc ivac/civac take a *virtual* address and fault if it
 * is unmapped — which is exactly what the crash log above shows
 * ("Unable to handle kernel paging request" with PC here), i.e. a bad
 * address reached this routine from the DMA-mapping path.
 */
ENTRY(__inval_cache_range)
/* x2 = D-cache line size in bytes, x3 = scratch */
dcache_line_size x2, x3
/* x3 = line-size - 1, the alignment mask */
sub x3, x2, #1
tst x1, x3 // end cache line aligned?
bic x1, x1, x3
b.eq 1f
dc civac, x1 // clean & invalidate D / U line
1: tst x0, x3 // start cache line aligned?
bic x0, x0, x3
b.eq 2f
dc civac, x0 // clean & invalidate D / U line
b 3f
/* main loop: invalidate one whole line per iteration */
2: dc ivac, x0 // invalidate D / U line
3: add x0, x0, x2
cmp x0, x1
b.lo 2b
/* ensure completion of the cache maintenance before returning */
dsb sy
ret
ENDPIPROC(__inval_cache_range)

好家伙,死在cache里。

看下临近的sdhci_pre_dma_transfer:

/*
 * Maps the request's scatterlist for DMA (unless it was pre-mapped by
 * the pre_req path) and records the mapping state in the mmc_data
 * cookie. Returns the number of mapped sg entries, or -ENOSPC if the
 * mapping produced none.
 *
 * This is the frame directly above __inval_cache_range in the crash
 * backtrace: dma_map_sg() is where the buffer's cache maintenance is
 * triggered.
 */
static int sdhci_pre_dma_transfer(struct sdhci_host *host,
struct mmc_data *data, int cookie)
{
int sg_count;

/*
 * If the data buffers are already mapped, return the previous
 * dma_map_sg() result.
 */
if (data->host_cookie == COOKIE_PRE_MAPPED)
return data->sg_count;

/* Direction follows the transfer: writes stream to the device,
 * reads stream from it. */
sg_count = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
data->flags & MMC_DATA_WRITE ?
DMA_TO_DEVICE : DMA_FROM_DEVICE);

if (sg_count == 0)
return -ENOSPC;

/* Cache the mapping so a later call can take the early-return above. */
data->sg_count = sg_count;
data->host_cookie = cookie;

return sg_count;
}

dma_map_sg会call dma_map_sg_attrs

/*
 * Generic dma_map_sg() front end: sanity-checks the direction, then
 * dispatches to the device's dma_map_ops->map_sg() (on this platform,
 * __swiotlb_map_sg_attrs per the backtrace). Returns the number of
 * DMA segments produced.
 *
 * NOTE(review): sg_virt(s) assumes each sg entry refers to a
 * linearly-mapped page — an sg built over a non-linear-mapped buffer
 * yields a bogus virtual address that the arch cache maintenance will
 * later dereference.
 */
static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
int nents, enum dma_data_direction dir,
unsigned long attrs)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
int i, ents;
struct scatterlist *s;

for_each_sg(sg, s, nents, i)
kmemcheck_mark_initialized(sg_virt(s), s->length);
BUG_ON(!valid_dma_direction(dir));
ents = ops->map_sg(dev, sg, nents, dir, attrs);
BUG_ON(ents < 0);
debug_dma_map_sg(dev, sg, nents, ents, dir);

return ents;
}

cache line, map… 怀疑buffer。

看下buffer是如何调用的:

/*
 * The offending allocation: a large static 2-D array handed to the
 * driver as a DMA buffer. NOTE(review): static data in a module is
 * presumably not in the kernel linear mapping, so virt_to_page() /
 * cache maintenance on it computes an invalid address — allocate DMA
 * buffers with kmalloc instead (confirm against DMA-API-HOWTO).
 */
static char cust_buf[COUNT][BUFF_SIZE];
custom_transfer(cust_buf[block]);  /* fixed typo: was "custm_transfer" */

二维数组?改成用 kmalloc 动态分配后,问题立刻消失了。

从这个案例可以看出:不要把全局/静态的大二维数组直接用作 DMA buffer。推测原因是:模块里的静态数据不一定位于内核线性映射区,sg_init_one() 内部依赖的 virt_to_page() 对这类地址无效,后续 dma_map_sg() 触发的 cache 维护(__inval_cache_range)就会访问到非法虚拟地址;DMA buffer 应当通过 kmalloc 等保证线性映射的接口分配。:]

btw: 话说这家corp code真是不咋地,要不是…,早…