QCOM Android Q平台,现场adb不可用,TP不能用,Kernel 4.x。
热键进入ramdump mode,导出RAM后check kernel log,发现如下eMMC错误:
[41534.077689] mmc0: Reset 0x4 never completed.
[41534.077715] mmc0: sdhci: ============ SDHCI REGISTER DUMP ===========
[41534.077722] mmc0: sdhci: Sys addr: 0x00000000 | Version: 0x00007202
[41534.077728] mmc0: sdhci: Blk size: 0x00000200 | Blk cnt: 0x00000001
[41534.077733] mmc0: sdhci: Argument: 0x002c0046 | Trn mode: 0x00000013
...
[41534.077814] ----------- VENDOR REGISTER DUMP -----------
[41534.077816] ---- Debug RAM dump ----
[41534.077823] cmdq-host: Debug RAM wrap-around: 0x0000ff80 | Debug RAM overlap: 0x00000596
[41534.077828] cmdq-host: Debug RAM dump [0]: 0x0000014d
[41534.077834] cmdq-host: Debug RAM dump [1]: 0x0000006c
ok,上code:
while (1 ) { bool timedout = ktime_after(ktime_get(), timeout); if (!(sdhci_readb(host, SDHCI_SOFTWARE_RESET) & mask)) break ; if (timedout) { pr_err("%s: Reset 0x%x never completed.\n" , mmc_hostname(host->mmc), (int )mask); MMC_TRACE(host->mmc, "%s: Reset 0x%x never completed\n" , __func__, (int )mask); if ((host->quirks2 & SDHCI_QUIRK2_USE_RESET_WORKAROUND) && host->ops->reset_workaround) { if (!host->reset_wa_applied) { host->ops->reset_workaround(host, 1 ); host->reset_wa_applied = 1 ; host->reset_wa_cnt++; goto retry_reset; } else { pr_err("%s: Reset 0x%x failed with workaround\n" , mmc_hostname(host->mmc), (int )mask); host->ops->reset_workaround(host, 0 ); host->reset_wa_applied = 0 ; } } sdhci_dumpregs(host); return ; } udelay(10 ); }
void sdhci_dumpregs (struct sdhci_host *host) { MMC_TRACE(host->mmc, "%s: 0x04=0x%08x 0x06=0x%08x 0x0E=0x%08x 0x30=0x%08x 0x34=0x%08x 0x38=0x%08x\n" , __func__, sdhci_readw(host, SDHCI_BLOCK_SIZE), sdhci_readw(host, SDHCI_BLOCK_COUNT), sdhci_readw(host, SDHCI_COMMAND), sdhci_readl(host, SDHCI_INT_STATUS), sdhci_readl(host, SDHCI_INT_ENABLE), sdhci_readl(host, SDHCI_SIGNAL_ENABLE)); mmc_stop_tracing(host->mmc); SDHCI_DUMP("============ SDHCI REGISTER DUMP ===========\n" );
so,是 `timedout` 为真(即等待reset完成超时)导致的这个问题,我们追一下why timeout。
/*
 * ktime_compare - three-way comparison of two ktime_t values.
 *
 * Returns -1 when cmp1 < cmp2, 1 when cmp1 > cmp2, and 0 when the two
 * values are equal.
 */
static inline int ktime_compare(const ktime_t cmp1, const ktime_t cmp2)
{
	int order = 0;

	if (cmp1 > cmp2)
		order = 1;
	else if (cmp1 < cmp2)
		order = -1;

	return order;
}

/*
 * ktime_after - test whether cmp1 is strictly greater than cmp2.
 *
 * Returns true only when ktime_compare() reports cmp1 > cmp2; equal
 * values yield false.
 */
static inline bool ktime_after(const ktime_t cmp1, const ktime_t cmp2)
{
	const int order = ktime_compare(cmp1, cmp2);

	return order > 0;
}
ok,`ktime_after()` 返回true说明 `cmp1 > cmp2`,也就是 `ktime_get() > timeout`。把前面的code check下:
void sdhci_reset (struct sdhci_host *host, u8 mask) { ktime_t timeout; retry_reset: sdhci_writeb(host, mask, SDHCI_SOFTWARE_RESET); if (mask & SDHCI_RESET_ALL) { host->clock = 0 ; if (host->quirks2 & SDHCI_QUIRK2_CARD_ON_NEEDS_BUS_ON) sdhci_runtime_pm_bus_off(host); } timeout = ktime_add_ms(ktime_get(), 100 ); if (host->ops->check_power_status && host->pwr && (mask & SDHCI_RESET_ALL)) host->ops->check_power_status(host, REQ_BUS_OFF); sdhci_writel(host, sdhci_readl(host, SDHCI_INT_STATUS), SDHCI_INT_STATUS);
/* Add nsval nanoseconds to kt; expands to a plain addition. */
#define ktime_add_ns(kt, nsval)		((kt) + (nsval))

/*
 * ktime_add_ms - add msec milliseconds to kt.
 *
 * The millisecond count is scaled by NSEC_PER_MSEC and the result is
 * added to kt through ktime_add_ns().
 */
static inline ktime_t ktime_add_ms(const ktime_t kt, const u64 msec)
{
	const u64 nsec = msec * NSEC_PER_MSEC;

	return ktime_add_ns(kt, nsec);
}
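`timeout` 也就是发起reset时的 `ktime_get() + 100ms`。也就是说过了100ms,SDHCI_SOFTWARE_RESET 寄存器里对应的reset位(log中mask为0x4,即 SDHCI_RESET_DATA)还没有被清零,reset还没完成。而且也没有走workaround:如果走了workaround,会 `goto retry_reset` 重试,再次失败时log里会出现第二次 "never completed" 以及 "failed with workaround",而实际log里只有一次 "never completed",紧接着就是寄存器dump。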
为啥会这样?一开始以为是个别机器的问题,后来发现有app在以ms级的间隔频繁操作eMMC,怀疑就是它了:) although not confirmed. rt?
版权声明: 本站所有文章均采用 CC BY-NC-SA 4.0 CN 许可协议。转载请注明原文链接!