eMMC CMDQ Timeout Analysis
A look at the HW architecture first
CQE: Command Queueing Engine, responsible for managing data transfers between software and the eMMC device.
The CQE first receives tasks from SW (via the TDL and doorbell); the command flow that follows has three main steps:
- Step 1. Queuing a Transaction (CMD44 + CMD45)
The CQE issues CMD44/CMD45 to the eMMC to queue a (data xfer) task; an R1 response from the device means the task has been queued in the device. This can be observed through the CQDPT register: the CQE sets the corresponding bit when it receives the R1 response and clears the bit when the task completes.
- Step 2. Checking the Queue Status (CMD13)
The CQE issues CMD13 to read the QSR (Queue Status Register) and decide which task to execute. The device answers with an R1, a 32-bit value in which each bit corresponds to a task: bit = 0 means the task is not ready for execution, bit = 1 means it is ready (see the sketch after this list).
- Step 3. Execution of a Queued Task (CMD46/CMD47)
The CQE issues CMD46 (read) or CMD47 (write) for the tasks that the QSR reports as "ready for execution".
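To make Step 2 concrete, here is a minimal userspace sketch (my own illustration, not CQE or kernel code; the function name is made up) of how the 32-bit QSR bitmap maps bits to tasks that are ready for CMD46/CMD47:

#include <stdint.h>
#include <stdio.h>

/* Illustration only: the R1 response to CMD13 carries the QSR, a 32-bit
 * bitmap with one bit per queued task. Bit N == 1 means task N is ready,
 * so the CQE may issue CMD46/CMD47 for it. */
static void list_ready_tasks(uint32_t qsr)
{
	for (int task = 0; task < 32; task++) {
		if (qsr & (1u << task))
			printf("task %d is ready for execution\n", task);
	}
}

int main(void)
{
	/* Example QSR: tasks 3 and 25 are ready. */
	list_ready_tasks((1u << 3) | (1u << 25));
	return 0;
}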
Linux code analysis
Let's look at the timeout-related code, on msm kernel 4.14. The log line in question:
mmc0: request with tag: 25 flags: 0x103001 timed out
find the code:
enum blk_eh_timer_return mmc_cmdq_rq_timed_out(struct request *req)
{
struct mmc_queue *mq = req->q->queuedata;
pr_err("%s: request with tag: %d flags: 0x%x timed out\n",
mmc_hostname(mq->card->host), req->tag, req->cmd_flags);
return mq->cmdq_req_timed_out(req);
}
int mmc_cmdq_init(struct mmc_queue *mq, struct mmc_card *card)
{
...
blk_queue_rq_timed_out(mq->queue, mmc_cmdq_rq_timed_out); //tj: here
blk_queue_rq_timeout(mq->queue, 120 * HZ); //tj: 120s
card->cmdq_init = true;
return ret;
}
block/blk-settings.c:
void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
{
q->rq_timeout = timeout;
}
void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn)
{
WARN_ON_ONCE(q->mq_ops);
q->rq_timed_out_fn = fn;
}
Let's see who calls ->rq_timed_out_fn:
static void blk_rq_timed_out(struct request *req)
{
struct request_queue *q = req->q;
enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
if (q->rq_timed_out_fn)
ret = q->rq_timed_out_fn(req); //tj: here
switch (ret) {
case BLK_EH_HANDLED:
__blk_complete_request(req); //tj: here
break;
case BLK_EH_RESET_TIMER:
blk_add_timer(req);
blk_clear_rq_complete(req);
break;
case BLK_EH_NOT_HANDLED:
/*
* LLD handles this for now but in the future
* we can send a request msg to abort the command
* and we can move more of the generic scsi eh code to
* the blk layer.
*/
break;
default:
printk(KERN_ERR "block: bad eh return: %d\n", ret);
break;
}
}
It should be called from blk_rq_check_expired():
static void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout,
unsigned int *next_set)
{
if (time_after_eq(jiffies, rq->deadline)) {
list_del_init(&rq->timeout_list);
/*
* Check if we raced with end io completion
*/
if (!blk_mark_rq_complete(rq))
blk_rq_timed_out(rq); //here
}
The rq->deadline here reflects the 120 s timeout; this request ran past 120 s, and that is the anomaly.
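A minimal userspace sketch of that deadline arithmetic (assuming HZ = 100 purely for illustration; time_after_eq() below mirrors the kernel's wrap-around-safe comparison):

#include <stdbool.h>
#include <stdio.h>

#define HZ 100				/* assumed value, for illustration only */
typedef unsigned long jiffies_t;

/* Same idea as the kernel's time_after_eq(): compare via a signed delta
 * so that jiffies wrap-around is handled correctly. */
static bool time_after_eq(jiffies_t a, jiffies_t b)
{
	return (long)(a - b) >= 0;
}

int main(void)
{
	jiffies_t jiffies = 1000;			/* arbitrary start */
	jiffies_t deadline = jiffies + 120 * HZ;	/* rq->deadline */

	/* 119 s later: not yet expired */
	printf("%d\n", time_after_eq(jiffies + 119 * HZ, deadline));
	/* 120 s later: expired -> blk_rq_timed_out() path */
	printf("%d\n", time_after_eq(jiffies + 120 * HZ, deadline));
	return 0;
}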
void blk_timeout_work(struct work_struct *work)
{
struct request_queue *q =
container_of(work, struct request_queue, timeout_work);
unsigned long flags, next = 0;
struct request *rq, *tmp;
int next_set = 0;
spin_lock_irqsave(q->queue_lock, flags);
list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list)
blk_rq_check_expired(rq, &next, &next_set);
...
}
The timeout_work above is kicked by the request queue's timeout timer:
static void blk_rq_timed_out_timer(unsigned long data)
{
struct request_queue *q = (struct request_queue *)data;
kblockd_schedule_work(&q->timeout_work);
}
struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
...
setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
INIT_WORK(&q->timeout_work, blk_timeout_work);
...
}
Who triggers this timer? It should be blk_add_timer(), called when a request is started:
/**
* blk_add_timer - Start timeout timer for a single request
* @req: request that is about to start running.
*
* Notes:
* Each request has its own timer, and as it is added to the queue, we
* set up the timer. When the request completes, we cancel the timer.
*/
void blk_add_timer(struct request *req)
{
struct request_queue *q = req->q;
unsigned long expiry;
if (!q->mq_ops)
lockdep_assert_held(q->queue_lock);
/* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */
if (!q->mq_ops && !q->rq_timed_out_fn)
return;
BUG_ON(!list_empty(&req->timeout_list));
/*
* Some LLDs, like scsi, peek at the timeout to prevent a
* command from being retried forever.
*/
if (!req->timeout)
req->timeout = q->rq_timeout;
req->deadline = jiffies + req->timeout;
See that? req->timeout, i.e. the 120 s value, is added to jiffies to form req->deadline. Trace the call chain:
blk_queue_start_tag() -> blk_start_request() -> blk_add_timer()
OK, back to the mmc driver:
static bool mmc_check_blk_queue_start_tag(struct request_queue *q,
struct request *req)
{
int ret;
spin_lock_irq(q->queue_lock);
ret = blk_queue_start_tag(q, req); //tj:here
spin_unlock_irq(q->queue_lock);
return !!ret;
}
static inline void mmc_cmdq_ready_wait(struct mmc_host *host,
struct mmc_queue *mq)
{
struct mmc_cmdq_context_info *ctx = &host->cmdq_ctx;
struct request_queue *q = mq->queue;
/*
* Wait until all of the following conditions are true:
* 1. There is a request pending in the block layer queue
* to be processed.
* 2. If the peeked request is flush/discard then there shouldn't
* be any other direct command active.
* 3. cmdq state should be unhalted.
* 4. cmdq state shouldn't be in error state.
* 5. There is no outstanding RPMB request pending.
* 6. free tag available to process the new request.
* (This must be the last condtion to check)
*/
wait_event(ctx->wait, kthread_should_stop()
|| (mmc_peek_request(mq) &&
!(((req_op(mq->cmdq_req_peeked) == REQ_OP_FLUSH) ||
(req_op(mq->cmdq_req_peeked) == REQ_OP_DISCARD) ||
(req_op(mq->cmdq_req_peeked) == REQ_OP_SECURE_ERASE))
&& test_bit(CMDQ_STATE_DCMD_ACTIVE, &ctx->curr_state))
&& !(!host->card->part_curr && !mmc_card_suspended(host->card)
&& mmc_host_halt(host))
&& !(!host->card->part_curr && mmc_host_cq_disable(host) &&
!mmc_card_suspended(host->card))
&& !test_bit(CMDQ_STATE_ERR, &ctx->curr_state)
&& !atomic_read(&host->rpmb_req_pending)
&& !mmc_check_blk_queue_start_tag(q, mq->cmdq_req_peeked))); //tj: here
}
Check no.6 in the comment above.
static int mmc_cmdq_thread(void *d)
{
struct mmc_queue *mq = d;
struct mmc_card *card = mq->card;
struct mmc_host *host = card->host;
current->flags |= PF_MEMALLOC;
if (card->host->wakeup_on_idle)
set_wake_up_idle(true);
while (1) {
int ret = 0;
mmc_cmdq_ready_wait(host, mq);
if (kthread_should_stop())
break;
ret = mmc_cmdq_down_rwsem(host, mq->cmdq_req_peeked);
if (ret) {
mmc_cmdq_up_rwsem(host);
continue;
}
ret = mq->cmdq_issue_fn(mq, mq->cmdq_req_peeked);
mmc_cmdq_up_rwsem(host);
/*
* Don't requeue if issue_fn fails.
* Recovery will be come by completion softirq
* Also we end the request if there is a partition switch
* error, so we should not requeue the request here.
*/
} /* loop */
return 0;
}
int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
spinlock_t *lock, const char *subname, int area_type)
{
...
mq->thread = kthread_run(mmc_cmdq_thread, mq,
"mmc-cmdqd/%d%s", //tj:here
host->index,
subname ? subname : "");
if (IS_ERR(mq->thread)) {
pr_err("%s: %d: cmdq: failed to start mmc-cmdqd thread\n",
mmc_hostname(card->host), ret);
ret = PTR_ERR(mq->thread);
}
In other words, once the cmdq state checks out, the thread can throw the request at the HW CMDQ, right? And it is here that the tag is started (blk_queue_start_tag(), condition no.6) and the 120 s timer armed; one of these started tags is what was later found to have timed out.
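A highly simplified, hypothetical model of that hand-off (start_tag() and issue_to_lld() are invented names, not the kernel API): starting the tag is what arms the 120 s deadline, and the request is then handed to the LLD, which must complete it before the deadline:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define RQ_TIMEOUT_SEC 120		/* models q->rq_timeout = 120 * HZ */

struct request_model {
	int tag;
	time_t deadline;
};

/* Models blk_queue_start_tag() -> blk_start_request() -> blk_add_timer() */
static bool start_tag(struct request_model *rq, int tag)
{
	rq->tag = tag;
	rq->deadline = time(NULL) + RQ_TIMEOUT_SEC;
	return true;
}

/* Models mq->cmdq_issue_fn() handing the request to the host controller */
static void issue_to_lld(const struct request_model *rq)
{
	printf("issue tag %d, must complete before %ld\n",
	       rq->tag, (long)rq->deadline);
}

int main(void)
{
	struct request_model rq;

	if (start_tag(&rq, 25))		/* condition no.6 in the wait above */
		issue_to_lld(&rq);
	return 0;
}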
Now let's look at the error handling after a timeout. The timeout entry point is ->cmdq_req_timed_out:
static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
...
{
...
if (card->cmdq_init) {
md->flags |= MMC_BLK_CMD_QUEUE;
md->queue.cmdq_complete_fn = mmc_blk_cmdq_complete_rq;
md->queue.cmdq_issue_fn = mmc_blk_cmdq_issue_rq;
md->queue.cmdq_error_fn = mmc_blk_cmdq_err;
md->queue.cmdq_req_timed_out = mmc_blk_cmdq_req_timed_out; //tj: here
md->queue.cmdq_shutdown = mmc_blk_cmdq_shutdown;
}
static enum blk_eh_timer_return mmc_blk_cmdq_req_timed_out(struct request *req)
{
struct mmc_queue *mq = req->q->queuedata;
struct mmc_host *host = mq->card->host;
struct mmc_queue_req *mq_rq = req->special;
struct mmc_request *mrq;
struct mmc_cmdq_req *cmdq_req;
struct mmc_cmdq_context_info *ctx_info = &host->cmdq_ctx;
BUG_ON(!host);
/*
* The mmc_queue_req will be present only if the request
* is issued to the LLD. The request could be fetched from
* block layer queue but could be waiting to be issued
* (for e.g. clock scaling is waiting for an empty cmdq queue)
* Reset the timer in such cases to give LLD more time
*/
if (!mq_rq) {
pr_warn("%s: restart timer for tag: %d\n", __func__, req->tag);
return BLK_EH_RESET_TIMER;
}
mrq = &mq_rq->cmdq_req.mrq;
cmdq_req = &mq_rq->cmdq_req;
BUG_ON(!mrq || !cmdq_req);
if (cmdq_req->cmdq_req_flags & DCMD)
mrq->cmd->error = -ETIMEDOUT;
else
mrq->data->error = -ETIMEDOUT;
host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]++;
if (mrq->cmd && mrq->cmd->error) {
if (!(mrq->req->cmd_flags & REQ_PREFLUSH)) {
/*
* Notify completion for non flush commands like
* discard that wait for DCMD finish.
*/
set_bit(CMDQ_STATE_REQ_TIMED_OUT,
&ctx_info->curr_state);
complete(&mrq->completion);
return BLK_EH_NOT_HANDLED;
}
}
if (test_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state) ||
test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state))
return BLK_EH_NOT_HANDLED;
set_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state);
return BLK_EH_HANDLED;
}
It first records the timeout as -ETIMEDOUT, then marks curr_state with CMDQ_STATE_REQ_TIMED_OUT.
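Condensing the handler above into a hedged userspace model (the flag names mirror the driver, but this is an illustration, not the driver itself):

#include <stdbool.h>
#include <stdio.h>

enum blk_eh_ret { EH_HANDLED, EH_RESET_TIMER, EH_NOT_HANDLED };

#define STATE_REQ_TIMED_OUT	(1u << 0)	/* CMDQ_STATE_REQ_TIMED_OUT */
#define STATE_ERR		(1u << 1)	/* CMDQ_STATE_ERR */

static enum blk_eh_ret timed_out_model(bool issued_to_lld, bool dcmd_nonflush,
				       unsigned int *curr_state)
{
	if (!issued_to_lld)		/* not at the LLD yet: give it more time */
		return EH_RESET_TIMER;
	if (dcmd_nonflush) {		/* e.g. discard waiting for DCMD finish */
		*curr_state |= STATE_REQ_TIMED_OUT;
		return EH_NOT_HANDLED;	/* completion is signalled directly */
	}
	if (*curr_state & (STATE_REQ_TIMED_OUT | STATE_ERR))
		return EH_NOT_HANDLED;	/* error handling already in flight */
	*curr_state |= STATE_REQ_TIMED_OUT;
	return EH_HANDLED;		/* __blk_complete_request() -> softirq */
}

int main(void)
{
	unsigned int state = 0;

	printf("first data req:  %d\n", timed_out_model(true, false, &state)); /* EH_HANDLED */
	printf("second data req: %d\n", timed_out_model(true, false, &state)); /* EH_NOT_HANDLED */
	return 0;
}

Returning BLK_EH_HANDLED takes us back into blk_rq_timed_out(), which calls __blk_complete_request():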
static void blk_rq_timed_out(struct request *req)
{
struct request_queue *q = req->q;
enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
if (q->rq_timed_out_fn)
ret = q->rq_timed_out_fn(req); //tj: here
switch (ret) {
case BLK_EH_HANDLED:
__blk_complete_request(req); //tj: here
block/blk-softirq.c:
void __blk_complete_request(struct request *req)
{
...
if (ccpu == cpu || shared) {
struct list_head *list;
do_local:
list = this_cpu_ptr(&blk_cpu_done);
list_add_tail(&req->ipi_list, list);
/*
* if the list only contains our just added request,
* signal a raise of the softirq. If there are already
* entries there, someone already raised the irq but it
* hasn't run yet.
*/
if (list->next == &req->ipi_list)
raise_softirq_irqoff(BLOCK_SOFTIRQ); //tj: here
} else if (raise_blk_irq(ccpu, req))
goto do_local;
...
}
This is softirq territory, and the trigger should be raise_softirq_irqoff(). Now let's look at the mmc drv side:
int mmc_cmdq_init(struct mmc_queue *mq, struct mmc_card *card)
{
...
blk_queue_softirq_done(mq->queue, mmc_cmdq_softirq_done);
INIT_WORK(&mq->cmdq_err_work, mmc_cmdq_error_work);
void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn)
{
q->softirq_done_fn = fn;
}
blk_done_softirq() -> mmc_cmdq_softirq_done (->softirq_done_fn)
static __init int blk_softirq_init(void)
{
int i;
for_each_possible_cpu(i)
INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
...
}
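Putting the registration and the dispatch together, here is a small hypothetical userspace model of the callback chain (types and names are illustrative only): __blk_complete_request() raises BLOCK_SOFTIRQ, blk_done_softirq() later runs and invokes the per-queue softirq_done_fn that mmc registered with blk_queue_softirq_done():

#include <stdio.h>

struct fake_request_queue {
	void (*softirq_done_fn)(int tag);	/* set by blk_queue_softirq_done() */
};

/* Stands in for mmc_cmdq_softirq_done() */
static void mmc_softirq_done_model(int tag)
{
	printf("complete tag %d in softirq context\n", tag);
}

/* Stands in for blk_done_softirq() draining the per-cpu blk_cpu_done list */
static void blk_done_softirq_model(struct fake_request_queue *q, int tag)
{
	if (q->softirq_done_fn)
		q->softirq_done_fn(tag);
}

int main(void)
{
	struct fake_request_queue q = { .softirq_done_fn = mmc_softirq_done_model };

	blk_done_softirq_model(&q, 25);		/* as if BLOCK_SOFTIRQ had been raised */
	return 0;
}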
OK, the timeout error handler finally lands in mmc_cmdq_softirq_done():
static void mmc_cmdq_softirq_done(struct request *rq)
{
struct mmc_queue *mq = rq->q->queuedata;
mq->cmdq_complete_fn(rq); //tj: mmc_blk_cmdq_complete_rq(), check above
}
/* invoked by block layer in softirq context */
void mmc_blk_cmdq_complete_rq(struct request *rq)
{
...
//tj: first fetch the error
if (mrq->cmd && mrq->cmd->error)
err = mrq->cmd->error;
else if (mrq->data && mrq->data->error)
err = mrq->data->error;
if (cmdq_req->resp_err)
err_resp = cmdq_req->resp_err;
//tj: then check err
if ((err || err_resp) && !cmdq_req->skip_err_handling) {
pr_err("%s: %s: txfr error(%d)/resp_err(%d)\n",
mmc_hostname(mrq->host), __func__, err,
err_resp);
if (test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state)) {
pr_err("%s: CQ in error state, ending current req: %d\n",
__func__, err);
} else {
set_bit(CMDQ_STATE_ERR, &ctx_info->curr_state);
BUG_ON(host->err_mrq != NULL);
host->err_mrq = mrq;
schedule_work(&mq->cmdq_err_work); //tj: here
}
goto out;
}
}
Since our curr_state has CMDQ_STATE_REQ_TIMED_OUT set, check ->cmdq_err_work:
static void mmc_cmdq_error_work(struct work_struct *work)
{
struct mmc_queue *mq = container_of(work, struct mmc_queue,
cmdq_err_work);
mq->cmdq_error_fn(mq);
}
/*
* mmc_blk_cmdq_err: error handling of cmdq error requests.
* Function should be called in context of error out request
* which has claim_host and rpm acquired.
* This may be called with CQ engine halted. Make sure to
* unhalt it after error recovery.
*
* TODO: Currently cmdq error handler does reset_all in case
* of any erorr. Need to optimize error handling.
*/
static void mmc_blk_cmdq_err(struct mmc_queue *mq)
{
...
pr_err("%s: %s Starting cmdq Error handler\n",
mmc_hostname(host), __func__);
q = mrq->req->q;
err = mmc_cmdq_halt(host, true); //tj: halt cmdq first
if (err) {
pr_err("halt: failed: %d\n", err);
goto reset;
}
...
//tj: on timeout, fetch the device status
/*
* TIMEOUT errrors can happen because of execution error
* in the last command. So send cmd 13 to get device status
*/
if ((mrq->cmd && (mrq->cmd->error == -ETIMEDOUT)) ||
(mrq->data && (mrq->data->error == -ETIMEDOUT))) {
if (mmc_host_halt(host) || mmc_host_cq_disable(host)) {
ret = get_card_status(host->card, &status, 0);
if (ret)
pr_err("%s: CMD13 failed with err %d\n",
mmc_hostname(host), ret);
}
pr_err("%s: Timeout error detected with device status 0x%08x\n",
mmc_hostname(host), status);
}
/*
* In case of software request time-out, we schedule err work only for
* the first error out request and handles all other request in flight
* here.
*/
if (test_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state)) {
err = -ETIMEDOUT;
} else if (mrq->data && mrq->data->error) {
err = mrq->data->error;
} else if (mrq->cmd && mrq->cmd->error) {
/* DCMD commands */
err = mrq->cmd->error;
}
reset:
mmc_blk_cmdq_reset_all(host, err); //tj: reset all
if (mrq->cmdq_req->resp_err)
mrq->cmdq_req->resp_err = false;
mmc_cmdq_halt(host, false); //tj: unhalt cmdq
host->err_mrq = NULL;
//tj: clear CMDQ_STATE_REQ_TIMED_OUT/CMDQ_STATE_ERR
clear_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state);
WARN_ON(!test_and_clear_bit(CMDQ_STATE_ERR, &ctx_info->curr_state));
Let's look at mmc_blk_cmdq_reset_all():
/**
* mmc_blk_cmdq_reset_all - Reset everything for CMDQ block request.
* @host: mmc_host pointer.
* @err: error for which reset is performed.
*
* This function implements reset_all functionality for
* cmdq. It resets the controller, power cycle the card,
* and invalidate all busy tags(requeue all request back to
* elevator).
*/
static void mmc_blk_cmdq_reset_all(struct mmc_host *host, int err)
{
...
}
From the comment, it resets the controller, power-cycles the eMMC, and invalidates all busy tags (requeuing the requests back to the elevator). Call stack:
mmc_blk_cmdq_reset(,false) -> mmc_cmdq_hw_reset()
/*
* mmc_cmdq_hw_reset: Helper API for doing
* reset_all of host and reinitializing card.
* This must be called with mmc_claim_host
* acquired by the caller.
*/
int mmc_cmdq_hw_reset(struct mmc_host *host)
{
if (!host->bus_ops->reset)
return -EOPNOTSUPP;
return host->bus_ops->reset(host); //tj: mmc_reset()
}
static int mmc_reset(struct mmc_host *host)
{
struct mmc_card *card = host->card;
int ret;
if ((host->caps & MMC_CAP_HW_RESET) && host->ops->hw_reset &&
mmc_can_reset(card)) {
mmc_host_clk_hold(host);
/* If the card accept RST_n signal, send it. */
mmc_set_clock(host, host->f_init);
host->ops->hw_reset(host);
/* Set initial state and call mmc_set_ios */
mmc_set_initial_state(host);
mmc_host_clk_release(host);
} else {
/* Do a brute force power cycle */
mmc_power_cycle(host, card->ocr);
mmc_pwrseq_reset(host);
}
if (host->inlinecrypt_support)
host->inlinecrypt_reset_needed = true;
ret = mmc_init_card(host, host->card->ocr, host->card);
if (ret) {
pr_err("%s: %s: mmc_init_card failed (%d)\n",
mmc_hostname(host), __func__, ret);
return ret;
}
return ret;
}
The main point is whether the host implements a hardware reset: if it does, the reset signal is sent to the eMMC directly; if not, a brute-force power cycle is performed. Either way, the card is then re-initialized in software (mmc_init_card()).
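As a hedged sketch of that decision (the helper names here are invented, not the kernel API): prefer the RST_n hardware reset when host and card support it, otherwise brute-force power cycle, and re-initialize the card either way:

#include <stdbool.h>
#include <stdio.h>

static bool host_has_hw_reset = true;	/* MMC_CAP_HW_RESET && host->ops->hw_reset */
static bool card_can_reset = true;	/* mmc_can_reset() */

static int reinit_card(void)		/* stands in for mmc_init_card() */
{
	printf("re-initializing the card\n");
	return 0;
}

static int reset_model(void)
{
	if (host_has_hw_reset && card_can_reset)
		printf("assert RST_n to the eMMC\n");	/* host->ops->hw_reset() */
	else
		printf("brute force power cycle\n");	/* mmc_power_cycle() */
	return reinit_card();
}

int main(void)
{
	return reset_model();
}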
Reference
- JEDEC Standard JESD84-B51, Embedded Multi-Media Card (eMMC) Electrical Standard (5.1)