先看HW架构

host-arch-with-cqe.png

CQE: Command Queueing Engine,负责管理software和eMMC device的data传输。

首先CQE接收来自SW的task(via TDL和doorbell),接下来的command flows主要有三步:

  • Step1. Queuing a Transaction(CMD44+CMD45)

CQE发出CMD44/CMD45给eMMC用来queue a (data xfer) task, 当device有个R1响应就表示这个task已经queued in the device。可以通过读CQDPT寄存器来判断, CQE接收到R1响应就会置位,任务完成clear the bit。

  • Step2. Checking the Queue Status(CMD13)

CQE发出CMD13读QSR(Queue Status Register)来决定执行哪个task, device会返回个R1, 这个R1就是32bit value,每个bit对应一个task,如果bit=0,那这个task没有ready for execution,bit=1就是ready了。

  • Step3. Execution of a Queued Task(CMD46/CMD47)

CQE发出CMD46(读) or CMD47(写)给那些在QSR里已经"ready for execution"的tasks。

Linux代码分析

看下超时相关代码,msm kernel 4.14

mmc0: request with tag: 25 flags: 0x103001 timed out 

find the code:

/*
 * Block-layer timeout callback for CMDQ requests: log the timed-out
 * request's tag and command flags, then delegate the handled /
 * reset-timer / not-handled decision to the mmc queue's
 * ->cmdq_req_timed_out hook.
 */
enum blk_eh_timer_return mmc_cmdq_rq_timed_out(struct request *req)
{
	struct mmc_queue *mq = req->q->queuedata;

	pr_err("%s: request with tag: %d flags: 0x%x timed out\n",
	       mmc_hostname(mq->card->host), req->tag, req->cmd_flags);

	return mq->cmdq_req_timed_out(req); /* tj: mmc_blk_cmdq_req_timed_out() */
}
/*
 * CMDQ queue init (excerpt): install the timeout handler and set the
 * per-request timeout to 120 s — this is the source of the 120 s
 * deadline behind the "timed out" log message.
 */
int mmc_cmdq_init(struct mmc_queue *mq, struct mmc_card *card)
{
...
	blk_queue_rq_timed_out(mq->queue, mmc_cmdq_rq_timed_out); /* tj: here */
	blk_queue_rq_timeout(mq->queue, 120 * HZ); /* tj: 120 s */
	card->cmdq_init = true;

	return ret;
}

block/blk-settings.c:

/*
 * Set the queue's default per-request timeout (in jiffies); picked up
 * by blk_add_timer() for requests that did not set their own timeout.
 */
void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
{
	q->rq_timeout = timeout;
}

/*
 * Register the legacy-path request timeout handler. The WARN_ON_ONCE
 * flags misuse on a blk-mq queue, which has its own timeout handling.
 */
void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn)
{
	WARN_ON_ONCE(q->mq_ops);
	q->rq_timed_out_fn = fn;
}

看下who calls ->rq_timed_out_fn:

/*
 * Legacy-path timeout dispatch: ask the driver's ->rq_timed_out_fn what
 * to do with the expired request and act on the returned code.
 */
static void blk_rq_timed_out(struct request *req)
{
	struct request_queue *q = req->q;
	enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;

	if (q->rq_timed_out_fn)
		ret = q->rq_timed_out_fn(req); /* tj: here -> mmc_cmdq_rq_timed_out() */
	switch (ret) {
	case BLK_EH_HANDLED:
		__blk_complete_request(req); /* tj: here — complete via BLOCK_SOFTIRQ */
		break;
	case BLK_EH_RESET_TIMER:
		/* Driver wants more time: re-arm the timer, un-mark completion. */
		blk_add_timer(req);
		blk_clear_rq_complete(req);
		break;
	case BLK_EH_NOT_HANDLED:
		/*
		 * LLD handles this for now but in the future
		 * we can send a request msg to abort the command
		 * and we can move more of the generic scsi eh code to
		 * the blk layer.
		 */
		break;
	default:
		printk(KERN_ERR "block: bad eh return: %d\n", ret);
		break;
	}
}

should be from blk_rq_check_expired():

/*
 * Per-request expiry check, called for each entry on q->timeout_list
 * (excerpt; the function's closing brace is not shown).
 */
static void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout,
		unsigned int *next_set)
{
	/* Expired: jiffies has passed the deadline set in blk_add_timer(). */
	if (time_after_eq(jiffies, rq->deadline)) {
		list_del_init(&rq->timeout_list);

		/*
		 * Check if we raced with end io completion
		 */
		if (!blk_mark_rq_complete(rq))
			blk_rq_timed_out(rq); /* tj: here — run timeout handling */
	}

这里的rq->deadline是启动request时的jiffies加上120s超时,这个request过了120s还没完成? 这就是异常了。

/*
 * kblockd work item: walk q->timeout_list and check every pending
 * request for expiry (excerpt; the unlock/return path is not shown).
 */
void blk_timeout_work(struct work_struct *work)
{
	struct request_queue *q =
		container_of(work, struct request_queue, timeout_work);
	unsigned long flags, next = 0;
	struct request *rq, *tmp;
	int next_set = 0;

	spin_lock_irqsave(q->queue_lock, flags);

	list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list)
		blk_rq_check_expired(rq, &next, &next_set);
	/* NOTE(review): the next line belongs to blk_alloc_queue_node(); paste artifact */
	INIT_WORK(&q->timeout_work, blk_timeout_work);
/*
 * Queue timeout-timer callback: defer the actual expiry scan to
 * kblockd, which runs blk_timeout_work().
 */
static void blk_rq_timed_out_timer(unsigned long data)
{
	struct request_queue *q = (struct request_queue *)data;

	kblockd_schedule_work(&q->timeout_work);
}
/*
 * Queue allocation (excerpt): arm q->timeout with
 * blk_rq_timed_out_timer as its expiry callback.
 */
struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
...
	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
...
}

who triggers this timer? It should be blk_add_timer(), called when starting a request:

/**
 * blk_add_timer - Start timeout timer for a single request
 * @req: request that is about to start running.
 *
 * Notes:
 * Each request has its own timer, and as it is added to the queue, we
 * set up the timer. When the request completes, we cancel the timer.
 */
void blk_add_timer(struct request *req)
{
	struct request_queue *q = req->q;
	unsigned long expiry;

	if (!q->mq_ops)
		lockdep_assert_held(q->queue_lock);

	/* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */
	if (!q->mq_ops && !q->rq_timed_out_fn)
		return;

	BUG_ON(!list_empty(&req->timeout_list));

	/*
	 * Some LLDs, like scsi, peek at the timeout to prevent a
	 * command from being retried forever.
	 */
	if (!req->timeout)
		req->timeout = q->rq_timeout; /* tj: defaults to the queue's 120 s */

	req->deadline = jiffies + req->timeout; /* tj: checked by blk_rq_check_expired() */
	/* NOTE(review): excerpt truncated — the rest of blk_add_timer() is not shown */

看到没有, req->timeout就是120s,给了req->deadline。跟下:

blk_queue_start_tag() -> blk_start_request() -> blk_add_timer()

ok, 回到mmc driver:

/*
 * Try to allocate a tag and start the peeked request under the queue
 * lock. Returns true when blk_queue_start_tag() returned non-zero,
 * i.e. the request could NOT be started (e.g. no free tag) — the
 * caller negates this as its final wait condition. Starting the tag
 * is what arms the request's 120 s timer
 * (blk_queue_start_tag() -> blk_start_request() -> blk_add_timer()).
 */
static bool mmc_check_blk_queue_start_tag(struct request_queue *q,
		struct request *req)
{
	int ret;

	spin_lock_irq(q->queue_lock);
	ret = blk_queue_start_tag(q, req); /* tj: here */
	spin_unlock_irq(q->queue_lock);

	return !!ret;
}
/*
 * Block the cmdq dispatch thread until it is safe to issue the next
 * request. All numbered conditions below are evaluated in one
 * wait_event() predicate; condition 6 must stay last because
 * mmc_check_blk_queue_start_tag() has the side effect of actually
 * starting the request (and arming its timeout timer).
 */
static inline void mmc_cmdq_ready_wait(struct mmc_host *host,
		struct mmc_queue *mq)
{
	struct mmc_cmdq_context_info *ctx = &host->cmdq_ctx;
	struct request_queue *q = mq->queue;

	/*
	 * Wait until all of the following conditions are true:
	 * 1. There is a request pending in the block layer queue
	 * to be processed.
	 * 2. If the peeked request is flush/discard then there shouldn't
	 * be any other direct command active.
	 * 3. cmdq state should be unhalted.
	 * 4. cmdq state shouldn't be in error state.
	 * 5. There is no outstanding RPMB request pending.
	 * 6. free tag available to process the new request.
	 * (This must be the last condtion to check)
	 */
	wait_event(ctx->wait, kthread_should_stop()
		|| (mmc_peek_request(mq) &&
		!(((req_op(mq->cmdq_req_peeked) == REQ_OP_FLUSH) ||
		(req_op(mq->cmdq_req_peeked) == REQ_OP_DISCARD) ||
		(req_op(mq->cmdq_req_peeked) == REQ_OP_SECURE_ERASE))
		&& test_bit(CMDQ_STATE_DCMD_ACTIVE, &ctx->curr_state))
		&& !(!host->card->part_curr && !mmc_card_suspended(host->card)
		&& mmc_host_halt(host))
		&& !(!host->card->part_curr && mmc_host_cq_disable(host) &&
		!mmc_card_suspended(host->card))
		&& !test_bit(CMDQ_STATE_ERR, &ctx->curr_state)
		&& !atomic_read(&host->rpmb_req_pending)
		&& !mmc_check_blk_queue_start_tag(q, mq->cmdq_req_peeked))); /* tj: here */
}

check上面注释的no.6.

/*
 * CMDQ dispatch kthread: wait in mmc_cmdq_ready_wait() until a request
 * has been peeked and started, then issue it via ->cmdq_issue_fn under
 * the cmdq rwsem. Failed issues are deliberately not requeued here —
 * recovery happens on the completion-softirq path.
 */
static int mmc_cmdq_thread(void *d)
{
	struct mmc_queue *mq = d;
	struct mmc_card *card = mq->card;

	struct mmc_host *host = card->host;

	current->flags |= PF_MEMALLOC;
	if (card->host->wakeup_on_idle)
		set_wake_up_idle(true);

	while (1) {
		int ret = 0;

		mmc_cmdq_ready_wait(host, mq);
		if (kthread_should_stop())
			break;

		ret = mmc_cmdq_down_rwsem(host, mq->cmdq_req_peeked);
		if (ret) {
			mmc_cmdq_up_rwsem(host);
			continue;
		}
		ret = mq->cmdq_issue_fn(mq, mq->cmdq_req_peeked);
		mmc_cmdq_up_rwsem(host);

		/*
		 * Don't requeue if issue_fn fails.
		 * Recovery will be come by completion softirq
		 * Also we end the request if there is a partition switch
		 * error, so we should not requeue the request here.
		 */
	} /* loop */

	return 0;
}
/*
 * mmc_init_queue (excerpt): spawn the per-queue "mmc-cmdqd" dispatch
 * thread running mmc_cmdq_thread().
 */
int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
	spinlock_t *lock, const char *subname, int area_type)
{
...
	mq->thread = kthread_run(mmc_cmdq_thread, mq,
		"mmc-cmdqd/%d%s", /* tj: here — thread name visible in ps */
		host->index,
		subname ? subname : "");
	if (IS_ERR(mq->thread)) {
		pr_err("%s: %d: cmdq: failed to start mmc-cmdqd thread\n",
			mmc_hostname(card->host), ret);
		ret = PTR_ERR(mq->thread);
	}

也就是说,cmdq状态ok后可以扔请求给HW CMDQ, rt? 在check cmdq状态时发现有tag超时了。

我们再来看下超时后的异常处理, 超时的entry: ->cmdq_req_timed_out:

/*
 * mmc_blk_alloc_req (excerpt): when the card came up with a command
 * queue, wire up the per-queue CMDQ callbacks — including the timeout
 * entry point ->cmdq_req_timed_out.
 */
static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
...
{
...
	if (card->cmdq_init) {
		md->flags |= MMC_BLK_CMD_QUEUE;
		md->queue.cmdq_complete_fn = mmc_blk_cmdq_complete_rq;
		md->queue.cmdq_issue_fn = mmc_blk_cmdq_issue_rq;
		md->queue.cmdq_error_fn = mmc_blk_cmdq_err;
		md->queue.cmdq_req_timed_out = mmc_blk_cmdq_req_timed_out; /* tj: here */
		md->queue.cmdq_shutdown = mmc_blk_cmdq_shutdown;
	}
/*
 * mmc driver's decision for a timed-out CMDQ request.
 * Returns:
 *   BLK_EH_RESET_TIMER - request not yet issued to the LLD; give it time
 *   BLK_EH_NOT_HANDLED - recovery already in flight, or a DCMD whose
 *                        waiter was completed directly
 *   BLK_EH_HANDLED     - first timeout; block layer completes the
 *                        request, which drives the softirq/error-work path
 */
static enum blk_eh_timer_return mmc_blk_cmdq_req_timed_out(struct request *req)
{
	struct mmc_queue *mq = req->q->queuedata;
	struct mmc_host *host = mq->card->host;
	struct mmc_queue_req *mq_rq = req->special;
	struct mmc_request *mrq;
	struct mmc_cmdq_req *cmdq_req;
	struct mmc_cmdq_context_info *ctx_info = &host->cmdq_ctx;

	BUG_ON(!host);

	/*
	 * The mmc_queue_req will be present only if the request
	 * is issued to the LLD. The request could be fetched from
	 * block layer queue but could be waiting to be issued
	 * (for e.g. clock scaling is waiting for an empty cmdq queue)
	 * Reset the timer in such cases to give LLD more time
	 */
	if (!mq_rq) {
		pr_warn("%s: restart timer for tag: %d\n", __func__, req->tag);
		return BLK_EH_RESET_TIMER;
	}

	mrq = &mq_rq->cmdq_req.mrq;
	cmdq_req = &mq_rq->cmdq_req;

	/* NOTE(review): both are addresses of embedded members, never NULL */
	BUG_ON(!mrq || !cmdq_req);

	/* Record the timeout on the command (DCMD) or the data transfer. */
	if (cmdq_req->cmdq_req_flags & DCMD)
		mrq->cmd->error = -ETIMEDOUT;
	else
		mrq->data->error = -ETIMEDOUT;

	host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]++;

	if (mrq->cmd && mrq->cmd->error) {
		if (!(mrq->req->cmd_flags & REQ_PREFLUSH)) {
			/*
			 * Notify completion for non flush commands like
			 * discard that wait for DCMD finish.
			 */
			set_bit(CMDQ_STATE_REQ_TIMED_OUT,
				&ctx_info->curr_state);
			complete(&mrq->completion);
			return BLK_EH_NOT_HANDLED;
		}
	}

	/* A previous timeout/error is already being recovered. */
	if (test_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state) ||
		test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state))
		return BLK_EH_NOT_HANDLED;

	set_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state);
	return BLK_EH_HANDLED;
}

先是记录为超时-ETIMEDOUT, 然后把curr_state标记CMDQ_STATE_REQ_TIMED_OUT

/*
 * (Repeated excerpt for the BLK_EH_HANDLED case.) When the driver
 * returns BLK_EH_HANDLED, the block layer completes the request, which
 * proceeds via __blk_complete_request() into softirq context.
 */
static void blk_rq_timed_out(struct request *req)
{
	struct request_queue *q = req->q;
	enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;

	if (q->rq_timed_out_fn)
		ret = q->rq_timed_out_fn(req); /* tj: here */
	switch (ret) {
	case BLK_EH_HANDLED:
		__blk_complete_request(req); /* tj: here */

block/blk-softirq.c:

/*
 * __blk_complete_request (excerpt): queue the request on this CPU's
 * blk_cpu_done list and raise BLOCK_SOFTIRQ, whose handler eventually
 * calls q->softirq_done_fn.
 */
void __blk_complete_request(struct request *req)
{
...
	if (ccpu == cpu || shared) {
		struct list_head *list;
do_local:
		list = this_cpu_ptr(&blk_cpu_done);
		list_add_tail(&req->ipi_list, list);

		/*
		 * if the list only contains our just added request,
		 * signal a raise of the softirq. If there are already
		 * entries there, someone already raised the irq but it
		 * hasn't run yet.
		 */
		if (list->next == &req->ipi_list)
			raise_softirq_irqoff(BLOCK_SOFTIRQ); /* tj: here */
	} else if (raise_blk_irq(ccpu, req))
		goto do_local;
...
}

softirq有关,should be raise_softirq_irqoff(), 我们看下mmc drv:

/*
 * mmc_cmdq_init (second excerpt): register the softirq completion
 * handler and the error work item for this queue.
 */
int mmc_cmdq_init(struct mmc_queue *mq, struct mmc_card *card)
{
...
	blk_queue_softirq_done(mq->queue, mmc_cmdq_softirq_done);
	INIT_WORK(&mq->cmdq_err_work, mmc_cmdq_error_work);
/* Install the function BLOCK_SOFTIRQ calls to complete this queue's requests. */
void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn)
{
	q->softirq_done_fn = fn;
}
blk_done_softirq() -> mmc_cmdq_softirq_done (->softirq_done_fn)
/*
 * Boot-time setup (excerpt): per-CPU completion lists plus registration
 * of blk_done_softirq as the BLOCK_SOFTIRQ handler, which in turn
 * invokes q->softirq_done_fn.
 */
static __init int blk_softirq_init(void)
{
	int i;

	for_each_possible_cpu(i)
		INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));

	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
ok, timeout error handler最后走到mmc_cmdq_softirq_done():

/* BLOCK_SOFTIRQ completion: forward to the mmc queue's completion hook. */
static void mmc_cmdq_softirq_done(struct request *rq)
{
	struct mmc_queue *mq = rq->q->queuedata;

	mq->cmdq_complete_fn(rq); /* tj: mmc_blk_cmdq_complete_rq(), check above */
}
/* invoked by block layer in softirq context */
/*
 * Completion handler for CMDQ requests (excerpt). On an error it marks
 * the queue CMDQ_STATE_ERR and schedules the error work exactly once;
 * any request that errors while recovery is already in flight is simply
 * ended.
 */
void mmc_blk_cmdq_complete_rq(struct request *rq)
{
...
	/* tj: first pick up the recorded error (-ETIMEDOUT in our case) */
	if (mrq->cmd && mrq->cmd->error)
		err = mrq->cmd->error;
	else if (mrq->data && mrq->data->error)
		err = mrq->data->error;
	if (cmdq_req->resp_err)
		err_resp = cmdq_req->resp_err;

	/* tj: then act on the error */
	if ((err || err_resp) && !cmdq_req->skip_err_handling) {
		pr_err("%s: %s: txfr error(%d)/resp_err(%d)\n",
			mmc_hostname(mrq->host), __func__, err,
			err_resp);
		if (test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state)) {
			pr_err("%s: CQ in error state, ending current req: %d\n",
				__func__, err);
		} else {
			set_bit(CMDQ_STATE_ERR, &ctx_info->curr_state);
			BUG_ON(host->err_mrq != NULL);
			host->err_mrq = mrq;
			schedule_work(&mq->cmdq_err_work); /* tj: here -> mmc_cmdq_error_work() */
		}
		goto out;
	}
}

因为我们的->curr_state已标记CMDQ_STATE_REQ_TIMED_OUT,check ->cmdq_err_work:

/* Workqueue shim: run the queue's error handler (->cmdq_error_fn). */
static void mmc_cmdq_error_work(struct work_struct *work)
{
	struct mmc_queue *mq = container_of(work, struct mmc_queue,
				cmdq_err_work);

	mq->cmdq_error_fn(mq);
}
/*
 * mmc_blk_cmdq_err: error handling of cmdq error requests.
 * Function should be called in context of error out request
 * which has claim_host and rpm acquired.
 * This may be called with CQ engine halted. Make sure to
 * unhalt it after error recovery.
 *
 * TODO: Currently cmdq error handler does reset_all in case
 * of any error. Need to optimize error handling.
 */
static void mmc_blk_cmdq_err(struct mmc_queue *mq)
{
...
	pr_err("%s: %s Starting cmdq Error handler\n",
		mmc_hostname(host), __func__);
	q = mrq->req->q;
	err = mmc_cmdq_halt(host, true); /* tj: halt the CQ engine first */
	if (err) {
		pr_err("halt: failed: %d\n", err);
		goto reset;
	}
...

	/* tj: on a timeout, fetch the device status with CMD13 */
	/*
	 * TIMEOUT errors can happen because of execution error
	 * in the last command. So send cmd 13 to get device status
	 */
	if ((mrq->cmd && (mrq->cmd->error == -ETIMEDOUT)) ||
		(mrq->data && (mrq->data->error == -ETIMEDOUT))) {
		if (mmc_host_halt(host) || mmc_host_cq_disable(host)) {
			ret = get_card_status(host->card, &status, 0);
			if (ret)
				pr_err("%s: CMD13 failed with err %d\n",
					mmc_hostname(host), ret);
		}
		pr_err("%s: Timeout error detected with device status 0x%08x\n",
			mmc_hostname(host), status);
	}

	/*
	 * In case of software request time-out, we schedule err work only for
	 * the first error out request and handles all other request in flight
	 * here.
	 */
	if (test_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state)) {
		err = -ETIMEDOUT;
	} else if (mrq->data && mrq->data->error) {
		err = mrq->data->error;
	} else if (mrq->cmd && mrq->cmd->error) {
		/* DCMD commands */
		err = mrq->cmd->error;
	}

reset:
	mmc_blk_cmdq_reset_all(host, err); /* tj: reset controller + card, requeue busy tags */
	if (mrq->cmdq_req->resp_err)
		mrq->cmdq_req->resp_err = false;
	mmc_cmdq_halt(host, false); /* tj: unhalt the CQ engine */

	host->err_mrq = NULL;

	/* tj: clear CMDQ_STATE_REQ_TIMED_OUT/CMDQ_STATE_ERR */
	clear_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state);
	WARN_ON(!test_and_clear_bit(CMDQ_STATE_ERR, &ctx_info->curr_state));

来看mmc_blk_cmdq_reset_all():

/**
 * mmc_blk_cmdq_reset_all - Reset everything for CMDQ block request.
 * @host: mmc_host pointer.
 * @err: error for which reset is performed.
 *
 * This function implements reset_all functionality for
 * cmdq. It resets the controller, power cycle the card,
 * and invalidate all busy tags(requeue all request back to
 * elevator).
 */
static void mmc_blk_cmdq_reset_all(struct mmc_host *host, int err)
{
	/* NOTE(review): the function body is not shown in this excerpt */
看注释是复位controller,掉电复位eMMC,还有就是清理busy tags。call stack:

mmc_blk_cmdq_reset(,false) -> mmc_cmdq_hw_reset()
/*
 * mmc_cmdq_hw_reset: Helper API for doing
 * reset_all of host and reinitializing card.
 * This must be called with mmc_claim_host
 * acquired by the caller.
 */
int mmc_cmdq_hw_reset(struct mmc_host *host)
{
	/* Bus ops must provide a reset handler; mmc_reset() for eMMC. */
	if (!host->bus_ops->reset)
		return -EOPNOTSUPP;

	return host->bus_ops->reset(host); /* tj: mmc_reset() */
}
/*
 * Full card reset: assert the RST_n hardware reset when the host and
 * card support it, otherwise brute-force power-cycle the card; then
 * re-run the software initialization via mmc_init_card().
 */
static int mmc_reset(struct mmc_host *host)
{
	struct mmc_card *card = host->card;
	int ret;

	if ((host->caps & MMC_CAP_HW_RESET) && host->ops->hw_reset &&
		mmc_can_reset(card)) {
		mmc_host_clk_hold(host);
		/* If the card accept RST_n signal, send it. */
		mmc_set_clock(host, host->f_init);
		host->ops->hw_reset(host);
		/* Set initial state and call mmc_set_ios */
		mmc_set_initial_state(host);
		mmc_host_clk_release(host);
	} else {
		/* Do a brute force power cycle */
		mmc_power_cycle(host, card->ocr);
		mmc_pwrseq_reset(host);
	}

	/* NOTE(review): presumably inline-crypto state must be restored after reset */
	if (host->inlinecrypt_support)
		host->inlinecrypt_reset_needed = true;

	ret = mmc_init_card(host, host->card->ocr, host->card);
	if (ret) {
		pr_err("%s: %s: mmc_init_card failed (%d)\n",
			mmc_hostname(host), __func__, ret);
		return ret;
	}

	return ret;
}

主要看host有没有实现reset,如果有直接发复位信号给eMMC。如果没有就强迫power cycle(掉电)。最后走软复位card(mmc_init_card())。

refer

  • JEDEC STANDARD JESD84-B51.pdf