先看HW架构

host-arch-with-cqe.png

CQE: Command Queueing Engine,负责管理software和eMMC device的data传输。

首先CQE接收来自SW的task(via TDL和doorbell),接下来的command flows主要有三步:

  • Step1. Queuing a Transaction(CMD44+CMD45)

CQE发出CMD44/CMD45给eMMC用来queue a (data xfer) task, 当device有个R1响应就表示这个task已经queued in the device。可以通过读CQDPT寄存器来判断, CQE接收到R1响应就会置位,任务完成clear the bit。

  • Step2. Checking the Queue Status(CMD13)

CQE发出CMD13读QSR(Queue Status Register)来决定执行哪个task, device会返回个R1, 这个R1就是32bit value,每个bit对应一个task,如果bit=0,那这个task没有ready for execution,bit=1就是ready了。

  • Step3. Execution of a Queued Task(CMD46/CMD47)

CQE发出CMD46(读) or CMD47(写)给那些在QSR里已经"ready for execution"的tasks。

Linux代码分析

看下超时相关代码,msm kernel 4.14

mmc0: request with tag: 25 flags: 0x103001 timed out 

find the code:

/*
 * Block-layer timeout hook for CMDQ requests: installed via
 * blk_queue_rq_timed_out() in mmc_cmdq_init(). Logs the expired tag,
 * then delegates the decision (handled / reset timer / not handled)
 * to the driver-provided mq->cmdq_req_timed_out callback.
 */
enum blk_eh_timer_return mmc_cmdq_rq_timed_out(struct request *req)
{
    struct mmc_queue *mq = req->q->queuedata;

    pr_err("%s: request with tag: %d flags: 0x%x timed out\n",
           mmc_hostname(mq->card->host), req->tag, req->cmd_flags);

    return mq->cmdq_req_timed_out(req);
}
int mmc_cmdq_init(struct mmc_queue *mq, struct mmc_card *card)
{
    ...
    blk_queue_rq_timed_out(mq->queue, mmc_cmdq_rq_timed_out); //tj: here
    blk_queue_rq_timeout(mq->queue, 120 * HZ); //tj: 120s
    card->cmdq_init = true;

    return ret;
}

block/blk-settings.c:

/* Set the per-request timeout (in jiffies) picked up by blk_add_timer(). */
void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
{
    q->rq_timeout = timeout;
}

/*
 * Install the legacy (non-blk-mq) per-queue timeout handler.
 * blk-mq queues have their own timeout path, hence the WARN if
 * q->mq_ops is set.
 */
void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn)
{
    WARN_ON_ONCE(q->mq_ops);
    q->rq_timed_out_fn = fn;
}

看下who calls ->rq_timed_out_fn:

/*
 * Invoked for a request whose deadline expired. Asks the driver's
 * rq_timed_out_fn what to do; default (no handler) is to re-arm the timer.
 */
static void blk_rq_timed_out(struct request *req)
{
        struct request_queue *q = req->q;
        enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;

        if (q->rq_timed_out_fn)
                ret = q->rq_timed_out_fn(req);  //tj: here
        switch (ret) {
        case BLK_EH_HANDLED:
                /* driver finished the request: complete it via softirq */
                __blk_complete_request(req); //tj: here
                break;
        case BLK_EH_RESET_TIMER:
                /* give the request another full timeout period */
                blk_add_timer(req);
                blk_clear_rq_complete(req);
                break;
        case BLK_EH_NOT_HANDLED:
                /*
                 * LLD handles this for now but in the future
                 * we can send a request msg to abort the command
                 * and we can move more of the generic scsi eh code to
                 * the blk layer.
                 */
                break;
        default:
                printk(KERN_ERR "block: bad eh return: %d\n", ret);
                break;
        }
}

should be from blk_rq_check_expired():

static void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout,
                          unsigned int *next_set)
{
        if (time_after_eq(jiffies, rq->deadline)) {
                list_del_init(&rq->timeout_list);

                /*
                 * Check if we raced with end io completion
                 */
                if (!blk_mark_rq_complete(rq))
                        blk_rq_timed_out(rq); //here
        } 

这里的rq->deadline是120s,这个request超过了120s? 这就是异常了。

void blk_timeout_work(struct work_struct *work)
{
        struct request_queue *q =
                container_of(work, struct request_queue, timeout_work);
        unsigned long flags, next = 0;
        struct request *rq, *tmp;
        int next_set = 0;

        spin_lock_irqsave(q->queue_lock, flags);

        list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list)
                blk_rq_check_expired(rq, &next, &next_set);
INIT_WORK(&q->timeout_work, blk_timeout_work);
/*
 * q->timeout timer callback: runs in timer (softirq) context, so it only
 * kicks the timeout_work onto the kblockd workqueue for processing.
 */
static void blk_rq_timed_out_timer(unsigned long data)
{
        struct request_queue *q = (struct request_queue *)data;

        kblockd_schedule_work(&q->timeout_work);
}
struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
    ...
        setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
    ...
}

who trigger this timer? should be blk_add_timer() when start request:

/**
 * blk_add_timer - Start timeout timer for a single request
 * @req:        request that is about to start running.
 *
 * Notes:
 *    Each request has its own timer, and as it is added to the queue, we
 *    set up the timer. When the request completes, we cancel the timer.
 */
void blk_add_timer(struct request *req)
{
        struct request_queue *q = req->q;
        unsigned long expiry;

        if (!q->mq_ops)
                lockdep_assert_held(q->queue_lock);

        /* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */
        if (!q->mq_ops && !q->rq_timed_out_fn)
                return;

        BUG_ON(!list_empty(&req->timeout_list));

        /*
         * Some LLDs, like scsi, peek at the timeout to prevent a
         * command from being retried forever.
         */
        if (!req->timeout)
                req->timeout = q->rq_timeout;

        req->deadline = jiffies + req->timeout;

看到没有, req->timeout就是120s,给了req->deadline。跟下:

blk_queue_start_tag() -> blk_start_request() -> blk_add_timer()

ok, 回到mmc driver:

/*
 * Try to assign a block-layer tag to @req (which also starts the
 * request and arms its timeout timer via blk_add_timer()).
 * Returns true if no tag was available, false on success — used as
 * a wait condition in mmc_cmdq_ready_wait().
 */
static bool mmc_check_blk_queue_start_tag(struct request_queue *q,
                      struct request *req)
{
    int ret;

    spin_lock_irq(q->queue_lock);
    ret = blk_queue_start_tag(q, req); //tj:here
    spin_unlock_irq(q->queue_lock);

    return !!ret;
}
/*
 * Block the cmdq dispatch thread until the next peeked request can be
 * issued (or the thread is asked to stop). The numbered conditions
 * below are evaluated together inside a single wait_event() predicate.
 */
static inline void mmc_cmdq_ready_wait(struct mmc_host *host,
                    struct mmc_queue *mq)
{
    struct mmc_cmdq_context_info *ctx = &host->cmdq_ctx;
    struct request_queue *q = mq->queue;

    /*
     * Wait until all of the following conditions are true:
     * 1. There is a request pending in the block layer queue
     *    to be processed.
     * 2. If the peeked request is flush/discard then there shouldn't
     *    be any other direct command active.
     * 3. cmdq state should be unhalted.
     * 4. cmdq state shouldn't be in error state.
     * 5. There is no outstanding RPMB request pending.
     * 6. free tag available to process the new request.
     *    (This must be the last condtion to check)
     */
    wait_event(ctx->wait, kthread_should_stop()
        || (mmc_peek_request(mq) &&
        !(((req_op(mq->cmdq_req_peeked) == REQ_OP_FLUSH) ||
           (req_op(mq->cmdq_req_peeked) == REQ_OP_DISCARD) ||
           (req_op(mq->cmdq_req_peeked) == REQ_OP_SECURE_ERASE))
          && test_bit(CMDQ_STATE_DCMD_ACTIVE, &ctx->curr_state))
        && !(!host->card->part_curr && !mmc_card_suspended(host->card)
             && mmc_host_halt(host))
        && !(!host->card->part_curr && mmc_host_cq_disable(host) &&
            !mmc_card_suspended(host->card))
        && !test_bit(CMDQ_STATE_ERR, &ctx->curr_state)
        && !atomic_read(&host->rpmb_req_pending)
        && !mmc_check_blk_queue_start_tag(q, mq->cmdq_req_peeked))); //tj: here
}

check上面注释的no.6.

/*
 * Per-queue kthread that feeds queued block requests into the CMDQ
 * engine. Loops: wait for an issuable request (mmc_cmdq_ready_wait),
 * take the cmdq rwsem, then hand the request to cmdq_issue_fn.
 */
static int mmc_cmdq_thread(void *d)
{
    struct mmc_queue *mq = d;
    struct mmc_card *card = mq->card;

    struct mmc_host *host = card->host;

    current->flags |= PF_MEMALLOC;
    if (card->host->wakeup_on_idle)
        set_wake_up_idle(true);

    while (1) {
        int ret = 0;

        /* blocks until conditions 1-6 in mmc_cmdq_ready_wait() hold */
        mmc_cmdq_ready_wait(host, mq);
        if (kthread_should_stop())
            break;

        ret = mmc_cmdq_down_rwsem(host, mq->cmdq_req_peeked);
        if (ret) {
            mmc_cmdq_up_rwsem(host);
            continue;
        }
        ret = mq->cmdq_issue_fn(mq, mq->cmdq_req_peeked);
        mmc_cmdq_up_rwsem(host);

        /*
         * Don't requeue if issue_fn fails.
         * Recovery will be come by completion softirq
         * Also we end the request if there is a partition switch
         * error, so we should not requeue the request here.
         */
    } /* loop */

    return 0;
}
int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
           spinlock_t *lock, const char *subname, int area_type)
{
    ...
            mq->thread = kthread_run(mmc_cmdq_thread, mq,
                         "mmc-cmdqd/%d%s", //tj:here
                         host->index,
                         subname ? subname : "");
            if (IS_ERR(mq->thread)) {
                pr_err("%s: %d: cmdq: failed to start mmc-cmdqd thread\n",
                    mmc_hostname(card->host), ret);
                ret = PTR_ERR(mq->thread);
            }

也就是说,cmdq状态ok后可以扔请求给HW CMDQ, rt? 在check cmdq状态时发现有tag超时了。

我们再来看下超时后的异常处理, 超时的entry: ->cmdq_req_timed_out:

static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
...
{
...
    if (card->cmdq_init) {
        md->flags |= MMC_BLK_CMD_QUEUE;
        md->queue.cmdq_complete_fn = mmc_blk_cmdq_complete_rq;
        md->queue.cmdq_issue_fn = mmc_blk_cmdq_issue_rq;
        md->queue.cmdq_error_fn = mmc_blk_cmdq_err;
        md->queue.cmdq_req_timed_out = mmc_blk_cmdq_req_timed_out;  //tj: here
        md->queue.cmdq_shutdown = mmc_blk_cmdq_shutdown;
    }
/*
 * Driver-level timeout handler for a CMDQ request that blew its 120s
 * block-layer deadline. Marks the request -ETIMEDOUT, records the
 * timeout in curr_state, and tells the block layer whether it is
 * handled here (BLK_EH_HANDLED -> softirq completion path) or will be
 * finished by the error-work path (BLK_EH_NOT_HANDLED).
 */
static enum blk_eh_timer_return mmc_blk_cmdq_req_timed_out(struct request *req)
{
    struct mmc_queue *mq = req->q->queuedata;
    struct mmc_host *host = mq->card->host;
    struct mmc_queue_req *mq_rq = req->special;
    struct mmc_request *mrq;
    struct mmc_cmdq_req *cmdq_req;
    struct mmc_cmdq_context_info *ctx_info = &host->cmdq_ctx;

    BUG_ON(!host);

    /*
     * The mmc_queue_req will be present only if the request
     * is issued to the LLD. The request could be fetched from
     * block layer queue but could be waiting to be issued
     * (for e.g. clock scaling is waiting for an empty cmdq queue)
     * Reset the timer in such cases to give LLD more time
     */
    if (!mq_rq) {
        pr_warn("%s: restart timer for tag: %d\n", __func__, req->tag);
        return BLK_EH_RESET_TIMER;
    }

    mrq = &mq_rq->cmdq_req.mrq;
    cmdq_req = &mq_rq->cmdq_req;

    BUG_ON(!mrq || !cmdq_req);

    /* DCMDs carry the error on the command, data transfers on the data */
    if (cmdq_req->cmdq_req_flags & DCMD)
        mrq->cmd->error = -ETIMEDOUT;
    else
        mrq->data->error = -ETIMEDOUT;

    host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]++;

    if (mrq->cmd && mrq->cmd->error) {
        if (!(mrq->req->cmd_flags & REQ_PREFLUSH)) {
            /*
             * Notify completion for non flush commands like
             * discard that wait for DCMD finish.
             */
            set_bit(CMDQ_STATE_REQ_TIMED_OUT,
                    &ctx_info->curr_state);
            complete(&mrq->completion);
            return BLK_EH_NOT_HANDLED;
        }
    }

    /* another request already kicked off error handling; let it finish */
    if (test_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state) ||
        test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state))
        return BLK_EH_NOT_HANDLED;

    set_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state);
    return BLK_EH_HANDLED;
}

先是记录为超时-ETIMEDOUT, 然后把curr_state标记CMDQ_STATE_REQ_TIMED_OUT

static void blk_rq_timed_out(struct request *req)
{
        struct request_queue *q = req->q;
        enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;

        if (q->rq_timed_out_fn)
                ret = q->rq_timed_out_fn(req);  //tj: here
        switch (ret) {
        case BLK_EH_HANDLED:
                __blk_complete_request(req); //tj: here

block/blk-softirq.c:

void __blk_complete_request(struct request *req)
{
    ...
        if (ccpu == cpu || shared) {
                struct list_head *list;
do_local:
                list = this_cpu_ptr(&blk_cpu_done);
                list_add_tail(&req->ipi_list, list);

                /*
                 * if the list only contains our just added request,
                 * signal a raise of the softirq. If there are already
                 * entries there, someone already raised the irq but it
                 * hasn't run yet.
                 */
                if (list->next == &req->ipi_list)
                        raise_softirq_irqoff(BLOCK_SOFTIRQ); //tj: here
        } else if (raise_blk_irq(ccpu, req))
                goto do_local;
    ...
}

softirq有关,should be raise_softirq_irqoff(), 我们看下mmc drv:

int mmc_cmdq_init(struct mmc_queue *mq, struct mmc_card *card)
{
    ...
    blk_queue_softirq_done(mq->queue, mmc_cmdq_softirq_done);
    INIT_WORK(&mq->cmdq_err_work, mmc_cmdq_error_work);
/* Register the per-queue completion callback run from BLOCK_SOFTIRQ. */
void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn)
{
    q->softirq_done_fn = fn;
}
blk_done_softirq() -> mmc_cmdq_softirq_done (->softirq_done_fn)
static __init int blk_softirq_init(void)
{
        int i;

        for_each_possible_cpu(i)
                INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));

        open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);

ok, timeout error handler最后走到mmc_cmdq_softirq_done():

/*
 * BLOCK_SOFTIRQ completion callback for CMDQ: forwards to the
 * driver-installed completion handler (mq->cmdq_complete_fn).
 */
static void mmc_cmdq_softirq_done(struct request *rq)
{
    struct mmc_queue *mq = rq->q->queuedata;

    mq->cmdq_complete_fn(rq); //tj: mmc_blk_cmdq_complete_rq(), check above
}
/* invoked by block layer in softirq context */
void mmc_blk_cmdq_complete_rq(struct request *rq)
{
    ...
    //tj: 先取得error
    if (mrq->cmd && mrq->cmd->error)
        err = mrq->cmd->error;
    else if (mrq->data && mrq->data->error)
        err = mrq->data->error;
    if (cmdq_req->resp_err)
        err_resp = cmdq_req->resp_err;

    //tj: 然后check err
    if ((err || err_resp) && !cmdq_req->skip_err_handling) {
        pr_err("%s: %s: txfr error(%d)/resp_err(%d)\n",
                mmc_hostname(mrq->host), __func__, err,
                err_resp);
        if (test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state)) {
            pr_err("%s: CQ in error state, ending current req: %d\n",
                __func__, err);
        } else {
            set_bit(CMDQ_STATE_ERR, &ctx_info->curr_state);
            BUG_ON(host->err_mrq != NULL);
            host->err_mrq = mrq;
            schedule_work(&mq->cmdq_err_work); //tj: here
        }
        goto out;
    }    
}

因为我们的->curr_state已置位CMDQ_STATE_REQ_TIMED_OUT,check ->cmdq_err_work:

/*
 * Workqueue wrapper (scheduled from mmc_blk_cmdq_complete_rq) that
 * runs the driver's cmdq error handler in process context.
 */
static void mmc_cmdq_error_work(struct work_struct *work)
{
    struct mmc_queue *mq = container_of(work, struct mmc_queue,
                        cmdq_err_work);

    mq->cmdq_error_fn(mq);
}
/*
 * mmc_blk_cmdq_err: error handling of cmdq error requests.
 * Function should be called in context of error out request
 * which has claim_host and rpm acquired.
 * This may be called with CQ engine halted. Make sure to
 * unhalt it after error recovery.
 *
 * TODO: Currently cmdq error handler does reset_all in case
 * of any erorr. Need to optimize error handling.
 */
static void mmc_blk_cmdq_err(struct mmc_queue *mq)
{
    ...
    pr_err("%s: %s Starting cmdq Error handler\n",
        mmc_hostname(host), __func__);
    q = mrq->req->q;
    err = mmc_cmdq_halt(host, true); //tj: 先halt cmdq
    if (err) {
        pr_err("halt: failed: %d\n", err);
        goto reset;
    }
    ...

    //tj: 当timeout时获取device status
    /*
     * TIMEOUT errrors can happen because of execution error
     * in the last command. So send cmd 13 to get device status
     */
    if ((mrq->cmd && (mrq->cmd->error == -ETIMEDOUT)) ||
            (mrq->data && (mrq->data->error == -ETIMEDOUT))) {
        if (mmc_host_halt(host) || mmc_host_cq_disable(host)) {
            ret = get_card_status(host->card, &status, 0);
            if (ret)
                pr_err("%s: CMD13 failed with err %d\n",
                        mmc_hostname(host), ret);
        }
        pr_err("%s: Timeout error detected with device status 0x%08x\n",
            mmc_hostname(host), status);
    }

    /*
     * In case of software request time-out, we schedule err work only for
     * the first error out request and handles all other request in flight
     * here.
     */
    if (test_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state)) {
        err = -ETIMEDOUT;
    } else if (mrq->data && mrq->data->error) {
        err = mrq->data->error;
    } else if (mrq->cmd && mrq->cmd->error) {
        /* DCMD commands */
        err = mrq->cmd->error;
    }

reset:
    mmc_blk_cmdq_reset_all(host, err); //tj: reset all
    if (mrq->cmdq_req->resp_err)
        mrq->cmdq_req->resp_err = false;
    mmc_cmdq_halt(host, false); //tj: unhalt cmdq

    host->err_mrq = NULL;

    //tj: clear CMDQ_STATE_REQ_TIMED_OUT/CMDQ_STATE_ERR
    clear_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state);
    WARN_ON(!test_and_clear_bit(CMDQ_STATE_ERR, &ctx_info->curr_state));

来看mmc_blk_cmdq_reset_all():

/**
 * mmc_blk_cmdq_reset_all - Reset everything for CMDQ block request.
 * @host:    mmc_host pointer.
 * @err:    error for which reset is performed.
 *
 * This function implements reset_all functionality for
 * cmdq. It resets the controller, power cycle the card,
 * and invalidate all busy tags(requeue all request back to
 * elevator).
 */
static void mmc_blk_cmdq_reset_all(struct mmc_host *host, int err)
{

看注释是复位controller,掉电复位eMMC,还有就是清理busy tags。call stack:

mmc_blk_cmdq_reset(,false) -> mmc_cmdq_hw_reset()
/*
 * mmc_cmdq_hw_reset: Helper API for doing
 * reset_all of host and reinitializing card.
 * This must be called with mmc_claim_host
 * acquired by the caller.
 * Returns -EOPNOTSUPP when the bus ops provide no reset handler.
 */
int mmc_cmdq_hw_reset(struct mmc_host *host)
{
    if (!host->bus_ops->reset)
        return -EOPNOTSUPP;

    return host->bus_ops->reset(host); //tj: mmc_reset()
}
/*
 * Reset the eMMC card: prefer the hardware RST_n line when the host
 * supports it, otherwise fall back to a full power cycle. In either
 * case the card is then fully re-initialized via mmc_init_card().
 */
static int mmc_reset(struct mmc_host *host)
{
    struct mmc_card *card = host->card;
    int ret;

    if ((host->caps & MMC_CAP_HW_RESET) && host->ops->hw_reset &&
         mmc_can_reset(card)) {
        mmc_host_clk_hold(host);
        /* If the card accept RST_n signal, send it. */
        mmc_set_clock(host, host->f_init);
        host->ops->hw_reset(host);
        /* Set initial state and call mmc_set_ios */
        mmc_set_initial_state(host);
        mmc_host_clk_release(host);
    } else {
        /* Do a brute force power cycle */
        mmc_power_cycle(host, card->ocr);
        mmc_pwrseq_reset(host);
    }

    /* crypto keys are lost across reset; force re-programming */
    if (host->inlinecrypt_support)
        host->inlinecrypt_reset_needed = true;

    /* soft re-init of the card (CID/CSD read, bus width/speed setup) */
    ret = mmc_init_card(host, host->card->ocr, host->card);
    if (ret) {
        pr_err("%s: %s: mmc_init_card failed (%d)\n",
            mmc_hostname(host), __func__, ret);
        return ret;
    }

    return ret;
}

主要看host有没有实现reset:如果有就直接发复位信号给eMMC;如果没有就强制power cycle(掉电再上电)。最后走软复位重新初始化card(mmc_init_card())。

refer

  • JEDEC STANDARD JESD84-B51.pdf