if (q->rq_timed_out_fn)
	ret = q->rq_timed_out_fn(req); //tj: here
switch (ret) {
case BLK_EH_HANDLED:
	__blk_complete_request(req); //tj: here
	break;
case BLK_EH_RESET_TIMER:
	blk_add_timer(req);
	blk_clear_rq_complete(req);
	break;
case BLK_EH_NOT_HANDLED:
	/*
	 * LLD handles this for now but in the future
	 * we can send a request msg to abort the command
	 * and we can move more of the generic scsi eh code to
	 * the blk layer.
	 */
	break;
default:
	printk(KERN_ERR "block: bad eh return: %d\n", ret);
	break;
}
}
Who triggers this timer? It should be started by blk_add_timer(), called when a request is started:
/**
 * blk_add_timer - Start timeout timer for a single request
 * @req: request that is about to start running.
 *
 * Notes:
 *    Each request has its own timer, and as it is added to the queue, we
 *    set up the timer. When the request completes, we cancel the timer.
 */
void blk_add_timer(struct request *req)
{
	struct request_queue *q = req->q;
	unsigned long expiry;
	if (!q->mq_ops)
		lockdep_assert_held(q->queue_lock);
	/* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */
	if (!q->mq_ops && !q->rq_timed_out_fn)
		return;
BUG_ON(!list_empty(&req->timeout_list));
	/*
	 * Some LLDs, like scsi, peek at the timeout to prevent a
	 * command from being retried forever.
	 */
	if (!req->timeout)
		req->timeout = q->rq_timeout;
	/*
	 * Wait until all of the following conditions are true:
	 * 1. There is a request pending in the block layer queue
	 *    to be processed.
	 * 2. If the peeked request is flush/discard then there shouldn't
	 *    be any other direct command active.
	 * 3. cmdq state should be unhalted.
	 * 4. cmdq state shouldn't be in error state.
	 * 5. There is no outstanding RPMB request pending.
	 * 6. free tag available to process the new request.
	 *    (This must be the last condition to check)
	 */
	wait_event(ctx->wait, kthread_should_stop() ||
		(mmc_peek_request(mq) &&
		 !(((req_op(mq->cmdq_req_peeked) == REQ_OP_FLUSH) ||
		    (req_op(mq->cmdq_req_peeked) == REQ_OP_DISCARD) ||
		    (req_op(mq->cmdq_req_peeked) == REQ_OP_SECURE_ERASE)) &&
		   test_bit(CMDQ_STATE_DCMD_ACTIVE, &ctx->curr_state)) &&
		 !(!host->card->part_curr && !mmc_card_suspended(host->card) &&
		   mmc_host_halt(host)) &&
		 !(!host->card->part_curr && mmc_host_cq_disable(host) &&
		   !mmc_card_suspended(host->card)) &&
		 !test_bit(CMDQ_STATE_ERR, &ctx->curr_state) &&
		 !atomic_read(&host->rpmb_req_pending) &&
		 !mmc_check_blk_queue_start_tag(q, mq->cmdq_req_peeked))); //tj: here
}
	current->flags |= PF_MEMALLOC;
	if (card->host->wakeup_on_idle)
		set_wake_up_idle(true);
	while (1) {
		int ret = 0;
		mmc_cmdq_ready_wait(host, mq);
		if (kthread_should_stop())
			break;
		ret = mmc_cmdq_down_rwsem(host, mq->cmdq_req_peeked);
		if (ret) {
			mmc_cmdq_up_rwsem(host);
			continue;
		}
		ret = mq->cmdq_issue_fn(mq, mq->cmdq_req_peeked);
		mmc_cmdq_up_rwsem(host);
		/*
		 * Don't requeue if issue_fn fails.
		 * Recovery will come by completion softirq.
		 * Also we end the request if there is a partition switch
		 * error, so we should not requeue the request here.
		 */
	} /* loop */
	/*
	 * The mmc_queue_req will be present only if the request
	 * is issued to the LLD. The request could be fetched from
	 * block layer queue but could be waiting to be issued
	 * (for e.g. clock scaling is waiting for an empty cmdq queue)
	 * Reset the timer in such cases to give LLD more time
	 */
	if (!mq_rq) {
		pr_warn("%s: restart timer for tag: %d\n", __func__, req->tag);
		return BLK_EH_RESET_TIMER;
	}
	if (mrq->cmd && mrq->cmd->error) {
		if (!(mrq->req->cmd_flags & REQ_PREFLUSH)) {
			/*
			 * Notify completion for non flush commands like
			 * discard that wait for DCMD finish.
			 */
			set_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state);
			complete(&mrq->completion);
			return BLK_EH_NOT_HANDLED;
		}
	}
	if (test_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state) ||
	    test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state))
		return BLK_EH_NOT_HANDLED;
if (q->rq_timed_out_fn)
	ret = q->rq_timed_out_fn(req); //tj: here
switch (ret) {
case BLK_EH_HANDLED:
	__blk_complete_request(req); //tj: here
block/blk-softirq.c:
void __blk_complete_request(struct request *req)
{
	...
	if (ccpu == cpu || shared) {
		struct list_head *list;
do_local:
		list = this_cpu_ptr(&blk_cpu_done);
		list_add_tail(&req->ipi_list, list);
		/*
		 * if the list only contains our just added request,
		 * signal a raise of the softirq. If there are already
		 * entries there, someone already raised the irq but it
		 * hasn't run yet.
		 */
		if (list->next == &req->ipi_list)
			raise_softirq_irqoff(BLOCK_SOFTIRQ); //tj: here
	} else if (raise_blk_irq(ccpu, req))
		goto do_local;
	...
}
This is softirq-related: completion is signalled via raise_softirq_irqoff(). Now let's look at the MMC driver:
/*
 * mmc_blk_cmdq_err: error handling of cmdq error requests.
 * Function should be called in context of error out request
 * which has claim_host and rpm acquired.
 * This may be called with CQ engine halted. Make sure to
 * unhalt it after error recovery.
 *
 * TODO: Currently cmdq error handler does reset_all in case
 * of any error. Need to optimize error handling.
 */
static void mmc_blk_cmdq_err(struct mmc_queue *mq)
{
	...
	pr_err("%s: %s Starting cmdq Error handler\n",
	       mmc_hostname(host), __func__);
	q = mrq->req->q;
	err = mmc_cmdq_halt(host, true); //tj: halt the cmdq engine first
	if (err) {
		pr_err("halt: failed: %d\n", err);
		goto reset;
	}
	...
	//tj: on timeout, fetch the device status
	/*
	 * TIMEOUT errors can happen because of execution error
	 * in the last command. So send cmd 13 to get device status
	 */
	if ((mrq->cmd && (mrq->cmd->error == -ETIMEDOUT)) ||
	    (mrq->data && (mrq->data->error == -ETIMEDOUT))) {
		if (mmc_host_halt(host) || mmc_host_cq_disable(host)) {
			ret = get_card_status(host->card, &status, 0);
			if (ret)
				pr_err("%s: CMD13 failed with err %d\n",
				       mmc_hostname(host), ret);
		}
		pr_err("%s: Timeout error detected with device status 0x%08x\n",
		       mmc_hostname(host), status);
	}
	/*
	 * In case of software request time-out, we schedule err work only for
	 * the first error out request and handles all other request in flight
	 * here.
	 */
	if (test_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state)) {
		err = -ETIMEDOUT;
	} else if (mrq->data && mrq->data->error) {
		err = mrq->data->error;
	} else if (mrq->cmd && mrq->cmd->error) {
		/* DCMD commands */
		err = mrq->cmd->error;
	}
reset:
	mmc_blk_cmdq_reset_all(host, err); //tj: reset all
	if (mrq->cmdq_req->resp_err)
		mrq->cmdq_req->resp_err = false;
	mmc_cmdq_halt(host, false); //tj: unhalt cmdq
/**
 * mmc_blk_cmdq_reset_all - Reset everything for CMDQ block request.
 * @host: mmc_host pointer.
 * @err: error for which reset is performed.
 *
 * This function implements reset_all functionality for
 * cmdq. It resets the controller, power cycle the card,
 * and invalidate all busy tags (requeue all request back to
 * elevator).
 */
static void mmc_blk_cmdq_reset_all(struct mmc_host *host, int err)
{
/*
 * mmc_cmdq_hw_reset: Helper API for doing
 * reset_all of host and reinitializing card.
 * This must be called with mmc_claim_host
 * acquired by the caller.
 */
int mmc_cmdq_hw_reset(struct mmc_host *host)
{
	if (!host->bus_ops->reset)
		return -EOPNOTSUPP;
static int mmc_reset(struct mmc_host *host)
{
	struct mmc_card *card = host->card;
	int ret;
	if ((host->caps & MMC_CAP_HW_RESET) && host->ops->hw_reset &&
	     mmc_can_reset(card)) {
		mmc_host_clk_hold(host);
		/* If the card accept RST_n signal, send it. */
		mmc_set_clock(host, host->f_init);
		host->ops->hw_reset(host);
		/* Set initial state and call mmc_set_ios */
		mmc_set_initial_state(host);
		mmc_host_clk_release(host);
	} else {
		/* Do a brute force power cycle */
		mmc_power_cycle(host, card->ocr);
		mmc_pwrseq_reset(host);
	}
	if (host->inlinecrypt_support)
		host->inlinecrypt_reset_needed = true;
	ret = mmc_init_card(host, host->card->ocr, host->card);
	if (ret) {
		pr_err("%s: %s: mmc_init_card failed (%d)\n",
		       mmc_hostname(host), __func__, ret);
		return ret;
	}