NVME Doorbell 寄存器 数据请求时doorbell 处理

2024-05-08 12:12

本文主要是介绍NVME Doorbell 寄存器 数据请求时doorbell 处理,希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!

3.NVMe寄存器配置
3.1 寄存器定义
NVMe寄存器主要分为两部分,一部分定义了Controller整体属性,一部分用来存放每组队列的头尾DB寄存器。

CAP——控制器能力,定义了内存页大小的最大最小值、支持的I/O指令集、DB寄存器步长、等待时间界限、仲裁机制、队列是否物理上连续、队列大小;
VS——版本号,定义了控制器实现NVMe协议的版本号;
INTMS——中断掩码,每个bit对应一个中断向量,使用MSI-X中断时,此寄存器无效;
INTMC——中断有效,每个bit对应一个中断向量,使用MSI-X中断时,此寄存器无效;
CC——控制器配置,定义了I/O SQ和CQ队列元素大小、关机状态提醒、仲裁机制、内存页大小、支持的I/O指令集、使能;
CSTS——控制器状态,包括关机状态、控制器致命错误、就绪状态;
AQA——Admin 队列属性,包括SQ大小和CQ大小;
ASQ——Admin SQ基地址;
ACQ——Admin CQ基地址;
1000h之后的寄存器定义了队列的头、尾DB寄存器。
3.2寄存器理解
CAP寄存器标识的是Controller具有多少能力,而CC寄存器则是指当前Controller选择了哪些能力,可以理解为CC是CAP的一个子集;如果重启(reset)的话,可以更换CC配置;
CC.EN置一,表示Controller已经可以开始处理NVM命令,从1到0表示Controller重启;
CC.EN与CSTS.RDY关系密切,CSTS.RDY总是在CC.EN之后由Controller改变,其他不符合执行顺序的操作都将产生未定义的行为;
Admin队列由host直接创建,AQA、ASQ、ACQ三个寄存器标识了Admin队列,而其他I/O队列则由Admin命令创建(eg,创建I/O CQ命令);
Admin队列的头、尾DB寄存器标识为0,其他I/O队列标识由host按照一定规则分配;只有16bit的有效位,是因为队列深度最大64K。
实际的物理设备CAP.DSTRD值为0,dev->db_stride为1,之后分析中默认db_stride为1
                        
原文链接:https://blog.csdn.net/qq_39021670/article/details/114896973

由dev->dbs的使用方式可知,每一个DB寄存器对中,前4个字节为SQ Tail DB,后4个字节为CQ Head DB

/*
 * Write the SQ tail doorbell if we were explicitly asked to (write_sq),
 * or if the next submission would wrap the queue; otherwise defer the
 * MMIO write so consecutive submissions can be batched.
 */
static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
{
	if (!write_sq) {
		u16 next_tail = nvmeq->sq_tail + 1;

		if (next_tail == nvmeq->q_depth)
			next_tail = 0;
		if (next_tail != nvmeq->last_sq_tail)
			return;
	}

	/*
	 * The first 4 bytes of each doorbell pair hold the SQ tail; only
	 * do the MMIO write when nvme_dbbuf_update_and_check_event() says
	 * the device needs it.
	 */
	if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail,
			nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei))
		writel(nvmeq->sq_tail, nvmeq->q_db);
	nvmeq->last_sq_tail = nvmeq->sq_tail;
}
/* Ring the CQ head doorbell for this queue. */
static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
{
	u16 head = nvmeq->cq_head;

	/*
	 * CQ head lives in the second 4 bytes of the doorbell pair
	 * (q_db + db_stride); skip the MMIO write when the shadow
	 * doorbell check says it is unnecessary.
	 */
	if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
			nvmeq->dbbuf_cq_ei))
		writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
}

/*
 * Interrupt handler for an NVMe queue: reap newly posted completion
 * entries and hand the finished requests back to the block layer.
 */
static irqreturn_t nvme_irq(int irq, void *data)
{
	struct nvme_queue *nvmeq = data;
	irqreturn_t ret = IRQ_NONE;
	u16 start, end;

	/*
	 * The rmb/wmb pair ensures we see all updates from a previous run of
	 * the irq handler, even if that was on another CPU.
	 */
	rmb();
	if (nvmeq->cq_head != nvmeq->last_cq_head)
		ret = IRQ_HANDLED;
	/* scan the CQ for pending entries and advance cq_head */
	nvme_process_cq(nvmeq, &start, &end, -1);
	nvmeq->last_cq_head = nvmeq->cq_head;
	wmb();

	if (start != end) {
		/* complete each reaped CQ entry in [start, end) */
		nvme_complete_cqes(nvmeq, start, end);
		return IRQ_HANDLED;
	}

	return ret;
}

依次取出SSD中已经返回的完成项数据,然后将CQ的head写入Doorbell寄存器

/*
 * Reap all pending completion entries: record the window [*start, *end)
 * of consumed CQ slots, count entries matching @tag (-1U matches all),
 * and ring the CQ head doorbell if anything was consumed.
 */
static inline int nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
		u16 *end, unsigned int tag)
{
	int found = 0;

	*start = nvmeq->cq_head;
	while (nvme_cqe_pending(nvmeq)) {
		if (tag == -1U || nvmeq->cqes[nvmeq->cq_head].command_id == tag)
			found++;
		nvme_update_cq_head(nvmeq);
	}
	*end = nvmeq->cq_head;

	/* tell the device how far we have consumed the CQ */
	if (*start != *end)
		nvme_ring_cq_doorbell(nvmeq);
	return found;
}

/*
 * Ring the CQ head doorbell: write cq_head into the second 4 bytes of
 * the queue's doorbell pair, unless the shadow doorbell check says the
 * MMIO write can be skipped.
 */
static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
{
	u16 head = nvmeq->cq_head;

	if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
			nvmeq->dbbuf_cq_ei))
		writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
}

依次处理cq 中的数据返回给block 层

/*
 * Record the NVMe status and result on the request, then complete it
 * through the block layer.
 */
static inline void nvme_end_request(struct request *req, __le16 status,
		union nvme_result result)
{
	struct nvme_request *rq = nvme_req(req);

	/* bit 0 of the status field is the phase tag; strip it */
	rq->status = le16_to_cpu(status) >> 1;
	rq->result = result;
	/* inject error when permitted by fault injection framework */
	nvme_should_fail(req);
	/* hand the request back to the block layer */
	blk_mq_complete_request(req);
}

/* Process a single completion queue entry at index @idx. */
static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
{
	volatile struct nvme_completion *cqe = &nvmeq->cqes[idx];
	struct request *req;

	/*
	 * AEN requests are special as they don't time out and can
	 * survive any kind of queue freeze and often don't respond to
	 * aborts.  We don't even bother to allocate a struct request
	 * for them but rather special case them here.
	 */
	if (unlikely(nvmeq->qid == 0 &&
			cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) {
		nvme_complete_async_event(&nvmeq->dev->ctrl,
				cqe->status, &cqe->result);
		return;
	}

	/* map the completion's command_id (blk-mq tag) back to its request */
	req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id);
	if (unlikely(!req)) {
		dev_warn(nvmeq->dev->ctrl.device,
			"invalid id %d completed on queue %d\n",
			cqe->command_id, le16_to_cpu(cqe->sq_id));
		return;
	}

	trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail);
	nvme_end_request(req, cqe->status, cqe->result);
}

/* Complete every CQ entry in the window [start, end), wrapping at q_depth. */
static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end)
{
	while (start != end) {
		nvme_handle_cqe(nvmeq, start);
		if (++start == nvmeq->q_depth)
			start = 0;
	}
}

/* blk-mq operations for the admin queue (no commit_rqs/map_queues/poll). */
static const struct blk_mq_ops nvme_mq_admin_ops = {
	.queue_rq       = nvme_queue_rq,
	.complete       = nvme_pci_complete_rq,
	.init_hctx      = nvme_admin_init_hctx,
	.init_request   = nvme_init_request,
	.timeout        = nvme_timeout,
};

/* blk-mq operations for the I/O queues. */
static const struct blk_mq_ops nvme_mq_ops = {
	.queue_rq       = nvme_queue_rq,
	.complete       = nvme_pci_complete_rq,
	.commit_rqs     = nvme_commit_rqs,
	.init_hctx      = nvme_init_hctx,
	.init_request   = nvme_init_request,
	.map_queues     = nvme_pci_map_queues,
	.timeout        = nvme_timeout,
	.poll           = nvme_poll,
};
 

admin  queue

nvme_queue_rq

io queue 

nvme_queue_rq

nvme_commit_rqs

/*
 * Issue a single request directly to the driver's ->queue_rq(),
 * bypassing the dispatch list; requeue it on resource shortage.
 */
static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
		struct request *rq, blk_qc_t *cookie, bool last)
{
	struct request_queue *q = rq->q;
	struct blk_mq_queue_data bd = {
		.rq = rq,
		.last = last,
	};
	blk_qc_t new_cookie;
	blk_status_t ret;

	new_cookie = request_to_qc_t(hctx, rq);

	/*
	 * For OK queue, we are done. For error, caller may kill it.
	 * Any other error (busy), just add it to our list as we
	 * previously would have done.
	 */
	ret = q->mq_ops->queue_rq(hctx, &bd);
	switch (ret) {
	case BLK_STS_OK:
		blk_mq_update_dispatch_busy(hctx, false);
		*cookie = new_cookie;
		break;
	case BLK_STS_RESOURCE:
	case BLK_STS_DEV_RESOURCE:
		blk_mq_update_dispatch_busy(hctx, true);
		__blk_mq_requeue_request(rq);
		break;
	default:
		blk_mq_update_dispatch_busy(hctx, false);
		*cookie = BLK_QC_T_NONE;
		break;
	}

	return ret;
}

/*
 * Dispatch the requests on @list to the driver's ->queue_rq(). Returns
 * true if the whole list was consumed; requests the driver could not
 * accept are parked on hctx->dispatch for a later queue run.
 */
bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
		bool got_budget)
{
	struct blk_mq_hw_ctx *hctx;
	struct request *rq, *nxt;
	bool no_tag = false;
	int errors, queued;
	blk_status_t ret = BLK_STS_OK;
	bool no_budget_avail = false;

	if (list_empty(list))
		return false;

	WARN_ON(!list_is_singular(list) && got_budget);

	/*
	 * Now process all the entries, sending them to the driver.
	 */
	errors = queued = 0;
	do {
		struct blk_mq_queue_data bd;

		rq = list_first_entry(list, struct request, queuelist);

		hctx = rq->mq_hctx;
		if (!got_budget && !blk_mq_get_dispatch_budget(hctx)) {
			blk_mq_put_driver_tag(rq);
			no_budget_avail = true;
			break;
		}

		if (!blk_mq_get_driver_tag(rq)) {
			/*
			 * The initial allocation attempt failed, so we need to
			 * rerun the hardware queue when a tag is freed. The
			 * waitqueue takes care of that. If the queue is run
			 * before we add this entry back on the dispatch list,
			 * we'll re-run it below.
			 */
			if (!blk_mq_mark_tag_wait(hctx, rq)) {
				blk_mq_put_dispatch_budget(hctx);
				/*
				 * For non-shared tags, the RESTART check
				 * will suffice.
				 */
				if (hctx->flags & BLK_MQ_F_TAG_SHARED)
					no_tag = true;
				break;
			}
		}

		list_del_init(&rq->queuelist);

		bd.rq = rq;

		/*
		 * Flag last if we have no more requests, or if we have more
		 * but can't assign a driver tag to it.
		 */
		if (list_empty(list))
			bd.last = true;
		else {
			nxt = list_first_entry(list, struct request, queuelist);
			bd.last = !blk_mq_get_driver_tag(nxt);
		}

		/* issue the request to the driver (e.g. nvme_queue_rq) */
		ret = q->mq_ops->queue_rq(hctx, &bd);
		if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
			blk_mq_handle_dev_resource(rq, list);
			break;
		}

		if (unlikely(ret != BLK_STS_OK)) {
			errors++;
			blk_mq_end_request(rq, BLK_STS_IOERR);
			continue;
		}

		queued++;
	} while (!list_empty(list));

	hctx->dispatched[queued_to_index(queued)]++;

	/*
	 * Any items that need requeuing? Stuff them into hctx->dispatch,
	 * that is where we will continue on next queue run.
	 */
	if (!list_empty(list)) {
		bool needs_restart;

		/*
		 * If we didn't flush the entire list, we could have told
		 * the driver there was more coming, but that turned out to
		 * be a lie.
		 */
		if (q->mq_ops->commit_rqs)
			/* e.g. nvme_commit_rqs: flush the SQ doorbell once */
			q->mq_ops->commit_rqs(hctx);

		spin_lock(&hctx->lock);
		list_splice_tail_init(list, &hctx->dispatch);
		spin_unlock(&hctx->lock);

		/*
		 * Order adding requests to hctx->dispatch and checking
		 * SCHED_RESTART flag. The pair of this smp_mb() is the one
		 * in blk_mq_sched_restart(). Avoid restart code path to
		 * miss the new added requests to hctx->dispatch, meantime
		 * SCHED_RESTART is observed here.
		 */
		smp_mb();

		/*
		 * If SCHED_RESTART was set by the caller of this function and
		 * it is no longer set that means that it was cleared by another
		 * thread and hence that a queue rerun is needed.
		 *
		 * If 'no_tag' is set, that means that we failed getting
		 * a driver tag with an I/O scheduler attached. If our dispatch
		 * waitqueue is no longer active, ensure that we run the queue
		 * AFTER adding our entries back to the list.
		 *
		 * If no I/O scheduler has been configured it is possible that
		 * the hardware queue got stopped and restarted before requests
		 * were pushed back onto the dispatch list. Rerun the queue to
		 * avoid starvation. Notes:
		 * - blk_mq_run_hw_queue() checks whether or not a queue has
		 *   been stopped before rerunning a queue.
		 * - Some but not all block drivers stop a queue before
		 *   returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
		 *   and dm-rq.
		 *
		 * If driver returns BLK_STS_RESOURCE and SCHED_RESTART
		 * bit is set, run queue after a delay to avoid IO stalls
		 * that could otherwise occur if the queue is idle.  We'll do
		 * similar if we couldn't get budget and SCHED_RESTART is set.
		 */
		needs_restart = blk_mq_sched_needs_restart(hctx);
		if (!needs_restart ||
		    (no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
			blk_mq_run_hw_queue(hctx, true);
		else if (needs_restart && (ret == BLK_STS_RESOURCE ||
					   no_budget_avail))
			blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);

		blk_mq_update_dispatch_busy(hctx, true);
		return false;
	} else
		blk_mq_update_dispatch_busy(hctx, false);

	/*
	 * If the host/device is unable to accept more work, inform the
	 * caller of that.
	 */
	if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
		return false;

	return (queued + errors) != 0;
}

这篇关于NVME Doorbell 寄存器 数据请求时doorbell 处理的文章就介绍到这儿,希望我们推荐的文章对编程师们有所帮助!



http://www.chinasem.cn/article/970278

相关文章

Spring 请求之传递 JSON 数据的操作方法

《Spring请求之传递JSON数据的操作方法》JSON就是一种数据格式,有自己的格式和语法,使用文本表示一个对象或数组的信息,因此JSON本质是字符串,主要负责在不同的语言中数据传递和交换,这... 目录jsON 概念JSON 语法JSON 的语法JSON 的两种结构JSON 字符串和 Java 对象互转

Python使用getopt处理命令行参数示例解析(最佳实践)

《Python使用getopt处理命令行参数示例解析(最佳实践)》getopt模块是Python标准库中一个简单但强大的命令行参数处理工具,它特别适合那些需要快速实现基本命令行参数解析的场景,或者需要... 目录为什么需要处理命令行参数?getopt模块基础实际应用示例与其他参数处理方式的比较常见问http

Java Response返回值的最佳处理方案

《JavaResponse返回值的最佳处理方案》在开发Web应用程序时,我们经常需要通过HTTP请求从服务器获取响应数据,这些数据可以是JSON、XML、甚至是文件,本篇文章将详细解析Java中处理... 目录摘要概述核心问题:关键技术点:源码解析示例 1:使用HttpURLConnection获取Resp

C++如何通过Qt反射机制实现数据类序列化

《C++如何通过Qt反射机制实现数据类序列化》在C++工程中经常需要使用数据类,并对数据类进行存储、打印、调试等操作,所以本文就来聊聊C++如何通过Qt反射机制实现数据类序列化吧... 目录设计预期设计思路代码实现使用方法在 C++ 工程中经常需要使用数据类,并对数据类进行存储、打印、调试等操作。由于数据类

Java中Switch Case多个条件处理方法举例

《Java中SwitchCase多个条件处理方法举例》Java中switch语句用于根据变量值执行不同代码块,适用于多个条件的处理,:本文主要介绍Java中SwitchCase多个条件处理的相... 目录前言基本语法处理多个条件示例1:合并相同代码的多个case示例2:通过字符串合并多个case进阶用法使用

Java实现优雅日期处理的方案详解

《Java实现优雅日期处理的方案详解》在我们的日常工作中,需要经常处理各种格式,各种类似的的日期或者时间,下面我们就来看看如何使用java处理这样的日期问题吧,感兴趣的小伙伴可以跟随小编一起学习一下... 目录前言一、日期的坑1.1 日期格式化陷阱1.2 时区转换二、优雅方案的进阶之路2.1 线程安全重构2

SpringBoot使用GZIP压缩反回数据问题

《SpringBoot使用GZIP压缩反回数据问题》:本文主要介绍SpringBoot使用GZIP压缩反回数据问题,具有很好的参考价值,希望对大家有所帮助,如有错误或未考虑完全的地方,望不吝赐教... 目录SpringBoot使用GZIP压缩反回数据1、初识gzip2、gzip是什么,可以干什么?3、Spr

Python处理函数调用超时的四种方法

《Python处理函数调用超时的四种方法》在实际开发过程中,我们可能会遇到一些场景,需要对函数的执行时间进行限制,例如,当一个函数执行时间过长时,可能会导致程序卡顿、资源占用过高,因此,在某些情况下,... 目录前言func-timeout1. 安装 func-timeout2. 基本用法自定义进程subp

Java字符串处理全解析(String、StringBuilder与StringBuffer)

《Java字符串处理全解析(String、StringBuilder与StringBuffer)》:本文主要介绍Java字符串处理全解析(String、StringBuilder与StringBu... 目录Java字符串处理全解析:String、StringBuilder与StringBuffer一、St

SpringBoot集成Milvus实现数据增删改查功能

《SpringBoot集成Milvus实现数据增删改查功能》milvus支持的语言比较多,支持python,Java,Go,node等开发语言,本文主要介绍如何使用Java语言,采用springboo... 目录1、Milvus基本概念2、添加maven依赖3、配置yml文件4、创建MilvusClient