NVME Doorbell 寄存器 数据请求时doorbell 处理

2024-05-08 12:12

本文主要是介绍NVME Doorbell 寄存器 数据请求时doorbell 处理,希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!

3.NVMe寄存器配置
3.1 寄存器定义
NVMe寄存器主要分为两部分,一部分定义了Controller整体属性,一部分用来存放每组队列的头尾DB寄存器。

CAP——控制器能力,定义了内存页大小的最大最小值、支持的I/O指令集、DB寄存器步长、等待时间界限、仲裁机制、队列是否物理上连续、队列大小;
VS——版本号,定义了控制器实现NVMe协议的版本号;
INTMS——中断掩码,每个bit对应一个中断向量,使用MSI-X中断时,此寄存器无效;
INTMC——中断有效,每个bit对应一个中断向量,使用MSI-X中断时,此寄存器无效;
CC——控制器配置,定义了I/O SQ和CQ队列元素大小、关机状态提醒、仲裁机制、内存页大小、支持的I/O指令集、使能;
CSTS——控制器状态,包括关机状态、控制器致命错误、就绪状态;
AQA——Admin 队列属性,包括SQ大小和CQ大小;
ASQ——Admin SQ基地址;
ACQ——Admin CQ基地址;
1000h之后的寄存器定义了队列的头、尾DB寄存器。
3.2寄存器理解
CAP寄存器标识的是Controller具有多少能力,而CC寄存器则是指当前Controller选择了哪些能力,可以理解为CC是CAP的一个子集;如果重启(reset)的话,可以更换CC配置;
CC.EN置一,表示Controller已经可以开始处理NVM命令,从1到0表示Controller重启;
CC.EN与CSTS.RDY关系密切,CSTS.RDY总是在CC.EN之后由Controller改变,其他不符合执行顺序的操作都将产生未定义的行为;
Admin队列由host直接创建,AQA、ASQ、ACQ三个寄存器标识了Admin队列,而其他I/O队列则由Admin命令创建(eg,创建I/O CQ命令);
Admin队列的头、尾DB寄存器标识为0,其他I/O队列标识由host按照一定规则分配;只有16bit的有效位,是因为队列深度最大64K。
实际的物理设备CAP.DSTRD值为0,dev->db_stride为1,之后分析中默认db_stride为1
                        
原文链接:https://blog.csdn.net/qq_39021670/article/details/114896973

由dev->dbs使用方式可知,每一个DB寄存器对,前4个字节为SQ Tail DB,后4个字节为CQ Head DB

/*
 * Write the SQ tail doorbell if we are asked to (write_sq), or if the
 * next command would wrap the submission queue.
 *
 * NOTE(review): in the original transcription the whole function was
 * collapsed onto one line, so the "//" comment swallowed the writel()
 * call; formatting restored here.
 */
static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
{
	if (!write_sq) {
		u16 next_tail = nvmeq->sq_tail + 1;

		if (next_tail == nvmeq->q_depth)
			next_tail = 0;
		/* not wrapping yet and not forced: defer the doorbell write */
		if (next_tail != nvmeq->last_sq_tail)
			return;
	}

	if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail,
			nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei))
		/* first 4 bytes of the doorbell pair: SQ tail */
		writel(nvmeq->sq_tail, nvmeq->q_db);
	nvmeq->last_sq_tail = nvmeq->sq_tail;
}
/*
 * Tell the controller how far we have consumed the completion queue by
 * writing cq_head to the CQ head doorbell.
 *
 * NOTE(review): the collapsed one-line transcription let the "//" comment
 * swallow the following if-statement; formatting restored here.
 */
static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
{
	u16 head = nvmeq->cq_head;

	/* second 4 bytes of the doorbell pair: CQ head */
	if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
					      nvmeq->dbbuf_cq_ei))
		writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
}

/*
 * Interrupt handler for an NVMe queue: reap completed CQ entries and
 * complete the corresponding requests.
 *
 * NOTE(review): the collapsed transcription let two "//" comments swallow
 * the nvme_process_cq() and nvme_complete_cqes() calls; restored here.
 */
static irqreturn_t nvme_irq(int irq, void *data)
{
	struct nvme_queue *nvmeq = data;
	irqreturn_t ret = IRQ_NONE;
	u16 start, end;

	/*
	 * The rmb/wmb pair ensures we see all updates from a previous run of
	 * the irq handler, even if that was on another CPU.
	 */
	rmb();
	if (nvmeq->cq_head != nvmeq->last_cq_head)
		ret = IRQ_HANDLED;
	/* walk the CQ to its current tail, advancing cq_head */
	nvme_process_cq(nvmeq, &start, &end, -1);
	nvmeq->last_cq_head = nvmeq->cq_head;
	wmb();

	if (start != end) {
		/* complete each reaped CQ entry in order */
		nvme_complete_cqes(nvmeq, start, end);
		return IRQ_HANDLED;
	}

	return ret;
}

依次取出 SSD 已经返回的完成项(CQE),然后将 CQ 的 head 写入 Doorbell 寄存器

/*
 * Consume all pending completion entries, recording the [*start, *end)
 * range of cq_head that was covered. Returns the number of entries whose
 * command_id matched @tag (-1U matches everything). Rings the CQ head
 * doorbell only if at least one entry was consumed.
 */
static inline int nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
				  u16 *end, unsigned int tag)
{
	int found = 0;

	*start = nvmeq->cq_head;
	while (nvme_cqe_pending(nvmeq)) {
		if (tag == -1U || nvmeq->cqes[nvmeq->cq_head].command_id == tag)
			found++;
		nvme_update_cq_head(nvmeq);
	}
	*end = nvmeq->cq_head;

	/* only write the doorbell if we actually consumed entries */
	if (*start != *end)
		nvme_ring_cq_doorbell(nvmeq);
	return found;
}

/*
 * (Repeated listing.) Write cq_head to the CQ head doorbell — the second
 * 4-byte register of the queue's doorbell pair (q_db + db_stride).
 */
static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
{
	u16 head = nvmeq->cq_head;

	if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
					      nvmeq->dbbuf_cq_ei))
		writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
}

依次处理cq 中的数据返回给block 层

/*
 * Record the NVMe status/result on the request and hand it back to the
 * block layer for completion.
 *
 * NOTE(review): the collapsed transcription let "//" comments swallow the
 * blk_mq_complete_request() and blk_mq_tag_to_rq() calls below; formatting
 * restored and comments translated to English.
 */
static inline void nvme_end_request(struct request *req, __le16 status,
		union nvme_result result)
{
	struct nvme_request *rq = nvme_req(req);

	rq->status = le16_to_cpu(status) >> 1;
	rq->result = result;
	/* inject error when permitted by fault injection framework */
	nvme_should_fail(req);
	/* return the block-layer request */
	blk_mq_complete_request(req);
}

/*
 * Handle one completion queue entry at index @idx: map it back to its
 * request via the command_id (tag) and complete it.
 */
static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
{
	volatile struct nvme_completion *cqe = &nvmeq->cqes[idx];
	struct request *req;

	/*
	 * AEN requests are special as they don't time out and can
	 * survive any kind of queue freeze and often don't respond to
	 * aborts.  We don't even bother to allocate a struct request
	 * for them but rather special case them here.
	 */
	if (unlikely(nvmeq->qid == 0 &&
			cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) {
		nvme_complete_async_event(&nvmeq->dev->ctrl,
				cqe->status, &cqe->result);
		return;
	}

	/* translate the completion's command_id (tag) back into a request */
	req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id);
	if (unlikely(!req)) {
		dev_warn(nvmeq->dev->ctrl.device,
			"invalid id %d completed on queue %d\n",
			cqe->command_id, le16_to_cpu(cqe->sq_id));
		return;
	}

	trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail);
	nvme_end_request(req, cqe->status, cqe->result);
}

/*
 * Complete every CQ entry in the half-open range [start, end), wrapping
 * at q_depth.
 */
static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end)
{
	while (start != end) {
		nvme_handle_cqe(nvmeq, start);
		if (++start == nvmeq->q_depth)
			start = 0;
	}
}

/* blk-mq ops for the admin queue (no commit_rqs/map_queues/poll). */
static const struct blk_mq_ops nvme_mq_admin_ops = {
	.queue_rq	= nvme_queue_rq,
	.complete	= nvme_pci_complete_rq,
	.init_hctx	= nvme_admin_init_hctx,
	.init_request	= nvme_init_request,
	.timeout	= nvme_timeout,
};

/* blk-mq ops for I/O queues; commit_rqs rings the SQ tail doorbell. */
static const struct blk_mq_ops nvme_mq_ops = {
	.queue_rq	= nvme_queue_rq,
	.complete	= nvme_pci_complete_rq,
	.commit_rqs	= nvme_commit_rqs,
	.init_hctx	= nvme_init_hctx,
	.init_request	= nvme_init_request,
	.map_queues	= nvme_pci_map_queues,
	.timeout	= nvme_timeout,
	.poll		= nvme_poll,
};
 

admin  queue

nvme_queue_rq

io queue 

nvme_queue_rq

nvme_commit_rqs

/*
 * Issue a single request directly to the driver's queue_rq (bypassing the
 * dispatch list), updating hctx busy state and the caller's cookie
 * according to the driver's return status.
 */
static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
					    struct request *rq,
					    blk_qc_t *cookie, bool last)
{
	struct request_queue *q = rq->q;
	struct blk_mq_queue_data bd = {
		.rq = rq,
		.last = last,
	};
	blk_qc_t new_cookie;
	blk_status_t ret;

	new_cookie = request_to_qc_t(hctx, rq);

	/*
	 * For OK queue, we are done. For error, caller may kill it.
	 * Any other error (busy), just add it to our list as we
	 * previously would have done.
	 */
	ret = q->mq_ops->queue_rq(hctx, &bd);
	switch (ret) {
	case BLK_STS_OK:
		blk_mq_update_dispatch_busy(hctx, false);
		*cookie = new_cookie;
		break;
	case BLK_STS_RESOURCE:
	case BLK_STS_DEV_RESOURCE:
		/* driver is busy: mark busy and put the request back */
		blk_mq_update_dispatch_busy(hctx, true);
		__blk_mq_requeue_request(rq);
		break;
	default:
		blk_mq_update_dispatch_busy(hctx, false);
		*cookie = BLK_QC_T_NONE;
		break;
	}
	return ret;
}

*/
bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,bool got_budget)
{struct blk_mq_hw_ctx *hctx;struct request *rq, *nxt;bool no_tag = false;int errors, queued;blk_status_t ret = BLK_STS_OK;bool no_budget_avail = false;if (list_empty(list))return false;WARN_ON(!list_is_singular(list) && got_budget);/** Now process all the entries, sending them to the driver.*/errors = queued = 0;do {struct blk_mq_queue_data bd;rq = list_first_entry(list, struct request, queuelist);hctx = rq->mq_hctx;if (!got_budget && !blk_mq_get_dispatch_budget(hctx)) {blk_mq_put_driver_tag(rq);no_budget_avail = true;break;}if (!blk_mq_get_driver_tag(rq)) {/** The initial allocation attempt failed, so we need to* rerun the hardware queue when a tag is freed. The* waitqueue takes care of that. If the queue is run* before we add this entry back on the dispatch list,* we'll re-run it below.*/if (!blk_mq_mark_tag_wait(hctx, rq)) {blk_mq_put_dispatch_budget(hctx);/** For non-shared tags, the RESTART check* will suffice.*/if (hctx->flags & BLK_MQ_F_TAG_SHARED)no_tag = true;break;}}list_del_init(&rq->queuelist);bd.rq = rq;/** Flag last if we have no more requests, or if we have more* but can't assign a driver tag to it.*/if (list_empty(list))bd.last = true;else {nxt = list_first_entry(list, struct request, queuelist);bd.last = !blk_mq_get_driver_tag(nxt);}//下发ioret = q->mq_ops->queue_rq(hctx, &bd);if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {blk_mq_handle_dev_resource(rq, list);break;}if (unlikely(ret != BLK_STS_OK)) {errors++;blk_mq_end_request(rq, BLK_STS_IOERR);continue;}queued++;} while (!list_empty(list));hctx->dispatched[queued_to_index(queued)]++;/** Any items that need requeuing? 
Stuff them into hctx->dispatch,* that is where we will continue on next queue run.*/if (!list_empty(list)) {bool needs_restart;/** If we didn't flush the entire list, we could have told* the driver there was more coming, but that turned out to* be a lie.*/if (q->mq_ops->commit_rqs)//nvme io commitq->mq_ops->commit_rqs(hctx);spin_lock(&hctx->lock);list_splice_tail_init(list, &hctx->dispatch);spin_unlock(&hctx->lock);/** Order adding requests to hctx->dispatch and checking* SCHED_RESTART flag. The pair of this smp_mb() is the one* in blk_mq_sched_restart(). Avoid restart code path to* miss the new added requests to hctx->dispatch, meantime* SCHED_RESTART is observed here.*/smp_mb();/** If SCHED_RESTART was set by the caller of this function and* it is no longer set that means that it was cleared by another* thread and hence that a queue rerun is needed.** If 'no_tag' is set, that means that we failed getting* a driver tag with an I/O scheduler attached. If our dispatch* waitqueue is no longer active, ensure that we run the queue* AFTER adding our entries back to the list.** If no I/O scheduler has been configured it is possible that* the hardware queue got stopped and restarted before requests* were pushed back onto the dispatch list. Rerun the queue to* avoid starvation. Notes:* - blk_mq_run_hw_queue() checks whether or not a queue has*   been stopped before rerunning a queue.* - Some but not all block drivers stop a queue before*   returning BLK_STS_RESOURCE. Two exceptions are scsi-mq*   and dm-rq.** If driver returns BLK_STS_RESOURCE and SCHED_RESTART* bit is set, run queue after a delay to avoid IO stalls* that could otherwise occur if the queue is idle.  
We'll do* similar if we couldn't get budget and SCHED_RESTART is set.*/needs_restart = blk_mq_sched_needs_restart(hctx);if (!needs_restart ||(no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))blk_mq_run_hw_queue(hctx, true);else if (needs_restart && (ret == BLK_STS_RESOURCE ||no_budget_avail))blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);blk_mq_update_dispatch_busy(hctx, true);return false;} elseblk_mq_update_dispatch_busy(hctx, false);/** If the host/device is unable to accept more work, inform the* caller of that.*/if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)return false;return (queued + errors) != 0;
}

这篇关于NVME Doorbell 寄存器 数据请求时doorbell 处理的文章就介绍到这儿,希望我们推荐的文章对编程师们有所帮助!



http://www.chinasem.cn/article/970278

相关文章

Python将大量遥感数据的值缩放指定倍数的方法(推荐)

《Python将大量遥感数据的值缩放指定倍数的方法(推荐)》本文介绍基于Python中的gdal模块,批量读取大量多波段遥感影像文件,分别对各波段数据加以数值处理,并将所得处理后数据保存为新的遥感影像... 本文介绍基于python中的gdal模块,批量读取大量多波段遥感影像文件,分别对各波段数据加以数值处

使用MongoDB进行数据存储的操作流程

《使用MongoDB进行数据存储的操作流程》在现代应用开发中,数据存储是一个至关重要的部分,随着数据量的增大和复杂性的增加,传统的关系型数据库有时难以应对高并发和大数据量的处理需求,MongoDB作为... 目录什么是MongoDB?MongoDB的优势使用MongoDB进行数据存储1. 安装MongoDB

C#使用HttpClient进行Post请求出现超时问题的解决及优化

《C#使用HttpClient进行Post请求出现超时问题的解决及优化》最近我的控制台程序发现有时候总是出现请求超时等问题,通常好几分钟最多只有3-4个请求,在使用apipost发现并发10个5分钟也... 目录优化结论单例HttpClient连接池耗尽和并发并发异步最终优化后优化结论我直接上优化结论吧,

Python MySQL如何通过Binlog获取变更记录恢复数据

《PythonMySQL如何通过Binlog获取变更记录恢复数据》本文介绍了如何使用Python和pymysqlreplication库通过MySQL的二进制日志(Binlog)获取数据库的变更记录... 目录python mysql通过Binlog获取变更记录恢复数据1.安装pymysqlreplicat

Linux使用dd命令来复制和转换数据的操作方法

《Linux使用dd命令来复制和转换数据的操作方法》Linux中的dd命令是一个功能强大的数据复制和转换实用程序,它以较低级别运行,通常用于创建可启动的USB驱动器、克隆磁盘和生成随机数据等任务,本文... 目录简介功能和能力语法常用选项示例用法基础用法创建可启动的 USB 驱动

Oracle数据库使用 listagg去重删除重复数据的方法汇总

《Oracle数据库使用listagg去重删除重复数据的方法汇总》文章介绍了在Oracle数据库中使用LISTAGG和XMLAGG函数进行字符串聚合并去重的方法,包括去重聚合、使用XML解析和CLO... 目录案例表第一种:使用wm_concat() + distinct去重聚合第二种:使用listagg,

Go语言使用Buffer实现高性能处理字节和字符

《Go语言使用Buffer实现高性能处理字节和字符》在Go中,bytes.Buffer是一个非常高效的类型,用于处理字节数据的读写操作,本文将详细介绍一下如何使用Buffer实现高性能处理字节和... 目录1. bytes.Buffer 的基本用法1.1. 创建和初始化 Buffer1.2. 使用 Writ

Java后端接口中提取请求头中的Cookie和Token的方法

《Java后端接口中提取请求头中的Cookie和Token的方法》在现代Web开发中,HTTP请求头(Header)是客户端与服务器之间传递信息的重要方式之一,本文将详细介绍如何在Java后端(以Sp... 目录引言1. 背景1.1 什么是 HTTP 请求头?1.2 为什么需要提取请求头?2. 使用 Spr

Python实现将实体类列表数据导出到Excel文件

《Python实现将实体类列表数据导出到Excel文件》在数据处理和报告生成中,将实体类的列表数据导出到Excel文件是一项常见任务,Python提供了多种库来实现这一目标,下面就来跟随小编一起学习一... 目录一、环境准备二、定义实体类三、创建实体类列表四、将实体类列表转换为DataFrame五、导出Da

Python视频处理库VidGear使用小结

《Python视频处理库VidGear使用小结》VidGear是一个高性能的Python视频处理库,本文主要介绍了Python视频处理库VidGear使用小结,文中通过示例代码介绍的非常详细,对大家的... 目录一、VidGear的安装二、VidGear的主要功能三、VidGear的使用示例四、VidGea