/*-
 * BSD LICENSE
 *
 * Copyright (c) Intel Corporation. All rights reserved.
 * Copyright (c) 2017, Western Digital Corporation or its affiliates.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "nvme_internal.h"

struct nvme_qpair_string {
	uint16_t value;
	const char *str;
};

static const struct nvme_qpair_string admin_opcode[] = {
	{ NVME_OPC_DELETE_IO_SQ, "DELETE IO SQ" },
	{ NVME_OPC_CREATE_IO_SQ, "CREATE IO SQ" },
	{ NVME_OPC_GET_LOG_PAGE, "GET LOG PAGE" },
	{ NVME_OPC_DELETE_IO_CQ, "DELETE IO CQ" },
	{ NVME_OPC_CREATE_IO_CQ, "CREATE IO CQ" },
	{ NVME_OPC_IDENTIFY, "IDENTIFY" },
	{ NVME_OPC_ABORT, "ABORT" },
	{ NVME_OPC_SET_FEATURES, "SET FEATURES" },
	{ NVME_OPC_GET_FEATURES, "GET FEATURES" },
	{ NVME_OPC_ASYNC_EVENT_REQUEST, "ASYNC EVENT REQUEST" },
	{ NVME_OPC_NS_MANAGEMENT, "NAMESPACE MANAGEMENT" },
	{ NVME_OPC_FIRMWARE_COMMIT, "FIRMWARE COMMIT" },
	{ NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD, "FIRMWARE IMAGE DOWNLOAD" },
	{ NVME_OPC_NS_ATTACHMENT, "NAMESPACE ATTACHMENT" },
	{ NVME_OPC_FORMAT_NVM, "FORMAT NVM" },
	{ NVME_OPC_SECURITY_SEND, "SECURITY SEND" },
	{ NVME_OPC_SECURITY_RECEIVE, "SECURITY RECEIVE" },
	{ 0xFFFF, "ADMIN COMMAND" }
};

static const struct nvme_qpair_string io_opcode[] = {
	{ NVME_OPC_FLUSH, "FLUSH" },
	{ NVME_OPC_WRITE, "WRITE" },
	{ NVME_OPC_READ, "READ" },
	{ NVME_OPC_WRITE_UNCORRECTABLE, "WRITE UNCORRECTABLE" },
	{ NVME_OPC_COMPARE, "COMPARE" },
	{ NVME_OPC_WRITE_ZEROES, "WRITE ZEROES" },
	{ NVME_OPC_DATASET_MANAGEMENT, "DATASET MANAGEMENT" },
	{ NVME_OPC_RESERVATION_REGISTER, "RESERVATION REGISTER" },
	{ NVME_OPC_RESERVATION_REPORT, "RESERVATION REPORT" },
	{ NVME_OPC_RESERVATION_ACQUIRE, "RESERVATION ACQUIRE" },
	{ NVME_OPC_RESERVATION_RELEASE, "RESERVATION RELEASE" },
	{ 0xFFFF, "IO COMMAND" }
};

static const struct nvme_qpair_string generic_status[] = {
	{ NVME_SC_SUCCESS, "SUCCESS" },
	{ NVME_SC_INVALID_OPCODE, "INVALID OPCODE" },
	{ NVME_SC_INVALID_FIELD, "INVALID FIELD" },
	{ NVME_SC_COMMAND_ID_CONFLICT, "COMMAND ID CONFLICT" },
	{ NVME_SC_DATA_TRANSFER_ERROR, "DATA TRANSFER ERROR" },
	{ NVME_SC_ABORTED_POWER_LOSS, "ABORTED - POWER LOSS" },
	{ NVME_SC_INTERNAL_DEVICE_ERROR, "INTERNAL DEVICE ERROR" },
	{ NVME_SC_ABORTED_BY_REQUEST, "ABORTED - BY REQUEST" },
	{ NVME_SC_ABORTED_SQ_DELETION, "ABORTED - SQ DELETION" },
	{ NVME_SC_ABORTED_FAILED_FUSED, "ABORTED - FAILED FUSED" },
	{ NVME_SC_ABORTED_MISSING_FUSED, "ABORTED - MISSING FUSED" },
	{ NVME_SC_INVALID_NAMESPACE_OR_FORMAT, "INVALID NAMESPACE OR FORMAT" },
	{ NVME_SC_COMMAND_SEQUENCE_ERROR, "COMMAND SEQUENCE ERROR" },
	{ NVME_SC_INVALID_SGL_SEG_DESCRIPTOR, "INVALID SGL SEGMENT DESCRIPTOR" },
	{ NVME_SC_INVALID_NUM_SGL_DESCIRPTORS, "INVALID NUMBER OF SGL DESCRIPTORS" },
	{ NVME_SC_DATA_SGL_LENGTH_INVALID, "DATA SGL LENGTH INVALID" },
	{ NVME_SC_METADATA_SGL_LENGTH_INVALID, "METADATA SGL LENGTH INVALID" },
	{ NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID, "SGL DESCRIPTOR TYPE INVALID" },
	{ NVME_SC_INVALID_CONTROLLER_MEM_BUF, "INVALID CONTROLLER MEMORY BUFFER" },
	{ NVME_SC_INVALID_PRP_OFFSET, "INVALID PRP OFFSET" },
	{ NVME_SC_ATOMIC_WRITE_UNIT_EXCEEDED, "ATOMIC WRITE UNIT EXCEEDED" },
	{ NVME_SC_LBA_OUT_OF_RANGE, "LBA OUT OF RANGE" },
	{ NVME_SC_CAPACITY_EXCEEDED, "CAPACITY EXCEEDED" },
	{ NVME_SC_NAMESPACE_NOT_READY, "NAMESPACE NOT READY" },
	{ NVME_SC_RESERVATION_CONFLICT, "RESERVATION CONFLICT" },
	{ NVME_SC_FORMAT_IN_PROGRESS, "FORMAT IN PROGRESS" },
	{ 0xFFFF, "GENERIC" }
};

static const struct nvme_qpair_string command_specific_status[] = {
	{ NVME_SC_COMPLETION_QUEUE_INVALID, "INVALID COMPLETION QUEUE" },
	{ NVME_SC_INVALID_QUEUE_IDENTIFIER, "INVALID QUEUE IDENTIFIER" },
	{ NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED, "MAX QUEUE SIZE EXCEEDED" },
	{ NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED, "ABORT CMD LIMIT EXCEEDED" },
	{ NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED, "ASYNC LIMIT EXCEEDED" },
	{ NVME_SC_INVALID_FIRMWARE_SLOT, "INVALID FIRMWARE SLOT" },
	{ NVME_SC_INVALID_FIRMWARE_IMAGE, "INVALID FIRMWARE IMAGE" },
	{ NVME_SC_INVALID_INTERRUPT_VECTOR, "INVALID INTERRUPT VECTOR" },
	{ NVME_SC_INVALID_LOG_PAGE, "INVALID LOG PAGE" },
	{ NVME_SC_INVALID_FORMAT, "INVALID FORMAT" },
	{ NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET, "FIRMWARE REQUIRES CONVENTIONAL RESET" },
	{ NVME_SC_INVALID_QUEUE_DELETION, "INVALID QUEUE DELETION" },
	{ NVME_SC_FEATURE_ID_NOT_SAVEABLE, "FEATURE ID NOT SAVEABLE" },
	{ NVME_SC_FEATURE_NOT_CHANGEABLE, "FEATURE NOT CHANGEABLE" },
	{ NVME_SC_FEATURE_NOT_NAMESPACE_SPECIFIC, "FEATURE NOT NAMESPACE SPECIFIC" },
	{ NVME_SC_FIRMWARE_REQ_NVM_RESET, "FIRMWARE REQUIRES NVM RESET" },
	{ NVME_SC_FIRMWARE_REQ_RESET, "FIRMWARE REQUIRES RESET" },
	{ NVME_SC_FIRMWARE_REQ_MAX_TIME_VIOLATION, "FIRMWARE REQUIRES MAX TIME VIOLATION" },
	{ NVME_SC_FIRMWARE_ACTIVATION_PROHIBITED, "FIRMWARE ACTIVATION PROHIBITED" },
	{ NVME_SC_OVERLAPPING_RANGE, "OVERLAPPING RANGE" },
	{ NVME_SC_NAMESPACE_INSUFFICIENT_CAPACITY, "NAMESPACE INSUFFICIENT CAPACITY" },
	{ NVME_SC_NAMESPACE_ID_UNAVAILABLE, "NAMESPACE ID UNAVAILABLE" },
	{ NVME_SC_NAMESPACE_ALREADY_ATTACHED, "NAMESPACE ALREADY ATTACHED" },
	{ NVME_SC_NAMESPACE_IS_PRIVATE, "NAMESPACE IS PRIVATE" },
	{ NVME_SC_NAMESPACE_NOT_ATTACHED, "NAMESPACE NOT ATTACHED" },
	{ NVME_SC_THINPROVISIONING_NOT_SUPPORTED, "THINPROVISIONING NOT SUPPORTED" },
	{ NVME_SC_CONTROLLER_LIST_INVALID, "CONTROLLER LIST INVALID" },
	{ NVME_SC_CONFLICTING_ATTRIBUTES, "CONFLICTING ATTRIBUTES" },
	{ NVME_SC_INVALID_PROTECTION_INFO, "INVALID PROTECTION INFO" },
	{ NVME_SC_ATTEMPTED_WRITE_TO_RO_PAGE, "WRITE TO RO PAGE" },
	{ 0xFFFF, "COMMAND SPECIFIC" }
};

static const struct nvme_qpair_string media_error_status[] = {
	{ NVME_SC_WRITE_FAULTS, "WRITE FAULTS" },
	{ NVME_SC_UNRECOVERED_READ_ERROR, "UNRECOVERED READ ERROR" },
	{ NVME_SC_GUARD_CHECK_ERROR, "GUARD CHECK ERROR" },
	{ NVME_SC_APPLICATION_TAG_CHECK_ERROR, "APPLICATION TAG CHECK ERROR" },
	{ NVME_SC_REFERENCE_TAG_CHECK_ERROR, "REFERENCE TAG CHECK ERROR" },
	{ NVME_SC_COMPARE_FAILURE, "COMPARE FAILURE" },
	{ NVME_SC_ACCESS_DENIED, "ACCESS DENIED" },
	{ NVME_SC_DEALLOCATED_OR_UNWRITTEN_BLOCK, "DEALLOCATED OR UNWRITTEN BLOCK" },
	{ 0xFFFF, "MEDIA ERROR" }
};

static inline bool nvme_qpair_is_admin_queue(struct nvme_qpair *qpair)
{
	return qpair->id == 0;
}

static inline bool nvme_qpair_is_io_queue(struct nvme_qpair *qpair)
{
	return qpair->id != 0;
}

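/*
 * Look up the string associated with a value in one of the tables above.
 * The search stops at the 0xFFFF sentinel entry, whose string is returned
 * as a generic fallback for unknown values, e.g.
 * nvme_qpair_get_string(io_opcode, NVME_OPC_READ) returns "READ".
 */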
static const char *nvme_qpair_get_string(const struct nvme_qpair_string *strings,
					 uint16_t value)
{
	const struct nvme_qpair_string *entry;

	entry = strings;

	while (entry->value != 0xFFFF) {
		if (entry->value == value)
			return entry->str;
		entry++;
	}
	return entry->str;
}

static void nvme_qpair_admin_qpair_print_command(struct nvme_qpair *qpair,
						 struct nvme_cmd *cmd)
{
	nvme_info("%s (%02x) sqid:%d cid:%d nsid:%x cdw10:%08x cdw11:%08x\n",
		  nvme_qpair_get_string(admin_opcode, cmd->opc), cmd->opc,
		  qpair->id, cmd->cid,
		  cmd->nsid, cmd->cdw10, cmd->cdw11);
}

static void nvme_qpair_io_qpair_print_command(struct nvme_qpair *qpair,
					      struct nvme_cmd *cmd)
{
	nvme_assert(qpair != NULL, "print_command: qpair == NULL\n");
	nvme_assert(cmd != NULL, "print_command: cmd == NULL\n");

	switch ((int)cmd->opc) {
	case NVME_OPC_WRITE:
	case NVME_OPC_READ:
	case NVME_OPC_WRITE_UNCORRECTABLE:
	case NVME_OPC_COMPARE:
		nvme_info("%s sqid:%d cid:%d nsid:%d lba:%llu len:%d\n",
			  nvme_qpair_get_string(io_opcode, cmd->opc),
			  qpair->id, cmd->cid, cmd->nsid,
			  ((unsigned long long)cmd->cdw11 << 32) + cmd->cdw10,
			  (cmd->cdw12 & 0xFFFF) + 1);
		break;
	case NVME_OPC_FLUSH:
	case NVME_OPC_DATASET_MANAGEMENT:
		nvme_info("%s sqid:%d cid:%d nsid:%d\n",
			  nvme_qpair_get_string(io_opcode, cmd->opc),
			  qpair->id, cmd->cid, cmd->nsid);
		break;
	default:
		nvme_info("%s (%02x) sqid:%d cid:%d nsid:%d\n",
			  nvme_qpair_get_string(io_opcode, cmd->opc),
			  cmd->opc, qpair->id, cmd->cid, cmd->nsid);
		break;
	}
}

static void nvme_qpair_print_command(struct nvme_qpair *qpair,
				     struct nvme_cmd *cmd)
{
	nvme_assert(qpair != NULL, "qpair can not be NULL");
	nvme_assert(cmd != NULL, "cmd can not be NULL");

	if (nvme_qpair_is_admin_queue(qpair))
		return nvme_qpair_admin_qpair_print_command(qpair, cmd);

	return nvme_qpair_io_qpair_print_command(qpair, cmd);
}

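/*
 * Map a completion status code type (SCT) and status code (SC) to a human
 * readable string, e.g. get_status_string(NVME_SCT_GENERIC, NVME_SC_SUCCESS)
 * returns "SUCCESS". Vendor specific and reserved status code types have no
 * lookup table and map to fixed strings.
 */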
static const char *get_status_string(uint16_t sct, uint16_t sc)
{
	const struct nvme_qpair_string *entry;

	switch (sct) {
	case NVME_SCT_GENERIC:
		entry = generic_status;
		break;
	case NVME_SCT_COMMAND_SPECIFIC:
		entry = command_specific_status;
		break;
	case NVME_SCT_MEDIA_ERROR:
		entry = media_error_status;
		break;
	case NVME_SCT_VENDOR_SPECIFIC:
		return "VENDOR SPECIFIC";
	default:
		return "RESERVED";
	}

	return nvme_qpair_get_string(entry, sc);
}

static void nvme_qpair_print_completion(struct nvme_qpair *qpair,
					struct nvme_cpl *cpl)
{
	nvme_info("Cpl: %s (%02x/%02x) sqid:%d cid:%d "
		  "cdw0:%x sqhd:%04x p:%x m:%x dnr:%x\n",
		  get_status_string(cpl->status.sct, cpl->status.sc),
		  cpl->status.sct,
		  cpl->status.sc,
		  cpl->sqid,
		  cpl->cid,
		  cpl->cdw0,
		  cpl->sqhd,
		  cpl->status.p,
		  cpl->status.m,
		  cpl->status.dnr);
}

static bool nvme_qpair_completion_retry(const struct nvme_cpl *cpl)
{
	/*
	 * TODO: spec is not clear how commands that are aborted due
	 * to TLER will be marked. So for now, it seems
	 * NAMESPACE_NOT_READY is the only case where we should
	 * look at the DNR bit.
	 */
	switch ((int)cpl->status.sct) {
	case NVME_SCT_GENERIC:
		switch ((int)cpl->status.sc) {
		case NVME_SC_NAMESPACE_NOT_READY:
		case NVME_SC_FORMAT_IN_PROGRESS:
			if (cpl->status.dnr)
				return false;
			return true;
		case NVME_SC_INVALID_OPCODE:
		case NVME_SC_INVALID_FIELD:
		case NVME_SC_COMMAND_ID_CONFLICT:
		case NVME_SC_DATA_TRANSFER_ERROR:
		case NVME_SC_ABORTED_POWER_LOSS:
		case NVME_SC_INTERNAL_DEVICE_ERROR:
		case NVME_SC_ABORTED_BY_REQUEST:
		case NVME_SC_ABORTED_SQ_DELETION:
		case NVME_SC_ABORTED_FAILED_FUSED:
		case NVME_SC_ABORTED_MISSING_FUSED:
		case NVME_SC_INVALID_NAMESPACE_OR_FORMAT:
		case NVME_SC_COMMAND_SEQUENCE_ERROR:
		case NVME_SC_LBA_OUT_OF_RANGE:
		case NVME_SC_CAPACITY_EXCEEDED:
		default:
			return false;
		}
	case NVME_SCT_COMMAND_SPECIFIC:
	case NVME_SCT_MEDIA_ERROR:
	case NVME_SCT_VENDOR_SPECIFIC:
	default:
		return false;
	}
}

static void nvme_qpair_construct_tracker(struct nvme_tracker *tr,
					 uint16_t cid, uint64_t phys_addr)
{
	tr->prp_sgl_bus_addr = phys_addr + offsetof(struct nvme_tracker, u.prp);
	tr->cid = cid;
	tr->active = false;
}

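/*
 * An NVMe submission queue entry is 64 bytes, so the copy below is done with
 * two 32-byte AVX stores or four 16-byte SSE2 stores when those instruction
 * sets are available, falling back to a plain structure assignment otherwise.
 */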
static inline void nvme_qpair_copy_command(struct nvme_cmd *dst,
					   const struct nvme_cmd *src)
{
	/* dst and src are known to be non-overlapping and 64-byte aligned. */
#if defined(__AVX__)
	__m256i *d256 = (__m256i *)dst;
	const __m256i *s256 = (const __m256i *)src;

	_mm256_store_si256(&d256[0], _mm256_load_si256(&s256[0]));
	_mm256_store_si256(&d256[1], _mm256_load_si256(&s256[1]));
#elif defined(__SSE2__)
	__m128i *d128 = (__m128i *)dst;
	const __m128i *s128 = (const __m128i *)src;

	_mm_store_si128(&d128[0], _mm_load_si128(&s128[0]));
	_mm_store_si128(&d128[1], _mm_load_si128(&s128[1]));
	_mm_store_si128(&d128[2], _mm_load_si128(&s128[2]));
	_mm_store_si128(&d128[3], _mm_load_si128(&s128[3]));
#else
	*dst = *src;
#endif
}

static void nvme_qpair_submit_tracker(struct nvme_qpair *qpair,
				      struct nvme_tracker *tr)
{
	struct nvme_request *req = tr->req;

	/*
	 * Set the tracker active and copy its command
	 * to the submission queue.
	 */
	nvme_debug("qpair %d: Submit command, tail %d, cid %d / %d\n",
		   qpair->id,
		   (int)qpair->sq_tail,
		   (int)tr->cid,
		   (int)tr->req->cmd.cid);

	qpair->tr[tr->cid].active = true;
	nvme_qpair_copy_command(&qpair->cmd[qpair->sq_tail], &req->cmd);

	if (++qpair->sq_tail == qpair->entries)
		qpair->sq_tail = 0;

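	/*
	 * Make sure the command is visible in submission queue memory before
	 * ringing the doorbell, then write the new tail index to the
	 * submission queue tail doorbell register.
	 */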
	nvme_wmb();
	nvme_mmio_write_4(qpair->sq_tdbl, qpair->sq_tail);
}

static void nvme_qpair_complete_tracker(struct nvme_qpair *qpair,
					struct nvme_tracker *tr,
					struct nvme_cpl *cpl,
					bool print_on_error)
{
	struct nvme_request *req = tr->req;
	bool retry, error;

	if (!req) {
		nvme_crit("tracker has no request\n");
		qpair->tr[cpl->cid].active = false;
		goto done;
	}

	error = nvme_cpl_is_error(cpl);
	retry = error && nvme_qpair_completion_retry(cpl) &&
		(req->retries < NVME_MAX_RETRY_COUNT);
	if (error && print_on_error) {
		nvme_qpair_print_command(qpair, &req->cmd);
		nvme_qpair_print_completion(qpair, cpl);
	}

	qpair->tr[cpl->cid].active = false;

	if (cpl->cid != req->cmd.cid)
		nvme_crit("cpl and command CID mismatch (%d / %d)\n",
			  (int)cpl->cid, (int)req->cmd.cid);

	if (retry) {
		req->retries++;
		nvme_qpair_submit_tracker(qpair, tr);
		return;
	}

	if (req->cb_fn)
		req->cb_fn(req->cb_arg, cpl);

	nvme_request_free_locked(req);

done:
	tr->req = NULL;

	LIST_REMOVE(tr, list);
	LIST_INSERT_HEAD(&qpair->free_tr, tr, list);
}

static void nvme_qpair_submit_queued_requests(struct nvme_qpair *qpair)
{
	STAILQ_HEAD(, nvme_request) req_queue;
	STAILQ_INIT(&req_queue);

	pthread_mutex_lock(&qpair->lock);

	STAILQ_CONCAT(&req_queue, &qpair->queued_req);

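	/*
	 * Requests are moved onto a local list so that the qpair lock can be
	 * dropped around each nvme_qpair_submit_request() call; anything left
	 * over is concatenated back onto queued_req below.
	 */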
	/*
	 * If the controller is in the middle of a reset, don't
	 * try to submit queued requests - let the reset logic
	 * handle that instead.
	 */
	while (!qpair->ctrlr->resetting && LIST_FIRST(&qpair->free_tr)
	       && !STAILQ_EMPTY(&req_queue)) {
		struct nvme_request *req = STAILQ_FIRST(&req_queue);
		STAILQ_REMOVE_HEAD(&req_queue, stailq);

		pthread_mutex_unlock(&qpair->lock);
		nvme_qpair_submit_request(qpair, req);
		pthread_mutex_lock(&qpair->lock);
	}

	STAILQ_CONCAT(&qpair->queued_req, &req_queue);

	pthread_mutex_unlock(&qpair->lock);
}

static void nvme_qpair_manual_complete_tracker(struct nvme_qpair *qpair,
					       struct nvme_tracker *tr,
					       uint32_t sct,
					       uint32_t sc,
					       uint32_t dnr,
					       bool print_on_error)
{
	struct nvme_cpl cpl;

	memset(&cpl, 0, sizeof(cpl));
	cpl.sqid = qpair->id;
	cpl.cid = tr->cid;
	cpl.status.sct = sct;
	cpl.status.sc = sc;
	cpl.status.dnr = dnr;

	nvme_qpair_complete_tracker(qpair, tr, &cpl, print_on_error);
}

static void nvme_qpair_manual_complete_request(struct nvme_qpair *qpair,
					       struct nvme_request *req,
					       uint32_t sct, uint32_t sc,
					       bool print_on_error)
{
	struct nvme_cpl cpl;
	bool error;

	memset(&cpl, 0, sizeof(cpl));
	cpl.sqid = qpair->id;
	cpl.status.sct = sct;
	cpl.status.sc = sc;

	error = nvme_cpl_is_error(&cpl);

	if (error && print_on_error) {
		nvme_qpair_print_command(qpair, &req->cmd);
		nvme_qpair_print_completion(qpair, &cpl);
	}

	if (req->cb_fn)
		req->cb_fn(req->cb_arg, &cpl);

	nvme_request_free_locked(req);
}

static void nvme_qpair_abort_aers(struct nvme_qpair *qpair)
{
	struct nvme_tracker *tr;

	tr = LIST_FIRST(&qpair->outstanding_tr);
	while (tr != NULL) {
		nvme_assert(tr->req != NULL,
			    "tr->req == NULL in abort_aers\n");
		if (tr->req->cmd.opc == NVME_OPC_ASYNC_EVENT_REQUEST) {
			nvme_qpair_manual_complete_tracker(qpair, tr,
							   NVME_SCT_GENERIC,
							   NVME_SC_ABORTED_SQ_DELETION,
							   0, false);
			tr = LIST_FIRST(&qpair->outstanding_tr);
			continue;
		}
		tr = LIST_NEXT(tr, list);
	}
}

static inline void _nvme_qpair_admin_qpair_destroy(struct nvme_qpair *qpair)
{
	nvme_qpair_abort_aers(qpair);
}

static inline void _nvme_qpair_req_bad_phys(struct nvme_qpair *qpair,
					    struct nvme_tracker *tr)
{
	/*
	 * Bad vtophys translation, so abort this request
	 * and return immediately, without retry.
	 */
	nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
					   NVME_SC_INVALID_FIELD,
					   1, true);
}

/*
 * Build PRP list describing physically contiguous payload buffer.
 */
static int _nvme_qpair_build_contig_request(struct nvme_qpair *qpair,
					    struct nvme_request *req,
					    struct nvme_tracker *tr)
{
	uint64_t phys_addr;
	void *seg_addr;
	uint32_t nseg, cur_nseg, modulo, unaligned;
	void *md_payload;
	void *payload = req->payload.u.contig + req->payload_offset;

	phys_addr = nvme_mem_vtophys(payload);
	if (phys_addr == NVME_VTOPHYS_ERROR) {
		_nvme_qpair_req_bad_phys(qpair, tr);
		return -1;
	}
	nseg = req->payload_size >> PAGE_SHIFT;
	modulo = req->payload_size & (PAGE_SIZE - 1);
	unaligned = phys_addr & (PAGE_SIZE - 1);
	if (modulo || unaligned)
		nseg += 1 + ((modulo + unaligned - 1) >> PAGE_SHIFT);
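	/*
	 * For example, assuming 4 KiB memory pages, an 8 KiB payload that
	 * starts 512 bytes into a page covers three pages and therefore
	 * needs three PRP entries: nseg = 2 + 1 + ((0 + 511) >> 12) = 3.
	 */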

	if (req->payload.md) {
		md_payload = req->payload.md + req->md_offset;
		tr->req->cmd.mptr = nvme_mem_vtophys(md_payload);
		if (tr->req->cmd.mptr == NVME_VTOPHYS_ERROR) {
			_nvme_qpair_req_bad_phys(qpair, tr);
			return -1;
		}
	}

	tr->req->cmd.psdt = NVME_PSDT_PRP;
	tr->req->cmd.dptr.prp.prp1 = phys_addr;
	if (nseg == 2) {
		seg_addr = payload + PAGE_SIZE - unaligned;
		tr->req->cmd.dptr.prp.prp2 = nvme_mem_vtophys(seg_addr);
	} else if (nseg > 2) {
		cur_nseg = 1;
		tr->req->cmd.dptr.prp.prp2 = (uint64_t)tr->prp_sgl_bus_addr;
		while (cur_nseg < nseg) {
			seg_addr = payload + cur_nseg * PAGE_SIZE - unaligned;
			phys_addr = nvme_mem_vtophys(seg_addr);
			if (phys_addr == NVME_VTOPHYS_ERROR) {
				_nvme_qpair_req_bad_phys(qpair, tr);
				return -1;
			}
			tr->u.prp[cur_nseg - 1] = phys_addr;
			cur_nseg++;
		}
	}

	return 0;
}

/*
 * Build SGL list describing scattered payload buffer.
 */
static int _nvme_qpair_build_hw_sgl_request(struct nvme_qpair *qpair,
					    struct nvme_request *req,
					    struct nvme_tracker *tr)
{
	struct nvme_sgl_descriptor *sgl;
	uint64_t phys_addr;
	uint32_t remaining_transfer_len, length, nseg = 0;
	int ret;

	/*
	 * Build scattered payloads.
	 */
	nvme_assert(req->payload_size != 0,
		    "cannot build SGL for zero-length transfer\n");
	nvme_assert(req->payload.type == NVME_PAYLOAD_TYPE_SGL,
		    "sgl payload type required\n");
	nvme_assert(req->payload.u.sgl.reset_sgl_fn != NULL,
		    "sgl reset callback required\n");
	nvme_assert(req->payload.u.sgl.next_sge_fn != NULL,
		    "sgl callback required\n");
	req->payload.u.sgl.reset_sgl_fn(req->payload.u.sgl.cb_arg,
					req->payload_offset);

	sgl = tr->u.sgl;
	req->cmd.psdt = NVME_PSDT_SGL_MPTR_SGL;
	req->cmd.dptr.sgl1.unkeyed.subtype = 0;

	remaining_transfer_len = req->payload_size;

	while (remaining_transfer_len > 0) {

		if (nseg >= NVME_MAX_SGL_DESCRIPTORS) {
			_nvme_qpair_req_bad_phys(qpair, tr);
			return -1;
		}

		ret = req->payload.u.sgl.next_sge_fn(req->payload.u.sgl.cb_arg,
						     &phys_addr, &length);
		if (ret != 0) {
			_nvme_qpair_req_bad_phys(qpair, tr);
			return ret;
		}

		length = nvme_min(remaining_transfer_len, length);
		remaining_transfer_len -= length;

		sgl->unkeyed.type = NVME_SGL_TYPE_DATA_BLOCK;
		sgl->unkeyed.length = length;
		sgl->address = phys_addr;
		sgl->unkeyed.subtype = 0;

		sgl++;
		nseg++;

	}

	if (nseg == 1) {
		/*
		 * The whole transfer can be described by a single Scatter
		 * Gather List descriptor. Use the special case described
		 * by the spec where SGL1's type is Data Block.
		 * This means the SGL in the tracker is not used at all,
		 * so copy the first (and only) SGL element into SGL1.
		 */
		req->cmd.dptr.sgl1.unkeyed.type = NVME_SGL_TYPE_DATA_BLOCK;
		req->cmd.dptr.sgl1.address = tr->u.sgl[0].address;
		req->cmd.dptr.sgl1.unkeyed.length = tr->u.sgl[0].unkeyed.length;
	} else {
		/* For now, only one SGL segment is supported by the controller. */
		req->cmd.dptr.sgl1.unkeyed.type = NVME_SGL_TYPE_LAST_SEGMENT;
		req->cmd.dptr.sgl1.address = tr->prp_sgl_bus_addr;
		req->cmd.dptr.sgl1.unkeyed.length =
			nseg * sizeof(struct nvme_sgl_descriptor);
	}

	return 0;
}

/*
 * Build Physical Region Page list describing scattered payload buffer.
 */
static int _nvme_qpair_build_prps_sgl_request(struct nvme_qpair *qpair,
					      struct nvme_request *req,
					      struct nvme_tracker *tr)
{
	uint64_t phys_addr, prp2 = 0;
	uint32_t data_transferred, remaining_transfer_len, length;
	uint32_t nseg, cur_nseg, total_nseg = 0, last_nseg = 0;
	uint32_t modulo, unaligned, sge_count = 0;
	int ret;

	/*
	 * Build scattered payloads.
	 */
	nvme_assert(req->payload.type == NVME_PAYLOAD_TYPE_SGL,
		    "sgl payload type required\n");
	nvme_assert(req->payload.u.sgl.reset_sgl_fn != NULL,
		    "sgl reset callback required\n");
	req->payload.u.sgl.reset_sgl_fn(req->payload.u.sgl.cb_arg,
					req->payload_offset);

	remaining_transfer_len = req->payload_size;

	while (remaining_transfer_len > 0) {

		nvme_assert(req->payload.u.sgl.next_sge_fn != NULL,
			    "sgl callback required\n");

		ret = req->payload.u.sgl.next_sge_fn(req->payload.u.sgl.cb_arg,
						     &phys_addr, &length);
		if (ret != 0) {
			_nvme_qpair_req_bad_phys(qpair, tr);
			return -1;
		}

		nvme_assert((phys_addr & 0x3) == 0,
			    "address must be dword aligned\n");
		nvme_assert((length >= remaining_transfer_len) ||
			    ((phys_addr + length) % PAGE_SIZE) == 0,
			    "All SGEs except last must end on a page boundary\n");
		nvme_assert((sge_count == 0) || (phys_addr % PAGE_SIZE) == 0,
			    "All SGEs except first must start on a page boundary\n");

		data_transferred = nvme_min(remaining_transfer_len, length);

		nseg = data_transferred >> PAGE_SHIFT;
		modulo = data_transferred & (PAGE_SIZE - 1);
		unaligned = phys_addr & (PAGE_SIZE - 1);
		if (modulo || unaligned)
			nseg += 1 + ((modulo + unaligned - 1) >> PAGE_SHIFT);

		if (total_nseg == 0) {
			req->cmd.psdt = NVME_PSDT_PRP;
			req->cmd.dptr.prp.prp1 = phys_addr;
		}

		total_nseg += nseg;
		sge_count++;
		remaining_transfer_len -= data_transferred;

		if (total_nseg == 2) {
			if (sge_count == 1)
				tr->req->cmd.dptr.prp.prp2 = phys_addr +
					PAGE_SIZE - unaligned;
			else if (sge_count == 2)
				tr->req->cmd.dptr.prp.prp2 = phys_addr;
			/* Save the prp2 value. */
			prp2 = tr->req->cmd.dptr.prp.prp2;
		} else if (total_nseg > 2) {
			if (sge_count == 1)
				cur_nseg = 1;
			else
				cur_nseg = 0;

			tr->req->cmd.dptr.prp.prp2 =
				(uint64_t)tr->prp_sgl_bus_addr;

			while (cur_nseg < nseg) {
				if (prp2) {
					tr->u.prp[0] = prp2;
					tr->u.prp[last_nseg + 1] = phys_addr +
						cur_nseg * PAGE_SIZE - unaligned;
				} else {
					tr->u.prp[last_nseg] = phys_addr +
						cur_nseg * PAGE_SIZE - unaligned;
				}
				last_nseg++;
				cur_nseg++;
			}
		}
	}

	return 0;
}

static void _nvme_qpair_admin_qpair_enable(struct nvme_qpair *qpair)
{
	struct nvme_tracker *tr, *tr_temp;

	/*
	 * Manually abort each outstanding admin command. Do not retry
	 * admin commands found here, since they will be left over from
	 * a controller reset and it is likely that the context in which
	 * the command was issued no longer applies.
	 */
	LIST_FOREACH_SAFE(tr, &qpair->outstanding_tr, list, tr_temp) {
		nvme_info("Aborting outstanding admin command\n");
		nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
						   NVME_SC_ABORTED_BY_REQUEST,
						   1 /* do not retry */, true);
	}

	qpair->enabled = true;
}

static void _nvme_qpair_io_qpair_enable(struct nvme_qpair *qpair)
{
	struct nvme_tracker *tr, *temp;
	struct nvme_request *req;

	qpair->enabled = true;

	qpair->ctrlr->enabled_io_qpairs++;

	/* Manually abort each queued I/O. */
	while (!STAILQ_EMPTY(&qpair->queued_req)) {
		req = STAILQ_FIRST(&qpair->queued_req);
		STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
		nvme_info("Aborting queued I/O command\n");
		nvme_qpair_manual_complete_request(qpair, req, NVME_SCT_GENERIC,
						   NVME_SC_ABORTED_BY_REQUEST,
						   true);
	}

	/* Manually abort each outstanding I/O. */
	LIST_FOREACH_SAFE(tr, &qpair->outstanding_tr, list, temp) {
		nvme_info("Aborting outstanding I/O command\n");
		nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
						   NVME_SC_ABORTED_BY_REQUEST,
						   0, true);
	}
}

static inline void _nvme_qpair_admin_qpair_disable(struct nvme_qpair *qpair)
{
	qpair->enabled = false;
	nvme_qpair_abort_aers(qpair);
}

static inline void _nvme_qpair_io_qpair_disable(struct nvme_qpair *qpair)
{
	qpair->enabled = false;

	qpair->ctrlr->enabled_io_qpairs--;
}

/*
 * Reserve room for the submission queue
 * in the controller memory buffer.
 */
static int nvme_ctrlr_reserve_sq_in_cmb(struct nvme_ctrlr *ctrlr,
					uint16_t entries,
					uint64_t aligned, uint64_t *offset)
{
	uint64_t round_offset;
	const uint64_t length = entries * sizeof(struct nvme_cmd);

	round_offset = ctrlr->cmb_current_offset;
	round_offset = (round_offset + (aligned - 1)) & ~(aligned - 1);
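	/* e.g. with 4 KiB alignment, an offset of 0x1234 rounds up to 0x2000 */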

	if (round_offset + length > ctrlr->cmb_size)
		return -1;

	*offset = round_offset;
	ctrlr->cmb_current_offset = round_offset + length;

	return 0;
}

/*
 * Initialize a queue pair on the host side.
 */
int nvme_qpair_construct(struct nvme_ctrlr *ctrlr, struct nvme_qpair *qpair,
			 enum nvme_qprio qprio,
			 uint16_t entries, uint16_t trackers)
{
	volatile uint32_t *doorbell_base;
	struct nvme_tracker *tr;
	uint64_t offset;
	unsigned long phys_addr = 0;
	uint16_t i;
	int ret;

	nvme_assert(entries != 0, "Invalid number of entries\n");
	nvme_assert(trackers != 0, "Invalid trackers\n");

	pthread_mutex_init(&qpair->lock, NULL);

	qpair->entries = entries;
	qpair->trackers = trackers;
	qpair->qprio = qprio;
	qpair->sq_in_cmb = false;
	qpair->ctrlr = ctrlr;

	if (ctrlr->opts.use_cmb_sqs) {
		/*
		 * Reserve room for the submission queue in ctrlr
		 * memory buffer.
		 */
		ret = nvme_ctrlr_reserve_sq_in_cmb(ctrlr, entries,
						   PAGE_SIZE,
						   &offset);
		if (ret == 0) {

			qpair->cmd = ctrlr->cmb_bar_virt_addr + offset;
			qpair->cmd_bus_addr = ctrlr->cmb_bar_phys_addr + offset;
			qpair->sq_in_cmb = true;

			nvme_debug("Allocated qpair %d cmd in cmb at %p / 0x%llx\n",
				   qpair->id,
				   qpair->cmd, qpair->cmd_bus_addr);

		}
	}

	if (qpair->sq_in_cmb == false) {

		qpair->cmd =
			nvme_mem_alloc_node(sizeof(struct nvme_cmd) * entries,
					    PAGE_SIZE, NVME_NODE_ID_ANY,
					    (unsigned long *) &qpair->cmd_bus_addr);
		if (!qpair->cmd) {
			nvme_err("Allocate qpair commands failed\n");
			goto fail;
		}
		memset(qpair->cmd, 0, sizeof(struct nvme_cmd) * entries);

		nvme_debug("Allocated qpair %d cmd %p / 0x%llx\n",
			   qpair->id,
			   qpair->cmd, qpair->cmd_bus_addr);
	}

	qpair->cpl = nvme_mem_alloc_node(sizeof(struct nvme_cpl) * entries,
					 PAGE_SIZE, NVME_NODE_ID_ANY,
					 (unsigned long *) &qpair->cpl_bus_addr);
	if (!qpair->cpl) {
		nvme_err("Allocate qpair completions failed\n");
		goto fail;
	}
	memset(qpair->cpl, 0, sizeof(struct nvme_cpl) * entries);

	nvme_debug("Allocated qpair %d cpl at %p / 0x%llx\n",
		   qpair->id,
		   qpair->cpl,
		   qpair->cpl_bus_addr);

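	/*
	 * Per the NVMe specification, the submission queue tail doorbell for
	 * queue y lives at offset 0x1000 + (2y) * (4 << CAP.DSTRD) and the
	 * completion queue head doorbell at 0x1000 + (2y + 1) * (4 << CAP.DSTRD).
	 * doorbell_stride_u32 is assumed here to hold that stride expressed in
	 * 32-bit words, so indexing from doorbell[0].sq_tdbl matches the layout.
	 */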
	doorbell_base = &ctrlr->regs->doorbell[0].sq_tdbl;
	qpair->sq_tdbl = doorbell_base +
		(2 * qpair->id + 0) * ctrlr->doorbell_stride_u32;
	qpair->cq_hdbl = doorbell_base +
		(2 * qpair->id + 1) * ctrlr->doorbell_stride_u32;

	LIST_INIT(&qpair->free_tr);
	LIST_INIT(&qpair->outstanding_tr);
	STAILQ_INIT(&qpair->free_req);
	STAILQ_INIT(&qpair->queued_req);

	/* Request pool */
	if (nvme_request_pool_construct(qpair)) {
		nvme_err("Create request pool failed\n");
		goto fail;
	}

	/*
	 * Reserve space for all of the trackers in a single allocation.
	 * struct nvme_tracker must be padded so that its size is already
	 * a power of 2. This ensures the PRP list embedded in the nvme_tracker
	 * object will not span a 4KB boundary, while allowing access to
	 * trackers in tr[] via normal array indexing.
	 */
	qpair->tr = nvme_mem_alloc_node(sizeof(struct nvme_tracker) * trackers,
					sizeof(struct nvme_tracker),
					NVME_NODE_ID_ANY, &phys_addr);
	if (!qpair->tr) {
		nvme_err("Allocate request trackers failed\n");
		goto fail;
	}
	memset(qpair->tr, 0, sizeof(struct nvme_tracker) * trackers);

	nvme_debug("Allocated qpair %d trackers at %p / 0x%lx\n",
		   qpair->id, qpair->tr, phys_addr);

	for (i = 0; i < trackers; i++) {
		tr = &qpair->tr[i];
		nvme_qpair_construct_tracker(tr, i, phys_addr);
		LIST_INSERT_HEAD(&qpair->free_tr, tr, list);
		phys_addr += sizeof(struct nvme_tracker);
	}

	nvme_qpair_reset(qpair);

	return 0;

fail:
	nvme_qpair_destroy(qpair);

	return -1;
}

void nvme_qpair_destroy(struct nvme_qpair *qpair)
{
	if (!qpair->ctrlr)
		return; /* Not initialized. */

	if (nvme_qpair_is_admin_queue(qpair))
		_nvme_qpair_admin_qpair_destroy(qpair);

	if (qpair->cmd && !qpair->sq_in_cmb) {
		nvme_free(qpair->cmd);
		qpair->cmd = NULL;
	}
	if (qpair->cpl) {
		nvme_free(qpair->cpl);
		qpair->cpl = NULL;
	}
	if (qpair->tr) {
		nvme_free(qpair->tr);
		qpair->tr = NULL;
	}
	nvme_request_pool_destroy(qpair);

	qpair->ctrlr = NULL;

	pthread_mutex_destroy(&qpair->lock);
}

static bool nvme_qpair_enabled(struct nvme_qpair *qpair)
{
	if (!qpair->enabled && !qpair->ctrlr->resetting)
		nvme_qpair_enable(qpair);

	return qpair->enabled;
}

int nvme_qpair_submit_request(struct nvme_qpair *qpair,
			      struct nvme_request *req)
{
	struct nvme_tracker *tr;
	struct nvme_request *child_req, *tmp;
	struct nvme_ctrlr *ctrlr = qpair->ctrlr;
	bool child_req_failed = false;
	int ret = 0;

	if (ctrlr->failed) {
		nvme_request_free(req);
		return ENXIO;
	}

	nvme_qpair_enabled(qpair);

	if (req->child_reqs) {

		/*
		 * This is a split (parent) request. Submit all of the
		 * children but not the parent request itself, since the
		 * parent is the original unsplit request.
		 */
		TAILQ_FOREACH_SAFE(child_req, &req->children, child_tailq, tmp) {
			if (!child_req_failed) {
				ret = nvme_qpair_submit_request(qpair, child_req);
				if (ret != 0)
					child_req_failed = true;
			} else {
				/* Free the remaining child_reqs since
				 * one child_req failed. */
				nvme_request_remove_child(req, child_req);
				nvme_request_free(child_req);
			}
		}

		return ret;
	}

	pthread_mutex_lock(&qpair->lock);

	tr = LIST_FIRST(&qpair->free_tr);
	if (tr == NULL || !qpair->enabled || !STAILQ_EMPTY(&qpair->queued_req)) {
		/*
		 * No tracker is available, the qpair is disabled due
		 * to an in-progress controller-level reset, or
		 * there are already queued requests.
		 *
		 * Put the request on the qpair's request queue to be
		 * processed when a tracker frees up via a command
		 * completion or when the controller reset is completed.
		 */
		STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
		pthread_mutex_unlock(&qpair->lock);

		if (tr)
			nvme_qpair_submit_queued_requests(qpair);
		return 0;
	}

	/* Remove tr from free_tr. */
	LIST_REMOVE(tr, list);
	LIST_INSERT_HEAD(&qpair->outstanding_tr, tr, list);
	tr->req = req;
	req->cmd.cid = tr->cid;

	if (req->payload_size == 0) {
		/* Null payload - leave PRP fields zeroed. */
		ret = 0;
	} else if (req->payload.type == NVME_PAYLOAD_TYPE_CONTIG) {
		ret = _nvme_qpair_build_contig_request(qpair, req, tr);
	} else if (req->payload.type == NVME_PAYLOAD_TYPE_SGL) {
		if (ctrlr->flags & NVME_CTRLR_SGL_SUPPORTED)
			ret = _nvme_qpair_build_hw_sgl_request(qpair, req, tr);
		else
			ret = _nvme_qpair_build_prps_sgl_request(qpair, req, tr);
	} else {
		nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
						   NVME_SC_INVALID_FIELD,
						   1 /* do not retry */, true);
		ret = -EINVAL;
	}

	if (ret == 0)
		nvme_qpair_submit_tracker(qpair, tr);

	pthread_mutex_unlock(&qpair->lock);

	return ret;
}

/*
 * Poll for completion of NVMe commands submitted to the
 * specified I/O queue pair.
 */
unsigned int nvme_qpair_poll(struct nvme_qpair *qpair,
			     unsigned int max_completions)
{
	struct nvme_tracker *tr;
	struct nvme_cpl *cpl;
	uint32_t num_completions = 0;

	if (!nvme_qpair_enabled(qpair))
		/*
		 * The qpair is not enabled, likely because a controller reset
		 * is in progress. Ignore the interrupt - any I/O that was
		 * associated with this interrupt will get retried when the
		 * reset is complete.
		 */
		return 0;

	if ((max_completions == 0) ||
	    (max_completions > (qpair->entries - 1U)))
		/*
		 * max_completions == 0 means unlimited, but complete at most
		 * one queue depth batch of I/O at a time so that the completion
		 * queue doorbells don't wrap around.
		 */
		max_completions = qpair->entries - 1;

	pthread_mutex_lock(&qpair->lock);

	while (1) {

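		/*
		 * A completion entry is new only if its phase bit matches the
		 * phase the host expects for this pass through the completion
		 * queue; otherwise the entry has not been written yet and
		 * polling stops here.
		 */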
		cpl = &qpair->cpl[qpair->cq_head];
		if (cpl->status.p != qpair->phase)
			break;

		tr = &qpair->tr[cpl->cid];
		if (tr->active) {
			nvme_qpair_complete_tracker(qpair, tr, cpl, true);
		} else {
			nvme_info("cpl does not map to outstanding cmd\n");
			nvme_qpair_print_completion(qpair, cpl);
			nvme_panic("received completion for unknown cmd\n");
		}

		if (++qpair->cq_head == qpair->entries) {
			qpair->cq_head = 0;
			qpair->phase = !qpair->phase;
		}

		if (++num_completions == max_completions)
			break;
	}

	if (num_completions > 0)
		nvme_mmio_write_4(qpair->cq_hdbl, qpair->cq_head);

	pthread_mutex_unlock(&qpair->lock);

	if (!STAILQ_EMPTY(&qpair->queued_req))
		nvme_qpair_submit_queued_requests(qpair);

	return num_completions;
}

void nvme_qpair_reset(struct nvme_qpair *qpair)
{
	pthread_mutex_lock(&qpair->lock);

	qpair->sq_tail = qpair->cq_head = 0;

	/*
	 * The first time through the completion queue, the hardware will set
	 * the phase bit on completions to 1. So set this to 1 here, indicating
	 * that we are looking for a 1 to know which entries have completed.
	 * The bit is toggled each time the completion queue rolls over.
	 */
	qpair->phase = 1;

	memset(qpair->cmd, 0, qpair->entries * sizeof(struct nvme_cmd));
	memset(qpair->cpl, 0, qpair->entries * sizeof(struct nvme_cpl));

	pthread_mutex_unlock(&qpair->lock);
}

void nvme_qpair_enable(struct nvme_qpair *qpair)
{
	pthread_mutex_lock(&qpair->lock);

	if (nvme_qpair_is_io_queue(qpair))
		_nvme_qpair_io_qpair_enable(qpair);
	else
		_nvme_qpair_admin_qpair_enable(qpair);

	pthread_mutex_unlock(&qpair->lock);
}

void nvme_qpair_disable(struct nvme_qpair *qpair)
{
	pthread_mutex_lock(&qpair->lock);

	if (nvme_qpair_is_io_queue(qpair))
		_nvme_qpair_io_qpair_disable(qpair);
	else
		_nvme_qpair_admin_qpair_disable(qpair);

	pthread_mutex_unlock(&qpair->lock);
}

void nvme_qpair_fail(struct nvme_qpair *qpair)
{
	struct nvme_tracker *tr;
	struct nvme_request *req;

	pthread_mutex_lock(&qpair->lock);

	while (!STAILQ_EMPTY(&qpair->queued_req)) {

		nvme_notice("Failing queued I/O command\n");
		req = STAILQ_FIRST(&qpair->queued_req);
		STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
		nvme_qpair_manual_complete_request(qpair, req, NVME_SCT_GENERIC,
						   NVME_SC_ABORTED_BY_REQUEST,
						   true);

	}

	/* Manually abort each outstanding I/O. */
	while (!LIST_EMPTY(&qpair->outstanding_tr)) {

		/*
		 * Do not remove the tracker. The abort_tracker path
		 * will do that for us.
		 */
		nvme_notice("Failing outstanding I/O command\n");
		tr = LIST_FIRST(&qpair->outstanding_tr);
		nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
						   NVME_SC_ABORTED_BY_REQUEST,
						   1, true);

	}

	pthread_mutex_unlock(&qpair->lock);
}