/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2017, Western Digital Corporation or its affiliates.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "nvme_internal.h"

struct nvme_qpair_string {
	uint16_t value;
	const char *str;
};

static const struct nvme_qpair_string admin_opcode[] = {
	{ NVME_OPC_DELETE_IO_SQ, "DELETE IO SQ" },
	{ NVME_OPC_CREATE_IO_SQ, "CREATE IO SQ" },
	{ NVME_OPC_GET_LOG_PAGE, "GET LOG PAGE" },
	{ NVME_OPC_DELETE_IO_CQ, "DELETE IO CQ" },
	{ NVME_OPC_CREATE_IO_CQ, "CREATE IO CQ" },
	{ NVME_OPC_IDENTIFY, "IDENTIFY" },
	{ NVME_OPC_ABORT, "ABORT" },
	{ NVME_OPC_SET_FEATURES, "SET FEATURES" },
	{ NVME_OPC_GET_FEATURES, "GET FEATURES" },
	{ NVME_OPC_ASYNC_EVENT_REQUEST, "ASYNC EVENT REQUEST" },
	{ NVME_OPC_NS_MANAGEMENT, "NAMESPACE MANAGEMENT" },
	{ NVME_OPC_FIRMWARE_COMMIT, "FIRMWARE COMMIT" },
	{ NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD, "FIRMWARE IMAGE DOWNLOAD" },
	{ NVME_OPC_NS_ATTACHMENT, "NAMESPACE ATTACHMENT" },
	{ NVME_OPC_FORMAT_NVM, "FORMAT NVM" },
	{ NVME_OPC_SECURITY_SEND, "SECURITY SEND" },
	{ NVME_OPC_SECURITY_RECEIVE, "SECURITY RECEIVE" },
	{ 0xFFFF, "ADMIN COMMAND" }
};

static const struct nvme_qpair_string io_opcode[] = {
	{ NVME_OPC_FLUSH, "FLUSH" },
	{ NVME_OPC_WRITE, "WRITE" },
	{ NVME_OPC_READ, "READ" },
	{ NVME_OPC_WRITE_UNCORRECTABLE, "WRITE UNCORRECTABLE" },
	{ NVME_OPC_COMPARE, "COMPARE" },
	{ NVME_OPC_WRITE_ZEROES, "WRITE ZEROES" },
	{ NVME_OPC_DATASET_MANAGEMENT, "DATASET MANAGEMENT" },
	{ NVME_OPC_RESERVATION_REGISTER, "RESERVATION REGISTER" },
	{ NVME_OPC_RESERVATION_REPORT, "RESERVATION REPORT" },
	{ NVME_OPC_RESERVATION_ACQUIRE, "RESERVATION ACQUIRE" },
	{ NVME_OPC_RESERVATION_RELEASE, "RESERVATION RELEASE" },
	{ 0xFFFF, "IO COMMAND" }
};

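/*
 * Note: each string table in this file is terminated by a { 0xFFFF, ... }
 * sentinel entry. nvme_qpair_get_string() (defined below) walks a table until
 * it finds a matching value or reaches the sentinel, whose string then serves
 * as the fallback, e.g. an unknown I/O opcode prints as "IO COMMAND".
 */
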
static const struct nvme_qpair_string generic_status[] = {
	{ NVME_SC_SUCCESS, "SUCCESS" },
	{ NVME_SC_INVALID_OPCODE, "INVALID OPCODE" },
	{ NVME_SC_INVALID_FIELD, "INVALID FIELD" },
	{ NVME_SC_COMMAND_ID_CONFLICT, "COMMAND ID CONFLICT" },
	{ NVME_SC_DATA_TRANSFER_ERROR, "DATA TRANSFER ERROR" },
	{ NVME_SC_ABORTED_POWER_LOSS, "ABORTED - POWER LOSS" },
	{ NVME_SC_INTERNAL_DEVICE_ERROR, "INTERNAL DEVICE ERROR" },
	{ NVME_SC_ABORTED_BY_REQUEST, "ABORTED - BY REQUEST" },
	{ NVME_SC_ABORTED_SQ_DELETION, "ABORTED - SQ DELETION" },
	{ NVME_SC_ABORTED_FAILED_FUSED, "ABORTED - FAILED FUSED" },
	{ NVME_SC_ABORTED_MISSING_FUSED, "ABORTED - MISSING FUSED" },
	{ NVME_SC_INVALID_NAMESPACE_OR_FORMAT, "INVALID NAMESPACE OR FORMAT" },
	{ NVME_SC_COMMAND_SEQUENCE_ERROR, "COMMAND SEQUENCE ERROR" },
	{ NVME_SC_INVALID_SGL_SEG_DESCRIPTOR, "INVALID SGL SEGMENT DESCRIPTOR" },
	{ NVME_SC_INVALID_NUM_SGL_DESCIRPTORS, "INVALID NUMBER OF SGL DESCRIPTORS" },
	{ NVME_SC_DATA_SGL_LENGTH_INVALID, "DATA SGL LENGTH INVALID" },
	{ NVME_SC_METADATA_SGL_LENGTH_INVALID, "METADATA SGL LENGTH INVALID" },
	{ NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID, "SGL DESCRIPTOR TYPE INVALID" },
	{ NVME_SC_INVALID_CONTROLLER_MEM_BUF, "INVALID CONTROLLER MEMORY BUFFER" },
	{ NVME_SC_INVALID_PRP_OFFSET, "INVALID PRP OFFSET" },
	{ NVME_SC_ATOMIC_WRITE_UNIT_EXCEEDED, "ATOMIC WRITE UNIT EXCEEDED" },
	{ NVME_SC_LBA_OUT_OF_RANGE, "LBA OUT OF RANGE" },
	{ NVME_SC_CAPACITY_EXCEEDED, "CAPACITY EXCEEDED" },
	{ NVME_SC_NAMESPACE_NOT_READY, "NAMESPACE NOT READY" },
	{ NVME_SC_RESERVATION_CONFLICT, "RESERVATION CONFLICT" },
	{ NVME_SC_FORMAT_IN_PROGRESS, "FORMAT IN PROGRESS" },
	{ 0xFFFF, "GENERIC" }
};

static const struct nvme_qpair_string command_specific_status[] = {
	{ NVME_SC_COMPLETION_QUEUE_INVALID, "INVALID COMPLETION QUEUE" },
	{ NVME_SC_INVALID_QUEUE_IDENTIFIER, "INVALID QUEUE IDENTIFIER" },
	{ NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED, "MAX QUEUE SIZE EXCEEDED" },
	{ NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED, "ABORT CMD LIMIT EXCEEDED" },
	{ NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED, "ASYNC LIMIT EXCEEDED" },
	{ NVME_SC_INVALID_FIRMWARE_SLOT, "INVALID FIRMWARE SLOT" },
	{ NVME_SC_INVALID_FIRMWARE_IMAGE, "INVALID FIRMWARE IMAGE" },
	{ NVME_SC_INVALID_INTERRUPT_VECTOR, "INVALID INTERRUPT VECTOR" },
	{ NVME_SC_INVALID_LOG_PAGE, "INVALID LOG PAGE" },
	{ NVME_SC_INVALID_FORMAT, "INVALID FORMAT" },
	{ NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET, "FIRMWARE REQUIRES CONVENTIONAL RESET" },
	{ NVME_SC_INVALID_QUEUE_DELETION, "INVALID QUEUE DELETION" },
	{ NVME_SC_FEATURE_ID_NOT_SAVEABLE, "FEATURE ID NOT SAVEABLE" },
	{ NVME_SC_FEATURE_NOT_CHANGEABLE, "FEATURE NOT CHANGEABLE" },
	{ NVME_SC_FEATURE_NOT_NAMESPACE_SPECIFIC, "FEATURE NOT NAMESPACE SPECIFIC" },
	{ NVME_SC_FIRMWARE_REQ_NVM_RESET, "FIRMWARE REQUIRES NVM RESET" },
	{ NVME_SC_FIRMWARE_REQ_RESET, "FIRMWARE REQUIRES RESET" },
	{ NVME_SC_FIRMWARE_REQ_MAX_TIME_VIOLATION, "FIRMWARE REQUIRES MAX TIME VIOLATION" },
	{ NVME_SC_FIRMWARE_ACTIVATION_PROHIBITED, "FIRMWARE ACTIVATION PROHIBITED" },
	{ NVME_SC_OVERLAPPING_RANGE, "OVERLAPPING RANGE" },
	{ NVME_SC_NAMESPACE_INSUFFICIENT_CAPACITY, "NAMESPACE INSUFFICIENT CAPACITY" },
	{ NVME_SC_NAMESPACE_ID_UNAVAILABLE, "NAMESPACE ID UNAVAILABLE" },
	{ NVME_SC_NAMESPACE_ALREADY_ATTACHED, "NAMESPACE ALREADY ATTACHED" },
	{ NVME_SC_NAMESPACE_IS_PRIVATE, "NAMESPACE IS PRIVATE" },
	{ NVME_SC_NAMESPACE_NOT_ATTACHED, "NAMESPACE NOT ATTACHED" },
	{ NVME_SC_THINPROVISIONING_NOT_SUPPORTED, "THINPROVISIONING NOT SUPPORTED" },
	{ NVME_SC_CONTROLLER_LIST_INVALID, "CONTROLLER LIST INVALID" },
	{ NVME_SC_CONFLICTING_ATTRIBUTES, "CONFLICTING ATTRIBUTES" },
	{ NVME_SC_INVALID_PROTECTION_INFO, "INVALID PROTECTION INFO" },
	{ NVME_SC_ATTEMPTED_WRITE_TO_RO_PAGE, "WRITE TO RO PAGE" },
	{ 0xFFFF, "COMMAND SPECIFIC" }
};

static const struct nvme_qpair_string media_error_status[] = {
	{ NVME_SC_WRITE_FAULTS, "WRITE FAULTS" },
	{ NVME_SC_UNRECOVERED_READ_ERROR, "UNRECOVERED READ ERROR" },
	{ NVME_SC_GUARD_CHECK_ERROR, "GUARD CHECK ERROR" },
	{ NVME_SC_APPLICATION_TAG_CHECK_ERROR, "APPLICATION TAG CHECK ERROR" },
	{ NVME_SC_REFERENCE_TAG_CHECK_ERROR, "REFERENCE TAG CHECK ERROR" },
	{ NVME_SC_COMPARE_FAILURE, "COMPARE FAILURE" },
	{ NVME_SC_ACCESS_DENIED, "ACCESS DENIED" },
	{ NVME_SC_DEALLOCATED_OR_UNWRITTEN_BLOCK, "DEALLOCATED OR UNWRITTEN BLOCK" },
	{ 0xFFFF, "MEDIA ERROR" }
};

static inline bool nvme_qpair_is_admin_queue(struct nvme_qpair *qpair)
{
	return qpair->id == 0;
}

static inline bool nvme_qpair_is_io_queue(struct nvme_qpair *qpair)
{
	return qpair->id != 0;
}

static const char *nvme_qpair_get_string(const struct nvme_qpair_string *strings,
					 uint16_t value)
{
	const struct nvme_qpair_string *entry;

	entry = strings;

	while (entry->value != 0xFFFF) {
		if (entry->value == value)
			return entry->str;
		entry++;
	}

	return entry->str;
}

static void nvme_qpair_admin_qpair_print_command(struct nvme_qpair *qpair,
						 struct nvme_cmd *cmd)
{
	nvme_info("%s (%02x) sqid:%d cid:%d nsid:%x cdw10:%08x cdw11:%08x\n",
		  nvme_qpair_get_string(admin_opcode, cmd->opc), cmd->opc,
		  qpair->id, cmd->cid,
		  cmd->nsid, cmd->cdw10, cmd->cdw11);
}

static void nvme_qpair_io_qpair_print_command(struct nvme_qpair *qpair,
					      struct nvme_cmd *cmd)
{
	nvme_assert(qpair != NULL, "print_command: qpair == NULL\n");
	nvme_assert(cmd != NULL, "print_command: cmd == NULL\n");

	switch ((int)cmd->opc) {
	case NVME_OPC_WRITE:
	case NVME_OPC_READ:
	case NVME_OPC_WRITE_UNCORRECTABLE:
	case NVME_OPC_COMPARE:
		nvme_info("%s sqid:%d cid:%d nsid:%d lba:%llu len:%d\n",
			  nvme_qpair_get_string(io_opcode, cmd->opc),
			  qpair->id, cmd->cid, cmd->nsid,
			  ((unsigned long long)cmd->cdw11 << 32) + cmd->cdw10,
			  (cmd->cdw12 & 0xFFFF) + 1);
		break;
	case NVME_OPC_FLUSH:
	case NVME_OPC_DATASET_MANAGEMENT:
		nvme_info("%s sqid:%d cid:%d nsid:%d\n",
			  nvme_qpair_get_string(io_opcode, cmd->opc),
			  qpair->id, cmd->cid, cmd->nsid);
		break;
	default:
		nvme_info("%s (%02x) sqid:%d cid:%d nsid:%d\n",
			  nvme_qpair_get_string(io_opcode, cmd->opc),
			  cmd->opc, qpair->id, cmd->cid, cmd->nsid);
		break;
	}
}

static void nvme_qpair_print_command(struct nvme_qpair *qpair,
				     struct nvme_cmd *cmd)
{
	nvme_assert(qpair != NULL, "qpair can not be NULL");
	nvme_assert(cmd != NULL, "cmd can not be NULL");

	if (nvme_qpair_is_admin_queue(qpair))
		return nvme_qpair_admin_qpair_print_command(qpair, cmd);

	return nvme_qpair_io_qpair_print_command(qpair, cmd);
}

static const char *get_status_string(uint16_t sct, uint16_t sc)
{
	const struct nvme_qpair_string *entry;

	switch (sct) {
	case NVME_SCT_GENERIC:
		entry = generic_status;
		break;
	case NVME_SCT_COMMAND_SPECIFIC:
		entry = command_specific_status;
		break;
	case NVME_SCT_MEDIA_ERROR:
		entry = media_error_status;
		break;
	case NVME_SCT_VENDOR_SPECIFIC:
		return "VENDOR SPECIFIC";
	default:
		return "RESERVED";
	}

	return nvme_qpair_get_string(entry, sc);
}

static void nvme_qpair_print_completion(struct nvme_qpair *qpair,
					struct nvme_cpl *cpl)
{
	nvme_info("Cpl: %s (%02x/%02x) sqid:%d cid:%d "
		  "cdw0:%x sqhd:%04x p:%x m:%x dnr:%x\n",
		  get_status_string(cpl->status.sct, cpl->status.sc),
		  cpl->status.sct,
		  cpl->status.sc,
		  cpl->sqid,
		  cpl->cid,
		  cpl->cdw0,
		  cpl->sqhd,
		  cpl->status.p,
		  cpl->status.m,
		  cpl->status.dnr);
}

static bool nvme_qpair_completion_retry(const struct nvme_cpl *cpl)
{
	/*
	 * TODO: spec is not clear how commands that are aborted due
	 * to TLER will be marked. So for now, it seems
	 * NAMESPACE_NOT_READY is the only case where we should
	 * look at the DNR bit.
	 */
	switch ((int)cpl->status.sct) {
	case NVME_SCT_GENERIC:
		switch ((int)cpl->status.sc) {
		case NVME_SC_NAMESPACE_NOT_READY:
		case NVME_SC_FORMAT_IN_PROGRESS:
			if (cpl->status.dnr)
				return false;
			return true;
		case NVME_SC_INVALID_OPCODE:
		case NVME_SC_INVALID_FIELD:
		case NVME_SC_COMMAND_ID_CONFLICT:
		case NVME_SC_DATA_TRANSFER_ERROR:
		case NVME_SC_ABORTED_POWER_LOSS:
		case NVME_SC_INTERNAL_DEVICE_ERROR:
		case NVME_SC_ABORTED_BY_REQUEST:
		case NVME_SC_ABORTED_SQ_DELETION:
		case NVME_SC_ABORTED_FAILED_FUSED:
		case NVME_SC_ABORTED_MISSING_FUSED:
		case NVME_SC_INVALID_NAMESPACE_OR_FORMAT:
		case NVME_SC_COMMAND_SEQUENCE_ERROR:
		case NVME_SC_LBA_OUT_OF_RANGE:
		case NVME_SC_CAPACITY_EXCEEDED:
		default:
			return false;
		}
	case NVME_SCT_COMMAND_SPECIFIC:
	case NVME_SCT_MEDIA_ERROR:
	case NVME_SCT_VENDOR_SPECIFIC:
	default:
		return false;
	}
}

static void nvme_qpair_construct_tracker(struct nvme_tracker *tr,
					 uint16_t cid, uint64_t phys_addr)
{
	tr->prp_sgl_bus_addr = phys_addr + offsetof(struct nvme_tracker, u.prp);
	tr->cid = cid;
	tr->active = false;
}

static inline void nvme_qpair_copy_command(struct nvme_cmd *dst,
					   const struct nvme_cmd *src)
{
	/* dst and src are known to be non-overlapping and 64-byte aligned. */
#if defined(__AVX__)
	__m256i *d256 = (__m256i *)dst;
	const __m256i *s256 = (const __m256i *)src;

	_mm256_store_si256(&d256[0], _mm256_load_si256(&s256[0]));
	_mm256_store_si256(&d256[1], _mm256_load_si256(&s256[1]));
#elif defined(__SSE2__)
	__m128i *d128 = (__m128i *)dst;
	const __m128i *s128 = (const __m128i *)src;

	_mm_store_si128(&d128[0], _mm_load_si128(&s128[0]));
	_mm_store_si128(&d128[1], _mm_load_si128(&s128[1]));
	_mm_store_si128(&d128[2], _mm_load_si128(&s128[2]));
	_mm_store_si128(&d128[3], _mm_load_si128(&s128[3]));
#else
	*dst = *src;
#endif
}

static void nvme_qpair_submit_tracker(struct nvme_qpair *qpair,
				      struct nvme_tracker *tr)
{
	struct nvme_request *req = tr->req;

	/*
	 * Set the tracker active and copy its command
	 * to the submission queue.
	 */
	nvme_debug("qpair %d: Submit command, tail %d, cid %d / %d\n",
		   qpair->id,
		   (int)qpair->sq_tail,
		   (int)tr->cid,
		   (int)tr->req->cmd.cid);

	qpair->tr[tr->cid].active = true;
	nvme_qpair_copy_command(&qpair->cmd[qpair->sq_tail], &req->cmd);

	if (++qpair->sq_tail == qpair->entries)
		qpair->sq_tail = 0;

	nvme_wmb();
	nvme_mmio_write_4(qpair->sq_tdbl, qpair->sq_tail);
}

static void nvme_qpair_complete_tracker(struct nvme_qpair *qpair,
					struct nvme_tracker *tr,
					struct nvme_cpl *cpl,
					bool print_on_error)
{
	struct nvme_request *req = tr->req;
	bool retry, error;

	if (!req) {
		nvme_crit("tracker has no request\n");
		qpair->tr[cpl->cid].active = false;
		goto done;
	}

	error = nvme_cpl_is_error(cpl);
	retry = error && nvme_qpair_completion_retry(cpl) &&
		(req->retries < NVME_MAX_RETRY_COUNT);
	if (error && print_on_error) {
		nvme_qpair_print_command(qpair, &req->cmd);
		nvme_qpair_print_completion(qpair, cpl);
	}

	qpair->tr[cpl->cid].active = false;

	if (cpl->cid != req->cmd.cid)
		nvme_crit("cpl and command CID mismatch (%d / %d)\n",
			  (int)cpl->cid, (int)req->cmd.cid);

	if (retry) {
		req->retries++;
		nvme_qpair_submit_tracker(qpair, tr);
		return;
	}

	if (req->cb_fn)
		req->cb_fn(req->cb_arg, cpl);

	nvme_request_free_locked(req);

done:
	tr->req = NULL;

	LIST_REMOVE(tr, list);
	LIST_INSERT_HEAD(&qpair->free_tr, tr, list);
}

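/*
 * Note: command IDs double as tracker indices. struct nvme_cmd is the 64-byte
 * NVMe submission queue entry, which nvme_qpair_copy_command() moves with two
 * 32-byte AVX stores or four 16-byte SSE2 stores (plain assignment is the
 * portable fallback). nvme_qpair_submit_tracker() rings the submission queue
 * tail doorbell after the write barrier; when the matching completion arrives,
 * nvme_qpair_complete_tracker() uses cpl->cid to locate the tracker, invokes
 * the request callback (or resubmits on a retryable status), and returns the
 * tracker to the free list.
 */
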
static void nvme_qpair_submit_queued_requests(struct nvme_qpair *qpair)
{
	STAILQ_HEAD(, nvme_request) req_queue;
	STAILQ_INIT(&req_queue);

	pthread_mutex_lock(&qpair->lock);

	STAILQ_CONCAT(&req_queue, &qpair->queued_req);

	/*
	 * If the controller is in the middle of a reset, don't
	 * try to submit queued requests - let the reset logic
	 * handle that instead.
	 */
	while (!qpair->ctrlr->resetting && LIST_FIRST(&qpair->free_tr) &&
	       !STAILQ_EMPTY(&req_queue)) {
		struct nvme_request *req = STAILQ_FIRST(&req_queue);

		STAILQ_REMOVE_HEAD(&req_queue, stailq);

		pthread_mutex_unlock(&qpair->lock);
		nvme_qpair_submit_request(qpair, req);
		pthread_mutex_lock(&qpair->lock);
	}

	STAILQ_CONCAT(&qpair->queued_req, &req_queue);

	pthread_mutex_unlock(&qpair->lock);
}

static void nvme_qpair_manual_complete_tracker(struct nvme_qpair *qpair,
					       struct nvme_tracker *tr,
					       uint32_t sct,
					       uint32_t sc,
					       uint32_t dnr,
					       bool print_on_error)
{
	struct nvme_cpl cpl;

	memset(&cpl, 0, sizeof(cpl));
	cpl.sqid = qpair->id;
	cpl.cid = tr->cid;
	cpl.status.sct = sct;
	cpl.status.sc = sc;
	cpl.status.dnr = dnr;

	nvme_qpair_complete_tracker(qpair, tr, &cpl, print_on_error);
}

static void nvme_qpair_manual_complete_request(struct nvme_qpair *qpair,
					       struct nvme_request *req,
					       uint32_t sct, uint32_t sc,
					       bool print_on_error)
{
	struct nvme_cpl cpl;
	bool error;

	memset(&cpl, 0, sizeof(cpl));
	cpl.sqid = qpair->id;
	cpl.status.sct = sct;
	cpl.status.sc = sc;

	error = nvme_cpl_is_error(&cpl);

	if (error && print_on_error) {
		nvme_qpair_print_command(qpair, &req->cmd);
		nvme_qpair_print_completion(qpair, &cpl);
	}

	if (req->cb_fn)
		req->cb_fn(req->cb_arg, &cpl);

	nvme_request_free_locked(req);
}

static void nvme_qpair_abort_aers(struct nvme_qpair *qpair)
{
	struct nvme_tracker *tr;

	tr = LIST_FIRST(&qpair->outstanding_tr);
	while (tr != NULL) {
		nvme_assert(tr->req != NULL,
			    "tr->req == NULL in abort_aers\n");
		if (tr->req->cmd.opc == NVME_OPC_ASYNC_EVENT_REQUEST) {
			nvme_qpair_manual_complete_tracker(qpair, tr,
						NVME_SCT_GENERIC,
						NVME_SC_ABORTED_SQ_DELETION,
						0, false);
			tr = LIST_FIRST(&qpair->outstanding_tr);
			continue;
		}
		tr = LIST_NEXT(tr, list);
	}
}

static inline void _nvme_qpair_admin_qpair_destroy(struct nvme_qpair *qpair)
{
	nvme_qpair_abort_aers(qpair);
}

static inline void _nvme_qpair_req_bad_phys(struct nvme_qpair *qpair,
					    struct nvme_tracker *tr)
{
	/*
	 * Bad vtophys translation, so abort this request
	 * and return immediately, without retry.
	 */
	nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
					   NVME_SC_INVALID_FIELD,
					   1, true);
}

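/*
 * Note: the request builders below abort a command through
 * _nvme_qpair_req_bad_phys() whenever an address cannot be used: a failed
 * nvme_mem_vtophys() translation, a failing next_sge callback, or too many
 * SGL descriptors. The command then completes immediately as INVALID FIELD
 * with DNR set, so it is never retried.
 */
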
/*
 * Build PRP list describing physically contiguous payload buffer.
 */
static int _nvme_qpair_build_contig_request(struct nvme_qpair *qpair,
					    struct nvme_request *req,
					    struct nvme_tracker *tr)
{
	uint64_t phys_addr;
	void *seg_addr;
	uint32_t nseg, cur_nseg, modulo, unaligned;
	void *md_payload;
	void *payload = req->payload.u.contig + req->payload_offset;

	phys_addr = nvme_mem_vtophys(payload);
	if (phys_addr == NVME_VTOPHYS_ERROR) {
		_nvme_qpair_req_bad_phys(qpair, tr);
		return -1;
	}

	nseg = req->payload_size >> PAGE_SHIFT;
	modulo = req->payload_size & (PAGE_SIZE - 1);
	unaligned = phys_addr & (PAGE_SIZE - 1);
	if (modulo || unaligned)
		nseg += 1 + ((modulo + unaligned - 1) >> PAGE_SHIFT);

	if (req->payload.md) {
		md_payload = req->payload.md + req->md_offset;
		tr->req->cmd.mptr = nvme_mem_vtophys(md_payload);
		if (tr->req->cmd.mptr == NVME_VTOPHYS_ERROR) {
			_nvme_qpair_req_bad_phys(qpair, tr);
			return -1;
		}
	}

	tr->req->cmd.psdt = NVME_PSDT_PRP;
	tr->req->cmd.dptr.prp.prp1 = phys_addr;
	if (nseg == 2) {
		seg_addr = payload + PAGE_SIZE - unaligned;
		tr->req->cmd.dptr.prp.prp2 = nvme_mem_vtophys(seg_addr);
	} else if (nseg > 2) {
		cur_nseg = 1;
		tr->req->cmd.dptr.prp.prp2 = (uint64_t)tr->prp_sgl_bus_addr;
		while (cur_nseg < nseg) {
			seg_addr = payload + cur_nseg * PAGE_SIZE - unaligned;
			phys_addr = nvme_mem_vtophys(seg_addr);
			if (phys_addr == NVME_VTOPHYS_ERROR) {
				_nvme_qpair_req_bad_phys(qpair, tr);
				return -1;
			}
			tr->u.prp[cur_nseg - 1] = phys_addr;
			cur_nseg++;
		}
	}

	return 0;
}

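/*
 * Worked example for the segment count above, assuming 4 KB pages: a 16 KB
 * payload (payload_size = 0x4000) starting at a physical address with
 * unaligned = 0x200 gives nseg = 4 and modulo = 0, then
 * nseg += 1 + ((0 + 0x200 - 1) >> 12) = 1, i.e. five page-sized pieces.
 * PRP1 points at the unaligned start, and since nseg > 2, PRP2 points at the
 * tracker's PRP list holding the remaining four page-aligned entries.
 */
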
/*
 * Build SGL list describing scattered payload buffer.
 */
static int _nvme_qpair_build_hw_sgl_request(struct nvme_qpair *qpair,
					    struct nvme_request *req,
					    struct nvme_tracker *tr)
{
	struct nvme_sgl_descriptor *sgl;
	uint64_t phys_addr;
	uint32_t remaining_transfer_len, length, nseg = 0;
	int ret;

	/*
	 * Build scattered payloads.
	 */
	nvme_assert(req->payload_size != 0,
		    "cannot build SGL for zero-length transfer\n");
	nvme_assert(req->payload.type == NVME_PAYLOAD_TYPE_SGL,
		    "sgl payload type required\n");
	nvme_assert(req->payload.u.sgl.reset_sgl_fn != NULL,
		    "sgl reset callback required\n");
	nvme_assert(req->payload.u.sgl.next_sge_fn != NULL,
		    "sgl callback required\n");
	req->payload.u.sgl.reset_sgl_fn(req->payload.u.sgl.cb_arg,
					req->payload_offset);

	sgl = tr->u.sgl;
	req->cmd.psdt = NVME_PSDT_SGL_MPTR_SGL;
	req->cmd.dptr.sgl1.unkeyed.subtype = 0;

	remaining_transfer_len = req->payload_size;

	while (remaining_transfer_len > 0) {

		if (nseg >= NVME_MAX_SGL_DESCRIPTORS) {
			_nvme_qpair_req_bad_phys(qpair, tr);
			return -1;
		}

		ret = req->payload.u.sgl.next_sge_fn(req->payload.u.sgl.cb_arg,
						     &phys_addr, &length);
		if (ret != 0) {
			_nvme_qpair_req_bad_phys(qpair, tr);
			return ret;
		}

		length = nvme_min(remaining_transfer_len, length);
		remaining_transfer_len -= length;

		sgl->unkeyed.type = NVME_SGL_TYPE_DATA_BLOCK;
		sgl->unkeyed.length = length;
		sgl->address = phys_addr;
		sgl->unkeyed.subtype = 0;

		sgl++;
		nseg++;
	}

	if (nseg == 1) {
		/*
		 * The whole transfer can be described by a single Scatter
		 * Gather List descriptor. Use the special case described
		 * by the spec where SGL1's type is Data Block.
		 * This means the SGL in the tracker is not used at all,
		 * so copy the first (and only) SGL element into SGL1.
		 */
		req->cmd.dptr.sgl1.unkeyed.type = NVME_SGL_TYPE_DATA_BLOCK;
		req->cmd.dptr.sgl1.address = tr->u.sgl[0].address;
		req->cmd.dptr.sgl1.unkeyed.length = tr->u.sgl[0].unkeyed.length;
	} else {
		/* For now, only a single SGL segment is supported. */
		req->cmd.dptr.sgl1.unkeyed.type = NVME_SGL_TYPE_LAST_SEGMENT;
		req->cmd.dptr.sgl1.address = tr->prp_sgl_bus_addr;
		req->cmd.dptr.sgl1.unkeyed.length =
			nseg * sizeof(struct nvme_sgl_descriptor);
	}

	return 0;
}

/*
 * Build Physical Region Page list describing scattered payload buffer.
 */
static int _nvme_qpair_build_prps_sgl_request(struct nvme_qpair *qpair,
					      struct nvme_request *req,
					      struct nvme_tracker *tr)
{
	uint64_t phys_addr, prp2 = 0;
	uint32_t data_transferred, remaining_transfer_len, length;
	uint32_t nseg, cur_nseg, total_nseg = 0, last_nseg = 0;
	uint32_t modulo, unaligned, sge_count = 0;
	int ret;

	/*
	 * Build scattered payloads.
	 */
	nvme_assert(req->payload.type == NVME_PAYLOAD_TYPE_SGL,
		    "sgl payload type required\n");
	nvme_assert(req->payload.u.sgl.reset_sgl_fn != NULL,
		    "sgl reset callback required\n");
	req->payload.u.sgl.reset_sgl_fn(req->payload.u.sgl.cb_arg,
					req->payload_offset);

	remaining_transfer_len = req->payload_size;

	while (remaining_transfer_len > 0) {

		nvme_assert(req->payload.u.sgl.next_sge_fn != NULL,
			    "sgl callback required\n");

		ret = req->payload.u.sgl.next_sge_fn(req->payload.u.sgl.cb_arg,
						     &phys_addr, &length);
		if (ret != 0) {
			_nvme_qpair_req_bad_phys(qpair, tr);
			return -1;
		}

		nvme_assert((phys_addr & 0x3) == 0,
			    "address must be dword aligned\n");
		nvme_assert((length >= remaining_transfer_len) ||
			    ((phys_addr + length) % PAGE_SIZE) == 0,
			    "All SGEs except last must end on a page boundary\n");
		nvme_assert((sge_count == 0) ||
			    (phys_addr % PAGE_SIZE) == 0,
			    "All SGEs except first must start on a page boundary\n");

		data_transferred = nvme_min(remaining_transfer_len, length);

		nseg = data_transferred >> PAGE_SHIFT;
		modulo = data_transferred & (PAGE_SIZE - 1);
		unaligned = phys_addr & (PAGE_SIZE - 1);
		if (modulo || unaligned)
			nseg += 1 + ((modulo + unaligned - 1) >> PAGE_SHIFT);

		if (total_nseg == 0) {
			req->cmd.psdt = NVME_PSDT_PRP;
			req->cmd.dptr.prp.prp1 = phys_addr;
		}

		total_nseg += nseg;
		sge_count++;
		remaining_transfer_len -= data_transferred;

		if (total_nseg == 2) {
			if (sge_count == 1)
				tr->req->cmd.dptr.prp.prp2 = phys_addr +
					PAGE_SIZE - unaligned;
			else if (sge_count == 2)
				tr->req->cmd.dptr.prp.prp2 = phys_addr;
			/* save prp2 value */
			prp2 = tr->req->cmd.dptr.prp.prp2;
		} else if (total_nseg > 2) {
			if (sge_count == 1)
				cur_nseg = 1;
			else
				cur_nseg = 0;

			tr->req->cmd.dptr.prp.prp2 =
				(uint64_t)tr->prp_sgl_bus_addr;

			while (cur_nseg < nseg) {
				if (prp2) {
					tr->u.prp[0] = prp2;
					tr->u.prp[last_nseg + 1] = phys_addr +
						cur_nseg * PAGE_SIZE - unaligned;
				} else {
					tr->u.prp[last_nseg] = phys_addr +
						cur_nseg * PAGE_SIZE - unaligned;
				}
				last_nseg++;
				cur_nseg++;
			}
		}
	}

	return 0;
}

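/*
 * Note: the PRP-from-SGL path above relies on the alignment rules that its
 * asserts enforce: only the first SGE may start off a page boundary, and only
 * the last SGE may end off one. Under those constraints every intermediate
 * SGE covers whole pages, so the transfer can be described by PRP1, PRP2 and
 * the tracker's flat PRP list without gaps.
 */
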
static void _nvme_qpair_admin_qpair_enable(struct nvme_qpair *qpair)
{
	struct nvme_tracker *tr, *tr_temp;

	/*
	 * Manually abort each outstanding admin command. Do not retry
	 * admin commands found here, since they will be left over from
	 * a controller reset and it is likely that the context in which
	 * the command was issued no longer applies.
	 */
	LIST_FOREACH_SAFE(tr, &qpair->outstanding_tr, list, tr_temp) {
		nvme_info("Aborting outstanding admin command\n");
		nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
						   NVME_SC_ABORTED_BY_REQUEST,
						   1 /* do not retry */, true);
	}

	qpair->enabled = true;
}

static void _nvme_qpair_io_qpair_enable(struct nvme_qpair *qpair)
{
	struct nvme_tracker *tr, *temp;
	struct nvme_request *req;

	qpair->enabled = true;

	qpair->ctrlr->enabled_io_qpairs++;

	/* Manually abort each queued I/O. */
	while (!STAILQ_EMPTY(&qpair->queued_req)) {
		req = STAILQ_FIRST(&qpair->queued_req);
		STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
		nvme_info("Aborting queued I/O command\n");
		nvme_qpair_manual_complete_request(qpair, req, NVME_SCT_GENERIC,
						   NVME_SC_ABORTED_BY_REQUEST,
						   true);
	}

	/* Manually abort each outstanding I/O. */
	LIST_FOREACH_SAFE(tr, &qpair->outstanding_tr, list, temp) {
		nvme_info("Aborting outstanding I/O command\n");
		nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
						   NVME_SC_ABORTED_BY_REQUEST,
						   0, true);
	}
}

static inline void _nvme_qpair_admin_qpair_disable(struct nvme_qpair *qpair)
{
	qpair->enabled = false;
	nvme_qpair_abort_aers(qpair);
}

static inline void _nvme_qpair_io_qpair_disable(struct nvme_qpair *qpair)
{
	qpair->enabled = false;

	qpair->ctrlr->enabled_io_qpairs--;
}

/*
 * Reserve room for the submission queue
 * in the controller memory buffer.
 */
static int nvme_ctrlr_reserve_sq_in_cmb(struct nvme_ctrlr *ctrlr,
					uint16_t entries,
					uint64_t aligned, uint64_t *offset)
{
	uint64_t round_offset;
	const uint64_t length = entries * sizeof(struct nvme_cmd);

	round_offset = ctrlr->cmb_current_offset;
	round_offset = (round_offset + (aligned - 1)) & ~(aligned - 1);

	if (round_offset + length > ctrlr->cmb_size)
		return -1;

	*offset = round_offset;
	ctrlr->cmb_current_offset = round_offset + length;

	return 0;
}

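/*
 * Worked example for the round-up above: the current CMB offset is aligned
 * to the requested boundary with (offset + aligned - 1) & ~(aligned - 1).
 * Assuming aligned = 0x1000 (a 4 KB page) and cmb_current_offset = 0x1840,
 * round_offset becomes 0x2000, and the queue of `entries` 64-byte commands is
 * carved out from there, provided it still fits within cmb_size.
 */
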
/*
 * Initialize a queue pair on the host side.
 */
int nvme_qpair_construct(struct nvme_ctrlr *ctrlr, struct nvme_qpair *qpair,
			 enum nvme_qprio qprio,
			 uint16_t entries, uint16_t trackers)
{
	volatile uint32_t *doorbell_base;
	struct nvme_tracker *tr;
	uint64_t offset;
	unsigned long phys_addr = 0;
	uint16_t i;
	int ret;

	nvme_assert(entries != 0, "Invalid number of entries\n");
	nvme_assert(trackers != 0, "Invalid trackers\n");

	pthread_mutex_init(&qpair->lock, NULL);

	qpair->entries = entries;
	qpair->trackers = trackers;
	qpair->qprio = qprio;
	qpair->sq_in_cmb = false;
	qpair->ctrlr = ctrlr;

	if (ctrlr->opts.use_cmb_sqs) {
		/*
		 * Reserve room for the submission queue in ctrlr
		 * memory buffer.
		 */
		ret = nvme_ctrlr_reserve_sq_in_cmb(ctrlr, entries,
						   PAGE_SIZE,
						   &offset);
		if (ret == 0) {
			qpair->cmd = ctrlr->cmb_bar_virt_addr + offset;
			qpair->cmd_bus_addr = ctrlr->cmb_bar_phys_addr + offset;
			qpair->sq_in_cmb = true;

			nvme_debug("Allocated qpair %d cmd in cmb at %p / 0x%llx\n",
				   qpair->id,
				   qpair->cmd, qpair->cmd_bus_addr);
		}
	}

	if (qpair->sq_in_cmb == false) {
		qpair->cmd =
			nvme_mem_alloc_node(sizeof(struct nvme_cmd) * entries,
					    PAGE_SIZE, NVME_NODE_ID_ANY,
					    (unsigned long *) &qpair->cmd_bus_addr);
		if (!qpair->cmd) {
			nvme_err("Allocate qpair commands failed\n");
			goto fail;
		}
		memset(qpair->cmd, 0, sizeof(struct nvme_cmd) * entries);

		nvme_debug("Allocated qpair %d cmd %p / 0x%llx\n",
			   qpair->id,
			   qpair->cmd, qpair->cmd_bus_addr);
	}

	qpair->cpl = nvme_mem_alloc_node(sizeof(struct nvme_cpl) * entries,
					 PAGE_SIZE, NVME_NODE_ID_ANY,
					 (unsigned long *) &qpair->cpl_bus_addr);
	if (!qpair->cpl) {
		nvme_err("Allocate qpair completions failed\n");
		goto fail;
	}
	memset(qpair->cpl, 0, sizeof(struct nvme_cpl) * entries);

	nvme_debug("Allocated qpair %d cpl at %p / 0x%llx\n",
		   qpair->id,
		   qpair->cpl,
		   qpair->cpl_bus_addr);

	doorbell_base = &ctrlr->regs->doorbell[0].sq_tdbl;
	qpair->sq_tdbl = doorbell_base +
		(2 * qpair->id + 0) * ctrlr->doorbell_stride_u32;
	qpair->cq_hdbl = doorbell_base +
		(2 * qpair->id + 1) * ctrlr->doorbell_stride_u32;

	LIST_INIT(&qpair->free_tr);
	LIST_INIT(&qpair->outstanding_tr);
	STAILQ_INIT(&qpair->free_req);
	STAILQ_INIT(&qpair->queued_req);

	/* Request pool */
	if (nvme_request_pool_construct(qpair)) {
		nvme_err("Create request pool failed\n");
		goto fail;
	}

	/*
	 * Reserve space for all of the trackers in a single allocation.
	 * struct nvme_tracker must be padded so that its size is already
	 * a power of 2. This ensures the PRP list embedded in the nvme_tracker
	 * object will not span a 4KB boundary, while allowing access to
	 * trackers in tr[] via normal array indexing.
	 */
	qpair->tr = nvme_mem_alloc_node(sizeof(struct nvme_tracker) * trackers,
					sizeof(struct nvme_tracker),
					NVME_NODE_ID_ANY, &phys_addr);
	if (!qpair->tr) {
		nvme_err("Allocate request trackers failed\n");
		goto fail;
	}
	memset(qpair->tr, 0, sizeof(struct nvme_tracker) * trackers);

	nvme_debug("Allocated qpair %d trackers at %p / 0x%lx\n",
		   qpair->id, qpair->tr, phys_addr);

	for (i = 0; i < trackers; i++) {
		tr = &qpair->tr[i];
		nvme_qpair_construct_tracker(tr, i, phys_addr);
		LIST_INSERT_HEAD(&qpair->free_tr, tr, list);
		phys_addr += sizeof(struct nvme_tracker);
	}

	nvme_qpair_reset(qpair);

	return 0;

fail:
	nvme_qpair_destroy(qpair);

	return -1;
}

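/*
 * Note on the doorbell arithmetic in nvme_qpair_construct() above: per the
 * NVMe specification, each queue pair owns two 32-bit doorbells laid out
 * after the register block and strided by CAP.DSTRD. With
 * doorbell_stride_u32 expressed in 32-bit words, queue N's submission tail
 * doorbell sits at word index (2 * N) * stride and its completion head
 * doorbell at (2 * N + 1) * stride from doorbell[0]. For example, with a
 * stride of 1 (DSTRD = 0), qpair 3 uses word offsets 6 and 7.
 */
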
void nvme_qpair_destroy(struct nvme_qpair *qpair)
{
	if (!qpair->ctrlr)
		return; /* Not initialized. */

	if (nvme_qpair_is_admin_queue(qpair))
		_nvme_qpair_admin_qpair_destroy(qpair);

	if (qpair->cmd && !qpair->sq_in_cmb) {
		nvme_free(qpair->cmd);
		qpair->cmd = NULL;
	}
	if (qpair->cpl) {
		nvme_free(qpair->cpl);
		qpair->cpl = NULL;
	}
	if (qpair->tr) {
		nvme_free(qpair->tr);
		qpair->tr = NULL;
	}

	nvme_request_pool_destroy(qpair);

	qpair->ctrlr = NULL;

	pthread_mutex_destroy(&qpair->lock);
}

static bool nvme_qpair_enabled(struct nvme_qpair *qpair)
{
	if (!qpair->enabled && !qpair->ctrlr->resetting)
		nvme_qpair_enable(qpair);

	return qpair->enabled;
}

int nvme_qpair_submit_request(struct nvme_qpair *qpair,
			      struct nvme_request *req)
{
	struct nvme_tracker *tr;
	struct nvme_request *child_req, *tmp;
	struct nvme_ctrlr *ctrlr = qpair->ctrlr;
	bool child_req_failed = false;
	int ret = 0;

	if (ctrlr->failed) {
		nvme_request_free(req);
		return ENXIO;
	}

	nvme_qpair_enabled(qpair);

	if (req->child_reqs) {
		/*
		 * This is a split (parent) request. Submit all of the
		 * children but not the parent request itself, since the
		 * parent is the original unsplit request.
		 */
		TAILQ_FOREACH_SAFE(child_req, &req->children, child_tailq, tmp) {
			if (!child_req_failed) {
				ret = nvme_qpair_submit_request(qpair, child_req);
				if (ret != 0)
					child_req_failed = true;
			} else {
				/*
				 * Free the remaining child requests, since
				 * one of them failed to submit.
				 */
				nvme_request_remove_child(req, child_req);
				nvme_request_free(child_req);
			}
		}

		return ret;
	}

	pthread_mutex_lock(&qpair->lock);

	tr = LIST_FIRST(&qpair->free_tr);
	if (tr == NULL || !qpair->enabled || !STAILQ_EMPTY(&qpair->queued_req)) {
		/*
		 * No tracker is available, the qpair is disabled due
		 * to an in-progress controller-level reset, or
		 * there are already queued requests.
		 *
		 * Put the request on the qpair's request queue to be
		 * processed when a tracker frees up via a command
		 * completion or when the controller reset is completed.
		 */
		STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
		pthread_mutex_unlock(&qpair->lock);

		if (tr)
			nvme_qpair_submit_queued_requests(qpair);

		return 0;
	}

	/* Remove the tracker from the free list. */
	LIST_REMOVE(tr, list);
	LIST_INSERT_HEAD(&qpair->outstanding_tr, tr, list);
	tr->req = req;
	req->cmd.cid = tr->cid;

	if (req->payload_size == 0) {
		/* Null payload - leave PRP fields zeroed. */
		ret = 0;
	} else if (req->payload.type == NVME_PAYLOAD_TYPE_CONTIG) {
		ret = _nvme_qpair_build_contig_request(qpair, req, tr);
	} else if (req->payload.type == NVME_PAYLOAD_TYPE_SGL) {
		if (ctrlr->flags & NVME_CTRLR_SGL_SUPPORTED)
			ret = _nvme_qpair_build_hw_sgl_request(qpair, req, tr);
		else
			ret = _nvme_qpair_build_prps_sgl_request(qpair, req, tr);
	} else {
		nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
						   NVME_SC_INVALID_FIELD,
						   1 /* do not retry */, true);
		ret = -EINVAL;
	}

	if (ret == 0)
		nvme_qpair_submit_tracker(qpair, tr);

	pthread_mutex_unlock(&qpair->lock);

	return ret;
}

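/*
 * Note: nvme_qpair_submit_request() picks the data-pointer format per
 * request. Contiguous payloads always use PRPs; scattered payloads use
 * hardware SGLs only when the controller advertises support
 * (NVME_CTRLR_SGL_SUPPORTED) and otherwise fall back to the PRP builder.
 * Requests that cannot get a tracker, or that arrive while earlier requests
 * are still queued, are appended to queued_req and resubmitted later from
 * nvme_qpair_submit_queued_requests().
 */
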
/*
 * Poll for completion of NVMe commands submitted to the
 * specified I/O queue pair.
 */
unsigned int nvme_qpair_poll(struct nvme_qpair *qpair,
			     unsigned int max_completions)
{
	struct nvme_tracker *tr;
	struct nvme_cpl *cpl;
	uint32_t num_completions = 0;

	if (!nvme_qpair_enabled(qpair))
		/*
		 * The qpair is not enabled, likely because a controller reset
		 * is in progress. Ignore the interrupt - any I/O that was
		 * associated with this interrupt will get retried when the
		 * reset is complete.
		 */
		return 0;

	if ((max_completions == 0) ||
	    (max_completions > (qpair->entries - 1U)))
		/*
		 * max_completions == 0 means unlimited, but complete at most
		 * one queue depth batch of I/O at a time so that the completion
		 * queue doorbells don't wrap around.
		 */
		max_completions = qpair->entries - 1;

	pthread_mutex_lock(&qpair->lock);

	while (1) {
		cpl = &qpair->cpl[qpair->cq_head];
		if (cpl->status.p != qpair->phase)
			break;

		tr = &qpair->tr[cpl->cid];
		if (tr->active) {
			nvme_qpair_complete_tracker(qpair, tr, cpl, true);
		} else {
			nvme_info("cpl does not map to outstanding cmd\n");
			nvme_qpair_print_completion(qpair, cpl);
			nvme_panic("received completion for unknown cmd\n");
		}

		if (++qpair->cq_head == qpair->entries) {
			qpair->cq_head = 0;
			qpair->phase = !qpair->phase;
		}

		if (++num_completions == max_completions)
			break;
	}

	if (num_completions > 0)
		nvme_mmio_write_4(qpair->cq_hdbl, qpair->cq_head);

	pthread_mutex_unlock(&qpair->lock);

	if (!STAILQ_EMPTY(&qpair->queued_req))
		nvme_qpair_submit_queued_requests(qpair);

	return num_completions;
}

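/*
 * Worked example of the phase bit handling in nvme_qpair_poll() above and
 * nvme_qpair_reset() below: after a reset the driver looks for completion
 * entries with p == 1. Once cq_head wraps past the last entry, phase flips
 * to 0, so stale entries from the previous pass (still carrying p == 1) no
 * longer match and polling stops at the first slot the controller has not yet
 * rewritten. The controller inverts the phase it posts on every pass through
 * the queue, keeping both sides in step.
 */
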
void nvme_qpair_reset(struct nvme_qpair *qpair)
{
	pthread_mutex_lock(&qpair->lock);

	qpair->sq_tail = qpair->cq_head = 0;

	/*
	 * The first time through the completion queue, HW will set the phase
	 * bit on completions to 1. So set this to 1 here, indicating we're
	 * looking for a 1 to know which entries have completed. We'll toggle
	 * the bit each time the completion queue wraps around.
	 */
	qpair->phase = 1;

	memset(qpair->cmd, 0, qpair->entries * sizeof(struct nvme_cmd));
	memset(qpair->cpl, 0, qpair->entries * sizeof(struct nvme_cpl));

	pthread_mutex_unlock(&qpair->lock);
}

void nvme_qpair_enable(struct nvme_qpair *qpair)
{
	pthread_mutex_lock(&qpair->lock);

	if (nvme_qpair_is_io_queue(qpair))
		_nvme_qpair_io_qpair_enable(qpair);
	else
		_nvme_qpair_admin_qpair_enable(qpair);

	pthread_mutex_unlock(&qpair->lock);
}

void nvme_qpair_disable(struct nvme_qpair *qpair)
{
	pthread_mutex_lock(&qpair->lock);

	if (nvme_qpair_is_io_queue(qpair))
		_nvme_qpair_io_qpair_disable(qpair);
	else
		_nvme_qpair_admin_qpair_disable(qpair);

	pthread_mutex_unlock(&qpair->lock);
}

void nvme_qpair_fail(struct nvme_qpair *qpair)
{
	struct nvme_tracker *tr;
	struct nvme_request *req;

	pthread_mutex_lock(&qpair->lock);

	while (!STAILQ_EMPTY(&qpair->queued_req)) {
		nvme_notice("Failing queued I/O command\n");
		req = STAILQ_FIRST(&qpair->queued_req);
		STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
		nvme_qpair_manual_complete_request(qpair, req, NVME_SCT_GENERIC,
						   NVME_SC_ABORTED_BY_REQUEST,
						   true);
	}

	/* Manually abort each outstanding I/O. */
	while (!LIST_EMPTY(&qpair->outstanding_tr)) {
		/*
		 * Do not remove the tracker. The abort_tracker path
		 * will do that for us.
		 */
		nvme_notice("Failing outstanding I/O command\n");
		tr = LIST_FIRST(&qpair->outstanding_tr);
		nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
						   NVME_SC_ABORTED_BY_REQUEST,
						   1, true);
	}

	pthread_mutex_unlock(&qpair->lock);
}
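
/*
 * Usage summary, as suggested by the functions in this file: a queue pair
 * built by nvme_qpair_construct() is fed with nvme_qpair_submit_request()
 * and drained with nvme_qpair_poll(); nvme_qpair_enable()/nvme_qpair_disable()
 * bracket controller resets, nvme_qpair_fail() aborts everything still
 * pending, and nvme_qpair_destroy() releases the queue memory, trackers and
 * request pool.
 */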