/*-
 * BSD LICENSE
 *
 * Copyright (c) Intel Corporation. All rights reserved.
 * Copyright (c) 2017, Western Digital Corporation or its affiliates.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "nvme_internal.h"

struct nvme_qpair_string {
	uint16_t value;
	const char *str;
};

static const struct nvme_qpair_string admin_opcode[] = {
	{ NVME_OPC_DELETE_IO_SQ, "DELETE IO SQ" },
	{ NVME_OPC_CREATE_IO_SQ, "CREATE IO SQ" },
	{ NVME_OPC_GET_LOG_PAGE, "GET LOG PAGE" },
	{ NVME_OPC_DELETE_IO_CQ, "DELETE IO CQ" },
	{ NVME_OPC_CREATE_IO_CQ, "CREATE IO CQ" },
	{ NVME_OPC_IDENTIFY, "IDENTIFY" },
	{ NVME_OPC_ABORT, "ABORT" },
	{ NVME_OPC_SET_FEATURES, "SET FEATURES" },
	{ NVME_OPC_GET_FEATURES, "GET FEATURES" },
	{ NVME_OPC_ASYNC_EVENT_REQUEST, "ASYNC EVENT REQUEST" },
	{ NVME_OPC_NS_MANAGEMENT, "NAMESPACE MANAGEMENT" },
	{ NVME_OPC_FIRMWARE_COMMIT, "FIRMWARE COMMIT" },
	{ NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD, "FIRMWARE IMAGE DOWNLOAD" },
	{ NVME_OPC_NS_ATTACHMENT, "NAMESPACE ATTACHMENT" },
	{ NVME_OPC_FORMAT_NVM, "FORMAT NVM" },
	{ NVME_OPC_SECURITY_SEND, "SECURITY SEND" },
	{ NVME_OPC_SECURITY_RECEIVE, "SECURITY RECEIVE" },
	{ 0xFFFF, "ADMIN COMMAND" }
};

static const struct nvme_qpair_string io_opcode[] = {
	{ NVME_OPC_FLUSH, "FLUSH" },
	{ NVME_OPC_WRITE, "WRITE" },
	{ NVME_OPC_READ, "READ" },
	{ NVME_OPC_WRITE_UNCORRECTABLE, "WRITE UNCORRECTABLE" },
	{ NVME_OPC_COMPARE, "COMPARE" },
	{ NVME_OPC_WRITE_ZEROES, "WRITE ZEROES" },
	{ NVME_OPC_DATASET_MANAGEMENT, "DATASET MANAGEMENT" },
	{ NVME_OPC_RESERVATION_REGISTER, "RESERVATION REGISTER" },
	{ NVME_OPC_RESERVATION_REPORT, "RESERVATION REPORT" },
	{ NVME_OPC_RESERVATION_ACQUIRE, "RESERVATION ACQUIRE" },
	{ NVME_OPC_RESERVATION_RELEASE, "RESERVATION RELEASE" },
	{ 0xFFFF, "IO COMMAND" }
};
static const struct nvme_qpair_string generic_status[] = {
	{ NVME_SC_SUCCESS, "SUCCESS" },
	{ NVME_SC_INVALID_OPCODE, "INVALID OPCODE" },
	{ NVME_SC_INVALID_FIELD, "INVALID FIELD" },
	{ NVME_SC_COMMAND_ID_CONFLICT, "COMMAND ID CONFLICT" },
	{ NVME_SC_DATA_TRANSFER_ERROR, "DATA TRANSFER ERROR" },
	{ NVME_SC_ABORTED_POWER_LOSS, "ABORTED - POWER LOSS" },
	{ NVME_SC_INTERNAL_DEVICE_ERROR, "INTERNAL DEVICE ERROR" },
	{ NVME_SC_ABORTED_BY_REQUEST, "ABORTED - BY REQUEST" },
	{ NVME_SC_ABORTED_SQ_DELETION, "ABORTED - SQ DELETION" },
	{ NVME_SC_ABORTED_FAILED_FUSED, "ABORTED - FAILED FUSED" },
	{ NVME_SC_ABORTED_MISSING_FUSED, "ABORTED - MISSING FUSED" },
	{ NVME_SC_INVALID_NAMESPACE_OR_FORMAT, "INVALID NAMESPACE OR FORMAT" },
	{ NVME_SC_COMMAND_SEQUENCE_ERROR, "COMMAND SEQUENCE ERROR" },
	{ NVME_SC_INVALID_SGL_SEG_DESCRIPTOR, "INVALID SGL SEGMENT DESCRIPTOR" },
	{ NVME_SC_INVALID_NUM_SGL_DESCIRPTORS, "INVALID NUMBER OF SGL DESCRIPTORS" },
	{ NVME_SC_DATA_SGL_LENGTH_INVALID, "DATA SGL LENGTH INVALID" },
	{ NVME_SC_METADATA_SGL_LENGTH_INVALID, "METADATA SGL LENGTH INVALID" },
	{ NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID, "SGL DESCRIPTOR TYPE INVALID" },
	{ NVME_SC_INVALID_CONTROLLER_MEM_BUF, "INVALID CONTROLLER MEMORY BUFFER" },
	{ NVME_SC_INVALID_PRP_OFFSET, "INVALID PRP OFFSET" },
	{ NVME_SC_ATOMIC_WRITE_UNIT_EXCEEDED, "ATOMIC WRITE UNIT EXCEEDED" },
	{ NVME_SC_LBA_OUT_OF_RANGE, "LBA OUT OF RANGE" },
	{ NVME_SC_CAPACITY_EXCEEDED, "CAPACITY EXCEEDED" },
	{ NVME_SC_NAMESPACE_NOT_READY, "NAMESPACE NOT READY" },
	{ NVME_SC_RESERVATION_CONFLICT, "RESERVATION CONFLICT" },
	{ NVME_SC_FORMAT_IN_PROGRESS, "FORMAT IN PROGRESS" },
	{ 0xFFFF, "GENERIC" }
};

static const struct nvme_qpair_string command_specific_status[] = {
	{ NVME_SC_COMPLETION_QUEUE_INVALID, "INVALID COMPLETION QUEUE" },
	{ NVME_SC_INVALID_QUEUE_IDENTIFIER, "INVALID QUEUE IDENTIFIER" },
	{ NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED, "MAX QUEUE SIZE EXCEEDED" },
	{ NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED, "ABORT CMD LIMIT EXCEEDED" },
	{ NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED, "ASYNC LIMIT EXCEEDED" },
	{ NVME_SC_INVALID_FIRMWARE_SLOT, "INVALID FIRMWARE SLOT" },
	{ NVME_SC_INVALID_FIRMWARE_IMAGE, "INVALID FIRMWARE IMAGE" },
	{ NVME_SC_INVALID_INTERRUPT_VECTOR, "INVALID INTERRUPT VECTOR" },
	{ NVME_SC_INVALID_LOG_PAGE, "INVALID LOG PAGE" },
	{ NVME_SC_INVALID_FORMAT, "INVALID FORMAT" },
	{ NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET, "FIRMWARE REQUIRES CONVENTIONAL RESET" },
	{ NVME_SC_INVALID_QUEUE_DELETION, "INVALID QUEUE DELETION" },
	{ NVME_SC_FEATURE_ID_NOT_SAVEABLE, "FEATURE ID NOT SAVEABLE" },
	{ NVME_SC_FEATURE_NOT_CHANGEABLE, "FEATURE NOT CHANGEABLE" },
	{ NVME_SC_FEATURE_NOT_NAMESPACE_SPECIFIC, "FEATURE NOT NAMESPACE SPECIFIC" },
	{ NVME_SC_FIRMWARE_REQ_NVM_RESET, "FIRMWARE REQUIRES NVM RESET" },
	{ NVME_SC_FIRMWARE_REQ_RESET, "FIRMWARE REQUIRES RESET" },
	{ NVME_SC_FIRMWARE_REQ_MAX_TIME_VIOLATION, "FIRMWARE REQUIRES MAX TIME VIOLATION" },
	{ NVME_SC_FIRMWARE_ACTIVATION_PROHIBITED, "FIRMWARE ACTIVATION PROHIBITED" },
	{ NVME_SC_OVERLAPPING_RANGE, "OVERLAPPING RANGE" },
	{ NVME_SC_NAMESPACE_INSUFFICIENT_CAPACITY, "NAMESPACE INSUFFICIENT CAPACITY" },
	{ NVME_SC_NAMESPACE_ID_UNAVAILABLE, "NAMESPACE ID UNAVAILABLE" },
	{ NVME_SC_NAMESPACE_ALREADY_ATTACHED, "NAMESPACE ALREADY ATTACHED" },
	{ NVME_SC_NAMESPACE_IS_PRIVATE, "NAMESPACE IS PRIVATE" },
	{ NVME_SC_NAMESPACE_NOT_ATTACHED, "NAMESPACE NOT ATTACHED" },
	{ NVME_SC_THINPROVISIONING_NOT_SUPPORTED, "THINPROVISIONING NOT SUPPORTED" },
	{ NVME_SC_CONTROLLER_LIST_INVALID, "CONTROLLER LIST INVALID" },
	{ NVME_SC_CONFLICTING_ATTRIBUTES, "CONFLICTING ATTRIBUTES" },
	{ NVME_SC_INVALID_PROTECTION_INFO, "INVALID PROTECTION INFO" },
	{ NVME_SC_ATTEMPTED_WRITE_TO_RO_PAGE, "WRITE TO RO PAGE" },
	{ 0xFFFF, "COMMAND SPECIFIC" }
};

static const struct nvme_qpair_string media_error_status[] = {
	{ NVME_SC_WRITE_FAULTS, "WRITE FAULTS" },
	{ NVME_SC_UNRECOVERED_READ_ERROR, "UNRECOVERED READ ERROR" },
	{ NVME_SC_GUARD_CHECK_ERROR, "GUARD CHECK ERROR" },
	{ NVME_SC_APPLICATION_TAG_CHECK_ERROR, "APPLICATION TAG CHECK ERROR" },
	{ NVME_SC_REFERENCE_TAG_CHECK_ERROR, "REFERENCE TAG CHECK ERROR" },
	{ NVME_SC_COMPARE_FAILURE, "COMPARE FAILURE" },
	{ NVME_SC_ACCESS_DENIED, "ACCESS DENIED" },
	{ NVME_SC_DEALLOCATED_OR_UNWRITTEN_BLOCK, "DEALLOCATED OR UNWRITTEN BLOCK" },
	{ 0xFFFF, "MEDIA ERROR" }
};

static inline bool nvme_qpair_is_admin_queue(struct nvme_qpair *qpair)
{
	return qpair->id == 0;
}

static inline bool nvme_qpair_is_io_queue(struct nvme_qpair *qpair)
{
	return qpair->id != 0;
}

static const char *nvme_qpair_get_string(const struct nvme_qpair_string *strings,
					 uint16_t value)
{
	const struct nvme_qpair_string *entry;

	entry = strings;

	while (entry->value != 0xFFFF) {
		if (entry->value == value)
			return entry->str;
		entry++;
	}

	return entry->str;
}

static void nvme_qpair_admin_qpair_print_command(struct nvme_qpair *qpair,
						 struct nvme_cmd *cmd)
{
	nvme_info("%s (%02x) sqid:%d cid:%d nsid:%x cdw10:%08x cdw11:%08x\n",
		  nvme_qpair_get_string(admin_opcode, cmd->opc), cmd->opc,
		  qpair->id, cmd->cid,
		  cmd->nsid, cmd->cdw10, cmd->cdw11);
}

static void nvme_qpair_io_qpair_print_command(struct nvme_qpair *qpair,
					      struct nvme_cmd *cmd)
{
	nvme_assert(qpair != NULL, "print_command: qpair == NULL\n");
	nvme_assert(cmd != NULL, "print_command: cmd == NULL\n");

	switch ((int)cmd->opc) {
	case NVME_OPC_WRITE:
	case NVME_OPC_READ:
	case NVME_OPC_WRITE_UNCORRECTABLE:
	case NVME_OPC_COMPARE:
		nvme_info("%s sqid:%d cid:%d nsid:%d lba:%llu len:%d\n",
			  nvme_qpair_get_string(io_opcode, cmd->opc),
			  qpair->id, cmd->cid, cmd->nsid,
			  ((unsigned long long)cmd->cdw11 << 32) + cmd->cdw10,
			  (cmd->cdw12 & 0xFFFF) + 1);
		break;
	case NVME_OPC_FLUSH:
	case NVME_OPC_DATASET_MANAGEMENT:
		nvme_info("%s sqid:%d cid:%d nsid:%d\n",
			  nvme_qpair_get_string(io_opcode, cmd->opc),
			  qpair->id, cmd->cid, cmd->nsid);
		break;
	default:
		nvme_info("%s (%02x) sqid:%d cid:%d nsid:%d\n",
			  nvme_qpair_get_string(io_opcode, cmd->opc),
			  cmd->opc, qpair->id, cmd->cid, cmd->nsid);
		break;
	}
}

static void nvme_qpair_print_command(struct nvme_qpair *qpair,
				     struct nvme_cmd *cmd)
{
	nvme_assert(qpair != NULL, "qpair can not be NULL");
	nvme_assert(cmd != NULL, "cmd can not be NULL");

	if (nvme_qpair_is_admin_queue(qpair))
		return nvme_qpair_admin_qpair_print_command(qpair, cmd);

	return nvme_qpair_io_qpair_print_command(qpair, cmd);
}

static const char *get_status_string(uint16_t sct, uint16_t sc)
{
	const struct nvme_qpair_string *entry;

	switch (sct) {
	case NVME_SCT_GENERIC:
		entry = generic_status;
		break;
	case NVME_SCT_COMMAND_SPECIFIC:
		entry = command_specific_status;
		break;
	case NVME_SCT_MEDIA_ERROR:
		entry = media_error_status;
		break;
	case NVME_SCT_VENDOR_SPECIFIC:
		return "VENDOR SPECIFIC";
	default:
		return "RESERVED";
	}

	return nvme_qpair_get_string(entry, sc);
}

static void nvme_qpair_print_completion(struct nvme_qpair *qpair,
					struct nvme_cpl *cpl)
{
	nvme_info("Cpl: %s (%02x/%02x) sqid:%d cid:%d "
		  "cdw0:%x sqhd:%04x p:%x m:%x dnr:%x\n",
		  get_status_string(cpl->status.sct, cpl->status.sc),
		  cpl->status.sct,
		  cpl->status.sc,
		  cpl->sqid,
		  cpl->cid,
		  cpl->cdw0,
		  cpl->sqhd,
		  cpl->status.p,
		  cpl->status.m,
		  cpl->status.dnr);
}

static bool nvme_qpair_completion_retry(const struct nvme_cpl *cpl)
{
	/*
	 * TODO: the spec is not clear on how commands aborted due to TLER
	 * will be marked, so for now NAMESPACE_NOT_READY is the only case
	 * where we look at the DNR bit.
	 */
	switch ((int)cpl->status.sct) {
	case NVME_SCT_GENERIC:
		switch ((int)cpl->status.sc) {
		case NVME_SC_NAMESPACE_NOT_READY:
		case NVME_SC_FORMAT_IN_PROGRESS:
			if (cpl->status.dnr)
				return false;
			return true;
		case NVME_SC_INVALID_OPCODE:
		case NVME_SC_INVALID_FIELD:
		case NVME_SC_COMMAND_ID_CONFLICT:
		case NVME_SC_DATA_TRANSFER_ERROR:
		case NVME_SC_ABORTED_POWER_LOSS:
		case NVME_SC_INTERNAL_DEVICE_ERROR:
		case NVME_SC_ABORTED_BY_REQUEST:
		case NVME_SC_ABORTED_SQ_DELETION:
		case NVME_SC_ABORTED_FAILED_FUSED:
		case NVME_SC_ABORTED_MISSING_FUSED:
		case NVME_SC_INVALID_NAMESPACE_OR_FORMAT:
		case NVME_SC_COMMAND_SEQUENCE_ERROR:
		case NVME_SC_LBA_OUT_OF_RANGE:
		case NVME_SC_CAPACITY_EXCEEDED:
		default:
			return false;
		}
	case NVME_SCT_COMMAND_SPECIFIC:
	case NVME_SCT_MEDIA_ERROR:
	case NVME_SCT_VENDOR_SPECIFIC:
	default:
		return false;
	}
}

static void nvme_qpair_construct_tracker(struct nvme_tracker *tr,
					 uint16_t cid, uint64_t phys_addr)
{
	tr->prp_sgl_bus_addr = phys_addr + offsetof(struct nvme_tracker, u.prp);
	tr->cid = cid;
	tr->active = false;
}

static inline void nvme_qpair_copy_command(struct nvme_cmd *dst,
					   const struct nvme_cmd *src)
{
	/* dst and src are known to be non-overlapping and 64-byte aligned. */
#if defined(__AVX__)
	__m256i *d256 = (__m256i *)dst;
	const __m256i *s256 = (const __m256i *)src;

	_mm256_store_si256(&d256[0], _mm256_load_si256(&s256[0]));
	_mm256_store_si256(&d256[1], _mm256_load_si256(&s256[1]));
#elif defined(__SSE2__)
	__m128i *d128 = (__m128i *)dst;
	const __m128i *s128 = (const __m128i *)src;

	_mm_store_si128(&d128[0], _mm_load_si128(&s128[0]));
	_mm_store_si128(&d128[1], _mm_load_si128(&s128[1]));
	_mm_store_si128(&d128[2], _mm_load_si128(&s128[2]));
	_mm_store_si128(&d128[3], _mm_load_si128(&s128[3]));
#else
	*dst = *src;
#endif
}
static void nvme_qpair_submit_tracker(struct nvme_qpair *qpair,
				      struct nvme_tracker *tr)
{
	struct nvme_request *req = tr->req;

	/*
	 * Set the tracker active and copy its command
	 * to the submission queue.
	 */
	nvme_debug("qpair %d: Submit command, tail %d, cid %d / %d\n",
		   qpair->id,
		   (int)qpair->sq_tail,
		   (int)tr->cid,
		   (int)tr->req->cmd.cid);

	qpair->tr[tr->cid].active = true;
	nvme_qpair_copy_command(&qpair->cmd[qpair->sq_tail], &req->cmd);

	if (++qpair->sq_tail == qpair->entries)
		qpair->sq_tail = 0;

	/* Make the SQ entry visible before ringing the tail doorbell. */
	nvme_wmb();
	nvme_mmio_write_4(qpair->sq_tdbl, qpair->sq_tail);
}

static void nvme_qpair_complete_tracker(struct nvme_qpair *qpair,
					struct nvme_tracker *tr,
					struct nvme_cpl *cpl,
					bool print_on_error)
{
	struct nvme_request *req = tr->req;
	bool retry, error;

	if (!req) {
		nvme_crit("tracker has no request\n");
		qpair->tr[cpl->cid].active = false;
		goto done;
	}

	error = nvme_cpl_is_error(cpl);
	retry = error && nvme_qpair_completion_retry(cpl) &&
		(req->retries < NVME_MAX_RETRY_COUNT);
	if (error && print_on_error) {
		nvme_qpair_print_command(qpair, &req->cmd);
		nvme_qpair_print_completion(qpair, cpl);
	}

	qpair->tr[cpl->cid].active = false;

	if (cpl->cid != req->cmd.cid)
		nvme_crit("cpl and command CID mismatch (%d / %d)\n",
			  (int)cpl->cid, (int)req->cmd.cid);

	if (retry) {
		req->retries++;
		nvme_qpair_submit_tracker(qpair, tr);
		return;
	}

	if (req->cb_fn)
		req->cb_fn(req->cb_arg, cpl);

	nvme_request_free_locked(req);

done:
	tr->req = NULL;

	LIST_REMOVE(tr, list);
	LIST_INSERT_HEAD(&qpair->free_tr, tr, list);
}
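/*
 * Note: nvme_qpair_submit_queued_requests() below drops the qpair lock
 * around each nvme_qpair_submit_request() call because that function
 * takes the lock itself; the queue head and the controller reset state
 * are re-checked after the lock is re-acquired.
 */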
static void nvme_qpair_submit_queued_requests(struct nvme_qpair *qpair)
{
	pthread_mutex_lock(&qpair->lock);

	/*
	 * If the controller is in the middle of a reset, don't
	 * try to submit queued requests - let the reset logic
	 * handle that instead.
	 */
	while (!STAILQ_EMPTY(&qpair->queued_req) && !qpair->ctrlr->resetting) {
		struct nvme_request *req = STAILQ_FIRST(&qpair->queued_req);

		STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);

		pthread_mutex_unlock(&qpair->lock);
		nvme_qpair_submit_request(qpair, req);
		pthread_mutex_lock(&qpair->lock);
	}

	pthread_mutex_unlock(&qpair->lock);
}

static void nvme_qpair_manual_complete_tracker(struct nvme_qpair *qpair,
					       struct nvme_tracker *tr,
					       uint32_t sct,
					       uint32_t sc,
					       uint32_t dnr,
					       bool print_on_error)
{
	struct nvme_cpl cpl;

	memset(&cpl, 0, sizeof(cpl));
	cpl.sqid = qpair->id;
	cpl.cid = tr->cid;
	cpl.status.sct = sct;
	cpl.status.sc = sc;
	cpl.status.dnr = dnr;

	nvme_qpair_complete_tracker(qpair, tr, &cpl, print_on_error);
}

static void nvme_qpair_manual_complete_request(struct nvme_qpair *qpair,
					       struct nvme_request *req,
					       uint32_t sct, uint32_t sc,
					       bool print_on_error)
{
	struct nvme_cpl cpl;
	bool error;

	memset(&cpl, 0, sizeof(cpl));
	cpl.sqid = qpair->id;
	cpl.status.sct = sct;
	cpl.status.sc = sc;

	error = nvme_cpl_is_error(&cpl);

	if (error && print_on_error) {
		nvme_qpair_print_command(qpair, &req->cmd);
		nvme_qpair_print_completion(qpair, &cpl);
	}

	if (req->cb_fn)
		req->cb_fn(req->cb_arg, &cpl);

	nvme_request_free_locked(req);
}

static void nvme_qpair_abort_aers(struct nvme_qpair *qpair)
{
	struct nvme_tracker *tr;

	tr = LIST_FIRST(&qpair->outstanding_tr);
	while (tr != NULL) {
		nvme_assert(tr->req != NULL,
			    "tr->req == NULL in abort_aers\n");
		if (tr->req->cmd.opc == NVME_OPC_ASYNC_EVENT_REQUEST) {
			nvme_qpair_manual_complete_tracker(qpair, tr,
						NVME_SCT_GENERIC,
						NVME_SC_ABORTED_SQ_DELETION,
						0, false);
			tr = LIST_FIRST(&qpair->outstanding_tr);
			continue;
		}
		tr = LIST_NEXT(tr, list);
	}
}

static inline void _nvme_qpair_admin_qpair_destroy(struct nvme_qpair *qpair)
{
	nvme_qpair_abort_aers(qpair);
}

static inline void _nvme_qpair_req_bad_phys(struct nvme_qpair *qpair,
					    struct nvme_tracker *tr)
{
	/*
	 * Bad vtophys translation, so abort this request
	 * and return immediately, without retry.
	 */
	nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
					   NVME_SC_INVALID_FIELD,
					   1, true);
}
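/*
 * Segment counting in the PRP builders below: the number of PRP entries
 * is the transfer length in full pages, plus one or two extra entries
 * when the length is not a multiple of the page size and/or the buffer
 * does not start on a page boundary (the modulo/unaligned adjustments).
 */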
/*
 * Build PRP list describing physically contiguous payload buffer.
 */
static int _nvme_qpair_build_contig_request(struct nvme_qpair *qpair,
					    struct nvme_request *req,
					    struct nvme_tracker *tr)
{
	uint64_t phys_addr;
	void *seg_addr;
	uint32_t nseg, cur_nseg, modulo, unaligned;
	void *md_payload;
	void *payload = req->payload.u.contig + req->payload_offset;

	phys_addr = nvme_mem_vtophys(payload);
	if (phys_addr == NVME_VTOPHYS_ERROR) {
		_nvme_qpair_req_bad_phys(qpair, tr);
		return -1;
	}

	nseg = req->payload_size >> PAGE_SHIFT;
	modulo = req->payload_size & (PAGE_SIZE - 1);
	unaligned = phys_addr & (PAGE_SIZE - 1);
	if (modulo || unaligned)
		nseg += 1 + ((modulo + unaligned - 1) >> PAGE_SHIFT);

	if (req->payload.md) {
		md_payload = req->payload.md + req->md_offset;
		tr->req->cmd.mptr = nvme_mem_vtophys(md_payload);
		if (tr->req->cmd.mptr == NVME_VTOPHYS_ERROR) {
			_nvme_qpair_req_bad_phys(qpair, tr);
			return -1;
		}
	}

	tr->req->cmd.psdt = NVME_PSDT_PRP;
	tr->req->cmd.dptr.prp.prp1 = phys_addr;
	if (nseg == 2) {
		seg_addr = payload + PAGE_SIZE - unaligned;
		tr->req->cmd.dptr.prp.prp2 = nvme_mem_vtophys(seg_addr);
	} else if (nseg > 2) {
		cur_nseg = 1;
		tr->req->cmd.dptr.prp.prp2 = (uint64_t)tr->prp_sgl_bus_addr;
		while (cur_nseg < nseg) {
			seg_addr = payload + cur_nseg * PAGE_SIZE - unaligned;
			phys_addr = nvme_mem_vtophys(seg_addr);
			if (phys_addr == NVME_VTOPHYS_ERROR) {
				_nvme_qpair_req_bad_phys(qpair, tr);
				return -1;
			}
			tr->u.prp[cur_nseg - 1] = phys_addr;
			cur_nseg++;
		}
	}

	return 0;
}
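/*
 * Scattered payloads are described either with a hardware SGL (when the
 * controller reports NVME_CTRLR_SGL_SUPPORTED) or with a PRP list built
 * from the caller's SGE callbacks; nvme_qpair_submit_request() selects
 * between the two builders below.
 */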
/*
 * Build SGL list describing scattered payload buffer.
 */
static int _nvme_qpair_build_hw_sgl_request(struct nvme_qpair *qpair,
					    struct nvme_request *req,
					    struct nvme_tracker *tr)
{
	struct nvme_sgl_descriptor *sgl;
	uint64_t phys_addr;
	uint32_t remaining_transfer_len, length, nseg = 0;
	int ret;

	/*
	 * Build scattered payloads.
	 */
	nvme_assert(req->payload_size != 0,
		    "cannot build SGL for zero-length transfer\n");
	nvme_assert(req->payload.type == NVME_PAYLOAD_TYPE_SGL,
		    "sgl payload type required\n");
	nvme_assert(req->payload.u.sgl.reset_sgl_fn != NULL,
		    "sgl reset callback required\n");
	nvme_assert(req->payload.u.sgl.next_sge_fn != NULL,
		    "sgl callback required\n");

	req->payload.u.sgl.reset_sgl_fn(req->payload.u.sgl.cb_arg,
					req->payload_offset);

	sgl = tr->u.sgl;
	req->cmd.psdt = NVME_PSDT_SGL_MPTR_SGL;
	req->cmd.dptr.sgl1.unkeyed.subtype = 0;

	remaining_transfer_len = req->payload_size;

	while (remaining_transfer_len > 0) {

		if (nseg >= NVME_MAX_SGL_DESCRIPTORS) {
			_nvme_qpair_req_bad_phys(qpair, tr);
			return -1;
		}

		ret = req->payload.u.sgl.next_sge_fn(req->payload.u.sgl.cb_arg,
						     &phys_addr, &length);
		if (ret != 0) {
			_nvme_qpair_req_bad_phys(qpair, tr);
			return ret;
		}

		length = nvme_min(remaining_transfer_len, length);
		remaining_transfer_len -= length;

		sgl->unkeyed.type = NVME_SGL_TYPE_DATA_BLOCK;
		sgl->unkeyed.length = length;
		sgl->address = phys_addr;
		sgl->unkeyed.subtype = 0;

		sgl++;
		nseg++;
	}

	if (nseg == 1) {
		/*
		 * The whole transfer can be described by a single Scatter
		 * Gather List descriptor. Use the special case described
		 * by the spec where SGL1's type is Data Block.
		 * This means the SGL in the tracker is not used at all,
		 * so copy the first (and only) SGL element into SGL1.
		 */
		req->cmd.dptr.sgl1.unkeyed.type = NVME_SGL_TYPE_DATA_BLOCK;
		req->cmd.dptr.sgl1.address = tr->u.sgl[0].address;
		req->cmd.dptr.sgl1.unkeyed.length = tr->u.sgl[0].unkeyed.length;
	} else {
		/* For now, only a single SGL segment is supported. */
		req->cmd.dptr.sgl1.unkeyed.type = NVME_SGL_TYPE_LAST_SEGMENT;
		req->cmd.dptr.sgl1.address = tr->prp_sgl_bus_addr;
		req->cmd.dptr.sgl1.unkeyed.length =
			nseg * sizeof(struct nvme_sgl_descriptor);
	}

	return 0;
}

/*
 * Build Physical Region Page list describing scattered payload buffer.
 */
static int _nvme_qpair_build_prps_sgl_request(struct nvme_qpair *qpair,
					      struct nvme_request *req,
					      struct nvme_tracker *tr)
{
	uint64_t phys_addr, prp2 = 0;
	uint32_t data_transferred, remaining_transfer_len, length;
	uint32_t nseg, cur_nseg, total_nseg = 0, last_nseg = 0;
	uint32_t modulo, unaligned, sge_count = 0;
	int ret;

	/*
	 * Build scattered payloads.
	 */
	nvme_assert(req->payload.type == NVME_PAYLOAD_TYPE_SGL,
		    "sgl payload type required\n");
	nvme_assert(req->payload.u.sgl.reset_sgl_fn != NULL,
		    "sgl reset callback required\n");

	req->payload.u.sgl.reset_sgl_fn(req->payload.u.sgl.cb_arg,
					req->payload_offset);

	remaining_transfer_len = req->payload_size;

	while (remaining_transfer_len > 0) {

		nvme_assert(req->payload.u.sgl.next_sge_fn != NULL,
			    "sgl callback required\n");

		ret = req->payload.u.sgl.next_sge_fn(req->payload.u.sgl.cb_arg,
						     &phys_addr, &length);
		if (ret != 0) {
			_nvme_qpair_req_bad_phys(qpair, tr);
			return -1;
		}

		nvme_assert((phys_addr & 0x3) == 0,
			    "address must be dword aligned\n");
		nvme_assert((length >= remaining_transfer_len) ||
			    ((phys_addr + length) % PAGE_SIZE) == 0,
			    "All SGEs except last must end on a page boundary\n");
		nvme_assert((sge_count == 0) ||
			    (phys_addr % PAGE_SIZE) == 0,
			    "All SGEs except first must start on a page boundary\n");

		data_transferred = nvme_min(remaining_transfer_len, length);

		nseg = data_transferred >> PAGE_SHIFT;
		modulo = data_transferred & (PAGE_SIZE - 1);
		unaligned = phys_addr & (PAGE_SIZE - 1);
		if (modulo || unaligned)
			nseg += 1 + ((modulo + unaligned - 1) >> PAGE_SHIFT);

		if (total_nseg == 0) {
			req->cmd.psdt = NVME_PSDT_PRP;
			req->cmd.dptr.prp.prp1 = phys_addr;
		}

		total_nseg += nseg;
		sge_count++;
		remaining_transfer_len -= data_transferred;

		if (total_nseg == 2) {
			if (sge_count == 1)
				tr->req->cmd.dptr.prp.prp2 = phys_addr +
					PAGE_SIZE - unaligned;
			else if (sge_count == 2)
				tr->req->cmd.dptr.prp.prp2 = phys_addr;
			/* save prp2 value */
			prp2 = tr->req->cmd.dptr.prp.prp2;
		} else if (total_nseg > 2) {
			if (sge_count == 1)
				cur_nseg = 1;
			else
				cur_nseg = 0;

			tr->req->cmd.dptr.prp.prp2 =
				(uint64_t)tr->prp_sgl_bus_addr;

			while (cur_nseg < nseg) {
				if (prp2) {
					tr->u.prp[0] = prp2;
					tr->u.prp[last_nseg + 1] = phys_addr +
						cur_nseg * PAGE_SIZE - unaligned;
				} else {
					tr->u.prp[last_nseg] = phys_addr +
						cur_nseg * PAGE_SIZE - unaligned;
				}
				last_nseg++;
				cur_nseg++;
			}
		}
	}

	return 0;
}
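/*
 * The enable paths below are used when a qpair is (re)activated, e.g.
 * after a controller reset: anything still queued or outstanding at that
 * point is manually completed as aborted rather than retried.
 */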
static void _nvme_qpair_admin_qpair_enable(struct nvme_qpair *qpair)
{
	struct nvme_tracker *tr, *tr_temp;

	/*
	 * Manually abort each outstanding admin command. Do not retry
	 * admin commands found here, since they will be left over from
	 * a controller reset and it is likely that the context in which
	 * the command was issued no longer applies.
	 */
	LIST_FOREACH_SAFE(tr, &qpair->outstanding_tr, list, tr_temp) {
		nvme_info("Aborting outstanding admin command\n");
		nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
						   NVME_SC_ABORTED_BY_REQUEST,
						   1 /* do not retry */, true);
	}

	qpair->enabled = true;
}

static void _nvme_qpair_io_qpair_enable(struct nvme_qpair *qpair)
{
	struct nvme_tracker *tr, *temp;
	struct nvme_request *req;

	qpair->enabled = true;

	qpair->ctrlr->enabled_io_qpairs++;

	/* Manually abort each queued I/O. */
	while (!STAILQ_EMPTY(&qpair->queued_req)) {
		req = STAILQ_FIRST(&qpair->queued_req);
		STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
		nvme_info("Aborting queued I/O command\n");
		nvme_qpair_manual_complete_request(qpair, req, NVME_SCT_GENERIC,
						   NVME_SC_ABORTED_BY_REQUEST,
						   true);
	}

	/* Manually abort each outstanding I/O. */
	LIST_FOREACH_SAFE(tr, &qpair->outstanding_tr, list, temp) {
		nvme_info("Aborting outstanding I/O command\n");
		nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
						   NVME_SC_ABORTED_BY_REQUEST,
						   0, true);
	}
}

static inline void _nvme_qpair_admin_qpair_disable(struct nvme_qpair *qpair)
{
	qpair->enabled = false;
	nvme_qpair_abort_aers(qpair);
}

static inline void _nvme_qpair_io_qpair_disable(struct nvme_qpair *qpair)
{
	qpair->enabled = false;

	qpair->ctrlr->enabled_io_qpairs--;
}

/*
 * Reserve room for the submission queue
 * in the controller memory buffer.
 */
static int nvme_ctrlr_reserve_sq_in_cmb(struct nvme_ctrlr *ctrlr,
					uint16_t entries,
					uint64_t aligned, uint64_t *offset)
{
	uint64_t round_offset;
	const uint64_t length = entries * sizeof(struct nvme_cmd);

	round_offset = ctrlr->cmb_current_offset;
	round_offset = (round_offset + (aligned - 1)) & ~(aligned - 1);

	if (round_offset + length > ctrlr->cmb_size)
		return -1;

	*offset = round_offset;
	ctrlr->cmb_current_offset = round_offset + length;

	return 0;
}

/*
 * Initialize a queue pair on the host side.
 */
int nvme_qpair_construct(struct nvme_ctrlr *ctrlr, struct nvme_qpair *qpair,
			 enum nvme_qprio qprio,
			 uint16_t entries, uint16_t trackers)
{
	volatile uint32_t *doorbell_base;
	struct nvme_tracker *tr;
	uint64_t offset;
	unsigned long phys_addr = 0;
	uint16_t i;
	int ret;

	nvme_assert(entries != 0, "Invalid number of entries\n");
	nvme_assert(trackers != 0, "Invalid trackers\n");

	pthread_mutex_init(&qpair->lock, NULL);

	qpair->entries = entries;
	qpair->trackers = trackers;
	qpair->qprio = qprio;
	qpair->sq_in_cmb = false;
	qpair->ctrlr = ctrlr;
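	/*
	 * If the controller exposes a memory buffer (CMB) and its use for
	 * submission queues is enabled, try to place the SQ there;
	 * otherwise, fall back to a regular host memory allocation below.
	 */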
	if (ctrlr->opts.use_cmb_sqs) {
		/*
		 * Reserve room for the submission queue in the
		 * controller memory buffer.
		 */
		ret = nvme_ctrlr_reserve_sq_in_cmb(ctrlr, entries,
						   PAGE_SIZE,
						   &offset);
		if (ret == 0) {
			qpair->cmd = ctrlr->cmb_bar_virt_addr + offset;
			qpair->cmd_bus_addr = ctrlr->cmb_bar_phys_addr + offset;
			qpair->sq_in_cmb = true;

			nvme_debug("Allocated qpair %d cmd in cmb at %p / 0x%llx\n",
				   qpair->id,
				   qpair->cmd, qpair->cmd_bus_addr);
		}
	}

	if (qpair->sq_in_cmb == false) {
		qpair->cmd =
			nvme_mem_alloc_node(sizeof(struct nvme_cmd) * entries,
					    PAGE_SIZE, NVME_NODE_ID_ANY,
					    (unsigned long *) &qpair->cmd_bus_addr);
		if (!qpair->cmd) {
			nvme_err("Allocate qpair commands failed\n");
			goto fail;
		}
		memset(qpair->cmd, 0, sizeof(struct nvme_cmd) * entries);

		nvme_debug("Allocated qpair %d cmd %p / 0x%llx\n",
			   qpair->id,
			   qpair->cmd, qpair->cmd_bus_addr);
	}

	qpair->cpl = nvme_mem_alloc_node(sizeof(struct nvme_cpl) * entries,
					 PAGE_SIZE, NVME_NODE_ID_ANY,
					 (unsigned long *) &qpair->cpl_bus_addr);
	if (!qpair->cpl) {
		nvme_err("Allocate qpair completions failed\n");
		goto fail;
	}
	memset(qpair->cpl, 0, sizeof(struct nvme_cpl) * entries);

	nvme_debug("Allocated qpair %d cpl at %p / 0x%llx\n",
		   qpair->id,
		   qpair->cpl,
		   qpair->cpl_bus_addr);

	doorbell_base = &ctrlr->regs->doorbell[0].sq_tdbl;
	qpair->sq_tdbl = doorbell_base +
		(2 * qpair->id + 0) * ctrlr->doorbell_stride_u32;
	qpair->cq_hdbl = doorbell_base +
		(2 * qpair->id + 1) * ctrlr->doorbell_stride_u32;

	LIST_INIT(&qpair->free_tr);
	LIST_INIT(&qpair->outstanding_tr);
	STAILQ_INIT(&qpair->free_req);
	STAILQ_INIT(&qpair->queued_req);

	/* Request pool */
	if (nvme_request_pool_construct(qpair)) {
		nvme_err("Create request pool failed\n");
		goto fail;
	}

	/*
	 * Reserve space for all of the trackers in a single allocation.
	 * struct nvme_tracker must be padded so that its size is already
	 * a power of 2. This ensures the PRP list embedded in the
	 * nvme_tracker object will not span a 4KB boundary, while allowing
	 * access to trackers in tr[] via normal array indexing.
	 */
	qpair->tr = nvme_mem_alloc_node(sizeof(struct nvme_tracker) * trackers,
					sizeof(struct nvme_tracker),
					NVME_NODE_ID_ANY, &phys_addr);
	if (!qpair->tr) {
		nvme_err("Allocate request trackers failed\n");
		goto fail;
	}
	memset(qpair->tr, 0, sizeof(struct nvme_tracker) * trackers);

	nvme_debug("Allocated qpair %d trackers at %p / 0x%lx\n",
		   qpair->id, qpair->tr, phys_addr);

	for (i = 0; i < trackers; i++) {
		tr = &qpair->tr[i];
		nvme_qpair_construct_tracker(tr, i, phys_addr);
		LIST_INSERT_HEAD(&qpair->free_tr, tr, list);
		phys_addr += sizeof(struct nvme_tracker);
	}

	nvme_qpair_reset(qpair);

	return 0;

fail:
	nvme_qpair_destroy(qpair);

	return -1;
}
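/*
 * Note: when the submission queue was placed in the controller memory
 * buffer (sq_in_cmb), qpair->cmd points into the CMB BAR mapping rather
 * than host memory, so nvme_qpair_destroy() must not nvme_free() it.
 */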
void nvme_qpair_destroy(struct nvme_qpair *qpair)
{
	if (!qpair->ctrlr)
		return; /* Not initialized */

	if (nvme_qpair_is_admin_queue(qpair))
		_nvme_qpair_admin_qpair_destroy(qpair);

	if (qpair->cmd && !qpair->sq_in_cmb) {
		nvme_free(qpair->cmd);
		qpair->cmd = NULL;
	}
	if (qpair->cpl) {
		nvme_free(qpair->cpl);
		qpair->cpl = NULL;
	}
	if (qpair->tr) {
		nvme_free(qpair->tr);
		qpair->tr = NULL;
	}

	nvme_request_pool_destroy(qpair);

	qpair->ctrlr = NULL;

	pthread_mutex_destroy(&qpair->lock);
}

static bool nvme_qpair_enabled(struct nvme_qpair *qpair)
{
	if (!qpair->enabled && !qpair->ctrlr->resetting)
		nvme_qpair_enable(qpair);

	return qpair->enabled;
}

int nvme_qpair_submit_request(struct nvme_qpair *qpair,
			      struct nvme_request *req)
{
	struct nvme_tracker *tr;
	struct nvme_request *child_req, *tmp;
	struct nvme_ctrlr *ctrlr = qpair->ctrlr;
	bool child_req_failed = false;
	int ret = 0;

	if (ctrlr->failed) {
		nvme_request_free(req);
		return ENXIO;
	}

	nvme_qpair_enabled(qpair);

	if (req->child_reqs) {
		/*
		 * This is a split (parent) request. Submit all of the
		 * children but not the parent request itself, since the
		 * parent is the original unsplit request.
		 */
		TAILQ_FOREACH_SAFE(child_req, &req->children, child_tailq, tmp) {
			if (!child_req_failed) {
				ret = nvme_qpair_submit_request(qpair, child_req);
				if (ret != 0)
					child_req_failed = true;
			} else {
				/*
				 * Free the remaining children since one
				 * child request already failed.
				 */
				nvme_request_remove_child(req, child_req);
				nvme_request_free(child_req);
			}
		}

		return ret;
	}

	pthread_mutex_lock(&qpair->lock);

	tr = LIST_FIRST(&qpair->free_tr);
	if (tr == NULL || !qpair->enabled || !STAILQ_EMPTY(&qpair->queued_req)) {
		/*
		 * No tracker is available, the qpair is disabled due
		 * to an in-progress controller-level reset, or
		 * there are already queued requests.
		 *
		 * Put the request on the qpair's request queue to be
		 * processed when a tracker frees up via a command
		 * completion or when the controller reset is completed.
		 */
		STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
		pthread_mutex_unlock(&qpair->lock);

		if (tr)
			nvme_qpair_submit_queued_requests(qpair);

		return 0;
	}

	/* Remove the tracker from the free list. */
	LIST_REMOVE(tr, list);
	LIST_INSERT_HEAD(&qpair->outstanding_tr, tr, list);
	tr->req = req;
	req->cmd.cid = tr->cid;

	if (req->payload_size == 0) {
		/* Null payload - leave PRP fields zeroed. */
		ret = 0;
	} else if (req->payload.type == NVME_PAYLOAD_TYPE_CONTIG) {
		ret = _nvme_qpair_build_contig_request(qpair, req, tr);
	} else if (req->payload.type == NVME_PAYLOAD_TYPE_SGL) {
		if (ctrlr->flags & NVME_CTRLR_SGL_SUPPORTED)
			ret = _nvme_qpair_build_hw_sgl_request(qpair, req, tr);
		else
			ret = _nvme_qpair_build_prps_sgl_request(qpair, req, tr);
	} else {
		nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
						   NVME_SC_INVALID_FIELD,
						   1 /* do not retry */, true);
		ret = -EINVAL;
	}

	if (ret == 0)
		nvme_qpair_submit_tracker(qpair, tr);

	pthread_mutex_unlock(&qpair->lock);

	return ret;
}
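/*
 * Completion entries are consumed by comparing the phase bit of each
 * completion queue entry against the qpair's expected phase: an entry
 * whose phase bit does not match has not been written by the controller
 * yet, so polling stops there.
 */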
/*
 * Poll for completion of NVMe commands submitted to the
 * specified I/O queue pair.
 */
unsigned int nvme_qpair_poll(struct nvme_qpair *qpair,
			     unsigned int max_completions)
{
	struct nvme_tracker *tr;
	struct nvme_cpl *cpl;
	uint32_t num_completions = 0;

	if (!nvme_qpair_enabled(qpair))
		/*
		 * The qpair is not enabled, likely because a controller
		 * reset is in progress. Ignore the interrupt - any I/O
		 * that was associated with this interrupt will get
		 * retried when the reset is complete.
		 */
		return 0;

	if ((max_completions == 0) ||
	    (max_completions > (qpair->entries - 1U)))
		/*
		 * max_completions == 0 means unlimited, but complete at
		 * most one queue depth batch of I/O at a time so that the
		 * completion queue doorbells don't wrap around.
		 */
		max_completions = qpair->entries - 1;

	pthread_mutex_lock(&qpair->lock);

	while (1) {

		cpl = &qpair->cpl[qpair->cq_head];
		if (cpl->status.p != qpair->phase)
			break;

		tr = &qpair->tr[cpl->cid];
		if (tr->active) {
			nvme_qpair_complete_tracker(qpair, tr, cpl, true);
		} else {
			nvme_info("cpl does not map to outstanding cmd\n");
			nvme_qpair_print_completion(qpair, cpl);
			nvme_panic("received completion for unknown cmd\n");
		}

		if (++qpair->cq_head == qpair->entries) {
			qpair->cq_head = 0;
			qpair->phase = !qpair->phase;
		}

		if (++num_completions == max_completions)
			break;
	}

	if (num_completions > 0)
		nvme_mmio_write_4(qpair->cq_hdbl, qpair->cq_head);

	pthread_mutex_unlock(&qpair->lock);

	if (!STAILQ_EMPTY(&qpair->queued_req))
		nvme_qpair_submit_queued_requests(qpair);

	return num_completions;
}
void nvme_qpair_reset(struct nvme_qpair *qpair)
{
	pthread_mutex_lock(&qpair->lock);

	qpair->sq_tail = qpair->cq_head = 0;

	/*
	 * The first time through the completion queue, hardware will set
	 * the phase bit of each completion to 1, so start out expecting a
	 * phase of 1 to know which entries have completed. The expected
	 * phase is toggled each time the completion queue rolls over.
	 */
	qpair->phase = 1;

	memset(qpair->cmd, 0, qpair->entries * sizeof(struct nvme_cmd));
	memset(qpair->cpl, 0, qpair->entries * sizeof(struct nvme_cpl));

	pthread_mutex_unlock(&qpair->lock);
}

void nvme_qpair_enable(struct nvme_qpair *qpair)
{
	pthread_mutex_lock(&qpair->lock);

	if (nvme_qpair_is_io_queue(qpair))
		_nvme_qpair_io_qpair_enable(qpair);
	else
		_nvme_qpair_admin_qpair_enable(qpair);

	pthread_mutex_unlock(&qpair->lock);
}

void nvme_qpair_disable(struct nvme_qpair *qpair)
{
	pthread_mutex_lock(&qpair->lock);

	if (nvme_qpair_is_io_queue(qpair))
		_nvme_qpair_io_qpair_disable(qpair);
	else
		_nvme_qpair_admin_qpair_disable(qpair);

	pthread_mutex_unlock(&qpair->lock);
}

void nvme_qpair_fail(struct nvme_qpair *qpair)
{
	struct nvme_tracker *tr;
	struct nvme_request *req;

	pthread_mutex_lock(&qpair->lock);

	while (!STAILQ_EMPTY(&qpair->queued_req)) {
		nvme_notice("Failing queued I/O command\n");
		req = STAILQ_FIRST(&qpair->queued_req);
		STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
		nvme_qpair_manual_complete_request(qpair, req, NVME_SCT_GENERIC,
						   NVME_SC_ABORTED_BY_REQUEST,
						   true);
	}

	/* Manually abort each outstanding I/O. */
	while (!LIST_EMPTY(&qpair->outstanding_tr)) {
		/*
		 * Do not remove the tracker. The abort_tracker path
		 * will do that for us.
		 */
		nvme_notice("Failing outstanding I/O command\n");
		tr = LIST_FIRST(&qpair->outstanding_tr);
		nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
						   NVME_SC_ABORTED_BY_REQUEST,
						   1, true);
	}

	pthread_mutex_unlock(&qpair->lock);
}