/*-
 * BSD LICENSE
 *
 * Copyright (c) Intel Corporation. All rights reserved.
 * Copyright (c) 2017, Western Digital Corporation or its affiliates.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "nvme_internal.h"

/*
 * Host software shall wait a minimum of CAP.TO x 500 milliseconds for CSTS.RDY
 * to be set to '1' after setting CC.EN to '1' from a previous value of '0'.
 */
static inline unsigned int
nvme_ctrlr_get_ready_to_in_ms(struct nvme_ctrlr *ctrlr)
{
	union nvme_cap_register cap;

	/* The CAP.TO unit, in milliseconds */
#define NVME_READY_TIMEOUT_UNIT 500

	cap.raw = nvme_reg_mmio_read_8(ctrlr, cap.raw);

	return (NVME_READY_TIMEOUT_UNIT * cap.bits.to);
}

/*
 * Create a queue pair.
 */
static int nvme_ctrlr_create_qpair(struct nvme_ctrlr *ctrlr,
				   struct nvme_qpair *qpair)
{
	int ret;

	/* Create the completion queue */
	ret = nvme_admin_create_ioq(ctrlr, qpair, NVME_IO_COMPLETION_QUEUE);
	if (ret != 0) {
		nvme_notice("Create completion queue %u failed\n",
			    qpair->id);
		return ret;
	}

	/* Create the submission queue */
	ret = nvme_admin_create_ioq(ctrlr, qpair, NVME_IO_SUBMISSION_QUEUE);
	if (ret != 0) {
		/* Attempt to delete the completion queue */
		nvme_notice("Create submission queue %u failed\n",
			    qpair->id);
		nvme_admin_delete_ioq(ctrlr, qpair, NVME_IO_COMPLETION_QUEUE);
		return ret;
	}

	nvme_qpair_reset(qpair);

	return 0;
}
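
/*
 * Note: the NVMe specification requires a completion queue to exist before
 * any submission queue that references it is created, which is why the
 * completion queue is created first above. Deletion proceeds in the
 * reverse order (submission queue first), as done in
 * nvme_ctrlr_delete_qpair() below.
 */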

/*
 * Delete a queue pair.
 */
static int nvme_ctrlr_delete_qpair(struct nvme_ctrlr *ctrlr,
				   struct nvme_qpair *qpair)
{
	int ret;

	/* Delete the submission queue */
	ret = nvme_admin_delete_ioq(ctrlr, qpair, NVME_IO_SUBMISSION_QUEUE);
	if (ret != 0) {
		nvme_notice("Delete submission queue %u failed\n",
			    qpair->id);
		return ret;
	}

	/* Delete the completion queue */
	ret = nvme_admin_delete_ioq(ctrlr, qpair, NVME_IO_COMPLETION_QUEUE);
	if (ret != 0) {
		nvme_notice("Delete completion queue %u failed\n",
			    qpair->id);
		return ret;
	}

	return 0;
}

/*
 * Build the list of Intel vendor-specific log pages supported
 * by the controller.
 */
static void
nvme_ctrlr_construct_intel_support_log_page_list(struct nvme_ctrlr *ctrlr,
				struct nvme_intel_log_page_dir *log_page_dir)
{
	if (ctrlr->cdata.vid != NVME_PCI_VID_INTEL ||
	    log_page_dir == NULL)
		return;

	ctrlr->log_page_supported[NVME_INTEL_LOG_PAGE_DIR] = true;

	if (log_page_dir->read_latency_log_len ||
	    (ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY))
		ctrlr->log_page_supported[NVME_INTEL_LOG_READ_CMD_LATENCY] = true;

	if (log_page_dir->write_latency_log_len ||
	    (ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY))
		ctrlr->log_page_supported[NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true;

	if (log_page_dir->temperature_statistics_log_len)
		ctrlr->log_page_supported[NVME_INTEL_LOG_TEMPERATURE] = true;

	if (log_page_dir->smart_log_len)
		ctrlr->log_page_supported[NVME_INTEL_LOG_SMART] = true;

	if (log_page_dir->marketing_description_log_len)
		ctrlr->log_page_supported[NVME_INTEL_MARKETING_DESCRIPTION] = true;
}

/*
 * Retrieve the Intel log page directory and mark the supported
 * Intel vendor-specific log pages.
 */
static int nvme_ctrlr_set_intel_support_log_pages(struct nvme_ctrlr *ctrlr)
{
	struct nvme_intel_log_page_dir *log_page_dir;
	int ret;

	log_page_dir = nvme_zmalloc(sizeof(struct nvme_intel_log_page_dir), 64);
	if (!log_page_dir) {
		nvme_err("Allocate log_page_directory failed\n");
		return ENOMEM;
	}

	ret = nvme_admin_get_log_page(ctrlr, NVME_INTEL_LOG_PAGE_DIR,
				      NVME_GLOBAL_NS_TAG,
				      log_page_dir,
				      sizeof(struct nvme_intel_log_page_dir));
	if (ret != 0)
		nvme_notice("Get NVME_INTEL_LOG_PAGE_DIR log page failed\n");
	else
		nvme_ctrlr_construct_intel_support_log_page_list(ctrlr,
								 log_page_dir);

	nvme_free(log_page_dir);

	return ret;
}

/*
 * Initialize the log page support directory.
 */
static void nvme_ctrlr_set_supported_log_pages(struct nvme_ctrlr *ctrlr)
{
	memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported));

	/* Mandatory pages */
	ctrlr->log_page_supported[NVME_LOG_ERROR] = true;
	ctrlr->log_page_supported[NVME_LOG_HEALTH_INFORMATION] = true;
	ctrlr->log_page_supported[NVME_LOG_FIRMWARE_SLOT] = true;

	if (ctrlr->cdata.lpa.celp)
		ctrlr->log_page_supported[NVME_LOG_COMMAND_EFFECTS_LOG] = true;

	if (ctrlr->cdata.vid == NVME_PCI_VID_INTEL)
		nvme_ctrlr_set_intel_support_log_pages(ctrlr);
}
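
/*
 * Illustrative note: callers are expected to consult the
 * log_page_supported[] directory built above before requesting a log
 * page. A minimal sketch (the health_log buffer type is assumed here):
 *
 *	if (ctrlr->log_page_supported[NVME_LOG_HEALTH_INFORMATION])
 *		ret = nvme_admin_get_log_page(ctrlr,
 *					      NVME_LOG_HEALTH_INFORMATION,
 *					      NVME_GLOBAL_NS_TAG, &health_log,
 *					      sizeof(health_log));
 */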

/*
 * Set Intel device features.
 */
static void nvme_ctrlr_set_intel_supported_features(struct nvme_ctrlr *ctrlr)
{
	bool *supported_feature = ctrlr->feature_supported;

	supported_feature[NVME_INTEL_FEAT_MAX_LBA] = true;
	supported_feature[NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true;
	supported_feature[NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true;
	supported_feature[NVME_INTEL_FEAT_SMBUS_ADDRESS] = true;
	supported_feature[NVME_INTEL_FEAT_LED_PATTERN] = true;
	supported_feature[NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true;
	supported_feature[NVME_INTEL_FEAT_LATENCY_TRACKING] = true;
}

/*
 * Set device features.
 */
static void nvme_ctrlr_set_supported_features(struct nvme_ctrlr *ctrlr)
{
	bool *supported_feature = ctrlr->feature_supported;

	memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported));

	/* Mandatory features */
	supported_feature[NVME_FEAT_ARBITRATION] = true;
	supported_feature[NVME_FEAT_POWER_MANAGEMENT] = true;
	supported_feature[NVME_FEAT_TEMPERATURE_THRESHOLD] = true;
	supported_feature[NVME_FEAT_ERROR_RECOVERY] = true;
	supported_feature[NVME_FEAT_NUMBER_OF_QUEUES] = true;
	supported_feature[NVME_FEAT_INTERRUPT_COALESCING] = true;
	supported_feature[NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true;
	supported_feature[NVME_FEAT_WRITE_ATOMICITY] = true;
	supported_feature[NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true;

	/* Optional features */
	if (ctrlr->cdata.vwc.present)
		supported_feature[NVME_FEAT_VOLATILE_WRITE_CACHE] = true;
	if (ctrlr->cdata.apsta.supported)
		supported_feature[NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION]
			= true;
	if (ctrlr->cdata.hmpre)
		supported_feature[NVME_FEAT_HOST_MEM_BUFFER] = true;
	if (ctrlr->cdata.vid == NVME_PCI_VID_INTEL)
		nvme_ctrlr_set_intel_supported_features(ctrlr);
}

/*
 * Initialize I/O queue pairs.
 */
static int nvme_ctrlr_init_io_qpairs(struct nvme_ctrlr *ctrlr)
{
	struct nvme_qpair *qpair;
	union nvme_cap_register cap;
	uint32_t i;

	if (ctrlr->ioq != NULL)
		/*
		 * io_qpairs were already constructed, so just return.
		 * This typically happens when the controller is
		 * initialized a second (or subsequent) time after a
		 * controller reset.
		 */
		return 0;

	/*
	 * The NVMe spec sets a hard limit of 64K max entries, but
	 * devices may specify a smaller limit, so we need to check
	 * the MQES field in the capabilities register.
	 */
	cap.raw = nvme_reg_mmio_read_8(ctrlr, cap.raw);
	ctrlr->io_qpairs_max_entries =
		nvme_min(NVME_IO_ENTRIES, (unsigned int)cap.bits.mqes + 1);

	ctrlr->ioq = calloc(ctrlr->io_queues, sizeof(struct nvme_qpair));
	if (!ctrlr->ioq)
		return ENOMEM;

	/* Keep queue pair ID 0 for the admin queue */
	for (i = 0; i < ctrlr->io_queues; i++) {
		qpair = &ctrlr->ioq[i];
		qpair->id = i + 1;
		TAILQ_INSERT_TAIL(&ctrlr->free_io_qpairs, qpair, tailq);
	}

	return 0;
}
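
/*
 * Note on queue depth: CAP.MQES is 0-based, so a controller reporting
 * MQES = 1023 allows I/O queues of up to 1024 entries. The value computed
 * in nvme_ctrlr_init_io_qpairs() is therefore min(NVME_IO_ENTRIES, MQES + 1).
 */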

/*
 * Shutdown a controller.
 */
static void nvme_ctrlr_shutdown(struct nvme_ctrlr *ctrlr)
{
	union nvme_cc_register cc;
	union nvme_csts_register csts;
	int ms_waited = 0;

	cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);
	cc.bits.shn = NVME_SHN_NORMAL;
	nvme_reg_mmio_write_4(ctrlr, cc.raw, cc.raw);

	csts.raw = nvme_reg_mmio_read_4(ctrlr, csts.raw);
	/*
	 * The NVMe spec does not define a timeout period for shutdown
	 * notification, so we just pick 5 seconds as a reasonable amount
	 * of time to wait before proceeding.
	 */
#define NVME_CTRLR_SHUTDOWN_TIMEOUT 5000
	while (csts.bits.shst != NVME_SHST_COMPLETE) {
		nvme_usleep(1000);
		csts.raw = nvme_reg_mmio_read_4(ctrlr, csts.raw);
		if (ms_waited++ >= NVME_CTRLR_SHUTDOWN_TIMEOUT)
			break;
	}

	if (csts.bits.shst != NVME_SHST_COMPLETE)
		nvme_err("Controller did not shut down within %d seconds\n",
			 NVME_CTRLR_SHUTDOWN_TIMEOUT / 1000);
}

/*
 * Enable a controller.
 */
static int nvme_ctrlr_enable(struct nvme_ctrlr *ctrlr)
{
	union nvme_cc_register cc;
	union nvme_aqa_register aqa;
	union nvme_cap_register cap;

	cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);

	if (cc.bits.en != 0) {
		nvme_err("Controller enable called with CC.EN = 1\n");
		return EINVAL;
	}

	nvme_reg_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr);
	nvme_reg_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr);

	aqa.raw = 0;
	/* acqs and asqs are 0-based. */
	aqa.bits.acqs = ctrlr->adminq.entries - 1;
	aqa.bits.asqs = ctrlr->adminq.entries - 1;
	nvme_reg_mmio_write_4(ctrlr, aqa.raw, aqa.raw);

	cc.bits.en = 1;
	cc.bits.css = 0;
	cc.bits.shn = 0;
	cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
	cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */

	/* Page size is 2 ^ (12 + mps). */
	cc.bits.mps = PAGE_SHIFT - 12;

	cap.raw = nvme_reg_mmio_read_8(ctrlr, cap.raw);

	switch (ctrlr->opts.arb_mechanism) {
	case NVME_CC_AMS_RR:
		break;
	case NVME_CC_AMS_WRR:
		if (NVME_CAP_AMS_WRR & cap.bits.ams)
			break;
		return EINVAL;
	case NVME_CC_AMS_VS:
		if (NVME_CAP_AMS_VS & cap.bits.ams)
			break;
		return EINVAL;
	default:
		return EINVAL;
	}

	cc.bits.ams = ctrlr->opts.arb_mechanism;

	nvme_reg_mmio_write_4(ctrlr, cc.raw, cc.raw);

	return 0;
}
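
/*
 * Encoding notes for nvme_ctrlr_enable() (values follow the NVMe spec):
 * CC.IOSQES = 6 selects 2^6 = 64-byte submission queue entries and
 * CC.IOCQES = 4 selects 2^4 = 16-byte completion queue entries.
 * CC.MPS encodes the host memory page size as 2^(12 + MPS), so for
 * 4 KiB pages (PAGE_SHIFT = 12) MPS is 0, and for 8 KiB pages it is 1.
 */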

/*
 * Disable a controller.
 */
static inline void nvme_ctrlr_disable(struct nvme_ctrlr *ctrlr)
{
	union nvme_cc_register cc;

	cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);
	cc.bits.en = 0;

	nvme_reg_mmio_write_4(ctrlr, cc.raw, cc.raw);
}

/*
 * Test if a controller is enabled.
 */
static inline int nvme_ctrlr_enabled(struct nvme_ctrlr *ctrlr)
{
	union nvme_cc_register cc;

	cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);

	return cc.bits.en;
}

/*
 * Test if a controller is ready.
 */
static inline int nvme_ctrlr_ready(struct nvme_ctrlr *ctrlr)
{
	union nvme_csts_register csts;

	csts.raw = nvme_reg_mmio_read_4(ctrlr, csts.raw);

	return csts.bits.rdy;
}

/*
 * Set a controller state.
 */
static void nvme_ctrlr_set_state(struct nvme_ctrlr *ctrlr,
				 enum nvme_ctrlr_state state,
				 uint64_t timeout_in_ms)
{
	ctrlr->state = state;
	if (timeout_in_ms == NVME_TIMEOUT_INFINITE)
		ctrlr->state_timeout_ms = NVME_TIMEOUT_INFINITE;
	else
		ctrlr->state_timeout_ms = nvme_time_msec() + timeout_in_ms;
}

/*
 * Identify the controller and get its data.
 */
static int nvme_ctrlr_identify(struct nvme_ctrlr *ctrlr)
{
	int ret;

	ret = nvme_admin_identify_ctrlr(ctrlr, &ctrlr->cdata);
	if (ret != 0) {
		nvme_notice("Identify controller failed\n");
		return ret;
	}

	/*
	 * Use MDTS to ensure our default max_xfer_size doesn't
	 * exceed what the controller supports.
	 */
	if (ctrlr->cdata.mdts > 0)
		ctrlr->max_xfer_size = nvme_min(ctrlr->max_xfer_size,
						ctrlr->min_page_size
						* (1 << (ctrlr->cdata.mdts)));
	return 0;
}

/*
 * Get the maximum number of I/O queue pairs.
 */
static int nvme_ctrlr_get_max_io_qpairs(struct nvme_ctrlr *ctrlr)
{
	unsigned int cdw0, cq_allocated, sq_allocated;
	int ret;

	ret = nvme_admin_get_feature(ctrlr, NVME_FEAT_CURRENT,
				     NVME_FEAT_NUMBER_OF_QUEUES,
				     0, &cdw0);
	if (ret != 0) {
		nvme_notice("Get feature NVME_FEAT_NUMBER_OF_QUEUES failed\n");
		return ret;
	}

	/*
	 * Data in cdw0 is 0-based.
	 * Lower 16 bits indicate the number of submission queues allocated.
	 * Upper 16 bits indicate the number of completion queues allocated.
	 */
	sq_allocated = (cdw0 & 0xFFFF) + 1;
	cq_allocated = (cdw0 >> 16) + 1;

	ctrlr->max_io_queues = nvme_min(sq_allocated, cq_allocated);

	return 0;
}

/*
 * Set the number of I/O queue pairs.
 */
static int nvme_ctrlr_set_num_qpairs(struct nvme_ctrlr *ctrlr)
{
	unsigned int num_queues, cdw0;
	unsigned int cq_allocated, sq_allocated;
	int ret;

	ret = nvme_ctrlr_get_max_io_qpairs(ctrlr);
	if (ret != 0) {
		nvme_notice("Failed to get the maximum number of I/O qpairs\n");
		return ret;
	}

	/*
	 * Format the number of I/O queues:
	 * subtract 1 as the value is 0-based,
	 * bits 31:16 represent the number of completion queues,
	 * bits 15:0 represent the number of submission queues.
	 */
	num_queues = ((ctrlr->opts.io_queues - 1) << 16) |
		(ctrlr->opts.io_queues - 1);

	/*
	 * Set the number of I/O queues.
	 * Note: The value allocated may be smaller or larger than the number
	 * of queues requested (see specifications).
	 */
	ret = nvme_admin_set_feature(ctrlr, false, NVME_FEAT_NUMBER_OF_QUEUES,
				     num_queues, 0, &cdw0);
	if (ret != 0) {
		nvme_notice("Set feature NVME_FEAT_NUMBER_OF_QUEUES failed\n");
		return ret;
	}

	/*
	 * Data in cdw0 is 0-based.
	 * Lower 16 bits indicate the number of submission queues allocated.
	 * Upper 16 bits indicate the number of completion queues allocated.
	 */
	sq_allocated = (cdw0 & 0xFFFF) + 1;
	cq_allocated = (cdw0 >> 16) + 1;
	ctrlr->io_queues = nvme_min(sq_allocated, cq_allocated);
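
	/*
	 * Example (illustrative values): requesting 32 queue pairs sends
	 * cdw11 = 0x001F001F; a controller granting only 16 of each would
	 * complete with cdw0 = 0x000F000F, giving io_queues = 16 here.
	 */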

	/*
	 * Make sure the number of constructed qpairs listed in
	 * free_io_qpairs does not exceed the number requested.
	 */
	ctrlr->io_queues = nvme_min(ctrlr->io_queues, ctrlr->opts.io_queues);

	return 0;
}

static void nvme_ctrlr_destruct_namespaces(struct nvme_ctrlr *ctrlr)
{
	if (ctrlr->ns) {
		free(ctrlr->ns);
		ctrlr->ns = NULL;
		ctrlr->nr_ns = 0;
	}

	if (ctrlr->nsdata) {
		nvme_free(ctrlr->nsdata);
		ctrlr->nsdata = NULL;
	}
}

static int nvme_ctrlr_construct_namespaces(struct nvme_ctrlr *ctrlr)
{
	unsigned int i, nr_ns = ctrlr->cdata.nn;
	struct nvme_ns *ns = NULL;

	/*
	 * ctrlr->nr_ns may be 0 (startup) or a different number of
	 * namespaces (reset), so check if we need to reallocate.
	 */
	if (nr_ns != ctrlr->nr_ns) {

		nvme_ctrlr_destruct_namespaces(ctrlr);

		ctrlr->ns = calloc(nr_ns, sizeof(struct nvme_ns));
		if (!ctrlr->ns)
			goto fail;

		nvme_debug("Allocate %u namespace data\n", nr_ns);
		ctrlr->nsdata = nvme_calloc(nr_ns, sizeof(struct nvme_ns_data),
					    PAGE_SIZE);
		if (!ctrlr->nsdata)
			goto fail;

		ctrlr->nr_ns = nr_ns;

	}

	for (i = 0; i < nr_ns; i++) {
		ns = &ctrlr->ns[i];
		if (nvme_ns_construct(ctrlr, ns, i + 1) != 0)
			goto fail;
	}

	return 0;

fail:
	nvme_ctrlr_destruct_namespaces(ctrlr);

	return -1;
}

/*
 * Forward declaration.
 */
static int nvme_ctrlr_construct_and_submit_aer(struct nvme_ctrlr *ctrlr,
				struct nvme_async_event_request *aer);

/*
 * Async event completion callback.
 */
static void nvme_ctrlr_async_event_cb(void *arg, const struct nvme_cpl *cpl)
{
	struct nvme_async_event_request *aer = arg;
	struct nvme_ctrlr *ctrlr = aer->ctrlr;

	if (cpl->status.sc == NVME_SC_ABORTED_SQ_DELETION)
		/*
		 * This is simulated when the controller is being shut down,
		 * to effectively abort outstanding asynchronous event
		 * requests and make sure all memory is freed. Do not repost
		 * the request in this case.
		 */
		return;

	if (ctrlr->aer_cb_fn != NULL)
		ctrlr->aer_cb_fn(ctrlr->aer_cb_arg, cpl);

	/*
	 * Repost another asynchronous event request to replace
	 * the one that just completed.
	 */
	if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer))
		/*
		 * We can't do anything to recover from a failure here,
		 * so just print a warning message and leave the
		 * AER unsubmitted.
		 */
		nvme_err("Initialize AER failed\n");
}

/*
 * Issue an async event request.
 */
static int nvme_ctrlr_construct_and_submit_aer(struct nvme_ctrlr *ctrlr,
				struct nvme_async_event_request *aer)
{
	struct nvme_request *req;

	req = nvme_request_allocate_null(&ctrlr->adminq,
					 nvme_ctrlr_async_event_cb, aer);
	if (req == NULL)
		return -1;

	aer->ctrlr = ctrlr;
	aer->req = req;
	req->cmd.opc = NVME_OPC_ASYNC_EVENT_REQUEST;

	return nvme_qpair_submit_request(&ctrlr->adminq, req);
}

/*
 * Configure async event management.
 */
static int nvme_ctrlr_configure_aer(struct nvme_ctrlr *ctrlr)
{
	union nvme_critical_warning_state state;
	struct nvme_async_event_request *aer;
	unsigned int i;
	int ret;

	state.raw = 0xFF;
	state.bits.reserved = 0;

	ret = nvme_admin_set_feature(ctrlr, false,
				     NVME_FEAT_ASYNC_EVENT_CONFIGURATION,
				     state.raw, 0, NULL);
	if (ret != 0) {
		nvme_notice("Set feature ASYNC_EVENT_CONFIGURATION failed\n");
		return ret;
	}

	/* aerl is a zero-based value, so we need to add 1 here. */
	ctrlr->num_aers = nvme_min(NVME_MAX_ASYNC_EVENTS,
				   (ctrlr->cdata.aerl + 1));

	for (i = 0; i < ctrlr->num_aers; i++) {
		aer = &ctrlr->aer[i];
		if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) {
			nvme_notice("Construct AER failed\n");
			return -1;
		}
	}

	return 0;
}

/*
 * Start a controller.
 */
static int nvme_ctrlr_start(struct nvme_ctrlr *ctrlr)
{
	nvme_qpair_reset(&ctrlr->adminq);
	nvme_qpair_enable(&ctrlr->adminq);

	if (nvme_ctrlr_identify(ctrlr) != 0)
		return -1;

	if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0)
		return -1;

	if (nvme_ctrlr_init_io_qpairs(ctrlr))
		return -1;

	if (nvme_ctrlr_construct_namespaces(ctrlr) != 0)
		return -1;

	if (nvme_ctrlr_configure_aer(ctrlr) != 0)
		nvme_warning("Controller does not support AER\n");

	nvme_ctrlr_set_supported_log_pages(ctrlr);
	nvme_ctrlr_set_supported_features(ctrlr);

	if (ctrlr->cdata.sgls.supported)
		ctrlr->flags |= NVME_CTRLR_SGL_SUPPORTED;

	return 0;
}

/*
 * Memory map the controller memory buffer (CMB).
 */
static void nvme_ctrlr_map_cmb(struct nvme_ctrlr *ctrlr)
{
	int ret;
	void *addr;
	uint32_t bir;
	union nvme_cmbsz_register cmbsz;
	union nvme_cmbloc_register cmbloc;
	uint64_t size, unit_size, offset, bar_size, bar_phys_addr;

	cmbsz.raw = nvme_reg_mmio_read_4(ctrlr, cmbsz.raw);
	cmbloc.raw = nvme_reg_mmio_read_4(ctrlr, cmbloc.raw);
	if (!cmbsz.bits.sz)
		goto out;

	/* Values 0, 2, 3, 4 and 5 are valid for the BAR indicator (BIR) */
	bir = cmbloc.bits.bir;
	if (bir > 5 || bir == 1)
		goto out;

	/* Unit size for 4KB/64KB/1MB/16MB/256MB/4GB/64GB */
	unit_size = (uint64_t)1 << (12 + 4 * cmbsz.bits.szu);

	/* Controller memory buffer size in Bytes */
	size = unit_size * cmbsz.bits.sz;

	/* Controller memory buffer offset from BAR in Bytes */
	offset = unit_size * cmbloc.bits.ofst;

	nvme_pcicfg_get_bar_addr_len(ctrlr->pci_dev, bir, &bar_phys_addr,
				     &bar_size);

	if (offset > bar_size)
		goto out;

	if (size > bar_size - offset)
		goto out;

	ret = nvme_pcicfg_map_bar_write_combine(ctrlr->pci_dev, bir, &addr);
	if ((ret != 0) || addr == NULL)
		goto out;

	ctrlr->cmb_bar_virt_addr = addr;
	ctrlr->cmb_bar_phys_addr = bar_phys_addr;
	ctrlr->cmb_size = size;
	ctrlr->cmb_current_offset = offset;

	if (!cmbsz.bits.sqs)
		ctrlr->opts.use_cmb_sqs = false;

	return;

out:
	ctrlr->cmb_bar_virt_addr = NULL;
	ctrlr->opts.use_cmb_sqs = false;

	return;
}
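
/*
 * Example (illustrative values): with CMBSZ.SZU = 2 the CMB size unit is
 * 2^(12 + 4 * 2) = 1 MiB, so CMBSZ.SZ = 512 describes a 512 MiB buffer,
 * and CMBLOC.OFST = 16 places it 16 MiB into the indicated BAR.
 */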

/*
 * Unmap the controller memory buffer.
 */
static int nvme_ctrlr_unmap_cmb(struct nvme_ctrlr *ctrlr)
{
	union nvme_cmbloc_register cmbloc;
	void *addr = ctrlr->cmb_bar_virt_addr;
	int ret = 0;

	if (addr) {
		cmbloc.raw = nvme_reg_mmio_read_4(ctrlr, cmbloc.raw);
		ret = nvme_pcicfg_unmap_bar(ctrlr->pci_dev, cmbloc.bits.bir,
					    addr);
	}
	return ret;
}

/*
 * Map the controller PCI bars.
 */
static int nvme_ctrlr_map_bars(struct nvme_ctrlr *ctrlr)
{
	void *addr;
	int ret;

	ret = nvme_pcicfg_map_bar(ctrlr->pci_dev, 0, 0, &addr);
	if (ret != 0 || addr == NULL) {
		nvme_err("Map PCI device bar failed %d (%s)\n",
			 ret, strerror(ret));
		return ret;
	}

	nvme_debug("Controller BAR mapped at %p\n", addr);

	ctrlr->regs = (volatile struct nvme_registers *)addr;
	nvme_ctrlr_map_cmb(ctrlr);

	return 0;
}

/*
 * Unmap the controller PCI bars.
 */
static int nvme_ctrlr_unmap_bars(struct nvme_ctrlr *ctrlr)
{
	void *addr = (void *)ctrlr->regs;
	int ret;

	ret = nvme_ctrlr_unmap_cmb(ctrlr);
	if (ret != 0) {
		nvme_err("Unmap controller memory buffer failed %d\n", ret);
		return ret;
	}

	if (addr) {
		ret = nvme_pcicfg_unmap_bar(ctrlr->pci_dev, 0, addr);
		if (ret != 0) {
			nvme_err("Unmap PCI device bar failed %d\n", ret);
			return ret;
		}
	}

	return 0;
}

/*
 * Set a controller in the failed state.
 */
static void nvme_ctrlr_fail(struct nvme_ctrlr *ctrlr)
{
	unsigned int i;

	ctrlr->failed = true;

	nvme_qpair_fail(&ctrlr->adminq);
	if (ctrlr->ioq)
		for (i = 0; i < ctrlr->io_queues; i++)
			nvme_qpair_fail(&ctrlr->ioq[i]);
}

/*
 * This function will be called repeatedly during initialization
 * until the controller is ready.
 */
static int nvme_ctrlr_init(struct nvme_ctrlr *ctrlr)
{
	unsigned int ready_timeout_in_ms = nvme_ctrlr_get_ready_to_in_ms(ctrlr);
	int ret;

	/*
	 * Check if the current initialization step is done or has timed out.
	 */
	switch (ctrlr->state) {

	case NVME_CTRLR_STATE_INIT:

		/*
		 * Begin the hardware initialization by making
		 * sure the controller is disabled.
		 */
		if (nvme_ctrlr_enabled(ctrlr)) {
			/*
			 * Disable the controller to cause a reset.
			 */
			if (!nvme_ctrlr_ready(ctrlr)) {
				/* Wait for the controller to be ready */
				nvme_ctrlr_set_state(ctrlr,
				     NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1,
				     ready_timeout_in_ms);
				return 0;
			}

			/*
			 * The controller is enabled and ready.
			 * It can be immediately disabled.
			 */
			nvme_ctrlr_disable(ctrlr);
			nvme_ctrlr_set_state(ctrlr,
				     NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
				     ready_timeout_in_ms);

			if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
				nvme_msleep(2000);

			return 0;
		}

		if (nvme_ctrlr_ready(ctrlr)) {
			/*
			 * Controller is in the process of shutting down.
			 * We need to wait for CSTS.RDY to become 0.
			 */
			nvme_ctrlr_set_state(ctrlr,
				     NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
				     ready_timeout_in_ms);
			return 0;
		}

		/*
		 * Controller is currently disabled.
		 * We can jump straight to enabling it.
		 */
		ret = nvme_ctrlr_enable(ctrlr);
		if (ret)
			nvme_err("Enable controller failed\n");
		else
			nvme_ctrlr_set_state(ctrlr,
				     NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,
				     ready_timeout_in_ms);
		return ret;

	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:

		if (nvme_ctrlr_ready(ctrlr)) {
			/*
			 * CC.EN = 1 && CSTS.RDY = 1,
			 * so we can disable the controller now.
			 */
			nvme_ctrlr_disable(ctrlr);
			nvme_ctrlr_set_state(ctrlr,
				     NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
				     ready_timeout_in_ms);
			return 0;
		}

		break;

	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:

		if (!nvme_ctrlr_ready(ctrlr)) {
			/*
			 * CC.EN = 0 && CSTS.RDY = 0,
			 * so we can enable the controller now.
			 */
			ret = nvme_ctrlr_enable(ctrlr);
			if (ret)
				nvme_err("Enable controller failed\n");
			else
				nvme_ctrlr_set_state(ctrlr,
				     NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,
				     ready_timeout_in_ms);
			return ret;
		}
		break;

	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:

		if (nvme_ctrlr_ready(ctrlr)) {
			if (ctrlr->quirks & NVME_QUIRK_DELAY_AFTER_RDY)
				nvme_msleep(2000);

			ret = nvme_ctrlr_start(ctrlr);
			if (ret)
				nvme_err("Start controller failed\n");
			else
				nvme_ctrlr_set_state(ctrlr,
						     NVME_CTRLR_STATE_READY,
						     NVME_TIMEOUT_INFINITE);
			return ret;
		}
		break;

	default:
		nvme_panic("Unhandled ctrlr state %d\n", ctrlr->state);
		nvme_ctrlr_fail(ctrlr);
		return -1;
	}

	if ((ctrlr->state_timeout_ms != NVME_TIMEOUT_INFINITE) &&
	    (nvme_time_msec() > ctrlr->state_timeout_ms)) {
		nvme_err("Initialization timed out in state %d\n",
			 ctrlr->state);
		nvme_ctrlr_fail(ctrlr);
		return -1;
	}

	return 0;
}

/*
 * Reset a controller.
 */
static int nvme_ctrlr_reset(struct nvme_ctrlr *ctrlr)
{
	struct nvme_qpair *qpair;
	unsigned int i;

	if (ctrlr->resetting || ctrlr->failed)
		/*
		 * Controller is already resetting or has failed. Return
		 * immediately since there is no need to kick off another
		 * reset in these cases.
		 */
		return 0;

	ctrlr->resetting = true;

	/* Disable all queues before disabling the controller hardware. */
	nvme_qpair_disable(&ctrlr->adminq);
	for (i = 0; i < ctrlr->io_queues; i++)
		nvme_qpair_disable(&ctrlr->ioq[i]);

	/* Set the state back to INIT to cause a full hardware reset. */
	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT,
			     NVME_TIMEOUT_INFINITE);

	while (ctrlr->state != NVME_CTRLR_STATE_READY) {
		if (nvme_ctrlr_init(ctrlr) != 0) {
			nvme_crit("Controller reset failed\n");
			nvme_ctrlr_fail(ctrlr);
			goto out;
		}
	}

	/* Reinitialize qpairs */
	TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
		if (nvme_ctrlr_create_qpair(ctrlr, qpair) != 0)
			nvme_ctrlr_fail(ctrlr);
	}

out:
	ctrlr->resetting = false;

	return ctrlr->failed ? -1 : 0;
}
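
/*
 * Note: disabling the controller destroys all I/O queues on the device
 * side, so nvme_ctrlr_reset() re-issues the Create I/O Queue commands for
 * every qpair still listed in active_io_qpairs; the host-side qpair
 * structures themselves are preserved across the reset.
 */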

/*
 * Set controller options.
 */
static void nvme_ctrlr_set_opts(struct nvme_ctrlr *ctrlr,
				struct nvme_ctrlr_opts *opts)
{
	if (opts)
		memcpy(&ctrlr->opts, opts, sizeof(struct nvme_ctrlr_opts));
	else
		memset(&ctrlr->opts, 0, sizeof(struct nvme_ctrlr_opts));

	if (ctrlr->opts.io_queues == 0)
		ctrlr->opts.io_queues = DEFAULT_MAX_IO_QUEUES;

	if (ctrlr->opts.io_queues > NVME_MAX_IO_QUEUES) {
		nvme_info("Limiting requested I/O queues %u to %d\n",
			  ctrlr->opts.io_queues, NVME_MAX_IO_QUEUES);
		ctrlr->opts.io_queues = NVME_MAX_IO_QUEUES;
	}
}

/*
 * Attach a PCI controller.
 */
struct nvme_ctrlr *
nvme_ctrlr_attach(struct pci_device *pci_dev,
		  struct nvme_ctrlr_opts *opts)
{
	struct nvme_ctrlr *ctrlr;
	union nvme_cap_register cap;
	uint32_t cmd_reg;
	int ret;

	/* Get a new controller handle */
	ctrlr = malloc(sizeof(struct nvme_ctrlr));
	if (!ctrlr) {
		nvme_err("Allocate controller handle failed\n");
		return NULL;
	}

	nvme_debug("New controller handle %p\n", ctrlr);

	/* Initialize the handle */
	memset(ctrlr, 0, sizeof(struct nvme_ctrlr));
	ctrlr->pci_dev = pci_dev;
	ctrlr->resetting = false;
	ctrlr->failed = false;
	TAILQ_INIT(&ctrlr->free_io_qpairs);
	TAILQ_INIT(&ctrlr->active_io_qpairs);
	pthread_mutex_init(&ctrlr->lock, NULL);
	ctrlr->quirks = nvme_ctrlr_get_quirks(pci_dev);

	nvme_ctrlr_set_state(ctrlr,
			     NVME_CTRLR_STATE_INIT,
			     NVME_TIMEOUT_INFINITE);

	ret = nvme_ctrlr_map_bars(ctrlr);
	if (ret != 0) {
		nvme_err("Map controller BAR failed\n");
		pthread_mutex_destroy(&ctrlr->lock);
		free(ctrlr);
		return NULL;
	}

	/* Enable PCI bus mastering (bit 2) and disable INTx (bit 10) */
	nvme_pcicfg_read32(pci_dev, &cmd_reg, 4);
	cmd_reg |= 0x0404;
	nvme_pcicfg_write32(pci_dev, cmd_reg, 4);

	/*
	 * The doorbell stride is 2 ^ (2 + DSTRD) bytes.
	 * doorbell_stride_u32 counts it in 32-bit words (multiples of 4
	 * bytes), so the "+ 2" is dropped.
	 */
	cap.raw = nvme_reg_mmio_read_8(ctrlr, cap.raw);
	ctrlr->doorbell_stride_u32 = 1 << cap.bits.dstrd;
	ctrlr->min_page_size = 1 << (12 + cap.bits.mpsmin);

	/* Set the default transfer size */
	ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;

	/* Create the admin queue pair */
	ret = nvme_qpair_construct(ctrlr, &ctrlr->adminq, 0,
				   NVME_ADMIN_ENTRIES, NVME_ADMIN_TRACKERS);
	if (ret != 0) {
		nvme_err("Initialize admin queue pair failed\n");
		goto err;
	}

	/* Set options and then initialize */
	nvme_ctrlr_set_opts(ctrlr, opts);
	do {
		ret = nvme_ctrlr_init(ctrlr);
		if (ret)
			goto err;
	} while (ctrlr->state != NVME_CTRLR_STATE_READY);

	return ctrlr;

err:
	nvme_ctrlr_detach(ctrlr);

	return NULL;
}

/*
 * Detach a PCI controller.
 */
void nvme_ctrlr_detach(struct nvme_ctrlr *ctrlr)
{
	struct nvme_qpair *qpair;
	uint32_t i;

	while (!TAILQ_EMPTY(&ctrlr->active_io_qpairs)) {
		qpair = TAILQ_FIRST(&ctrlr->active_io_qpairs);
		nvme_ioqp_release(qpair);
	}

	nvme_ctrlr_shutdown(ctrlr);

	nvme_ctrlr_destruct_namespaces(ctrlr);
	if (ctrlr->ioq) {
		for (i = 0; i < ctrlr->io_queues; i++)
			nvme_qpair_destroy(&ctrlr->ioq[i]);
		free(ctrlr->ioq);
	}

	nvme_qpair_destroy(&ctrlr->adminq);

	nvme_ctrlr_unmap_bars(ctrlr);

	pthread_mutex_destroy(&ctrlr->lock);
	free(ctrlr);
}

/*
 * Get a controller feature.
 */
int nvme_ctrlr_get_feature(struct nvme_ctrlr *ctrlr,
			   enum nvme_feat_sel sel, enum nvme_feat feature,
			   uint32_t cdw11,
			   uint32_t *attributes)
{
	int ret;

	pthread_mutex_lock(&ctrlr->lock);

	ret = nvme_admin_get_feature(ctrlr, sel, feature, cdw11, attributes);
	if (ret != 0)
		nvme_notice("Get feature 0x%08x failed\n",
			    (unsigned int) feature);

	pthread_mutex_unlock(&ctrlr->lock);

	return ret;
}

/*
 * Set a controller feature.
 */
int nvme_ctrlr_set_feature(struct nvme_ctrlr *ctrlr,
			   bool save, enum nvme_feat feature,
			   uint32_t cdw11, uint32_t cdw12,
			   uint32_t *attributes)
{
	int ret;

	pthread_mutex_lock(&ctrlr->lock);

	ret = nvme_admin_set_feature(ctrlr, save, feature,
				     cdw11, cdw12, attributes);
	if (ret != 0)
		nvme_notice("Set feature 0x%08x failed\n",
			    (unsigned int) feature);

	pthread_mutex_unlock(&ctrlr->lock);

	return ret;
}

/*
 * Attach a namespace.
 */
int nvme_ctrlr_attach_ns(struct nvme_ctrlr *ctrlr, unsigned int nsid,
			 struct nvme_ctrlr_list *clist)
{
	int ret;

	pthread_mutex_lock(&ctrlr->lock);

	ret = nvme_admin_attach_ns(ctrlr, nsid, clist);
	if (ret) {
		nvme_notice("Attach namespace %u failed\n", nsid);
		goto out;
	}

	ret = nvme_ctrlr_reset(ctrlr);
	if (ret != 0)
		nvme_notice("Reset controller failed\n");

out:
	pthread_mutex_unlock(&ctrlr->lock);

	return ret;
}

/*
 * Detach a namespace.
 */
int nvme_ctrlr_detach_ns(struct nvme_ctrlr *ctrlr, unsigned int nsid,
			 struct nvme_ctrlr_list *clist)
{
	int ret;

	pthread_mutex_lock(&ctrlr->lock);

	ret = nvme_admin_detach_ns(ctrlr, nsid, clist);
	if (ret != 0) {
		nvme_notice("Detach namespace %u failed\n", nsid);
		goto out;
	}

	ret = nvme_ctrlr_reset(ctrlr);
	if (ret)
		nvme_notice("Reset controller failed\n");

out:
	pthread_mutex_unlock(&ctrlr->lock);

	return ret;
}

/*
 * Create a namespace.
 */
unsigned int nvme_ctrlr_create_ns(struct nvme_ctrlr *ctrlr,
				  struct nvme_ns_data *nsdata)
{
	unsigned int nsid;
	int ret;

	pthread_mutex_lock(&ctrlr->lock);

	ret = nvme_admin_create_ns(ctrlr, nsdata, &nsid);
	if (ret != 0) {
		nvme_notice("Create namespace failed\n");
		nsid = 0;
	}

	pthread_mutex_unlock(&ctrlr->lock);

	return nsid;
}

/*
 * Delete a namespace.
 */
int nvme_ctrlr_delete_ns(struct nvme_ctrlr *ctrlr, unsigned int nsid)
{
	int ret;

	pthread_mutex_lock(&ctrlr->lock);

	ret = nvme_admin_delete_ns(ctrlr, nsid);
	if (ret != 0) {
		nvme_notice("Delete namespace %u failed\n", nsid);
		goto out;
	}

	ret = nvme_ctrlr_reset(ctrlr);
	if (ret)
		nvme_notice("Reset controller failed\n");

out:
	pthread_mutex_unlock(&ctrlr->lock);

	return ret;
}

/*
 * Format NVM media.
 */
int nvme_ctrlr_format_ns(struct nvme_ctrlr *ctrlr, unsigned int nsid,
			 struct nvme_format *format)
{
	int ret;

	pthread_mutex_lock(&ctrlr->lock);

	ret = nvme_admin_format_nvm(ctrlr, nsid, format);
	if (ret != 0) {
		if (nsid == NVME_GLOBAL_NS_TAG)
			nvme_notice("Format device failed\n");
		else
			nvme_notice("Format namespace %u failed\n", nsid);
		goto out;
	}

	ret = nvme_ctrlr_reset(ctrlr);
	if (ret)
		nvme_notice("Reset controller failed\n");

out:
	pthread_mutex_unlock(&ctrlr->lock);

	return ret;
}

/*
 * Update the device firmware.
 */
int nvme_ctrlr_update_firmware(struct nvme_ctrlr *ctrlr,
			       void *fw, size_t size, int slot)
{
	struct nvme_fw_commit fw_commit;
	unsigned int size_remaining = size, offset = 0, transfer;
	void *f = fw;
	int ret;

	if (size & 0x3) {
		nvme_err("Invalid firmware size\n");
		return EINVAL;
	}

	pthread_mutex_lock(&ctrlr->lock);

	/* Download firmware */
	while (size_remaining > 0) {

		transfer = nvme_min(size_remaining, ctrlr->min_page_size);

		ret = nvme_admin_fw_image_dl(ctrlr, f, transfer, offset);
		if (ret != 0) {
			nvme_err("Download FW (%u B at %u) failed\n",
				 transfer, offset);
			goto out;
		}

		f += transfer;
		offset += transfer;
		size_remaining -= transfer;

	}

	/* Commit firmware */
	memset(&fw_commit, 0, sizeof(struct nvme_fw_commit));
	fw_commit.fs = slot;
	fw_commit.ca = NVME_FW_COMMIT_REPLACE_IMG;

	ret = nvme_admin_fw_commit(ctrlr, &fw_commit);
	if (ret != 0) {
		nvme_err("Commit downloaded FW (%zu B) failed\n",
			 size);
		goto out;
	}

	ret = nvme_ctrlr_reset(ctrlr);
	if (ret)
		nvme_notice("Reset controller failed\n");

out:
	pthread_mutex_unlock(&ctrlr->lock);

	return ret;
}

/*
 * Get an unused I/O queue pair.
 */
struct nvme_qpair *nvme_ioqp_get(struct nvme_ctrlr *ctrlr,
				 enum nvme_qprio qprio, unsigned int qd)
{
	struct nvme_qpair *qpair = NULL;
	union nvme_cc_register cc;
	uint32_t trackers;
	int ret;

	cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);

	/* Only the low 2 bits (values 0, 1, 2, 3) of QPRIO are valid. */
	if ((qprio & 3) != qprio)
		return NULL;

	/*
	 * Only the value NVME_QPRIO_URGENT (0) is valid for the
	 * default round robin arbitration method.
	 */
	if ((cc.bits.ams == NVME_CC_AMS_RR) && (qprio != NVME_QPRIO_URGENT)) {
		nvme_err("Invalid queue priority for default round "
			 "robin arbitration method\n");
		return NULL;
	}

	/*
	 * The number of entries of an I/O qpair must be in the range
	 * [2, io_qpairs_max_entries].
	 */
	if (qd == 1) {
		nvme_err("Invalid queue depth\n");
		return NULL;
	}

	if (qd == 0 || qd > ctrlr->io_qpairs_max_entries)
		qd = ctrlr->io_qpairs_max_entries;

	/*
	 * No need to have more trackers than entries in the submit queue.
	 * Note also that for a queue size of N, we can only have (N-1)
	 * commands outstanding, hence the "-1" here.
	 */
	trackers = nvme_min(NVME_IO_TRACKERS, (qd - 1));

	pthread_mutex_lock(&ctrlr->lock);

	/* Get the first available qpair structure */
	qpair = TAILQ_FIRST(&ctrlr->free_io_qpairs);
	if (qpair == NULL) {
		/* No free queue IDs */
		nvme_err("No free I/O queue pairs\n");
		goto out;
	}

	/* Construct the qpair */
	ret = nvme_qpair_construct(ctrlr, qpair, qprio, qd, trackers);
	if (ret != 0) {
		nvme_qpair_destroy(qpair);
		qpair = NULL;
		goto out;
	}

	/*
	 * At this point, qpair contains a preallocated submission
	 * and completion queue and a unique queue ID, but it is not
	 * yet created on the controller.
	 * Fill out the submission queue priority and send out the
	 * Create I/O Queue commands.
	 */
	if (nvme_ctrlr_create_qpair(ctrlr, qpair) != 0) {
		nvme_err("Create queue pair on the controller failed\n");
		nvme_qpair_destroy(qpair);
		qpair = NULL;
		goto out;
	}

	TAILQ_REMOVE(&ctrlr->free_io_qpairs, qpair, tailq);
	TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq);

out:
	pthread_mutex_unlock(&ctrlr->lock);

	return qpair;
}

/*
 * Free an I/O queue pair.
 */
int nvme_ioqp_release(struct nvme_qpair *qpair)
{
	struct nvme_ctrlr *ctrlr;
	int ret;

	if (qpair == NULL)
		return 0;

	ctrlr = qpair->ctrlr;

	pthread_mutex_lock(&ctrlr->lock);

	/* Delete the I/O submission and completion queues */
	ret = nvme_ctrlr_delete_qpair(ctrlr, qpair);
	if (ret != 0) {
		nvme_notice("Delete queue pair %u failed\n", qpair->id);
	} else {
		TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq);
		TAILQ_INSERT_HEAD(&ctrlr->free_io_qpairs, qpair, tailq);
	}

	pthread_mutex_unlock(&ctrlr->lock);

	return ret;
}

/*
 * Submit an NVMe command using the specified I/O queue pair.
 */
int nvme_ioqp_submit_cmd(struct nvme_qpair *qpair,
			 struct nvme_cmd *cmd,
			 void *buf, size_t len,
			 nvme_cmd_cb cb_fn, void *cb_arg)
{
	struct nvme_ctrlr *ctrlr = qpair->ctrlr;
	struct nvme_request *req;
	int ret = ENOMEM;

	pthread_mutex_lock(&ctrlr->lock);

	req = nvme_request_allocate_contig(qpair, buf, len, cb_fn, cb_arg);
	if (req) {
		memcpy(&req->cmd, cmd, sizeof(req->cmd));
		ret = nvme_qpair_submit_request(qpair, req);
	}

	pthread_mutex_unlock(&ctrlr->lock);

	return ret;
}

/*
 * Poll for completion of NVMe commands submitted to the
 * specified I/O queue pair.
 */
unsigned int nvme_ioqp_poll(struct nvme_qpair *qpair,
			    unsigned int max_completions)
{
	struct nvme_ctrlr *ctrlr = qpair->ctrlr;
	int ret;

	pthread_mutex_lock(&ctrlr->lock);
	ret = nvme_qpair_poll(qpair, max_completions);
	pthread_mutex_unlock(&ctrlr->lock);

	return ret;
}
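
/*
 * Typical usage (illustrative sketch, error handling omitted): once a
 * controller has been attached with nvme_ctrlr_attach(), an I/O queue
 * pair can be obtained and used as follows:
 *
 *	struct nvme_qpair *qpair = nvme_ioqp_get(ctrlr, NVME_QPRIO_URGENT, 0);
 *	struct nvme_cmd cmd = { 0 };
 *
 *	// Fill out cmd (opcode, namespace ID, command-specific fields)
 *	// before submission.
 *	nvme_ioqp_submit_cmd(qpair, &cmd, buf, len, my_completion_cb, NULL);
 *
 *	while (pending)
 *		nvme_ioqp_poll(qpair, 16);
 *
 *	nvme_ioqp_release(qpair);
 *
 * where my_completion_cb, buf, len and pending are application-provided
 * names used here only for illustration.
 */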