/*-
 * BSD LICENSE
 *
 * Copyright (c) Intel Corporation. All rights reserved.
 * Copyright (c) 2017, Western Digital Corporation or its affiliates.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "nvme_internal.h"

/*
 * Host software shall wait a minimum of CAP.TO x 500 milliseconds for CSTS.RDY
 * to be set to '1' after setting CC.EN to '1' from a previous value of '0'.
 */
static inline unsigned int
nvme_ctrlr_get_ready_to_in_ms(struct nvme_ctrlr *ctrlr)
{
        union nvme_cap_register cap;

        /* The TO unit in ms */
#define NVME_READY_TIMEOUT_UNIT 500

        cap.raw = nvme_reg_mmio_read_8(ctrlr, cap.raw);

        return (NVME_READY_TIMEOUT_UNIT * cap.bits.to);
}
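/*
 * Example (illustrative): a controller reporting CAP.TO = 20 allows up to
 * 20 x 500 = 10000 ms for CSTS.RDY to follow a change of CC.EN.
 */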
/*
 * Create a queue pair.
 */
static int nvme_ctrlr_create_qpair(struct nvme_ctrlr *ctrlr,
                                   struct nvme_qpair *qpair)
{
        int ret;

        /* Create the completion queue */
        ret = nvme_admin_create_ioq(ctrlr, qpair, NVME_IO_COMPLETION_QUEUE);
        if (ret != 0) {
                nvme_notice("Create completion queue %u failed\n",
                            qpair->id);
                return ret;
        }

        /* Create the submission queue */
        ret = nvme_admin_create_ioq(ctrlr, qpair, NVME_IO_SUBMISSION_QUEUE);
        if (ret != 0) {
                /* Attempt to delete the completion queue */
                nvme_notice("Create submission queue %u failed\n",
                            qpair->id);
                nvme_admin_delete_ioq(ctrlr, qpair, NVME_IO_COMPLETION_QUEUE);
                return ret;
        }

        nvme_qpair_reset(qpair);

        return 0;
}

/*
 * Delete a queue pair.
 */
static int nvme_ctrlr_delete_qpair(struct nvme_ctrlr *ctrlr,
                                   struct nvme_qpair *qpair)
{
        int ret;

        /* Delete the submission queue */
        ret = nvme_admin_delete_ioq(ctrlr, qpair, NVME_IO_SUBMISSION_QUEUE);
        if (ret != 0) {
                nvme_notice("Delete submission queue %u failed\n",
                            qpair->id);
                return ret;
        }

        /* Delete the completion queue */
        ret = nvme_admin_delete_ioq(ctrlr, qpair, NVME_IO_COMPLETION_QUEUE);
        if (ret != 0) {
                nvme_notice("Delete completion queue %u failed\n",
                            qpair->id);
                return ret;
        }

        return 0;
}

/*
 * Build the list of supported Intel vendor-specific log pages.
 */
static void
nvme_ctrlr_construct_intel_support_log_page_list(struct nvme_ctrlr *ctrlr,
                                struct nvme_intel_log_page_dir *log_page_dir)
{

        if (ctrlr->cdata.vid != NVME_PCI_VID_INTEL ||
            log_page_dir == NULL)
                return;

        ctrlr->log_page_supported[NVME_INTEL_LOG_PAGE_DIR] = true;

        if (log_page_dir->read_latency_log_len ||
            (ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY))
                ctrlr->log_page_supported[NVME_INTEL_LOG_READ_CMD_LATENCY] = true;

        if (log_page_dir->write_latency_log_len ||
            (ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY))
                ctrlr->log_page_supported[NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true;

        if (log_page_dir->temperature_statistics_log_len)
                ctrlr->log_page_supported[NVME_INTEL_LOG_TEMPERATURE] = true;

        if (log_page_dir->smart_log_len)
                ctrlr->log_page_supported[NVME_INTEL_LOG_SMART] = true;

        if (log_page_dir->marketing_description_log_len)
                ctrlr->log_page_supported[NVME_INTEL_MARKETING_DESCRIPTION] = true;
}

/*
 * Get the Intel vendor-specific log page directory and record the
 * supported log pages.
 */
static int nvme_ctrlr_set_intel_support_log_pages(struct nvme_ctrlr *ctrlr)
{
        struct nvme_intel_log_page_dir *log_page_dir;
        int ret;

        log_page_dir = nvme_zmalloc(sizeof(struct nvme_intel_log_page_dir), 64);
        if (!log_page_dir) {
                nvme_err("Allocate log_page_directory failed\n");
                return ENOMEM;
        }

        ret = nvme_admin_get_log_page(ctrlr, NVME_INTEL_LOG_PAGE_DIR,
                                      NVME_GLOBAL_NS_TAG,
                                      log_page_dir,
                                      sizeof(struct nvme_intel_log_page_dir));
        if (ret != 0)
                nvme_notice("Get NVME_INTEL_LOG_PAGE_DIR log page failed\n");
        else
                nvme_ctrlr_construct_intel_support_log_page_list(ctrlr,
                                                                 log_page_dir);

        nvme_free(log_page_dir);

        return ret;
}

/*
 * Initialize the log page support directory.
 */
static void nvme_ctrlr_set_supported_log_pages(struct nvme_ctrlr *ctrlr)
{

        memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported));

        /* Mandatory pages */
        ctrlr->log_page_supported[NVME_LOG_ERROR] = true;
        ctrlr->log_page_supported[NVME_LOG_HEALTH_INFORMATION] = true;
        ctrlr->log_page_supported[NVME_LOG_FIRMWARE_SLOT] = true;

        if (ctrlr->cdata.lpa.celp)
                ctrlr->log_page_supported[NVME_LOG_COMMAND_EFFECTS_LOG] = true;

        if (ctrlr->cdata.vid == NVME_PCI_VID_INTEL)
                nvme_ctrlr_set_intel_support_log_pages(ctrlr);
}
/*
 * Set Intel device features.
 */
static void nvme_ctrlr_set_intel_supported_features(struct nvme_ctrlr *ctrlr)
{
        bool *supported_feature = ctrlr->feature_supported;

        supported_feature[NVME_INTEL_FEAT_MAX_LBA] = true;
        supported_feature[NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true;
        supported_feature[NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true;
        supported_feature[NVME_INTEL_FEAT_SMBUS_ADDRESS] = true;
        supported_feature[NVME_INTEL_FEAT_LED_PATTERN] = true;
        supported_feature[NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true;
        supported_feature[NVME_INTEL_FEAT_LATENCY_TRACKING] = true;
}

/*
 * Set device features.
 */
static void nvme_ctrlr_set_supported_features(struct nvme_ctrlr *ctrlr)
{
        bool *supported_feature = ctrlr->feature_supported;

        memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported));

        /* Mandatory features */
        supported_feature[NVME_FEAT_ARBITRATION] = true;
        supported_feature[NVME_FEAT_POWER_MANAGEMENT] = true;
        supported_feature[NVME_FEAT_TEMPERATURE_THRESHOLD] = true;
        supported_feature[NVME_FEAT_ERROR_RECOVERY] = true;
        supported_feature[NVME_FEAT_NUMBER_OF_QUEUES] = true;
        supported_feature[NVME_FEAT_INTERRUPT_COALESCING] = true;
        supported_feature[NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true;
        supported_feature[NVME_FEAT_WRITE_ATOMICITY] = true;
        supported_feature[NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true;

        /* Optional features */
        if (ctrlr->cdata.vwc.present)
                supported_feature[NVME_FEAT_VOLATILE_WRITE_CACHE] = true;
        if (ctrlr->cdata.apsta.supported)
                supported_feature[NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION]
                        = true;
        if (ctrlr->cdata.hmpre)
                supported_feature[NVME_FEAT_HOST_MEM_BUFFER] = true;
        if (ctrlr->cdata.vid == NVME_PCI_VID_INTEL)
                nvme_ctrlr_set_intel_supported_features(ctrlr);
}

/*
 * Initialize I/O queue pairs.
 */
static int nvme_ctrlr_init_io_qpairs(struct nvme_ctrlr *ctrlr)
{
        struct nvme_qpair *qpair;
        union nvme_cap_register cap;
        uint32_t i;

        if (ctrlr->ioq != NULL)
                /*
                 * io_qpairs were already constructed, so just return.
                 * This typically happens when the controller is
                 * initialized a second (or subsequent) time after a
                 * controller reset.
                 */
                return 0;

        /*
         * NVMe spec sets a hard limit of 64K max entries, but
         * devices may specify a smaller limit, so we need to check
         * the MQES field in the capabilities register.
         */
        cap.raw = nvme_reg_mmio_read_8(ctrlr, cap.raw);
        ctrlr->io_qpairs_max_entries =
                nvme_min(NVME_IO_ENTRIES, (unsigned int)cap.bits.mqes + 1);

        ctrlr->ioq = calloc(ctrlr->io_queues, sizeof(struct nvme_qpair));
        if (!ctrlr->ioq)
                return ENOMEM;

        /* Keep queue pair ID 0 for the admin queue */
        for (i = 0; i < ctrlr->io_queues; i++) {
                qpair = &ctrlr->ioq[i];
                qpair->id = i + 1;
                TAILQ_INSERT_TAIL(&ctrlr->free_io_qpairs, qpair, tailq);
        }

        return 0;
}
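/*
 * Example (illustrative): if NVME_IO_ENTRIES is 1024 and the controller
 * reports CAP.MQES = 255 (a 0-based value, i.e. 256 entries per queue),
 * io_qpairs_max_entries above ends up as min(1024, 256) = 256.
 */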
/*
 * Shutdown a controller.
 */
static void nvme_ctrlr_shutdown(struct nvme_ctrlr *ctrlr)
{
        union nvme_cc_register cc;
        union nvme_csts_register csts;
        int ms_waited = 0;

        cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);
        cc.bits.shn = NVME_SHN_NORMAL;
        nvme_reg_mmio_write_4(ctrlr, cc.raw, cc.raw);

        csts.raw = nvme_reg_mmio_read_4(ctrlr, csts.raw);
        /*
         * The NVMe spec does not define a timeout period for shutdown
         * notification, so we just pick 5 seconds as a reasonable amount
         * of time to wait before proceeding.
         */
#define NVME_CTRLR_SHUTDOWN_TIMEOUT 5000
        while (csts.bits.shst != NVME_SHST_COMPLETE) {
                nvme_usleep(1000);
                csts.raw = nvme_reg_mmio_read_4(ctrlr, csts.raw);
                if (ms_waited++ >= NVME_CTRLR_SHUTDOWN_TIMEOUT)
                        break;
        }

        if (csts.bits.shst != NVME_SHST_COMPLETE)
                nvme_err("Controller did not shut down within %d seconds\n",
                         NVME_CTRLR_SHUTDOWN_TIMEOUT / 1000);
}

/*
 * Enable a controller.
 */
static int nvme_ctrlr_enable(struct nvme_ctrlr *ctrlr)
{
        union nvme_cc_register cc;
        union nvme_aqa_register aqa;
        union nvme_cap_register cap;

        cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);

        if (cc.bits.en != 0) {
                nvme_err("Controller enable called with CC.EN = 1\n");
                return EINVAL;
        }

        nvme_reg_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr);
        nvme_reg_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr);

        aqa.raw = 0;
        /* acqs and asqs are 0-based. */
        aqa.bits.acqs = ctrlr->adminq.entries - 1;
        aqa.bits.asqs = ctrlr->adminq.entries - 1;
        nvme_reg_mmio_write_4(ctrlr, aqa.raw, aqa.raw);

        cc.bits.en = 1;
        cc.bits.css = 0;
        cc.bits.shn = 0;
        cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
        cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */

        /* Page size is 2 ^ (12 + mps). */
        cc.bits.mps = PAGE_SHIFT - 12;

        cap.raw = nvme_reg_mmio_read_8(ctrlr, cap.raw);

        switch (ctrlr->opts.arb_mechanism) {
        case NVME_CC_AMS_RR:
                break;
        case NVME_CC_AMS_WRR:
                if (NVME_CAP_AMS_WRR & cap.bits.ams)
                        break;
                return EINVAL;
        case NVME_CC_AMS_VS:
                if (NVME_CAP_AMS_VS & cap.bits.ams)
                        break;
                return EINVAL;
        default:
                return EINVAL;
        }

        cc.bits.ams = ctrlr->opts.arb_mechanism;

        nvme_reg_mmio_write_4(ctrlr, cc.raw, cc.raw);

        return 0;
}
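/*
 * Example (illustrative): with PAGE_SHIFT = 12 (4 KiB pages), CC.MPS is set
 * to 0 above, selecting a memory page size of 2 ^ (12 + 0) = 4096 bytes.
 */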
/*
 * Disable a controller.
 */
static inline void nvme_ctrlr_disable(struct nvme_ctrlr *ctrlr)
{
        union nvme_cc_register cc;

        cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);
        cc.bits.en = 0;

        nvme_reg_mmio_write_4(ctrlr, cc.raw, cc.raw);
}

/*
 * Test if a controller is enabled.
 */
static inline int nvme_ctrlr_enabled(struct nvme_ctrlr *ctrlr)
{
        union nvme_cc_register cc;

        cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);

        return cc.bits.en;
}

/*
 * Test if a controller is ready.
 */
static inline int nvme_ctrlr_ready(struct nvme_ctrlr *ctrlr)
{
        union nvme_csts_register csts;

        csts.raw = nvme_reg_mmio_read_4(ctrlr, csts.raw);

        return csts.bits.rdy;
}

/*
 * Set a controller state.
 */
static void nvme_ctrlr_set_state(struct nvme_ctrlr *ctrlr,
                                 enum nvme_ctrlr_state state,
                                 uint64_t timeout_in_ms)
{
        ctrlr->state = state;
        if (timeout_in_ms == NVME_TIMEOUT_INFINITE)
                ctrlr->state_timeout_ms = NVME_TIMEOUT_INFINITE;
        else
                ctrlr->state_timeout_ms = nvme_time_msec() + timeout_in_ms;
}

/*
 * Get the controller identify data.
 */
static int nvme_ctrlr_identify(struct nvme_ctrlr *ctrlr)
{
        int ret;

        ret = nvme_admin_identify_ctrlr(ctrlr, &ctrlr->cdata);
        if (ret != 0) {
                nvme_notice("Identify controller failed\n");
                return ret;
        }

        /*
         * Use MDTS to ensure our default max_xfer_size doesn't
         * exceed what the controller supports.
         */
        if (ctrlr->cdata.mdts > 0)
                ctrlr->max_xfer_size = nvme_min(ctrlr->max_xfer_size,
                                                ctrlr->min_page_size
                                                * (1 << (ctrlr->cdata.mdts)));
        return 0;
}

/*
 * Get the maximum number of I/O queue pairs supported by the controller.
 */
static int nvme_ctrlr_get_max_io_qpairs(struct nvme_ctrlr *ctrlr)
{
        unsigned int cdw0, cq_allocated, sq_allocated;
        int ret;

        ret = nvme_admin_get_feature(ctrlr, NVME_FEAT_CURRENT,
                                     NVME_FEAT_NUMBER_OF_QUEUES,
                                     0, &cdw0);
        if (ret != 0) {
                nvme_notice("Get feature NVME_FEAT_NUMBER_OF_QUEUES failed\n");
                return ret;
        }

        /*
         * Data in cdw0 is 0-based.
         * Lower 16-bits indicate number of submission queues allocated.
         * Upper 16-bits indicate number of completion queues allocated.
         */
        sq_allocated = (cdw0 & 0xFFFF) + 1;
        cq_allocated = (cdw0 >> 16) + 1;

        ctrlr->max_io_queues = nvme_min(sq_allocated, cq_allocated);

        return 0;
}
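/*
 * Example (illustrative): a Get Features completion of cdw0 = 0x001F001F
 * decodes as 0x1F + 1 = 32 submission queues and 32 completion queues,
 * so max_io_queues above becomes min(32, 32) = 32.
 */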
/*
 * Set the number of I/O queue pairs.
 */
static int nvme_ctrlr_set_num_qpairs(struct nvme_ctrlr *ctrlr)
{
        unsigned int num_queues, cdw0;
        unsigned int cq_allocated, sq_allocated;
        int ret;

        ret = nvme_ctrlr_get_max_io_qpairs(ctrlr);
        if (ret != 0) {
                nvme_notice("Failed to get the maximum number of I/O qpairs\n");
                return ret;
        }

        /*
         * Format the number of I/O queues:
         * subtract 1 as the value is 0-based,
         * bits 31:16 represent the number of completion queues,
         * bits 15:0 represent the number of submission queues.
         */
        num_queues = ((ctrlr->opts.io_queues - 1) << 16) |
                (ctrlr->opts.io_queues - 1);

        /*
         * Set the number of I/O queues.
         * Note: The value allocated may be smaller or larger than the number
         * of queues requested (see specifications).
         */
        ret = nvme_admin_set_feature(ctrlr, false, NVME_FEAT_NUMBER_OF_QUEUES,
                                     num_queues, 0, &cdw0);
        if (ret != 0) {
                nvme_notice("Set feature NVME_FEAT_NUMBER_OF_QUEUES failed\n");
                return ret;
        }

        /*
         * Data in cdw0 is 0-based.
         * Lower 16-bits indicate number of submission queues allocated.
         * Upper 16-bits indicate number of completion queues allocated.
         */
        sq_allocated = (cdw0 & 0xFFFF) + 1;
        cq_allocated = (cdw0 >> 16) + 1;
        ctrlr->io_queues = nvme_min(sq_allocated, cq_allocated);

        /*
         * Make sure the number of constructed qpairs listed in free_io_qpairs
         * will not be more than the requested number.
         */
        ctrlr->io_queues = nvme_min(ctrlr->io_queues, ctrlr->opts.io_queues);

        return 0;
}

static void nvme_ctrlr_destruct_namespaces(struct nvme_ctrlr *ctrlr)
{

        if (ctrlr->ns) {
                free(ctrlr->ns);
                ctrlr->ns = NULL;
                ctrlr->nr_ns = 0;
        }

        if (ctrlr->nsdata) {
                nvme_free(ctrlr->nsdata);
                ctrlr->nsdata = NULL;
        }
}

static int nvme_ctrlr_construct_namespaces(struct nvme_ctrlr *ctrlr)
{
        unsigned int i, nr_ns = ctrlr->cdata.nn;
        struct nvme_ns *ns = NULL;

        /*
         * ctrlr->nr_ns may be 0 (startup) or a different number of
         * namespaces (reset), so check if we need to reallocate.
         */
        if (nr_ns != ctrlr->nr_ns) {

                nvme_ctrlr_destruct_namespaces(ctrlr);

                ctrlr->ns = calloc(nr_ns, sizeof(struct nvme_ns));
                if (!ctrlr->ns)
                        goto fail;

                nvme_debug("Allocate %u namespace data\n", nr_ns);
                ctrlr->nsdata = nvme_calloc(nr_ns, sizeof(struct nvme_ns_data),
                                            PAGE_SIZE);
                if (!ctrlr->nsdata)
                        goto fail;

                ctrlr->nr_ns = nr_ns;

        }

        for (i = 0; i < nr_ns; i++) {
                ns = &ctrlr->ns[i];
                if (nvme_ns_construct(ctrlr, ns, i + 1) != 0)
                        goto fail;
        }

        return 0;

fail:
        nvme_ctrlr_destruct_namespaces(ctrlr);

        return -1;
}

/*
 * Forward declaration.
 */
static int nvme_ctrlr_construct_and_submit_aer(struct nvme_ctrlr *ctrlr,
                                struct nvme_async_event_request *aer);

/*
 * Async event completion callback.
 */
static void nvme_ctrlr_async_event_cb(void *arg, const struct nvme_cpl *cpl)
{
        struct nvme_async_event_request *aer = arg;
        struct nvme_ctrlr *ctrlr = aer->ctrlr;

        if (cpl->status.sc == NVME_SC_ABORTED_SQ_DELETION)
                /*
                 * This is simulated when controller is being shut down, to
                 * effectively abort outstanding asynchronous event requests
                 * and make sure all memory is freed. Do not repost the
                 * request in this case.
                 */
                return;

        if (ctrlr->aer_cb_fn != NULL)
                ctrlr->aer_cb_fn(ctrlr->aer_cb_arg, cpl);

        /*
         * Repost another asynchronous event request to replace
         * the one that just completed.
         */
        if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer))
                /*
                 * We can't do anything to recover from a failure here,
                 * so just print a warning message and leave the
                 * AER unsubmitted.
                 */
                nvme_err("Initialize AER failed\n");
}

/*
 * Issue an async event request.
 */
static int nvme_ctrlr_construct_and_submit_aer(struct nvme_ctrlr *ctrlr,
                                struct nvme_async_event_request *aer)
{
        struct nvme_request *req;

        req = nvme_request_allocate_null(&ctrlr->adminq,
                                         nvme_ctrlr_async_event_cb, aer);
        if (req == NULL)
                return -1;

        aer->ctrlr = ctrlr;
        aer->req = req;
        req->cmd.opc = NVME_OPC_ASYNC_EVENT_REQUEST;

        return nvme_qpair_submit_request(&ctrlr->adminq, req);
}
/*
 * Configure async event management.
 */
static int nvme_ctrlr_configure_aer(struct nvme_ctrlr *ctrlr)
{
        union nvme_critical_warning_state state;
        struct nvme_async_event_request *aer;
        unsigned int i;
        int ret;

        state.raw = 0xFF;
        state.bits.reserved = 0;

        ret = nvme_admin_set_feature(ctrlr, false,
                                     NVME_FEAT_ASYNC_EVENT_CONFIGURATION,
                                     state.raw, 0, NULL);
        if (ret != 0) {
                nvme_notice("Set feature ASYNC_EVENT_CONFIGURATION failed\n");
                return ret;
        }

        /* aerl is a zero-based value, so we need to add 1 here. */
        ctrlr->num_aers = nvme_min(NVME_MAX_ASYNC_EVENTS,
                                   (ctrlr->cdata.aerl + 1));

        for (i = 0; i < ctrlr->num_aers; i++) {
                aer = &ctrlr->aer[i];
                if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) {
                        nvme_notice("Construct AER failed\n");
                        return -1;
                }
        }

        return 0;
}

/*
 * Start a controller.
 */
static int nvme_ctrlr_start(struct nvme_ctrlr *ctrlr)
{

        nvme_qpair_reset(&ctrlr->adminq);
        nvme_qpair_enable(&ctrlr->adminq);

        if (nvme_ctrlr_identify(ctrlr) != 0)
                return -1;

        if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0)
                return -1;

        if (nvme_ctrlr_init_io_qpairs(ctrlr))
                return -1;

        if (nvme_ctrlr_construct_namespaces(ctrlr) != 0)
                return -1;

        if (nvme_ctrlr_configure_aer(ctrlr) != 0)
                nvme_warning("controller does not support AER!\n");

        nvme_ctrlr_set_supported_log_pages(ctrlr);
        nvme_ctrlr_set_supported_features(ctrlr);

        if (ctrlr->cdata.sgls.supported)
                ctrlr->flags |= NVME_CTRLR_SGL_SUPPORTED;

        return 0;
}

/*
 * Memory map the controller side buffer.
 */
static void nvme_ctrlr_map_cmb(struct nvme_ctrlr *ctrlr)
{
        int ret;
        void *addr;
        uint32_t bir;
        union nvme_cmbsz_register cmbsz;
        union nvme_cmbloc_register cmbloc;
        uint64_t size, unit_size, offset, bar_size, bar_phys_addr;

        cmbsz.raw = nvme_reg_mmio_read_4(ctrlr, cmbsz.raw);
        cmbloc.raw = nvme_reg_mmio_read_4(ctrlr, cmbloc.raw);
        if (!cmbsz.bits.sz)
                goto out;

        /* Values 0 2 3 4 5 are valid for BAR */
        bir = cmbloc.bits.bir;
        if (bir > 5 || bir == 1)
                goto out;

        /* unit size for 4KB/64KB/1MB/16MB/256MB/4GB/64GB */
        unit_size = (uint64_t)1 << (12 + 4 * cmbsz.bits.szu);

        /* controller memory buffer size in Bytes */
        size = unit_size * cmbsz.bits.sz;

        /* controller memory buffer offset from BAR in Bytes */
        offset = unit_size * cmbloc.bits.ofst;

        nvme_pcicfg_get_bar_addr_len(ctrlr->pci_dev, bir, &bar_phys_addr,
                                     &bar_size);

        if (offset > bar_size)
                goto out;

        if (size > bar_size - offset)
                goto out;

        ret = nvme_pcicfg_map_bar_write_combine(ctrlr->pci_dev, bir, &addr);
        if ((ret != 0) || addr == NULL)
                goto out;

        ctrlr->cmb_bar_virt_addr = addr;
        ctrlr->cmb_bar_phys_addr = bar_phys_addr;
        ctrlr->cmb_size = size;
        ctrlr->cmb_current_offset = offset;

        if (!cmbsz.bits.sqs)
                ctrlr->opts.use_cmb_sqs = false;

        return;

out:
        ctrlr->cmb_bar_virt_addr = NULL;
        ctrlr->opts.use_cmb_sqs = false;

        return;
}
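/*
 * Example (illustrative): CMBSZ.SZU = 2 selects a 1 MiB granularity, so a
 * controller reporting CMBSZ.SZ = 512 and CMBLOC.OFST = 0 describes a
 * 512 MiB controller memory buffer starting at offset 0 of the BAR
 * indicated by CMBLOC.BIR.
 */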
/*
 * Unmap the controller side buffer.
 */
static int nvme_ctrlr_unmap_cmb(struct nvme_ctrlr *ctrlr)
{
        union nvme_cmbloc_register cmbloc;
        void *addr = ctrlr->cmb_bar_virt_addr;
        int ret = 0;

        if (addr) {
                cmbloc.raw = nvme_reg_mmio_read_4(ctrlr, cmbloc.raw);
                ret = nvme_pcicfg_unmap_bar(ctrlr->pci_dev, cmbloc.bits.bir,
                                            addr);
        }

        return ret;
}

/*
 * Map the controller PCI bars.
 */
static int nvme_ctrlr_map_bars(struct nvme_ctrlr *ctrlr)
{
        void *addr;
        int ret;

        ret = nvme_pcicfg_map_bar(ctrlr->pci_dev, 0, 0, &addr);
        if (ret != 0 || addr == NULL) {
                nvme_err("Map PCI device bar failed %d (%s)\n",
                         ret, strerror(ret));
                return ret;
        }

        nvme_debug("Controller BAR mapped at %p\n", addr);

        ctrlr->regs = (volatile struct nvme_registers *)addr;
        nvme_ctrlr_map_cmb(ctrlr);

        return 0;
}

/*
 * Unmap the controller PCI bars.
 */
static int nvme_ctrlr_unmap_bars(struct nvme_ctrlr *ctrlr)
{
        void *addr = (void *)ctrlr->regs;
        int ret;

        ret = nvme_ctrlr_unmap_cmb(ctrlr);
        if (ret != 0) {
                nvme_err("Unmap controller side buffer failed %d\n", ret);
                return ret;
        }

        if (addr) {
                ret = nvme_pcicfg_unmap_bar(ctrlr->pci_dev, 0, addr);
                if (ret != 0) {
                        nvme_err("Unmap PCI device bar failed %d\n", ret);
                        return ret;
                }
        }

        return 0;
}

/*
 * Set a controller in the failed state.
 */
static void nvme_ctrlr_fail(struct nvme_ctrlr *ctrlr)
{
        unsigned int i;

        ctrlr->failed = true;

        nvme_qpair_fail(&ctrlr->adminq);
        if (ctrlr->ioq)
                for (i = 0; i < ctrlr->io_queues; i++)
                        nvme_qpair_fail(&ctrlr->ioq[i]);
}
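/*
 * Controller initialization state machine (summary): starting from
 * NVME_CTRLR_STATE_INIT, nvme_ctrlr_init() below first makes sure the
 * controller is disabled, waiting for CSTS.RDY to follow CC.EN through the
 * DISABLE_WAIT_FOR_READY_1/0 states, then re-enables it and waits for
 * CSTS.RDY = 1 (ENABLE_WAIT_FOR_READY_1) before running nvme_ctrlr_start()
 * and moving to NVME_CTRLR_STATE_READY.
 */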
/*
 * This function will be called repeatedly during initialization
 * until the controller is ready.
 */
static int nvme_ctrlr_init(struct nvme_ctrlr *ctrlr)
{
        unsigned int ready_timeout_in_ms = nvme_ctrlr_get_ready_to_in_ms(ctrlr);
        int ret;

        /*
         * Check if the current initialization step is done or has timed out.
         */
        switch (ctrlr->state) {

        case NVME_CTRLR_STATE_INIT:

                /* Begin the hardware initialization by making
                 * sure the controller is disabled. */
                if (nvme_ctrlr_enabled(ctrlr)) {
                        /*
                         * Disable the controller to cause a reset.
                         */
                        if (!nvme_ctrlr_ready(ctrlr)) {
                                /* Wait for the controller to be ready */
                                nvme_ctrlr_set_state(ctrlr,
                                        NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1,
                                        ready_timeout_in_ms);
                                return 0;
                        }

                        /*
                         * The controller is enabled and ready.
                         * It can be immediately disabled.
                         */
                        nvme_ctrlr_disable(ctrlr);
                        nvme_ctrlr_set_state(ctrlr,
                                NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
                                ready_timeout_in_ms);

                        if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
                                nvme_msleep(2000);

                        return 0;
                }

                if (nvme_ctrlr_ready(ctrlr)) {
                        /*
                         * Controller is in the process of shutting down.
                         * We need to wait for CSTS.RDY to become 0.
                         */
                        nvme_ctrlr_set_state(ctrlr,
                                NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
                                ready_timeout_in_ms);
                        return 0;
                }

                /*
                 * Controller is currently disabled.
                 * We can jump straight to enabling it.
                 */
                ret = nvme_ctrlr_enable(ctrlr);
                if (ret)
                        nvme_err("Enable controller failed\n");
                else
                        nvme_ctrlr_set_state(ctrlr,
                                NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,
                                ready_timeout_in_ms);
                return ret;

        case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:

                if (nvme_ctrlr_ready(ctrlr)) {
                        /* CC.EN = 1 && CSTS.RDY = 1,
                         * so we can disable the controller now. */
                        nvme_ctrlr_disable(ctrlr);
                        nvme_ctrlr_set_state(ctrlr,
                                NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
                                ready_timeout_in_ms);
                        return 0;
                }

                break;

        case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:

                if (!nvme_ctrlr_ready(ctrlr)) {
                        /* CC.EN = 0 && CSTS.RDY = 0,
                         * so we can enable the controller now. */
                        ret = nvme_ctrlr_enable(ctrlr);
                        if (ret)
                                nvme_err("Enable controller failed\n");
                        else
                                nvme_ctrlr_set_state(ctrlr,
                                        NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,
                                        ready_timeout_in_ms);
                        return ret;
                }
                break;

        case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:

                if (nvme_ctrlr_ready(ctrlr)) {
                        if (ctrlr->quirks & NVME_QUIRK_DELAY_AFTER_RDY)
                                nvme_msleep(2000);

                        ret = nvme_ctrlr_start(ctrlr);
                        if (ret)
                                nvme_err("Start controller failed\n");
                        else
                                nvme_ctrlr_set_state(ctrlr,
                                        NVME_CTRLR_STATE_READY,
                                        NVME_TIMEOUT_INFINITE);
                        return ret;
                }
                break;

        default:
                nvme_panic("Unhandled ctrlr state %d\n", ctrlr->state);
                nvme_ctrlr_fail(ctrlr);
                return -1;
        }

        if ((ctrlr->state_timeout_ms != NVME_TIMEOUT_INFINITE) &&
            (nvme_time_msec() > ctrlr->state_timeout_ms)) {
                nvme_err("Initialization timed out in state %d\n",
                         ctrlr->state);
                nvme_ctrlr_fail(ctrlr);
                return -1;
        }

        return 0;
}

/*
 * Reset a controller.
 */
static int nvme_ctrlr_reset(struct nvme_ctrlr *ctrlr)
{
        struct nvme_qpair *qpair;
        unsigned int i;

        if (ctrlr->resetting || ctrlr->failed)
                /*
                 * Controller is already resetting or has failed. Return
                 * immediately since there is no need to kick off another
                 * reset in these cases.
                 */
                return 0;

        ctrlr->resetting = true;

        /* Disable all queues before disabling the controller hardware. */
        nvme_qpair_disable(&ctrlr->adminq);
        for (i = 0; i < ctrlr->io_queues; i++)
                nvme_qpair_disable(&ctrlr->ioq[i]);

        /* Set the state back to INIT to cause a full hardware reset. */
        nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT,
                             NVME_TIMEOUT_INFINITE);

        while (ctrlr->state != NVME_CTRLR_STATE_READY) {
                if (nvme_ctrlr_init(ctrlr) != 0) {
                        nvme_crit("Controller reset failed\n");
                        nvme_ctrlr_fail(ctrlr);
                        goto out;
                }
        }

        /* Reinitialize qpairs */
        TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
                if (nvme_ctrlr_create_qpair(ctrlr, qpair) != 0)
                        nvme_ctrlr_fail(ctrlr);
        }

out:
        ctrlr->resetting = false;

        return ctrlr->failed ? -1 : 0;
}
/*
 * Set controller options.
 */
static void nvme_ctrlr_set_opts(struct nvme_ctrlr *ctrlr,
                                struct nvme_ctrlr_opts *opts)
{
        if (opts)
                memcpy(&ctrlr->opts, opts, sizeof(struct nvme_ctrlr_opts));
        else
                memset(&ctrlr->opts, 0, sizeof(struct nvme_ctrlr_opts));

        if (ctrlr->opts.io_queues == 0)
                ctrlr->opts.io_queues = DEFAULT_MAX_IO_QUEUES;

        if (ctrlr->opts.io_queues > NVME_MAX_IO_QUEUES) {
                nvme_info("Limiting requested I/O queues %u to %d\n",
                          ctrlr->opts.io_queues, NVME_MAX_IO_QUEUES);
                ctrlr->opts.io_queues = NVME_MAX_IO_QUEUES;
        }
}

/*
 * Attach a PCI controller.
 */
struct nvme_ctrlr *
nvme_ctrlr_attach(struct pci_device *pci_dev,
                  struct nvme_ctrlr_opts *opts)
{
        struct nvme_ctrlr *ctrlr;
        union nvme_cap_register cap;
        uint32_t cmd_reg;
        int ret;

        /* Get a new controller handle */
        ctrlr = malloc(sizeof(struct nvme_ctrlr));
        if (!ctrlr) {
                nvme_err("Allocate controller handle failed\n");
                return NULL;
        }

        nvme_debug("New controller handle %p\n", ctrlr);

        /* Initialize the handle */
        memset(ctrlr, 0, sizeof(struct nvme_ctrlr));
        ctrlr->pci_dev = pci_dev;
        ctrlr->resetting = false;
        ctrlr->failed = false;
        TAILQ_INIT(&ctrlr->free_io_qpairs);
        TAILQ_INIT(&ctrlr->active_io_qpairs);
        pthread_mutex_init(&ctrlr->lock, NULL);
        ctrlr->quirks = nvme_ctrlr_get_quirks(pci_dev);

        nvme_ctrlr_set_state(ctrlr,
                             NVME_CTRLR_STATE_INIT,
                             NVME_TIMEOUT_INFINITE);

        ret = nvme_ctrlr_map_bars(ctrlr);
        if (ret != 0) {
                nvme_err("Map controller BAR failed\n");
                pthread_mutex_destroy(&ctrlr->lock);
                free(ctrlr);
                return NULL;
        }

        /* Enable PCI busmaster and disable INTx */
        nvme_pcicfg_read32(pci_dev, &cmd_reg, 4);
        cmd_reg |= 0x0404;
        nvme_pcicfg_write32(pci_dev, cmd_reg, 4);

        /*
         * Doorbell stride is 2 ^ (dstrd + 2),
         * but we want multiples of 4, so drop the + 2.
         */
        cap.raw = nvme_reg_mmio_read_8(ctrlr, cap.raw);
        ctrlr->doorbell_stride_u32 = 1 << cap.bits.dstrd;
        ctrlr->min_page_size = 1 << (12 + cap.bits.mpsmin);

        /* Set default transfer size */
        ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;

        /* Create the admin queue pair */
        ret = nvme_qpair_construct(ctrlr, &ctrlr->adminq, 0,
                                   NVME_ADMIN_ENTRIES, NVME_ADMIN_TRACKERS);
        if (ret != 0) {
                nvme_err("Initialize admin queue pair failed\n");
                goto err;
        }

        /* Set options and then initialize */
        nvme_ctrlr_set_opts(ctrlr, opts);
        do {
                ret = nvme_ctrlr_init(ctrlr);
                if (ret)
                        goto err;
        } while (ctrlr->state != NVME_CTRLR_STATE_READY);

        return ctrlr;

err:
        nvme_ctrlr_detach(ctrlr);

        return NULL;
}
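/*
 * Usage sketch (illustrative, not part of the library): a caller that has
 * already located a PCI device handle (here named "pdev", obtained through
 * whatever enumeration mechanism the application uses) would typically do:
 *
 *        struct nvme_ctrlr_opts opts = { .io_queues = 4 };
 *        struct nvme_ctrlr *ctrlr;
 *        struct nvme_qpair *qpair;
 *
 *        ctrlr = nvme_ctrlr_attach(pdev, &opts);
 *        if (!ctrlr)
 *                return -1;
 *
 *        qpair = nvme_ioqp_get(ctrlr, NVME_QPRIO_URGENT, 0);
 *        ...                     (submit I/O through the qpair)
 *        nvme_ioqp_release(qpair);
 *        nvme_ctrlr_detach(ctrlr);
 */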
/*
 * Detach a PCI controller.
 */
void nvme_ctrlr_detach(struct nvme_ctrlr *ctrlr)
{
        struct nvme_qpair *qpair;
        uint32_t i;

        while (!TAILQ_EMPTY(&ctrlr->active_io_qpairs)) {
                qpair = TAILQ_FIRST(&ctrlr->active_io_qpairs);
                nvme_ioqp_release(qpair);
        }

        nvme_ctrlr_shutdown(ctrlr);

        nvme_ctrlr_destruct_namespaces(ctrlr);
        if (ctrlr->ioq) {
                for (i = 0; i < ctrlr->io_queues; i++)
                        nvme_qpair_destroy(&ctrlr->ioq[i]);
                free(ctrlr->ioq);
        }

        nvme_qpair_destroy(&ctrlr->adminq);

        nvme_ctrlr_unmap_bars(ctrlr);

        pthread_mutex_destroy(&ctrlr->lock);
        free(ctrlr);
}

/*
 * Get a controller feature.
 */
int nvme_ctrlr_get_feature(struct nvme_ctrlr *ctrlr,
                           enum nvme_feat_sel sel, enum nvme_feat feature,
                           uint32_t cdw11,
                           uint32_t *attributes)
{
        int ret;

        pthread_mutex_lock(&ctrlr->lock);

        ret = nvme_admin_get_feature(ctrlr, sel, feature, cdw11, attributes);
        if (ret != 0)
                nvme_notice("Get feature 0x%08x failed\n",
                            (unsigned int) feature);

        pthread_mutex_unlock(&ctrlr->lock);

        return ret;
}

/*
 * Set a controller feature.
 */
int nvme_ctrlr_set_feature(struct nvme_ctrlr *ctrlr,
                           bool save, enum nvme_feat feature,
                           uint32_t cdw11, uint32_t cdw12,
                           uint32_t *attributes)
{
        int ret;

        pthread_mutex_lock(&ctrlr->lock);

        ret = nvme_admin_set_feature(ctrlr, save, feature,
                                     cdw11, cdw12, attributes);
        if (ret != 0)
                nvme_notice("Set feature 0x%08x failed\n",
                            (unsigned int) feature);

        pthread_mutex_unlock(&ctrlr->lock);

        return ret;
}
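/*
 * Usage sketch (illustrative): reading the current value of a feature, for
 * instance the temperature threshold, could look like:
 *
 *        uint32_t value;
 *
 *        if (nvme_ctrlr_get_feature(ctrlr, NVME_FEAT_CURRENT,
 *                                   NVME_FEAT_TEMPERATURE_THRESHOLD,
 *                                   0, &value) == 0)
 *                printf("Temperature threshold: 0x%08x\n", value);
 */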
/*
 * Attach a namespace.
 */
int nvme_ctrlr_attach_ns(struct nvme_ctrlr *ctrlr, unsigned int nsid,
                         struct nvme_ctrlr_list *clist)
{
        int ret;

        pthread_mutex_lock(&ctrlr->lock);

        ret = nvme_admin_attach_ns(ctrlr, nsid, clist);
        if (ret) {
                nvme_notice("Attach namespace %u failed\n", nsid);
                goto out;
        }

        ret = nvme_ctrlr_reset(ctrlr);
        if (ret != 0)
                nvme_notice("Reset controller failed\n");

out:
        pthread_mutex_unlock(&ctrlr->lock);

        return ret;
}

/*
 * Detach a namespace.
 */
int nvme_ctrlr_detach_ns(struct nvme_ctrlr *ctrlr, unsigned int nsid,
                         struct nvme_ctrlr_list *clist)
{
        int ret;

        pthread_mutex_lock(&ctrlr->lock);

        ret = nvme_admin_detach_ns(ctrlr, nsid, clist);
        if (ret != 0) {
                nvme_notice("Detach namespace %u failed\n", nsid);
                goto out;
        }

        ret = nvme_ctrlr_reset(ctrlr);
        if (ret)
                nvme_notice("Reset controller failed\n");

out:
        pthread_mutex_unlock(&ctrlr->lock);

        return ret;
}

/*
 * Create a namespace.
 */
unsigned int nvme_ctrlr_create_ns(struct nvme_ctrlr *ctrlr,
                                  struct nvme_ns_data *nsdata)
{
        unsigned int nsid;
        int ret;

        pthread_mutex_lock(&ctrlr->lock);

        ret = nvme_admin_create_ns(ctrlr, nsdata, &nsid);
        if (ret != 0) {
                nvme_notice("Create namespace failed\n");
                nsid = 0;
        }

        pthread_mutex_unlock(&ctrlr->lock);

        return nsid;
}

/*
 * Delete a namespace.
 */
int nvme_ctrlr_delete_ns(struct nvme_ctrlr *ctrlr, unsigned int nsid)
{
        int ret;

        pthread_mutex_lock(&ctrlr->lock);

        ret = nvme_admin_delete_ns(ctrlr, nsid);
        if (ret != 0) {
                nvme_notice("Delete namespace %u failed\n", nsid);
                goto out;
        }

        ret = nvme_ctrlr_reset(ctrlr);
        if (ret)
                nvme_notice("Reset controller failed\n");

out:
        pthread_mutex_unlock(&ctrlr->lock);

        return ret;
}

/*
 * Format NVM media.
 */
int nvme_ctrlr_format_ns(struct nvme_ctrlr *ctrlr, unsigned int nsid,
                         struct nvme_format *format)
{
        int ret;

        pthread_mutex_lock(&ctrlr->lock);

        ret = nvme_admin_format_nvm(ctrlr, nsid, format);
        if (ret != 0) {
                if (nsid == NVME_GLOBAL_NS_TAG)
                        nvme_notice("Format device failed\n");
                else
                        nvme_notice("Format namespace %u failed\n", nsid);
                goto out;
        }

        ret = nvme_ctrlr_reset(ctrlr);
        if (ret)
                nvme_notice("Reset controller failed\n");

out:
        pthread_mutex_unlock(&ctrlr->lock);

        return ret;
}

/*
 * Update the device firmware.
 */
int nvme_ctrlr_update_firmware(struct nvme_ctrlr *ctrlr,
                               void *fw, size_t size, int slot)
{
        struct nvme_fw_commit fw_commit;
        unsigned int size_remaining = size, offset = 0, transfer;
        void *f = fw;
        int ret;

        if (size & 0x3) {
                nvme_err("Invalid firmware size\n");
                return EINVAL;
        }

        pthread_mutex_lock(&ctrlr->lock);

        /* Download firmware */
        while (size_remaining > 0) {

                transfer = nvme_min(size_remaining, ctrlr->min_page_size);

                ret = nvme_admin_fw_image_dl(ctrlr, f, transfer, offset);
                if (ret != 0) {
                        nvme_err("Download FW (%u B at %u) failed\n",
                                 transfer, offset);
                        goto out;
                }

                f += transfer;
                offset += transfer;
                size_remaining -= transfer;

        }

        /* Commit firmware */
        memset(&fw_commit, 0, sizeof(struct nvme_fw_commit));
        fw_commit.fs = slot;
        fw_commit.ca = NVME_FW_COMMIT_REPLACE_IMG;

        ret = nvme_admin_fw_commit(ctrlr, &fw_commit);
        if (ret != 0) {
                nvme_err("Commit downloaded FW (%zu B) failed\n",
                         size);
                goto out;
        }

        ret = nvme_ctrlr_reset(ctrlr);
        if (ret)
                nvme_notice("Reset controller failed\n");

out:
        pthread_mutex_unlock(&ctrlr->lock);

        return ret;
}
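/*
 * Example (illustrative): with ctrlr->min_page_size = 4096, a 128 KiB
 * firmware image is downloaded by the loop above as 32 Firmware Image
 * Download commands of 4096 bytes each, at increasing offsets, before the
 * final Firmware Commit to the requested slot and the controller reset.
 */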
/*
 * Get an unused I/O queue pair.
 */
struct nvme_qpair *nvme_ioqp_get(struct nvme_ctrlr *ctrlr,
                                 enum nvme_qprio qprio, unsigned int qd)
{
        struct nvme_qpair *qpair = NULL;
        union nvme_cc_register cc;
        uint32_t trackers;
        int ret;

        cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);

        /* Only the low 2 bits (values 0, 1, 2, 3) of QPRIO are valid. */
        if ((qprio & 3) != qprio)
                return NULL;

        /*
         * Only the value NVME_QPRIO_URGENT (0) is valid for the
         * default round robin arbitration method.
         */
        if ((cc.bits.ams == NVME_CC_AMS_RR) && (qprio != NVME_QPRIO_URGENT)) {
                nvme_err("Invalid queue priority for default round "
                         "robin arbitration method\n");
                return NULL;
        }

        /* The I/O qpair queue depth must be in [2, io_qpairs_max_entries] */
        if (qd == 1) {
                nvme_err("Invalid queue depth\n");
                return NULL;
        }

        if (qd == 0 || qd > ctrlr->io_qpairs_max_entries)
                qd = ctrlr->io_qpairs_max_entries;

        /*
         * No need to have more trackers than entries in the submit queue.
         * Note also that for a queue size of N, we can only have (N-1)
         * commands outstanding, hence the "-1" here.
         */
        trackers = nvme_min(NVME_IO_TRACKERS, (qd - 1));

        pthread_mutex_lock(&ctrlr->lock);

        /* Get the first available qpair structure */
        qpair = TAILQ_FIRST(&ctrlr->free_io_qpairs);
        if (qpair == NULL) {
                /* No free queue IDs */
                nvme_err("No free I/O queue pairs\n");
                goto out;
        }

        /* Construct the qpair */
        ret = nvme_qpair_construct(ctrlr, qpair, qprio, qd, trackers);
        if (ret != 0) {
                nvme_qpair_destroy(qpair);
                qpair = NULL;
                goto out;
        }

        /*
         * At this point, qpair contains a preallocated submission
         * and completion queue and a unique queue ID, but it is not
         * yet created on the controller.
         * Fill out the submission queue priority and send out the
         * Create I/O Queue commands.
         */
        if (nvme_ctrlr_create_qpair(ctrlr, qpair) != 0) {
                nvme_err("Create queue pair on the controller failed\n");
                nvme_qpair_destroy(qpair);
                qpair = NULL;
                goto out;
        }

        TAILQ_REMOVE(&ctrlr->free_io_qpairs, qpair, tailq);
        TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq);

out:
        pthread_mutex_unlock(&ctrlr->lock);

        return qpair;
}

/*
 * Free an I/O queue pair.
 */
int nvme_ioqp_release(struct nvme_qpair *qpair)
{
        struct nvme_ctrlr *ctrlr;
        int ret;

        if (qpair == NULL)
                return 0;

        ctrlr = qpair->ctrlr;

        pthread_mutex_lock(&ctrlr->lock);

        /* Delete the I/O submission and completion queues */
        ret = nvme_ctrlr_delete_qpair(ctrlr, qpair);
        if (ret != 0) {
                nvme_notice("Delete queue pair %u failed\n", qpair->id);
        } else {
                TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq);
                TAILQ_INSERT_HEAD(&ctrlr->free_io_qpairs, qpair, tailq);
        }

        pthread_mutex_unlock(&ctrlr->lock);

        return ret;
}

/*
 * Submit an NVMe command using the specified I/O queue pair.
 */
int nvme_ioqp_submit_cmd(struct nvme_qpair *qpair,
                         struct nvme_cmd *cmd,
                         void *buf, size_t len,
                         nvme_cmd_cb cb_fn, void *cb_arg)
{
        struct nvme_request *req;
        int ret = ENOMEM;

        req = nvme_request_allocate_contig(qpair, buf, len, cb_fn, cb_arg);
        if (req) {
                memcpy(&req->cmd, cmd, sizeof(req->cmd));
                ret = nvme_qpair_submit_request(qpair, req);
        }

        return ret;
}