/*-
 * BSD LICENSE
 *
 * Copyright (c) Intel Corporation. All rights reserved.
 * Copyright (c) 2017, Western Digital Corporation or its affiliates.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __NVME_INTERNAL_H__
#define __NVME_INTERNAL_H__

#include "nvme_common.h"
#include "nvme_pci.h"
#include "nvme_intel.h"
#include "nvme_mem.h"

#ifndef __HAIKU__
#include <pthread.h>
#include <sys/user.h> /* PAGE_SIZE */
#else
#include "nvme_platform.h"
#endif

/*
 * List functions.
 */
#define LIST_FOREACH_SAFE(var, head, field, tvar)		\
	for ((var) = LIST_FIRST((head));			\
	     (var) && ((tvar) = LIST_NEXT((var), field), 1);	\
	     (var) = (tvar))

/*
 * Tail queue functions.
 */
#define TAILQ_FOREACH_SAFE(var, head, field, tvar)		\
	for ((var) = TAILQ_FIRST((head));			\
	     (var) && ((tvar) = TAILQ_NEXT((var), field), 1);	\
	     (var) = (tvar))
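/*
 * Illustrative usage sketch (not part of the API): unlike the plain
 * FOREACH macros, the _SAFE variants keep a lookahead pointer, so the
 * current element may be removed or freed while iterating, e.g.:
 *
 *	struct nvme_request *child, *tmp;
 *
 *	TAILQ_FOREACH_SAFE(child, &parent->children, child_tailq, tmp)
 *		nvme_request_remove_child(parent, child);
 */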

#define INTEL_DC_P3X00_DEVID	0x09538086

#define NVME_TIMEOUT_INFINITE	UINT64_MAX

/*
 * Some Intel devices support a vendor-unique read latency log page even
 * though the log page directory says otherwise.
 */
#define NVME_INTEL_QUIRK_READ_LATENCY	0x1

/*
 * Some Intel devices support a vendor-unique write latency log page even
 * though the log page directory says otherwise.
 */
#define NVME_INTEL_QUIRK_WRITE_LATENCY	0x2

/*
 * Some controllers need a delay before starting to check the device
 * readiness, which is done by reading the controller status register rdy bit.
 */
#define NVME_QUIRK_DELAY_BEFORE_CHK_RDY	0x4

/*
 * Some controllers need a delay once the controller status register rdy bit
 * switches from 0 to 1.
 */
#define NVME_QUIRK_DELAY_AFTER_RDY	0x8

/*
 * Queues may consist of a contiguous block of physical
 * memory or optionally a non-contiguous set of physical
 * memory pages (defined by a Physical Region Page List).
 */
#define NVME_MAX_PRP_LIST_ENTRIES	(506)

/*
 * For commands requiring more than 2 PRP entries, one PRP will be
 * embedded in the command (prp1), and the rest of the PRP entries
 * will be in a list pointed to by the command (prp2). This means
 * that the real maximum number of PRP entries we support is 506 + 1,
 * which results in a maximum transfer size of 506 * PAGE_SIZE.
 */
#define NVME_MAX_XFER_SIZE	(NVME_MAX_PRP_LIST_ENTRIES * PAGE_SIZE)

#define NVME_ADMIN_TRACKERS	(16)
#define NVME_ADMIN_ENTRIES	(128)

/*
 * NVME_IO_ENTRIES defines the size of an I/O qpair's submission and completion
 * queues, while NVME_IO_TRACKERS defines the maximum number of I/O commands
 * that we will allow to be outstanding on an I/O qpair at any time. The only
 * advantage in having IO_ENTRIES > IO_TRACKERS is for debugging purposes:
 * when dumping the contents of the submission and completion queues, a longer
 * history of data is shown.
 */
#define NVME_IO_ENTRIES		(1024U)
#define NVME_IO_TRACKERS	(128U)
#define NVME_IO_ENTRIES_VS_TRACKERS_RATIO	(NVME_IO_ENTRIES / NVME_IO_TRACKERS)

/*
 * NVME_MAX_SGL_DESCRIPTORS defines the maximum number of descriptors in one
 * SGL segment.
 */
#define NVME_MAX_SGL_DESCRIPTORS	(253)

/*
 * NVME_MAX_IO_ENTRIES is not defined, since it is specified by CAP.MQES
 * for each controller.
 */

#define NVME_MAX_ASYNC_EVENTS	(8)

/*
 * NVME_MAX_IO_QUEUES in nvme_spec.h defines the 64K spec-limit, but this
 * define specifies the maximum number of queues this driver will actually
 * try to configure, if available.
 */
#define DEFAULT_MAX_IO_QUEUES	(1024)

/*
 * Maximum number of times a failed command can be retried.
 */
#define NVME_MAX_RETRY_COUNT	(3)

/*
 * I/O queue type.
 */
enum nvme_io_queue_type {

	NVME_IO_QTYPE_INVALID = 0,
	NVME_IO_SUBMISSION_QUEUE,
	NVME_IO_COMPLETION_QUEUE,
};

enum nvme_payload_type {

	NVME_PAYLOAD_TYPE_INVALID = 0,

	/*
	 * nvme_request::payload.u.contig is valid for this request.
	 */
	NVME_PAYLOAD_TYPE_CONTIG,

	/*
	 * nvme_request::payload.u.sgl is valid for this request.
	 */
	NVME_PAYLOAD_TYPE_SGL,
};

/*
 * Controller support flags.
 */
enum nvme_ctrlr_flags {

	/*
	 * SGLs are supported.
	 */
	NVME_CTRLR_SGL_SUPPORTED = 0x1,
};

/*
 * Descriptor for a request data payload.
 *
 * This struct is arranged so that it fits nicely in struct nvme_request.
 */
struct __attribute__((packed)) nvme_payload {

	union {
		/*
		 * Virtual memory address of a single
		 * physically contiguous buffer.
		 */
		void *contig;

		/*
		 * Callback functions for retrieving physical
		 * addresses for scattered payloads.
		 */
		struct {
			nvme_req_reset_sgl_cb	reset_sgl_fn;
			nvme_req_next_sge_cb	next_sge_fn;
			void			*cb_arg;
		} sgl;
	} u;

	/*
	 * Virtual memory address of a single physically
	 * contiguous metadata buffer.
	 */
	void *md;

	/*
	 * Payload type.
	 */
	uint8_t type;
};
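/*
 * Illustrative sketch (not part of the API): a physically contiguous
 * payload only needs the u.contig pointer and the matching type, e.g.:
 *
 *	struct nvme_payload payload = {
 *		.u.contig = buffer,
 *		.md = NULL,
 *		.type = NVME_PAYLOAD_TYPE_CONTIG,
 *	};
 *
 * In practice, payloads like this are typically built through
 * nvme_request_allocate_contig() declared below; scattered payloads
 * instead set type to NVME_PAYLOAD_TYPE_SGL and provide the u.sgl
 * callbacks.
 */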

struct nvme_request {

	/*
	 * NVMe command: must be aligned on 64B.
	 */
	struct nvme_cmd			cmd;

	/*
	 * Data payload for this request's command.
	 */
	struct nvme_payload		payload;

	uint8_t				retries;

	/*
	 * Number of child requests still outstanding for this
	 * request which was split into multiple child requests.
	 */
	uint8_t				child_reqs;
	uint32_t			payload_size;

	/*
	 * Offset in bytes from the beginning of payload for this request.
	 * This is used for I/O commands that are split into multiple requests.
	 */
	uint32_t			payload_offset;
	uint32_t			md_offset;

	nvme_cmd_cb			cb_fn;
	void				*cb_arg;

	/*
	 * The following members should not be reordered with members
	 * above. These members are only needed when splitting
	 * requests, which is done rarely, and the driver is careful
	 * to not touch the following fields until a split operation is
	 * needed, to avoid touching an extra cacheline.
	 */

	/*
	 * Points to the outstanding child requests for a parent request.
	 * Only valid if a request was split into multiple child
	 * requests, and is not initialized for non-split requests.
	 */
	TAILQ_HEAD(, nvme_request)	children;

	/*
	 * Linked-list pointers for a child request in its parent's list.
	 */
	TAILQ_ENTRY(nvme_request)	child_tailq;

	/*
	 * For queueing in qpair queued_req or free_req.
	 */
	struct nvme_qpair		*qpair;
	STAILQ_ENTRY(nvme_request)	stailq;

	/*
	 * Points to a parent request if part of a split request,
	 * NULL otherwise.
	 */
	struct nvme_request		*parent;

	/*
	 * Completion status for a parent request. Initialized to all 0's
	 * (SUCCESS) before child requests are submitted. If a child
	 * request completes with error, the error status is copied here,
	 * to ensure that the parent request is also completed with error
	 * status once all child requests are completed.
	 */
	struct nvme_cpl			parent_status;

} __attribute__((aligned(64)));

struct nvme_completion_poll_status {
	struct nvme_cpl	cpl;
	bool		done;
};

struct nvme_async_event_request {
	struct nvme_ctrlr	*ctrlr;
	struct nvme_request	*req;
	struct nvme_cpl		cpl;
};

struct nvme_tracker {

	LIST_ENTRY(nvme_tracker)	list;

	struct nvme_request		*req;
	uint16_t			cid;

	uint16_t			rsvd1: 15;
	uint16_t			active: 1;

	uint32_t			rsvd2;

	uint64_t			prp_sgl_bus_addr;

	union {
		uint64_t			prp[NVME_MAX_PRP_LIST_ENTRIES];
		struct nvme_sgl_descriptor	sgl[NVME_MAX_SGL_DESCRIPTORS];
	} u;

	uint64_t			rsvd3;
};

/*
 * struct nvme_tracker must be exactly 4K so that the prp[] array does not
 * cross a page boundary and so that there is no padding required to meet
 * alignment requirements.
 */
nvme_static_assert(sizeof(struct nvme_tracker) == 4096,
		   "nvme_tracker is not 4K");
nvme_static_assert((offsetof(struct nvme_tracker, u.sgl) & 7) == 0,
		   "SGL must be Qword aligned");
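/*
 * Size breakdown (illustrative, assuming 64-bit pointers and 16-byte
 * NVMe SGL descriptors):
 *
 *	list (two pointers)                      16 bytes
 *	req pointer                               8 bytes
 *	cid + rsvd1/active bitfield               4 bytes
 *	rsvd2                                     4 bytes
 *	prp_sgl_bus_addr                          8 bytes
 *	u.prp (506 * 8) / u.sgl (253 * 16)     4048 bytes
 *	rsvd3                                     8 bytes
 *	total                                  4096 bytes
 *
 * which is why NVME_MAX_PRP_LIST_ENTRIES is 506 and
 * NVME_MAX_SGL_DESCRIPTORS is 253.
 */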

struct nvme_qpair {

	volatile uint32_t	*sq_tdbl;
	volatile uint32_t	*cq_hdbl;

	/*
	 * Submission queue.
	 */
	struct nvme_cmd		*cmd;

	/*
	 * Completion queue.
	 */
	struct nvme_cpl		*cpl;

	LIST_HEAD(, nvme_tracker)	free_tr;
	LIST_HEAD(, nvme_tracker)	outstanding_tr;

	/*
	 * Number of trackers and array of trackers indexed by command ID.
	 */
	uint16_t		trackers;
	struct nvme_tracker	*tr;

	struct nvme_request	*reqs;
	unsigned int		num_reqs;
	STAILQ_HEAD(, nvme_request)	free_req;
	STAILQ_HEAD(, nvme_request)	queued_req;

	uint16_t		id;

	uint16_t		entries;
	uint16_t		sq_tail;
	uint16_t		cq_head;

	uint8_t			phase;

	bool			enabled;
	bool			sq_in_cmb;

	/*
	 * Fields below this point should not be touched on the
	 * normal I/O happy path.
	 */

	uint8_t			qprio;

	struct nvme_ctrlr	*ctrlr;

	/* List entry for nvme_ctrlr::free_io_qpairs and active_io_qpairs */
	TAILQ_ENTRY(nvme_qpair)	tailq;

	phys_addr_t		cmd_bus_addr;
	phys_addr_t		cpl_bus_addr;
};
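/*
 * Completion processing sketch (illustrative only, not the actual
 * implementation; assumes struct nvme_cpl exposes the phase tag as
 * status.p and the command identifier as cid): the entry at cq_head is
 * new when its phase tag matches qpair->phase, and the expected phase
 * value toggles each time cq_head wraps around qpair->entries:
 *
 *	struct nvme_cpl *cpl = &qpair->cpl[qpair->cq_head];
 *
 *	if (cpl->status.p == qpair->phase) {
 *		// complete the tracker identified by cpl->cid
 *		if (++qpair->cq_head == qpair->entries) {
 *			qpair->cq_head = 0;
 *			qpair->phase = !qpair->phase;
 *		}
 *		// then ring the completion queue head doorbell (cq_hdbl)
 *	}
 */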

struct nvme_ns {

	struct nvme_ctrlr	*ctrlr;

	uint32_t		stripe_size;
	uint32_t		sector_size;

	uint32_t		md_size;
	uint32_t		pi_type;

	uint32_t		sectors_per_max_io;
	uint32_t		sectors_per_stripe;

	uint16_t		id;
	uint16_t		flags;

	int			open_count;
};

/*
 * State of struct nvme_ctrlr (in particular, during initialization).
 */
enum nvme_ctrlr_state {

	/*
	 * Controller has not been initialized yet.
	 */
	NVME_CTRLR_STATE_INIT = 0,

	/*
	 * Waiting for CSTS.RDY to transition from 0 to 1
	 * so that CC.EN may be set to 0.
	 */
	NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1,

	/*
	 * Waiting for CSTS.RDY to transition from 1 to 0
	 * so that CC.EN may be set to 1.
	 */
	NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,

	/*
	 * Waiting for CSTS.RDY to transition from 0 to 1
	 * after enabling the controller.
	 */
	NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,

	/*
	 * Controller initialization has completed and
	 * the controller is ready.
	 */
	NVME_CTRLR_STATE_READY
};

/*
 * One of these per allocated PCI device.
 */
struct nvme_ctrlr {

	/*
	 * NVMe MMIO register space.
	 */
	volatile struct nvme_registers	*regs;

	/*
	 * Array of I/O queue pairs.
	 */
	struct nvme_qpair	*ioq;

	/*
	 * Size of the array of I/O queue pairs.
	 */
	unsigned int		io_queues;

	/*
	 * Maximum number of I/O queue pairs.
	 */
	unsigned int		max_io_queues;

	/*
	 * Number of I/O queue pairs enabled.
	 */
	unsigned int		enabled_io_qpairs;

	/*
	 * Maximum number of entries for I/O qpairs.
	 */
	unsigned int		io_qpairs_max_entries;

	/*
	 * Number of namespaces and array of namespaces.
	 */
	unsigned int		nr_ns;
	struct nvme_ns		*ns;

	/*
	 * Controller state.
	 */
	bool			resetting;
	bool			failed;

	/*
	 * Controller support flags.
	 */
	uint64_t		flags;

	/*
	 * Cold data (not accessed in the normal I/O path) is after this point.
	 */
	enum nvme_ctrlr_state	state;
	uint64_t		state_timeout_ms;

	/*
	 * All the log pages supported.
	 */
	bool			log_page_supported[256];

	/*
	 * All the features supported.
	 */
	bool			feature_supported[256];

	/*
	 * Associated PCI device information.
	 */
	struct pci_device	*pci_dev;

	/*
	 * Maximum I/O size in bytes.
	 */
	uint32_t		max_xfer_size;

	/*
	 * Minimum page size supported by this controller in bytes.
	 */
	uint32_t		min_page_size;

	/*
	 * Stride in uint32_t units between doorbell registers
	 * (1 = 4 bytes, 2 = 8 bytes, ...).
	 */
	uint32_t		doorbell_stride_u32;

	uint32_t		num_aers;
	struct nvme_async_event_request	aer[NVME_MAX_ASYNC_EVENTS];
	nvme_aer_cb		aer_cb_fn;
	void			*aer_cb_arg;

	/*
	 * Guards access to the controller itself, including admin queues.
	 */
	pthread_mutex_t		lock;

	/*
	 * Admin queue pair.
	 */
	struct nvme_qpair	adminq;

	/*
	 * Identify Controller data.
	 */
	struct nvme_ctrlr_data	cdata;

	/*
	 * Array of Identify Namespace data.
	 * Stored separately from ns since nsdata should
	 * not normally be accessed during I/O.
	 */
	struct nvme_ns_data	*nsdata;

	TAILQ_HEAD(, nvme_qpair)	free_io_qpairs;
	TAILQ_HEAD(, nvme_qpair)	active_io_qpairs;

	/*
	 * Controller options set on open.
	 */
	struct nvme_ctrlr_opts	opts;

	/*
	 * BAR mapping address which contains the controller memory buffer.
	 */
	void			*cmb_bar_virt_addr;

	/*
	 * BAR physical address which contains the controller memory buffer.
	 */
	uint64_t		cmb_bar_phys_addr;

	/*
	 * Controller memory buffer size in bytes.
	 */
	uint64_t		cmb_size;

	/*
	 * Current offset of the controller memory buffer.
	 */
	uint64_t		cmb_current_offset;

	/*
	 * Quirks flags.
	 */
	unsigned int		quirks;

	/*
	 * For the controller list.
	 */
	LIST_ENTRY(nvme_ctrlr)	link;

} __attribute__((aligned(PAGE_SIZE)));

/*
 * Admin functions.
 */
extern int nvme_admin_identify_ctrlr(struct nvme_ctrlr *ctrlr,
				     struct nvme_ctrlr_data *cdata);

extern int nvme_admin_get_feature(struct nvme_ctrlr *ctrlr,
				  enum nvme_feat_sel sel,
				  enum nvme_feat feature,
				  uint32_t cdw11, uint32_t *attributes);

extern int nvme_admin_set_feature(struct nvme_ctrlr *ctrlr,
				  bool save,
				  enum nvme_feat feature,
				  uint32_t cdw11, uint32_t cdw12,
				  uint32_t *attributes);

extern int nvme_admin_format_nvm(struct nvme_ctrlr *ctrlr,
				 unsigned int nsid,
				 struct nvme_format *format);

extern int nvme_admin_get_log_page(struct nvme_ctrlr *ctrlr,
				   uint8_t log_page, uint32_t nsid,
				   void *payload, uint32_t payload_size);

extern int nvme_admin_abort_cmd(struct nvme_ctrlr *ctrlr,
				uint16_t cid, uint16_t sqid);

extern int nvme_admin_create_ioq(struct nvme_ctrlr *ctrlr,
				 struct nvme_qpair *io_que,
				 enum nvme_io_queue_type io_qtype);

extern int nvme_admin_delete_ioq(struct nvme_ctrlr *ctrlr,
				 struct nvme_qpair *qpair,
				 enum nvme_io_queue_type io_qtype);

extern int nvme_admin_identify_ns(struct nvme_ctrlr *ctrlr,
				  uint16_t nsid,
				  struct nvme_ns_data *nsdata);

extern int nvme_admin_attach_ns(struct nvme_ctrlr *ctrlr,
				uint32_t nsid,
				struct nvme_ctrlr_list *clist);

extern int nvme_admin_detach_ns(struct nvme_ctrlr *ctrlr,
				uint32_t nsid,
				struct nvme_ctrlr_list *clist);

extern int nvme_admin_create_ns(struct nvme_ctrlr *ctrlr,
				struct nvme_ns_data *nsdata,
				unsigned int *nsid);

extern int nvme_admin_delete_ns(struct nvme_ctrlr *ctrlr,
				unsigned int nsid);

extern int nvme_admin_fw_commit(struct nvme_ctrlr *ctrlr,
				const struct nvme_fw_commit *fw_commit);

extern int nvme_admin_fw_image_dl(struct nvme_ctrlr *ctrlr,
				  void *fw, uint32_t size, uint32_t offset);

extern void nvme_request_completion_poll_cb(void *arg,
					    const struct nvme_cpl *cpl);
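/*
 * Typical usage sketch (illustrative; assumes the callback copies the
 * completion into the status structure and sets done to true):
 * nvme_request_completion_poll_cb pairs with struct
 * nvme_completion_poll_status to turn an asynchronous command into a
 * synchronous one:
 *
 *	struct nvme_completion_poll_status status = { .done = false };
 *	struct nvme_request *req;
 *
 *	req = nvme_request_allocate_contig(qpair, buf, len,
 *					   nvme_request_completion_poll_cb,
 *					   &status);
 *	// fill in req->cmd, then nvme_qpair_submit_request(qpair, req)
 *
 *	while (!status.done)
 *		nvme_qpair_poll(qpair, 1);
 *
 *	// status.cpl now holds the command completion to inspect
 */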

extern struct nvme_ctrlr *nvme_ctrlr_attach(struct pci_device *pci_dev,
					    struct nvme_ctrlr_opts *opts);

extern void nvme_ctrlr_detach(struct nvme_ctrlr *ctrlr);

extern int nvme_qpair_construct(struct nvme_ctrlr *ctrlr,
				struct nvme_qpair *qpair, enum nvme_qprio qprio,
				uint16_t entries, uint16_t trackers);

extern void nvme_qpair_destroy(struct nvme_qpair *qpair);
extern void nvme_qpair_enable(struct nvme_qpair *qpair);
extern void nvme_qpair_disable(struct nvme_qpair *qpair);
extern int nvme_qpair_submit_request(struct nvme_qpair *qpair,
				     struct nvme_request *req);
extern void nvme_qpair_reset(struct nvme_qpair *qpair);
extern void nvme_qpair_fail(struct nvme_qpair *qpair);

extern unsigned int nvme_qpair_poll(struct nvme_qpair *qpair,
				    unsigned int max_completions);

extern int nvme_request_pool_construct(struct nvme_qpair *qpair);

extern void nvme_request_pool_destroy(struct nvme_qpair *qpair);

extern struct nvme_request *nvme_request_allocate(struct nvme_qpair *qpair,
		const struct nvme_payload *payload, uint32_t payload_size,
		nvme_cmd_cb cb_fn, void *cb_arg);

extern struct nvme_request *nvme_request_allocate_null(struct nvme_qpair *qpair,
							nvme_cmd_cb cb_fn,
							void *cb_arg);

extern struct nvme_request *
nvme_request_allocate_contig(struct nvme_qpair *qpair,
			     void *buffer, uint32_t payload_size,
			     nvme_cmd_cb cb_fn, void *cb_arg);

extern void nvme_request_free(struct nvme_request *req);

extern void nvme_request_add_child(struct nvme_request *parent,
				   struct nvme_request *child);

extern void nvme_request_remove_child(struct nvme_request *parent,
				      struct nvme_request *child);

extern unsigned int nvme_ctrlr_get_quirks(struct pci_device *pdev);

extern int nvme_ns_construct(struct nvme_ctrlr *ctrlr,
			     struct nvme_ns *ns, unsigned int id);

/*
 * Register MMIO access.
 */
#define nvme_reg_mmio_read_4(sc, reg)		\
	nvme_mmio_read_4((__u32 *)&(sc)->regs->reg)

#define nvme_reg_mmio_read_8(sc, reg)		\
	nvme_mmio_read_8((__u64 *)&(sc)->regs->reg)

#define nvme_reg_mmio_write_4(sc, reg, val)	\
	nvme_mmio_write_4((__u32 *)&(sc)->regs->reg, val)

#define nvme_reg_mmio_write_8(sc, reg, val)	\
	nvme_mmio_write_8((__u64 *)&(sc)->regs->reg, val)

#endif /* __NVME_INTERNAL_H__ */