/*-
 * BSD LICENSE
 *
 * Copyright (c) Intel Corporation. All rights reserved.
 * Copyright (c) 2017, Western Digital Corporation or its affiliates.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __NVME_INTERNAL_H__
#define __NVME_INTERNAL_H__

#include "nvme_common.h"
#include "nvme_pci.h"
#include "nvme_intel.h"
#include "nvme_mem.h"

#ifndef __HAIKU__
#include <sys/queue.h>	/* LIST_*() / TAILQ_*() macros; exact header name assumed (lost in the original) */
#include <sys/user.h>	/* PAGE_SIZE; exact header name assumed (lost in the original) */
#else
#include "nvme_platform.h"
#endif

/*
 * List functions.
 */
#define LIST_FOREACH_SAFE(var, head, field, tvar)		\
	for ((var) = LIST_FIRST((head));			\
	     (var) && ((tvar) = LIST_NEXT((var), field), 1);	\
	     (var) = (tvar))

/*
 * Tail queue functions.
 */
#define TAILQ_FOREACH_SAFE(var, head, field, tvar)		\
	for ((var) = TAILQ_FIRST((head));			\
	     (var) && ((tvar) = TAILQ_NEXT((var), field), 1);	\
	     (var) = (tvar))

#define INTEL_DC_P3X00_DEVID		0x09538086

#define NVME_TIMEOUT_INFINITE		UINT64_MAX

/*
 * Some Intel devices support the vendor-unique read latency log page even
 * though the log page directory says otherwise.
 */
#define NVME_INTEL_QUIRK_READ_LATENCY	0x1

/*
 * Some Intel devices support the vendor-unique write latency log page even
 * though the log page directory says otherwise.
 */
#define NVME_INTEL_QUIRK_WRITE_LATENCY	0x2

/*
 * Some controllers need a delay before starting to check the device
 * readiness, which is done by reading the controller status register
 * rdy bit.
 */
#define NVME_QUIRK_DELAY_BEFORE_CHK_RDY	0x4

/*
 * Some controllers need a delay once the controller status register rdy bit
 * switches from 0 to 1.
 */
#define NVME_QUIRK_DELAY_AFTER_RDY	0x8

/*
 * Queues may consist of a contiguous block of physical memory or
 * optionally a non-contiguous set of physical memory pages
 * (defined by a Physical Region Page List).
 */
#define NVME_MAX_PRP_LIST_ENTRIES	(506)

/*
 * For commands requiring more than 2 PRP entries, one PRP will be
 * embedded in the command (prp1), and the rest of the PRP entries
 * will be in a list pointed to by the command (prp2). This means
 * that the real maximum number of PRP entries we support is 506 + 1,
 * which results in a max xfer size of 506 * PAGE_SIZE.
 */
#define NVME_MAX_XFER_SIZE		(NVME_MAX_PRP_LIST_ENTRIES * PAGE_SIZE)
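/*
 * Illustration of the arithmetic above (hypothetical helper, not used
 * elsewhere): a transfer needs one PRP entry per memory page it touches,
 * and only the first entry (prp1) may be page-unaligned. With one embedded
 * entry plus NVME_MAX_PRP_LIST_ENTRIES list entries, a worst-case unaligned
 * buffer of NVME_MAX_XFER_SIZE bytes is still addressable.
 */
static inline unsigned int
nvme_prp_entries_for_xfer(uint64_t vaddr, uint32_t size)
{
	/* Assumes size > 0 and PAGE_SIZE is a power of two. */
	uint64_t first_page = vaddr & ~((uint64_t)PAGE_SIZE - 1);
	uint64_t last_page = (vaddr + size - 1) & ~((uint64_t)PAGE_SIZE - 1);

	return (unsigned int)((last_page - first_page) / PAGE_SIZE) + 1;
}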
#define NVME_ADMIN_TRACKERS	(16)
#define NVME_ADMIN_ENTRIES	(128)

/*
 * NVME_IO_ENTRIES defines the size of an I/O qpair's submission and completion
 * queues, while NVME_IO_TRACKERS defines the maximum number of I/O commands
 * that we will allow to be outstanding on an I/O qpair at any time. The only
 * advantage of having IO_ENTRIES > IO_TRACKERS is for debugging purposes:
 * when dumping the contents of the submission and completion queues, a longer
 * history of data is shown.
 */
#define NVME_IO_ENTRIES		(1024U)
#define NVME_IO_TRACKERS	(128U)
#define NVME_IO_ENTRIES_VS_TRACKERS_RATIO	(NVME_IO_ENTRIES / NVME_IO_TRACKERS)

/*
 * NVME_MAX_SGL_DESCRIPTORS defines the maximum number of descriptors in one
 * SGL segment.
 */
#define NVME_MAX_SGL_DESCRIPTORS	(253)

/*
 * NVME_MAX_IO_ENTRIES is not defined, since it is specified by CAP.MQES
 * for each controller.
 */

#define NVME_MAX_ASYNC_EVENTS	(8)

/*
 * NVME_MAX_IO_QUEUES in nvme_spec.h defines the 64K spec-limit, but this
 * define specifies the maximum number of queues this driver will actually
 * try to configure, if available.
 */
#define DEFAULT_MAX_IO_QUEUES	(1024)

/*
 * Maximum number of times a failed command can be retried.
 */
#define NVME_MAX_RETRY_COUNT	(3)

/*
 * I/O queue type.
 */
enum nvme_io_queue_type {

	NVME_IO_QTYPE_INVALID = 0,

	NVME_IO_SUBMISSION_QUEUE,
	NVME_IO_COMPLETION_QUEUE,
};

enum nvme_payload_type {

	NVME_PAYLOAD_TYPE_INVALID = 0,

	/*
	 * nvme_request::u.payload.contig_buffer is valid for this request.
	 */
	NVME_PAYLOAD_TYPE_CONTIG,

	/*
	 * nvme_request::u.sgl is valid for this request.
	 */
	NVME_PAYLOAD_TYPE_SGL,
};

/*
 * Controller support flags.
 */
enum nvme_ctrlr_flags {

	/*
	 * SGLs are supported.
	 */
	NVME_CTRLR_SGL_SUPPORTED = 0x1,
};

/*
 * Descriptor for a request data payload.
 *
 * This struct is arranged so that it fits nicely in struct nvme_request.
 */
struct __attribute__((packed)) nvme_payload {

	union {
		/*
		 * Virtual memory address of a single physically
		 * contiguous buffer.
		 */
		void *contig;

		/*
		 * Callback functions for retrieving physical
		 * addresses for scattered payloads.
		 */
		struct {
			nvme_req_reset_sgl_cb reset_sgl_fn;
			nvme_req_next_sge_cb next_sge_fn;
			void *cb_arg;
		} sgl;
	} u;

	/*
	 * Virtual memory address of a single physically
	 * contiguous metadata buffer.
	 */
	void *md;

	/*
	 * Payload type.
	 */
	uint8_t type;
};
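/*
 * Illustration of payload descriptor usage (hypothetical helper, not used
 * elsewhere): a single physically contiguous data buffer, with an optional
 * contiguous metadata buffer, is described as NVME_PAYLOAD_TYPE_CONTIG.
 * Scattered payloads instead set type to NVME_PAYLOAD_TYPE_SGL and fill
 * u.sgl with the reset/next-SGE callbacks and their argument.
 */
static inline struct nvme_payload
nvme_payload_contig_example(void *buffer, void *md_buffer)
{
	struct nvme_payload payload = {
		.u = { .contig = buffer },
		.md = md_buffer,
		.type = NVME_PAYLOAD_TYPE_CONTIG,
	};

	return payload;
}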
struct nvme_request {

	/*
	 * NVMe command: must be aligned on a 64B boundary.
	 */
	struct nvme_cmd cmd;

	/*
	 * Data payload for this request's command.
	 */
	struct nvme_payload payload;

	uint8_t retries;

	/*
	 * Number of child requests still outstanding for this
	 * request which was split into multiple child requests.
	 */
	uint8_t child_reqs;

	uint32_t payload_size;

	/*
	 * Offset in bytes from the beginning of payload for this request.
	 * This is used for I/O commands that are split into multiple requests.
	 */
	uint32_t payload_offset;
	uint32_t md_offset;

	nvme_cmd_cb cb_fn;
	void *cb_arg;

	/*
	 * The following members should not be reordered with members
	 * above. These members are only needed when splitting
	 * requests, which is done rarely, and the driver is careful
	 * to not touch the following fields until a split operation is
	 * needed, to avoid touching an extra cacheline.
	 */

	/*
	 * Points to the outstanding child requests for a parent request.
	 * Only valid if a request was split into multiple child
	 * requests, and is not initialized for non-split requests.
	 */
	TAILQ_HEAD(, nvme_request) children;

	/*
	 * Linked-list pointers for a child request in its parent's list.
	 */
	TAILQ_ENTRY(nvme_request) child_tailq;

	/*
	 * For queueing in qpair queued_req or free_req.
	 */
	struct nvme_qpair *qpair;
	STAILQ_ENTRY(nvme_request) stailq;

	/*
	 * Points to a parent request if part of a split request,
	 * NULL otherwise.
	 */
	struct nvme_request *parent;

	/*
	 * Completion status for a parent request. Initialized to all 0's
	 * (SUCCESS) before child requests are submitted. If a child
	 * request completes with error, the error status is copied here,
	 * to ensure that the parent request is also completed with error
	 * status once all child requests are completed.
	 */
	struct nvme_cpl parent_status;

} __attribute__((aligned(64)));

struct nvme_completion_poll_status {
	struct nvme_cpl cpl;
	bool done;
};

struct nvme_async_event_request {
	struct nvme_ctrlr *ctrlr;
	struct nvme_request *req;
	struct nvme_cpl cpl;
};

struct nvme_tracker {

	LIST_ENTRY(nvme_tracker) list;

	struct nvme_request *req;

#if INTPTR_MAX == INT32_MAX
	int32_t __pad[3];
#elif !defined(INTPTR_MAX)
# error Need definition of INTPTR_MAX!
#endif

	uint16_t cid;

	uint16_t rsvd1: 15;
	uint16_t active: 1;

	uint32_t rsvd2;

	uint64_t prp_sgl_bus_addr;

	union {
		uint64_t prp[NVME_MAX_PRP_LIST_ENTRIES];
		struct nvme_sgl_descriptor sgl[NVME_MAX_SGL_DESCRIPTORS];
	} u;

	uint64_t rsvd3;
};

/*
 * struct nvme_tracker must be exactly 4K so that the prp[] array does not
 * cross a page boundary and so that there is no padding required to meet
 * alignment requirements.
 */
nvme_static_assert(sizeof(struct nvme_tracker) == 4096,
		   "nvme_tracker is not 4K");
nvme_static_assert((offsetof(struct nvme_tracker, u.sgl) & 7) == 0,
		   "SGL must be Qword aligned");

struct nvme_qpair {

	/*
	 * Guards access to this structure.
	 */
	pthread_mutex_t lock;

	volatile uint32_t *sq_tdbl;
	volatile uint32_t *cq_hdbl;

	/*
	 * Submission queue.
	 */
	struct nvme_cmd *cmd;

	/*
	 * Completion queue.
	 */
	struct nvme_cpl *cpl;

	LIST_HEAD(, nvme_tracker) free_tr;
	LIST_HEAD(, nvme_tracker) outstanding_tr;

	/*
	 * Number of trackers in the tr array.
	 */
	uint16_t trackers;

	/*
	 * Array of trackers indexed by command ID.
	 */
	struct nvme_tracker *tr;

	struct nvme_request *reqs;
	unsigned int num_reqs;
	STAILQ_HEAD(, nvme_request) free_req;
	STAILQ_HEAD(, nvme_request) queued_req;

	uint16_t id;

	uint16_t entries;
	uint16_t sq_tail;
	uint16_t cq_head;

	uint8_t phase;

	bool enabled;
	bool sq_in_cmb;

	/*
	 * Fields below this point should not be touched on the
	 * normal I/O happy path.
	 */
	uint8_t qprio;

	struct nvme_ctrlr *ctrlr;

	/* List entry for nvme_ctrlr::free_io_qpairs and active_io_qpairs */
	TAILQ_ENTRY(nvme_qpair) tailq;

	phys_addr_t cmd_bus_addr;
	phys_addr_t cpl_bus_addr;
};
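/*
 * Completion queue phase-bit sketch (illustration only; the layout of struct
 * nvme_cpl lives in nvme_spec.h, and the status.p and cid field names below
 * are assumed from the NVMe completion entry format): the entry at cq_head
 * is valid only while its phase tag matches qpair->phase.
 *
 *	struct nvme_cpl *cpl = &qpair->cpl[qpair->cq_head];
 *
 *	if (cpl->status.p == qpair->phase) {
 *		// Valid entry: complete the tracker identified by cpl->cid,
 *		// then advance cq_head and flip the expected phase on wrap.
 *		if (++qpair->cq_head == qpair->entries) {
 *			qpair->cq_head = 0;
 *			qpair->phase = !qpair->phase;
 *		}
 *		nvme_mmio_write_4(qpair->cq_hdbl, qpair->cq_head);
 *	}
 */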
struct nvme_ns {

	struct nvme_ctrlr *ctrlr;

	uint32_t stripe_size;
	uint32_t sector_size;

	uint32_t md_size;
	uint32_t pi_type;

	uint32_t sectors_per_max_io;
	uint32_t sectors_per_stripe;

	uint16_t id;
	uint16_t flags;

	int open_count;
};

/*
 * State of struct nvme_ctrlr (in particular, during initialization).
 */
enum nvme_ctrlr_state {

	/*
	 * Controller has not been initialized yet.
	 */
	NVME_CTRLR_STATE_INIT = 0,

	/*
	 * Waiting for CSTS.RDY to transition from 0 to 1
	 * so that CC.EN may be set to 0.
	 */
	NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1,

	/*
	 * Waiting for CSTS.RDY to transition from 1 to 0
	 * so that CC.EN may be set to 1.
	 */
	NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,

	/*
	 * Waiting for CSTS.RDY to transition from 0 to 1
	 * after enabling the controller.
	 */
	NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,

	/*
	 * Controller initialization has completed and
	 * the controller is ready.
	 */
	NVME_CTRLR_STATE_READY
};

/*
 * One of these per allocated PCI device.
 */
struct nvme_ctrlr {

	/*
	 * NVMe MMIO register space.
	 */
	volatile struct nvme_registers *regs;

	/*
	 * Array of I/O queue pairs.
	 */
	struct nvme_qpair *ioq;

	/*
	 * Size of the array of I/O queue pairs.
	 */
	unsigned int io_queues;

	/*
	 * Maximum number of I/O queue pairs.
	 */
	unsigned int max_io_queues;

	/*
	 * Number of I/O queue pairs enabled.
	 */
	unsigned int enabled_io_qpairs;

	/*
	 * Maximum number of entries for I/O qpairs.
	 */
	unsigned int io_qpairs_max_entries;

	/*
	 * Number of namespaces and array of namespaces.
	 */
	unsigned int nr_ns;
	struct nvme_ns *ns;

	/*
	 * Controller state.
	 */
	bool resetting;
	bool failed;

	/*
	 * Controller support flags.
	 */
	uint64_t flags;

	/*
	 * Cold data (not accessed in the normal I/O path) is after this point.
	 */
	enum nvme_ctrlr_state state;
	uint64_t state_timeout_ms;

	/*
	 * All the log pages supported.
	 */
	bool log_page_supported[256];

	/*
	 * All the features supported.
	 */
	bool feature_supported[256];

	/*
	 * Associated PCI device information.
	 */
	struct pci_device *pci_dev;

	/*
	 * Maximum I/O size in bytes.
	 */
	uint32_t max_xfer_size;

	/*
	 * Minimum page size supported by this controller in bytes.
	 */
	uint32_t min_page_size;

	/*
	 * Stride in uint32_t units between doorbell registers
	 * (1 = 4 bytes, 2 = 8 bytes, ...).
	 */
	uint32_t doorbell_stride_u32;

	uint32_t num_aers;
	struct nvme_async_event_request aer[NVME_MAX_ASYNC_EVENTS];
	nvme_aer_cb aer_cb_fn;
	void *aer_cb_arg;

	/*
	 * Admin queue pair.
	 */
	struct nvme_qpair adminq;

	/*
	 * Guards access to the controller itself.
	 */
	pthread_mutex_t lock;

	/*
	 * Identify Controller data.
	 */
	struct nvme_ctrlr_data cdata;

	/*
	 * Array of Identify Namespace data.
	 * Stored separately from ns since nsdata should
	 * not normally be accessed during I/O.
	 */
	struct nvme_ns_data *nsdata;

	TAILQ_HEAD(, nvme_qpair) free_io_qpairs;
	TAILQ_HEAD(, nvme_qpair) active_io_qpairs;

	/*
	 * Controller options set on open.
	 */
	struct nvme_ctrlr_opts opts;

	/*
	 * BAR mapping address which contains the controller memory buffer.
	 */
	void *cmb_bar_virt_addr;

	/*
	 * BAR physical address which contains the controller memory buffer.
	 */
	uint64_t cmb_bar_phys_addr;

	/*
	 * Controller memory buffer size in bytes.
	 */
	uint64_t cmb_size;

	/*
	 * Current offset of the controller memory buffer.
	 */
	uint64_t cmb_current_offset;

	/*
	 * Quirk flags.
	 */
	unsigned int quirks;

	/*
	 * For the controller list.
	 */
	LIST_ENTRY(nvme_ctrlr) link;

} __attribute__((aligned(PAGE_SIZE)));
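/*
 * Doorbell stride sketch (illustration only; "doorbell_base" stands for the
 * address of the first doorbell register in the MMIO space, whose declaration
 * lives in nvme_spec.h): with CAP.DSTRD converted to doorbell_stride_u32 as
 * described above, queue pair "qid" uses
 *
 *	qpair->sq_tdbl = doorbell_base + (2 * qid + 0) * ctrlr->doorbell_stride_u32;
 *	qpair->cq_hdbl = doorbell_base + (2 * qid + 1) * ctrlr->doorbell_stride_u32;
 */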
/*
 * Admin functions.
 */
extern int nvme_admin_identify_ctrlr(struct nvme_ctrlr *ctrlr,
				     struct nvme_ctrlr_data *cdata);

extern int nvme_admin_get_feature(struct nvme_ctrlr *ctrlr,
				  enum nvme_feat_sel sel,
				  enum nvme_feat feature,
				  uint32_t cdw11,
				  uint32_t *attributes);

extern int nvme_admin_set_feature(struct nvme_ctrlr *ctrlr,
				  bool save,
				  enum nvme_feat feature,
				  uint32_t cdw11,
				  uint32_t cdw12,
				  uint32_t *attributes);

extern int nvme_admin_format_nvm(struct nvme_ctrlr *ctrlr,
				 unsigned int nsid,
				 struct nvme_format *format);

extern int nvme_admin_get_log_page(struct nvme_ctrlr *ctrlr,
				   uint8_t log_page, uint32_t nsid,
				   void *payload, uint32_t payload_size);

extern int nvme_admin_abort_cmd(struct nvme_ctrlr *ctrlr,
				uint16_t cid, uint16_t sqid);

extern int nvme_admin_create_ioq(struct nvme_ctrlr *ctrlr,
				 struct nvme_qpair *io_que,
				 enum nvme_io_queue_type io_qtype);

extern int nvme_admin_delete_ioq(struct nvme_ctrlr *ctrlr,
				 struct nvme_qpair *qpair,
				 enum nvme_io_queue_type io_qtype);

extern int nvme_admin_identify_ns(struct nvme_ctrlr *ctrlr,
				  uint16_t nsid,
				  struct nvme_ns_data *nsdata);

extern int nvme_admin_attach_ns(struct nvme_ctrlr *ctrlr,
				uint32_t nsid,
				struct nvme_ctrlr_list *clist);

extern int nvme_admin_detach_ns(struct nvme_ctrlr *ctrlr,
				uint32_t nsid,
				struct nvme_ctrlr_list *clist);

extern int nvme_admin_create_ns(struct nvme_ctrlr *ctrlr,
				struct nvme_ns_data *nsdata,
				unsigned int *nsid);

extern int nvme_admin_delete_ns(struct nvme_ctrlr *ctrlr,
				unsigned int nsid);

extern int nvme_admin_fw_commit(struct nvme_ctrlr *ctrlr,
				const struct nvme_fw_commit *fw_commit);

extern int nvme_admin_fw_image_dl(struct nvme_ctrlr *ctrlr,
				  void *fw, uint32_t size,
				  uint32_t offset);

extern void nvme_request_completion_poll_cb(void *arg,
					    const struct nvme_cpl *cpl);

extern struct nvme_ctrlr *nvme_ctrlr_attach(struct pci_device *pci_dev,
					    struct nvme_ctrlr_opts *opts);

extern void nvme_ctrlr_detach(struct nvme_ctrlr *ctrlr);

extern int nvme_qpair_construct(struct nvme_ctrlr *ctrlr,
				struct nvme_qpair *qpair,
				enum nvme_qprio qprio,
				uint16_t entries,
				uint16_t trackers);

extern void nvme_qpair_destroy(struct nvme_qpair *qpair);

extern void nvme_qpair_enable(struct nvme_qpair *qpair);

extern void nvme_qpair_disable(struct nvme_qpair *qpair);

extern int nvme_qpair_submit_request(struct nvme_qpair *qpair,
				     struct nvme_request *req);

extern void nvme_qpair_reset(struct nvme_qpair *qpair);

extern void nvme_qpair_fail(struct nvme_qpair *qpair);

extern int nvme_request_pool_construct(struct nvme_qpair *qpair);

extern void nvme_request_pool_destroy(struct nvme_qpair *qpair);

extern struct nvme_request *nvme_request_allocate(struct nvme_qpair *qpair,
						  const struct nvme_payload *payload,
						  uint32_t payload_size,
						  nvme_cmd_cb cb_fn,
						  void *cb_arg);

extern struct nvme_request *nvme_request_allocate_null(struct nvme_qpair *qpair,
							nvme_cmd_cb cb_fn,
							void *cb_arg);

extern struct nvme_request *nvme_request_allocate_contig(struct nvme_qpair *qpair,
							  void *buffer,
							  uint32_t payload_size,
							  nvme_cmd_cb cb_fn,
							  void *cb_arg);

extern void nvme_request_free(struct nvme_request *req);

extern void nvme_request_free_locked(struct nvme_request *req);

extern void nvme_request_add_child(struct nvme_request *parent,
				   struct nvme_request *child);

extern void nvme_request_remove_child(struct nvme_request *parent,
				      struct nvme_request *child);

extern unsigned int nvme_ctrlr_get_quirks(struct pci_device *pdev);

extern int nvme_ns_construct(struct nvme_ctrlr *ctrlr,
			     struct nvme_ns *ns,
			     unsigned int id);
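/*
 * Usage sketch for nvme_admin_identify_ctrlr() declared above (illustration
 * only, assuming the usual 0-on-success return convention): fetch the
 * Identify Controller data into a caller-provided buffer.
 *
 *	struct nvme_ctrlr_data cdata;
 *
 *	if (nvme_admin_identify_ctrlr(ctrlr, &cdata) != 0) {
 *		// Handle the admin command failure.
 *	}
 */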
/*
 * Registers mmio access.
 */
#define nvme_reg_mmio_read_4(sc, reg)		\
	nvme_mmio_read_4((__u32 *)&(sc)->regs->reg)

#define nvme_reg_mmio_read_8(sc, reg)		\
	nvme_mmio_read_8((__u64 *)&(sc)->regs->reg)

#define nvme_reg_mmio_write_4(sc, reg, val)	\
	nvme_mmio_write_4((__u32 *)&(sc)->regs->reg, val)

#define nvme_reg_mmio_write_8(sc, reg, val)	\
	nvme_mmio_write_8((__u64 *)&(sc)->regs->reg, val)

#endif /* __NVME_INTERNAL_H__ */