1 /*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation. All rights reserved.
5 * Copyright (c) 2017, Western Digital Corporation or its affiliates.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include "nvme_internal.h"
35
36 /*
37 * Host software shall wait a minimum of CAP.TO x 500 milliseconds for CSTS.RDY
38 * to be set to '1' after setting CC.EN to '1' from a previous value of '0'.
39 */
40 static inline unsigned int
41 nvme_ctrlr_get_ready_to_in_ms(struct nvme_ctrlr *ctrlr)
42 {
43 union nvme_cap_register cap;
44
45 /* The CAP.TO unit, in milliseconds */
46 #define NVME_READY_TIMEOUT_UNIT 500
47
48 cap.raw = nvme_reg_mmio_read_8(ctrlr, cap.raw);
49
50 return (NVME_READY_TIMEOUT_UNIT * cap.bits.to);
51 }
52
53 /*
54 * Create a queue pair.
55 */
56 static int nvme_ctrlr_create_qpair(struct nvme_ctrlr *ctrlr,
57 struct nvme_qpair *qpair)
58 {
59 int ret;
60
61 /* Create the completion queue */
62 ret = nvme_admin_create_ioq(ctrlr, qpair, NVME_IO_COMPLETION_QUEUE);
63 if (ret != 0) {
64 nvme_notice("Create completion queue %u failed\n",
65 qpair->id);
66 return ret;
67 }
68
69 /* Create the submission queue */
70 ret = nvme_admin_create_ioq(ctrlr, qpair, NVME_IO_SUBMISSION_QUEUE);
71 if (ret != 0) {
72 /* Attempt to delete the completion queue */
73 nvme_notice("Create submission queue %u failed\n",
74 qpair->id);
75 nvme_admin_delete_ioq(ctrlr, qpair, NVME_IO_COMPLETION_QUEUE);
76 return ret;
77 }
78
79 nvme_qpair_reset(qpair);
80
81 return 0;
82 }
83
84 /*
85 * Delete a queue pair.
86 */
87 static int nvme_ctrlr_delete_qpair(struct nvme_ctrlr *ctrlr,
88 struct nvme_qpair *qpair)
89 {
90 int ret;
91
92 /* Delete the submission queue */
93 ret = nvme_admin_delete_ioq(ctrlr, qpair, NVME_IO_SUBMISSION_QUEUE);
94 if (ret != 0) {
95 nvme_notice("Delete submission queue %u failed\n",
96 qpair->id);
97 return ret;
98 }
99
100 /* Delete the completion queue */
101 ret = nvme_admin_delete_ioq(ctrlr, qpair, NVME_IO_COMPLETION_QUEUE);
102 if (ret != 0) {
103 nvme_notice("Delete completion queue %u failed\n",
104 qpair->id);
105 return ret;
106 }
107
108 return 0;
109 }
110
111 /*
112 * Build the list of supported Intel vendor-specific log pages.
113 */
114 static void
115 nvme_ctrlr_construct_intel_support_log_page_list(struct nvme_ctrlr *ctrlr,
116 struct nvme_intel_log_page_dir *log_page_dir)
117 {
118
119 if (ctrlr->cdata.vid != NVME_PCI_VID_INTEL ||
120 log_page_dir == NULL)
121 return;
122
123 ctrlr->log_page_supported[NVME_INTEL_LOG_PAGE_DIR] = true;
124
125 if (log_page_dir->read_latency_log_len ||
126 (ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY))
127 ctrlr->log_page_supported[NVME_INTEL_LOG_READ_CMD_LATENCY] = true;
128
129 if (log_page_dir->write_latency_log_len ||
130 (ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY))
131 ctrlr->log_page_supported[NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true;
132
133 if (log_page_dir->temperature_statistics_log_len)
134 ctrlr->log_page_supported[NVME_INTEL_LOG_TEMPERATURE] = true;
135
136 if (log_page_dir->smart_log_len)
137 ctrlr->log_page_supported[NVME_INTEL_LOG_SMART] = true;
138
139 if (log_page_dir->marketing_description_log_len)
140 ctrlr->log_page_supported[NVME_INTEL_MARKETING_DESCRIPTION] = true;
141 }
142
143 /*
144 * Retrieve the Intel vendor-specific log page directory.
145 */
146 static int nvme_ctrlr_set_intel_support_log_pages(struct nvme_ctrlr *ctrlr)
147 {
148 struct nvme_intel_log_page_dir *log_page_dir;
149 int ret;
150
151 log_page_dir = nvme_zmalloc(sizeof(struct nvme_intel_log_page_dir), 64);
152 if (!log_page_dir) {
153 nvme_err("Allocate log_page_directory failed\n");
154 return ENOMEM;
155 }
156
157 ret = nvme_admin_get_log_page(ctrlr, NVME_INTEL_LOG_PAGE_DIR,
158 NVME_GLOBAL_NS_TAG,
159 log_page_dir,
160 sizeof(struct nvme_intel_log_page_dir));
161 if (ret != 0)
162 nvme_notice("Get NVME_INTEL_LOG_PAGE_DIR log page failed\n");
163 else
164 nvme_ctrlr_construct_intel_support_log_page_list(ctrlr,
165 log_page_dir);
166
167 nvme_free(log_page_dir);
168
169 return ret;
170 }
171
172 /*
173 * Initialize log page support directory.
174 */
175 static void nvme_ctrlr_set_supported_log_pages(struct nvme_ctrlr *ctrlr)
176 {
177
178 memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported));
179
180 /* Mandatory pages */
181 ctrlr->log_page_supported[NVME_LOG_ERROR] = true;
182 ctrlr->log_page_supported[NVME_LOG_HEALTH_INFORMATION] = true;
183 ctrlr->log_page_supported[NVME_LOG_FIRMWARE_SLOT] = true;
184
185 if (ctrlr->cdata.lpa.celp)
186 ctrlr->log_page_supported[NVME_LOG_COMMAND_EFFECTS_LOG] = true;
187
188 if (ctrlr->cdata.vid == NVME_PCI_VID_INTEL)
189 nvme_ctrlr_set_intel_support_log_pages(ctrlr);
190 }
191
192 /*
193 * Set Intel device features.
194 */
195 static void nvme_ctrlr_set_intel_supported_features(struct nvme_ctrlr *ctrlr)
196 {
197 bool *supported_feature = ctrlr->feature_supported;
198
199 supported_feature[NVME_INTEL_FEAT_MAX_LBA] = true;
200 supported_feature[NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true;
201 supported_feature[NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true;
202 supported_feature[NVME_INTEL_FEAT_SMBUS_ADDRESS] = true;
203 supported_feature[NVME_INTEL_FEAT_LED_PATTERN] = true;
204 supported_feature[NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true;
205 supported_feature[NVME_INTEL_FEAT_LATENCY_TRACKING] = true;
206 }
207
208 /*
209 * Set device features.
210 */
211 static void nvme_ctrlr_set_supported_features(struct nvme_ctrlr *ctrlr)
212 {
213 bool *supported_feature = ctrlr->feature_supported;
214
215 memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported));
216
217 /* Mandatory features */
218 supported_feature[NVME_FEAT_ARBITRATION] = true;
219 supported_feature[NVME_FEAT_POWER_MANAGEMENT] = true;
220 supported_feature[NVME_FEAT_TEMPERATURE_THRESHOLD] = true;
221 supported_feature[NVME_FEAT_ERROR_RECOVERY] = true;
222 supported_feature[NVME_FEAT_NUMBER_OF_QUEUES] = true;
223 supported_feature[NVME_FEAT_INTERRUPT_COALESCING] = true;
224 supported_feature[NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true;
225 supported_feature[NVME_FEAT_WRITE_ATOMICITY] = true;
226 supported_feature[NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true;
227
228 /* Optional features */
229 if (ctrlr->cdata.vwc.present)
230 supported_feature[NVME_FEAT_VOLATILE_WRITE_CACHE] = true;
231 if (ctrlr->cdata.apsta.supported)
232 supported_feature[NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION]
233 = true;
234 if (ctrlr->cdata.hmpre)
235 supported_feature[NVME_FEAT_HOST_MEM_BUFFER] = true;
236 if (ctrlr->cdata.vid == NVME_PCI_VID_INTEL)
237 nvme_ctrlr_set_intel_supported_features(ctrlr);
238 }
239
240 /*
241 * Initialize I/O queue pairs.
242 */
243 static int nvme_ctrlr_init_io_qpairs(struct nvme_ctrlr *ctrlr)
244 {
245 struct nvme_qpair *qpair;
246 union nvme_cap_register cap;
247 uint32_t i;
248
249 if (ctrlr->ioq != NULL)
250 /*
251 * io_qpairs were already constructed, so just return.
252 * This typically happens when the controller is
253 * initialized a second (or subsequent) time after a
254 * controller reset.
255 */
256 return 0;
257
258 /*
259 * NVMe spec sets a hard limit of 64K max entries, but
260 * devices may specify a smaller limit, so we need to check
261 * the MQES field in the capabilities register.
262 */
263 cap.raw = nvme_reg_mmio_read_8(ctrlr, cap.raw);
264 ctrlr->io_qpairs_max_entries =
265 nvme_min(NVME_IO_ENTRIES, (unsigned int)cap.bits.mqes + 1);
266
267 ctrlr->ioq = calloc(ctrlr->io_queues, sizeof(struct nvme_qpair));
268 if (!ctrlr->ioq)
269 return ENOMEM;
270
271 /* Keep queue pair ID 0 for the admin queue */
272 for (i = 0; i < ctrlr->io_queues; i++) {
273 qpair = &ctrlr->ioq[i];
274 qpair->id = i + 1;
275 TAILQ_INSERT_TAIL(&ctrlr->free_io_qpairs, qpair, tailq);
276 }
277
278 return 0;
279 }
280
281 /*
282 * Shutdown a controller.
283 */
284 static void nvme_ctrlr_shutdown(struct nvme_ctrlr *ctrlr)
285 {
286 union nvme_cc_register cc;
287 union nvme_csts_register csts;
288 int ms_waited = 0;
289
290 cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);
291 cc.bits.shn = NVME_SHN_NORMAL;
292 nvme_reg_mmio_write_4(ctrlr, cc.raw, cc.raw);
293
294 csts.raw = nvme_reg_mmio_read_4(ctrlr, csts.raw);
295 /*
296 * The NVMe spec does not define a timeout period for shutdown
297 * notification, so we just pick 5 seconds as a reasonable amount
298 * of time to wait before proceeding.
299 */
300 #define NVME_CTRLR_SHUTDOWN_TIMEOUT 5000
301 while (csts.bits.shst != NVME_SHST_COMPLETE) {
302 nvme_usleep(1000);
303 csts.raw = nvme_reg_mmio_read_4(ctrlr, csts.raw);
304 if (ms_waited++ >= NVME_CTRLR_SHUTDOWN_TIMEOUT)
305 break;
306 }
307
308 if (csts.bits.shst != NVME_SHST_COMPLETE)
309 nvme_err("Controller did not shutdown within %d seconds\n",
310 NVME_CTRLR_SHUTDOWN_TIMEOUT / 1000);
311 }
312
313 /*
314 * Enable a controller.
315 */
316 static int nvme_ctrlr_enable(struct nvme_ctrlr *ctrlr)
317 {
318 union nvme_cc_register cc;
319 union nvme_aqa_register aqa;
320 union nvme_cap_register cap;
321
322 cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);
323
324 if (cc.bits.en != 0) {
325 nvme_err("COntroller enable called with CC.EN = 1\n");
326 return EINVAL;
327 }
328
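	/* Program the admin submission and completion queue base addresses
	 * (ASQ/ACQ) before enabling the controller */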
329 nvme_reg_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr);
330 nvme_reg_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr);
331
332 aqa.raw = 0;
333 /* acqs and asqs are 0-based. */
334 aqa.bits.acqs = ctrlr->adminq.entries - 1;
335 aqa.bits.asqs = ctrlr->adminq.entries - 1;
336 nvme_reg_mmio_write_4(ctrlr, aqa.raw, aqa.raw);
337
338 cc.bits.en = 1;
339 cc.bits.css = 0;
340 cc.bits.shn = 0;
341 cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
342 cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */
343
344 /* Page size is 2 ^ (12 + mps). */
345 cc.bits.mps = PAGE_SHIFT - 12;
346
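	/* Check the requested arbitration mechanism against CAP.AMS:
	 * round robin is always supported, while weighted round robin and
	 * vendor-specific arbitration must be advertised by the controller */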
347 cap.raw = nvme_reg_mmio_read_8(ctrlr, cap.raw);
348
349 switch (ctrlr->opts.arb_mechanism) {
350 case NVME_CC_AMS_RR:
351 break;
352 case NVME_CC_AMS_WRR:
353 if (NVME_CAP_AMS_WRR & cap.bits.ams)
354 break;
355 return EINVAL;
356 case NVME_CC_AMS_VS:
357 if (NVME_CAP_AMS_VS & cap.bits.ams)
358 break;
359 return EINVAL;
360 default:
361 return EINVAL;
362 }
363
364 cc.bits.ams = ctrlr->opts.arb_mechanism;
365
366 nvme_reg_mmio_write_4(ctrlr, cc.raw, cc.raw);
367
368 return 0;
369 }
370
371 /*
372 * Disable a controller.
373 */
374 static inline void nvme_ctrlr_disable(struct nvme_ctrlr *ctrlr)
375 {
376 union nvme_cc_register cc;
377
378 cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);
379 cc.bits.en = 0;
380
381 nvme_reg_mmio_write_4(ctrlr, cc.raw, cc.raw);
382 }
383
384 /*
385 * Test if a controller is enabled.
386 */
387 static inline int nvme_ctrlr_enabled(struct nvme_ctrlr *ctrlr)
388 {
389 union nvme_cc_register cc;
390
391 cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);
392
393 return cc.bits.en;
394 }
395
396 /*
397 * Test if a controller is ready.
398 */
399 static inline int nvme_ctrlr_ready(struct nvme_ctrlr *ctrlr)
400 {
401 union nvme_csts_register csts;
402
403 csts.raw = nvme_reg_mmio_read_4(ctrlr, csts.raw);
404
405 return csts.bits.rdy;
406 }
407
408 /*
409 * Set a controller state.
410 */
411 static void nvme_ctrlr_set_state(struct nvme_ctrlr *ctrlr,
412 enum nvme_ctrlr_state state,
413 uint64_t timeout_in_ms)
414 {
415 ctrlr->state = state;
416 if (timeout_in_ms == NVME_TIMEOUT_INFINITE)
417 ctrlr->state_timeout_ms = NVME_TIMEOUT_INFINITE;
418 else
419 ctrlr->state_timeout_ms = nvme_time_msec() + timeout_in_ms;
420 }
421
422 /*
423 * Get the controller identify data.
424 */
425 static int nvme_ctrlr_identify(struct nvme_ctrlr *ctrlr)
426 {
427 int ret;
428
429 ret = nvme_admin_identify_ctrlr(ctrlr, &ctrlr->cdata);
430 if (ret != 0) {
431 nvme_notice("Identify controller failed\n");
432 return ret;
433 }
434
435 /*
436 * Use MDTS to ensure our default max_xfer_size doesn't
437 * exceed what the controller supports.
438 */
439 if (ctrlr->cdata.mdts > 0)
440 ctrlr->max_xfer_size = nvme_min(ctrlr->max_xfer_size,
441 ctrlr->min_page_size
442 * (1 << (ctrlr->cdata.mdts)));
443 return 0;
444 }
445
446 /*
447 * Get the maximum number of I/O queue pairs supported by the controller.
448 */
449 static int nvme_ctrlr_get_max_io_qpairs(struct nvme_ctrlr *ctrlr)
450 {
451 unsigned int cdw0, cq_allocated, sq_allocated;
452 int ret;
453
454 ret = nvme_admin_get_feature(ctrlr, NVME_FEAT_CURRENT,
455 NVME_FEAT_NUMBER_OF_QUEUES,
456 0, &cdw0);
457 if (ret != 0) {
458 nvme_notice("Get feature NVME_FEAT_NUMBER_OF_QUEUES failed\n");
459 return ret;
460 }
461
462 /*
463 * Data in cdw0 is 0-based.
464 * Lower 16-bits indicate number of submission queues allocated.
465 * Upper 16-bits indicate number of completion queues allocated.
466 */
467 sq_allocated = (cdw0 & 0xFFFF) + 1;
468 cq_allocated = (cdw0 >> 16) + 1;
469
470 ctrlr->max_io_queues = nvme_min(sq_allocated, cq_allocated);
471
472 return 0;
473 }
474
475 /*
476 * Set the number of I/O queue pairs.
477 */
478 static int nvme_ctrlr_set_num_qpairs(struct nvme_ctrlr *ctrlr)
479 {
480 unsigned int num_queues, cdw0;
481 unsigned int cq_allocated, sq_allocated;
482 int ret;
483
484 ret = nvme_ctrlr_get_max_io_qpairs(ctrlr);
485 if (ret != 0) {
486 nvme_notice("Failed to get the maximum of I/O qpairs\n");
487 return ret;
488 }
489
490 /*
491 * Format the number of I/O queues:
492 * subtract 1 as the value is 0-based;
493 * bits 31:16 specify the number of completion queues,
494 * bits 15:0 specify the number of submission queues.
495 */
496 num_queues = ((ctrlr->opts.io_queues - 1) << 16) |
497 (ctrlr->opts.io_queues - 1);
498
499 /*
500 * Set the number of I/O queues.
501 * Note: The value allocated may be smaller or larger than the number
502 * of queues requested (see specifications).
503 */
504 ret = nvme_admin_set_feature(ctrlr, false, NVME_FEAT_NUMBER_OF_QUEUES,
505 num_queues, 0, &cdw0);
506 if (ret != 0) {
507 nvme_notice("Set feature NVME_FEAT_NUMBER_OF_QUEUES failed\n");
508 return ret;
509 }
510
511 /*
512 * Data in cdw0 is 0-based.
513 * Lower 16-bits indicate number of submission queues allocated.
514 * Upper 16-bits indicate number of completion queues allocated.
515 */
516 sq_allocated = (cdw0 & 0xFFFF) + 1;
517 cq_allocated = (cdw0 >> 16) + 1;
518 ctrlr->io_queues = nvme_min(sq_allocated, cq_allocated);
519
520 /*
521 * Make sure the number of constructed qpairs listed in free_io_qpairs
522 * does not exceed the number requested.
523 */
524 ctrlr->io_queues = nvme_min(ctrlr->io_queues, ctrlr->opts.io_queues);
525
526 return 0;
527 }
528
529 static void nvme_ctrlr_destruct_namespaces(struct nvme_ctrlr *ctrlr)
530 {
531
532 if (ctrlr->ns) {
533 free(ctrlr->ns);
534 ctrlr->ns = NULL;
535 ctrlr->nr_ns = 0;
536 }
537
538 if (ctrlr->nsdata) {
539 nvme_free(ctrlr->nsdata);
540 ctrlr->nsdata = NULL;
541 }
542 }
543
544 static int nvme_ctrlr_construct_namespaces(struct nvme_ctrlr *ctrlr)
545 {
546 unsigned int i, nr_ns = ctrlr->cdata.nn;
547 struct nvme_ns *ns = NULL;
548
549 /*
550 * ctrlr->nr_ns may be 0 (startup) or a different number of
551 * namespaces (reset), so check if we need to reallocate.
552 */
553 if (nr_ns != ctrlr->nr_ns) {
554
555 nvme_ctrlr_destruct_namespaces(ctrlr);
556
557 ctrlr->ns = calloc(nr_ns, sizeof(struct nvme_ns));
558 if (!ctrlr->ns)
559 goto fail;
560
561 nvme_debug("Allocate %u namespace data\n", nr_ns);
562 ctrlr->nsdata = nvme_calloc(nr_ns, sizeof(struct nvme_ns_data),
563 PAGE_SIZE);
564 if (!ctrlr->nsdata)
565 goto fail;
566
567 ctrlr->nr_ns = nr_ns;
568
569 }
570
571 for (i = 0; i < nr_ns; i++) {
572 ns = &ctrlr->ns[i];
573 if (nvme_ns_construct(ctrlr, ns, i + 1) != 0)
574 goto fail;
575 }
576
577 return 0;
578
579 fail:
580 nvme_ctrlr_destruct_namespaces(ctrlr);
581
582 return -1;
583 }
584
585 /*
586 * Forward declaration.
587 */
588 static int nvme_ctrlr_construct_and_submit_aer(struct nvme_ctrlr *ctrlr,
589 struct nvme_async_event_request *aer);
590
591 /*
592 * Async event completion callback.
593 */
594 static void nvme_ctrlr_async_event_cb(void *arg, const struct nvme_cpl *cpl)
595 {
596 struct nvme_async_event_request *aer = arg;
597 struct nvme_ctrlr *ctrlr = aer->ctrlr;
598
599 if (cpl->status.sc == NVME_SC_ABORTED_SQ_DELETION)
600 /*
601 * This is simulated when the controller is being shut down, to
602 * effectively abort outstanding asynchronous event requests
603 * and make sure all memory is freed. Do not repost the
604 * request in this case.
605 */
606 return;
607
608 if (ctrlr->aer_cb_fn != NULL)
609 ctrlr->aer_cb_fn(ctrlr->aer_cb_arg, cpl);
610
611 /*
612 * Repost another asynchronous event request to replace
613 * the one that just completed.
614 */
615 if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer))
616 /*
617 * We can't do anything to recover from a failure here,
618 * so just print a warning message and leave the
619 * AER unsubmitted.
620 */
621 nvme_err("Initialize AER failed\n");
622 }
623
624 /*
625 * Issue an async event request.
626 */
627 static int nvme_ctrlr_construct_and_submit_aer(struct nvme_ctrlr *ctrlr,
628 struct nvme_async_event_request *aer)
629 {
630 struct nvme_request *req;
631
632 req = nvme_request_allocate_null(&ctrlr->adminq,
633 nvme_ctrlr_async_event_cb, aer);
634 if (req == NULL)
635 return -1;
636
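	/* An AER carries no data buffer and stays outstanding on the admin
	 * queue until the controller reports an event or the request is
	 * aborted (e.g. on submission queue deletion during shutdown) */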
637 aer->ctrlr = ctrlr;
638 aer->req = req;
639 req->cmd.opc = NVME_OPC_ASYNC_EVENT_REQUEST;
640
641 return nvme_qpair_submit_request(&ctrlr->adminq, req);
642 }
643
644 /*
645 * Configure async event management.
646 */
647 static int nvme_ctrlr_configure_aer(struct nvme_ctrlr *ctrlr)
648 {
649 union nvme_critical_warning_state state;
650 struct nvme_async_event_request *aer;
651 unsigned int i;
652 int ret;
653
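	/* Request notification for all critical warning conditions
	 * (the reserved bits are cleared below) */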
654 state.raw = 0xFF;
655 state.bits.reserved = 0;
656
657 ret = nvme_admin_set_feature(ctrlr, false,
658 NVME_FEAT_ASYNC_EVENT_CONFIGURATION,
659 state.raw, 0, NULL);
660 if (ret != 0) {
661 nvme_notice("Set feature ASYNC_EVENT_CONFIGURATION failed\n");
662 return ret;
663 }
664
665 /* aerl is a zero-based value, so we need to add 1 here. */
666 ctrlr->num_aers = nvme_min(NVME_MAX_ASYNC_EVENTS,
667 (ctrlr->cdata.aerl + 1));
668
669 for (i = 0; i < ctrlr->num_aers; i++) {
670 aer = &ctrlr->aer[i];
671 if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) {
672 nvme_notice("Construct AER failed\n");
673 return -1;
674 }
675 }
676
677 return 0;
678 }
679
680 /*
681 * Start a controller.
682 */
683 static int nvme_ctrlr_start(struct nvme_ctrlr *ctrlr)
684 {
685
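	/* The controller has just been (re)enabled: reset and enable the
	 * admin queue pair before issuing any admin command */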
686 nvme_qpair_reset(&ctrlr->adminq);
687 nvme_qpair_enable(&ctrlr->adminq);
688
689 if (nvme_ctrlr_identify(ctrlr) != 0)
690 return -1;
691
692 if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0)
693 return -1;
694
695 if (nvme_ctrlr_init_io_qpairs(ctrlr))
696 return -1;
697
698 if (nvme_ctrlr_construct_namespaces(ctrlr) != 0)
699 return -1;
700
701 if (nvme_ctrlr_configure_aer(ctrlr) != 0)
702 nvme_warning("controller does not support AER!\n");
703
704 nvme_ctrlr_set_supported_log_pages(ctrlr);
705 nvme_ctrlr_set_supported_features(ctrlr);
706
707 if (ctrlr->cdata.sgls.supported)
708 ctrlr->flags |= NVME_CTRLR_SGL_SUPPORTED;
709
710 return 0;
711 }
712
713 /*
714 * Memory map the controller memory buffer (CMB).
715 */
716 static void nvme_ctrlr_map_cmb(struct nvme_ctrlr *ctrlr)
717 {
718 int ret;
719 void *addr;
720 uint32_t bir;
721 union nvme_cmbsz_register cmbsz;
722 union nvme_cmbloc_register cmbloc;
723 uint64_t size, unit_size, offset, bar_size, bar_phys_addr;
724
725 cmbsz.raw = nvme_reg_mmio_read_4(ctrlr, cmbsz.raw);
726 cmbloc.raw = nvme_reg_mmio_read_4(ctrlr, cmbloc.raw);
727 if (!cmbsz.bits.sz)
728 goto out;
729
730 /* Valid BAR indicator (BIR) values are 0 and 2 to 5 */
731 bir = cmbloc.bits.bir;
732 if (bir > 5 || bir == 1)
733 goto out;
734
735 /* unit size for 4KB/64KB/1MB/16MB/256MB/4GB/64GB */
736 unit_size = (uint64_t)1 << (12 + 4 * cmbsz.bits.szu);
737
738 /* controller memory buffer size in Bytes */
739 size = unit_size * cmbsz.bits.sz;
740
741 /* controller memory buffer offset from BAR in Bytes */
742 offset = unit_size * cmbloc.bits.ofst;
743
744 nvme_pcicfg_get_bar_addr_len(ctrlr->pci_dev, bir, &bar_phys_addr,
745 &bar_size);
746
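	/* Make sure the controller memory buffer lies entirely within the
	 * reported BAR region */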
747 if (offset > bar_size)
748 goto out;
749
750 if (size > bar_size - offset)
751 goto out;
752
753 ret = nvme_pcicfg_map_bar_write_combine(ctrlr->pci_dev, bir, &addr);
754 if ((ret != 0) || addr == NULL)
755 goto out;
756
757 ctrlr->cmb_bar_virt_addr = addr;
758 ctrlr->cmb_bar_phys_addr = bar_phys_addr;
759 ctrlr->cmb_size = size;
760 ctrlr->cmb_current_offset = offset;
761
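	/* Do not place submission queues in the CMB if the controller does
	 * not support that use (CMBSZ.SQS cleared) */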
762 if (!cmbsz.bits.sqs)
763 ctrlr->opts.use_cmb_sqs = false;
764
765 return;
766
767 out:
768 ctrlr->cmb_bar_virt_addr = NULL;
769 ctrlr->opts.use_cmb_sqs = false;
770
771 return;
772 }
773
774 /*
775 * Unmap the controller memory buffer (CMB).
776 */
777 static int nvme_ctrlr_unmap_cmb(struct nvme_ctrlr *ctrlr)
778 {
779 union nvme_cmbloc_register cmbloc;
780 void *addr = ctrlr->cmb_bar_virt_addr;
781 int ret = 0;
782
783 if (addr) {
784 cmbloc.raw = nvme_reg_mmio_read_4(ctrlr, cmbloc.raw);
785 ret = nvme_pcicfg_unmap_bar(ctrlr->pci_dev, cmbloc.bits.bir,
786 addr);
787 }
788 return ret;
789 }
790
791 /*
792 * Map the controller PCI bars.
793 */
794 static int nvme_ctrlr_map_bars(struct nvme_ctrlr *ctrlr)
795 {
796 void *addr;
797 int ret;
798
799 ret = nvme_pcicfg_map_bar(ctrlr->pci_dev, 0, 0, &addr);
800 if (ret != 0 || addr == NULL) {
801 nvme_err("Map PCI device bar failed %d (%s)\n",
802 ret, strerror(ret));
803 return ret;
804 }
805
806 nvme_debug("Controller BAR mapped at %p\n", addr);
807
808 ctrlr->regs = (volatile struct nvme_registers *)addr;
809 nvme_ctrlr_map_cmb(ctrlr);
810
811 return 0;
812 }
813
814 /*
815 * Unmap the controller PCI bars.
816 */
817 static int nvme_ctrlr_unmap_bars(struct nvme_ctrlr *ctrlr)
818 {
819 void *addr = (void *)ctrlr->regs;
820 int ret;
821
822 ret = nvme_ctrlr_unmap_cmb(ctrlr);
823 if (ret != 0) {
824 nvme_err("Unmap controller side buffer failed %d\n", ret);
825 return ret;
826 }
827
828 if (addr) {
829 ret = nvme_pcicfg_unmap_bar(ctrlr->pci_dev, 0, addr);
830 if (ret != 0) {
831 nvme_err("Unmap PCI device bar failed %d\n", ret);
832 return ret;
833 }
834 }
835
836 return 0;
837 }
838
839 /*
840 * Set a controller in the failed state.
841 */
842 static void nvme_ctrlr_fail(struct nvme_ctrlr *ctrlr)
843 {
844 unsigned int i;
845
846 ctrlr->failed = true;
847
848 nvme_qpair_fail(&ctrlr->adminq);
849 if (ctrlr->ioq)
850 for (i = 0; i < ctrlr->io_queues; i++)
851 nvme_qpair_fail(&ctrlr->ioq[i]);
852 }
853
854 /*
855 * This function will be called repeatedly during initialization
856 * until the controller is ready.
857 */
858 static int nvme_ctrlr_init(struct nvme_ctrlr *ctrlr)
859 {
860 unsigned int ready_timeout_in_ms = nvme_ctrlr_get_ready_to_in_ms(ctrlr);
861 int ret;
862
863 /*
864 * Check if the current initialization step is done or has timed out.
865 */
866 switch (ctrlr->state) {
867
868 case NVME_CTRLR_STATE_INIT:
869
870 /* Begin the hardware initialization by making
871 * sure the controller is disabled. */
872 if (nvme_ctrlr_enabled(ctrlr)) {
873 /*
874 * Disable the controller to cause a reset.
875 */
876 if (!nvme_ctrlr_ready(ctrlr)) {
877 /* Wait for the controller to be ready */
878 nvme_ctrlr_set_state(ctrlr,
879 NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1,
880 ready_timeout_in_ms);
881 return 0;
882 }
883
884 /*
885 * The controller is enabled and ready.
886 * It can be immediately disabled.
887 */
888 nvme_ctrlr_disable(ctrlr);
889 nvme_ctrlr_set_state(ctrlr,
890 NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
891 ready_timeout_in_ms);
892
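			/* Some controllers need extra time after CC.EN is
			 * cleared before CSTS.RDY can be polled reliably
			 * (NVME_QUIRK_DELAY_BEFORE_CHK_RDY) */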
893 if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
894 nvme_msleep(2000);
895
896 return 0;
897 }
898
899 if (nvme_ctrlr_ready(ctrlr)) {
900 /*
901 * Controller is in the process of shutting down.
902 * We need to wait for CSTS.RDY to become 0.
903 */
904 nvme_ctrlr_set_state(ctrlr,
905 NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
906 ready_timeout_in_ms);
907 return 0;
908 }
909
910 /*
911 * Controller is currently disabled.
912 * We can jump straight to enabling it.
913 */
914 ret = nvme_ctrlr_enable(ctrlr);
915 if (ret)
916 nvme_err("Enable controller failed\n");
917 else
918 nvme_ctrlr_set_state(ctrlr,
919 NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,
920 ready_timeout_in_ms);
921 return ret;
922
923 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
924
925 if (nvme_ctrlr_ready(ctrlr)) {
926 /* CC.EN = 1 && CSTS.RDY = 1,
927 * so we can disable the controller now. */
928 nvme_ctrlr_disable(ctrlr);
929 nvme_ctrlr_set_state(ctrlr,
930 NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
931 ready_timeout_in_ms);
932 return 0;
933 }
934
935 break;
936
937 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
938
939 if (!nvme_ctrlr_ready(ctrlr)) {
940 /* CC.EN = 0 && CSTS.RDY = 0,
941 * so we can enable the controller now. */
942 ret = nvme_ctrlr_enable(ctrlr);
943 if (ret)
944 nvme_err("Enable controller failed\n");
945 else
946 nvme_ctrlr_set_state(ctrlr,
947 NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,
948 ready_timeout_in_ms);
949 return ret;
950 }
951 break;
952
953 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
954
955 if (nvme_ctrlr_ready(ctrlr)) {
956 if (ctrlr->quirks & NVME_QUIRK_DELAY_AFTER_RDY)
957 nvme_msleep(2000);
958
959 ret = nvme_ctrlr_start(ctrlr);
960 if (ret)
961 nvme_err("Start controller failed\n");
962 else
963 nvme_ctrlr_set_state(ctrlr,
964 NVME_CTRLR_STATE_READY,
965 NVME_TIMEOUT_INFINITE);
966 return ret;
967 }
968 break;
969
970 default:
971 nvme_panic("Unhandled ctrlr state %d\n", ctrlr->state);
972 nvme_ctrlr_fail(ctrlr);
973 return -1;
974 }
975
976 if ((ctrlr->state_timeout_ms != NVME_TIMEOUT_INFINITE) &&
977 (nvme_time_msec() > ctrlr->state_timeout_ms)) {
978 nvme_err("Initialization timed out in state %d\n",
979 ctrlr->state);
980 nvme_ctrlr_fail(ctrlr);
981 return -1;
982 }
983
984 return 0;
985 }
986
987 /*
988 * Reset a controller.
989 */
990 static int nvme_ctrlr_reset(struct nvme_ctrlr *ctrlr)
991 {
992 struct nvme_qpair *qpair;
993 unsigned int i;
994
995 if (ctrlr->resetting || ctrlr->failed)
996 /*
997 * Controller is already resetting or has failed. Return
998 * immediately since there is no need to kick off another
999 * reset in these cases.
1000 */
1001 return 0;
1002
1003 ctrlr->resetting = true;
1004
1005 /* Disable all queues before disabling the controller hardware. */
1006 nvme_qpair_disable(&ctrlr->adminq);
1007 for (i = 0; i < ctrlr->io_queues; i++)
1008 nvme_qpair_disable(&ctrlr->ioq[i]);
1009
1010 /* Set the state back to INIT to cause a full hardware reset. */
1011 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT,
1012 NVME_TIMEOUT_INFINITE);
1013
1014 while (ctrlr->state != NVME_CTRLR_STATE_READY) {
1015 if (nvme_ctrlr_init(ctrlr) != 0) {
1016 nvme_crit("Controller reset failed\n");
1017 nvme_ctrlr_fail(ctrlr);
1018 goto out;
1019 }
1020 }
1021
1022 /* Reinitialize qpairs */
1023 TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
1024 if (nvme_ctrlr_create_qpair(ctrlr, qpair) != 0)
1025 nvme_ctrlr_fail(ctrlr);
1026 }
1027
1028 out:
1029 ctrlr->resetting = false;
1030
1031 return ctrlr->failed ? -1 : 0;
1032 }
1033
1034 /*
1035 * Set the controller options.
1036 */
1037 static void nvme_ctrlr_set_opts(struct nvme_ctrlr *ctrlr,
1038 struct nvme_ctrlr_opts *opts)
1039 {
1040 if (opts)
1041 memcpy(&ctrlr->opts, opts, sizeof(struct nvme_ctrlr_opts));
1042 else
1043 memset(&ctrlr->opts, 0, sizeof(struct nvme_ctrlr_opts));
1044
1045 if (ctrlr->opts.io_queues == 0)
1046 ctrlr->opts.io_queues = DEFAULT_MAX_IO_QUEUES;
1047
1048 if (ctrlr->opts.io_queues > NVME_MAX_IO_QUEUES) {
1049 nvme_info("Limiting requested I/O queues %u to %d\n",
1050 ctrlr->opts.io_queues, NVME_MAX_IO_QUEUES);
1051 ctrlr->opts.io_queues = NVME_MAX_IO_QUEUES;
1052 }
1053 }
1054
1055 /*
1056 * Attach a PCI controller.
1057 */
1058 struct nvme_ctrlr *
1059 nvme_ctrlr_attach(struct pci_device *pci_dev,
1060 struct nvme_ctrlr_opts *opts)
1061 {
1062 struct nvme_ctrlr *ctrlr;
1063 union nvme_cap_register cap;
1064 uint32_t cmd_reg;
1065 int ret;
1066
1067 /* Get a new controller handle */
1068 ctrlr = malloc(sizeof(struct nvme_ctrlr));
1069 if (!ctrlr) {
1070 nvme_err("Allocate controller handle failed\n");
1071 return NULL;
1072 }
1073
1074 nvme_debug("New controller handle %p\n", ctrlr);
1075
1076 /* Initialize the handle */
1077 memset(ctrlr, 0, sizeof(struct nvme_ctrlr));
1078 ctrlr->pci_dev = pci_dev;
1079 ctrlr->resetting = false;
1080 ctrlr->failed = false;
1081 TAILQ_INIT(&ctrlr->free_io_qpairs);
1082 TAILQ_INIT(&ctrlr->active_io_qpairs);
1083 pthread_mutex_init(&ctrlr->lock, NULL);
1084 ctrlr->quirks = nvme_ctrlr_get_quirks(pci_dev);
1085
1086 nvme_ctrlr_set_state(ctrlr,
1087 NVME_CTRLR_STATE_INIT,
1088 NVME_TIMEOUT_INFINITE);
1089
1090 ret = nvme_ctrlr_map_bars(ctrlr);
1091 if (ret != 0) {
1092 nvme_err("Map controller BAR failed\n");
1093 pthread_mutex_destroy(&ctrlr->lock);
1094 free(ctrlr);
1095 return NULL;
1096 }
1097
1098 /* Enable PCI busmaster and disable INTx */
1099 nvme_pcicfg_read32(pci_dev, &cmd_reg, 4);
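	/* PCI command register: bit 2 enables bus mastering,
	 * bit 10 disables INTx interrupt generation */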
1100 cmd_reg |= 0x0404;
1101 nvme_pcicfg_write32(pci_dev, cmd_reg, 4);
1102
1103 /*
1104 * The doorbell stride is 2 ^ (2 + DSTRD) bytes. Doorbells are accessed
1105 * as 32-bit words here, so the stride in words is 2 ^ DSTRD.
1106 */
1107 cap.raw = nvme_reg_mmio_read_8(ctrlr, cap.raw);
1108 ctrlr->doorbell_stride_u32 = 1 << cap.bits.dstrd;
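	/* Minimum host memory page size is 2 ^ (12 + CAP.MPSMIN) bytes */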
1109 ctrlr->min_page_size = 1 << (12 + cap.bits.mpsmin);
1110
1111 /* Set default transfer size */
1112 ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;
1113
1114 /* Create the admin queue pair */
1115 ret = nvme_qpair_construct(ctrlr, &ctrlr->adminq, 0,
1116 NVME_ADMIN_ENTRIES, NVME_ADMIN_TRACKERS);
1117 if (ret != 0) {
1118 nvme_err("Initialize admin queue pair failed\n");
1119 goto err;
1120 }
1121
1122 /* Set options and then initialize */
1123 nvme_ctrlr_set_opts(ctrlr, opts);
1124 do {
1125 ret = nvme_ctrlr_init(ctrlr);
1126 if (ret)
1127 goto err;
1128 } while (ctrlr->state != NVME_CTRLR_STATE_READY);
1129
1130 return ctrlr;
1131
1132 err:
1133 nvme_ctrlr_detach(ctrlr);
1134
1135 return NULL;
1136 }
1137
1138 /*
1139 * Detach a PCI controller.
1140 */
1141 void nvme_ctrlr_detach(struct nvme_ctrlr *ctrlr)
1142 {
1143 struct nvme_qpair *qpair;
1144 uint32_t i;
1145
1146 while (!TAILQ_EMPTY(&ctrlr->active_io_qpairs)) {
1147 qpair = TAILQ_FIRST(&ctrlr->active_io_qpairs);
1148 nvme_ioqp_release(qpair);
1149 }
1150
1151 nvme_ctrlr_shutdown(ctrlr);
1152
1153 nvme_ctrlr_destruct_namespaces(ctrlr);
1154 if (ctrlr->ioq) {
1155 for (i = 0; i < ctrlr->io_queues; i++)
1156 nvme_qpair_destroy(&ctrlr->ioq[i]);
1157 free(ctrlr->ioq);
1158 }
1159
1160 nvme_qpair_destroy(&ctrlr->adminq);
1161
1162 nvme_ctrlr_unmap_bars(ctrlr);
1163
1164 pthread_mutex_destroy(&ctrlr->lock);
1165 free(ctrlr);
1166 }
1167
1168 /*
1169 * Get a controller feature.
1170 */
1171 int nvme_ctrlr_get_feature(struct nvme_ctrlr *ctrlr,
1172 enum nvme_feat_sel sel, enum nvme_feat feature,
1173 uint32_t cdw11,
1174 uint32_t *attributes)
1175 {
1176 int ret;
1177
1178 pthread_mutex_lock(&ctrlr->lock);
1179
1180 ret = nvme_admin_get_feature(ctrlr, sel, feature, cdw11, attributes);
1181 if (ret != 0)
1182 nvme_notice("Get feature 0x%08x failed\n",
1183 (unsigned int) feature);
1184
1185 pthread_mutex_unlock(&ctrlr->lock);
1186
1187 return ret;
1188 }
1189
1190 /*
1191 * Set a controller feature.
1192 */
1193 int nvme_ctrlr_set_feature(struct nvme_ctrlr *ctrlr,
1194 bool save, enum nvme_feat feature,
1195 uint32_t cdw11, uint32_t cdw12,
1196 uint32_t *attributes)
1197 {
1198 int ret;
1199
1200 pthread_mutex_lock(&ctrlr->lock);
1201
1202 ret = nvme_admin_set_feature(ctrlr, save, feature,
1203 cdw11, cdw12, attributes);
1204 if (ret != 0)
1205 nvme_notice("Set feature 0x%08x failed\n",
1206 (unsigned int) feature);
1207
1208 pthread_mutex_unlock(&ctrlr->lock);
1209
1210 return ret;
1211 }
1212
1213 /*
1214 * Attach a namespace.
1215 */
1216 int nvme_ctrlr_attach_ns(struct nvme_ctrlr *ctrlr, unsigned int nsid,
1217 struct nvme_ctrlr_list *clist)
1218 {
1219 int ret;
1220
1221 pthread_mutex_lock(&ctrlr->lock);
1222
1223 ret = nvme_admin_attach_ns(ctrlr, nsid, clist);
1224 if (ret) {
1225 nvme_notice("Attach namespace %u failed\n", nsid);
1226 goto out;
1227 }
1228
1229 ret = nvme_ctrlr_reset(ctrlr);
1230 if (ret != 0)
1231 nvme_notice("Reset controller failed\n");
1232
1233 out:
1234 pthread_mutex_unlock(&ctrlr->lock);
1235
1236 return ret;
1237 }
1238
1239 /*
1240 * Detach a namespace.
1241 */
1242 int nvme_ctrlr_detach_ns(struct nvme_ctrlr *ctrlr, unsigned int nsid,
1243 struct nvme_ctrlr_list *clist)
1244 {
1245 int ret;
1246
1247 pthread_mutex_lock(&ctrlr->lock);
1248
1249 ret = nvme_admin_detach_ns(ctrlr, nsid, clist);
1250 if (ret != 0) {
1251 nvme_notice("Detach namespace %u failed\n", nsid);
1252 goto out;
1253 }
1254
1255 ret = nvme_ctrlr_reset(ctrlr);
1256 if (ret)
1257 nvme_notice("Reset controller failed\n");
1258
1259 out:
1260 pthread_mutex_unlock(&ctrlr->lock);
1261
1262 return ret;
1263 }
1264
1265 /*
1266 * Create a namespace.
1267 */
1268 unsigned int nvme_ctrlr_create_ns(struct nvme_ctrlr *ctrlr,
1269 struct nvme_ns_data *nsdata)
1270 {
1271 unsigned int nsid;
1272 int ret;
1273
1274 pthread_mutex_lock(&ctrlr->lock);
1275
1276 ret = nvme_admin_create_ns(ctrlr, nsdata, &nsid);
1277 if (ret != 0) {
1278 nvme_notice("Create namespace failed\n");
1279 nsid = 0;
1280 }
1281
1282 pthread_mutex_unlock(&ctrlr->lock);
1283
1284 return nsid;
1285 }
1286
1287 /*
1288 * Delete a namespace.
1289 */
1290 int nvme_ctrlr_delete_ns(struct nvme_ctrlr *ctrlr, unsigned int nsid)
1291 {
1292 int ret;
1293
1294 pthread_mutex_lock(&ctrlr->lock);
1295
1296 ret = nvme_admin_delete_ns(ctrlr, nsid);
1297 if (ret != 0) {
1298 nvme_notice("Delete namespace %u failed\n", nsid);
1299 goto out;
1300 }
1301
1302 ret = nvme_ctrlr_reset(ctrlr);
1303 if (ret)
1304 nvme_notice("Reset controller failed\n");
1305
1306 out:
1307 pthread_mutex_unlock(&ctrlr->lock);
1308
1309 return ret;
1310 }
1311
1312 /*
1313 * Format NVM media.
1314 */
1315 int nvme_ctrlr_format_ns(struct nvme_ctrlr *ctrlr, unsigned int nsid,
1316 struct nvme_format *format)
1317 {
1318 int ret;
1319
1320 pthread_mutex_lock(&ctrlr->lock);
1321
1322 ret = nvme_admin_format_nvm(ctrlr, nsid, format);
1323 if (ret != 0) {
1324 if (nsid == NVME_GLOBAL_NS_TAG)
1325 nvme_notice("Format device failed\n");
1326 else
1327 nvme_notice("Format namespace %u failed\n", nsid);
1328 goto out;
1329 }
1330
1331 ret = nvme_ctrlr_reset(ctrlr);
1332 if (ret)
1333 nvme_notice("Reset controller failed\n");
1334
1335 out:
1336 pthread_mutex_unlock(&ctrlr->lock);
1337
1338 return ret;
1339 }
1340
1341 /*
1342 * Update a device firmware.
1343 */
1344 int nvme_ctrlr_update_firmware(struct nvme_ctrlr *ctrlr,
1345 void *fw, size_t size, int slot)
1346 {
1347 struct nvme_fw_commit fw_commit;
1348 unsigned int size_remaining = size, offset = 0, transfer;
1349 void *f = fw;
1350 int ret;
1351
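	/* Firmware images are downloaded in dword units, so the image size
	 * must be a multiple of 4 bytes */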
1352 if (size & 0x3) {
1353 nvme_err("Invalid firmware size\n");
1354 return EINVAL;
1355 }
1356
1357 pthread_mutex_lock(&ctrlr->lock);
1358
1359 /* Download firmware */
1360 while (size_remaining > 0) {
1361
1362 transfer = nvme_min(size_remaining, ctrlr->min_page_size);
1363
1364 ret = nvme_admin_fw_image_dl(ctrlr, f, transfer, offset);
1365 if (ret != 0) {
1366 nvme_err("Download FW (%u B at %u) failed\n",
1367 transfer, offset);
1368 goto out;
1369 }
1370
1371 f += transfer;
1372 offset += transfer;
1373 size_remaining -= transfer;
1374
1375 }
1376
1377 /* Commit firmware */
1378 memset(&fw_commit, 0, sizeof(struct nvme_fw_commit));
1379 fw_commit.fs = slot;
1380 fw_commit.ca = NVME_FW_COMMIT_REPLACE_IMG;
1381
1382 ret = nvme_admin_fw_commit(ctrlr, &fw_commit);
1383 if (ret != 0) {
1384 nvme_err("Commit downloaded FW (%zu B) failed\n",
1385 size);
1386 goto out;
1387 }
1388
1389 ret = nvme_ctrlr_reset(ctrlr);
1390 if (ret)
1391 nvme_notice("Reset controller failed\n");
1392
1393 out:
1394 pthread_mutex_unlock(&ctrlr->lock);
1395
1396 return ret;
1397 }
1398
1399 /*
1400 * Get an unused I/O queue pair.
1401 */
1402 struct nvme_qpair *nvme_ioqp_get(struct nvme_ctrlr *ctrlr,
1403 enum nvme_qprio qprio, unsigned int qd)
1404 {
1405 struct nvme_qpair *qpair = NULL;
1406 union nvme_cc_register cc;
1407 uint32_t trackers;
1408 int ret;
1409
1410 cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);
1411
1412 /* Only the low 2 bits (values 0, 1, 2, 3) of QPRIO are valid. */
1413 if ((qprio & 3) != qprio)
1414 return NULL;
1415
1416 /*
1417 * Only value NVME_QPRIO_URGENT(0) is valid for the
1418 * default round robin arbitration method.
1419 */
1420 if ((cc.bits.ams == NVME_CC_AMS_RR) && (qprio != NVME_QPRIO_URGENT)) {
1421 nvme_err("Invalid queue priority for default round "
1422 "robin arbitration method\n");
1423 return NULL;
1424 }
1425
1426 /* The number of entries of an I/O qpair must be in the range [2, io_qpairs_max_entries] */
1427 if (qd == 1) {
1428 nvme_err("Invalid queue depth\n");
1429 return NULL;
1430 }
1431
1432 if (qd == 0 || qd > ctrlr->io_qpairs_max_entries)
1433 qd = ctrlr->io_qpairs_max_entries;
1434
1435 /*
1436 * No need to have more trackers than entries in the submit queue.
1437 * Note also that for a queue size of N, we can only have (N-1)
1438 * commands outstanding, hence the "-1" here.
1439 */
1440 trackers = nvme_min(NVME_IO_TRACKERS, (qd - 1));
1441
1442 pthread_mutex_lock(&ctrlr->lock);
1443
1444 /* Get the first available qpair structure */
1445 qpair = TAILQ_FIRST(&ctrlr->free_io_qpairs);
1446 if (qpair == NULL) {
1447 /* No free queue IDs */
1448 nvme_err("No free I/O queue pairs\n");
1449 goto out;
1450 }
1451
1452 /* Construct the qpair */
1453 ret = nvme_qpair_construct(ctrlr, qpair, qprio, qd, trackers);
1454 if (ret != 0) {
1455 nvme_qpair_destroy(qpair);
1456 qpair = NULL;
1457 goto out;
1458 }
1459
1460 /*
1461 * At this point, qpair contains a preallocated submission
1462 * and completion queue and a unique queue ID, but it is not
1463 * yet created on the controller.
1464 * Fill out the submission queue priority and send out the
1465 * Create I/O Queue commands.
1466 */
1467 if (nvme_ctrlr_create_qpair(ctrlr, qpair) != 0) {
1468 nvme_err("Create queue pair on the controller failed\n");
1469 nvme_qpair_destroy(qpair);
1470 qpair = NULL;
1471 goto out;
1472 }
1473
1474 TAILQ_REMOVE(&ctrlr->free_io_qpairs, qpair, tailq);
1475 TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq);
1476
1477 out:
1478 pthread_mutex_unlock(&ctrlr->lock);
1479
1480 return qpair;
1481 }
1482
1483 /*
1484 * Free an I/O queue pair.
1485 */
1486 int nvme_ioqp_release(struct nvme_qpair *qpair)
1487 {
1488 struct nvme_ctrlr *ctrlr;
1489 int ret;
1490
1491 if (qpair == NULL)
1492 return 0;
1493
1494 ctrlr = qpair->ctrlr;
1495
1496 pthread_mutex_lock(&ctrlr->lock);
1497
1498 /* Delete the I/O submission and completion queues */
1499 ret = nvme_ctrlr_delete_qpair(ctrlr, qpair);
1500 if (ret != 0) {
1501 nvme_notice("Delete queue pair %u failed\n", qpair->id);
1502 } else {
1503 TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq);
1504 TAILQ_INSERT_HEAD(&ctrlr->free_io_qpairs, qpair, tailq);
1505 }
1506
1507 pthread_mutex_unlock(&ctrlr->lock);
1508
1509 return ret;
1510 }
1511
1512 /*
1513 * Submit an NVMe command using the specified I/O queue pair.
1514 */
1515 int nvme_ioqp_submit_cmd(struct nvme_qpair *qpair,
1516 struct nvme_cmd *cmd,
1517 void *buf, size_t len,
1518 nvme_cmd_cb cb_fn, void *cb_arg)
1519 {
1520 struct nvme_request *req;
1521 int ret = ENOMEM;
1522
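	/* Wrap the command and the caller's contiguous data buffer in a
	 * request and hand it to the queue pair for submission */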
1523 req = nvme_request_allocate_contig(qpair, buf, len, cb_fn, cb_arg);
1524 if (req) {
1525 memcpy(&req->cmd, cmd, sizeof(req->cmd));
1526 ret = nvme_qpair_submit_request(qpair, req);
1527 }
1528
1529 return ret;
1530 }
1531