xref: /haiku/src/add-ons/kernel/drivers/disk/nvme/libnvme/nvme_ctrlr.c (revision 625dc38a28c0f6345adfdd4eb0b89b340a420ac0)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation. All rights reserved.
5  *   Copyright (c) 2017, Western Digital Corporation or its affiliates.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *       * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "nvme_internal.h"
35 
36 /*
37  * Host software shall wait a minimum of CAP.TO x 500 milliseconds for CSTS.RDY
38  * to be set to '1' after setting CC.EN to '1' from a previous value of '0'.
39  */
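/*
 * Illustrative example (assumed register value, not from a specific device):
 * a controller reporting CAP.TO = 10 allows up to 10 x 500 ms = 5000 ms,
 * which is what nvme_ctrlr_get_ready_to_in_ms() below would return.
 */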
40 static inline unsigned int
41 nvme_ctrlr_get_ready_to_in_ms(struct nvme_ctrlr *ctrlr)
42 {
43 	union nvme_cap_register	cap;
44 
45 /* The CAP.TO timeout unit, in milliseconds */
46 #define NVME_READY_TIMEOUT_UNIT 500
47 
48 	cap.raw = nvme_reg_mmio_read_8(ctrlr, cap.raw);
49 
50 	return (NVME_READY_TIMEOUT_UNIT * cap.bits.to);
51 }
52 
53 /*
54  * Create a queue pair.
55  */
56 static int nvme_ctrlr_create_qpair(struct nvme_ctrlr *ctrlr,
57 				   struct nvme_qpair *qpair)
58 {
59 	int ret;
60 
61 	/* Create the completion queue */
62 	ret = nvme_admin_create_ioq(ctrlr, qpair, NVME_IO_COMPLETION_QUEUE);
63 	if (ret != 0) {
64 		nvme_notice("Create completion queue %u failed\n",
65 			    qpair->id);
66 		return ret;
67 	}
68 
69 	/* Create the submission queue */
70 	ret = nvme_admin_create_ioq(ctrlr, qpair, NVME_IO_SUBMISSION_QUEUE);
71 	if (ret != 0) {
72 		/* Attempt to delete the completion queue */
73 		nvme_notice("Create submission queue %u failed\n",
74 			    qpair->id);
75 		nvme_admin_delete_ioq(ctrlr, qpair, NVME_IO_COMPLETION_QUEUE);
76 		return ret;
77 	}
78 
79 	nvme_qpair_reset(qpair);
80 
81 	return 0;
82 }
83 
84 /*
85  * Delete a queue pair.
86  */
87 static int nvme_ctrlr_delete_qpair(struct nvme_ctrlr *ctrlr,
88 				   struct nvme_qpair *qpair)
89 {
90 	int ret;
91 
92 	/* Delete the submission queue */
93 	ret = nvme_admin_delete_ioq(ctrlr, qpair, NVME_IO_SUBMISSION_QUEUE);
94 	if (ret != 0) {
95 		nvme_notice("Delete submission queue %u failed\n",
96 			    qpair->id);
97 		return ret;
98 	}
99 
100 	/* Delete the completion queue */
101 	ret = nvme_admin_delete_ioq(ctrlr, qpair, NVME_IO_COMPLETION_QUEUE);
102 	if (ret != 0) {
103 		nvme_notice("Delete completion queue %u failed\n",
104 			    qpair->id);
105 		return ret;
106 	}
107 
108 	return 0;
109 }
110 
111 /*
112  * Intel log page.
113  */
114 static void
115 nvme_ctrlr_construct_intel_support_log_page_list(struct nvme_ctrlr *ctrlr,
116 				struct nvme_intel_log_page_dir *log_page_dir)
117 {
118 
119 	if (ctrlr->cdata.vid != NVME_PCI_VID_INTEL ||
120 	    log_page_dir == NULL)
121 		return;
122 
123 	ctrlr->log_page_supported[NVME_INTEL_LOG_PAGE_DIR] = true;
124 
125 	if (log_page_dir->read_latency_log_len ||
126 	    (ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY))
127 		ctrlr->log_page_supported[NVME_INTEL_LOG_READ_CMD_LATENCY] = true;
128 
129 	if (log_page_dir->write_latency_log_len ||
130 	    (ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY))
131 		ctrlr->log_page_supported[NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true;
132 
133 	if (log_page_dir->temperature_statistics_log_len)
134 		ctrlr->log_page_supported[NVME_INTEL_LOG_TEMPERATURE] = true;
135 
136 	if (log_page_dir->smart_log_len)
137 		ctrlr->log_page_supported[NVME_INTEL_LOG_SMART] = true;
138 
139 	if (log_page_dir->marketing_description_log_len)
140 		ctrlr->log_page_supported[NVME_INTEL_MARKETING_DESCRIPTION] = true;
141 }
142 
143 /*
144  * Intel log page.
145  */
146 static int nvme_ctrlr_set_intel_support_log_pages(struct nvme_ctrlr *ctrlr)
147 {
148 	struct nvme_intel_log_page_dir *log_page_dir;
149 	int ret;
150 
151 	log_page_dir = nvme_zmalloc(sizeof(struct nvme_intel_log_page_dir), 64);
152 	if (!log_page_dir) {
153 		nvme_err("Allocate log_page_directory failed\n");
154 		return ENOMEM;
155 	}
156 
157 	ret = nvme_admin_get_log_page(ctrlr, NVME_INTEL_LOG_PAGE_DIR,
158 				      NVME_GLOBAL_NS_TAG,
159 				      log_page_dir,
160 				      sizeof(struct nvme_intel_log_page_dir));
161 	if (ret != 0)
162 		nvme_notice("Get NVME_INTEL_LOG_PAGE_DIR log page failed\n");
163 	else
164 		nvme_ctrlr_construct_intel_support_log_page_list(ctrlr,
165 								 log_page_dir);
166 
167 	nvme_free(log_page_dir);
168 
169 	return ret;
170 }
171 
172 /*
173  * Initialize log page support directory.
174  */
175 static void nvme_ctrlr_set_supported_log_pages(struct nvme_ctrlr *ctrlr)
176 {
177 
178 	memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported));
179 
180 	/* Mandatory pages */
181 	ctrlr->log_page_supported[NVME_LOG_ERROR] = true;
182 	ctrlr->log_page_supported[NVME_LOG_HEALTH_INFORMATION] = true;
183 	ctrlr->log_page_supported[NVME_LOG_FIRMWARE_SLOT] = true;
184 
185 	if (ctrlr->cdata.lpa.celp)
186 		ctrlr->log_page_supported[NVME_LOG_COMMAND_EFFECTS_LOG] = true;
187 
188 	if (ctrlr->cdata.vid == NVME_PCI_VID_INTEL)
189 		nvme_ctrlr_set_intel_support_log_pages(ctrlr);
190 }
191 
192 /*
193  * Set Intel device features.
194  */
195 static void nvme_ctrlr_set_intel_supported_features(struct nvme_ctrlr *ctrlr)
196 {
197 	bool *supported_feature = ctrlr->feature_supported;
198 
199 	supported_feature[NVME_INTEL_FEAT_MAX_LBA] = true;
200 	supported_feature[NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true;
201 	supported_feature[NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true;
202 	supported_feature[NVME_INTEL_FEAT_SMBUS_ADDRESS] = true;
203 	supported_feature[NVME_INTEL_FEAT_LED_PATTERN] = true;
204 	supported_feature[NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true;
205 	supported_feature[NVME_INTEL_FEAT_LATENCY_TRACKING] = true;
206 }
207 
208 /*
209  * Set device features.
210  */
211 static void nvme_ctrlr_set_supported_features(struct nvme_ctrlr *ctrlr)
212 {
213 	bool *supported_feature = ctrlr->feature_supported;
214 
215 	memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported));
216 
217 	/* Mandatory features */
218 	supported_feature[NVME_FEAT_ARBITRATION] = true;
219 	supported_feature[NVME_FEAT_POWER_MANAGEMENT] = true;
220 	supported_feature[NVME_FEAT_TEMPERATURE_THRESHOLD] = true;
221 	supported_feature[NVME_FEAT_ERROR_RECOVERY] = true;
222 	supported_feature[NVME_FEAT_NUMBER_OF_QUEUES] = true;
223 	supported_feature[NVME_FEAT_INTERRUPT_COALESCING] = true;
224 	supported_feature[NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true;
225 	supported_feature[NVME_FEAT_WRITE_ATOMICITY] = true;
226 	supported_feature[NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true;
227 
228 	/* Optional features */
229 	if (ctrlr->cdata.vwc.present)
230 		supported_feature[NVME_FEAT_VOLATILE_WRITE_CACHE] = true;
231 	if (ctrlr->cdata.apsta.supported)
232 		supported_feature[NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION]
233 			= true;
234 	if (ctrlr->cdata.hmpre)
235 		supported_feature[NVME_FEAT_HOST_MEM_BUFFER] = true;
236 	if (ctrlr->cdata.vid == NVME_PCI_VID_INTEL)
237 		nvme_ctrlr_set_intel_supported_features(ctrlr);
238 }
239 
240 /*
241  * Initialize I/O queue pairs.
242  */
243 static int nvme_ctrlr_init_io_qpairs(struct nvme_ctrlr *ctrlr)
244 {
245 	struct nvme_qpair *qpair;
246 	union nvme_cap_register	cap;
247 	uint32_t i;
248 
249 	if (ctrlr->ioq != NULL)
250 		/*
251 		 * io_qpairs were already constructed, so just return.
252 		 * This typically happens when the controller is
253 		 * initialized a second (or subsequent) time after a
254 		 * controller reset.
255 		 */
256 		return 0;
257 
258 	/*
259 	 * NVMe spec sets a hard limit of 64K max entries, but
260 	 * devices may specify a smaller limit, so we need to check
261 	 * the MQES field in the capabilities register.
262 	 */
263 	cap.raw = nvme_reg_mmio_read_8(ctrlr, cap.raw);
264 	ctrlr->io_qpairs_max_entries =
265 		nvme_min(NVME_IO_ENTRIES, (unsigned int)cap.bits.mqes + 1);
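	/*
	 * Example (assumed value): a device reporting CAP.MQES = 1023
	 * supports 1024 entries per queue, so the effective limit becomes
	 * min(NVME_IO_ENTRIES, 1024).
	 */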
266 
267 	ctrlr->ioq = calloc(ctrlr->io_queues, sizeof(struct nvme_qpair));
268 	if (!ctrlr->ioq)
269 		return ENOMEM;
270 
271 	/* Keep queue pair ID 0 for the admin queue */
272 	for (i = 0; i < ctrlr->io_queues; i++) {
273 		qpair = &ctrlr->ioq[i];
274 		qpair->id = i + 1;
275 		TAILQ_INSERT_TAIL(&ctrlr->free_io_qpairs, qpair, tailq);
276 	}
277 
278 	return 0;
279 }
280 
281 /*
282  * Shutdown a controller.
283  */
284 static void nvme_ctrlr_shutdown(struct nvme_ctrlr *ctrlr)
285 {
286 	union nvme_cc_register	cc;
287 	union nvme_csts_register csts;
288 	int ms_waited = 0;
289 
290 	cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);
291 	cc.bits.shn = NVME_SHN_NORMAL;
292 	nvme_reg_mmio_write_4(ctrlr, cc.raw, cc.raw);
293 
294 	csts.raw = nvme_reg_mmio_read_4(ctrlr, csts.raw);
295 	/*
296 	 * The NVMe spec does not define a timeout period for shutdown
297 	 * notification, so we just pick 5 seconds as a reasonable amount
298 	 * of time to wait before proceeding.
299 	 */
300 #define NVME_CTRLR_SHUTDOWN_TIMEOUT 5000
301 	while (csts.bits.shst != NVME_SHST_COMPLETE) {
302 		nvme_usleep(1000);
303 		csts.raw = nvme_reg_mmio_read_4(ctrlr, csts.raw);
304 		if (ms_waited++ >= NVME_CTRLR_SHUTDOWN_TIMEOUT)
305 			break;
306 	}
307 
308 	if (csts.bits.shst != NVME_SHST_COMPLETE)
309 		nvme_err("Controller did not shutdown within %d seconds\n",
310 			 NVME_CTRLR_SHUTDOWN_TIMEOUT / 1000);
311 }
312 
313 /*
314  * Enable a controller.
315  */
316 static int nvme_ctrlr_enable(struct nvme_ctrlr *ctrlr)
317 {
318 	union nvme_cc_register	cc;
319 	union nvme_aqa_register	aqa;
320 	union nvme_cap_register	cap;
321 
322 	cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);
323 
324 	if (cc.bits.en != 0) {
325 		nvme_err("COntroller enable called with CC.EN = 1\n");
326 		return EINVAL;
327 	}
328 
329 	nvme_reg_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr);
330 	nvme_reg_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr);
331 
332 	aqa.raw = 0;
333 	/* acqs and asqs are 0-based. */
334 	aqa.bits.acqs = ctrlr->adminq.entries - 1;
335 	aqa.bits.asqs = ctrlr->adminq.entries - 1;
336 	nvme_reg_mmio_write_4(ctrlr, aqa.raw, aqa.raw);
337 
338 	cc.bits.en = 1;
339 	cc.bits.css = 0;
340 	cc.bits.shn = 0;
341 	cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
342 	cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */
343 
344 	/* Page size is 2 ^ (12 + mps). */
345 	cc.bits.mps = PAGE_SHIFT - 12;
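	/*
	 * For example, with 4 KiB host pages (PAGE_SHIFT == 12) this yields
	 * MPS = 0, i.e. a 2^12 = 4096 byte controller memory page size.
	 */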
346 
347 	cap.raw = nvme_reg_mmio_read_8(ctrlr, cap.raw);
348 
349 	switch (ctrlr->opts.arb_mechanism) {
350 	case NVME_CC_AMS_RR:
351 		break;
352 	case NVME_CC_AMS_WRR:
353 		if (NVME_CAP_AMS_WRR & cap.bits.ams)
354 			break;
355 		return EINVAL;
356 	case NVME_CC_AMS_VS:
357 		if (NVME_CAP_AMS_VS & cap.bits.ams)
358 			break;
359 		return EINVAL;
360 	default:
361 		return EINVAL;
362 	}
363 
364 	cc.bits.ams = ctrlr->opts.arb_mechanism;
365 
366 	nvme_reg_mmio_write_4(ctrlr, cc.raw, cc.raw);
367 
368 	return 0;
369 }
370 
371 /*
372  * Disable a controller.
373  */
374 static inline void nvme_ctrlr_disable(struct nvme_ctrlr *ctrlr)
375 {
376 	union nvme_cc_register cc;
377 
378 	cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);
379 	cc.bits.en = 0;
380 
381 	nvme_reg_mmio_write_4(ctrlr, cc.raw, cc.raw);
382 }
383 
384 /*
385  * Test if a controller is enabled.
386  */
387 static inline int nvme_ctrlr_enabled(struct nvme_ctrlr *ctrlr)
388 {
389 	union nvme_cc_register cc;
390 
391 	cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);
392 
393 	return cc.bits.en;
394 }
395 
396 /*
397  * Test if a controller is ready.
398  */
399 static inline int nvme_ctrlr_ready(struct nvme_ctrlr *ctrlr)
400 {
401 	union nvme_csts_register csts;
402 
403 	csts.raw = nvme_reg_mmio_read_4(ctrlr, csts.raw);
404 
405 	return csts.bits.rdy;
406 }
407 
408 /*
409  * Set a controller state.
410  */
411 static void nvme_ctrlr_set_state(struct nvme_ctrlr *ctrlr,
412 				 enum nvme_ctrlr_state state,
413 				 uint64_t timeout_in_ms)
414 {
415 	ctrlr->state = state;
416 	if (timeout_in_ms == NVME_TIMEOUT_INFINITE)
417 		ctrlr->state_timeout_ms = NVME_TIMEOUT_INFINITE;
418 	else
419 		ctrlr->state_timeout_ms = nvme_time_msec() + timeout_in_ms;
420 }
421 
422 /*
423  * Get the controller identify data.
424  */
425 static int nvme_ctrlr_identify(struct nvme_ctrlr *ctrlr)
426 {
427 	int ret;
428 
429 	ret = nvme_admin_identify_ctrlr(ctrlr, &ctrlr->cdata);
430 	if (ret != 0) {
431 		nvme_notice("Identify controller failed\n");
432 		return ret;
433 	}
434 
435 	/*
436 	 * Use MDTS to ensure our default max_xfer_size doesn't
437 	 * exceed what the controller supports.
438 	 */
439 	if (ctrlr->cdata.mdts > 0)
440 		ctrlr->max_xfer_size = nvme_min(ctrlr->max_xfer_size,
441 						ctrlr->min_page_size
442 						* (1 << (ctrlr->cdata.mdts)));
443 	return 0;
444 }
445 
446 /*
447  * Get the maximum number of I/O queue pairs.
448  */
449 static int nvme_ctrlr_get_max_io_qpairs(struct nvme_ctrlr *ctrlr)
450 {
451 	unsigned int cdw0, cq_allocated, sq_allocated;
452 	int ret;
453 
454 	ret = nvme_admin_get_feature(ctrlr, NVME_FEAT_CURRENT,
455 				     NVME_FEAT_NUMBER_OF_QUEUES,
456 				     0, &cdw0);
457 	if (ret != 0) {
458 		nvme_notice("Get feature NVME_FEAT_NUMBER_OF_QUEUES failed\n");
459 		return ret;
460 	}
461 
462 	/*
463 	 * Data in cdw0 is 0-based.
464 	 * Lower 16-bits indicate number of submission queues allocated.
465 	 * Upper 16-bits indicate number of completion queues allocated.
466 	 */
467 	sq_allocated = (cdw0 & 0xFFFF) + 1;
468 	cq_allocated = (cdw0 >> 16) + 1;
469 
470 	ctrlr->max_io_queues = nvme_min(sq_allocated, cq_allocated);
471 
472 	return 0;
473 }
474 
475 /*
476  * Set the number of I/O queue pairs.
477  */
478 static int nvme_ctrlr_set_num_qpairs(struct nvme_ctrlr *ctrlr)
479 {
480 	unsigned int num_queues, cdw0;
481 	unsigned int cq_allocated, sq_allocated;
482 	int ret;
483 
484 	ret = nvme_ctrlr_get_max_io_qpairs(ctrlr);
485 	if (ret != 0) {
486 		nvme_notice("Failed to get the maximum of I/O qpairs\n");
487 		return ret;
488 	}
489 
490 	/*
491 	 * Format the number of I/O queues:
492 	 * subtract 1 as the value is 0-based,
493 	 * bits 31:16 hold the number of completion queues,
494 	 * bits 15:0 hold the number of submission queues.
495 	 */
496 	num_queues = ((ctrlr->opts.io_queues - 1) << 16) |
497 		(ctrlr->opts.io_queues - 1);
498 
499 	/*
500 	 * Set the number of I/O queues.
501 	 * Note: The value allocated may be smaller or larger than the number
502 	 * of queues requested (see specifications).
503 	 */
504 	ret = nvme_admin_set_feature(ctrlr, false, NVME_FEAT_NUMBER_OF_QUEUES,
505 				     num_queues, 0, &cdw0);
506 	if (ret != 0) {
507 		nvme_notice("Set feature NVME_FEAT_NUMBER_OF_QUEUES failed\n");
508 		return ret;
509 	}
510 
511 	/*
512 	 * Data in cdw0 is 0-based.
513 	 * Lower 16-bits indicate number of submission queues allocated.
514 	 * Upper 16-bits indicate number of completion queues allocated.
515 	 */
516 	sq_allocated = (cdw0 & 0xFFFF) + 1;
517 	cq_allocated = (cdw0 >> 16) + 1;
518 	ctrlr->io_queues = nvme_min(sq_allocated, cq_allocated);
519 
520 	/*
521 	 * Make sure the number of constructed qpair listed in free_io_qpairs
522 	 * will not be more than the requested one.
523 	 */
524 	ctrlr->io_queues = nvme_min(ctrlr->io_queues, ctrlr->opts.io_queues);
525 
526 	return 0;
527 }
528 
529 static void nvme_ctrlr_destruct_namespaces(struct nvme_ctrlr *ctrlr)
530 {
531 
532 	if (ctrlr->ns) {
533 		free(ctrlr->ns);
534 		ctrlr->ns = NULL;
535 		ctrlr->nr_ns = 0;
536 	}
537 
538 	if (ctrlr->nsdata) {
539 		nvme_free(ctrlr->nsdata);
540 		ctrlr->nsdata = NULL;
541 	}
542 }
543 
544 static int nvme_ctrlr_construct_namespaces(struct nvme_ctrlr *ctrlr)
545 {
546 	unsigned int i, nr_ns = ctrlr->cdata.nn;
547 	struct nvme_ns *ns = NULL;
548 
549 	/*
550 	 * ctrlr->nr_ns may be 0 (startup) or a different number of
551 	 * namespaces (reset), so check if we need to reallocate.
552 	 */
553 	if (nr_ns != ctrlr->nr_ns) {
554 
555 		nvme_ctrlr_destruct_namespaces(ctrlr);
556 
557 		ctrlr->ns = calloc(nr_ns, sizeof(struct nvme_ns));
558 		if (!ctrlr->ns)
559 			goto fail;
560 
561 		nvme_debug("Allocate %u namespace data\n", nr_ns);
562 		ctrlr->nsdata = nvme_calloc(nr_ns, sizeof(struct nvme_ns_data),
563 					    PAGE_SIZE);
564 		if (!ctrlr->nsdata)
565 			goto fail;
566 
567 		ctrlr->nr_ns = nr_ns;
568 
569 	}
570 
571 	for (i = 0; i < nr_ns; i++) {
572 		ns = &ctrlr->ns[i];
573 		if (nvme_ns_construct(ctrlr, ns, i + 1) != 0)
574 			goto fail;
575 	}
576 
577 	return 0;
578 
579 fail:
580 	nvme_ctrlr_destruct_namespaces(ctrlr);
581 
582 	return -1;
583 }
584 
585 /*
586  * Forward declaration.
587  */
588 static int nvme_ctrlr_construct_and_submit_aer(struct nvme_ctrlr *ctrlr,
589 				struct nvme_async_event_request *aer);
590 
591 /*
592  * Async event completion callback.
593  */
594 static void nvme_ctrlr_async_event_cb(void *arg, const struct nvme_cpl *cpl)
595 {
596 	struct nvme_async_event_request	*aer = arg;
597 	struct nvme_ctrlr *ctrlr = aer->ctrlr;
598 
599 	if (cpl->status.sc == NVME_SC_ABORTED_SQ_DELETION)
600 		/*
601 		 *  This is simulated when controller is being shut down, to
602 		 *  effectively abort outstanding asynchronous event requests
603 		 *  and make sure all memory is freed. Do not repost the
604 		 *  request in this case.
605 		 */
606 		return;
607 
608 	if (ctrlr->aer_cb_fn != NULL)
609 		ctrlr->aer_cb_fn(ctrlr->aer_cb_arg, cpl);
610 
611 	/*
612 	 * Repost another asynchronous event request to replace
613 	 * the one that just completed.
614 	 */
615 	if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer))
616 		/*
617 		 * We can't do anything to recover from a failure here,
618 		 * so just print a warning message and leave the
619 		 * AER unsubmitted.
620 		 */
621 		nvme_err("Initialize AER failed\n");
622 }
623 
624 /*
625  * Issue an async event request.
626  */
627 static int nvme_ctrlr_construct_and_submit_aer(struct nvme_ctrlr *ctrlr,
628 					       struct nvme_async_event_request *aer)
629 {
630 	struct nvme_request *req;
631 
632 	req = nvme_request_allocate_null(&ctrlr->adminq,
633 					 nvme_ctrlr_async_event_cb, aer);
634 	if (req == NULL)
635 		return -1;
636 
637 	aer->ctrlr = ctrlr;
638 	aer->req = req;
639 	req->cmd.opc = NVME_OPC_ASYNC_EVENT_REQUEST;
640 
641 	return nvme_qpair_submit_request(&ctrlr->adminq, req);
642 }
643 
644 /*
645  * Configure async event management.
646  */
647 static int nvme_ctrlr_configure_aer(struct nvme_ctrlr *ctrlr)
648 {
649 	union nvme_critical_warning_state state;
650 	struct nvme_async_event_request	*aer;
651 	unsigned int i;
652 	int ret;
653 
654 	state.raw = 0xFF;
655 	state.bits.reserved = 0;
656 
657 	ret =  nvme_admin_set_feature(ctrlr, false,
658 				      NVME_FEAT_ASYNC_EVENT_CONFIGURATION,
659 				      state.raw, 0, NULL);
660 	if (ret != 0) {
661 		nvme_notice("Set feature ASYNC_EVENT_CONFIGURATION failed\n");
662 		return ret;
663 	}
664 
665 	/* aerl is a zero-based value, so we need to add 1 here. */
666 	ctrlr->num_aers = nvme_min(NVME_MAX_ASYNC_EVENTS,
667 				   (ctrlr->cdata.aerl + 1));
668 
669 	for (i = 0; i < ctrlr->num_aers; i++) {
670 		aer = &ctrlr->aer[i];
671 		if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) {
672 			nvme_notice("Construct AER failed\n");
673 			return -1;
674 		}
675 	}
676 
677 	return 0;
678 }
679 
680 /*
681  * Start a controller.
682  */
683 static int nvme_ctrlr_start(struct nvme_ctrlr *ctrlr)
684 {
685 
686 	nvme_qpair_reset(&ctrlr->adminq);
687 	nvme_qpair_enable(&ctrlr->adminq);
688 
689 	if (nvme_ctrlr_identify(ctrlr) != 0)
690 		return -1;
691 
692 	if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0)
693 		return -1;
694 
695 	if (nvme_ctrlr_init_io_qpairs(ctrlr))
696 		return -1;
697 
698 	if (nvme_ctrlr_construct_namespaces(ctrlr) != 0)
699 		return -1;
700 
701 	if (nvme_ctrlr_configure_aer(ctrlr) != 0)
702 		nvme_warning("controller does not support AER!\n");
703 
704 	nvme_ctrlr_set_supported_log_pages(ctrlr);
705 	nvme_ctrlr_set_supported_features(ctrlr);
706 
707 	if (ctrlr->cdata.sgls.supported)
708 		ctrlr->flags |= NVME_CTRLR_SGL_SUPPORTED;
709 
710 	return 0;
711 }
712 
713 /*
714  * Memory map the controller memory buffer (CMB).
715  */
716 static void nvme_ctrlr_map_cmb(struct nvme_ctrlr *ctrlr)
717 {
718 	int ret;
719 	void *addr;
720 	uint32_t bir;
721 	union nvme_cmbsz_register cmbsz;
722 	union nvme_cmbloc_register cmbloc;
723 	uint64_t size, unit_size, offset, bar_size, bar_phys_addr;
724 
725 	cmbsz.raw = nvme_reg_mmio_read_4(ctrlr, cmbsz.raw);
726 	cmbloc.raw = nvme_reg_mmio_read_4(ctrlr, cmbloc.raw);
727 	if (!cmbsz.bits.sz)
728 		goto out;
729 
730 	/* Valid BAR indicator (BIR) values are 0 and 2 to 5 */
731 	bir = cmbloc.bits.bir;
732 	if (bir > 5 || bir == 1)
733 		goto out;
734 
735 	/* unit size for 4KB/64KB/1MB/16MB/256MB/4GB/64GB */
736 	unit_size = (uint64_t)1 << (12 + 4 * cmbsz.bits.szu);
737 
738 	/* controller memory buffer size in Bytes */
739 	size = unit_size * cmbsz.bits.sz;
740 
741 	/* controller memory buffer offset from BAR in Bytes */
742 	offset = unit_size * cmbloc.bits.ofst;
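	/*
	 * Example with assumed register values: CMBSZ.SZU = 2 selects 1 MiB
	 * units, so CMBSZ.SZ = 64 describes a 64 MiB buffer, and
	 * CMBLOC.OFST = 16 places it 16 MiB into the BAR.
	 */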
743 
744 	nvme_pcicfg_get_bar_addr_len(ctrlr->pci_dev, bir, &bar_phys_addr,
745 				     &bar_size);
746 
747 	if (offset > bar_size)
748 		goto out;
749 
750 	if (size > bar_size - offset)
751 		goto out;
752 
753 	ret = nvme_pcicfg_map_bar_write_combine(ctrlr->pci_dev, bir, &addr);
754 	if ((ret != 0) || addr == NULL)
755 		goto out;
756 
757 	ctrlr->cmb_bar_virt_addr = addr;
758 	ctrlr->cmb_bar_phys_addr = bar_phys_addr;
759 	ctrlr->cmb_size = size;
760 	ctrlr->cmb_current_offset = offset;
761 
762 	if (!cmbsz.bits.sqs)
763 		ctrlr->opts.use_cmb_sqs = false;
764 
765 	return;
766 
767 out:
768 	ctrlr->cmb_bar_virt_addr = NULL;
769 	ctrlr->opts.use_cmb_sqs = false;
770 
771 	return;
772 }
773 
774 /*
775  * Unmap the controller memory buffer (CMB).
776  */
777 static int nvme_ctrlr_unmap_cmb(struct nvme_ctrlr *ctrlr)
778 {
779 	union nvme_cmbloc_register cmbloc;
780 	void *addr = ctrlr->cmb_bar_virt_addr;
781 	int ret = 0;
782 
783 	if (addr) {
784 		cmbloc.raw = nvme_reg_mmio_read_4(ctrlr, cmbloc.raw);
785 		ret = nvme_pcicfg_unmap_bar(ctrlr->pci_dev, cmbloc.bits.bir,
786 					    addr);
787 	}
788 	return ret;
789 }
790 
791 /*
792  * Map the controller PCI bars.
793  */
794 static int nvme_ctrlr_map_bars(struct nvme_ctrlr *ctrlr)
795 {
796 	void *addr;
797 	int ret;
798 
799 	ret = nvme_pcicfg_map_bar(ctrlr->pci_dev, 0, 0, &addr);
800 	if (ret != 0 || addr == NULL) {
801 		nvme_err("Map PCI device bar failed %d (%s)\n",
802 			 ret, strerror(ret));
803 		return ret;
804 	}
805 
806 	nvme_debug("Controller BAR mapped at %p\n", addr);
807 
808 	ctrlr->regs = (volatile struct nvme_registers *)addr;
809 	nvme_ctrlr_map_cmb(ctrlr);
810 
811 	return 0;
812 }
813 
814 /*
815  * Unmap the controller PCI bars.
816  */
817 static int nvme_ctrlr_unmap_bars(struct nvme_ctrlr *ctrlr)
818 {
819 	void *addr = (void *)ctrlr->regs;
820 	int ret;
821 
822 	ret = nvme_ctrlr_unmap_cmb(ctrlr);
823 	if (ret != 0) {
824 		nvme_err("Unmap controller side buffer failed %d\n", ret);
825 		return ret;
826 	}
827 
828 	if (addr) {
829 		ret = nvme_pcicfg_unmap_bar(ctrlr->pci_dev, 0, addr);
830 		if (ret != 0) {
831 			nvme_err("Unmap PCI device bar failed %d\n", ret);
832 			return ret;
833 		}
834 	}
835 
836 	return 0;
837 }
838 
839 /*
840  * Set a controller in the failed state.
841  */
842 static void nvme_ctrlr_fail(struct nvme_ctrlr *ctrlr)
843 {
844 	unsigned int i;
845 
846 	ctrlr->failed = true;
847 
848 	nvme_qpair_fail(&ctrlr->adminq);
849 	if (ctrlr->ioq)
850 		for (i = 0; i < ctrlr->io_queues; i++)
851 			nvme_qpair_fail(&ctrlr->ioq[i]);
852 }
853 
854 /*
855  * This function will be called repeatedly during initialization
856  * until the controller is ready.
857  */
858 static int nvme_ctrlr_init(struct nvme_ctrlr *ctrlr)
859 {
860 	unsigned int ready_timeout_in_ms = nvme_ctrlr_get_ready_to_in_ms(ctrlr);
861 	int ret;
862 
863 	/*
864 	 * Check if the current initialization step is done or has timed out.
865 	 */
866 	switch (ctrlr->state) {
867 
868 	case NVME_CTRLR_STATE_INIT:
869 
870 		/* Begin the hardware initialization by making
871 		 * sure the controller is disabled. */
872 		if (nvme_ctrlr_enabled(ctrlr)) {
873 			/*
874 			 * Disable the controller to cause a reset.
875 			 */
876 			if (!nvme_ctrlr_ready(ctrlr)) {
877 				/* Wait for the controller to be ready */
878 				nvme_ctrlr_set_state(ctrlr,
879 				      NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1,
880 				      ready_timeout_in_ms);
881 				return 0;
882 			}
883 
884 			/*
885 			 * The controller is enabled and ready.
886 			 * It can be disabled immediately.
887 			 */
888 			nvme_ctrlr_disable(ctrlr);
889 			nvme_ctrlr_set_state(ctrlr,
890 				      NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
891 				      ready_timeout_in_ms);
892 
893 			if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
894 				nvme_msleep(2000);
895 
896 			return 0;
897 		}
898 
899 		if (nvme_ctrlr_ready(ctrlr)) {
900 			/*
901 			 * Controller is in the process of shutting down.
902 			 * We need to wait for CSTS.RDY to become 0.
903 			 */
904 			nvme_ctrlr_set_state(ctrlr,
905 				      NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
906 				      ready_timeout_in_ms);
907 			return 0;
908 		}
909 
910 		/*
911 		 * Controller is currently disabled.
912 		 * We can jump straight to enabling it.
913 		 */
914 		ret = nvme_ctrlr_enable(ctrlr);
915 		if (ret)
916 			nvme_err("Enable controller failed\n");
917 		else
918 			nvme_ctrlr_set_state(ctrlr,
919 				       NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,
920 				       ready_timeout_in_ms);
921 		return ret;
922 
923 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
924 
925 		if (nvme_ctrlr_ready(ctrlr)) {
926 			/* CC.EN = 1 && CSTS.RDY = 1,
927 			 * so we can disable the controller now. */
928 			nvme_ctrlr_disable(ctrlr);
929 			nvme_ctrlr_set_state(ctrlr,
930 				      NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
931 				      ready_timeout_in_ms);
932 			return 0;
933 		}
934 
935 		break;
936 
937 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
938 
939 		if (!nvme_ctrlr_ready(ctrlr)) {
940 			/* CC.EN = 0 && CSTS.RDY = 0,
941 			 * so we can enable the controller now. */
942 			ret = nvme_ctrlr_enable(ctrlr);
943 			if (ret)
944 				nvme_err("Enable controller failed\n");
945 			else
946 				nvme_ctrlr_set_state(ctrlr,
947 				       NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,
948 				       ready_timeout_in_ms);
949 			return ret;
950 		}
951 		break;
952 
953 	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
954 
955 		if (nvme_ctrlr_ready(ctrlr)) {
956 			if (ctrlr->quirks & NVME_QUIRK_DELAY_AFTER_RDY)
957 				nvme_msleep(2000);
958 
959 			ret = nvme_ctrlr_start(ctrlr);
960 			if (ret)
961 				nvme_err("Start controller failed\n");
962 			else
963 				nvme_ctrlr_set_state(ctrlr,
964 						     NVME_CTRLR_STATE_READY,
965 						     NVME_TIMEOUT_INFINITE);
966 			return ret;
967 		}
968 		break;
969 
970 	default:
971 		nvme_panic("Unhandled ctrlr state %d\n", ctrlr->state);
972 		nvme_ctrlr_fail(ctrlr);
973 		return -1;
974 	}
975 
976 	if ((ctrlr->state_timeout_ms != NVME_TIMEOUT_INFINITE) &&
977 	    (nvme_time_msec() > ctrlr->state_timeout_ms)) {
978 		nvme_err("Initialization timed out in state %d\n",
979 			 ctrlr->state);
980 		nvme_ctrlr_fail(ctrlr);
981 		return -1;
982 	}
983 
984 	return 0;
985 }
986 
987 /*
988  * Reset a controller.
989  */
990 static int nvme_ctrlr_reset(struct nvme_ctrlr *ctrlr)
991 {
992 	struct nvme_qpair *qpair;
993 	unsigned int i;
994 
995 	if (ctrlr->resetting || ctrlr->failed)
996 		/*
997 		 * Controller is already resetting or has failed. Return
998 		 * immediately since there is no need to kick off another
999 		 * reset in these cases.
1000 		 */
1001 		return 0;
1002 
1003 	ctrlr->resetting = true;
1004 
1005 	/* Disable all queues before disabling the controller hardware. */
1006 	nvme_qpair_disable(&ctrlr->adminq);
1007 	for (i = 0; i < ctrlr->io_queues; i++)
1008 		nvme_qpair_disable(&ctrlr->ioq[i]);
1009 
1010 	/* Set the state back to INIT to cause a full hardware reset. */
1011 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT,
1012 			     NVME_TIMEOUT_INFINITE);
1013 
1014 	while (ctrlr->state != NVME_CTRLR_STATE_READY) {
1015 		if (nvme_ctrlr_init(ctrlr) != 0) {
1016 			nvme_crit("Controller reset failed\n");
1017 			nvme_ctrlr_fail(ctrlr);
1018 			goto out;
1019 		}
1020 	}
1021 
1022 	/* Reinitialize qpairs */
1023 	TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
1024 		if (nvme_ctrlr_create_qpair(ctrlr, qpair) != 0)
1025 			nvme_ctrlr_fail(ctrlr);
1026 	}
1027 
1028 out:
1029 	ctrlr->resetting = false;
1030 
1031 	return ctrlr->failed ? -1 : 0;
1032 }
1033 
1034 /*
1035  * Set controller options.
1036  */
1037 static void nvme_ctrlr_set_opts(struct nvme_ctrlr *ctrlr,
1038 				struct nvme_ctrlr_opts *opts)
1039 {
1040 	if (opts)
1041 		memcpy(&ctrlr->opts, opts, sizeof(struct nvme_ctrlr_opts));
1042 	else
1043 		memset(&ctrlr->opts, 0, sizeof(struct nvme_ctrlr_opts));
1044 
1045 	if (ctrlr->opts.io_queues == 0)
1046 		ctrlr->opts.io_queues = DEFAULT_MAX_IO_QUEUES;
1047 
1048 	if (ctrlr->opts.io_queues > NVME_MAX_IO_QUEUES) {
1049 		nvme_info("Limiting requested I/O queues %u to %d\n",
1050 			  ctrlr->opts.io_queues, NVME_MAX_IO_QUEUES);
1051 		ctrlr->opts.io_queues = NVME_MAX_IO_QUEUES;
1052 	}
1053 }
1054 
1055 /*
1056  * Attach a PCI controller.
1057  */
1058 struct nvme_ctrlr *
1059 nvme_ctrlr_attach(struct pci_device *pci_dev,
1060 		  struct nvme_ctrlr_opts *opts)
1061 {
1062 	struct nvme_ctrlr *ctrlr;
1063 	union nvme_cap_register	cap;
1064 	uint32_t cmd_reg;
1065 	int ret;
1066 
1067 	/* Get a new controller handle */
1068 	ctrlr = malloc(sizeof(struct nvme_ctrlr));
1069 	if (!ctrlr) {
1070 		nvme_err("Allocate controller handle failed\n");
1071 		return NULL;
1072 	}
1073 
1074 	nvme_debug("New controller handle %p\n", ctrlr);
1075 
1076 	/* Initialize the handle */
1077 	memset(ctrlr, 0, sizeof(struct nvme_ctrlr));
1078 	ctrlr->pci_dev = pci_dev;
1079 	ctrlr->resetting = false;
1080 	ctrlr->failed = false;
1081 	TAILQ_INIT(&ctrlr->free_io_qpairs);
1082 	TAILQ_INIT(&ctrlr->active_io_qpairs);
1083 	pthread_mutex_init(&ctrlr->lock, NULL);
1084 	ctrlr->quirks = nvme_ctrlr_get_quirks(pci_dev);
1085 
1086 	nvme_ctrlr_set_state(ctrlr,
1087 			     NVME_CTRLR_STATE_INIT,
1088 			     NVME_TIMEOUT_INFINITE);
1089 
1090 	ret = nvme_ctrlr_map_bars(ctrlr);
1091 	if (ret != 0) {
1092 		nvme_err("Map controller BAR failed\n");
1093 		pthread_mutex_destroy(&ctrlr->lock);
1094 		free(ctrlr);
1095 		return NULL;
1096 	}
1097 
1098 	/* Enable PCI busmaster and disable INTx */
1099 	nvme_pcicfg_read32(pci_dev, &cmd_reg, 4);
1100 	cmd_reg |= 0x0404;
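	/*
	 * 0x0404 sets PCI command register bit 2 (Bus Master Enable) and
	 * bit 10 (Interrupt Disable).
	 */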
1101 	nvme_pcicfg_write32(pci_dev, cmd_reg, 4);
1102 
1103 	/*
1104 	 * Doorbell stride is 2 ^ (dstrd + 2),
1105 	 * but we want multiples of 4, so drop the + 2.
1106 	 */
1107 	cap.raw = nvme_reg_mmio_read_8(ctrlr, cap.raw);
1108 	ctrlr->doorbell_stride_u32 = 1 << cap.bits.dstrd;
1109 	ctrlr->min_page_size = 1 << (12 + cap.bits.mpsmin);
1110 
1111 	/* Set default transfer size */
1112 	ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;
1113 
1114 	/* Create the admin queue pair */
1115 	ret = nvme_qpair_construct(ctrlr, &ctrlr->adminq, 0,
1116 				   NVME_ADMIN_ENTRIES, NVME_ADMIN_TRACKERS);
1117 	if (ret != 0) {
1118 		nvme_err("Initialize admin queue pair failed\n");
1119 		goto err;
1120 	}
1121 
1122 	/* Set options and then initialize */
1123 	nvme_ctrlr_set_opts(ctrlr, opts);
1124 	do {
1125 		ret = nvme_ctrlr_init(ctrlr);
1126 		if (ret)
1127 			goto err;
1128 	} while (ctrlr->state != NVME_CTRLR_STATE_READY);
1129 
1130 	return ctrlr;
1131 
1132 err:
1133 	nvme_ctrlr_detach(ctrlr);
1134 
1135 	return NULL;
1136 }
1137 
1138 /*
1139  * Detach a PCI controller.
1140  */
1141 void nvme_ctrlr_detach(struct nvme_ctrlr *ctrlr)
1142 {
1143 	struct nvme_qpair *qpair;
1144 	uint32_t i;
1145 
1146 	while (!TAILQ_EMPTY(&ctrlr->active_io_qpairs)) {
1147 		qpair = TAILQ_FIRST(&ctrlr->active_io_qpairs);
1148 		nvme_ioqp_release(qpair);
1149 	}
1150 
1151 	nvme_ctrlr_shutdown(ctrlr);
1152 
1153 	nvme_ctrlr_destruct_namespaces(ctrlr);
1154 	if (ctrlr->ioq) {
1155 		for (i = 0; i < ctrlr->io_queues; i++)
1156 			nvme_qpair_destroy(&ctrlr->ioq[i]);
1157 		free(ctrlr->ioq);
1158 	}
1159 
1160 	nvme_qpair_destroy(&ctrlr->adminq);
1161 
1162 	nvme_ctrlr_unmap_bars(ctrlr);
1163 
1164 	pthread_mutex_destroy(&ctrlr->lock);
1165 	free(ctrlr);
1166 }
1167 
1168 /*
1169  * Get a controller feature.
1170  */
1171 int nvme_ctrlr_get_feature(struct nvme_ctrlr *ctrlr,
1172 			   enum nvme_feat_sel sel, enum nvme_feat feature,
1173 			   uint32_t cdw11,
1174 			   uint32_t *attributes)
1175 {
1176 	int ret;
1177 
1178 	pthread_mutex_lock(&ctrlr->lock);
1179 
1180 	ret = nvme_admin_get_feature(ctrlr, sel, feature, cdw11, attributes);
1181 	if (ret != 0)
1182 		nvme_notice("Get feature 0x%08x failed\n",
1183 			    (unsigned int) feature);
1184 
1185 	pthread_mutex_unlock(&ctrlr->lock);
1186 
1187 	return ret;
1188 }
1189 
1190 /*
1191  * Set a controller feature.
1192  */
1193 int nvme_ctrlr_set_feature(struct nvme_ctrlr *ctrlr,
1194 			   bool save, enum nvme_feat feature,
1195 			   uint32_t cdw11, uint32_t cdw12,
1196 			   uint32_t *attributes)
1197 {
1198 	int ret;
1199 
1200 	pthread_mutex_lock(&ctrlr->lock);
1201 
1202 	ret = nvme_admin_set_feature(ctrlr, save, feature,
1203 				     cdw11, cdw12, attributes);
1204 	if (ret != 0)
1205 		nvme_notice("Set feature 0x%08x failed\n",
1206 			    (unsigned int) feature);
1207 
1208 	pthread_mutex_unlock(&ctrlr->lock);
1209 
1210 	return ret;
1211 }
1212 
1213 /*
1214  * Attach a namespace.
1215  */
1216 int nvme_ctrlr_attach_ns(struct nvme_ctrlr *ctrlr, unsigned int nsid,
1217 			 struct nvme_ctrlr_list *clist)
1218 {
1219 	int ret;
1220 
1221 	pthread_mutex_lock(&ctrlr->lock);
1222 
1223 	ret = nvme_admin_attach_ns(ctrlr, nsid, clist);
1224 	if (ret) {
1225 		nvme_notice("Attach namespace %u failed\n", nsid);
1226 		goto out;
1227 	}
1228 
1229 	ret = nvme_ctrlr_reset(ctrlr);
1230 	if (ret != 0)
1231 		nvme_notice("Reset controller failed\n");
1232 
1233 out:
1234 	pthread_mutex_unlock(&ctrlr->lock);
1235 
1236 	return ret;
1237 }
1238 
1239 /*
1240  * Detach a namespace.
1241  */
1242 int nvme_ctrlr_detach_ns(struct nvme_ctrlr *ctrlr, unsigned int nsid,
1243 			 struct nvme_ctrlr_list *clist)
1244 {
1245 	int ret;
1246 
1247 	pthread_mutex_lock(&ctrlr->lock);
1248 
1249 	ret = nvme_admin_detach_ns(ctrlr, nsid, clist);
1250 	if (ret != 0) {
1251 		nvme_notice("Detach namespace %u failed\n", nsid);
1252 		goto out;
1253 	}
1254 
1255 	ret = nvme_ctrlr_reset(ctrlr);
1256 	if (ret)
1257 		nvme_notice("Reset controller failed\n");
1258 
1259 out:
1260 	pthread_mutex_unlock(&ctrlr->lock);
1261 
1262 	return ret;
1263 }
1264 
1265 /*
1266  * Create a namespace.
1267  */
1268 unsigned int nvme_ctrlr_create_ns(struct nvme_ctrlr *ctrlr,
1269 				  struct nvme_ns_data *nsdata)
1270 {
1271 	unsigned int nsid;
1272 	int ret;
1273 
1274 	pthread_mutex_lock(&ctrlr->lock);
1275 
1276 	ret = nvme_admin_create_ns(ctrlr, nsdata, &nsid);
1277 	if (ret != 0) {
1278 		nvme_notice("Create namespace failed\n");
1279 		nsid = 0;
1280 	}
1281 
1282 	pthread_mutex_unlock(&ctrlr->lock);
1283 
1284 	return nsid;
1285 }
1286 
1287 /*
1288  * Delete a namespace.
1289  */
1290 int nvme_ctrlr_delete_ns(struct nvme_ctrlr *ctrlr, unsigned int nsid)
1291 {
1292 	int ret;
1293 
1294 	pthread_mutex_lock(&ctrlr->lock);
1295 
1296 	ret = nvme_admin_delete_ns(ctrlr, nsid);
1297 	if (ret != 0) {
1298 		nvme_notice("Delete namespace %u failed\n", nsid);
1299 		goto out;
1300 	}
1301 
1302 	ret = nvme_ctrlr_reset(ctrlr);
1303 	if (ret)
1304 		nvme_notice("Reset controller failed\n");
1305 
1306 out:
1307 	pthread_mutex_unlock(&ctrlr->lock);
1308 
1309 	return ret;
1310 }
1311 
1312 /*
1313  * Format NVM media.
1314  */
1315 int nvme_ctrlr_format_ns(struct nvme_ctrlr *ctrlr, unsigned int nsid,
1316 			 struct nvme_format *format)
1317 {
1318 	int ret;
1319 
1320 	pthread_mutex_lock(&ctrlr->lock);
1321 
1322 	ret = nvme_admin_format_nvm(ctrlr, nsid, format);
1323 	if (ret != 0) {
1324 		if (nsid == NVME_GLOBAL_NS_TAG)
1325 			nvme_notice("Format device failed\n");
1326 		else
1327 			nvme_notice("Format namespace %u failed\n", nsid);
1328 		goto out;
1329 	}
1330 
1331 	ret = nvme_ctrlr_reset(ctrlr);
1332 	if (ret)
1333 		nvme_notice("Reset controller failed\n");
1334 
1335 out:
1336 	pthread_mutex_unlock(&ctrlr->lock);
1337 
1338 	return ret;
1339 }
1340 
1341 /*
1342  * Update a device firmware.
1343  */
1344 int nvme_ctrlr_update_firmware(struct nvme_ctrlr *ctrlr,
1345 			       void *fw, size_t size, int slot)
1346 {
1347 	struct nvme_fw_commit fw_commit;
1348 	unsigned int size_remaining = size, offset = 0, transfer;
1349 	void *f = fw;
1350 	int ret;
1351 
1352 	if (size & 0x3) {
1353 		nvme_err("Invalid firmware size\n");
1354 		return EINVAL;
1355 	}
1356 
1357 	pthread_mutex_lock(&ctrlr->lock);
1358 
1359 	/* Download firmware */
1360 	while (size_remaining > 0) {
1361 
1362 		transfer = nvme_min(size_remaining, ctrlr->min_page_size);
1363 
1364 		ret = nvme_admin_fw_image_dl(ctrlr, f, transfer, offset);
1365 		if (ret != 0) {
1366 			nvme_err("Download FW (%u B at %u) failed\n",
1367 				 transfer, offset);
1368 			goto out;
1369 		}
1370 
1371 		f += transfer;
1372 		offset += transfer;
1373 		size_remaining -= transfer;
1374 
1375 	}
1376 
1377 	/* Commit firmware */
1378 	memset(&fw_commit, 0, sizeof(struct nvme_fw_commit));
1379 	fw_commit.fs = slot;
1380 	fw_commit.ca = NVME_FW_COMMIT_REPLACE_IMG;
1381 
1382 	ret = nvme_admin_fw_commit(ctrlr, &fw_commit);
1383 	if (ret != 0) {
1384 		nvme_err("Commit downloaded FW (%zu B) failed\n",
1385 			 size);
1386 		goto out;
1387 	}
1388 
1389 	ret = nvme_ctrlr_reset(ctrlr);
1390 	if (ret)
1391 		nvme_notice("Reset controller failed\n");
1392 
1393 out:
1394 	pthread_mutex_unlock(&ctrlr->lock);
1395 
1396 	return ret;
1397 }
1398 
1399 /*
1400  * Get an unused I/O queue pair.
1401  */
1402 struct nvme_qpair *nvme_ioqp_get(struct nvme_ctrlr *ctrlr,
1403 				 enum nvme_qprio qprio, unsigned int qd)
1404 {
1405 	struct nvme_qpair *qpair = NULL;
1406 	union nvme_cc_register cc;
1407 	uint32_t trackers;
1408 	int ret;
1409 
1410 	cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);
1411 
1412 	/* Only the low 2 bits (values 0, 1, 2, 3) of QPRIO are valid. */
1413 	if ((qprio & 3) != qprio)
1414 		return NULL;
1415 
1416 	/*
1417 	 * Only value NVME_QPRIO_URGENT(0) is valid for the
1418 	 * default round robin arbitration method.
1419 	 */
1420 	if ((cc.bits.ams == NVME_CC_AMS_RR) && (qprio != NVME_QPRIO_URGENT)) {
1421 		nvme_err("Invalid queue priority for default round "
1422 			 "robin arbitration method\n");
1423 		return NULL;
1424 	}
1425 
1426 	/* The number of I/O qpair entries must be in [2, io_qpairs_max_entries] */
1427 	if (qd == 1) {
1428 		nvme_err("Invalid queue depth\n");
1429 		return NULL;
1430 	}
1431 
1432 	if (qd == 0 || qd > ctrlr->io_qpairs_max_entries)
1433 		qd = ctrlr->io_qpairs_max_entries;
1434 
1435 	/*
1436 	 * No need to have more trackers than entries in the submit queue.
1437 	 * Note also that for a queue size of N, we can only have (N-1)
1438 	 * commands outstanding, hence the "-1" here.
1439 	 */
1440 	trackers = nvme_min(NVME_IO_TRACKERS, (qd - 1));
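	/*
	 * For example, a requested depth of 128 leaves room for 127
	 * outstanding commands, so at most 127 trackers are needed
	 * (fewer if NVME_IO_TRACKERS is smaller).
	 */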
1441 
1442 	pthread_mutex_lock(&ctrlr->lock);
1443 
1444 	/* Get the first available qpair structure */
1445 	qpair = TAILQ_FIRST(&ctrlr->free_io_qpairs);
1446 	if (qpair == NULL) {
1447 		/* No free queue IDs */
1448 		nvme_err("No free I/O queue pairs\n");
1449 		goto out;
1450 	}
1451 
1452 	/* Construct the qpair */
1453 	ret = nvme_qpair_construct(ctrlr, qpair, qprio, qd, trackers);
1454 	if (ret != 0) {
1455 		nvme_qpair_destroy(qpair);
1456 		qpair = NULL;
1457 		goto out;
1458 	}
1459 
1460 	/*
1461 	 * At this point, qpair contains a preallocated submission
1462 	 * and completion queue and a unique queue ID, but it is not
1463 	 * yet created on the controller.
1464 	 * Fill out the submission queue priority and send out the
1465 	 * Create I/O Queue commands.
1466 	 */
1467 	if (nvme_ctrlr_create_qpair(ctrlr, qpair) != 0) {
1468 		nvme_err("Create queue pair on the controller failed\n");
1469 		nvme_qpair_destroy(qpair);
1470 		qpair = NULL;
1471 		goto out;
1472 	}
1473 
1474 	TAILQ_REMOVE(&ctrlr->free_io_qpairs, qpair, tailq);
1475 	TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq);
1476 
1477 out:
1478 	pthread_mutex_unlock(&ctrlr->lock);
1479 
1480 	return qpair;
1481 }
1482 
1483 /*
1484  * Free an I/O queue pair.
1485  */
1486 int nvme_ioqp_release(struct nvme_qpair *qpair)
1487 {
1488 	struct nvme_ctrlr *ctrlr;
1489 	int ret;
1490 
1491 	if (qpair == NULL)
1492 		return 0;
1493 
1494 	ctrlr = qpair->ctrlr;
1495 
1496 	pthread_mutex_lock(&ctrlr->lock);
1497 
1498 	/* Delete the I/O submission and completion queues */
1499 	ret = nvme_ctrlr_delete_qpair(ctrlr, qpair);
1500 	if (ret != 0) {
1501 		nvme_notice("Delete queue pair %u failed\n", qpair->id);
1502 	} else {
1503 		TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq);
1504 		TAILQ_INSERT_HEAD(&ctrlr->free_io_qpairs, qpair, tailq);
1505 	}
1506 
1507 	pthread_mutex_unlock(&ctrlr->lock);
1508 
1509 	return ret;
1510 }
1511 
1512 /*
1513  * Submit an NVMe command using the specified I/O queue pair.
1514  */
1515 int nvme_ioqp_submit_cmd(struct nvme_qpair *qpair,
1516 			 struct nvme_cmd *cmd,
1517 			 void *buf, size_t len,
1518 			 nvme_cmd_cb cb_fn, void *cb_arg)
1519 {
1520 	struct nvme_request *req;
1521 	int ret = ENOMEM;
1522 
1523 	req = nvme_request_allocate_contig(qpair, buf, len, cb_fn, cb_arg);
1524 	if (req) {
1525 		memcpy(&req->cmd, cmd, sizeof(req->cmd));
1526 		ret = nvme_qpair_submit_request(qpair, req);
1527 	}
1528 
1529 	return ret;
1530 }
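
/*
 * Illustrative usage sketch (comment only, not compiled into the driver):
 * one possible call sequence against the public entry points defined in
 * this file. The pci_dev variable is a placeholder for a device handle
 * obtained from the platform PCI layer.
 *
 *	struct nvme_ctrlr *ctrlr;
 *	struct nvme_qpair *qpair;
 *	uint32_t vwc;
 *
 *	ctrlr = nvme_ctrlr_attach(pci_dev, NULL);
 *	if (ctrlr == NULL)
 *		return;
 *
 *	// Query a feature through the admin queue.
 *	nvme_ctrlr_get_feature(ctrlr, NVME_FEAT_CURRENT,
 *			       NVME_FEAT_VOLATILE_WRITE_CACHE, 0, &vwc);
 *
 *	// Reserve an I/O queue pair (qd == 0 selects the maximum depth),
 *	// submit commands with nvme_ioqp_submit_cmd(), then release it.
 *	qpair = nvme_ioqp_get(ctrlr, NVME_QPRIO_URGENT, 0);
 *	if (qpair != NULL) {
 *		// ... build an nvme_cmd and call nvme_ioqp_submit_cmd() ...
 *		nvme_ioqp_release(qpair);
 *	}
 *
 *	nvme_ctrlr_detach(ctrlr);
 */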
1531