1 /* 2 * Copyright 2019-2020, Haiku, Inc. All rights reserved. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Augustin Cavalier <waddlesplash> 7 */ 8 9 10 #include <stdio.h> 11 #include <stdlib.h> 12 13 #include <algorithm> 14 #include <condition_variable.h> 15 #include <AutoDeleter.h> 16 #include <kernel.h> 17 #include <util/AutoLock.h> 18 19 #include <fs/devfs.h> 20 #include <bus/PCI.h> 21 #include <PCI_x86.h> 22 #include <vm/vm.h> 23 24 #include "IORequest.h" 25 26 extern "C" { 27 #include <libnvme/nvme.h> 28 #include <libnvme/nvme_internal.h> 29 } 30 31 32 //#define TRACE_NVME_DISK 33 #ifdef TRACE_NVME_DISK 34 # define TRACE(x...) dprintf("nvme_disk: " x) 35 #else 36 # define TRACE(x...) ; 37 #endif 38 #define TRACE_ALWAYS(x...) dprintf("nvme_disk: " x) 39 #define TRACE_ERROR(x...) dprintf("\33[33mnvme_disk:\33[0m " x) 40 #define CALLED() TRACE("CALLED %s\n", __PRETTY_FUNCTION__) 41 42 43 static const uint8 kDriveIcon[] = { 44 0x6e, 0x63, 0x69, 0x66, 0x08, 0x03, 0x01, 0x00, 0x00, 0x02, 0x00, 0x16, 45 0x02, 0x3c, 0xc7, 0xee, 0x38, 0x9b, 0xc0, 0xba, 0x16, 0x57, 0x3e, 0x39, 46 0xb0, 0x49, 0x77, 0xc8, 0x42, 0xad, 0xc7, 0x00, 0xff, 0xff, 0xd3, 0x02, 47 0x00, 0x06, 0x02, 0x3c, 0x96, 0x32, 0x3a, 0x4d, 0x3f, 0xba, 0xfc, 0x01, 48 0x3d, 0x5a, 0x97, 0x4b, 0x57, 0xa5, 0x49, 0x84, 0x4d, 0x00, 0x47, 0x47, 49 0x47, 0xff, 0xa5, 0xa0, 0xa0, 0x02, 0x00, 0x16, 0x02, 0xbc, 0x59, 0x2f, 50 0xbb, 0x29, 0xa7, 0x3c, 0x0c, 0xe4, 0xbd, 0x0b, 0x7c, 0x48, 0x92, 0xc0, 51 0x4b, 0x79, 0x66, 0x00, 0x7d, 0xff, 0xd4, 0x02, 0x00, 0x06, 0x02, 0x38, 52 0xdb, 0xb4, 0x39, 0x97, 0x33, 0xbc, 0x4a, 0x33, 0x3b, 0xa5, 0x42, 0x48, 53 0x6e, 0x66, 0x49, 0xee, 0x7b, 0x00, 0x59, 0x67, 0x56, 0xff, 0xeb, 0xb2, 54 0xb2, 0x03, 0xa7, 0xff, 0x00, 0x03, 0xff, 0x00, 0x00, 0x04, 0x01, 0x80, 55 0x07, 0x0a, 0x06, 0x22, 0x3c, 0x22, 0x49, 0x44, 0x5b, 0x5a, 0x3e, 0x5a, 56 0x31, 0x39, 0x25, 0x0a, 0x04, 0x22, 0x3c, 0x44, 0x4b, 0x5a, 0x31, 0x39, 57 0x25, 0x0a, 0x04, 0x44, 0x4b, 0x44, 0x5b, 0x5a, 0x3e, 
0x5a, 0x31, 0x0a, 58 0x04, 0x22, 0x3c, 0x22, 0x49, 0x44, 0x5b, 0x44, 0x4b, 0x08, 0x02, 0x27, 59 0x43, 0xb8, 0x14, 0xc1, 0xf1, 0x08, 0x02, 0x26, 0x43, 0x29, 0x44, 0x0a, 60 0x05, 0x44, 0x5d, 0x49, 0x5d, 0x60, 0x3e, 0x5a, 0x3b, 0x5b, 0x3f, 0x08, 61 0x0a, 0x07, 0x01, 0x06, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x10, 0x01, 0x17, 62 0x84, 0x00, 0x04, 0x0a, 0x01, 0x01, 0x01, 0x00, 0x0a, 0x02, 0x01, 0x02, 63 0x00, 0x0a, 0x03, 0x01, 0x03, 0x00, 0x0a, 0x04, 0x01, 0x04, 0x10, 0x01, 64 0x17, 0x85, 0x20, 0x04, 0x0a, 0x06, 0x01, 0x05, 0x30, 0x24, 0xb3, 0x99, 65 0x01, 0x17, 0x82, 0x00, 0x04, 0x0a, 0x05, 0x01, 0x05, 0x30, 0x20, 0xb2, 66 0xe6, 0x01, 0x17, 0x82, 0x00, 0x04 67 }; 68 69 70 #define NVME_DISK_DRIVER_MODULE_NAME "drivers/disk/nvme_disk/driver_v1" 71 #define NVME_DISK_DEVICE_MODULE_NAME "drivers/disk/nvme_disk/device_v1" 72 #define NVME_DISK_DEVICE_ID_GENERATOR "nvme_disk/device_id" 73 74 #define NVME_MAX_QPAIRS (8) 75 76 77 static device_manager_info* sDeviceManager; 78 static pci_x86_module_info* sPCIx86Module; 79 80 typedef struct { 81 device_node* node; 82 pci_info info; 83 84 struct nvme_ctrlr* ctrlr; 85 86 struct nvme_ns* ns; 87 uint64 capacity; 88 uint32 block_size; 89 status_t media_status; 90 91 struct qpair_info { 92 struct nvme_qpair* qpair; 93 } qpairs[NVME_MAX_QPAIRS]; 94 uint32 qpair_count; 95 uint32 next_qpair; 96 97 DMAResource dma_resource; 98 sem_id dma_buffers_sem; 99 100 rw_lock rounded_write_lock; 101 102 ConditionVariable interrupt; 103 } nvme_disk_driver_info; 104 typedef nvme_disk_driver_info::qpair_info qpair_info; 105 106 107 typedef struct { 108 nvme_disk_driver_info* info; 109 } nvme_disk_handle; 110 111 112 static status_t 113 get_geometry(nvme_disk_handle* handle, device_geometry* geometry) 114 { 115 nvme_disk_driver_info* info = handle->info; 116 117 devfs_compute_geometry_size(geometry, info->capacity, info->block_size); 118 119 geometry->device_type = B_DISK; 120 geometry->removable = false; 121 122 geometry->read_only = false; 123 
geometry->write_once = false; 124 125 TRACE("get_geometry(): %" B_PRId32 ", %" B_PRId32 ", %" B_PRId32 ", %" B_PRId32 ", %d, %d, %d, %d\n", 126 geometry->bytes_per_sector, geometry->sectors_per_track, 127 geometry->cylinder_count, geometry->head_count, geometry->device_type, 128 geometry->removable, geometry->read_only, geometry->write_once); 129 130 return B_OK; 131 } 132 133 134 static int 135 log2(uint32 x) 136 { 137 int y; 138 139 for (y = 31; y >= 0; --y) { 140 if (x == ((uint32)1 << y)) 141 break; 142 } 143 144 return y; 145 } 146 147 148 static void 149 nvme_disk_set_capacity(nvme_disk_driver_info* info, uint64 capacity, 150 uint32 blockSize) 151 { 152 TRACE("set_capacity(device = %p, capacity = %" B_PRIu64 ", blockSize = %" B_PRIu32 ")\n", 153 info, capacity, blockSize); 154 155 // get log2, if possible 156 uint32 blockShift = log2(blockSize); 157 158 if ((1UL << blockShift) != blockSize) 159 blockShift = 0; 160 161 info->capacity = capacity; 162 info->block_size = blockSize; 163 } 164 165 166 // #pragma mark - device module API 167 168 169 static int32 nvme_interrupt_handler(void* _info); 170 171 172 static status_t 173 nvme_disk_init_device(void* _info, void** _cookie) 174 { 175 CALLED(); 176 nvme_disk_driver_info* info = (nvme_disk_driver_info*)_info; 177 178 pci_device_module_info* pci; 179 pci_device* pcidev; 180 device_node* parent = sDeviceManager->get_parent_node(info->node); 181 sDeviceManager->get_driver(parent, (driver_module_info**)&pci, 182 (void**)&pcidev); 183 pci->get_pci_info(pcidev, &info->info); 184 sDeviceManager->put_node(parent); 185 186 // construct the libnvme pci_device struct 187 pci_device* device = new pci_device; 188 device->vendor_id = info->info.vendor_id; 189 device->device_id = info->info.device_id; 190 device->subvendor_id = 0; 191 device->subdevice_id = 0; 192 193 device->domain = 0; 194 device->bus = info->info.bus; 195 device->dev = info->info.device; 196 device->func = info->info.function; 197 198 device->pci_info = 
&info->info; 199 200 // open the controller 201 info->ctrlr = nvme_ctrlr_open(device, NULL); 202 if (info->ctrlr == NULL) { 203 TRACE_ERROR("failed to open the controller!\n"); 204 return B_ERROR; 205 } 206 207 struct nvme_ctrlr_stat cstat; 208 int err = nvme_ctrlr_stat(info->ctrlr, &cstat); 209 if (err != 0) { 210 TRACE_ERROR("failed to get controller information!\n"); 211 nvme_ctrlr_close(info->ctrlr); 212 return err; 213 } 214 215 TRACE_ALWAYS("attached to NVMe device \"%s (%s)\"\n", cstat.mn, cstat.sn); 216 TRACE_ALWAYS("\tmaximum transfer size: %" B_PRIuSIZE "\n", cstat.max_xfer_size); 217 TRACE_ALWAYS("\tqpair count: %d\n", cstat.io_qpairs); 218 219 // TODO: export more than just the first namespace! 220 info->ns = nvme_ns_open(info->ctrlr, cstat.ns_ids[0]); 221 if (info->ns == NULL) { 222 TRACE_ERROR("failed to open namespace!\n"); 223 nvme_ctrlr_close(info->ctrlr); 224 return B_ERROR; 225 } 226 227 struct nvme_ns_stat nsstat; 228 err = nvme_ns_stat(info->ns, &nsstat); 229 if (err != 0) { 230 TRACE_ERROR("failed to get namespace information!\n"); 231 nvme_ctrlr_close(info->ctrlr); 232 return err; 233 } 234 235 // store capacity information 236 nvme_disk_set_capacity(info, nsstat.sectors, nsstat.sector_size); 237 238 TRACE("capacity: %" B_PRIu64 ", block_size %" B_PRIu32 "\n", 239 info->capacity, info->block_size); 240 241 // allocate qpairs 242 info->qpair_count = info->next_qpair = 0; 243 for (uint32 i = 0; i < NVME_MAX_QPAIRS && i < cstat.io_qpairs; i++) { 244 info->qpairs[i].qpair = nvme_ioqp_get(info->ctrlr, 245 (enum nvme_qprio)0, 0); 246 if (info->qpairs[i].qpair == NULL) 247 break; 248 249 info->qpair_count++; 250 } 251 if (info->qpair_count == 0) { 252 TRACE_ERROR("failed to allocate qpairs!\n"); 253 nvme_ctrlr_close(info->ctrlr); 254 return B_NO_MEMORY; 255 } 256 257 // allocate DMA buffers 258 int buffers = info->qpair_count * 2; 259 260 dma_restrictions restrictions = {}; 261 restrictions.alignment = B_PAGE_SIZE; 262 // Technically, the first and 
last segments in a transfer can be 263 // unaligned, and the rest only need to have sizes that are a multiple 264 // of the block size. 265 restrictions.max_segment_count = (NVME_MAX_SGL_DESCRIPTORS / 2); 266 restrictions.max_transfer_size = cstat.max_xfer_size; 267 268 err = info->dma_resource.Init(restrictions, B_PAGE_SIZE, buffers, buffers); 269 if (err != 0) { 270 TRACE_ERROR("failed to initialize DMA resource!\n"); 271 nvme_ctrlr_close(info->ctrlr); 272 return err; 273 } 274 275 info->dma_buffers_sem = create_sem(buffers, "nvme buffers sem"); 276 if (info->dma_buffers_sem < 0) { 277 TRACE_ERROR("failed to create DMA buffers semaphore!\n"); 278 nvme_ctrlr_close(info->ctrlr); 279 return info->dma_buffers_sem; 280 } 281 282 // set up rounded-write lock 283 rw_lock_init(&info->rounded_write_lock, "nvme rounded writes"); 284 285 // set up interrupt 286 if (get_module(B_PCI_X86_MODULE_NAME, (module_info**)&sPCIx86Module) 287 != B_OK) { 288 sPCIx86Module = NULL; 289 } 290 291 uint16 command = pci->read_pci_config(pcidev, PCI_command, 2); 292 command &= ~(PCI_command_int_disable); 293 pci->write_pci_config(pcidev, PCI_command, 2, command); 294 295 uint8 irq = info->info.u.h0.interrupt_line; 296 if (sPCIx86Module != NULL) { 297 if (sPCIx86Module->get_msix_count(info->info.bus, info->info.device, 298 info->info.function)) { 299 uint8 msixVector = 0; 300 if (sPCIx86Module->configure_msix(info->info.bus, info->info.device, 301 info->info.function, 1, &msixVector) == B_OK 302 && sPCIx86Module->enable_msix(info->info.bus, info->info.device, 303 info->info.function) == B_OK) { 304 TRACE_ALWAYS("using MSI-X\n"); 305 irq = msixVector; 306 } 307 } else if (sPCIx86Module->get_msi_count(info->info.bus, 308 info->info.device, info->info.function) >= 1) { 309 uint8 msiVector = 0; 310 if (sPCIx86Module->configure_msi(info->info.bus, info->info.device, 311 info->info.function, 1, &msiVector) == B_OK 312 && sPCIx86Module->enable_msi(info->info.bus, info->info.device, 313 
info->info.function) == B_OK) { 314 TRACE_ALWAYS("using message signaled interrupts\n"); 315 irq = msiVector; 316 } 317 } 318 } 319 320 if (irq == 0 || irq == 0xFF) { 321 TRACE_ERROR("device PCI:%d:%d:%d was assigned an invalid IRQ\n", 322 info->info.bus, info->info.device, info->info.function); 323 return B_ERROR; 324 } 325 info->interrupt.Init(NULL, NULL); 326 install_io_interrupt_handler(irq, nvme_interrupt_handler, (void*)info, B_NO_HANDLED_INFO); 327 328 if (info->ctrlr->feature_supported[NVME_FEAT_INTERRUPT_COALESCING]) { 329 uint32 microseconds = 16, threshold = 32; 330 nvme_admin_set_feature(info->ctrlr, false, NVME_FEAT_INTERRUPT_COALESCING, 331 ((microseconds / 100) << 8) | threshold, 0, NULL); 332 } 333 334 *_cookie = info; 335 return B_OK; 336 } 337 338 339 static void 340 nvme_disk_uninit_device(void* _cookie) 341 { 342 CALLED(); 343 nvme_disk_driver_info* info = (nvme_disk_driver_info*)_cookie; 344 345 remove_io_interrupt_handler(info->info.u.h0.interrupt_line, 346 nvme_interrupt_handler, (void*)info); 347 348 rw_lock_destroy(&info->rounded_write_lock); 349 350 nvme_ns_close(info->ns); 351 nvme_ctrlr_close(info->ctrlr); 352 353 // TODO: Deallocate MSI(-X). 354 // TODO: Deallocate PCI. 
355 } 356 357 358 static status_t 359 nvme_disk_open(void* _info, const char* path, int openMode, void** _cookie) 360 { 361 CALLED(); 362 363 nvme_disk_driver_info* info = (nvme_disk_driver_info*)_info; 364 nvme_disk_handle* handle = (nvme_disk_handle*)malloc( 365 sizeof(nvme_disk_handle)); 366 if (handle == NULL) 367 return B_NO_MEMORY; 368 369 handle->info = info; 370 371 *_cookie = handle; 372 return B_OK; 373 } 374 375 376 static status_t 377 nvme_disk_close(void* cookie) 378 { 379 CALLED(); 380 381 //nvme_disk_handle* handle = (nvme_disk_handle*)cookie; 382 return B_OK; 383 } 384 385 386 static status_t 387 nvme_disk_free(void* cookie) 388 { 389 CALLED(); 390 391 nvme_disk_handle* handle = (nvme_disk_handle*)cookie; 392 free(handle); 393 return B_OK; 394 } 395 396 397 // #pragma mark - I/O 398 399 400 static int32 401 nvme_interrupt_handler(void* _info) 402 { 403 nvme_disk_driver_info* info = (nvme_disk_driver_info*)_info; 404 info->interrupt.NotifyAll(); 405 return 0; 406 } 407 408 409 static qpair_info* 410 get_qpair(nvme_disk_driver_info* info) 411 { 412 return &info->qpairs[atomic_add((int32*)&info->next_qpair, 1) 413 % info->qpair_count]; 414 } 415 416 417 static void 418 io_finished_callback(status_t* status, const struct nvme_cpl* cpl) 419 { 420 *status = nvme_cpl_is_error(cpl) ? B_IO_ERROR : B_OK; 421 } 422 423 424 static void 425 await_status(nvme_disk_driver_info* info, struct nvme_qpair* qpair, status_t& status) 426 { 427 CALLED(); 428 429 ConditionVariableEntry entry; 430 int timeouts = 0; 431 while (status == EINPROGRESS) { 432 info->interrupt.Add(&entry); 433 434 nvme_qpair_poll(qpair, 0); 435 436 if (status != EINPROGRESS) 437 return; 438 439 if (entry.Wait(B_RELATIVE_TIMEOUT, 5 * 1000 * 1000) != B_OK) { 440 // This should never happen, as we are woken up on every interrupt 441 // no matter the qpair or transfer within; so if it does occur, 442 // that probably means the controller stalled or something. 
443 444 TRACE_ERROR("timed out waiting for interrupt!\n"); 445 if (timeouts++ >= 3) { 446 nvme_qpair_fail(qpair); 447 status = B_TIMED_OUT; 448 return; 449 } 450 } 451 452 nvme_qpair_poll(qpair, 0); 453 } 454 } 455 456 457 struct nvme_io_request { 458 status_t status; 459 460 bool write; 461 462 off_t lba_start; 463 size_t lba_count; 464 465 physical_entry* iovecs; 466 int32 iovec_count; 467 468 int32 iovec_i; 469 }; 470 471 472 void ior_reset_sgl(nvme_io_request* request, uint32_t offset) 473 { 474 request->iovec_i = offset; 475 } 476 477 478 int ior_next_sge(nvme_io_request* request, uint64_t* address, uint32_t* length) 479 { 480 int32 index = request->iovec_i; 481 if (index < 0 || index > request->iovec_count) 482 return -1; 483 484 *address = request->iovecs[index].address; 485 *length = request->iovecs[index].size; 486 487 TRACE("IOV %d: 0x%" B_PRIx64 ", %" B_PRIu32 "\n", request->iovec_i, *address, 488 *length); 489 490 request->iovec_i++; 491 return 0; 492 } 493 494 495 static status_t 496 do_nvme_io_request(nvme_disk_driver_info* info, nvme_io_request* request) 497 { 498 request->status = EINPROGRESS; 499 500 qpair_info* qpinfo = get_qpair(info); 501 int ret = -1; 502 if (request->write) { 503 ret = nvme_ns_writev(info->ns, qpinfo->qpair, request->lba_start, 504 request->lba_count, (nvme_cmd_cb)io_finished_callback, request, 505 0, (nvme_req_reset_sgl_cb)ior_reset_sgl, 506 (nvme_req_next_sge_cb)ior_next_sge); 507 } else { 508 ret = nvme_ns_readv(info->ns, qpinfo->qpair, request->lba_start, 509 request->lba_count, (nvme_cmd_cb)io_finished_callback, request, 510 0, (nvme_req_reset_sgl_cb)ior_reset_sgl, 511 (nvme_req_next_sge_cb)ior_next_sge); 512 } 513 if (ret != 0) { 514 TRACE_ERROR("attempt to queue %s I/O at LBA %" B_PRIdOFF " of %" B_PRIuSIZE 515 " blocks failed!\n", request->write ? 
// Performs an io_request through DMAResource bounce buffers. Used when the
// request's vectors are unsuitable for direct SGL submission (misaligned,
// not block-multiple, or the offset/length need rounding). Writes take the
// rounded-write lock exclusively so a read-modify-write of a partial block
// cannot interleave with direct writes.
static status_t
nvme_disk_bounced_io(nvme_disk_handle* handle, io_request* request)
{
	CALLED();

	WriteLocker writeLocker;
	if (request->IsWrite())
		writeLocker.SetTo(handle->info->rounded_write_lock, false);

	// Each in-flight bounced operation consumes one DMA buffer slot.
	status_t status = acquire_sem(handle->info->dma_buffers_sem);
	if (status != B_OK) {
		request->SetStatusAndNotify(status);
		return status;
	}

	const size_t block_size = handle->info->block_size;

	TRACE("%p: IOR Offset: %" B_PRIdOFF "; Length %" B_PRIuGENADDR
		"; Write %s\n", request, request->Offset(), request->Length(),
		request->IsWrite() ? "yes" : "no");

	nvme_io_request nvme_request;
	while (request->RemainingBytes() > 0) {
		// Translate the next chunk of the request into a DMA-safe operation.
		IOOperation operation;
		status = handle->info->dma_resource.TranslateNext(request, &operation, 0);
		if (status != B_OK)
			break;

		size_t transferredBytes = 0;
		do {
			TRACE("%p: IOO offset: %" B_PRIdOFF ", length: %" B_PRIuGENADDR
				", write: %s\n", request, operation.Offset(),
				operation.Length(), operation.IsWrite() ? "yes" : "no");

			nvme_request.write = operation.IsWrite();
			nvme_request.lba_start = operation.Offset() / block_size;
			nvme_request.lba_count = operation.Length() / block_size;
			nvme_request.iovecs = (physical_entry*)operation.Vecs();
			nvme_request.iovec_count = operation.VecCount();

			status = do_nvme_io_request(handle->info, &nvme_request);
			// A partial-block write first issues a read phase; only count
			// bytes once the operation's direction matches the request's.
			if (status == B_OK && nvme_request.write == request->IsWrite())
				transferredBytes += operation.OriginalLength();

			operation.SetStatus(status);
		} while (status == B_OK && !operation.Finish());
			// Finish() returns false while another phase (e.g. the write
			// after a read-modify-write) still needs to be issued.

		if (status == B_OK && operation.Status() != B_OK) {
			TRACE_ERROR("I/O succeeded but IOOperation failed!\n");
			status = operation.Status();
		}

		operation.SetTransferredBytes(transferredBytes);
		request->OperationFinished(&operation, status, status != B_OK,
			operation.OriginalOffset() + transferredBytes);

		handle->info->dma_resource.RecycleBuffer(operation.Buffer());

		TRACE("%p: status %s, remaining bytes %" B_PRIuGENADDR "\n", request,
			strerror(status), request->RemainingBytes());
		if (status != B_OK)
			break;
	}

	release_sem(handle->info->dma_buffers_sem);

	// Notify() also takes care of UnlockMemory().
	if (status != B_OK && request->Status() == B_OK)
		request->SetStatusAndNotify(status);
	else
		request->NotifyFinished();
	return status;
}


// Main I/O entry point. Builds a physical scatter/gather list for the
// request, decides whether the transfer can be submitted directly or must go
// through nvme_disk_bounced_io() (alignment, block-multiple, or rounding
// constraints), and otherwise submits it in SGL-sized slices.
static status_t
nvme_disk_io(void* cookie, io_request* request)
{
	CALLED();

	nvme_disk_handle* handle = (nvme_disk_handle*)cookie;

	nvme_io_request nvme_request;
	memset(&nvme_request, 0, sizeof(nvme_io_request));

	nvme_request.write = request->IsWrite();

	physical_entry* vtophys = NULL;
	MemoryDeleter vtophysDeleter;

	IOBuffer* buffer = request->Buffer();
	status_t status = B_OK;
	if (!buffer->IsPhysical()) {
		// Virtual buffer: pin it and resolve to physical entries.
		status = buffer->LockMemory(request->TeamID(), request->IsWrite());
		if (status != B_OK) {
			TRACE_ERROR("failed to lock memory: %s\n", strerror(status));
			return status;
		}
		// SetStatusAndNotify() takes care of unlocking memory if necessary.

		// This is slightly inefficient, as we could use a BStackOrHeapArray in
		// the optimal case (few physical entries required), but we would not
		// know whether or not that was possible until calling get_memory_map()
		// and then potentially reallocating, which would complicate the logic.

		int32 vtophys_length = (request->Length() / B_PAGE_SIZE) + 2;
		nvme_request.iovecs = vtophys = (physical_entry*)malloc(sizeof(physical_entry)
			* vtophys_length);
		if (vtophys == NULL) {
			TRACE_ERROR("failed to allocate memory for iovecs\n");
			request->SetStatusAndNotify(B_NO_MEMORY);
			return B_NO_MEMORY;
		}
		vtophysDeleter.SetTo(vtophys);

		for (size_t i = 0; i < buffer->VecCount(); i++) {
			generic_io_vec virt = buffer->VecAt(i);
			uint32 entries = vtophys_length - nvme_request.iovec_count;

			// Avoid copies by going straight into the vtophys array.
			status = get_memory_map_etc(request->TeamID(), (void*)virt.base,
				virt.length, vtophys + nvme_request.iovec_count, &entries);
			if (status == B_BUFFER_OVERFLOW) {
				TRACE("vtophys array was too small, reallocating\n");

				// Detach before realloc: on realloc failure the old pointer
				// would otherwise be double-freed by the deleter.
				vtophysDeleter.Detach();
				vtophys_length *= 2;
				nvme_request.iovecs = vtophys = (physical_entry*)realloc(vtophys,
					sizeof(physical_entry) * vtophys_length);
				vtophysDeleter.SetTo(vtophys);
				if (vtophys == NULL) {
					status = B_NO_MEMORY;
				} else {
					// Try again, with the larger buffer this time.
					i--;
					continue;
				}
			}
			if (status != B_OK) {
				TRACE_ERROR("I/O get_memory_map failed: %s\n", strerror(status));
				request->SetStatusAndNotify(status);
				return status;
			}

			nvme_request.iovec_count += entries;
		}
	} else {
		nvme_request.iovecs = (physical_entry*)buffer->Vecs();
		nvme_request.iovec_count = buffer->VecCount();
	}

	// See if we need to bounce anything other than the first or last vec.
	const size_t block_size = handle->info->block_size;
	bool bounceAll = false;
	for (int32 i = 1; !bounceAll && i < (nvme_request.iovec_count - 1); i++) {
		if ((nvme_request.iovecs[i].address % B_PAGE_SIZE) != 0)
			bounceAll = true;
		if ((nvme_request.iovecs[i].size % B_PAGE_SIZE) != 0)
			bounceAll = true;
	}

	// See if we need to bounce due to the first or last vec.
	if (nvme_request.iovec_count > 1) {
		// The first vec must end page-aligned, the last must start
		// page-aligned, and both must be block-size multiples.
		physical_entry* entry = &nvme_request.iovecs[0];
		if (!bounceAll && (((entry->address + entry->size) % B_PAGE_SIZE) != 0
			|| (entry->size % block_size) != 0))
			bounceAll = true;

		entry = &nvme_request.iovecs[nvme_request.iovec_count - 1];
		if (!bounceAll && ((entry->address % B_PAGE_SIZE) != 0
			|| (entry->size % block_size) != 0))
			bounceAll = true;
	}

	// See if we need to bounce due to rounding.
	const off_t rounded_pos = ROUNDDOWN(request->Offset(), block_size);
	phys_size_t rounded_len = ROUNDUP(request->Length() + (request->Offset()
		- rounded_pos), block_size);
	if (rounded_pos != request->Offset() || rounded_len != request->Length())
		bounceAll = true;

	if (bounceAll) {
		// Let the bounced I/O routine take care of everything from here.
		return nvme_disk_bounced_io(handle, request);
	}

	nvme_request.lba_start = rounded_pos / block_size;
	nvme_request.lba_count = rounded_len / block_size;

	// No bouncing was required.
	// Direct writes only take the rounded-write lock shared: they cannot
	// conflict with each other, only with a bounced read-modify-write.
	ReadLocker readLocker;
	if (nvme_request.write)
		readLocker.SetTo(handle->info->rounded_write_lock, false);

	// Error check before actually doing I/O.
	if (status != B_OK) {
		TRACE_ERROR("I/O failed early: %s\n", strerror(status));
		request->SetStatusAndNotify(status);
		return status;
	}

	// Submit in slices of at most half the SGL descriptor limit; lba_count
	// is recomputed per slice from the actual vec sizes.
	int32 remaining = nvme_request.iovec_count;
	while (remaining > 0 && status == B_OK) {
		nvme_request.iovec_count = min_c(remaining,
			NVME_MAX_SGL_DESCRIPTORS / 2);

		nvme_request.lba_count = 0;
		for (int i = 0; i < nvme_request.iovec_count; i++)
			nvme_request.lba_count += (nvme_request.iovecs[i].size / block_size);

		status = do_nvme_io_request(handle->info, &nvme_request);

		nvme_request.iovecs += nvme_request.iovec_count;
		remaining -= nvme_request.iovec_count;
		nvme_request.lba_start += nvme_request.lba_count;
	}

	if (status != B_OK)
		TRACE_ERROR("I/O failed: %s\n", strerror(status));

	request->SetTransferredBytes(status != B_OK,
		(nvme_request.lba_start * block_size) - rounded_pos);
	request->SetStatusAndNotify(status);
	return status;
}
if (pos >= end) 770 return B_BAD_VALUE; 771 if (pos + (off_t)*length > end) 772 *length = end - pos; 773 774 IORequest request; 775 status_t status = request.Init(pos, (addr_t)buffer, *length, false, 0); 776 if (status != B_OK) 777 return status; 778 779 status = nvme_disk_io(handle, &request); 780 *length = request.TransferredBytes(); 781 return status; 782 } 783 784 785 static status_t 786 nvme_disk_write(void* cookie, off_t pos, const void* buffer, size_t* length) 787 { 788 CALLED(); 789 nvme_disk_handle* handle = (nvme_disk_handle*)cookie; 790 791 const off_t end = (handle->info->capacity * handle->info->block_size); 792 if (pos >= end) 793 return B_BAD_VALUE; 794 if (pos + (off_t)*length > end) 795 *length = end - pos; 796 797 IORequest request; 798 status_t status = request.Init(pos, (addr_t)buffer, *length, true, 0); 799 if (status != B_OK) 800 return status; 801 802 status = nvme_disk_io(handle, &request); 803 *length = request.TransferredBytes(); 804 return status; 805 } 806 807 808 static status_t 809 nvme_disk_flush(nvme_disk_driver_info* info) 810 { 811 status_t status = EINPROGRESS; 812 813 qpair_info* qpinfo = get_qpair(info); 814 int ret = nvme_ns_flush(info->ns, qpinfo->qpair, 815 (nvme_cmd_cb)io_finished_callback, &status); 816 if (ret != 0) 817 return ret; 818 819 await_status(info, qpinfo->qpair, status); 820 return status; 821 } 822 823 824 static status_t 825 nvme_disk_ioctl(void* cookie, uint32 op, void* buffer, size_t length) 826 { 827 CALLED(); 828 nvme_disk_handle* handle = (nvme_disk_handle*)cookie; 829 nvme_disk_driver_info* info = handle->info; 830 831 TRACE("ioctl(op = %" B_PRId32 ")\n", op); 832 833 switch (op) { 834 case B_GET_MEDIA_STATUS: 835 { 836 *(status_t *)buffer = info->media_status; 837 info->media_status = B_OK; 838 return B_OK; 839 break; 840 } 841 842 case B_GET_DEVICE_SIZE: 843 { 844 size_t size = info->capacity * info->block_size; 845 return user_memcpy(buffer, &size, sizeof(size_t)); 846 } 847 848 case B_GET_GEOMETRY: 
849 { 850 if (buffer == NULL /*|| length != sizeof(device_geometry)*/) 851 return B_BAD_VALUE; 852 853 device_geometry geometry; 854 status_t status = get_geometry(handle, &geometry); 855 if (status != B_OK) 856 return status; 857 858 return user_memcpy(buffer, &geometry, sizeof(device_geometry)); 859 } 860 861 case B_GET_ICON_NAME: 862 return user_strlcpy((char*)buffer, "devices/drive-harddisk", 863 B_FILE_NAME_LENGTH); 864 865 case B_GET_VECTOR_ICON: 866 { 867 device_icon iconData; 868 if (length != sizeof(device_icon)) 869 return B_BAD_VALUE; 870 if (user_memcpy(&iconData, buffer, sizeof(device_icon)) != B_OK) 871 return B_BAD_ADDRESS; 872 873 if (iconData.icon_size >= (int32)sizeof(kDriveIcon)) { 874 if (user_memcpy(iconData.icon_data, kDriveIcon, 875 sizeof(kDriveIcon)) != B_OK) 876 return B_BAD_ADDRESS; 877 } 878 879 iconData.icon_size = sizeof(kDriveIcon); 880 return user_memcpy(buffer, &iconData, sizeof(device_icon)); 881 } 882 883 case B_FLUSH_DRIVE_CACHE: 884 return nvme_disk_flush(info); 885 } 886 887 return B_DEV_INVALID_IOCTL; 888 } 889 890 891 // #pragma mark - driver module API 892 893 894 static float 895 nvme_disk_supports_device(device_node *parent) 896 { 897 CALLED(); 898 899 const char* bus; 900 uint16 baseClass, subClass; 901 902 if (sDeviceManager->get_attr_string(parent, B_DEVICE_BUS, &bus, false) != B_OK 903 || sDeviceManager->get_attr_uint16(parent, B_DEVICE_TYPE, &baseClass, false) != B_OK 904 || sDeviceManager->get_attr_uint16(parent, B_DEVICE_SUB_TYPE, &subClass, false) != B_OK) 905 return -1.0f; 906 907 if (strcmp(bus, "pci") != 0 || baseClass != PCI_mass_storage) 908 return 0.0f; 909 910 if (subClass != PCI_nvm) 911 return 0.0f; 912 913 TRACE("NVMe device found!\n"); 914 return 1.0f; 915 } 916 917 918 static status_t 919 nvme_disk_register_device(device_node* parent) 920 { 921 CALLED(); 922 923 device_attr attrs[] = { 924 { NULL } 925 }; 926 927 return sDeviceManager->register_node(parent, NVME_DISK_DRIVER_MODULE_NAME, 928 attrs, NULL, 
NULL); 929 } 930 931 932 static status_t 933 nvme_disk_init_driver(device_node* node, void** cookie) 934 { 935 CALLED(); 936 937 int ret = nvme_lib_init((enum nvme_log_level)0, (enum nvme_log_facility)0, NULL); 938 if (ret != 0) { 939 TRACE_ERROR("libnvme initialization failed!\n"); 940 return ret; 941 } 942 943 nvme_disk_driver_info* info = new nvme_disk_driver_info; 944 if (info == NULL) 945 return B_NO_MEMORY; 946 947 info->media_status = B_OK; 948 info->node = node; 949 950 info->ctrlr = NULL; 951 952 *cookie = info; 953 return B_OK; 954 } 955 956 957 static void 958 nvme_disk_uninit_driver(void* _cookie) 959 { 960 CALLED(); 961 962 nvme_disk_driver_info* info = (nvme_disk_driver_info*)_cookie; 963 free(info); 964 } 965 966 967 static status_t 968 nvme_disk_register_child_devices(void* _cookie) 969 { 970 CALLED(); 971 972 nvme_disk_driver_info* info = (nvme_disk_driver_info*)_cookie; 973 status_t status; 974 975 int32 id = sDeviceManager->create_id(NVME_DISK_DEVICE_ID_GENERATOR); 976 if (id < 0) 977 return id; 978 979 char name[64]; 980 snprintf(name, sizeof(name), "disk/nvme/%" B_PRId32 "/raw", 981 id); 982 983 status = sDeviceManager->publish_device(info->node, name, 984 NVME_DISK_DEVICE_MODULE_NAME); 985 986 return status; 987 } 988 989 990 // #pragma mark - 991 992 993 module_dependency module_dependencies[] = { 994 {B_DEVICE_MANAGER_MODULE_NAME, (module_info**)&sDeviceManager}, 995 {} 996 }; 997 998 struct device_module_info sNvmeDiskDevice = { 999 { 1000 NVME_DISK_DEVICE_MODULE_NAME, 1001 0, 1002 NULL 1003 }, 1004 1005 nvme_disk_init_device, 1006 nvme_disk_uninit_device, 1007 NULL, // remove, 1008 1009 nvme_disk_open, 1010 nvme_disk_close, 1011 nvme_disk_free, 1012 nvme_disk_read, 1013 nvme_disk_write, 1014 nvme_disk_io, 1015 nvme_disk_ioctl, 1016 1017 NULL, // select 1018 NULL, // deselect 1019 }; 1020 1021 struct driver_module_info sNvmeDiskDriver = { 1022 { 1023 NVME_DISK_DRIVER_MODULE_NAME, 1024 0, 1025 NULL 1026 }, 1027 1028 
nvme_disk_supports_device, 1029 nvme_disk_register_device, 1030 nvme_disk_init_driver, 1031 nvme_disk_uninit_driver, 1032 nvme_disk_register_child_devices, 1033 NULL, // rescan 1034 NULL, // removed 1035 }; 1036 1037 module_info* modules[] = { 1038 (module_info*)&sNvmeDiskDriver, 1039 (module_info*)&sNvmeDiskDevice, 1040 NULL 1041 }; 1042