/*
 * Copyright (C) 2017 NXP Semiconductors
 * Copyright (C) 2017 Bin Meng <bmeng.cn@gmail.com>
 *
 * SPDX-License-Identifier:	GPL-2.0+
 */

#include <common.h>
#include <dm.h>
#include <errno.h>
#include <memalign.h>
#include <pci.h>
#include <dm/device-internal.h>
#include "nvme.h"

#define NVME_Q_DEPTH		2
#define NVME_AQ_DEPTH		2
#define NVME_SQ_SIZE(depth)	(depth * sizeof(struct nvme_command))
#define NVME_CQ_SIZE(depth)	(depth * sizeof(struct nvme_completion))
#define NVME_CQ_ALLOCATION	ALIGN(NVME_CQ_SIZE(NVME_Q_DEPTH), \
				      ARCH_DMA_MINALIGN)
#define ADMIN_TIMEOUT		60
#define IO_TIMEOUT		30
#define MAX_PRP_POOL		512

enum nvme_queue_id {
	NVME_ADMIN_Q,
	NVME_IO_Q,
	NVME_Q_NUM,
};

/*
 * An NVM Express queue. Each device has at least two (one for admin
 * commands and one for I/O commands).
 */
struct nvme_queue {
	struct nvme_dev *dev;
	struct nvme_command *sq_cmds;
	struct nvme_completion *cqes;
	wait_queue_head_t sq_full;
	u32 __iomem *q_db;
	u16 q_depth;
	s16 cq_vector;
	u16 sq_head;
	u16 sq_tail;
	u16 cq_head;
	u16 qid;
	u8 cq_phase;
	u8 cqe_seen;
	unsigned long cmdid_data[];
};

static int nvme_wait_ready(struct nvme_dev *dev, bool enabled)
{
	u32 bit = enabled ? NVME_CSTS_RDY : 0;
	int timeout;
	ulong start;

	/* Timeout field in the CAP register is in 500 millisecond units */
	timeout = NVME_CAP_TIMEOUT(dev->cap) * 500;

	start = get_timer(0);
	while (get_timer(start) < timeout) {
		if ((readl(&dev->bar->csts) & NVME_CSTS_RDY) == bit)
			return 0;
	}

	return -ETIME;
}

static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2,
			   int total_len, u64 dma_addr)
{
	u32 page_size = dev->page_size;
	int offset = dma_addr & (page_size - 1);
	u64 *prp_pool;
	int length = total_len;
	int i, nprps;
	u32 prps_per_page = page_size >> 3;
	u32 num_pages;

	length -= (page_size - offset);

	if (length <= 0) {
		*prp2 = 0;
		return 0;
	}

	if (length)
		dma_addr += (page_size - offset);

	if (length <= page_size) {
		*prp2 = dma_addr;
		return 0;
	}

	nprps = DIV_ROUND_UP(length, page_size);
	num_pages = DIV_ROUND_UP(nprps + 1, prps_per_page);

	if (nprps > dev->prp_entry_num) {
		free(dev->prp_pool);
		/*
		 * Always increase in increments of pages. It doesn't waste
		 * much memory and reduces the number of allocations.
		 */
		dev->prp_pool = memalign(page_size, num_pages * page_size);
		if (!dev->prp_pool) {
			printf("Error: malloc prp_pool fail\n");
			return -ENOMEM;
		}
		dev->prp_entry_num = prps_per_page * num_pages;
	}

	prp_pool = dev->prp_pool;
	i = 0;
	while (nprps) {
		if (i == prps_per_page) {
			*(prp_pool + i) = *(prp_pool + i - 1);
			*(prp_pool + i - 1) = cpu_to_le64((ulong)prp_pool +
					page_size);
			i = 1;
			prp_pool += page_size;
		}
		*(prp_pool + i++) = cpu_to_le64(dma_addr);
		dma_addr += page_size;
		nprps--;
	}
	*prp2 = (ulong)dev->prp_pool;

	flush_dcache_range((ulong)dev->prp_pool, (ulong)dev->prp_pool +
			   dev->prp_entry_num * sizeof(u64));

	return 0;
}

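/**
 * nvme_get_cmd_id() - return a command identifier for the next submission
 *
 * IDs are taken from a static counter that increments on each call and
 * falls back to 0 once it reaches USHRT_MAX; the value is returned in
 * little-endian form for use in the command's command_id field.
 */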
static __le16 nvme_get_cmd_id(void)
{
	static unsigned short cmdid;

	return cpu_to_le16((cmdid < USHRT_MAX) ? cmdid++ : 0);
}

static u16 nvme_read_completion_status(struct nvme_queue *nvmeq, u16 index)
{
	/*
	 * Single CQ entries are always smaller than a cache line, so we
	 * can't invalidate them individually. However CQ entries are
	 * read only by the CPU, so it's safe to always invalidate all of them,
	 * as the cache line should never become dirty.
	 */
	ulong start = (ulong)&nvmeq->cqes[0];
	ulong stop = start + NVME_CQ_ALLOCATION;

	invalidate_dcache_range(start, stop);

	return readw(&(nvmeq->cqes[index].status));
}

/**
 * nvme_submit_cmd() - copy a command into a queue and ring the doorbell
 *
 * @nvmeq: The queue to use
 * @cmd: The command to send
 */
static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
{
	u16 tail = nvmeq->sq_tail;

	memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
	flush_dcache_range((ulong)&nvmeq->sq_cmds[tail],
			   (ulong)&nvmeq->sq_cmds[tail] + sizeof(*cmd));

	if (++tail == nvmeq->q_depth)
		tail = 0;
	writel(tail, nvmeq->q_db);
	nvmeq->sq_tail = tail;
}

static int nvme_submit_sync_cmd(struct nvme_queue *nvmeq,
				struct nvme_command *cmd,
				u32 *result, unsigned timeout)
{
	u16 head = nvmeq->cq_head;
	u16 phase = nvmeq->cq_phase;
	u16 status;
	ulong start_time;
	ulong timeout_us = timeout * 100000;

	cmd->common.command_id = nvme_get_cmd_id();
	nvme_submit_cmd(nvmeq, cmd);

	start_time = timer_get_us();

	for (;;) {
		status = nvme_read_completion_status(nvmeq, head);
		if ((status & 0x01) == phase)
			break;
		if (timeout_us > 0 && (timer_get_us() - start_time)
		    >= timeout_us)
			return -ETIMEDOUT;
	}

	status >>= 1;
	if (status) {
		printf("ERROR: status = %x, phase = %d, head = %d\n",
		       status, phase, head);
		status = 0;
		if (++head == nvmeq->q_depth) {
			head = 0;
			phase = !phase;
		}
		writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
		nvmeq->cq_head = head;
		nvmeq->cq_phase = phase;

		return -EIO;
	}

	if (result)
		*result = readl(&(nvmeq->cqes[head].result));

	if (++head == nvmeq->q_depth) {
		head = 0;
		phase = !phase;
	}
	writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
	nvmeq->cq_head = head;
	nvmeq->cq_phase = phase;

	return status;
}

static int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
				 u32 *result)
{
	return nvme_submit_sync_cmd(dev->queues[NVME_ADMIN_Q], cmd,
				    result, ADMIN_TIMEOUT);
}

static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev,
					   int qid, int depth)
{
	struct nvme_queue *nvmeq = malloc(sizeof(*nvmeq));
	if (!nvmeq)
		return NULL;
	memset(nvmeq, 0, sizeof(*nvmeq));

	nvmeq->cqes = (void *)memalign(4096, NVME_CQ_ALLOCATION);
	if (!nvmeq->cqes)
		goto free_nvmeq;
	memset((void *)nvmeq->cqes, 0, NVME_CQ_SIZE(depth));

	nvmeq->sq_cmds = (void *)memalign(4096, NVME_SQ_SIZE(depth));
	if (!nvmeq->sq_cmds)
		goto free_queue;
	memset((void *)nvmeq->sq_cmds, 0, NVME_SQ_SIZE(depth));

	nvmeq->dev = dev;

	nvmeq->cq_head = 0;
	nvmeq->cq_phase = 1;
	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
	nvmeq->q_depth = depth;
	nvmeq->qid = qid;
	dev->queue_count++;
	dev->queues[qid] = nvmeq;

	return nvmeq;

free_queue:
	free((void *)nvmeq->cqes);
free_nvmeq:
	free(nvmeq);

	return NULL;
}

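/**
 * nvme_delete_queue() - ask the controller to delete a hardware queue
 *
 * @dev: The device to operate on
 * @opcode: nvme_admin_delete_sq or nvme_admin_delete_cq
 * @id: ID of the queue to delete
 */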
static int nvme_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.delete_queue.opcode = opcode;
	c.delete_queue.qid = cpu_to_le16(id);

	return nvme_submit_admin_cmd(dev, &c, NULL);
}

static int nvme_delete_sq(struct nvme_dev *dev, u16 sqid)
{
	return nvme_delete_queue(dev, nvme_admin_delete_sq, sqid);
}

static int nvme_delete_cq(struct nvme_dev *dev, u16 cqid)
{
	return nvme_delete_queue(dev, nvme_admin_delete_cq, cqid);
}

static int nvme_enable_ctrl(struct nvme_dev *dev)
{
	dev->ctrl_config &= ~NVME_CC_SHN_MASK;
	dev->ctrl_config |= NVME_CC_ENABLE;
	writel(dev->ctrl_config, &dev->bar->cc);

	return nvme_wait_ready(dev, true);
}

static int nvme_disable_ctrl(struct nvme_dev *dev)
{
	dev->ctrl_config &= ~NVME_CC_SHN_MASK;
	dev->ctrl_config &= ~NVME_CC_ENABLE;
	writel(dev->ctrl_config, &dev->bar->cc);

	return nvme_wait_ready(dev, false);
}

static void nvme_free_queue(struct nvme_queue *nvmeq)
{
	free((void *)nvmeq->cqes);
	free(nvmeq->sq_cmds);
	free(nvmeq);
}

static void nvme_free_queues(struct nvme_dev *dev, int lowest)
{
	int i;

	for (i = dev->queue_count - 1; i >= lowest; i--) {
		struct nvme_queue *nvmeq = dev->queues[i];
		dev->queue_count--;
		dev->queues[i] = NULL;
		nvme_free_queue(nvmeq);
	}
}

static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
{
	struct nvme_dev *dev = nvmeq->dev;

	nvmeq->sq_tail = 0;
	nvmeq->cq_head = 0;
	nvmeq->cq_phase = 1;
	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
	memset((void *)nvmeq->cqes, 0, NVME_CQ_SIZE(nvmeq->q_depth));
	flush_dcache_range((ulong)nvmeq->cqes,
			   (ulong)nvmeq->cqes + NVME_CQ_ALLOCATION);
	dev->online_queues++;
}

static int nvme_configure_admin_queue(struct nvme_dev *dev)
{
	int result;
	u32 aqa;
	u64 cap = dev->cap;
	struct nvme_queue *nvmeq;
	/* most architectures use 4KB as the page size */
	unsigned page_shift = 12;
	unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12;
	unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12;

	if (page_shift < dev_page_min) {
		debug("Device minimum page size (%u) too large for host (%u)\n",
		      1 << dev_page_min, 1 << page_shift);
		return -ENODEV;
	}

	if (page_shift > dev_page_max) {
		debug("Device maximum page size (%u) smaller than host (%u)\n",
		      1 << dev_page_max, 1 << page_shift);
		page_shift = dev_page_max;
	}

	result = nvme_disable_ctrl(dev);
	if (result < 0)
		return result;

	nvmeq = dev->queues[NVME_ADMIN_Q];
	if (!nvmeq) {
		nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
		if (!nvmeq)
			return -ENOMEM;
	}

	aqa = nvmeq->q_depth - 1;
	aqa |= aqa << 16;

	dev->page_size = 1 << page_shift;

	dev->ctrl_config = NVME_CC_CSS_NVM;
	dev->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
	dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
	dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;

	writel(aqa, &dev->bar->aqa);
	nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq);
	nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq);

	result = nvme_enable_ctrl(dev);
	if (result)
		goto free_nvmeq;

	nvmeq->cq_vector = 0;

	nvme_init_queue(dev->queues[NVME_ADMIN_Q], 0);

	return result;

free_nvmeq:
	nvme_free_queues(dev, 0);

	return result;
}

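/**
 * nvme_alloc_cq() - create an I/O completion queue on the controller
 *
 * @dev: The device to operate on
 * @qid: Queue ID to assign
 * @nvmeq: Host-side queue whose CQ ring buffer is handed to the controller
 */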
static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid,
			 struct nvme_queue *nvmeq)
{
	struct nvme_command c;
	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;

	memset(&c, 0, sizeof(c));
	c.create_cq.opcode = nvme_admin_create_cq;
	c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes);
	c.create_cq.cqid = cpu_to_le16(qid);
	c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
	c.create_cq.cq_flags = cpu_to_le16(flags);
	c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);

	return nvme_submit_admin_cmd(dev, &c, NULL);
}

static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid,
			 struct nvme_queue *nvmeq)
{
	struct nvme_command c;
	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;

	memset(&c, 0, sizeof(c));
	c.create_sq.opcode = nvme_admin_create_sq;
	c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds);
	c.create_sq.sqid = cpu_to_le16(qid);
	c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
	c.create_sq.sq_flags = cpu_to_le16(flags);
	c.create_sq.cqid = cpu_to_le16(qid);

	return nvme_submit_admin_cmd(dev, &c, NULL);
}

int nvme_identify(struct nvme_dev *dev, unsigned nsid,
		  unsigned cns, dma_addr_t dma_addr)
{
	struct nvme_command c;
	u32 page_size = dev->page_size;
	int offset = dma_addr & (page_size - 1);
	int length = sizeof(struct nvme_id_ctrl);
	int ret;

	memset(&c, 0, sizeof(c));
	c.identify.opcode = nvme_admin_identify;
	c.identify.nsid = cpu_to_le32(nsid);
	c.identify.prp1 = cpu_to_le64(dma_addr);

	length -= (page_size - offset);
	if (length <= 0) {
		c.identify.prp2 = 0;
	} else {
		dma_addr += (page_size - offset);
		c.identify.prp2 = cpu_to_le64(dma_addr);
	}

	c.identify.cns = cpu_to_le32(cns);

	invalidate_dcache_range(dma_addr,
				dma_addr + sizeof(struct nvme_id_ctrl));

	ret = nvme_submit_admin_cmd(dev, &c, NULL);
	if (!ret)
		invalidate_dcache_range(dma_addr,
					dma_addr + sizeof(struct nvme_id_ctrl));

	return ret;
}

int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
		      dma_addr_t dma_addr, u32 *result)
{
	struct nvme_command c;
	int ret;

	memset(&c, 0, sizeof(c));
	c.features.opcode = nvme_admin_get_features;
	c.features.nsid = cpu_to_le32(nsid);
	c.features.prp1 = cpu_to_le64(dma_addr);
	c.features.fid = cpu_to_le32(fid);

	ret = nvme_submit_admin_cmd(dev, &c, result);

	/*
	 * TODO: Add some cache invalidation when a DMA buffer is involved
	 * in the request, here and before the command gets submitted. The
	 * buffer size varies by feature, also some features use a different
	 * field in the command packet to hold the buffer address.
	 * Section 5.21.1 (Set Features command) in the NVMe specification
	 * details the buffer requirements for each feature.
	 *
	 * At the moment there is no user of this function.
	 */

	return ret;
}

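/**
 * nvme_set_features() - issue a Set Features admin command
 *
 * @dev: The device to operate on
 * @fid: Feature identifier
 * @dword11: Feature-specific value placed in command dword 11
 * @dma_addr: Data buffer for features that take one (unused by current callers)
 * @result: If non-NULL, receives dword 0 of the command completion
 */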
int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
		      dma_addr_t dma_addr, u32 *result)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.features.opcode = nvme_admin_set_features;
	c.features.prp1 = cpu_to_le64(dma_addr);
	c.features.fid = cpu_to_le32(fid);
	c.features.dword11 = cpu_to_le32(dword11);

	/*
	 * TODO: Add a cache clean (aka flush) operation when a DMA buffer is
	 * involved in the request. The buffer size varies by feature, also
	 * some features use a different field in the command packet to hold
	 * the buffer address. Section 5.21.1 (Set Features command) in the
	 * NVMe specification details the buffer requirements for each
	 * feature.
	 *
	 * At the moment the only user of this function is not using
	 * any DMA buffer at all.
	 */

	return nvme_submit_admin_cmd(dev, &c, result);
}

static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
{
	struct nvme_dev *dev = nvmeq->dev;
	int result;

	nvmeq->cq_vector = qid - 1;
	result = nvme_alloc_cq(dev, qid, nvmeq);
	if (result < 0)
		goto release_cq;

	result = nvme_alloc_sq(dev, qid, nvmeq);
	if (result < 0)
		goto release_sq;

	nvme_init_queue(nvmeq, qid);

	return result;

release_sq:
	nvme_delete_sq(dev, qid);
release_cq:
	nvme_delete_cq(dev, qid);

	return result;
}

static int nvme_set_queue_count(struct nvme_dev *dev, int count)
{
	int status;
	u32 result;
	u32 q_count = (count - 1) | ((count - 1) << 16);

	status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES,
				   q_count, 0, &result);

	if (status < 0)
		return status;
	if (status > 1)
		return 0;

	return min(result & 0xffff, result >> 16) + 1;
}

static void nvme_create_io_queues(struct nvme_dev *dev)
{
	unsigned int i;

	for (i = dev->queue_count; i <= dev->max_qid; i++)
		if (!nvme_alloc_queue(dev, i, dev->q_depth))
			break;

	for (i = dev->online_queues; i <= dev->queue_count - 1; i++)
		if (nvme_create_queue(dev->queues[i], i))
			break;
}

static int nvme_setup_io_queues(struct nvme_dev *dev)
{
	int nr_io_queues;
	int result;

	nr_io_queues = 1;
	result = nvme_set_queue_count(dev, nr_io_queues);
	if (result <= 0)
		return result;

	dev->max_qid = nr_io_queues;

	/* Free previously allocated queues */
	nvme_free_queues(dev, nr_io_queues + 1);
	nvme_create_io_queues(dev);

	return 0;
}

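/**
 * nvme_get_info_from_identify() - cache controller data from Identify Controller
 *
 * Stores the namespace count, volatile write cache setting, serial number,
 * model, firmware revision and maximum transfer shift in the nvme_dev
 * structure for later use.
 */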
static int nvme_get_info_from_identify(struct nvme_dev *dev)
{
	struct nvme_id_ctrl *ctrl;
	int ret;
	int shift = NVME_CAP_MPSMIN(dev->cap) + 12;

	ctrl = memalign(dev->page_size, sizeof(struct nvme_id_ctrl));
	if (!ctrl)
		return -ENOMEM;

	ret = nvme_identify(dev, 0, 1, (dma_addr_t)(long)ctrl);
	if (ret) {
		free(ctrl);
		return -EIO;
	}

	dev->nn = le32_to_cpu(ctrl->nn);
	dev->vwc = ctrl->vwc;
	memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
	memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
	memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
	if (ctrl->mdts)
		dev->max_transfer_shift = (ctrl->mdts + shift);
	else {
		/*
		 * Maximum Data Transfer Size (MDTS) field indicates the maximum
		 * data transfer size between the host and the controller. The
		 * host should not submit a command that exceeds this transfer
		 * size. The value is in units of the minimum memory page size
		 * and is reported as a power of two (2^n).
		 *
		 * The spec also says: a value of 0h indicates no restrictions
		 * on transfer size. But in nvme_blk_read/write() below we have
		 * the following algorithm for maximum number of logical blocks
		 * per transfer:
		 *
		 * u16 lbas = 1 << (dev->max_transfer_shift - ns->lba_shift);
		 *
		 * In order for lbas not to overflow, the maximum number is 15
		 * which means dev->max_transfer_shift = 15 + 9 (ns->lba_shift).
		 * Let's use 20 which provides 1MB size.
		 */
		dev->max_transfer_shift = 20;
	}

	free(ctrl);
	return 0;
}

int nvme_get_namespace_id(struct udevice *udev, u32 *ns_id, u8 *eui64)
{
	struct nvme_ns *ns = dev_get_priv(udev);

	if (ns_id)
		*ns_id = ns->ns_id;
	if (eui64)
		memcpy(eui64, ns->eui64, sizeof(ns->eui64));

	return 0;
}

int nvme_scan_namespace(void)
{
	struct uclass *uc;
	struct udevice *dev;
	int ret;

	ret = uclass_get(UCLASS_NVME, &uc);
	if (ret)
		return ret;

	uclass_foreach_dev(dev, uc) {
		ret = device_probe(dev);
		if (ret)
			return ret;
	}

	return 0;
}

static int nvme_blk_probe(struct udevice *udev)
{
	struct nvme_dev *ndev = dev_get_priv(udev->parent);
	struct blk_desc *desc = dev_get_uclass_platdata(udev);
	struct nvme_ns *ns = dev_get_priv(udev);
	u8 flbas;
	struct pci_child_platdata *pplat;
	struct nvme_id_ns *id;

	id = memalign(ndev->page_size, sizeof(struct nvme_id_ns));
	if (!id)
		return -ENOMEM;

	ns->dev = ndev;
	/* extract the namespace id from the block device name */
	ns->ns_id = trailing_strtol(udev->name);
	if (nvme_identify(ndev, ns->ns_id, 0, (dma_addr_t)(long)id)) {
		free(id);
		return -EIO;
	}

	memcpy(&ns->eui64, &id->eui64, sizeof(id->eui64));
	flbas = id->flbas & NVME_NS_FLBAS_LBA_MASK;
	ns->flbas = flbas;
	ns->lba_shift = id->lbaf[flbas].ds;
	list_add(&ns->list, &ndev->namespaces);

	desc->lba = le64_to_cpu(id->nsze);
	desc->log2blksz = ns->lba_shift;
	desc->blksz = 1 << ns->lba_shift;
	desc->bdev = udev;
	pplat = dev_get_parent_platdata(udev->parent);
	sprintf(desc->vendor, "0x%.4x", pplat->vendor);
	memcpy(desc->product, ndev->serial, sizeof(ndev->serial));
	memcpy(desc->revision, ndev->firmware_rev, sizeof(ndev->firmware_rev));
	part_init(desc);

	free(id);
	return 0;
}

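/**
 * nvme_blk_rw() - common helper for block reads and writes
 *
 * Splits the request into chunks bounded by the controller's maximum
 * transfer size, sets up PRP entries for each chunk and submits NVMe
 * read/write commands on the I/O queue.
 *
 * @udev: The block device to access
 * @blknr: Start LBA
 * @blkcnt: Number of logical blocks to transfer
 * @buffer: Data buffer
 * @read: true to read from the device, false to write to it
 * Return: the number of blocks actually transferred
 */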
static ulong nvme_blk_rw(struct udevice *udev, lbaint_t blknr,
			 lbaint_t blkcnt, void *buffer, bool read)
{
	struct nvme_ns *ns = dev_get_priv(udev);
	struct nvme_dev *dev = ns->dev;
	struct nvme_command c;
	struct blk_desc *desc = dev_get_uclass_platdata(udev);
	int status;
	u64 prp2;
	u64 total_len = blkcnt << desc->log2blksz;
	u64 temp_len = total_len;
	uintptr_t temp_buffer = (uintptr_t)buffer;

	u64 slba = blknr;
	u16 lbas = 1 << (dev->max_transfer_shift - ns->lba_shift);
	u64 total_lbas = blkcnt;

	flush_dcache_range((unsigned long)buffer,
			   (unsigned long)buffer + total_len);

	c.rw.opcode = read ? nvme_cmd_read : nvme_cmd_write;
	c.rw.flags = 0;
	c.rw.nsid = cpu_to_le32(ns->ns_id);
	c.rw.control = 0;
	c.rw.dsmgmt = 0;
	c.rw.reftag = 0;
	c.rw.apptag = 0;
	c.rw.appmask = 0;
	c.rw.metadata = 0;

	/* Enable FUA for data integrity if vwc is enabled */
	if (dev->vwc)
		c.rw.control |= NVME_RW_FUA;

	while (total_lbas) {
		if (total_lbas < lbas) {
			lbas = (u16)total_lbas;
			total_lbas = 0;
		} else {
			total_lbas -= lbas;
		}

		if (nvme_setup_prps(dev, &prp2,
				    lbas << ns->lba_shift, temp_buffer))
			return -EIO;
		c.rw.slba = cpu_to_le64(slba);
		slba += lbas;
		c.rw.length = cpu_to_le16(lbas - 1);
		c.rw.prp1 = cpu_to_le64(temp_buffer);
		c.rw.prp2 = cpu_to_le64(prp2);
		status = nvme_submit_sync_cmd(dev->queues[NVME_IO_Q],
					      &c, NULL, IO_TIMEOUT);
		if (status)
			break;
		temp_len -= (u32)lbas << ns->lba_shift;
		temp_buffer += lbas << ns->lba_shift;
	}

	if (read)
		invalidate_dcache_range((unsigned long)buffer,
					(unsigned long)buffer + total_len);

	return (total_len - temp_len) >> desc->log2blksz;
}

static ulong nvme_blk_read(struct udevice *udev, lbaint_t blknr,
			   lbaint_t blkcnt, void *buffer)
{
	return nvme_blk_rw(udev, blknr, blkcnt, buffer, true);
}

static ulong nvme_blk_write(struct udevice *udev, lbaint_t blknr,
			    lbaint_t blkcnt, const void *buffer)
{
	return nvme_blk_rw(udev, blknr, blkcnt, (void *)buffer, false);
}

static const struct blk_ops nvme_blk_ops = {
	.read	= nvme_blk_read,
	.write	= nvme_blk_write,
};

U_BOOT_DRIVER(nvme_blk) = {
	.name	= "nvme-blk",
	.id	= UCLASS_BLK,
	.probe	= nvme_blk_probe,
	.ops	= &nvme_blk_ops,
	.priv_auto_alloc_size = sizeof(struct nvme_ns),
};

static int nvme_bind(struct udevice *udev)
{
	static int ndev_num;
	char name[20];

	sprintf(name, "nvme#%d", ndev_num++);

	return device_set_name(udev, name);
}

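/**
 * nvme_probe() - initialize an NVMe controller
 *
 * Maps BAR0, sets up the admin queue, allocates the PRP pool, creates the
 * I/O queue and then binds an "nvme-blk" block device for each active
 * namespace reported by Identify.
 */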
static int nvme_probe(struct udevice *udev)
{
	int ret;
	struct nvme_dev *ndev = dev_get_priv(udev);
	struct nvme_id_ns *id;

	ndev->instance = trailing_strtol(udev->name);

	INIT_LIST_HEAD(&ndev->namespaces);
	ndev->bar = dm_pci_map_bar(udev, PCI_BASE_ADDRESS_0,
				   PCI_REGION_MEM);
	if (readl(&ndev->bar->csts) == -1) {
		ret = -ENODEV;
		printf("Error: %s: Controller not accessible!\n", udev->name);
		goto free_nvme;
	}

	ndev->queues = malloc(NVME_Q_NUM * sizeof(struct nvme_queue *));
	if (!ndev->queues) {
		ret = -ENOMEM;
		printf("Error: %s: Out of memory!\n", udev->name);
		goto free_nvme;
	}
	memset(ndev->queues, 0, NVME_Q_NUM * sizeof(struct nvme_queue *));

	ndev->cap = nvme_readq(&ndev->bar->cap);
	ndev->q_depth = min_t(int, NVME_CAP_MQES(ndev->cap) + 1, NVME_Q_DEPTH);
	ndev->db_stride = 1 << NVME_CAP_STRIDE(ndev->cap);
	ndev->dbs = ((void __iomem *)ndev->bar) + 4096;

	ret = nvme_configure_admin_queue(ndev);
	if (ret)
		goto free_queue;

	/* Allocate after the page size is known */
	ndev->prp_pool = memalign(ndev->page_size, MAX_PRP_POOL);
	if (!ndev->prp_pool) {
		ret = -ENOMEM;
		printf("Error: %s: Out of memory!\n", udev->name);
		goto free_queue;
	}
	ndev->prp_entry_num = MAX_PRP_POOL >> 3;

	ret = nvme_setup_io_queues(ndev);
	if (ret)
		goto free_queue;

	nvme_get_info_from_identify(ndev);

	/* Create a blk device for each namespace */

	id = memalign(ndev->page_size, sizeof(struct nvme_id_ns));
	if (!id) {
		ret = -ENOMEM;
		goto free_queue;
	}

	for (int i = 1; i <= ndev->nn; i++) {
		struct udevice *ns_udev;
		char name[20];

		memset(id, 0, sizeof(*id));
		if (nvme_identify(ndev, i, 0, (dma_addr_t)(long)id)) {
			ret = -EIO;
			goto free_id;
		}

		/* skip inactive namespace */
		if (!id->nsze)
			continue;

		/*
		 * Encode the namespace id to the device name so that
		 * we can extract it when doing the probe.
		 */
		sprintf(name, "blk#%d", i);

		/* The real blksz and size will be set by nvme_blk_probe() */
		ret = blk_create_devicef(udev, "nvme-blk", name, IF_TYPE_NVME,
					 -1, 512, 0, &ns_udev);
		if (ret)
			goto free_id;
	}

	free(id);
	return 0;

free_id:
	free(id);
free_queue:
	free((void *)ndev->queues);
free_nvme:
	return ret;
}

U_BOOT_DRIVER(nvme) = {
	.name	= "nvme",
	.id	= UCLASS_NVME,
	.bind	= nvme_bind,
	.probe	= nvme_probe,
	.priv_auto_alloc_size = sizeof(struct nvme_dev),
};

struct pci_device_id nvme_supported[] = {
	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, ~0) },
	{}
};

U_BOOT_PCI_DEVICE(nvme, nvme_supported);