/*
 * Copyright (C) 2017 NXP Semiconductors
 * Copyright (C) 2017 Bin Meng <bmeng.cn@gmail.com>
 *
 * SPDX-License-Identifier:	GPL-2.0+
 */

#include <common.h>
#include <bouncebuf.h>
#include <dm.h>
#include <errno.h>
#include <memalign.h>
#include <pci.h>
#include <dm/device-internal.h>
#include "nvme.h"

#define NVME_Q_DEPTH		2
#define NVME_AQ_DEPTH		2
#define NVME_SQ_SIZE(depth)	(depth * sizeof(struct nvme_command))
#define NVME_CQ_SIZE(depth)	(depth * sizeof(struct nvme_completion))
#define NVME_CQ_ALLOCATION	ALIGN(NVME_CQ_SIZE(NVME_Q_DEPTH), \
				      ARCH_DMA_MINALIGN)
#define ADMIN_TIMEOUT		60
#define IO_TIMEOUT		30
#define MAX_PRP_POOL		512

enum nvme_queue_id {
	NVME_ADMIN_Q,
	NVME_IO_Q,
	NVME_Q_NUM,
};

/*
 * An NVM Express queue. Each device has at least two (one for admin
 * commands and one for I/O commands).
 */
struct nvme_queue {
	struct nvme_dev *dev;
	struct nvme_command *sq_cmds;
	struct nvme_completion *cqes;
	wait_queue_head_t sq_full;
	u32 __iomem *q_db;
	u16 q_depth;
	s16 cq_vector;
	u16 sq_head;
	u16 sq_tail;
	u16 cq_head;
	u16 qid;
	u8 cq_phase;
	u8 cqe_seen;
	unsigned long cmdid_data[];
};

static int nvme_wait_ready(struct nvme_dev *dev, bool enabled)
{
	u32 bit = enabled ? NVME_CSTS_RDY : 0;
	int timeout;
	ulong start;

	/* Timeout field in the CAP register is in 500 millisecond units */
	timeout = NVME_CAP_TIMEOUT(dev->cap) * 500;

	start = get_timer(0);
	while (get_timer(start) < timeout) {
		if ((readl(&dev->bar->csts) & NVME_CSTS_RDY) == bit)
			return 0;
	}

	return -ETIME;
}

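/**
 * nvme_setup_prps() - set up the PRP entries for a data transfer
 *
 * PRP1 of a command carries the address of the start of the data buffer, so
 * this helper only works out what PRP2 must be: zero when the transfer fits
 * in one device page, the address of the second page when it fits in two,
 * and otherwise the address of a PRP list built in dev->prp_pool. The pool
 * is grown if needed and flushed from the data cache so the controller sees
 * the list entries.
 *
 * @dev:	NVMe device the transfer is addressed to
 * @prp2:	returns the value for the command's PRP2 field
 * @total_len:	transfer length in bytes
 * @dma_addr:	bus address of the start of the data buffer
 * Return:	0 on success, -ENOMEM if the PRP pool cannot be enlarged
 */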
static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2,
			   int total_len, u64 dma_addr)
{
	u32 page_size = dev->page_size;
	int offset = dma_addr & (page_size - 1);
	u64 *prp_pool;
	int length = total_len;
	int i, nprps;
	u32 prps_per_page = page_size >> 3;
	u32 num_pages;

	length -= (page_size - offset);

	if (length <= 0) {
		*prp2 = 0;
		return 0;
	}

	if (length)
		dma_addr += (page_size - offset);

	if (length <= page_size) {
		*prp2 = dma_addr;
		return 0;
	}

	nprps = DIV_ROUND_UP(length, page_size);
	num_pages = DIV_ROUND_UP(nprps + 1, prps_per_page);

	if (nprps > dev->prp_entry_num) {
		free(dev->prp_pool);
		/*
		 * Always increase in increments of pages. It doesn't waste
		 * much memory and reduces the number of allocations.
		 */
		dev->prp_pool = memalign(page_size, num_pages * page_size);
		if (!dev->prp_pool) {
			printf("Error: malloc prp_pool fail\n");
			return -ENOMEM;
		}
		dev->prp_entry_num = prps_per_page * num_pages;
	}

	prp_pool = dev->prp_pool;
	i = 0;
	while (nprps) {
		if (i == prps_per_page) {
			*(prp_pool + i) = *(prp_pool + i - 1);
			*(prp_pool + i - 1) = cpu_to_le64((ulong)prp_pool +
					page_size);
			i = 1;
			prp_pool += page_size;
		}
		*(prp_pool + i++) = cpu_to_le64(dma_addr);
		dma_addr += page_size;
		nprps--;
	}
	*prp2 = (ulong)dev->prp_pool;

	flush_dcache_range((ulong)dev->prp_pool, (ulong)dev->prp_pool +
			   dev->prp_entry_num * sizeof(u64));

	return 0;
}

static __le16 nvme_get_cmd_id(void)
{
	static unsigned short cmdid;

	return cpu_to_le16((cmdid < USHRT_MAX) ? cmdid++ : 0);
}

static u16 nvme_read_completion_status(struct nvme_queue *nvmeq, u16 index)
{
	/*
	 * Single CQ entries are always smaller than a cache line, so we
	 * can't invalidate them individually. However CQ entries are
	 * read only by the CPU, so it's safe to always invalidate all of them,
	 * as the cache line should never become dirty.
	 */
	ulong start = (ulong)&nvmeq->cqes[0];
	ulong stop = start + NVME_CQ_ALLOCATION;

	invalidate_dcache_range(start, stop);

	return readw(&(nvmeq->cqes[index].status));
}

/**
 * nvme_submit_cmd() - copy a command into a queue and ring the doorbell
 *
 * @nvmeq:	The queue to use
 * @cmd:	The command to send
 */
static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
{
	u16 tail = nvmeq->sq_tail;

	memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
	flush_dcache_range((ulong)&nvmeq->sq_cmds[tail],
			   (ulong)&nvmeq->sq_cmds[tail] + sizeof(*cmd));

	if (++tail == nvmeq->q_depth)
		tail = 0;
	writel(tail, nvmeq->q_db);
	nvmeq->sq_tail = tail;
}

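/**
 * nvme_submit_sync_cmd() - submit a command and poll for its completion
 *
 * The command is copied into the submission queue and the matching
 * completion queue entry is then polled for. A new entry is recognised by
 * its phase tag bit, which the controller inverts on every pass through the
 * queue. On success the completion's dword 0 is optionally returned through
 * @result; on error the status code is printed and -EIO is returned. In both
 * cases the completion queue head doorbell is advanced past the entry.
 *
 * @nvmeq:	The queue to use
 * @cmd:	The command to send
 * @result:	Optional buffer for completion dword 0
 * @timeout:	Timeout to apply while polling (ADMIN_TIMEOUT or IO_TIMEOUT)
 */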
static int nvme_submit_sync_cmd(struct nvme_queue *nvmeq,
				struct nvme_command *cmd,
				u32 *result, unsigned timeout)
{
	u16 head = nvmeq->cq_head;
	u16 phase = nvmeq->cq_phase;
	u16 status;
	ulong start_time;
	ulong timeout_us = timeout * 100000;

	cmd->common.command_id = nvme_get_cmd_id();
	nvme_submit_cmd(nvmeq, cmd);

	start_time = timer_get_us();

	for (;;) {
		status = nvme_read_completion_status(nvmeq, head);
		if ((status & 0x01) == phase)
			break;
		if (timeout_us > 0 && (timer_get_us() - start_time)
		    >= timeout_us)
			return -ETIMEDOUT;
	}

	status >>= 1;
	if (status) {
		printf("ERROR: status = %x, phase = %d, head = %d\n",
		       status, phase, head);
		status = 0;
		if (++head == nvmeq->q_depth) {
			head = 0;
			phase = !phase;
		}
		writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
		nvmeq->cq_head = head;
		nvmeq->cq_phase = phase;

		return -EIO;
	}

	if (result)
		*result = readl(&(nvmeq->cqes[head].result));

	if (++head == nvmeq->q_depth) {
		head = 0;
		phase = !phase;
	}
	writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
	nvmeq->cq_head = head;
	nvmeq->cq_phase = phase;

	return status;
}

static int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
				 u32 *result)
{
	return nvme_submit_sync_cmd(dev->queues[NVME_ADMIN_Q], cmd,
				    result, ADMIN_TIMEOUT);
}

static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev,
					   int qid, int depth)
{
	struct nvme_queue *nvmeq = malloc(sizeof(*nvmeq));
	if (!nvmeq)
		return NULL;
	memset(nvmeq, 0, sizeof(*nvmeq));

	nvmeq->cqes = (void *)memalign(4096, NVME_CQ_ALLOCATION);
	if (!nvmeq->cqes)
		goto free_nvmeq;
	memset((void *)nvmeq->cqes, 0, NVME_CQ_SIZE(depth));

	nvmeq->sq_cmds = (void *)memalign(4096, NVME_SQ_SIZE(depth));
	if (!nvmeq->sq_cmds)
		goto free_queue;
	memset((void *)nvmeq->sq_cmds, 0, NVME_SQ_SIZE(depth));

	nvmeq->dev = dev;

	nvmeq->cq_head = 0;
	nvmeq->cq_phase = 1;
	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
	nvmeq->q_depth = depth;
	nvmeq->qid = qid;
	dev->queue_count++;
	dev->queues[qid] = nvmeq;

	return nvmeq;

free_queue:
	free((void *)nvmeq->cqes);
free_nvmeq:
	free(nvmeq);

	return NULL;
}

static int nvme_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.delete_queue.opcode = opcode;
	c.delete_queue.qid = cpu_to_le16(id);

	return nvme_submit_admin_cmd(dev, &c, NULL);
}

static int nvme_delete_sq(struct nvme_dev *dev, u16 sqid)
{
	return nvme_delete_queue(dev, nvme_admin_delete_sq, sqid);
}

static int nvme_delete_cq(struct nvme_dev *dev, u16 cqid)
{
	return nvme_delete_queue(dev, nvme_admin_delete_cq, cqid);
}

static int nvme_enable_ctrl(struct nvme_dev *dev)
{
	dev->ctrl_config &= ~NVME_CC_SHN_MASK;
	dev->ctrl_config |= NVME_CC_ENABLE;
	writel(dev->ctrl_config, &dev->bar->cc);

	return nvme_wait_ready(dev, true);
}

static int nvme_disable_ctrl(struct nvme_dev *dev)
{
	dev->ctrl_config &= ~NVME_CC_SHN_MASK;
	dev->ctrl_config &= ~NVME_CC_ENABLE;
	writel(dev->ctrl_config, &dev->bar->cc);

	return nvme_wait_ready(dev, false);
}

static void nvme_free_queue(struct nvme_queue *nvmeq)
{
	free((void *)nvmeq->cqes);
	free(nvmeq->sq_cmds);
	free(nvmeq);
}

static void nvme_free_queues(struct nvme_dev *dev, int lowest)
{
	int i;

	for (i = dev->queue_count - 1; i >= lowest; i--) {
		struct nvme_queue *nvmeq = dev->queues[i];
		dev->queue_count--;
		dev->queues[i] = NULL;
		nvme_free_queue(nvmeq);
	}
}

static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
{
	struct nvme_dev *dev = nvmeq->dev;

	nvmeq->sq_tail = 0;
	nvmeq->cq_head = 0;
	nvmeq->cq_phase = 1;
	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
	memset((void *)nvmeq->cqes, 0, NVME_CQ_SIZE(nvmeq->q_depth));
	flush_dcache_range((ulong)nvmeq->cqes,
			   (ulong)nvmeq->cqes + NVME_CQ_ALLOCATION);
	dev->online_queues++;
}

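/**
 * nvme_configure_admin_queue() - bring up the controller's admin queue
 *
 * Disables the controller, allocates the admin submission/completion queue
 * pair if it does not exist yet, programs AQA, ASQ and ACQ along with the
 * controller configuration (page size, arbitration and queue entry sizes),
 * then re-enables the controller and waits for it to report ready.
 */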
static int nvme_configure_admin_queue(struct nvme_dev *dev)
{
	int result;
	u32 aqa;
	u64 cap = dev->cap;
	struct nvme_queue *nvmeq;
	/* most architectures use 4KB as the page size */
	unsigned page_shift = 12;
	unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12;
	unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12;

	if (page_shift < dev_page_min) {
		debug("Device minimum page size (%u) too large for host (%u)\n",
		      1 << dev_page_min, 1 << page_shift);
		return -ENODEV;
	}

	if (page_shift > dev_page_max) {
		debug("Device maximum page size (%u) smaller than host (%u)\n",
		      1 << dev_page_max, 1 << page_shift);
		page_shift = dev_page_max;
	}

	result = nvme_disable_ctrl(dev);
	if (result < 0)
		return result;

	nvmeq = dev->queues[NVME_ADMIN_Q];
	if (!nvmeq) {
		nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
		if (!nvmeq)
			return -ENOMEM;
	}

	aqa = nvmeq->q_depth - 1;
	aqa |= aqa << 16;

	dev->page_size = 1 << page_shift;

	dev->ctrl_config = NVME_CC_CSS_NVM;
	dev->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
	dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
	dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;

	writel(aqa, &dev->bar->aqa);
	nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq);
	nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq);

	result = nvme_enable_ctrl(dev);
	if (result)
		goto free_nvmeq;

	nvmeq->cq_vector = 0;

	nvme_init_queue(dev->queues[NVME_ADMIN_Q], 0);

	return result;

free_nvmeq:
	nvme_free_queues(dev, 0);

	return result;
}

static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid,
			 struct nvme_queue *nvmeq)
{
	struct nvme_command c;
	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;

	memset(&c, 0, sizeof(c));
	c.create_cq.opcode = nvme_admin_create_cq;
	c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes);
	c.create_cq.cqid = cpu_to_le16(qid);
	c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
	c.create_cq.cq_flags = cpu_to_le16(flags);
	c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);

	return nvme_submit_admin_cmd(dev, &c, NULL);
}

static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid,
			 struct nvme_queue *nvmeq)
{
	struct nvme_command c;
	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;

	memset(&c, 0, sizeof(c));
	c.create_sq.opcode = nvme_admin_create_sq;
	c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds);
	c.create_sq.sqid = cpu_to_le16(qid);
	c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
	c.create_sq.sq_flags = cpu_to_le16(flags);
	c.create_sq.cqid = cpu_to_le16(qid);

	return nvme_submit_admin_cmd(dev, &c, NULL);
}

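/**
 * nvme_identify() - issue an Identify admin command
 *
 * @dev:	NVMe device to query
 * @nsid:	namespace ID, used together with @cns 0 to identify a namespace
 * @cns:	Controller or Namespace Structure selector; this driver passes
 *		1 for the controller data structure and 0 for a namespace
 *		data structure
 * @dma_addr:	bus address of the buffer receiving the identify data; the
 *		PRPs are sized for sizeof(struct nvme_id_ctrl) bytes
 */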
int nvme_identify(struct nvme_dev *dev, unsigned nsid,
		  unsigned cns, dma_addr_t dma_addr)
{
	struct nvme_command c;
	u32 page_size = dev->page_size;
	int offset = dma_addr & (page_size - 1);
	int length = sizeof(struct nvme_id_ctrl);
	int ret;

	memset(&c, 0, sizeof(c));
	c.identify.opcode = nvme_admin_identify;
	c.identify.nsid = cpu_to_le32(nsid);
	c.identify.prp1 = cpu_to_le64(dma_addr);

	length -= (page_size - offset);
	if (length <= 0) {
		c.identify.prp2 = 0;
	} else {
		dma_addr += (page_size - offset);
		c.identify.prp2 = cpu_to_le64(dma_addr);
	}

	c.identify.cns = cpu_to_le32(cns);

	invalidate_dcache_range(dma_addr,
				dma_addr + sizeof(struct nvme_id_ctrl));

	ret = nvme_submit_admin_cmd(dev, &c, NULL);
	if (!ret)
		invalidate_dcache_range(dma_addr,
					dma_addr + sizeof(struct nvme_id_ctrl));

	return ret;
}

int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
		      dma_addr_t dma_addr, u32 *result)
{
	struct nvme_command c;
	int ret;

	memset(&c, 0, sizeof(c));
	c.features.opcode = nvme_admin_get_features;
	c.features.nsid = cpu_to_le32(nsid);
	c.features.prp1 = cpu_to_le64(dma_addr);
	c.features.fid = cpu_to_le32(fid);

	ret = nvme_submit_admin_cmd(dev, &c, result);

	/*
	 * TODO: Add some cache invalidation when a DMA buffer is involved
	 * in the request, here and before the command gets submitted. The
	 * buffer size varies by feature, also some features use a different
	 * field in the command packet to hold the buffer address.
	 * Section 5.21.1 (Set Features command) in the NVMe specification
	 * details the buffer requirements for each feature.
	 *
	 * At the moment there is no user of this function.
	 */

	return ret;
}

int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
		      dma_addr_t dma_addr, u32 *result)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.features.opcode = nvme_admin_set_features;
	c.features.prp1 = cpu_to_le64(dma_addr);
	c.features.fid = cpu_to_le32(fid);
	c.features.dword11 = cpu_to_le32(dword11);

	/*
	 * TODO: Add a cache clean (aka flush) operation when a DMA buffer is
	 * involved in the request. The buffer size varies by feature, also
	 * some features use a different field in the command packet to hold
	 * the buffer address. Section 5.21.1 (Set Features command) in the
	 * NVMe specification details the buffer requirements for each
	 * feature.
	 * At the moment the only user of this function is not using
	 * any DMA buffer at all.
	 */

	return nvme_submit_admin_cmd(dev, &c, result);
}

static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
{
	struct nvme_dev *dev = nvmeq->dev;
	int result;

	nvmeq->cq_vector = qid - 1;
	result = nvme_alloc_cq(dev, qid, nvmeq);
	if (result < 0)
		goto release_cq;

	result = nvme_alloc_sq(dev, qid, nvmeq);
	if (result < 0)
		goto release_sq;

	nvme_init_queue(nvmeq, qid);

	return result;

release_sq:
	nvme_delete_sq(dev, qid);
release_cq:
	nvme_delete_cq(dev, qid);

	return result;
}

static int nvme_set_queue_count(struct nvme_dev *dev, int count)
{
	int status;
	u32 result;
	u32 q_count = (count - 1) | ((count - 1) << 16);

	status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES,
				   q_count, 0, &result);

	if (status < 0)
		return status;
	if (status > 1)
		return 0;

	return min(result & 0xffff, result >> 16) + 1;
}

static void nvme_create_io_queues(struct nvme_dev *dev)
{
	unsigned int i;

	for (i = dev->queue_count; i <= dev->max_qid; i++)
		if (!nvme_alloc_queue(dev, i, dev->q_depth))
			break;

	for (i = dev->online_queues; i <= dev->queue_count - 1; i++)
		if (nvme_create_queue(dev->queues[i], i))
			break;
}

static int nvme_setup_io_queues(struct nvme_dev *dev)
{
	int nr_io_queues;
	int result;

	nr_io_queues = 1;
	result = nvme_set_queue_count(dev, nr_io_queues);
	if (result <= 0)
		return result;

	dev->max_qid = nr_io_queues;

	/* Free previously allocated queues */
	nvme_free_queues(dev, nr_io_queues + 1);
	nvme_create_io_queues(dev);

	return 0;
}

static int nvme_get_info_from_identify(struct nvme_dev *dev)
{
	struct nvme_id_ctrl *ctrl;
	int ret;
	int shift = NVME_CAP_MPSMIN(dev->cap) + 12;

	ctrl = memalign(dev->page_size, sizeof(struct nvme_id_ctrl));
	if (!ctrl)
		return -ENOMEM;

	ret = nvme_identify(dev, 0, 1, (dma_addr_t)(long)ctrl);
	if (ret) {
		free(ctrl);
		return -EIO;
	}

	dev->nn = le32_to_cpu(ctrl->nn);
	dev->vwc = ctrl->vwc;
	memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
	memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
	memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
	if (ctrl->mdts)
		dev->max_transfer_shift = (ctrl->mdts + shift);
	else {
		/*
		 * Maximum Data Transfer Size (MDTS) field indicates the maximum
		 * data transfer size between the host and the controller. The
		 * host should not submit a command that exceeds this transfer
		 * size. The value is in units of the minimum memory page size
		 * and is reported as a power of two (2^n).
		 *
		 * The spec also says: a value of 0h indicates no restrictions
		 * on transfer size. But in nvme_blk_read/write() below we have
		 * the following algorithm for maximum number of logic blocks
		 * per transfer:
		 *
		 * u16 lbas = 1 << (dev->max_transfer_shift - ns->lba_shift);
		 *
		 * In order for lbas not to overflow, the maximum number is 15
		 * which means dev->max_transfer_shift = 15 + 9 (ns->lba_shift).
		 * Let's use 20 which provides 1MB size.
		 */
		dev->max_transfer_shift = 20;
	}

	free(ctrl);
	return 0;
}

int nvme_get_namespace_id(struct udevice *udev, u32 *ns_id, u8 *eui64)
{
	struct nvme_ns *ns = dev_get_priv(udev);

	if (ns_id)
		*ns_id = ns->ns_id;
	if (eui64)
		memcpy(eui64, ns->eui64, sizeof(ns->eui64));

	return 0;
}

int nvme_scan_namespace(void)
{
	struct uclass *uc;
	struct udevice *dev;
	int ret;

	ret = uclass_get(UCLASS_NVME, &uc);
	if (ret)
		return ret;

	uclass_foreach_dev(dev, uc) {
		ret = device_probe(dev);
		if (ret)
			return ret;
	}

	return 0;
}

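/**
 * nvme_blk_probe() - probe a namespace block device
 *
 * Each active namespace gets a child block device bound by nvme_probe(); the
 * namespace ID is encoded in the device name and recovered here. The
 * namespace is then identified to obtain its capacity and LBA format, which
 * fill in the block descriptor before the partition table is scanned.
 */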
static int nvme_blk_probe(struct udevice *udev)
{
	struct nvme_dev *ndev = dev_get_priv(udev->parent);
	struct blk_desc *desc = dev_get_uclass_platdata(udev);
	struct nvme_ns *ns = dev_get_priv(udev);
	u8 flbas;
	struct pci_child_platdata *pplat;
	struct nvme_id_ns *id;

	id = memalign(ndev->page_size, sizeof(struct nvme_id_ns));
	if (!id)
		return -ENOMEM;

	ns->dev = ndev;
	/* extract the namespace id from the block device name */
	ns->ns_id = trailing_strtol(udev->name);
	if (nvme_identify(ndev, ns->ns_id, 0, (dma_addr_t)(long)id)) {
		free(id);
		return -EIO;
	}

	memcpy(&ns->eui64, &id->eui64, sizeof(id->eui64));
	flbas = id->flbas & NVME_NS_FLBAS_LBA_MASK;
	ns->flbas = flbas;
	ns->lba_shift = id->lbaf[flbas].ds;
	list_add(&ns->list, &ndev->namespaces);

	desc->lba = le64_to_cpu(id->nsze);
	desc->log2blksz = ns->lba_shift;
	desc->blksz = 1 << ns->lba_shift;
	desc->bdev = udev;
	pplat = dev_get_parent_platdata(udev->parent);
	sprintf(desc->vendor, "0x%.4x", pplat->vendor);
	memcpy(desc->product, ndev->serial, sizeof(ndev->serial));
	memcpy(desc->revision, ndev->firmware_rev, sizeof(ndev->firmware_rev));
	part_init(desc);

	free(id);
	return 0;
}

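/**
 * nvme_blk_rw() - common implementation of block reads and writes
 *
 * A bounce buffer is used so the buffer handed to the controller is
 * cache-line aligned. The request is split into chunks of at most
 * 1 << (dev->max_transfer_shift - ns->lba_shift) logical blocks (the
 * controller's maximum data transfer size); each chunk is issued as one NVM
 * read/write command on the I/O queue and completed synchronously.
 *
 * Return: the number of blocks transferred
 */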
static ulong nvme_blk_rw(struct udevice *udev, lbaint_t blknr,
			 lbaint_t blkcnt, void *buffer, bool read)
{
	struct nvme_ns *ns = dev_get_priv(udev);
	struct nvme_dev *dev = ns->dev;
	struct nvme_command c;
	struct blk_desc *desc = dev_get_uclass_platdata(udev);
	int status;
	u64 prp2;
	u64 total_len = blkcnt << desc->log2blksz;
	u64 temp_len = total_len;
	uintptr_t temp_buffer;

	u64 slba = blknr;
	u16 lbas = 1 << (dev->max_transfer_shift - ns->lba_shift);
	u64 total_lbas = blkcnt;

	struct bounce_buffer bb;
	unsigned int bb_flags;
	int ret;

	if (read)
		bb_flags = GEN_BB_WRITE;
	else
		bb_flags = GEN_BB_READ;

	ret = bounce_buffer_start(&bb, buffer, total_len, bb_flags);
	if (ret)
		return -ENOMEM;
	temp_buffer = (unsigned long)bb.bounce_buffer;

	c.rw.opcode = read ? nvme_cmd_read : nvme_cmd_write;
	c.rw.flags = 0;
	c.rw.nsid = cpu_to_le32(ns->ns_id);
	c.rw.control = 0;
	c.rw.dsmgmt = 0;
	c.rw.reftag = 0;
	c.rw.apptag = 0;
	c.rw.appmask = 0;
	c.rw.metadata = 0;

	/* Enable FUA for data integrity if vwc is enabled */
	if (dev->vwc)
		c.rw.control |= NVME_RW_FUA;

	while (total_lbas) {
		if (total_lbas < lbas) {
			lbas = (u16)total_lbas;
			total_lbas = 0;
		} else {
			total_lbas -= lbas;
		}

		if (nvme_setup_prps(dev, &prp2,
				    lbas << ns->lba_shift, temp_buffer)) {
			bounce_buffer_stop(&bb);
			return -EIO;
		}
		c.rw.slba = cpu_to_le64(slba);
		slba += lbas;
		c.rw.length = cpu_to_le16(lbas - 1);
		c.rw.prp1 = cpu_to_le64(temp_buffer);
		c.rw.prp2 = cpu_to_le64(prp2);
		status = nvme_submit_sync_cmd(dev->queues[NVME_IO_Q],
					      &c, NULL, IO_TIMEOUT);
		if (status)
			break;
		temp_len -= (u32)lbas << ns->lba_shift;
		temp_buffer += lbas << ns->lba_shift;
	}

	bounce_buffer_stop(&bb);

	return (total_len - temp_len) >> desc->log2blksz;
}

static ulong nvme_blk_read(struct udevice *udev, lbaint_t blknr,
			   lbaint_t blkcnt, void *buffer)
{
	return nvme_blk_rw(udev, blknr, blkcnt, buffer, true);
}

static ulong nvme_blk_write(struct udevice *udev, lbaint_t blknr,
			    lbaint_t blkcnt, const void *buffer)
{
	return nvme_blk_rw(udev, blknr, blkcnt, (void *)buffer, false);
}

static ulong nvme_blk_erase(struct udevice *udev, lbaint_t blknr,
			    lbaint_t blkcnt)
{
	ALLOC_CACHE_ALIGN_BUFFER(struct nvme_dsm_range, range, sizeof(struct nvme_dsm_range));
	struct nvme_ns *ns = dev_get_priv(udev);
	struct nvme_dev *dev = ns->dev;
	struct nvme_command cmnd;

	memset(&cmnd, 0, sizeof(cmnd));

	range->cattr = cpu_to_le32(0);
	range->nlb = cpu_to_le32(blkcnt);
	range->slba = cpu_to_le64(blknr);

	cmnd.dsm.opcode = nvme_cmd_dsm;
	cmnd.dsm.command_id = nvme_get_cmd_id();
	cmnd.dsm.nsid = cpu_to_le32(ns->ns_id);
	cmnd.dsm.prp1 = cpu_to_le64((ulong)range);
	cmnd.dsm.nr = 0;
	cmnd.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
	cmnd.common.nsid = cpu_to_le32(ns->ns_id);

	flush_dcache_range((ulong)range,
			   (ulong)range + sizeof(struct nvme_dsm_range));

	nvme_submit_cmd(dev->queues[NVME_IO_Q], &cmnd);
	return blkcnt;
}

static const struct blk_ops nvme_blk_ops = {
	.read	= nvme_blk_read,
	.write	= nvme_blk_write,
	.erase	= nvme_blk_erase,
};

U_BOOT_DRIVER(nvme_blk) = {
	.name	= "nvme-blk",
	.id	= UCLASS_BLK,
	.probe	= nvme_blk_probe,
	.ops	= &nvme_blk_ops,
	.priv_auto_alloc_size = sizeof(struct nvme_ns),
};

static int nvme_bind(struct udevice *udev)
{
	static int ndev_num;
	char name[20];

	sprintf(name, "nvme#%d", ndev_num++);

	return device_set_name(udev, name);
}

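/**
 * nvme_probe() - probe an NVMe controller
 *
 * Maps BAR0 and reads the capability register, sets up the admin queue, the
 * PRP pool and one I/O queue, then identifies the controller and every
 * namespace. A "nvme-blk" child block device is created for each active
 * namespace; its real block size and capacity are filled in later by
 * nvme_blk_probe().
 */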
static int nvme_probe(struct udevice *udev)
{
	int ret;
	struct nvme_dev *ndev = dev_get_priv(udev);
	struct nvme_id_ns *id;

	ndev->instance = trailing_strtol(udev->name);

	INIT_LIST_HEAD(&ndev->namespaces);
	ndev->bar = dm_pci_map_bar(udev, PCI_BASE_ADDRESS_0,
				   PCI_REGION_MEM);
	if (readl(&ndev->bar->csts) == -1) {
		ret = -ENODEV;
		printf("Error: %s: NVMe controller not responding\n",
		       udev->name);
		goto free_nvme;
	}

	ndev->queues = malloc(NVME_Q_NUM * sizeof(struct nvme_queue *));
	if (!ndev->queues) {
		ret = -ENOMEM;
		printf("Error: %s: Out of memory!\n", udev->name);
		goto free_nvme;
	}
	memset(ndev->queues, 0, NVME_Q_NUM * sizeof(struct nvme_queue *));

	ndev->cap = nvme_readq(&ndev->bar->cap);
	ndev->q_depth = min_t(int, NVME_CAP_MQES(ndev->cap) + 1, NVME_Q_DEPTH);
	ndev->db_stride = 1 << NVME_CAP_STRIDE(ndev->cap);
	ndev->dbs = ((void __iomem *)ndev->bar) + 4096;

	ret = nvme_configure_admin_queue(ndev);
	if (ret)
		goto free_queue;

	/* Allocate after the page size is known */
	ndev->prp_pool = memalign(ndev->page_size, MAX_PRP_POOL);
	if (!ndev->prp_pool) {
		ret = -ENOMEM;
		printf("Error: %s: Out of memory!\n", udev->name);
		goto free_nvme;
	}
	ndev->prp_entry_num = MAX_PRP_POOL >> 3;

	ret = nvme_setup_io_queues(ndev);
	if (ret)
		goto free_queue;

	nvme_get_info_from_identify(ndev);

	/* Create a blk device for each namespace */

	id = memalign(ndev->page_size, sizeof(struct nvme_id_ns));
	if (!id) {
		ret = -ENOMEM;
		goto free_queue;
	}

	for (int i = 1; i <= ndev->nn; i++) {
		struct udevice *ns_udev;
		char name[20];

		memset(id, 0, sizeof(*id));
		if (nvme_identify(ndev, i, 0, (dma_addr_t)(long)id)) {
			ret = -EIO;
			goto free_id;
		}

		/* skip inactive namespace */
		if (!id->nsze)
			continue;

		/*
		 * Encode the namespace id to the device name so that
		 * we can extract it when doing the probe.
		 */
		sprintf(name, "blk#%d", i);

		/* The real blksz and size will be set by nvme_blk_probe() */
		ret = blk_create_devicef(udev, "nvme-blk", name, IF_TYPE_NVME,
					 -1, 512, 0, &ns_udev);
		if (ret)
			goto free_id;
	}

	free(id);
	return 0;

free_id:
	free(id);
free_queue:
	free((void *)ndev->queues);
free_nvme:
	return ret;
}

U_BOOT_DRIVER(nvme) = {
	.name	= "nvme",
	.id	= UCLASS_NVME,
	.bind	= nvme_bind,
	.probe	= nvme_probe,
	.priv_auto_alloc_size = sizeof(struct nvme_dev),
};

struct pci_device_id nvme_supported[] = {
	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, ~0) },
	{}
};

U_BOOT_PCI_DEVICE(nvme, nvme_supported);