/*
 * Copyright (C) 2017 NXP Semiconductors
 * Copyright (C) 2017 Bin Meng <bmeng.cn@gmail.com>
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

#include <common.h>
#include <bouncebuf.h>
#include <dm.h>
#include <errno.h>
#include <memalign.h>
#include <pci.h>
#include <dm/device-internal.h>
#include "nvme.h"

#define NVME_Q_DEPTH		2
#define NVME_AQ_DEPTH		2
#define NVME_SQ_SIZE(depth)	(depth * sizeof(struct nvme_command))
#define NVME_CQ_SIZE(depth)	(depth * sizeof(struct nvme_completion))
#define NVME_CQ_ALLOCATION	ALIGN(NVME_CQ_SIZE(NVME_Q_DEPTH), \
				      ARCH_DMA_MINALIGN)
#define ADMIN_TIMEOUT		60
#define IO_TIMEOUT		30
#define MAX_PRP_POOL		512

enum nvme_queue_id {
	NVME_ADMIN_Q,
	NVME_IO_Q,
	NVME_Q_NUM,
};

/*
 * An NVM Express queue. Each device has at least two (one for admin
 * commands and one for I/O commands).
 */
struct nvme_queue {
	struct nvme_dev *dev;
	struct nvme_command *sq_cmds;
	struct nvme_completion *cqes;
	wait_queue_head_t sq_full;
	u32 __iomem *q_db;
	u16 q_depth;
	s16 cq_vector;
	u16 sq_head;
	u16 sq_tail;
	u16 cq_head;
	u16 qid;
	u8 cq_phase;
	u8 cqe_seen;
	unsigned long cmdid_data[];
};

static int nvme_wait_ready(struct nvme_dev *dev, bool enabled)
{
	u32 bit = enabled ? NVME_CSTS_RDY : 0;
	int timeout;
	ulong start;

	/* Timeout field in the CAP register is in 500 millisecond units */
	timeout = NVME_CAP_TIMEOUT(dev->cap) * 500;

	start = get_timer(0);
	while (get_timer(start) < timeout) {
		if ((readl(&dev->bar->csts) & NVME_CSTS_RDY) == bit)
			return 0;
	}

	return -ETIME;
}

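/*
 * Build the PRP (Physical Region Page) entries for one data transfer.
 *
 * PRP1 is always the address of the first, possibly unaligned, chunk of
 * the buffer and is filled in by the caller; this helper only derives
 * PRP2 (sizes below assume the usual 4 KiB device page):
 *
 * - if the transfer ends within the first page, *prp2 is 0
 * - if it spills into exactly one more page, *prp2 is that page
 * - otherwise *prp2 points to a PRP list in dev->prp_pool with one
 *   8-byte entry per remaining page; whenever a list page fills up,
 *   its last slot is turned into a pointer chaining to the next page
 *   of the list.
 */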
static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2,
			   int total_len, u64 dma_addr)
{
	u32 page_size = dev->page_size;
	int offset = dma_addr & (page_size - 1);
	u64 *prp_pool;
	int length = total_len;
	int i, nprps;
	u32 prps_per_page = page_size >> 3;
	u32 num_pages;

	length -= (page_size - offset);

	if (length <= 0) {
		*prp2 = 0;
		return 0;
	}

	if (length)
		dma_addr += (page_size - offset);

	if (length <= page_size) {
		*prp2 = dma_addr;
		return 0;
	}

	nprps = DIV_ROUND_UP(length, page_size);
	num_pages = DIV_ROUND_UP(nprps + 1, prps_per_page);

	if (nprps > dev->prp_entry_num) {
		free(dev->prp_pool);
		/*
		 * Always increase in increments of pages. It doesn't waste
		 * much memory and reduces the number of allocations.
		 */
		dev->prp_pool = memalign(page_size, num_pages * page_size);
		if (!dev->prp_pool) {
			printf("Error: malloc prp_pool fail\n");
			return -ENOMEM;
		}
		dev->prp_entry_num = prps_per_page * num_pages;
	}

	prp_pool = dev->prp_pool;
	i = 0;
	while (nprps) {
		if (i == prps_per_page) {
			*(prp_pool + i) = *(prp_pool + i - 1);
			*(prp_pool + i - 1) = cpu_to_le64((ulong)prp_pool +
							  page_size);
			i = 1;
			prp_pool += page_size;
		}
		*(prp_pool + i++) = cpu_to_le64(dma_addr);
		dma_addr += page_size;
		nprps--;
	}
	*prp2 = (ulong)dev->prp_pool;

	flush_dcache_range((ulong)dev->prp_pool, (ulong)dev->prp_pool +
			   dev->prp_entry_num * sizeof(u64));

	return 0;
}

static __le16 nvme_get_cmd_id(void)
{
	static unsigned short cmdid;

	return cpu_to_le16((cmdid < USHRT_MAX) ? cmdid++ : 0);
}

static u16 nvme_read_completion_status(struct nvme_queue *nvmeq, u16 index)
{
	/*
	 * Single CQ entries are always smaller than a cache line, so we
	 * can't invalidate them individually. However CQ entries are
	 * read only by the CPU, so it's safe to always invalidate all of them,
	 * as the cache line should never become dirty.
	 */
	ulong start = (ulong)&nvmeq->cqes[0];
	ulong stop = start + NVME_CQ_ALLOCATION;

	invalidate_dcache_range(start, stop);

	return readw(&(nvmeq->cqes[index].status));
}

/**
 * nvme_submit_cmd() - copy a command into a queue and ring the doorbell
 *
 * @nvmeq:	The queue to use
 * @cmd:	The command to send
 */
static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
{
	u16 tail = nvmeq->sq_tail;

	memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
	flush_dcache_range((ulong)&nvmeq->sq_cmds[tail],
			   (ulong)&nvmeq->sq_cmds[tail] + sizeof(*cmd));

	if (++tail == nvmeq->q_depth)
		tail = 0;
	writel(tail, nvmeq->q_db);
	nvmeq->sq_tail = tail;
}

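/**
 * nvme_submit_sync_cmd() - submit a command and poll for its completion
 *
 * @nvmeq:	The queue to use
 * @cmd:	The command to send
 * @result:	If non-NULL, receives dword 0 of the completion entry
 * @timeout:	How long to poll before giving up (ADMIN_TIMEOUT/IO_TIMEOUT)
 *
 * Completion is detected by polling the phase tag of the expected
 * completion queue slot: the controller inverts that bit on every pass
 * through the queue, so the slot holds a fresh entry once its phase tag
 * matches nvmeq->cq_phase. Returns 0 on success, -ETIMEDOUT if no
 * completion arrived in time, or -EIO if the controller reported an
 * error status.
 */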
static int nvme_submit_sync_cmd(struct nvme_queue *nvmeq,
				struct nvme_command *cmd,
				u32 *result, unsigned timeout)
{
	u16 head = nvmeq->cq_head;
	u16 phase = nvmeq->cq_phase;
	u16 status;
	ulong start_time;
	ulong timeout_us = timeout * 100000;

	cmd->common.command_id = nvme_get_cmd_id();
	nvme_submit_cmd(nvmeq, cmd);

	start_time = timer_get_us();

	for (;;) {
		status = nvme_read_completion_status(nvmeq, head);
		if ((status & 0x01) == phase)
			break;
		if (timeout_us > 0 && (timer_get_us() - start_time)
		    >= timeout_us)
			return -ETIMEDOUT;
	}

	status >>= 1;
	if (status) {
		printf("ERROR: status = %x, phase = %d, head = %d\n",
		       status, phase, head);
		status = 0;
		if (++head == nvmeq->q_depth) {
			head = 0;
			phase = !phase;
		}
		writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
		nvmeq->cq_head = head;
		nvmeq->cq_phase = phase;

		return -EIO;
	}

	if (result)
		*result = readl(&(nvmeq->cqes[head].result));

	if (++head == nvmeq->q_depth) {
		head = 0;
		phase = !phase;
	}
	writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
	nvmeq->cq_head = head;
	nvmeq->cq_phase = phase;

	return status;
}

static int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
				 u32 *result)
{
	return nvme_submit_sync_cmd(dev->queues[NVME_ADMIN_Q], cmd,
				    result, ADMIN_TIMEOUT);
}

static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev,
					   int qid, int depth)
{
	struct nvme_queue *nvmeq = malloc(sizeof(*nvmeq));
	if (!nvmeq)
		return NULL;
	memset(nvmeq, 0, sizeof(*nvmeq));

	nvmeq->cqes = (void *)memalign(4096, NVME_CQ_ALLOCATION);
	if (!nvmeq->cqes)
		goto free_nvmeq;
	memset((void *)nvmeq->cqes, 0, NVME_CQ_SIZE(depth));

	nvmeq->sq_cmds = (void *)memalign(4096, NVME_SQ_SIZE(depth));
	if (!nvmeq->sq_cmds)
		goto free_queue;
	memset((void *)nvmeq->sq_cmds, 0, NVME_SQ_SIZE(depth));

	nvmeq->dev = dev;

	nvmeq->cq_head = 0;
	nvmeq->cq_phase = 1;
	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
	nvmeq->q_depth = depth;
	nvmeq->qid = qid;
	dev->queue_count++;
	dev->queues[qid] = nvmeq;

	return nvmeq;

free_queue:
	free((void *)nvmeq->cqes);
free_nvmeq:
	free(nvmeq);

	return NULL;
}

static int nvme_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.delete_queue.opcode = opcode;
	c.delete_queue.qid = cpu_to_le16(id);

	return nvme_submit_admin_cmd(dev, &c, NULL);
}

static int nvme_delete_sq(struct nvme_dev *dev, u16 sqid)
{
	return nvme_delete_queue(dev, nvme_admin_delete_sq, sqid);
}

static int nvme_delete_cq(struct nvme_dev *dev, u16 cqid)
{
	return nvme_delete_queue(dev, nvme_admin_delete_cq, cqid);
}

static int nvme_enable_ctrl(struct nvme_dev *dev)
{
	dev->ctrl_config &= ~NVME_CC_SHN_MASK;
	dev->ctrl_config |= NVME_CC_ENABLE;
	writel(dev->ctrl_config, &dev->bar->cc);

	return nvme_wait_ready(dev, true);
}

static int nvme_disable_ctrl(struct nvme_dev *dev)
{
	dev->ctrl_config &= ~NVME_CC_SHN_MASK;
	dev->ctrl_config &= ~NVME_CC_ENABLE;
	writel(dev->ctrl_config, &dev->bar->cc);

	if (dev->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
		mdelay(NVME_QUIRK_DELAY_AMOUNT);

	return nvme_wait_ready(dev, false);
}

static void nvme_free_queue(struct nvme_queue *nvmeq)
{
	free((void *)nvmeq->cqes);
	free(nvmeq->sq_cmds);
	free(nvmeq);
}

static void nvme_free_queues(struct nvme_dev *dev, int lowest)
{
	int i;

	for (i = dev->queue_count - 1; i >= lowest; i--) {
		struct nvme_queue *nvmeq = dev->queues[i];
		dev->queue_count--;
		dev->queues[i] = NULL;
		nvme_free_queue(nvmeq);
	}
}

static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
{
	struct nvme_dev *dev = nvmeq->dev;

	nvmeq->sq_tail = 0;
	nvmeq->cq_head = 0;
	nvmeq->cq_phase = 1;
	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
	memset((void *)nvmeq->cqes, 0, NVME_CQ_SIZE(nvmeq->q_depth));
	flush_dcache_range((ulong)nvmeq->cqes,
			   (ulong)nvmeq->cqes + NVME_CQ_ALLOCATION);
	dev->online_queues++;
}

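/*
 * Bring up the admin queue pair. Rough sequence, mirroring the code
 * below: pick a host page size the controller accepts (CAP.MPSMIN/
 * MPSMAX), disable the controller, allocate the admin SQ/CQ if needed,
 * program AQA/ASQ/ACQ and the controller configuration, then re-enable
 * the controller and wait for CSTS.RDY.
 */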
static int nvme_configure_admin_queue(struct nvme_dev *dev)
{
	int result;
	u32 aqa;
	u64 cap = dev->cap;
	struct nvme_queue *nvmeq;
	/* most architectures use 4KB as the page size */
	unsigned page_shift = 12;
	unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12;
	unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12;

	if (page_shift < dev_page_min) {
		debug("Device minimum page size (%u) too large for host (%u)\n",
		      1 << dev_page_min, 1 << page_shift);
		return -ENODEV;
	}

	if (page_shift > dev_page_max) {
		debug("Device maximum page size (%u) smaller than host (%u)\n",
		      1 << dev_page_max, 1 << page_shift);
		page_shift = dev_page_max;
	}

	result = nvme_disable_ctrl(dev);
	if (result < 0)
		return result;

	nvmeq = dev->queues[NVME_ADMIN_Q];
	if (!nvmeq) {
		nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
		if (!nvmeq)
			return -ENOMEM;
	}

	aqa = nvmeq->q_depth - 1;
	aqa |= aqa << 16;

	dev->page_size = 1 << page_shift;

	dev->ctrl_config = NVME_CC_CSS_NVM;
	dev->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
	dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
	dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;

	writel(aqa, &dev->bar->aqa);
	nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq);
	nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq);

	result = nvme_enable_ctrl(dev);
	if (result)
		goto free_nvmeq;

	nvmeq->cq_vector = 0;

	nvme_init_queue(dev->queues[NVME_ADMIN_Q], 0);

	return result;

free_nvmeq:
	nvme_free_queues(dev, 0);

	return result;
}

static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid,
			 struct nvme_queue *nvmeq)
{
	struct nvme_command c;
	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;

	memset(&c, 0, sizeof(c));
	c.create_cq.opcode = nvme_admin_create_cq;
	c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes);
	c.create_cq.cqid = cpu_to_le16(qid);
	c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
	c.create_cq.cq_flags = cpu_to_le16(flags);
	c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);

	return nvme_submit_admin_cmd(dev, &c, NULL);
}

static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid,
			 struct nvme_queue *nvmeq)
{
	struct nvme_command c;
	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;

	memset(&c, 0, sizeof(c));
	c.create_sq.opcode = nvme_admin_create_sq;
	c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds);
	c.create_sq.sqid = cpu_to_le16(qid);
	c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
	c.create_sq.sq_flags = cpu_to_le16(flags);
	c.create_sq.cqid = cpu_to_le16(qid);

	return nvme_submit_admin_cmd(dev, &c, NULL);
}

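/*
 * Issue an Identify admin command. @cns selects what lands in the
 * buffer at @dma_addr: this driver uses cns=1 for the controller data
 * structure and cns=0 for a namespace data structure. The identify
 * data is a single 4 KiB structure, so PRP1/PRP2 are filled in here
 * directly (PRP2 is only needed when the buffer is not page aligned)
 * rather than going through nvme_setup_prps().
 */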
int nvme_identify(struct nvme_dev *dev, unsigned nsid,
		  unsigned cns, dma_addr_t dma_addr)
{
	struct nvme_command c;
	u32 page_size = dev->page_size;
	int offset = dma_addr & (page_size - 1);
	int length = sizeof(struct nvme_id_ctrl);
	int ret;

	memset(&c, 0, sizeof(c));
	c.identify.opcode = nvme_admin_identify;
	c.identify.nsid = cpu_to_le32(nsid);
	c.identify.prp1 = cpu_to_le64(dma_addr);

	length -= (page_size - offset);
	if (length <= 0) {
		c.identify.prp2 = 0;
	} else {
		dma_addr += (page_size - offset);
		c.identify.prp2 = cpu_to_le64(dma_addr);
	}

	c.identify.cns = cpu_to_le32(cns);

	invalidate_dcache_range(dma_addr,
				dma_addr + sizeof(struct nvme_id_ctrl));

	ret = nvme_submit_admin_cmd(dev, &c, NULL);
	if (!ret)
		invalidate_dcache_range(dma_addr,
					dma_addr + sizeof(struct nvme_id_ctrl));

	return ret;
}

int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
		      dma_addr_t dma_addr, u32 *result)
{
	struct nvme_command c;
	int ret;

	memset(&c, 0, sizeof(c));
	c.features.opcode = nvme_admin_get_features;
	c.features.nsid = cpu_to_le32(nsid);
	c.features.prp1 = cpu_to_le64(dma_addr);
	c.features.fid = cpu_to_le32(fid);

	ret = nvme_submit_admin_cmd(dev, &c, result);

	/*
	 * TODO: Add some cache invalidation when a DMA buffer is involved
	 * in the request, here and before the command gets submitted. The
	 * buffer size varies by feature, also some features use a different
	 * field in the command packet to hold the buffer address.
	 * Section 5.21.1 (Set Features command) in the NVMe specification
	 * details the buffer requirements for each feature.
	 *
	 * At the moment there is no user of this function.
	 */

	return ret;
}

int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
		      dma_addr_t dma_addr, u32 *result)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.features.opcode = nvme_admin_set_features;
	c.features.prp1 = cpu_to_le64(dma_addr);
	c.features.fid = cpu_to_le32(fid);
	c.features.dword11 = cpu_to_le32(dword11);

	/*
	 * TODO: Add a cache clean (aka flush) operation when a DMA buffer is
	 * involved in the request. The buffer size varies by feature, also
	 * some features use a different field in the command packet to hold
	 * the buffer address. Section 5.21.1 (Set Features command) in the
	 * NVMe specification details the buffer requirements for each
	 * feature.
	 * At the moment the only user of this function is not using
	 * any DMA buffer at all.
	 */

	return nvme_submit_admin_cmd(dev, &c, result);
}

static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
{
	struct nvme_dev *dev = nvmeq->dev;
	int result;

	nvmeq->cq_vector = qid - 1;
	result = nvme_alloc_cq(dev, qid, nvmeq);
	if (result < 0)
		goto release_cq;

	result = nvme_alloc_sq(dev, qid, nvmeq);
	if (result < 0)
		goto release_sq;

	nvme_init_queue(nvmeq, qid);

	return result;

release_sq:
	nvme_delete_sq(dev, qid);
release_cq:
	nvme_delete_cq(dev, qid);

	return result;
}

static int nvme_set_queue_count(struct nvme_dev *dev, int count)
{
	int status;
	u32 result;
	u32 q_count = (count - 1) | ((count - 1) << 16);

	status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES,
				   q_count, 0, &result);

	if (status < 0)
		return status;
	if (status > 1)
		return 0;

	return min(result & 0xffff, result >> 16) + 1;
}

static void nvme_create_io_queues(struct nvme_dev *dev)
{
	unsigned int i;

	for (i = dev->queue_count; i <= dev->max_qid; i++)
		if (!nvme_alloc_queue(dev, i, dev->q_depth))
			break;

	for (i = dev->online_queues; i <= dev->queue_count - 1; i++)
		if (nvme_create_queue(dev->queues[i], i))
			break;
}

static int nvme_setup_io_queues(struct nvme_dev *dev)
{
	int nr_io_queues;
	int result;

	nr_io_queues = 1;
	result = nvme_set_queue_count(dev, nr_io_queues);
	if (result <= 0)
		return result;

	dev->max_qid = nr_io_queues;

	/* Free previously allocated queues */
	nvme_free_queues(dev, nr_io_queues + 1);
	nvme_create_io_queues(dev);

	return 0;
}

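/*
 * Read the controller Identify data and cache the fields this driver
 * needs: namespace count, volatile write cache flag, ID strings and
 * the maximum transfer size.
 *
 * Worked example for the MDTS conversion below: with CAP.MPSMIN = 0
 * (4 KiB pages, shift = 12) and MDTS = 5, the limit is 2^5 pages,
 * i.e. max_transfer_shift = 5 + 12 = 17, or 128 KiB per command.
 */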
static int nvme_get_info_from_identify(struct nvme_dev *dev)
{
	struct nvme_id_ctrl *ctrl;
	int ret;
	int shift = NVME_CAP_MPSMIN(dev->cap) + 12;

	ctrl = memalign(dev->page_size, sizeof(struct nvme_id_ctrl));
	if (!ctrl)
		return -ENOMEM;

	ret = nvme_identify(dev, 0, 1, (dma_addr_t)(long)ctrl);
	if (ret) {
		free(ctrl);
		return -EIO;
	}

	dev->nn = le32_to_cpu(ctrl->nn);
	dev->vwc = ctrl->vwc;
	memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
	memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
	memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
	if (ctrl->mdts)
		dev->max_transfer_shift = (ctrl->mdts + shift);
	else {
		/*
		 * Maximum Data Transfer Size (MDTS) field indicates the maximum
		 * data transfer size between the host and the controller. The
		 * host should not submit a command that exceeds this transfer
		 * size. The value is in units of the minimum memory page size
		 * and is reported as a power of two (2^n).
		 *
		 * The spec also says: a value of 0h indicates no restrictions
		 * on transfer size. But in nvme_blk_read/write() below we have
		 * the following algorithm for maximum number of logic blocks
		 * per transfer:
		 *
		 * u16 lbas = 1 << (dev->max_transfer_shift - ns->lba_shift);
		 *
		 * In order for lbas not to overflow, the maximum number is 15
		 * which means dev->max_transfer_shift = 15 + 9 (ns->lba_shift).
		 * Let's use 20 which provides 1MB size.
		 */
		dev->max_transfer_shift = 20;
	}

	free(ctrl);
	return 0;
}

int nvme_get_namespace_id(struct udevice *udev, u32 *ns_id, u8 *eui64)
{
	struct nvme_ns *ns = dev_get_priv(udev);

	if (ns_id)
		*ns_id = ns->ns_id;
	if (eui64)
		memcpy(eui64, ns->eui64, sizeof(ns->eui64));

	return 0;
}

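/*
 * Probe every controller bound to UCLASS_NVME so that their block
 * devices become usable. A caller would typically do something like
 * the following sketch (error handling omitted; blk_get_devnum_by_type()
 * and blk_dread() are the generic block layer helpers):
 *
 *	nvme_scan_namespace();
 *	struct blk_desc *desc = blk_get_devnum_by_type(IF_TYPE_NVME, 0);
 *	if (desc)
 *		blk_dread(desc, 0, 8, buffer);
 */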
int nvme_scan_namespace(void)
{
	struct uclass *uc;
	struct udevice *dev;
	int ret;

	ret = uclass_get(UCLASS_NVME, &uc);
	if (ret)
		return ret;

	uclass_foreach_dev(dev, uc) {
		ret = device_probe(dev);
		if (ret) {
			printf("Failed to probe '%s': err=%dE\n", dev->name,
			       ret);
			/*
			 * Bail on fatal errors, but keep trying the
			 * remaining controllers if this one is just busy.
			 */
			if (ret != -EBUSY)
				return ret;
		}
	}

	return 0;
}

static int nvme_blk_probe(struct udevice *udev)
{
	struct nvme_dev *ndev = dev_get_priv(udev->parent);
	struct blk_desc *desc = dev_get_uclass_platdata(udev);
	struct nvme_ns *ns = dev_get_priv(udev);
	u8 flbas;
	struct pci_child_platdata *pplat;
	struct nvme_id_ns *id;

	id = memalign(ndev->page_size, sizeof(struct nvme_id_ns));
	if (!id)
		return -ENOMEM;

	ns->dev = ndev;
	/* extract the namespace id from the block device name */
	ns->ns_id = trailing_strtol(udev->name);
	if (nvme_identify(ndev, ns->ns_id, 0, (dma_addr_t)(long)id)) {
		free(id);
		return -EIO;
	}

	memcpy(&ns->eui64, &id->eui64, sizeof(id->eui64));
	flbas = id->flbas & NVME_NS_FLBAS_LBA_MASK;
	ns->flbas = flbas;
	ns->lba_shift = id->lbaf[flbas].ds;
	list_add(&ns->list, &ndev->namespaces);

	desc->lba = le64_to_cpu(id->nsze);
	desc->log2blksz = ns->lba_shift;
	desc->blksz = 1 << ns->lba_shift;
	desc->bdev = udev;
	pplat = dev_get_parent_platdata(udev->parent);
	sprintf(desc->vendor, "0x%.4x", pplat->vendor);
	memcpy(desc->product, ndev->serial, sizeof(ndev->serial));
	memcpy(desc->revision, ndev->firmware_rev, sizeof(ndev->firmware_rev));
	part_init(desc);

	free(id);
	return 0;
}

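/*
 * Common read/write path for the block ops. The request is first run
 * through a bounce buffer so the DMA address is cache-line aligned and
 * safe to flush/invalidate, then split into chunks of at most
 * 1 << (max_transfer_shift - lba_shift) blocks; each chunk gets its
 * own PRP setup and is submitted synchronously on the I/O queue.
 * Returns the number of blocks actually transferred.
 */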
static ulong nvme_blk_rw(struct udevice *udev, lbaint_t blknr,
			 lbaint_t blkcnt, void *buffer, bool read)
{
	struct nvme_ns *ns = dev_get_priv(udev);
	struct nvme_dev *dev = ns->dev;
	struct nvme_command c;
	struct blk_desc *desc = dev_get_uclass_platdata(udev);
	int status;
	u64 prp2;
	u64 total_len = blkcnt << desc->log2blksz;
	u64 temp_len = total_len;
	uintptr_t temp_buffer;

	u64 slba = blknr;
	u16 lbas = 1 << (dev->max_transfer_shift - ns->lba_shift);
	u64 total_lbas = blkcnt;

	struct bounce_buffer bb;
	unsigned int bb_flags;
	int ret;

	if (read)
		bb_flags = GEN_BB_WRITE;
	else
		bb_flags = GEN_BB_READ;

	ret = bounce_buffer_start(&bb, buffer, total_len, bb_flags);
	if (ret)
		return -ENOMEM;
	temp_buffer = (unsigned long)bb.bounce_buffer;

	c.rw.opcode = read ? nvme_cmd_read : nvme_cmd_write;
	c.rw.flags = 0;
	c.rw.nsid = cpu_to_le32(ns->ns_id);
	c.rw.control = 0;
	c.rw.dsmgmt = 0;
	c.rw.reftag = 0;
	c.rw.apptag = 0;
	c.rw.appmask = 0;
	c.rw.metadata = 0;

	/* Enable FUA for data integrity if vwc is enabled */
	if (dev->vwc)
		c.rw.control |= NVME_RW_FUA;

	while (total_lbas) {
		if (total_lbas < lbas) {
			lbas = (u16)total_lbas;
			total_lbas = 0;
		} else {
			total_lbas -= lbas;
		}

		if (nvme_setup_prps(dev, &prp2,
				    lbas << ns->lba_shift, temp_buffer)) {
			bounce_buffer_stop(&bb);
			return -EIO;
		}
		c.rw.slba = cpu_to_le64(slba);
		slba += lbas;
		c.rw.length = cpu_to_le16(lbas - 1);
		c.rw.prp1 = cpu_to_le64(temp_buffer);
		c.rw.prp2 = cpu_to_le64(prp2);
		status = nvme_submit_sync_cmd(dev->queues[NVME_IO_Q],
					      &c, NULL, IO_TIMEOUT);
		if (status)
			break;
		temp_len -= (u32)lbas << ns->lba_shift;
		temp_buffer += lbas << ns->lba_shift;
	}

	bounce_buffer_stop(&bb);

	return (total_len - temp_len) >> desc->log2blksz;
}

static ulong nvme_blk_read(struct udevice *udev, lbaint_t blknr,
			   lbaint_t blkcnt, void *buffer)
{
	return nvme_blk_rw(udev, blknr, blkcnt, buffer, true);
}

static ulong nvme_blk_write(struct udevice *udev, lbaint_t blknr,
			    lbaint_t blkcnt, const void *buffer)
{
	return nvme_blk_rw(udev, blknr, blkcnt, (void *)buffer, false);
}

static ulong nvme_blk_erase(struct udevice *udev, lbaint_t blknr,
			    lbaint_t blkcnt)
{
	ALLOC_CACHE_ALIGN_BUFFER(struct nvme_dsm_range, range, sizeof(struct nvme_dsm_range));
	struct nvme_ns *ns = dev_get_priv(udev);
	struct nvme_dev *dev = ns->dev;
	struct nvme_command cmnd;

	memset(&cmnd, 0, sizeof(cmnd));

	range->cattr = cpu_to_le32(0);
	range->nlb = cpu_to_le32(blkcnt);
	range->slba = cpu_to_le64(blknr);

	cmnd.dsm.opcode = nvme_cmd_dsm;
	cmnd.dsm.nsid = cpu_to_le32(ns->ns_id);
	cmnd.dsm.prp1 = cpu_to_le64((ulong)range);
	cmnd.dsm.nr = 0;
	cmnd.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);

	flush_dcache_range((ulong)range,
			   (ulong)range + sizeof(struct nvme_dsm_range));

	/*
	 * Wait for completion so the DSM range list, which lives on our
	 * stack, stays valid for as long as the controller may fetch it.
	 */
	if (nvme_submit_sync_cmd(dev->queues[NVME_IO_Q], &cmnd, NULL,
				 IO_TIMEOUT))
		return 0;

	return blkcnt;
}

static ulong nvme_blk_write_zeroes(struct udevice *udev, lbaint_t blknr,
				   lbaint_t blkcnt)
{
	struct nvme_ns *ns = dev_get_priv(udev);
	struct nvme_dev *dev = ns->dev;
	struct nvme_command cmnd;

	/*
	 * On controllers where deallocated blocks are guaranteed to read
	 * back as zeroes, a dataset management deallocate does the job
	 * and there is no need to also issue a Write Zeroes command.
	 */
	if (dev->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
		return nvme_blk_erase(udev, blknr, blkcnt);

	memset(&cmnd, 0, sizeof(cmnd));

	cmnd.write_zeroes.opcode = nvme_cmd_write_zeroes;
	cmnd.write_zeroes.nsid = cpu_to_le32(ns->ns_id);
	cmnd.write_zeroes.slba = cpu_to_le64(blknr);
	cmnd.write_zeroes.length = cpu_to_le16(blkcnt - 1);
	cmnd.write_zeroes.control = 0;

	if (nvme_submit_sync_cmd(dev->queues[NVME_IO_Q], &cmnd, NULL,
				 IO_TIMEOUT))
		return 0;

	return blkcnt;
}

static const struct blk_ops nvme_blk_ops = {
	.read	= nvme_blk_read,
	.write	= nvme_blk_write,
	.write_zeroes	= nvme_blk_write_zeroes,
	.erase	= nvme_blk_erase,
};

U_BOOT_DRIVER(nvme_blk) = {
	.name	= "nvme-blk",
	.id	= UCLASS_BLK,
	.probe	= nvme_blk_probe,
	.ops	= &nvme_blk_ops,
	.priv_auto_alloc_size = sizeof(struct nvme_ns),
};

static int nvme_bind(struct udevice *udev)
{
	static int ndev_num;
	char name[20];

	sprintf(name, "nvme#%d", ndev_num++);

	return device_set_name(udev, name);
}

static const struct pci_device_id nvme_id_table[] = {
	{ PCI_VDEVICE(INTEL, 0x0953),	/* Intel 750/P3500/P3600/P3700 */
		.driver_data = NVME_QUIRK_DEALLOCATE_ZEROES, },
	{ PCI_VDEVICE(INTEL, 0x0a53),	/* Intel P3520 */
		.driver_data = NVME_QUIRK_DEALLOCATE_ZEROES, },
	{ PCI_VDEVICE(INTEL, 0x0a54),	/* Intel P4500/P4600 */
		.driver_data = NVME_QUIRK_DEALLOCATE_ZEROES },
	{ PCI_VDEVICE(INTEL, 0x0a55),	/* Dell Express Flash P4600 */
		.driver_data = NVME_QUIRK_DEALLOCATE_ZEROES, },
	{ PCI_DEVICE(0x1bb1, 0x0100),	/* Seagate Nytro Flash Storage */
		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
	{ PCI_DEVICE(0x1c58, 0x0003),	/* HGST adapter */
		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
	{ PCI_DEVICE(0x1c58, 0x0023),	/* WDC SN200 adapter */
		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
	{ PCI_DEVICE(0x1c5f, 0x0540),	/* Memblaze Pblaze4 adapter */
		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
	{ PCI_DEVICE(0x144d, 0xa821),	/* Samsung PM1725 */
		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
	{ PCI_DEVICE(0x144d, 0xa822),	/* Samsung PM1725a */
		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
	{ PCI_DEVICE(0x1987, 0x5013),	/* Phison E13 */
		.driver_data = NVME_QUIRK_LIMIT_IOQD32},
};

static void nvme_apply_quirks(struct udevice *udev)
{
	struct nvme_dev *ndev = dev_get_priv(udev);
	u16 vendor_id, device_id;
	unsigned int i;

	dm_pci_read_config16(udev, PCI_VENDOR_ID, &vendor_id);
	dm_pci_read_config16(udev, PCI_DEVICE_ID, &device_id);

	for (i = 0; i < ARRAY_SIZE(nvme_id_table); i++) {
		if (vendor_id == nvme_id_table[i].vendor &&
		    device_id == nvme_id_table[i].device) {
			ndev->quirks |= nvme_id_table[i].driver_data;
			debug("vid 0x%x, pid 0x%x apply quirks 0x%lx\n",
			      vendor_id, device_id,
			      nvme_id_table[i].driver_data);
		}
	}
}

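/*
 * Controller probe. Outline of the steps below: map BAR0 and sanity
 * check CSTS, apply PCI ID based quirks, read CAP to size the queues
 * and doorbell stride, configure the admin queue, allocate the PRP
 * pool (the page size is only known after that), create the I/O queue
 * pair, read the controller Identify data, and finally bind an
 * "nvme-blk" child device for every active namespace.
 */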
static int nvme_probe(struct udevice *udev)
{
	int ret;
	struct nvme_dev *ndev = dev_get_priv(udev);
	struct nvme_id_ns *id;

	ndev->instance = trailing_strtol(udev->name);

	INIT_LIST_HEAD(&ndev->namespaces);
	ndev->bar = dm_pci_map_bar(udev, PCI_BASE_ADDRESS_0,
				   PCI_REGION_MEM);
	if (readl(&ndev->bar->csts) == -1) {
		ret = -EBUSY;
		printf("Error: %s: Controller not ready!\n", udev->name);
		goto free_nvme;
	}

	ndev->queues = malloc(NVME_Q_NUM * sizeof(struct nvme_queue *));
	if (!ndev->queues) {
		ret = -ENOMEM;
		printf("Error: %s: Out of memory!\n", udev->name);
		goto free_nvme;
	}
	memset(ndev->queues, 0, NVME_Q_NUM * sizeof(struct nvme_queue *));

	nvme_apply_quirks(udev);

	ndev->cap = nvme_readq(&ndev->bar->cap);
	ndev->q_depth = min_t(int, NVME_CAP_MQES(ndev->cap) + 1, NVME_Q_DEPTH);
	if (ndev->quirks & NVME_QUIRK_LIMIT_IOQD32)
		ndev->q_depth = min_t(int, ndev->q_depth, 32);
	ndev->db_stride = 1 << NVME_CAP_STRIDE(ndev->cap);
	ndev->dbs = ((void __iomem *)ndev->bar) + 4096;

	ret = nvme_configure_admin_queue(ndev);
	if (ret)
		goto free_queue;

	/* Allocate after the page size is known */
	ndev->prp_pool = memalign(ndev->page_size, MAX_PRP_POOL);
	if (!ndev->prp_pool) {
		ret = -ENOMEM;
		printf("Error: %s: Out of memory!\n", udev->name);
		goto free_nvme;
	}
	ndev->prp_entry_num = MAX_PRP_POOL >> 3;

	ret = nvme_setup_io_queues(ndev);
	if (ret)
		goto free_queue;

	nvme_get_info_from_identify(ndev);

	/* Create a blk device for each namespace */

	id = memalign(ndev->page_size, sizeof(struct nvme_id_ns));
	if (!id) {
		ret = -ENOMEM;
		goto free_queue;
	}

	for (int i = 1; i <= ndev->nn; i++) {
		struct udevice *ns_udev;
		char name[20];

		memset(id, 0, sizeof(*id));
		if (nvme_identify(ndev, i, 0, (dma_addr_t)(long)id)) {
			ret = -EIO;
			goto free_id;
		}

		/* skip inactive namespace */
		if (!id->nsze)
			continue;

		/*
		 * Encode the namespace id to the device name so that
		 * we can extract it when doing the probe.
		 */
		sprintf(name, "blk#%d", i);

		/* The real blksz and size will be set by nvme_blk_probe() */
		ret = blk_create_devicef(udev, "nvme-blk", name, IF_TYPE_NVME,
					 -1, 512, 0, &ns_udev);
		if (ret)
			goto free_id;
	}

	free(id);
	return 0;

free_id:
	free(id);
free_queue:
	free((void *)ndev->queues);
free_nvme:
	return ret;
}

U_BOOT_DRIVER(nvme) = {
	.name	= "nvme",
	.id	= UCLASS_NVME,
	.bind	= nvme_bind,
	.probe	= nvme_probe,
	.priv_auto_alloc_size = sizeof(struct nvme_dev),
};

struct pci_device_id nvme_supported[] = {
	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, ~0) },
	{}
};

U_BOOT_PCI_DEVICE(nvme, nvme_supported);