.. SPDX-License-Identifier: GPL-2.0

=============
Page Pool API
=============

The page_pool allocator is optimized for the XDP mode that uses one frame
per page, but it can also fall back to the regular page allocator APIs.

Basic use involves replacing alloc_pages() calls with
page_pool_alloc_pages(). Drivers should use page_pool_dev_alloc_pages()
in place of dev_alloc_pages().

The API keeps track of in-flight pages in order to let API users know when
it is safe to free a page_pool object. Thus, API users must call
page_pool_release_page() when a page is leaving the page_pool, or call
page_pool_put_page() where appropriate, in order to maintain correct
accounting.

An API user must call page_pool_put_page() exactly once on a page: it will
either recycle the page, or, if the refcnt is > 1, release the DMA mapping
and drop the page from the inflight accounting.
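
These rules can be summarised in a short sketch. The example below is not
taken from a real driver: the pool is assumed to have been created as in
the Registration example further down, and rx_buf_passed_to_stack stands in
for whatever driver-specific condition decides that the buffer leaves the
pool.

.. code-block:: c

    /* Lifecycle sketch; rx_buf_passed_to_stack is a hypothetical
     * driver-specific condition, not part of the page_pool API.
     */
    struct page *page;

    page = page_pool_dev_alloc_pages(page_pool);
    if (!page)
        return -ENOMEM;

    /* ... hand the buffer to hardware, receive a frame into it ... */

    if (rx_buf_passed_to_stack)
        /* The page leaves the pool, e.g. attached to an SKB:
         * unmap it and drop it from the inflight accounting.
         */
        page_pool_release_page(page_pool, page);
    else
        /* Return the page to the pool. allow_direct must be false
         * outside the NAPI context that owns the pool.
         */
        page_pool_put_full_page(page_pool, page, false);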
Architecture overview
=====================

.. code-block:: none

    +------------------+
    |      Driver      |
    +------------------+
              ^
              |
              |
              |
              v
    +--------------------------------------------+
    |               request memory               |
    +--------------------------------------------+
           ^                          ^
           |                          |
           | Pool empty               | Pool has entries
           |                          |
           v                          v
    +-----------------------+    +------------------------+
    | alloc (and map) pages |    |  get page from cache   |
    +-----------------------+    +------------------------+
                                     ^                  ^
                                     |                  |
                                     | cache available  | No entries, refill
                                     |                  | from ptr-ring
                                     |                  |
                                     v                  v
                               +-----------------+   +------------------+
                               |   Fast cache    |   |  ptr-ring cache  |
                               +-----------------+   +------------------+

API interface
=============

The number of pools created **must** match the number of hardware queues
unless hardware restrictions make that impossible. Anything else would
defeat the purpose of page pool, which is to allocate pages quickly from
the cache without locking. This lockless guarantee naturally comes from
running under a NAPI softirq. The protection doesn't strictly have to be
NAPI; any guarantee that allocating a page will cause no race conditions
is enough.

* page_pool_create(): Create a pool.

  * flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV
  * order: 2^order pages on allocation
  * pool_size: size of the ptr_ring
  * nid: preferred NUMA node for allocation
  * dev: struct device. Used on DMA operations
  * dma_dir: DMA direction
  * max_len: max DMA sync memory size
  * offset: DMA address offset

* page_pool_put_page(): The outcome of this depends on the page refcnt. If
  the driver has bumped the refcnt to > 1, this will unmap the page. If the
  page refcnt is 1, the allocator owns the page and will try to recycle it
  in one of the pool caches. If PP_FLAG_DMA_SYNC_DEV is set, the page will
  be synced for device use with dma_sync_single_range_for_device().

* page_pool_put_full_page(): Similar to page_pool_put_page(), but will DMA
  sync the entire memory area configured in pool->max_len.

* page_pool_recycle_direct(): Similar to page_pool_put_full_page(), but the
  caller must guarantee a safe context (e.g. NAPI), since it will recycle
  the page directly into the pool fast cache.

* page_pool_release_page(): Unmap the page (if mapped) and account for it
  on the inflight counters.

* page_pool_dev_alloc_pages(): Get a page from the page allocator or
  page_pool caches.

* page_pool_get_dma_addr(): Retrieve the stored DMA address (see the refill
  sketch below).

* page_pool_get_dma_dir(): Retrieve the stored DMA direction.
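
As a small illustration of the allocation and DMA helpers above, the sketch
below refills a single Rx descriptor. It assumes the pool was created with
PP_FLAG_DMA_MAP; dring, rx_desc, idx and rx_headroom are hypothetical
driver-side names, not part of the page_pool API.

.. code-block:: c

    /* Refill one Rx descriptor slot (sketch). With PP_FLAG_DMA_MAP the
     * pool has already mapped the page, so no dma_map_page() call is
     * needed here; the stored address is simply fetched.
     */
    struct page *page;
    dma_addr_t dma;

    page = page_pool_dev_alloc_pages(page_pool);
    if (!page)
        return -ENOMEM;

    dma = page_pool_get_dma_addr(page);

    /* dring, rx_desc, idx and rx_headroom are hypothetical driver fields. */
    dring->rx_desc[idx].addr = cpu_to_le64(dma + rx_headroom);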
Coding examples
===============

Registration
------------

.. code-block:: c

    /* Page pool registration */
    struct page_pool_params pp_params = { 0 };
    struct xdp_rxq_info xdp_rxq;
    int err;

    pp_params.order = 0;
    /* internal DMA mapping in page_pool */
    pp_params.flags = PP_FLAG_DMA_MAP;
    pp_params.pool_size = DESC_NUM;
    pp_params.nid = NUMA_NO_NODE;
    pp_params.dev = priv->dev;
    pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;

    /* page_pool_create() returns an ERR_PTR on failure */
    page_pool = page_pool_create(&pp_params);
    if (IS_ERR(page_pool)) {
        err = PTR_ERR(page_pool);
        goto err_out;
    }

    err = xdp_rxq_info_reg(&xdp_rxq, ndev, 0);
    if (err)
        goto err_out;

    err = xdp_rxq_info_reg_mem_model(&xdp_rxq, MEM_TYPE_PAGE_POOL, page_pool);
    if (err)
        goto err_out;

NAPI poller
-----------

.. code-block:: c

    /* NAPI Rx poller (pseudocode sketch) */
    enum dma_data_direction dma_dir;

    dma_dir = page_pool_get_dma_dir(dring->page_pool);
    while (done < budget) {
        if (some error)
            page_pool_recycle_direct(page_pool, page);
        if (packet_is_xdp) {
            if XDP_DROP:
                /* Safe (NAPI) context: recycle straight into the
                 * pool fast cache.
                 */
                page_pool_recycle_direct(page_pool, page);
        } else if (packet_is_skb) {
            /* The page leaves the pool with the SKB: unmap it, drop
             * it from the inflight accounting and allocate a
             * replacement buffer.
             */
            page_pool_release_page(page_pool, page);
            new_page = page_pool_dev_alloc_pages(page_pool);
        }
    }

Driver unload
-------------

.. code-block:: c

    /* Driver unload: return every page still held by the driver so the
     * inflight accounting can drop to zero, then unregister.
     */
    page_pool_put_full_page(page_pool, page, false);
    xdp_rxq_info_unreg(&xdp_rxq);