.. SPDX-License-Identifier: GPL-2.0

=============
Page Pool API
=============

The page_pool allocator is optimized for the XDP mode that uses one frame
per page, but it can fall back on the regular page allocator APIs.

Basic use involves replacing alloc_pages() calls with the
page_pool_alloc_pages() call.  Drivers should use page_pool_dev_alloc_pages()
in place of dev_alloc_pages().
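
For illustration, a minimal sketch of an Rx ring refill (the ring layout and
DESC_NUM are hypothetical; the pool is assumed to have been created with
PP_FLAG_DMA_MAP so it also holds the DMA mapping):

.. code-block:: c

    /* Refill an Rx ring from the pool (sketch, hypothetical fields) */
    struct page *page;
    int i;

    for (i = 0; i < DESC_NUM; i++) {
        page = page_pool_dev_alloc_pages(page_pool);
        if (!page)
            break;    /* run with fewer buffers; refill again later */

        ring->desc[i].dma = page_pool_get_dma_addr(page);
        ring->page[i] = page;
    }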

The API keeps track of inflight pages, in order to let API users know
when it is safe to free a page_pool object.  Thus, API users
must call page_pool_release_page() when a page leaves the page_pool, or
call page_pool_put_page() where appropriate, in order to maintain correct
accounting.

An API user must call page_pool_put_page() exactly once per page: it will
either recycle the page, or, in case of refcnt > 1, release the DMA mapping
and update the inflight state accounting.
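
For example, a sketch of this rule on Rx completion (pool, page, napi and the
drop decision are placeholders; skb headroom/length setup is omitted):

.. code-block:: c

    /* Exactly one "put" per page */
    if (drop) {
        /* Page stays under pool control: recycle it */
        page_pool_put_full_page(pool, page, false);
    } else {
        /* Page leaves the pool, wrapped into an skb: release it so the
         * DMA mapping is dropped and inflight accounting stays correct.
         */
        skb = build_skb(page_address(page), PAGE_SIZE);
        if (!skb) {
            page_pool_put_full_page(pool, page, false);
            return;
        }
        page_pool_release_page(pool, page);
        napi_gro_receive(napi, skb);
    }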

Architecture overview
=====================

.. code-block:: none

    +------------------+
    |       Driver     |
    +------------------+
            ^
            |
            |
            |
            v
    +--------------------------------------------+
    |                request memory              |
    +--------------------------------------------+
        ^                                  ^
        |                                  |
        | Pool empty                       | Pool has entries
        |                                  |
        v                                  v
    +-----------------------+     +------------------------+
    | alloc (and map) pages |     |  get page from cache   |
    +-----------------------+     +------------------------+
                                    ^                    ^
                                    |                    |
                                    | cache available    | No entries, refill
                                    |                    | from ptr-ring
                                    |                    |
                                    v                    v
                          +-----------------+     +------------------+
                          |   Fast cache    |     |  ptr-ring cache  |
                          +-----------------+     +------------------+

API interface
=============
The number of pools created **must** match the number of hardware queues
unless hardware restrictions make that impossible. Anything else would defeat
the purpose of the page pool, which is to allocate pages quickly from a cache
without locking. This lockless guarantee naturally comes from running under a
NAPI softirq. The protection doesn't strictly have to be NAPI; any guarantee
that allocating a page will cause no race conditions is enough.
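
As a sketch of that rule (the queue count and private-data layout below are
hypothetical), a driver would typically create one pool per Rx queue:

.. code-block:: c

    /* One page_pool per hardware Rx queue (sketch) */
    struct page_pool_params pp_params = { 0 };
    int i;

    pp_params.flags     = PP_FLAG_DMA_MAP;
    pp_params.pool_size = DESC_NUM;
    pp_params.nid       = NUMA_NO_NODE;
    pp_params.dev       = priv->dev;
    pp_params.dma_dir   = DMA_FROM_DEVICE;

    for (i = 0; i < priv->num_rx_queues; i++) {
        priv->rxq[i].page_pool = page_pool_create(&pp_params);
        if (IS_ERR(priv->rxq[i].page_pool))
            /* error unwinding of already created pools omitted */
            return PTR_ERR(priv->rxq[i].page_pool);
    }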

* page_pool_create(): Create a pool.
    * flags:      PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV
    * order:      2^order pages on allocation
    * pool_size:  size of the ptr_ring
    * nid:        preferred NUMA node for allocation
    * dev:        struct device. Used on DMA operations
    * dma_dir:    DMA direction
    * max_len:    max DMA sync memory size
    * offset:     DMA address offset

* page_pool_put_page(): The outcome of this depends on the page refcnt. If the
  driver bumps the refcnt > 1 this will unmap the page. If the page refcnt is 1
  the allocator owns the page and will try to recycle it in one of the pool
  caches. If PP_FLAG_DMA_SYNC_DEV is set, the page will be synced for the
  device using dma_sync_single_range_for_device().

* page_pool_put_full_page(): Similar to page_pool_put_page(), but will DMA sync
  the entire memory area configured in pool->max_len.

* page_pool_recycle_direct(): Similar to page_pool_put_full_page(), but the
  caller must guarantee a safe context (e.g. NAPI), since it will recycle the
  page directly into the pool fast cache.

* page_pool_release_page(): Unmap the page (if mapped) and account for it on
  inflight counters.

* page_pool_dev_alloc_pages(): Get a page from the page allocator or page_pool
  caches.

* page_pool_get_dma_addr(): Retrieve the stored DMA address (see the sketch
  after this list).

* page_pool_get_dma_dir(): Retrieve the stored DMA direction.
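
The last two helpers are typically used together on Rx before the CPU touches
the packet data. A minimal sketch (dring, rx_offset and pkt_len are
hypothetical; the pool is assumed to be created with PP_FLAG_DMA_MAP):

.. code-block:: c

    dma_addr_t dma;
    enum dma_data_direction dma_dir;
    void *data;

    dma = page_pool_get_dma_addr(page);
    dma_dir = page_pool_get_dma_dir(dring->page_pool);

    /* Make the received bytes visible to the CPU before parsing them */
    dma_sync_single_for_cpu(dring->dev, dma + rx_offset, pkt_len, dma_dir);
    data = page_address(page) + rx_offset;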

Coding examples
===============

Registration
------------

.. code-block:: c

    /* Page pool registration */
    struct page_pool_params pp_params = { 0 };
    struct xdp_rxq_info xdp_rxq;
    int err;

    pp_params.order = 0;
    /* internal DMA mapping in page_pool */
    pp_params.flags = PP_FLAG_DMA_MAP;
    pp_params.pool_size = DESC_NUM;
    pp_params.nid = NUMA_NO_NODE;
    pp_params.dev = priv->dev;
    pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
    page_pool = page_pool_create(&pp_params);
    /* page_pool_create() returns an ERR_PTR() on failure */
    if (IS_ERR(page_pool)) {
        err = PTR_ERR(page_pool);
        goto err_out;
    }

    err = xdp_rxq_info_reg(&xdp_rxq, ndev, 0);
    if (err)
        goto err_out;

    err = xdp_rxq_info_reg_mem_model(&xdp_rxq, MEM_TYPE_PAGE_POOL, page_pool);
    if (err)
        goto err_out;

NAPI poller
-----------

.. code-block:: c

    /* NAPI Rx poller */
    enum dma_data_direction dma_dir;

    dma_dir = page_pool_get_dma_dir(dring->page_pool);
    while (done < budget) {
        if (some_error)
            /* recycle directly into the pool fast cache (safe in NAPI) */
            page_pool_recycle_direct(page_pool, page);
        if (packet_is_xdp) {
            if (act == XDP_DROP)
                page_pool_recycle_direct(page_pool, page);
        } else if (packet_is_skb) {
            /* page leaves the pool: unmap and update inflight accounting */
            page_pool_release_page(page_pool, page);
            new_page = page_pool_dev_alloc_pages(page_pool);
        }
    }

Driver unload
-------------

.. code-block:: c

    /* Driver unload */
    page_pool_put_full_page(page_pool, page, false);
    /* Unregistering the XDP memory model also releases the pool once
     * all inflight pages have been returned.
     */
    xdp_rxq_info_unreg(&xdp_rxq);
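
If a pool was never registered with xdp_rxq_info_reg_mem_model() (for example
a pool used without XDP), the driver tears it down explicitly; a minimal
sketch:

.. code-block:: c

    /* Not needed when xdp_rxq_info_unreg() above already covers this
     * pool through the registered memory model.
     */
    page_pool_destroy(page_pool);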