// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2016-present, Facebook, Inc.
 * All rights reserved.
 *
 */
7*4882a593Smuzhiyun
8*4882a593Smuzhiyun #include <linux/bio.h>
9*4882a593Smuzhiyun #include <linux/bitmap.h>
10*4882a593Smuzhiyun #include <linux/err.h>
11*4882a593Smuzhiyun #include <linux/init.h>
12*4882a593Smuzhiyun #include <linux/kernel.h>
13*4882a593Smuzhiyun #include <linux/mm.h>
14*4882a593Smuzhiyun #include <linux/sched/mm.h>
15*4882a593Smuzhiyun #include <linux/pagemap.h>
16*4882a593Smuzhiyun #include <linux/refcount.h>
17*4882a593Smuzhiyun #include <linux/sched.h>
18*4882a593Smuzhiyun #include <linux/slab.h>
19*4882a593Smuzhiyun #include <linux/zstd.h>
20*4882a593Smuzhiyun #include "misc.h"
21*4882a593Smuzhiyun #include "compression.h"
22*4882a593Smuzhiyun #include "ctree.h"
23*4882a593Smuzhiyun
/* Cap the zstd window at 128K (2^17); matches the max input size below. */
#define ZSTD_BTRFS_MAX_WINDOWLOG 17
/* Largest input ever fed to a single stream (one btrfs compression chunk). */
#define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG)
#define ZSTD_BTRFS_DEFAULT_LEVEL 3
#define ZSTD_BTRFS_MAX_LEVEL 15
/* 307s to avoid pathologically clashing with transaction commit */
#define ZSTD_BTRFS_RECLAIM_JIFFIES (307 * HZ)
30*4882a593Smuzhiyun
zstd_get_btrfs_parameters(unsigned int level,size_t src_len)31*4882a593Smuzhiyun static ZSTD_parameters zstd_get_btrfs_parameters(unsigned int level,
32*4882a593Smuzhiyun size_t src_len)
33*4882a593Smuzhiyun {
34*4882a593Smuzhiyun ZSTD_parameters params = ZSTD_getParams(level, src_len, 0);
35*4882a593Smuzhiyun
36*4882a593Smuzhiyun if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG)
37*4882a593Smuzhiyun params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG;
38*4882a593Smuzhiyun WARN_ON(src_len > ZSTD_BTRFS_MAX_INPUT);
39*4882a593Smuzhiyun return params;
40*4882a593Smuzhiyun }
41*4882a593Smuzhiyun
/* Per-workspace scratch state for one compression or decompression stream. */
struct workspace {
	void *mem;		/* memory handed to ZSTD_init[CD]Stream() */
	size_t size;		/* size of @mem, from zstd_ws_mem_sizes[] */
	char *buf;		/* one-page bounce buffer for decompression */
	unsigned int level;	/* level this workspace was sized for */
	unsigned int req_level;	/* level of the current user, 0 when idle */
	unsigned long last_used; /* jiffies */
	struct list_head list;		/* entry in wsm.idle_ws[level - 1] */
	struct list_head lru_list;	/* entry in wsm.lru_list for reclaim */
	ZSTD_inBuffer in_buf;
	ZSTD_outBuffer out_buf;
};
54*4882a593Smuzhiyun
/*
 * Zstd Workspace Management
 *
 * Zstd workspaces have different memory requirements depending on the level.
 * The zstd workspaces are managed by having individual lists for each level
 * and a global lru.  Forward progress is maintained by protecting a max
 * level workspace.
 *
 * Getting a workspace is done by using the bitmap to identify the levels
 * that have available workspaces and scanning upward.  This lets us recycle
 * higher level workspaces because of the monotonic memory guarantee.  A
 * workspace's last_used is only updated if it is being used by the
 * corresponding memory level.  Putting a workspace involves adding it back
 * to the appropriate places and adding it back to the lru if necessary.
 *
 * A timer is used to reclaim workspaces if they have not been used for
 * ZSTD_BTRFS_RECLAIM_JIFFIES.  This helps keep only active workspaces
 * around.  The upper bound is provided by the workqueue limit which is 2
 * (percpu limit).
 */
74*4882a593Smuzhiyun
/* Global manager: per-level idle lists plus an LRU driving timed reclaim. */
struct zstd_workspace_manager {
	const struct btrfs_compress_op *ops;
	spinlock_t lock;		/* serializes the lists and active_map */
	struct list_head lru_list;	/* idle workspaces, most recent first */
	struct list_head idle_ws[ZSTD_BTRFS_MAX_LEVEL];	/* per-level idle lists */
	unsigned long active_map;	/* bit n set <=> idle_ws[n] non-empty */
	wait_queue_head_t wait;		/* waiters for a max level workspace */
	struct timer_list timer;	/* reclaim timer, zstd_reclaim_timer_fn */
};
84*4882a593Smuzhiyun
/* The single file-global workspace manager instance. */
static struct zstd_workspace_manager wsm;

/* Monotonic per-level memory bounds, filled by zstd_calc_ws_mem_sizes(). */
static size_t zstd_ws_mem_sizes[ZSTD_BTRFS_MAX_LEVEL];
88*4882a593Smuzhiyun
list_to_workspace(struct list_head * list)89*4882a593Smuzhiyun static inline struct workspace *list_to_workspace(struct list_head *list)
90*4882a593Smuzhiyun {
91*4882a593Smuzhiyun return container_of(list, struct workspace, list);
92*4882a593Smuzhiyun }
93*4882a593Smuzhiyun
/* Forward declarations: the reclaim timer runs before these are defined. */
void zstd_free_workspace(struct list_head *ws);
struct list_head *zstd_alloc_workspace(unsigned int level);
/*
 * zstd_reclaim_timer_fn - reclaim timer
 * @timer: timer
 *
 * This scans the lru_list and attempts to reclaim any workspace that hasn't
 * been used for ZSTD_BTRFS_RECLAIM_JIFFIES.
 */
static void zstd_reclaim_timer_fn(struct timer_list *timer)
{
	unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES;
	struct list_head *pos, *next;

	spin_lock_bh(&wsm.lock);

	if (list_empty(&wsm.lru_list)) {
		spin_unlock_bh(&wsm.lock);
		return;
	}

	/* Walk oldest-first: entries are list_add()ed at the head on put. */
	list_for_each_prev_safe(pos, next, &wsm.lru_list) {
		struct workspace *victim = container_of(pos, struct workspace,
							lru_list);
		unsigned int level;

		/* The rest of the list is younger than the threshold: done. */
		if (time_after(victim->last_used, reclaim_threshold))
			break;

		/* workspace is in use */
		if (victim->req_level)
			continue;

		level = victim->level;
		list_del(&victim->lru_list);
		list_del(&victim->list);
		zstd_free_workspace(&victim->list);

		/* That was the last idle workspace at this level. */
		if (list_empty(&wsm.idle_ws[level - 1]))
			clear_bit(level - 1, &wsm.active_map);

	}

	/* Re-arm while anything potentially reclaimable remains. */
	if (!list_empty(&wsm.lru_list))
		mod_timer(&wsm.timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);

	spin_unlock_bh(&wsm.lock);
}
142*4882a593Smuzhiyun
/*
 * zstd_calc_ws_mem_sizes - calculate monotonic memory bounds
 *
 * It is possible based on the level configurations that a higher level
 * workspace uses less memory than a lower level workspace.  In order to
 * reuse workspaces, this must be made a monotonic relationship.  This
 * precomputes the required memory for each level and enforces the
 * monotonicity between level and memory required.
 */
zstd_calc_ws_mem_sizes(void)152*4882a593Smuzhiyun static void zstd_calc_ws_mem_sizes(void)
153*4882a593Smuzhiyun {
154*4882a593Smuzhiyun size_t max_size = 0;
155*4882a593Smuzhiyun unsigned int level;
156*4882a593Smuzhiyun
157*4882a593Smuzhiyun for (level = 1; level <= ZSTD_BTRFS_MAX_LEVEL; level++) {
158*4882a593Smuzhiyun ZSTD_parameters params =
159*4882a593Smuzhiyun zstd_get_btrfs_parameters(level, ZSTD_BTRFS_MAX_INPUT);
160*4882a593Smuzhiyun size_t level_size =
161*4882a593Smuzhiyun max_t(size_t,
162*4882a593Smuzhiyun ZSTD_CStreamWorkspaceBound(params.cParams),
163*4882a593Smuzhiyun ZSTD_DStreamWorkspaceBound(ZSTD_BTRFS_MAX_INPUT));
164*4882a593Smuzhiyun
165*4882a593Smuzhiyun max_size = max_t(size_t, max_size, level_size);
166*4882a593Smuzhiyun zstd_ws_mem_sizes[level - 1] = max_size;
167*4882a593Smuzhiyun }
168*4882a593Smuzhiyun }
169*4882a593Smuzhiyun
/* Initialize the manager and preallocate one max level workspace. */
void zstd_init_workspace_manager(void)
{
	struct list_head *ws;
	int i;

	zstd_calc_ws_mem_sizes();

	wsm.ops = &btrfs_zstd_compress;
	spin_lock_init(&wsm.lock);
	init_waitqueue_head(&wsm.wait);
	timer_setup(&wsm.timer, zstd_reclaim_timer_fn, 0);

	INIT_LIST_HEAD(&wsm.lru_list);
	for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++)
		INIT_LIST_HEAD(&wsm.idle_ws[i]);

	/*
	 * Preallocate one max level workspace; failure here is not fatal
	 * since zstd_get_workspace() allocates on demand.
	 */
	ws = zstd_alloc_workspace(ZSTD_BTRFS_MAX_LEVEL);
	if (IS_ERR(ws)) {
		pr_warn(
			"BTRFS: cannot preallocate zstd compression workspace\n");
	} else {
		set_bit(ZSTD_BTRFS_MAX_LEVEL - 1, &wsm.active_map);
		list_add(ws, &wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1]);
	}
}
195*4882a593Smuzhiyun
/* Free every idle workspace and stop the reclaim timer. */
void zstd_cleanup_workspace_manager(void)
{
	struct workspace *workspace;
	int i;

	spin_lock_bh(&wsm.lock);
	for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) {
		while (!list_empty(&wsm.idle_ws[i])) {
			workspace = container_of(wsm.idle_ws[i].next,
						 struct workspace, list);
			list_del(&workspace->list);
			list_del(&workspace->lru_list);
			zstd_free_workspace(&workspace->list);
		}
	}
	spin_unlock_bh(&wsm.lock);

	/* Wait out a concurrently running timer callback before returning. */
	del_timer_sync(&wsm.timer);
}
215*4882a593Smuzhiyun
/*
 * zstd_find_workspace - find workspace
 * @level: compression level
 *
 * This iterates over the set bits in the active_map beginning at the
 * requested compression level.  This lets us utilize already allocated
 * workspaces before allocating a new one.  If the workspace is of a larger
 * size, it is used, but the place in the lru_list and last_used times are
 * not updated.  This is to offer the opportunity to reclaim the workspace
 * in favor of allocating an appropriately sized one in the future.
 */
static struct list_head *zstd_find_workspace(unsigned int level)
{
	struct list_head *ws;
	struct workspace *workspace;
	int i = level - 1;

	spin_lock_bh(&wsm.lock);
	/* Scan from the requested level upward: larger workspaces fit too. */
	for_each_set_bit_from(i, &wsm.active_map, ZSTD_BTRFS_MAX_LEVEL) {
		if (!list_empty(&wsm.idle_ws[i])) {
			ws = wsm.idle_ws[i].next;
			workspace = list_to_workspace(ws);
			list_del_init(ws);
			/* keep its place if it's a lower level using this */
			workspace->req_level = level;
			if (level == workspace->level)
				list_del(&workspace->lru_list);
			if (list_empty(&wsm.idle_ws[i]))
				clear_bit(i, &wsm.active_map);
			spin_unlock_bh(&wsm.lock);
			return ws;
		}
	}
	spin_unlock_bh(&wsm.lock);

	return NULL;
}
253*4882a593Smuzhiyun
/*
 * zstd_get_workspace - zstd's get_workspace
 * @level: compression level
 *
 * If @level is 0, then any compression level can be used.  Therefore, we
 * begin scanning from 1.  We first scan through possible workspaces and
 * then attempt to allocate a new workspace.  If we fail to allocate one due
 * to memory pressure, go to sleep waiting for the max level workspace to
 * free up.
 */
struct list_head *zstd_get_workspace(unsigned int level)
{
	struct list_head *ws;
	unsigned int nofs_flag;

	/* level == 0 means we can use any workspace */
	if (!level)
		level = 1;

again:
	ws = zstd_find_workspace(level);
	if (ws)
		return ws;

	/* Nothing idle: allocate a new one, without re-entering the fs. */
	nofs_flag = memalloc_nofs_save();
	ws = zstd_alloc_workspace(level);
	memalloc_nofs_restore(nofs_flag);

	if (IS_ERR(ws)) {
		DEFINE_WAIT(wait);

		/* Memory pressure: sleep until a workspace is put back. */
		prepare_to_wait(&wsm.wait, &wait, TASK_UNINTERRUPTIBLE);
		schedule();
		finish_wait(&wsm.wait, &wait);

		goto again;
	}

	return ws;
}
293*4882a593Smuzhiyun
/*
 * zstd_put_workspace - zstd's put_workspace
 * @ws: list_head for the workspace
 *
 * When putting back a workspace, we only need to update the LRU if we are
 * of the requested compression level.  Here is where we continue to protect
 * the max level workspace or update last_used accordingly.  If the reclaim
 * timer isn't set, it is also set here.  Only the max level workspace tries
 * to wake up waiting workspaces.
 */
/* Return @ws to the idle lists, updating the LRU and reclaim timer. */
void zstd_put_workspace(struct list_head *ws)
{
	struct workspace *workspace = list_to_workspace(ws);

	spin_lock_bh(&wsm.lock);

	/* A node is only taken off the lru if we are the corresponding level */
	if (workspace->req_level == workspace->level) {
		/* Hide a max level workspace from reclaim */
		if (list_empty(&wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1])) {
			INIT_LIST_HEAD(&workspace->lru_list);
		} else {
			workspace->last_used = jiffies;
			list_add(&workspace->lru_list, &wsm.lru_list);
			if (!timer_pending(&wsm.timer))
				mod_timer(&wsm.timer,
					  jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
		}
	}

	set_bit(workspace->level - 1, &wsm.active_map);
	list_add(&workspace->list, &wsm.idle_ws[workspace->level - 1]);
	workspace->req_level = 0;

	spin_unlock_bh(&wsm.lock);

	/* Only a max level workspace can satisfy every waiter. */
	if (workspace->level == ZSTD_BTRFS_MAX_LEVEL)
		cond_wake_up(&wsm.wait);
}
333*4882a593Smuzhiyun
zstd_free_workspace(struct list_head * ws)334*4882a593Smuzhiyun void zstd_free_workspace(struct list_head *ws)
335*4882a593Smuzhiyun {
336*4882a593Smuzhiyun struct workspace *workspace = list_entry(ws, struct workspace, list);
337*4882a593Smuzhiyun
338*4882a593Smuzhiyun kvfree(workspace->mem);
339*4882a593Smuzhiyun kfree(workspace->buf);
340*4882a593Smuzhiyun kfree(workspace);
341*4882a593Smuzhiyun }
342*4882a593Smuzhiyun
zstd_alloc_workspace(unsigned int level)343*4882a593Smuzhiyun struct list_head *zstd_alloc_workspace(unsigned int level)
344*4882a593Smuzhiyun {
345*4882a593Smuzhiyun struct workspace *workspace;
346*4882a593Smuzhiyun
347*4882a593Smuzhiyun workspace = kzalloc(sizeof(*workspace), GFP_KERNEL);
348*4882a593Smuzhiyun if (!workspace)
349*4882a593Smuzhiyun return ERR_PTR(-ENOMEM);
350*4882a593Smuzhiyun
351*4882a593Smuzhiyun workspace->size = zstd_ws_mem_sizes[level - 1];
352*4882a593Smuzhiyun workspace->level = level;
353*4882a593Smuzhiyun workspace->req_level = level;
354*4882a593Smuzhiyun workspace->last_used = jiffies;
355*4882a593Smuzhiyun workspace->mem = kvmalloc(workspace->size, GFP_KERNEL);
356*4882a593Smuzhiyun workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
357*4882a593Smuzhiyun if (!workspace->mem || !workspace->buf)
358*4882a593Smuzhiyun goto fail;
359*4882a593Smuzhiyun
360*4882a593Smuzhiyun INIT_LIST_HEAD(&workspace->list);
361*4882a593Smuzhiyun INIT_LIST_HEAD(&workspace->lru_list);
362*4882a593Smuzhiyun
363*4882a593Smuzhiyun return &workspace->list;
364*4882a593Smuzhiyun fail:
365*4882a593Smuzhiyun zstd_free_workspace(&workspace->list);
366*4882a593Smuzhiyun return ERR_PTR(-ENOMEM);
367*4882a593Smuzhiyun }
368*4882a593Smuzhiyun
/*
 * Compress pages of @mapping starting at byte offset @start into @pages.
 *
 * On entry *out_pages is the output page budget and *total_out carries the
 * input length.  On success (0) they are rewritten with the pages used and
 * the bytes consumed/produced.  Returns -E2BIG when the data does not
 * shrink or the budget is exhausted, -ENOMEM/-EIO on allocation or stream
 * errors.  Even on error, *out_pages reflects the pages handed back in
 * @pages so the caller can free them.
 */
int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
		u64 start, struct page **pages, unsigned long *out_pages,
		unsigned long *total_in, unsigned long *total_out)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	ZSTD_CStream *stream;
	int ret = 0;
	int nr_pages = 0;
	struct page *in_page = NULL;  /* The current page to read */
	struct page *out_page = NULL; /* The current page to write to */
	unsigned long tot_in = 0;
	unsigned long tot_out = 0;
	unsigned long len = *total_out;	/* input length, passed via *total_out */
	const unsigned long nr_dest_pages = *out_pages;
	unsigned long max_out = nr_dest_pages * PAGE_SIZE;
	ZSTD_parameters params = zstd_get_btrfs_parameters(workspace->req_level,
							   len);

	*out_pages = 0;
	*total_out = 0;
	*total_in = 0;

	/* Initialize the stream */
	stream = ZSTD_initCStream(params, len, workspace->mem,
			workspace->size);
	if (!stream) {
		pr_warn("BTRFS: ZSTD_initCStream failed\n");
		ret = -EIO;
		goto out;
	}

	/* map in the first page of input data */
	in_page = find_get_page(mapping, start >> PAGE_SHIFT);
	workspace->in_buf.src = kmap(in_page);
	workspace->in_buf.pos = 0;
	workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE);


	/* Allocate and map in the output buffer */
	out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
	if (out_page == NULL) {
		ret = -ENOMEM;
		goto out;
	}
	pages[nr_pages++] = out_page;
	workspace->out_buf.dst = kmap(out_page);
	workspace->out_buf.pos = 0;
	workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);

	/* Main loop: feed one page at a time, emitting pages as they fill. */
	while (1) {
		size_t ret2;

		ret2 = ZSTD_compressStream(stream, &workspace->out_buf,
				&workspace->in_buf);
		if (ZSTD_isError(ret2)) {
			pr_debug("BTRFS: ZSTD_compressStream returned %d\n",
					ZSTD_getErrorCode(ret2));
			ret = -EIO;
			goto out;
		}

		/* Check to see if we are making it bigger */
		if (tot_in + workspace->in_buf.pos > 8192 &&
				tot_in + workspace->in_buf.pos <
				tot_out + workspace->out_buf.pos) {
			ret = -E2BIG;
			goto out;
		}

		/* We've reached the end of our output range */
		if (workspace->out_buf.pos >= max_out) {
			tot_out += workspace->out_buf.pos;
			ret = -E2BIG;
			goto out;
		}

		/* Check if we need more output space */
		if (workspace->out_buf.pos == workspace->out_buf.size) {
			tot_out += PAGE_SIZE;
			max_out -= PAGE_SIZE;
			kunmap(out_page);
			if (nr_pages == nr_dest_pages) {
				out_page = NULL;
				ret = -E2BIG;
				goto out;
			}
			out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
			if (out_page == NULL) {
				ret = -ENOMEM;
				goto out;
			}
			pages[nr_pages++] = out_page;
			workspace->out_buf.dst = kmap(out_page);
			workspace->out_buf.pos = 0;
			workspace->out_buf.size = min_t(size_t, max_out,
							PAGE_SIZE);
		}

		/* We've reached the end of the input */
		if (workspace->in_buf.pos >= len) {
			tot_in += workspace->in_buf.pos;
			break;
		}

		/* Check if we need more input */
		if (workspace->in_buf.pos == workspace->in_buf.size) {
			tot_in += PAGE_SIZE;
			kunmap(in_page);
			put_page(in_page);

			start += PAGE_SIZE;
			len -= PAGE_SIZE;
			in_page = find_get_page(mapping, start >> PAGE_SHIFT);
			workspace->in_buf.src = kmap(in_page);
			workspace->in_buf.pos = 0;
			workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE);
		}
	}
	/* Flush remaining buffered data and write the frame epilogue. */
	while (1) {
		size_t ret2;

		ret2 = ZSTD_endStream(stream, &workspace->out_buf);
		if (ZSTD_isError(ret2)) {
			pr_debug("BTRFS: ZSTD_endStream returned %d\n",
					ZSTD_getErrorCode(ret2));
			ret = -EIO;
			goto out;
		}
		/* ret2 == 0 means the frame is fully flushed. */
		if (ret2 == 0) {
			tot_out += workspace->out_buf.pos;
			break;
		}
		if (workspace->out_buf.pos >= max_out) {
			tot_out += workspace->out_buf.pos;
			ret = -E2BIG;
			goto out;
		}

		tot_out += PAGE_SIZE;
		max_out -= PAGE_SIZE;
		kunmap(out_page);
		if (nr_pages == nr_dest_pages) {
			out_page = NULL;
			ret = -E2BIG;
			goto out;
		}
		out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
		if (out_page == NULL) {
			ret = -ENOMEM;
			goto out;
		}
		pages[nr_pages++] = out_page;
		workspace->out_buf.dst = kmap(out_page);
		workspace->out_buf.pos = 0;
		workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
	}

	/* Compression must actually save space to be worth storing. */
	if (tot_out >= tot_in) {
		ret = -E2BIG;
		goto out;
	}

	ret = 0;
	*total_in = tot_in;
	*total_out = tot_out;
out:
	*out_pages = nr_pages;
	/* Cleanup */
	if (in_page) {
		kunmap(in_page);
		put_page(in_page);
	}
	if (out_page)
		kunmap(out_page);
	return ret;
}
545*4882a593Smuzhiyun
/*
 * Decompress the compressed extent described by @cb into the pages of its
 * original bio.  Returns 0 on success (remaining bio space zero-filled),
 * -EIO on stream errors or truncated input.
 */
int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	struct page **pages_in = cb->compressed_pages;
	u64 disk_start = cb->start;
	struct bio *orig_bio = cb->orig_bio;
	size_t srclen = cb->compressed_len;
	ZSTD_DStream *stream;
	int ret = 0;
	unsigned long page_in_index = 0;
	unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
	unsigned long buf_start;
	unsigned long total_out = 0;

	stream = ZSTD_initDStream(
			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
	if (!stream) {
		pr_debug("BTRFS: ZSTD_initDStream failed\n");
		ret = -EIO;
		goto done;
	}

	workspace->in_buf.src = kmap(pages_in[page_in_index]);
	workspace->in_buf.pos = 0;
	workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);

	/* Decompress into the one-page bounce buffer, then copy out. */
	workspace->out_buf.dst = workspace->buf;
	workspace->out_buf.pos = 0;
	workspace->out_buf.size = PAGE_SIZE;

	while (1) {
		size_t ret2;

		ret2 = ZSTD_decompressStream(stream, &workspace->out_buf,
				&workspace->in_buf);
		if (ZSTD_isError(ret2)) {
			pr_debug("BTRFS: ZSTD_decompressStream returned %d\n",
					ZSTD_getErrorCode(ret2));
			ret = -EIO;
			goto done;
		}
		buf_start = total_out;
		total_out += workspace->out_buf.pos;
		workspace->out_buf.pos = 0;

		/* Copy the produced bytes into the original bio's pages. */
		ret = btrfs_decompress_buf2page(workspace->out_buf.dst,
				buf_start, total_out, disk_start, orig_bio);
		/* The bio is fully satisfied. */
		if (ret == 0)
			break;

		if (workspace->in_buf.pos >= srclen)
			break;

		/* Check if we've hit the end of a frame */
		if (ret2 == 0)
			break;

		/* Advance to the next compressed input page. */
		if (workspace->in_buf.pos == workspace->in_buf.size) {
			kunmap(pages_in[page_in_index++]);
			if (page_in_index >= total_pages_in) {
				workspace->in_buf.src = NULL;
				ret = -EIO;
				goto done;
			}
			srclen -= PAGE_SIZE;
			workspace->in_buf.src = kmap(pages_in[page_in_index]);
			workspace->in_buf.pos = 0;
			workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
		}
	}
	ret = 0;
	zero_fill_bio(orig_bio);
done:
	if (workspace->in_buf.src)
		kunmap(pages_in[page_in_index]);
	return ret;
}
623*4882a593Smuzhiyun
/*
 * Decompress a single buffer: @srclen bytes at @data_in, copying up to
 * @destlen bytes starting at decompressed offset @start_byte into
 * @dest_page.  Any remainder of the destination range is zero-filled,
 * even on error.  Returns 0 on success, -EIO on stream failure.
 */
int zstd_decompress(struct list_head *ws, unsigned char *data_in,
		struct page *dest_page, unsigned long start_byte, size_t srclen,
		size_t destlen)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	ZSTD_DStream *stream;
	int ret = 0;
	size_t ret2;
	unsigned long total_out = 0;
	unsigned long pg_offset = 0;
	char *kaddr;

	stream = ZSTD_initDStream(
			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
	if (!stream) {
		pr_warn("BTRFS: ZSTD_initDStream failed\n");
		ret = -EIO;
		goto finish;
	}

	destlen = min_t(size_t, destlen, PAGE_SIZE);

	workspace->in_buf.src = data_in;
	workspace->in_buf.pos = 0;
	workspace->in_buf.size = srclen;

	/* Decompress into the one-page bounce buffer, then copy out. */
	workspace->out_buf.dst = workspace->buf;
	workspace->out_buf.pos = 0;
	workspace->out_buf.size = PAGE_SIZE;

	ret2 = 1;
	while (pg_offset < destlen
	       && workspace->in_buf.pos < workspace->in_buf.size) {
		unsigned long buf_start;
		unsigned long buf_offset;
		unsigned long bytes;

		/* Check if the frame is over and we still need more input */
		if (ret2 == 0) {
			pr_debug("BTRFS: ZSTD_decompressStream ended early\n");
			ret = -EIO;
			goto finish;
		}
		ret2 = ZSTD_decompressStream(stream, &workspace->out_buf,
				&workspace->in_buf);
		if (ZSTD_isError(ret2)) {
			pr_debug("BTRFS: ZSTD_decompressStream returned %d\n",
					ZSTD_getErrorCode(ret2));
			ret = -EIO;
			goto finish;
		}

		buf_start = total_out;
		total_out += workspace->out_buf.pos;
		workspace->out_buf.pos = 0;

		/* Skip output that lies entirely before the requested range. */
		if (total_out <= start_byte)
			continue;

		/* Partial overlap: skip the leading bytes before start_byte. */
		if (total_out > start_byte && buf_start < start_byte)
			buf_offset = start_byte - buf_start;
		else
			buf_offset = 0;

		bytes = min_t(unsigned long, destlen - pg_offset,
				workspace->out_buf.size - buf_offset);

		kaddr = kmap_atomic(dest_page);
		memcpy(kaddr + pg_offset, workspace->out_buf.dst + buf_offset,
				bytes);
		kunmap_atomic(kaddr);

		pg_offset += bytes;
	}
	ret = 0;
finish:
	/* Zero-fill whatever part of the destination was not produced. */
	if (pg_offset < destlen) {
		kaddr = kmap_atomic(dest_page);
		memset(kaddr + pg_offset, 0, destlen - pg_offset);
		kunmap_atomic(kaddr);
	}
	return ret;
}
707*4882a593Smuzhiyun
/* Compression operations advertised to the generic btrfs compression code. */
const struct btrfs_compress_op btrfs_zstd_compress = {
	/* ZSTD uses own workspace manager */
	.workspace_manager = NULL,
	.max_level	= ZSTD_BTRFS_MAX_LEVEL,
	.default_level	= ZSTD_BTRFS_DEFAULT_LEVEL,
};
714