// SPDX-License-Identifier: GPL-2.0-only
/*
 * Cleancache frontend
 *
 * This code provides the generic "frontend" layer to call a matching
 * "backend" driver implementation of cleancache. See
 * Documentation/vm/cleancache.rst for more information.
 *
 * Copyright (C) 2009-2010 Oracle Corp. All rights reserved.
 * Author: Dan Magenheimer
 */

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/exportfs.h>
#include <linux/mm.h>
#include <linux/debugfs.h>
#include <linux/cleancache.h>

/*
 * cleancache_ops is set by cleancache_register_ops to contain the pointers
 * to the cleancache "backend" implementation functions.
 */
static const struct cleancache_ops *cleancache_ops __read_mostly;

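/*
 * For reference, the operations a backend must supply are declared in
 * include/linux/cleancache.h; a rough sketch (consult the header for
 * the authoritative definition):
 *
 *	struct cleancache_ops {
 *		int (*init_fs)(size_t pagesize);
 *		int (*init_shared_fs)(uuid_t *uuid, size_t pagesize);
 *		int (*get_page)(int pool_id, struct cleancache_filekey key,
 *				pgoff_t index, struct page *page);
 *		void (*put_page)(int pool_id, struct cleancache_filekey key,
 *				pgoff_t index, struct page *page);
 *		void (*invalidate_page)(int pool_id,
 *				struct cleancache_filekey key, pgoff_t index);
 *		void (*invalidate_inode)(int pool_id,
 *				struct cleancache_filekey key);
 *		void (*invalidate_fs)(int pool_id);
 *	};
 */
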
/*
 * Counters available via /sys/kernel/debug/cleancache (if debugfs is
 * properly configured). These are for information only, so they are not
 * protected against increment races.
 */
static u64 cleancache_succ_gets;
static u64 cleancache_failed_gets;
static u64 cleancache_puts;
static u64 cleancache_invalidates;

static void cleancache_register_ops_sb(struct super_block *sb, void *unused)
{
	switch (sb->cleancache_poolid) {
	case CLEANCACHE_NO_BACKEND:
		__cleancache_init_fs(sb);
		break;
	case CLEANCACHE_NO_BACKEND_SHARED:
		__cleancache_init_shared_fs(sb);
		break;
	}
}

/*
 * Register operations for cleancache. Returns 0 on success.
 */
int cleancache_register_ops(const struct cleancache_ops *ops)
{
	if (cmpxchg(&cleancache_ops, NULL, ops))
		return -EBUSY;

	/*
	 * A cleancache backend can be built as a module and hence loaded after
	 * a cleancache enabled filesystem has called cleancache_init_fs. To
	 * handle such a scenario, here we call ->init_fs or ->init_shared_fs
	 * for each active super block. To differentiate between local and
	 * shared filesystems, we temporarily initialize sb->cleancache_poolid
	 * to CLEANCACHE_NO_BACKEND or CLEANCACHE_NO_BACKEND_SHARED
	 * respectively in case there is no backend registered at the time
	 * cleancache_init_fs or cleancache_init_shared_fs is called.
	 *
	 * Since filesystems can be mounted concurrently with cleancache
	 * backend registration, we have to be careful to guarantee that all
	 * cleancache enabled filesystems that have been mounted by the time
	 * cleancache_register_ops is called have got, and all mounted later
	 * will get, a cleancache_poolid. This is assured by the following
	 * statements tied together:
	 *
	 * a) iterate_supers skips only those super blocks that have started
	 *    ->kill_sb
	 *
	 * b) if iterate_supers encounters a super block that has not finished
	 *    ->mount yet, it waits until it is finished
	 *
	 * c) cleancache_init_fs is called from ->mount and
	 *    cleancache_invalidate_fs is called from ->kill_sb
	 *
	 * d) we call iterate_supers after cleancache_ops has been set
	 *
	 * From a) it follows that if iterate_supers skips a super block, then
	 * either the super block is already dead, in which case we do not need
	 * to bother initializing cleancache for it, or it was mounted after we
	 * initiated iterate_supers. In the latter case, it must have seen
	 * cleancache_ops set according to d) and initialized cleancache from
	 * ->mount by itself according to c). This proves that we call
	 * ->init_fs at least once for each active super block.
	 *
	 * From b) and c) it follows that if iterate_supers encounters a super
	 * block that has already started ->init_fs, it will wait until ->mount
	 * and hence ->init_fs has finished, then check cleancache_poolid, see
	 * that it has already been set and therefore do nothing. This proves
	 * that we call ->init_fs no more than once for each super block.
	 *
	 * Combined together, the last two paragraphs prove the function
	 * correctness.
	 *
	 * Note that various cleancache callbacks may proceed before this
	 * function is called or even concurrently with it, but since
	 * CLEANCACHE_NO_BACKEND is negative, they will all result in a noop
	 * until the corresponding ->init_fs has been actually called and
	 * cleancache_ops has been set.
	 */
	iterate_supers(cleancache_register_ops_sb, NULL);
	return 0;
}
EXPORT_SYMBOL(cleancache_register_ops);
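
/*
 * A minimal registration sketch for a backend driver (the names below
 * are hypothetical, not an in-tree driver):
 *
 *	static const struct cleancache_ops example_cleancache_ops = {
 *		.init_fs	  = example_init_fs,
 *		.init_shared_fs	  = example_init_shared_fs,
 *		.get_page	  = example_get_page,
 *		.put_page	  = example_put_page,
 *		.invalidate_page  = example_invalidate_page,
 *		.invalidate_inode = example_invalidate_inode,
 *		.invalidate_fs	  = example_invalidate_fs,
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		return cleancache_register_ops(&example_cleancache_ops);
 *	}
 *
 * Only the first backend to register wins; any later registration
 * fails with -EBUSY.
 */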

/* Called by a cleancache-enabled filesystem at time of mount */
void __cleancache_init_fs(struct super_block *sb)
{
	int pool_id = CLEANCACHE_NO_BACKEND;

	if (cleancache_ops) {
		pool_id = cleancache_ops->init_fs(PAGE_SIZE);
		if (pool_id < 0)
			pool_id = CLEANCACHE_NO_POOL;
	}
	sb->cleancache_poolid = pool_id;
}
EXPORT_SYMBOL(__cleancache_init_fs);

/* Called by a cleancache-enabled clustered filesystem at time of mount */
void __cleancache_init_shared_fs(struct super_block *sb)
{
	int pool_id = CLEANCACHE_NO_BACKEND_SHARED;

	if (cleancache_ops) {
		pool_id = cleancache_ops->init_shared_fs(&sb->s_uuid, PAGE_SIZE);
		if (pool_id < 0)
			pool_id = CLEANCACHE_NO_POOL;
	}
	sb->cleancache_poolid = pool_id;
}
EXPORT_SYMBOL(__cleancache_init_shared_fs);

/*
 * If the filesystem uses exportable filehandles, use the filehandle as
 * the key, else use the inode number.
 */
static int cleancache_get_key(struct inode *inode,
			      struct cleancache_filekey *key)
{
	int (*fhfn)(struct inode *, __u32 *fh, int *, struct inode *);
	int len = 0, maxlen = CLEANCACHE_KEY_MAX;
	struct super_block *sb = inode->i_sb;

	key->u.ino = inode->i_ino;
	if (sb->s_export_op != NULL) {
		fhfn = sb->s_export_op->encode_fh;
		if (fhfn) {
			len = (*fhfn)(inode, &key->u.fh[0], &maxlen, NULL);
			if (len <= FILEID_ROOT || len == FILEID_INVALID)
				return -1;
			if (maxlen > CLEANCACHE_KEY_MAX)
				return -1;
		}
	}
	return 0;
}
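
/*
 * For example, a simple local filesystem with no export_operations ends
 * up with key.u.ino == inode->i_ino, while an exportable filesystem
 * fills key.u.fh[] via its ->encode_fh, giving a key that is stable and
 * unique in ways a bare inode number may not be (illustrative only).
 */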

/*
 * "Get" data from cleancache associated with the poolid/inode/index
 * that were specified when the data was put to cleancache and, if
 * successful, use it to fill the specified page with data and return 0.
 * If the get fails, the page frame is left unchanged and -1 is returned.
 * The page must be locked by the caller.
 *
 * The function performs two checks before taking any action: whether
 * a backend is registered and whether the sb->cleancache_poolid
 * is valid (non-negative).
 */
int __cleancache_get_page(struct page *page)
{
	int ret = -1;
	int pool_id;
	struct cleancache_filekey key = { .u.key = { 0 } };

	if (!cleancache_ops) {
		cleancache_failed_gets++;
		goto out;
	}

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	pool_id = page->mapping->host->i_sb->cleancache_poolid;
	if (pool_id < 0)
		goto out;

	if (cleancache_get_key(page->mapping->host, &key) < 0)
		goto out;

	ret = cleancache_ops->get_page(pool_id, key, page->index, page);
	if (ret == 0)
		cleancache_succ_gets++;
	else
		cleancache_failed_gets++;
out:
	return ret;
}
EXPORT_SYMBOL(__cleancache_get_page);
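
/*
 * A read path can consult cleancache before issuing real I/O, along
 * these lines (a sketch modeled on fs/mpage.c; the cleancache_get_page()
 * wrapper in the header performs the enabled/backend checks before
 * calling in here):
 *
 *	if (cleancache_get_page(page) == 0) {
 *		SetPageUptodate(page);
 *		unlock_page(page);
 *		return 0;
 *	}
 *	... otherwise submit a disk read for the page ...
 */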

/*
 * "Put" data from a page to cleancache and associate it with the
 * (previously-obtained per-filesystem) poolid and the page's inode
 * and page index. The page must be locked. Note that a put_page
 * always "succeeds", though a subsequent get_page may succeed or fail.
 *
 * The function performs two checks before taking any action: whether
 * a backend is registered and whether the sb->cleancache_poolid
 * is valid (non-negative).
 */
void __cleancache_put_page(struct page *page)
{
	int pool_id;
	struct cleancache_filekey key = { .u.key = { 0 } };

	if (!cleancache_ops) {
		cleancache_puts++;
		return;
	}

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	pool_id = page->mapping->host->i_sb->cleancache_poolid;
	if (pool_id >= 0 &&
	    cleancache_get_key(page->mapping->host, &key) >= 0) {
		cleancache_ops->put_page(pool_id, key, page->index, page);
		cleancache_puts++;
	}
}
EXPORT_SYMBOL(__cleancache_put_page);
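
/*
 * The page cache is the expected caller, putting a page on clean
 * eviction roughly as follows (a sketch modeled on mm/filemap.c; a page
 * whose on-disk data may differ must be invalidated instead):
 *
 *	if (PageUptodate(page) && PageMappedToDisk(page))
 *		cleancache_put_page(page);
 *	else
 *		cleancache_invalidate_page(mapping, page);
 */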

/*
 * Invalidate any data from cleancache associated with the poolid and the
 * page's inode and page index so that a subsequent "get" will fail.
 *
 * The function performs two checks before taking any action: whether
 * a backend is registered and whether the sb->cleancache_poolid
 * is valid (non-negative).
 */
void __cleancache_invalidate_page(struct address_space *mapping,
				  struct page *page)
{
	/* careful... page->mapping is NULL sometimes when this is called */
	int pool_id = mapping->host->i_sb->cleancache_poolid;
	struct cleancache_filekey key = { .u.key = { 0 } };

	if (!cleancache_ops)
		return;

	if (pool_id >= 0) {
		VM_BUG_ON_PAGE(!PageLocked(page), page);
		if (cleancache_get_key(mapping->host, &key) >= 0) {
			cleancache_ops->invalidate_page(pool_id,
					key, page->index);
			cleancache_invalidates++;
		}
	}
}
EXPORT_SYMBOL(__cleancache_invalidate_page);

/*
 * Invalidate all data from cleancache associated with the poolid and the
 * mapping's inode so that all subsequent gets to this poolid/inode
 * will fail.
 *
 * The function performs two checks before taking any action: whether
 * a backend is registered and whether the sb->cleancache_poolid
 * is valid (non-negative).
 */
void __cleancache_invalidate_inode(struct address_space *mapping)
{
	int pool_id = mapping->host->i_sb->cleancache_poolid;
	struct cleancache_filekey key = { .u.key = { 0 } };

	if (!cleancache_ops)
		return;

	if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0)
		cleancache_ops->invalidate_inode(pool_id, key);
}
EXPORT_SYMBOL(__cleancache_invalidate_inode);
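
/*
 * Truncation is a typical caller: truncate_inode_pages_range() drops
 * the whole inode from cleancache so a later "get" can never return
 * stale data (sketch):
 *
 *	cleancache_invalidate_inode(mapping);
 */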

/*
 * Called by any cleancache-enabled filesystem at time of unmount;
 * note that pool_id is surrendered and may be returned by a subsequent
 * cleancache_init_fs or cleancache_init_shared_fs.
 */
void __cleancache_invalidate_fs(struct super_block *sb)
{
	int pool_id;

	pool_id = sb->cleancache_poolid;
	sb->cleancache_poolid = CLEANCACHE_NO_POOL;

	if (cleancache_ops && pool_id >= 0)
		cleancache_ops->invalidate_fs(pool_id);
}
EXPORT_SYMBOL(__cleancache_invalidate_fs);
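
/*
 * This is reached from the unmount path, roughly (a sketch; exact call
 * chain depends on the filesystem's ->kill_sb):
 *
 *	->kill_sb (e.g. via generic_shutdown_super())
 *	    -> cleancache_invalidate_fs(sb)
 *	        -> __cleancache_invalidate_fs(sb)
 *
 * after which the backend is free to recycle pool_id for a later mount.
 */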

static int __init init_cleancache(void)
{
#ifdef CONFIG_DEBUG_FS
	struct dentry *root = debugfs_create_dir("cleancache", NULL);

	debugfs_create_u64("succ_gets", 0444, root, &cleancache_succ_gets);
	debugfs_create_u64("failed_gets", 0444, root, &cleancache_failed_gets);
	debugfs_create_u64("puts", 0444, root, &cleancache_puts);
	debugfs_create_u64("invalidates", 0444, root, &cleancache_invalidates);
#endif
	return 0;
}
module_init(init_cleancache)