xref: /OK3568_Linux_fs/kernel/fs/ubifs/shrinker.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * This file is part of UBIFS.
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * Copyright (C) 2006-2008 Nokia Corporation.
6*4882a593Smuzhiyun  *
7*4882a593Smuzhiyun  * Authors: Artem Bityutskiy (Битюцкий Артём)
8*4882a593Smuzhiyun  *          Adrian Hunter
9*4882a593Smuzhiyun  */
10*4882a593Smuzhiyun 
11*4882a593Smuzhiyun /*
12*4882a593Smuzhiyun  * This file implements UBIFS shrinker which evicts clean znodes from the TNC
13*4882a593Smuzhiyun  * tree when Linux VM needs more RAM.
14*4882a593Smuzhiyun  *
15*4882a593Smuzhiyun  * We do not implement any LRU lists to find oldest znodes to free because it
16*4882a593Smuzhiyun  * would add additional overhead to the file system fast paths. So the shrinker
17*4882a593Smuzhiyun  * just walks the TNC tree when searching for znodes to free.
18*4882a593Smuzhiyun  *
19*4882a593Smuzhiyun  * If the root of a TNC sub-tree is clean and old enough, then the children are
20*4882a593Smuzhiyun  * also clean and old enough. So the shrinker walks the TNC in level order and
21*4882a593Smuzhiyun  * dumps entire sub-trees.
22*4882a593Smuzhiyun  *
23*4882a593Smuzhiyun  * The age of znodes is just the time-stamp when they were last looked at.
24*4882a593Smuzhiyun  * The current shrinker first tries to evict old znodes, then young ones.
25*4882a593Smuzhiyun  *
26*4882a593Smuzhiyun  * Since the shrinker is global, it has to protect against races with FS
27*4882a593Smuzhiyun  * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
28*4882a593Smuzhiyun  */
29*4882a593Smuzhiyun 
30*4882a593Smuzhiyun #include "ubifs.h"
31*4882a593Smuzhiyun 
32*4882a593Smuzhiyun /* List of all UBIFS file-system instances */
33*4882a593Smuzhiyun LIST_HEAD(ubifs_infos);
34*4882a593Smuzhiyun 
35*4882a593Smuzhiyun /*
36*4882a593Smuzhiyun  * We number each shrinker run and record the number on the ubifs_info structure
37*4882a593Smuzhiyun  * so that we can easily work out which ubifs_info structures have already been
38*4882a593Smuzhiyun  * done by the current run.
39*4882a593Smuzhiyun  */
40*4882a593Smuzhiyun static unsigned int shrinker_run_no;
41*4882a593Smuzhiyun 
42*4882a593Smuzhiyun /* Protects 'ubifs_infos' list */
43*4882a593Smuzhiyun DEFINE_SPINLOCK(ubifs_infos_lock);
44*4882a593Smuzhiyun 
45*4882a593Smuzhiyun /* Global clean znode counter (for all mounted UBIFS instances) */
46*4882a593Smuzhiyun atomic_long_t ubifs_clean_zn_cnt;
47*4882a593Smuzhiyun 
48*4882a593Smuzhiyun /**
49*4882a593Smuzhiyun  * shrink_tnc - shrink TNC tree.
50*4882a593Smuzhiyun  * @c: UBIFS file-system description object
51*4882a593Smuzhiyun  * @nr: number of znodes to free
52*4882a593Smuzhiyun  * @age: the age of znodes to free
53*4882a593Smuzhiyun  * @contention: if any contention, this is set to %1
54*4882a593Smuzhiyun  *
55*4882a593Smuzhiyun  * This function traverses TNC tree and frees clean znodes. It does not free
56*4882a593Smuzhiyun  * clean znodes which younger then @age. Returns number of freed znodes.
57*4882a593Smuzhiyun  */
shrink_tnc(struct ubifs_info * c,int nr,int age,int * contention)58*4882a593Smuzhiyun static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
59*4882a593Smuzhiyun {
60*4882a593Smuzhiyun 	int total_freed = 0;
61*4882a593Smuzhiyun 	struct ubifs_znode *znode, *zprev;
62*4882a593Smuzhiyun 	time64_t time = ktime_get_seconds();
63*4882a593Smuzhiyun 
64*4882a593Smuzhiyun 	ubifs_assert(c, mutex_is_locked(&c->umount_mutex));
65*4882a593Smuzhiyun 	ubifs_assert(c, mutex_is_locked(&c->tnc_mutex));
66*4882a593Smuzhiyun 
67*4882a593Smuzhiyun 	if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
68*4882a593Smuzhiyun 		return 0;
69*4882a593Smuzhiyun 
70*4882a593Smuzhiyun 	/*
71*4882a593Smuzhiyun 	 * Traverse the TNC tree in levelorder manner, so that it is possible
72*4882a593Smuzhiyun 	 * to destroy large sub-trees. Indeed, if a znode is old, then all its
73*4882a593Smuzhiyun 	 * children are older or of the same age.
74*4882a593Smuzhiyun 	 *
75*4882a593Smuzhiyun 	 * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
76*4882a593Smuzhiyun 	 * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
77*4882a593Smuzhiyun 	 * changed only when the 'c->tnc_mutex' is held.
78*4882a593Smuzhiyun 	 */
79*4882a593Smuzhiyun 	zprev = NULL;
80*4882a593Smuzhiyun 	znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL);
81*4882a593Smuzhiyun 	while (znode && total_freed < nr &&
82*4882a593Smuzhiyun 	       atomic_long_read(&c->clean_zn_cnt) > 0) {
83*4882a593Smuzhiyun 		int freed;
84*4882a593Smuzhiyun 
85*4882a593Smuzhiyun 		/*
86*4882a593Smuzhiyun 		 * If the znode is clean, but it is in the 'c->cnext' list, this
87*4882a593Smuzhiyun 		 * means that this znode has just been written to flash as a
88*4882a593Smuzhiyun 		 * part of commit and was marked clean. They will be removed
89*4882a593Smuzhiyun 		 * from the list at end commit. We cannot change the list,
90*4882a593Smuzhiyun 		 * because it is not protected by any mutex (design decision to
91*4882a593Smuzhiyun 		 * make commit really independent and parallel to main I/O). So
92*4882a593Smuzhiyun 		 * we just skip these znodes.
93*4882a593Smuzhiyun 		 *
94*4882a593Smuzhiyun 		 * Note, the 'clean_zn_cnt' counters are not updated until
95*4882a593Smuzhiyun 		 * after the commit, so the UBIFS shrinker does not report
96*4882a593Smuzhiyun 		 * the znodes which are in the 'c->cnext' list as freeable.
97*4882a593Smuzhiyun 		 *
98*4882a593Smuzhiyun 		 * Also note, if the root of a sub-tree is not in 'c->cnext',
99*4882a593Smuzhiyun 		 * then the whole sub-tree is not in 'c->cnext' as well, so it
100*4882a593Smuzhiyun 		 * is safe to dump whole sub-tree.
101*4882a593Smuzhiyun 		 */
102*4882a593Smuzhiyun 
103*4882a593Smuzhiyun 		if (znode->cnext) {
104*4882a593Smuzhiyun 			/*
105*4882a593Smuzhiyun 			 * Very soon these znodes will be removed from the list
106*4882a593Smuzhiyun 			 * and become freeable.
107*4882a593Smuzhiyun 			 */
108*4882a593Smuzhiyun 			*contention = 1;
109*4882a593Smuzhiyun 		} else if (!ubifs_zn_dirty(znode) &&
110*4882a593Smuzhiyun 			   abs(time - znode->time) >= age) {
111*4882a593Smuzhiyun 			if (znode->parent)
112*4882a593Smuzhiyun 				znode->parent->zbranch[znode->iip].znode = NULL;
113*4882a593Smuzhiyun 			else
114*4882a593Smuzhiyun 				c->zroot.znode = NULL;
115*4882a593Smuzhiyun 
116*4882a593Smuzhiyun 			freed = ubifs_destroy_tnc_subtree(c, znode);
117*4882a593Smuzhiyun 			atomic_long_sub(freed, &ubifs_clean_zn_cnt);
118*4882a593Smuzhiyun 			atomic_long_sub(freed, &c->clean_zn_cnt);
119*4882a593Smuzhiyun 			total_freed += freed;
120*4882a593Smuzhiyun 			znode = zprev;
121*4882a593Smuzhiyun 		}
122*4882a593Smuzhiyun 
123*4882a593Smuzhiyun 		if (unlikely(!c->zroot.znode))
124*4882a593Smuzhiyun 			break;
125*4882a593Smuzhiyun 
126*4882a593Smuzhiyun 		zprev = znode;
127*4882a593Smuzhiyun 		znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode);
128*4882a593Smuzhiyun 		cond_resched();
129*4882a593Smuzhiyun 	}
130*4882a593Smuzhiyun 
131*4882a593Smuzhiyun 	return total_freed;
132*4882a593Smuzhiyun }
133*4882a593Smuzhiyun 
134*4882a593Smuzhiyun /**
135*4882a593Smuzhiyun  * shrink_tnc_trees - shrink UBIFS TNC trees.
136*4882a593Smuzhiyun  * @nr: number of znodes to free
137*4882a593Smuzhiyun  * @age: the age of znodes to free
138*4882a593Smuzhiyun  * @contention: if any contention, this is set to %1
139*4882a593Smuzhiyun  *
140*4882a593Smuzhiyun  * This function walks the list of mounted UBIFS file-systems and frees clean
141*4882a593Smuzhiyun  * znodes which are older than @age, until at least @nr znodes are freed.
142*4882a593Smuzhiyun  * Returns the number of freed znodes.
143*4882a593Smuzhiyun  */
shrink_tnc_trees(int nr,int age,int * contention)144*4882a593Smuzhiyun static int shrink_tnc_trees(int nr, int age, int *contention)
145*4882a593Smuzhiyun {
146*4882a593Smuzhiyun 	struct ubifs_info *c;
147*4882a593Smuzhiyun 	struct list_head *p;
148*4882a593Smuzhiyun 	unsigned int run_no;
149*4882a593Smuzhiyun 	int freed = 0;
150*4882a593Smuzhiyun 
151*4882a593Smuzhiyun 	spin_lock(&ubifs_infos_lock);
152*4882a593Smuzhiyun 	do {
153*4882a593Smuzhiyun 		run_no = ++shrinker_run_no;
154*4882a593Smuzhiyun 	} while (run_no == 0);
155*4882a593Smuzhiyun 	/* Iterate over all mounted UBIFS file-systems and try to shrink them */
156*4882a593Smuzhiyun 	p = ubifs_infos.next;
157*4882a593Smuzhiyun 	while (p != &ubifs_infos) {
158*4882a593Smuzhiyun 		c = list_entry(p, struct ubifs_info, infos_list);
159*4882a593Smuzhiyun 		/*
160*4882a593Smuzhiyun 		 * We move the ones we do to the end of the list, so we stop
161*4882a593Smuzhiyun 		 * when we see one we have already done.
162*4882a593Smuzhiyun 		 */
163*4882a593Smuzhiyun 		if (c->shrinker_run_no == run_no)
164*4882a593Smuzhiyun 			break;
165*4882a593Smuzhiyun 		if (!mutex_trylock(&c->umount_mutex)) {
166*4882a593Smuzhiyun 			/* Some un-mount is in progress, try next FS */
167*4882a593Smuzhiyun 			*contention = 1;
168*4882a593Smuzhiyun 			p = p->next;
169*4882a593Smuzhiyun 			continue;
170*4882a593Smuzhiyun 		}
171*4882a593Smuzhiyun 		/*
172*4882a593Smuzhiyun 		 * We're holding 'c->umount_mutex', so the file-system won't go
173*4882a593Smuzhiyun 		 * away.
174*4882a593Smuzhiyun 		 */
175*4882a593Smuzhiyun 		if (!mutex_trylock(&c->tnc_mutex)) {
176*4882a593Smuzhiyun 			mutex_unlock(&c->umount_mutex);
177*4882a593Smuzhiyun 			*contention = 1;
178*4882a593Smuzhiyun 			p = p->next;
179*4882a593Smuzhiyun 			continue;
180*4882a593Smuzhiyun 		}
181*4882a593Smuzhiyun 		spin_unlock(&ubifs_infos_lock);
182*4882a593Smuzhiyun 		/*
183*4882a593Smuzhiyun 		 * OK, now we have TNC locked, the file-system cannot go away -
184*4882a593Smuzhiyun 		 * it is safe to reap the cache.
185*4882a593Smuzhiyun 		 */
186*4882a593Smuzhiyun 		c->shrinker_run_no = run_no;
187*4882a593Smuzhiyun 		freed += shrink_tnc(c, nr, age, contention);
188*4882a593Smuzhiyun 		mutex_unlock(&c->tnc_mutex);
189*4882a593Smuzhiyun 		spin_lock(&ubifs_infos_lock);
190*4882a593Smuzhiyun 		/* Get the next list element before we move this one */
191*4882a593Smuzhiyun 		p = p->next;
192*4882a593Smuzhiyun 		/*
193*4882a593Smuzhiyun 		 * Move this one to the end of the list to provide some
194*4882a593Smuzhiyun 		 * fairness.
195*4882a593Smuzhiyun 		 */
196*4882a593Smuzhiyun 		list_move_tail(&c->infos_list, &ubifs_infos);
197*4882a593Smuzhiyun 		mutex_unlock(&c->umount_mutex);
198*4882a593Smuzhiyun 		if (freed >= nr)
199*4882a593Smuzhiyun 			break;
200*4882a593Smuzhiyun 	}
201*4882a593Smuzhiyun 	spin_unlock(&ubifs_infos_lock);
202*4882a593Smuzhiyun 	return freed;
203*4882a593Smuzhiyun }
204*4882a593Smuzhiyun 
205*4882a593Smuzhiyun /**
206*4882a593Smuzhiyun  * kick_a_thread - kick a background thread to start commit.
207*4882a593Smuzhiyun  *
208*4882a593Smuzhiyun  * This function kicks a background thread to start background commit. Returns
209*4882a593Smuzhiyun  * %-1 if a thread was kicked or there is another reason to assume the memory
210*4882a593Smuzhiyun  * will soon be freed or become freeable. If there are no dirty znodes, returns
211*4882a593Smuzhiyun  * %0.
212*4882a593Smuzhiyun  */
kick_a_thread(void)213*4882a593Smuzhiyun static int kick_a_thread(void)
214*4882a593Smuzhiyun {
215*4882a593Smuzhiyun 	int i;
216*4882a593Smuzhiyun 	struct ubifs_info *c;
217*4882a593Smuzhiyun 
218*4882a593Smuzhiyun 	/*
219*4882a593Smuzhiyun 	 * Iterate over all mounted UBIFS file-systems and find out if there is
220*4882a593Smuzhiyun 	 * already an ongoing commit operation there. If no, then iterate for
221*4882a593Smuzhiyun 	 * the second time and initiate background commit.
222*4882a593Smuzhiyun 	 */
223*4882a593Smuzhiyun 	spin_lock(&ubifs_infos_lock);
224*4882a593Smuzhiyun 	for (i = 0; i < 2; i++) {
225*4882a593Smuzhiyun 		list_for_each_entry(c, &ubifs_infos, infos_list) {
226*4882a593Smuzhiyun 			long dirty_zn_cnt;
227*4882a593Smuzhiyun 
228*4882a593Smuzhiyun 			if (!mutex_trylock(&c->umount_mutex)) {
229*4882a593Smuzhiyun 				/*
230*4882a593Smuzhiyun 				 * Some un-mount is in progress, it will
231*4882a593Smuzhiyun 				 * certainly free memory, so just return.
232*4882a593Smuzhiyun 				 */
233*4882a593Smuzhiyun 				spin_unlock(&ubifs_infos_lock);
234*4882a593Smuzhiyun 				return -1;
235*4882a593Smuzhiyun 			}
236*4882a593Smuzhiyun 
237*4882a593Smuzhiyun 			dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
238*4882a593Smuzhiyun 
239*4882a593Smuzhiyun 			if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
240*4882a593Smuzhiyun 			    c->ro_mount || c->ro_error) {
241*4882a593Smuzhiyun 				mutex_unlock(&c->umount_mutex);
242*4882a593Smuzhiyun 				continue;
243*4882a593Smuzhiyun 			}
244*4882a593Smuzhiyun 
245*4882a593Smuzhiyun 			if (c->cmt_state != COMMIT_RESTING) {
246*4882a593Smuzhiyun 				spin_unlock(&ubifs_infos_lock);
247*4882a593Smuzhiyun 				mutex_unlock(&c->umount_mutex);
248*4882a593Smuzhiyun 				return -1;
249*4882a593Smuzhiyun 			}
250*4882a593Smuzhiyun 
251*4882a593Smuzhiyun 			if (i == 1) {
252*4882a593Smuzhiyun 				list_move_tail(&c->infos_list, &ubifs_infos);
253*4882a593Smuzhiyun 				spin_unlock(&ubifs_infos_lock);
254*4882a593Smuzhiyun 
255*4882a593Smuzhiyun 				ubifs_request_bg_commit(c);
256*4882a593Smuzhiyun 				mutex_unlock(&c->umount_mutex);
257*4882a593Smuzhiyun 				return -1;
258*4882a593Smuzhiyun 			}
259*4882a593Smuzhiyun 			mutex_unlock(&c->umount_mutex);
260*4882a593Smuzhiyun 		}
261*4882a593Smuzhiyun 	}
262*4882a593Smuzhiyun 	spin_unlock(&ubifs_infos_lock);
263*4882a593Smuzhiyun 
264*4882a593Smuzhiyun 	return 0;
265*4882a593Smuzhiyun }
266*4882a593Smuzhiyun 
ubifs_shrink_count(struct shrinker * shrink,struct shrink_control * sc)267*4882a593Smuzhiyun unsigned long ubifs_shrink_count(struct shrinker *shrink,
268*4882a593Smuzhiyun 				 struct shrink_control *sc)
269*4882a593Smuzhiyun {
270*4882a593Smuzhiyun 	long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
271*4882a593Smuzhiyun 
272*4882a593Smuzhiyun 	/*
273*4882a593Smuzhiyun 	 * Due to the way UBIFS updates the clean znode counter it may
274*4882a593Smuzhiyun 	 * temporarily be negative.
275*4882a593Smuzhiyun 	 */
276*4882a593Smuzhiyun 	return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
277*4882a593Smuzhiyun }
278*4882a593Smuzhiyun 
ubifs_shrink_scan(struct shrinker * shrink,struct shrink_control * sc)279*4882a593Smuzhiyun unsigned long ubifs_shrink_scan(struct shrinker *shrink,
280*4882a593Smuzhiyun 				struct shrink_control *sc)
281*4882a593Smuzhiyun {
282*4882a593Smuzhiyun 	unsigned long nr = sc->nr_to_scan;
283*4882a593Smuzhiyun 	int contention = 0;
284*4882a593Smuzhiyun 	unsigned long freed;
285*4882a593Smuzhiyun 	long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
286*4882a593Smuzhiyun 
287*4882a593Smuzhiyun 	if (!clean_zn_cnt) {
288*4882a593Smuzhiyun 		/*
289*4882a593Smuzhiyun 		 * No clean znodes, nothing to reap. All we can do in this case
290*4882a593Smuzhiyun 		 * is to kick background threads to start commit, which will
291*4882a593Smuzhiyun 		 * probably make clean znodes which, in turn, will be freeable.
292*4882a593Smuzhiyun 		 * And we return -1 which means will make VM call us again
293*4882a593Smuzhiyun 		 * later.
294*4882a593Smuzhiyun 		 */
295*4882a593Smuzhiyun 		dbg_tnc("no clean znodes, kick a thread");
296*4882a593Smuzhiyun 		return kick_a_thread();
297*4882a593Smuzhiyun 	}
298*4882a593Smuzhiyun 
299*4882a593Smuzhiyun 	freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
300*4882a593Smuzhiyun 	if (freed >= nr)
301*4882a593Smuzhiyun 		goto out;
302*4882a593Smuzhiyun 
303*4882a593Smuzhiyun 	dbg_tnc("not enough old znodes, try to free young ones");
304*4882a593Smuzhiyun 	freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention);
305*4882a593Smuzhiyun 	if (freed >= nr)
306*4882a593Smuzhiyun 		goto out;
307*4882a593Smuzhiyun 
308*4882a593Smuzhiyun 	dbg_tnc("not enough young znodes, free all");
309*4882a593Smuzhiyun 	freed += shrink_tnc_trees(nr - freed, 0, &contention);
310*4882a593Smuzhiyun 
311*4882a593Smuzhiyun 	if (!freed && contention) {
312*4882a593Smuzhiyun 		dbg_tnc("freed nothing, but contention");
313*4882a593Smuzhiyun 		return SHRINK_STOP;
314*4882a593Smuzhiyun 	}
315*4882a593Smuzhiyun 
316*4882a593Smuzhiyun out:
317*4882a593Smuzhiyun 	dbg_tnc("%lu znodes were freed, requested %lu", freed, nr);
318*4882a593Smuzhiyun 	return freed;
319*4882a593Smuzhiyun }
320