// SPDX-License-Identifier: GPL-2.0
/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */

#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>

static void propagate_protected_usage(struct page_counter *c,
				      unsigned long usage)
{
	unsigned long protected, old_protected;
	unsigned long low, min;
	long delta;

	if (!c->parent)
		return;

	min = READ_ONCE(c->min);
	if (min || atomic_long_read(&c->min_usage)) {
		protected = min(usage, min);
		old_protected = atomic_long_xchg(&c->min_usage, protected);
		delta = protected - old_protected;
		if (delta)
			atomic_long_add(delta, &c->parent->children_min_usage);
	}

	low = READ_ONCE(c->low);
	if (low || atomic_long_read(&c->low_usage)) {
		protected = min(usage, low);
		old_protected = atomic_long_xchg(&c->low_usage, protected);
		delta = protected - old_protected;
		if (delta)
			atomic_long_add(delta, &c->parent->children_low_usage);
	}
}

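/*
 * Illustrative note: the propagation above stores min(usage, protection)
 * in the counter and pushes only the delta up to the parent.  A worked
 * sketch, assuming a child counter configured with min = 100 pages:
 *
 *	usage  40 -> child min_usage =  40, parent children_min_usage += 40
 *	usage 160 -> child min_usage = 100, parent children_min_usage += 60
 *	usage  70 -> child min_usage =  70, parent children_min_usage -= 30
 */
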
/**
 * page_counter_cancel - take pages out of the local counter
 * @counter: counter
 * @nr_pages: number of pages to cancel
 */
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
	long new;

	new = atomic_long_sub_return(nr_pages, &counter->usage);
	propagate_protected_usage(counter, new);
	/* More uncharges than charges? */
	WARN_ON_ONCE(new < 0);
}

/**
 * page_counter_charge - hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 *
 * NOTE: This does not consider any configured counter limits.
 */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;

		new = atomic_long_add_return(nr_pages, &c->usage);
		propagate_protected_usage(c, new);
		/*
		 * This is indeed racy, but we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > READ_ONCE(c->watermark))
			WRITE_ONCE(c->watermark, new);
	}
}

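/*
 * Illustrative usage sketch: a caller that must not fail (e.g. one that
 * accounts pages it has already committed to) can use the forced charge
 * above and deal with any overrun separately, for example:
 *
 *	page_counter_charge(&counter, nr_pages);	// may push usage past ->max
 *	if (page_counter_read(&counter) > counter.max)
 *		;	// e.g. schedule reclaim or throttle the caller
 *
 * page_counter_read() is the usage accessor from <linux/page_counter.h>;
 * the overrun handling itself is left to the caller.
 */
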
/**
 * page_counter_try_charge - try to hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 * @fail: points first counter to hit its limit, if any
 *
 * Returns %true on success, or %false and @fail if the counter or one
 * of its ancestors has hit its configured limit.
 */
bool page_counter_try_charge(struct page_counter *counter,
			     unsigned long nr_pages,
			     struct page_counter **fail)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;
		/*
		 * Charge speculatively to avoid an expensive CAS.  If
		 * a bigger charge fails, it might falsely lock out a
		 * racing smaller charge and send it into reclaim
		 * early, but the error is limited to the difference
		 * between the two sizes, which is less than 2M/4M in
		 * case of a THP locking out a regular page charge.
		 *
		 * The atomic_long_add_return() implies a full memory
		 * barrier between incrementing the count and reading
		 * the limit.  When racing with page_counter_set_max(),
		 * we either see the new limit or the setter sees the
		 * counter has changed and retries.
		 */
		new = atomic_long_add_return(nr_pages, &c->usage);
		if (new > c->max) {
			atomic_long_sub(nr_pages, &c->usage);
			propagate_protected_usage(c, new);
			/*
			 * This is racy, but we can live with some
			 * inaccuracy in the failcnt which is only used
			 * to report stats.
			 */
			data_race(c->failcnt++);
			*fail = c;
			goto failed;
		}
		propagate_protected_usage(c, new);
		/*
		 * Just like with failcnt, we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > READ_ONCE(c->watermark))
			WRITE_ONCE(c->watermark, new);
	}
	return true;

failed:
	for (c = counter; c != *fail; c = c->parent)
		page_counter_cancel(c, nr_pages);

	return false;
}

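/*
 * Illustrative usage sketch: the typical caller pattern, loosely
 * modelled on memcg charging, reclaims from the counter reported in
 * @fail and retries when the charge is refused:
 *
 *	struct page_counter *fail;
 *
 *	while (!page_counter_try_charge(&counter, nr_pages, &fail)) {
 *		if (!try_to_reclaim_from(fail, nr_pages))	// hypothetical helper
 *			return -ENOMEM;
 *	}
 *	// charged against the whole hierarchy; undo later with
 *	// page_counter_uncharge(&counter, nr_pages);
 */
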
/**
 * page_counter_uncharge - hierarchically uncharge pages
 * @counter: counter
 * @nr_pages: number of pages to uncharge
 */
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent)
		page_counter_cancel(c, nr_pages);
}

/**
 * page_counter_set_max - set the maximum number of pages allowed
 * @counter: counter
 * @nr_pages: limit to set
 *
 * Returns 0 on success, -EBUSY if the current number of pages on the
 * counter already exceeds the specified limit.
 *
 * The caller must serialize invocations on the same counter.
 */
int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages)
{
	for (;;) {
		unsigned long old;
		long usage;

		/*
		 * Update the limit while making sure that it's not
		 * below the concurrently-changing counter value.
		 *
		 * The xchg implies two full memory barriers before
		 * and after, so the read-swap-read is ordered and
		 * ensures coherency with page_counter_try_charge():
		 * that function modifies the count before checking
		 * the limit, so if it sees the old limit, we see the
		 * modified counter and retry.
		 */
		usage = atomic_long_read(&counter->usage);

		if (usage > nr_pages)
			return -EBUSY;

		old = xchg(&counter->max, nr_pages);

		if (atomic_long_read(&counter->usage) <= usage)
			return 0;

		counter->max = old;
		cond_resched();
	}
}

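/*
 * Illustrative usage sketch: -EBUSY means current usage already exceeds
 * the requested limit, so callers that want to shrink a limit typically
 * reclaim and retry, roughly like:
 *
 *	while (page_counter_set_max(&counter, new_limit) == -EBUSY) {
 *		if (!reclaim_some_pages(&counter))	// hypothetical helper
 *			break;				// give up, keep the old limit
 *	}
 */
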
/**
 * page_counter_set_min - set the amount of protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	WRITE_ONCE(counter->min, nr_pages);

	for (c = counter; c; c = c->parent)
		propagate_protected_usage(c, atomic_long_read(&c->usage));
}

/**
 * page_counter_set_low - set the amount of protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	WRITE_ONCE(counter->low, nr_pages);

	for (c = counter; c; c = c->parent)
		propagate_protected_usage(c, atomic_long_read(&c->usage));
}

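/*
 * Illustrative usage sketch: min and low are configured on a child
 * counter and the protected usage then propagates up the parent chain
 * established with page_counter_init() from <linux/page_counter.h>:
 *
 *	page_counter_init(&child, &parent);
 *	page_counter_set_min(&child, min_pages);	// hard protection
 *	page_counter_set_low(&child, low_pages);	// best-effort protection
 */
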
/**
 * page_counter_memparse - memparse() for page counter limits
 * @buf: string to parse
 * @max: string meaning maximum possible value
 * @nr_pages: returns the result in number of pages
 *
 * Returns -EINVAL, or 0 and @nr_pages on success.  @nr_pages will be
 * limited to %PAGE_COUNTER_MAX.
 */
int page_counter_memparse(const char *buf, const char *max,
			  unsigned long *nr_pages)
{
	char *end;
	u64 bytes;

	if (!strcmp(buf, max)) {
		*nr_pages = PAGE_COUNTER_MAX;
		return 0;
	}

	bytes = memparse(buf, &end);
	if (*end != '\0')
		return -EINVAL;

	*nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);

	return 0;
}
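
/*
 * Illustrative usage sketch: parsing a cgroup-style limit string, where
 * the literal passed as @max ("max" here) selects PAGE_COUNTER_MAX:
 *
 *	unsigned long nr_pages;
 *
 *	if (page_counter_memparse("512M", "max", &nr_pages))
 *		return -EINVAL;
 *	page_counter_set_max(&counter, nr_pages);	// 512M -> 131072 pages with 4K pages
 */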