xref: /OK3568_Linux_fs/kernel/mm/swap_cgroup.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0
#include <linux/swap_cgroup.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>

#include <linux/swapops.h> /* depends on mm.h include */

static DEFINE_MUTEX(swap_cgroup_mutex);
struct swap_cgroup_ctrl {
	struct page **map;
	unsigned long length;
	spinlock_t	lock;
};

static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];

struct swap_cgroup {
	unsigned short		id;
};
#define SC_PER_PAGE	(PAGE_SIZE/sizeof(struct swap_cgroup))
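/*
 * Sizing note (illustrative, assuming 4 KiB pages and a 64-bit kernel):
 * sizeof(struct swap_cgroup) is 2 bytes, so SC_PER_PAGE is 2048 and each
 * swap slot costs 2 bytes of tracking, roughly 0.05% of the swap area.
 * For example, a 32 GiB swap device has 8388608 slots, which need 4096
 * backing pages (16 MiB) plus a 32 KiB page-pointer array.
 */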

/*
 * SwapCgroup implements "lookup" and "exchange" operations.
 * In typical usage, swap_cgroup is accessed via memcg's charge/uncharge
 * operations against SwapCache. At swap_free(), it is accessed directly
 * from the swap code.
 *
 * This means:
 *  - there is no race on "exchange" when accessed via SwapCache, because
 *    the SwapCache (and its swp_entry) is under lock.
 *  - when called via swap_free(), there is no remaining user of the entry,
 *    so again there is no race.
 * Hence no lock is needed around "exchange" itself.
 *
 * TODO: these buffers could be pushed out to HIGHMEM.
 */
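/*
 * Illustrative sketch: the real callers live in mm/memcontrol.c, and the
 * names "entry", "memcg", "nr" and "id" below are hypothetical.  The
 * typical flow is:
 *
 *	swap-out (charge moves to swap):
 *		old = swap_cgroup_record(entry, mem_cgroup_id(memcg), nr);
 *	swap-in / readahead (find whom to charge):
 *		id = lookup_swap_cgroup_id(entry);
 *	swap slot freed (drop the swap charge):
 *		id = swap_cgroup_record(entry, 0, nr);
 *		the returned id names the memcg whose charge is released
 */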

/*
 * allocate buffer for swap_cgroup.
 */
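/*
 * Note: the backing store is built from individual order-0 pages rather
 * than one large contiguous allocation, so even very large swap areas
 * never require a high-order allocation; cond_resched() keeps the
 * allocation loop preemption-friendly.
 */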
static int swap_cgroup_prepare(int type)
{
	struct page *page;
	struct swap_cgroup_ctrl *ctrl;
	unsigned long idx, max;

	ctrl = &swap_cgroup_ctrl[type];

	for (idx = 0; idx < ctrl->length; idx++) {
		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!page)
			goto not_enough_page;
		ctrl->map[idx] = page;

		if (!(idx % SWAP_CLUSTER_MAX))
			cond_resched();
	}
	return 0;
not_enough_page:
	max = idx;
	for (idx = 0; idx < max; idx++)
		__free_page(ctrl->map[idx]);

	return -ENOMEM;
}

static struct swap_cgroup *__lookup_swap_cgroup(struct swap_cgroup_ctrl *ctrl,
						pgoff_t offset)
{
	struct page *mappage;
	struct swap_cgroup *sc;

	mappage = ctrl->map[offset / SC_PER_PAGE];
	sc = page_address(mappage);
	return sc + offset % SC_PER_PAGE;
}

static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
					struct swap_cgroup_ctrl **ctrlp)
{
	pgoff_t offset = swp_offset(ent);
	struct swap_cgroup_ctrl *ctrl;

	ctrl = &swap_cgroup_ctrl[swp_type(ent)];
	if (ctrlp)
		*ctrlp = ctrl;
	return __lookup_swap_cgroup(ctrl, offset);
}

/**
 * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
 * @ent: swap entry to be cmpxchged
 * @old: old id
 * @new: new id
 *
 * Returns the old id on success, 0 on failure.
 * (No mem_cgroup uses 0 as its id.)
 */
unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
					unsigned short old, unsigned short new)
{
	struct swap_cgroup_ctrl *ctrl;
	struct swap_cgroup *sc;
	unsigned long flags;
	unsigned short retval;

	sc = lookup_swap_cgroup(ent, &ctrl);

	spin_lock_irqsave(&ctrl->lock, flags);
	retval = sc->id;
	if (retval == old)
		sc->id = new;
	else
		retval = 0;
	spin_unlock_irqrestore(&ctrl->lock, flags);
	return retval;
}
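/*
 * Illustrative sketch ("entry", "old_id" and "new_id" are hypothetical):
 * a caller such as memcg's charge-moving code reassigns a swapped-out
 * charge only if nobody updated the entry in between:
 *
 *	if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id)
 *		the charge now belongs to new_id
 *	else
 *		the entry changed underneath us; retry or give up
 */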

/**
 * swap_cgroup_record - record mem_cgroup for a set of swap entries
 * @ent: the first swap entry to be recorded into
 * @id: mem_cgroup to be recorded
 * @nr_ents: number of swap entries to be recorded
 *
 * Returns the id previously recorded for @ent.  (Note that the previous
 * id may itself be 0; every entry in the range must currently hold the
 * same id, see the VM_BUG_ON below.)
 */
unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id,
				  unsigned int nr_ents)
{
	struct swap_cgroup_ctrl *ctrl;
	struct swap_cgroup *sc;
	unsigned short old;
	unsigned long flags;
	pgoff_t offset = swp_offset(ent);
	pgoff_t end = offset + nr_ents;

	sc = lookup_swap_cgroup(ent, &ctrl);

	spin_lock_irqsave(&ctrl->lock, flags);
	old = sc->id;
	for (;;) {
		VM_BUG_ON(sc->id != old);
		sc->id = id;
		offset++;
		if (offset == end)
			break;
		if (offset % SC_PER_PAGE)
			sc++;
		else
			sc = __lookup_swap_cgroup(ctrl, offset);
	}
	spin_unlock_irqrestore(&ctrl->lock, flags);

	return old;
}
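/*
 * Illustrative sketch (hypothetical values): recording and later clearing
 * a contiguous 512-slot cluster, e.g. a THP swapped out as a unit.  All
 * nr_ents slots lie on the same swap device at consecutive offsets
 * starting from "entry":
 *
 *	swap_cgroup_record(entry, mem_cgroup_id(memcg), 512);
 *	...
 *	old = swap_cgroup_record(entry, 0, 512);   clears; old = previous id
 */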

/**
 * lookup_swap_cgroup_id - lookup mem_cgroup id tied to a swap entry
 * @ent: swap entry to be looked up.
 *
 * Returns the ID of the recorded mem_cgroup, or 0 if none was recorded
 * (0 is never a valid mem_cgroup ID).
 */
unsigned short lookup_swap_cgroup_id(swp_entry_t ent)
{
	return lookup_swap_cgroup(ent, NULL)->id;
}
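/*
 * Illustrative sketch ("entry" and "memcg" are hypothetical): a reader
 * usually converts the returned id back into a memcg under RCU, e.g.
 *
 *	rcu_read_lock();
 *	memcg = mem_cgroup_from_id(lookup_swap_cgroup_id(entry));
 *	...
 *	rcu_read_unlock();
 */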

/*
 * swap_cgroup_swapon - set up swap_cgroup tracking when a swap area of
 * @type with @max_pages slots is enabled.  Allocates the page-pointer
 * array and one zeroed page per SC_PER_PAGE slots.  Returns 0 on success
 * or -ENOMEM on allocation failure.
 */
int swap_cgroup_swapon(int type, unsigned long max_pages)
{
	void *array;
	unsigned long array_size;
	unsigned long length;
	struct swap_cgroup_ctrl *ctrl;

	length = DIV_ROUND_UP(max_pages, SC_PER_PAGE);
	array_size = length * sizeof(void *);

	array = vzalloc(array_size);
	if (!array)
		goto nomem;

	ctrl = &swap_cgroup_ctrl[type];
	mutex_lock(&swap_cgroup_mutex);
	ctrl->length = length;
	ctrl->map = array;
	spin_lock_init(&ctrl->lock);
	if (swap_cgroup_prepare(type)) {
		/* memory shortage */
		ctrl->map = NULL;
		ctrl->length = 0;
		mutex_unlock(&swap_cgroup_mutex);
		vfree(array);
		goto nomem;
	}
	mutex_unlock(&swap_cgroup_mutex);

	return 0;
nomem:
	pr_info("couldn't allocate enough memory for swap_cgroup\n");
	pr_info("swap_cgroup can be disabled by swapaccount=0 boot option\n");
	return -ENOMEM;
}

/*
 * swap_cgroup_swapoff - tear down swap_cgroup tracking when the swap area
 * of @type is disabled.  Detaches the map under swap_cgroup_mutex, then
 * frees every backing page and the pointer array.  The NULL check makes
 * this safe even when no map was ever set up.
 */
void swap_cgroup_swapoff(int type)
{
	struct page **map;
	unsigned long i, length;
	struct swap_cgroup_ctrl *ctrl;

	mutex_lock(&swap_cgroup_mutex);
	ctrl = &swap_cgroup_ctrl[type];
	map = ctrl->map;
	length = ctrl->length;
	ctrl->map = NULL;
	ctrl->length = 0;
	mutex_unlock(&swap_cgroup_mutex);

	if (map) {
		for (i = 0; i < length; i++) {
			struct page *page = map[i];
			if (page)
				__free_page(page);
			if (!(i % SWAP_CLUSTER_MAX))
				cond_resched();
		}
		vfree(map);
	}
}