1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun #include <linux/swap_cgroup.h>
3*4882a593Smuzhiyun #include <linux/vmalloc.h>
4*4882a593Smuzhiyun #include <linux/mm.h>
5*4882a593Smuzhiyun
6*4882a593Smuzhiyun #include <linux/swapops.h> /* depends on mm.h include */
7*4882a593Smuzhiyun
8*4882a593Smuzhiyun static DEFINE_MUTEX(swap_cgroup_mutex);
9*4882a593Smuzhiyun struct swap_cgroup_ctrl {
10*4882a593Smuzhiyun struct page **map;
11*4882a593Smuzhiyun unsigned long length;
12*4882a593Smuzhiyun spinlock_t lock;
13*4882a593Smuzhiyun };
14*4882a593Smuzhiyun
15*4882a593Smuzhiyun static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
16*4882a593Smuzhiyun
/* One record per swap slot: the id stored for that swap entry. */
struct swap_cgroup {
	unsigned short id;
};

/* Number of struct swap_cgroup records that fit in one page. */
#define SC_PER_PAGE	(PAGE_SIZE / sizeof(struct swap_cgroup))
21*4882a593Smuzhiyun
/*
 * SwapCgroup implements "lookup" and "exchange" operations.
 * In typical usage, swap_cgroup is accessed via memcg's charge/uncharge
 * against SwapCache. At swap_free(), it is accessed directly from swap.
 *
 * This means:
 *  - there is no race in "exchange" when we are accessed via SwapCache,
 *    because SwapCache (and its swp_entry) is under lock.
 *  - when called via swap_free(), there is no other user of the entry and
 *    hence no race.
 * In principle, then, no lock is needed around "exchange". Note, however,
 * that swap_cgroup_cmpxchg() and swap_cgroup_record() below do take the
 * per-type ctrl->lock around their updates.
 *
 * TODO: we can push these buffers out to HIGHMEM.
 */
35*4882a593Smuzhiyun
36*4882a593Smuzhiyun /*
37*4882a593Smuzhiyun * allocate buffer for swap_cgroup.
38*4882a593Smuzhiyun */
swap_cgroup_prepare(int type)39*4882a593Smuzhiyun static int swap_cgroup_prepare(int type)
40*4882a593Smuzhiyun {
41*4882a593Smuzhiyun struct page *page;
42*4882a593Smuzhiyun struct swap_cgroup_ctrl *ctrl;
43*4882a593Smuzhiyun unsigned long idx, max;
44*4882a593Smuzhiyun
45*4882a593Smuzhiyun ctrl = &swap_cgroup_ctrl[type];
46*4882a593Smuzhiyun
47*4882a593Smuzhiyun for (idx = 0; idx < ctrl->length; idx++) {
48*4882a593Smuzhiyun page = alloc_page(GFP_KERNEL | __GFP_ZERO);
49*4882a593Smuzhiyun if (!page)
50*4882a593Smuzhiyun goto not_enough_page;
51*4882a593Smuzhiyun ctrl->map[idx] = page;
52*4882a593Smuzhiyun
53*4882a593Smuzhiyun if (!(idx % SWAP_CLUSTER_MAX))
54*4882a593Smuzhiyun cond_resched();
55*4882a593Smuzhiyun }
56*4882a593Smuzhiyun return 0;
57*4882a593Smuzhiyun not_enough_page:
58*4882a593Smuzhiyun max = idx;
59*4882a593Smuzhiyun for (idx = 0; idx < max; idx++)
60*4882a593Smuzhiyun __free_page(ctrl->map[idx]);
61*4882a593Smuzhiyun
62*4882a593Smuzhiyun return -ENOMEM;
63*4882a593Smuzhiyun }
64*4882a593Smuzhiyun
/*
 * Return the record for swap offset @offset within @ctrl.
 *
 * The offset is split into the index of the page in ctrl->map[] and the
 * slot inside that page. No bounds checking is done here.
 */
static struct swap_cgroup *__lookup_swap_cgroup(struct swap_cgroup_ctrl *ctrl,
						pgoff_t offset)
{
	pgoff_t page_idx = offset / SC_PER_PAGE;
	pgoff_t slot = offset % SC_PER_PAGE;
	struct swap_cgroup *records = page_address(ctrl->map[page_idx]);

	return &records[slot];
}
75*4882a593Smuzhiyun
/*
 * Return the record belonging to swap entry @ent.
 *
 * The control entry is selected by the entry's swap type and the record by
 * its offset. If @ctrlp is non-NULL, the control entry is also handed back
 * through it (callers use it to reach the lock).
 */
static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
					struct swap_cgroup_ctrl **ctrlp)
{
	struct swap_cgroup_ctrl *ctrl = &swap_cgroup_ctrl[swp_type(ent)];

	if (ctrlp)
		*ctrlp = ctrl;

	return __lookup_swap_cgroup(ctrl, swp_offset(ent));
}
87*4882a593Smuzhiyun
88*4882a593Smuzhiyun /**
89*4882a593Smuzhiyun * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
90*4882a593Smuzhiyun * @ent: swap entry to be cmpxchged
91*4882a593Smuzhiyun * @old: old id
92*4882a593Smuzhiyun * @new: new id
93*4882a593Smuzhiyun *
94*4882a593Smuzhiyun * Returns old id at success, 0 at failure.
95*4882a593Smuzhiyun * (There is no mem_cgroup using 0 as its id)
96*4882a593Smuzhiyun */
swap_cgroup_cmpxchg(swp_entry_t ent,unsigned short old,unsigned short new)97*4882a593Smuzhiyun unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
98*4882a593Smuzhiyun unsigned short old, unsigned short new)
99*4882a593Smuzhiyun {
100*4882a593Smuzhiyun struct swap_cgroup_ctrl *ctrl;
101*4882a593Smuzhiyun struct swap_cgroup *sc;
102*4882a593Smuzhiyun unsigned long flags;
103*4882a593Smuzhiyun unsigned short retval;
104*4882a593Smuzhiyun
105*4882a593Smuzhiyun sc = lookup_swap_cgroup(ent, &ctrl);
106*4882a593Smuzhiyun
107*4882a593Smuzhiyun spin_lock_irqsave(&ctrl->lock, flags);
108*4882a593Smuzhiyun retval = sc->id;
109*4882a593Smuzhiyun if (retval == old)
110*4882a593Smuzhiyun sc->id = new;
111*4882a593Smuzhiyun else
112*4882a593Smuzhiyun retval = 0;
113*4882a593Smuzhiyun spin_unlock_irqrestore(&ctrl->lock, flags);
114*4882a593Smuzhiyun return retval;
115*4882a593Smuzhiyun }
116*4882a593Smuzhiyun
117*4882a593Smuzhiyun /**
118*4882a593Smuzhiyun * swap_cgroup_record - record mem_cgroup for a set of swap entries
119*4882a593Smuzhiyun * @ent: the first swap entry to be recorded into
120*4882a593Smuzhiyun * @id: mem_cgroup to be recorded
121*4882a593Smuzhiyun * @nr_ents: number of swap entries to be recorded
122*4882a593Smuzhiyun *
123*4882a593Smuzhiyun * Returns old value at success, 0 at failure.
124*4882a593Smuzhiyun * (Of course, old value can be 0.)
125*4882a593Smuzhiyun */
swap_cgroup_record(swp_entry_t ent,unsigned short id,unsigned int nr_ents)126*4882a593Smuzhiyun unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id,
127*4882a593Smuzhiyun unsigned int nr_ents)
128*4882a593Smuzhiyun {
129*4882a593Smuzhiyun struct swap_cgroup_ctrl *ctrl;
130*4882a593Smuzhiyun struct swap_cgroup *sc;
131*4882a593Smuzhiyun unsigned short old;
132*4882a593Smuzhiyun unsigned long flags;
133*4882a593Smuzhiyun pgoff_t offset = swp_offset(ent);
134*4882a593Smuzhiyun pgoff_t end = offset + nr_ents;
135*4882a593Smuzhiyun
136*4882a593Smuzhiyun sc = lookup_swap_cgroup(ent, &ctrl);
137*4882a593Smuzhiyun
138*4882a593Smuzhiyun spin_lock_irqsave(&ctrl->lock, flags);
139*4882a593Smuzhiyun old = sc->id;
140*4882a593Smuzhiyun for (;;) {
141*4882a593Smuzhiyun VM_BUG_ON(sc->id != old);
142*4882a593Smuzhiyun sc->id = id;
143*4882a593Smuzhiyun offset++;
144*4882a593Smuzhiyun if (offset == end)
145*4882a593Smuzhiyun break;
146*4882a593Smuzhiyun if (offset % SC_PER_PAGE)
147*4882a593Smuzhiyun sc++;
148*4882a593Smuzhiyun else
149*4882a593Smuzhiyun sc = __lookup_swap_cgroup(ctrl, offset);
150*4882a593Smuzhiyun }
151*4882a593Smuzhiyun spin_unlock_irqrestore(&ctrl->lock, flags);
152*4882a593Smuzhiyun
153*4882a593Smuzhiyun return old;
154*4882a593Smuzhiyun }
155*4882a593Smuzhiyun
156*4882a593Smuzhiyun /**
157*4882a593Smuzhiyun * lookup_swap_cgroup_id - lookup mem_cgroup id tied to swap entry
158*4882a593Smuzhiyun * @ent: swap entry to be looked up.
159*4882a593Smuzhiyun *
160*4882a593Smuzhiyun * Returns ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
161*4882a593Smuzhiyun */
lookup_swap_cgroup_id(swp_entry_t ent)162*4882a593Smuzhiyun unsigned short lookup_swap_cgroup_id(swp_entry_t ent)
163*4882a593Smuzhiyun {
164*4882a593Smuzhiyun return lookup_swap_cgroup(ent, NULL)->id;
165*4882a593Smuzhiyun }
166*4882a593Smuzhiyun
swap_cgroup_swapon(int type,unsigned long max_pages)167*4882a593Smuzhiyun int swap_cgroup_swapon(int type, unsigned long max_pages)
168*4882a593Smuzhiyun {
169*4882a593Smuzhiyun void *array;
170*4882a593Smuzhiyun unsigned long array_size;
171*4882a593Smuzhiyun unsigned long length;
172*4882a593Smuzhiyun struct swap_cgroup_ctrl *ctrl;
173*4882a593Smuzhiyun
174*4882a593Smuzhiyun length = DIV_ROUND_UP(max_pages, SC_PER_PAGE);
175*4882a593Smuzhiyun array_size = length * sizeof(void *);
176*4882a593Smuzhiyun
177*4882a593Smuzhiyun array = vzalloc(array_size);
178*4882a593Smuzhiyun if (!array)
179*4882a593Smuzhiyun goto nomem;
180*4882a593Smuzhiyun
181*4882a593Smuzhiyun ctrl = &swap_cgroup_ctrl[type];
182*4882a593Smuzhiyun mutex_lock(&swap_cgroup_mutex);
183*4882a593Smuzhiyun ctrl->length = length;
184*4882a593Smuzhiyun ctrl->map = array;
185*4882a593Smuzhiyun spin_lock_init(&ctrl->lock);
186*4882a593Smuzhiyun if (swap_cgroup_prepare(type)) {
187*4882a593Smuzhiyun /* memory shortage */
188*4882a593Smuzhiyun ctrl->map = NULL;
189*4882a593Smuzhiyun ctrl->length = 0;
190*4882a593Smuzhiyun mutex_unlock(&swap_cgroup_mutex);
191*4882a593Smuzhiyun vfree(array);
192*4882a593Smuzhiyun goto nomem;
193*4882a593Smuzhiyun }
194*4882a593Smuzhiyun mutex_unlock(&swap_cgroup_mutex);
195*4882a593Smuzhiyun
196*4882a593Smuzhiyun return 0;
197*4882a593Smuzhiyun nomem:
198*4882a593Smuzhiyun pr_info("couldn't allocate enough memory for swap_cgroup\n");
199*4882a593Smuzhiyun pr_info("swap_cgroup can be disabled by swapaccount=0 boot option\n");
200*4882a593Smuzhiyun return -ENOMEM;
201*4882a593Smuzhiyun }
202*4882a593Smuzhiyun
swap_cgroup_swapoff(int type)203*4882a593Smuzhiyun void swap_cgroup_swapoff(int type)
204*4882a593Smuzhiyun {
205*4882a593Smuzhiyun struct page **map;
206*4882a593Smuzhiyun unsigned long i, length;
207*4882a593Smuzhiyun struct swap_cgroup_ctrl *ctrl;
208*4882a593Smuzhiyun
209*4882a593Smuzhiyun mutex_lock(&swap_cgroup_mutex);
210*4882a593Smuzhiyun ctrl = &swap_cgroup_ctrl[type];
211*4882a593Smuzhiyun map = ctrl->map;
212*4882a593Smuzhiyun length = ctrl->length;
213*4882a593Smuzhiyun ctrl->map = NULL;
214*4882a593Smuzhiyun ctrl->length = 0;
215*4882a593Smuzhiyun mutex_unlock(&swap_cgroup_mutex);
216*4882a593Smuzhiyun
217*4882a593Smuzhiyun if (map) {
218*4882a593Smuzhiyun for (i = 0; i < length; i++) {
219*4882a593Smuzhiyun struct page *page = map[i];
220*4882a593Smuzhiyun if (page)
221*4882a593Smuzhiyun __free_page(page);
222*4882a593Smuzhiyun if (!(i % SWAP_CLUSTER_MAX))
223*4882a593Smuzhiyun cond_resched();
224*4882a593Smuzhiyun }
225*4882a593Smuzhiyun vfree(map);
226*4882a593Smuzhiyun }
227*4882a593Smuzhiyun }
228