// SPDX-License-Identifier: GPL-2.0-or-later
/*
   lru_cache.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.


 */
13*4882a593Smuzhiyun
14*4882a593Smuzhiyun #include <linux/module.h>
15*4882a593Smuzhiyun #include <linux/bitops.h>
16*4882a593Smuzhiyun #include <linux/slab.h>
17*4882a593Smuzhiyun #include <linux/string.h> /* for memset */
18*4882a593Smuzhiyun #include <linux/seq_file.h> /* for seq_printf */
19*4882a593Smuzhiyun #include <linux/lru_cache.h>
20*4882a593Smuzhiyun
21*4882a593Smuzhiyun MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
22*4882a593Smuzhiyun "Lars Ellenberg <lars@linbit.com>");
23*4882a593Smuzhiyun MODULE_DESCRIPTION("lru_cache - Track sets of hot objects");
24*4882a593Smuzhiyun MODULE_LICENSE("GPL");
25*4882a593Smuzhiyun
/*
 * Developer aid only: catches concurrent access, i.e. missing locking on
 * the user's side.
 *
 * PARANOIA_ENTRY() expects a variable named "lc" in the calling scope.
 * It BUG()s if lc is NULL, if the cache has no elements, or if the
 * __LC_PARANOIA bit in lc->flags was already set when trying to set it.
 */
#define PARANOIA_ENTRY()						\
	do {								\
		BUG_ON(!lc);						\
		BUG_ON(!lc->nr_elements);				\
		BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags));	\
	} while (0)

/*
 * Counterpart of PARANOIA_ENTRY(): clears the __LC_PARANOIA bit again,
 * then returns the (optional) value x from the enclosing function.
 */
#define RETURN(x...)							\
	do {								\
		clear_bit_unlock(__LC_PARANOIA, &lc->flags);		\
		return x;						\
	} while (0)

/* BUG() if e is not one of the elements tracked by lc */
#define PARANOIA_LC_ELEMENT(lc, e)					\
	do {								\
		struct lru_cache *cache_ = (lc);			\
		struct lc_element *elem_ = (e);				\
		unsigned idx_ = elem_->lc_index;			\
		BUG_ON(idx_ >= cache_->nr_elements);			\
		BUG_ON(cache_->lc_element[idx_] != elem_);		\
	} while (0)
45*4882a593Smuzhiyun
46*4882a593Smuzhiyun
47*4882a593Smuzhiyun /* We need to atomically
48*4882a593Smuzhiyun * - try to grab the lock (set LC_LOCKED)
49*4882a593Smuzhiyun * - only if there is no pending transaction
50*4882a593Smuzhiyun * (neither LC_DIRTY nor LC_STARVING is set)
51*4882a593Smuzhiyun * Because of PARANOIA_ENTRY() above abusing lc->flags as well,
52*4882a593Smuzhiyun * it is not sufficient to just say
53*4882a593Smuzhiyun * return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED);
54*4882a593Smuzhiyun */
lc_try_lock(struct lru_cache * lc)55*4882a593Smuzhiyun int lc_try_lock(struct lru_cache *lc)
56*4882a593Smuzhiyun {
57*4882a593Smuzhiyun unsigned long val;
58*4882a593Smuzhiyun do {
59*4882a593Smuzhiyun val = cmpxchg(&lc->flags, 0, LC_LOCKED);
60*4882a593Smuzhiyun } while (unlikely (val == LC_PARANOIA));
61*4882a593Smuzhiyun /* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */
62*4882a593Smuzhiyun return 0 == val;
63*4882a593Smuzhiyun #if 0
64*4882a593Smuzhiyun /* Alternative approach, spin in case someone enters or leaves a
65*4882a593Smuzhiyun * PARANOIA_ENTRY()/RETURN() section. */
66*4882a593Smuzhiyun unsigned long old, new, val;
67*4882a593Smuzhiyun do {
68*4882a593Smuzhiyun old = lc->flags & LC_PARANOIA;
69*4882a593Smuzhiyun new = old | LC_LOCKED;
70*4882a593Smuzhiyun val = cmpxchg(&lc->flags, old, new);
71*4882a593Smuzhiyun } while (unlikely (val == (old ^ LC_PARANOIA)));
72*4882a593Smuzhiyun return old == val;
73*4882a593Smuzhiyun #endif
74*4882a593Smuzhiyun }
75*4882a593Smuzhiyun
76*4882a593Smuzhiyun /**
77*4882a593Smuzhiyun * lc_create - prepares to track objects in an active set
78*4882a593Smuzhiyun * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details
79*4882a593Smuzhiyun * @max_pending_changes: maximum changes to accumulate until a transaction is required
80*4882a593Smuzhiyun * @e_count: number of elements allowed to be active simultaneously
81*4882a593Smuzhiyun * @e_size: size of the tracked objects
82*4882a593Smuzhiyun * @e_off: offset to the &struct lc_element member in a tracked object
83*4882a593Smuzhiyun *
84*4882a593Smuzhiyun * Returns a pointer to a newly initialized struct lru_cache on success,
85*4882a593Smuzhiyun * or NULL on (allocation) failure.
86*4882a593Smuzhiyun */
lc_create(const char * name,struct kmem_cache * cache,unsigned max_pending_changes,unsigned e_count,size_t e_size,size_t e_off)87*4882a593Smuzhiyun struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
88*4882a593Smuzhiyun unsigned max_pending_changes,
89*4882a593Smuzhiyun unsigned e_count, size_t e_size, size_t e_off)
90*4882a593Smuzhiyun {
91*4882a593Smuzhiyun struct hlist_head *slot = NULL;
92*4882a593Smuzhiyun struct lc_element **element = NULL;
93*4882a593Smuzhiyun struct lru_cache *lc;
94*4882a593Smuzhiyun struct lc_element *e;
95*4882a593Smuzhiyun unsigned cache_obj_size = kmem_cache_size(cache);
96*4882a593Smuzhiyun unsigned i;
97*4882a593Smuzhiyun
98*4882a593Smuzhiyun WARN_ON(cache_obj_size < e_size);
99*4882a593Smuzhiyun if (cache_obj_size < e_size)
100*4882a593Smuzhiyun return NULL;
101*4882a593Smuzhiyun
102*4882a593Smuzhiyun /* e_count too big; would probably fail the allocation below anyways.
103*4882a593Smuzhiyun * for typical use cases, e_count should be few thousand at most. */
104*4882a593Smuzhiyun if (e_count > LC_MAX_ACTIVE)
105*4882a593Smuzhiyun return NULL;
106*4882a593Smuzhiyun
107*4882a593Smuzhiyun slot = kcalloc(e_count, sizeof(struct hlist_head), GFP_KERNEL);
108*4882a593Smuzhiyun if (!slot)
109*4882a593Smuzhiyun goto out_fail;
110*4882a593Smuzhiyun element = kcalloc(e_count, sizeof(struct lc_element *), GFP_KERNEL);
111*4882a593Smuzhiyun if (!element)
112*4882a593Smuzhiyun goto out_fail;
113*4882a593Smuzhiyun
114*4882a593Smuzhiyun lc = kzalloc(sizeof(*lc), GFP_KERNEL);
115*4882a593Smuzhiyun if (!lc)
116*4882a593Smuzhiyun goto out_fail;
117*4882a593Smuzhiyun
118*4882a593Smuzhiyun INIT_LIST_HEAD(&lc->in_use);
119*4882a593Smuzhiyun INIT_LIST_HEAD(&lc->lru);
120*4882a593Smuzhiyun INIT_LIST_HEAD(&lc->free);
121*4882a593Smuzhiyun INIT_LIST_HEAD(&lc->to_be_changed);
122*4882a593Smuzhiyun
123*4882a593Smuzhiyun lc->name = name;
124*4882a593Smuzhiyun lc->element_size = e_size;
125*4882a593Smuzhiyun lc->element_off = e_off;
126*4882a593Smuzhiyun lc->nr_elements = e_count;
127*4882a593Smuzhiyun lc->max_pending_changes = max_pending_changes;
128*4882a593Smuzhiyun lc->lc_cache = cache;
129*4882a593Smuzhiyun lc->lc_element = element;
130*4882a593Smuzhiyun lc->lc_slot = slot;
131*4882a593Smuzhiyun
132*4882a593Smuzhiyun /* preallocate all objects */
133*4882a593Smuzhiyun for (i = 0; i < e_count; i++) {
134*4882a593Smuzhiyun void *p = kmem_cache_alloc(cache, GFP_KERNEL);
135*4882a593Smuzhiyun if (!p)
136*4882a593Smuzhiyun break;
137*4882a593Smuzhiyun memset(p, 0, lc->element_size);
138*4882a593Smuzhiyun e = p + e_off;
139*4882a593Smuzhiyun e->lc_index = i;
140*4882a593Smuzhiyun e->lc_number = LC_FREE;
141*4882a593Smuzhiyun e->lc_new_number = LC_FREE;
142*4882a593Smuzhiyun list_add(&e->list, &lc->free);
143*4882a593Smuzhiyun element[i] = e;
144*4882a593Smuzhiyun }
145*4882a593Smuzhiyun if (i == e_count)
146*4882a593Smuzhiyun return lc;
147*4882a593Smuzhiyun
148*4882a593Smuzhiyun /* else: could not allocate all elements, give up */
149*4882a593Smuzhiyun for (i--; i; i--) {
150*4882a593Smuzhiyun void *p = element[i];
151*4882a593Smuzhiyun kmem_cache_free(cache, p - e_off);
152*4882a593Smuzhiyun }
153*4882a593Smuzhiyun kfree(lc);
154*4882a593Smuzhiyun out_fail:
155*4882a593Smuzhiyun kfree(element);
156*4882a593Smuzhiyun kfree(slot);
157*4882a593Smuzhiyun return NULL;
158*4882a593Smuzhiyun }
159*4882a593Smuzhiyun
lc_free_by_index(struct lru_cache * lc,unsigned i)160*4882a593Smuzhiyun static void lc_free_by_index(struct lru_cache *lc, unsigned i)
161*4882a593Smuzhiyun {
162*4882a593Smuzhiyun void *p = lc->lc_element[i];
163*4882a593Smuzhiyun WARN_ON(!p);
164*4882a593Smuzhiyun if (p) {
165*4882a593Smuzhiyun p -= lc->element_off;
166*4882a593Smuzhiyun kmem_cache_free(lc->lc_cache, p);
167*4882a593Smuzhiyun }
168*4882a593Smuzhiyun }
169*4882a593Smuzhiyun
170*4882a593Smuzhiyun /**
171*4882a593Smuzhiyun * lc_destroy - frees memory allocated by lc_create()
172*4882a593Smuzhiyun * @lc: the lru cache to destroy
173*4882a593Smuzhiyun */
lc_destroy(struct lru_cache * lc)174*4882a593Smuzhiyun void lc_destroy(struct lru_cache *lc)
175*4882a593Smuzhiyun {
176*4882a593Smuzhiyun unsigned i;
177*4882a593Smuzhiyun if (!lc)
178*4882a593Smuzhiyun return;
179*4882a593Smuzhiyun for (i = 0; i < lc->nr_elements; i++)
180*4882a593Smuzhiyun lc_free_by_index(lc, i);
181*4882a593Smuzhiyun kfree(lc->lc_element);
182*4882a593Smuzhiyun kfree(lc->lc_slot);
183*4882a593Smuzhiyun kfree(lc);
184*4882a593Smuzhiyun }
185*4882a593Smuzhiyun
186*4882a593Smuzhiyun /**
187*4882a593Smuzhiyun * lc_reset - does a full reset for @lc and the hash table slots.
188*4882a593Smuzhiyun * @lc: the lru cache to operate on
189*4882a593Smuzhiyun *
190*4882a593Smuzhiyun * It is roughly the equivalent of re-allocating a fresh lru_cache object,
191*4882a593Smuzhiyun * basically a short cut to lc_destroy(lc); lc = lc_create(...);
192*4882a593Smuzhiyun */
lc_reset(struct lru_cache * lc)193*4882a593Smuzhiyun void lc_reset(struct lru_cache *lc)
194*4882a593Smuzhiyun {
195*4882a593Smuzhiyun unsigned i;
196*4882a593Smuzhiyun
197*4882a593Smuzhiyun INIT_LIST_HEAD(&lc->in_use);
198*4882a593Smuzhiyun INIT_LIST_HEAD(&lc->lru);
199*4882a593Smuzhiyun INIT_LIST_HEAD(&lc->free);
200*4882a593Smuzhiyun INIT_LIST_HEAD(&lc->to_be_changed);
201*4882a593Smuzhiyun lc->used = 0;
202*4882a593Smuzhiyun lc->hits = 0;
203*4882a593Smuzhiyun lc->misses = 0;
204*4882a593Smuzhiyun lc->starving = 0;
205*4882a593Smuzhiyun lc->locked = 0;
206*4882a593Smuzhiyun lc->changed = 0;
207*4882a593Smuzhiyun lc->pending_changes = 0;
208*4882a593Smuzhiyun lc->flags = 0;
209*4882a593Smuzhiyun memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements);
210*4882a593Smuzhiyun
211*4882a593Smuzhiyun for (i = 0; i < lc->nr_elements; i++) {
212*4882a593Smuzhiyun struct lc_element *e = lc->lc_element[i];
213*4882a593Smuzhiyun void *p = e;
214*4882a593Smuzhiyun p -= lc->element_off;
215*4882a593Smuzhiyun memset(p, 0, lc->element_size);
216*4882a593Smuzhiyun /* re-init it */
217*4882a593Smuzhiyun e->lc_index = i;
218*4882a593Smuzhiyun e->lc_number = LC_FREE;
219*4882a593Smuzhiyun e->lc_new_number = LC_FREE;
220*4882a593Smuzhiyun list_add(&e->list, &lc->free);
221*4882a593Smuzhiyun }
222*4882a593Smuzhiyun }
223*4882a593Smuzhiyun
224*4882a593Smuzhiyun /**
225*4882a593Smuzhiyun * lc_seq_printf_stats - print stats about @lc into @seq
226*4882a593Smuzhiyun * @seq: the seq_file to print into
227*4882a593Smuzhiyun * @lc: the lru cache to print statistics of
228*4882a593Smuzhiyun */
lc_seq_printf_stats(struct seq_file * seq,struct lru_cache * lc)229*4882a593Smuzhiyun void lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
230*4882a593Smuzhiyun {
231*4882a593Smuzhiyun /* NOTE:
232*4882a593Smuzhiyun * total calls to lc_get are
233*4882a593Smuzhiyun * (starving + hits + misses)
234*4882a593Smuzhiyun * misses include "locked" count (update from an other thread in
235*4882a593Smuzhiyun * progress) and "changed", when this in fact lead to an successful
236*4882a593Smuzhiyun * update of the cache.
237*4882a593Smuzhiyun */
238*4882a593Smuzhiyun seq_printf(seq, "\t%s: used:%u/%u hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n",
239*4882a593Smuzhiyun lc->name, lc->used, lc->nr_elements,
240*4882a593Smuzhiyun lc->hits, lc->misses, lc->starving, lc->locked, lc->changed);
241*4882a593Smuzhiyun }
242*4882a593Smuzhiyun
lc_hash_slot(struct lru_cache * lc,unsigned int enr)243*4882a593Smuzhiyun static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
244*4882a593Smuzhiyun {
245*4882a593Smuzhiyun return lc->lc_slot + (enr % lc->nr_elements);
246*4882a593Smuzhiyun }
247*4882a593Smuzhiyun
248*4882a593Smuzhiyun
__lc_find(struct lru_cache * lc,unsigned int enr,bool include_changing)249*4882a593Smuzhiyun static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr,
250*4882a593Smuzhiyun bool include_changing)
251*4882a593Smuzhiyun {
252*4882a593Smuzhiyun struct lc_element *e;
253*4882a593Smuzhiyun
254*4882a593Smuzhiyun BUG_ON(!lc);
255*4882a593Smuzhiyun BUG_ON(!lc->nr_elements);
256*4882a593Smuzhiyun hlist_for_each_entry(e, lc_hash_slot(lc, enr), colision) {
257*4882a593Smuzhiyun /* "about to be changed" elements, pending transaction commit,
258*4882a593Smuzhiyun * are hashed by their "new number". "Normal" elements have
259*4882a593Smuzhiyun * lc_number == lc_new_number. */
260*4882a593Smuzhiyun if (e->lc_new_number != enr)
261*4882a593Smuzhiyun continue;
262*4882a593Smuzhiyun if (e->lc_new_number == e->lc_number || include_changing)
263*4882a593Smuzhiyun return e;
264*4882a593Smuzhiyun break;
265*4882a593Smuzhiyun }
266*4882a593Smuzhiyun return NULL;
267*4882a593Smuzhiyun }
268*4882a593Smuzhiyun
/**
 * lc_find - find element by label, if present in the hash table
 * @lc: The lru_cache object
 * @enr: element number
 *
 * Returns the element labeled @enr if it is present in the hash table,
 * or NULL if it is not. The refcnt is left untouched.
 * Elements that are "about to be used", i.e. not yet in the active set
 * but still pending transaction commit, are ignored.
 */
struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
{
	const bool include_changing = false;

	return __lc_find(lc, enr, include_changing);
}
284*4882a593Smuzhiyun
285*4882a593Smuzhiyun /**
286*4882a593Smuzhiyun * lc_is_used - find element by label
287*4882a593Smuzhiyun * @lc: The lru_cache object
288*4882a593Smuzhiyun * @enr: element number
289*4882a593Smuzhiyun *
290*4882a593Smuzhiyun * Returns true, if the element with the requested "label" or element number is
291*4882a593Smuzhiyun * present in the hash table, and is used (refcnt > 0).
292*4882a593Smuzhiyun * Also finds elements that are not _currently_ used but only "about to be
293*4882a593Smuzhiyun * used", i.e. on the "to_be_changed" list, pending transaction commit.
294*4882a593Smuzhiyun */
lc_is_used(struct lru_cache * lc,unsigned int enr)295*4882a593Smuzhiyun bool lc_is_used(struct lru_cache *lc, unsigned int enr)
296*4882a593Smuzhiyun {
297*4882a593Smuzhiyun struct lc_element *e = __lc_find(lc, enr, 1);
298*4882a593Smuzhiyun return e && e->refcnt;
299*4882a593Smuzhiyun }
300*4882a593Smuzhiyun
301*4882a593Smuzhiyun /**
302*4882a593Smuzhiyun * lc_del - removes an element from the cache
303*4882a593Smuzhiyun * @lc: The lru_cache object
304*4882a593Smuzhiyun * @e: The element to remove
305*4882a593Smuzhiyun *
306*4882a593Smuzhiyun * @e must be unused (refcnt == 0). Moves @e from "lru" to "free" list,
307*4882a593Smuzhiyun * sets @e->enr to %LC_FREE.
308*4882a593Smuzhiyun */
lc_del(struct lru_cache * lc,struct lc_element * e)309*4882a593Smuzhiyun void lc_del(struct lru_cache *lc, struct lc_element *e)
310*4882a593Smuzhiyun {
311*4882a593Smuzhiyun PARANOIA_ENTRY();
312*4882a593Smuzhiyun PARANOIA_LC_ELEMENT(lc, e);
313*4882a593Smuzhiyun BUG_ON(e->refcnt);
314*4882a593Smuzhiyun
315*4882a593Smuzhiyun e->lc_number = e->lc_new_number = LC_FREE;
316*4882a593Smuzhiyun hlist_del_init(&e->colision);
317*4882a593Smuzhiyun list_move(&e->list, &lc->free);
318*4882a593Smuzhiyun RETURN();
319*4882a593Smuzhiyun }
320*4882a593Smuzhiyun
lc_prepare_for_change(struct lru_cache * lc,unsigned new_number)321*4882a593Smuzhiyun static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number)
322*4882a593Smuzhiyun {
323*4882a593Smuzhiyun struct list_head *n;
324*4882a593Smuzhiyun struct lc_element *e;
325*4882a593Smuzhiyun
326*4882a593Smuzhiyun if (!list_empty(&lc->free))
327*4882a593Smuzhiyun n = lc->free.next;
328*4882a593Smuzhiyun else if (!list_empty(&lc->lru))
329*4882a593Smuzhiyun n = lc->lru.prev;
330*4882a593Smuzhiyun else
331*4882a593Smuzhiyun return NULL;
332*4882a593Smuzhiyun
333*4882a593Smuzhiyun e = list_entry(n, struct lc_element, list);
334*4882a593Smuzhiyun PARANOIA_LC_ELEMENT(lc, e);
335*4882a593Smuzhiyun
336*4882a593Smuzhiyun e->lc_new_number = new_number;
337*4882a593Smuzhiyun if (!hlist_unhashed(&e->colision))
338*4882a593Smuzhiyun __hlist_del(&e->colision);
339*4882a593Smuzhiyun hlist_add_head(&e->colision, lc_hash_slot(lc, new_number));
340*4882a593Smuzhiyun list_move(&e->list, &lc->to_be_changed);
341*4882a593Smuzhiyun
342*4882a593Smuzhiyun return e;
343*4882a593Smuzhiyun }
344*4882a593Smuzhiyun
lc_unused_element_available(struct lru_cache * lc)345*4882a593Smuzhiyun static int lc_unused_element_available(struct lru_cache *lc)
346*4882a593Smuzhiyun {
347*4882a593Smuzhiyun if (!list_empty(&lc->free))
348*4882a593Smuzhiyun return 1; /* something on the free list */
349*4882a593Smuzhiyun if (!list_empty(&lc->lru))
350*4882a593Smuzhiyun return 1; /* something to evict */
351*4882a593Smuzhiyun
352*4882a593Smuzhiyun return 0;
353*4882a593Smuzhiyun }
354*4882a593Smuzhiyun
/* used as internal flags to __lc_get */
enum {
	/* on a miss, an unused element may be relabeled */
	LC_GET_MAY_CHANGE		= 1 << 0,
	/* an element still pending transaction commit may be returned */
	LC_GET_MAY_USE_UNCOMMITTED	= 1 << 1,
};
360*4882a593Smuzhiyun
__lc_get(struct lru_cache * lc,unsigned int enr,unsigned int flags)361*4882a593Smuzhiyun static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, unsigned int flags)
362*4882a593Smuzhiyun {
363*4882a593Smuzhiyun struct lc_element *e;
364*4882a593Smuzhiyun
365*4882a593Smuzhiyun PARANOIA_ENTRY();
366*4882a593Smuzhiyun if (lc->flags & LC_STARVING) {
367*4882a593Smuzhiyun ++lc->starving;
368*4882a593Smuzhiyun RETURN(NULL);
369*4882a593Smuzhiyun }
370*4882a593Smuzhiyun
371*4882a593Smuzhiyun e = __lc_find(lc, enr, 1);
372*4882a593Smuzhiyun /* if lc_new_number != lc_number,
373*4882a593Smuzhiyun * this enr is currently being pulled in already,
374*4882a593Smuzhiyun * and will be available once the pending transaction
375*4882a593Smuzhiyun * has been committed. */
376*4882a593Smuzhiyun if (e) {
377*4882a593Smuzhiyun if (e->lc_new_number != e->lc_number) {
378*4882a593Smuzhiyun /* It has been found above, but on the "to_be_changed"
379*4882a593Smuzhiyun * list, not yet committed. Don't pull it in twice,
380*4882a593Smuzhiyun * wait for the transaction, then try again...
381*4882a593Smuzhiyun */
382*4882a593Smuzhiyun if (!(flags & LC_GET_MAY_USE_UNCOMMITTED))
383*4882a593Smuzhiyun RETURN(NULL);
384*4882a593Smuzhiyun /* ... unless the caller is aware of the implications,
385*4882a593Smuzhiyun * probably preparing a cumulative transaction. */
386*4882a593Smuzhiyun ++e->refcnt;
387*4882a593Smuzhiyun ++lc->hits;
388*4882a593Smuzhiyun RETURN(e);
389*4882a593Smuzhiyun }
390*4882a593Smuzhiyun /* else: lc_new_number == lc_number; a real hit. */
391*4882a593Smuzhiyun ++lc->hits;
392*4882a593Smuzhiyun if (e->refcnt++ == 0)
393*4882a593Smuzhiyun lc->used++;
394*4882a593Smuzhiyun list_move(&e->list, &lc->in_use); /* Not evictable... */
395*4882a593Smuzhiyun RETURN(e);
396*4882a593Smuzhiyun }
397*4882a593Smuzhiyun /* e == NULL */
398*4882a593Smuzhiyun
399*4882a593Smuzhiyun ++lc->misses;
400*4882a593Smuzhiyun if (!(flags & LC_GET_MAY_CHANGE))
401*4882a593Smuzhiyun RETURN(NULL);
402*4882a593Smuzhiyun
403*4882a593Smuzhiyun /* To avoid races with lc_try_lock(), first, mark us dirty
404*4882a593Smuzhiyun * (using test_and_set_bit, as it implies memory barriers), ... */
405*4882a593Smuzhiyun test_and_set_bit(__LC_DIRTY, &lc->flags);
406*4882a593Smuzhiyun
407*4882a593Smuzhiyun /* ... only then check if it is locked anyways. If lc_unlock clears
408*4882a593Smuzhiyun * the dirty bit again, that's not a problem, we will come here again.
409*4882a593Smuzhiyun */
410*4882a593Smuzhiyun if (test_bit(__LC_LOCKED, &lc->flags)) {
411*4882a593Smuzhiyun ++lc->locked;
412*4882a593Smuzhiyun RETURN(NULL);
413*4882a593Smuzhiyun }
414*4882a593Smuzhiyun
415*4882a593Smuzhiyun /* In case there is nothing available and we can not kick out
416*4882a593Smuzhiyun * the LRU element, we have to wait ...
417*4882a593Smuzhiyun */
418*4882a593Smuzhiyun if (!lc_unused_element_available(lc)) {
419*4882a593Smuzhiyun __set_bit(__LC_STARVING, &lc->flags);
420*4882a593Smuzhiyun RETURN(NULL);
421*4882a593Smuzhiyun }
422*4882a593Smuzhiyun
423*4882a593Smuzhiyun /* It was not present in the active set. We are going to recycle an
424*4882a593Smuzhiyun * unused (or even "free") element, but we won't accumulate more than
425*4882a593Smuzhiyun * max_pending_changes changes. */
426*4882a593Smuzhiyun if (lc->pending_changes >= lc->max_pending_changes)
427*4882a593Smuzhiyun RETURN(NULL);
428*4882a593Smuzhiyun
429*4882a593Smuzhiyun e = lc_prepare_for_change(lc, enr);
430*4882a593Smuzhiyun BUG_ON(!e);
431*4882a593Smuzhiyun
432*4882a593Smuzhiyun clear_bit(__LC_STARVING, &lc->flags);
433*4882a593Smuzhiyun BUG_ON(++e->refcnt != 1);
434*4882a593Smuzhiyun lc->used++;
435*4882a593Smuzhiyun lc->pending_changes++;
436*4882a593Smuzhiyun
437*4882a593Smuzhiyun RETURN(e);
438*4882a593Smuzhiyun }
439*4882a593Smuzhiyun
440*4882a593Smuzhiyun /**
441*4882a593Smuzhiyun * lc_get - get element by label, maybe change the active set
442*4882a593Smuzhiyun * @lc: the lru cache to operate on
443*4882a593Smuzhiyun * @enr: the label to look up
444*4882a593Smuzhiyun *
445*4882a593Smuzhiyun * Finds an element in the cache, increases its usage count,
446*4882a593Smuzhiyun * "touches" and returns it.
447*4882a593Smuzhiyun *
448*4882a593Smuzhiyun * In case the requested number is not present, it needs to be added to the
449*4882a593Smuzhiyun * cache. Therefore it is possible that an other element becomes evicted from
450*4882a593Smuzhiyun * the cache. In either case, the user is notified so he is able to e.g. keep
451*4882a593Smuzhiyun * a persistent log of the cache changes, and therefore the objects in use.
452*4882a593Smuzhiyun *
453*4882a593Smuzhiyun * Return values:
454*4882a593Smuzhiyun * NULL
455*4882a593Smuzhiyun * The cache was marked %LC_STARVING,
456*4882a593Smuzhiyun * or the requested label was not in the active set
457*4882a593Smuzhiyun * and a changing transaction is still pending (@lc was marked %LC_DIRTY).
458*4882a593Smuzhiyun * Or no unused or free element could be recycled (@lc will be marked as
459*4882a593Smuzhiyun * %LC_STARVING, blocking further lc_get() operations).
460*4882a593Smuzhiyun *
461*4882a593Smuzhiyun * pointer to the element with the REQUESTED element number.
462*4882a593Smuzhiyun * In this case, it can be used right away
463*4882a593Smuzhiyun *
464*4882a593Smuzhiyun * pointer to an UNUSED element with some different element number,
465*4882a593Smuzhiyun * where that different number may also be %LC_FREE.
466*4882a593Smuzhiyun *
467*4882a593Smuzhiyun * In this case, the cache is marked %LC_DIRTY,
468*4882a593Smuzhiyun * so lc_try_lock() will no longer succeed.
469*4882a593Smuzhiyun * The returned element pointer is moved to the "to_be_changed" list,
470*4882a593Smuzhiyun * and registered with the new element number on the hash collision chains,
471*4882a593Smuzhiyun * so it is possible to pick it up from lc_is_used().
472*4882a593Smuzhiyun * Up to "max_pending_changes" (see lc_create()) can be accumulated.
473*4882a593Smuzhiyun * The user now should do whatever housekeeping is necessary,
474*4882a593Smuzhiyun * typically serialize on lc_try_lock_for_transaction(), then call
475*4882a593Smuzhiyun * lc_committed(lc) and lc_unlock(), to finish the change.
476*4882a593Smuzhiyun *
477*4882a593Smuzhiyun * NOTE: The user needs to check the lc_number on EACH use, so he recognizes
478*4882a593Smuzhiyun * any cache set change.
479*4882a593Smuzhiyun */
lc_get(struct lru_cache * lc,unsigned int enr)480*4882a593Smuzhiyun struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
481*4882a593Smuzhiyun {
482*4882a593Smuzhiyun return __lc_get(lc, enr, LC_GET_MAY_CHANGE);
483*4882a593Smuzhiyun }
484*4882a593Smuzhiyun
485*4882a593Smuzhiyun /**
486*4882a593Smuzhiyun * lc_get_cumulative - like lc_get; also finds to-be-changed elements
487*4882a593Smuzhiyun * @lc: the lru cache to operate on
488*4882a593Smuzhiyun * @enr: the label to look up
489*4882a593Smuzhiyun *
490*4882a593Smuzhiyun * Unlike lc_get this also returns the element for @enr, if it is belonging to
491*4882a593Smuzhiyun * a pending transaction, so the return values are like for lc_get(),
492*4882a593Smuzhiyun * plus:
493*4882a593Smuzhiyun *
494*4882a593Smuzhiyun * pointer to an element already on the "to_be_changed" list.
495*4882a593Smuzhiyun * In this case, the cache was already marked %LC_DIRTY.
496*4882a593Smuzhiyun *
497*4882a593Smuzhiyun * Caller needs to make sure that the pending transaction is completed,
498*4882a593Smuzhiyun * before proceeding to actually use this element.
499*4882a593Smuzhiyun */
lc_get_cumulative(struct lru_cache * lc,unsigned int enr)500*4882a593Smuzhiyun struct lc_element *lc_get_cumulative(struct lru_cache *lc, unsigned int enr)
501*4882a593Smuzhiyun {
502*4882a593Smuzhiyun return __lc_get(lc, enr, LC_GET_MAY_CHANGE|LC_GET_MAY_USE_UNCOMMITTED);
503*4882a593Smuzhiyun }
504*4882a593Smuzhiyun
/**
 * lc_try_get - get element by label, if present; do not change the active set
 * @lc: the lru cache to operate on
 * @enr: the label to look up
 *
 * Finds an element in the cache, increases its usage count,
 * "touches" and returns it. Never relabels an element.
 *
 * Return values:
 *  NULL
 *     The cache was marked %LC_STARVING,
 *     or the requested label was not in the active set
 *
 *  pointer to the element with the REQUESTED element number.
 *     In this case, it can be used right away
 */
struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
{
	const unsigned int get_flags = 0;	/* neither change, nor uncommitted */

	return __lc_get(lc, enr, get_flags);
}
525*4882a593Smuzhiyun
526*4882a593Smuzhiyun /**
527*4882a593Smuzhiyun * lc_committed - tell @lc that pending changes have been recorded
528*4882a593Smuzhiyun * @lc: the lru cache to operate on
529*4882a593Smuzhiyun *
530*4882a593Smuzhiyun * User is expected to serialize on explicit lc_try_lock_for_transaction()
531*4882a593Smuzhiyun * before the transaction is started, and later needs to lc_unlock() explicitly
532*4882a593Smuzhiyun * as well.
533*4882a593Smuzhiyun */
lc_committed(struct lru_cache * lc)534*4882a593Smuzhiyun void lc_committed(struct lru_cache *lc)
535*4882a593Smuzhiyun {
536*4882a593Smuzhiyun struct lc_element *e, *tmp;
537*4882a593Smuzhiyun
538*4882a593Smuzhiyun PARANOIA_ENTRY();
539*4882a593Smuzhiyun list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) {
540*4882a593Smuzhiyun /* count number of changes, not number of transactions */
541*4882a593Smuzhiyun ++lc->changed;
542*4882a593Smuzhiyun e->lc_number = e->lc_new_number;
543*4882a593Smuzhiyun list_move(&e->list, &lc->in_use);
544*4882a593Smuzhiyun }
545*4882a593Smuzhiyun lc->pending_changes = 0;
546*4882a593Smuzhiyun RETURN();
547*4882a593Smuzhiyun }
548*4882a593Smuzhiyun
549*4882a593Smuzhiyun
550*4882a593Smuzhiyun /**
551*4882a593Smuzhiyun * lc_put - give up refcnt of @e
552*4882a593Smuzhiyun * @lc: the lru cache to operate on
553*4882a593Smuzhiyun * @e: the element to put
554*4882a593Smuzhiyun *
555*4882a593Smuzhiyun * If refcnt reaches zero, the element is moved to the lru list,
556*4882a593Smuzhiyun * and a %LC_STARVING (if set) is cleared.
557*4882a593Smuzhiyun * Returns the new (post-decrement) refcnt.
558*4882a593Smuzhiyun */
lc_put(struct lru_cache * lc,struct lc_element * e)559*4882a593Smuzhiyun unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
560*4882a593Smuzhiyun {
561*4882a593Smuzhiyun PARANOIA_ENTRY();
562*4882a593Smuzhiyun PARANOIA_LC_ELEMENT(lc, e);
563*4882a593Smuzhiyun BUG_ON(e->refcnt == 0);
564*4882a593Smuzhiyun BUG_ON(e->lc_number != e->lc_new_number);
565*4882a593Smuzhiyun if (--e->refcnt == 0) {
566*4882a593Smuzhiyun /* move it to the front of LRU. */
567*4882a593Smuzhiyun list_move(&e->list, &lc->lru);
568*4882a593Smuzhiyun lc->used--;
569*4882a593Smuzhiyun clear_bit_unlock(__LC_STARVING, &lc->flags);
570*4882a593Smuzhiyun }
571*4882a593Smuzhiyun RETURN(e->refcnt);
572*4882a593Smuzhiyun }
573*4882a593Smuzhiyun
574*4882a593Smuzhiyun /**
575*4882a593Smuzhiyun * lc_element_by_index
576*4882a593Smuzhiyun * @lc: the lru cache to operate on
577*4882a593Smuzhiyun * @i: the index of the element to return
578*4882a593Smuzhiyun */
lc_element_by_index(struct lru_cache * lc,unsigned i)579*4882a593Smuzhiyun struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i)
580*4882a593Smuzhiyun {
581*4882a593Smuzhiyun BUG_ON(i >= lc->nr_elements);
582*4882a593Smuzhiyun BUG_ON(lc->lc_element[i] == NULL);
583*4882a593Smuzhiyun BUG_ON(lc->lc_element[i]->lc_index != i);
584*4882a593Smuzhiyun return lc->lc_element[i];
585*4882a593Smuzhiyun }
586*4882a593Smuzhiyun
587*4882a593Smuzhiyun /**
588*4882a593Smuzhiyun * lc_index_of
589*4882a593Smuzhiyun * @lc: the lru cache to operate on
590*4882a593Smuzhiyun * @e: the element to query for its index position in lc->element
591*4882a593Smuzhiyun */
lc_index_of(struct lru_cache * lc,struct lc_element * e)592*4882a593Smuzhiyun unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e)
593*4882a593Smuzhiyun {
594*4882a593Smuzhiyun PARANOIA_LC_ELEMENT(lc, e);
595*4882a593Smuzhiyun return e->lc_index;
596*4882a593Smuzhiyun }
597*4882a593Smuzhiyun
598*4882a593Smuzhiyun /**
599*4882a593Smuzhiyun * lc_set - associate index with label
600*4882a593Smuzhiyun * @lc: the lru cache to operate on
601*4882a593Smuzhiyun * @enr: the label to set
602*4882a593Smuzhiyun * @index: the element index to associate label with.
603*4882a593Smuzhiyun *
604*4882a593Smuzhiyun * Used to initialize the active set to some previously recorded state.
605*4882a593Smuzhiyun */
lc_set(struct lru_cache * lc,unsigned int enr,int index)606*4882a593Smuzhiyun void lc_set(struct lru_cache *lc, unsigned int enr, int index)
607*4882a593Smuzhiyun {
608*4882a593Smuzhiyun struct lc_element *e;
609*4882a593Smuzhiyun struct list_head *lh;
610*4882a593Smuzhiyun
611*4882a593Smuzhiyun if (index < 0 || index >= lc->nr_elements)
612*4882a593Smuzhiyun return;
613*4882a593Smuzhiyun
614*4882a593Smuzhiyun e = lc_element_by_index(lc, index);
615*4882a593Smuzhiyun BUG_ON(e->lc_number != e->lc_new_number);
616*4882a593Smuzhiyun BUG_ON(e->refcnt != 0);
617*4882a593Smuzhiyun
618*4882a593Smuzhiyun e->lc_number = e->lc_new_number = enr;
619*4882a593Smuzhiyun hlist_del_init(&e->colision);
620*4882a593Smuzhiyun if (enr == LC_FREE)
621*4882a593Smuzhiyun lh = &lc->free;
622*4882a593Smuzhiyun else {
623*4882a593Smuzhiyun hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
624*4882a593Smuzhiyun lh = &lc->lru;
625*4882a593Smuzhiyun }
626*4882a593Smuzhiyun list_move(&e->list, lh);
627*4882a593Smuzhiyun }
628*4882a593Smuzhiyun
629*4882a593Smuzhiyun /**
630*4882a593Smuzhiyun * lc_dump - Dump a complete LRU cache to seq in textual form.
631*4882a593Smuzhiyun * @lc: the lru cache to operate on
632*4882a593Smuzhiyun * @seq: the &struct seq_file pointer to seq_printf into
633*4882a593Smuzhiyun * @utext: user supplied additional "heading" or other info
634*4882a593Smuzhiyun * @detail: function pointer the user may provide to dump further details
635*4882a593Smuzhiyun * of the object the lc_element is embedded in. May be NULL.
636*4882a593Smuzhiyun * Note: a leading space ' ' and trailing newline '\n' is implied.
637*4882a593Smuzhiyun */
lc_seq_dump_details(struct seq_file * seq,struct lru_cache * lc,char * utext,void (* detail)(struct seq_file *,struct lc_element *))638*4882a593Smuzhiyun void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
639*4882a593Smuzhiyun void (*detail) (struct seq_file *, struct lc_element *))
640*4882a593Smuzhiyun {
641*4882a593Smuzhiyun unsigned int nr_elements = lc->nr_elements;
642*4882a593Smuzhiyun struct lc_element *e;
643*4882a593Smuzhiyun int i;
644*4882a593Smuzhiyun
645*4882a593Smuzhiyun seq_printf(seq, "\tnn: lc_number (new nr) refcnt %s\n ", utext);
646*4882a593Smuzhiyun for (i = 0; i < nr_elements; i++) {
647*4882a593Smuzhiyun e = lc_element_by_index(lc, i);
648*4882a593Smuzhiyun if (e->lc_number != e->lc_new_number)
649*4882a593Smuzhiyun seq_printf(seq, "\t%5d: %6d %8d %6d ",
650*4882a593Smuzhiyun i, e->lc_number, e->lc_new_number, e->refcnt);
651*4882a593Smuzhiyun else
652*4882a593Smuzhiyun seq_printf(seq, "\t%5d: %6d %-8s %6d ",
653*4882a593Smuzhiyun i, e->lc_number, "-\"-", e->refcnt);
654*4882a593Smuzhiyun if (detail)
655*4882a593Smuzhiyun detail(seq, e);
656*4882a593Smuzhiyun seq_putc(seq, '\n');
657*4882a593Smuzhiyun }
658*4882a593Smuzhiyun }
659*4882a593Smuzhiyun
/* life cycle */
EXPORT_SYMBOL(lc_create);
EXPORT_SYMBOL(lc_destroy);
EXPORT_SYMBOL(lc_reset);

/* lookup, and reference counting */
EXPORT_SYMBOL(lc_find);
EXPORT_SYMBOL(lc_is_used);
EXPORT_SYMBOL(lc_get);
EXPORT_SYMBOL(lc_get_cumulative);
EXPORT_SYMBOL(lc_try_get);
EXPORT_SYMBOL(lc_put);

/* changing the active set */
EXPORT_SYMBOL(lc_set);
EXPORT_SYMBOL(lc_del);
EXPORT_SYMBOL(lc_try_lock);
EXPORT_SYMBOL(lc_committed);

/* index mapping */
EXPORT_SYMBOL(lc_element_by_index);
EXPORT_SYMBOL(lc_index_of);

/* reporting */
EXPORT_SYMBOL(lc_seq_printf_stats);
EXPORT_SYMBOL(lc_seq_dump_details);
677