===================================================
Adding reference counters (krefs) to kernel objects
===================================================

:Author: Corey Minyard <minyard@acm.org>
:Author: Thomas Hellstrom <thellstrom@vmware.com>

A lot of this was lifted from Greg Kroah-Hartman's 2004 OLS paper and
presentation on krefs, which can be found at:

  - http://www.kroah.com/linux/talks/ols_2004_kref_paper/Reprint-Kroah-Hartman-OLS2004.pdf
  - http://www.kroah.com/linux/talks/ols_2004_kref_talk/

Introduction
============

krefs allow you to add reference counters to your objects.  If you
have objects that are used in multiple places and passed around, and
you don't have refcounts, your code is almost certainly broken.  If
you want refcounts, krefs are the way to go.

To use a kref, add one to your data structures like::

    struct my_data
    {
        .
        .
        struct kref refcount;
        .
        .
    };

The kref can occur anywhere within the data structure.

Initialization
==============

You must initialize the kref after you allocate it.
To do this, call
kref_init as so::

    struct my_data *data;

    data = kmalloc(sizeof(*data), GFP_KERNEL);
    if (!data)
        return -ENOMEM;
    kref_init(&data->refcount);

This sets the refcount in the kref to 1.

Kref rules
==========

Once you have an initialized kref, you must follow the following
rules:

1) If you make a non-temporary copy of a pointer, especially if
   it can be passed to another thread of execution, you must
   increment the refcount with kref_get() before passing it off::

       kref_get(&data->refcount);

   If you already have a valid pointer to a kref-ed structure (the
   refcount cannot go to zero) you may do this without a lock.

2) When you are done with a pointer, you must call kref_put()::

       kref_put(&data->refcount, data_release);

   If this is the last reference to the pointer, the release
   routine will be called.  If the code never tries to get
   a valid pointer to a kref-ed structure without already
   holding a valid pointer, it is safe to do this without
   a lock.

3) If the code attempts to gain a reference to a kref-ed structure
   without already holding a valid pointer, it must serialize access
   where a kref_put() cannot occur during the kref_get(), and the
   structure must remain valid during the kref_get().

For example, if you allocate some data and then pass it to another
thread to process::

    void data_release(struct kref *ref)
    {
        struct my_data *data = container_of(ref, struct my_data, refcount);
        kfree(data);
    }

    void more_data_handling(void *cb_data)
    {
        struct my_data *data = cb_data;
        .
        . do stuff with data here
        .
        kref_put(&data->refcount, data_release);
    }

    int my_data_handler(void)
    {
        int rv = 0;
        struct my_data *data;
        struct task_struct *task;
        data = kmalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
            return -ENOMEM;
        kref_init(&data->refcount);

        kref_get(&data->refcount);
        task = kthread_run(more_data_handling, data, "more_data_handling");
        if (task == ERR_PTR(-ENOMEM)) {
            rv = -ENOMEM;
            kref_put(&data->refcount, data_release);
            goto out;
        }

        .
        . do stuff with data here
        .
    out:
        kref_put(&data->refcount, data_release);
        return rv;
    }

This way, it doesn't matter what order the two threads handle the
data, the kref_put() handles knowing when the data is not referenced
any more and releasing it.  The kref_get() does not require a lock,
since we already have a valid pointer that we own a refcount for.  The
put needs no lock because nothing tries to get the data without
already holding a pointer.

In the above example, kref_put() will be called 2 times in both success
and error paths.  This is necessary because the reference count got
incremented 2 times by kref_init() and kref_get().

Note that the "before" in rule 1 is very important.  You should never
do something like::

    task = kthread_run(more_data_handling, data, "more_data_handling");
    if (task == ERR_PTR(-ENOMEM)) {
        rv = -ENOMEM;
        goto out;
    } else
        /* BAD BAD BAD - get is after the handoff */
        kref_get(&data->refcount);

Don't assume you know what you are doing and use the above construct.
First of all, you may not know what you are doing.  Second, you may
know what you are doing (there are some situations where locking is
involved where the above may be legal) but someone else who doesn't
know what they are doing may change the code or copy the code.  It's
bad style.  Don't do it.

There are some situations where you can optimize the gets and puts.
For instance, if you are done with an object and enqueuing it for
something else or passing it off to something else, there is no reason
to do a get then a put::

    /* Silly extra get and put */
    kref_get(&obj->ref);
    enqueue(obj);
    kref_put(&obj->ref, obj_cleanup);

Just do the enqueue.  A comment about this is always welcome::

    enqueue(obj);
    /* We are done with obj, so we pass our refcount off
       to the queue.  DON'T TOUCH obj AFTER HERE! */

The last rule (rule 3) is the nastiest one to handle.  Say, for
instance, you have a list of items that are each kref-ed, and you wish
to get the first one.  You can't just pull the first item off the list
and kref_get() it.  That violates rule 3 because you are not already
holding a valid pointer.  You must add a mutex (or some other lock).
For instance::

    static DEFINE_MUTEX(mutex);
    static LIST_HEAD(q);
    struct my_data
    {
        struct kref      refcount;
        struct list_head link;
    };

    static struct my_data *get_entry()
    {
        struct my_data *entry = NULL;
        mutex_lock(&mutex);
        if (!list_empty(&q)) {
            entry = container_of(q.next, struct my_data, link);
            kref_get(&entry->refcount);
        }
        mutex_unlock(&mutex);
        return entry;
    }

    static void release_entry(struct kref *ref)
    {
        struct my_data *entry = container_of(ref, struct my_data, refcount);

        list_del(&entry->link);
        kfree(entry);
    }

    static void put_entry(struct my_data *entry)
    {
        mutex_lock(&mutex);
        kref_put(&entry->refcount, release_entry);
        mutex_unlock(&mutex);
    }

The kref_put() return value is useful if you do not want to hold the
lock during the whole release operation.  Say you didn't want to call
kfree() with the lock held in the example above (since it is kind of
pointless to do so).  You could use kref_put() as follows::

    static void release_entry(struct kref *ref)
    {
        /* All work is done after the return from kref_put(). */
    }

    static void put_entry(struct my_data *entry)
    {
        mutex_lock(&mutex);
        if (kref_put(&entry->refcount, release_entry)) {
            list_del(&entry->link);
            mutex_unlock(&mutex);
            kfree(entry);
        } else
            mutex_unlock(&mutex);
    }

This is really more useful if you have to call other routines as part
of the free operations that could take a long time or might claim the
same lock.  Note that doing everything in the release routine is still
preferred as it is a little neater.

The above example could also be optimized using kref_get_unless_zero() in
the following way::

    static struct my_data *get_entry()
    {
        struct my_data *entry = NULL;
        mutex_lock(&mutex);
        if (!list_empty(&q)) {
            entry = container_of(q.next, struct my_data, link);
            if (!kref_get_unless_zero(&entry->refcount))
                entry = NULL;
        }
        mutex_unlock(&mutex);
        return entry;
    }

    static void release_entry(struct kref *ref)
    {
        struct my_data *entry = container_of(ref, struct my_data, refcount);

        mutex_lock(&mutex);
        list_del(&entry->link);
        mutex_unlock(&mutex);
        kfree(entry);
    }

    static void put_entry(struct my_data *entry)
    {
        kref_put(&entry->refcount, release_entry);
    }

Which is useful to remove the mutex lock around kref_put() in put_entry(), but
it's important that kref_get_unless_zero is enclosed in the same critical
section that finds the entry in the lookup table,
otherwise kref_get_unless_zero may reference already freed memory.
Note that it is illegal to use kref_get_unless_zero without checking its
return value.  If you are sure (by already having a valid pointer) that
kref_get_unless_zero() will return true, then use kref_get() instead.

Krefs and RCU
=============

The function kref_get_unless_zero also makes it possible to use rcu
locking for lookups in the above example::

    struct my_data
    {
        struct rcu_head rhead;
        .
        struct kref refcount;
        .
        .
    };

    static struct my_data *get_entry_rcu()
    {
        struct my_data *entry = NULL;
        rcu_read_lock();
        if (!list_empty(&q)) {
            entry = container_of(q.next, struct my_data, link);
            if (!kref_get_unless_zero(&entry->refcount))
                entry = NULL;
        }
        rcu_read_unlock();
        return entry;
    }

    static void release_entry_rcu(struct kref *ref)
    {
        struct my_data *entry = container_of(ref, struct my_data, refcount);

        mutex_lock(&mutex);
        list_del_rcu(&entry->link);
        mutex_unlock(&mutex);
        kfree_rcu(entry, rhead);
    }

    static void put_entry(struct my_data *entry)
    {
        kref_put(&entry->refcount, release_entry_rcu);
    }

But note that the struct kref member needs to remain in valid memory for a
rcu grace period after release_entry_rcu was called.  That can be accomplished
by using kfree_rcu(entry, rhead) as done above, or by calling synchronize_rcu()
before using kfree, but note that synchronize_rcu() may sleep for a
substantial amount of time.