xref: /OK3568_Linux_fs/kernel/Documentation/RCU/rcuref.rst (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun.. SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun
3*4882a593Smuzhiyun====================================================================
4*4882a593SmuzhiyunReference-count design for elements of lists/arrays protected by RCU
5*4882a593Smuzhiyun====================================================================
6*4882a593Smuzhiyun
7*4882a593Smuzhiyun
8*4882a593SmuzhiyunPlease note that the percpu-ref feature is likely your first
9*4882a593Smuzhiyunstop if you need to combine reference counts and RCU.  Please see
10*4882a593Smuzhiyuninclude/linux/percpu-refcount.h for more information.  However, in
11*4882a593Smuzhiyunthose unusual cases where percpu-ref would consume too much memory,
12*4882a593Smuzhiyunplease read on.
13*4882a593Smuzhiyun
14*4882a593Smuzhiyun------------------------------------------------------------------------
15*4882a593Smuzhiyun
16*4882a593SmuzhiyunReference counting on elements of lists which are protected by traditional
17*4882a593Smuzhiyunreader/writer spinlocks or semaphores is straightforward:
18*4882a593Smuzhiyun
19*4882a593SmuzhiyunCODE LISTING A::
20*4882a593Smuzhiyun
21*4882a593Smuzhiyun    1.					    2.
22*4882a593Smuzhiyun    add()				    search_and_reference()
23*4882a593Smuzhiyun    {					    {
24*4882a593Smuzhiyun	alloc_object				read_lock(&list_lock);
25*4882a593Smuzhiyun	...					search_for_element
26*4882a593Smuzhiyun	atomic_set(&el->rc, 1);			atomic_inc(&el->rc);
27*4882a593Smuzhiyun	write_lock(&list_lock);			 ...
28*4882a593Smuzhiyun	add_element				read_unlock(&list_lock);
29*4882a593Smuzhiyun	...					...
30*4882a593Smuzhiyun	write_unlock(&list_lock);	   }
31*4882a593Smuzhiyun    }
32*4882a593Smuzhiyun
33*4882a593Smuzhiyun    3.					    4.
34*4882a593Smuzhiyun    release_referenced()		    delete()
35*4882a593Smuzhiyun    {					    {
36*4882a593Smuzhiyun	...					write_lock(&list_lock);
37*4882a593Smuzhiyun	if(atomic_dec_and_test(&el->rc))	...
38*4882a593Smuzhiyun	    kfree(el);
39*4882a593Smuzhiyun	...					remove_element
40*4882a593Smuzhiyun    }						write_unlock(&list_lock);
41*4882a593Smuzhiyun						...
42*4882a593Smuzhiyun						if (atomic_dec_and_test(&el->rc))
43*4882a593Smuzhiyun						    kfree(el);
44*4882a593Smuzhiyun						...
45*4882a593Smuzhiyun					    }
46*4882a593Smuzhiyun
47*4882a593SmuzhiyunIf this list/array is made lock-free using RCU as in changing the
48*4882a593Smuzhiyunwrite_lock() in add() and delete() to spin_lock() and changing read_lock()
49*4882a593Smuzhiyunin search_and_reference() to rcu_read_lock(), the atomic_inc() in
50*4882a593Smuzhiyunsearch_and_reference() could potentially hold a reference to an element which
51*4882a593Smuzhiyunhas already been deleted from the list/array.  Use atomic_inc_not_zero()
52*4882a593Smuzhiyunin this scenario as follows:
53*4882a593Smuzhiyun
54*4882a593SmuzhiyunCODE LISTING B::
55*4882a593Smuzhiyun
56*4882a593Smuzhiyun    1.					    2.
57*4882a593Smuzhiyun    add()				    search_and_reference()
58*4882a593Smuzhiyun    {					    {
59*4882a593Smuzhiyun	alloc_object				rcu_read_lock();
60*4882a593Smuzhiyun	...					search_for_element
61*4882a593Smuzhiyun	atomic_set(&el->rc, 1);			if (!atomic_inc_not_zero(&el->rc)) {
62*4882a593Smuzhiyun	spin_lock(&list_lock);			    rcu_read_unlock();
63*4882a593Smuzhiyun						    return FAIL;
64*4882a593Smuzhiyun	add_element				}
65*4882a593Smuzhiyun	...					...
66*4882a593Smuzhiyun	spin_unlock(&list_lock);		rcu_read_unlock();
67*4882a593Smuzhiyun    }					    }
68*4882a593Smuzhiyun    3.					    4.
69*4882a593Smuzhiyun    release_referenced()		    delete()
70*4882a593Smuzhiyun    {					    {
71*4882a593Smuzhiyun	...					spin_lock(&list_lock);
72*4882a593Smuzhiyun	if (atomic_dec_and_test(&el->rc))	...
73*4882a593Smuzhiyun	    call_rcu(&el->head, el_free);	remove_element
74*4882a593Smuzhiyun	...					spin_unlock(&list_lock);
75*4882a593Smuzhiyun    }						...
76*4882a593Smuzhiyun						if (atomic_dec_and_test(&el->rc))
77*4882a593Smuzhiyun						    call_rcu(&el->head, el_free);
78*4882a593Smuzhiyun						...
79*4882a593Smuzhiyun					    }
80*4882a593Smuzhiyun
81*4882a593SmuzhiyunSometimes, a reference to the element needs to be obtained in the
82*4882a593Smuzhiyunupdate (write) stream.	In such cases, atomic_inc_not_zero() might be
83*4882a593Smuzhiyunoverkill, since we hold the update-side spinlock.  One might instead
84*4882a593Smuzhiyunuse atomic_inc() in such cases.
85*4882a593Smuzhiyun
86*4882a593SmuzhiyunIt is not always convenient to deal with "FAIL" in the
87*4882a593Smuzhiyunsearch_and_reference() code path.  In such cases, the
88*4882a593Smuzhiyunatomic_dec_and_test() may be moved from delete() to el_free()
89*4882a593Smuzhiyunas follows:
90*4882a593Smuzhiyun
91*4882a593SmuzhiyunCODE LISTING C::
92*4882a593Smuzhiyun
93*4882a593Smuzhiyun    1.					    2.
94*4882a593Smuzhiyun    add()				    search_and_reference()
95*4882a593Smuzhiyun    {					    {
96*4882a593Smuzhiyun	alloc_object				rcu_read_lock();
97*4882a593Smuzhiyun	...					search_for_element
98*4882a593Smuzhiyun	atomic_set(&el->rc, 1);			atomic_inc(&el->rc);
99*4882a593Smuzhiyun	spin_lock(&list_lock);			...
100*4882a593Smuzhiyun
101*4882a593Smuzhiyun	add_element				rcu_read_unlock();
102*4882a593Smuzhiyun	...				    }
103*4882a593Smuzhiyun	spin_unlock(&list_lock);	    4.
104*4882a593Smuzhiyun    }					    delete()
105*4882a593Smuzhiyun    3.					    {
106*4882a593Smuzhiyun    release_referenced()			spin_lock(&list_lock);
107*4882a593Smuzhiyun    {						...
108*4882a593Smuzhiyun	...					remove_element
109*4882a593Smuzhiyun	if (atomic_dec_and_test(&el->rc))	spin_unlock(&list_lock);
110*4882a593Smuzhiyun	    kfree(el);				...
111*4882a593Smuzhiyun	...					call_rcu(&el->head, el_free);
112*4882a593Smuzhiyun    }						...
113*4882a593Smuzhiyun    5.					    }
114*4882a593Smuzhiyun    void el_free(struct rcu_head *rhp)
115*4882a593Smuzhiyun    {
116*4882a593Smuzhiyun	release_referenced();
117*4882a593Smuzhiyun    }
118*4882a593Smuzhiyun
119*4882a593SmuzhiyunThe key point is that the initial reference added by add() is not removed
120*4882a593Smuzhiyununtil after a grace period has elapsed following removal.  This means that
121*4882a593Smuzhiyunsearch_and_reference() cannot find this element, which means that the value
122*4882a593Smuzhiyunof el->rc cannot increase.  Thus, once it reaches zero, there are no
123*4882a593Smuzhiyunreaders that can or ever will be able to reference the element.	 The
124*4882a593Smuzhiyunelement can therefore safely be freed.	This in turn guarantees that if
125*4882a593Smuzhiyunany reader finds the element, that reader may safely acquire a reference
126*4882a593Smuzhiyunwithout checking the value of the reference counter.
127*4882a593Smuzhiyun
128*4882a593SmuzhiyunA clear advantage of the RCU-based pattern in listing C over the one
129*4882a593Smuzhiyunin listing B is that any call to search_and_reference() that locates
130*4882a593Smuzhiyuna given object will succeed in obtaining a reference to that object,
131*4882a593Smuzhiyuneven given a concurrent invocation of delete() for that same object.
132*4882a593SmuzhiyunSimilarly, a clear advantage of both listings B and C over listing A is
133*4882a593Smuzhiyunthat a call to delete() is not delayed even if there are an arbitrarily
134*4882a593Smuzhiyunlarge number of calls to search_and_reference() searching for the same
135*4882a593Smuzhiyunobject that delete() was invoked on.  Instead, all that is delayed is
136*4882a593Smuzhiyunthe eventual invocation of kfree(), which is usually not a problem on
137*4882a593Smuzhiyunmodern computer systems, even the small ones.
138*4882a593Smuzhiyun
139*4882a593SmuzhiyunIn cases where delete() can sleep, synchronize_rcu() can be called from
140*4882a593Smuzhiyundelete(), so that el_free() can be subsumed into delete() as follows::
141*4882a593Smuzhiyun
142*4882a593Smuzhiyun    4.
143*4882a593Smuzhiyun    delete()
144*4882a593Smuzhiyun    {
145*4882a593Smuzhiyun	spin_lock(&list_lock);
146*4882a593Smuzhiyun	...
147*4882a593Smuzhiyun	remove_element
148*4882a593Smuzhiyun	spin_unlock(&list_lock);
149*4882a593Smuzhiyun	...
150*4882a593Smuzhiyun	synchronize_rcu();
151*4882a593Smuzhiyun	if (atomic_dec_and_test(&el->rc))
152*4882a593Smuzhiyun	    kfree(el);
153*4882a593Smuzhiyun	...
154*4882a593Smuzhiyun    }
155*4882a593Smuzhiyun
156*4882a593SmuzhiyunAs additional examples in the kernel, the pattern in listing C is used by
157*4882a593Smuzhiyunreference counting of struct pid, while the pattern in listing B is used by
158*4882a593Smuzhiyunstruct posix_acl.
159