.. _array_rcu_doc:

Using RCU to Protect Read-Mostly Arrays
=======================================

Although RCU is more commonly used to protect linked lists, it can
also be used to protect arrays.  Three situations are as follows:

1.  :ref:`Hash Tables <hash_tables>`

2.  :ref:`Static Arrays <static_arrays>`

3.  :ref:`Resizable Arrays <resizable_arrays>`

Each of these three situations involves an RCU-protected pointer to an
array that is separately indexed.  It might be tempting to consider use
of RCU to instead protect the index into an array; however, this use
case is **not** supported.  The problem with RCU-protected indexes into
arrays is that compilers can play way too many optimization games with
integers, which means that the rules governing handling of these indexes
are far more trouble than they are worth.  If RCU-protected indexes into
arrays prove to be particularly valuable (which they have not thus far),
explicit cooperation from the compiler will be required to permit them
to be safely used.

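For example, the supported approach publishes a pointer to the array
and has readers fetch that pointer with rcu_dereference() before
indexing into it.  The ``gp`` pointer, ``struct foo``, and
``read_element()`` below are hypothetical names, a minimal sketch of
the pattern rather than code from any kernel subsystem::

	struct foo {
		int a;
	};
	static struct foo __rcu *gp;	/* RCU-protected pointer to the array */

	/* Reader: fetch the array pointer once, then index into it. */
	int read_element(int i)
	{
		struct foo *p;
		int ret = -1;

		rcu_read_lock();
		p = rcu_dereference(gp);	/* ordered load of the pointer */
		if (p != NULL)
			ret = p[i].a;
		rcu_read_unlock();
		return ret;
	}
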
That aside, each of the three RCU-protected pointer situations is
described in the following sections.

.. _hash_tables:

Situation 1: Hash Tables
------------------------

Hash tables are often implemented as an array, where each array entry
has a linked-list hash chain.  Each hash chain can be protected by RCU
as described in the listRCU.rst document.  This approach also applies
to other array-of-list situations, such as radix trees.

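For example, a lookup might traverse one chain of a fixed-size array
of hlist_head structures under rcu_read_lock().  This is only a
minimal sketch; the ``struct myobj`` type, the ``mytable`` array, and
the hash parameters below are hypothetical rather than taken from any
kernel subsystem::

	struct myobj {
		struct hlist_node node;
		int key;
		int data;
	};
	static struct hlist_head mytable[1024];		/* 2^10 buckets */

	/* Reader: search one hash chain and copy out the data under RCU. */
	int mytable_lookup(int key, int *data)
	{
		struct myobj *obj;
		int ret = -ENOENT;

		rcu_read_lock();
		hlist_for_each_entry_rcu(obj, &mytable[hash_min(key, 10)],
					 node) {
			if (obj->key == key) {
				*data = obj->data;
				ret = 0;
				break;
			}
		}
		rcu_read_unlock();
		return ret;
	}

Updaters would add and remove entries with hlist_add_head_rcu() and
hlist_del_rcu() under a lock, deferring any kfree() of a removed entry
until after a grace period.
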
.. _static_arrays:

Situation 2: Static Arrays
--------------------------

Static arrays, where the data (rather than a pointer to the data) is
located in each array element, and where the array is never resized,
have not been used with RCU.  Rik van Riel recommends using seqlock in
this situation, which would also have minimal read-side overhead as long
as updates are rare.

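A minimal sketch of that approach follows; the ``struct mydata`` type
and the ``myarray`` array are hypothetical.  Readers snapshot an
element and retry if an update ran concurrently, while updaters
exclude one another via the write side of the seqlock::

	struct mydata {
		int a;
		int b;
	};
	static struct mydata myarray[16];
	static DEFINE_SEQLOCK(myarray_lock);

	/* Reader: retry the snapshot if an update ran concurrently. */
	struct mydata myarray_read(int i)
	{
		struct mydata copy;
		unsigned int seq;

		do {
			seq = read_seqbegin(&myarray_lock);
			copy = myarray[i];
		} while (read_seqretry(&myarray_lock, seq));
		return copy;
	}

	/* Updater: exclude other updaters and force readers to retry. */
	void myarray_update(int i, const struct mydata *new)
	{
		write_seqlock(&myarray_lock);
		myarray[i] = *new;
		write_sequnlock(&myarray_lock);
	}
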
Quick Quiz:
		Why is it so important that updates be rare when using seqlock?

:ref:`Answer to Quick Quiz <answer_quick_quiz_seqlock>`

.. _resizable_arrays:

Situation 3: Resizable Arrays
-----------------------------

Use of RCU for resizable arrays is demonstrated by the grow_ary()
function formerly used by the System V IPC code.  The array is used
to map from semaphore, message-queue, and shared-memory IDs to the data
structure that represents the corresponding IPC construct.  The grow_ary()
function does not acquire any locks; instead its caller must hold the
ids->sem semaphore.

The grow_ary() function, shown below, does some limit checks, allocates
a new ipc_id_ary, copies the contents of the old array into the
beginning of the new one, initializes the remainder of the new array,
updates the ids->entries pointer to point to the new array, and invokes
ipc_rcu_putref() to free up the old array.  Note that rcu_assign_pointer()
is used to update the ids->entries pointer, which supplies any memory
barriers required on whatever architecture you are running on::

	static int grow_ary(struct ipc_ids *ids, int newsize)
	{
		struct ipc_id_ary *new;
		struct ipc_id_ary *old;
		int i;
		int size = ids->entries->size;

		if (newsize > IPCMNI)
			newsize = IPCMNI;
		if (newsize <= size)
			return newsize;

		new = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*newsize +
				    sizeof(struct ipc_id_ary));
		if (new == NULL)
			return size;
		new->size = newsize;
		memcpy(new->p, ids->entries->p,
		       sizeof(struct kern_ipc_perm *)*size +
		       sizeof(struct ipc_id_ary));
		for (i = size; i < newsize; i++)
			new->p[i] = NULL;
		old = ids->entries;

		/*
		 * Use rcu_assign_pointer() to make sure the memcpyed
		 * contents of the new array are visible before the new
		 * array becomes visible.
		 */
		rcu_assign_pointer(ids->entries, new);

		ipc_rcu_putref(old);
		return newsize;
	}

The ipc_rcu_putref() function decrements the array's reference count
and then, if the reference count has dropped to zero, uses call_rcu()
to free the array after a grace period has elapsed.

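A hypothetical sketch of this reference-count-plus-call_rcu pattern
follows; the ``struct myarray`` type and helper names are made up for
illustration, and the real ipc_rcu_*() machinery differed in its
details::

	struct myarray {
		atomic_t refcount;
		struct rcu_head rcu;
		/* ... array data ... */
	};

	/* RCU callback: runs after all pre-existing readers finish. */
	static void myarray_free_rcu(struct rcu_head *head)
	{
		kfree(container_of(head, struct myarray, rcu));
	}

	/* Drop a reference; free after a grace period once it hits zero. */
	void myarray_putref(struct myarray *a)
	{
		if (atomic_dec_and_test(&a->refcount))
			call_rcu(&a->rcu, myarray_free_rcu);
	}
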
The array is traversed by the ipc_lock() function.  This function
indexes into the array under the protection of rcu_read_lock(),
using rcu_dereference() to pick up the pointer to the array so
that it may later safely be dereferenced (memory barriers are
required on the Alpha CPU).  Because the size of the array is stored
with the array itself, there can be no array-size mismatches, so
a simple check suffices.  The pointer to the structure corresponding
to the desired IPC object is placed in "out", with NULL indicating
a non-existent entry.  After acquiring "out->lock", the "out->deleted"
flag indicates whether the IPC object is in the process of being
deleted, and, if not, the pointer is returned.  Note that on success
ipc_lock() returns with both "out->lock" held and the RCU read-side
critical section still active; releasing them is the caller's
responsibility::

	struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
	{
		struct kern_ipc_perm *out;
		int lid = id % SEQ_MULTIPLIER;
		struct ipc_id_ary *entries;

		rcu_read_lock();
		entries = rcu_dereference(ids->entries);
		if (lid >= entries->size) {
			rcu_read_unlock();
			return NULL;
		}
		out = entries->p[lid];
		if (out == NULL) {
			rcu_read_unlock();
			return NULL;
		}
		spin_lock(&out->lock);

		/*
		 * ipc_rmid() may have already freed the ID while ipc_lock()
		 * was spinning; verify that the structure is still valid.
		 */
		if (out->deleted) {
			spin_unlock(&out->lock);
			rcu_read_unlock();
			return NULL;
		}
		return out;
	}

.. _answer_quick_quiz_seqlock:

Answer to Quick Quiz:
	Why is it so important that updates be rare when using seqlock?

	The reason that it is important that updates be rare when
	using seqlock is that frequent updates can livelock readers.
	One way to avoid this problem is to assign a seqlock for
	each array entry rather than to the entire array.
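
	For instance, each element might embed its own seqlock_t (a
	hypothetical sketch; each embedded lock needs seqlock_init()
	at initialization time), so that an update to one entry forces
	retries only for readers of that same entry::

		struct mydata {
			seqlock_t lock;		/* per-entry, not per-array */
			int a;
		};
		static struct mydata myarray[16];

		/* Reader: retries only on updates to this entry. */
		int myarray_read_a(int i)
		{
			unsigned int seq;
			int a;

			do {
				seq = read_seqbegin(&myarray[i].lock);
				a = myarray[i].a;
			} while (read_seqretry(&myarray[i].lock, seq));
			return a;
		}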