/*
 * Copyright (C) 2012 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#include "dm-array.h"
#include "dm-space-map.h"
#include "dm-transaction-manager.h"

#include <linux/export.h>
#include <linux/device-mapper.h>

#define DM_MSG_PREFIX "array"

/*----------------------------------------------------------------*/

/*
 * The array is implemented as a fully populated btree, which points to
 * blocks that contain the packed values.  This is more space efficient
 * than just using a btree since we don't store 1 key per value.
 */
struct array_block {
	__le32 csum;
	__le32 max_entries;
	__le32 nr_entries;
	__le32 value_size;
	__le64 blocknr; /* Block this node is supposed to live in. */
} __packed;

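/*
 * A minimal usage sketch of the public API below (a hedged example, not
 * taken from an in-tree caller: it assumes a struct dm_transaction_manager
 * *tm and a trivial value type for packed __le64 values; error handling
 * and the __dm_bless_for_disk() endian annotations are elided):
 *
 *	struct dm_array_info info;
 *	struct dm_btree_value_type vt = { .size = sizeof(__le64) };
 *	dm_block_t root;
 *	__le64 zero = cpu_to_le64(0), v;
 *
 *	dm_array_info_init(&info, tm, &vt);
 *	dm_array_empty(&info, &root);
 *	dm_array_resize(&info, root, 0, 128, &zero, &root);
 *	dm_array_set_value(&info, root, 7, &zero, &root);
 *	dm_array_get_value(&info, root, 7, &v);
 *	dm_array_del(&info, root);
 */
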
/*----------------------------------------------------------------*/

/*
 * Validator methods.  As usual we calculate a checksum, and also write the
 * block location into the header (paranoia about ssds remapping areas by
 * mistake).
 */
#define CSUM_XOR 595846735

static void array_block_prepare_for_write(struct dm_block_validator *v,
					  struct dm_block *b,
					  size_t size_of_block)
{
	struct array_block *bh_le = dm_block_data(b);

	bh_le->blocknr = cpu_to_le64(dm_block_location(b));
	bh_le->csum = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries,
						 size_of_block - sizeof(__le32),
						 CSUM_XOR));
}

static int array_block_check(struct dm_block_validator *v,
			     struct dm_block *b,
			     size_t size_of_block)
{
	struct array_block *bh_le = dm_block_data(b);
	__le32 csum_disk;

	if (dm_block_location(b) != le64_to_cpu(bh_le->blocknr)) {
		DMERR_LIMIT("array_block_check failed: blocknr %llu != wanted %llu",
			    (unsigned long long) le64_to_cpu(bh_le->blocknr),
			    (unsigned long long) dm_block_location(b));
		return -ENOTBLK;
	}

	csum_disk = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries,
					       size_of_block - sizeof(__le32),
					       CSUM_XOR));
	if (csum_disk != bh_le->csum) {
		DMERR_LIMIT("array_block_check failed: csum %u != wanted %u",
			    (unsigned) le32_to_cpu(csum_disk),
			    (unsigned) le32_to_cpu(bh_le->csum));
		return -EILSEQ;
	}

	return 0;
}

static struct dm_block_validator array_validator = {
	.name = "array",
	.prepare_for_write = array_block_prepare_for_write,
	.check = array_block_check
};

/*----------------------------------------------------------------*/

/*
 * Functions for manipulating the array blocks.
 */

/*
 * Returns a pointer to a value within an array block.
 *
 * index - The index into _this_ specific block.
 */
static void *element_at(struct dm_array_info *info, struct array_block *ab,
			unsigned index)
{
	unsigned char *entry = (unsigned char *) (ab + 1);

	entry += index * info->value_type.size;

	return entry;
}

/*
 * Utility function that calls one of the value_type methods on every value
 * in an array block.
 */
static void on_entries(struct dm_array_info *info, struct array_block *ab,
		       void (*fn)(void *, const void *))
{
	unsigned i, nr_entries = le32_to_cpu(ab->nr_entries);

	for (i = 0; i < nr_entries; i++)
		fn(info->value_type.context, element_at(info, ab, i));
}

/*
 * Increment every value in an array block.
 */
static void inc_ablock_entries(struct dm_array_info *info, struct array_block *ab)
{
	struct dm_btree_value_type *vt = &info->value_type;

	if (vt->inc)
		on_entries(info, ab, vt->inc);
}

/*
 * Decrement every value in an array block.
 */
static void dec_ablock_entries(struct dm_array_info *info, struct array_block *ab)
{
	struct dm_btree_value_type *vt = &info->value_type;

	if (vt->dec)
		on_entries(info, ab, vt->dec);
}

/*
 * Each array block can hold this many values.
 */
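/*
 * Illustrative figure only (assuming the common 4096 byte metadata block
 * size and 8 byte packed values): the packed header above is 24 bytes, so
 * each array block would hold (4096 - 24) / 8 = 509 values.
 */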
static uint32_t calc_max_entries(size_t value_size, size_t size_of_block)
{
	return (size_of_block - sizeof(struct array_block)) / value_size;
}

/*
 * Allocate a new array block.  The caller will need to unlock block.
 */
static int alloc_ablock(struct dm_array_info *info, size_t size_of_block,
			uint32_t max_entries,
			struct dm_block **block, struct array_block **ab)
{
	int r;

	r = dm_tm_new_block(info->btree_info.tm, &array_validator, block);
	if (r)
		return r;

	(*ab) = dm_block_data(*block);
	(*ab)->max_entries = cpu_to_le32(max_entries);
	(*ab)->nr_entries = cpu_to_le32(0);
	(*ab)->value_size = cpu_to_le32(info->value_type.size);

	return 0;
}

/*
 * Pad an array block out with a particular value.  Every instance will
 * cause an increment of the value_type.  new_nr must always be at least
 * the current number of entries.
 */
static void fill_ablock(struct dm_array_info *info, struct array_block *ab,
			const void *value, unsigned new_nr)
{
	unsigned i;
	uint32_t nr_entries;
	struct dm_btree_value_type *vt = &info->value_type;

	BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
	BUG_ON(new_nr < le32_to_cpu(ab->nr_entries));

	nr_entries = le32_to_cpu(ab->nr_entries);
	for (i = nr_entries; i < new_nr; i++) {
		if (vt->inc)
			vt->inc(vt->context, value);
		memcpy(element_at(info, ab, i), value, vt->size);
	}
	ab->nr_entries = cpu_to_le32(new_nr);
}

/*
 * Remove some entries from the back of an array block.  Every value
 * removed will be decremented.  new_nr must be <= the current number of
 * entries.
 */
static void trim_ablock(struct dm_array_info *info, struct array_block *ab,
			unsigned new_nr)
{
	unsigned i;
	uint32_t nr_entries;
	struct dm_btree_value_type *vt = &info->value_type;

	BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
	BUG_ON(new_nr > le32_to_cpu(ab->nr_entries));

	nr_entries = le32_to_cpu(ab->nr_entries);
	for (i = nr_entries; i > new_nr; i--)
		if (vt->dec)
			vt->dec(vt->context, element_at(info, ab, i - 1));
	ab->nr_entries = cpu_to_le32(new_nr);
}

/*
 * Read locks a block, and coerces it to an array block.  The caller must
 * unlock 'block' when finished.
 */
static int get_ablock(struct dm_array_info *info, dm_block_t b,
		      struct dm_block **block, struct array_block **ab)
{
	int r;

	r = dm_tm_read_lock(info->btree_info.tm, b, &array_validator, block);
	if (r)
		return r;

	*ab = dm_block_data(*block);
	return 0;
}

/*
 * Unlocks an array block.
 */
static void unlock_ablock(struct dm_array_info *info, struct dm_block *block)
{
	dm_tm_unlock(info->btree_info.tm, block);
}

/*----------------------------------------------------------------*/

/*
 * Btree manipulation.
 */

/*
 * Looks up an array block in the btree, and then read locks it.
 *
 * index is the index of the array_block (ie. the array index /
 * max_entries).
 */
static int lookup_ablock(struct dm_array_info *info, dm_block_t root,
			 unsigned index, struct dm_block **block,
			 struct array_block **ab)
{
	int r;
	uint64_t key = index;
	__le64 block_le;

	r = dm_btree_lookup(&info->btree_info, root, &key, &block_le);
	if (r)
		return r;

	return get_ablock(info, le64_to_cpu(block_le), block, ab);
}

/*
 * Insert an array block into the btree.  The block is _not_ unlocked.
 */
static int insert_ablock(struct dm_array_info *info, uint64_t index,
			 struct dm_block *block, dm_block_t *root)
{
	__le64 block_le = cpu_to_le64(dm_block_location(block));

	__dm_bless_for_disk(block_le);
	return dm_btree_insert(&info->btree_info, *root, &index, &block_le, root);
}

/*----------------------------------------------------------------*/

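/*
 * Shadow an array block so it can be modified.  If the shadow operation
 * actually copied the block, the reference counts of the values it holds
 * are incremented.
 */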
static int __shadow_ablock(struct dm_array_info *info, dm_block_t b,
			   struct dm_block **block, struct array_block **ab)
{
	int inc;
	int r = dm_tm_shadow_block(info->btree_info.tm, b,
				   &array_validator, block, &inc);
	if (r)
		return r;

	*ab = dm_block_data(*block);
	if (inc)
		inc_ablock_entries(info, *ab);

	return 0;
}

/*
 * The shadow op will often be a noop.  Only insert if it really
 * copied data.
 */
static int __reinsert_ablock(struct dm_array_info *info, unsigned index,
			     struct dm_block *block, dm_block_t b,
			     dm_block_t *root)
{
	int r = 0;

	if (dm_block_location(block) != b) {
		/*
		 * dm_tm_shadow_block will have already decremented the old
		 * block, but it is still referenced by the btree.  We
		 * increment to stop the insert decrementing it below zero
		 * when overwriting the old value.
		 */
		dm_tm_inc(info->btree_info.tm, b);
		r = insert_ablock(info, index, block, root);
	}

	return r;
}

/*
 * Looks up an array block in the btree.  Then shadows it, and updates the
 * btree to point to this new shadow.  'root' is an input/output parameter
 * for both the current root block, and the new one.
 */
static int shadow_ablock(struct dm_array_info *info, dm_block_t *root,
			 unsigned index, struct dm_block **block,
			 struct array_block **ab)
{
	int r;
	uint64_t key = index;
	dm_block_t b;
	__le64 block_le;

	r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le);
	if (r)
		return r;
	b = le64_to_cpu(block_le);

	r = __shadow_ablock(info, b, block, ab);
	if (r)
		return r;

	return __reinsert_ablock(info, index, *block, b, root);
}

/*
 * Allocate a new array block, and fill it with some values.
 */
static int insert_new_ablock(struct dm_array_info *info, size_t size_of_block,
			     uint32_t max_entries,
			     unsigned block_index, uint32_t nr,
			     const void *value, dm_block_t *root)
{
	int r;
	struct dm_block *block;
	struct array_block *ab;

	r = alloc_ablock(info, size_of_block, max_entries, &block, &ab);
	if (r)
		return r;

	fill_ablock(info, ab, value, nr);
	r = insert_ablock(info, block_index, block, root);
	unlock_ablock(info, block);

	return r;
}

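/*
 * Insert a run of brand new, completely full array blocks covering the
 * indices [begin_block, end_block), each padded with 'value'.
 */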
static int insert_full_ablocks(struct dm_array_info *info, size_t size_of_block,
			       unsigned begin_block, unsigned end_block,
			       unsigned max_entries, const void *value,
			       dm_block_t *root)
{
	int r = 0;

	for (; !r && begin_block != end_block; begin_block++)
		r = insert_new_ablock(info, size_of_block, max_entries, begin_block, max_entries, value, root);

	return r;
}

/*
 * There are a bunch of functions involved with resizing an array.  This
 * structure holds information that is commonly needed by them.  Purely
 * here to reduce parameter count.
 */
struct resize {
	/*
	 * Describes the array.
	 */
	struct dm_array_info *info;

	/*
	 * The current root of the array.  This gets updated.
	 */
	dm_block_t root;

	/*
	 * Metadata block size.  Used to calculate the nr entries in an
	 * array block.
	 */
	size_t size_of_block;

	/*
	 * Maximum nr entries in an array block.
	 */
	unsigned max_entries;

	/*
	 * nr of completely full blocks in the array.
	 *
	 * 'old' refers to before the resize, 'new' after.
	 */
	unsigned old_nr_full_blocks, new_nr_full_blocks;

	/*
	 * Number of entries in the final block.  0 iff only full blocks in
	 * the array.
	 */
	unsigned old_nr_entries_in_last_block, new_nr_entries_in_last_block;

	/*
	 * The default value used when growing the array.
	 */
	const void *value;
};

/*
 * Removes a consecutive set of array blocks from the btree.  The values
 * in the blocks are decremented as a side effect of the btree remove.
 *
 * begin_index - the index of the first array block to remove.
 * end_index - the one-past-the-end value.  ie. this block is not removed.
 */
static int drop_blocks(struct resize *resize, unsigned begin_index,
		       unsigned end_index)
{
	int r;

	while (begin_index != end_index) {
		uint64_t key = begin_index++;
		r = dm_btree_remove(&resize->info->btree_info, resize->root,
				    &key, &resize->root);
		if (r)
			return r;
	}

	return 0;
}

/*
 * Calculates how many blocks are needed for the array.
 */
static unsigned total_nr_blocks_needed(unsigned nr_full_blocks,
				       unsigned nr_entries_in_last_block)
{
	return nr_full_blocks + (nr_entries_in_last_block ? 1 : 0);
}

/*
 * Shrink an array.
 */
static int shrink(struct resize *resize)
{
	int r;
	unsigned begin, end;
	struct dm_block *block;
	struct array_block *ab;

	/*
	 * Lose some blocks from the back?
	 */
	if (resize->new_nr_full_blocks < resize->old_nr_full_blocks) {
		begin = total_nr_blocks_needed(resize->new_nr_full_blocks,
					       resize->new_nr_entries_in_last_block);
		end = total_nr_blocks_needed(resize->old_nr_full_blocks,
					     resize->old_nr_entries_in_last_block);

		r = drop_blocks(resize, begin, end);
		if (r)
			return r;
	}

	/*
	 * Trim the new tail block
	 */
	if (resize->new_nr_entries_in_last_block) {
		r = shadow_ablock(resize->info, &resize->root,
				  resize->new_nr_full_blocks, &block, &ab);
		if (r)
			return r;

		trim_ablock(resize->info, ab, resize->new_nr_entries_in_last_block);
		unlock_ablock(resize->info, block);
	}

	return 0;
}

/*
 * Grow an array.
 */
static int grow_extend_tail_block(struct resize *resize, uint32_t new_nr_entries)
{
	int r;
	struct dm_block *block;
	struct array_block *ab;

	r = shadow_ablock(resize->info, &resize->root,
			  resize->old_nr_full_blocks, &block, &ab);
	if (r)
		return r;

	fill_ablock(resize->info, ab, resize->value, new_nr_entries);
	unlock_ablock(resize->info, block);

	return r;
}

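/*
 * Add a new, partially filled tail block to the end of the array.
 */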
static int grow_add_tail_block(struct resize *resize)
{
	return insert_new_ablock(resize->info, resize->size_of_block,
				 resize->max_entries,
				 resize->new_nr_full_blocks,
				 resize->new_nr_entries_in_last_block,
				 resize->value, &resize->root);
}

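/*
 * Grow by first filling the old tail block (if there is one), then
 * inserting any new completely full blocks, and finally adding a new
 * partial tail block if one is needed.
 */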
static int grow_needs_more_blocks(struct resize *resize)
{
	int r;
	unsigned old_nr_blocks = resize->old_nr_full_blocks;

	if (resize->old_nr_entries_in_last_block > 0) {
		old_nr_blocks++;

		r = grow_extend_tail_block(resize, resize->max_entries);
		if (r)
			return r;
	}

	r = insert_full_ablocks(resize->info, resize->size_of_block,
				old_nr_blocks,
				resize->new_nr_full_blocks,
				resize->max_entries, resize->value,
				&resize->root);
	if (r)
		return r;

	if (resize->new_nr_entries_in_last_block)
		r = grow_add_tail_block(resize);

	return r;
}

static int grow(struct resize *resize)
{
	if (resize->new_nr_full_blocks > resize->old_nr_full_blocks)
		return grow_needs_more_blocks(resize);

	else if (resize->old_nr_entries_in_last_block)
		return grow_extend_tail_block(resize, resize->new_nr_entries_in_last_block);

	else
		return grow_add_tail_block(resize);
}

/*----------------------------------------------------------------*/

/*
 * These are the value_type functions for the btree elements, which point
 * to array blocks.
 */
static void block_inc(void *context, const void *value)
{
	__le64 block_le;
	struct dm_array_info *info = context;

	memcpy(&block_le, value, sizeof(block_le));
	dm_tm_inc(info->btree_info.tm, le64_to_cpu(block_le));
}

static void block_dec(void *context, const void *value)
{
	int r;
	uint64_t b;
	__le64 block_le;
	uint32_t ref_count;
	struct dm_block *block;
	struct array_block *ab;
	struct dm_array_info *info = context;

	memcpy(&block_le, value, sizeof(block_le));
	b = le64_to_cpu(block_le);

	r = dm_tm_ref(info->btree_info.tm, b, &ref_count);
	if (r) {
		DMERR_LIMIT("couldn't get reference count for block %llu",
			    (unsigned long long) b);
		return;
	}

	if (ref_count == 1) {
		/*
		 * We're about to drop the last reference to this ablock.
		 * So we need to decrement the ref count of the contents.
		 */
		r = get_ablock(info, b, &block, &ab);
		if (r) {
			DMERR_LIMIT("couldn't get array block %llu",
				    (unsigned long long) b);
			return;
		}

		dec_ablock_entries(info, ab);
		unlock_ablock(info, block);
	}

	dm_tm_dec(info->btree_info.tm, b);
}

static int block_equal(void *context, const void *value1, const void *value2)
{
	return !memcmp(value1, value2, sizeof(__le64));
}

/*----------------------------------------------------------------*/

void dm_array_info_init(struct dm_array_info *info,
			struct dm_transaction_manager *tm,
			struct dm_btree_value_type *vt)
{
	struct dm_btree_value_type *bvt = &info->btree_info.value_type;

	memcpy(&info->value_type, vt, sizeof(info->value_type));
	info->btree_info.tm = tm;
	info->btree_info.levels = 1;

	bvt->context = info;
	bvt->size = sizeof(__le64);
	bvt->inc = block_inc;
	bvt->dec = block_dec;
	bvt->equal = block_equal;
}
EXPORT_SYMBOL_GPL(dm_array_info_init);

int dm_array_empty(struct dm_array_info *info, dm_block_t *root)
{
	return dm_btree_empty(&info->btree_info, root);
}
EXPORT_SYMBOL_GPL(dm_array_empty);

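/*
 * Resize the array, block by block, padding any newly added entries with
 * 'value'.  Dispatches to grow() or shrink() as appropriate.
 */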
static int array_resize(struct dm_array_info *info, dm_block_t root,
			uint32_t old_size, uint32_t new_size,
			const void *value, dm_block_t *new_root)
{
	int r;
	struct resize resize;

	if (old_size == new_size) {
		*new_root = root;
		return 0;
	}

	resize.info = info;
	resize.root = root;
	resize.size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
	resize.max_entries = calc_max_entries(info->value_type.size,
					      resize.size_of_block);

	resize.old_nr_full_blocks = old_size / resize.max_entries;
	resize.old_nr_entries_in_last_block = old_size % resize.max_entries;
	resize.new_nr_full_blocks = new_size / resize.max_entries;
	resize.new_nr_entries_in_last_block = new_size % resize.max_entries;
	resize.value = value;

	r = ((new_size > old_size) ? grow : shrink)(&resize);
	if (r)
		return r;

	*new_root = resize.root;
	return 0;
}

int dm_array_resize(struct dm_array_info *info, dm_block_t root,
		    uint32_t old_size, uint32_t new_size,
		    const void *value, dm_block_t *new_root)
		    __dm_written_to_disk(value)
{
	int r = array_resize(info, root, old_size, new_size, value, new_root);
	__dm_unbless_for_disk(value);
	return r;
}
EXPORT_SYMBOL_GPL(dm_array_resize);

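/*
 * Fill an empty array block with values generated by the callback 'fn',
 * which is given the global array index of each entry being populated.
 */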
static int populate_ablock_with_values(struct dm_array_info *info, struct array_block *ab,
				       value_fn fn, void *context, unsigned base, unsigned new_nr)
{
	int r;
	unsigned i;
	struct dm_btree_value_type *vt = &info->value_type;

	BUG_ON(le32_to_cpu(ab->nr_entries));
	BUG_ON(new_nr > le32_to_cpu(ab->max_entries));

	for (i = 0; i < new_nr; i++) {
		r = fn(base + i, element_at(info, ab, i), context);
		if (r)
			return r;

		if (vt->inc)
			vt->inc(vt->context, element_at(info, ab, i));
	}

	ab->nr_entries = cpu_to_le32(new_nr);
	return 0;
}

int dm_array_new(struct dm_array_info *info, dm_block_t *root,
		 uint32_t size, value_fn fn, void *context)
{
	int r;
	struct dm_block *block;
	struct array_block *ab;
	unsigned block_index, end_block, size_of_block, max_entries;

	r = dm_array_empty(info, root);
	if (r)
		return r;

	size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
	max_entries = calc_max_entries(info->value_type.size, size_of_block);
	end_block = dm_div_up(size, max_entries);

	for (block_index = 0; block_index != end_block; block_index++) {
		r = alloc_ablock(info, size_of_block, max_entries, &block, &ab);
		if (r)
			break;

		r = populate_ablock_with_values(info, ab, fn, context,
						block_index * max_entries,
						min(max_entries, size));
		if (r) {
			unlock_ablock(info, block);
			break;
		}

		r = insert_ablock(info, block_index, block, root);
		unlock_ablock(info, block);
		if (r)
			break;

		size -= max_entries;
	}

	return r;
}
EXPORT_SYMBOL_GPL(dm_array_new);

int dm_array_del(struct dm_array_info *info, dm_block_t root)
{
	return dm_btree_del(&info->btree_info, root);
}
EXPORT_SYMBOL_GPL(dm_array_del);

int dm_array_get_value(struct dm_array_info *info, dm_block_t root,
		       uint32_t index, void *value_le)
{
	int r;
	struct dm_block *block;
	struct array_block *ab;
	size_t size_of_block;
	unsigned entry, max_entries;

	size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
	max_entries = calc_max_entries(info->value_type.size, size_of_block);

	r = lookup_ablock(info, root, index / max_entries, &block, &ab);
	if (r)
		return r;

	entry = index % max_entries;
	if (entry >= le32_to_cpu(ab->nr_entries))
		r = -ENODATA;
	else
		memcpy(value_le, element_at(info, ab, entry),
		       info->value_type.size);

	unlock_ablock(info, block);
	return r;
}
EXPORT_SYMBOL_GPL(dm_array_get_value);

static int array_set_value(struct dm_array_info *info, dm_block_t root,
			   uint32_t index, const void *value, dm_block_t *new_root)
{
	int r;
	struct dm_block *block;
	struct array_block *ab;
	size_t size_of_block;
	unsigned max_entries;
	unsigned entry;
	void *old_value;
	struct dm_btree_value_type *vt = &info->value_type;

	size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
	max_entries = calc_max_entries(info->value_type.size, size_of_block);

	r = shadow_ablock(info, &root, index / max_entries, &block, &ab);
	if (r)
		return r;
	*new_root = root;

	entry = index % max_entries;
	if (entry >= le32_to_cpu(ab->nr_entries)) {
		r = -ENODATA;
		goto out;
	}

	old_value = element_at(info, ab, entry);
	if (vt->dec &&
	    (!vt->equal || !vt->equal(vt->context, old_value, value))) {
		vt->dec(vt->context, old_value);
		if (vt->inc)
			vt->inc(vt->context, value);
	}

	memcpy(old_value, value, info->value_type.size);

out:
	unlock_ablock(info, block);
	return r;
}

int dm_array_set_value(struct dm_array_info *info, dm_block_t root,
		 uint32_t index, const void *value, dm_block_t *new_root)
		 __dm_written_to_disk(value)
{
	int r;

	r = array_set_value(info, root, index, value, new_root);
	__dm_unbless_for_disk(value);
	return r;
}
EXPORT_SYMBOL_GPL(dm_array_set_value);

struct walk_info {
	struct dm_array_info *info;
	int (*fn)(void *context, uint64_t key, void *leaf);
	void *context;
};

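/*
 * Callback for dm_btree_walk(): visits every value held in a single array
 * block, translating the btree key back into a global array index.
 */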
static int walk_ablock(void *context, uint64_t *keys, void *leaf)
{
	struct walk_info *wi = context;

	int r;
	unsigned i;
	__le64 block_le;
	unsigned nr_entries, max_entries;
	struct dm_block *block;
	struct array_block *ab;

	memcpy(&block_le, leaf, sizeof(block_le));
	r = get_ablock(wi->info, le64_to_cpu(block_le), &block, &ab);
	if (r)
		return r;

	max_entries = le32_to_cpu(ab->max_entries);
	nr_entries = le32_to_cpu(ab->nr_entries);
	for (i = 0; i < nr_entries; i++) {
		r = wi->fn(wi->context, keys[0] * max_entries + i,
			   element_at(wi->info, ab, i));

		if (r)
			break;
	}

	unlock_ablock(wi->info, block);
	return r;
}

int dm_array_walk(struct dm_array_info *info, dm_block_t root,
		  int (*fn)(void *, uint64_t key, void *leaf),
		  void *context)
{
	struct walk_info wi;

	wi.info = info;
	wi.fn = fn;
	wi.context = context;

	return dm_btree_walk(&info->btree_info, root, walk_ablock, &wi);
}
EXPORT_SYMBOL_GPL(dm_array_walk);

/*----------------------------------------------------------------*/

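/*
 * Loads the array block that the btree cursor currently points at,
 * dropping any block the array cursor previously held.
 */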
static int load_ablock(struct dm_array_cursor *c)
{
	int r;
	__le64 value_le;
	uint64_t key;

	if (c->block)
		unlock_ablock(c->info, c->block);

	c->block = NULL;
	c->ab = NULL;
	c->index = 0;

	r = dm_btree_cursor_get_value(&c->cursor, &key, &value_le);
	if (r) {
		DMERR("dm_btree_cursor_get_value failed");
		dm_btree_cursor_end(&c->cursor);

	} else {
		r = get_ablock(c->info, le64_to_cpu(value_le), &c->block, &c->ab);
		if (r) {
			DMERR("get_ablock failed");
			dm_btree_cursor_end(&c->cursor);
		}
	}

	return r;
}

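/*
 * A minimal cursor usage sketch (a hedged example rather than an in-tree
 * caller; error handling is elided):
 *
 *	struct dm_array_cursor c;
 *	void *value_le;
 *	int r;
 *
 *	r = dm_array_cursor_begin(info, root, &c);
 *	do {
 *		dm_array_cursor_get_value(&c, &value_le);
 *		... use the packed value behind value_le ...
 *	} while (!dm_array_cursor_next(&c));
 *	dm_array_cursor_end(&c);
 */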
int dm_array_cursor_begin(struct dm_array_info *info, dm_block_t root,
			  struct dm_array_cursor *c)
{
	int r;

	memset(c, 0, sizeof(*c));
	c->info = info;
	r = dm_btree_cursor_begin(&info->btree_info, root, true, &c->cursor);
	if (r) {
		DMERR("couldn't create btree cursor");
		return r;
	}

	return load_ablock(c);
}
EXPORT_SYMBOL_GPL(dm_array_cursor_begin);

void dm_array_cursor_end(struct dm_array_cursor *c)
{
	if (c->block) {
		unlock_ablock(c->info, c->block);
		dm_btree_cursor_end(&c->cursor);
	}
}
EXPORT_SYMBOL_GPL(dm_array_cursor_end);

int dm_array_cursor_next(struct dm_array_cursor *c)
{
	int r;

	if (!c->block)
		return -ENODATA;

	c->index++;

	if (c->index >= le32_to_cpu(c->ab->nr_entries)) {
		r = dm_btree_cursor_next(&c->cursor);
		if (r)
			return r;

		r = load_ablock(c);
		if (r)
			return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_array_cursor_next);

int dm_array_cursor_skip(struct dm_array_cursor *c, uint32_t count)
{
	int r;

	do {
		uint32_t remaining = le32_to_cpu(c->ab->nr_entries) - c->index;

		if (count < remaining) {
			c->index += count;
			return 0;
		}

		count -= remaining;
		r = dm_array_cursor_next(c);

	} while (!r);

	return r;
}
EXPORT_SYMBOL_GPL(dm_array_cursor_skip);

void dm_array_cursor_get_value(struct dm_array_cursor *c, void **value_le)
{
	*value_le = element_at(c->info, c->ab, c->index);
}
EXPORT_SYMBOL_GPL(dm_array_cursor_get_value);

/*----------------------------------------------------------------*/