xref: /OK3568_Linux_fs/kernel/net/ceph/striper.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun /* SPDX-License-Identifier: GPL-2.0 */
2*4882a593Smuzhiyun 
3*4882a593Smuzhiyun #include <linux/ceph/ceph_debug.h>
4*4882a593Smuzhiyun 
5*4882a593Smuzhiyun #include <linux/math64.h>
6*4882a593Smuzhiyun #include <linux/slab.h>
7*4882a593Smuzhiyun 
8*4882a593Smuzhiyun #include <linux/ceph/striper.h>
9*4882a593Smuzhiyun #include <linux/ceph/types.h>
10*4882a593Smuzhiyun 
11*4882a593Smuzhiyun /*
12*4882a593Smuzhiyun  * Map a file extent to a stripe unit within an object.
13*4882a593Smuzhiyun  * Fill in objno, offset into object, and object extent length (i.e. the
14*4882a593Smuzhiyun  * number of bytes mapped, less than or equal to @l->stripe_unit).
15*4882a593Smuzhiyun  *
16*4882a593Smuzhiyun  * Example for stripe_count = 3, stripes_per_object = 4:
17*4882a593Smuzhiyun  *
18*4882a593Smuzhiyun  * blockno   |  0  3  6  9 |  1  4  7 10 |  2  5  8 11 | 12 15 18 21 | 13 16 19
19*4882a593Smuzhiyun  * stripeno  |  0  1  2  3 |  0  1  2  3 |  0  1  2  3 |  4  5  6  7 |  4  5  6
20*4882a593Smuzhiyun  * stripepos |      0      |      1      |      2      |      0      |      1
21*4882a593Smuzhiyun  * objno     |      0      |      1      |      2      |      3      |      4
22*4882a593Smuzhiyun  * objsetno  |                    0                    |                    1
23*4882a593Smuzhiyun  */
ceph_calc_file_object_mapping(struct ceph_file_layout * l,u64 off,u64 len,u64 * objno,u64 * objoff,u32 * xlen)24*4882a593Smuzhiyun void ceph_calc_file_object_mapping(struct ceph_file_layout *l,
25*4882a593Smuzhiyun 				   u64 off, u64 len,
26*4882a593Smuzhiyun 				   u64 *objno, u64 *objoff, u32 *xlen)
27*4882a593Smuzhiyun {
28*4882a593Smuzhiyun 	u32 stripes_per_object = l->object_size / l->stripe_unit;
29*4882a593Smuzhiyun 	u64 blockno;	/* which su in the file (i.e. globally) */
30*4882a593Smuzhiyun 	u32 blockoff;	/* offset into su */
31*4882a593Smuzhiyun 	u64 stripeno;	/* which stripe */
32*4882a593Smuzhiyun 	u32 stripepos;	/* which su in the stripe,
33*4882a593Smuzhiyun 			   which object in the object set */
34*4882a593Smuzhiyun 	u64 objsetno;	/* which object set */
35*4882a593Smuzhiyun 	u32 objsetpos;	/* which stripe in the object set */
36*4882a593Smuzhiyun 
37*4882a593Smuzhiyun 	blockno = div_u64_rem(off, l->stripe_unit, &blockoff);
38*4882a593Smuzhiyun 	stripeno = div_u64_rem(blockno, l->stripe_count, &stripepos);
39*4882a593Smuzhiyun 	objsetno = div_u64_rem(stripeno, stripes_per_object, &objsetpos);
40*4882a593Smuzhiyun 
41*4882a593Smuzhiyun 	*objno = objsetno * l->stripe_count + stripepos;
42*4882a593Smuzhiyun 	*objoff = objsetpos * l->stripe_unit + blockoff;
43*4882a593Smuzhiyun 	*xlen = min_t(u64, len, l->stripe_unit - blockoff);
44*4882a593Smuzhiyun }
45*4882a593Smuzhiyun EXPORT_SYMBOL(ceph_calc_file_object_mapping);
46*4882a593Smuzhiyun 
47*4882a593Smuzhiyun /*
48*4882a593Smuzhiyun  * Return the last extent with given objno (@object_extents is sorted
49*4882a593Smuzhiyun  * by objno).  If not found, return NULL and set @add_pos so that the
50*4882a593Smuzhiyun  * new extent can be added with list_add(add_pos, new_ex).
51*4882a593Smuzhiyun  */
52*4882a593Smuzhiyun static struct ceph_object_extent *
lookup_last(struct list_head * object_extents,u64 objno,struct list_head ** add_pos)53*4882a593Smuzhiyun lookup_last(struct list_head *object_extents, u64 objno,
54*4882a593Smuzhiyun 	    struct list_head **add_pos)
55*4882a593Smuzhiyun {
56*4882a593Smuzhiyun 	struct list_head *pos;
57*4882a593Smuzhiyun 
58*4882a593Smuzhiyun 	list_for_each_prev(pos, object_extents) {
59*4882a593Smuzhiyun 		struct ceph_object_extent *ex =
60*4882a593Smuzhiyun 		    list_entry(pos, typeof(*ex), oe_item);
61*4882a593Smuzhiyun 
62*4882a593Smuzhiyun 		if (ex->oe_objno == objno)
63*4882a593Smuzhiyun 			return ex;
64*4882a593Smuzhiyun 
65*4882a593Smuzhiyun 		if (ex->oe_objno < objno)
66*4882a593Smuzhiyun 			break;
67*4882a593Smuzhiyun 	}
68*4882a593Smuzhiyun 
69*4882a593Smuzhiyun 	*add_pos = pos;
70*4882a593Smuzhiyun 	return NULL;
71*4882a593Smuzhiyun }
72*4882a593Smuzhiyun 
73*4882a593Smuzhiyun static struct ceph_object_extent *
lookup_containing(struct list_head * object_extents,u64 objno,u64 objoff,u32 xlen)74*4882a593Smuzhiyun lookup_containing(struct list_head *object_extents, u64 objno,
75*4882a593Smuzhiyun 		  u64 objoff, u32 xlen)
76*4882a593Smuzhiyun {
77*4882a593Smuzhiyun 	struct ceph_object_extent *ex;
78*4882a593Smuzhiyun 
79*4882a593Smuzhiyun 	list_for_each_entry(ex, object_extents, oe_item) {
80*4882a593Smuzhiyun 		if (ex->oe_objno == objno &&
81*4882a593Smuzhiyun 		    ex->oe_off <= objoff &&
82*4882a593Smuzhiyun 		    ex->oe_off + ex->oe_len >= objoff + xlen) /* paranoia */
83*4882a593Smuzhiyun 			return ex;
84*4882a593Smuzhiyun 
85*4882a593Smuzhiyun 		if (ex->oe_objno > objno)
86*4882a593Smuzhiyun 			break;
87*4882a593Smuzhiyun 	}
88*4882a593Smuzhiyun 
89*4882a593Smuzhiyun 	return NULL;
90*4882a593Smuzhiyun }
91*4882a593Smuzhiyun 
92*4882a593Smuzhiyun /*
93*4882a593Smuzhiyun  * Map a file extent to a sorted list of object extents.
94*4882a593Smuzhiyun  *
95*4882a593Smuzhiyun  * We want only one (or as few as possible) object extents per object.
96*4882a593Smuzhiyun  * Adjacent object extents will be merged together, each returned object
97*4882a593Smuzhiyun  * extent may reverse map to multiple different file extents.
98*4882a593Smuzhiyun  *
99*4882a593Smuzhiyun  * Call @alloc_fn for each new object extent and @action_fn for each
100*4882a593Smuzhiyun  * mapped stripe unit, whether it was merged into an already allocated
101*4882a593Smuzhiyun  * object extent or started a new object extent.
102*4882a593Smuzhiyun  *
103*4882a593Smuzhiyun  * Newly allocated object extents are added to @object_extents.
104*4882a593Smuzhiyun  * To keep @object_extents sorted, successive calls to this function
105*4882a593Smuzhiyun  * must map successive file extents (i.e. the list of file extents that
106*4882a593Smuzhiyun  * are mapped using the same @object_extents must be sorted).
107*4882a593Smuzhiyun  *
108*4882a593Smuzhiyun  * The caller is responsible for @object_extents.
109*4882a593Smuzhiyun  */
ceph_file_to_extents(struct ceph_file_layout * l,u64 off,u64 len,struct list_head * object_extents,struct ceph_object_extent * alloc_fn (void * arg),void * alloc_arg,ceph_object_extent_fn_t action_fn,void * action_arg)110*4882a593Smuzhiyun int ceph_file_to_extents(struct ceph_file_layout *l, u64 off, u64 len,
111*4882a593Smuzhiyun 			 struct list_head *object_extents,
112*4882a593Smuzhiyun 			 struct ceph_object_extent *alloc_fn(void *arg),
113*4882a593Smuzhiyun 			 void *alloc_arg,
114*4882a593Smuzhiyun 			 ceph_object_extent_fn_t action_fn,
115*4882a593Smuzhiyun 			 void *action_arg)
116*4882a593Smuzhiyun {
117*4882a593Smuzhiyun 	struct ceph_object_extent *last_ex, *ex;
118*4882a593Smuzhiyun 
119*4882a593Smuzhiyun 	while (len) {
120*4882a593Smuzhiyun 		struct list_head *add_pos = NULL;
121*4882a593Smuzhiyun 		u64 objno, objoff;
122*4882a593Smuzhiyun 		u32 xlen;
123*4882a593Smuzhiyun 
124*4882a593Smuzhiyun 		ceph_calc_file_object_mapping(l, off, len, &objno, &objoff,
125*4882a593Smuzhiyun 					      &xlen);
126*4882a593Smuzhiyun 
127*4882a593Smuzhiyun 		last_ex = lookup_last(object_extents, objno, &add_pos);
128*4882a593Smuzhiyun 		if (!last_ex || last_ex->oe_off + last_ex->oe_len != objoff) {
129*4882a593Smuzhiyun 			ex = alloc_fn(alloc_arg);
130*4882a593Smuzhiyun 			if (!ex)
131*4882a593Smuzhiyun 				return -ENOMEM;
132*4882a593Smuzhiyun 
133*4882a593Smuzhiyun 			ex->oe_objno = objno;
134*4882a593Smuzhiyun 			ex->oe_off = objoff;
135*4882a593Smuzhiyun 			ex->oe_len = xlen;
136*4882a593Smuzhiyun 			if (action_fn)
137*4882a593Smuzhiyun 				action_fn(ex, xlen, action_arg);
138*4882a593Smuzhiyun 
139*4882a593Smuzhiyun 			if (!last_ex)
140*4882a593Smuzhiyun 				list_add(&ex->oe_item, add_pos);
141*4882a593Smuzhiyun 			else
142*4882a593Smuzhiyun 				list_add(&ex->oe_item, &last_ex->oe_item);
143*4882a593Smuzhiyun 		} else {
144*4882a593Smuzhiyun 			last_ex->oe_len += xlen;
145*4882a593Smuzhiyun 			if (action_fn)
146*4882a593Smuzhiyun 				action_fn(last_ex, xlen, action_arg);
147*4882a593Smuzhiyun 		}
148*4882a593Smuzhiyun 
149*4882a593Smuzhiyun 		off += xlen;
150*4882a593Smuzhiyun 		len -= xlen;
151*4882a593Smuzhiyun 	}
152*4882a593Smuzhiyun 
153*4882a593Smuzhiyun 	for (last_ex = list_first_entry(object_extents, typeof(*ex), oe_item),
154*4882a593Smuzhiyun 	     ex = list_next_entry(last_ex, oe_item);
155*4882a593Smuzhiyun 	     &ex->oe_item != object_extents;
156*4882a593Smuzhiyun 	     last_ex = ex, ex = list_next_entry(ex, oe_item)) {
157*4882a593Smuzhiyun 		if (last_ex->oe_objno > ex->oe_objno ||
158*4882a593Smuzhiyun 		    (last_ex->oe_objno == ex->oe_objno &&
159*4882a593Smuzhiyun 		     last_ex->oe_off + last_ex->oe_len >= ex->oe_off)) {
160*4882a593Smuzhiyun 			WARN(1, "%s: object_extents list not sorted!\n",
161*4882a593Smuzhiyun 			     __func__);
162*4882a593Smuzhiyun 			return -EINVAL;
163*4882a593Smuzhiyun 		}
164*4882a593Smuzhiyun 	}
165*4882a593Smuzhiyun 
166*4882a593Smuzhiyun 	return 0;
167*4882a593Smuzhiyun }
168*4882a593Smuzhiyun EXPORT_SYMBOL(ceph_file_to_extents);
169*4882a593Smuzhiyun 
170*4882a593Smuzhiyun /*
171*4882a593Smuzhiyun  * A stripped down, non-allocating version of ceph_file_to_extents(),
172*4882a593Smuzhiyun  * for when @object_extents is already populated.
173*4882a593Smuzhiyun  */
ceph_iterate_extents(struct ceph_file_layout * l,u64 off,u64 len,struct list_head * object_extents,ceph_object_extent_fn_t action_fn,void * action_arg)174*4882a593Smuzhiyun int ceph_iterate_extents(struct ceph_file_layout *l, u64 off, u64 len,
175*4882a593Smuzhiyun 			 struct list_head *object_extents,
176*4882a593Smuzhiyun 			 ceph_object_extent_fn_t action_fn,
177*4882a593Smuzhiyun 			 void *action_arg)
178*4882a593Smuzhiyun {
179*4882a593Smuzhiyun 	while (len) {
180*4882a593Smuzhiyun 		struct ceph_object_extent *ex;
181*4882a593Smuzhiyun 		u64 objno, objoff;
182*4882a593Smuzhiyun 		u32 xlen;
183*4882a593Smuzhiyun 
184*4882a593Smuzhiyun 		ceph_calc_file_object_mapping(l, off, len, &objno, &objoff,
185*4882a593Smuzhiyun 					      &xlen);
186*4882a593Smuzhiyun 
187*4882a593Smuzhiyun 		ex = lookup_containing(object_extents, objno, objoff, xlen);
188*4882a593Smuzhiyun 		if (!ex) {
189*4882a593Smuzhiyun 			WARN(1, "%s: objno %llu %llu~%u not found!\n",
190*4882a593Smuzhiyun 			     __func__, objno, objoff, xlen);
191*4882a593Smuzhiyun 			return -EINVAL;
192*4882a593Smuzhiyun 		}
193*4882a593Smuzhiyun 
194*4882a593Smuzhiyun 		action_fn(ex, xlen, action_arg);
195*4882a593Smuzhiyun 
196*4882a593Smuzhiyun 		off += xlen;
197*4882a593Smuzhiyun 		len -= xlen;
198*4882a593Smuzhiyun 	}
199*4882a593Smuzhiyun 
200*4882a593Smuzhiyun 	return 0;
201*4882a593Smuzhiyun }
202*4882a593Smuzhiyun EXPORT_SYMBOL(ceph_iterate_extents);
203*4882a593Smuzhiyun 
204*4882a593Smuzhiyun /*
205*4882a593Smuzhiyun  * Reverse map an object extent to a sorted list of file extents.
206*4882a593Smuzhiyun  *
207*4882a593Smuzhiyun  * On success, the caller is responsible for:
208*4882a593Smuzhiyun  *
209*4882a593Smuzhiyun  *     kfree(file_extents)
210*4882a593Smuzhiyun  */
ceph_extent_to_file(struct ceph_file_layout * l,u64 objno,u64 objoff,u64 objlen,struct ceph_file_extent ** file_extents,u32 * num_file_extents)211*4882a593Smuzhiyun int ceph_extent_to_file(struct ceph_file_layout *l,
212*4882a593Smuzhiyun 			u64 objno, u64 objoff, u64 objlen,
213*4882a593Smuzhiyun 			struct ceph_file_extent **file_extents,
214*4882a593Smuzhiyun 			u32 *num_file_extents)
215*4882a593Smuzhiyun {
216*4882a593Smuzhiyun 	u32 stripes_per_object = l->object_size / l->stripe_unit;
217*4882a593Smuzhiyun 	u64 blockno;	/* which su */
218*4882a593Smuzhiyun 	u32 blockoff;	/* offset into su */
219*4882a593Smuzhiyun 	u64 stripeno;	/* which stripe */
220*4882a593Smuzhiyun 	u32 stripepos;	/* which su in the stripe,
221*4882a593Smuzhiyun 			   which object in the object set */
222*4882a593Smuzhiyun 	u64 objsetno;	/* which object set */
223*4882a593Smuzhiyun 	u32 i = 0;
224*4882a593Smuzhiyun 
225*4882a593Smuzhiyun 	if (!objlen) {
226*4882a593Smuzhiyun 		*file_extents = NULL;
227*4882a593Smuzhiyun 		*num_file_extents = 0;
228*4882a593Smuzhiyun 		return 0;
229*4882a593Smuzhiyun 	}
230*4882a593Smuzhiyun 
231*4882a593Smuzhiyun 	*num_file_extents = DIV_ROUND_UP_ULL(objoff + objlen, l->stripe_unit) -
232*4882a593Smuzhiyun 				     DIV_ROUND_DOWN_ULL(objoff, l->stripe_unit);
233*4882a593Smuzhiyun 	*file_extents = kmalloc_array(*num_file_extents, sizeof(**file_extents),
234*4882a593Smuzhiyun 				      GFP_NOIO);
235*4882a593Smuzhiyun 	if (!*file_extents)
236*4882a593Smuzhiyun 		return -ENOMEM;
237*4882a593Smuzhiyun 
238*4882a593Smuzhiyun 	div_u64_rem(objoff, l->stripe_unit, &blockoff);
239*4882a593Smuzhiyun 	while (objlen) {
240*4882a593Smuzhiyun 		u64 off, len;
241*4882a593Smuzhiyun 
242*4882a593Smuzhiyun 		objsetno = div_u64_rem(objno, l->stripe_count, &stripepos);
243*4882a593Smuzhiyun 		stripeno = div_u64(objoff, l->stripe_unit) +
244*4882a593Smuzhiyun 						objsetno * stripes_per_object;
245*4882a593Smuzhiyun 		blockno = stripeno * l->stripe_count + stripepos;
246*4882a593Smuzhiyun 		off = blockno * l->stripe_unit + blockoff;
247*4882a593Smuzhiyun 		len = min_t(u64, objlen, l->stripe_unit - blockoff);
248*4882a593Smuzhiyun 
249*4882a593Smuzhiyun 		(*file_extents)[i].fe_off = off;
250*4882a593Smuzhiyun 		(*file_extents)[i].fe_len = len;
251*4882a593Smuzhiyun 
252*4882a593Smuzhiyun 		blockoff = 0;
253*4882a593Smuzhiyun 		objoff += len;
254*4882a593Smuzhiyun 		objlen -= len;
255*4882a593Smuzhiyun 		i++;
256*4882a593Smuzhiyun 	}
257*4882a593Smuzhiyun 
258*4882a593Smuzhiyun 	BUG_ON(i != *num_file_extents);
259*4882a593Smuzhiyun 	return 0;
260*4882a593Smuzhiyun }
261*4882a593Smuzhiyun EXPORT_SYMBOL(ceph_extent_to_file);
262*4882a593Smuzhiyun 
ceph_get_num_objects(struct ceph_file_layout * l,u64 size)263*4882a593Smuzhiyun u64 ceph_get_num_objects(struct ceph_file_layout *l, u64 size)
264*4882a593Smuzhiyun {
265*4882a593Smuzhiyun 	u64 period = (u64)l->stripe_count * l->object_size;
266*4882a593Smuzhiyun 	u64 num_periods = DIV64_U64_ROUND_UP(size, period);
267*4882a593Smuzhiyun 	u64 remainder_bytes;
268*4882a593Smuzhiyun 	u64 remainder_objs = 0;
269*4882a593Smuzhiyun 
270*4882a593Smuzhiyun 	div64_u64_rem(size, period, &remainder_bytes);
271*4882a593Smuzhiyun 	if (remainder_bytes > 0 &&
272*4882a593Smuzhiyun 	    remainder_bytes < (u64)l->stripe_count * l->stripe_unit)
273*4882a593Smuzhiyun 		remainder_objs = l->stripe_count -
274*4882a593Smuzhiyun 			    DIV_ROUND_UP_ULL(remainder_bytes, l->stripe_unit);
275*4882a593Smuzhiyun 
276*4882a593Smuzhiyun 	return num_periods * l->stripe_count - remainder_objs;
277*4882a593Smuzhiyun }
278*4882a593Smuzhiyun EXPORT_SYMBOL(ceph_get_num_objects);
279