1*4882a593Smuzhiyun /* SPDX-License-Identifier: GPL-2.0 */
2*4882a593Smuzhiyun
3*4882a593Smuzhiyun #include <linux/ceph/ceph_debug.h>
4*4882a593Smuzhiyun
5*4882a593Smuzhiyun #include <linux/math64.h>
6*4882a593Smuzhiyun #include <linux/slab.h>
7*4882a593Smuzhiyun
8*4882a593Smuzhiyun #include <linux/ceph/striper.h>
9*4882a593Smuzhiyun #include <linux/ceph/types.h>
10*4882a593Smuzhiyun
11*4882a593Smuzhiyun /*
12*4882a593Smuzhiyun * Map a file extent to a stripe unit within an object.
13*4882a593Smuzhiyun * Fill in objno, offset into object, and object extent length (i.e. the
14*4882a593Smuzhiyun * number of bytes mapped, less than or equal to @l->stripe_unit).
15*4882a593Smuzhiyun *
16*4882a593Smuzhiyun * Example for stripe_count = 3, stripes_per_object = 4:
17*4882a593Smuzhiyun *
18*4882a593Smuzhiyun * blockno | 0 3 6 9 | 1 4 7 10 | 2 5 8 11 | 12 15 18 21 | 13 16 19
19*4882a593Smuzhiyun * stripeno | 0 1 2 3 | 0 1 2 3 | 0 1 2 3 | 4 5 6 7 | 4 5 6
20*4882a593Smuzhiyun * stripepos | 0 | 1 | 2 | 0 | 1
21*4882a593Smuzhiyun * objno | 0 | 1 | 2 | 3 | 4
22*4882a593Smuzhiyun * objsetno | 0 | 1
23*4882a593Smuzhiyun */
ceph_calc_file_object_mapping(struct ceph_file_layout * l,u64 off,u64 len,u64 * objno,u64 * objoff,u32 * xlen)24*4882a593Smuzhiyun void ceph_calc_file_object_mapping(struct ceph_file_layout *l,
25*4882a593Smuzhiyun u64 off, u64 len,
26*4882a593Smuzhiyun u64 *objno, u64 *objoff, u32 *xlen)
27*4882a593Smuzhiyun {
28*4882a593Smuzhiyun u32 stripes_per_object = l->object_size / l->stripe_unit;
29*4882a593Smuzhiyun u64 blockno; /* which su in the file (i.e. globally) */
30*4882a593Smuzhiyun u32 blockoff; /* offset into su */
31*4882a593Smuzhiyun u64 stripeno; /* which stripe */
32*4882a593Smuzhiyun u32 stripepos; /* which su in the stripe,
33*4882a593Smuzhiyun which object in the object set */
34*4882a593Smuzhiyun u64 objsetno; /* which object set */
35*4882a593Smuzhiyun u32 objsetpos; /* which stripe in the object set */
36*4882a593Smuzhiyun
37*4882a593Smuzhiyun blockno = div_u64_rem(off, l->stripe_unit, &blockoff);
38*4882a593Smuzhiyun stripeno = div_u64_rem(blockno, l->stripe_count, &stripepos);
39*4882a593Smuzhiyun objsetno = div_u64_rem(stripeno, stripes_per_object, &objsetpos);
40*4882a593Smuzhiyun
41*4882a593Smuzhiyun *objno = objsetno * l->stripe_count + stripepos;
42*4882a593Smuzhiyun *objoff = objsetpos * l->stripe_unit + blockoff;
43*4882a593Smuzhiyun *xlen = min_t(u64, len, l->stripe_unit - blockoff);
44*4882a593Smuzhiyun }
45*4882a593Smuzhiyun EXPORT_SYMBOL(ceph_calc_file_object_mapping);
46*4882a593Smuzhiyun
47*4882a593Smuzhiyun /*
48*4882a593Smuzhiyun * Return the last extent with given objno (@object_extents is sorted
49*4882a593Smuzhiyun * by objno). If not found, return NULL and set @add_pos so that the
50*4882a593Smuzhiyun * new extent can be added with list_add(add_pos, new_ex).
51*4882a593Smuzhiyun */
52*4882a593Smuzhiyun static struct ceph_object_extent *
lookup_last(struct list_head * object_extents,u64 objno,struct list_head ** add_pos)53*4882a593Smuzhiyun lookup_last(struct list_head *object_extents, u64 objno,
54*4882a593Smuzhiyun struct list_head **add_pos)
55*4882a593Smuzhiyun {
56*4882a593Smuzhiyun struct list_head *pos;
57*4882a593Smuzhiyun
58*4882a593Smuzhiyun list_for_each_prev(pos, object_extents) {
59*4882a593Smuzhiyun struct ceph_object_extent *ex =
60*4882a593Smuzhiyun list_entry(pos, typeof(*ex), oe_item);
61*4882a593Smuzhiyun
62*4882a593Smuzhiyun if (ex->oe_objno == objno)
63*4882a593Smuzhiyun return ex;
64*4882a593Smuzhiyun
65*4882a593Smuzhiyun if (ex->oe_objno < objno)
66*4882a593Smuzhiyun break;
67*4882a593Smuzhiyun }
68*4882a593Smuzhiyun
69*4882a593Smuzhiyun *add_pos = pos;
70*4882a593Smuzhiyun return NULL;
71*4882a593Smuzhiyun }
72*4882a593Smuzhiyun
73*4882a593Smuzhiyun static struct ceph_object_extent *
lookup_containing(struct list_head * object_extents,u64 objno,u64 objoff,u32 xlen)74*4882a593Smuzhiyun lookup_containing(struct list_head *object_extents, u64 objno,
75*4882a593Smuzhiyun u64 objoff, u32 xlen)
76*4882a593Smuzhiyun {
77*4882a593Smuzhiyun struct ceph_object_extent *ex;
78*4882a593Smuzhiyun
79*4882a593Smuzhiyun list_for_each_entry(ex, object_extents, oe_item) {
80*4882a593Smuzhiyun if (ex->oe_objno == objno &&
81*4882a593Smuzhiyun ex->oe_off <= objoff &&
82*4882a593Smuzhiyun ex->oe_off + ex->oe_len >= objoff + xlen) /* paranoia */
83*4882a593Smuzhiyun return ex;
84*4882a593Smuzhiyun
85*4882a593Smuzhiyun if (ex->oe_objno > objno)
86*4882a593Smuzhiyun break;
87*4882a593Smuzhiyun }
88*4882a593Smuzhiyun
89*4882a593Smuzhiyun return NULL;
90*4882a593Smuzhiyun }
91*4882a593Smuzhiyun
92*4882a593Smuzhiyun /*
93*4882a593Smuzhiyun * Map a file extent to a sorted list of object extents.
94*4882a593Smuzhiyun *
95*4882a593Smuzhiyun * We want only one (or as few as possible) object extents per object.
96*4882a593Smuzhiyun * Adjacent object extents will be merged together, each returned object
97*4882a593Smuzhiyun * extent may reverse map to multiple different file extents.
98*4882a593Smuzhiyun *
99*4882a593Smuzhiyun * Call @alloc_fn for each new object extent and @action_fn for each
100*4882a593Smuzhiyun * mapped stripe unit, whether it was merged into an already allocated
101*4882a593Smuzhiyun * object extent or started a new object extent.
102*4882a593Smuzhiyun *
103*4882a593Smuzhiyun * Newly allocated object extents are added to @object_extents.
104*4882a593Smuzhiyun * To keep @object_extents sorted, successive calls to this function
105*4882a593Smuzhiyun * must map successive file extents (i.e. the list of file extents that
106*4882a593Smuzhiyun * are mapped using the same @object_extents must be sorted).
107*4882a593Smuzhiyun *
108*4882a593Smuzhiyun * The caller is responsible for @object_extents.
109*4882a593Smuzhiyun */
ceph_file_to_extents(struct ceph_file_layout * l,u64 off,u64 len,struct list_head * object_extents,struct ceph_object_extent * alloc_fn (void * arg),void * alloc_arg,ceph_object_extent_fn_t action_fn,void * action_arg)110*4882a593Smuzhiyun int ceph_file_to_extents(struct ceph_file_layout *l, u64 off, u64 len,
111*4882a593Smuzhiyun struct list_head *object_extents,
112*4882a593Smuzhiyun struct ceph_object_extent *alloc_fn(void *arg),
113*4882a593Smuzhiyun void *alloc_arg,
114*4882a593Smuzhiyun ceph_object_extent_fn_t action_fn,
115*4882a593Smuzhiyun void *action_arg)
116*4882a593Smuzhiyun {
117*4882a593Smuzhiyun struct ceph_object_extent *last_ex, *ex;
118*4882a593Smuzhiyun
119*4882a593Smuzhiyun while (len) {
120*4882a593Smuzhiyun struct list_head *add_pos = NULL;
121*4882a593Smuzhiyun u64 objno, objoff;
122*4882a593Smuzhiyun u32 xlen;
123*4882a593Smuzhiyun
124*4882a593Smuzhiyun ceph_calc_file_object_mapping(l, off, len, &objno, &objoff,
125*4882a593Smuzhiyun &xlen);
126*4882a593Smuzhiyun
127*4882a593Smuzhiyun last_ex = lookup_last(object_extents, objno, &add_pos);
128*4882a593Smuzhiyun if (!last_ex || last_ex->oe_off + last_ex->oe_len != objoff) {
129*4882a593Smuzhiyun ex = alloc_fn(alloc_arg);
130*4882a593Smuzhiyun if (!ex)
131*4882a593Smuzhiyun return -ENOMEM;
132*4882a593Smuzhiyun
133*4882a593Smuzhiyun ex->oe_objno = objno;
134*4882a593Smuzhiyun ex->oe_off = objoff;
135*4882a593Smuzhiyun ex->oe_len = xlen;
136*4882a593Smuzhiyun if (action_fn)
137*4882a593Smuzhiyun action_fn(ex, xlen, action_arg);
138*4882a593Smuzhiyun
139*4882a593Smuzhiyun if (!last_ex)
140*4882a593Smuzhiyun list_add(&ex->oe_item, add_pos);
141*4882a593Smuzhiyun else
142*4882a593Smuzhiyun list_add(&ex->oe_item, &last_ex->oe_item);
143*4882a593Smuzhiyun } else {
144*4882a593Smuzhiyun last_ex->oe_len += xlen;
145*4882a593Smuzhiyun if (action_fn)
146*4882a593Smuzhiyun action_fn(last_ex, xlen, action_arg);
147*4882a593Smuzhiyun }
148*4882a593Smuzhiyun
149*4882a593Smuzhiyun off += xlen;
150*4882a593Smuzhiyun len -= xlen;
151*4882a593Smuzhiyun }
152*4882a593Smuzhiyun
153*4882a593Smuzhiyun for (last_ex = list_first_entry(object_extents, typeof(*ex), oe_item),
154*4882a593Smuzhiyun ex = list_next_entry(last_ex, oe_item);
155*4882a593Smuzhiyun &ex->oe_item != object_extents;
156*4882a593Smuzhiyun last_ex = ex, ex = list_next_entry(ex, oe_item)) {
157*4882a593Smuzhiyun if (last_ex->oe_objno > ex->oe_objno ||
158*4882a593Smuzhiyun (last_ex->oe_objno == ex->oe_objno &&
159*4882a593Smuzhiyun last_ex->oe_off + last_ex->oe_len >= ex->oe_off)) {
160*4882a593Smuzhiyun WARN(1, "%s: object_extents list not sorted!\n",
161*4882a593Smuzhiyun __func__);
162*4882a593Smuzhiyun return -EINVAL;
163*4882a593Smuzhiyun }
164*4882a593Smuzhiyun }
165*4882a593Smuzhiyun
166*4882a593Smuzhiyun return 0;
167*4882a593Smuzhiyun }
168*4882a593Smuzhiyun EXPORT_SYMBOL(ceph_file_to_extents);
169*4882a593Smuzhiyun
170*4882a593Smuzhiyun /*
171*4882a593Smuzhiyun * A stripped down, non-allocating version of ceph_file_to_extents(),
172*4882a593Smuzhiyun * for when @object_extents is already populated.
173*4882a593Smuzhiyun */
ceph_iterate_extents(struct ceph_file_layout * l,u64 off,u64 len,struct list_head * object_extents,ceph_object_extent_fn_t action_fn,void * action_arg)174*4882a593Smuzhiyun int ceph_iterate_extents(struct ceph_file_layout *l, u64 off, u64 len,
175*4882a593Smuzhiyun struct list_head *object_extents,
176*4882a593Smuzhiyun ceph_object_extent_fn_t action_fn,
177*4882a593Smuzhiyun void *action_arg)
178*4882a593Smuzhiyun {
179*4882a593Smuzhiyun while (len) {
180*4882a593Smuzhiyun struct ceph_object_extent *ex;
181*4882a593Smuzhiyun u64 objno, objoff;
182*4882a593Smuzhiyun u32 xlen;
183*4882a593Smuzhiyun
184*4882a593Smuzhiyun ceph_calc_file_object_mapping(l, off, len, &objno, &objoff,
185*4882a593Smuzhiyun &xlen);
186*4882a593Smuzhiyun
187*4882a593Smuzhiyun ex = lookup_containing(object_extents, objno, objoff, xlen);
188*4882a593Smuzhiyun if (!ex) {
189*4882a593Smuzhiyun WARN(1, "%s: objno %llu %llu~%u not found!\n",
190*4882a593Smuzhiyun __func__, objno, objoff, xlen);
191*4882a593Smuzhiyun return -EINVAL;
192*4882a593Smuzhiyun }
193*4882a593Smuzhiyun
194*4882a593Smuzhiyun action_fn(ex, xlen, action_arg);
195*4882a593Smuzhiyun
196*4882a593Smuzhiyun off += xlen;
197*4882a593Smuzhiyun len -= xlen;
198*4882a593Smuzhiyun }
199*4882a593Smuzhiyun
200*4882a593Smuzhiyun return 0;
201*4882a593Smuzhiyun }
202*4882a593Smuzhiyun EXPORT_SYMBOL(ceph_iterate_extents);
203*4882a593Smuzhiyun
204*4882a593Smuzhiyun /*
205*4882a593Smuzhiyun * Reverse map an object extent to a sorted list of file extents.
206*4882a593Smuzhiyun *
207*4882a593Smuzhiyun * On success, the caller is responsible for:
208*4882a593Smuzhiyun *
209*4882a593Smuzhiyun * kfree(file_extents)
210*4882a593Smuzhiyun */
ceph_extent_to_file(struct ceph_file_layout * l,u64 objno,u64 objoff,u64 objlen,struct ceph_file_extent ** file_extents,u32 * num_file_extents)211*4882a593Smuzhiyun int ceph_extent_to_file(struct ceph_file_layout *l,
212*4882a593Smuzhiyun u64 objno, u64 objoff, u64 objlen,
213*4882a593Smuzhiyun struct ceph_file_extent **file_extents,
214*4882a593Smuzhiyun u32 *num_file_extents)
215*4882a593Smuzhiyun {
216*4882a593Smuzhiyun u32 stripes_per_object = l->object_size / l->stripe_unit;
217*4882a593Smuzhiyun u64 blockno; /* which su */
218*4882a593Smuzhiyun u32 blockoff; /* offset into su */
219*4882a593Smuzhiyun u64 stripeno; /* which stripe */
220*4882a593Smuzhiyun u32 stripepos; /* which su in the stripe,
221*4882a593Smuzhiyun which object in the object set */
222*4882a593Smuzhiyun u64 objsetno; /* which object set */
223*4882a593Smuzhiyun u32 i = 0;
224*4882a593Smuzhiyun
225*4882a593Smuzhiyun if (!objlen) {
226*4882a593Smuzhiyun *file_extents = NULL;
227*4882a593Smuzhiyun *num_file_extents = 0;
228*4882a593Smuzhiyun return 0;
229*4882a593Smuzhiyun }
230*4882a593Smuzhiyun
231*4882a593Smuzhiyun *num_file_extents = DIV_ROUND_UP_ULL(objoff + objlen, l->stripe_unit) -
232*4882a593Smuzhiyun DIV_ROUND_DOWN_ULL(objoff, l->stripe_unit);
233*4882a593Smuzhiyun *file_extents = kmalloc_array(*num_file_extents, sizeof(**file_extents),
234*4882a593Smuzhiyun GFP_NOIO);
235*4882a593Smuzhiyun if (!*file_extents)
236*4882a593Smuzhiyun return -ENOMEM;
237*4882a593Smuzhiyun
238*4882a593Smuzhiyun div_u64_rem(objoff, l->stripe_unit, &blockoff);
239*4882a593Smuzhiyun while (objlen) {
240*4882a593Smuzhiyun u64 off, len;
241*4882a593Smuzhiyun
242*4882a593Smuzhiyun objsetno = div_u64_rem(objno, l->stripe_count, &stripepos);
243*4882a593Smuzhiyun stripeno = div_u64(objoff, l->stripe_unit) +
244*4882a593Smuzhiyun objsetno * stripes_per_object;
245*4882a593Smuzhiyun blockno = stripeno * l->stripe_count + stripepos;
246*4882a593Smuzhiyun off = blockno * l->stripe_unit + blockoff;
247*4882a593Smuzhiyun len = min_t(u64, objlen, l->stripe_unit - blockoff);
248*4882a593Smuzhiyun
249*4882a593Smuzhiyun (*file_extents)[i].fe_off = off;
250*4882a593Smuzhiyun (*file_extents)[i].fe_len = len;
251*4882a593Smuzhiyun
252*4882a593Smuzhiyun blockoff = 0;
253*4882a593Smuzhiyun objoff += len;
254*4882a593Smuzhiyun objlen -= len;
255*4882a593Smuzhiyun i++;
256*4882a593Smuzhiyun }
257*4882a593Smuzhiyun
258*4882a593Smuzhiyun BUG_ON(i != *num_file_extents);
259*4882a593Smuzhiyun return 0;
260*4882a593Smuzhiyun }
261*4882a593Smuzhiyun EXPORT_SYMBOL(ceph_extent_to_file);
262*4882a593Smuzhiyun
ceph_get_num_objects(struct ceph_file_layout * l,u64 size)263*4882a593Smuzhiyun u64 ceph_get_num_objects(struct ceph_file_layout *l, u64 size)
264*4882a593Smuzhiyun {
265*4882a593Smuzhiyun u64 period = (u64)l->stripe_count * l->object_size;
266*4882a593Smuzhiyun u64 num_periods = DIV64_U64_ROUND_UP(size, period);
267*4882a593Smuzhiyun u64 remainder_bytes;
268*4882a593Smuzhiyun u64 remainder_objs = 0;
269*4882a593Smuzhiyun
270*4882a593Smuzhiyun div64_u64_rem(size, period, &remainder_bytes);
271*4882a593Smuzhiyun if (remainder_bytes > 0 &&
272*4882a593Smuzhiyun remainder_bytes < (u64)l->stripe_count * l->stripe_unit)
273*4882a593Smuzhiyun remainder_objs = l->stripe_count -
274*4882a593Smuzhiyun DIV_ROUND_UP_ULL(remainder_bytes, l->stripe_unit);
275*4882a593Smuzhiyun
276*4882a593Smuzhiyun return num_periods * l->stripe_count - remainder_objs;
277*4882a593Smuzhiyun }
278*4882a593Smuzhiyun EXPORT_SYMBOL(ceph_get_num_objects);
279