1*4882a593Smuzhiyun /*
2*4882a593Smuzhiyun * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3*4882a593Smuzhiyun */
4*4882a593Smuzhiyun
5*4882a593Smuzhiyun #include <linux/time.h>
6*4882a593Smuzhiyun #include <linux/slab.h>
7*4882a593Smuzhiyun #include <linux/string.h>
8*4882a593Smuzhiyun #include "reiserfs.h"
9*4882a593Smuzhiyun #include <linux/buffer_head.h>
10*4882a593Smuzhiyun
11*4882a593Smuzhiyun /*
12*4882a593Smuzhiyun * To make any changes in the tree we find a node that contains item
13*4882a593Smuzhiyun * to be changed/deleted or position in the node we insert a new item
14*4882a593Smuzhiyun * to. We call this node S. To do balancing we need to decide what we
15*4882a593Smuzhiyun * will shift to left/right neighbor, or to a new node, where new item
16*4882a593Smuzhiyun * will be etc. To make this analysis simpler we build virtual
17*4882a593Smuzhiyun * node. Virtual node is an array of items, that will replace items of
18*4882a593Smuzhiyun * node S. (For instance if we are going to delete an item, virtual
19*4882a593Smuzhiyun * node does not contain it). Virtual node keeps information about
20*4882a593Smuzhiyun * item sizes and types, mergeability of first and last items, sizes
21*4882a593Smuzhiyun * of all entries in directory item. We use this array of items when
22*4882a593Smuzhiyun * calculating what we can shift to neighbors and how many nodes we
 * have to have if we do not do any shifting, if we shift to left/right
24*4882a593Smuzhiyun * neighbor or to both.
25*4882a593Smuzhiyun */
26*4882a593Smuzhiyun
27*4882a593Smuzhiyun /*
28*4882a593Smuzhiyun * Takes item number in virtual node, returns number of item
29*4882a593Smuzhiyun * that it has in source buffer
30*4882a593Smuzhiyun */
old_item_num(int new_num,int affected_item_num,int mode)31*4882a593Smuzhiyun static inline int old_item_num(int new_num, int affected_item_num, int mode)
32*4882a593Smuzhiyun {
33*4882a593Smuzhiyun if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num)
34*4882a593Smuzhiyun return new_num;
35*4882a593Smuzhiyun
36*4882a593Smuzhiyun if (mode == M_INSERT) {
37*4882a593Smuzhiyun
38*4882a593Smuzhiyun RFALSE(new_num == 0,
39*4882a593Smuzhiyun "vs-8005: for INSERT mode and item number of inserted item");
40*4882a593Smuzhiyun
41*4882a593Smuzhiyun return new_num - 1;
42*4882a593Smuzhiyun }
43*4882a593Smuzhiyun
44*4882a593Smuzhiyun RFALSE(mode != M_DELETE,
45*4882a593Smuzhiyun "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'",
46*4882a593Smuzhiyun mode);
47*4882a593Smuzhiyun /* delete mode */
48*4882a593Smuzhiyun return new_num + 1;
49*4882a593Smuzhiyun }
50*4882a593Smuzhiyun
/*
 * Build the "virtual node" for level h of the balance: an in-memory
 * description (item sizes, pointers, mergeability flags) of what
 * tb->S[h] will look like after the pending insert/paste/cut/delete
 * is applied.  For internal levels (h > 0) only the prospective item
 * count is computed; the per-item array exists for the leaf level only.
 */
static void create_virtual_node(struct tree_balance *tb, int h)
{
	struct item_head *ih;
	struct virtual_node *vn = tb->tb_vn;
	int new_num;
	struct buffer_head *Sh;	/* this comes from tb->S[h] */

	Sh = PATH_H_PBUFFER(tb->tb_path, h);

	/* size of changed node: current used space plus/minus the change */
	vn->vn_size =
	    MAX_CHILD_SIZE(Sh) - B_FREE_SPACE(Sh) + tb->insert_size[h];

	/* for internal nodes the array of virtual items is not created */
	if (h) {
		vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE);
		return;
	}

	/* number of items in virtual node (insert adds one, delete removes one) */
	vn->vn_nr_item =
	    B_NR_ITEMS(Sh) + ((vn->vn_mode == M_INSERT) ? 1 : 0) -
	    ((vn->vn_mode == M_DELETE) ? 1 : 0);

	/* first virtual item lives right after the virtual_node header */
	vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1);
	memset(vn->vn_vi, 0, vn->vn_nr_item * sizeof(struct virtual_item));
	vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item);

	/* first item in the node */
	ih = item_head(Sh, 0);

	/* define the mergeability for 0-th item (if it is not being deleted) */
	if (op_is_left_mergeable(&ih->ih_key, Sh->b_size)
	    && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num))
		vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE;

	/*
	 * go through all items that remain in the virtual
	 * node (except for the new (inserted) one)
	 */
	for (new_num = 0; new_num < vn->vn_nr_item; new_num++) {
		int j;
		struct virtual_item *vi = vn->vn_vi + new_num;
		int is_affected =
		    ((new_num != vn->vn_affected_item_num) ? 0 : 1);

		/* slot of a newly inserted item is filled in after this loop */
		if (is_affected && vn->vn_mode == M_INSERT)
			continue;

		/* get item number in source node */
		j = old_item_num(new_num, vn->vn_affected_item_num,
				 vn->vn_mode);

		vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE;
		vi->vi_ih = ih + j;
		vi->vi_item = ih_item_body(Sh, ih + j);
		vi->vi_uarea = vn->vn_free_ptr;

		/*
		 * FIXME: there is no check that item operation did not
		 * consume too much memory
		 */
		vn->vn_free_ptr +=
		    op_create_vi(vn, vi, is_affected, tb->insert_size[0]);
		if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr)
			reiserfs_panic(tb->tb_sb, "vs-8030",
				       "virtual node space consumed");

		if (!is_affected)
			/* this is not being changed */
			continue;

		if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) {
			vn->vn_vi[new_num].vi_item_len += tb->insert_size[0];
			/* pointer to data which is going to be pasted */
			vi->vi_new_data = vn->vn_data;
		}
	}

	/* virtual inserted item is not defined yet */
	if (vn->vn_mode == M_INSERT) {
		struct virtual_item *vi = vn->vn_vi + vn->vn_affected_item_num;

		RFALSE(vn->vn_ins_ih == NULL,
		       "vs-8040: item header of inserted item is not specified");
		vi->vi_item_len = tb->insert_size[0];
		vi->vi_ih = vn->vn_ins_ih;
		vi->vi_item = vn->vn_data;
		vi->vi_uarea = vn->vn_free_ptr;

		op_create_vi(vn, vi, 0 /*not pasted or cut */ ,
			     tb->insert_size[0]);
	}

	/*
	 * set right merge flag: we take right delimiting key and
	 * check whether it is a mergeable item
	 */
	if (tb->CFR[0]) {
		struct reiserfs_key *key;

		key = internal_key(tb->CFR[0], tb->rkey[0]);
		if (op_is_left_mergeable(key, Sh->b_size)
		    && (vn->vn_mode != M_DELETE
			|| vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1))
			vn->vn_vi[vn->vn_nr_item - 1].vi_type |=
			    VI_TYPE_RIGHT_MERGEABLE;

#ifdef CONFIG_REISERFS_CHECK
		if (op_is_left_mergeable(key, Sh->b_size) &&
		    !(vn->vn_mode != M_DELETE
		      || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) {
			/*
			 * we delete last item and it could be merged
			 * with right neighbor's first item
			 */
			if (!
			    (B_NR_ITEMS(Sh) == 1
			     && is_direntry_le_ih(item_head(Sh, 0))
			     && ih_entry_count(item_head(Sh, 0)) == 1)) {
				/*
				 * node contains more than 1 item, or item
				 * is not directory item, or this item
				 * contains more than 1 entry
				 */
				print_block(Sh, 0, -1, -1);
				reiserfs_panic(tb->tb_sb, "vs-8045",
					       "rdkey %k, affected item==%d "
					       "(mode==%c) Must be %c",
					       key, vn->vn_affected_item_num,
					       vn->vn_mode, M_DELETE);
			}
		}
#endif

	}
}
189*4882a593Smuzhiyun
190*4882a593Smuzhiyun /*
191*4882a593Smuzhiyun * Using virtual node check, how many items can be
192*4882a593Smuzhiyun * shifted to left neighbor
193*4882a593Smuzhiyun */
/*
 * Using the virtual node, decide how much can be shifted into the left
 * neighbor given 'cur_free' bytes of free space there.  Results are
 * written into tb: tb->lnum[h] (whole items that fit) and, for h == 0,
 * tb->lbytes (units of the first split item that fit, or -1 if the
 * boundary item cannot be split).
 */
static void check_left(struct tree_balance *tb, int h, int cur_free)
{
	int i;
	struct virtual_node *vn = tb->tb_vn;
	struct virtual_item *vi;
	int d_size, ih_size;

	RFALSE(cur_free < 0, "vs-8050: cur_free (%d) < 0", cur_free);

	/* internal level: items have fixed size, simple division suffices */
	if (h > 0) {
		tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
		return;
	}

	/* leaf level */

	if (!cur_free || !vn->vn_nr_item) {
		/* no free space or nothing to move */
		tb->lnum[h] = 0;
		tb->lbytes = -1;
		return;
	}

	RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
	       "vs-8055: parent does not exist or invalid");

	vi = vn->vn_vi;
	/* a left-mergeable first item saves one item header on the move */
	if ((unsigned int)cur_free >=
	    (vn->vn_size -
	     ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? IH_SIZE : 0))) {
		/* all contents of S[0] fits into L[0] */

		RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
		       "vs-8055: invalid mode or balance condition failed");

		tb->lnum[0] = vn->vn_nr_item;
		tb->lbytes = -1;
		return;
	}

	d_size = 0, ih_size = IH_SIZE;

	/* first item may be merge with last item in left neighbor */
	if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE)
		d_size = -((int)IH_SIZE), ih_size = 0;

	tb->lnum[0] = 0;
	for (i = 0; i < vn->vn_nr_item;
	     i++, ih_size = IH_SIZE, d_size = 0, vi++) {
		d_size += vi->vi_item_len;
		if (cur_free >= d_size) {
			/* the item can be shifted entirely */
			cur_free -= d_size;
			tb->lnum[0]++;
			continue;
		}

		/* the item cannot be shifted entirely, try to split it */
		/*
		 * check whether L[0] can hold ih and at least one byte
		 * of the item body
		 */

		/* cannot shift even a part of the current item */
		if (cur_free <= ih_size) {
			tb->lbytes = -1;
			return;
		}
		cur_free -= ih_size;

		tb->lbytes = op_check_left(vi, cur_free, 0, 0);
		if (tb->lbytes != -1)
			/* count partially shifted item */
			tb->lnum[0]++;

		break;
	}

	return;
}
275*4882a593Smuzhiyun
276*4882a593Smuzhiyun /*
277*4882a593Smuzhiyun * Using virtual node check, how many items can be
278*4882a593Smuzhiyun * shifted to right neighbor
279*4882a593Smuzhiyun */
/*
 * Mirror of check_left: decide how much can be shifted into the right
 * neighbor given 'cur_free' bytes free there.  Writes tb->rnum[h]
 * (whole items) and, for h == 0, tb->rbytes (units of the last split
 * item that fit, or -1 if the boundary item cannot be split).  Items
 * are scanned from the right end of the virtual node towards the left.
 */
static void check_right(struct tree_balance *tb, int h, int cur_free)
{
	int i;
	struct virtual_node *vn = tb->tb_vn;
	struct virtual_item *vi;
	int d_size, ih_size;

	RFALSE(cur_free < 0, "vs-8070: cur_free < 0");

	/* internal level: items have fixed size, simple division suffices */
	if (h > 0) {
		tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
		return;
	}

	/* leaf level */

	if (!cur_free || !vn->vn_nr_item) {
		/* no free space */
		tb->rnum[h] = 0;
		tb->rbytes = -1;
		return;
	}

	RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
	       "vs-8075: parent does not exist or invalid");

	vi = vn->vn_vi + vn->vn_nr_item - 1;
	/* a right-mergeable last item saves one item header on the move */
	if ((unsigned int)cur_free >=
	    (vn->vn_size -
	     ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? IH_SIZE : 0))) {
		/* all contents of S[0] fits into R[0] */

		RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
		       "vs-8080: invalid mode or balance condition failed");

		tb->rnum[h] = vn->vn_nr_item;
		tb->rbytes = -1;
		return;
	}

	d_size = 0, ih_size = IH_SIZE;

	/* last item may be merge with first item in right neighbor */
	if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE)
		d_size = -(int)IH_SIZE, ih_size = 0;

	tb->rnum[0] = 0;
	for (i = vn->vn_nr_item - 1; i >= 0;
	     i--, d_size = 0, ih_size = IH_SIZE, vi--) {
		d_size += vi->vi_item_len;
		if (cur_free >= d_size) {
			/* the item can be shifted entirely */
			cur_free -= d_size;
			tb->rnum[0]++;
			continue;
		}

		/*
		 * check whether R[0] can hold ih and at least one
		 * byte of the item body
		 */

		/* cannot shift even a part of the current item */
		if (cur_free <= ih_size) {
			tb->rbytes = -1;
			return;
		}

		/*
		 * R[0] can hold the header of the item and at least
		 * one byte of its body
		 */
		cur_free -= ih_size;	/* cur_free is still > 0 */

		tb->rbytes = op_check_right(vi, cur_free);
		if (tb->rbytes != -1)
			/* count partially shifted item */
			tb->rnum[0]++;

		break;
	}

	return;
}
365*4882a593Smuzhiyun
366*4882a593Smuzhiyun /*
367*4882a593Smuzhiyun * from - number of items, which are shifted to left neighbor entirely
368*4882a593Smuzhiyun * to - number of item, which are shifted to right neighbor entirely
369*4882a593Smuzhiyun * from_bytes - number of bytes of boundary item (or directory entries)
370*4882a593Smuzhiyun * which are shifted to left neighbor
371*4882a593Smuzhiyun * to_bytes - number of bytes of boundary item (or directory entries)
372*4882a593Smuzhiyun * which are shifted to right neighbor
373*4882a593Smuzhiyun */
/*
 * Compute how many nodes are needed to hold the items of the virtual
 * node that are NOT shifted to the neighbors, and how the items are
 * distributed among S[0] and up to two new nodes.
 *
 * Returns the number of nodes needed (1..3).  On return, snum012[0..2]
 * hold the item counts for S[0], S1new, S2new; snum012[3] and snum012[4]
 * hold the unit (byte/entry) counts of split boundary items going to
 * S1new and S2new respectively, or -1 when no item is split there.
 */
static int get_num_ver(int mode, struct tree_balance *tb, int h,
		       int from, int from_bytes,
		       int to, int to_bytes, short *snum012, int flow)
{
	int i;
	int units;
	struct virtual_node *vn = tb->tb_vn;
	int total_node_size, max_node_size, current_item_size;
	int needed_nodes;

	/* position of item we start filling node from */
	int start_item;

	/* position of item we finish filling node by */
	int end_item;

	/*
	 * number of first bytes (entries for directory) of start_item-th item
	 * we do not include into node that is being filled
	 */
	int start_bytes;

	/*
	 * number of last bytes (entries for directory) of end_item-th item
	 * we do not include into node that is being filled
	 */
	int end_bytes;

	/*
	 * these are positions in virtual item of items, that are split
	 * between S[0] and S1new and S1new and S2new
	 */
	int split_item_positions[2];

	split_item_positions[0] = -1;
	split_item_positions[1] = -1;

	/*
	 * We only create additional nodes if we are in insert or paste mode
	 * or we are in replace mode at the internal level. If h is 0 and
	 * the mode is M_REPLACE then in fix_nodes we change the mode to
	 * paste or insert before we get here in the code.
	 */
	RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE),
	       "vs-8100: insert_size < 0 in overflow");

	max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h));

	/*
	 * snum012 [0-2] - number of items, that lay
	 * to S[0], first new node and second new node
	 */
	snum012[3] = -1;	/* s1bytes */
	snum012[4] = -1;	/* s2bytes */

	/* internal level: fixed-size entries, compute node count directly */
	if (h > 0) {
		i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE);
		if (i == max_node_size)
			return 1;
		return (i / max_node_size + 1);
	}

	/* leaf level */
	needed_nodes = 1;
	total_node_size = 0;

	/* start from 'from'-th item */
	start_item = from;
	/* skip its first 'start_bytes' units */
	start_bytes = ((from_bytes != -1) ? from_bytes : 0);

	/* last included item is the 'end_item'-th one */
	end_item = vn->vn_nr_item - to - 1;
	/* do not count last 'end_bytes' units of 'end_item'-th item */
	end_bytes = (to_bytes != -1) ? to_bytes : 0;

	/*
	 * go through all item beginning from the start_item-th item
	 * and ending by the end_item-th item. Do not count first
	 * 'start_bytes' units of 'start_item'-th item and last
	 * 'end_bytes' of 'end_item'-th item
	 */
	for (i = start_item; i <= end_item; i++) {
		struct virtual_item *vi = vn->vn_vi + i;
		int skip_from_end = ((i == end_item) ? end_bytes : 0);

		RFALSE(needed_nodes > 3, "vs-8105: too many nodes are needed");

		/* get size of current item */
		current_item_size = vi->vi_item_len;

		/*
		 * do not take in calculation head part (from_bytes)
		 * of from-th item
		 */
		current_item_size -=
		    op_part_size(vi, 0 /*from start */ , start_bytes);

		/* do not take in calculation tail part of last item */
		current_item_size -=
		    op_part_size(vi, 1 /*from end */ , skip_from_end);

		/* if item fits into current node entierly */
		if (total_node_size + current_item_size <= max_node_size) {
			snum012[needed_nodes - 1]++;
			total_node_size += current_item_size;
			start_bytes = 0;
			continue;
		}

		/*
		 * virtual item length is longer, than max size of item in
		 * a node. It is impossible for direct item
		 */
		if (current_item_size > max_node_size) {
			RFALSE(is_direct_le_ih(vi->vi_ih),
			       "vs-8110: "
			       "direct item length is %d. It can not be longer than %d",
			       current_item_size, max_node_size);
			/* we will try to split it */
			flow = 1;
		}

		/* as we do not split items, take new node and continue */
		if (!flow) {
			needed_nodes++;
			i--;
			total_node_size = 0;
			continue;
		}

		/*
		 * calculate number of item units which fit into node being
		 * filled
		 */
		{
			int free_space;

			free_space = max_node_size - total_node_size - IH_SIZE;
			units =
			    op_check_left(vi, free_space, start_bytes,
					  skip_from_end);
			/*
			 * nothing fits into current node, take new
			 * node and continue
			 */
			if (units == -1) {
				needed_nodes++, i--, total_node_size = 0;
				continue;
			}
		}

		/* something fits into the current node */
		start_bytes += units;
		snum012[needed_nodes - 1 + 3] = units;

		if (needed_nodes > 2)
			reiserfs_warning(tb->tb_sb, "vs-8111",
					 "split_item_position is out of range");
		snum012[needed_nodes - 1]++;
		split_item_positions[needed_nodes - 1] = i;
		needed_nodes++;
		/* continue from the same item with start_bytes != -1 */
		start_item = i;
		i--;
		total_node_size = 0;
	}

	/*
	 * sum012[4] (if it is not -1) contains number of units of which
	 * are to be in S1new, snum012[3] - to be in S0. They are supposed
	 * to be S1bytes and S2bytes correspondingly, so recalculate
	 */
	if (snum012[4] > 0) {
		int split_item_num;
		int bytes_to_r, bytes_to_l;
		int bytes_to_S1new;

		split_item_num = split_item_positions[1];
		bytes_to_l =
		    ((from == split_item_num
		      && from_bytes != -1) ? from_bytes : 0);
		bytes_to_r =
		    ((end_item == split_item_num
		      && end_bytes != -1) ? end_bytes : 0);
		bytes_to_S1new =
		    ((split_item_positions[0] ==
		      split_item_positions[1]) ? snum012[3] : 0);

		/* s2bytes */
		snum012[4] =
		    op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] -
		    bytes_to_r - bytes_to_l - bytes_to_S1new;

		if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY &&
		    vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT)
			reiserfs_warning(tb->tb_sb, "vs-8115",
					 "not directory or indirect item");
	}

	/* now we know S2bytes, calculate S1bytes */
	if (snum012[3] > 0) {
		int split_item_num;
		int bytes_to_r, bytes_to_l;
		int bytes_to_S2new;

		split_item_num = split_item_positions[0];
		bytes_to_l =
		    ((from == split_item_num
		      && from_bytes != -1) ? from_bytes : 0);
		bytes_to_r =
		    ((end_item == split_item_num
		      && end_bytes != -1) ? end_bytes : 0);
		bytes_to_S2new =
		    ((split_item_positions[0] == split_item_positions[1]
		      && snum012[4] != -1) ? snum012[4] : 0);

		/* s1bytes */
		snum012[3] =
		    op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] -
		    bytes_to_r - bytes_to_l - bytes_to_S2new;
	}

	return needed_nodes;
}
600*4882a593Smuzhiyun
601*4882a593Smuzhiyun
602*4882a593Smuzhiyun /*
603*4882a593Smuzhiyun * Set parameters for balancing.
604*4882a593Smuzhiyun * Performs write of results of analysis of balancing into structure tb,
605*4882a593Smuzhiyun * where it will later be used by the functions that actually do the balancing.
606*4882a593Smuzhiyun * Parameters:
607*4882a593Smuzhiyun * tb tree_balance structure;
608*4882a593Smuzhiyun * h current level of the node;
609*4882a593Smuzhiyun * lnum number of items from S[h] that must be shifted to L[h];
610*4882a593Smuzhiyun * rnum number of items from S[h] that must be shifted to R[h];
611*4882a593Smuzhiyun * blk_num number of blocks that S[h] will be splitted into;
612*4882a593Smuzhiyun * s012 number of items that fall into splitted nodes.
613*4882a593Smuzhiyun * lbytes number of bytes which flow to the left neighbor from the
614*4882a593Smuzhiyun * item that is not shifted entirely
615*4882a593Smuzhiyun * rbytes number of bytes which flow to the right neighbor from the
616*4882a593Smuzhiyun * item that is not shifted entirely
617*4882a593Smuzhiyun * s1bytes number of bytes which flow to the first new node when
618*4882a593Smuzhiyun * S[0] splits (this number is contained in s012 array)
619*4882a593Smuzhiyun */
620*4882a593Smuzhiyun
set_parameters(struct tree_balance * tb,int h,int lnum,int rnum,int blk_num,short * s012,int lb,int rb)621*4882a593Smuzhiyun static void set_parameters(struct tree_balance *tb, int h, int lnum,
622*4882a593Smuzhiyun int rnum, int blk_num, short *s012, int lb, int rb)
623*4882a593Smuzhiyun {
624*4882a593Smuzhiyun
625*4882a593Smuzhiyun tb->lnum[h] = lnum;
626*4882a593Smuzhiyun tb->rnum[h] = rnum;
627*4882a593Smuzhiyun tb->blknum[h] = blk_num;
628*4882a593Smuzhiyun
629*4882a593Smuzhiyun /* only for leaf level */
630*4882a593Smuzhiyun if (h == 0) {
631*4882a593Smuzhiyun if (s012 != NULL) {
632*4882a593Smuzhiyun tb->s0num = *s012++;
633*4882a593Smuzhiyun tb->snum[0] = *s012++;
634*4882a593Smuzhiyun tb->snum[1] = *s012++;
635*4882a593Smuzhiyun tb->sbytes[0] = *s012++;
636*4882a593Smuzhiyun tb->sbytes[1] = *s012;
637*4882a593Smuzhiyun }
638*4882a593Smuzhiyun tb->lbytes = lb;
639*4882a593Smuzhiyun tb->rbytes = rb;
640*4882a593Smuzhiyun }
641*4882a593Smuzhiyun PROC_INFO_ADD(tb->tb_sb, lnum[h], lnum);
642*4882a593Smuzhiyun PROC_INFO_ADD(tb->tb_sb, rnum[h], rnum);
643*4882a593Smuzhiyun
644*4882a593Smuzhiyun PROC_INFO_ADD(tb->tb_sb, lbytes[h], lb);
645*4882a593Smuzhiyun PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb);
646*4882a593Smuzhiyun }
647*4882a593Smuzhiyun
648*4882a593Smuzhiyun /*
649*4882a593Smuzhiyun * check if node disappears if we shift tb->lnum[0] items to left
650*4882a593Smuzhiyun * neighbor and tb->rnum[0] to the right one.
651*4882a593Smuzhiyun */
static int is_leaf_removable(struct tree_balance *tb)
{
	struct virtual_node *vn = tb->tb_vn;
	int to_left, to_right;
	int size;
	int remain_items;

	/*
	 * number of items that will be shifted to left (right) neighbor
	 * entirely; lbytes/rbytes != -1 indicates the border item is only
	 * partially shifted, so it is not counted as a whole item here
	 */
	to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0);
	to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0);
	remain_items = vn->vn_nr_item;

	/* how many items remain in S[0] after shiftings to neighbors */
	remain_items -= (to_left + to_right);

	/* all content of node can be shifted to neighbors */
	if (remain_items < 1) {
		set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0,
			       NULL, -1, -1);
		return 1;
	}

	/*
	 * S[0] is not removable: either more than one whole item would
	 * remain, or one of the border items cannot be split
	 */
	if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1)
		return 0;

	/* check whether we can divide 1 remaining item between neighbors */

	/* get size of remaining item (in item units) */
	size = op_unit_num(&vn->vn_vi[to_left]);

	/* the single remaining item is split: tb->lbytes units go left */
	if (tb->lbytes + tb->rbytes >= size) {
		set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL,
			       tb->lbytes, -1);
		return 1;
	}

	return 0;
}
694*4882a593Smuzhiyun
695*4882a593Smuzhiyun /* check whether L, S, R can be joined in one node */
static int are_leaves_removable(struct tree_balance *tb, int lfree, int rfree)
{
	struct virtual_node *vn = tb->tb_vn;
	int ih_size;
	struct buffer_head *S0;

	S0 = PATH_H_PBUFFER(tb->tb_path, 0);

	/*
	 * ih_size accumulates item-header bytes that merging will save:
	 * a border item that merges into its neighbor's item needs no
	 * item header of its own.
	 */
	ih_size = 0;
	if (vn->vn_nr_item) {
		if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE)
			ih_size += IH_SIZE;

		if (vn->vn_vi[vn->vn_nr_item - 1].
		    vi_type & VI_TYPE_RIGHT_MERGEABLE)
			ih_size += IH_SIZE;
	} else {
		/* there was only one item and it will be deleted */
		struct item_head *ih;

		RFALSE(B_NR_ITEMS(S0) != 1,
		       "vs-8125: item number must be 1: it is %d",
		       B_NR_ITEMS(S0));

		ih = item_head(S0, 0);
		/*
		 * NOTE(review): the is_direntry_le_ih() check below is
		 * the (unbraced) body of this condition -- it only
		 * applies when the right delimiting key is of the same
		 * object as the item being deleted.
		 */
		if (tb->CFR[0]
		    && !comp_short_le_keys(&ih->ih_key,
					   internal_key(tb->CFR[0],
							  tb->rkey[0])))
			/*
			 * Directory must be in correct state here: that is
			 * somewhere at the left side should exist first
			 * directory item. But the item being deleted can
			 * not be that first one because its right neighbor
			 * is item of the same directory. (But first item
			 * always gets deleted in last turn). So, neighbors
			 * of deleted item can be merged, so we can save
			 * ih_size
			 */
			if (is_direntry_le_ih(ih)) {
				ih_size = IH_SIZE;

				/*
				 * we might check that left neighbor exists
				 * and is of the same directory
				 */
				RFALSE(le_ih_k_offset(ih) == DOT_OFFSET,
				       "vs-8130: first directory item can not be removed until directory is not empty");
			}

	}

	/* everything in S fits into L and R: S[0] can be removed */
	if (MAX_CHILD_SIZE(S0) + vn->vn_size <= rfree + lfree + ih_size) {
		set_parameters(tb, 0, -1, -1, -1, NULL, -1, -1);
		PROC_INFO_INC(tb->tb_sb, leaves_removable);
		return 1;
	}
	return 0;

}
756*4882a593Smuzhiyun
757*4882a593Smuzhiyun /* when we do not split item, lnum and rnum are numbers of entire items */
/*
 * Record the decision to shift items into the left neighbor.
 * For internal levels (h != 0) the computed to_l evidently aims to even
 * out the fill between S[h] and its left neighbor; at the leaf level the
 * precomputed lpar/snum012 split is used, passing tb->lbytes for a split
 * border item when lset == LEFT_SHIFT_FLOW, and otherwise shifting one
 * fewer whole item when tb->lbytes != -1.
 * NOTE: expands in-place and relies on locals h, Sh, vn, lpar, lnver,
 * lset and snum012 being in scope at the expansion site.
 */
#define SET_PAR_SHIFT_LEFT \
if (h)\
{\
   int to_l;\
   \
   to_l = (MAX_NR_KEY(Sh)+1 - lpar + vn->vn_nr_item + 1) / 2 -\
	      (MAX_NR_KEY(Sh) + 1 - lpar);\
	      \
	      set_parameters (tb, h, to_l, 0, lnver, NULL, -1, -1);\
}\
else \
{\
   if (lset==LEFT_SHIFT_FLOW)\
     set_parameters (tb, h, lpar, 0, lnver, snum012+lset,\
		     tb->lbytes, -1);\
   else\
     set_parameters (tb, h, lpar - (tb->lbytes!=-1), 0, lnver, snum012+lset,\
		     -1, -1);\
}
777*4882a593Smuzhiyun
/*
 * Record the decision to shift items into the right neighbor.
 * Mirror image of SET_PAR_SHIFT_LEFT: for internal levels (h != 0) the
 * computed to_r evidently evens out the fill between S[h] and the right
 * neighbor; at the leaf level the precomputed rpar/snum012 split is
 * used, passing tb->rbytes for a split border item when
 * rset == RIGHT_SHIFT_FLOW.
 * NOTE: expands in-place and relies on locals h, Sh, vn, rpar, rnver,
 * rset and snum012 being in scope at the expansion site.
 */
#define SET_PAR_SHIFT_RIGHT \
if (h)\
{\
   int to_r;\
   \
   to_r = (MAX_NR_KEY(Sh)+1 - rpar + vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - rpar);\
   \
   set_parameters (tb, h, 0, to_r, rnver, NULL, -1, -1);\
}\
else \
{\
   if (rset==RIGHT_SHIFT_FLOW)\
     set_parameters (tb, h, 0, rpar, rnver, snum012+rset,\
		  -1, tb->rbytes);\
   else\
     set_parameters (tb, h, 0, rpar - (tb->rbytes!=-1), rnver, snum012+rset,\
		  -1, -1);\
}
796*4882a593Smuzhiyun
free_buffers_in_tb(struct tree_balance * tb)797*4882a593Smuzhiyun static void free_buffers_in_tb(struct tree_balance *tb)
798*4882a593Smuzhiyun {
799*4882a593Smuzhiyun int i;
800*4882a593Smuzhiyun
801*4882a593Smuzhiyun pathrelse(tb->tb_path);
802*4882a593Smuzhiyun
803*4882a593Smuzhiyun for (i = 0; i < MAX_HEIGHT; i++) {
804*4882a593Smuzhiyun brelse(tb->L[i]);
805*4882a593Smuzhiyun brelse(tb->R[i]);
806*4882a593Smuzhiyun brelse(tb->FL[i]);
807*4882a593Smuzhiyun brelse(tb->FR[i]);
808*4882a593Smuzhiyun brelse(tb->CFL[i]);
809*4882a593Smuzhiyun brelse(tb->CFR[i]);
810*4882a593Smuzhiyun
811*4882a593Smuzhiyun tb->L[i] = NULL;
812*4882a593Smuzhiyun tb->R[i] = NULL;
813*4882a593Smuzhiyun tb->FL[i] = NULL;
814*4882a593Smuzhiyun tb->FR[i] = NULL;
815*4882a593Smuzhiyun tb->CFL[i] = NULL;
816*4882a593Smuzhiyun tb->CFR[i] = NULL;
817*4882a593Smuzhiyun }
818*4882a593Smuzhiyun }
819*4882a593Smuzhiyun
820*4882a593Smuzhiyun /*
821*4882a593Smuzhiyun * Get new buffers for storing new nodes that are created while balancing.
822*4882a593Smuzhiyun * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
823*4882a593Smuzhiyun * CARRY_ON - schedule didn't occur while the function worked;
824*4882a593Smuzhiyun * NO_DISK_SPACE - no disk space.
825*4882a593Smuzhiyun */
826*4882a593Smuzhiyun /* The function is NOT SCHEDULE-SAFE! */
static int get_empty_nodes(struct tree_balance *tb, int h)
{
	struct buffer_head *new_bh, *Sh = PATH_H_PBUFFER(tb->tb_path, h);
	b_blocknr_t *blocknr, blocknrs[MAX_AMOUNT_NEEDED] = { 0, };
	int counter, number_of_freeblk;
	int amount_needed;	/* number of needed empty blocks */
	int retval = CARRY_ON;
	struct super_block *sb = tb->tb_sb;

	/*
	 * number_of_freeblk is the number of empty blocks which have been
	 * acquired for use by the balancing algorithm minus the number of
	 * empty blocks used in the previous levels of the analysis,
	 * number_of_freeblk = tb->cur_blknum can be non-zero if a schedule
	 * occurs after empty blocks are acquired, and the balancing analysis
	 * is then restarted, amount_needed is the number needed by this
	 * level (h) of the balancing analysis.
	 *
	 * Note that for systems with many processes writing, it would be
	 * more layout optimal to calculate the total number needed by all
	 * levels and then to run reiserfs_new_blocks to get all of them at
	 * once.
	 */

	/*
	 * Initiate number_of_freeblk to the amount acquired prior to the
	 * restart of the analysis or 0 if not restarted, then subtract the
	 * amount needed by all of the levels of the tree below h.
	 */
	/* blknum includes S[h], so we subtract 1 in this calculation */
	for (counter = 0, number_of_freeblk = tb->cur_blknum;
	     counter < h; counter++)
		number_of_freeblk -=
		    (tb->blknum[counter]) ? (tb->blknum[counter] -
						   1) : 0;

	/* Allocate missing empty blocks. */
	/* if Sh == 0 then we are getting a new root */
	amount_needed = (Sh) ? (tb->blknum[h] - 1) : 1;
	/*
	 * Amount_needed = the amount that we need more than the
	 * amount that we have.
	 */
	if (amount_needed > number_of_freeblk)
		amount_needed -= number_of_freeblk;
	else	/* If we have enough already then there is nothing to do. */
		return CARRY_ON;

	/*
	 * No need to check quota - is not allocated for blocks used
	 * for formatted nodes
	 */
	if (reiserfs_new_form_blocknrs(tb, blocknrs,
				       amount_needed) == NO_DISK_SPACE)
		return NO_DISK_SPACE;

	/* for each blocknumber we just got, get a buffer and stick it on FEB */
	for (blocknr = blocknrs, counter = 0;
	     counter < amount_needed; blocknr++, counter++) {

		RFALSE(!*blocknr,
		       "PAP-8135: reiserfs_new_blocknrs failed when got new blocks");

		/*
		 * NOTE(review): sb_getblk() can return NULL under memory
		 * pressure; that case is not handled here -- confirm
		 * this is acceptable for the balancing path.
		 */
		new_bh = sb_getblk(sb, *blocknr);
		RFALSE(buffer_dirty(new_bh) ||
		       buffer_journaled(new_bh) ||
		       buffer_journal_dirty(new_bh),
		       "PAP-8140: journaled or dirty buffer %b for the new block",
		       new_bh);

		/* Put empty buffers into the array. */
		RFALSE(tb->FEB[tb->cur_blknum],
		       "PAP-8141: busy slot for new buffer");

		set_buffer_journal_new(new_bh);
		tb->FEB[tb->cur_blknum++] = new_bh;
	}

	/*
	 * If another process changed the tree while we slept in block
	 * allocation, the whole analysis must be redone.
	 */
	if (retval == CARRY_ON && FILESYSTEM_CHANGED_TB(tb))
		retval = REPEAT_SEARCH;

	return retval;
}
910*4882a593Smuzhiyun
911*4882a593Smuzhiyun /*
912*4882a593Smuzhiyun * Get free space of the left neighbor, which is stored in the parent
913*4882a593Smuzhiyun * node of the left neighbor.
914*4882a593Smuzhiyun */
get_lfree(struct tree_balance * tb,int h)915*4882a593Smuzhiyun static int get_lfree(struct tree_balance *tb, int h)
916*4882a593Smuzhiyun {
917*4882a593Smuzhiyun struct buffer_head *l, *f;
918*4882a593Smuzhiyun int order;
919*4882a593Smuzhiyun
920*4882a593Smuzhiyun if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL ||
921*4882a593Smuzhiyun (l = tb->FL[h]) == NULL)
922*4882a593Smuzhiyun return 0;
923*4882a593Smuzhiyun
924*4882a593Smuzhiyun if (f == l)
925*4882a593Smuzhiyun order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) - 1;
926*4882a593Smuzhiyun else {
927*4882a593Smuzhiyun order = B_NR_ITEMS(l);
928*4882a593Smuzhiyun f = l;
929*4882a593Smuzhiyun }
930*4882a593Smuzhiyun
931*4882a593Smuzhiyun return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
932*4882a593Smuzhiyun }
933*4882a593Smuzhiyun
934*4882a593Smuzhiyun /*
935*4882a593Smuzhiyun * Get free space of the right neighbor,
936*4882a593Smuzhiyun * which is stored in the parent node of the right neighbor.
937*4882a593Smuzhiyun */
get_rfree(struct tree_balance * tb,int h)938*4882a593Smuzhiyun static int get_rfree(struct tree_balance *tb, int h)
939*4882a593Smuzhiyun {
940*4882a593Smuzhiyun struct buffer_head *r, *f;
941*4882a593Smuzhiyun int order;
942*4882a593Smuzhiyun
943*4882a593Smuzhiyun if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL ||
944*4882a593Smuzhiyun (r = tb->FR[h]) == NULL)
945*4882a593Smuzhiyun return 0;
946*4882a593Smuzhiyun
947*4882a593Smuzhiyun if (f == r)
948*4882a593Smuzhiyun order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) + 1;
949*4882a593Smuzhiyun else {
950*4882a593Smuzhiyun order = 0;
951*4882a593Smuzhiyun f = r;
952*4882a593Smuzhiyun }
953*4882a593Smuzhiyun
954*4882a593Smuzhiyun return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
955*4882a593Smuzhiyun
956*4882a593Smuzhiyun }
957*4882a593Smuzhiyun
958*4882a593Smuzhiyun /* Check whether left neighbor is in memory. */
is_left_neighbor_in_cache(struct tree_balance * tb,int h)959*4882a593Smuzhiyun static int is_left_neighbor_in_cache(struct tree_balance *tb, int h)
960*4882a593Smuzhiyun {
961*4882a593Smuzhiyun struct buffer_head *father, *left;
962*4882a593Smuzhiyun struct super_block *sb = tb->tb_sb;
963*4882a593Smuzhiyun b_blocknr_t left_neighbor_blocknr;
964*4882a593Smuzhiyun int left_neighbor_position;
965*4882a593Smuzhiyun
966*4882a593Smuzhiyun /* Father of the left neighbor does not exist. */
967*4882a593Smuzhiyun if (!tb->FL[h])
968*4882a593Smuzhiyun return 0;
969*4882a593Smuzhiyun
970*4882a593Smuzhiyun /* Calculate father of the node to be balanced. */
971*4882a593Smuzhiyun father = PATH_H_PBUFFER(tb->tb_path, h + 1);
972*4882a593Smuzhiyun
973*4882a593Smuzhiyun RFALSE(!father ||
974*4882a593Smuzhiyun !B_IS_IN_TREE(father) ||
975*4882a593Smuzhiyun !B_IS_IN_TREE(tb->FL[h]) ||
976*4882a593Smuzhiyun !buffer_uptodate(father) ||
977*4882a593Smuzhiyun !buffer_uptodate(tb->FL[h]),
978*4882a593Smuzhiyun "vs-8165: F[h] (%b) or FL[h] (%b) is invalid",
979*4882a593Smuzhiyun father, tb->FL[h]);
980*4882a593Smuzhiyun
981*4882a593Smuzhiyun /*
982*4882a593Smuzhiyun * Get position of the pointer to the left neighbor
983*4882a593Smuzhiyun * into the left father.
984*4882a593Smuzhiyun */
985*4882a593Smuzhiyun left_neighbor_position = (father == tb->FL[h]) ?
986*4882a593Smuzhiyun tb->lkey[h] : B_NR_ITEMS(tb->FL[h]);
987*4882a593Smuzhiyun /* Get left neighbor block number. */
988*4882a593Smuzhiyun left_neighbor_blocknr =
989*4882a593Smuzhiyun B_N_CHILD_NUM(tb->FL[h], left_neighbor_position);
990*4882a593Smuzhiyun /* Look for the left neighbor in the cache. */
991*4882a593Smuzhiyun if ((left = sb_find_get_block(sb, left_neighbor_blocknr))) {
992*4882a593Smuzhiyun
993*4882a593Smuzhiyun RFALSE(buffer_uptodate(left) && !B_IS_IN_TREE(left),
994*4882a593Smuzhiyun "vs-8170: left neighbor (%b %z) is not in the tree",
995*4882a593Smuzhiyun left, left);
996*4882a593Smuzhiyun put_bh(left);
997*4882a593Smuzhiyun return 1;
998*4882a593Smuzhiyun }
999*4882a593Smuzhiyun
1000*4882a593Smuzhiyun return 0;
1001*4882a593Smuzhiyun }
1002*4882a593Smuzhiyun
1003*4882a593Smuzhiyun #define LEFT_PARENTS 'l'
1004*4882a593Smuzhiyun #define RIGHT_PARENTS 'r'
1005*4882a593Smuzhiyun
decrement_key(struct cpu_key * key)1006*4882a593Smuzhiyun static void decrement_key(struct cpu_key *key)
1007*4882a593Smuzhiyun {
1008*4882a593Smuzhiyun /* call item specific function for this key */
1009*4882a593Smuzhiyun item_ops[cpu_key_k_type(key)]->decrement_key(key);
1010*4882a593Smuzhiyun }
1011*4882a593Smuzhiyun
1012*4882a593Smuzhiyun /*
1013*4882a593Smuzhiyun * Calculate far left/right parent of the left/right neighbor of the
1014*4882a593Smuzhiyun * current node, that is calculate the left/right (FL[h]/FR[h]) neighbor
1015*4882a593Smuzhiyun * of the parent F[h].
1016*4882a593Smuzhiyun * Calculate left/right common parent of the current node and L[h]/R[h].
1017*4882a593Smuzhiyun * Calculate left/right delimiting key position.
1018*4882a593Smuzhiyun * Returns: PATH_INCORRECT - path in the tree is not correct
1019*4882a593Smuzhiyun * SCHEDULE_OCCURRED - schedule occurred while the function worked
1020*4882a593Smuzhiyun * CARRY_ON - schedule didn't occur while the function
1021*4882a593Smuzhiyun * worked
1022*4882a593Smuzhiyun */
static int get_far_parent(struct tree_balance *tb,
			  int h,
			  struct buffer_head **pfather,
			  struct buffer_head **pcom_father, char c_lr_par)
{
	struct buffer_head *parent;
	INITIALIZE_PATH(s_path_to_neighbor_father);
	struct treepath *path = tb->tb_path;
	struct cpu_key s_lr_father_key;
	int counter,
	    position = INT_MAX,
	    first_last_position = 0,
	    path_offset = PATH_H_PATH_OFFSET(path, h);

	/*
	 * Starting from F[h] go upwards in the tree, and look for the common
	 * ancestor of F[h], and its neighbor l/r, that should be obtained.
	 */

	counter = path_offset;

	RFALSE(counter < FIRST_PATH_ELEMENT_OFFSET,
	       "PAP-8180: invalid path length");

	/*
	 * Walk up the path; at every step validate that the cached path
	 * element is still a correct parent, since the tree may have
	 * changed while the path was held.
	 */
	for (; counter > FIRST_PATH_ELEMENT_OFFSET; counter--) {
		/*
		 * Check whether parent of the current buffer in the path
		 * is really parent in the tree.
		 */
		if (!B_IS_IN_TREE
		    (parent = PATH_OFFSET_PBUFFER(path, counter - 1)))
			return REPEAT_SEARCH;

		/* Check whether position in the parent is correct. */
		if ((position =
		     PATH_OFFSET_POSITION(path,
					  counter - 1)) >
		    B_NR_ITEMS(parent))
			return REPEAT_SEARCH;

		/*
		 * Check whether parent at the path really points
		 * to the child.
		 */
		if (B_N_CHILD_NUM(parent, position) !=
		    PATH_OFFSET_PBUFFER(path, counter)->b_blocknr)
			return REPEAT_SEARCH;

		/*
		 * Return delimiting key if position in the parent is not
		 * equal to first/last one.  (first_last_position is 0 for
		 * the left case, B_NR_ITEMS(parent) for the right case.)
		 */
		if (c_lr_par == RIGHT_PARENTS)
			first_last_position = B_NR_ITEMS(parent);
		if (position != first_last_position) {
			*pcom_father = parent;
			get_bh(*pcom_father);
			/*(*pcom_father = parent)->b_count++; */
			break;
		}
	}

	/* if we are in the root of the tree, then there is no common father */
	if (counter == FIRST_PATH_ELEMENT_OFFSET) {
		/*
		 * Check whether first buffer in the path is the
		 * root of the tree.
		 */
		if (PATH_OFFSET_PBUFFER
		    (tb->tb_path,
		     FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
		    SB_ROOT_BLOCK(tb->tb_sb)) {
			*pfather = *pcom_father = NULL;
			return CARRY_ON;
		}
		/* path no longer reaches the real root: must re-search */
		return REPEAT_SEARCH;
	}

	RFALSE(B_LEVEL(*pcom_father) <= DISK_LEAF_NODE_LEVEL,
	       "PAP-8185: (%b %z) level too small",
	       *pcom_father, *pcom_father);

	/* Check whether the common parent is locked. */

	if (buffer_locked(*pcom_father)) {

		/* Release the write lock while the buffer is busy */
		int depth = reiserfs_write_unlock_nested(tb->tb_sb);
		__wait_on_buffer(*pcom_father);
		reiserfs_write_lock_nested(tb->tb_sb, depth);
		/* the tree may have changed while we slept on the buffer */
		if (FILESYSTEM_CHANGED_TB(tb)) {
			brelse(*pcom_father);
			return REPEAT_SEARCH;
		}
	}

	/*
	 * So, we got common parent of the current node and its
	 * left/right neighbor. Now we are getting the parent of the
	 * left/right neighbor.
	 */

	/*
	 * Form key to get parent of the left/right neighbor; as a side
	 * effect record the delimiting key position in lkey[]/rkey[].
	 */
	le_key2cpu_key(&s_lr_father_key,
		       internal_key(*pcom_father,
					   (c_lr_par ==
					    LEFT_PARENTS) ? (tb->lkey[h - 1] =
							     position -
							     1) : (tb->rkey[h -
									   1] =
								   position)));

	if (c_lr_par == LEFT_PARENTS)
		decrement_key(&s_lr_father_key);

	if (search_by_key
	    (tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father,
	     h + 1) == IO_ERROR)
		/* path is released */
		return IO_ERROR;

	if (FILESYSTEM_CHANGED_TB(tb)) {
		pathrelse(&s_path_to_neighbor_father);
		brelse(*pcom_father);
		return REPEAT_SEARCH;
	}

	*pfather = PATH_PLAST_BUFFER(&s_path_to_neighbor_father);

	RFALSE(B_LEVEL(*pfather) != h + 1,
	       "PAP-8190: (%b %z) level too small", *pfather, *pfather);
	RFALSE(s_path_to_neighbor_father.path_length <
	       FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small");

	/*
	 * Keep the last buffer (the neighbor's parent, returned via
	 * *pfather) referenced while releasing the rest of the path.
	 */
	s_path_to_neighbor_father.path_length--;
	pathrelse(&s_path_to_neighbor_father);
	return CARRY_ON;
}
1161*4882a593Smuzhiyun
1162*4882a593Smuzhiyun /*
1163*4882a593Smuzhiyun * Get parents of neighbors of node in the path(S[path_offset]) and
1164*4882a593Smuzhiyun * common parents of S[path_offset] and L[path_offset]/R[path_offset]:
1165*4882a593Smuzhiyun * F[path_offset], FL[path_offset], FR[path_offset], CFL[path_offset],
1166*4882a593Smuzhiyun * CFR[path_offset].
1167*4882a593Smuzhiyun * Calculate numbers of left and right delimiting keys position:
1168*4882a593Smuzhiyun * lkey[path_offset], rkey[path_offset].
1169*4882a593Smuzhiyun * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked
1170*4882a593Smuzhiyun * CARRY_ON - schedule didn't occur while the function worked
1171*4882a593Smuzhiyun */
static int get_parents(struct tree_balance *tb, int h)
{
	struct treepath *path = tb->tb_path;
	int position,
	    ret,
	    path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h);
	struct buffer_head *curf, *curcf;

	/* Current node is the root of the tree or will be root of the tree */
	if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
		/*
		 * The root can not have parents.
		 * Release nodes which previously were obtained as
		 * parents of the current node neighbors.
		 */
		brelse(tb->FL[h]);
		brelse(tb->CFL[h]);
		brelse(tb->FR[h]);
		brelse(tb->CFR[h]);
		tb->FL[h] = NULL;
		tb->CFL[h] = NULL;
		tb->FR[h] = NULL;
		tb->CFR[h] = NULL;
		return CARRY_ON;
	}

	/* Get parent FL[path_offset] of L[path_offset]. */
	position = PATH_OFFSET_POSITION(path, path_offset - 1);
	if (position) {
		/* Current node is not the first child of its parent. */
		curf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
		curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
		/*
		 * FL[h] and CFL[h] will both point at this same parent
		 * buffer, so take two references to it.
		 */
		get_bh(curf);
		get_bh(curf);
		tb->lkey[h] = position - 1;
	} else {
		/*
		 * Calculate current parent of L[path_offset], which is the
		 * left neighbor of the current node.  Calculate current
		 * common parent of L[path_offset] and the current node.
		 * Note that CFL[path_offset] not equal FL[path_offset] and
		 * CFL[path_offset] not equal F[path_offset].
		 * Calculate lkey[path_offset].
		 */
		if ((ret = get_far_parent(tb, h + 1, &curf,
						  &curcf,
						  LEFT_PARENTS)) != CARRY_ON)
			return ret;
	}

	brelse(tb->FL[h]);
	tb->FL[h] = curf;	/* New initialization of FL[h]. */
	brelse(tb->CFL[h]);
	tb->CFL[h] = curcf;	/* New initialization of CFL[h]. */

	RFALSE((curf && !B_IS_IN_TREE(curf)) ||
	       (curcf && !B_IS_IN_TREE(curcf)),
	       "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf);

	/* Get parent FR[h] of R[h]. */

	/* Current node is the last child of F[h]. FR[h] != F[h]. */
	if (position == B_NR_ITEMS(PATH_H_PBUFFER(path, h + 1))) {
		/*
		 * Calculate current parent of R[h], which is the right
		 * neighbor of F[h].  Calculate current common parent of
		 * R[h] and current node.  Note that CFR[h] not equal
		 * FR[path_offset] and CFR[h] not equal F[h].
		 */
		if ((ret =
		     get_far_parent(tb, h + 1, &curf, &curcf,
				    RIGHT_PARENTS)) != CARRY_ON)
			return ret;
	} else {
		/* Current node is not the last child of its parent F[h]. */
		curf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
		curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
		/*
		 * As above: FR[h] and CFR[h] will share this buffer, so
		 * two references are taken.
		 */
		get_bh(curf);
		get_bh(curf);
		tb->rkey[h] = position;
	}

	brelse(tb->FR[h]);
	/* New initialization of FR[path_offset]. */
	tb->FR[h] = curf;

	brelse(tb->CFR[h]);
	/* New initialization of CFR[path_offset]. */
	tb->CFR[h] = curcf;

	RFALSE((curf && !B_IS_IN_TREE(curf)) ||
	       (curcf && !B_IS_IN_TREE(curcf)),
	       "PAP-8205: FR (%b) or CFR (%b) is invalid", curf, curcf);

	return CARRY_ON;
}
1268*4882a593Smuzhiyun
1269*4882a593Smuzhiyun /*
1270*4882a593Smuzhiyun * it is possible to remove node as result of shiftings to
1271*4882a593Smuzhiyun * neighbors even when we insert or paste item.
1272*4882a593Smuzhiyun */
static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
				      struct tree_balance *tb, int h)
{
	struct buffer_head *Sh = PATH_H_PBUFFER(tb->tb_path, h);
	int levbytes = tb->insert_size[h];
	struct item_head *ih;
	struct reiserfs_key *r_key = NULL;

	ih = item_head(Sh, 0);
	if (tb->CFR[h])
		r_key = internal_key(tb->CFR[h], tb->rkey[h]);

	/*
	 * S[h] cannot be emptied into L[h] and R[h]: the combined free
	 * space is less than S[h]'s contents plus the bytes being added,
	 * minus the item headers that merging border items would save
	 * (leaf level only), plus the delimiting key an internal node
	 * would push up.
	 */
	if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes
	    /* shifting may merge items which might save space */
	    -
	    ((!h
	      && op_is_left_mergeable(&ih->ih_key, Sh->b_size)) ? IH_SIZE : 0)
	    -
	    ((!h && r_key
	      && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0)
	    + ((h) ? KEY_SIZE : 0)) {
		/* node can not be removed */
		if (sfree >= levbytes) {
			/* new item fits into node S[h] without any shifting */
			if (!h)
				tb->s0num =
				    B_NR_ITEMS(Sh) +
				    ((mode == M_INSERT) ? 1 : 0);
			set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
			return NO_BALANCING_NEEDED;
		}
	}
	/* removal may be possible; caller must do the full analysis */
	PROC_INFO_INC(tb->tb_sb, can_node_be_removed[h]);
	return !NO_BALANCING_NEEDED;
}
1308*4882a593Smuzhiyun
1309*4882a593Smuzhiyun /*
1310*4882a593Smuzhiyun * Check whether current node S[h] is balanced when increasing its size by
1311*4882a593Smuzhiyun * Inserting or Pasting.
1312*4882a593Smuzhiyun * Calculate parameters for balancing for current level h.
1313*4882a593Smuzhiyun * Parameters:
1314*4882a593Smuzhiyun * tb tree_balance structure;
1315*4882a593Smuzhiyun * h current level of the node;
1316*4882a593Smuzhiyun * inum item number in S[h];
1317*4882a593Smuzhiyun * mode i - insert, p - paste;
1318*4882a593Smuzhiyun * Returns: 1 - schedule occurred;
1319*4882a593Smuzhiyun * 0 - balancing for higher levels needed;
1320*4882a593Smuzhiyun * -1 - no balancing for higher levels needed;
1321*4882a593Smuzhiyun * -2 - no disk space.
1322*4882a593Smuzhiyun */
1323*4882a593Smuzhiyun /* ip means Inserting or Pasting */
static int ip_check_balance(struct tree_balance *tb, int h)
{
	struct virtual_node *vn = tb->tb_vn;
	/*
	 * Number of bytes that must be inserted into (value is negative
	 * if bytes are deleted) buffer which contains node being balanced.
	 * The mnemonic is that the attempted change in node space used
	 * level is levbytes bytes.
	 */
	int levbytes;
	int ret;

	int lfree, sfree, rfree /* free space in L, S and R */ ;

	/*
	 * nver is short for number of vertixes, and lnver is the number if
	 * we shift to the left, rnver is the number if we shift to the
	 * right, and lrnver is the number if we shift in both directions.
	 * The goal is to minimize first the number of vertixes, and second,
	 * the number of vertixes whose contents are changed by shifting,
	 * and third the number of uncached vertixes whose contents are
	 * changed by shifting and must be read from disk.
	 */
	int nver, lnver, rnver, lrnver;

	/*
	 * used at leaf level only, S0 = S[0] is the node being balanced,
	 * sInum [ I = 0,1,2 ] is the number of items that will
	 * remain in node SI after balancing. S1 and S2 are new
	 * nodes that might be created.
	 */

	/*
	 * we perform 8 calls to get_num_ver(). For each call we
	 * calculate five parameters. where 4th parameter is s1bytes
	 * and 5th - s2bytes
	 *
	 * s0num, s1num, s2num for 8 cases
	 * 0,1 - do not shift and do not shift but bottle
	 * 2 - shift only whole item to left
	 * 3 - shift to left and bottle as much as possible
	 * 4,5 - shift to right (whole items and as much as possible
	 * 6,7 - shift to both directions (whole items and as much as possible)
	 */
	/*
	 * 8 cases x 5 slots per case = 40 entries; the *_SHIFT_* offsets
	 * below (0, 5, 10, ... 35) index the 5-entry group for each case.
	 */
	short snum012[40] = { 0, };

	/* Sh is the node whose balance is currently being checked */
	struct buffer_head *Sh;

	Sh = PATH_H_PBUFFER(tb->tb_path, h);
	levbytes = tb->insert_size[h];

	/* Calculate balance parameters for creating new root. */
	if (!Sh) {
		if (!h)
			reiserfs_panic(tb->tb_sb, "vs-8210",
				       "S[0] can not be 0");
		switch (ret = get_empty_nodes(tb, h)) {
		/* no balancing for higher levels needed */
		case CARRY_ON:
			set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
			return NO_BALANCING_NEEDED;

		case NO_DISK_SPACE:
		case REPEAT_SEARCH:
			return ret;
		default:
			reiserfs_panic(tb->tb_sb, "vs-8215", "incorrect "
				       "return value of get_empty_nodes");
		}
	}

	/* get parents of S[h] neighbors. */
	ret = get_parents(tb, h);
	if (ret != CARRY_ON)
		return ret;

	sfree = B_FREE_SPACE(Sh);

	/* get free space of neighbors */
	rfree = get_rfree(tb, h);
	lfree = get_lfree(tb, h);

	/* and new item fits into node S[h] without any shifting */
	if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) ==
	    NO_BALANCING_NEEDED)
		return NO_BALANCING_NEEDED;

	create_virtual_node(tb, h);

	/*
	 * determine maximal number of items we can shift to the left
	 * neighbor (in tb structure) and the maximal number of bytes
	 * that can flow to the left neighbor from the left most liquid
	 * item that cannot be shifted from S[0] entirely (returned value)
	 */
	check_left(tb, h, lfree);

	/*
	 * determine maximal number of items we can shift to the right
	 * neighbor (in tb structure) and the maximal number of bytes
	 * that can flow to the right neighbor from the right most liquid
	 * item that cannot be shifted from S[0] entirely (returned value)
	 */
	check_right(tb, h, rfree);

	/*
	 * all contents of internal node S[h] can be moved into its
	 * neighbors, S[h] will be removed after balancing
	 */
	if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) {
		int to_r;

		/*
		 * Since we are working on internal nodes, and our internal
		 * nodes have fixed size entries, then we can balance by the
		 * number of items rather than the space they consume. In this
		 * routine we set the left node equal to the right node,
		 * allowing a difference of less than or equal to 1 child
		 * pointer.
		 */
		to_r =
		    ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
		     vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
						tb->rnum[h]);
		set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
			       -1, -1);
		return CARRY_ON;
	}

	/*
	 * this checks balance condition, that any two neighboring nodes
	 * can not fit in one node
	 */
	RFALSE(h &&
	       (tb->lnum[h] >= vn->vn_nr_item + 1 ||
		tb->rnum[h] >= vn->vn_nr_item + 1),
	       "vs-8220: tree is not balanced on internal level");
	RFALSE(!h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) ||
		      (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))),
	       "vs-8225: tree is not balanced on leaf level");

	/*
	 * all contents of S[0] can be moved into its neighbors
	 * S[0] will be removed after balancing.
	 */
	if (!h && is_leaf_removable(tb))
		return CARRY_ON;

	/*
	 * why do we perform this check here rather than earlier??
	 * Answer: we can win 1 node in some cases above. Moreover we
	 * checked it above, when we checked, that S[0] is not removable
	 * in principle
	 */

	/* new item fits into node S[h] without any shifting */
	if (sfree >= levbytes) {
		if (!h)
			tb->s0num = vn->vn_nr_item;
		set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
		return NO_BALANCING_NEEDED;
	}

	{
		int lpar, rpar, nset, lset, rset, lrset;
		/* regular overflowing of the node */

		/*
		 * get_num_ver works in 2 modes (FLOW & NO_FLOW)
		 * lpar, rpar - number of items we can shift to left/right
		 *              neighbor (including splitting item)
		 * nset, lset, rset, lrset - shows, whether flowing items
		 *                           give better packing
		 */
#define FLOW 1
#define NO_FLOW 0		/* do not any splitting */

		/* we choose one of the following */
#define NOTHING_SHIFT_NO_FLOW	0
#define NOTHING_SHIFT_FLOW	5
#define LEFT_SHIFT_NO_FLOW	10
#define LEFT_SHIFT_FLOW		15
#define RIGHT_SHIFT_NO_FLOW	20
#define RIGHT_SHIFT_FLOW	25
#define LR_SHIFT_NO_FLOW	30
#define LR_SHIFT_FLOW		35

		/*
		 * lnum[h]/rnum[h] were filled in by check_left()/check_right()
		 * above with the maximal shiftable item counts.
		 */
		lpar = tb->lnum[h];
		rpar = tb->rnum[h];

		/*
		 * calculate number of blocks S[h] must be split into when
		 * nothing is shifted to the neighbors, as well as number of
		 * items in each part of the split node (s012 numbers),
		 * and number of bytes (s1bytes) of the shared drop which
		 * flow to S1 if any
		 */
		nset = NOTHING_SHIFT_NO_FLOW;
		nver = get_num_ver(vn->vn_mode, tb, h,
				   0, -1, h ? vn->vn_nr_item : 0, -1,
				   snum012, NO_FLOW);

		if (!h) {
			int nver1;

			/*
			 * note, that in this case we try to bottle
			 * between S[0] and S1 (S1 - the first new node)
			 */
			nver1 = get_num_ver(vn->vn_mode, tb, h,
					    0, -1, 0, -1,
					    snum012 + NOTHING_SHIFT_FLOW, FLOW);
			/* fewer resulting nodes wins: prefer the FLOW variant */
			if (nver > nver1)
				nset = NOTHING_SHIFT_FLOW, nver = nver1;
		}

		/*
		 * calculate number of blocks S[h] must be split into when
		 * l_shift_num first items and l_shift_bytes of the right
		 * most liquid item to be shifted are shifted to the left
		 * neighbor, as well as number of items in each part of the
		 * splitted node (s012 numbers), and number of bytes
		 * (s1bytes) of the shared drop which flow to S1 if any
		 */
		lset = LEFT_SHIFT_NO_FLOW;
		lnver = get_num_ver(vn->vn_mode, tb, h,
				    lpar - ((h || tb->lbytes == -1) ? 0 : 1),
				    -1, h ? vn->vn_nr_item : 0, -1,
				    snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW);
		if (!h) {
			int lnver1;

			lnver1 = get_num_ver(vn->vn_mode, tb, h,
					     lpar -
					     ((tb->lbytes != -1) ? 1 : 0),
					     tb->lbytes, 0, -1,
					     snum012 + LEFT_SHIFT_FLOW, FLOW);
			if (lnver > lnver1)
				lset = LEFT_SHIFT_FLOW, lnver = lnver1;
		}

		/*
		 * calculate number of blocks S[h] must be split into when
		 * r_shift_num first items and r_shift_bytes of the left most
		 * liquid item to be shifted are shifted to the right neighbor,
		 * as well as number of items in each part of the splitted
		 * node (s012 numbers), and number of bytes (s1bytes) of the
		 * shared drop which flow to S1 if any
		 */
		rset = RIGHT_SHIFT_NO_FLOW;
		rnver = get_num_ver(vn->vn_mode, tb, h,
				    0, -1,
				    h ? (vn->vn_nr_item - rpar) : (rpar -
								   ((tb->
								     rbytes !=
								     -1) ? 1 :
								    0)), -1,
				    snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW);
		if (!h) {
			int rnver1;

			rnver1 = get_num_ver(vn->vn_mode, tb, h,
					     0, -1,
					     (rpar -
					      ((tb->rbytes != -1) ? 1 : 0)),
					     tb->rbytes,
					     snum012 + RIGHT_SHIFT_FLOW, FLOW);

			if (rnver > rnver1)
				rset = RIGHT_SHIFT_FLOW, rnver = rnver1;
		}

		/*
		 * calculate number of blocks S[h] must be split into when
		 * items are shifted in both directions, as well as number
		 * of items in each part of the splitted node (s012 numbers),
		 * and number of bytes (s1bytes) of the shared drop which
		 * flow to S1 if any
		 */
		lrset = LR_SHIFT_NO_FLOW;
		lrnver = get_num_ver(vn->vn_mode, tb, h,
				     lpar - ((h || tb->lbytes == -1) ? 0 : 1),
				     -1,
				     h ? (vn->vn_nr_item - rpar) : (rpar -
								    ((tb->
								      rbytes !=
								      -1) ? 1 :
								     0)), -1,
				     snum012 + LR_SHIFT_NO_FLOW, NO_FLOW);
		if (!h) {
			int lrnver1;

			lrnver1 = get_num_ver(vn->vn_mode, tb, h,
					      lpar -
					      ((tb->lbytes != -1) ? 1 : 0),
					      tb->lbytes,
					      (rpar -
					       ((tb->rbytes != -1) ? 1 : 0)),
					      tb->rbytes,
					      snum012 + LR_SHIFT_FLOW, FLOW);
			if (lrnver > lrnver1)
				lrset = LR_SHIFT_FLOW, lrnver = lrnver1;
		}

		/*
		 * Our general shifting strategy is:
		 * 1) to minimized number of new nodes;
		 * 2) to minimized number of neighbors involved in shifting;
		 * 3) to minimized number of disk reads;
		 */

		/* we can win TWO or ONE nodes by shifting in both directions */
		if (lrnver < lnver && lrnver < rnver) {
			RFALSE(h &&
			       (tb->lnum[h] != 1 ||
				tb->rnum[h] != 1 ||
				lrnver != 1 || rnver != 2 || lnver != 2
				|| h != 1), "vs-8230: bad h");
			/*
			 * NOTE(review): set_parameters() appears to record the
			 * chosen plan (left/right shift counts, target node
			 * count, s012 layout, lbytes/rbytes) in tb for the
			 * balancing pass to execute -- confirm against its
			 * definition.
			 */
			if (lrset == LR_SHIFT_FLOW)
				set_parameters(tb, h, tb->lnum[h], tb->rnum[h],
					       lrnver, snum012 + lrset,
					       tb->lbytes, tb->rbytes);
			else
				set_parameters(tb, h,
					       tb->lnum[h] -
					       ((tb->lbytes == -1) ? 0 : 1),
					       tb->rnum[h] -
					       ((tb->rbytes == -1) ? 0 : 1),
					       lrnver, snum012 + lrset, -1, -1);

			return CARRY_ON;
		}

		/*
		 * if shifting doesn't lead to better packing
		 * then don't shift
		 */
		if (nver == lrnver) {
			set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1,
				       -1);
			return CARRY_ON;
		}

		/*
		 * now we know that for better packing shifting in only one
		 * direction either to the left or to the right is required
		 */

		/*
		 * if shifting to the left is better than
		 * shifting to the right
		 */
		if (lnver < rnver) {
			SET_PAR_SHIFT_LEFT;
			return CARRY_ON;
		}

		/*
		 * if shifting to the right is better than
		 * shifting to the left
		 */
		if (lnver > rnver) {
			SET_PAR_SHIFT_RIGHT;
			return CARRY_ON;
		}

		/*
		 * now shifting in either direction gives the same number
		 * of nodes and we can make use of the cached neighbors
		 */
		if (is_left_neighbor_in_cache(tb, h)) {
			SET_PAR_SHIFT_LEFT;
			return CARRY_ON;
		}

		/*
		 * shift to the right independently on whether the
		 * right neighbor in cache or not
		 */
		SET_PAR_SHIFT_RIGHT;
		return CARRY_ON;
	}
}
1708*4882a593Smuzhiyun
1709*4882a593Smuzhiyun /*
1710*4882a593Smuzhiyun * Check whether current node S[h] is balanced when Decreasing its size by
1711*4882a593Smuzhiyun * Deleting or Cutting for INTERNAL node of S+tree.
1712*4882a593Smuzhiyun * Calculate parameters for balancing for current level h.
1713*4882a593Smuzhiyun * Parameters:
1714*4882a593Smuzhiyun * tb tree_balance structure;
1715*4882a593Smuzhiyun * h current level of the node;
1716*4882a593Smuzhiyun * inum item number in S[h];
 *	mode	d - delete, c - cut;
1718*4882a593Smuzhiyun * Returns: 1 - schedule occurred;
1719*4882a593Smuzhiyun * 0 - balancing for higher levels needed;
1720*4882a593Smuzhiyun * -1 - no balancing for higher levels needed;
1721*4882a593Smuzhiyun * -2 - no disk space.
1722*4882a593Smuzhiyun *
1723*4882a593Smuzhiyun * Note: Items of internal nodes have fixed size, so the balance condition for
1724*4882a593Smuzhiyun * the internal part of S+tree is as for the B-trees.
1725*4882a593Smuzhiyun */
static int dc_check_balance_internal(struct tree_balance *tb, int h)
{
	struct virtual_node *vn = tb->tb_vn;

	/*
	 * Sh is the node whose balance is currently being checked,
	 * and Fh is its father.
	 */
	struct buffer_head *Sh, *Fh;
	int ret;
	int lfree, rfree /* free space in L and R */ ;

	Sh = PATH_H_PBUFFER(tb->tb_path, h);
	Fh = PATH_H_PPARENT(tb->tb_path, h);

	/*
	 * using tb->insert_size[h], which is negative in this case,
	 * create_virtual_node calculates:
	 * new_nr_item = number of items node would have if operation is
	 * performed without balancing (new_nr_item);
	 */
	create_virtual_node(tb, h);

	if (!Fh) {		/* S[h] is the root. */
		/* no balancing for higher levels needed */
		if (vn->vn_nr_item > 0) {
			set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
			return NO_BALANCING_NEEDED;
		}
		/*
		 * new_nr_item == 0.
		 * Current root will be deleted resulting in
		 * decrementing the tree height.
		 */
		set_parameters(tb, h, 0, 0, 0, NULL, -1, -1);
		return CARRY_ON;
	}

	if ((ret = get_parents(tb, h)) != CARRY_ON)
		return ret;

	/* get free space of neighbors */
	rfree = get_rfree(tb, h);
	lfree = get_lfree(tb, h);

	/* determine maximal number of items we can fit into neighbors */
	check_left(tb, h, lfree);
	check_right(tb, h, rfree);

	/*
	 * Balance condition for the internal node is valid.
	 * In this case we balance only if it leads to better packing.
	 */
	if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) {
		/*
		 * Here we join S[h] with one of its neighbors,
		 * which is impossible with greater values of new_nr_item.
		 */
		if (vn->vn_nr_item == MIN_NR_KEY(Sh)) {
			/* All contents of S[h] can be moved to L[h]. */
			if (tb->lnum[h] >= vn->vn_nr_item + 1) {
				int n;
				int order_L;

				/*
				 * order_L is the position in FL[h] of the
				 * child bordering S[h]: when S[h] is its
				 * parent's leftmost child (n == 0) take the
				 * last child of FL[h], otherwise n - 1.
				 */
				order_L =
				    ((n =
				      PATH_H_B_ITEM_ORDER(tb->tb_path,
							  h)) ==
				     0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
				/*
				 * NOTE(review): presumably converts the
				 * child's used size into its item count --
				 * confirm DC_SIZE + KEY_SIZE is the per-item
				 * footprint at this level.
				 */
				n = dc_size(B_N_CHILD(tb->FL[h], order_L)) /
				    (DC_SIZE + KEY_SIZE);
				set_parameters(tb, h, -n - 1, 0, 0, NULL, -1,
					       -1);
				return CARRY_ON;
			}

			/* All contents of S[h] can be moved to R[h]. */
			if (tb->rnum[h] >= vn->vn_nr_item + 1) {
				int n;
				int order_R;

				/*
				 * Mirror of order_L above: position in FR[h]
				 * of the child bordering S[h] on the right.
				 */
				order_R =
				    ((n =
				      PATH_H_B_ITEM_ORDER(tb->tb_path,
							  h)) ==
				     B_NR_ITEMS(Fh)) ? 0 : n + 1;
				n = dc_size(B_N_CHILD(tb->FR[h], order_R)) /
				    (DC_SIZE + KEY_SIZE);
				set_parameters(tb, h, 0, -n - 1, 0, NULL, -1,
					       -1);
				return CARRY_ON;
			}
		}

		/*
		 * All contents of S[h] can be moved to the neighbors
		 * (L[h] & R[h]).
		 */
		if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
			int to_r;

			/*
			 * Split the items between L[h] and R[h] so the two
			 * end up equal to within one child pointer (same
			 * formula as in ip_check_balance).
			 */
			to_r =
			    ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] -
			     tb->rnum[h] + vn->vn_nr_item + 1) / 2 -
			    (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]);
			set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r,
				       0, NULL, -1, -1);
			return CARRY_ON;
		}

		/* Balancing does not lead to better packing. */
		set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
		return NO_BALANCING_NEEDED;
	}

	/*
	 * Current node contain insufficient number of items.
	 * Balancing is required.
	 */
	/* Check whether we can merge S[h] with left neighbor. */
	if (tb->lnum[h] >= vn->vn_nr_item + 1)
		if (is_left_neighbor_in_cache(tb, h)
		    || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h]) {
			int n;
			int order_L;

			order_L =
			    ((n =
			      PATH_H_B_ITEM_ORDER(tb->tb_path,
						  h)) ==
			     0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
			n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / (DC_SIZE +
								      KEY_SIZE);
			set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, -1);
			return CARRY_ON;
		}

	/* Check whether we can merge S[h] with right neighbor. */
	if (tb->rnum[h] >= vn->vn_nr_item + 1) {
		int n;
		int order_R;

		order_R =
		    ((n =
		      PATH_H_B_ITEM_ORDER(tb->tb_path,
					  h)) == B_NR_ITEMS(Fh)) ? 0 : (n + 1);
		n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / (DC_SIZE +
							      KEY_SIZE);
		set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, -1);
		return CARRY_ON;
	}

	/* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
	if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
		int to_r;

		to_r =
		    ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
		     vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
						tb->rnum[h]);
		set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
			       -1, -1);
		return CARRY_ON;
	}

	/* For internal nodes try to borrow item from a neighbor */
	RFALSE(!tb->FL[h] && !tb->FR[h], "vs-8235: trying to borrow for root");

	/* Borrow one or two items from caching neighbor */
	if (is_left_neighbor_in_cache(tb, h) || !tb->FR[h]) {
		int from_l;

		/*
		 * from_l is the number of items to pull from L[h] so that
		 * S[h] and L[h] end up roughly evenly filled; negative lnum
		 * in set_parameters() expresses a shift INTO S[h].
		 */
		from_l =
		    (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item +
		     1) / 2 - (vn->vn_nr_item + 1);
		set_parameters(tb, h, -from_l, 0, 1, NULL, -1, -1);
		return CARRY_ON;
	}

	/* Otherwise borrow from the right neighbor (same arithmetic). */
	set_parameters(tb, h, 0,
		       -((MAX_NR_KEY(Sh) + 1 - tb->rnum[h] + vn->vn_nr_item +
			  1) / 2 - (vn->vn_nr_item + 1)), 1, NULL, -1, -1);
	return CARRY_ON;
}
1910*4882a593Smuzhiyun
1911*4882a593Smuzhiyun /*
1912*4882a593Smuzhiyun * Check whether current node S[h] is balanced when Decreasing its size by
1913*4882a593Smuzhiyun * Deleting or Truncating for LEAF node of S+tree.
1914*4882a593Smuzhiyun * Calculate parameters for balancing for current level h.
1915*4882a593Smuzhiyun * Parameters:
1916*4882a593Smuzhiyun * tb tree_balance structure;
1917*4882a593Smuzhiyun * h current level of the node;
1918*4882a593Smuzhiyun * inum item number in S[h];
 *	mode	d - delete, t - truncate;
1920*4882a593Smuzhiyun * Returns: 1 - schedule occurred;
1921*4882a593Smuzhiyun * 0 - balancing for higher levels needed;
1922*4882a593Smuzhiyun * -1 - no balancing for higher levels needed;
1923*4882a593Smuzhiyun * -2 - no disk space.
1924*4882a593Smuzhiyun */
dc_check_balance_leaf(struct tree_balance * tb,int h)1925*4882a593Smuzhiyun static int dc_check_balance_leaf(struct tree_balance *tb, int h)
1926*4882a593Smuzhiyun {
1927*4882a593Smuzhiyun struct virtual_node *vn = tb->tb_vn;
1928*4882a593Smuzhiyun
1929*4882a593Smuzhiyun /*
1930*4882a593Smuzhiyun * Number of bytes that must be deleted from
1931*4882a593Smuzhiyun * (value is negative if bytes are deleted) buffer which
1932*4882a593Smuzhiyun * contains node being balanced. The mnemonic is that the
1933*4882a593Smuzhiyun * attempted change in node space used level is levbytes bytes.
1934*4882a593Smuzhiyun */
1935*4882a593Smuzhiyun int levbytes;
1936*4882a593Smuzhiyun
1937*4882a593Smuzhiyun /* the maximal item size */
1938*4882a593Smuzhiyun int maxsize, ret;
1939*4882a593Smuzhiyun
1940*4882a593Smuzhiyun /*
1941*4882a593Smuzhiyun * S0 is the node whose balance is currently being checked,
1942*4882a593Smuzhiyun * and F0 is its father.
1943*4882a593Smuzhiyun */
1944*4882a593Smuzhiyun struct buffer_head *S0, *F0;
1945*4882a593Smuzhiyun int lfree, rfree /* free space in L and R */ ;
1946*4882a593Smuzhiyun
1947*4882a593Smuzhiyun S0 = PATH_H_PBUFFER(tb->tb_path, 0);
1948*4882a593Smuzhiyun F0 = PATH_H_PPARENT(tb->tb_path, 0);
1949*4882a593Smuzhiyun
1950*4882a593Smuzhiyun levbytes = tb->insert_size[h];
1951*4882a593Smuzhiyun
1952*4882a593Smuzhiyun maxsize = MAX_CHILD_SIZE(S0); /* maximal possible size of an item */
1953*4882a593Smuzhiyun
1954*4882a593Smuzhiyun if (!F0) { /* S[0] is the root now. */
1955*4882a593Smuzhiyun
1956*4882a593Smuzhiyun RFALSE(-levbytes >= maxsize - B_FREE_SPACE(S0),
1957*4882a593Smuzhiyun "vs-8240: attempt to create empty buffer tree");
1958*4882a593Smuzhiyun
1959*4882a593Smuzhiyun set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1960*4882a593Smuzhiyun return NO_BALANCING_NEEDED;
1961*4882a593Smuzhiyun }
1962*4882a593Smuzhiyun
1963*4882a593Smuzhiyun if ((ret = get_parents(tb, h)) != CARRY_ON)
1964*4882a593Smuzhiyun return ret;
1965*4882a593Smuzhiyun
1966*4882a593Smuzhiyun /* get free space of neighbors */
1967*4882a593Smuzhiyun rfree = get_rfree(tb, h);
1968*4882a593Smuzhiyun lfree = get_lfree(tb, h);
1969*4882a593Smuzhiyun
1970*4882a593Smuzhiyun create_virtual_node(tb, h);
1971*4882a593Smuzhiyun
1972*4882a593Smuzhiyun /* if 3 leaves can be merge to one, set parameters and return */
1973*4882a593Smuzhiyun if (are_leaves_removable(tb, lfree, rfree))
1974*4882a593Smuzhiyun return CARRY_ON;
1975*4882a593Smuzhiyun
1976*4882a593Smuzhiyun /*
1977*4882a593Smuzhiyun * determine maximal number of items we can shift to the left/right
1978*4882a593Smuzhiyun * neighbor and the maximal number of bytes that can flow to the
1979*4882a593Smuzhiyun * left/right neighbor from the left/right most liquid item that
1980*4882a593Smuzhiyun * cannot be shifted from S[0] entirely
1981*4882a593Smuzhiyun */
1982*4882a593Smuzhiyun check_left(tb, h, lfree);
1983*4882a593Smuzhiyun check_right(tb, h, rfree);
1984*4882a593Smuzhiyun
1985*4882a593Smuzhiyun /* check whether we can merge S with left neighbor. */
1986*4882a593Smuzhiyun if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1)
1987*4882a593Smuzhiyun if (is_left_neighbor_in_cache(tb, h) || ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) || /* S can not be merged with R */
1988*4882a593Smuzhiyun !tb->FR[h]) {
1989*4882a593Smuzhiyun
1990*4882a593Smuzhiyun RFALSE(!tb->FL[h],
1991*4882a593Smuzhiyun "vs-8245: dc_check_balance_leaf: FL[h] must exist");
1992*4882a593Smuzhiyun
1993*4882a593Smuzhiyun /* set parameter to merge S[0] with its left neighbor */
1994*4882a593Smuzhiyun set_parameters(tb, h, -1, 0, 0, NULL, -1, -1);
1995*4882a593Smuzhiyun return CARRY_ON;
1996*4882a593Smuzhiyun }
1997*4882a593Smuzhiyun
1998*4882a593Smuzhiyun /* check whether we can merge S[0] with right neighbor. */
1999*4882a593Smuzhiyun if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) {
2000*4882a593Smuzhiyun set_parameters(tb, h, 0, -1, 0, NULL, -1, -1);
2001*4882a593Smuzhiyun return CARRY_ON;
2002*4882a593Smuzhiyun }
2003*4882a593Smuzhiyun
2004*4882a593Smuzhiyun /*
2005*4882a593Smuzhiyun * All contents of S[0] can be moved to the neighbors (L[0] & R[0]).
2006*4882a593Smuzhiyun * Set parameters and return
2007*4882a593Smuzhiyun */
2008*4882a593Smuzhiyun if (is_leaf_removable(tb))
2009*4882a593Smuzhiyun return CARRY_ON;
2010*4882a593Smuzhiyun
2011*4882a593Smuzhiyun /* Balancing is not required. */
2012*4882a593Smuzhiyun tb->s0num = vn->vn_nr_item;
2013*4882a593Smuzhiyun set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
2014*4882a593Smuzhiyun return NO_BALANCING_NEEDED;
2015*4882a593Smuzhiyun }
2016*4882a593Smuzhiyun
2017*4882a593Smuzhiyun /*
2018*4882a593Smuzhiyun * Check whether current node S[h] is balanced when Decreasing its size by
2019*4882a593Smuzhiyun * Deleting or Cutting.
2020*4882a593Smuzhiyun * Calculate parameters for balancing for current level h.
2021*4882a593Smuzhiyun * Parameters:
2022*4882a593Smuzhiyun * tb tree_balance structure;
2023*4882a593Smuzhiyun * h current level of the node;
2024*4882a593Smuzhiyun * inum item number in S[h];
2025*4882a593Smuzhiyun * mode d - delete, c - cut.
2026*4882a593Smuzhiyun * Returns: 1 - schedule occurred;
2027*4882a593Smuzhiyun * 0 - balancing for higher levels needed;
2028*4882a593Smuzhiyun * -1 - no balancing for higher levels needed;
2029*4882a593Smuzhiyun * -2 - no disk space.
2030*4882a593Smuzhiyun */
dc_check_balance(struct tree_balance * tb,int h)2031*4882a593Smuzhiyun static int dc_check_balance(struct tree_balance *tb, int h)
2032*4882a593Smuzhiyun {
2033*4882a593Smuzhiyun RFALSE(!(PATH_H_PBUFFER(tb->tb_path, h)),
2034*4882a593Smuzhiyun "vs-8250: S is not initialized");
2035*4882a593Smuzhiyun
2036*4882a593Smuzhiyun if (h)
2037*4882a593Smuzhiyun return dc_check_balance_internal(tb, h);
2038*4882a593Smuzhiyun else
2039*4882a593Smuzhiyun return dc_check_balance_leaf(tb, h);
2040*4882a593Smuzhiyun }
2041*4882a593Smuzhiyun
2042*4882a593Smuzhiyun /*
2043*4882a593Smuzhiyun * Check whether current node S[h] is balanced.
2044*4882a593Smuzhiyun * Calculate parameters for balancing for current level h.
2045*4882a593Smuzhiyun * Parameters:
2046*4882a593Smuzhiyun *
2047*4882a593Smuzhiyun * tb tree_balance structure:
2048*4882a593Smuzhiyun *
2049*4882a593Smuzhiyun * tb is a large structure that must be read about in the header
2050*4882a593Smuzhiyun * file at the same time as this procedure if the reader is
2051*4882a593Smuzhiyun * to successfully understand this procedure
2052*4882a593Smuzhiyun *
2053*4882a593Smuzhiyun * h current level of the node;
2054*4882a593Smuzhiyun * inum item number in S[h];
2055*4882a593Smuzhiyun * mode i - insert, p - paste, d - delete, c - cut.
2056*4882a593Smuzhiyun * Returns: 1 - schedule occurred;
2057*4882a593Smuzhiyun * 0 - balancing for higher levels needed;
2058*4882a593Smuzhiyun * -1 - no balancing for higher levels needed;
2059*4882a593Smuzhiyun * -2 - no disk space.
2060*4882a593Smuzhiyun */
check_balance(int mode,struct tree_balance * tb,int h,int inum,int pos_in_item,struct item_head * ins_ih,const void * data)2061*4882a593Smuzhiyun static int check_balance(int mode,
2062*4882a593Smuzhiyun struct tree_balance *tb,
2063*4882a593Smuzhiyun int h,
2064*4882a593Smuzhiyun int inum,
2065*4882a593Smuzhiyun int pos_in_item,
2066*4882a593Smuzhiyun struct item_head *ins_ih, const void *data)
2067*4882a593Smuzhiyun {
2068*4882a593Smuzhiyun struct virtual_node *vn;
2069*4882a593Smuzhiyun
2070*4882a593Smuzhiyun vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf);
2071*4882a593Smuzhiyun vn->vn_free_ptr = (char *)(tb->tb_vn + 1);
2072*4882a593Smuzhiyun vn->vn_mode = mode;
2073*4882a593Smuzhiyun vn->vn_affected_item_num = inum;
2074*4882a593Smuzhiyun vn->vn_pos_in_item = pos_in_item;
2075*4882a593Smuzhiyun vn->vn_ins_ih = ins_ih;
2076*4882a593Smuzhiyun vn->vn_data = data;
2077*4882a593Smuzhiyun
2078*4882a593Smuzhiyun RFALSE(mode == M_INSERT && !vn->vn_ins_ih,
2079*4882a593Smuzhiyun "vs-8255: ins_ih can not be 0 in insert mode");
2080*4882a593Smuzhiyun
2081*4882a593Smuzhiyun /* Calculate balance parameters when size of node is increasing. */
2082*4882a593Smuzhiyun if (tb->insert_size[h] > 0)
2083*4882a593Smuzhiyun return ip_check_balance(tb, h);
2084*4882a593Smuzhiyun
2085*4882a593Smuzhiyun /* Calculate balance parameters when size of node is decreasing. */
2086*4882a593Smuzhiyun return dc_check_balance(tb, h);
2087*4882a593Smuzhiyun }
2088*4882a593Smuzhiyun
2089*4882a593Smuzhiyun /* Check whether parent at the path is the really parent of the current node.*/
get_direct_parent(struct tree_balance * tb,int h)2090*4882a593Smuzhiyun static int get_direct_parent(struct tree_balance *tb, int h)
2091*4882a593Smuzhiyun {
2092*4882a593Smuzhiyun struct buffer_head *bh;
2093*4882a593Smuzhiyun struct treepath *path = tb->tb_path;
2094*4882a593Smuzhiyun int position,
2095*4882a593Smuzhiyun path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h);
2096*4882a593Smuzhiyun
2097*4882a593Smuzhiyun /* We are in the root or in the new root. */
2098*4882a593Smuzhiyun if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
2099*4882a593Smuzhiyun
2100*4882a593Smuzhiyun RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET - 1,
2101*4882a593Smuzhiyun "PAP-8260: invalid offset in the path");
2102*4882a593Smuzhiyun
2103*4882a593Smuzhiyun if (PATH_OFFSET_PBUFFER(path, FIRST_PATH_ELEMENT_OFFSET)->
2104*4882a593Smuzhiyun b_blocknr == SB_ROOT_BLOCK(tb->tb_sb)) {
2105*4882a593Smuzhiyun /* Root is not changed. */
2106*4882a593Smuzhiyun PATH_OFFSET_PBUFFER(path, path_offset - 1) = NULL;
2107*4882a593Smuzhiyun PATH_OFFSET_POSITION(path, path_offset - 1) = 0;
2108*4882a593Smuzhiyun return CARRY_ON;
2109*4882a593Smuzhiyun }
2110*4882a593Smuzhiyun /* Root is changed and we must recalculate the path. */
2111*4882a593Smuzhiyun return REPEAT_SEARCH;
2112*4882a593Smuzhiyun }
2113*4882a593Smuzhiyun
2114*4882a593Smuzhiyun /* Parent in the path is not in the tree. */
2115*4882a593Smuzhiyun if (!B_IS_IN_TREE
2116*4882a593Smuzhiyun (bh = PATH_OFFSET_PBUFFER(path, path_offset - 1)))
2117*4882a593Smuzhiyun return REPEAT_SEARCH;
2118*4882a593Smuzhiyun
2119*4882a593Smuzhiyun if ((position =
2120*4882a593Smuzhiyun PATH_OFFSET_POSITION(path,
2121*4882a593Smuzhiyun path_offset - 1)) > B_NR_ITEMS(bh))
2122*4882a593Smuzhiyun return REPEAT_SEARCH;
2123*4882a593Smuzhiyun
2124*4882a593Smuzhiyun /* Parent in the path is not parent of the current node in the tree. */
2125*4882a593Smuzhiyun if (B_N_CHILD_NUM(bh, position) !=
2126*4882a593Smuzhiyun PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr)
2127*4882a593Smuzhiyun return REPEAT_SEARCH;
2128*4882a593Smuzhiyun
2129*4882a593Smuzhiyun if (buffer_locked(bh)) {
2130*4882a593Smuzhiyun int depth = reiserfs_write_unlock_nested(tb->tb_sb);
2131*4882a593Smuzhiyun __wait_on_buffer(bh);
2132*4882a593Smuzhiyun reiserfs_write_lock_nested(tb->tb_sb, depth);
2133*4882a593Smuzhiyun if (FILESYSTEM_CHANGED_TB(tb))
2134*4882a593Smuzhiyun return REPEAT_SEARCH;
2135*4882a593Smuzhiyun }
2136*4882a593Smuzhiyun
2137*4882a593Smuzhiyun /*
2138*4882a593Smuzhiyun * Parent in the path is unlocked and really parent
2139*4882a593Smuzhiyun * of the current node.
2140*4882a593Smuzhiyun */
2141*4882a593Smuzhiyun return CARRY_ON;
2142*4882a593Smuzhiyun }
2143*4882a593Smuzhiyun
2144*4882a593Smuzhiyun /*
2145*4882a593Smuzhiyun * Using lnum[h] and rnum[h] we should determine what neighbors
2146*4882a593Smuzhiyun * of S[h] we
2147*4882a593Smuzhiyun * need in order to balance S[h], and get them if necessary.
2148*4882a593Smuzhiyun * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
2149*4882a593Smuzhiyun * CARRY_ON - schedule didn't occur while the function worked;
2150*4882a593Smuzhiyun */
/*
 * Read the left/right neighbor leaves requested by the analysis phase
 * (tb->lnum[h] / tb->rnum[h] non-zero) and cache them in tb->L[h] /
 * tb->R[h].  Returns CARRY_ON on success, IO_ERROR on read failure, or
 * REPEAT_SEARCH when the tree changed while the write lock was dropped.
 */
static int get_neighbors(struct tree_balance *tb, int h)
{
	int child_position,
	    path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h + 1);
	unsigned long son_number;
	struct super_block *sb = tb->tb_sb;
	struct buffer_head *bh;
	int depth;

	PROC_INFO_INC(sb, get_neighbors[h]);

	if (tb->lnum[h]) {
		/* We need left neighbor to balance S[h]. */
		PROC_INFO_INC(sb, need_l_neighbor[h]);
		bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset);

		RFALSE(bh == tb->FL[h] &&
		       !PATH_OFFSET_POSITION(tb->tb_path, path_offset),
		       "PAP-8270: invalid position in the parent");

		/*
		 * Pointer slot of the left neighbor inside FL[h]: lkey[h]
		 * when S[h] and the neighbor share this parent, otherwise
		 * FL[h]'s last child.
		 */
		child_position =
		    (bh ==
		     tb->FL[h]) ? tb->lkey[h] : B_NR_ITEMS(tb->
							   FL[h]);
		son_number = B_N_CHILD_NUM(tb->FL[h], child_position);
		/* sb_bread() may sleep, so drop the write lock around it. */
		depth = reiserfs_write_unlock_nested(tb->tb_sb);
		bh = sb_bread(sb, son_number);
		reiserfs_write_lock_nested(tb->tb_sb, depth);
		if (!bh)
			return IO_ERROR;
		/* The tree may have changed while the lock was dropped. */
		if (FILESYSTEM_CHANGED_TB(tb)) {
			brelse(bh);
			PROC_INFO_INC(sb, get_neighbors_restart[h]);
			return REPEAT_SEARCH;
		}

		RFALSE(!B_IS_IN_TREE(tb->FL[h]) ||
		       child_position > B_NR_ITEMS(tb->FL[h]) ||
		       B_N_CHILD_NUM(tb->FL[h], child_position) !=
		       bh->b_blocknr, "PAP-8275: invalid parent");
		RFALSE(!B_IS_IN_TREE(bh), "PAP-8280: invalid child");
		RFALSE(!h &&
		       B_FREE_SPACE(bh) !=
		       MAX_CHILD_SIZE(bh) -
		       dc_size(B_N_CHILD(tb->FL[0], child_position)),
		       "PAP-8290: invalid child size of left neighbor");

		/* Release any previously cached left neighbor. */
		brelse(tb->L[h]);
		tb->L[h] = bh;
	}

	/* We need right neighbor to balance S[path_offset]. */
	if (tb->rnum[h]) {
		PROC_INFO_INC(sb, need_r_neighbor[h]);
		bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset);

		RFALSE(bh == tb->FR[h] &&
		       PATH_OFFSET_POSITION(tb->tb_path,
					    path_offset) >=
		       B_NR_ITEMS(bh),
		       "PAP-8295: invalid position in the parent");

		/*
		 * Pointer slot of the right neighbor inside FR[h]:
		 * rkey[h] + 1 when S[h] and the neighbor share this
		 * parent, otherwise FR[h]'s first child.
		 */
		child_position =
		    (bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0;
		son_number = B_N_CHILD_NUM(tb->FR[h], child_position);
		/* sb_bread() may sleep, so drop the write lock around it. */
		depth = reiserfs_write_unlock_nested(tb->tb_sb);
		bh = sb_bread(sb, son_number);
		reiserfs_write_lock_nested(tb->tb_sb, depth);
		if (!bh)
			return IO_ERROR;
		/* The tree may have changed while the lock was dropped. */
		if (FILESYSTEM_CHANGED_TB(tb)) {
			brelse(bh);
			PROC_INFO_INC(sb, get_neighbors_restart[h]);
			return REPEAT_SEARCH;
		}
		/* Release any previously cached right neighbor. */
		brelse(tb->R[h]);
		tb->R[h] = bh;

		RFALSE(!h
		       && B_FREE_SPACE(bh) !=
		       MAX_CHILD_SIZE(bh) -
		       dc_size(B_N_CHILD(tb->FR[0], child_position)),
		       "PAP-8300: invalid child size of right neighbor (%d != %d - %d)",
		       B_FREE_SPACE(bh), MAX_CHILD_SIZE(bh),
		       dc_size(B_N_CHILD(tb->FR[0], child_position)));

	}
	return CARRY_ON;
}
2240*4882a593Smuzhiyun
get_virtual_node_size(struct super_block * sb,struct buffer_head * bh)2241*4882a593Smuzhiyun static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh)
2242*4882a593Smuzhiyun {
2243*4882a593Smuzhiyun int max_num_of_items;
2244*4882a593Smuzhiyun int max_num_of_entries;
2245*4882a593Smuzhiyun unsigned long blocksize = sb->s_blocksize;
2246*4882a593Smuzhiyun
2247*4882a593Smuzhiyun #define MIN_NAME_LEN 1
2248*4882a593Smuzhiyun
2249*4882a593Smuzhiyun max_num_of_items = (blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN);
2250*4882a593Smuzhiyun max_num_of_entries = (blocksize - BLKH_SIZE - IH_SIZE) /
2251*4882a593Smuzhiyun (DEH_SIZE + MIN_NAME_LEN);
2252*4882a593Smuzhiyun
2253*4882a593Smuzhiyun return sizeof(struct virtual_node) +
2254*4882a593Smuzhiyun max(max_num_of_items * sizeof(struct virtual_item),
2255*4882a593Smuzhiyun sizeof(struct virtual_item) + sizeof(struct direntry_uarea) +
2256*4882a593Smuzhiyun (max_num_of_entries - 1) * sizeof(__u16));
2257*4882a593Smuzhiyun }
2258*4882a593Smuzhiyun
2259*4882a593Smuzhiyun /*
2260*4882a593Smuzhiyun * maybe we should fail balancing we are going to perform when kmalloc
2261*4882a593Smuzhiyun * fails several times. But now it will loop until kmalloc gets
2262*4882a593Smuzhiyun * required memory
2263*4882a593Smuzhiyun */
get_mem_for_virtual_node(struct tree_balance * tb)2264*4882a593Smuzhiyun static int get_mem_for_virtual_node(struct tree_balance *tb)
2265*4882a593Smuzhiyun {
2266*4882a593Smuzhiyun int check_fs = 0;
2267*4882a593Smuzhiyun int size;
2268*4882a593Smuzhiyun char *buf;
2269*4882a593Smuzhiyun
2270*4882a593Smuzhiyun size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path));
2271*4882a593Smuzhiyun
2272*4882a593Smuzhiyun /* we have to allocate more memory for virtual node */
2273*4882a593Smuzhiyun if (size > tb->vn_buf_size) {
2274*4882a593Smuzhiyun if (tb->vn_buf) {
2275*4882a593Smuzhiyun /* free memory allocated before */
2276*4882a593Smuzhiyun kfree(tb->vn_buf);
2277*4882a593Smuzhiyun /* this is not needed if kfree is atomic */
2278*4882a593Smuzhiyun check_fs = 1;
2279*4882a593Smuzhiyun }
2280*4882a593Smuzhiyun
2281*4882a593Smuzhiyun /* virtual node requires now more memory */
2282*4882a593Smuzhiyun tb->vn_buf_size = size;
2283*4882a593Smuzhiyun
2284*4882a593Smuzhiyun /* get memory for virtual item */
2285*4882a593Smuzhiyun buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN);
2286*4882a593Smuzhiyun if (!buf) {
2287*4882a593Smuzhiyun /*
2288*4882a593Smuzhiyun * getting memory with GFP_KERNEL priority may involve
2289*4882a593Smuzhiyun * balancing now (due to indirect_to_direct conversion
2290*4882a593Smuzhiyun * on dcache shrinking). So, release path and collected
2291*4882a593Smuzhiyun * resources here
2292*4882a593Smuzhiyun */
2293*4882a593Smuzhiyun free_buffers_in_tb(tb);
2294*4882a593Smuzhiyun buf = kmalloc(size, GFP_NOFS);
2295*4882a593Smuzhiyun if (!buf) {
2296*4882a593Smuzhiyun tb->vn_buf_size = 0;
2297*4882a593Smuzhiyun }
2298*4882a593Smuzhiyun tb->vn_buf = buf;
2299*4882a593Smuzhiyun schedule();
2300*4882a593Smuzhiyun return REPEAT_SEARCH;
2301*4882a593Smuzhiyun }
2302*4882a593Smuzhiyun
2303*4882a593Smuzhiyun tb->vn_buf = buf;
2304*4882a593Smuzhiyun }
2305*4882a593Smuzhiyun
2306*4882a593Smuzhiyun if (check_fs && FILESYSTEM_CHANGED_TB(tb))
2307*4882a593Smuzhiyun return REPEAT_SEARCH;
2308*4882a593Smuzhiyun
2309*4882a593Smuzhiyun return CARRY_ON;
2310*4882a593Smuzhiyun }
2311*4882a593Smuzhiyun
2312*4882a593Smuzhiyun #ifdef CONFIG_REISERFS_CHECK
tb_buffer_sanity_check(struct super_block * sb,struct buffer_head * bh,const char * descr,int level)2313*4882a593Smuzhiyun static void tb_buffer_sanity_check(struct super_block *sb,
2314*4882a593Smuzhiyun struct buffer_head *bh,
2315*4882a593Smuzhiyun const char *descr, int level)
2316*4882a593Smuzhiyun {
2317*4882a593Smuzhiyun if (bh) {
2318*4882a593Smuzhiyun if (atomic_read(&(bh->b_count)) <= 0)
2319*4882a593Smuzhiyun
2320*4882a593Smuzhiyun reiserfs_panic(sb, "jmacd-1", "negative or zero "
2321*4882a593Smuzhiyun "reference counter for buffer %s[%d] "
2322*4882a593Smuzhiyun "(%b)", descr, level, bh);
2323*4882a593Smuzhiyun
2324*4882a593Smuzhiyun if (!buffer_uptodate(bh))
2325*4882a593Smuzhiyun reiserfs_panic(sb, "jmacd-2", "buffer is not up "
2326*4882a593Smuzhiyun "to date %s[%d] (%b)",
2327*4882a593Smuzhiyun descr, level, bh);
2328*4882a593Smuzhiyun
2329*4882a593Smuzhiyun if (!B_IS_IN_TREE(bh))
2330*4882a593Smuzhiyun reiserfs_panic(sb, "jmacd-3", "buffer is not "
2331*4882a593Smuzhiyun "in tree %s[%d] (%b)",
2332*4882a593Smuzhiyun descr, level, bh);
2333*4882a593Smuzhiyun
2334*4882a593Smuzhiyun if (bh->b_bdev != sb->s_bdev)
2335*4882a593Smuzhiyun reiserfs_panic(sb, "jmacd-4", "buffer has wrong "
2336*4882a593Smuzhiyun "device %s[%d] (%b)",
2337*4882a593Smuzhiyun descr, level, bh);
2338*4882a593Smuzhiyun
2339*4882a593Smuzhiyun if (bh->b_size != sb->s_blocksize)
2340*4882a593Smuzhiyun reiserfs_panic(sb, "jmacd-5", "buffer has wrong "
2341*4882a593Smuzhiyun "blocksize %s[%d] (%b)",
2342*4882a593Smuzhiyun descr, level, bh);
2343*4882a593Smuzhiyun
2344*4882a593Smuzhiyun if (bh->b_blocknr > SB_BLOCK_COUNT(sb))
2345*4882a593Smuzhiyun reiserfs_panic(sb, "jmacd-6", "buffer block "
2346*4882a593Smuzhiyun "number too high %s[%d] (%b)",
2347*4882a593Smuzhiyun descr, level, bh);
2348*4882a593Smuzhiyun }
2349*4882a593Smuzhiyun }
2350*4882a593Smuzhiyun #else
static void tb_buffer_sanity_check(struct super_block *sb,
				   struct buffer_head *bh,
				   const char *descr, int level)
{
	/* No-op when CONFIG_REISERFS_CHECK is disabled. */
}
2356*4882a593Smuzhiyun #endif
2357*4882a593Smuzhiyun
/*
 * Try to prepare bh for the journal without blocking (wait == 0).
 * Callers treat a zero return as "buffer still locked" and wait on it
 * before retrying; see wait_tb_buffers_until_unlocked().
 */
static int clear_all_dirty_bits(struct super_block *s, struct buffer_head *bh)
{
	return reiserfs_prepare_for_journal(s, bh, 0);
}
2362*4882a593Smuzhiyun
wait_tb_buffers_until_unlocked(struct tree_balance * tb)2363*4882a593Smuzhiyun static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
2364*4882a593Smuzhiyun {
2365*4882a593Smuzhiyun struct buffer_head *locked;
2366*4882a593Smuzhiyun #ifdef CONFIG_REISERFS_CHECK
2367*4882a593Smuzhiyun int repeat_counter = 0;
2368*4882a593Smuzhiyun #endif
2369*4882a593Smuzhiyun int i;
2370*4882a593Smuzhiyun
2371*4882a593Smuzhiyun do {
2372*4882a593Smuzhiyun
2373*4882a593Smuzhiyun locked = NULL;
2374*4882a593Smuzhiyun
2375*4882a593Smuzhiyun for (i = tb->tb_path->path_length;
2376*4882a593Smuzhiyun !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) {
2377*4882a593Smuzhiyun if (PATH_OFFSET_PBUFFER(tb->tb_path, i)) {
2378*4882a593Smuzhiyun /*
2379*4882a593Smuzhiyun * if I understand correctly, we can only
2380*4882a593Smuzhiyun * be sure the last buffer in the path is
2381*4882a593Smuzhiyun * in the tree --clm
2382*4882a593Smuzhiyun */
2383*4882a593Smuzhiyun #ifdef CONFIG_REISERFS_CHECK
2384*4882a593Smuzhiyun if (PATH_PLAST_BUFFER(tb->tb_path) ==
2385*4882a593Smuzhiyun PATH_OFFSET_PBUFFER(tb->tb_path, i))
2386*4882a593Smuzhiyun tb_buffer_sanity_check(tb->tb_sb,
2387*4882a593Smuzhiyun PATH_OFFSET_PBUFFER
2388*4882a593Smuzhiyun (tb->tb_path,
2389*4882a593Smuzhiyun i), "S",
2390*4882a593Smuzhiyun tb->tb_path->
2391*4882a593Smuzhiyun path_length - i);
2392*4882a593Smuzhiyun #endif
2393*4882a593Smuzhiyun if (!clear_all_dirty_bits(tb->tb_sb,
2394*4882a593Smuzhiyun PATH_OFFSET_PBUFFER
2395*4882a593Smuzhiyun (tb->tb_path,
2396*4882a593Smuzhiyun i))) {
2397*4882a593Smuzhiyun locked =
2398*4882a593Smuzhiyun PATH_OFFSET_PBUFFER(tb->tb_path,
2399*4882a593Smuzhiyun i);
2400*4882a593Smuzhiyun }
2401*4882a593Smuzhiyun }
2402*4882a593Smuzhiyun }
2403*4882a593Smuzhiyun
2404*4882a593Smuzhiyun for (i = 0; !locked && i < MAX_HEIGHT && tb->insert_size[i];
2405*4882a593Smuzhiyun i++) {
2406*4882a593Smuzhiyun
2407*4882a593Smuzhiyun if (tb->lnum[i]) {
2408*4882a593Smuzhiyun
2409*4882a593Smuzhiyun if (tb->L[i]) {
2410*4882a593Smuzhiyun tb_buffer_sanity_check(tb->tb_sb,
2411*4882a593Smuzhiyun tb->L[i],
2412*4882a593Smuzhiyun "L", i);
2413*4882a593Smuzhiyun if (!clear_all_dirty_bits
2414*4882a593Smuzhiyun (tb->tb_sb, tb->L[i]))
2415*4882a593Smuzhiyun locked = tb->L[i];
2416*4882a593Smuzhiyun }
2417*4882a593Smuzhiyun
2418*4882a593Smuzhiyun if (!locked && tb->FL[i]) {
2419*4882a593Smuzhiyun tb_buffer_sanity_check(tb->tb_sb,
2420*4882a593Smuzhiyun tb->FL[i],
2421*4882a593Smuzhiyun "FL", i);
2422*4882a593Smuzhiyun if (!clear_all_dirty_bits
2423*4882a593Smuzhiyun (tb->tb_sb, tb->FL[i]))
2424*4882a593Smuzhiyun locked = tb->FL[i];
2425*4882a593Smuzhiyun }
2426*4882a593Smuzhiyun
2427*4882a593Smuzhiyun if (!locked && tb->CFL[i]) {
2428*4882a593Smuzhiyun tb_buffer_sanity_check(tb->tb_sb,
2429*4882a593Smuzhiyun tb->CFL[i],
2430*4882a593Smuzhiyun "CFL", i);
2431*4882a593Smuzhiyun if (!clear_all_dirty_bits
2432*4882a593Smuzhiyun (tb->tb_sb, tb->CFL[i]))
2433*4882a593Smuzhiyun locked = tb->CFL[i];
2434*4882a593Smuzhiyun }
2435*4882a593Smuzhiyun
2436*4882a593Smuzhiyun }
2437*4882a593Smuzhiyun
2438*4882a593Smuzhiyun if (!locked && (tb->rnum[i])) {
2439*4882a593Smuzhiyun
2440*4882a593Smuzhiyun if (tb->R[i]) {
2441*4882a593Smuzhiyun tb_buffer_sanity_check(tb->tb_sb,
2442*4882a593Smuzhiyun tb->R[i],
2443*4882a593Smuzhiyun "R", i);
2444*4882a593Smuzhiyun if (!clear_all_dirty_bits
2445*4882a593Smuzhiyun (tb->tb_sb, tb->R[i]))
2446*4882a593Smuzhiyun locked = tb->R[i];
2447*4882a593Smuzhiyun }
2448*4882a593Smuzhiyun
2449*4882a593Smuzhiyun if (!locked && tb->FR[i]) {
2450*4882a593Smuzhiyun tb_buffer_sanity_check(tb->tb_sb,
2451*4882a593Smuzhiyun tb->FR[i],
2452*4882a593Smuzhiyun "FR", i);
2453*4882a593Smuzhiyun if (!clear_all_dirty_bits
2454*4882a593Smuzhiyun (tb->tb_sb, tb->FR[i]))
2455*4882a593Smuzhiyun locked = tb->FR[i];
2456*4882a593Smuzhiyun }
2457*4882a593Smuzhiyun
2458*4882a593Smuzhiyun if (!locked && tb->CFR[i]) {
2459*4882a593Smuzhiyun tb_buffer_sanity_check(tb->tb_sb,
2460*4882a593Smuzhiyun tb->CFR[i],
2461*4882a593Smuzhiyun "CFR", i);
2462*4882a593Smuzhiyun if (!clear_all_dirty_bits
2463*4882a593Smuzhiyun (tb->tb_sb, tb->CFR[i]))
2464*4882a593Smuzhiyun locked = tb->CFR[i];
2465*4882a593Smuzhiyun }
2466*4882a593Smuzhiyun }
2467*4882a593Smuzhiyun }
2468*4882a593Smuzhiyun
2469*4882a593Smuzhiyun /*
2470*4882a593Smuzhiyun * as far as I can tell, this is not required. The FEB list
2471*4882a593Smuzhiyun * seems to be full of newly allocated nodes, which will
2472*4882a593Smuzhiyun * never be locked, dirty, or anything else.
2473*4882a593Smuzhiyun * To be safe, I'm putting in the checks and waits in.
2474*4882a593Smuzhiyun * For the moment, they are needed to keep the code in
2475*4882a593Smuzhiyun * journal.c from complaining about the buffer.
2476*4882a593Smuzhiyun * That code is inside CONFIG_REISERFS_CHECK as well. --clm
2477*4882a593Smuzhiyun */
2478*4882a593Smuzhiyun for (i = 0; !locked && i < MAX_FEB_SIZE; i++) {
2479*4882a593Smuzhiyun if (tb->FEB[i]) {
2480*4882a593Smuzhiyun if (!clear_all_dirty_bits
2481*4882a593Smuzhiyun (tb->tb_sb, tb->FEB[i]))
2482*4882a593Smuzhiyun locked = tb->FEB[i];
2483*4882a593Smuzhiyun }
2484*4882a593Smuzhiyun }
2485*4882a593Smuzhiyun
2486*4882a593Smuzhiyun if (locked) {
2487*4882a593Smuzhiyun int depth;
2488*4882a593Smuzhiyun #ifdef CONFIG_REISERFS_CHECK
2489*4882a593Smuzhiyun repeat_counter++;
2490*4882a593Smuzhiyun if ((repeat_counter % 10000) == 0) {
2491*4882a593Smuzhiyun reiserfs_warning(tb->tb_sb, "reiserfs-8200",
2492*4882a593Smuzhiyun "too many iterations waiting "
2493*4882a593Smuzhiyun "for buffer to unlock "
2494*4882a593Smuzhiyun "(%b)", locked);
2495*4882a593Smuzhiyun
2496*4882a593Smuzhiyun /* Don't loop forever. Try to recover from possible error. */
2497*4882a593Smuzhiyun
2498*4882a593Smuzhiyun return (FILESYSTEM_CHANGED_TB(tb)) ?
2499*4882a593Smuzhiyun REPEAT_SEARCH : CARRY_ON;
2500*4882a593Smuzhiyun }
2501*4882a593Smuzhiyun #endif
2502*4882a593Smuzhiyun depth = reiserfs_write_unlock_nested(tb->tb_sb);
2503*4882a593Smuzhiyun __wait_on_buffer(locked);
2504*4882a593Smuzhiyun reiserfs_write_lock_nested(tb->tb_sb, depth);
2505*4882a593Smuzhiyun if (FILESYSTEM_CHANGED_TB(tb))
2506*4882a593Smuzhiyun return REPEAT_SEARCH;
2507*4882a593Smuzhiyun }
2508*4882a593Smuzhiyun
2509*4882a593Smuzhiyun } while (locked);
2510*4882a593Smuzhiyun
2511*4882a593Smuzhiyun return CARRY_ON;
2512*4882a593Smuzhiyun }
2513*4882a593Smuzhiyun
2514*4882a593Smuzhiyun /*
2515*4882a593Smuzhiyun * Prepare for balancing, that is
2516*4882a593Smuzhiyun * get all necessary parents, and neighbors;
2517*4882a593Smuzhiyun * analyze what and where should be moved;
2518*4882a593Smuzhiyun * get sufficient number of new nodes;
2519*4882a593Smuzhiyun * Balancing will start only after all resources will be collected at a time.
2520*4882a593Smuzhiyun *
2521*4882a593Smuzhiyun * When ported to SMP kernels, only at the last moment after all needed nodes
2522*4882a593Smuzhiyun * are collected in cache, will the resources be locked using the usual
2523*4882a593Smuzhiyun * textbook ordered lock acquisition algorithms. Note that ensuring that
2524*4882a593Smuzhiyun * this code neither write locks what it does not need to write lock nor locks
2525*4882a593Smuzhiyun * out of order will be a pain in the butt that could have been avoided.
2526*4882a593Smuzhiyun * Grumble grumble. -Hans
2527*4882a593Smuzhiyun *
2528*4882a593Smuzhiyun * fix is meant in the sense of render unchanging
2529*4882a593Smuzhiyun *
2530*4882a593Smuzhiyun * Latency might be improved by first gathering a list of what buffers
2531*4882a593Smuzhiyun * are needed and then getting as many of them in parallel as possible? -Hans
2532*4882a593Smuzhiyun *
2533*4882a593Smuzhiyun * Parameters:
2534*4882a593Smuzhiyun * op_mode i - insert, d - delete, c - cut (truncate), p - paste (append)
2535*4882a593Smuzhiyun * tb tree_balance structure;
2536*4882a593Smuzhiyun * inum item number in S[h];
2537*4882a593Smuzhiyun * pos_in_item - comment this if you can
2538*4882a593Smuzhiyun * ins_ih item head of item being inserted
2539*4882a593Smuzhiyun * data inserted item or data to be pasted
2540*4882a593Smuzhiyun * Returns: 1 - schedule occurred while the function worked;
2541*4882a593Smuzhiyun * 0 - schedule didn't occur while the function worked;
2542*4882a593Smuzhiyun * -1 - if no_disk_space
2543*4882a593Smuzhiyun */
2544*4882a593Smuzhiyun
fix_nodes(int op_mode,struct tree_balance * tb,struct item_head * ins_ih,const void * data)2545*4882a593Smuzhiyun int fix_nodes(int op_mode, struct tree_balance *tb,
2546*4882a593Smuzhiyun struct item_head *ins_ih, const void *data)
2547*4882a593Smuzhiyun {
2548*4882a593Smuzhiyun int ret, h, item_num = PATH_LAST_POSITION(tb->tb_path);
2549*4882a593Smuzhiyun int pos_in_item;
2550*4882a593Smuzhiyun
2551*4882a593Smuzhiyun /*
2552*4882a593Smuzhiyun * we set wait_tb_buffers_run when we have to restore any dirty
2553*4882a593Smuzhiyun * bits cleared during wait_tb_buffers_run
2554*4882a593Smuzhiyun */
2555*4882a593Smuzhiyun int wait_tb_buffers_run = 0;
2556*4882a593Smuzhiyun struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
2557*4882a593Smuzhiyun
2558*4882a593Smuzhiyun ++REISERFS_SB(tb->tb_sb)->s_fix_nodes;
2559*4882a593Smuzhiyun
2560*4882a593Smuzhiyun pos_in_item = tb->tb_path->pos_in_item;
2561*4882a593Smuzhiyun
2562*4882a593Smuzhiyun tb->fs_gen = get_generation(tb->tb_sb);
2563*4882a593Smuzhiyun
2564*4882a593Smuzhiyun /*
2565*4882a593Smuzhiyun * we prepare and log the super here so it will already be in the
2566*4882a593Smuzhiyun * transaction when do_balance needs to change it.
2567*4882a593Smuzhiyun * This way do_balance won't have to schedule when trying to prepare
2568*4882a593Smuzhiyun * the super for logging
2569*4882a593Smuzhiyun */
2570*4882a593Smuzhiyun reiserfs_prepare_for_journal(tb->tb_sb,
2571*4882a593Smuzhiyun SB_BUFFER_WITH_SB(tb->tb_sb), 1);
2572*4882a593Smuzhiyun journal_mark_dirty(tb->transaction_handle,
2573*4882a593Smuzhiyun SB_BUFFER_WITH_SB(tb->tb_sb));
2574*4882a593Smuzhiyun if (FILESYSTEM_CHANGED_TB(tb))
2575*4882a593Smuzhiyun return REPEAT_SEARCH;
2576*4882a593Smuzhiyun
2577*4882a593Smuzhiyun /* if it possible in indirect_to_direct conversion */
2578*4882a593Smuzhiyun if (buffer_locked(tbS0)) {
2579*4882a593Smuzhiyun int depth = reiserfs_write_unlock_nested(tb->tb_sb);
2580*4882a593Smuzhiyun __wait_on_buffer(tbS0);
2581*4882a593Smuzhiyun reiserfs_write_lock_nested(tb->tb_sb, depth);
2582*4882a593Smuzhiyun if (FILESYSTEM_CHANGED_TB(tb))
2583*4882a593Smuzhiyun return REPEAT_SEARCH;
2584*4882a593Smuzhiyun }
2585*4882a593Smuzhiyun #ifdef CONFIG_REISERFS_CHECK
2586*4882a593Smuzhiyun if (REISERFS_SB(tb->tb_sb)->cur_tb) {
2587*4882a593Smuzhiyun print_cur_tb("fix_nodes");
2588*4882a593Smuzhiyun reiserfs_panic(tb->tb_sb, "PAP-8305",
2589*4882a593Smuzhiyun "there is pending do_balance");
2590*4882a593Smuzhiyun }
2591*4882a593Smuzhiyun
2592*4882a593Smuzhiyun if (!buffer_uptodate(tbS0) || !B_IS_IN_TREE(tbS0))
2593*4882a593Smuzhiyun reiserfs_panic(tb->tb_sb, "PAP-8320", "S[0] (%b %z) is "
2594*4882a593Smuzhiyun "not uptodate at the beginning of fix_nodes "
2595*4882a593Smuzhiyun "or not in tree (mode %c)",
2596*4882a593Smuzhiyun tbS0, tbS0, op_mode);
2597*4882a593Smuzhiyun
2598*4882a593Smuzhiyun /* Check parameters. */
2599*4882a593Smuzhiyun switch (op_mode) {
2600*4882a593Smuzhiyun case M_INSERT:
2601*4882a593Smuzhiyun if (item_num <= 0 || item_num > B_NR_ITEMS(tbS0))
2602*4882a593Smuzhiyun reiserfs_panic(tb->tb_sb, "PAP-8330", "Incorrect "
2603*4882a593Smuzhiyun "item number %d (in S0 - %d) in case "
2604*4882a593Smuzhiyun "of insert", item_num,
2605*4882a593Smuzhiyun B_NR_ITEMS(tbS0));
2606*4882a593Smuzhiyun break;
2607*4882a593Smuzhiyun case M_PASTE:
2608*4882a593Smuzhiyun case M_DELETE:
2609*4882a593Smuzhiyun case M_CUT:
2610*4882a593Smuzhiyun if (item_num < 0 || item_num >= B_NR_ITEMS(tbS0)) {
2611*4882a593Smuzhiyun print_block(tbS0, 0, -1, -1);
2612*4882a593Smuzhiyun reiserfs_panic(tb->tb_sb, "PAP-8335", "Incorrect "
2613*4882a593Smuzhiyun "item number(%d); mode = %c "
2614*4882a593Smuzhiyun "insert_size = %d",
2615*4882a593Smuzhiyun item_num, op_mode,
2616*4882a593Smuzhiyun tb->insert_size[0]);
2617*4882a593Smuzhiyun }
2618*4882a593Smuzhiyun break;
2619*4882a593Smuzhiyun default:
2620*4882a593Smuzhiyun reiserfs_panic(tb->tb_sb, "PAP-8340", "Incorrect mode "
2621*4882a593Smuzhiyun "of operation");
2622*4882a593Smuzhiyun }
2623*4882a593Smuzhiyun #endif
2624*4882a593Smuzhiyun
2625*4882a593Smuzhiyun if (get_mem_for_virtual_node(tb) == REPEAT_SEARCH)
2626*4882a593Smuzhiyun /* FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat */
2627*4882a593Smuzhiyun return REPEAT_SEARCH;
2628*4882a593Smuzhiyun
2629*4882a593Smuzhiyun /* Starting from the leaf level; for all levels h of the tree. */
2630*4882a593Smuzhiyun for (h = 0; h < MAX_HEIGHT && tb->insert_size[h]; h++) {
2631*4882a593Smuzhiyun ret = get_direct_parent(tb, h);
2632*4882a593Smuzhiyun if (ret != CARRY_ON)
2633*4882a593Smuzhiyun goto repeat;
2634*4882a593Smuzhiyun
2635*4882a593Smuzhiyun ret = check_balance(op_mode, tb, h, item_num,
2636*4882a593Smuzhiyun pos_in_item, ins_ih, data);
2637*4882a593Smuzhiyun if (ret != CARRY_ON) {
2638*4882a593Smuzhiyun if (ret == NO_BALANCING_NEEDED) {
2639*4882a593Smuzhiyun /* No balancing for higher levels needed. */
2640*4882a593Smuzhiyun ret = get_neighbors(tb, h);
2641*4882a593Smuzhiyun if (ret != CARRY_ON)
2642*4882a593Smuzhiyun goto repeat;
2643*4882a593Smuzhiyun if (h != MAX_HEIGHT - 1)
2644*4882a593Smuzhiyun tb->insert_size[h + 1] = 0;
2645*4882a593Smuzhiyun /*
2646*4882a593Smuzhiyun * ok, analysis and resource gathering
2647*4882a593Smuzhiyun * are complete
2648*4882a593Smuzhiyun */
2649*4882a593Smuzhiyun break;
2650*4882a593Smuzhiyun }
2651*4882a593Smuzhiyun goto repeat;
2652*4882a593Smuzhiyun }
2653*4882a593Smuzhiyun
2654*4882a593Smuzhiyun ret = get_neighbors(tb, h);
2655*4882a593Smuzhiyun if (ret != CARRY_ON)
2656*4882a593Smuzhiyun goto repeat;
2657*4882a593Smuzhiyun
2658*4882a593Smuzhiyun /*
2659*4882a593Smuzhiyun * No disk space, or schedule occurred and analysis may be
2660*4882a593Smuzhiyun * invalid and needs to be redone.
2661*4882a593Smuzhiyun */
2662*4882a593Smuzhiyun ret = get_empty_nodes(tb, h);
2663*4882a593Smuzhiyun if (ret != CARRY_ON)
2664*4882a593Smuzhiyun goto repeat;
2665*4882a593Smuzhiyun
2666*4882a593Smuzhiyun /*
2667*4882a593Smuzhiyun * We have a positive insert size but no nodes exist on this
2668*4882a593Smuzhiyun * level, this means that we are creating a new root.
2669*4882a593Smuzhiyun */
2670*4882a593Smuzhiyun if (!PATH_H_PBUFFER(tb->tb_path, h)) {
2671*4882a593Smuzhiyun
2672*4882a593Smuzhiyun RFALSE(tb->blknum[h] != 1,
2673*4882a593Smuzhiyun "PAP-8350: creating new empty root");
2674*4882a593Smuzhiyun
2675*4882a593Smuzhiyun if (h < MAX_HEIGHT - 1)
2676*4882a593Smuzhiyun tb->insert_size[h + 1] = 0;
2677*4882a593Smuzhiyun } else if (!PATH_H_PBUFFER(tb->tb_path, h + 1)) {
2678*4882a593Smuzhiyun /*
2679*4882a593Smuzhiyun * The tree needs to be grown, so this node S[h]
2680*4882a593Smuzhiyun * which is the root node is split into two nodes,
2681*4882a593Smuzhiyun * and a new node (S[h+1]) will be created to
2682*4882a593Smuzhiyun * become the root node.
2683*4882a593Smuzhiyun */
2684*4882a593Smuzhiyun if (tb->blknum[h] > 1) {
2685*4882a593Smuzhiyun
2686*4882a593Smuzhiyun RFALSE(h == MAX_HEIGHT - 1,
2687*4882a593Smuzhiyun "PAP-8355: attempt to create too high of a tree");
2688*4882a593Smuzhiyun
2689*4882a593Smuzhiyun tb->insert_size[h + 1] =
2690*4882a593Smuzhiyun (DC_SIZE +
2691*4882a593Smuzhiyun KEY_SIZE) * (tb->blknum[h] - 1) +
2692*4882a593Smuzhiyun DC_SIZE;
2693*4882a593Smuzhiyun } else if (h < MAX_HEIGHT - 1)
2694*4882a593Smuzhiyun tb->insert_size[h + 1] = 0;
2695*4882a593Smuzhiyun } else
2696*4882a593Smuzhiyun tb->insert_size[h + 1] =
2697*4882a593Smuzhiyun (DC_SIZE + KEY_SIZE) * (tb->blknum[h] - 1);
2698*4882a593Smuzhiyun }
2699*4882a593Smuzhiyun
2700*4882a593Smuzhiyun ret = wait_tb_buffers_until_unlocked(tb);
2701*4882a593Smuzhiyun if (ret == CARRY_ON) {
2702*4882a593Smuzhiyun if (FILESYSTEM_CHANGED_TB(tb)) {
2703*4882a593Smuzhiyun wait_tb_buffers_run = 1;
2704*4882a593Smuzhiyun ret = REPEAT_SEARCH;
2705*4882a593Smuzhiyun goto repeat;
2706*4882a593Smuzhiyun } else {
2707*4882a593Smuzhiyun return CARRY_ON;
2708*4882a593Smuzhiyun }
2709*4882a593Smuzhiyun } else {
2710*4882a593Smuzhiyun wait_tb_buffers_run = 1;
2711*4882a593Smuzhiyun goto repeat;
2712*4882a593Smuzhiyun }
2713*4882a593Smuzhiyun
2714*4882a593Smuzhiyun repeat:
2715*4882a593Smuzhiyun /*
2716*4882a593Smuzhiyun * fix_nodes was unable to perform its calculation due to
2717*4882a593Smuzhiyun * filesystem got changed under us, lack of free disk space or i/o
2718*4882a593Smuzhiyun * failure. If the first is the case - the search will be
2719*4882a593Smuzhiyun * repeated. For now - free all resources acquired so far except
2720*4882a593Smuzhiyun * for the new allocated nodes
2721*4882a593Smuzhiyun */
2722*4882a593Smuzhiyun {
2723*4882a593Smuzhiyun int i;
2724*4882a593Smuzhiyun
2725*4882a593Smuzhiyun /* Release path buffers. */
2726*4882a593Smuzhiyun if (wait_tb_buffers_run) {
2727*4882a593Smuzhiyun pathrelse_and_restore(tb->tb_sb, tb->tb_path);
2728*4882a593Smuzhiyun } else {
2729*4882a593Smuzhiyun pathrelse(tb->tb_path);
2730*4882a593Smuzhiyun }
2731*4882a593Smuzhiyun /* brelse all resources collected for balancing */
2732*4882a593Smuzhiyun for (i = 0; i < MAX_HEIGHT; i++) {
2733*4882a593Smuzhiyun if (wait_tb_buffers_run) {
2734*4882a593Smuzhiyun reiserfs_restore_prepared_buffer(tb->tb_sb,
2735*4882a593Smuzhiyun tb->L[i]);
2736*4882a593Smuzhiyun reiserfs_restore_prepared_buffer(tb->tb_sb,
2737*4882a593Smuzhiyun tb->R[i]);
2738*4882a593Smuzhiyun reiserfs_restore_prepared_buffer(tb->tb_sb,
2739*4882a593Smuzhiyun tb->FL[i]);
2740*4882a593Smuzhiyun reiserfs_restore_prepared_buffer(tb->tb_sb,
2741*4882a593Smuzhiyun tb->FR[i]);
2742*4882a593Smuzhiyun reiserfs_restore_prepared_buffer(tb->tb_sb,
2743*4882a593Smuzhiyun tb->
2744*4882a593Smuzhiyun CFL[i]);
2745*4882a593Smuzhiyun reiserfs_restore_prepared_buffer(tb->tb_sb,
2746*4882a593Smuzhiyun tb->
2747*4882a593Smuzhiyun CFR[i]);
2748*4882a593Smuzhiyun }
2749*4882a593Smuzhiyun
2750*4882a593Smuzhiyun brelse(tb->L[i]);
2751*4882a593Smuzhiyun brelse(tb->R[i]);
2752*4882a593Smuzhiyun brelse(tb->FL[i]);
2753*4882a593Smuzhiyun brelse(tb->FR[i]);
2754*4882a593Smuzhiyun brelse(tb->CFL[i]);
2755*4882a593Smuzhiyun brelse(tb->CFR[i]);
2756*4882a593Smuzhiyun
2757*4882a593Smuzhiyun tb->L[i] = NULL;
2758*4882a593Smuzhiyun tb->R[i] = NULL;
2759*4882a593Smuzhiyun tb->FL[i] = NULL;
2760*4882a593Smuzhiyun tb->FR[i] = NULL;
2761*4882a593Smuzhiyun tb->CFL[i] = NULL;
2762*4882a593Smuzhiyun tb->CFR[i] = NULL;
2763*4882a593Smuzhiyun }
2764*4882a593Smuzhiyun
2765*4882a593Smuzhiyun if (wait_tb_buffers_run) {
2766*4882a593Smuzhiyun for (i = 0; i < MAX_FEB_SIZE; i++) {
2767*4882a593Smuzhiyun if (tb->FEB[i])
2768*4882a593Smuzhiyun reiserfs_restore_prepared_buffer
2769*4882a593Smuzhiyun (tb->tb_sb, tb->FEB[i]);
2770*4882a593Smuzhiyun }
2771*4882a593Smuzhiyun }
2772*4882a593Smuzhiyun return ret;
2773*4882a593Smuzhiyun }
2774*4882a593Smuzhiyun
2775*4882a593Smuzhiyun }
2776*4882a593Smuzhiyun
unfix_nodes(struct tree_balance * tb)2777*4882a593Smuzhiyun void unfix_nodes(struct tree_balance *tb)
2778*4882a593Smuzhiyun {
2779*4882a593Smuzhiyun int i;
2780*4882a593Smuzhiyun
2781*4882a593Smuzhiyun /* Release path buffers. */
2782*4882a593Smuzhiyun pathrelse_and_restore(tb->tb_sb, tb->tb_path);
2783*4882a593Smuzhiyun
2784*4882a593Smuzhiyun /* brelse all resources collected for balancing */
2785*4882a593Smuzhiyun for (i = 0; i < MAX_HEIGHT; i++) {
2786*4882a593Smuzhiyun reiserfs_restore_prepared_buffer(tb->tb_sb, tb->L[i]);
2787*4882a593Smuzhiyun reiserfs_restore_prepared_buffer(tb->tb_sb, tb->R[i]);
2788*4882a593Smuzhiyun reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FL[i]);
2789*4882a593Smuzhiyun reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FR[i]);
2790*4882a593Smuzhiyun reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFL[i]);
2791*4882a593Smuzhiyun reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFR[i]);
2792*4882a593Smuzhiyun
2793*4882a593Smuzhiyun brelse(tb->L[i]);
2794*4882a593Smuzhiyun brelse(tb->R[i]);
2795*4882a593Smuzhiyun brelse(tb->FL[i]);
2796*4882a593Smuzhiyun brelse(tb->FR[i]);
2797*4882a593Smuzhiyun brelse(tb->CFL[i]);
2798*4882a593Smuzhiyun brelse(tb->CFR[i]);
2799*4882a593Smuzhiyun }
2800*4882a593Smuzhiyun
2801*4882a593Smuzhiyun /* deal with list of allocated (used and unused) nodes */
2802*4882a593Smuzhiyun for (i = 0; i < MAX_FEB_SIZE; i++) {
2803*4882a593Smuzhiyun if (tb->FEB[i]) {
2804*4882a593Smuzhiyun b_blocknr_t blocknr = tb->FEB[i]->b_blocknr;
2805*4882a593Smuzhiyun /*
2806*4882a593Smuzhiyun * de-allocated block which was not used by
2807*4882a593Smuzhiyun * balancing and bforget about buffer for it
2808*4882a593Smuzhiyun */
2809*4882a593Smuzhiyun brelse(tb->FEB[i]);
2810*4882a593Smuzhiyun reiserfs_free_block(tb->transaction_handle, NULL,
2811*4882a593Smuzhiyun blocknr, 0);
2812*4882a593Smuzhiyun }
2813*4882a593Smuzhiyun if (tb->used[i]) {
2814*4882a593Smuzhiyun /* release used as new nodes including a new root */
2815*4882a593Smuzhiyun brelse(tb->used[i]);
2816*4882a593Smuzhiyun }
2817*4882a593Smuzhiyun }
2818*4882a593Smuzhiyun
2819*4882a593Smuzhiyun kfree(tb->vn_buf);
2820*4882a593Smuzhiyun
2821*4882a593Smuzhiyun }
2822