xref: /OK3568_Linux_fs/kernel/drivers/block/drbd/drbd_worker.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
   drbd_worker.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.


*/

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/sched/signal.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include <linux/part_stat.h>

#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"

static int make_ov_request(struct drbd_device *, int);
static int make_resync_request(struct drbd_device *, int);

/* endio handlers:
 *   drbd_md_endio (defined here)
 *   drbd_request_endio (defined here)
 *   drbd_peer_request_endio (defined here)
 *   drbd_bm_endio (defined in drbd_bitmap.c)
 *
 * For all these callbacks, note the following:
 * The callbacks will be called in irq context by the IDE drivers,
 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
 * Try to get the locking right :)
 *
 */

/* used for synchronous meta data and bitmap IO
 * submitted by drbd_md_sync_page_io()
 */
void drbd_md_endio(struct bio *bio)
{
	struct drbd_device *device;

	device = bio->bi_private;
	device->md_io.error = blk_status_to_errno(bio->bi_status);

	/* special case: drbd_md_read() during drbd_adm_attach() */
	if (device->ldev)
		put_ldev(device);
	bio_put(bio);

	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
	 * to timeout on the lower level device, and eventually detach from it.
	 * If this io completion runs after that timeout expired, this
	 * drbd_md_put_buffer() may allow us to finally try and re-attach.
	 * During normal operation, this only puts that extra reference
	 * down to 1 again.
	 * Make sure we first drop the reference, and only then signal
	 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
	 * next drbd_md_sync_page_io(), that we trigger the
	 * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
	 */
	drbd_md_put_buffer(device);
	device->md_io.done = 1;
	wake_up(&device->misc_wait);
}

/* reads on behalf of the partner,
 * "submitted" by the receiver
 */
static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;

	spin_lock_irqsave(&device->resource->req_lock, flags);
	device->read_cnt += peer_req->i.size >> 9;
	list_del(&peer_req->w.list);
	if (list_empty(&device->read_ee))
		wake_up(&device->ee_wait);
	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
		__drbd_chk_io_error(device, DRBD_READ_ERROR);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);

	drbd_queue_work(&peer_device->connection->sender_work, &peer_req->w);
	put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver, final stage.  */
void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	struct drbd_connection *connection = peer_device->connection;
	struct drbd_interval i;
	int do_wake;
	u64 block_id;
	int do_al_complete_io;

	/* after we moved peer_req to done_ee,
	 * we may no longer access it,
	 * it may be freed/reused already!
	 * (as soon as we release the req_lock) */
	i = peer_req->i;
	do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
	block_id = peer_req->block_id;
	peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;

	if (peer_req->flags & EE_WAS_ERROR) {
		/* In protocol != C, we usually do not send write acks.
		 * In case of a write error, send the neg ack anyway. */
		if (!__test_and_set_bit(__EE_SEND_WRITE_ACK, &peer_req->flags))
			inc_unacked(device);
		drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
	}

	spin_lock_irqsave(&device->resource->req_lock, flags);
	device->writ_cnt += peer_req->i.size >> 9;
	list_move_tail(&peer_req->w.list, &device->done_ee);

	/*
	 * Do not remove from the write_requests tree here: we did not send the
	 * Ack yet and did not wake possibly waiting conflicting requests.
	 * Removed from the tree from "drbd_process_done_ee" within the
	 * appropriate dw.cb (e_end_block/e_end_resync_block) or from
	 * _drbd_clear_done_ee.
	 */

	do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);

	/* FIXME do we want to detach for failed REQ_OP_DISCARD?
	 * ((peer_req->flags & (EE_WAS_ERROR|EE_TRIM)) == EE_WAS_ERROR) */
	if (peer_req->flags & EE_WAS_ERROR)
		__drbd_chk_io_error(device, DRBD_WRITE_ERROR);

	if (connection->cstate >= C_WF_REPORT_PARAMS) {
		kref_get(&device->kref); /* put is in drbd_send_acks_wf() */
		if (!queue_work(connection->ack_sender, &peer_device->send_acks_work))
			kref_put(&device->kref, drbd_destroy_device);
	}
	spin_unlock_irqrestore(&device->resource->req_lock, flags);

	if (block_id == ID_SYNCER)
		drbd_rs_complete_io(device, i.sector);

	if (do_wake)
		wake_up(&device->ee_wait);

	if (do_al_complete_io)
		drbd_al_complete_io(device, &i);

	put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver.
 */
void drbd_peer_request_endio(struct bio *bio)
{
	struct drbd_peer_request *peer_req = bio->bi_private;
	struct drbd_device *device = peer_req->peer_device->device;
	bool is_write = bio_data_dir(bio) == WRITE;
	bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES ||
			  bio_op(bio) == REQ_OP_DISCARD;

	if (bio->bi_status && __ratelimit(&drbd_ratelimit_state))
		drbd_warn(device, "%s: error=%d s=%llus\n",
				is_write ? (is_discard ? "discard" : "write")
					: "read", bio->bi_status,
				(unsigned long long)peer_req->i.sector);

	if (bio->bi_status)
		set_bit(__EE_WAS_ERROR, &peer_req->flags);

	bio_put(bio); /* no need for the bio anymore */
	if (atomic_dec_and_test(&peer_req->pending_bios)) {
		if (is_write)
			drbd_endio_write_sec_final(peer_req);
		else
			drbd_endio_read_sec_final(peer_req);
	}
}

static void
drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device)
{
	panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n",
		device->minor, device->resource->name, device->vnr);
}

/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
 */
void drbd_request_endio(struct bio *bio)
{
	unsigned long flags;
	struct drbd_request *req = bio->bi_private;
	struct drbd_device *device = req->device;
	struct bio_and_error m;
	enum drbd_req_event what;

	/* If this request was aborted locally before,
	 * but now was completed "successfully",
	 * chances are that this caused arbitrary data corruption.
	 *
	 * "aborting" requests, or force-detaching the disk, is intended for
	 * completely blocked/hung local backing devices which do no longer
	 * complete requests at all, not even do error completions.  In this
	 * situation, usually a hard-reset and failover is the only way out.
	 *
	 * By "aborting", basically faking a local error-completion,
	 * we allow for a more graceful switchover by cleanly migrating services.
	 * Still the affected node has to be rebooted "soon".
	 *
	 * By completing these requests, we allow the upper layers to re-use
	 * the associated data pages.
	 *
	 * If later the local backing device "recovers", and now DMAs some data
	 * from disk into the original request pages, in the best case it will
	 * just put random data into unused pages; but typically it will corrupt
	 * meanwhile completely unrelated data, causing all sorts of damage.
	 *
	 * Which means delayed successful completion,
	 * especially for READ requests,
	 * is a reason to panic().
	 *
	 * We assume that a delayed *error* completion is OK,
	 * though we still will complain noisily about it.
	 */
	if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");

		if (!bio->bi_status)
			drbd_panic_after_delayed_completion_of_aborted_request(device);
	}

	/* to avoid recursion in __req_mod */
	if (unlikely(bio->bi_status)) {
		switch (bio_op(bio)) {
		case REQ_OP_WRITE_ZEROES:
		case REQ_OP_DISCARD:
			if (bio->bi_status == BLK_STS_NOTSUPP)
				what = DISCARD_COMPLETED_NOTSUPP;
			else
				what = DISCARD_COMPLETED_WITH_ERROR;
			break;
		case REQ_OP_READ:
			if (bio->bi_opf & REQ_RAHEAD)
				what = READ_AHEAD_COMPLETED_WITH_ERROR;
			else
				what = READ_COMPLETED_WITH_ERROR;
			break;
		default:
			what = WRITE_COMPLETED_WITH_ERROR;
			break;
		}
	} else {
		what = COMPLETED_OK;
	}

	req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status));
	bio_put(bio);

	/* not req_mod(), we need irqsave here! */
	spin_lock_irqsave(&device->resource->req_lock, flags);
	__req_mod(req, what, &m);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);
	put_ldev(device);

	if (m.bio)
		complete_master_bio(device, &m);
}

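/* Compute a checksum over the page chain of a peer request.
 * The digest buffer must hold at least crypto_shash_digestsize(tfm) bytes;
 * see w_e_send_csum() for a typical caller.
 */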
void drbd_csum_ee(struct crypto_shash *tfm, struct drbd_peer_request *peer_req, void *digest)
{
	SHASH_DESC_ON_STACK(desc, tfm);
	struct page *page = peer_req->pages;
	struct page *tmp;
	unsigned len;
	void *src;

	desc->tfm = tfm;

	crypto_shash_init(desc);

	src = kmap_atomic(page);
	while ((tmp = page_chain_next(page))) {
		/* all but the last page will be fully used */
		crypto_shash_update(desc, src, PAGE_SIZE);
		kunmap_atomic(src);
		page = tmp;
		src = kmap_atomic(page);
	}
	/* and now the last, possibly only partially used page */
	len = peer_req->i.size & (PAGE_SIZE - 1);
	crypto_shash_update(desc, src, len ?: PAGE_SIZE);
	kunmap_atomic(src);

	crypto_shash_final(desc, digest);
	shash_desc_zero(desc);
}

void drbd_csum_bio(struct crypto_shash *tfm, struct bio *bio, void *digest)
{
	SHASH_DESC_ON_STACK(desc, tfm);
	struct bio_vec bvec;
	struct bvec_iter iter;

	desc->tfm = tfm;

	crypto_shash_init(desc);

	bio_for_each_segment(bvec, bio, iter) {
		u8 *src;

		src = kmap_atomic(bvec.bv_page);
		crypto_shash_update(desc, src + bvec.bv_offset, bvec.bv_len);
		kunmap_atomic(src);

		/* REQ_OP_WRITE_SAME has only one segment,
		 * checksum the payload only once. */
		if (bio_op(bio) == REQ_OP_WRITE_SAME)
			break;
	}
	crypto_shash_final(desc, digest);
	shash_desc_zero(desc);
}

/* MAYBE merge common code with w_e_end_ov_req */
static int w_e_send_csum(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	int digest_size;
	void *digest;
	int err = 0;

	if (unlikely(cancel))
		goto out;

	if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
		goto out;

	digest_size = crypto_shash_digestsize(peer_device->connection->csums_tfm);
	digest = kmalloc(digest_size, GFP_NOIO);
	if (digest) {
		sector_t sector = peer_req->i.sector;
		unsigned int size = peer_req->i.size;
		drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
		/* Free peer_req and pages before send.
		 * In case we block on congestion, we could otherwise run into
		 * some distributed deadlock, if the other side blocks on
		 * congestion as well, because our receiver blocks in
		 * drbd_alloc_pages due to pp_in_use > max_buffers. */
		drbd_free_peer_req(device, peer_req);
		peer_req = NULL;
		inc_rs_pending(device);
		err = drbd_send_drequest_csum(peer_device, sector, size,
					      digest, digest_size,
					      P_CSUM_RS_REQUEST);
		kfree(digest);
	} else {
		drbd_err(device, "kmalloc() of digest failed.\n");
		err = -ENOMEM;
	}

out:
	if (peer_req)
		drbd_free_peer_req(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_drequest(..., csum) failed\n");
	return err;
}

#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

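/* Submit a local read of [sector, sector + size) so that w_e_send_csum()
 * can send its checksum to the peer (checksum-based resync).
 * Returns 0 on success, -EIO if the local disk is gone, and -EAGAIN if
 * allocation or submission failed and the caller should retry later.
 */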
static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	if (!get_ldev(device))
		return -EIO;

	/* GFP_TRY, because if there is no memory available right now, this may
	 * be rescheduled for later. It is "only" background resync, after all. */
	peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
				       size, size, GFP_TRY);
	if (!peer_req)
		goto defer;

	peer_req->w.cb = w_e_send_csum;
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	atomic_add(size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0,
				     DRBD_FAULT_RS_RD) == 0)
		return 0;

	/* If it failed because of ENOMEM, retry should help.  If it failed
	 * because bio_add_page failed (probably broken lower level driver),
	 * retry may or may not help.
	 * If it does not, you may need to force disconnect. */
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
defer:
	put_ldev(device);
	return -EAGAIN;
}

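/* Worker callback queued by resync_timer_fn(): depending on the current
 * connection state, generate the next batch of online-verify or resync
 * requests.
 */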
int w_resync_timer(struct drbd_work *w, int cancel)
{
	struct drbd_device *device =
		container_of(w, struct drbd_device, resync_work);

	switch (device->state.conn) {
	case C_VERIFY_S:
		make_ov_request(device, cancel);
		break;
	case C_SYNC_TARGET:
		make_resync_request(device, cancel);
		break;
	}

	return 0;
}

void resync_timer_fn(struct timer_list *t)
{
	struct drbd_device *device = from_timer(device, t, resync_timer);

	drbd_queue_work_if_unqueued(
		&first_peer_device(device)->connection->sender_work,
		&device->resync_work);
}

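/* The fifo_buffer below is the ring buffer ("plan") used by the dynamic
 * resync-speed controller in drbd_rs_controller(): fifo_push() consumes the
 * correction planned for the current step, fifo_add_val() spreads a new
 * correction over all steps of the plan.
 */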
static void fifo_set(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] = value;
}

static int fifo_push(struct fifo_buffer *fb, int value)
{
	int ov;

	ov = fb->values[fb->head_index];
	fb->values[fb->head_index++] = value;

	if (fb->head_index >= fb->size)
		fb->head_index = 0;

	return ov;
}

static void fifo_add_val(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] += value;
}

struct fifo_buffer *fifo_alloc(unsigned int fifo_size)
{
	struct fifo_buffer *fb;

	fb = kzalloc(struct_size(fb, values, fifo_size), GFP_NOIO);
	if (!fb)
		return NULL;

	fb->head_index = 0;
	fb->size = fifo_size;
	fb->total = 0;

	return fb;
}

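/* Dynamic resync-speed controller ("c-plan-ahead"), run once per SLEEP_TIME
 * step; sect_in is the number of sectors that came back from the peer since
 * the last step.  It computes how many sectors should be in flight (either
 * the configured c_fill_target, or derived from the measured rate and
 * c_delay_target), spreads the needed correction over plan->size steps via
 * the fifo above, and caps the per-step request volume at c_max_rate.
 * (Assuming the rate settings are given in KiB/s as elsewhere in DRBD, the
 * "* 2" factors below convert KiB to 512-byte sectors.)
 */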
static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
{
	struct disk_conf *dc;
	unsigned int want;     /* The number of sectors we want in-flight */
	int req_sect; /* Number of sectors to request in this turn */
	int correction; /* Number of sectors more we need in-flight */
	int cps; /* correction per invocation of drbd_rs_controller() */
	int steps; /* Number of time steps to plan ahead */
	int curr_corr;
	int max_sect;
	struct fifo_buffer *plan;

	dc = rcu_dereference(device->ldev->disk_conf);
	plan = rcu_dereference(device->rs_plan_s);

	steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */

	if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
		want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
	} else { /* normal path */
		want = dc->c_fill_target ? dc->c_fill_target :
			sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
	}

	correction = want - device->rs_in_flight - plan->total;

	/* Plan ahead */
	cps = correction / steps;
	fifo_add_val(plan, cps);
	plan->total += cps * steps;

	/* What we do in this step */
	curr_corr = fifo_push(plan, 0);
	plan->total -= curr_corr;

	req_sect = sect_in + curr_corr;
	if (req_sect < 0)
		req_sect = 0;

	max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
	if (req_sect > max_sect)
		req_sect = max_sect;

	/*
	drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
		 sect_in, device->rs_in_flight, want, correction,
		 steps, cps, device->rs_planed, curr_corr, req_sect);
	*/

	return req_sect;
}

static int drbd_rs_number_requests(struct drbd_device *device)
{
	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
	int number, mxb;

	sect_in = atomic_xchg(&device->rs_sect_in, 0);
	device->rs_in_flight -= sect_in;

	rcu_read_lock();
	mxb = drbd_get_max_buffers(device) / 2;
	if (rcu_dereference(device->rs_plan_s)->size) {
		number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9);
		device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
	} else {
		device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
		number = SLEEP_TIME * device->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
	}
	rcu_read_unlock();

	/* Don't have more than "max-buffers"/2 in-flight.
	 * Otherwise we may cause the remote site to stall on drbd_alloc_pages(),
	 * potentially causing a distributed deadlock on congestion during
	 * online-verify or (checksum-based) resync, if max-buffers,
	 * socket buffer sizes and resync rate settings are mis-configured. */

	/* note that "number" is in units of "BM_BLOCK_SIZE" (which is 4k),
	 * mxb (as used here, and in drbd_alloc_pages on the peer) is
	 * "number of pages" (typically also 4k),
	 * but "rs_in_flight" is in "sectors" (512 Byte). */
	if (mxb - device->rs_in_flight/8 < number)
		number = mxb - device->rs_in_flight/8;

	return number;
}

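/* Generate the next batch of resync requests (we are SyncTarget):
 * find dirty bits in the bitmap starting at bm_resync_fo, merge adjacent
 * bits into larger, power-of-two aligned requests up to max_bio_size,
 * and either read the data locally first (checksum-based resync, see
 * read_for_csum()) or send P_RS_DATA_REQUEST / P_RS_THIN_REQ directly.
 * Re-arms the resync timer when more work remains.
 */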
static int make_resync_request(struct drbd_device *const device, int cancel)
{
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
	unsigned long bit;
	sector_t sector;
	const sector_t capacity = get_capacity(device->vdisk);
	int max_bio_size;
	int number, rollback_i, size;
	int align, requeue = 0;
	int i = 0;
	int discard_granularity = 0;

	if (unlikely(cancel))
		return 0;

	if (device->rs_total == 0) {
		/* empty resync? */
		drbd_resync_finished(device);
		return 0;
	}

	if (!get_ldev(device)) {
		/* Since we only need to access device->rsync a
		   get_ldev_if_state(device,D_FAILED) would be sufficient, but
		   to continue resync with a broken disk makes no sense at
		   all */
		drbd_err(device, "Disk broke down during resync!\n");
		return 0;
	}

	if (connection->agreed_features & DRBD_FF_THIN_RESYNC) {
		rcu_read_lock();
		discard_granularity = rcu_dereference(device->ldev->disk_conf)->rs_discard_granularity;
		rcu_read_unlock();
	}

	max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
	number = drbd_rs_number_requests(device);
	if (number <= 0)
		goto requeue;

	for (i = 0; i < number; i++) {
		/* Stop generating RS requests when half of the send buffer is filled,
		 * but notify TCP that we'd like to have more space. */
		mutex_lock(&connection->data.mutex);
		if (connection->data.socket) {
			struct sock *sk = connection->data.socket->sk;
			int queued = sk->sk_wmem_queued;
			int sndbuf = sk->sk_sndbuf;
			if (queued > sndbuf / 2) {
				requeue = 1;
				if (sk->sk_socket)
					set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			}
		} else
			requeue = 1;
		mutex_unlock(&connection->data.mutex);
		if (requeue)
			goto requeue;

next_sector:
		size = BM_BLOCK_SIZE;
		bit  = drbd_bm_find_next(device, device->bm_resync_fo);

		if (bit == DRBD_END_OF_BITMAP) {
			device->bm_resync_fo = drbd_bm_bits(device);
			put_ldev(device);
			return 0;
		}

		sector = BM_BIT_TO_SECT(bit);

		if (drbd_try_rs_begin_io(device, sector)) {
			device->bm_resync_fo = bit;
			goto requeue;
		}
		device->bm_resync_fo = bit + 1;

		if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
			drbd_rs_complete_io(device, sector);
			goto next_sector;
		}

#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
		/* try to find some adjacent bits.
		 * we stop if we have already the maximum req size.
		 *
		 * Additionally always align bigger requests, in order to
		 * be prepared for all stripe sizes of software RAIDs.
		 */
		align = 1;
		rollback_i = i;
		while (i < number) {
			if (size + BM_BLOCK_SIZE > max_bio_size)
				break;

			/* Be always aligned */
			if (sector & ((1<<(align+3))-1))
				break;

			if (discard_granularity && size == discard_granularity)
				break;

			/* do not cross extent boundaries */
			if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
				break;
			/* now, is it actually dirty, after all?
			 * caution, drbd_bm_test_bit is tri-state for some
			 * obscure reason; ( b == 0 ) would get the out-of-band
			 * only accidentally right because of the "oddly sized"
			 * adjustment below */
			if (drbd_bm_test_bit(device, bit+1) != 1)
				break;
			bit++;
			size += BM_BLOCK_SIZE;
			if ((BM_BLOCK_SIZE << align) <= size)
				align++;
			i++;
		}
		/* if we merged some,
		 * reset the offset to start the next drbd_bm_find_next from */
		if (size > BM_BLOCK_SIZE)
			device->bm_resync_fo = bit + 1;
#endif

		/* adjust very last sectors, in case we are oddly sized */
		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		if (device->use_csums) {
			switch (read_for_csum(peer_device, sector, size)) {
			case -EIO: /* Disk failure */
				put_ldev(device);
				return -EIO;
			case -EAGAIN: /* allocation failed, or ldev busy */
				drbd_rs_complete_io(device, sector);
				device->bm_resync_fo = BM_SECT_TO_BIT(sector);
				i = rollback_i;
				goto requeue;
			case 0:
				/* everything ok */
				break;
			default:
				BUG();
			}
		} else {
			int err;

			inc_rs_pending(device);
			err = drbd_send_drequest(peer_device,
						 size == discard_granularity ? P_RS_THIN_REQ : P_RS_DATA_REQUEST,
						 sector, size, ID_SYNCER);
			if (err) {
				drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
				dec_rs_pending(device);
				put_ldev(device);
				return err;
			}
		}
	}

	if (device->bm_resync_fo >= drbd_bm_bits(device)) {
		/* last syncer _request_ was sent,
		 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
		 * next sync group will resume), as soon as we receive the last
		 * resync data block, and the last bit is cleared.
		 * until then resync "work" is "inactive" ...
		 */
		put_ldev(device);
		return 0;
	}

 requeue:
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	put_ldev(device);
	return 0;
}

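/* Online-verify counterpart of make_resync_request() (we are VerifyS):
 * walk linearly from device->ov_position in BM_BLOCK_SIZE chunks, send an
 * online-verify request for each chunk via drbd_send_ov_request(), honor an
 * optional stop sector, and re-arm the resync timer for the next batch.
 */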
static int make_ov_request(struct drbd_device *device, int cancel)
{
	int number, i, size;
	sector_t sector;
	const sector_t capacity = get_capacity(device->vdisk);
	bool stop_sector_reached = false;

	if (unlikely(cancel))
		return 1;

	number = drbd_rs_number_requests(device);

	sector = device->ov_position;
	for (i = 0; i < number; i++) {
		if (sector >= capacity)
			return 1;

		/* We check for "finished" only in the reply path:
		 * w_e_end_ov_reply().
		 * We need to send at least one request out. */
		stop_sector_reached = i > 0
			&& verify_can_do_stop_sector(device)
			&& sector >= device->ov_stop_sector;
		if (stop_sector_reached)
			break;

		size = BM_BLOCK_SIZE;

		if (drbd_try_rs_begin_io(device, sector)) {
			device->ov_position = sector;
			goto requeue;
		}

		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		inc_rs_pending(device);
		if (drbd_send_ov_request(first_peer_device(device), sector, size)) {
			dec_rs_pending(device);
			return 0;
		}
		sector += BM_SECT_PER_BIT;
	}
	device->ov_position = sector;

 requeue:
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	if (i == 0 || !stop_sector_reached)
		mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	return 1;
}

int w_ov_finished(struct drbd_work *w, int cancel)
{
	struct drbd_device_work *dw =
		container_of(w, struct drbd_device_work, w);
	struct drbd_device *device = dw->device;
	kfree(dw);
	ov_out_of_sync_print(device);
	drbd_resync_finished(device);

	return 0;
}

static int w_resync_finished(struct drbd_work *w, int cancel)
{
	struct drbd_device_work *dw =
		container_of(w, struct drbd_device_work, w);
	struct drbd_device *device = dw->device;
	kfree(dw);

	drbd_resync_finished(device);

	return 0;
}

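/* Send a ping and wait for the ping-ack (or for connection loss).
 * Used by drbd_resync_finished() before it evaluates the final
 * replication state.
 */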
static void ping_peer(struct drbd_device *device)
{
	struct drbd_connection *connection = first_peer_device(device)->connection;

	clear_bit(GOT_PING_ACK, &connection->flags);
	request_ping(connection);
	wait_event(connection->ping_wait,
		   test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
}

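/* Called when a resync or online-verify run completes: purge the resync LRU
 * (retrying via the worker if that is not yet possible), report throughput,
 * update the UUID history, transition the device back to C_CONNECTED, and
 * trigger the "out-of-sync", "after-resync-target" or "unfence-peer"
 * handlers as appropriate.
 */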
int drbd_resync_finished(struct drbd_device *device)
{
	struct drbd_connection *connection = first_peer_device(device)->connection;
	unsigned long db, dt, dbdt;
	unsigned long n_oos;
	union drbd_state os, ns;
	struct drbd_device_work *dw;
	char *khelper_cmd = NULL;
	int verify_done = 0;

	/* Remove all elements from the resync LRU. Since future actions
	 * might set bits in the (main) bitmap, then the entries in the
	 * resync LRU would be wrong. */
	if (drbd_rs_del_all(device)) {
		/* In case this is not possible now, most probably because
		 * there are P_RS_DATA_REPLY Packets lingering on the worker's
		 * queue (or even the read operations for those packets
		 * is not finished by now).   Retry in 100ms. */

		schedule_timeout_interruptible(HZ / 10);
		dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC);
		if (dw) {
			dw->w.cb = w_resync_finished;
			dw->device = device;
			drbd_queue_work(&connection->sender_work, &dw->w);
			return 1;
		}
		drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n");
	}

	dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
	if (dt <= 0)
		dt = 1;

	db = device->rs_total;
	/* adjust for verify start and stop sectors, respective reached position */
	if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
		db -= device->ov_left;

	dbdt = Bit2KB(db/dt);
	device->rs_paused /= HZ;

	if (!get_ldev(device))
		goto out;

	ping_peer(device);

	spin_lock_irq(&device->resource->req_lock);
	os = drbd_read_state(device);

	verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);

	/* This protects us against multiple calls (that can happen in the presence
	   of application IO), and against connectivity loss just before we arrive here. */
	if (os.conn <= C_CONNECTED)
		goto out_unlock;

	ns = os;
	ns.conn = C_CONNECTED;

	drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
	     verify_done ? "Online verify" : "Resync",
	     dt + device->rs_paused, device->rs_paused, dbdt);

	n_oos = drbd_bm_total_weight(device);

	if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
		if (n_oos) {
			drbd_alert(device, "Online verify found %lu %dk block out of sync!\n",
			      n_oos, Bit2KB(1));
			khelper_cmd = "out-of-sync";
		}
	} else {
		D_ASSERT(device, (n_oos - device->rs_failed) == 0);

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
			khelper_cmd = "after-resync-target";

		if (device->use_csums && device->rs_total) {
			const unsigned long s = device->rs_same_csum;
			const unsigned long t = device->rs_total;
			const int ratio =
				(t == 0)     ? 0 :
			(t < 100000) ? ((s*100)/t) : (s/(t/100));
			drbd_info(device, "%u %% had equal checksums, eliminated: %luK; "
			     "transferred %luK total %luK\n",
			     ratio,
			     Bit2KB(device->rs_same_csum),
			     Bit2KB(device->rs_total - device->rs_same_csum),
			     Bit2KB(device->rs_total));
		}
	}

	if (device->rs_failed) {
		drbd_info(device, "            %lu failed blocks\n", device->rs_failed);

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
			ns.disk = D_INCONSISTENT;
			ns.pdsk = D_UP_TO_DATE;
		} else {
			ns.disk = D_UP_TO_DATE;
			ns.pdsk = D_INCONSISTENT;
		}
	} else {
		ns.disk = D_UP_TO_DATE;
		ns.pdsk = D_UP_TO_DATE;

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
			if (device->p_uuid) {
				int i;
				for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
					_drbd_uuid_set(device, i, device->p_uuid[i]);
				drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
				_drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
			} else {
				drbd_err(device, "device->p_uuid is NULL! BUG\n");
			}
		}

		if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
			/* for verify runs, we don't update uuids here,
			 * so there would be nothing to report. */
			drbd_uuid_set_bm(device, 0UL);
			drbd_print_uuids(device, "updated UUIDs");
			if (device->p_uuid) {
				/* Now the two UUID sets are equal, update what we
				 * know of the peer. */
				int i;
				for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
					device->p_uuid[i] = device->ldev->md.uuid[i];
			}
		}
	}

	_drbd_set_state(device, ns, CS_VERBOSE, NULL);
out_unlock:
	spin_unlock_irq(&device->resource->req_lock);

	/* If we have been sync source, and have an effective fencing-policy,
	 * once *all* volumes are back in sync, call "unfence". */
	if (os.conn == C_SYNC_SOURCE) {
		enum drbd_disk_state disk_state = D_MASK;
		enum drbd_disk_state pdsk_state = D_MASK;
		enum drbd_fencing_p fp = FP_DONT_CARE;

		rcu_read_lock();
		fp = rcu_dereference(device->ldev->disk_conf)->fencing;
		if (fp != FP_DONT_CARE) {
			struct drbd_peer_device *peer_device;
			int vnr;
			idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
				struct drbd_device *device = peer_device->device;
				disk_state = min_t(enum drbd_disk_state, disk_state, device->state.disk);
				pdsk_state = min_t(enum drbd_disk_state, pdsk_state, device->state.pdsk);
			}
		}
		rcu_read_unlock();
		if (disk_state == D_UP_TO_DATE && pdsk_state == D_UP_TO_DATE)
			conn_khelper(connection, "unfence-peer");
	}

	put_ldev(device);
out:
	device->rs_total  = 0;
	device->rs_failed = 0;
	device->rs_paused = 0;

	/* reset start sector, if we reached end of device */
	if (verify_done && device->ov_left == 0)
		device->ov_start_sector = 0;

	drbd_md_sync(device);

	if (khelper_cmd)
		drbd_khelper(device, khelper_cmd);

	return 1;
}

/* helper: keep pages that are still referenced by the network layer
 * (sendpage not finished) accounted on net_ee, otherwise free the request */
static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req)
{
	if (drbd_peer_req_has_active_page(peer_req)) {
		/* This might happen if sendpage() has not finished */
		int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
		atomic_add(i, &device->pp_in_use_by_net);
		atomic_sub(i, &device->pp_in_use);
		spin_lock_irq(&device->resource->req_lock);
		list_add_tail(&peer_req->w.list, &device->net_ee);
		spin_unlock_irq(&device->resource->req_lock);
		wake_up(&drbd_pp_wait);
	} else
		drbd_free_peer_req(device, peer_req);
}

/**
 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
 * @w:		work object.
 * @cancel:	The connection will be closed anyway
 */
int w_e_end_data_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	int err;

	if (unlikely(cancel)) {
		drbd_free_peer_req(device, peer_req);
		dec_unacked(device);
		return 0;
	}

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Sending NegDReply. sector=%llus.\n",
			    (unsigned long long)peer_req->i.sector);

		err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req);
	}

	dec_unacked(device);

	move_to_net_ee_or_free(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_block() failed\n");
	return err;
}

1086*4882a593Smuzhiyun static bool all_zero(struct drbd_peer_request *peer_req)
1087*4882a593Smuzhiyun {
1088*4882a593Smuzhiyun 	struct page *page = peer_req->pages;
1089*4882a593Smuzhiyun 	unsigned int len = peer_req->i.size;
1090*4882a593Smuzhiyun 
1091*4882a593Smuzhiyun 	page_chain_for_each(page) {
1092*4882a593Smuzhiyun 		unsigned int l = min_t(unsigned int, len, PAGE_SIZE);
1093*4882a593Smuzhiyun 		unsigned int i, words = l / sizeof(long);
1094*4882a593Smuzhiyun 		unsigned long *d;
1095*4882a593Smuzhiyun 
1096*4882a593Smuzhiyun 		d = kmap_atomic(page);
1097*4882a593Smuzhiyun 		for (i = 0; i < words; i++) {
1098*4882a593Smuzhiyun 			if (d[i]) {
1099*4882a593Smuzhiyun 				kunmap_atomic(d);
1100*4882a593Smuzhiyun 				return false;
1101*4882a593Smuzhiyun 			}
1102*4882a593Smuzhiyun 		}
1103*4882a593Smuzhiyun 		kunmap_atomic(d);
1104*4882a593Smuzhiyun 		len -= l;
1105*4882a593Smuzhiyun 	}
1106*4882a593Smuzhiyun 
1107*4882a593Smuzhiyun 	return true;
1108*4882a593Smuzhiyun }
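/* all_zero() above scans the page chain in long-sized words.  It is used by
 * w_e_end_rsdata_req() below for EE_RS_THIN_REQ requests: if a resync block
 * turns out to be completely zeroed, drbd_send_rs_deallocated() informs the
 * peer instead of shipping the zeroes over the wire. */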
1109*4882a593Smuzhiyun 
1110*4882a593Smuzhiyun /**
1111*4882a593Smuzhiyun  * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
1112*4882a593Smuzhiyun  * @w:		work object.
1113*4882a593Smuzhiyun  * @cancel:	The connection will be closed anyway
1114*4882a593Smuzhiyun  */
1115*4882a593Smuzhiyun int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
1116*4882a593Smuzhiyun {
1117*4882a593Smuzhiyun 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
1118*4882a593Smuzhiyun 	struct drbd_peer_device *peer_device = peer_req->peer_device;
1119*4882a593Smuzhiyun 	struct drbd_device *device = peer_device->device;
1120*4882a593Smuzhiyun 	int err;
1121*4882a593Smuzhiyun 
1122*4882a593Smuzhiyun 	if (unlikely(cancel)) {
1123*4882a593Smuzhiyun 		drbd_free_peer_req(device, peer_req);
1124*4882a593Smuzhiyun 		dec_unacked(device);
1125*4882a593Smuzhiyun 		return 0;
1126*4882a593Smuzhiyun 	}
1127*4882a593Smuzhiyun 
1128*4882a593Smuzhiyun 	if (get_ldev_if_state(device, D_FAILED)) {
1129*4882a593Smuzhiyun 		drbd_rs_complete_io(device, peer_req->i.sector);
1130*4882a593Smuzhiyun 		put_ldev(device);
1131*4882a593Smuzhiyun 	}
1132*4882a593Smuzhiyun 
1133*4882a593Smuzhiyun 	if (device->state.conn == C_AHEAD) {
1134*4882a593Smuzhiyun 		err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req);
1135*4882a593Smuzhiyun 	} else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1136*4882a593Smuzhiyun 		if (likely(device->state.pdsk >= D_INCONSISTENT)) {
1137*4882a593Smuzhiyun 			inc_rs_pending(device);
1138*4882a593Smuzhiyun 			if (peer_req->flags & EE_RS_THIN_REQ && all_zero(peer_req))
1139*4882a593Smuzhiyun 				err = drbd_send_rs_deallocated(peer_device, peer_req);
1140*4882a593Smuzhiyun 			else
1141*4882a593Smuzhiyun 				err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
1142*4882a593Smuzhiyun 		} else {
1143*4882a593Smuzhiyun 			if (__ratelimit(&drbd_ratelimit_state))
1144*4882a593Smuzhiyun 				drbd_err(device, "Not sending RSDataReply, "
1145*4882a593Smuzhiyun 				    "partner DISKLESS!\n");
1146*4882a593Smuzhiyun 			err = 0;
1147*4882a593Smuzhiyun 		}
1148*4882a593Smuzhiyun 	} else {
1149*4882a593Smuzhiyun 		if (__ratelimit(&drbd_ratelimit_state))
1150*4882a593Smuzhiyun 			drbd_err(device, "Sending NegRSDReply. sector %llus.\n",
1151*4882a593Smuzhiyun 			    (unsigned long long)peer_req->i.sector);
1152*4882a593Smuzhiyun 
1153*4882a593Smuzhiyun 		err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
1154*4882a593Smuzhiyun 
1155*4882a593Smuzhiyun 		/* update resync data with failure */
1156*4882a593Smuzhiyun 		drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
1157*4882a593Smuzhiyun 	}
1158*4882a593Smuzhiyun 
1159*4882a593Smuzhiyun 	dec_unacked(device);
1160*4882a593Smuzhiyun 
1161*4882a593Smuzhiyun 	move_to_net_ee_or_free(device, peer_req);
1162*4882a593Smuzhiyun 
1163*4882a593Smuzhiyun 	if (unlikely(err))
1164*4882a593Smuzhiyun 		drbd_err(device, "drbd_send_block() failed\n");
1165*4882a593Smuzhiyun 	return err;
1166*4882a593Smuzhiyun }
1167*4882a593Smuzhiyun 
1168*4882a593Smuzhiyun int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
1169*4882a593Smuzhiyun {
1170*4882a593Smuzhiyun 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
1171*4882a593Smuzhiyun 	struct drbd_peer_device *peer_device = peer_req->peer_device;
1172*4882a593Smuzhiyun 	struct drbd_device *device = peer_device->device;
1173*4882a593Smuzhiyun 	struct digest_info *di;
1174*4882a593Smuzhiyun 	int digest_size;
1175*4882a593Smuzhiyun 	void *digest = NULL;
1176*4882a593Smuzhiyun 	int err, eq = 0;
1177*4882a593Smuzhiyun 
1178*4882a593Smuzhiyun 	if (unlikely(cancel)) {
1179*4882a593Smuzhiyun 		drbd_free_peer_req(device, peer_req);
1180*4882a593Smuzhiyun 		dec_unacked(device);
1181*4882a593Smuzhiyun 		return 0;
1182*4882a593Smuzhiyun 	}
1183*4882a593Smuzhiyun 
1184*4882a593Smuzhiyun 	if (get_ldev(device)) {
1185*4882a593Smuzhiyun 		drbd_rs_complete_io(device, peer_req->i.sector);
1186*4882a593Smuzhiyun 		put_ldev(device);
1187*4882a593Smuzhiyun 	}
1188*4882a593Smuzhiyun 
1189*4882a593Smuzhiyun 	di = peer_req->digest;
1190*4882a593Smuzhiyun 
1191*4882a593Smuzhiyun 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1192*4882a593Smuzhiyun 		/* Quick hack to try to avoid a race against reconfiguration.
1193*4882a593Smuzhiyun 		 * A real fix would be much more involved,
1194*4882a593Smuzhiyun 		 * introducing more locking mechanisms. */
1195*4882a593Smuzhiyun 		if (peer_device->connection->csums_tfm) {
1196*4882a593Smuzhiyun 			digest_size = crypto_shash_digestsize(peer_device->connection->csums_tfm);
1197*4882a593Smuzhiyun 			D_ASSERT(device, digest_size == di->digest_size);
1198*4882a593Smuzhiyun 			digest = kmalloc(digest_size, GFP_NOIO);
1199*4882a593Smuzhiyun 		}
1200*4882a593Smuzhiyun 		if (digest) {
1201*4882a593Smuzhiyun 			drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
1202*4882a593Smuzhiyun 			eq = !memcmp(digest, di->digest, digest_size);
1203*4882a593Smuzhiyun 			kfree(digest);
1204*4882a593Smuzhiyun 		}
1205*4882a593Smuzhiyun 
1206*4882a593Smuzhiyun 		if (eq) {
1207*4882a593Smuzhiyun 			drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size);
1208*4882a593Smuzhiyun 			/* rs_same_csums unit is BM_BLOCK_SIZE */
1209*4882a593Smuzhiyun 			device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
1210*4882a593Smuzhiyun 			err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req);
1211*4882a593Smuzhiyun 		} else {
1212*4882a593Smuzhiyun 			inc_rs_pending(device);
1213*4882a593Smuzhiyun 			peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1214*4882a593Smuzhiyun 			peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
1215*4882a593Smuzhiyun 			kfree(di);
1216*4882a593Smuzhiyun 			err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
1217*4882a593Smuzhiyun 		}
1218*4882a593Smuzhiyun 	} else {
1219*4882a593Smuzhiyun 		err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
1220*4882a593Smuzhiyun 		if (__ratelimit(&drbd_ratelimit_state))
1221*4882a593Smuzhiyun 			drbd_err(device, "Sending NegDReply. I guess it gets messy.\n");
1222*4882a593Smuzhiyun 	}
1223*4882a593Smuzhiyun 
1224*4882a593Smuzhiyun 	dec_unacked(device);
1225*4882a593Smuzhiyun 	move_to_net_ee_or_free(device, peer_req);
1226*4882a593Smuzhiyun 
1227*4882a593Smuzhiyun 	if (unlikely(err))
1228*4882a593Smuzhiyun 		drbd_err(device, "drbd_send_block/ack() failed\n");
1229*4882a593Smuzhiyun 	return err;
1230*4882a593Smuzhiyun }
1231*4882a593Smuzhiyun 
1232*4882a593Smuzhiyun int w_e_end_ov_req(struct drbd_work *w, int cancel)
1233*4882a593Smuzhiyun {
1234*4882a593Smuzhiyun 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
1235*4882a593Smuzhiyun 	struct drbd_peer_device *peer_device = peer_req->peer_device;
1236*4882a593Smuzhiyun 	struct drbd_device *device = peer_device->device;
1237*4882a593Smuzhiyun 	sector_t sector = peer_req->i.sector;
1238*4882a593Smuzhiyun 	unsigned int size = peer_req->i.size;
1239*4882a593Smuzhiyun 	int digest_size;
1240*4882a593Smuzhiyun 	void *digest;
1241*4882a593Smuzhiyun 	int err = 0;
1242*4882a593Smuzhiyun 
1243*4882a593Smuzhiyun 	if (unlikely(cancel))
1244*4882a593Smuzhiyun 		goto out;
1245*4882a593Smuzhiyun 
1246*4882a593Smuzhiyun 	digest_size = crypto_shash_digestsize(peer_device->connection->verify_tfm);
1247*4882a593Smuzhiyun 	digest = kmalloc(digest_size, GFP_NOIO);
1248*4882a593Smuzhiyun 	if (!digest) {
1249*4882a593Smuzhiyun 		err = 1;	/* terminate the connection in case the allocation failed */
1250*4882a593Smuzhiyun 		goto out;
1251*4882a593Smuzhiyun 	}
1252*4882a593Smuzhiyun 
1253*4882a593Smuzhiyun 	if (likely(!(peer_req->flags & EE_WAS_ERROR)))
1254*4882a593Smuzhiyun 		drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
1255*4882a593Smuzhiyun 	else
1256*4882a593Smuzhiyun 		memset(digest, 0, digest_size);
1257*4882a593Smuzhiyun 
1258*4882a593Smuzhiyun 	/* Free peer_req and pages before send.
1259*4882a593Smuzhiyun 	 * In case we block on congestion, we could otherwise run into
1260*4882a593Smuzhiyun 	 * some distributed deadlock, if the other side blocks on
1261*4882a593Smuzhiyun 	 * congestion as well, because our receiver blocks in
1262*4882a593Smuzhiyun 	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
1263*4882a593Smuzhiyun 	drbd_free_peer_req(device, peer_req);
1264*4882a593Smuzhiyun 	peer_req = NULL;
1265*4882a593Smuzhiyun 	inc_rs_pending(device);
1266*4882a593Smuzhiyun 	err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY);
1267*4882a593Smuzhiyun 	if (err)
1268*4882a593Smuzhiyun 		dec_rs_pending(device);
1269*4882a593Smuzhiyun 	kfree(digest);
1270*4882a593Smuzhiyun 
1271*4882a593Smuzhiyun out:
1272*4882a593Smuzhiyun 	if (peer_req)
1273*4882a593Smuzhiyun 		drbd_free_peer_req(device, peer_req);
1274*4882a593Smuzhiyun 	dec_unacked(device);
1275*4882a593Smuzhiyun 	return err;
1276*4882a593Smuzhiyun }
1277*4882a593Smuzhiyun 
1278*4882a593Smuzhiyun void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size)
1279*4882a593Smuzhiyun {
1280*4882a593Smuzhiyun 	if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
1281*4882a593Smuzhiyun 		device->ov_last_oos_size += size>>9;
1282*4882a593Smuzhiyun 	} else {
1283*4882a593Smuzhiyun 		device->ov_last_oos_start = sector;
1284*4882a593Smuzhiyun 		device->ov_last_oos_size = size>>9;
1285*4882a593Smuzhiyun 	}
1286*4882a593Smuzhiyun 	drbd_set_out_of_sync(device, sector, size);
1287*4882a593Smuzhiyun }
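/* drbd_ov_out_of_sync_found() above coalesces adjacent out-of-sync ranges
 * found during online verify: if the new range starts exactly where the
 * previous one ended, only ov_last_oos_size grows; otherwise a new range is
 * started.  Both fields are kept in 512-byte sectors (hence size >> 9). */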
1288*4882a593Smuzhiyun 
1289*4882a593Smuzhiyun int w_e_end_ov_reply(struct drbd_work *w, int cancel)
1290*4882a593Smuzhiyun {
1291*4882a593Smuzhiyun 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
1292*4882a593Smuzhiyun 	struct drbd_peer_device *peer_device = peer_req->peer_device;
1293*4882a593Smuzhiyun 	struct drbd_device *device = peer_device->device;
1294*4882a593Smuzhiyun 	struct digest_info *di;
1295*4882a593Smuzhiyun 	void *digest;
1296*4882a593Smuzhiyun 	sector_t sector = peer_req->i.sector;
1297*4882a593Smuzhiyun 	unsigned int size = peer_req->i.size;
1298*4882a593Smuzhiyun 	int digest_size;
1299*4882a593Smuzhiyun 	int err, eq = 0;
1300*4882a593Smuzhiyun 	bool stop_sector_reached = false;
1301*4882a593Smuzhiyun 
1302*4882a593Smuzhiyun 	if (unlikely(cancel)) {
1303*4882a593Smuzhiyun 		drbd_free_peer_req(device, peer_req);
1304*4882a593Smuzhiyun 		dec_unacked(device);
1305*4882a593Smuzhiyun 		return 0;
1306*4882a593Smuzhiyun 	}
1307*4882a593Smuzhiyun 
1308*4882a593Smuzhiyun 	/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1309*4882a593Smuzhiyun 	 * the resync lru has been cleaned up already */
1310*4882a593Smuzhiyun 	if (get_ldev(device)) {
1311*4882a593Smuzhiyun 		drbd_rs_complete_io(device, peer_req->i.sector);
1312*4882a593Smuzhiyun 		put_ldev(device);
1313*4882a593Smuzhiyun 	}
1314*4882a593Smuzhiyun 
1315*4882a593Smuzhiyun 	di = peer_req->digest;
1316*4882a593Smuzhiyun 
1317*4882a593Smuzhiyun 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1318*4882a593Smuzhiyun 		digest_size = crypto_shash_digestsize(peer_device->connection->verify_tfm);
1319*4882a593Smuzhiyun 		digest = kmalloc(digest_size, GFP_NOIO);
1320*4882a593Smuzhiyun 		if (digest) {
1321*4882a593Smuzhiyun 			drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
1322*4882a593Smuzhiyun 
1323*4882a593Smuzhiyun 			D_ASSERT(device, digest_size == di->digest_size);
1324*4882a593Smuzhiyun 			eq = !memcmp(digest, di->digest, digest_size);
1325*4882a593Smuzhiyun 			kfree(digest);
1326*4882a593Smuzhiyun 		}
1327*4882a593Smuzhiyun 	}
1328*4882a593Smuzhiyun 
1329*4882a593Smuzhiyun 	/* Free peer_req and pages before send.
1330*4882a593Smuzhiyun 	 * In case we block on congestion, we could otherwise run into
1331*4882a593Smuzhiyun 	 * some distributed deadlock, if the other side blocks on
1332*4882a593Smuzhiyun 	 * congestion as well, because our receiver blocks in
1333*4882a593Smuzhiyun 	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
1334*4882a593Smuzhiyun 	drbd_free_peer_req(device, peer_req);
1335*4882a593Smuzhiyun 	if (!eq)
1336*4882a593Smuzhiyun 		drbd_ov_out_of_sync_found(device, sector, size);
1337*4882a593Smuzhiyun 	else
1338*4882a593Smuzhiyun 		ov_out_of_sync_print(device);
1339*4882a593Smuzhiyun 
1340*4882a593Smuzhiyun 	err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size,
1341*4882a593Smuzhiyun 			       eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
1342*4882a593Smuzhiyun 
1343*4882a593Smuzhiyun 	dec_unacked(device);
1344*4882a593Smuzhiyun 
1345*4882a593Smuzhiyun 	--device->ov_left;
1346*4882a593Smuzhiyun 
1347*4882a593Smuzhiyun 	/* let's advance progress step marks only for every other megabyte */
1348*4882a593Smuzhiyun 	if ((device->ov_left & 0x200) == 0x200)
1349*4882a593Smuzhiyun 		drbd_advance_rs_marks(device, device->ov_left);
1350*4882a593Smuzhiyun 
1351*4882a593Smuzhiyun 	stop_sector_reached = verify_can_do_stop_sector(device) &&
1352*4882a593Smuzhiyun 		(sector + (size>>9)) >= device->ov_stop_sector;
1353*4882a593Smuzhiyun 
1354*4882a593Smuzhiyun 	if (device->ov_left == 0 || stop_sector_reached) {
1355*4882a593Smuzhiyun 		ov_out_of_sync_print(device);
1356*4882a593Smuzhiyun 		drbd_resync_finished(device);
1357*4882a593Smuzhiyun 	}
1358*4882a593Smuzhiyun 
1359*4882a593Smuzhiyun 	return err;
1360*4882a593Smuzhiyun }
1361*4882a593Smuzhiyun 
1362*4882a593Smuzhiyun /* FIXME
1363*4882a593Smuzhiyun  * We need to track the number of pending barrier acks,
1364*4882a593Smuzhiyun  * and to be able to wait for them.
1365*4882a593Smuzhiyun  * See also comment in drbd_adm_attach before drbd_suspend_io.
1366*4882a593Smuzhiyun  */
1367*4882a593Smuzhiyun static int drbd_send_barrier(struct drbd_connection *connection)
1368*4882a593Smuzhiyun {
1369*4882a593Smuzhiyun 	struct p_barrier *p;
1370*4882a593Smuzhiyun 	struct drbd_socket *sock;
1371*4882a593Smuzhiyun 
1372*4882a593Smuzhiyun 	sock = &connection->data;
1373*4882a593Smuzhiyun 	p = conn_prepare_command(connection, sock);
1374*4882a593Smuzhiyun 	if (!p)
1375*4882a593Smuzhiyun 		return -EIO;
1376*4882a593Smuzhiyun 	p->barrier = connection->send.current_epoch_nr;
1377*4882a593Smuzhiyun 	p->pad = 0;
1378*4882a593Smuzhiyun 	connection->send.current_epoch_writes = 0;
1379*4882a593Smuzhiyun 	connection->send.last_sent_barrier_jif = jiffies;
1380*4882a593Smuzhiyun 
1381*4882a593Smuzhiyun 	return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
1382*4882a593Smuzhiyun }
1383*4882a593Smuzhiyun 
1384*4882a593Smuzhiyun static int pd_send_unplug_remote(struct drbd_peer_device *pd)
1385*4882a593Smuzhiyun {
1386*4882a593Smuzhiyun 	struct drbd_socket *sock = &pd->connection->data;
1387*4882a593Smuzhiyun 	if (!drbd_prepare_command(pd, sock))
1388*4882a593Smuzhiyun 		return -EIO;
1389*4882a593Smuzhiyun 	return drbd_send_command(pd, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
1390*4882a593Smuzhiyun }
1391*4882a593Smuzhiyun 
1392*4882a593Smuzhiyun int w_send_write_hint(struct drbd_work *w, int cancel)
1393*4882a593Smuzhiyun {
1394*4882a593Smuzhiyun 	struct drbd_device *device =
1395*4882a593Smuzhiyun 		container_of(w, struct drbd_device, unplug_work);
1396*4882a593Smuzhiyun 
1397*4882a593Smuzhiyun 	if (cancel)
1398*4882a593Smuzhiyun 		return 0;
1399*4882a593Smuzhiyun 	return pd_send_unplug_remote(first_peer_device(device));
1400*4882a593Smuzhiyun }
1401*4882a593Smuzhiyun 
1402*4882a593Smuzhiyun static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
1403*4882a593Smuzhiyun {
1404*4882a593Smuzhiyun 	if (!connection->send.seen_any_write_yet) {
1405*4882a593Smuzhiyun 		connection->send.seen_any_write_yet = true;
1406*4882a593Smuzhiyun 		connection->send.current_epoch_nr = epoch;
1407*4882a593Smuzhiyun 		connection->send.current_epoch_writes = 0;
1408*4882a593Smuzhiyun 		connection->send.last_sent_barrier_jif = jiffies;
1409*4882a593Smuzhiyun 	}
1410*4882a593Smuzhiyun }
1411*4882a593Smuzhiyun 
1412*4882a593Smuzhiyun static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch)
1413*4882a593Smuzhiyun {
1414*4882a593Smuzhiyun 	/* no epoch to close before the first write on this connection */
1415*4882a593Smuzhiyun 	if (!connection->send.seen_any_write_yet)
1416*4882a593Smuzhiyun 		return;
1417*4882a593Smuzhiyun 	if (connection->send.current_epoch_nr != epoch) {
1418*4882a593Smuzhiyun 		if (connection->send.current_epoch_writes)
1419*4882a593Smuzhiyun 			drbd_send_barrier(connection);
1420*4882a593Smuzhiyun 		connection->send.current_epoch_nr = epoch;
1421*4882a593Smuzhiyun 	}
1422*4882a593Smuzhiyun }
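/* Epoch/barrier bookkeeping used by the send callbacks below: writes are
 * grouped into epochs (req->epoch vs. send.current_epoch_nr), and a P_BARRIER
 * is sent only when the epoch number changes *and* the epoch being closed
 * actually contained writes.  For example:
 *   epoch 7: two P_DATA packets        -> current_epoch_writes == 2
 *   first request of epoch 8 arrives   -> P_BARRIER for epoch 7 is sent first
 * re_init_if_first_write() seeds these counters on a connection's very first
 * write. */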
1423*4882a593Smuzhiyun 
1424*4882a593Smuzhiyun int w_send_out_of_sync(struct drbd_work *w, int cancel)
1425*4882a593Smuzhiyun {
1426*4882a593Smuzhiyun 	struct drbd_request *req = container_of(w, struct drbd_request, w);
1427*4882a593Smuzhiyun 	struct drbd_device *device = req->device;
1428*4882a593Smuzhiyun 	struct drbd_peer_device *const peer_device = first_peer_device(device);
1429*4882a593Smuzhiyun 	struct drbd_connection *const connection = peer_device->connection;
1430*4882a593Smuzhiyun 	int err;
1431*4882a593Smuzhiyun 
1432*4882a593Smuzhiyun 	if (unlikely(cancel)) {
1433*4882a593Smuzhiyun 		req_mod(req, SEND_CANCELED);
1434*4882a593Smuzhiyun 		return 0;
1435*4882a593Smuzhiyun 	}
1436*4882a593Smuzhiyun 	req->pre_send_jif = jiffies;
1437*4882a593Smuzhiyun 
1438*4882a593Smuzhiyun 	/* this time, no connection->send.current_epoch_writes++;
1439*4882a593Smuzhiyun 	 * if a barrier is sent here, it is the closing barrier for the last
1440*4882a593Smuzhiyun 	 * replicated epoch, from before we went into AHEAD mode.
1441*4882a593Smuzhiyun 	 * No more barriers will be sent until we leave AHEAD mode again. */
1442*4882a593Smuzhiyun 	maybe_send_barrier(connection, req->epoch);
1443*4882a593Smuzhiyun 
1444*4882a593Smuzhiyun 	err = drbd_send_out_of_sync(peer_device, req);
1445*4882a593Smuzhiyun 	req_mod(req, OOS_HANDED_TO_NETWORK);
1446*4882a593Smuzhiyun 
1447*4882a593Smuzhiyun 	return err;
1448*4882a593Smuzhiyun }
1449*4882a593Smuzhiyun 
1450*4882a593Smuzhiyun /**
1451*4882a593Smuzhiyun  * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
1452*4882a593Smuzhiyun  * @w:		work object.
1453*4882a593Smuzhiyun  * @cancel:	The connection will be closed anyway
1454*4882a593Smuzhiyun  */
1455*4882a593Smuzhiyun int w_send_dblock(struct drbd_work *w, int cancel)
1456*4882a593Smuzhiyun {
1457*4882a593Smuzhiyun 	struct drbd_request *req = container_of(w, struct drbd_request, w);
1458*4882a593Smuzhiyun 	struct drbd_device *device = req->device;
1459*4882a593Smuzhiyun 	struct drbd_peer_device *const peer_device = first_peer_device(device);
1460*4882a593Smuzhiyun 	struct drbd_connection *connection = peer_device->connection;
1461*4882a593Smuzhiyun 	bool do_send_unplug = req->rq_state & RQ_UNPLUG;
1462*4882a593Smuzhiyun 	int err;
1463*4882a593Smuzhiyun 
1464*4882a593Smuzhiyun 	if (unlikely(cancel)) {
1465*4882a593Smuzhiyun 		req_mod(req, SEND_CANCELED);
1466*4882a593Smuzhiyun 		return 0;
1467*4882a593Smuzhiyun 	}
1468*4882a593Smuzhiyun 	req->pre_send_jif = jiffies;
1469*4882a593Smuzhiyun 
1470*4882a593Smuzhiyun 	re_init_if_first_write(connection, req->epoch);
1471*4882a593Smuzhiyun 	maybe_send_barrier(connection, req->epoch);
1472*4882a593Smuzhiyun 	connection->send.current_epoch_writes++;
1473*4882a593Smuzhiyun 
1474*4882a593Smuzhiyun 	err = drbd_send_dblock(peer_device, req);
1475*4882a593Smuzhiyun 	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
1476*4882a593Smuzhiyun 
1477*4882a593Smuzhiyun 	if (do_send_unplug && !err)
1478*4882a593Smuzhiyun 		pd_send_unplug_remote(peer_device);
1479*4882a593Smuzhiyun 
1480*4882a593Smuzhiyun 	return err;
1481*4882a593Smuzhiyun }
1482*4882a593Smuzhiyun 
1483*4882a593Smuzhiyun /**
1484*4882a593Smuzhiyun  * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
1485*4882a593Smuzhiyun  * @w:		work object.
1486*4882a593Smuzhiyun  * @cancel:	The connection will be closed anyway
1487*4882a593Smuzhiyun  */
1488*4882a593Smuzhiyun int w_send_read_req(struct drbd_work *w, int cancel)
1489*4882a593Smuzhiyun {
1490*4882a593Smuzhiyun 	struct drbd_request *req = container_of(w, struct drbd_request, w);
1491*4882a593Smuzhiyun 	struct drbd_device *device = req->device;
1492*4882a593Smuzhiyun 	struct drbd_peer_device *const peer_device = first_peer_device(device);
1493*4882a593Smuzhiyun 	struct drbd_connection *connection = peer_device->connection;
1494*4882a593Smuzhiyun 	bool do_send_unplug = req->rq_state & RQ_UNPLUG;
1495*4882a593Smuzhiyun 	int err;
1496*4882a593Smuzhiyun 
1497*4882a593Smuzhiyun 	if (unlikely(cancel)) {
1498*4882a593Smuzhiyun 		req_mod(req, SEND_CANCELED);
1499*4882a593Smuzhiyun 		return 0;
1500*4882a593Smuzhiyun 	}
1501*4882a593Smuzhiyun 	req->pre_send_jif = jiffies;
1502*4882a593Smuzhiyun 
1503*4882a593Smuzhiyun 	/* Even read requests may close a write epoch,
1504*4882a593Smuzhiyun 	 * if one is still open. */
1505*4882a593Smuzhiyun 	maybe_send_barrier(connection, req->epoch);
1506*4882a593Smuzhiyun 
1507*4882a593Smuzhiyun 	err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size,
1508*4882a593Smuzhiyun 				 (unsigned long)req);
1509*4882a593Smuzhiyun 
1510*4882a593Smuzhiyun 	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
1511*4882a593Smuzhiyun 
1512*4882a593Smuzhiyun 	if (do_send_unplug && !err)
1513*4882a593Smuzhiyun 		pd_send_unplug_remote(peer_device);
1514*4882a593Smuzhiyun 
1515*4882a593Smuzhiyun 	return err;
1516*4882a593Smuzhiyun }
1517*4882a593Smuzhiyun 
1518*4882a593Smuzhiyun int w_restart_disk_io(struct drbd_work *w, int cancel)
1519*4882a593Smuzhiyun {
1520*4882a593Smuzhiyun 	struct drbd_request *req = container_of(w, struct drbd_request, w);
1521*4882a593Smuzhiyun 	struct drbd_device *device = req->device;
1522*4882a593Smuzhiyun 
1523*4882a593Smuzhiyun 	if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
1524*4882a593Smuzhiyun 		drbd_al_begin_io(device, &req->i);
1525*4882a593Smuzhiyun 
1526*4882a593Smuzhiyun 	drbd_req_make_private_bio(req, req->master_bio);
1527*4882a593Smuzhiyun 	bio_set_dev(req->private_bio, device->ldev->backing_bdev);
1528*4882a593Smuzhiyun 	submit_bio_noacct(req->private_bio);
1529*4882a593Smuzhiyun 
1530*4882a593Smuzhiyun 	return 0;
1531*4882a593Smuzhiyun }
1532*4882a593Smuzhiyun 
1533*4882a593Smuzhiyun static int _drbd_may_sync_now(struct drbd_device *device)
1534*4882a593Smuzhiyun {
1535*4882a593Smuzhiyun 	struct drbd_device *odev = device;
1536*4882a593Smuzhiyun 	int resync_after;
1537*4882a593Smuzhiyun 
1538*4882a593Smuzhiyun 	while (1) {
1539*4882a593Smuzhiyun 		if (!odev->ldev || odev->state.disk == D_DISKLESS)
1540*4882a593Smuzhiyun 			return 1;
1541*4882a593Smuzhiyun 		rcu_read_lock();
1542*4882a593Smuzhiyun 		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
1543*4882a593Smuzhiyun 		rcu_read_unlock();
1544*4882a593Smuzhiyun 		if (resync_after == -1)
1545*4882a593Smuzhiyun 			return 1;
1546*4882a593Smuzhiyun 		odev = minor_to_device(resync_after);
1547*4882a593Smuzhiyun 		if (!odev)
1548*4882a593Smuzhiyun 			return 1;
1549*4882a593Smuzhiyun 		if ((odev->state.conn >= C_SYNC_SOURCE &&
1550*4882a593Smuzhiyun 		     odev->state.conn <= C_PAUSED_SYNC_T) ||
1551*4882a593Smuzhiyun 		    odev->state.aftr_isp || odev->state.peer_isp ||
1552*4882a593Smuzhiyun 		    odev->state.user_isp)
1553*4882a593Smuzhiyun 			return 0;
1554*4882a593Smuzhiyun 	}
1555*4882a593Smuzhiyun }
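/* _drbd_may_sync_now() above walks the resync-after dependency chain: it
 * follows disk_conf->resync_after minor numbers until the chain ends (-1,
 * missing or diskless minor), in which case syncing is allowed, or until it
 * finds a device that is currently resyncing or paused (C_SYNC_SOURCE ..
 * C_PAUSED_SYNC_T, or any of the *_isp flags set), in which case this device
 * has to wait. */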
1556*4882a593Smuzhiyun 
1557*4882a593Smuzhiyun /**
1558*4882a593Smuzhiyun  * drbd_pause_after() - Pause resync on all devices that may not resync now
1559*4882a593Smuzhiyun  * @device:	DRBD device.
1560*4882a593Smuzhiyun  *
1561*4882a593Smuzhiyun  * Called from process context only (admin command and after_state_ch).
1562*4882a593Smuzhiyun  */
1563*4882a593Smuzhiyun static bool drbd_pause_after(struct drbd_device *device)
1564*4882a593Smuzhiyun {
1565*4882a593Smuzhiyun 	bool changed = false;
1566*4882a593Smuzhiyun 	struct drbd_device *odev;
1567*4882a593Smuzhiyun 	int i;
1568*4882a593Smuzhiyun 
1569*4882a593Smuzhiyun 	rcu_read_lock();
1570*4882a593Smuzhiyun 	idr_for_each_entry(&drbd_devices, odev, i) {
1571*4882a593Smuzhiyun 		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1572*4882a593Smuzhiyun 			continue;
1573*4882a593Smuzhiyun 		if (!_drbd_may_sync_now(odev) &&
1574*4882a593Smuzhiyun 		    _drbd_set_state(_NS(odev, aftr_isp, 1),
1575*4882a593Smuzhiyun 				    CS_HARD, NULL) != SS_NOTHING_TO_DO)
1576*4882a593Smuzhiyun 			changed = true;
1577*4882a593Smuzhiyun 	}
1578*4882a593Smuzhiyun 	rcu_read_unlock();
1579*4882a593Smuzhiyun 
1580*4882a593Smuzhiyun 	return changed;
1581*4882a593Smuzhiyun }
1582*4882a593Smuzhiyun 
1583*4882a593Smuzhiyun /**
1584*4882a593Smuzhiyun  * drbd_resume_next() - Resume resync on all devices that may resync now
1585*4882a593Smuzhiyun  * @device:	DRBD device.
1586*4882a593Smuzhiyun  *
1587*4882a593Smuzhiyun  * Called from process context only (admin command and worker).
1588*4882a593Smuzhiyun  */
1589*4882a593Smuzhiyun static bool drbd_resume_next(struct drbd_device *device)
1590*4882a593Smuzhiyun {
1591*4882a593Smuzhiyun 	bool changed = false;
1592*4882a593Smuzhiyun 	struct drbd_device *odev;
1593*4882a593Smuzhiyun 	int i;
1594*4882a593Smuzhiyun 
1595*4882a593Smuzhiyun 	rcu_read_lock();
1596*4882a593Smuzhiyun 	idr_for_each_entry(&drbd_devices, odev, i) {
1597*4882a593Smuzhiyun 		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1598*4882a593Smuzhiyun 			continue;
1599*4882a593Smuzhiyun 		if (odev->state.aftr_isp) {
1600*4882a593Smuzhiyun 			if (_drbd_may_sync_now(odev) &&
1601*4882a593Smuzhiyun 			    _drbd_set_state(_NS(odev, aftr_isp, 0),
1602*4882a593Smuzhiyun 					    CS_HARD, NULL) != SS_NOTHING_TO_DO)
1603*4882a593Smuzhiyun 				changed = true;
1604*4882a593Smuzhiyun 		}
1605*4882a593Smuzhiyun 	}
1606*4882a593Smuzhiyun 	rcu_read_unlock();
1607*4882a593Smuzhiyun 	return changed;
1608*4882a593Smuzhiyun }
1609*4882a593Smuzhiyun 
1610*4882a593Smuzhiyun void resume_next_sg(struct drbd_device *device)
1611*4882a593Smuzhiyun {
1612*4882a593Smuzhiyun 	lock_all_resources();
1613*4882a593Smuzhiyun 	drbd_resume_next(device);
1614*4882a593Smuzhiyun 	unlock_all_resources();
1615*4882a593Smuzhiyun }
1616*4882a593Smuzhiyun 
1617*4882a593Smuzhiyun void suspend_other_sg(struct drbd_device *device)
1618*4882a593Smuzhiyun {
1619*4882a593Smuzhiyun 	lock_all_resources();
1620*4882a593Smuzhiyun 	drbd_pause_after(device);
1621*4882a593Smuzhiyun 	unlock_all_resources();
1622*4882a593Smuzhiyun }
1623*4882a593Smuzhiyun 
1624*4882a593Smuzhiyun /* caller must lock_all_resources() */
1625*4882a593Smuzhiyun enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
1626*4882a593Smuzhiyun {
1627*4882a593Smuzhiyun 	struct drbd_device *odev;
1628*4882a593Smuzhiyun 	int resync_after;
1629*4882a593Smuzhiyun 
1630*4882a593Smuzhiyun 	if (o_minor == -1)
1631*4882a593Smuzhiyun 		return NO_ERROR;
1632*4882a593Smuzhiyun 	if (o_minor < -1 || o_minor > MINORMASK)
1633*4882a593Smuzhiyun 		return ERR_RESYNC_AFTER;
1634*4882a593Smuzhiyun 
1635*4882a593Smuzhiyun 	/* check for loops */
1636*4882a593Smuzhiyun 	odev = minor_to_device(o_minor);
1637*4882a593Smuzhiyun 	while (1) {
1638*4882a593Smuzhiyun 		if (odev == device)
1639*4882a593Smuzhiyun 			return ERR_RESYNC_AFTER_CYCLE;
1640*4882a593Smuzhiyun 
1641*4882a593Smuzhiyun 		/* You are free to depend on diskless, non-existing,
1642*4882a593Smuzhiyun 		 * or not yet/no longer existing minors.
1643*4882a593Smuzhiyun 		 * We only reject dependency loops.
1644*4882a593Smuzhiyun 		 * We cannot follow the dependency chain beyond a detached or
1645*4882a593Smuzhiyun 		 * missing minor.
1646*4882a593Smuzhiyun 		 */
1647*4882a593Smuzhiyun 		if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
1648*4882a593Smuzhiyun 			return NO_ERROR;
1649*4882a593Smuzhiyun 
1650*4882a593Smuzhiyun 		rcu_read_lock();
1651*4882a593Smuzhiyun 		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
1652*4882a593Smuzhiyun 		rcu_read_unlock();
1653*4882a593Smuzhiyun 		/* dependency chain ends here, no cycles. */
1654*4882a593Smuzhiyun 		if (resync_after == -1)
1655*4882a593Smuzhiyun 			return NO_ERROR;
1656*4882a593Smuzhiyun 
1657*4882a593Smuzhiyun 		/* follow the dependency chain */
1658*4882a593Smuzhiyun 		odev = minor_to_device(resync_after);
1659*4882a593Smuzhiyun 	}
1660*4882a593Smuzhiyun }
1661*4882a593Smuzhiyun 
1662*4882a593Smuzhiyun /* caller must lock_all_resources() */
1663*4882a593Smuzhiyun void drbd_resync_after_changed(struct drbd_device *device)
1664*4882a593Smuzhiyun {
1665*4882a593Smuzhiyun 	int changed;
1666*4882a593Smuzhiyun 
1667*4882a593Smuzhiyun 	do {
1668*4882a593Smuzhiyun 		changed  = drbd_pause_after(device);
1669*4882a593Smuzhiyun 		changed |= drbd_resume_next(device);
1670*4882a593Smuzhiyun 	} while (changed);
1671*4882a593Smuzhiyun }
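/* drbd_resync_after_changed() above iterates drbd_pause_after() and
 * drbd_resume_next() to a fixed point: pausing or resuming one device may in
 * turn allow or require changes on devices that depend on it, so the loop
 * repeats until a full pass changes nothing. */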
1672*4882a593Smuzhiyun 
1673*4882a593Smuzhiyun void drbd_rs_controller_reset(struct drbd_device *device)
1674*4882a593Smuzhiyun {
1675*4882a593Smuzhiyun 	struct gendisk *disk = device->ldev->backing_bdev->bd_disk;
1676*4882a593Smuzhiyun 	struct fifo_buffer *plan;
1677*4882a593Smuzhiyun 
1678*4882a593Smuzhiyun 	atomic_set(&device->rs_sect_in, 0);
1679*4882a593Smuzhiyun 	atomic_set(&device->rs_sect_ev, 0);
1680*4882a593Smuzhiyun 	device->rs_in_flight = 0;
1681*4882a593Smuzhiyun 	device->rs_last_events = (int)part_stat_read_accum(&disk->part0, sectors);
1682*4882a593Smuzhiyun 
1683*4882a593Smuzhiyun 	/* Updating the RCU protected object in place is necessary since
1684*4882a593Smuzhiyun 	   this function gets called from atomic context.
1685*4882a593Smuzhiyun 	   It is valid since all other updates also lead to a completely
1686*4882a593Smuzhiyun 	   empty fifo. */
1687*4882a593Smuzhiyun 	rcu_read_lock();
1688*4882a593Smuzhiyun 	plan = rcu_dereference(device->rs_plan_s);
1689*4882a593Smuzhiyun 	plan->total = 0;
1690*4882a593Smuzhiyun 	fifo_set(plan, 0);
1691*4882a593Smuzhiyun 	rcu_read_unlock();
1692*4882a593Smuzhiyun }
1693*4882a593Smuzhiyun 
1694*4882a593Smuzhiyun void start_resync_timer_fn(struct timer_list *t)
1695*4882a593Smuzhiyun {
1696*4882a593Smuzhiyun 	struct drbd_device *device = from_timer(device, t, start_resync_timer);
1697*4882a593Smuzhiyun 	drbd_device_post_work(device, RS_START);
1698*4882a593Smuzhiyun }
1699*4882a593Smuzhiyun 
1700*4882a593Smuzhiyun static void do_start_resync(struct drbd_device *device)
1701*4882a593Smuzhiyun {
1702*4882a593Smuzhiyun 	if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
1703*4882a593Smuzhiyun 		drbd_warn(device, "postponing start_resync ...\n");
1704*4882a593Smuzhiyun 		device->start_resync_timer.expires = jiffies + HZ/10;
1705*4882a593Smuzhiyun 		add_timer(&device->start_resync_timer);
1706*4882a593Smuzhiyun 		return;
1707*4882a593Smuzhiyun 	}
1708*4882a593Smuzhiyun 
1709*4882a593Smuzhiyun 	drbd_start_resync(device, C_SYNC_SOURCE);
1710*4882a593Smuzhiyun 	clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
1711*4882a593Smuzhiyun }
1712*4882a593Smuzhiyun 
1713*4882a593Smuzhiyun static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device)
1714*4882a593Smuzhiyun {
1715*4882a593Smuzhiyun 	bool csums_after_crash_only;
1716*4882a593Smuzhiyun 	rcu_read_lock();
1717*4882a593Smuzhiyun 	csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only;
1718*4882a593Smuzhiyun 	rcu_read_unlock();
1719*4882a593Smuzhiyun 	return connection->agreed_pro_version >= 89 &&		/* supported? */
1720*4882a593Smuzhiyun 		connection->csums_tfm &&			/* configured? */
1721*4882a593Smuzhiyun 		(csums_after_crash_only == false		/* use for each resync? */
1722*4882a593Smuzhiyun 		 || test_bit(CRASHED_PRIMARY, &device->flags));	/* or only after Primary crash? */
1723*4882a593Smuzhiyun }
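/* Checksum based resync is used only if the peers agreed on protocol version
 * 89 or newer and a csums algorithm is configured; with csums_after_crash_only
 * set it is further restricted to resyncs following a primary crash
 * (CRASHED_PRIMARY). */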
1724*4882a593Smuzhiyun 
1725*4882a593Smuzhiyun /**
1726*4882a593Smuzhiyun  * drbd_start_resync() - Start the resync process
1727*4882a593Smuzhiyun  * @device:	DRBD device.
1728*4882a593Smuzhiyun  * @side:	Either C_SYNC_SOURCE or C_SYNC_TARGET
1729*4882a593Smuzhiyun  *
1730*4882a593Smuzhiyun  * This function might bring you directly into one of the
1731*4882a593Smuzhiyun  * C_PAUSED_SYNC_* states.
1732*4882a593Smuzhiyun  */
1733*4882a593Smuzhiyun void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
1734*4882a593Smuzhiyun {
1735*4882a593Smuzhiyun 	struct drbd_peer_device *peer_device = first_peer_device(device);
1736*4882a593Smuzhiyun 	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
1737*4882a593Smuzhiyun 	union drbd_state ns;
1738*4882a593Smuzhiyun 	int r;
1739*4882a593Smuzhiyun 
1740*4882a593Smuzhiyun 	if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) {
1741*4882a593Smuzhiyun 		drbd_err(device, "Resync already running!\n");
1742*4882a593Smuzhiyun 		return;
1743*4882a593Smuzhiyun 	}
1744*4882a593Smuzhiyun 
1745*4882a593Smuzhiyun 	if (!connection) {
1746*4882a593Smuzhiyun 		drbd_err(device, "No connection to peer, aborting!\n");
1747*4882a593Smuzhiyun 		return;
1748*4882a593Smuzhiyun 	}
1749*4882a593Smuzhiyun 
1750*4882a593Smuzhiyun 	if (!test_bit(B_RS_H_DONE, &device->flags)) {
1751*4882a593Smuzhiyun 		if (side == C_SYNC_TARGET) {
1752*4882a593Smuzhiyun 			/* Since application IO was locked out during C_WF_BITMAP_T and
1753*4882a593Smuzhiyun 			   C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET,
1754*4882a593Smuzhiyun 			   which makes the local data inconsistent, let the handler intervene. */
1755*4882a593Smuzhiyun 			r = drbd_khelper(device, "before-resync-target");
1756*4882a593Smuzhiyun 			r = (r >> 8) & 0xff;
1757*4882a593Smuzhiyun 			if (r > 0) {
1758*4882a593Smuzhiyun 				drbd_info(device, "before-resync-target handler returned %d, "
1759*4882a593Smuzhiyun 					 "dropping connection.\n", r);
1760*4882a593Smuzhiyun 				conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
1761*4882a593Smuzhiyun 				return;
1762*4882a593Smuzhiyun 			}
1763*4882a593Smuzhiyun 		} else /* C_SYNC_SOURCE */ {
1764*4882a593Smuzhiyun 			r = drbd_khelper(device, "before-resync-source");
1765*4882a593Smuzhiyun 			r = (r >> 8) & 0xff;
1766*4882a593Smuzhiyun 			if (r > 0) {
1767*4882a593Smuzhiyun 				if (r == 3) {
1768*4882a593Smuzhiyun 					drbd_info(device, "before-resync-source handler returned %d, "
1769*4882a593Smuzhiyun 						 "ignoring. Old userland tools?", r);
1770*4882a593Smuzhiyun 				} else {
1771*4882a593Smuzhiyun 					drbd_info(device, "before-resync-source handler returned %d, "
1772*4882a593Smuzhiyun 						 "dropping connection.\n", r);
1773*4882a593Smuzhiyun 					conn_request_state(connection,
1774*4882a593Smuzhiyun 							   NS(conn, C_DISCONNECTING), CS_HARD);
1775*4882a593Smuzhiyun 					return;
1776*4882a593Smuzhiyun 				}
1777*4882a593Smuzhiyun 			}
1778*4882a593Smuzhiyun 		}
1779*4882a593Smuzhiyun 	}
1780*4882a593Smuzhiyun 
1781*4882a593Smuzhiyun 	if (current == connection->worker.task) {
1782*4882a593Smuzhiyun 		/* The worker should not sleep waiting for state_mutex,
1783*4882a593Smuzhiyun 		   as that can take a long time. */
1784*4882a593Smuzhiyun 		if (!mutex_trylock(device->state_mutex)) {
1785*4882a593Smuzhiyun 			set_bit(B_RS_H_DONE, &device->flags);
1786*4882a593Smuzhiyun 			device->start_resync_timer.expires = jiffies + HZ/5;
1787*4882a593Smuzhiyun 			add_timer(&device->start_resync_timer);
1788*4882a593Smuzhiyun 			return;
1789*4882a593Smuzhiyun 		}
1790*4882a593Smuzhiyun 	} else {
1791*4882a593Smuzhiyun 		mutex_lock(device->state_mutex);
1792*4882a593Smuzhiyun 	}
1793*4882a593Smuzhiyun 
1794*4882a593Smuzhiyun 	lock_all_resources();
1795*4882a593Smuzhiyun 	clear_bit(B_RS_H_DONE, &device->flags);
1796*4882a593Smuzhiyun 	/* Did some connection breakage or IO error race with us? */
1797*4882a593Smuzhiyun 	if (device->state.conn < C_CONNECTED
1798*4882a593Smuzhiyun 	|| !get_ldev_if_state(device, D_NEGOTIATING)) {
1799*4882a593Smuzhiyun 		unlock_all_resources();
1800*4882a593Smuzhiyun 		goto out;
1801*4882a593Smuzhiyun 	}
1802*4882a593Smuzhiyun 
1803*4882a593Smuzhiyun 	ns = drbd_read_state(device);
1804*4882a593Smuzhiyun 
1805*4882a593Smuzhiyun 	ns.aftr_isp = !_drbd_may_sync_now(device);
1806*4882a593Smuzhiyun 
1807*4882a593Smuzhiyun 	ns.conn = side;
1808*4882a593Smuzhiyun 
1809*4882a593Smuzhiyun 	if (side == C_SYNC_TARGET)
1810*4882a593Smuzhiyun 		ns.disk = D_INCONSISTENT;
1811*4882a593Smuzhiyun 	else /* side == C_SYNC_SOURCE */
1812*4882a593Smuzhiyun 		ns.pdsk = D_INCONSISTENT;
1813*4882a593Smuzhiyun 
1814*4882a593Smuzhiyun 	r = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
1815*4882a593Smuzhiyun 	ns = drbd_read_state(device);
1816*4882a593Smuzhiyun 
1817*4882a593Smuzhiyun 	if (ns.conn < C_CONNECTED)
1818*4882a593Smuzhiyun 		r = SS_UNKNOWN_ERROR;
1819*4882a593Smuzhiyun 
1820*4882a593Smuzhiyun 	if (r == SS_SUCCESS) {
1821*4882a593Smuzhiyun 		unsigned long tw = drbd_bm_total_weight(device);
1822*4882a593Smuzhiyun 		unsigned long now = jiffies;
1823*4882a593Smuzhiyun 		int i;
1824*4882a593Smuzhiyun 
1825*4882a593Smuzhiyun 		device->rs_failed    = 0;
1826*4882a593Smuzhiyun 		device->rs_paused    = 0;
1827*4882a593Smuzhiyun 		device->rs_same_csum = 0;
1828*4882a593Smuzhiyun 		device->rs_last_sect_ev = 0;
1829*4882a593Smuzhiyun 		device->rs_total     = tw;
1830*4882a593Smuzhiyun 		device->rs_start     = now;
1831*4882a593Smuzhiyun 		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
1832*4882a593Smuzhiyun 			device->rs_mark_left[i] = tw;
1833*4882a593Smuzhiyun 			device->rs_mark_time[i] = now;
1834*4882a593Smuzhiyun 		}
1835*4882a593Smuzhiyun 		drbd_pause_after(device);
1836*4882a593Smuzhiyun 		/* Forget potentially stale cached per-resync-extent bit counts.
1837*4882a593Smuzhiyun 		 * Open-coded drbd_rs_cancel_all(device): we already have IRQs
1838*4882a593Smuzhiyun 		 * disabled and know the disk state is ok. */
1839*4882a593Smuzhiyun 		spin_lock(&device->al_lock);
1840*4882a593Smuzhiyun 		lc_reset(device->resync);
1841*4882a593Smuzhiyun 		device->resync_locked = 0;
1842*4882a593Smuzhiyun 		device->resync_wenr = LC_FREE;
1843*4882a593Smuzhiyun 		spin_unlock(&device->al_lock);
1844*4882a593Smuzhiyun 	}
1845*4882a593Smuzhiyun 	unlock_all_resources();
1846*4882a593Smuzhiyun 
1847*4882a593Smuzhiyun 	if (r == SS_SUCCESS) {
1848*4882a593Smuzhiyun 		wake_up(&device->al_wait); /* for lc_reset() above */
1849*4882a593Smuzhiyun 		/* reset rs_last_bcast when a resync or verify is started,
1850*4882a593Smuzhiyun 		 * to deal with potential jiffies wrap. */
1851*4882a593Smuzhiyun 		device->rs_last_bcast = jiffies - HZ;
1852*4882a593Smuzhiyun 
1853*4882a593Smuzhiyun 		drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
1854*4882a593Smuzhiyun 		     drbd_conn_str(ns.conn),
1855*4882a593Smuzhiyun 		     (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
1856*4882a593Smuzhiyun 		     (unsigned long) device->rs_total);
1857*4882a593Smuzhiyun 		if (side == C_SYNC_TARGET) {
1858*4882a593Smuzhiyun 			device->bm_resync_fo = 0;
1859*4882a593Smuzhiyun 			device->use_csums = use_checksum_based_resync(connection, device);
1860*4882a593Smuzhiyun 		} else {
1861*4882a593Smuzhiyun 			device->use_csums = false;
1862*4882a593Smuzhiyun 		}
1863*4882a593Smuzhiyun 
1864*4882a593Smuzhiyun 		/* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
1865*4882a593Smuzhiyun 		 * with w_send_oos, or the sync target will get confused as to
1866*4882a593Smuzhiyun 		 * how many bits to resync.  We cannot do that always, because for an
1867*4882a593Smuzhiyun 		 * empty resync and protocol < 95, we need to do it here, as we call
1868*4882a593Smuzhiyun 		 * drbd_resync_finished from here in that case.
1869*4882a593Smuzhiyun 		 * We call drbd_gen_and_send_sync_uuid() here for protocol < 96,
1870*4882a593Smuzhiyun 		 * and from after_state_ch otherwise. */
1871*4882a593Smuzhiyun 		if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96)
1872*4882a593Smuzhiyun 			drbd_gen_and_send_sync_uuid(peer_device);
1873*4882a593Smuzhiyun 
1874*4882a593Smuzhiyun 		if (connection->agreed_pro_version < 95 && device->rs_total == 0) {
1875*4882a593Smuzhiyun 			/* This still has a race (about when exactly the peers
1876*4882a593Smuzhiyun 			 * detect connection loss) that can lead to a full sync
1877*4882a593Smuzhiyun 			 * on next handshake. In 8.3.9 we fixed this with explicit
1878*4882a593Smuzhiyun 			 * resync-finished notifications, but the fix
1879*4882a593Smuzhiyun 			 * introduces a protocol change.  Sleeping for some
1880*4882a593Smuzhiyun 			 * time longer than the ping interval + timeout on the
1881*4882a593Smuzhiyun 			 * SyncSource, to give the SyncTarget the chance to
1882*4882a593Smuzhiyun 			 * detect connection loss, then waiting for a ping
1883*4882a593Smuzhiyun 			 * response (implicit in drbd_resync_finished) reduces
1884*4882a593Smuzhiyun 			 * the race considerably, but does not solve it. */
1885*4882a593Smuzhiyun 			if (side == C_SYNC_SOURCE) {
1886*4882a593Smuzhiyun 				struct net_conf *nc;
1887*4882a593Smuzhiyun 				int timeo;
1888*4882a593Smuzhiyun 
1889*4882a593Smuzhiyun 				rcu_read_lock();
1890*4882a593Smuzhiyun 				nc = rcu_dereference(connection->net_conf);
1891*4882a593Smuzhiyun 				timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
1892*4882a593Smuzhiyun 				rcu_read_unlock();
1893*4882a593Smuzhiyun 				schedule_timeout_interruptible(timeo);
1894*4882a593Smuzhiyun 			}
1895*4882a593Smuzhiyun 			drbd_resync_finished(device);
1896*4882a593Smuzhiyun 		}
1897*4882a593Smuzhiyun 
1898*4882a593Smuzhiyun 		drbd_rs_controller_reset(device);
1899*4882a593Smuzhiyun 		/* ns.conn may already be != device->state.conn,
1900*4882a593Smuzhiyun 		 * we may have been paused in between, or become paused until
1901*4882a593Smuzhiyun 		 * the timer triggers.
1902*4882a593Smuzhiyun 		 * No matter, that is handled in resync_timer_fn() */
1903*4882a593Smuzhiyun 		if (ns.conn == C_SYNC_TARGET)
1904*4882a593Smuzhiyun 			mod_timer(&device->resync_timer, jiffies);
1905*4882a593Smuzhiyun 
1906*4882a593Smuzhiyun 		drbd_md_sync(device);
1907*4882a593Smuzhiyun 	}
1908*4882a593Smuzhiyun 	put_ldev(device);
1909*4882a593Smuzhiyun out:
1910*4882a593Smuzhiyun 	mutex_unlock(device->state_mutex);
1911*4882a593Smuzhiyun }
1912*4882a593Smuzhiyun 
1913*4882a593Smuzhiyun static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done)
1914*4882a593Smuzhiyun {
1915*4882a593Smuzhiyun 	struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
1916*4882a593Smuzhiyun 	device->rs_last_bcast = jiffies;
1917*4882a593Smuzhiyun 
1918*4882a593Smuzhiyun 	if (!get_ldev(device))
1919*4882a593Smuzhiyun 		return;
1920*4882a593Smuzhiyun 
1921*4882a593Smuzhiyun 	drbd_bm_write_lazy(device, 0);
1922*4882a593Smuzhiyun 	if (resync_done && is_sync_state(device->state.conn))
1923*4882a593Smuzhiyun 		drbd_resync_finished(device);
1924*4882a593Smuzhiyun 
1925*4882a593Smuzhiyun 	drbd_bcast_event(device, &sib);
1926*4882a593Smuzhiyun 	/* update timestamp, in case it took a while to write out stuff */
1927*4882a593Smuzhiyun 	device->rs_last_bcast = jiffies;
1928*4882a593Smuzhiyun 	put_ldev(device);
1929*4882a593Smuzhiyun }
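/* update_on_disk_bitmap() above lazily flushes dirty bitmap pages, finishes
 * the resync if requested and we are still in a sync state, and broadcasts a
 * SIB_SYNC_PROGRESS event.  rs_last_bcast is refreshed again at the end
 * because the bitmap write-out itself may take a while. */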
1930*4882a593Smuzhiyun 
1931*4882a593Smuzhiyun static void drbd_ldev_destroy(struct drbd_device *device)
1932*4882a593Smuzhiyun {
1933*4882a593Smuzhiyun 	lc_destroy(device->resync);
1934*4882a593Smuzhiyun 	device->resync = NULL;
1935*4882a593Smuzhiyun 	lc_destroy(device->act_log);
1936*4882a593Smuzhiyun 	device->act_log = NULL;
1937*4882a593Smuzhiyun 
1938*4882a593Smuzhiyun 	__acquire(local);
1939*4882a593Smuzhiyun 	drbd_backing_dev_free(device, device->ldev);
1940*4882a593Smuzhiyun 	device->ldev = NULL;
1941*4882a593Smuzhiyun 	__release(local);
1942*4882a593Smuzhiyun 
1943*4882a593Smuzhiyun 	clear_bit(GOING_DISKLESS, &device->flags);
1944*4882a593Smuzhiyun 	wake_up(&device->misc_wait);
1945*4882a593Smuzhiyun }
1946*4882a593Smuzhiyun 
1947*4882a593Smuzhiyun static void go_diskless(struct drbd_device *device)
1948*4882a593Smuzhiyun {
1949*4882a593Smuzhiyun 	D_ASSERT(device, device->state.disk == D_FAILED);
1950*4882a593Smuzhiyun 	/* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
1951*4882a593Smuzhiyun 	 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
1952*4882a593Smuzhiyun 	 * the protected members anymore, though, so once put_ldev reaches zero
1953*4882a593Smuzhiyun 	 * again, it will be safe to free them. */
1954*4882a593Smuzhiyun 
1955*4882a593Smuzhiyun 	/* Try to write changed bitmap pages, read errors may have just
1956*4882a593Smuzhiyun 	 * set some bits outside the area covered by the activity log.
1957*4882a593Smuzhiyun 	 *
1958*4882a593Smuzhiyun 	 * If we have an IO error during the bitmap writeout,
1959*4882a593Smuzhiyun 	 * we will want a full sync next time, just in case.
1960*4882a593Smuzhiyun 	 * (Do we want a specific meta data flag for this?)
1961*4882a593Smuzhiyun 	 *
1962*4882a593Smuzhiyun 	 * If that does not make it to stable storage either,
1963*4882a593Smuzhiyun 	 * we cannot do anything about that anymore.
1964*4882a593Smuzhiyun 	 *
1965*4882a593Smuzhiyun 	 * We still need to check if both bitmap and ldev are present, we may
1966*4882a593Smuzhiyun 	 * end up here after a failed attach, before ldev was even assigned.
1967*4882a593Smuzhiyun 	 */
1968*4882a593Smuzhiyun 	if (device->bitmap && device->ldev) {
1969*4882a593Smuzhiyun 		/* An interrupted resync or similar is allowed to recount bits
1970*4882a593Smuzhiyun 		 * while we detach.
1971*4882a593Smuzhiyun 		 * Any modifications would not be expected anymore, though.
1972*4882a593Smuzhiyun 		 */
1973*4882a593Smuzhiyun 		if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
1974*4882a593Smuzhiyun 					"detach", BM_LOCKED_TEST_ALLOWED)) {
1975*4882a593Smuzhiyun 			if (test_bit(WAS_READ_ERROR, &device->flags)) {
1976*4882a593Smuzhiyun 				drbd_md_set_flag(device, MDF_FULL_SYNC);
1977*4882a593Smuzhiyun 				drbd_md_sync(device);
1978*4882a593Smuzhiyun 			}
1979*4882a593Smuzhiyun 		}
1980*4882a593Smuzhiyun 	}
1981*4882a593Smuzhiyun 
1982*4882a593Smuzhiyun 	drbd_force_state(device, NS(disk, D_DISKLESS));
1983*4882a593Smuzhiyun }
1984*4882a593Smuzhiyun 
1985*4882a593Smuzhiyun static int do_md_sync(struct drbd_device *device)
1986*4882a593Smuzhiyun {
1987*4882a593Smuzhiyun 	drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
1988*4882a593Smuzhiyun 	drbd_md_sync(device);
1989*4882a593Smuzhiyun 	return 0;
1990*4882a593Smuzhiyun }
1991*4882a593Smuzhiyun 
1992*4882a593Smuzhiyun /* only called from drbd_worker thread, no locking */
1993*4882a593Smuzhiyun void __update_timing_details(
1994*4882a593Smuzhiyun 		struct drbd_thread_timing_details *tdp,
1995*4882a593Smuzhiyun 		unsigned int *cb_nr,
1996*4882a593Smuzhiyun 		void *cb,
1997*4882a593Smuzhiyun 		const char *fn, const unsigned int line)
1998*4882a593Smuzhiyun {
1999*4882a593Smuzhiyun 	unsigned int i = *cb_nr % DRBD_THREAD_DETAILS_HIST;
2000*4882a593Smuzhiyun 	struct drbd_thread_timing_details *td = tdp + i;
2001*4882a593Smuzhiyun 
2002*4882a593Smuzhiyun 	td->start_jif = jiffies;
2003*4882a593Smuzhiyun 	td->cb_addr = cb;
2004*4882a593Smuzhiyun 	td->caller_fn = fn;
2005*4882a593Smuzhiyun 	td->line = line;
2006*4882a593Smuzhiyun 	td->cb_nr = *cb_nr;
2007*4882a593Smuzhiyun 
2008*4882a593Smuzhiyun 	i = (i+1) % DRBD_THREAD_DETAILS_HIST;
2009*4882a593Smuzhiyun 	td = tdp + i;
2010*4882a593Smuzhiyun 	memset(td, 0, sizeof(*td));
2011*4882a593Smuzhiyun 
2012*4882a593Smuzhiyun 	++(*cb_nr);
2013*4882a593Smuzhiyun }
2014*4882a593Smuzhiyun 
2015*4882a593Smuzhiyun static void do_device_work(struct drbd_device *device, const unsigned long todo)
2016*4882a593Smuzhiyun {
2017*4882a593Smuzhiyun 	if (test_bit(MD_SYNC, &todo))
2018*4882a593Smuzhiyun 		do_md_sync(device);
2019*4882a593Smuzhiyun 	if (test_bit(RS_DONE, &todo) ||
2020*4882a593Smuzhiyun 	    test_bit(RS_PROGRESS, &todo))
2021*4882a593Smuzhiyun 		update_on_disk_bitmap(device, test_bit(RS_DONE, &todo));
2022*4882a593Smuzhiyun 	if (test_bit(GO_DISKLESS, &todo))
2023*4882a593Smuzhiyun 		go_diskless(device);
2024*4882a593Smuzhiyun 	if (test_bit(DESTROY_DISK, &todo))
2025*4882a593Smuzhiyun 		drbd_ldev_destroy(device);
2026*4882a593Smuzhiyun 	if (test_bit(RS_START, &todo))
2027*4882a593Smuzhiyun 		do_start_resync(device);
2028*4882a593Smuzhiyun }
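/* do_device_work() above dispatches the work bits harvested from
 * device->flags: MD_SYNC -> do_md_sync(), RS_PROGRESS/RS_DONE ->
 * update_on_disk_bitmap() (completing the resync if RS_DONE was set),
 * GO_DISKLESS -> go_diskless(), DESTROY_DISK -> drbd_ldev_destroy(),
 * RS_START -> do_start_resync(). */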
2029*4882a593Smuzhiyun 
2030*4882a593Smuzhiyun #define DRBD_DEVICE_WORK_MASK	\
2031*4882a593Smuzhiyun 	((1UL << GO_DISKLESS)	\
2032*4882a593Smuzhiyun 	|(1UL << DESTROY_DISK)	\
2033*4882a593Smuzhiyun 	|(1UL << MD_SYNC)	\
2034*4882a593Smuzhiyun 	|(1UL << RS_START)	\
2035*4882a593Smuzhiyun 	|(1UL << RS_PROGRESS)	\
2036*4882a593Smuzhiyun 	|(1UL << RS_DONE)	\
2037*4882a593Smuzhiyun 	)
2038*4882a593Smuzhiyun 
2039*4882a593Smuzhiyun static unsigned long get_work_bits(unsigned long *flags)
2040*4882a593Smuzhiyun {
2041*4882a593Smuzhiyun 	unsigned long old, new;
2042*4882a593Smuzhiyun 	do {
2043*4882a593Smuzhiyun 		old = *flags;
2044*4882a593Smuzhiyun 		new = old & ~DRBD_DEVICE_WORK_MASK;
2045*4882a593Smuzhiyun 	} while (cmpxchg(flags, old, new) != old);
2046*4882a593Smuzhiyun 	return old & DRBD_DEVICE_WORK_MASK;
2047*4882a593Smuzhiyun }
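/* get_work_bits() above atomically fetches and clears the bits in
 * DRBD_DEVICE_WORK_MASK from device->flags via a cmpxchg() loop, roughly:
 *   old = *flags; *flags = old & ~MASK;   (as one atomic step)
 *   return old & MASK;
 * A bit set concurrently (e.g. by drbd_device_post_work()) is either picked
 * up by this call or left in place for the next one, but never lost. */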
2048*4882a593Smuzhiyun 
2049*4882a593Smuzhiyun static void do_unqueued_work(struct drbd_connection *connection)
2050*4882a593Smuzhiyun {
2051*4882a593Smuzhiyun 	struct drbd_peer_device *peer_device;
2052*4882a593Smuzhiyun 	int vnr;
2053*4882a593Smuzhiyun 
2054*4882a593Smuzhiyun 	rcu_read_lock();
2055*4882a593Smuzhiyun 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2056*4882a593Smuzhiyun 		struct drbd_device *device = peer_device->device;
2057*4882a593Smuzhiyun 		unsigned long todo = get_work_bits(&device->flags);
2058*4882a593Smuzhiyun 		if (!todo)
2059*4882a593Smuzhiyun 			continue;
2060*4882a593Smuzhiyun 
2061*4882a593Smuzhiyun 		kref_get(&device->kref);
2062*4882a593Smuzhiyun 		rcu_read_unlock();
2063*4882a593Smuzhiyun 		do_device_work(device, todo);
2064*4882a593Smuzhiyun 		kref_put(&device->kref, drbd_destroy_device);
2065*4882a593Smuzhiyun 		rcu_read_lock();
2066*4882a593Smuzhiyun 	}
2067*4882a593Smuzhiyun 	rcu_read_unlock();
2068*4882a593Smuzhiyun }
2069*4882a593Smuzhiyun 
2070*4882a593Smuzhiyun static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
2071*4882a593Smuzhiyun {
2072*4882a593Smuzhiyun 	spin_lock_irq(&queue->q_lock);
2073*4882a593Smuzhiyun 	list_splice_tail_init(&queue->q, work_list);
2074*4882a593Smuzhiyun 	spin_unlock_irq(&queue->q_lock);
2075*4882a593Smuzhiyun 	return !list_empty(work_list);
2076*4882a593Smuzhiyun }
2077*4882a593Smuzhiyun 
2078*4882a593Smuzhiyun static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list)
2079*4882a593Smuzhiyun {
2080*4882a593Smuzhiyun 	DEFINE_WAIT(wait);
2081*4882a593Smuzhiyun 	struct net_conf *nc;
2082*4882a593Smuzhiyun 	int uncork, cork;
2083*4882a593Smuzhiyun 
2084*4882a593Smuzhiyun 	dequeue_work_batch(&connection->sender_work, work_list);
2085*4882a593Smuzhiyun 	if (!list_empty(work_list))
2086*4882a593Smuzhiyun 		return;
2087*4882a593Smuzhiyun 
2088*4882a593Smuzhiyun 	/* Still nothing to do?
2089*4882a593Smuzhiyun 	 * Maybe we still need to close the current epoch,
2090*4882a593Smuzhiyun 	 * even if no new requests are queued yet.
2091*4882a593Smuzhiyun 	 *
2092*4882a593Smuzhiyun 	 * Also, poke TCP, just in case.
2093*4882a593Smuzhiyun 	 * Then wait for new work (or signal). */
2094*4882a593Smuzhiyun 	rcu_read_lock();
2095*4882a593Smuzhiyun 	nc = rcu_dereference(connection->net_conf);
2096*4882a593Smuzhiyun 	uncork = nc ? nc->tcp_cork : 0;
2097*4882a593Smuzhiyun 	rcu_read_unlock();
2098*4882a593Smuzhiyun 	if (uncork) {
2099*4882a593Smuzhiyun 		mutex_lock(&connection->data.mutex);
2100*4882a593Smuzhiyun 		if (connection->data.socket)
2101*4882a593Smuzhiyun 			tcp_sock_set_cork(connection->data.socket->sk, false);
2102*4882a593Smuzhiyun 		mutex_unlock(&connection->data.mutex);
2103*4882a593Smuzhiyun 	}
2104*4882a593Smuzhiyun 
2105*4882a593Smuzhiyun 	for (;;) {
2106*4882a593Smuzhiyun 		int send_barrier;
2107*4882a593Smuzhiyun 		prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
2108*4882a593Smuzhiyun 		spin_lock_irq(&connection->resource->req_lock);
2109*4882a593Smuzhiyun 		spin_lock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
2110*4882a593Smuzhiyun 		if (!list_empty(&connection->sender_work.q))
2111*4882a593Smuzhiyun 			list_splice_tail_init(&connection->sender_work.q, work_list);
2112*4882a593Smuzhiyun 		spin_unlock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
2113*4882a593Smuzhiyun 		if (!list_empty(work_list) || signal_pending(current)) {
2114*4882a593Smuzhiyun 			spin_unlock_irq(&connection->resource->req_lock);
2115*4882a593Smuzhiyun 			break;
2116*4882a593Smuzhiyun 		}
2117*4882a593Smuzhiyun 
2118*4882a593Smuzhiyun 		/* We found nothing new to do, no to-be-communicated request,
2119*4882a593Smuzhiyun 		 * no other work item.  We may still need to close the last
2120*4882a593Smuzhiyun 		 * epoch.  Next incoming request epoch will be connection ->
2121*4882a593Smuzhiyun 		 * current transfer log epoch number.  If that is different
2122*4882a593Smuzhiyun 		 * from the epoch of the last request we communicated, it is
2123*4882a593Smuzhiyun 		 * safe to send the epoch separating barrier now.
2124*4882a593Smuzhiyun 		 */
2125*4882a593Smuzhiyun 		send_barrier =
2126*4882a593Smuzhiyun 			atomic_read(&connection->current_tle_nr) !=
2127*4882a593Smuzhiyun 			connection->send.current_epoch_nr;
2128*4882a593Smuzhiyun 		spin_unlock_irq(&connection->resource->req_lock);
2129*4882a593Smuzhiyun 
2130*4882a593Smuzhiyun 		if (send_barrier)
2131*4882a593Smuzhiyun 			maybe_send_barrier(connection,
2132*4882a593Smuzhiyun 					connection->send.current_epoch_nr + 1);
2133*4882a593Smuzhiyun 
2134*4882a593Smuzhiyun 		if (test_bit(DEVICE_WORK_PENDING, &connection->flags))
2135*4882a593Smuzhiyun 			break;
2136*4882a593Smuzhiyun 
2137*4882a593Smuzhiyun 		/* drbd_send() may have called flush_signals() */
2138*4882a593Smuzhiyun 		if (get_t_state(&connection->worker) != RUNNING)
2139*4882a593Smuzhiyun 			break;
2140*4882a593Smuzhiyun 
2141*4882a593Smuzhiyun 		schedule();
2142*4882a593Smuzhiyun 		/* may be woken up for things other than new work, too,
2143*4882a593Smuzhiyun 		 * e.g. if the current epoch got closed.
2144*4882a593Smuzhiyun 		 * In which case we send the barrier above. */
2145*4882a593Smuzhiyun 	}
2146*4882a593Smuzhiyun 	finish_wait(&connection->sender_work.q_wait, &wait);
2147*4882a593Smuzhiyun 
2148*4882a593Smuzhiyun 	/* someone may have changed the config while we have been waiting above. */
2149*4882a593Smuzhiyun 	rcu_read_lock();
2150*4882a593Smuzhiyun 	nc = rcu_dereference(connection->net_conf);
2151*4882a593Smuzhiyun 	cork = nc ? nc->tcp_cork : 0;
2152*4882a593Smuzhiyun 	rcu_read_unlock();
2153*4882a593Smuzhiyun 	mutex_lock(&connection->data.mutex);
2154*4882a593Smuzhiyun 	if (connection->data.socket) {
2155*4882a593Smuzhiyun 		if (cork)
2156*4882a593Smuzhiyun 			tcp_sock_set_cork(connection->data.socket->sk, true);
2157*4882a593Smuzhiyun 		else if (!uncork)
2158*4882a593Smuzhiyun 			tcp_sock_set_cork(connection->data.socket->sk, false);
2159*4882a593Smuzhiyun 	}
2160*4882a593Smuzhiyun 	mutex_unlock(&connection->data.mutex);
2161*4882a593Smuzhiyun }
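
Editor's note: the cork handling in wait_for_work() reduces to a single mutex-protected toggle. A hedged sketch of that pattern, assuming only what the function itself uses (connection->data.mutex, connection->data.socket and the in-kernel tcp_sock_set_cork() helper); the function name is an illustration, not part of this file.

	static void drbd_data_sock_set_cork_sketch(struct drbd_connection *connection, bool cork)
	{
		mutex_lock(&connection->data.mutex);
		if (connection->data.socket)
			tcp_sock_set_cork(connection->data.socket->sk, cork);
		mutex_unlock(&connection->data.mutex);
	}

Uncorking before the wait pushes out any partially filled TCP segment so the peer is not left waiting on buffered data; once the wait is over, the final if/else re-reads the configuration and restores whichever cork state it now calls for before the next batch is sent.
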
2162*4882a593Smuzhiyun 
2163*4882a593Smuzhiyun int drbd_worker(struct drbd_thread *thi)
2164*4882a593Smuzhiyun {
2165*4882a593Smuzhiyun 	struct drbd_connection *connection = thi->connection;
2166*4882a593Smuzhiyun 	struct drbd_work *w = NULL;
2167*4882a593Smuzhiyun 	struct drbd_peer_device *peer_device;
2168*4882a593Smuzhiyun 	LIST_HEAD(work_list);
2169*4882a593Smuzhiyun 	int vnr;
2170*4882a593Smuzhiyun 
2171*4882a593Smuzhiyun 	while (get_t_state(thi) == RUNNING) {
2172*4882a593Smuzhiyun 		drbd_thread_current_set_cpu(thi);
2173*4882a593Smuzhiyun 
2174*4882a593Smuzhiyun 		if (list_empty(&work_list)) {
2175*4882a593Smuzhiyun 			update_worker_timing_details(connection, wait_for_work);
2176*4882a593Smuzhiyun 			wait_for_work(connection, &work_list);
2177*4882a593Smuzhiyun 		}
2178*4882a593Smuzhiyun 
2179*4882a593Smuzhiyun 		if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
2180*4882a593Smuzhiyun 			update_worker_timing_details(connection, do_unqueued_work);
2181*4882a593Smuzhiyun 			do_unqueued_work(connection);
2182*4882a593Smuzhiyun 		}
2183*4882a593Smuzhiyun 
2184*4882a593Smuzhiyun 		if (signal_pending(current)) {
2185*4882a593Smuzhiyun 			flush_signals(current);
2186*4882a593Smuzhiyun 			if (get_t_state(thi) == RUNNING) {
2187*4882a593Smuzhiyun 				drbd_warn(connection, "Worker got an unexpected signal\n");
2188*4882a593Smuzhiyun 				continue;
2189*4882a593Smuzhiyun 			}
2190*4882a593Smuzhiyun 			break;
2191*4882a593Smuzhiyun 		}
2192*4882a593Smuzhiyun 
2193*4882a593Smuzhiyun 		if (get_t_state(thi) != RUNNING)
2194*4882a593Smuzhiyun 			break;
2195*4882a593Smuzhiyun 
2196*4882a593Smuzhiyun 		if (!list_empty(&work_list)) {
2197*4882a593Smuzhiyun 			w = list_first_entry(&work_list, struct drbd_work, list);
2198*4882a593Smuzhiyun 			list_del_init(&w->list);
2199*4882a593Smuzhiyun 			update_worker_timing_details(connection, w->cb);
2200*4882a593Smuzhiyun 			if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
2201*4882a593Smuzhiyun 				continue;
2202*4882a593Smuzhiyun 			if (connection->cstate >= C_WF_REPORT_PARAMS)
2203*4882a593Smuzhiyun 				conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
2204*4882a593Smuzhiyun 		}
2205*4882a593Smuzhiyun 	}
2206*4882a593Smuzhiyun 
2207*4882a593Smuzhiyun 	do {
2208*4882a593Smuzhiyun 		if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
2209*4882a593Smuzhiyun 			update_worker_timing_details(connection, do_unqueued_work);
2210*4882a593Smuzhiyun 			do_unqueued_work(connection);
2211*4882a593Smuzhiyun 		}
2212*4882a593Smuzhiyun 		if (!list_empty(&work_list)) {
2213*4882a593Smuzhiyun 			w = list_first_entry(&work_list, struct drbd_work, list);
2214*4882a593Smuzhiyun 			list_del_init(&w->list);
2215*4882a593Smuzhiyun 			update_worker_timing_details(connection, w->cb);
2216*4882a593Smuzhiyun 			w->cb(w, 1);
2217*4882a593Smuzhiyun 		} else
2218*4882a593Smuzhiyun 			dequeue_work_batch(&connection->sender_work, &work_list);
2219*4882a593Smuzhiyun 	} while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags));
2220*4882a593Smuzhiyun 
2221*4882a593Smuzhiyun 	rcu_read_lock();
2222*4882a593Smuzhiyun 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2223*4882a593Smuzhiyun 		struct drbd_device *device = peer_device->device;
2224*4882a593Smuzhiyun 		D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE);
2225*4882a593Smuzhiyun 		kref_get(&device->kref);
2226*4882a593Smuzhiyun 		rcu_read_unlock();
2227*4882a593Smuzhiyun 		drbd_device_cleanup(device);
2228*4882a593Smuzhiyun 		kref_put(&device->kref, drbd_destroy_device);
2229*4882a593Smuzhiyun 		rcu_read_lock();
2230*4882a593Smuzhiyun 	}
2231*4882a593Smuzhiyun 	rcu_read_unlock();
2232*4882a593Smuzhiyun 
2233*4882a593Smuzhiyun 	return 0;
2234*4882a593Smuzhiyun }
2235
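
Editor's note: a hedged, heavily simplified skeleton of the main loop in drbd_worker() above, for orientation only; the timing bookkeeping, DEVICE_WORK_PENDING handling and the shutdown drain are omitted, and the condensed error check is equivalent to the original two-step form.

	static int drbd_worker_loop_sketch(struct drbd_thread *thi)
	{
		struct drbd_connection *connection = thi->connection;
		struct drbd_work *w;
		LIST_HEAD(work_list);

		while (get_t_state(thi) == RUNNING) {
			/* block until there is something to send, possibly closing an epoch */
			if (list_empty(&work_list))
				wait_for_work(connection, &work_list);

			if (signal_pending(current)) {
				flush_signals(current);
				continue;	/* the real loop warns unless the thread is being stopped */
			}

			if (list_empty(&work_list))
				continue;

			/* one work item per iteration, so signals and device work are re-checked */
			w = list_first_entry(&work_list, struct drbd_work, list);
			list_del_init(&w->list);
			if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) &&
			    connection->cstate >= C_WF_REPORT_PARAMS)
				conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
		}
		return 0;
	}
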