// SPDX-License-Identifier: GPL-2.0-or-later
/*
   drbd.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
   from Logicworks, Inc. for making SDP replication support possible.


 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/jiffies.h>
#include <linux/drbd.h>
#include <linux/uaccess.h>
#include <asm/types.h>
#include <net/sock.h>
#include <linux/ctype.h>
#include <linux/mutex.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/reboot.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/workqueue.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/sched/signal.h>

#include <linux/drbd_limits.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */
#include "drbd_vli.h"
#include "drbd_debugfs.h"

static DEFINE_MUTEX(drbd_main_mutex);
static int drbd_open(struct block_device *bdev, fmode_t mode);
static void drbd_release(struct gendisk *gd, fmode_t mode);
static void md_sync_timer_fn(struct timer_list *t);
static int w_bitmap_io(struct drbd_work *w, int unused);

MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
	      "Lars Ellenberg <lars@linbit.com>");
MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
MODULE_VERSION(REL_VERSION);
MODULE_LICENSE("GPL");
MODULE_PARM_DESC(minor_count, "Approximate number of drbd devices ("
		 __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);

#include <linux/moduleparam.h>
/* thanks to these macros, if compiled into the kernel (not-module),
 * these become boot parameters (e.g., drbd.minor_count) */

#ifdef CONFIG_DRBD_FAULT_INJECTION
int drbd_enable_faults;
int drbd_fault_rate;
static int drbd_fault_count;
static int drbd_fault_devs;
/* bitmap of enabled faults */
module_param_named(enable_faults, drbd_enable_faults, int, 0664);
/* fault rate % value - applies to all enabled faults */
module_param_named(fault_rate, drbd_fault_rate, int, 0664);
/* count of faults inserted */
module_param_named(fault_count, drbd_fault_count, int, 0664);
/* bitmap of devices to insert faults on */
module_param_named(fault_devs, drbd_fault_devs, int, 0644);
#endif

/* module parameters we can keep static */
static bool drbd_allow_oos; /* allow_open_on_secondary */
static bool drbd_disable_sendpage;
MODULE_PARM_DESC(allow_oos, "DONT USE!");
module_param_named(allow_oos, drbd_allow_oos, bool, 0);
module_param_named(disable_sendpage, drbd_disable_sendpage, bool, 0644);

/* module parameters we share */
int drbd_proc_details; /* Detail level in proc drbd */
module_param_named(proc_details, drbd_proc_details, int, 0644);
/* module parameters shared with defaults */
unsigned int drbd_minor_count = DRBD_MINOR_COUNT_DEF;
/* Module parameter for setting the user mode helper program
 * to run. Default is /sbin/drbdadm */
char drbd_usermode_helper[80] = "/sbin/drbdadm";
module_param_named(minor_count, drbd_minor_count, uint, 0444);
module_param_string(usermode_helper, drbd_usermode_helper, sizeof(drbd_usermode_helper), 0644);

/* in 2.6.x, our device mapping and config info contains our virtual gendisks
 * as member "struct gendisk *vdisk;"
 */
struct idr drbd_devices;
struct list_head drbd_resources;
struct mutex resources_mutex;

struct kmem_cache *drbd_request_cache;
struct kmem_cache *drbd_ee_cache;	/* peer requests */
struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
mempool_t drbd_request_mempool;
mempool_t drbd_ee_mempool;
mempool_t drbd_md_io_page_pool;
struct bio_set drbd_md_io_bio_set;
struct bio_set drbd_io_bio_set;

/* I do not use a standard mempool, because:
   1) I want to hand out the pre-allocated objects first.
   2) I want to be able to interrupt sleeping allocation with a signal.
   Note: This is a single linked list, the next pointer is the private
	 member of struct page.
 */
struct page *drbd_pp_pool;
spinlock_t drbd_pp_lock;
int drbd_pp_vacant;
wait_queue_head_t drbd_pp_wait;

DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5);

static const struct block_device_operations drbd_ops = {
	.owner		= THIS_MODULE,
	.submit_bio	= drbd_submit_bio,
	.open		= drbd_open,
	.release	= drbd_release,
};

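/* Allocate a single-vec bio for meta-data I/O from the dedicated bio set;
 * fall back to the generic bio allocator while drbd_md_io_bio_set has not
 * been initialized yet. */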
struct bio *bio_alloc_drbd(gfp_t gfp_mask)
{
	struct bio *bio;

	if (!bioset_initialized(&drbd_md_io_bio_set))
		return bio_alloc(gfp_mask, 1);

	bio = bio_alloc_bioset(gfp_mask, 1, &drbd_md_io_bio_set);
	if (!bio)
		return NULL;
	return bio;
}

#ifdef __CHECKER__
/* When checking with sparse, and this is an inline function, sparse will
   give tons of false positives. When this is a real function, sparse works.
 */
int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_state mins)
{
	int io_allowed;

	atomic_inc(&device->local_cnt);
	io_allowed = (device->state.disk >= mins);
	if (!io_allowed) {
		if (atomic_dec_and_test(&device->local_cnt))
			wake_up(&device->misc_wait);
	}
	return io_allowed;
}

#endif

/**
 * tl_release() - mark as BARRIER_ACKED all requests in the corresponding transfer log epoch
 * @connection:	DRBD connection.
 * @barrier_nr:	Expected identifier of the DRBD write barrier packet.
 * @set_size:	Expected number of requests before that barrier.
 *
 * In case the passed barrier_nr or set_size does not match the oldest
 * epoch of not yet barrier-acked requests, this function will cause a
 * termination of the connection.
 */
void tl_release(struct drbd_connection *connection, unsigned int barrier_nr,
		unsigned int set_size)
{
	struct drbd_request *r;
	struct drbd_request *req = NULL, *tmp = NULL;
	int expect_epoch = 0;
	int expect_size = 0;

	spin_lock_irq(&connection->resource->req_lock);

	/* find oldest not yet barrier-acked write request,
	 * count writes in its epoch. */
	list_for_each_entry(r, &connection->transfer_log, tl_requests) {
		const unsigned s = r->rq_state;
		if (!req) {
			if (!(s & RQ_WRITE))
				continue;
			if (!(s & RQ_NET_MASK))
				continue;
			if (s & RQ_NET_DONE)
				continue;
			req = r;
			expect_epoch = req->epoch;
			expect_size++;
		} else {
			if (r->epoch != expect_epoch)
				break;
			if (!(s & RQ_WRITE))
				continue;
			/* if (s & RQ_DONE): not expected */
			/* if (!(s & RQ_NET_MASK)): not expected */
			expect_size++;
		}
	}

	/* first some paranoia code */
	if (req == NULL) {
		drbd_err(connection, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
			 barrier_nr);
		goto bail;
	}
	if (expect_epoch != barrier_nr) {
		drbd_err(connection, "BAD! BarrierAck #%u received, expected #%u!\n",
			 barrier_nr, expect_epoch);
		goto bail;
	}

	if (expect_size != set_size) {
		drbd_err(connection, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
			 barrier_nr, set_size, expect_size);
		goto bail;
	}

	/* Clean up list of requests processed during current epoch. */
	/* this extra list walk restart is paranoia,
	 * to catch requests being barrier-acked "unexpectedly".
	 * It usually should find the same req again, or some READ preceding it. */
	list_for_each_entry(req, &connection->transfer_log, tl_requests)
		if (req->epoch == expect_epoch) {
			tmp = req;
			break;
		}
	req = list_prepare_entry(tmp, &connection->transfer_log, tl_requests);
	list_for_each_entry_safe_from(req, r, &connection->transfer_log, tl_requests) {
		if (req->epoch != expect_epoch)
			break;
		_req_mod(req, BARRIER_ACKED);
	}
	spin_unlock_irq(&connection->resource->req_lock);

	return;

bail:
	spin_unlock_irq(&connection->resource->req_lock);
	conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
}


/**
 * _tl_restart() - Walks the transfer log, and applies an action to all requests
 * @connection:	DRBD connection to operate on.
 * @what:	The action/event to perform with all request objects
 *
 * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO,
 * RESTART_FROZEN_DISK_IO.
 */
/* must hold resource->req_lock */
void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what)
{
	struct drbd_request *req, *r;

	list_for_each_entry_safe(req, r, &connection->transfer_log, tl_requests)
		_req_mod(req, what);
}

void tl_restart(struct drbd_connection *connection, enum drbd_req_event what)
{
	spin_lock_irq(&connection->resource->req_lock);
	_tl_restart(connection, what);
	spin_unlock_irq(&connection->resource->req_lock);
}

/**
 * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
 * @connection:	DRBD connection whose transfer log is cleared.
 *
 * This is called after the connection to the peer was lost. The storage covered
 * by the requests on the transfer log gets marked as out of sync. Called from the
 * receiver thread and the worker thread.
 */
void tl_clear(struct drbd_connection *connection)
{
	tl_restart(connection, CONNECTION_LOST_WHILE_PENDING);
}

/**
 * tl_abort_disk_io() - Abort disk I/O for all requests for a certain device in the TL
 * @device:	DRBD device.
 */
void tl_abort_disk_io(struct drbd_device *device)
{
	struct drbd_connection *connection = first_peer_device(device)->connection;
	struct drbd_request *req, *r;

	spin_lock_irq(&connection->resource->req_lock);
	list_for_each_entry_safe(req, r, &connection->transfer_log, tl_requests) {
		if (!(req->rq_state & RQ_LOCAL_PENDING))
			continue;
		if (req->device != device)
			continue;
		_req_mod(req, ABORT_DISK_IO);
	}
	spin_unlock_irq(&connection->resource->req_lock);
}

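/* Common kthread entry point for all DRBD threads: names the task after the
 * thread and resource, runs thi->function(), restarts it while t_state is
 * RESTARTING, and on exit drops the connection/resource/module references
 * that were taken in drbd_thread_start(). */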
static int drbd_thread_setup(void *arg)
{
	struct drbd_thread *thi = (struct drbd_thread *) arg;
	struct drbd_resource *resource = thi->resource;
	unsigned long flags;
	int retval;

	snprintf(current->comm, sizeof(current->comm), "drbd_%c_%s",
		 thi->name[0],
		 resource->name);

	allow_kernel_signal(DRBD_SIGKILL);
	allow_kernel_signal(SIGXCPU);
restart:
	retval = thi->function(thi);

	spin_lock_irqsave(&thi->t_lock, flags);

	/* if the receiver has been "EXITING", the last thing it did
	 * was set the conn state to "StandAlone",
	 * if now a re-connect request comes in, conn state goes C_UNCONNECTED,
	 * and receiver thread will be "started".
	 * drbd_thread_start needs to set "RESTARTING" in that case.
	 * t_state check and assignment needs to be within the same spinlock,
	 * so either thread_start sees EXITING, and can remap to RESTARTING,
	 * or thread_start sees NONE, and can proceed as normal.
	 */

	if (thi->t_state == RESTARTING) {
		drbd_info(resource, "Restarting %s thread\n", thi->name);
		thi->t_state = RUNNING;
		spin_unlock_irqrestore(&thi->t_lock, flags);
		goto restart;
	}

	thi->task = NULL;
	thi->t_state = NONE;
	smp_mb();
	complete_all(&thi->stop);
	spin_unlock_irqrestore(&thi->t_lock, flags);

	drbd_info(resource, "Terminating %s\n", current->comm);

	/* Release mod reference taken when thread was started */

	if (thi->connection)
		kref_put(&thi->connection->kref, drbd_destroy_connection);
	kref_put(&resource->kref, drbd_destroy_resource);
	module_put(THIS_MODULE);
	return retval;
}

static void drbd_thread_init(struct drbd_resource *resource, struct drbd_thread *thi,
			     int (*func) (struct drbd_thread *), const char *name)
{
	spin_lock_init(&thi->t_lock);
	thi->task = NULL;
	thi->t_state = NONE;
	thi->function = func;
	thi->resource = resource;
	thi->connection = NULL;
	thi->name = name;
}

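/* Start (or restart) a DRBD thread.  Takes a module reference plus resource
 * and connection krefs for the lifetime of the thread; if the thread is
 * currently exiting, it is flagged RESTARTING instead of being started twice.
 * Returns true on success or when the thread was already running. */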
int drbd_thread_start(struct drbd_thread *thi)
{
	struct drbd_resource *resource = thi->resource;
	struct task_struct *nt;
	unsigned long flags;

	/* is used from state engine doing drbd_thread_stop_nowait,
	 * while holding the req lock irqsave */
	spin_lock_irqsave(&thi->t_lock, flags);

	switch (thi->t_state) {
	case NONE:
		drbd_info(resource, "Starting %s thread (from %s [%d])\n",
			 thi->name, current->comm, current->pid);

		/* Get ref on module for thread - this is released when thread exits */
		if (!try_module_get(THIS_MODULE)) {
			drbd_err(resource, "Failed to get module reference in drbd_thread_start\n");
			spin_unlock_irqrestore(&thi->t_lock, flags);
			return false;
		}

		kref_get(&resource->kref);
		if (thi->connection)
			kref_get(&thi->connection->kref);

		init_completion(&thi->stop);
		thi->reset_cpu_mask = 1;
		thi->t_state = RUNNING;
		spin_unlock_irqrestore(&thi->t_lock, flags);
		flush_signals(current); /* otherw. may get -ERESTARTNOINTR */

		nt = kthread_create(drbd_thread_setup, (void *) thi,
				    "drbd_%c_%s", thi->name[0], thi->resource->name);

		if (IS_ERR(nt)) {
			drbd_err(resource, "Couldn't start thread\n");

			if (thi->connection)
				kref_put(&thi->connection->kref, drbd_destroy_connection);
			kref_put(&resource->kref, drbd_destroy_resource);
			module_put(THIS_MODULE);
			return false;
		}
		spin_lock_irqsave(&thi->t_lock, flags);
		thi->task = nt;
		thi->t_state = RUNNING;
		spin_unlock_irqrestore(&thi->t_lock, flags);
		wake_up_process(nt);
		break;
	case EXITING:
		thi->t_state = RESTARTING;
		drbd_info(resource, "Restarting %s thread (from %s [%d])\n",
			 thi->name, current->comm, current->pid);
		fallthrough;
	case RUNNING:
	case RESTARTING:
	default:
		spin_unlock_irqrestore(&thi->t_lock, flags);
		break;
	}

	return true;
}


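/* Ask a DRBD thread to stop (or to restart, if @restart is set).  The thread
 * is signalled with DRBD_SIGKILL so it can leave blocking calls; with @wait
 * set, block until the thread has actually terminated. */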
void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
{
	unsigned long flags;

	enum drbd_thread_state ns = restart ? RESTARTING : EXITING;

	/* may be called from state engine, holding the req lock irqsave */
	spin_lock_irqsave(&thi->t_lock, flags);

	if (thi->t_state == NONE) {
		spin_unlock_irqrestore(&thi->t_lock, flags);
		if (restart)
			drbd_thread_start(thi);
		return;
	}

	if (thi->t_state != ns) {
		if (thi->task == NULL) {
			spin_unlock_irqrestore(&thi->t_lock, flags);
			return;
		}

		thi->t_state = ns;
		smp_mb();
		init_completion(&thi->stop);
		if (thi->task != current)
			send_sig(DRBD_SIGKILL, thi->task, 1);
	}

	spin_unlock_irqrestore(&thi->t_lock, flags);

	if (wait)
		wait_for_completion(&thi->stop);
}

int conn_lowest_minor(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr = 0, minor = -1;

	rcu_read_lock();
	peer_device = idr_get_next(&connection->peer_devices, &vnr);
	if (peer_device)
		minor = device_to_minor(peer_device->device);
	rcu_read_unlock();

	return minor;
}

#ifdef CONFIG_SMP
/**
 * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs
 *
 * Forces all threads of a resource onto the same CPU. This is beneficial for
 * DRBD's performance. May be overwritten by user's configuration.
 */
static void drbd_calc_cpu_mask(cpumask_var_t *cpu_mask)
{
	unsigned int *resources_per_cpu, min_index = ~0;

	resources_per_cpu = kcalloc(nr_cpu_ids, sizeof(*resources_per_cpu),
				    GFP_KERNEL);
	if (resources_per_cpu) {
		struct drbd_resource *resource;
		unsigned int cpu, min = ~0;

		rcu_read_lock();
		for_each_resource_rcu(resource, &drbd_resources) {
			for_each_cpu(cpu, resource->cpu_mask)
				resources_per_cpu[cpu]++;
		}
		rcu_read_unlock();
		for_each_online_cpu(cpu) {
			if (resources_per_cpu[cpu] < min) {
				min = resources_per_cpu[cpu];
				min_index = cpu;
			}
		}
		kfree(resources_per_cpu);
	}
	if (min_index == ~0) {
		cpumask_setall(*cpu_mask);
		return;
	}
	cpumask_set_cpu(min_index, *cpu_mask);
}

/**
 * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread
 * @thi:	drbd_thread object
 *
 * Call in the "main loop" of _all_ threads, no need for any mutex, current won't die
 * prematurely.
 */
void drbd_thread_current_set_cpu(struct drbd_thread *thi)
{
	struct drbd_resource *resource = thi->resource;
	struct task_struct *p = current;

	if (!thi->reset_cpu_mask)
		return;
	thi->reset_cpu_mask = 0;
	set_cpus_allowed_ptr(p, resource->cpu_mask);
}
#else
#define drbd_calc_cpu_mask(A) ({})
#endif

/**
 * drbd_header_size - size of a packet header
 *
 * The header size is a multiple of 8, so any payload following the header is
 * word aligned on 64-bit architectures. (The bitmap send and receive code
 * relies on this.)
 */
unsigned int drbd_header_size(struct drbd_connection *connection)
{
	if (connection->agreed_pro_version >= 100) {
		BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header100), 8));
		return sizeof(struct p_header100);
	} else {
		BUILD_BUG_ON(sizeof(struct p_header80) !=
			     sizeof(struct p_header95));
		BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8));
		return sizeof(struct p_header80);
	}
}

static unsigned int prepare_header80(struct p_header80 *h, enum drbd_packet cmd, int size)
{
	h->magic   = cpu_to_be32(DRBD_MAGIC);
	h->command = cpu_to_be16(cmd);
	h->length  = cpu_to_be16(size);
	return sizeof(struct p_header80);
}

static unsigned int prepare_header95(struct p_header95 *h, enum drbd_packet cmd, int size)
{
	h->magic   = cpu_to_be16(DRBD_MAGIC_BIG);
	h->command = cpu_to_be16(cmd);
	h->length  = cpu_to_be32(size);
	return sizeof(struct p_header95);
}

static unsigned int prepare_header100(struct p_header100 *h, enum drbd_packet cmd,
				      int size, int vnr)
{
	h->magic = cpu_to_be32(DRBD_MAGIC_100);
	h->volume = cpu_to_be16(vnr);
	h->command = cpu_to_be16(cmd);
	h->length = cpu_to_be32(size);
	h->pad = 0;
	return sizeof(struct p_header100);
}

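/* Pick the on-the-wire header format from the agreed protocol version:
 * the h100 header for peers >= 100, the "big" h95 header when a pre-100
 * peer needs a payload larger than DRBD_MAX_SIZE_H80_PACKET, and the
 * classic h80 header otherwise. */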
static unsigned int prepare_header(struct drbd_connection *connection, int vnr,
				   void *buffer, enum drbd_packet cmd, int size)
{
	if (connection->agreed_pro_version >= 100)
		return prepare_header100(buffer, cmd, size, vnr);
	else if (connection->agreed_pro_version >= 95 &&
		 size > DRBD_MAX_SIZE_H80_PACKET)
		return prepare_header95(buffer, cmd, size);
	else
		return prepare_header80(buffer, cmd, size);
}

static void *__conn_prepare_command(struct drbd_connection *connection,
				    struct drbd_socket *sock)
{
	if (!sock->socket)
		return NULL;
	return sock->sbuf + drbd_header_size(connection);
}

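/* Reserve the send buffer of @sock and return a pointer to the payload area
 * just behind the packet header.  On success the socket mutex stays locked;
 * it is released again by conn_send_command() / drbd_send_command(). */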
void *conn_prepare_command(struct drbd_connection *connection, struct drbd_socket *sock)
{
	void *p;

	mutex_lock(&sock->mutex);
	p = __conn_prepare_command(connection, sock);
	if (!p)
		mutex_unlock(&sock->mutex);

	return p;
}

void *drbd_prepare_command(struct drbd_peer_device *peer_device, struct drbd_socket *sock)
{
	return conn_prepare_command(peer_device->connection, sock);
}

static int __send_command(struct drbd_connection *connection, int vnr,
			  struct drbd_socket *sock, enum drbd_packet cmd,
			  unsigned int header_size, void *data,
			  unsigned int size)
{
	int msg_flags;
	int err;

	/*
	 * Called with @data == NULL and the size of the data blocks in @size
	 * for commands that send data blocks. For those commands, omit the
	 * MSG_MORE flag: this will increase the likelihood that data blocks
	 * which are page aligned on the sender will end up page aligned on the
	 * receiver.
	 */
	msg_flags = data ? MSG_MORE : 0;

	header_size += prepare_header(connection, vnr, sock->sbuf, cmd,
				      header_size + size);
	err = drbd_send_all(connection, sock->socket, sock->sbuf, header_size,
			    msg_flags);
	if (data && !err)
		err = drbd_send_all(connection, sock->socket, data, size, 0);
	/* DRBD protocol "pings" are latency critical.
	 * This is supposed to trigger tcp_push_pending_frames() */
	if (!err && (cmd == P_PING || cmd == P_PING_ACK))
		tcp_sock_set_nodelay(sock->socket->sk);

	return err;
}

static int __conn_send_command(struct drbd_connection *connection, struct drbd_socket *sock,
			       enum drbd_packet cmd, unsigned int header_size,
			       void *data, unsigned int size)
{
	return __send_command(connection, 0, sock, cmd, header_size, data, size);
}

int conn_send_command(struct drbd_connection *connection, struct drbd_socket *sock,
		      enum drbd_packet cmd, unsigned int header_size,
		      void *data, unsigned int size)
{
	int err;

	err = __conn_send_command(connection, sock, cmd, header_size, data, size);
	mutex_unlock(&sock->mutex);
	return err;
}

int drbd_send_command(struct drbd_peer_device *peer_device, struct drbd_socket *sock,
		      enum drbd_packet cmd, unsigned int header_size,
		      void *data, unsigned int size)
{
	int err;

	err = __send_command(peer_device->connection, peer_device->device->vnr,
			     sock, cmd, header_size, data, size);
	mutex_unlock(&sock->mutex);
	return err;
}

int drbd_send_ping(struct drbd_connection *connection)
{
	struct drbd_socket *sock;

	sock = &connection->meta;
	if (!conn_prepare_command(connection, sock))
		return -EIO;
	return conn_send_command(connection, sock, P_PING, 0, NULL, 0);
}

int drbd_send_ping_ack(struct drbd_connection *connection)
{
	struct drbd_socket *sock;

	sock = &connection->meta;
	if (!conn_prepare_command(connection, sock))
		return -EIO;
	return conn_send_command(connection, sock, P_PING_ACK, 0, NULL, 0);
}

int drbd_send_sync_param(struct drbd_peer_device *peer_device)
{
	struct drbd_socket *sock;
	struct p_rs_param_95 *p;
	int size;
	const int apv = peer_device->connection->agreed_pro_version;
	enum drbd_packet cmd;
	struct net_conf *nc;
	struct disk_conf *dc;

	sock = &peer_device->connection->data;
	p = drbd_prepare_command(peer_device, sock);
	if (!p)
		return -EIO;

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);

	size = apv <= 87 ? sizeof(struct p_rs_param)
		: apv == 88 ? sizeof(struct p_rs_param)
			+ strlen(nc->verify_alg) + 1
		: apv <= 94 ? sizeof(struct p_rs_param_89)
		: /* apv >= 95 */ sizeof(struct p_rs_param_95);

	cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM;

	/* initialize verify_alg and csums_alg */
	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);

	if (get_ldev(peer_device->device)) {
		dc = rcu_dereference(peer_device->device->ldev->disk_conf);
		p->resync_rate = cpu_to_be32(dc->resync_rate);
		p->c_plan_ahead = cpu_to_be32(dc->c_plan_ahead);
		p->c_delay_target = cpu_to_be32(dc->c_delay_target);
		p->c_fill_target = cpu_to_be32(dc->c_fill_target);
		p->c_max_rate = cpu_to_be32(dc->c_max_rate);
		put_ldev(peer_device->device);
	} else {
		p->resync_rate = cpu_to_be32(DRBD_RESYNC_RATE_DEF);
		p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF);
		p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF);
		p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF);
		p->c_max_rate = cpu_to_be32(DRBD_C_MAX_RATE_DEF);
	}

	if (apv >= 88)
		strcpy(p->verify_alg, nc->verify_alg);
	if (apv >= 89)
		strcpy(p->csums_alg, nc->csums_alg);
	rcu_read_unlock();

	return drbd_send_command(peer_device, sock, cmd, size, NULL, 0);
}

int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cmd)
{
	struct drbd_socket *sock;
	struct p_protocol *p;
	struct net_conf *nc;
	int size, cf;

	sock = &connection->data;
	p = __conn_prepare_command(connection, sock);
	if (!p)
		return -EIO;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	if (nc->tentative && connection->agreed_pro_version < 92) {
		rcu_read_unlock();
		drbd_err(connection, "--dry-run is not supported by peer");
		return -EOPNOTSUPP;
	}

	size = sizeof(*p);
	if (connection->agreed_pro_version >= 87)
		size += strlen(nc->integrity_alg) + 1;

	p->protocol      = cpu_to_be32(nc->wire_protocol);
	p->after_sb_0p   = cpu_to_be32(nc->after_sb_0p);
	p->after_sb_1p   = cpu_to_be32(nc->after_sb_1p);
	p->after_sb_2p   = cpu_to_be32(nc->after_sb_2p);
	p->two_primaries = cpu_to_be32(nc->two_primaries);
	cf = 0;
	if (nc->discard_my_data)
		cf |= CF_DISCARD_MY_DATA;
	if (nc->tentative)
		cf |= CF_DRY_RUN;
	p->conn_flags    = cpu_to_be32(cf);

	if (connection->agreed_pro_version >= 87)
		strcpy(p->integrity_alg, nc->integrity_alg);
	rcu_read_unlock();

	return __conn_send_command(connection, sock, cmd, size, NULL, 0);
}

int drbd_send_protocol(struct drbd_connection *connection)
{
	int err;

	mutex_lock(&connection->data.mutex);
	err = __drbd_send_protocol(connection, P_PROTOCOL);
	mutex_unlock(&connection->data.mutex);

	return err;
}

static int _drbd_send_uuids(struct drbd_peer_device *peer_device, u64 uuid_flags)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_socket *sock;
	struct p_uuids *p;
	int i;

	if (!get_ldev_if_state(device, D_NEGOTIATING))
		return 0;

	sock = &peer_device->connection->data;
	p = drbd_prepare_command(peer_device, sock);
	if (!p) {
		put_ldev(device);
		return -EIO;
	}
	spin_lock_irq(&device->ldev->md.uuid_lock);
	for (i = UI_CURRENT; i < UI_SIZE; i++)
		p->uuid[i] = cpu_to_be64(device->ldev->md.uuid[i]);
	spin_unlock_irq(&device->ldev->md.uuid_lock);

	device->comm_bm_set = drbd_bm_total_weight(device);
	p->uuid[UI_SIZE] = cpu_to_be64(device->comm_bm_set);
	rcu_read_lock();
	uuid_flags |= rcu_dereference(peer_device->connection->net_conf)->discard_my_data ? 1 : 0;
	rcu_read_unlock();
	uuid_flags |= test_bit(CRASHED_PRIMARY, &device->flags) ? 2 : 0;
	uuid_flags |= device->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0;
	p->uuid[UI_FLAGS] = cpu_to_be64(uuid_flags);

	put_ldev(device);
	return drbd_send_command(peer_device, sock, P_UUIDS, sizeof(*p), NULL, 0);
}

int drbd_send_uuids(struct drbd_peer_device *peer_device)
{
	return _drbd_send_uuids(peer_device, 0);
}

int drbd_send_uuids_skip_initial_sync(struct drbd_peer_device *peer_device)
{
	return _drbd_send_uuids(peer_device, 8);
}

void drbd_print_uuids(struct drbd_device *device, const char *text)
{
	if (get_ldev_if_state(device, D_NEGOTIATING)) {
		u64 *uuid = device->ldev->md.uuid;
		drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX\n",
			  text,
			  (unsigned long long)uuid[UI_CURRENT],
			  (unsigned long long)uuid[UI_BITMAP],
			  (unsigned long long)uuid[UI_HISTORY_START],
			  (unsigned long long)uuid[UI_HISTORY_END]);
		put_ldev(device);
	} else {
		drbd_info(device, "%s effective data uuid: %016llX\n",
			  text,
			  (unsigned long long)device->ed_uuid);
	}
}

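/* Rotate in a fresh bitmap UUID (derived from the current one, or random if
 * none exists yet), write it to the meta-data and send it to the peer as the
 * new sync UUID. */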
void drbd_gen_and_send_sync_uuid(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_socket *sock;
	struct p_rs_uuid *p;
	u64 uuid;

	D_ASSERT(device, device->state.disk == D_UP_TO_DATE);

	uuid = device->ldev->md.uuid[UI_BITMAP];
	if (uuid && uuid != UUID_JUST_CREATED)
		uuid = uuid + UUID_NEW_BM_OFFSET;
	else
		get_random_bytes(&uuid, sizeof(u64));
	drbd_uuid_set(device, UI_BITMAP, uuid);
	drbd_print_uuids(device, "updated sync UUID");
	drbd_md_sync(device);

	sock = &peer_device->connection->data;
	p = drbd_prepare_command(peer_device, sock);
	if (p) {
		p->uuid = cpu_to_be64(uuid);
		drbd_send_command(peer_device, sock, P_SYNC_UUID, sizeof(*p), NULL, 0);
	}
}

/* communicated if (agreed_features & DRBD_FF_WSAME) */
static void
assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p,
		    struct request_queue *q)
{
	if (q) {
		p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
		p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q));
		p->qlim->alignment_offset = cpu_to_be32(queue_alignment_offset(q));
		p->qlim->io_min = cpu_to_be32(queue_io_min(q));
		p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
		p->qlim->discard_enabled = blk_queue_discard(q);
		p->qlim->write_same_capable = !!q->limits.max_write_same_sectors;
	} else {
		q = device->rq_queue;
		p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
		p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q));
		p->qlim->alignment_offset = 0;
		p->qlim->io_min = cpu_to_be32(queue_io_min(q));
		p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
		p->qlim->discard_enabled = 0;
		p->qlim->write_same_capable = 0;
	}
}

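/* Tell the peer about our disk geometry in a P_SIZES packet: backing device
 * capacity, user-configured size, currently exposed capacity, maximal bio
 * size and, if DRBD_FF_WSAME was agreed on, the queue limits.  With
 * @trigger_reply set, a c_size of 0 is sent instead of the current capacity. */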
drbd_send_sizes(struct drbd_peer_device * peer_device,int trigger_reply,enum dds_flags flags)944*4882a593Smuzhiyun int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enum dds_flags flags)
945*4882a593Smuzhiyun {
946*4882a593Smuzhiyun struct drbd_device *device = peer_device->device;
947*4882a593Smuzhiyun struct drbd_socket *sock;
948*4882a593Smuzhiyun struct p_sizes *p;
949*4882a593Smuzhiyun sector_t d_size, u_size;
950*4882a593Smuzhiyun int q_order_type;
951*4882a593Smuzhiyun unsigned int max_bio_size;
952*4882a593Smuzhiyun unsigned int packet_size;
953*4882a593Smuzhiyun
954*4882a593Smuzhiyun sock = &peer_device->connection->data;
955*4882a593Smuzhiyun p = drbd_prepare_command(peer_device, sock);
956*4882a593Smuzhiyun if (!p)
957*4882a593Smuzhiyun return -EIO;
958*4882a593Smuzhiyun
959*4882a593Smuzhiyun packet_size = sizeof(*p);
960*4882a593Smuzhiyun if (peer_device->connection->agreed_features & DRBD_FF_WSAME)
961*4882a593Smuzhiyun packet_size += sizeof(p->qlim[0]);
962*4882a593Smuzhiyun
963*4882a593Smuzhiyun memset(p, 0, packet_size);
964*4882a593Smuzhiyun if (get_ldev_if_state(device, D_NEGOTIATING)) {
965*4882a593Smuzhiyun struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
966*4882a593Smuzhiyun d_size = drbd_get_max_capacity(device->ldev);
967*4882a593Smuzhiyun rcu_read_lock();
968*4882a593Smuzhiyun u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
969*4882a593Smuzhiyun rcu_read_unlock();
970*4882a593Smuzhiyun q_order_type = drbd_queue_order_type(device);
971*4882a593Smuzhiyun max_bio_size = queue_max_hw_sectors(q) << 9;
972*4882a593Smuzhiyun max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE);
973*4882a593Smuzhiyun assign_p_sizes_qlim(device, p, q);
974*4882a593Smuzhiyun put_ldev(device);
975*4882a593Smuzhiyun } else {
976*4882a593Smuzhiyun d_size = 0;
977*4882a593Smuzhiyun u_size = 0;
978*4882a593Smuzhiyun q_order_type = QUEUE_ORDERED_NONE;
979*4882a593Smuzhiyun max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
980*4882a593Smuzhiyun assign_p_sizes_qlim(device, p, NULL);
981*4882a593Smuzhiyun }
982*4882a593Smuzhiyun
983*4882a593Smuzhiyun if (peer_device->connection->agreed_pro_version <= 94)
984*4882a593Smuzhiyun max_bio_size = min(max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
985*4882a593Smuzhiyun else if (peer_device->connection->agreed_pro_version < 100)
986*4882a593Smuzhiyun max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE_P95);
987*4882a593Smuzhiyun
988*4882a593Smuzhiyun p->d_size = cpu_to_be64(d_size);
989*4882a593Smuzhiyun p->u_size = cpu_to_be64(u_size);
990*4882a593Smuzhiyun if (trigger_reply)
991*4882a593Smuzhiyun p->c_size = 0;
992*4882a593Smuzhiyun else
993*4882a593Smuzhiyun p->c_size = cpu_to_be64(get_capacity(device->vdisk));
994*4882a593Smuzhiyun p->max_bio_size = cpu_to_be32(max_bio_size);
995*4882a593Smuzhiyun p->queue_order_type = cpu_to_be16(q_order_type);
996*4882a593Smuzhiyun p->dds_flags = cpu_to_be16(flags);
997*4882a593Smuzhiyun
998*4882a593Smuzhiyun return drbd_send_command(peer_device, sock, P_SIZES, packet_size, NULL, 0);
999*4882a593Smuzhiyun }
1000*4882a593Smuzhiyun
1001*4882a593Smuzhiyun /**
1002*4882a593Smuzhiyun * drbd_send_current_state() - Sends the drbd state to the peer
1003*4882a593Smuzhiyun * @peer_device: DRBD peer device.
1004*4882a593Smuzhiyun */
drbd_send_current_state(struct drbd_peer_device * peer_device)1005*4882a593Smuzhiyun int drbd_send_current_state(struct drbd_peer_device *peer_device)
1006*4882a593Smuzhiyun {
1007*4882a593Smuzhiyun struct drbd_socket *sock;
1008*4882a593Smuzhiyun struct p_state *p;
1009*4882a593Smuzhiyun
1010*4882a593Smuzhiyun sock = &peer_device->connection->data;
1011*4882a593Smuzhiyun p = drbd_prepare_command(peer_device, sock);
1012*4882a593Smuzhiyun if (!p)
1013*4882a593Smuzhiyun return -EIO;
1014*4882a593Smuzhiyun p->state = cpu_to_be32(peer_device->device->state.i); /* Within the send mutex */
1015*4882a593Smuzhiyun return drbd_send_command(peer_device, sock, P_STATE, sizeof(*p), NULL, 0);
1016*4882a593Smuzhiyun }
1017*4882a593Smuzhiyun
1018*4882a593Smuzhiyun /**
1019*4882a593Smuzhiyun * drbd_send_state() - After a state change, sends the new state to the peer
1020*4882a593Smuzhiyun * @peer_device: DRBD peer device.
1021*4882a593Smuzhiyun * @state: the state to send, not necessarily the current state.
1022*4882a593Smuzhiyun *
1023*4882a593Smuzhiyun * Each state change queues an "after_state_ch" work, which will eventually
1024*4882a593Smuzhiyun * send the resulting new state to the peer. If more state changes happen
1025*4882a593Smuzhiyun * between queuing and processing of the after_state_ch work, we still
1026*4882a593Smuzhiyun * want to send each intermediary state in the order it occurred.
1027*4882a593Smuzhiyun */
drbd_send_state(struct drbd_peer_device * peer_device,union drbd_state state)1028*4882a593Smuzhiyun int drbd_send_state(struct drbd_peer_device *peer_device, union drbd_state state)
1029*4882a593Smuzhiyun {
1030*4882a593Smuzhiyun struct drbd_socket *sock;
1031*4882a593Smuzhiyun struct p_state *p;
1032*4882a593Smuzhiyun
1033*4882a593Smuzhiyun sock = &peer_device->connection->data;
1034*4882a593Smuzhiyun p = drbd_prepare_command(peer_device, sock);
1035*4882a593Smuzhiyun if (!p)
1036*4882a593Smuzhiyun return -EIO;
1037*4882a593Smuzhiyun p->state = cpu_to_be32(state.i); /* Within the send mutex */
1038*4882a593Smuzhiyun return drbd_send_command(peer_device, sock, P_STATE, sizeof(*p), NULL, 0);
1039*4882a593Smuzhiyun }
1040*4882a593Smuzhiyun
drbd_send_state_req(struct drbd_peer_device * peer_device,union drbd_state mask,union drbd_state val)1041*4882a593Smuzhiyun int drbd_send_state_req(struct drbd_peer_device *peer_device, union drbd_state mask, union drbd_state val)
1042*4882a593Smuzhiyun {
1043*4882a593Smuzhiyun struct drbd_socket *sock;
1044*4882a593Smuzhiyun struct p_req_state *p;
1045*4882a593Smuzhiyun
1046*4882a593Smuzhiyun sock = &peer_device->connection->data;
1047*4882a593Smuzhiyun p = drbd_prepare_command(peer_device, sock);
1048*4882a593Smuzhiyun if (!p)
1049*4882a593Smuzhiyun return -EIO;
1050*4882a593Smuzhiyun p->mask = cpu_to_be32(mask.i);
1051*4882a593Smuzhiyun p->val = cpu_to_be32(val.i);
1052*4882a593Smuzhiyun return drbd_send_command(peer_device, sock, P_STATE_CHG_REQ, sizeof(*p), NULL, 0);
1053*4882a593Smuzhiyun }
1054*4882a593Smuzhiyun
conn_send_state_req(struct drbd_connection * connection,union drbd_state mask,union drbd_state val)1055*4882a593Smuzhiyun int conn_send_state_req(struct drbd_connection *connection, union drbd_state mask, union drbd_state val)
1056*4882a593Smuzhiyun {
1057*4882a593Smuzhiyun enum drbd_packet cmd;
1058*4882a593Smuzhiyun struct drbd_socket *sock;
1059*4882a593Smuzhiyun struct p_req_state *p;
1060*4882a593Smuzhiyun
1061*4882a593Smuzhiyun cmd = connection->agreed_pro_version < 100 ? P_STATE_CHG_REQ : P_CONN_ST_CHG_REQ;
1062*4882a593Smuzhiyun sock = &connection->data;
1063*4882a593Smuzhiyun p = conn_prepare_command(connection, sock);
1064*4882a593Smuzhiyun if (!p)
1065*4882a593Smuzhiyun return -EIO;
1066*4882a593Smuzhiyun p->mask = cpu_to_be32(mask.i);
1067*4882a593Smuzhiyun p->val = cpu_to_be32(val.i);
1068*4882a593Smuzhiyun return conn_send_command(connection, sock, cmd, sizeof(*p), NULL, 0);
1069*4882a593Smuzhiyun }
1070*4882a593Smuzhiyun
1071*4882a593Smuzhiyun void drbd_send_sr_reply(struct drbd_peer_device *peer_device, enum drbd_state_rv retcode)
1072*4882a593Smuzhiyun {
1073*4882a593Smuzhiyun struct drbd_socket *sock;
1074*4882a593Smuzhiyun struct p_req_state_reply *p;
1075*4882a593Smuzhiyun
1076*4882a593Smuzhiyun sock = &peer_device->connection->meta;
1077*4882a593Smuzhiyun p = drbd_prepare_command(peer_device, sock);
1078*4882a593Smuzhiyun if (p) {
1079*4882a593Smuzhiyun p->retcode = cpu_to_be32(retcode);
1080*4882a593Smuzhiyun drbd_send_command(peer_device, sock, P_STATE_CHG_REPLY, sizeof(*p), NULL, 0);
1081*4882a593Smuzhiyun }
1082*4882a593Smuzhiyun }
1083*4882a593Smuzhiyun
1084*4882a593Smuzhiyun void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode)
1085*4882a593Smuzhiyun {
1086*4882a593Smuzhiyun struct drbd_socket *sock;
1087*4882a593Smuzhiyun struct p_req_state_reply *p;
1088*4882a593Smuzhiyun enum drbd_packet cmd = connection->agreed_pro_version < 100 ? P_STATE_CHG_REPLY : P_CONN_ST_CHG_REPLY;
1089*4882a593Smuzhiyun
1090*4882a593Smuzhiyun sock = &connection->meta;
1091*4882a593Smuzhiyun p = conn_prepare_command(connection, sock);
1092*4882a593Smuzhiyun if (p) {
1093*4882a593Smuzhiyun p->retcode = cpu_to_be32(retcode);
1094*4882a593Smuzhiyun conn_send_command(connection, sock, cmd, sizeof(*p), NULL, 0);
1095*4882a593Smuzhiyun }
1096*4882a593Smuzhiyun }
1097*4882a593Smuzhiyun
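/* The helpers below pack the p_compressed_bm encoding byte, as can be seen
 * from the masks they use: bits 0-3 hold the drbd_bitmap_code, bits 4-6 the
 * number of pad bits in the last byte, and bit 7 records whether the first
 * encoded run describes set bits. */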
1098*4882a593Smuzhiyun static void dcbp_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code)
1099*4882a593Smuzhiyun {
1100*4882a593Smuzhiyun BUG_ON(code & ~0xf);
1101*4882a593Smuzhiyun p->encoding = (p->encoding & ~0xf) | code;
1102*4882a593Smuzhiyun }
1103*4882a593Smuzhiyun
1104*4882a593Smuzhiyun static void dcbp_set_start(struct p_compressed_bm *p, int set)
1105*4882a593Smuzhiyun {
1106*4882a593Smuzhiyun p->encoding = (p->encoding & ~0x80) | (set ? 0x80 : 0);
1107*4882a593Smuzhiyun }
1108*4882a593Smuzhiyun
1109*4882a593Smuzhiyun static void dcbp_set_pad_bits(struct p_compressed_bm *p, int n)
1110*4882a593Smuzhiyun {
1111*4882a593Smuzhiyun BUG_ON(n & ~0x7);
1112*4882a593Smuzhiyun p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4);
1113*4882a593Smuzhiyun }
1114*4882a593Smuzhiyun
1115*4882a593Smuzhiyun static int fill_bitmap_rle_bits(struct drbd_device *device,
1116*4882a593Smuzhiyun struct p_compressed_bm *p,
1117*4882a593Smuzhiyun unsigned int size,
1118*4882a593Smuzhiyun struct bm_xfer_ctx *c)
1119*4882a593Smuzhiyun {
1120*4882a593Smuzhiyun struct bitstream bs;
1121*4882a593Smuzhiyun unsigned long plain_bits;
1122*4882a593Smuzhiyun unsigned long tmp;
1123*4882a593Smuzhiyun unsigned long rl;
1124*4882a593Smuzhiyun unsigned len;
1125*4882a593Smuzhiyun unsigned toggle;
1126*4882a593Smuzhiyun int bits, use_rle;
1127*4882a593Smuzhiyun
1128*4882a593Smuzhiyun /* may we use this feature? */
1129*4882a593Smuzhiyun rcu_read_lock();
1130*4882a593Smuzhiyun use_rle = rcu_dereference(first_peer_device(device)->connection->net_conf)->use_rle;
1131*4882a593Smuzhiyun rcu_read_unlock();
1132*4882a593Smuzhiyun if (!use_rle || first_peer_device(device)->connection->agreed_pro_version < 90)
1133*4882a593Smuzhiyun return 0;
1134*4882a593Smuzhiyun
1135*4882a593Smuzhiyun if (c->bit_offset >= c->bm_bits)
1136*4882a593Smuzhiyun return 0; /* nothing to do. */
1137*4882a593Smuzhiyun
1138*4882a593Smuzhiyun /* use at most this many bytes */
1139*4882a593Smuzhiyun bitstream_init(&bs, p->code, size, 0);
1140*4882a593Smuzhiyun memset(p->code, 0, size);
1141*4882a593Smuzhiyun /* plain bits covered in this code string */
1142*4882a593Smuzhiyun plain_bits = 0;
1143*4882a593Smuzhiyun
1144*4882a593Smuzhiyun /* p->encoding & 0x80 stores whether the first run length is set.
1145*4882a593Smuzhiyun * bit offset is implicit.
1146*4882a593Smuzhiyun * start with toggle == 2 to be able to tell the first iteration */
1147*4882a593Smuzhiyun toggle = 2;
1148*4882a593Smuzhiyun
1149*4882a593Smuzhiyun /* see how many plain bits we can stuff into one packet
1150*4882a593Smuzhiyun * using RLE and VLI. */
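/* Illustrative example (not from a real transfer): a bitmap stretch of
 * 3 cleared bits, 3 set bits, 5 cleared, 5 set is sent as the start flag
 * "first run is cleared bits" plus the VLI-encoded run lengths 3, 3, 5, 5. */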
1151*4882a593Smuzhiyun do {
1152*4882a593Smuzhiyun tmp = (toggle == 0) ? _drbd_bm_find_next_zero(device, c->bit_offset)
1153*4882a593Smuzhiyun : _drbd_bm_find_next(device, c->bit_offset);
1154*4882a593Smuzhiyun if (tmp == -1UL)
1155*4882a593Smuzhiyun tmp = c->bm_bits;
1156*4882a593Smuzhiyun rl = tmp - c->bit_offset;
1157*4882a593Smuzhiyun
1158*4882a593Smuzhiyun if (toggle == 2) { /* first iteration */
1159*4882a593Smuzhiyun if (rl == 0) {
1160*4882a593Smuzhiyun /* the first checked bit was set,
1161*4882a593Smuzhiyun * store start value, */
1162*4882a593Smuzhiyun dcbp_set_start(p, 1);
1163*4882a593Smuzhiyun /* but skip encoding of zero run length */
1164*4882a593Smuzhiyun toggle = !toggle;
1165*4882a593Smuzhiyun continue;
1166*4882a593Smuzhiyun }
1167*4882a593Smuzhiyun dcbp_set_start(p, 0);
1168*4882a593Smuzhiyun }
1169*4882a593Smuzhiyun
1170*4882a593Smuzhiyun /* paranoia: catch zero runlength.
1171*4882a593Smuzhiyun * can only happen if bitmap is modified while we scan it. */
1172*4882a593Smuzhiyun if (rl == 0) {
1173*4882a593Smuzhiyun drbd_err(device, "unexpected zero runlength while encoding bitmap "
1174*4882a593Smuzhiyun "t:%u bo:%lu\n", toggle, c->bit_offset);
1175*4882a593Smuzhiyun return -1;
1176*4882a593Smuzhiyun }
1177*4882a593Smuzhiyun
1178*4882a593Smuzhiyun bits = vli_encode_bits(&bs, rl);
1179*4882a593Smuzhiyun if (bits == -ENOBUFS) /* buffer full */
1180*4882a593Smuzhiyun break;
1181*4882a593Smuzhiyun if (bits <= 0) {
1182*4882a593Smuzhiyun drbd_err(device, "error while encoding bitmap: %d\n", bits);
1183*4882a593Smuzhiyun return 0;
1184*4882a593Smuzhiyun }
1185*4882a593Smuzhiyun
1186*4882a593Smuzhiyun toggle = !toggle;
1187*4882a593Smuzhiyun plain_bits += rl;
1188*4882a593Smuzhiyun c->bit_offset = tmp;
1189*4882a593Smuzhiyun } while (c->bit_offset < c->bm_bits);
1190*4882a593Smuzhiyun
1191*4882a593Smuzhiyun len = bs.cur.b - p->code + !!bs.cur.bit;
1192*4882a593Smuzhiyun
1193*4882a593Smuzhiyun if (plain_bits < (len << 3)) {
1194*4882a593Smuzhiyun /* incompressible with this method.
1195*4882a593Smuzhiyun * we need to rewind both word and bit position. */
1196*4882a593Smuzhiyun c->bit_offset -= plain_bits;
1197*4882a593Smuzhiyun bm_xfer_ctx_bit_to_word_offset(c);
1198*4882a593Smuzhiyun c->bit_offset = c->word_offset * BITS_PER_LONG;
1199*4882a593Smuzhiyun return 0;
1200*4882a593Smuzhiyun }
1201*4882a593Smuzhiyun
1202*4882a593Smuzhiyun /* RLE + VLI was able to compress it just fine.
1203*4882a593Smuzhiyun * update c->word_offset. */
1204*4882a593Smuzhiyun bm_xfer_ctx_bit_to_word_offset(c);
1205*4882a593Smuzhiyun
1206*4882a593Smuzhiyun /* store pad_bits */
1207*4882a593Smuzhiyun dcbp_set_pad_bits(p, (8 - bs.cur.bit) & 0x7);
1208*4882a593Smuzhiyun
1209*4882a593Smuzhiyun return len;
1210*4882a593Smuzhiyun }
1211*4882a593Smuzhiyun
1212*4882a593Smuzhiyun /**
1213*4882a593Smuzhiyun * send_bitmap_rle_or_plain
1214*4882a593Smuzhiyun *
1215*4882a593Smuzhiyun * Return 0 when done, 1 when another iteration is needed, and a negative error
1216*4882a593Smuzhiyun * code upon failure.
1217*4882a593Smuzhiyun */
1218*4882a593Smuzhiyun static int
1219*4882a593Smuzhiyun send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c)
1220*4882a593Smuzhiyun {
1221*4882a593Smuzhiyun struct drbd_socket *sock = &first_peer_device(device)->connection->data;
1222*4882a593Smuzhiyun unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
1223*4882a593Smuzhiyun struct p_compressed_bm *p = sock->sbuf + header_size;
1224*4882a593Smuzhiyun int len, err;
1225*4882a593Smuzhiyun
1226*4882a593Smuzhiyun len = fill_bitmap_rle_bits(device, p,
1227*4882a593Smuzhiyun DRBD_SOCKET_BUFFER_SIZE - header_size - sizeof(*p), c);
1228*4882a593Smuzhiyun if (len < 0)
1229*4882a593Smuzhiyun return -EIO;
1230*4882a593Smuzhiyun
1231*4882a593Smuzhiyun if (len) {
1232*4882a593Smuzhiyun dcbp_set_code(p, RLE_VLI_Bits);
1233*4882a593Smuzhiyun err = __send_command(first_peer_device(device)->connection, device->vnr, sock,
1234*4882a593Smuzhiyun P_COMPRESSED_BITMAP, sizeof(*p) + len,
1235*4882a593Smuzhiyun NULL, 0);
1236*4882a593Smuzhiyun c->packets[0]++;
1237*4882a593Smuzhiyun c->bytes[0] += header_size + sizeof(*p) + len;
1238*4882a593Smuzhiyun
1239*4882a593Smuzhiyun if (c->bit_offset >= c->bm_bits)
1240*4882a593Smuzhiyun len = 0; /* DONE */
1241*4882a593Smuzhiyun } else {
1242*4882a593Smuzhiyun /* was not compressible.
1243*4882a593Smuzhiyun * send a buffer full of plain text bits instead. */
1244*4882a593Smuzhiyun unsigned int data_size;
1245*4882a593Smuzhiyun unsigned long num_words;
1246*4882a593Smuzhiyun unsigned long *p = sock->sbuf + header_size;
1247*4882a593Smuzhiyun
1248*4882a593Smuzhiyun data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
1249*4882a593Smuzhiyun num_words = min_t(size_t, data_size / sizeof(*p),
1250*4882a593Smuzhiyun c->bm_words - c->word_offset);
1251*4882a593Smuzhiyun len = num_words * sizeof(*p);
1252*4882a593Smuzhiyun if (len)
1253*4882a593Smuzhiyun drbd_bm_get_lel(device, c->word_offset, num_words, p);
1254*4882a593Smuzhiyun err = __send_command(first_peer_device(device)->connection, device->vnr, sock, P_BITMAP, len, NULL, 0);
1255*4882a593Smuzhiyun c->word_offset += num_words;
1256*4882a593Smuzhiyun c->bit_offset = c->word_offset * BITS_PER_LONG;
1257*4882a593Smuzhiyun
1258*4882a593Smuzhiyun c->packets[1]++;
1259*4882a593Smuzhiyun c->bytes[1] += header_size + len;
1260*4882a593Smuzhiyun
1261*4882a593Smuzhiyun if (c->bit_offset > c->bm_bits)
1262*4882a593Smuzhiyun c->bit_offset = c->bm_bits;
1263*4882a593Smuzhiyun }
1264*4882a593Smuzhiyun if (!err) {
1265*4882a593Smuzhiyun if (len == 0) {
1266*4882a593Smuzhiyun INFO_bm_xfer_stats(device, "send", c);
1267*4882a593Smuzhiyun return 0;
1268*4882a593Smuzhiyun } else
1269*4882a593Smuzhiyun return 1;
1270*4882a593Smuzhiyun }
1271*4882a593Smuzhiyun return -EIO;
1272*4882a593Smuzhiyun }
1273*4882a593Smuzhiyun
1274*4882a593Smuzhiyun /* See the comment at receive_bitmap() */
1275*4882a593Smuzhiyun static int _drbd_send_bitmap(struct drbd_device *device)
1276*4882a593Smuzhiyun {
1277*4882a593Smuzhiyun struct bm_xfer_ctx c;
1278*4882a593Smuzhiyun int err;
1279*4882a593Smuzhiyun
1280*4882a593Smuzhiyun if (!expect(device->bitmap))
1281*4882a593Smuzhiyun return false;
1282*4882a593Smuzhiyun
1283*4882a593Smuzhiyun if (get_ldev(device)) {
1284*4882a593Smuzhiyun if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC)) {
1285*4882a593Smuzhiyun drbd_info(device, "Writing the whole bitmap, MDF_FullSync was set.\n");
1286*4882a593Smuzhiyun drbd_bm_set_all(device);
1287*4882a593Smuzhiyun if (drbd_bm_write(device)) {
1288*4882a593Smuzhiyun /* write_bm did fail! Leave full sync flag set in Meta P_DATA
1289*4882a593Smuzhiyun * but otherwise process as per normal - need to tell other
1290*4882a593Smuzhiyun * side that a full resync is required! */
1291*4882a593Smuzhiyun drbd_err(device, "Failed to write bitmap to disk!\n");
1292*4882a593Smuzhiyun } else {
1293*4882a593Smuzhiyun drbd_md_clear_flag(device, MDF_FULL_SYNC);
1294*4882a593Smuzhiyun drbd_md_sync(device);
1295*4882a593Smuzhiyun }
1296*4882a593Smuzhiyun }
1297*4882a593Smuzhiyun put_ldev(device);
1298*4882a593Smuzhiyun }
1299*4882a593Smuzhiyun
1300*4882a593Smuzhiyun c = (struct bm_xfer_ctx) {
1301*4882a593Smuzhiyun .bm_bits = drbd_bm_bits(device),
1302*4882a593Smuzhiyun .bm_words = drbd_bm_words(device),
1303*4882a593Smuzhiyun };
1304*4882a593Smuzhiyun
1305*4882a593Smuzhiyun do {
1306*4882a593Smuzhiyun err = send_bitmap_rle_or_plain(device, &c);
1307*4882a593Smuzhiyun } while (err > 0);
1308*4882a593Smuzhiyun
1309*4882a593Smuzhiyun return err == 0;
1310*4882a593Smuzhiyun }
1311*4882a593Smuzhiyun
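/* Like _drbd_send_bitmap(), but takes and releases the data socket mutex
 * itself.  Returns 0 on success, non-zero if the socket is gone or the
 * transfer failed. */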
1312*4882a593Smuzhiyun int drbd_send_bitmap(struct drbd_device *device)
1313*4882a593Smuzhiyun {
1314*4882a593Smuzhiyun struct drbd_socket *sock = &first_peer_device(device)->connection->data;
1315*4882a593Smuzhiyun int err = -1;
1316*4882a593Smuzhiyun
1317*4882a593Smuzhiyun mutex_lock(&sock->mutex);
1318*4882a593Smuzhiyun if (sock->socket)
1319*4882a593Smuzhiyun err = !_drbd_send_bitmap(device);
1320*4882a593Smuzhiyun mutex_unlock(&sock->mutex);
1321*4882a593Smuzhiyun return err;
1322*4882a593Smuzhiyun }
1323*4882a593Smuzhiyun
1324*4882a593Smuzhiyun void drbd_send_b_ack(struct drbd_connection *connection, u32 barrier_nr, u32 set_size)
1325*4882a593Smuzhiyun {
1326*4882a593Smuzhiyun struct drbd_socket *sock;
1327*4882a593Smuzhiyun struct p_barrier_ack *p;
1328*4882a593Smuzhiyun
1329*4882a593Smuzhiyun if (connection->cstate < C_WF_REPORT_PARAMS)
1330*4882a593Smuzhiyun return;
1331*4882a593Smuzhiyun
1332*4882a593Smuzhiyun sock = &connection->meta;
1333*4882a593Smuzhiyun p = conn_prepare_command(connection, sock);
1334*4882a593Smuzhiyun if (!p)
1335*4882a593Smuzhiyun return;
1336*4882a593Smuzhiyun p->barrier = barrier_nr;
1337*4882a593Smuzhiyun p->set_size = cpu_to_be32(set_size);
1338*4882a593Smuzhiyun conn_send_command(connection, sock, P_BARRIER_ACK, sizeof(*p), NULL, 0);
1339*4882a593Smuzhiyun }
1340*4882a593Smuzhiyun
1341*4882a593Smuzhiyun /**
1342*4882a593Smuzhiyun * _drbd_send_ack() - Sends an ack packet
1343*4882a593Smuzhiyun * @peer_device: DRBD peer device.
1344*4882a593Smuzhiyun * @cmd: Packet command code.
1345*4882a593Smuzhiyun * @sector: sector, needs to be in big endian byte order
1346*4882a593Smuzhiyun * @blksize: size in byte, needs to be in big endian byte order
1347*4882a593Smuzhiyun * @block_id: Id, big endian byte order
1348*4882a593Smuzhiyun */
1349*4882a593Smuzhiyun static int _drbd_send_ack(struct drbd_peer_device *peer_device, enum drbd_packet cmd,
1350*4882a593Smuzhiyun u64 sector, u32 blksize, u64 block_id)
1351*4882a593Smuzhiyun {
1352*4882a593Smuzhiyun struct drbd_socket *sock;
1353*4882a593Smuzhiyun struct p_block_ack *p;
1354*4882a593Smuzhiyun
1355*4882a593Smuzhiyun if (peer_device->device->state.conn < C_CONNECTED)
1356*4882a593Smuzhiyun return -EIO;
1357*4882a593Smuzhiyun
1358*4882a593Smuzhiyun sock = &peer_device->connection->meta;
1359*4882a593Smuzhiyun p = drbd_prepare_command(peer_device, sock);
1360*4882a593Smuzhiyun if (!p)
1361*4882a593Smuzhiyun return -EIO;
1362*4882a593Smuzhiyun p->sector = sector;
1363*4882a593Smuzhiyun p->block_id = block_id;
1364*4882a593Smuzhiyun p->blksize = blksize;
1365*4882a593Smuzhiyun p->seq_num = cpu_to_be32(atomic_inc_return(&peer_device->device->packet_seq));
1366*4882a593Smuzhiyun return drbd_send_command(peer_device, sock, cmd, sizeof(*p), NULL, 0);
1367*4882a593Smuzhiyun }
1368*4882a593Smuzhiyun
1369*4882a593Smuzhiyun /* dp->sector and dp->block_id already/still in network byte order,
1370*4882a593Smuzhiyun * data_size is payload size according to dp->head,
1371*4882a593Smuzhiyun * and may need to be corrected for digest size. */
1372*4882a593Smuzhiyun void drbd_send_ack_dp(struct drbd_peer_device *peer_device, enum drbd_packet cmd,
1373*4882a593Smuzhiyun struct p_data *dp, int data_size)
1374*4882a593Smuzhiyun {
1375*4882a593Smuzhiyun if (peer_device->connection->peer_integrity_tfm)
1376*4882a593Smuzhiyun data_size -= crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
1377*4882a593Smuzhiyun _drbd_send_ack(peer_device, cmd, dp->sector, cpu_to_be32(data_size),
1378*4882a593Smuzhiyun dp->block_id);
1379*4882a593Smuzhiyun }
1380*4882a593Smuzhiyun
1381*4882a593Smuzhiyun void drbd_send_ack_rp(struct drbd_peer_device *peer_device, enum drbd_packet cmd,
1382*4882a593Smuzhiyun struct p_block_req *rp)
1383*4882a593Smuzhiyun {
1384*4882a593Smuzhiyun _drbd_send_ack(peer_device, cmd, rp->sector, rp->blksize, rp->block_id);
1385*4882a593Smuzhiyun }
1386*4882a593Smuzhiyun
1387*4882a593Smuzhiyun /**
1388*4882a593Smuzhiyun * drbd_send_ack() - Sends an ack packet
1389*4882a593Smuzhiyun * @peer_device: DRBD peer device
1390*4882a593Smuzhiyun * @cmd: packet command code
1391*4882a593Smuzhiyun * @peer_req: peer request
1392*4882a593Smuzhiyun */
1393*4882a593Smuzhiyun int drbd_send_ack(struct drbd_peer_device *peer_device, enum drbd_packet cmd,
1394*4882a593Smuzhiyun struct drbd_peer_request *peer_req)
1395*4882a593Smuzhiyun {
1396*4882a593Smuzhiyun return _drbd_send_ack(peer_device, cmd,
1397*4882a593Smuzhiyun cpu_to_be64(peer_req->i.sector),
1398*4882a593Smuzhiyun cpu_to_be32(peer_req->i.size),
1399*4882a593Smuzhiyun peer_req->block_id);
1400*4882a593Smuzhiyun }
1401*4882a593Smuzhiyun
1402*4882a593Smuzhiyun /* This function misuses the block_id field to signal if the blocks
1403*4882a593Smuzhiyun * are in sync or not. */
1404*4882a593Smuzhiyun int drbd_send_ack_ex(struct drbd_peer_device *peer_device, enum drbd_packet cmd,
1405*4882a593Smuzhiyun sector_t sector, int blksize, u64 block_id)
1406*4882a593Smuzhiyun {
1407*4882a593Smuzhiyun return _drbd_send_ack(peer_device, cmd,
1408*4882a593Smuzhiyun cpu_to_be64(sector),
1409*4882a593Smuzhiyun cpu_to_be32(blksize),
1410*4882a593Smuzhiyun cpu_to_be64(block_id));
1411*4882a593Smuzhiyun }
1412*4882a593Smuzhiyun
1413*4882a593Smuzhiyun int drbd_send_rs_deallocated(struct drbd_peer_device *peer_device,
1414*4882a593Smuzhiyun struct drbd_peer_request *peer_req)
1415*4882a593Smuzhiyun {
1416*4882a593Smuzhiyun struct drbd_socket *sock;
1417*4882a593Smuzhiyun struct p_block_desc *p;
1418*4882a593Smuzhiyun
1419*4882a593Smuzhiyun sock = &peer_device->connection->data;
1420*4882a593Smuzhiyun p = drbd_prepare_command(peer_device, sock);
1421*4882a593Smuzhiyun if (!p)
1422*4882a593Smuzhiyun return -EIO;
1423*4882a593Smuzhiyun p->sector = cpu_to_be64(peer_req->i.sector);
1424*4882a593Smuzhiyun p->blksize = cpu_to_be32(peer_req->i.size);
1425*4882a593Smuzhiyun p->pad = 0;
1426*4882a593Smuzhiyun return drbd_send_command(peer_device, sock, P_RS_DEALLOCATED, sizeof(*p), NULL, 0);
1427*4882a593Smuzhiyun }
1428*4882a593Smuzhiyun
1429*4882a593Smuzhiyun int drbd_send_drequest(struct drbd_peer_device *peer_device, int cmd,
1430*4882a593Smuzhiyun sector_t sector, int size, u64 block_id)
1431*4882a593Smuzhiyun {
1432*4882a593Smuzhiyun struct drbd_socket *sock;
1433*4882a593Smuzhiyun struct p_block_req *p;
1434*4882a593Smuzhiyun
1435*4882a593Smuzhiyun sock = &peer_device->connection->data;
1436*4882a593Smuzhiyun p = drbd_prepare_command(peer_device, sock);
1437*4882a593Smuzhiyun if (!p)
1438*4882a593Smuzhiyun return -EIO;
1439*4882a593Smuzhiyun p->sector = cpu_to_be64(sector);
1440*4882a593Smuzhiyun p->block_id = block_id;
1441*4882a593Smuzhiyun p->blksize = cpu_to_be32(size);
1442*4882a593Smuzhiyun return drbd_send_command(peer_device, sock, cmd, sizeof(*p), NULL, 0);
1443*4882a593Smuzhiyun }
1444*4882a593Smuzhiyun
1445*4882a593Smuzhiyun int drbd_send_drequest_csum(struct drbd_peer_device *peer_device, sector_t sector, int size,
1446*4882a593Smuzhiyun void *digest, int digest_size, enum drbd_packet cmd)
1447*4882a593Smuzhiyun {
1448*4882a593Smuzhiyun struct drbd_socket *sock;
1449*4882a593Smuzhiyun struct p_block_req *p;
1450*4882a593Smuzhiyun
1451*4882a593Smuzhiyun /* FIXME: Put the digest into the preallocated socket buffer. */
1452*4882a593Smuzhiyun
1453*4882a593Smuzhiyun sock = &peer_device->connection->data;
1454*4882a593Smuzhiyun p = drbd_prepare_command(peer_device, sock);
1455*4882a593Smuzhiyun if (!p)
1456*4882a593Smuzhiyun return -EIO;
1457*4882a593Smuzhiyun p->sector = cpu_to_be64(sector);
1458*4882a593Smuzhiyun p->block_id = ID_SYNCER /* unused */;
1459*4882a593Smuzhiyun p->blksize = cpu_to_be32(size);
1460*4882a593Smuzhiyun return drbd_send_command(peer_device, sock, cmd, sizeof(*p), digest, digest_size);
1461*4882a593Smuzhiyun }
1462*4882a593Smuzhiyun
1463*4882a593Smuzhiyun int drbd_send_ov_request(struct drbd_peer_device *peer_device, sector_t sector, int size)
1464*4882a593Smuzhiyun {
1465*4882a593Smuzhiyun struct drbd_socket *sock;
1466*4882a593Smuzhiyun struct p_block_req *p;
1467*4882a593Smuzhiyun
1468*4882a593Smuzhiyun sock = &peer_device->connection->data;
1469*4882a593Smuzhiyun p = drbd_prepare_command(peer_device, sock);
1470*4882a593Smuzhiyun if (!p)
1471*4882a593Smuzhiyun return -EIO;
1472*4882a593Smuzhiyun p->sector = cpu_to_be64(sector);
1473*4882a593Smuzhiyun p->block_id = ID_SYNCER /* unused */;
1474*4882a593Smuzhiyun p->blksize = cpu_to_be32(size);
1475*4882a593Smuzhiyun return drbd_send_command(peer_device, sock, P_OV_REQUEST, sizeof(*p), NULL, 0);
1476*4882a593Smuzhiyun }
1477*4882a593Smuzhiyun
1478*4882a593Smuzhiyun /* called on sndtimeo
1479*4882a593Smuzhiyun * returns false if we should retry,
1480*4882a593Smuzhiyun * true if we think connection is dead
1481*4882a593Smuzhiyun */
1482*4882a593Smuzhiyun static int we_should_drop_the_connection(struct drbd_connection *connection, struct socket *sock)
1483*4882a593Smuzhiyun {
1484*4882a593Smuzhiyun int drop_it;
1485*4882a593Smuzhiyun /* long elapsed = (long)(jiffies - device->last_received); */
1486*4882a593Smuzhiyun
1487*4882a593Smuzhiyun drop_it = connection->meta.socket == sock
1488*4882a593Smuzhiyun || !connection->ack_receiver.task
1489*4882a593Smuzhiyun || get_t_state(&connection->ack_receiver) != RUNNING
1490*4882a593Smuzhiyun || connection->cstate < C_WF_REPORT_PARAMS;
1491*4882a593Smuzhiyun
1492*4882a593Smuzhiyun if (drop_it)
1493*4882a593Smuzhiyun return true;
1494*4882a593Smuzhiyun
1495*4882a593Smuzhiyun drop_it = !--connection->ko_count;
1496*4882a593Smuzhiyun if (!drop_it) {
1497*4882a593Smuzhiyun drbd_err(connection, "[%s/%d] sock_sendmsg time expired, ko = %u\n",
1498*4882a593Smuzhiyun current->comm, current->pid, connection->ko_count);
1499*4882a593Smuzhiyun request_ping(connection);
1500*4882a593Smuzhiyun }
1501*4882a593Smuzhiyun
1502*4882a593Smuzhiyun return drop_it; /* && (device->state == R_PRIMARY) */;
1503*4882a593Smuzhiyun }
1504*4882a593Smuzhiyun
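/* Mark the connection as congested once the data socket's send queue
 * exceeds roughly 80% (4/5) of its send buffer size. */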
1505*4882a593Smuzhiyun static void drbd_update_congested(struct drbd_connection *connection)
1506*4882a593Smuzhiyun {
1507*4882a593Smuzhiyun struct sock *sk = connection->data.socket->sk;
1508*4882a593Smuzhiyun if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5)
1509*4882a593Smuzhiyun set_bit(NET_CONGESTED, &connection->flags);
1510*4882a593Smuzhiyun }
1511*4882a593Smuzhiyun
1512*4882a593Smuzhiyun /* The idea of sendpage seems to be to put some kind of reference
1513*4882a593Smuzhiyun * to the page into the skb, and to hand it over to the NIC. In
1514*4882a593Smuzhiyun * this process get_page() gets called.
1515*4882a593Smuzhiyun *
1516*4882a593Smuzhiyun * As soon as the page was really sent over the network put_page()
1517*4882a593Smuzhiyun * gets called by some part of the network layer. [ NIC driver? ]
1518*4882a593Smuzhiyun *
1519*4882a593Smuzhiyun * [ get_page() / put_page() increment/decrement the count. If count
1520*4882a593Smuzhiyun * reaches 0 the page will be freed. ]
1521*4882a593Smuzhiyun *
1522*4882a593Smuzhiyun * This works nicely with pages from FSs.
1523*4882a593Smuzhiyun * But this means that in protocol A we might signal IO completion too early!
1524*4882a593Smuzhiyun *
1525*4882a593Smuzhiyun * In order not to corrupt data during a resync we must make sure
1526*4882a593Smuzhiyun * that we do not reuse our own buffer pages (EEs) too early, therefore
1527*4882a593Smuzhiyun * we have the net_ee list.
1528*4882a593Smuzhiyun *
1529*4882a593Smuzhiyun * XFS seems to have problems, still, it submits pages with page_count == 0!
1530*4882a593Smuzhiyun * As a workaround, we disable sendpage on pages
1531*4882a593Smuzhiyun * with page_count == 0 or PageSlab.
1532*4882a593Smuzhiyun */
1533*4882a593Smuzhiyun static int _drbd_no_send_page(struct drbd_peer_device *peer_device, struct page *page,
1534*4882a593Smuzhiyun int offset, size_t size, unsigned msg_flags)
1535*4882a593Smuzhiyun {
1536*4882a593Smuzhiyun struct socket *socket;
1537*4882a593Smuzhiyun void *addr;
1538*4882a593Smuzhiyun int err;
1539*4882a593Smuzhiyun
1540*4882a593Smuzhiyun socket = peer_device->connection->data.socket;
1541*4882a593Smuzhiyun addr = kmap(page) + offset;
1542*4882a593Smuzhiyun err = drbd_send_all(peer_device->connection, socket, addr, size, msg_flags);
1543*4882a593Smuzhiyun kunmap(page);
1544*4882a593Smuzhiyun if (!err)
1545*4882a593Smuzhiyun peer_device->device->send_cnt += size >> 9;
1546*4882a593Smuzhiyun return err;
1547*4882a593Smuzhiyun }
1548*4882a593Smuzhiyun
1549*4882a593Smuzhiyun static int _drbd_send_page(struct drbd_peer_device *peer_device, struct page *page,
1550*4882a593Smuzhiyun int offset, size_t size, unsigned msg_flags)
1551*4882a593Smuzhiyun {
1552*4882a593Smuzhiyun struct socket *socket = peer_device->connection->data.socket;
1553*4882a593Smuzhiyun int len = size;
1554*4882a593Smuzhiyun int err = -EIO;
1555*4882a593Smuzhiyun
1556*4882a593Smuzhiyun /* e.g. XFS meta- & log-data is in slab pages, which have a
1557*4882a593Smuzhiyun * page_count of 0 and/or have PageSlab() set.
1558*4882a593Smuzhiyun * we cannot use send_page for those, as that does get_page();
1559*4882a593Smuzhiyun * put_page(); and would cause either a VM_BUG directly, or
1560*4882a593Smuzhiyun * __page_cache_release a page that would actually still be referenced
1561*4882a593Smuzhiyun * by someone, leading to some obscure delayed Oops somewhere else. */
1562*4882a593Smuzhiyun if (drbd_disable_sendpage || !sendpage_ok(page))
1563*4882a593Smuzhiyun return _drbd_no_send_page(peer_device, page, offset, size, msg_flags);
1564*4882a593Smuzhiyun
1565*4882a593Smuzhiyun msg_flags |= MSG_NOSIGNAL;
1566*4882a593Smuzhiyun drbd_update_congested(peer_device->connection);
1567*4882a593Smuzhiyun do {
1568*4882a593Smuzhiyun int sent;
1569*4882a593Smuzhiyun
1570*4882a593Smuzhiyun sent = socket->ops->sendpage(socket, page, offset, len, msg_flags);
1571*4882a593Smuzhiyun if (sent <= 0) {
1572*4882a593Smuzhiyun if (sent == -EAGAIN) {
1573*4882a593Smuzhiyun if (we_should_drop_the_connection(peer_device->connection, socket))
1574*4882a593Smuzhiyun break;
1575*4882a593Smuzhiyun continue;
1576*4882a593Smuzhiyun }
1577*4882a593Smuzhiyun drbd_warn(peer_device->device, "%s: size=%d len=%d sent=%d\n",
1578*4882a593Smuzhiyun __func__, (int)size, len, sent);
1579*4882a593Smuzhiyun if (sent < 0)
1580*4882a593Smuzhiyun err = sent;
1581*4882a593Smuzhiyun break;
1582*4882a593Smuzhiyun }
1583*4882a593Smuzhiyun len -= sent;
1584*4882a593Smuzhiyun offset += sent;
1585*4882a593Smuzhiyun } while (len > 0 /* THINK && device->cstate >= C_CONNECTED*/);
1586*4882a593Smuzhiyun clear_bit(NET_CONGESTED, &peer_device->connection->flags);
1587*4882a593Smuzhiyun
1588*4882a593Smuzhiyun if (len == 0) {
1589*4882a593Smuzhiyun err = 0;
1590*4882a593Smuzhiyun peer_device->device->send_cnt += size >> 9;
1591*4882a593Smuzhiyun }
1592*4882a593Smuzhiyun return err;
1593*4882a593Smuzhiyun }
1594*4882a593Smuzhiyun
1595*4882a593Smuzhiyun static int _drbd_send_bio(struct drbd_peer_device *peer_device, struct bio *bio)
1596*4882a593Smuzhiyun {
1597*4882a593Smuzhiyun struct bio_vec bvec;
1598*4882a593Smuzhiyun struct bvec_iter iter;
1599*4882a593Smuzhiyun
1600*4882a593Smuzhiyun /* hint all but last page with MSG_MORE */
1601*4882a593Smuzhiyun bio_for_each_segment(bvec, bio, iter) {
1602*4882a593Smuzhiyun int err;
1603*4882a593Smuzhiyun
1604*4882a593Smuzhiyun err = _drbd_no_send_page(peer_device, bvec.bv_page,
1605*4882a593Smuzhiyun bvec.bv_offset, bvec.bv_len,
1606*4882a593Smuzhiyun bio_iter_last(bvec, iter)
1607*4882a593Smuzhiyun ? 0 : MSG_MORE);
1608*4882a593Smuzhiyun if (err)
1609*4882a593Smuzhiyun return err;
1610*4882a593Smuzhiyun /* REQ_OP_WRITE_SAME has only one segment */
1611*4882a593Smuzhiyun if (bio_op(bio) == REQ_OP_WRITE_SAME)
1612*4882a593Smuzhiyun break;
1613*4882a593Smuzhiyun }
1614*4882a593Smuzhiyun return 0;
1615*4882a593Smuzhiyun }
1616*4882a593Smuzhiyun
1617*4882a593Smuzhiyun static int _drbd_send_zc_bio(struct drbd_peer_device *peer_device, struct bio *bio)
1618*4882a593Smuzhiyun {
1619*4882a593Smuzhiyun struct bio_vec bvec;
1620*4882a593Smuzhiyun struct bvec_iter iter;
1621*4882a593Smuzhiyun
1622*4882a593Smuzhiyun /* hint all but last page with MSG_MORE */
1623*4882a593Smuzhiyun bio_for_each_segment(bvec, bio, iter) {
1624*4882a593Smuzhiyun int err;
1625*4882a593Smuzhiyun
1626*4882a593Smuzhiyun err = _drbd_send_page(peer_device, bvec.bv_page,
1627*4882a593Smuzhiyun bvec.bv_offset, bvec.bv_len,
1628*4882a593Smuzhiyun bio_iter_last(bvec, iter) ? 0 : MSG_MORE);
1629*4882a593Smuzhiyun if (err)
1630*4882a593Smuzhiyun return err;
1631*4882a593Smuzhiyun /* REQ_OP_WRITE_SAME has only one segment */
1632*4882a593Smuzhiyun if (bio_op(bio) == REQ_OP_WRITE_SAME)
1633*4882a593Smuzhiyun break;
1634*4882a593Smuzhiyun }
1635*4882a593Smuzhiyun return 0;
1636*4882a593Smuzhiyun }
1637*4882a593Smuzhiyun
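/* Send the page chain of a peer request (EE), using the zero-copy
 * _drbd_send_page() path for each page in the chain. */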
1638*4882a593Smuzhiyun static int _drbd_send_zc_ee(struct drbd_peer_device *peer_device,
1639*4882a593Smuzhiyun struct drbd_peer_request *peer_req)
1640*4882a593Smuzhiyun {
1641*4882a593Smuzhiyun struct page *page = peer_req->pages;
1642*4882a593Smuzhiyun unsigned len = peer_req->i.size;
1643*4882a593Smuzhiyun int err;
1644*4882a593Smuzhiyun
1645*4882a593Smuzhiyun /* hint all but last page with MSG_MORE */
1646*4882a593Smuzhiyun page_chain_for_each(page) {
1647*4882a593Smuzhiyun unsigned l = min_t(unsigned, len, PAGE_SIZE);
1648*4882a593Smuzhiyun
1649*4882a593Smuzhiyun err = _drbd_send_page(peer_device, page, 0, l,
1650*4882a593Smuzhiyun page_chain_next(page) ? MSG_MORE : 0);
1651*4882a593Smuzhiyun if (err)
1652*4882a593Smuzhiyun return err;
1653*4882a593Smuzhiyun len -= l;
1654*4882a593Smuzhiyun }
1655*4882a593Smuzhiyun return 0;
1656*4882a593Smuzhiyun }
1657*4882a593Smuzhiyun
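/* Translate the op and flags of a bio into the DP_* flags that go on the
 * wire, taking the negotiated protocol version and feature flags into
 * account. */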
1658*4882a593Smuzhiyun static u32 bio_flags_to_wire(struct drbd_connection *connection,
1659*4882a593Smuzhiyun struct bio *bio)
1660*4882a593Smuzhiyun {
1661*4882a593Smuzhiyun if (connection->agreed_pro_version >= 95)
1662*4882a593Smuzhiyun return (bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0) |
1663*4882a593Smuzhiyun (bio->bi_opf & REQ_FUA ? DP_FUA : 0) |
1664*4882a593Smuzhiyun (bio->bi_opf & REQ_PREFLUSH ? DP_FLUSH : 0) |
1665*4882a593Smuzhiyun (bio_op(bio) == REQ_OP_WRITE_SAME ? DP_WSAME : 0) |
1666*4882a593Smuzhiyun (bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0) |
1667*4882a593Smuzhiyun (bio_op(bio) == REQ_OP_WRITE_ZEROES ?
1668*4882a593Smuzhiyun ((connection->agreed_features & DRBD_FF_WZEROES) ?
1669*4882a593Smuzhiyun (DP_ZEROES |(!(bio->bi_opf & REQ_NOUNMAP) ? DP_DISCARD : 0))
1670*4882a593Smuzhiyun : DP_DISCARD)
1671*4882a593Smuzhiyun : 0);
1672*4882a593Smuzhiyun else
1673*4882a593Smuzhiyun return bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0;
1674*4882a593Smuzhiyun }
1675*4882a593Smuzhiyun
1676*4882a593Smuzhiyun /* Used to send write or TRIM aka REQ_OP_DISCARD requests
1677*4882a593Smuzhiyun * R_PRIMARY -> Peer (P_DATA, P_TRIM)
1678*4882a593Smuzhiyun */
1679*4882a593Smuzhiyun int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *req)
1680*4882a593Smuzhiyun {
1681*4882a593Smuzhiyun struct drbd_device *device = peer_device->device;
1682*4882a593Smuzhiyun struct drbd_socket *sock;
1683*4882a593Smuzhiyun struct p_data *p;
1684*4882a593Smuzhiyun struct p_wsame *wsame = NULL;
1685*4882a593Smuzhiyun void *digest_out;
1686*4882a593Smuzhiyun unsigned int dp_flags = 0;
1687*4882a593Smuzhiyun int digest_size;
1688*4882a593Smuzhiyun int err;
1689*4882a593Smuzhiyun
1690*4882a593Smuzhiyun sock = &peer_device->connection->data;
1691*4882a593Smuzhiyun p = drbd_prepare_command(peer_device, sock);
1692*4882a593Smuzhiyun digest_size = peer_device->connection->integrity_tfm ?
1693*4882a593Smuzhiyun crypto_shash_digestsize(peer_device->connection->integrity_tfm) : 0;
1694*4882a593Smuzhiyun
1695*4882a593Smuzhiyun if (!p)
1696*4882a593Smuzhiyun return -EIO;
1697*4882a593Smuzhiyun p->sector = cpu_to_be64(req->i.sector);
1698*4882a593Smuzhiyun p->block_id = (unsigned long)req;
1699*4882a593Smuzhiyun p->seq_num = cpu_to_be32(atomic_inc_return(&device->packet_seq));
1700*4882a593Smuzhiyun dp_flags = bio_flags_to_wire(peer_device->connection, req->master_bio);
1701*4882a593Smuzhiyun if (device->state.conn >= C_SYNC_SOURCE &&
1702*4882a593Smuzhiyun device->state.conn <= C_PAUSED_SYNC_T)
1703*4882a593Smuzhiyun dp_flags |= DP_MAY_SET_IN_SYNC;
1704*4882a593Smuzhiyun if (peer_device->connection->agreed_pro_version >= 100) {
1705*4882a593Smuzhiyun if (req->rq_state & RQ_EXP_RECEIVE_ACK)
1706*4882a593Smuzhiyun dp_flags |= DP_SEND_RECEIVE_ACK;
1707*4882a593Smuzhiyun /* During resync, request an explicit write ack,
1708*4882a593Smuzhiyun * even in protocol != C */
1709*4882a593Smuzhiyun if (req->rq_state & RQ_EXP_WRITE_ACK
1710*4882a593Smuzhiyun || (dp_flags & DP_MAY_SET_IN_SYNC))
1711*4882a593Smuzhiyun dp_flags |= DP_SEND_WRITE_ACK;
1712*4882a593Smuzhiyun }
1713*4882a593Smuzhiyun p->dp_flags = cpu_to_be32(dp_flags);
1714*4882a593Smuzhiyun
1715*4882a593Smuzhiyun if (dp_flags & (DP_DISCARD|DP_ZEROES)) {
1716*4882a593Smuzhiyun enum drbd_packet cmd = (dp_flags & DP_ZEROES) ? P_ZEROES : P_TRIM;
1717*4882a593Smuzhiyun struct p_trim *t = (struct p_trim*)p;
1718*4882a593Smuzhiyun t->size = cpu_to_be32(req->i.size);
1719*4882a593Smuzhiyun err = __send_command(peer_device->connection, device->vnr, sock, cmd, sizeof(*t), NULL, 0);
1720*4882a593Smuzhiyun goto out;
1721*4882a593Smuzhiyun }
1722*4882a593Smuzhiyun if (dp_flags & DP_WSAME) {
1723*4882a593Smuzhiyun /* this will only work if DRBD_FF_WSAME is set AND the
1724*4882a593Smuzhiyun * handshake agreed that all nodes and backend devices are
1725*4882a593Smuzhiyun * WRITE_SAME capable and agree on logical_block_size */
1726*4882a593Smuzhiyun wsame = (struct p_wsame*)p;
1727*4882a593Smuzhiyun digest_out = wsame + 1;
1728*4882a593Smuzhiyun wsame->size = cpu_to_be32(req->i.size);
1729*4882a593Smuzhiyun } else
1730*4882a593Smuzhiyun digest_out = p + 1;
1731*4882a593Smuzhiyun
1732*4882a593Smuzhiyun /* our digest is still only over the payload.
1733*4882a593Smuzhiyun * TRIM does not carry any payload. */
1734*4882a593Smuzhiyun if (digest_size)
1735*4882a593Smuzhiyun drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, digest_out);
1736*4882a593Smuzhiyun if (wsame) {
1737*4882a593Smuzhiyun err =
1738*4882a593Smuzhiyun __send_command(peer_device->connection, device->vnr, sock, P_WSAME,
1739*4882a593Smuzhiyun sizeof(*wsame) + digest_size, NULL,
1740*4882a593Smuzhiyun bio_iovec(req->master_bio).bv_len);
1741*4882a593Smuzhiyun } else
1742*4882a593Smuzhiyun err =
1743*4882a593Smuzhiyun __send_command(peer_device->connection, device->vnr, sock, P_DATA,
1744*4882a593Smuzhiyun sizeof(*p) + digest_size, NULL, req->i.size);
1745*4882a593Smuzhiyun if (!err) {
1746*4882a593Smuzhiyun /* For protocol A, we have to memcpy the payload into
1747*4882a593Smuzhiyun * socket buffers, as we may complete right away
1748*4882a593Smuzhiyun * as soon as we handed it over to tcp, at which point the data
1749*4882a593Smuzhiyun * pages may become invalid.
1750*4882a593Smuzhiyun *
1751*4882a593Smuzhiyun * For data-integrity enabled, we copy it as well, so we can be
1752*4882a593Smuzhiyun * sure that even if the bio pages may still be modified, it
1753*4882a593Smuzhiyun * won't change the data on the wire, thus if the digest checks
1754*4882a593Smuzhiyun * out ok after sending on this side, but does not fit on the
1755*4882a593Smuzhiyun * receiving side, we sure have detected corruption elsewhere.
1756*4882a593Smuzhiyun */
1757*4882a593Smuzhiyun if (!(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK)) || digest_size)
1758*4882a593Smuzhiyun err = _drbd_send_bio(peer_device, req->master_bio);
1759*4882a593Smuzhiyun else
1760*4882a593Smuzhiyun err = _drbd_send_zc_bio(peer_device, req->master_bio);
1761*4882a593Smuzhiyun
1762*4882a593Smuzhiyun /* double check digest, sometimes buffers have been modified in flight. */
1763*4882a593Smuzhiyun if (digest_size > 0 && digest_size <= 64) {
1764*4882a593Smuzhiyun /* 64 byte, 512 bit, is the largest digest size
1765*4882a593Smuzhiyun * currently supported in kernel crypto. */
1766*4882a593Smuzhiyun unsigned char digest[64];
1767*4882a593Smuzhiyun drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, digest);
1768*4882a593Smuzhiyun if (memcmp(p + 1, digest, digest_size)) {
1769*4882a593Smuzhiyun drbd_warn(device,
1770*4882a593Smuzhiyun "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n",
1771*4882a593Smuzhiyun (unsigned long long)req->i.sector, req->i.size);
1772*4882a593Smuzhiyun }
1773*4882a593Smuzhiyun } /* else if (digest_size > 64) {
1774*4882a593Smuzhiyun ... Be noisy about digest too large ...
1775*4882a593Smuzhiyun } */
1776*4882a593Smuzhiyun }
1777*4882a593Smuzhiyun out:
1778*4882a593Smuzhiyun mutex_unlock(&sock->mutex); /* locked by drbd_prepare_command() */
1779*4882a593Smuzhiyun
1780*4882a593Smuzhiyun return err;
1781*4882a593Smuzhiyun }
1782*4882a593Smuzhiyun
1783*4882a593Smuzhiyun /* answer packet, used to send data back for read requests:
1784*4882a593Smuzhiyun * Peer -> (diskless) R_PRIMARY (P_DATA_REPLY)
1785*4882a593Smuzhiyun * C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY)
1786*4882a593Smuzhiyun */
1787*4882a593Smuzhiyun int drbd_send_block(struct drbd_peer_device *peer_device, enum drbd_packet cmd,
1788*4882a593Smuzhiyun struct drbd_peer_request *peer_req)
1789*4882a593Smuzhiyun {
1790*4882a593Smuzhiyun struct drbd_device *device = peer_device->device;
1791*4882a593Smuzhiyun struct drbd_socket *sock;
1792*4882a593Smuzhiyun struct p_data *p;
1793*4882a593Smuzhiyun int err;
1794*4882a593Smuzhiyun int digest_size;
1795*4882a593Smuzhiyun
1796*4882a593Smuzhiyun sock = &peer_device->connection->data;
1797*4882a593Smuzhiyun p = drbd_prepare_command(peer_device, sock);
1798*4882a593Smuzhiyun
1799*4882a593Smuzhiyun digest_size = peer_device->connection->integrity_tfm ?
1800*4882a593Smuzhiyun crypto_shash_digestsize(peer_device->connection->integrity_tfm) : 0;
1801*4882a593Smuzhiyun
1802*4882a593Smuzhiyun if (!p)
1803*4882a593Smuzhiyun return -EIO;
1804*4882a593Smuzhiyun p->sector = cpu_to_be64(peer_req->i.sector);
1805*4882a593Smuzhiyun p->block_id = peer_req->block_id;
1806*4882a593Smuzhiyun p->seq_num = 0; /* unused */
1807*4882a593Smuzhiyun p->dp_flags = 0;
1808*4882a593Smuzhiyun if (digest_size)
1809*4882a593Smuzhiyun drbd_csum_ee(peer_device->connection->integrity_tfm, peer_req, p + 1);
1810*4882a593Smuzhiyun err = __send_command(peer_device->connection, device->vnr, sock, cmd, sizeof(*p) + digest_size, NULL, peer_req->i.size);
1811*4882a593Smuzhiyun if (!err)
1812*4882a593Smuzhiyun err = _drbd_send_zc_ee(peer_device, peer_req);
1813*4882a593Smuzhiyun mutex_unlock(&sock->mutex); /* locked by drbd_prepare_command() */
1814*4882a593Smuzhiyun
1815*4882a593Smuzhiyun return err;
1816*4882a593Smuzhiyun }
1817*4882a593Smuzhiyun
1818*4882a593Smuzhiyun int drbd_send_out_of_sync(struct drbd_peer_device *peer_device, struct drbd_request *req)
1819*4882a593Smuzhiyun {
1820*4882a593Smuzhiyun struct drbd_socket *sock;
1821*4882a593Smuzhiyun struct p_block_desc *p;
1822*4882a593Smuzhiyun
1823*4882a593Smuzhiyun sock = &peer_device->connection->data;
1824*4882a593Smuzhiyun p = drbd_prepare_command(peer_device, sock);
1825*4882a593Smuzhiyun if (!p)
1826*4882a593Smuzhiyun return -EIO;
1827*4882a593Smuzhiyun p->sector = cpu_to_be64(req->i.sector);
1828*4882a593Smuzhiyun p->blksize = cpu_to_be32(req->i.size);
1829*4882a593Smuzhiyun return drbd_send_command(peer_device, sock, P_OUT_OF_SYNC, sizeof(*p), NULL, 0);
1830*4882a593Smuzhiyun }
1831*4882a593Smuzhiyun
1832*4882a593Smuzhiyun /*
1833*4882a593Smuzhiyun drbd_send distinguishes two cases:
1834*4882a593Smuzhiyun
1835*4882a593Smuzhiyun Packets sent via the data socket "sock"
1836*4882a593Smuzhiyun and packets sent via the meta data socket "msock"
1837*4882a593Smuzhiyun
1838*4882a593Smuzhiyun sock msock
1839*4882a593Smuzhiyun -----------------+-------------------------+------------------------------
1840*4882a593Smuzhiyun timeout conf.timeout / 2 conf.timeout / 2
1841*4882a593Smuzhiyun timeout action send a ping via msock Abort communication
1842*4882a593Smuzhiyun and close all sockets
1843*4882a593Smuzhiyun */
1844*4882a593Smuzhiyun
1845*4882a593Smuzhiyun /*
1846*4882a593Smuzhiyun * you must have down()ed the appropriate [m]sock_mutex elsewhere!
1847*4882a593Smuzhiyun */
1848*4882a593Smuzhiyun int drbd_send(struct drbd_connection *connection, struct socket *sock,
1849*4882a593Smuzhiyun void *buf, size_t size, unsigned msg_flags)
1850*4882a593Smuzhiyun {
1851*4882a593Smuzhiyun struct kvec iov = {.iov_base = buf, .iov_len = size};
1852*4882a593Smuzhiyun struct msghdr msg = {.msg_flags = msg_flags | MSG_NOSIGNAL};
1853*4882a593Smuzhiyun int rv, sent = 0;
1854*4882a593Smuzhiyun
1855*4882a593Smuzhiyun if (!sock)
1856*4882a593Smuzhiyun return -EBADR;
1857*4882a593Smuzhiyun
1858*4882a593Smuzhiyun /* THINK if (signal_pending) return ... ? */
1859*4882a593Smuzhiyun
1860*4882a593Smuzhiyun iov_iter_kvec(&msg.msg_iter, WRITE, &iov, 1, size);
1861*4882a593Smuzhiyun
1862*4882a593Smuzhiyun if (sock == connection->data.socket) {
1863*4882a593Smuzhiyun rcu_read_lock();
1864*4882a593Smuzhiyun connection->ko_count = rcu_dereference(connection->net_conf)->ko_count;
1865*4882a593Smuzhiyun rcu_read_unlock();
1866*4882a593Smuzhiyun drbd_update_congested(connection);
1867*4882a593Smuzhiyun }
1868*4882a593Smuzhiyun do {
1869*4882a593Smuzhiyun rv = sock_sendmsg(sock, &msg);
1870*4882a593Smuzhiyun if (rv == -EAGAIN) {
1871*4882a593Smuzhiyun if (we_should_drop_the_connection(connection, sock))
1872*4882a593Smuzhiyun break;
1873*4882a593Smuzhiyun else
1874*4882a593Smuzhiyun continue;
1875*4882a593Smuzhiyun }
1876*4882a593Smuzhiyun if (rv == -EINTR) {
1877*4882a593Smuzhiyun flush_signals(current);
1878*4882a593Smuzhiyun rv = 0;
1879*4882a593Smuzhiyun }
1880*4882a593Smuzhiyun if (rv < 0)
1881*4882a593Smuzhiyun break;
1882*4882a593Smuzhiyun sent += rv;
1883*4882a593Smuzhiyun } while (sent < size);
1884*4882a593Smuzhiyun
1885*4882a593Smuzhiyun if (sock == connection->data.socket)
1886*4882a593Smuzhiyun clear_bit(NET_CONGESTED, &connection->flags);
1887*4882a593Smuzhiyun
1888*4882a593Smuzhiyun if (rv <= 0) {
1889*4882a593Smuzhiyun if (rv != -EAGAIN) {
1890*4882a593Smuzhiyun drbd_err(connection, "%s_sendmsg returned %d\n",
1891*4882a593Smuzhiyun sock == connection->meta.socket ? "msock" : "sock",
1892*4882a593Smuzhiyun rv);
1893*4882a593Smuzhiyun conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);
1894*4882a593Smuzhiyun } else
1895*4882a593Smuzhiyun conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
1896*4882a593Smuzhiyun }
1897*4882a593Smuzhiyun
1898*4882a593Smuzhiyun return sent;
1899*4882a593Smuzhiyun }
1900*4882a593Smuzhiyun
1901*4882a593Smuzhiyun /**
1902*4882a593Smuzhiyun * drbd_send_all - Send an entire buffer
1903*4882a593Smuzhiyun *
1904*4882a593Smuzhiyun * Returns 0 upon success and a negative error value otherwise.
1905*4882a593Smuzhiyun */
1906*4882a593Smuzhiyun int drbd_send_all(struct drbd_connection *connection, struct socket *sock, void *buffer,
1907*4882a593Smuzhiyun size_t size, unsigned msg_flags)
1908*4882a593Smuzhiyun {
1909*4882a593Smuzhiyun int err;
1910*4882a593Smuzhiyun
1911*4882a593Smuzhiyun err = drbd_send(connection, sock, buffer, size, msg_flags);
1912*4882a593Smuzhiyun if (err < 0)
1913*4882a593Smuzhiyun return err;
1914*4882a593Smuzhiyun if (err != size)
1915*4882a593Smuzhiyun return -EIO;
1916*4882a593Smuzhiyun return 0;
1917*4882a593Smuzhiyun }
1918*4882a593Smuzhiyun
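/* Block device open: writable opens are only allowed on a Primary;
 * read-only opens on a Secondary are refused with -EMEDIUMTYPE unless
 * drbd_allow_oos is set. */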
1919*4882a593Smuzhiyun static int drbd_open(struct block_device *bdev, fmode_t mode)
1920*4882a593Smuzhiyun {
1921*4882a593Smuzhiyun struct drbd_device *device = bdev->bd_disk->private_data;
1922*4882a593Smuzhiyun unsigned long flags;
1923*4882a593Smuzhiyun int rv = 0;
1924*4882a593Smuzhiyun
1925*4882a593Smuzhiyun mutex_lock(&drbd_main_mutex);
1926*4882a593Smuzhiyun spin_lock_irqsave(&device->resource->req_lock, flags);
1927*4882a593Smuzhiyun /* to have a stable device->state.role
1928*4882a593Smuzhiyun * and no race with updating open_cnt */
1929*4882a593Smuzhiyun
1930*4882a593Smuzhiyun if (device->state.role != R_PRIMARY) {
1931*4882a593Smuzhiyun if (mode & FMODE_WRITE)
1932*4882a593Smuzhiyun rv = -EROFS;
1933*4882a593Smuzhiyun else if (!drbd_allow_oos)
1934*4882a593Smuzhiyun rv = -EMEDIUMTYPE;
1935*4882a593Smuzhiyun }
1936*4882a593Smuzhiyun
1937*4882a593Smuzhiyun if (!rv)
1938*4882a593Smuzhiyun device->open_cnt++;
1939*4882a593Smuzhiyun spin_unlock_irqrestore(&device->resource->req_lock, flags);
1940*4882a593Smuzhiyun mutex_unlock(&drbd_main_mutex);
1941*4882a593Smuzhiyun
1942*4882a593Smuzhiyun return rv;
1943*4882a593Smuzhiyun }
1944*4882a593Smuzhiyun
1945*4882a593Smuzhiyun static void drbd_release(struct gendisk *gd, fmode_t mode)
1946*4882a593Smuzhiyun {
1947*4882a593Smuzhiyun struct drbd_device *device = gd->private_data;
1948*4882a593Smuzhiyun mutex_lock(&drbd_main_mutex);
1949*4882a593Smuzhiyun device->open_cnt--;
1950*4882a593Smuzhiyun mutex_unlock(&drbd_main_mutex);
1951*4882a593Smuzhiyun }
1952*4882a593Smuzhiyun
1953*4882a593Smuzhiyun /* need to hold resource->req_lock */
1954*4882a593Smuzhiyun void drbd_queue_unplug(struct drbd_device *device)
1955*4882a593Smuzhiyun {
1956*4882a593Smuzhiyun if (device->state.pdsk >= D_INCONSISTENT && device->state.conn >= C_CONNECTED) {
1957*4882a593Smuzhiyun D_ASSERT(device, device->state.role == R_PRIMARY);
1958*4882a593Smuzhiyun if (test_and_clear_bit(UNPLUG_REMOTE, &device->flags)) {
1959*4882a593Smuzhiyun drbd_queue_work_if_unqueued(
1960*4882a593Smuzhiyun &first_peer_device(device)->connection->sender_work,
1961*4882a593Smuzhiyun &device->unplug_work);
1962*4882a593Smuzhiyun }
1963*4882a593Smuzhiyun }
1964*4882a593Smuzhiyun }
1965*4882a593Smuzhiyun
1966*4882a593Smuzhiyun static void drbd_set_defaults(struct drbd_device *device)
1967*4882a593Smuzhiyun {
1968*4882a593Smuzhiyun /* Beware! The actual layout differs
1969*4882a593Smuzhiyun * between big endian and little endian */
1970*4882a593Smuzhiyun device->state = (union drbd_dev_state) {
1971*4882a593Smuzhiyun { .role = R_SECONDARY,
1972*4882a593Smuzhiyun .peer = R_UNKNOWN,
1973*4882a593Smuzhiyun .conn = C_STANDALONE,
1974*4882a593Smuzhiyun .disk = D_DISKLESS,
1975*4882a593Smuzhiyun .pdsk = D_UNKNOWN,
1976*4882a593Smuzhiyun } };
1977*4882a593Smuzhiyun }
1978*4882a593Smuzhiyun
1979*4882a593Smuzhiyun void drbd_init_set_defaults(struct drbd_device *device)
1980*4882a593Smuzhiyun {
1981*4882a593Smuzhiyun /* the memset(,0,) did most of this.
1982*4882a593Smuzhiyun * note: only assignments, no allocation in here */
1983*4882a593Smuzhiyun
1984*4882a593Smuzhiyun drbd_set_defaults(device);
1985*4882a593Smuzhiyun
1986*4882a593Smuzhiyun atomic_set(&device->ap_bio_cnt, 0);
1987*4882a593Smuzhiyun atomic_set(&device->ap_actlog_cnt, 0);
1988*4882a593Smuzhiyun atomic_set(&device->ap_pending_cnt, 0);
1989*4882a593Smuzhiyun atomic_set(&device->rs_pending_cnt, 0);
1990*4882a593Smuzhiyun atomic_set(&device->unacked_cnt, 0);
1991*4882a593Smuzhiyun atomic_set(&device->local_cnt, 0);
1992*4882a593Smuzhiyun atomic_set(&device->pp_in_use_by_net, 0);
1993*4882a593Smuzhiyun atomic_set(&device->rs_sect_in, 0);
1994*4882a593Smuzhiyun atomic_set(&device->rs_sect_ev, 0);
1995*4882a593Smuzhiyun atomic_set(&device->ap_in_flight, 0);
1996*4882a593Smuzhiyun atomic_set(&device->md_io.in_use, 0);
1997*4882a593Smuzhiyun
1998*4882a593Smuzhiyun mutex_init(&device->own_state_mutex);
1999*4882a593Smuzhiyun device->state_mutex = &device->own_state_mutex;
2000*4882a593Smuzhiyun
2001*4882a593Smuzhiyun spin_lock_init(&device->al_lock);
2002*4882a593Smuzhiyun spin_lock_init(&device->peer_seq_lock);
2003*4882a593Smuzhiyun
2004*4882a593Smuzhiyun INIT_LIST_HEAD(&device->active_ee);
2005*4882a593Smuzhiyun INIT_LIST_HEAD(&device->sync_ee);
2006*4882a593Smuzhiyun INIT_LIST_HEAD(&device->done_ee);
2007*4882a593Smuzhiyun INIT_LIST_HEAD(&device->read_ee);
2008*4882a593Smuzhiyun INIT_LIST_HEAD(&device->net_ee);
2009*4882a593Smuzhiyun INIT_LIST_HEAD(&device->resync_reads);
2010*4882a593Smuzhiyun INIT_LIST_HEAD(&device->resync_work.list);
2011*4882a593Smuzhiyun INIT_LIST_HEAD(&device->unplug_work.list);
2012*4882a593Smuzhiyun INIT_LIST_HEAD(&device->bm_io_work.w.list);
2013*4882a593Smuzhiyun INIT_LIST_HEAD(&device->pending_master_completion[0]);
2014*4882a593Smuzhiyun INIT_LIST_HEAD(&device->pending_master_completion[1]);
2015*4882a593Smuzhiyun INIT_LIST_HEAD(&device->pending_completion[0]);
2016*4882a593Smuzhiyun INIT_LIST_HEAD(&device->pending_completion[1]);
2017*4882a593Smuzhiyun
2018*4882a593Smuzhiyun device->resync_work.cb = w_resync_timer;
2019*4882a593Smuzhiyun device->unplug_work.cb = w_send_write_hint;
2020*4882a593Smuzhiyun device->bm_io_work.w.cb = w_bitmap_io;
2021*4882a593Smuzhiyun
2022*4882a593Smuzhiyun timer_setup(&device->resync_timer, resync_timer_fn, 0);
2023*4882a593Smuzhiyun timer_setup(&device->md_sync_timer, md_sync_timer_fn, 0);
2024*4882a593Smuzhiyun timer_setup(&device->start_resync_timer, start_resync_timer_fn, 0);
2025*4882a593Smuzhiyun timer_setup(&device->request_timer, request_timer_fn, 0);
2026*4882a593Smuzhiyun
2027*4882a593Smuzhiyun init_waitqueue_head(&device->misc_wait);
2028*4882a593Smuzhiyun init_waitqueue_head(&device->state_wait);
2029*4882a593Smuzhiyun init_waitqueue_head(&device->ee_wait);
2030*4882a593Smuzhiyun init_waitqueue_head(&device->al_wait);
2031*4882a593Smuzhiyun init_waitqueue_head(&device->seq_wait);
2032*4882a593Smuzhiyun
2033*4882a593Smuzhiyun device->resync_wenr = LC_FREE;
2034*4882a593Smuzhiyun device->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
2035*4882a593Smuzhiyun device->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
2036*4882a593Smuzhiyun }
2037*4882a593Smuzhiyun
2038*4882a593Smuzhiyun void drbd_set_my_capacity(struct drbd_device *device, sector_t size)
2039*4882a593Smuzhiyun {
2040*4882a593Smuzhiyun char ppb[10];
2041*4882a593Smuzhiyun
2042*4882a593Smuzhiyun set_capacity(device->vdisk, size);
2043*4882a593Smuzhiyun revalidate_disk_size(device->vdisk, false);
2044*4882a593Smuzhiyun
2045*4882a593Smuzhiyun drbd_info(device, "size = %s (%llu KB)\n",
2046*4882a593Smuzhiyun ppsize(ppb, size>>1), (unsigned long long)size>>1);
2047*4882a593Smuzhiyun }
2048*4882a593Smuzhiyun
2049*4882a593Smuzhiyun void drbd_device_cleanup(struct drbd_device *device)
2050*4882a593Smuzhiyun {
2051*4882a593Smuzhiyun int i;
2052*4882a593Smuzhiyun if (first_peer_device(device)->connection->receiver.t_state != NONE)
2053*4882a593Smuzhiyun drbd_err(device, "ASSERT FAILED: receiver t_state == %d expected 0.\n",
2054*4882a593Smuzhiyun first_peer_device(device)->connection->receiver.t_state);
2055*4882a593Smuzhiyun
2056*4882a593Smuzhiyun device->al_writ_cnt =
2057*4882a593Smuzhiyun device->bm_writ_cnt =
2058*4882a593Smuzhiyun device->read_cnt =
2059*4882a593Smuzhiyun device->recv_cnt =
2060*4882a593Smuzhiyun device->send_cnt =
2061*4882a593Smuzhiyun device->writ_cnt =
2062*4882a593Smuzhiyun device->p_size =
2063*4882a593Smuzhiyun device->rs_start =
2064*4882a593Smuzhiyun device->rs_total =
2065*4882a593Smuzhiyun device->rs_failed = 0;
2066*4882a593Smuzhiyun device->rs_last_events = 0;
2067*4882a593Smuzhiyun device->rs_last_sect_ev = 0;
2068*4882a593Smuzhiyun for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2069*4882a593Smuzhiyun device->rs_mark_left[i] = 0;
2070*4882a593Smuzhiyun device->rs_mark_time[i] = 0;
2071*4882a593Smuzhiyun }
2072*4882a593Smuzhiyun D_ASSERT(device, first_peer_device(device)->connection->net_conf == NULL);
2073*4882a593Smuzhiyun
2074*4882a593Smuzhiyun set_capacity(device->vdisk, 0);
2075*4882a593Smuzhiyun revalidate_disk_size(device->vdisk, false);
2076*4882a593Smuzhiyun if (device->bitmap) {
2077*4882a593Smuzhiyun /* maybe never allocated. */
2078*4882a593Smuzhiyun drbd_bm_resize(device, 0, 1);
2079*4882a593Smuzhiyun drbd_bm_cleanup(device);
2080*4882a593Smuzhiyun }
2081*4882a593Smuzhiyun
2082*4882a593Smuzhiyun drbd_backing_dev_free(device, device->ldev);
2083*4882a593Smuzhiyun device->ldev = NULL;
2084*4882a593Smuzhiyun
2085*4882a593Smuzhiyun clear_bit(AL_SUSPENDED, &device->flags);
2086*4882a593Smuzhiyun
2087*4882a593Smuzhiyun D_ASSERT(device, list_empty(&device->active_ee));
2088*4882a593Smuzhiyun D_ASSERT(device, list_empty(&device->sync_ee));
2089*4882a593Smuzhiyun D_ASSERT(device, list_empty(&device->done_ee));
2090*4882a593Smuzhiyun D_ASSERT(device, list_empty(&device->read_ee));
2091*4882a593Smuzhiyun D_ASSERT(device, list_empty(&device->net_ee));
2092*4882a593Smuzhiyun D_ASSERT(device, list_empty(&device->resync_reads));
2093*4882a593Smuzhiyun D_ASSERT(device, list_empty(&first_peer_device(device)->connection->sender_work.q));
2094*4882a593Smuzhiyun D_ASSERT(device, list_empty(&device->resync_work.list));
2095*4882a593Smuzhiyun D_ASSERT(device, list_empty(&device->unplug_work.list));
2096*4882a593Smuzhiyun
2097*4882a593Smuzhiyun drbd_set_defaults(device);
2098*4882a593Smuzhiyun }
2099*4882a593Smuzhiyun
2100*4882a593Smuzhiyun
2101*4882a593Smuzhiyun static void drbd_destroy_mempools(void)
2102*4882a593Smuzhiyun {
2103*4882a593Smuzhiyun struct page *page;
2104*4882a593Smuzhiyun
2105*4882a593Smuzhiyun while (drbd_pp_pool) {
2106*4882a593Smuzhiyun page = drbd_pp_pool;
2107*4882a593Smuzhiyun drbd_pp_pool = (struct page *)page_private(page);
2108*4882a593Smuzhiyun __free_page(page);
2109*4882a593Smuzhiyun drbd_pp_vacant--;
2110*4882a593Smuzhiyun }
2111*4882a593Smuzhiyun
2112*4882a593Smuzhiyun /* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */
2113*4882a593Smuzhiyun
2114*4882a593Smuzhiyun bioset_exit(&drbd_io_bio_set);
2115*4882a593Smuzhiyun bioset_exit(&drbd_md_io_bio_set);
2116*4882a593Smuzhiyun mempool_exit(&drbd_md_io_page_pool);
2117*4882a593Smuzhiyun mempool_exit(&drbd_ee_mempool);
2118*4882a593Smuzhiyun mempool_exit(&drbd_request_mempool);
2119*4882a593Smuzhiyun kmem_cache_destroy(drbd_ee_cache);
2120*4882a593Smuzhiyun kmem_cache_destroy(drbd_request_cache);
2121*4882a593Smuzhiyun kmem_cache_destroy(drbd_bm_ext_cache);
2122*4882a593Smuzhiyun kmem_cache_destroy(drbd_al_ext_cache);
2123*4882a593Smuzhiyun
2124*4882a593Smuzhiyun drbd_ee_cache = NULL;
2125*4882a593Smuzhiyun drbd_request_cache = NULL;
2126*4882a593Smuzhiyun drbd_bm_ext_cache = NULL;
2127*4882a593Smuzhiyun drbd_al_ext_cache = NULL;
2128*4882a593Smuzhiyun
2129*4882a593Smuzhiyun return;
2130*4882a593Smuzhiyun }
2131*4882a593Smuzhiyun
2132*4882a593Smuzhiyun static int drbd_create_mempools(void)
2133*4882a593Smuzhiyun {
2134*4882a593Smuzhiyun struct page *page;
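	/* Pool sizing heuristic: enough pages (and slab objects) to cover
	 * one maximally sized BIO per configured minor device. */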
2135*4882a593Smuzhiyun const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count;
2136*4882a593Smuzhiyun int i, ret;
2137*4882a593Smuzhiyun
2138*4882a593Smuzhiyun /* caches */
2139*4882a593Smuzhiyun drbd_request_cache = kmem_cache_create(
2140*4882a593Smuzhiyun "drbd_req", sizeof(struct drbd_request), 0, 0, NULL);
2141*4882a593Smuzhiyun if (drbd_request_cache == NULL)
2142*4882a593Smuzhiyun goto Enomem;
2143*4882a593Smuzhiyun
2144*4882a593Smuzhiyun drbd_ee_cache = kmem_cache_create(
2145*4882a593Smuzhiyun "drbd_ee", sizeof(struct drbd_peer_request), 0, 0, NULL);
2146*4882a593Smuzhiyun if (drbd_ee_cache == NULL)
2147*4882a593Smuzhiyun goto Enomem;
2148*4882a593Smuzhiyun
2149*4882a593Smuzhiyun drbd_bm_ext_cache = kmem_cache_create(
2150*4882a593Smuzhiyun "drbd_bm", sizeof(struct bm_extent), 0, 0, NULL);
2151*4882a593Smuzhiyun if (drbd_bm_ext_cache == NULL)
2152*4882a593Smuzhiyun goto Enomem;
2153*4882a593Smuzhiyun
2154*4882a593Smuzhiyun drbd_al_ext_cache = kmem_cache_create(
2155*4882a593Smuzhiyun "drbd_al", sizeof(struct lc_element), 0, 0, NULL);
2156*4882a593Smuzhiyun if (drbd_al_ext_cache == NULL)
2157*4882a593Smuzhiyun goto Enomem;
2158*4882a593Smuzhiyun
2159*4882a593Smuzhiyun /* mempools */
2160*4882a593Smuzhiyun ret = bioset_init(&drbd_io_bio_set, BIO_POOL_SIZE, 0, 0);
2161*4882a593Smuzhiyun if (ret)
2162*4882a593Smuzhiyun goto Enomem;
2163*4882a593Smuzhiyun
2164*4882a593Smuzhiyun ret = bioset_init(&drbd_md_io_bio_set, DRBD_MIN_POOL_PAGES, 0,
2165*4882a593Smuzhiyun BIOSET_NEED_BVECS);
2166*4882a593Smuzhiyun if (ret)
2167*4882a593Smuzhiyun goto Enomem;
2168*4882a593Smuzhiyun
2169*4882a593Smuzhiyun ret = mempool_init_page_pool(&drbd_md_io_page_pool, DRBD_MIN_POOL_PAGES, 0);
2170*4882a593Smuzhiyun if (ret)
2171*4882a593Smuzhiyun goto Enomem;
2172*4882a593Smuzhiyun
2173*4882a593Smuzhiyun ret = mempool_init_slab_pool(&drbd_request_mempool, number,
2174*4882a593Smuzhiyun drbd_request_cache);
2175*4882a593Smuzhiyun if (ret)
2176*4882a593Smuzhiyun goto Enomem;
2177*4882a593Smuzhiyun
2178*4882a593Smuzhiyun ret = mempool_init_slab_pool(&drbd_ee_mempool, number, drbd_ee_cache);
2179*4882a593Smuzhiyun if (ret)
2180*4882a593Smuzhiyun goto Enomem;
2181*4882a593Smuzhiyun
2182*4882a593Smuzhiyun /* drbd's page pool */
2183*4882a593Smuzhiyun spin_lock_init(&drbd_pp_lock);
2184*4882a593Smuzhiyun
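	/* Pre-allocate the page pool as a singly linked list: each new page is
	 * chained to the previous head through its page_private() field. */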
2185*4882a593Smuzhiyun for (i = 0; i < number; i++) {
2186*4882a593Smuzhiyun page = alloc_page(GFP_HIGHUSER);
2187*4882a593Smuzhiyun if (!page)
2188*4882a593Smuzhiyun goto Enomem;
2189*4882a593Smuzhiyun set_page_private(page, (unsigned long)drbd_pp_pool);
2190*4882a593Smuzhiyun drbd_pp_pool = page;
2191*4882a593Smuzhiyun }
2192*4882a593Smuzhiyun drbd_pp_vacant = number;
2193*4882a593Smuzhiyun
2194*4882a593Smuzhiyun return 0;
2195*4882a593Smuzhiyun
2196*4882a593Smuzhiyun Enomem:
2197*4882a593Smuzhiyun drbd_destroy_mempools(); /* in case we allocated some */
2198*4882a593Smuzhiyun return -ENOMEM;
2199*4882a593Smuzhiyun }
2200*4882a593Smuzhiyun
2201*4882a593Smuzhiyun static void drbd_release_all_peer_reqs(struct drbd_device *device)
2202*4882a593Smuzhiyun {
2203*4882a593Smuzhiyun int rr;
2204*4882a593Smuzhiyun
2205*4882a593Smuzhiyun rr = drbd_free_peer_reqs(device, &device->active_ee);
2206*4882a593Smuzhiyun if (rr)
2207*4882a593Smuzhiyun drbd_err(device, "%d EEs in active list found!\n", rr);
2208*4882a593Smuzhiyun
2209*4882a593Smuzhiyun rr = drbd_free_peer_reqs(device, &device->sync_ee);
2210*4882a593Smuzhiyun if (rr)
2211*4882a593Smuzhiyun drbd_err(device, "%d EEs in sync list found!\n", rr);
2212*4882a593Smuzhiyun
2213*4882a593Smuzhiyun rr = drbd_free_peer_reqs(device, &device->read_ee);
2214*4882a593Smuzhiyun if (rr)
2215*4882a593Smuzhiyun drbd_err(device, "%d EEs in read list found!\n", rr);
2216*4882a593Smuzhiyun
2217*4882a593Smuzhiyun rr = drbd_free_peer_reqs(device, &device->done_ee);
2218*4882a593Smuzhiyun if (rr)
2219*4882a593Smuzhiyun drbd_err(device, "%d EEs in done list found!\n", rr);
2220*4882a593Smuzhiyun
2221*4882a593Smuzhiyun rr = drbd_free_peer_reqs(device, &device->net_ee);
2222*4882a593Smuzhiyun if (rr)
2223*4882a593Smuzhiyun drbd_err(device, "%d EEs in net list found!\n", rr);
2224*4882a593Smuzhiyun }
2225*4882a593Smuzhiyun
2226*4882a593Smuzhiyun /* caution. no locking. */
2227*4882a593Smuzhiyun void drbd_destroy_device(struct kref *kref)
2228*4882a593Smuzhiyun {
2229*4882a593Smuzhiyun struct drbd_device *device = container_of(kref, struct drbd_device, kref);
2230*4882a593Smuzhiyun struct drbd_resource *resource = device->resource;
2231*4882a593Smuzhiyun struct drbd_peer_device *peer_device, *tmp_peer_device;
2232*4882a593Smuzhiyun
2233*4882a593Smuzhiyun del_timer_sync(&device->request_timer);
2234*4882a593Smuzhiyun
2235*4882a593Smuzhiyun /* paranoia asserts */
2236*4882a593Smuzhiyun D_ASSERT(device, device->open_cnt == 0);
2237*4882a593Smuzhiyun /* end paranoia asserts */
2238*4882a593Smuzhiyun
2239*4882a593Smuzhiyun /* cleanup stuff that may have been allocated during
2240*4882a593Smuzhiyun * device (re-)configuration or state changes */
2241*4882a593Smuzhiyun
2242*4882a593Smuzhiyun drbd_backing_dev_free(device, device->ldev);
2243*4882a593Smuzhiyun device->ldev = NULL;
2244*4882a593Smuzhiyun
2245*4882a593Smuzhiyun drbd_release_all_peer_reqs(device);
2246*4882a593Smuzhiyun
2247*4882a593Smuzhiyun lc_destroy(device->act_log);
2248*4882a593Smuzhiyun lc_destroy(device->resync);
2249*4882a593Smuzhiyun
2250*4882a593Smuzhiyun kfree(device->p_uuid);
2251*4882a593Smuzhiyun /* device->p_uuid = NULL; */
2252*4882a593Smuzhiyun
2253*4882a593Smuzhiyun if (device->bitmap) /* should no longer be there. */
2254*4882a593Smuzhiyun drbd_bm_cleanup(device);
2255*4882a593Smuzhiyun __free_page(device->md_io.page);
2256*4882a593Smuzhiyun put_disk(device->vdisk);
2257*4882a593Smuzhiyun blk_cleanup_queue(device->rq_queue);
2258*4882a593Smuzhiyun kfree(device->rs_plan_s);
2259*4882a593Smuzhiyun
2260*4882a593Smuzhiyun /* not for_each_connection(connection, resource):
2261*4882a593Smuzhiyun * those may have been cleaned up and disassociated already.
2262*4882a593Smuzhiyun */
2263*4882a593Smuzhiyun for_each_peer_device_safe(peer_device, tmp_peer_device, device) {
2264*4882a593Smuzhiyun kref_put(&peer_device->connection->kref, drbd_destroy_connection);
2265*4882a593Smuzhiyun kfree(peer_device);
2266*4882a593Smuzhiyun }
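	/* poison the freed object so use-after-free bugs stand out */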
2267*4882a593Smuzhiyun memset(device, 0xfd, sizeof(*device));
2268*4882a593Smuzhiyun kfree(device);
2269*4882a593Smuzhiyun kref_put(&resource->kref, drbd_destroy_resource);
2270*4882a593Smuzhiyun }
2271*4882a593Smuzhiyun
2272*4882a593Smuzhiyun /* One global retry thread, if we need to push back some bio and have it
2273*4882a593Smuzhiyun * reinserted through our make request function.
2274*4882a593Smuzhiyun */
2275*4882a593Smuzhiyun static struct retry_worker {
2276*4882a593Smuzhiyun struct workqueue_struct *wq;
2277*4882a593Smuzhiyun struct work_struct worker;
2278*4882a593Smuzhiyun
2279*4882a593Smuzhiyun spinlock_t lock;
2280*4882a593Smuzhiyun struct list_head writes;
2281*4882a593Smuzhiyun } retry;
2282*4882a593Smuzhiyun
2283*4882a593Smuzhiyun static void do_retry(struct work_struct *ws)
2284*4882a593Smuzhiyun {
2285*4882a593Smuzhiyun struct retry_worker *retry = container_of(ws, struct retry_worker, worker);
2286*4882a593Smuzhiyun LIST_HEAD(writes);
2287*4882a593Smuzhiyun struct drbd_request *req, *tmp;
2288*4882a593Smuzhiyun
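	/* Detach the whole retry list under the lock, then resubmit the
	 * postponed writes outside of it so new entries can still be queued. */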
2289*4882a593Smuzhiyun spin_lock_irq(&retry->lock);
2290*4882a593Smuzhiyun list_splice_init(&retry->writes, &writes);
2291*4882a593Smuzhiyun spin_unlock_irq(&retry->lock);
2292*4882a593Smuzhiyun
2293*4882a593Smuzhiyun list_for_each_entry_safe(req, tmp, &writes, tl_requests) {
2294*4882a593Smuzhiyun struct drbd_device *device = req->device;
2295*4882a593Smuzhiyun struct bio *bio = req->master_bio;
2296*4882a593Smuzhiyun unsigned long start_jif = req->start_jif;
2297*4882a593Smuzhiyun bool expected;
2298*4882a593Smuzhiyun
2299*4882a593Smuzhiyun expected =
2300*4882a593Smuzhiyun expect(atomic_read(&req->completion_ref) == 0) &&
2301*4882a593Smuzhiyun expect(req->rq_state & RQ_POSTPONED) &&
2302*4882a593Smuzhiyun expect((req->rq_state & RQ_LOCAL_PENDING) == 0 ||
2303*4882a593Smuzhiyun (req->rq_state & RQ_LOCAL_ABORTED) != 0);
2304*4882a593Smuzhiyun
2305*4882a593Smuzhiyun if (!expected)
2306*4882a593Smuzhiyun drbd_err(device, "req=%p completion_ref=%d rq_state=%x\n",
2307*4882a593Smuzhiyun req, atomic_read(&req->completion_ref),
2308*4882a593Smuzhiyun req->rq_state);
2309*4882a593Smuzhiyun
2310*4882a593Smuzhiyun /* We still need to put one kref associated with the
2311*4882a593Smuzhiyun * "completion_ref" going zero in the code path that queued it
2312*4882a593Smuzhiyun * here. The request object may still be referenced by a
2313*4882a593Smuzhiyun * frozen local req->private_bio, in case we force-detached.
2314*4882a593Smuzhiyun */
2315*4882a593Smuzhiyun kref_put(&req->kref, drbd_req_destroy);
2316*4882a593Smuzhiyun
2317*4882a593Smuzhiyun /* A single suspended or otherwise blocking device may stall
2318*4882a593Smuzhiyun * all others as well. Fortunately, this code path is to
2319*4882a593Smuzhiyun * recover from a situation that "should not happen":
2320*4882a593Smuzhiyun * concurrent writes in multi-primary setup.
2321*4882a593Smuzhiyun * In a "normal" lifecycle, this workqueue is supposed to be
2322*4882a593Smuzhiyun * destroyed without ever doing anything.
2323*4882a593Smuzhiyun * If it turns out to be an issue anyways, we can do per
2324*4882a593Smuzhiyun * resource (replication group) or per device (minor) retry
2325*4882a593Smuzhiyun * workqueues instead.
2326*4882a593Smuzhiyun */
2327*4882a593Smuzhiyun
2328*4882a593Smuzhiyun /* We are not just doing submit_bio_noacct(),
2329*4882a593Smuzhiyun * as we want to keep the start_time information. */
2330*4882a593Smuzhiyun inc_ap_bio(device);
2331*4882a593Smuzhiyun __drbd_make_request(device, bio, start_jif);
2332*4882a593Smuzhiyun }
2333*4882a593Smuzhiyun }
2334*4882a593Smuzhiyun
2335*4882a593Smuzhiyun /* called via drbd_req_put_completion_ref(),
2336*4882a593Smuzhiyun * holds resource->req_lock */
2337*4882a593Smuzhiyun void drbd_restart_request(struct drbd_request *req)
2338*4882a593Smuzhiyun {
2339*4882a593Smuzhiyun unsigned long flags;
2340*4882a593Smuzhiyun spin_lock_irqsave(&retry.lock, flags);
2341*4882a593Smuzhiyun list_move_tail(&req->tl_requests, &retry.writes);
2342*4882a593Smuzhiyun spin_unlock_irqrestore(&retry.lock, flags);
2343*4882a593Smuzhiyun
2344*4882a593Smuzhiyun /* Drop the extra reference that would otherwise
2345*4882a593Smuzhiyun * have been dropped by complete_master_bio.
2346*4882a593Smuzhiyun * do_retry() needs to grab a new one. */
2347*4882a593Smuzhiyun dec_ap_bio(req->device);
2348*4882a593Smuzhiyun
2349*4882a593Smuzhiyun queue_work(retry.wq, &retry.worker);
2350*4882a593Smuzhiyun }
2351*4882a593Smuzhiyun
2352*4882a593Smuzhiyun void drbd_destroy_resource(struct kref *kref)
2353*4882a593Smuzhiyun {
2354*4882a593Smuzhiyun struct drbd_resource *resource =
2355*4882a593Smuzhiyun container_of(kref, struct drbd_resource, kref);
2356*4882a593Smuzhiyun
2357*4882a593Smuzhiyun idr_destroy(&resource->devices);
2358*4882a593Smuzhiyun free_cpumask_var(resource->cpu_mask);
2359*4882a593Smuzhiyun kfree(resource->name);
2360*4882a593Smuzhiyun memset(resource, 0xf2, sizeof(*resource));
2361*4882a593Smuzhiyun kfree(resource);
2362*4882a593Smuzhiyun }
2363*4882a593Smuzhiyun
2364*4882a593Smuzhiyun void drbd_free_resource(struct drbd_resource *resource)
2365*4882a593Smuzhiyun {
2366*4882a593Smuzhiyun struct drbd_connection *connection, *tmp;
2367*4882a593Smuzhiyun
2368*4882a593Smuzhiyun for_each_connection_safe(connection, tmp, resource) {
2369*4882a593Smuzhiyun list_del(&connection->connections);
2370*4882a593Smuzhiyun drbd_debugfs_connection_cleanup(connection);
2371*4882a593Smuzhiyun kref_put(&connection->kref, drbd_destroy_connection);
2372*4882a593Smuzhiyun }
2373*4882a593Smuzhiyun drbd_debugfs_resource_cleanup(resource);
2374*4882a593Smuzhiyun kref_put(&resource->kref, drbd_destroy_resource);
2375*4882a593Smuzhiyun }
2376*4882a593Smuzhiyun
2377*4882a593Smuzhiyun static void drbd_cleanup(void)
2378*4882a593Smuzhiyun {
2379*4882a593Smuzhiyun unsigned int i;
2380*4882a593Smuzhiyun struct drbd_device *device;
2381*4882a593Smuzhiyun struct drbd_resource *resource, *tmp;
2382*4882a593Smuzhiyun
2383*4882a593Smuzhiyun /* first remove proc,
2384*4882a593Smuzhiyun * drbdsetup uses its presence to detect
2385*4882a593Smuzhiyun * whether DRBD is loaded.
2386*4882a593Smuzhiyun * If we got stuck in proc removal
2387*4882a593Smuzhiyun * with netlink already deregistered,
2388*4882a593Smuzhiyun * some drbdsetup commands may wait forever
2389*4882a593Smuzhiyun * for an answer.
2390*4882a593Smuzhiyun */
2391*4882a593Smuzhiyun if (drbd_proc)
2392*4882a593Smuzhiyun remove_proc_entry("drbd", NULL);
2393*4882a593Smuzhiyun
2394*4882a593Smuzhiyun if (retry.wq)
2395*4882a593Smuzhiyun destroy_workqueue(retry.wq);
2396*4882a593Smuzhiyun
2397*4882a593Smuzhiyun drbd_genl_unregister();
2398*4882a593Smuzhiyun
2399*4882a593Smuzhiyun idr_for_each_entry(&drbd_devices, device, i)
2400*4882a593Smuzhiyun drbd_delete_device(device);
2401*4882a593Smuzhiyun
2402*4882a593Smuzhiyun /* not _rcu since there is no other updater anymore; genl already unregistered */
2403*4882a593Smuzhiyun for_each_resource_safe(resource, tmp, &drbd_resources) {
2404*4882a593Smuzhiyun list_del(&resource->resources);
2405*4882a593Smuzhiyun drbd_free_resource(resource);
2406*4882a593Smuzhiyun }
2407*4882a593Smuzhiyun
2408*4882a593Smuzhiyun drbd_debugfs_cleanup();
2409*4882a593Smuzhiyun
2410*4882a593Smuzhiyun drbd_destroy_mempools();
2411*4882a593Smuzhiyun unregister_blkdev(DRBD_MAJOR, "drbd");
2412*4882a593Smuzhiyun
2413*4882a593Smuzhiyun idr_destroy(&drbd_devices);
2414*4882a593Smuzhiyun
2415*4882a593Smuzhiyun pr_info("module cleanup done.\n");
2416*4882a593Smuzhiyun }
2417*4882a593Smuzhiyun
2418*4882a593Smuzhiyun static void drbd_init_workqueue(struct drbd_work_queue* wq)
2419*4882a593Smuzhiyun {
2420*4882a593Smuzhiyun spin_lock_init(&wq->q_lock);
2421*4882a593Smuzhiyun INIT_LIST_HEAD(&wq->q);
2422*4882a593Smuzhiyun init_waitqueue_head(&wq->q_wait);
2423*4882a593Smuzhiyun }
2424*4882a593Smuzhiyun
2425*4882a593Smuzhiyun struct completion_work {
2426*4882a593Smuzhiyun struct drbd_work w;
2427*4882a593Smuzhiyun struct completion done;
2428*4882a593Smuzhiyun };
2429*4882a593Smuzhiyun
2430*4882a593Smuzhiyun static int w_complete(struct drbd_work *w, int cancel)
2431*4882a593Smuzhiyun {
2432*4882a593Smuzhiyun struct completion_work *completion_work =
2433*4882a593Smuzhiyun container_of(w, struct completion_work, w);
2434*4882a593Smuzhiyun
2435*4882a593Smuzhiyun complete(&completion_work->done);
2436*4882a593Smuzhiyun return 0;
2437*4882a593Smuzhiyun }
2438*4882a593Smuzhiyun
2439*4882a593Smuzhiyun void drbd_flush_workqueue(struct drbd_work_queue *work_queue)
2440*4882a593Smuzhiyun {
2441*4882a593Smuzhiyun struct completion_work completion_work;
2442*4882a593Smuzhiyun
2443*4882a593Smuzhiyun completion_work.w.cb = w_complete;
2444*4882a593Smuzhiyun init_completion(&completion_work.done);
2445*4882a593Smuzhiyun drbd_queue_work(work_queue, &completion_work.w);
2446*4882a593Smuzhiyun wait_for_completion(&completion_work.done);
2447*4882a593Smuzhiyun }
2448*4882a593Smuzhiyun
2449*4882a593Smuzhiyun struct drbd_resource *drbd_find_resource(const char *name)
2450*4882a593Smuzhiyun {
2451*4882a593Smuzhiyun struct drbd_resource *resource;
2452*4882a593Smuzhiyun
2453*4882a593Smuzhiyun if (!name || !name[0])
2454*4882a593Smuzhiyun return NULL;
2455*4882a593Smuzhiyun
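	/* On success the resource is returned with an extra reference taken;
	 * the caller must drop it with kref_put(..., drbd_destroy_resource). */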
2456*4882a593Smuzhiyun rcu_read_lock();
2457*4882a593Smuzhiyun for_each_resource_rcu(resource, &drbd_resources) {
2458*4882a593Smuzhiyun if (!strcmp(resource->name, name)) {
2459*4882a593Smuzhiyun kref_get(&resource->kref);
2460*4882a593Smuzhiyun goto found;
2461*4882a593Smuzhiyun }
2462*4882a593Smuzhiyun }
2463*4882a593Smuzhiyun resource = NULL;
2464*4882a593Smuzhiyun found:
2465*4882a593Smuzhiyun rcu_read_unlock();
2466*4882a593Smuzhiyun return resource;
2467*4882a593Smuzhiyun }
2468*4882a593Smuzhiyun
2469*4882a593Smuzhiyun struct drbd_connection *conn_get_by_addrs(void *my_addr, int my_addr_len,
2470*4882a593Smuzhiyun void *peer_addr, int peer_addr_len)
2471*4882a593Smuzhiyun {
2472*4882a593Smuzhiyun struct drbd_resource *resource;
2473*4882a593Smuzhiyun struct drbd_connection *connection;
2474*4882a593Smuzhiyun
2475*4882a593Smuzhiyun rcu_read_lock();
2476*4882a593Smuzhiyun for_each_resource_rcu(resource, &drbd_resources) {
2477*4882a593Smuzhiyun for_each_connection_rcu(connection, resource) {
2478*4882a593Smuzhiyun if (connection->my_addr_len == my_addr_len &&
2479*4882a593Smuzhiyun connection->peer_addr_len == peer_addr_len &&
2480*4882a593Smuzhiyun !memcmp(&connection->my_addr, my_addr, my_addr_len) &&
2481*4882a593Smuzhiyun !memcmp(&connection->peer_addr, peer_addr, peer_addr_len)) {
2482*4882a593Smuzhiyun kref_get(&connection->kref);
2483*4882a593Smuzhiyun goto found;
2484*4882a593Smuzhiyun }
2485*4882a593Smuzhiyun }
2486*4882a593Smuzhiyun }
2487*4882a593Smuzhiyun connection = NULL;
2488*4882a593Smuzhiyun found:
2489*4882a593Smuzhiyun rcu_read_unlock();
2490*4882a593Smuzhiyun return connection;
2491*4882a593Smuzhiyun }
2492*4882a593Smuzhiyun
2493*4882a593Smuzhiyun static int drbd_alloc_socket(struct drbd_socket *socket)
2494*4882a593Smuzhiyun {
2495*4882a593Smuzhiyun socket->rbuf = (void *) __get_free_page(GFP_KERNEL);
2496*4882a593Smuzhiyun if (!socket->rbuf)
2497*4882a593Smuzhiyun return -ENOMEM;
2498*4882a593Smuzhiyun socket->sbuf = (void *) __get_free_page(GFP_KERNEL);
2499*4882a593Smuzhiyun if (!socket->sbuf)
2500*4882a593Smuzhiyun return -ENOMEM;
2501*4882a593Smuzhiyun return 0;
2502*4882a593Smuzhiyun }
2503*4882a593Smuzhiyun
2504*4882a593Smuzhiyun static void drbd_free_socket(struct drbd_socket *socket)
2505*4882a593Smuzhiyun {
2506*4882a593Smuzhiyun free_page((unsigned long) socket->sbuf);
2507*4882a593Smuzhiyun free_page((unsigned long) socket->rbuf);
2508*4882a593Smuzhiyun }
2509*4882a593Smuzhiyun
2510*4882a593Smuzhiyun void conn_free_crypto(struct drbd_connection *connection)
2511*4882a593Smuzhiyun {
2512*4882a593Smuzhiyun drbd_free_sock(connection);
2513*4882a593Smuzhiyun
2514*4882a593Smuzhiyun crypto_free_shash(connection->csums_tfm);
2515*4882a593Smuzhiyun crypto_free_shash(connection->verify_tfm);
2516*4882a593Smuzhiyun crypto_free_shash(connection->cram_hmac_tfm);
2517*4882a593Smuzhiyun crypto_free_shash(connection->integrity_tfm);
2518*4882a593Smuzhiyun crypto_free_shash(connection->peer_integrity_tfm);
2519*4882a593Smuzhiyun kfree(connection->int_dig_in);
2520*4882a593Smuzhiyun kfree(connection->int_dig_vv);
2521*4882a593Smuzhiyun
2522*4882a593Smuzhiyun connection->csums_tfm = NULL;
2523*4882a593Smuzhiyun connection->verify_tfm = NULL;
2524*4882a593Smuzhiyun connection->cram_hmac_tfm = NULL;
2525*4882a593Smuzhiyun connection->integrity_tfm = NULL;
2526*4882a593Smuzhiyun connection->peer_integrity_tfm = NULL;
2527*4882a593Smuzhiyun connection->int_dig_in = NULL;
2528*4882a593Smuzhiyun connection->int_dig_vv = NULL;
2529*4882a593Smuzhiyun }
2530*4882a593Smuzhiyun
2531*4882a593Smuzhiyun int set_resource_options(struct drbd_resource *resource, struct res_opts *res_opts)
2532*4882a593Smuzhiyun {
2533*4882a593Smuzhiyun struct drbd_connection *connection;
2534*4882a593Smuzhiyun cpumask_var_t new_cpu_mask;
2535*4882a593Smuzhiyun int err;
2536*4882a593Smuzhiyun
2537*4882a593Smuzhiyun if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL))
2538*4882a593Smuzhiyun return -ENOMEM;
2539*4882a593Smuzhiyun
2540*4882a593Smuzhiyun /* silently ignore cpu mask on UP kernel */
2541*4882a593Smuzhiyun if (nr_cpu_ids > 1 && res_opts->cpu_mask[0] != 0) {
2542*4882a593Smuzhiyun err = bitmap_parse(res_opts->cpu_mask, DRBD_CPU_MASK_SIZE,
2543*4882a593Smuzhiyun cpumask_bits(new_cpu_mask), nr_cpu_ids);
2544*4882a593Smuzhiyun if (err == -EOVERFLOW) {
2545*4882a593Smuzhiyun /* So what. mask it out. */
2546*4882a593Smuzhiyun cpumask_var_t tmp_cpu_mask;
2547*4882a593Smuzhiyun if (zalloc_cpumask_var(&tmp_cpu_mask, GFP_KERNEL)) {
2548*4882a593Smuzhiyun cpumask_setall(tmp_cpu_mask);
2549*4882a593Smuzhiyun cpumask_and(new_cpu_mask, new_cpu_mask, tmp_cpu_mask);
2550*4882a593Smuzhiyun drbd_warn(resource, "Overflow in bitmap_parse(%.12s%s), truncating to %u bits\n",
2551*4882a593Smuzhiyun res_opts->cpu_mask,
2552*4882a593Smuzhiyun strlen(res_opts->cpu_mask) > 12 ? "..." : "",
2553*4882a593Smuzhiyun nr_cpu_ids);
2554*4882a593Smuzhiyun free_cpumask_var(tmp_cpu_mask);
2555*4882a593Smuzhiyun err = 0;
2556*4882a593Smuzhiyun }
2557*4882a593Smuzhiyun }
2558*4882a593Smuzhiyun if (err) {
2559*4882a593Smuzhiyun drbd_warn(resource, "bitmap_parse() failed with %d\n", err);
2560*4882a593Smuzhiyun /* retcode = ERR_CPU_MASK_PARSE; */
2561*4882a593Smuzhiyun goto fail;
2562*4882a593Smuzhiyun }
2563*4882a593Smuzhiyun }
2564*4882a593Smuzhiyun resource->res_opts = *res_opts;
2565*4882a593Smuzhiyun if (cpumask_empty(new_cpu_mask))
2566*4882a593Smuzhiyun drbd_calc_cpu_mask(&new_cpu_mask);
2567*4882a593Smuzhiyun if (!cpumask_equal(resource->cpu_mask, new_cpu_mask)) {
2568*4882a593Smuzhiyun cpumask_copy(resource->cpu_mask, new_cpu_mask);
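		/* Flag the threads of this resource so they re-bind to the new
		 * CPU mask; assumption: each drbd thread checks its
		 * reset_cpu_mask flag in its main loop and re-binds itself. */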
2569*4882a593Smuzhiyun for_each_connection_rcu(connection, resource) {
2570*4882a593Smuzhiyun connection->receiver.reset_cpu_mask = 1;
2571*4882a593Smuzhiyun connection->ack_receiver.reset_cpu_mask = 1;
2572*4882a593Smuzhiyun connection->worker.reset_cpu_mask = 1;
2573*4882a593Smuzhiyun }
2574*4882a593Smuzhiyun }
2575*4882a593Smuzhiyun err = 0;
2576*4882a593Smuzhiyun
2577*4882a593Smuzhiyun fail:
2578*4882a593Smuzhiyun free_cpumask_var(new_cpu_mask);
2579*4882a593Smuzhiyun return err;
2580*4882a593Smuzhiyun
2581*4882a593Smuzhiyun }
2582*4882a593Smuzhiyun
2583*4882a593Smuzhiyun struct drbd_resource *drbd_create_resource(const char *name)
2584*4882a593Smuzhiyun {
2585*4882a593Smuzhiyun struct drbd_resource *resource;
2586*4882a593Smuzhiyun
2587*4882a593Smuzhiyun resource = kzalloc(sizeof(struct drbd_resource), GFP_KERNEL);
2588*4882a593Smuzhiyun if (!resource)
2589*4882a593Smuzhiyun goto fail;
2590*4882a593Smuzhiyun resource->name = kstrdup(name, GFP_KERNEL);
2591*4882a593Smuzhiyun if (!resource->name)
2592*4882a593Smuzhiyun goto fail_free_resource;
2593*4882a593Smuzhiyun if (!zalloc_cpumask_var(&resource->cpu_mask, GFP_KERNEL))
2594*4882a593Smuzhiyun goto fail_free_name;
2595*4882a593Smuzhiyun kref_init(&resource->kref);
2596*4882a593Smuzhiyun idr_init(&resource->devices);
2597*4882a593Smuzhiyun INIT_LIST_HEAD(&resource->connections);
2598*4882a593Smuzhiyun resource->write_ordering = WO_BDEV_FLUSH;
2599*4882a593Smuzhiyun list_add_tail_rcu(&resource->resources, &drbd_resources);
2600*4882a593Smuzhiyun mutex_init(&resource->conf_update);
2601*4882a593Smuzhiyun mutex_init(&resource->adm_mutex);
2602*4882a593Smuzhiyun spin_lock_init(&resource->req_lock);
2603*4882a593Smuzhiyun drbd_debugfs_resource_add(resource);
2604*4882a593Smuzhiyun return resource;
2605*4882a593Smuzhiyun
2606*4882a593Smuzhiyun fail_free_name:
2607*4882a593Smuzhiyun kfree(resource->name);
2608*4882a593Smuzhiyun fail_free_resource:
2609*4882a593Smuzhiyun kfree(resource);
2610*4882a593Smuzhiyun fail:
2611*4882a593Smuzhiyun return NULL;
2612*4882a593Smuzhiyun }
2613*4882a593Smuzhiyun
2614*4882a593Smuzhiyun /* caller must be under adm_mutex */
2615*4882a593Smuzhiyun struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts)
2616*4882a593Smuzhiyun {
2617*4882a593Smuzhiyun struct drbd_resource *resource;
2618*4882a593Smuzhiyun struct drbd_connection *connection;
2619*4882a593Smuzhiyun
2620*4882a593Smuzhiyun connection = kzalloc(sizeof(struct drbd_connection), GFP_KERNEL);
2621*4882a593Smuzhiyun if (!connection)
2622*4882a593Smuzhiyun return NULL;
2623*4882a593Smuzhiyun
2624*4882a593Smuzhiyun if (drbd_alloc_socket(&connection->data))
2625*4882a593Smuzhiyun goto fail;
2626*4882a593Smuzhiyun if (drbd_alloc_socket(&connection->meta))
2627*4882a593Smuzhiyun goto fail;
2628*4882a593Smuzhiyun
2629*4882a593Smuzhiyun connection->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL);
2630*4882a593Smuzhiyun if (!connection->current_epoch)
2631*4882a593Smuzhiyun goto fail;
2632*4882a593Smuzhiyun
2633*4882a593Smuzhiyun INIT_LIST_HEAD(&connection->transfer_log);
2634*4882a593Smuzhiyun
2635*4882a593Smuzhiyun INIT_LIST_HEAD(&connection->current_epoch->list);
2636*4882a593Smuzhiyun connection->epochs = 1;
2637*4882a593Smuzhiyun spin_lock_init(&connection->epoch_lock);
2638*4882a593Smuzhiyun
2639*4882a593Smuzhiyun connection->send.seen_any_write_yet = false;
2640*4882a593Smuzhiyun connection->send.current_epoch_nr = 0;
2641*4882a593Smuzhiyun connection->send.current_epoch_writes = 0;
2642*4882a593Smuzhiyun
2643*4882a593Smuzhiyun resource = drbd_create_resource(name);
2644*4882a593Smuzhiyun if (!resource)
2645*4882a593Smuzhiyun goto fail;
2646*4882a593Smuzhiyun
2647*4882a593Smuzhiyun connection->cstate = C_STANDALONE;
2648*4882a593Smuzhiyun mutex_init(&connection->cstate_mutex);
2649*4882a593Smuzhiyun init_waitqueue_head(&connection->ping_wait);
2650*4882a593Smuzhiyun idr_init(&connection->peer_devices);
2651*4882a593Smuzhiyun
2652*4882a593Smuzhiyun drbd_init_workqueue(&connection->sender_work);
2653*4882a593Smuzhiyun mutex_init(&connection->data.mutex);
2654*4882a593Smuzhiyun mutex_init(&connection->meta.mutex);
2655*4882a593Smuzhiyun
2656*4882a593Smuzhiyun drbd_thread_init(resource, &connection->receiver, drbd_receiver, "receiver");
2657*4882a593Smuzhiyun connection->receiver.connection = connection;
2658*4882a593Smuzhiyun drbd_thread_init(resource, &connection->worker, drbd_worker, "worker");
2659*4882a593Smuzhiyun connection->worker.connection = connection;
2660*4882a593Smuzhiyun drbd_thread_init(resource, &connection->ack_receiver, drbd_ack_receiver, "ack_recv");
2661*4882a593Smuzhiyun connection->ack_receiver.connection = connection;
2662*4882a593Smuzhiyun
2663*4882a593Smuzhiyun kref_init(&connection->kref);
2664*4882a593Smuzhiyun
2665*4882a593Smuzhiyun connection->resource = resource;
2666*4882a593Smuzhiyun
2667*4882a593Smuzhiyun if (set_resource_options(resource, res_opts))
2668*4882a593Smuzhiyun goto fail_resource;
2669*4882a593Smuzhiyun
2670*4882a593Smuzhiyun kref_get(&resource->kref);
2671*4882a593Smuzhiyun list_add_tail_rcu(&connection->connections, &resource->connections);
2672*4882a593Smuzhiyun drbd_debugfs_connection_add(connection);
2673*4882a593Smuzhiyun return connection;
2674*4882a593Smuzhiyun
2675*4882a593Smuzhiyun fail_resource:
2676*4882a593Smuzhiyun list_del(&resource->resources);
2677*4882a593Smuzhiyun drbd_free_resource(resource);
2678*4882a593Smuzhiyun fail:
2679*4882a593Smuzhiyun kfree(connection->current_epoch);
2680*4882a593Smuzhiyun drbd_free_socket(&connection->meta);
2681*4882a593Smuzhiyun drbd_free_socket(&connection->data);
2682*4882a593Smuzhiyun kfree(connection);
2683*4882a593Smuzhiyun return NULL;
2684*4882a593Smuzhiyun }
2685*4882a593Smuzhiyun
2686*4882a593Smuzhiyun void drbd_destroy_connection(struct kref *kref)
2687*4882a593Smuzhiyun {
2688*4882a593Smuzhiyun struct drbd_connection *connection = container_of(kref, struct drbd_connection, kref);
2689*4882a593Smuzhiyun struct drbd_resource *resource = connection->resource;
2690*4882a593Smuzhiyun
2691*4882a593Smuzhiyun if (atomic_read(&connection->current_epoch->epoch_size) != 0)
2692*4882a593Smuzhiyun drbd_err(connection, "epoch_size:%d\n", atomic_read(&connection->current_epoch->epoch_size));
2693*4882a593Smuzhiyun kfree(connection->current_epoch);
2694*4882a593Smuzhiyun
2695*4882a593Smuzhiyun idr_destroy(&connection->peer_devices);
2696*4882a593Smuzhiyun
2697*4882a593Smuzhiyun drbd_free_socket(&connection->meta);
2698*4882a593Smuzhiyun drbd_free_socket(&connection->data);
2699*4882a593Smuzhiyun kfree(connection->int_dig_in);
2700*4882a593Smuzhiyun kfree(connection->int_dig_vv);
2701*4882a593Smuzhiyun memset(connection, 0xfc, sizeof(*connection));
2702*4882a593Smuzhiyun kfree(connection);
2703*4882a593Smuzhiyun kref_put(&resource->kref, drbd_destroy_resource);
2704*4882a593Smuzhiyun }
2705*4882a593Smuzhiyun
2706*4882a593Smuzhiyun static int init_submitter(struct drbd_device *device)
2707*4882a593Smuzhiyun {
2708*4882a593Smuzhiyun /* opencoded create_singlethread_workqueue(),
2709*4882a593Smuzhiyun * to be able to say "drbd%d", ..., minor */
2710*4882a593Smuzhiyun device->submit.wq =
2711*4882a593Smuzhiyun alloc_ordered_workqueue("drbd%u_submit", WQ_MEM_RECLAIM, device->minor);
2712*4882a593Smuzhiyun if (!device->submit.wq)
2713*4882a593Smuzhiyun return -ENOMEM;
2714*4882a593Smuzhiyun
2715*4882a593Smuzhiyun INIT_WORK(&device->submit.worker, do_submit);
2716*4882a593Smuzhiyun INIT_LIST_HEAD(&device->submit.writes);
2717*4882a593Smuzhiyun return 0;
2718*4882a593Smuzhiyun }
2719*4882a593Smuzhiyun
2720*4882a593Smuzhiyun enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor)
2721*4882a593Smuzhiyun {
2722*4882a593Smuzhiyun struct drbd_resource *resource = adm_ctx->resource;
2723*4882a593Smuzhiyun struct drbd_connection *connection, *n;
2724*4882a593Smuzhiyun struct drbd_device *device;
2725*4882a593Smuzhiyun struct drbd_peer_device *peer_device, *tmp_peer_device;
2726*4882a593Smuzhiyun struct gendisk *disk;
2727*4882a593Smuzhiyun struct request_queue *q;
2728*4882a593Smuzhiyun int id;
2729*4882a593Smuzhiyun int vnr = adm_ctx->volume;
2730*4882a593Smuzhiyun enum drbd_ret_code err = ERR_NOMEM;
2731*4882a593Smuzhiyun
2732*4882a593Smuzhiyun device = minor_to_device(minor);
2733*4882a593Smuzhiyun if (device)
2734*4882a593Smuzhiyun return ERR_MINOR_OR_VOLUME_EXISTS;
2735*4882a593Smuzhiyun
2736*4882a593Smuzhiyun /* GFP_KERNEL, we are outside of all write-out paths */
2737*4882a593Smuzhiyun device = kzalloc(sizeof(struct drbd_device), GFP_KERNEL);
2738*4882a593Smuzhiyun if (!device)
2739*4882a593Smuzhiyun return ERR_NOMEM;
2740*4882a593Smuzhiyun kref_init(&device->kref);
2741*4882a593Smuzhiyun
2742*4882a593Smuzhiyun kref_get(&resource->kref);
2743*4882a593Smuzhiyun device->resource = resource;
2744*4882a593Smuzhiyun device->minor = minor;
2745*4882a593Smuzhiyun device->vnr = vnr;
2746*4882a593Smuzhiyun
2747*4882a593Smuzhiyun drbd_init_set_defaults(device);
2748*4882a593Smuzhiyun
2749*4882a593Smuzhiyun q = blk_alloc_queue(NUMA_NO_NODE);
2750*4882a593Smuzhiyun if (!q)
2751*4882a593Smuzhiyun goto out_no_q;
2752*4882a593Smuzhiyun device->rq_queue = q;
2753*4882a593Smuzhiyun
2754*4882a593Smuzhiyun disk = alloc_disk(1);
2755*4882a593Smuzhiyun if (!disk)
2756*4882a593Smuzhiyun goto out_no_disk;
2757*4882a593Smuzhiyun device->vdisk = disk;
2758*4882a593Smuzhiyun
2759*4882a593Smuzhiyun set_disk_ro(disk, true);
2760*4882a593Smuzhiyun
2761*4882a593Smuzhiyun disk->queue = q;
2762*4882a593Smuzhiyun disk->major = DRBD_MAJOR;
2763*4882a593Smuzhiyun disk->first_minor = minor;
2764*4882a593Smuzhiyun disk->fops = &drbd_ops;
2765*4882a593Smuzhiyun sprintf(disk->disk_name, "drbd%d", minor);
2766*4882a593Smuzhiyun disk->private_data = device;
2767*4882a593Smuzhiyun
2768*4882a593Smuzhiyun blk_queue_write_cache(q, true, true);
2769*4882a593Smuzhiyun /* Setting max_hw_sectors to the odd value of 8 KiB here
2770*4882a593Smuzhiyun    triggers a max_bio_size message upon first attach or connect. */
2771*4882a593Smuzhiyun blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
2772*4882a593Smuzhiyun
2773*4882a593Smuzhiyun device->md_io.page = alloc_page(GFP_KERNEL);
2774*4882a593Smuzhiyun if (!device->md_io.page)
2775*4882a593Smuzhiyun goto out_no_io_page;
2776*4882a593Smuzhiyun
2777*4882a593Smuzhiyun if (drbd_bm_init(device))
2778*4882a593Smuzhiyun goto out_no_bitmap;
2779*4882a593Smuzhiyun device->read_requests = RB_ROOT;
2780*4882a593Smuzhiyun device->write_requests = RB_ROOT;
2781*4882a593Smuzhiyun
2782*4882a593Smuzhiyun id = idr_alloc(&drbd_devices, device, minor, minor + 1, GFP_KERNEL);
2783*4882a593Smuzhiyun if (id < 0) {
2784*4882a593Smuzhiyun if (id == -ENOSPC)
2785*4882a593Smuzhiyun err = ERR_MINOR_OR_VOLUME_EXISTS;
2786*4882a593Smuzhiyun goto out_no_minor_idr;
2787*4882a593Smuzhiyun }
2788*4882a593Smuzhiyun kref_get(&device->kref);
2789*4882a593Smuzhiyun
2790*4882a593Smuzhiyun id = idr_alloc(&resource->devices, device, vnr, vnr + 1, GFP_KERNEL);
2791*4882a593Smuzhiyun if (id < 0) {
2792*4882a593Smuzhiyun if (id == -ENOSPC)
2793*4882a593Smuzhiyun err = ERR_MINOR_OR_VOLUME_EXISTS;
2794*4882a593Smuzhiyun goto out_idr_remove_minor;
2795*4882a593Smuzhiyun }
2796*4882a593Smuzhiyun kref_get(&device->kref);
2797*4882a593Smuzhiyun
2798*4882a593Smuzhiyun INIT_LIST_HEAD(&device->peer_devices);
2799*4882a593Smuzhiyun INIT_LIST_HEAD(&device->pending_bitmap_io);
2800*4882a593Smuzhiyun for_each_connection(connection, resource) {
2801*4882a593Smuzhiyun peer_device = kzalloc(sizeof(struct drbd_peer_device), GFP_KERNEL);
2802*4882a593Smuzhiyun if (!peer_device)
2803*4882a593Smuzhiyun goto out_idr_remove_from_resource;
2804*4882a593Smuzhiyun peer_device->connection = connection;
2805*4882a593Smuzhiyun peer_device->device = device;
2806*4882a593Smuzhiyun
2807*4882a593Smuzhiyun list_add(&peer_device->peer_devices, &device->peer_devices);
2808*4882a593Smuzhiyun kref_get(&device->kref);
2809*4882a593Smuzhiyun
2810*4882a593Smuzhiyun id = idr_alloc(&connection->peer_devices, peer_device, vnr, vnr + 1, GFP_KERNEL);
2811*4882a593Smuzhiyun if (id < 0) {
2812*4882a593Smuzhiyun if (id == -ENOSPC)
2813*4882a593Smuzhiyun err = ERR_INVALID_REQUEST;
2814*4882a593Smuzhiyun goto out_idr_remove_from_resource;
2815*4882a593Smuzhiyun }
2816*4882a593Smuzhiyun kref_get(&connection->kref);
2817*4882a593Smuzhiyun INIT_WORK(&peer_device->send_acks_work, drbd_send_acks_wf);
2818*4882a593Smuzhiyun }
2819*4882a593Smuzhiyun
2820*4882a593Smuzhiyun if (init_submitter(device)) {
2821*4882a593Smuzhiyun err = ERR_NOMEM;
2822*4882a593Smuzhiyun goto out_idr_remove_vol;
2823*4882a593Smuzhiyun }
2824*4882a593Smuzhiyun
2825*4882a593Smuzhiyun add_disk(disk);
2826*4882a593Smuzhiyun
2827*4882a593Smuzhiyun /* inherit the connection state */
2828*4882a593Smuzhiyun device->state.conn = first_connection(resource)->cstate;
2829*4882a593Smuzhiyun if (device->state.conn == C_WF_REPORT_PARAMS) {
2830*4882a593Smuzhiyun for_each_peer_device(peer_device, device)
2831*4882a593Smuzhiyun drbd_connected(peer_device);
2832*4882a593Smuzhiyun }
2833*4882a593Smuzhiyun /* move to create_peer_device() */
2834*4882a593Smuzhiyun for_each_peer_device(peer_device, device)
2835*4882a593Smuzhiyun drbd_debugfs_peer_device_add(peer_device);
2836*4882a593Smuzhiyun drbd_debugfs_device_add(device);
2837*4882a593Smuzhiyun return NO_ERROR;
2838*4882a593Smuzhiyun
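	/* error unwind: each label below undoes one setup step and falls
	 * through to the earlier ones, in reverse order of allocation */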
2839*4882a593Smuzhiyun out_idr_remove_vol:
2840*4882a593Smuzhiyun idr_remove(&connection->peer_devices, vnr);
2841*4882a593Smuzhiyun out_idr_remove_from_resource:
2842*4882a593Smuzhiyun for_each_connection_safe(connection, n, resource) {
2843*4882a593Smuzhiyun peer_device = idr_remove(&connection->peer_devices, vnr);
2844*4882a593Smuzhiyun if (peer_device)
2845*4882a593Smuzhiyun kref_put(&connection->kref, drbd_destroy_connection);
2846*4882a593Smuzhiyun }
2847*4882a593Smuzhiyun for_each_peer_device_safe(peer_device, tmp_peer_device, device) {
2848*4882a593Smuzhiyun list_del(&peer_device->peer_devices);
2849*4882a593Smuzhiyun kfree(peer_device);
2850*4882a593Smuzhiyun }
2851*4882a593Smuzhiyun idr_remove(&resource->devices, vnr);
2852*4882a593Smuzhiyun out_idr_remove_minor:
2853*4882a593Smuzhiyun idr_remove(&drbd_devices, minor);
2854*4882a593Smuzhiyun synchronize_rcu();
2855*4882a593Smuzhiyun out_no_minor_idr:
2856*4882a593Smuzhiyun drbd_bm_cleanup(device);
2857*4882a593Smuzhiyun out_no_bitmap:
2858*4882a593Smuzhiyun __free_page(device->md_io.page);
2859*4882a593Smuzhiyun out_no_io_page:
2860*4882a593Smuzhiyun put_disk(disk);
2861*4882a593Smuzhiyun out_no_disk:
2862*4882a593Smuzhiyun blk_cleanup_queue(q);
2863*4882a593Smuzhiyun out_no_q:
2864*4882a593Smuzhiyun kref_put(&resource->kref, drbd_destroy_resource);
2865*4882a593Smuzhiyun kfree(device);
2866*4882a593Smuzhiyun return err;
2867*4882a593Smuzhiyun }
2868*4882a593Smuzhiyun
2869*4882a593Smuzhiyun void drbd_delete_device(struct drbd_device *device)
2870*4882a593Smuzhiyun {
2871*4882a593Smuzhiyun struct drbd_resource *resource = device->resource;
2872*4882a593Smuzhiyun struct drbd_connection *connection;
2873*4882a593Smuzhiyun struct drbd_peer_device *peer_device;
2874*4882a593Smuzhiyun
2875*4882a593Smuzhiyun /* move to free_peer_device() */
2876*4882a593Smuzhiyun for_each_peer_device(peer_device, device)
2877*4882a593Smuzhiyun drbd_debugfs_peer_device_cleanup(peer_device);
2878*4882a593Smuzhiyun drbd_debugfs_device_cleanup(device);
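	/* the kref_put() calls below balance the per-idr and per-peer-device
	 * device references taken in drbd_create_device() */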
2879*4882a593Smuzhiyun for_each_connection(connection, resource) {
2880*4882a593Smuzhiyun idr_remove(&connection->peer_devices, device->vnr);
2881*4882a593Smuzhiyun kref_put(&device->kref, drbd_destroy_device);
2882*4882a593Smuzhiyun }
2883*4882a593Smuzhiyun idr_remove(&resource->devices, device->vnr);
2884*4882a593Smuzhiyun kref_put(&device->kref, drbd_destroy_device);
2885*4882a593Smuzhiyun idr_remove(&drbd_devices, device_to_minor(device));
2886*4882a593Smuzhiyun kref_put(&device->kref, drbd_destroy_device);
2887*4882a593Smuzhiyun del_gendisk(device->vdisk);
2888*4882a593Smuzhiyun synchronize_rcu();
2889*4882a593Smuzhiyun kref_put(&device->kref, drbd_destroy_device);
2890*4882a593Smuzhiyun }
2891*4882a593Smuzhiyun
2892*4882a593Smuzhiyun static int __init drbd_init(void)
2893*4882a593Smuzhiyun {
2894*4882a593Smuzhiyun int err;
2895*4882a593Smuzhiyun
2896*4882a593Smuzhiyun if (drbd_minor_count < DRBD_MINOR_COUNT_MIN || drbd_minor_count > DRBD_MINOR_COUNT_MAX) {
2897*4882a593Smuzhiyun pr_err("invalid minor_count (%d)\n", drbd_minor_count);
2898*4882a593Smuzhiyun #ifdef MODULE
2899*4882a593Smuzhiyun return -EINVAL;
2900*4882a593Smuzhiyun #else
2901*4882a593Smuzhiyun drbd_minor_count = DRBD_MINOR_COUNT_DEF;
2902*4882a593Smuzhiyun #endif
2903*4882a593Smuzhiyun }
2904*4882a593Smuzhiyun
2905*4882a593Smuzhiyun err = register_blkdev(DRBD_MAJOR, "drbd");
2906*4882a593Smuzhiyun if (err) {
2907*4882a593Smuzhiyun pr_err("unable to register block device major %d\n",
2908*4882a593Smuzhiyun DRBD_MAJOR);
2909*4882a593Smuzhiyun return err;
2910*4882a593Smuzhiyun }
2911*4882a593Smuzhiyun
2912*4882a593Smuzhiyun /*
2913*4882a593Smuzhiyun * allocate all necessary structs
2914*4882a593Smuzhiyun */
2915*4882a593Smuzhiyun init_waitqueue_head(&drbd_pp_wait);
2916*4882a593Smuzhiyun
2917*4882a593Smuzhiyun drbd_proc = NULL; /* play safe for drbd_cleanup */
2918*4882a593Smuzhiyun idr_init(&drbd_devices);
2919*4882a593Smuzhiyun
2920*4882a593Smuzhiyun mutex_init(&resources_mutex);
2921*4882a593Smuzhiyun INIT_LIST_HEAD(&drbd_resources);
2922*4882a593Smuzhiyun
2923*4882a593Smuzhiyun err = drbd_genl_register();
2924*4882a593Smuzhiyun if (err) {
2925*4882a593Smuzhiyun pr_err("unable to register generic netlink family\n");
2926*4882a593Smuzhiyun goto fail;
2927*4882a593Smuzhiyun }
2928*4882a593Smuzhiyun
2929*4882a593Smuzhiyun err = drbd_create_mempools();
2930*4882a593Smuzhiyun if (err)
2931*4882a593Smuzhiyun goto fail;
2932*4882a593Smuzhiyun
2933*4882a593Smuzhiyun err = -ENOMEM;
2934*4882a593Smuzhiyun drbd_proc = proc_create_single("drbd", S_IFREG | 0444, NULL, drbd_seq_show);
2935*4882a593Smuzhiyun if (!drbd_proc) {
2936*4882a593Smuzhiyun pr_err("unable to register proc file\n");
2937*4882a593Smuzhiyun goto fail;
2938*4882a593Smuzhiyun }
2939*4882a593Smuzhiyun
2940*4882a593Smuzhiyun retry.wq = create_singlethread_workqueue("drbd-reissue");
2941*4882a593Smuzhiyun if (!retry.wq) {
2942*4882a593Smuzhiyun pr_err("unable to create retry workqueue\n");
2943*4882a593Smuzhiyun goto fail;
2944*4882a593Smuzhiyun }
2945*4882a593Smuzhiyun INIT_WORK(&retry.worker, do_retry);
2946*4882a593Smuzhiyun spin_lock_init(&retry.lock);
2947*4882a593Smuzhiyun INIT_LIST_HEAD(&retry.writes);
2948*4882a593Smuzhiyun
2949*4882a593Smuzhiyun drbd_debugfs_init();
2950*4882a593Smuzhiyun
2951*4882a593Smuzhiyun pr_info("initialized. "
2952*4882a593Smuzhiyun "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
2953*4882a593Smuzhiyun API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);
2954*4882a593Smuzhiyun pr_info("%s\n", drbd_buildtag());
2955*4882a593Smuzhiyun pr_info("registered as block device major %d\n", DRBD_MAJOR);
2956*4882a593Smuzhiyun return 0; /* Success! */
2957*4882a593Smuzhiyun
2958*4882a593Smuzhiyun fail:
2959*4882a593Smuzhiyun drbd_cleanup();
2960*4882a593Smuzhiyun if (err == -ENOMEM)
2961*4882a593Smuzhiyun pr_err("ran out of memory\n");
2962*4882a593Smuzhiyun else
2963*4882a593Smuzhiyun pr_err("initialization failure\n");
2964*4882a593Smuzhiyun return err;
2965*4882a593Smuzhiyun }
2966*4882a593Smuzhiyun
2967*4882a593Smuzhiyun static void drbd_free_one_sock(struct drbd_socket *ds)
2968*4882a593Smuzhiyun {
2969*4882a593Smuzhiyun struct socket *s;
2970*4882a593Smuzhiyun mutex_lock(&ds->mutex);
2971*4882a593Smuzhiyun s = ds->socket;
2972*4882a593Smuzhiyun ds->socket = NULL;
2973*4882a593Smuzhiyun mutex_unlock(&ds->mutex);
2974*4882a593Smuzhiyun if (s) {
2975*4882a593Smuzhiyun /* so debugfs does not need to mutex_lock() */
2976*4882a593Smuzhiyun synchronize_rcu();
2977*4882a593Smuzhiyun kernel_sock_shutdown(s, SHUT_RDWR);
2978*4882a593Smuzhiyun sock_release(s);
2979*4882a593Smuzhiyun }
2980*4882a593Smuzhiyun }
2981*4882a593Smuzhiyun
2982*4882a593Smuzhiyun void drbd_free_sock(struct drbd_connection *connection)
2983*4882a593Smuzhiyun {
2984*4882a593Smuzhiyun if (connection->data.socket)
2985*4882a593Smuzhiyun drbd_free_one_sock(&connection->data);
2986*4882a593Smuzhiyun if (connection->meta.socket)
2987*4882a593Smuzhiyun drbd_free_one_sock(&connection->meta);
2988*4882a593Smuzhiyun }
2989*4882a593Smuzhiyun
2990*4882a593Smuzhiyun /* meta data management */
2991*4882a593Smuzhiyun
2992*4882a593Smuzhiyun void conn_md_sync(struct drbd_connection *connection)
2993*4882a593Smuzhiyun {
2994*4882a593Smuzhiyun struct drbd_peer_device *peer_device;
2995*4882a593Smuzhiyun int vnr;
2996*4882a593Smuzhiyun
2997*4882a593Smuzhiyun rcu_read_lock();
2998*4882a593Smuzhiyun idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2999*4882a593Smuzhiyun struct drbd_device *device = peer_device->device;
3000*4882a593Smuzhiyun
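		/* pin the device and drop the RCU read lock around the
		 * potentially blocking metadata write */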
3001*4882a593Smuzhiyun kref_get(&device->kref);
3002*4882a593Smuzhiyun rcu_read_unlock();
3003*4882a593Smuzhiyun drbd_md_sync(device);
3004*4882a593Smuzhiyun kref_put(&device->kref, drbd_destroy_device);
3005*4882a593Smuzhiyun rcu_read_lock();
3006*4882a593Smuzhiyun }
3007*4882a593Smuzhiyun rcu_read_unlock();
3008*4882a593Smuzhiyun }
3009*4882a593Smuzhiyun
3010*4882a593Smuzhiyun /* aligned 4kByte */
3011*4882a593Smuzhiyun struct meta_data_on_disk {
3012*4882a593Smuzhiyun u64 la_size_sect; /* last agreed size. */
3013*4882a593Smuzhiyun u64 uuid[UI_SIZE]; /* UUIDs. */
3014*4882a593Smuzhiyun u64 device_uuid;
3015*4882a593Smuzhiyun u64 reserved_u64_1;
3016*4882a593Smuzhiyun u32 flags; /* MDF */
3017*4882a593Smuzhiyun u32 magic;
3018*4882a593Smuzhiyun u32 md_size_sect;
3019*4882a593Smuzhiyun u32 al_offset; /* offset to this block */
3020*4882a593Smuzhiyun u32 al_nr_extents; /* important for restoring the AL (userspace) */
3021*4882a593Smuzhiyun /* `-- act_log->nr_elements <-- ldev->dc.al_extents */
3022*4882a593Smuzhiyun u32 bm_offset; /* offset to the bitmap, from here */
3023*4882a593Smuzhiyun u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */
3024*4882a593Smuzhiyun u32 la_peer_max_bio_size; /* last peer max_bio_size */
3025*4882a593Smuzhiyun
3026*4882a593Smuzhiyun /* see al_tr_number_to_on_disk_sector() */
3027*4882a593Smuzhiyun u32 al_stripes;
3028*4882a593Smuzhiyun u32 al_stripe_size_4k;
3029*4882a593Smuzhiyun
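	/* pad to exactly 4096 bytes: 7 u64 and 10 u32 members so far,
	 * as asserted by the BUILD_BUG_ON() in drbd_md_sync() */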
3030*4882a593Smuzhiyun u8 reserved_u8[4096 - (7*8 + 10*4)];
3031*4882a593Smuzhiyun } __packed;
3032*4882a593Smuzhiyun
3033*4882a593Smuzhiyun
3034*4882a593Smuzhiyun
3035*4882a593Smuzhiyun void drbd_md_write(struct drbd_device *device, void *b)
3036*4882a593Smuzhiyun {
3037*4882a593Smuzhiyun struct meta_data_on_disk *buffer = b;
3038*4882a593Smuzhiyun sector_t sector;
3039*4882a593Smuzhiyun int i;
3040*4882a593Smuzhiyun
3041*4882a593Smuzhiyun memset(buffer, 0, sizeof(*buffer));
3042*4882a593Smuzhiyun
3043*4882a593Smuzhiyun buffer->la_size_sect = cpu_to_be64(get_capacity(device->vdisk));
3044*4882a593Smuzhiyun for (i = UI_CURRENT; i < UI_SIZE; i++)
3045*4882a593Smuzhiyun buffer->uuid[i] = cpu_to_be64(device->ldev->md.uuid[i]);
3046*4882a593Smuzhiyun buffer->flags = cpu_to_be32(device->ldev->md.flags);
3047*4882a593Smuzhiyun buffer->magic = cpu_to_be32(DRBD_MD_MAGIC_84_UNCLEAN);
3048*4882a593Smuzhiyun
3049*4882a593Smuzhiyun buffer->md_size_sect = cpu_to_be32(device->ldev->md.md_size_sect);
3050*4882a593Smuzhiyun buffer->al_offset = cpu_to_be32(device->ldev->md.al_offset);
3051*4882a593Smuzhiyun buffer->al_nr_extents = cpu_to_be32(device->act_log->nr_elements);
3052*4882a593Smuzhiyun buffer->bm_bytes_per_bit = cpu_to_be32(BM_BLOCK_SIZE);
3053*4882a593Smuzhiyun buffer->device_uuid = cpu_to_be64(device->ldev->md.device_uuid);
3054*4882a593Smuzhiyun
3055*4882a593Smuzhiyun buffer->bm_offset = cpu_to_be32(device->ldev->md.bm_offset);
3056*4882a593Smuzhiyun buffer->la_peer_max_bio_size = cpu_to_be32(device->peer_max_bio_size);
3057*4882a593Smuzhiyun
3058*4882a593Smuzhiyun buffer->al_stripes = cpu_to_be32(device->ldev->md.al_stripes);
3059*4882a593Smuzhiyun buffer->al_stripe_size_4k = cpu_to_be32(device->ldev->md.al_stripe_size_4k);
3060*4882a593Smuzhiyun
3061*4882a593Smuzhiyun D_ASSERT(device, drbd_md_ss(device->ldev) == device->ldev->md.md_offset);
3062*4882a593Smuzhiyun sector = device->ldev->md.md_offset;
3063*4882a593Smuzhiyun
3064*4882a593Smuzhiyun if (drbd_md_sync_page_io(device, device->ldev, sector, REQ_OP_WRITE)) {
3065*4882a593Smuzhiyun /* this was a try anyways ... */
3066*4882a593Smuzhiyun drbd_err(device, "meta data update failed!\n");
3067*4882a593Smuzhiyun drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
3068*4882a593Smuzhiyun }
3069*4882a593Smuzhiyun }
3070*4882a593Smuzhiyun
3071*4882a593Smuzhiyun /**
3072*4882a593Smuzhiyun * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
3073*4882a593Smuzhiyun * @device: DRBD device.
3074*4882a593Smuzhiyun */
3075*4882a593Smuzhiyun void drbd_md_sync(struct drbd_device *device)
3076*4882a593Smuzhiyun {
3077*4882a593Smuzhiyun struct meta_data_on_disk *buffer;
3078*4882a593Smuzhiyun
3079*4882a593Smuzhiyun /* Don't accidentally change the DRBD meta data layout. */
3080*4882a593Smuzhiyun BUILD_BUG_ON(UI_SIZE != 4);
3081*4882a593Smuzhiyun BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096);
3082*4882a593Smuzhiyun
3083*4882a593Smuzhiyun del_timer(&device->md_sync_timer);
3084*4882a593Smuzhiyun /* timer may be rearmed by drbd_md_mark_dirty() now. */
3085*4882a593Smuzhiyun if (!test_and_clear_bit(MD_DIRTY, &device->flags))
3086*4882a593Smuzhiyun return;
3087*4882a593Smuzhiyun
3088*4882a593Smuzhiyun /* We use D_FAILED here, and not D_ATTACHING, because we try to write
3089*4882a593Smuzhiyun * metadata even if we detach due to a disk failure! */
3090*4882a593Smuzhiyun if (!get_ldev_if_state(device, D_FAILED))
3091*4882a593Smuzhiyun return;
3092*4882a593Smuzhiyun
3093*4882a593Smuzhiyun buffer = drbd_md_get_buffer(device, __func__);
3094*4882a593Smuzhiyun if (!buffer)
3095*4882a593Smuzhiyun goto out;
3096*4882a593Smuzhiyun
3097*4882a593Smuzhiyun drbd_md_write(device, buffer);
3098*4882a593Smuzhiyun
3099*4882a593Smuzhiyun /* Update device->ldev->md.la_size_sect,
3100*4882a593Smuzhiyun * since we updated it on metadata. */
3101*4882a593Smuzhiyun device->ldev->md.la_size_sect = get_capacity(device->vdisk);
3102*4882a593Smuzhiyun
3103*4882a593Smuzhiyun drbd_md_put_buffer(device);
3104*4882a593Smuzhiyun out:
3105*4882a593Smuzhiyun put_ldev(device);
3106*4882a593Smuzhiyun }
3107*4882a593Smuzhiyun
3108*4882a593Smuzhiyun static int check_activity_log_stripe_size(struct drbd_device *device,
3109*4882a593Smuzhiyun struct meta_data_on_disk *on_disk,
3110*4882a593Smuzhiyun struct drbd_md *in_core)
3111*4882a593Smuzhiyun {
3112*4882a593Smuzhiyun u32 al_stripes = be32_to_cpu(on_disk->al_stripes);
3113*4882a593Smuzhiyun u32 al_stripe_size_4k = be32_to_cpu(on_disk->al_stripe_size_4k);
3114*4882a593Smuzhiyun u64 al_size_4k;
3115*4882a593Smuzhiyun
3116*4882a593Smuzhiyun /* both not set: default to old fixed size activity log */
3117*4882a593Smuzhiyun if (al_stripes == 0 && al_stripe_size_4k == 0) {
3118*4882a593Smuzhiyun al_stripes = 1;
3119*4882a593Smuzhiyun al_stripe_size_4k = MD_32kB_SECT/8;
3120*4882a593Smuzhiyun }
3121*4882a593Smuzhiyun
3122*4882a593Smuzhiyun /* some paranoia plausibility checks */
3123*4882a593Smuzhiyun
3124*4882a593Smuzhiyun /* we need both values to be set */
3125*4882a593Smuzhiyun if (al_stripes == 0 || al_stripe_size_4k == 0)
3126*4882a593Smuzhiyun goto err;
3127*4882a593Smuzhiyun
3128*4882a593Smuzhiyun al_size_4k = (u64)al_stripes * al_stripe_size_4k;
3129*4882a593Smuzhiyun
3130*4882a593Smuzhiyun /* Upper limit of activity log area, to avoid potential overflow
3131*4882a593Smuzhiyun * problems in al_tr_number_to_on_disk_sector(). As of right now, more
3132*4882a593Smuzhiyun * than 72 * 4k blocks total only increases the amount of history,
3133*4882a593Smuzhiyun * limiting this arbitrarily to 16 GB is not a real limitation ;-) */
3134*4882a593Smuzhiyun if (al_size_4k > (16 * 1024 * 1024/4))
3135*4882a593Smuzhiyun goto err;
3136*4882a593Smuzhiyun
3137*4882a593Smuzhiyun /* Lower limit: we need at least 8 transaction slots (32kB)
3138*4882a593Smuzhiyun * to not break existing setups */
3139*4882a593Smuzhiyun if (al_size_4k < MD_32kB_SECT/8)
3140*4882a593Smuzhiyun goto err;
3141*4882a593Smuzhiyun
3142*4882a593Smuzhiyun in_core->al_stripe_size_4k = al_stripe_size_4k;
3143*4882a593Smuzhiyun in_core->al_stripes = al_stripes;
3144*4882a593Smuzhiyun in_core->al_size_4k = al_size_4k;
3145*4882a593Smuzhiyun
3146*4882a593Smuzhiyun return 0;
3147*4882a593Smuzhiyun err:
3148*4882a593Smuzhiyun drbd_err(device, "invalid activity log striping: al_stripes=%u, al_stripe_size_4k=%u\n",
3149*4882a593Smuzhiyun al_stripes, al_stripe_size_4k);
3150*4882a593Smuzhiyun return -EINVAL;
3151*4882a593Smuzhiyun }
3152*4882a593Smuzhiyun
3153*4882a593Smuzhiyun static int check_offsets_and_sizes(struct drbd_device *device, struct drbd_backing_dev *bdev)
3154*4882a593Smuzhiyun {
3155*4882a593Smuzhiyun sector_t capacity = drbd_get_capacity(bdev->md_bdev);
3156*4882a593Smuzhiyun struct drbd_md *in_core = &bdev->md;
3157*4882a593Smuzhiyun s32 on_disk_al_sect;
3158*4882a593Smuzhiyun s32 on_disk_bm_sect;
3159*4882a593Smuzhiyun
3160*4882a593Smuzhiyun /* The on-disk size of the activity log, calculated from offsets, and
3161*4882a593Smuzhiyun * the size of the activity log calculated from the stripe settings,
3162*4882a593Smuzhiyun * should match.
3163*4882a593Smuzhiyun * Though we could relax this a bit: it is ok, if the striped activity log
3164*4882a593Smuzhiyun * fits in the available on-disk activity log size.
3165*4882a593Smuzhiyun * Right now, that would break how resize is implemented.
3166*4882a593Smuzhiyun * TODO: make drbd_determine_dev_size() (and the drbdmeta tool) aware
3167*4882a593Smuzhiyun * of possible unused padding space in the on disk layout. */
3168*4882a593Smuzhiyun if (in_core->al_offset < 0) {
3169*4882a593Smuzhiyun if (in_core->bm_offset > in_core->al_offset)
3170*4882a593Smuzhiyun goto err;
3171*4882a593Smuzhiyun on_disk_al_sect = -in_core->al_offset;
3172*4882a593Smuzhiyun on_disk_bm_sect = in_core->al_offset - in_core->bm_offset;
3173*4882a593Smuzhiyun } else {
3174*4882a593Smuzhiyun if (in_core->al_offset != MD_4kB_SECT)
3175*4882a593Smuzhiyun goto err;
3176*4882a593Smuzhiyun if (in_core->bm_offset < in_core->al_offset + in_core->al_size_4k * MD_4kB_SECT)
3177*4882a593Smuzhiyun goto err;
3178*4882a593Smuzhiyun
3179*4882a593Smuzhiyun on_disk_al_sect = in_core->bm_offset - MD_4kB_SECT;
3180*4882a593Smuzhiyun on_disk_bm_sect = in_core->md_size_sect - in_core->bm_offset;
3181*4882a593Smuzhiyun }
3182*4882a593Smuzhiyun
3183*4882a593Smuzhiyun /* old fixed size meta data is exactly that: fixed. */
3184*4882a593Smuzhiyun if (in_core->meta_dev_idx >= 0) {
3185*4882a593Smuzhiyun if (in_core->md_size_sect != MD_128MB_SECT
3186*4882a593Smuzhiyun || in_core->al_offset != MD_4kB_SECT
3187*4882a593Smuzhiyun || in_core->bm_offset != MD_4kB_SECT + MD_32kB_SECT
3188*4882a593Smuzhiyun || in_core->al_stripes != 1
3189*4882a593Smuzhiyun || in_core->al_stripe_size_4k != MD_32kB_SECT/8)
3190*4882a593Smuzhiyun goto err;
3191*4882a593Smuzhiyun }
3192*4882a593Smuzhiyun
3193*4882a593Smuzhiyun if (capacity < in_core->md_size_sect)
3194*4882a593Smuzhiyun goto err;
3195*4882a593Smuzhiyun if (capacity - in_core->md_size_sect < drbd_md_first_sector(bdev))
3196*4882a593Smuzhiyun goto err;
3197*4882a593Smuzhiyun
3198*4882a593Smuzhiyun /* should be aligned, and at least 32k */
3199*4882a593Smuzhiyun if ((on_disk_al_sect & 7) || (on_disk_al_sect < MD_32kB_SECT))
3200*4882a593Smuzhiyun goto err;
3201*4882a593Smuzhiyun
3202*4882a593Smuzhiyun /* should fit (for now: exactly) into the available on-disk space;
3203*4882a593Smuzhiyun * overflow prevention is in check_activity_log_stripe_size() above. */
3204*4882a593Smuzhiyun if (on_disk_al_sect != in_core->al_size_4k * MD_4kB_SECT)
3205*4882a593Smuzhiyun goto err;
3206*4882a593Smuzhiyun
3207*4882a593Smuzhiyun /* again, should be aligned */
3208*4882a593Smuzhiyun if (in_core->bm_offset & 7)
3209*4882a593Smuzhiyun goto err;
3210*4882a593Smuzhiyun
3211*4882a593Smuzhiyun /* FIXME check for device grow with flex external meta data? */
3212*4882a593Smuzhiyun
3213*4882a593Smuzhiyun /* can the available bitmap space cover the last agreed device size? */
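	/* one bitmap bit covers MD_4kB_SECT (8) sectors of data, 8 bits per
	 * byte, 512 bytes per on-disk sector -- hence the chained divisions */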
3214*4882a593Smuzhiyun if (on_disk_bm_sect < (in_core->la_size_sect+7)/MD_4kB_SECT/8/512)
3215*4882a593Smuzhiyun goto err;
3216*4882a593Smuzhiyun
3217*4882a593Smuzhiyun return 0;
3218*4882a593Smuzhiyun
3219*4882a593Smuzhiyun err:
3220*4882a593Smuzhiyun drbd_err(device, "meta data offsets don't make sense: idx=%d "
3221*4882a593Smuzhiyun "al_s=%u, al_sz4k=%u, al_offset=%d, bm_offset=%d, "
3222*4882a593Smuzhiyun "md_size_sect=%u, la_size=%llu, md_capacity=%llu\n",
3223*4882a593Smuzhiyun in_core->meta_dev_idx,
3224*4882a593Smuzhiyun in_core->al_stripes, in_core->al_stripe_size_4k,
3225*4882a593Smuzhiyun in_core->al_offset, in_core->bm_offset, in_core->md_size_sect,
3226*4882a593Smuzhiyun (unsigned long long)in_core->la_size_sect,
3227*4882a593Smuzhiyun (unsigned long long)capacity);
3228*4882a593Smuzhiyun
3229*4882a593Smuzhiyun return -EINVAL;
3230*4882a593Smuzhiyun }
3231*4882a593Smuzhiyun
3232*4882a593Smuzhiyun
3233*4882a593Smuzhiyun /**
3234*4882a593Smuzhiyun * drbd_md_read() - Reads in the meta data super block
3235*4882a593Smuzhiyun * @device: DRBD device.
3236*4882a593Smuzhiyun * @bdev: Device from which the meta data should be read in.
3237*4882a593Smuzhiyun *
3238*4882a593Smuzhiyun * Return NO_ERROR on success, or an enum drbd_ret_code error code in case
3239*4882a593Smuzhiyun * something goes wrong.
3240*4882a593Smuzhiyun *
3241*4882a593Smuzhiyun * Called exactly once during drbd_adm_attach(), while still being D_DISKLESS,
3242*4882a593Smuzhiyun * even before @bdev is assigned to @device->ldev.
3243*4882a593Smuzhiyun */
3244*4882a593Smuzhiyun int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev)
3245*4882a593Smuzhiyun {
3246*4882a593Smuzhiyun struct meta_data_on_disk *buffer;
3247*4882a593Smuzhiyun u32 magic, flags;
3248*4882a593Smuzhiyun int i, rv = NO_ERROR;
3249*4882a593Smuzhiyun
3250*4882a593Smuzhiyun if (device->state.disk != D_DISKLESS)
3251*4882a593Smuzhiyun return ERR_DISK_CONFIGURED;
3252*4882a593Smuzhiyun
3253*4882a593Smuzhiyun buffer = drbd_md_get_buffer(device, __func__);
3254*4882a593Smuzhiyun if (!buffer)
3255*4882a593Smuzhiyun return ERR_NOMEM;
3256*4882a593Smuzhiyun
3257*4882a593Smuzhiyun /* First, figure out where our meta data superblock is located,
3258*4882a593Smuzhiyun * and read it. */
3259*4882a593Smuzhiyun bdev->md.meta_dev_idx = bdev->disk_conf->meta_dev_idx;
3260*4882a593Smuzhiyun bdev->md.md_offset = drbd_md_ss(bdev);
3261*4882a593Smuzhiyun /* Even for (flexible or indexed) external meta data,
3262*4882a593Smuzhiyun * initially restrict us to the 4k superblock for now.
3263*4882a593Smuzhiyun * Affects the paranoia out-of-range access check in drbd_md_sync_page_io(). */
3264*4882a593Smuzhiyun bdev->md.md_size_sect = 8;
3265*4882a593Smuzhiyun
3266*4882a593Smuzhiyun if (drbd_md_sync_page_io(device, bdev, bdev->md.md_offset,
3267*4882a593Smuzhiyun REQ_OP_READ)) {
3268*4882a593Smuzhiyun /* NOTE: can't do normal error processing here as this is
3269*4882a593Smuzhiyun called BEFORE disk is attached */
3270*4882a593Smuzhiyun drbd_err(device, "Error while reading metadata.\n");
3271*4882a593Smuzhiyun rv = ERR_IO_MD_DISK;
3272*4882a593Smuzhiyun goto err;
3273*4882a593Smuzhiyun }
3274*4882a593Smuzhiyun
3275*4882a593Smuzhiyun magic = be32_to_cpu(buffer->magic);
3276*4882a593Smuzhiyun flags = be32_to_cpu(buffer->flags);
3277*4882a593Smuzhiyun if (magic == DRBD_MD_MAGIC_84_UNCLEAN ||
3278*4882a593Smuzhiyun (magic == DRBD_MD_MAGIC_08 && !(flags & MDF_AL_CLEAN))) {
3279*4882a593Smuzhiyun /* btw: that's Activity Log clean, not "all" clean. */
3280*4882a593Smuzhiyun drbd_err(device, "Found unclean meta data. Did you \"drbdadm apply-al\"?\n");
3281*4882a593Smuzhiyun rv = ERR_MD_UNCLEAN;
3282*4882a593Smuzhiyun goto err;
3283*4882a593Smuzhiyun }
3284*4882a593Smuzhiyun
3285*4882a593Smuzhiyun rv = ERR_MD_INVALID;
3286*4882a593Smuzhiyun if (magic != DRBD_MD_MAGIC_08) {
3287*4882a593Smuzhiyun if (magic == DRBD_MD_MAGIC_07)
3288*4882a593Smuzhiyun drbd_err(device, "Found old (0.7) meta data magic. Did you \"drbdadm create-md\"?\n");
3289*4882a593Smuzhiyun else
3290*4882a593Smuzhiyun drbd_err(device, "Meta data magic not found. Did you \"drbdadm create-md\"?\n");
3291*4882a593Smuzhiyun goto err;
3292*4882a593Smuzhiyun }
3293*4882a593Smuzhiyun
3294*4882a593Smuzhiyun if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
3295*4882a593Smuzhiyun drbd_err(device, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
3296*4882a593Smuzhiyun be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
3297*4882a593Smuzhiyun goto err;
3298*4882a593Smuzhiyun }
3299*4882a593Smuzhiyun
3300*4882a593Smuzhiyun
3301*4882a593Smuzhiyun /* convert to in-core (CPU native) endianness */
3302*4882a593Smuzhiyun bdev->md.la_size_sect = be64_to_cpu(buffer->la_size_sect);
3303*4882a593Smuzhiyun for (i = UI_CURRENT; i < UI_SIZE; i++)
3304*4882a593Smuzhiyun bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
3305*4882a593Smuzhiyun bdev->md.flags = be32_to_cpu(buffer->flags);
3306*4882a593Smuzhiyun bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
3307*4882a593Smuzhiyun
3308*4882a593Smuzhiyun bdev->md.md_size_sect = be32_to_cpu(buffer->md_size_sect);
3309*4882a593Smuzhiyun bdev->md.al_offset = be32_to_cpu(buffer->al_offset);
3310*4882a593Smuzhiyun bdev->md.bm_offset = be32_to_cpu(buffer->bm_offset);
3311*4882a593Smuzhiyun
3312*4882a593Smuzhiyun if (check_activity_log_stripe_size(device, buffer, &bdev->md))
3313*4882a593Smuzhiyun goto err;
3314*4882a593Smuzhiyun if (check_offsets_and_sizes(device, bdev))
3315*4882a593Smuzhiyun goto err;
3316*4882a593Smuzhiyun
3317*4882a593Smuzhiyun if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) {
3318*4882a593Smuzhiyun drbd_err(device, "unexpected bm_offset: %d (expected %d)\n",
3319*4882a593Smuzhiyun be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset);
3320*4882a593Smuzhiyun goto err;
3321*4882a593Smuzhiyun }
3322*4882a593Smuzhiyun if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) {
3323*4882a593Smuzhiyun drbd_err(device, "unexpected md_size: %u (expected %u)\n",
3324*4882a593Smuzhiyun be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect);
3325*4882a593Smuzhiyun goto err;
3326*4882a593Smuzhiyun }
3327*4882a593Smuzhiyun
3328*4882a593Smuzhiyun rv = NO_ERROR;
3329*4882a593Smuzhiyun
3330*4882a593Smuzhiyun spin_lock_irq(&device->resource->req_lock);
3331*4882a593Smuzhiyun if (device->state.conn < C_CONNECTED) {
3332*4882a593Smuzhiyun unsigned int peer;
3333*4882a593Smuzhiyun peer = be32_to_cpu(buffer->la_peer_max_bio_size);
3334*4882a593Smuzhiyun peer = max(peer, DRBD_MAX_BIO_SIZE_SAFE);
3335*4882a593Smuzhiyun device->peer_max_bio_size = peer;
3336*4882a593Smuzhiyun }
3337*4882a593Smuzhiyun spin_unlock_irq(&device->resource->req_lock);
3338*4882a593Smuzhiyun
3339*4882a593Smuzhiyun err:
3340*4882a593Smuzhiyun drbd_md_put_buffer(device);
3341*4882a593Smuzhiyun
3342*4882a593Smuzhiyun return rv;
3343*4882a593Smuzhiyun }
3344*4882a593Smuzhiyun
3345*4882a593Smuzhiyun /**
3346*4882a593Smuzhiyun * drbd_md_mark_dirty() - Mark meta data super block as dirty
3347*4882a593Smuzhiyun * @device: DRBD device.
3348*4882a593Smuzhiyun *
3349*4882a593Smuzhiyun * Call this function if you change anything that should be written to
3350*4882a593Smuzhiyun * the meta-data super block. This function sets MD_DIRTY, and starts a
3351*4882a593Smuzhiyun * timer that ensures that drbd_md_sync() gets called within five seconds.
3352*4882a593Smuzhiyun */
3353*4882a593Smuzhiyun void drbd_md_mark_dirty(struct drbd_device *device)
3354*4882a593Smuzhiyun {
3355*4882a593Smuzhiyun if (!test_and_set_bit(MD_DIRTY, &device->flags))
3356*4882a593Smuzhiyun mod_timer(&device->md_sync_timer, jiffies + 5*HZ);
3357*4882a593Smuzhiyun }
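/* The timer armed above fires md_sync_timer_fn(), which posts MD_SYNC
 * device work; the worker then performs the actual drbd_md_sync(). */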
3358*4882a593Smuzhiyun
3359*4882a593Smuzhiyun void drbd_uuid_move_history(struct drbd_device *device) __must_hold(local)
3360*4882a593Smuzhiyun {
3361*4882a593Smuzhiyun int i;
3362*4882a593Smuzhiyun
3363*4882a593Smuzhiyun for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++)
3364*4882a593Smuzhiyun device->ldev->md.uuid[i+1] = device->ldev->md.uuid[i];
3365*4882a593Smuzhiyun }
3366*4882a593Smuzhiyun
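/* For UI_CURRENT, bit 0 of the UUID encodes the local role: it is set
 * while we are Primary and cleared while Secondary, and the exposed data
 * UUID is updated to match.  Caller must hold md.uuid_lock. */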
3367*4882a593Smuzhiyun void __drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must_hold(local)
3368*4882a593Smuzhiyun {
3369*4882a593Smuzhiyun if (idx == UI_CURRENT) {
3370*4882a593Smuzhiyun if (device->state.role == R_PRIMARY)
3371*4882a593Smuzhiyun val |= 1;
3372*4882a593Smuzhiyun else
3373*4882a593Smuzhiyun val &= ~((u64)1);
3374*4882a593Smuzhiyun
3375*4882a593Smuzhiyun drbd_set_ed_uuid(device, val);
3376*4882a593Smuzhiyun }
3377*4882a593Smuzhiyun
3378*4882a593Smuzhiyun device->ldev->md.uuid[idx] = val;
3379*4882a593Smuzhiyun drbd_md_mark_dirty(device);
3380*4882a593Smuzhiyun }
3381*4882a593Smuzhiyun
3382*4882a593Smuzhiyun void _drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must_hold(local)
3383*4882a593Smuzhiyun {
3384*4882a593Smuzhiyun unsigned long flags;
3385*4882a593Smuzhiyun spin_lock_irqsave(&device->ldev->md.uuid_lock, flags);
3386*4882a593Smuzhiyun __drbd_uuid_set(device, idx, val);
3387*4882a593Smuzhiyun spin_unlock_irqrestore(&device->ldev->md.uuid_lock, flags);
3388*4882a593Smuzhiyun }
3389*4882a593Smuzhiyun
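/* Like _drbd_uuid_set(), but if the slot already holds a UUID, that old
 * value is rotated into the history slots first. */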
3390*4882a593Smuzhiyun void drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must_hold(local)
3391*4882a593Smuzhiyun {
3392*4882a593Smuzhiyun unsigned long flags;
3393*4882a593Smuzhiyun spin_lock_irqsave(&device->ldev->md.uuid_lock, flags);
3394*4882a593Smuzhiyun if (device->ldev->md.uuid[idx]) {
3395*4882a593Smuzhiyun drbd_uuid_move_history(device);
3396*4882a593Smuzhiyun device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[idx];
3397*4882a593Smuzhiyun }
3398*4882a593Smuzhiyun __drbd_uuid_set(device, idx, val);
3399*4882a593Smuzhiyun spin_unlock_irqrestore(&device->ldev->md.uuid_lock, flags);
3400*4882a593Smuzhiyun }
3401*4882a593Smuzhiyun
3402*4882a593Smuzhiyun /**
3403*4882a593Smuzhiyun * drbd_uuid_new_current() - Creates a new current UUID
3404*4882a593Smuzhiyun * @device: DRBD device.
3405*4882a593Smuzhiyun *
3406*4882a593Smuzhiyun * Creates a new current UUID, and rotates the old current UUID into
3407*4882a593Smuzhiyun * the bitmap slot. Causes an incremental resync upon next connect.
3408*4882a593Smuzhiyun */
3409*4882a593Smuzhiyun void drbd_uuid_new_current(struct drbd_device *device) __must_hold(local)
3410*4882a593Smuzhiyun {
3411*4882a593Smuzhiyun u64 val;
3412*4882a593Smuzhiyun unsigned long long bm_uuid;
3413*4882a593Smuzhiyun
3414*4882a593Smuzhiyun get_random_bytes(&val, sizeof(u64));
3415*4882a593Smuzhiyun
3416*4882a593Smuzhiyun spin_lock_irq(&device->ldev->md.uuid_lock);
3417*4882a593Smuzhiyun bm_uuid = device->ldev->md.uuid[UI_BITMAP];
3418*4882a593Smuzhiyun
3419*4882a593Smuzhiyun if (bm_uuid)
3420*4882a593Smuzhiyun drbd_warn(device, "bm UUID was already set: %llX\n", bm_uuid);
3421*4882a593Smuzhiyun
3422*4882a593Smuzhiyun device->ldev->md.uuid[UI_BITMAP] = device->ldev->md.uuid[UI_CURRENT];
3423*4882a593Smuzhiyun __drbd_uuid_set(device, UI_CURRENT, val);
3424*4882a593Smuzhiyun spin_unlock_irq(&device->ldev->md.uuid_lock);
3425*4882a593Smuzhiyun
3426*4882a593Smuzhiyun drbd_print_uuids(device, "new current UUID");
3427*4882a593Smuzhiyun /* get it to stable storage _now_ */
3428*4882a593Smuzhiyun drbd_md_sync(device);
3429*4882a593Smuzhiyun }
3430*4882a593Smuzhiyun
3431*4882a593Smuzhiyun void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local)
3432*4882a593Smuzhiyun {
3433*4882a593Smuzhiyun unsigned long flags;
3434*4882a593Smuzhiyun if (device->ldev->md.uuid[UI_BITMAP] == 0 && val == 0)
3435*4882a593Smuzhiyun return;
3436*4882a593Smuzhiyun
3437*4882a593Smuzhiyun spin_lock_irqsave(&device->ldev->md.uuid_lock, flags);
3438*4882a593Smuzhiyun if (val == 0) {
3439*4882a593Smuzhiyun drbd_uuid_move_history(device);
3440*4882a593Smuzhiyun device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
3441*4882a593Smuzhiyun device->ldev->md.uuid[UI_BITMAP] = 0;
3442*4882a593Smuzhiyun } else {
3443*4882a593Smuzhiyun unsigned long long bm_uuid = device->ldev->md.uuid[UI_BITMAP];
3444*4882a593Smuzhiyun if (bm_uuid)
3445*4882a593Smuzhiyun drbd_warn(device, "bm UUID was already set: %llX\n", bm_uuid);
3446*4882a593Smuzhiyun
3447*4882a593Smuzhiyun device->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1);
3448*4882a593Smuzhiyun }
3449*4882a593Smuzhiyun spin_unlock_irqrestore(&device->ldev->md.uuid_lock, flags);
3450*4882a593Smuzhiyun
3451*4882a593Smuzhiyun drbd_md_mark_dirty(device);
3452*4882a593Smuzhiyun }
3453*4882a593Smuzhiyun
3454*4882a593Smuzhiyun /**
3455*4882a593Smuzhiyun * drbd_bmio_set_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
3456*4882a593Smuzhiyun * @device: DRBD device.
3457*4882a593Smuzhiyun *
3458*4882a593Smuzhiyun * Sets all bits in the bitmap and writes the whole bitmap to stable storage.
3459*4882a593Smuzhiyun */
3460*4882a593Smuzhiyun int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local)
3461*4882a593Smuzhiyun {
3462*4882a593Smuzhiyun int rv = -EIO;
3463*4882a593Smuzhiyun
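	/* Set and sync MDF_FULL_SYNC first: should we crash after setting
	 * bits but before the bitmap reaches stable storage, the flag still
	 * forces a full sync on the next attach.  It is cleared again below
	 * only after the bitmap write succeeded. */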
3464*4882a593Smuzhiyun drbd_md_set_flag(device, MDF_FULL_SYNC);
3465*4882a593Smuzhiyun drbd_md_sync(device);
3466*4882a593Smuzhiyun drbd_bm_set_all(device);
3467*4882a593Smuzhiyun
3468*4882a593Smuzhiyun rv = drbd_bm_write(device);
3469*4882a593Smuzhiyun
3470*4882a593Smuzhiyun if (!rv) {
3471*4882a593Smuzhiyun drbd_md_clear_flag(device, MDF_FULL_SYNC);
3472*4882a593Smuzhiyun drbd_md_sync(device);
3473*4882a593Smuzhiyun }
3474*4882a593Smuzhiyun
3475*4882a593Smuzhiyun return rv;
3476*4882a593Smuzhiyun }
3477*4882a593Smuzhiyun
3478*4882a593Smuzhiyun /**
3479*4882a593Smuzhiyun * drbd_bmio_clear_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
3480*4882a593Smuzhiyun * @device: DRBD device.
3481*4882a593Smuzhiyun *
3482*4882a593Smuzhiyun * Clears all bits in the bitmap and writes the whole bitmap to stable storage.
3483*4882a593Smuzhiyun */
3484*4882a593Smuzhiyun int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local)
3485*4882a593Smuzhiyun {
3486*4882a593Smuzhiyun drbd_resume_al(device);
3487*4882a593Smuzhiyun drbd_bm_clear_all(device);
3488*4882a593Smuzhiyun return drbd_bm_write(device);
3489*4882a593Smuzhiyun }
3490*4882a593Smuzhiyun
3491*4882a593Smuzhiyun static int w_bitmap_io(struct drbd_work *w, int unused)
3492*4882a593Smuzhiyun {
3493*4882a593Smuzhiyun struct drbd_device *device =
3494*4882a593Smuzhiyun container_of(w, struct drbd_device, bm_io_work.w);
3495*4882a593Smuzhiyun struct bm_io_work *work = &device->bm_io_work;
3496*4882a593Smuzhiyun int rv = -EIO;
3497*4882a593Smuzhiyun
3498*4882a593Smuzhiyun if (work->flags != BM_LOCKED_CHANGE_ALLOWED) {
3499*4882a593Smuzhiyun int cnt = atomic_read(&device->ap_bio_cnt);
3500*4882a593Smuzhiyun if (cnt)
3501*4882a593Smuzhiyun drbd_err(device, "FIXME: ap_bio_cnt %d, expected 0; queued for '%s'\n",
3502*4882a593Smuzhiyun cnt, work->why);
3503*4882a593Smuzhiyun }
3504*4882a593Smuzhiyun
3505*4882a593Smuzhiyun if (get_ldev(device)) {
3506*4882a593Smuzhiyun drbd_bm_lock(device, work->why, work->flags);
3507*4882a593Smuzhiyun rv = work->io_fn(device);
3508*4882a593Smuzhiyun drbd_bm_unlock(device);
3509*4882a593Smuzhiyun put_ldev(device);
3510*4882a593Smuzhiyun }
3511*4882a593Smuzhiyun
3512*4882a593Smuzhiyun clear_bit_unlock(BITMAP_IO, &device->flags);
3513*4882a593Smuzhiyun wake_up(&device->misc_wait);
3514*4882a593Smuzhiyun
3515*4882a593Smuzhiyun if (work->done)
3516*4882a593Smuzhiyun work->done(device, rv);
3517*4882a593Smuzhiyun
3518*4882a593Smuzhiyun clear_bit(BITMAP_IO_QUEUED, &device->flags);
3519*4882a593Smuzhiyun work->why = NULL;
3520*4882a593Smuzhiyun work->flags = 0;
3521*4882a593Smuzhiyun
3522*4882a593Smuzhiyun return 0;
3523*4882a593Smuzhiyun }
3524*4882a593Smuzhiyun
3525*4882a593Smuzhiyun /**
3526*4882a593Smuzhiyun * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
3527*4882a593Smuzhiyun * @device: DRBD device.
3528*4882a593Smuzhiyun * @io_fn: IO callback to be called when bitmap IO is possible
3529*4882a593Smuzhiyun * @done: callback to be called after the bitmap IO was performed
3530*4882a593Smuzhiyun * @why: Descriptive text of the reason for doing the IO
3531*4882a593Smuzhiyun *
3532*4882a593Smuzhiyun * While IO on the bitmap happens we freeze application IO, thus ensuring
3533*4882a593Smuzhiyun * that drbd_set_out_of_sync() can not be called. This function MAY ONLY be
3534*4882a593Smuzhiyun * called from worker context. It MUST NOT be used while a previous such
3535*4882a593Smuzhiyun * work is still pending!
3536*4882a593Smuzhiyun *
3537*4882a593Smuzhiyun * Its worker function encloses the call of io_fn() by get_ldev() and
3538*4882a593Smuzhiyun * put_ldev().
3539*4882a593Smuzhiyun */
3540*4882a593Smuzhiyun void drbd_queue_bitmap_io(struct drbd_device *device,
3541*4882a593Smuzhiyun int (*io_fn)(struct drbd_device *),
3542*4882a593Smuzhiyun void (*done)(struct drbd_device *, int),
3543*4882a593Smuzhiyun char *why, enum bm_flag flags)
3544*4882a593Smuzhiyun {
3545*4882a593Smuzhiyun D_ASSERT(device, current == first_peer_device(device)->connection->worker.task);
3546*4882a593Smuzhiyun
3547*4882a593Smuzhiyun D_ASSERT(device, !test_bit(BITMAP_IO_QUEUED, &device->flags));
3548*4882a593Smuzhiyun D_ASSERT(device, !test_bit(BITMAP_IO, &device->flags));
3549*4882a593Smuzhiyun D_ASSERT(device, list_empty(&device->bm_io_work.w.list));
3550*4882a593Smuzhiyun if (device->bm_io_work.why)
3551*4882a593Smuzhiyun drbd_err(device, "FIXME going to queue '%s' but '%s' still pending?\n",
3552*4882a593Smuzhiyun why, device->bm_io_work.why);
3553*4882a593Smuzhiyun
3554*4882a593Smuzhiyun device->bm_io_work.io_fn = io_fn;
3555*4882a593Smuzhiyun device->bm_io_work.done = done;
3556*4882a593Smuzhiyun device->bm_io_work.why = why;
3557*4882a593Smuzhiyun device->bm_io_work.flags = flags;
3558*4882a593Smuzhiyun
3559*4882a593Smuzhiyun spin_lock_irq(&device->resource->req_lock);
3560*4882a593Smuzhiyun set_bit(BITMAP_IO, &device->flags);
3561*4882a593Smuzhiyun /* don't wait for pending application IO if the caller indicates that
3562*4882a593Smuzhiyun * application IO does not conflict anyway. */
3563*4882a593Smuzhiyun if (flags == BM_LOCKED_CHANGE_ALLOWED || atomic_read(&device->ap_bio_cnt) == 0) {
3564*4882a593Smuzhiyun if (!test_and_set_bit(BITMAP_IO_QUEUED, &device->flags))
3565*4882a593Smuzhiyun drbd_queue_work(&first_peer_device(device)->connection->sender_work,
3566*4882a593Smuzhiyun &device->bm_io_work.w);
3567*4882a593Smuzhiyun }
3568*4882a593Smuzhiyun spin_unlock_irq(&device->resource->req_lock);
3569*4882a593Smuzhiyun }
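/*
 * If application IO is still in flight at this point (and the flags ask for
 * it to be locked out), the work is only queued later, once the last such
 * bio completes and ap_bio_cnt drops to zero.
 *
 * A typical call looks like the sketch below; the done callback, the
 * descriptive string and the flags are whatever the caller needs:
 *
 *	drbd_queue_bitmap_io(device, &drbd_bmio_set_n_write, done_fn,
 *			     "set_n_write from attaching", flags);
 */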
3570*4882a593Smuzhiyun
3571*4882a593Smuzhiyun /**
3572*4882a593Smuzhiyun * drbd_bitmap_io() - Does an IO operation on the whole bitmap
3573*4882a593Smuzhiyun * @device: DRBD device.
3574*4882a593Smuzhiyun * @io_fn: IO callback to be called when bitmap IO is possible
3575*4882a593Smuzhiyun * @why: Descriptive text of the reason for doing the IO
3576*4882a593Smuzhiyun *
3577*4882a593Smuzhiyun * Freezes application IO while the actual IO operation runs. This
3578*4882a593Smuzhiyun * function MAY NOT be called from worker context.
3579*4882a593Smuzhiyun */
3580*4882a593Smuzhiyun int drbd_bitmap_io(struct drbd_device *device, int (*io_fn)(struct drbd_device *),
3581*4882a593Smuzhiyun char *why, enum bm_flag flags)
3582*4882a593Smuzhiyun {
3583*4882a593Smuzhiyun /* Only suspend io if some operation is supposed to be locked out */
3584*4882a593Smuzhiyun const bool do_suspend_io = flags & (BM_DONT_CLEAR|BM_DONT_SET|BM_DONT_TEST);
3585*4882a593Smuzhiyun int rv;
3586*4882a593Smuzhiyun
3587*4882a593Smuzhiyun D_ASSERT(device, current != first_peer_device(device)->connection->worker.task);
3588*4882a593Smuzhiyun
3589*4882a593Smuzhiyun if (do_suspend_io)
3590*4882a593Smuzhiyun drbd_suspend_io(device);
3591*4882a593Smuzhiyun
3592*4882a593Smuzhiyun drbd_bm_lock(device, why, flags);
3593*4882a593Smuzhiyun rv = io_fn(device);
3594*4882a593Smuzhiyun drbd_bm_unlock(device);
3595*4882a593Smuzhiyun
3596*4882a593Smuzhiyun if (do_suspend_io)
3597*4882a593Smuzhiyun drbd_resume_io(device);
3598*4882a593Smuzhiyun
3599*4882a593Smuzhiyun return rv;
3600*4882a593Smuzhiyun }
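/* Synchronous counterpart of drbd_queue_bitmap_io(): io_fn() runs directly
 * in the caller's context, e.g. drbd_bitmap_io(device, &drbd_bm_write,
 * "reason", flags), with the reason string and the flags supplied by the
 * caller. */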
3601*4882a593Smuzhiyun
3602*4882a593Smuzhiyun void drbd_md_set_flag(struct drbd_device *device, int flag) __must_hold(local)
3603*4882a593Smuzhiyun {
3604*4882a593Smuzhiyun if ((device->ldev->md.flags & flag) != flag) {
3605*4882a593Smuzhiyun drbd_md_mark_dirty(device);
3606*4882a593Smuzhiyun device->ldev->md.flags |= flag;
3607*4882a593Smuzhiyun }
3608*4882a593Smuzhiyun }
3609*4882a593Smuzhiyun
3610*4882a593Smuzhiyun void drbd_md_clear_flag(struct drbd_device *device, int flag) __must_hold(local)
3611*4882a593Smuzhiyun {
3612*4882a593Smuzhiyun if ((device->ldev->md.flags & flag) != 0) {
3613*4882a593Smuzhiyun drbd_md_mark_dirty(device);
3614*4882a593Smuzhiyun device->ldev->md.flags &= ~flag;
3615*4882a593Smuzhiyun }
3616*4882a593Smuzhiyun }
3617*4882a593Smuzhiyun int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag)
3618*4882a593Smuzhiyun {
3619*4882a593Smuzhiyun return (bdev->md.flags & flag) != 0;
3620*4882a593Smuzhiyun }
3621*4882a593Smuzhiyun
3622*4882a593Smuzhiyun static void md_sync_timer_fn(struct timer_list *t)
3623*4882a593Smuzhiyun {
3624*4882a593Smuzhiyun struct drbd_device *device = from_timer(device, t, md_sync_timer);
3625*4882a593Smuzhiyun drbd_device_post_work(device, MD_SYNC);
3626*4882a593Smuzhiyun }
3627*4882a593Smuzhiyun
3628*4882a593Smuzhiyun const char *cmdname(enum drbd_packet cmd)
3629*4882a593Smuzhiyun {
3630*4882a593Smuzhiyun /* THINK may need to become several global tables
3631*4882a593Smuzhiyun * when we want to support more than
3632*4882a593Smuzhiyun * one PRO_VERSION */
3633*4882a593Smuzhiyun static const char *cmdnames[] = {
3634*4882a593Smuzhiyun
3635*4882a593Smuzhiyun [P_DATA] = "Data",
3636*4882a593Smuzhiyun [P_DATA_REPLY] = "DataReply",
3637*4882a593Smuzhiyun [P_RS_DATA_REPLY] = "RSDataReply",
3638*4882a593Smuzhiyun [P_BARRIER] = "Barrier",
3639*4882a593Smuzhiyun [P_BITMAP] = "ReportBitMap",
3640*4882a593Smuzhiyun [P_BECOME_SYNC_TARGET] = "BecomeSyncTarget",
3641*4882a593Smuzhiyun [P_BECOME_SYNC_SOURCE] = "BecomeSyncSource",
3642*4882a593Smuzhiyun [P_UNPLUG_REMOTE] = "UnplugRemote",
3643*4882a593Smuzhiyun [P_DATA_REQUEST] = "DataRequest",
3644*4882a593Smuzhiyun [P_RS_DATA_REQUEST] = "RSDataRequest",
3645*4882a593Smuzhiyun [P_SYNC_PARAM] = "SyncParam",
3646*4882a593Smuzhiyun [P_PROTOCOL] = "ReportProtocol",
3647*4882a593Smuzhiyun [P_UUIDS] = "ReportUUIDs",
3648*4882a593Smuzhiyun [P_SIZES] = "ReportSizes",
3649*4882a593Smuzhiyun [P_STATE] = "ReportState",
3650*4882a593Smuzhiyun [P_SYNC_UUID] = "ReportSyncUUID",
3651*4882a593Smuzhiyun [P_AUTH_CHALLENGE] = "AuthChallenge",
3652*4882a593Smuzhiyun [P_AUTH_RESPONSE] = "AuthResponse",
3653*4882a593Smuzhiyun [P_STATE_CHG_REQ] = "StateChgRequest",
3654*4882a593Smuzhiyun [P_PING] = "Ping",
3655*4882a593Smuzhiyun [P_PING_ACK] = "PingAck",
3656*4882a593Smuzhiyun [P_RECV_ACK] = "RecvAck",
3657*4882a593Smuzhiyun [P_WRITE_ACK] = "WriteAck",
3658*4882a593Smuzhiyun [P_RS_WRITE_ACK] = "RSWriteAck",
3659*4882a593Smuzhiyun [P_SUPERSEDED] = "Superseded",
3660*4882a593Smuzhiyun [P_NEG_ACK] = "NegAck",
3661*4882a593Smuzhiyun [P_NEG_DREPLY] = "NegDReply",
3662*4882a593Smuzhiyun [P_NEG_RS_DREPLY] = "NegRSDReply",
3663*4882a593Smuzhiyun [P_BARRIER_ACK] = "BarrierAck",
3664*4882a593Smuzhiyun [P_STATE_CHG_REPLY] = "StateChgReply",
3665*4882a593Smuzhiyun [P_OV_REQUEST] = "OVRequest",
3666*4882a593Smuzhiyun [P_OV_REPLY] = "OVReply",
3667*4882a593Smuzhiyun [P_OV_RESULT] = "OVResult",
3668*4882a593Smuzhiyun [P_CSUM_RS_REQUEST] = "CsumRSRequest",
3669*4882a593Smuzhiyun [P_RS_IS_IN_SYNC] = "CsumRSIsInSync",
3670*4882a593Smuzhiyun [P_SYNC_PARAM89] = "SyncParam89",
3671*4882a593Smuzhiyun [P_COMPRESSED_BITMAP] = "CBitmap",
3672*4882a593Smuzhiyun [P_DELAY_PROBE] = "DelayProbe",
3673*4882a593Smuzhiyun [P_OUT_OF_SYNC] = "OutOfSync",
3674*4882a593Smuzhiyun [P_RS_CANCEL] = "RSCancel",
3675*4882a593Smuzhiyun [P_CONN_ST_CHG_REQ] = "conn_st_chg_req",
3676*4882a593Smuzhiyun [P_CONN_ST_CHG_REPLY] = "conn_st_chg_reply",
3677*4882a593Smuzhiyun [P_RETRY_WRITE] = "retry_write",
3678*4882a593Smuzhiyun [P_PROTOCOL_UPDATE] = "protocol_update",
3679*4882a593Smuzhiyun [P_TRIM] = "Trim",
3680*4882a593Smuzhiyun [P_RS_THIN_REQ] = "rs_thin_req",
3681*4882a593Smuzhiyun [P_RS_DEALLOCATED] = "rs_deallocated",
3682*4882a593Smuzhiyun [P_WSAME] = "WriteSame",
3683*4882a593Smuzhiyun [P_ZEROES] = "Zeroes",
3684*4882a593Smuzhiyun
3685*4882a593Smuzhiyun /* enum drbd_packet, but not commands - obsoleted flags:
3686*4882a593Smuzhiyun * P_MAY_IGNORE
3687*4882a593Smuzhiyun * P_MAX_OPT_CMD
3688*4882a593Smuzhiyun */
3689*4882a593Smuzhiyun };
3690*4882a593Smuzhiyun
3691*4882a593Smuzhiyun /* too big for the array: 0xfffX */
3692*4882a593Smuzhiyun if (cmd == P_INITIAL_META)
3693*4882a593Smuzhiyun return "InitialMeta";
3694*4882a593Smuzhiyun if (cmd == P_INITIAL_DATA)
3695*4882a593Smuzhiyun return "InitialData";
3696*4882a593Smuzhiyun if (cmd == P_CONNECTION_FEATURES)
3697*4882a593Smuzhiyun return "ConnectionFeatures";
3698*4882a593Smuzhiyun if (cmd >= ARRAY_SIZE(cmdnames))
3699*4882a593Smuzhiyun return "Unknown";
3700*4882a593Smuzhiyun return cmdnames[cmd];
3701*4882a593Smuzhiyun }
3702*4882a593Smuzhiyun
3703*4882a593Smuzhiyun /**
3704*4882a593Smuzhiyun * drbd_wait_misc - wait for a request to make progress
3705*4882a593Smuzhiyun * @device: device associated with the request
3706*4882a593Smuzhiyun * @i: the struct drbd_interval embedded in struct drbd_request or
3707*4882a593Smuzhiyun * struct drbd_peer_request
3708*4882a593Smuzhiyun */
3709*4882a593Smuzhiyun int drbd_wait_misc(struct drbd_device *device, struct drbd_interval *i)
3710*4882a593Smuzhiyun {
3711*4882a593Smuzhiyun struct net_conf *nc;
3712*4882a593Smuzhiyun DEFINE_WAIT(wait);
3713*4882a593Smuzhiyun long timeout;
3714*4882a593Smuzhiyun
3715*4882a593Smuzhiyun rcu_read_lock();
3716*4882a593Smuzhiyun nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
3717*4882a593Smuzhiyun if (!nc) {
3718*4882a593Smuzhiyun rcu_read_unlock();
3719*4882a593Smuzhiyun return -ETIMEDOUT;
3720*4882a593Smuzhiyun }
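	/* nc->timeout is configured in tenths of a second; scale it to
	 * jiffies and multiply by ko_count, or wait indefinitely if
	 * ko_count is zero. */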
3721*4882a593Smuzhiyun timeout = nc->ko_count ? nc->timeout * HZ / 10 * nc->ko_count : MAX_SCHEDULE_TIMEOUT;
3722*4882a593Smuzhiyun rcu_read_unlock();
3723*4882a593Smuzhiyun
3724*4882a593Smuzhiyun /* Indicate to wake up device->misc_wait on progress. */
3725*4882a593Smuzhiyun i->waiting = true;
3726*4882a593Smuzhiyun prepare_to_wait(&device->misc_wait, &wait, TASK_INTERRUPTIBLE);
3727*4882a593Smuzhiyun spin_unlock_irq(&device->resource->req_lock);
3728*4882a593Smuzhiyun timeout = schedule_timeout(timeout);
3729*4882a593Smuzhiyun finish_wait(&device->misc_wait, &wait);
3730*4882a593Smuzhiyun spin_lock_irq(&device->resource->req_lock);
3731*4882a593Smuzhiyun if (!timeout || device->state.conn < C_CONNECTED)
3732*4882a593Smuzhiyun return -ETIMEDOUT;
3733*4882a593Smuzhiyun if (signal_pending(current))
3734*4882a593Smuzhiyun return -ERESTARTSYS;
3735*4882a593Smuzhiyun return 0;
3736*4882a593Smuzhiyun }
3737*4882a593Smuzhiyun
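/* Hold resources_mutex, disable interrupts, and take every resource's
 * req_lock in list order.  spin_lock_nested() gives each lock its own
 * lockdep subclass, so holding several of them at once does not trigger a
 * false positive.  unlock_all_resources() releases them in the same order. */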
3738*4882a593Smuzhiyun void lock_all_resources(void)
3739*4882a593Smuzhiyun {
3740*4882a593Smuzhiyun struct drbd_resource *resource;
3741*4882a593Smuzhiyun int __maybe_unused i = 0;
3742*4882a593Smuzhiyun
3743*4882a593Smuzhiyun mutex_lock(&resources_mutex);
3744*4882a593Smuzhiyun local_irq_disable();
3745*4882a593Smuzhiyun for_each_resource(resource, &drbd_resources)
3746*4882a593Smuzhiyun spin_lock_nested(&resource->req_lock, i++);
3747*4882a593Smuzhiyun }
3748*4882a593Smuzhiyun
3749*4882a593Smuzhiyun void unlock_all_resources(void)
3750*4882a593Smuzhiyun {
3751*4882a593Smuzhiyun struct drbd_resource *resource;
3752*4882a593Smuzhiyun
3753*4882a593Smuzhiyun for_each_resource(resource, &drbd_resources)
3754*4882a593Smuzhiyun spin_unlock(&resource->req_lock);
3755*4882a593Smuzhiyun local_irq_enable();
3756*4882a593Smuzhiyun mutex_unlock(&resources_mutex);
3757*4882a593Smuzhiyun }
3758*4882a593Smuzhiyun
3759*4882a593Smuzhiyun #ifdef CONFIG_DRBD_FAULT_INJECTION
3760*4882a593Smuzhiyun /* Fault insertion support including random number generator shamelessly
3761*4882a593Smuzhiyun * stolen from kernel/rcutorture.c */
3762*4882a593Smuzhiyun struct fault_random_state {
3763*4882a593Smuzhiyun unsigned long state;
3764*4882a593Smuzhiyun unsigned long count;
3765*4882a593Smuzhiyun };
3766*4882a593Smuzhiyun
3767*4882a593Smuzhiyun #define FAULT_RANDOM_MULT 39916801 /* prime */
3768*4882a593Smuzhiyun #define FAULT_RANDOM_ADD 479001701 /* prime */
3769*4882a593Smuzhiyun #define FAULT_RANDOM_REFRESH 10000
3770*4882a593Smuzhiyun
3771*4882a593Smuzhiyun /*
3772*4882a593Smuzhiyun * Crude but fast random-number generator. Uses a linear congruential
3773*4882a593Smuzhiyun * generator, with occasional help from get_random_bytes().
3774*4882a593Smuzhiyun */
3775*4882a593Smuzhiyun static unsigned long
3776*4882a593Smuzhiyun _drbd_fault_random(struct fault_random_state *rsp)
3777*4882a593Smuzhiyun {
3778*4882a593Smuzhiyun long refresh;
3779*4882a593Smuzhiyun
3780*4882a593Smuzhiyun if (!rsp->count--) {
3781*4882a593Smuzhiyun get_random_bytes(&refresh, sizeof(refresh));
3782*4882a593Smuzhiyun rsp->state += refresh;
3783*4882a593Smuzhiyun rsp->count = FAULT_RANDOM_REFRESH;
3784*4882a593Smuzhiyun }
3785*4882a593Smuzhiyun rsp->state = rsp->state * FAULT_RANDOM_MULT + FAULT_RANDOM_ADD;
3786*4882a593Smuzhiyun return swahw32(rsp->state);
3787*4882a593Smuzhiyun }
3788*4882a593Smuzhiyun
3789*4882a593Smuzhiyun static char *
3790*4882a593Smuzhiyun _drbd_fault_str(unsigned int type) {
3791*4882a593Smuzhiyun static char *_faults[] = {
3792*4882a593Smuzhiyun [DRBD_FAULT_MD_WR] = "Meta-data write",
3793*4882a593Smuzhiyun [DRBD_FAULT_MD_RD] = "Meta-data read",
3794*4882a593Smuzhiyun [DRBD_FAULT_RS_WR] = "Resync write",
3795*4882a593Smuzhiyun [DRBD_FAULT_RS_RD] = "Resync read",
3796*4882a593Smuzhiyun [DRBD_FAULT_DT_WR] = "Data write",
3797*4882a593Smuzhiyun [DRBD_FAULT_DT_RD] = "Data read",
3798*4882a593Smuzhiyun [DRBD_FAULT_DT_RA] = "Data read ahead",
3799*4882a593Smuzhiyun [DRBD_FAULT_BM_ALLOC] = "BM allocation",
3800*4882a593Smuzhiyun [DRBD_FAULT_AL_EE] = "EE allocation",
3801*4882a593Smuzhiyun [DRBD_FAULT_RECEIVE] = "receive data corruption",
3802*4882a593Smuzhiyun };
3803*4882a593Smuzhiyun
3804*4882a593Smuzhiyun return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**";
3805*4882a593Smuzhiyun }
3806*4882a593Smuzhiyun
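/* Decide whether to inject a fault of @type on @device: drbd_fault_devs is
 * a bitmask of minor numbers (0 means all devices), drbd_fault_rate is the
 * approximate probability in percent. */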
3807*4882a593Smuzhiyun unsigned int
3808*4882a593Smuzhiyun _drbd_insert_fault(struct drbd_device *device, unsigned int type)
3809*4882a593Smuzhiyun {
3810*4882a593Smuzhiyun static struct fault_random_state rrs = {0, 0};
3811*4882a593Smuzhiyun
3812*4882a593Smuzhiyun unsigned int ret = (
3813*4882a593Smuzhiyun (drbd_fault_devs == 0 ||
3814*4882a593Smuzhiyun ((1 << device_to_minor(device)) & drbd_fault_devs) != 0) &&
3815*4882a593Smuzhiyun (((_drbd_fault_random(&rrs) % 100) + 1) <= drbd_fault_rate));
3816*4882a593Smuzhiyun
3817*4882a593Smuzhiyun if (ret) {
3818*4882a593Smuzhiyun drbd_fault_count++;
3819*4882a593Smuzhiyun
3820*4882a593Smuzhiyun if (__ratelimit(&drbd_ratelimit_state))
3821*4882a593Smuzhiyun drbd_warn(device, "***Simulating %s failure\n",
3822*4882a593Smuzhiyun _drbd_fault_str(type));
3823*4882a593Smuzhiyun }
3824*4882a593Smuzhiyun
3825*4882a593Smuzhiyun return ret;
3826*4882a593Smuzhiyun }
3827*4882a593Smuzhiyun #endif
3828*4882a593Smuzhiyun
3829*4882a593Smuzhiyun const char *drbd_buildtag(void)
3830*4882a593Smuzhiyun {
3831*4882a593Smuzhiyun /* DRBD built from external sources has a reference here to the
3832*4882a593Smuzhiyun git hash of the source code. */
3833*4882a593Smuzhiyun
3834*4882a593Smuzhiyun static char buildtag[38] = "\0uilt-in";
3835*4882a593Smuzhiyun
3836*4882a593Smuzhiyun if (buildtag[0] == 0) {
3837*4882a593Smuzhiyun #ifdef MODULE
3838*4882a593Smuzhiyun sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion);
3839*4882a593Smuzhiyun #else
3840*4882a593Smuzhiyun buildtag[0] = 'b';
3841*4882a593Smuzhiyun #endif
3842*4882a593Smuzhiyun }
3843*4882a593Smuzhiyun
3844*4882a593Smuzhiyun return buildtag;
3845*4882a593Smuzhiyun }
3846*4882a593Smuzhiyun
3847*4882a593Smuzhiyun module_init(drbd_init)
3848*4882a593Smuzhiyun module_exit(drbd_cleanup)
3849*4882a593Smuzhiyun
3850*4882a593Smuzhiyun EXPORT_SYMBOL(drbd_conn_str);
3851*4882a593Smuzhiyun EXPORT_SYMBOL(drbd_role_str);
3852*4882a593Smuzhiyun EXPORT_SYMBOL(drbd_disk_str);
3853*4882a593Smuzhiyun EXPORT_SYMBOL(drbd_set_st_err_str);