// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Network block device - make block devices work over TCP
 *
 * Note that you can not swap over this thing, yet. Seems to work but
 * deadlocks sometimes - you can not swap over TCP in general.
 *
 * Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz>
 * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com>
 *
 * (part of code stolen from loop.c)
 */

#include <linux/major.h>

#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/ioctl.h>
#include <linux/mutex.h>
#include <linux/compiler.h>
#include <linux/completion.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <linux/net.h>
#include <linux/kthread.h>
#include <linux/types.h>
#include <linux/debugfs.h>
#include <linux/blk-mq.h>

#include <linux/uaccess.h>
#include <asm/types.h>

#include <linux/nbd.h>
#include <linux/nbd-netlink.h>
#include <net/genetlink.h>

#define CREATE_TRACE_POINTS
#include <trace/events/nbd.h>

static DEFINE_IDR(nbd_index_idr);
static DEFINE_MUTEX(nbd_index_mutex);
static int nbd_total_devices = 0;

struct nbd_sock {
	struct socket *sock;
	struct mutex tx_lock;
	struct request *pending;
	int sent;
	bool dead;
	int fallback_index;
	int cookie;
};

struct recv_thread_args {
	struct work_struct work;
	struct nbd_device *nbd;
	int index;
};

struct link_dead_args {
	struct work_struct work;
	int index;
};

#define NBD_RT_TIMEDOUT			0
#define NBD_RT_DISCONNECT_REQUESTED	1
#define NBD_RT_DISCONNECTED		2
#define NBD_RT_HAS_PID_FILE		3
#define NBD_RT_HAS_CONFIG_REF		4
#define NBD_RT_BOUND			5
#define NBD_RT_DISCONNECT_ON_CLOSE	6

#define NBD_DESTROY_ON_DISCONNECT	0
#define NBD_DISCONNECT_REQUESTED	1

struct nbd_config {
	u32 flags;
	unsigned long runtime_flags;
	u64 dead_conn_timeout;

	struct nbd_sock **socks;
	int num_connections;
	atomic_t live_connections;
	wait_queue_head_t conn_wait;

	atomic_t recv_threads;
	wait_queue_head_t recv_wq;
	loff_t blksize;
	loff_t bytesize;
#if IS_ENABLED(CONFIG_DEBUG_FS)
	struct dentry *dbg_dir;
#endif
};

struct nbd_device {
	struct blk_mq_tag_set tag_set;

	int index;
	refcount_t config_refs;
	refcount_t refs;
	struct nbd_config *config;
	struct mutex config_lock;
	struct gendisk *disk;
	struct workqueue_struct *recv_workq;

	struct list_head list;
	struct task_struct *task_recv;
	struct task_struct *task_setup;

	struct completion *destroy_complete;
	unsigned long flags;
};

#define NBD_CMD_REQUEUED	1

struct nbd_cmd {
	struct nbd_device *nbd;
	struct mutex lock;
	int index;
	int cookie;
	int retries;
	blk_status_t status;
	unsigned long flags;
	u32 cmd_cookie;
};

#if IS_ENABLED(CONFIG_DEBUG_FS)
static struct dentry *nbd_dbg_dir;
#endif

#define nbd_name(nbd) ((nbd)->disk->disk_name)

#define NBD_MAGIC 0x68797548

#define NBD_DEF_BLKSIZE 1024

static unsigned int nbds_max = 16;
static int max_part = 16;
static int part_shift;

static int nbd_dev_dbg_init(struct nbd_device *nbd);
static void nbd_dev_dbg_close(struct nbd_device *nbd);
static void nbd_config_put(struct nbd_device *nbd);
static void nbd_connect_reply(struct genl_info *info, int index);
static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info);
static void nbd_dead_link_work(struct work_struct *work);
static void nbd_disconnect_and_put(struct nbd_device *nbd);

static inline struct device *nbd_to_dev(struct nbd_device *nbd)
{
	return disk_to_dev(nbd->disk);
}

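/*
 * Requeue a command, but only once: NBD_CMD_REQUEUED is set here and only
 * cleared when the request is dispatched again, so concurrent paths (e.g.
 * the timeout handler and the send path) cannot requeue the same request
 * twice.
 */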
static void nbd_requeue_cmd(struct nbd_cmd *cmd)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);

	if (!test_and_set_bit(NBD_CMD_REQUEUED, &cmd->flags))
		blk_mq_requeue_request(req, true);
}

#define NBD_COOKIE_BITS 32

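/*
 * The 64-bit handle sent on the wire packs the per-command cookie into the
 * upper 32 bits and the blk-mq unique tag into the lower 32 bits.  The
 * cookie lets us detect stale replies for a tag that has since been reused.
 */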
static u64 nbd_cmd_handle(struct nbd_cmd *cmd)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);
	u32 tag = blk_mq_unique_tag(req);
	u64 cookie = cmd->cmd_cookie;

	return (cookie << NBD_COOKIE_BITS) | tag;
}

static u32 nbd_handle_to_tag(u64 handle)
{
	return (u32)handle;
}

static u32 nbd_handle_to_cookie(u64 handle)
{
	return (u32)(handle >> NBD_COOKIE_BITS);
}

static const char *nbdcmd_to_ascii(int cmd)
{
	switch (cmd) {
	case NBD_CMD_READ: return "read";
	case NBD_CMD_WRITE: return "write";
	case NBD_CMD_DISC: return "disconnect";
	case NBD_CMD_FLUSH: return "flush";
	case NBD_CMD_TRIM: return "trim/discard";
	}
	return "invalid";
}

static ssize_t pid_show(struct device *dev,
			struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct nbd_device *nbd = (struct nbd_device *)disk->private_data;

	return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
}

static const struct device_attribute pid_attr = {
	.attr = { .name = "pid", .mode = 0444},
	.show = pid_show,
};

static void nbd_dev_remove(struct nbd_device *nbd)
{
	struct gendisk *disk = nbd->disk;
	struct request_queue *q;

	if (disk) {
		q = disk->queue;
		del_gendisk(disk);
		blk_cleanup_queue(q);
		blk_mq_free_tag_set(&nbd->tag_set);
		disk->private_data = NULL;
		put_disk(disk);
	}

	/*
	 * Do this last, just before the nbd is freed, so that the disk and
	 * its kobject are completely gone and cannot collide with a later
	 * creation of a device with the same name.
	 */
	if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) && nbd->destroy_complete)
		complete(nbd->destroy_complete);

	kfree(nbd);
}

static void nbd_put(struct nbd_device *nbd)
{
	if (refcount_dec_and_mutex_lock(&nbd->refs,
					&nbd_index_mutex)) {
		idr_remove(&nbd_index_idr, nbd->index);
		nbd_dev_remove(nbd);
		mutex_unlock(&nbd_index_mutex);
	}
}

static int nbd_disconnected(struct nbd_config *config)
{
	return test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags) ||
		test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
}

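/*
 * Mark a socket dead: shut it down, drop the live connection count and,
 * if @notify is set and the device was not deliberately disconnected,
 * schedule nbd_dead_link_work() to report the dead link.  Callers hold
 * the socket's tx_lock.
 */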
static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
				int notify)
{
	if (!nsock->dead && notify && !nbd_disconnected(nbd->config)) {
		struct link_dead_args *args;
		args = kmalloc(sizeof(struct link_dead_args), GFP_NOIO);
		if (args) {
			INIT_WORK(&args->work, nbd_dead_link_work);
			args->index = nbd->index;
			queue_work(system_wq, &args->work);
		}
	}
	if (!nsock->dead) {
		kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
		if (atomic_dec_return(&nbd->config->live_connections) == 0) {
			if (test_and_clear_bit(NBD_RT_DISCONNECT_REQUESTED,
					       &nbd->config->runtime_flags)) {
				set_bit(NBD_RT_DISCONNECTED,
					&nbd->config->runtime_flags);
				dev_info(nbd_to_dev(nbd),
					 "Disconnected due to user request.\n");
			}
		}
	}
	nsock->dead = true;
	nsock->pending = NULL;
	nsock->sent = 0;
}

static void nbd_size_clear(struct nbd_device *nbd)
{
	if (nbd->config->bytesize) {
		set_capacity(nbd->disk, 0);
		kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
	}
}

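/*
 * Propagate the configured block size and capacity to the queue limits,
 * the gendisk and, if one is currently open, the block device itself.
 */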
static void nbd_size_update(struct nbd_device *nbd, bool start)
{
	struct nbd_config *config = nbd->config;
	struct block_device *bdev = bdget_disk(nbd->disk, 0);
	sector_t nr_sectors = config->bytesize >> 9;

	if (config->flags & NBD_FLAG_SEND_TRIM) {
		nbd->disk->queue->limits.discard_granularity = config->blksize;
		nbd->disk->queue->limits.discard_alignment = config->blksize;
		blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
	}
	blk_queue_logical_block_size(nbd->disk->queue, config->blksize);
	blk_queue_physical_block_size(nbd->disk->queue, config->blksize);
	set_capacity(nbd->disk, nr_sectors);
	if (bdev) {
		if (bdev->bd_disk) {
			bd_set_nr_sectors(bdev, nr_sectors);
			if (start)
				set_blocksize(bdev, config->blksize);
		} else
			set_bit(GD_NEED_PART_SCAN, &nbd->disk->state);
		bdput(bdev);
	}
	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
}

static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize,
			 loff_t nr_blocks)
{
	struct nbd_config *config = nbd->config;
	config->blksize = blocksize;
	config->bytesize = blocksize * nr_blocks;
	if (nbd->task_recv != NULL)
		nbd_size_update(nbd, false);
}

static void nbd_complete_rq(struct request *req)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);

	dev_dbg(nbd_to_dev(cmd->nbd), "request %p: %s\n", req,
		cmd->status ? "failed" : "done");

	blk_mq_end_request(req, cmd->status);
}

/*
 * Forcibly shutdown the socket causing all listeners to error
 */
static void sock_shutdown(struct nbd_device *nbd)
{
	struct nbd_config *config = nbd->config;
	int i;

	if (config->num_connections == 0)
		return;
	if (test_and_set_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
		return;

	for (i = 0; i < config->num_connections; i++) {
		struct nbd_sock *nsock = config->socks[i];
		mutex_lock(&nsock->tx_lock);
		nbd_mark_nsock_dead(nbd, nsock, 0);
		mutex_unlock(&nsock->tx_lock);
	}
	dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n");
}

static u32 req_to_nbd_cmd_type(struct request *req)
{
	switch (req_op(req)) {
	case REQ_OP_DISCARD:
		return NBD_CMD_TRIM;
	case REQ_OP_FLUSH:
		return NBD_CMD_FLUSH;
	case REQ_OP_WRITE:
		return NBD_CMD_WRITE;
	case REQ_OP_READ:
		return NBD_CMD_READ;
	default:
		return U32_MAX;
	}
}

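/*
 * blk-mq timeout handler.  With more than one connection (or a non-zero
 * user timeout on a single connection) the request is requeued so the
 * submit path can pick a live socket; with the timeout disabled we only
 * warn and reset the timer; otherwise the device is shut down and the
 * request fails with an I/O error.
 */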
static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
						 bool reserved)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
	struct nbd_device *nbd = cmd->nbd;
	struct nbd_config *config;

	if (!mutex_trylock(&cmd->lock))
		return BLK_EH_RESET_TIMER;

	if (!refcount_inc_not_zero(&nbd->config_refs)) {
		cmd->status = BLK_STS_TIMEOUT;
		mutex_unlock(&cmd->lock);
		goto done;
	}
	config = nbd->config;

	if (config->num_connections > 1 ||
	    (config->num_connections == 1 && nbd->tag_set.timeout)) {
		dev_err_ratelimited(nbd_to_dev(nbd),
				    "Connection timed out, retrying (%d/%d alive)\n",
				    atomic_read(&config->live_connections),
				    config->num_connections);
		/*
		 * Hooray, we have more connections, so requeue this IO; the
		 * submit path will put it on a real connection. Or, if only
		 * one connection is configured, the submit path will wait
		 * until a new connection is reconfigured or until the dead
		 * connection timeout expires.
		 */
		if (config->socks) {
			if (cmd->index < config->num_connections) {
				struct nbd_sock *nsock =
					config->socks[cmd->index];
				mutex_lock(&nsock->tx_lock);
				/* We can have multiple outstanding requests,
				 * and we don't want to mark the nsock dead if
				 * we've already reconnected with a new socket,
				 * so only mark it dead if it's the same socket
				 * we were sent out on.
				 */
				if (cmd->cookie == nsock->cookie)
					nbd_mark_nsock_dead(nbd, nsock, 1);
				mutex_unlock(&nsock->tx_lock);
			}
			mutex_unlock(&cmd->lock);
			nbd_requeue_cmd(cmd);
			nbd_config_put(nbd);
			return BLK_EH_DONE;
		}
	}

	if (!nbd->tag_set.timeout) {
		/*
		 * Userspace sets timeout=0 to disable socket disconnection,
		 * so just warn and reset the timer.
		 */
		struct nbd_sock *nsock = config->socks[cmd->index];
		cmd->retries++;
		dev_info(nbd_to_dev(nbd), "Possible stuck request %p: control (%s@%llu,%uB). Runtime %u seconds\n",
			req, nbdcmd_to_ascii(req_to_nbd_cmd_type(req)),
			(unsigned long long)blk_rq_pos(req) << 9,
			blk_rq_bytes(req), (req->timeout / HZ) * cmd->retries);

		mutex_lock(&nsock->tx_lock);
		if (cmd->cookie != nsock->cookie) {
			nbd_requeue_cmd(cmd);
			mutex_unlock(&nsock->tx_lock);
			mutex_unlock(&cmd->lock);
			nbd_config_put(nbd);
			return BLK_EH_DONE;
		}
		mutex_unlock(&nsock->tx_lock);
		mutex_unlock(&cmd->lock);
		nbd_config_put(nbd);
		return BLK_EH_RESET_TIMER;
	}

	dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
	set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
	cmd->status = BLK_STS_IOERR;
	mutex_unlock(&cmd->lock);
	sock_shutdown(nbd);
	nbd_config_put(nbd);
done:
	blk_mq_complete_request(req);
	return BLK_EH_DONE;
}

/*
 * Send or receive packet.
 */
static int sock_xmit(struct nbd_device *nbd, int index, int send,
		     struct iov_iter *iter, int msg_flags, int *sent)
{
	struct nbd_config *config = nbd->config;
	struct socket *sock = config->socks[index]->sock;
	int result;
	struct msghdr msg;
	unsigned int noreclaim_flag;

	if (unlikely(!sock)) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Attempted %s on closed socket in sock_xmit\n",
				    (send ? "send" : "recv"));
		return -EINVAL;
	}

	msg.msg_iter = *iter;

	noreclaim_flag = memalloc_noreclaim_save();
	do {
		sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
		msg.msg_name = NULL;
		msg.msg_namelen = 0;
		msg.msg_control = NULL;
		msg.msg_controllen = 0;
		msg.msg_flags = msg_flags | MSG_NOSIGNAL;

		if (send)
			result = sock_sendmsg(sock, &msg);
		else
			result = sock_recvmsg(sock, &msg, msg.msg_flags);

		if (result <= 0) {
			if (result == 0)
				result = -EPIPE; /* short read */
			break;
		}
		if (sent)
			*sent += result;
	} while (msg_data_left(&msg));

	memalloc_noreclaim_restore(noreclaim_flag);

	return result;
}

/*
 * Different settings for sk->sk_sndtimeo can result in different return values
 * if there is a signal pending when we enter sendmsg, because reasons?
 */
static inline int was_interrupted(int result)
{
	return result == -ERESTARTSYS || result == -EINTR;
}

/* always call with the tx_lock held */
static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);
	struct nbd_config *config = nbd->config;
	struct nbd_sock *nsock = config->socks[index];
	int result;
	struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)};
	struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
	struct iov_iter from;
	unsigned long size = blk_rq_bytes(req);
	struct bio *bio;
	u64 handle;
	u32 type;
	u32 nbd_cmd_flags = 0;
	int sent = nsock->sent, skip = 0;

	iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));

	type = req_to_nbd_cmd_type(req);
	if (type == U32_MAX)
		return -EIO;

	if (rq_data_dir(req) == WRITE &&
	    (config->flags & NBD_FLAG_READ_ONLY)) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Write on read-only\n");
		return -EIO;
	}

	if (req->cmd_flags & REQ_FUA)
		nbd_cmd_flags |= NBD_CMD_FLAG_FUA;

	/* We did a partial send previously, and we at least sent the whole
	 * request struct, so just go and send the rest of the pages in the
	 * request.
	 */
	if (sent) {
		if (sent >= sizeof(request)) {
			skip = sent - sizeof(request);

			/* initialize handle for tracing purposes */
			handle = nbd_cmd_handle(cmd);

			goto send_pages;
		}
		iov_iter_advance(&from, sent);
	} else {
		cmd->cmd_cookie++;
	}
	cmd->index = index;
	cmd->cookie = nsock->cookie;
	cmd->retries = 0;
	request.type = htonl(type | nbd_cmd_flags);
	if (type != NBD_CMD_FLUSH) {
		request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
		request.len = htonl(size);
	}
	handle = nbd_cmd_handle(cmd);
	memcpy(request.handle, &handle, sizeof(handle));

	trace_nbd_send_request(&request, nbd->index, blk_mq_rq_from_pdu(cmd));

	dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
		req, nbdcmd_to_ascii(type),
		(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
	result = sock_xmit(nbd, index, 1, &from,
			   (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
	trace_nbd_header_sent(req, handle);
	if (result <= 0) {
		if (was_interrupted(result)) {
			/* If we haven't sent anything we can just return BUSY,
			 * however if we have sent something we need to make
			 * sure we only allow this req to be sent until we are
			 * completely done.
			 */
			if (sent) {
				nsock->pending = req;
				nsock->sent = sent;
			}
			set_bit(NBD_CMD_REQUEUED, &cmd->flags);
			return BLK_STS_RESOURCE;
		}
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Send control failed (result %d)\n", result);
		return -EAGAIN;
	}
send_pages:
	if (type != NBD_CMD_WRITE)
		goto out;

	bio = req->bio;
	while (bio) {
		struct bio *next = bio->bi_next;
		struct bvec_iter iter;
		struct bio_vec bvec;

		bio_for_each_segment(bvec, bio, iter) {
			bool is_last = !next && bio_iter_last(bvec, iter);
			int flags = is_last ? 0 : MSG_MORE;

			dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
				req, bvec.bv_len);
			iov_iter_bvec(&from, WRITE, &bvec, 1, bvec.bv_len);
			if (skip) {
				if (skip >= iov_iter_count(&from)) {
					skip -= iov_iter_count(&from);
					continue;
				}
				iov_iter_advance(&from, skip);
				skip = 0;
			}
			result = sock_xmit(nbd, index, 1, &from, flags, &sent);
			if (result <= 0) {
				if (was_interrupted(result)) {
					/* We've already sent the header, we
					 * have no choice but to set pending and
					 * return BUSY.
					 */
					nsock->pending = req;
					nsock->sent = sent;
					set_bit(NBD_CMD_REQUEUED, &cmd->flags);
					return BLK_STS_RESOURCE;
				}
				dev_err(disk_to_dev(nbd->disk),
					"Send data failed (result %d)\n",
					result);
				return -EAGAIN;
			}
			/*
			 * The completion might already have come in,
			 * so break for the last one instead of letting
			 * the iterator do it. This prevents use-after-free
			 * of the bio.
			 */
			if (is_last)
				break;
		}
		bio = next;
	}
out:
	trace_nbd_payload_sent(req, handle);
	nsock->pending = NULL;
	nsock->sent = 0;
	return 0;
}

/* An ERR_PTR return means something went wrong; inform userspace */
static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
{
	struct nbd_config *config = nbd->config;
	int result;
	struct nbd_reply reply;
	struct nbd_cmd *cmd;
	struct request *req = NULL;
	u64 handle;
	u16 hwq;
	u32 tag;
	struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)};
	struct iov_iter to;
	int ret = 0;

	reply.magic = 0;
	iov_iter_kvec(&to, READ, &iov, 1, sizeof(reply));
	result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
	if (result <= 0) {
		if (!nbd_disconnected(config))
			dev_err(disk_to_dev(nbd->disk),
				"Receive control failed (result %d)\n", result);
		return ERR_PTR(result);
	}

	if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
		dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
			(unsigned long)ntohl(reply.magic));
		return ERR_PTR(-EPROTO);
	}

	memcpy(&handle, reply.handle, sizeof(handle));
	tag = nbd_handle_to_tag(handle);
	hwq = blk_mq_unique_tag_to_hwq(tag);
	if (hwq < nbd->tag_set.nr_hw_queues)
		req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
				       blk_mq_unique_tag_to_tag(tag));
	if (!req || !blk_mq_request_started(req)) {
		dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n",
			tag, req);
		return ERR_PTR(-ENOENT);
	}
	trace_nbd_header_received(req, handle);
	cmd = blk_mq_rq_to_pdu(req);

	mutex_lock(&cmd->lock);
	if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) {
		dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n",
			req, cmd->cmd_cookie, nbd_handle_to_cookie(handle));
		ret = -ENOENT;
		goto out;
	}
	if (cmd->status != BLK_STS_OK) {
		dev_err(disk_to_dev(nbd->disk), "Command already handled %p\n",
			req);
		ret = -ENOENT;
		goto out;
	}
	if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) {
		dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n",
			req);
		ret = -ENOENT;
		goto out;
	}
	if (ntohl(reply.error)) {
		dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
			ntohl(reply.error));
		cmd->status = BLK_STS_IOERR;
		goto out;
	}

	dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req);
	if (rq_data_dir(req) != WRITE) {
		struct req_iterator iter;
		struct bio_vec bvec;

		rq_for_each_segment(bvec, req, iter) {
			iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len);
			result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
			if (result <= 0) {
				dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
					result);
				/*
				 * If we've disconnected, we need to make sure we
				 * complete this request, otherwise error out
				 * and let the timeout stuff handle resubmitting
				 * this request onto another connection.
				 */
				if (nbd_disconnected(config)) {
					cmd->status = BLK_STS_IOERR;
					goto out;
				}
				ret = -EIO;
				goto out;
			}
			dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
				req, bvec.bv_len);
		}
	}
out:
	trace_nbd_payload_received(req, handle);
	mutex_unlock(&cmd->lock);
	return ret ? ERR_PTR(ret) : cmd;
}

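/*
 * Per-connection receive worker: read replies off the socket and complete
 * the matching requests until the connection dies or is disconnected.
 */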
static void recv_work(struct work_struct *work)
{
	struct recv_thread_args *args = container_of(work,
						     struct recv_thread_args,
						     work);
	struct nbd_device *nbd = args->nbd;
	struct nbd_config *config = nbd->config;
	struct nbd_cmd *cmd;
	struct request *rq;

	while (1) {
		cmd = nbd_read_stat(nbd, args->index);
		if (IS_ERR(cmd)) {
			struct nbd_sock *nsock = config->socks[args->index];

			mutex_lock(&nsock->tx_lock);
			nbd_mark_nsock_dead(nbd, nsock, 1);
			mutex_unlock(&nsock->tx_lock);
			break;
		}

		rq = blk_mq_rq_from_pdu(cmd);
		if (likely(!blk_should_fake_timeout(rq->q)))
			blk_mq_complete_request(rq);
	}
	nbd_config_put(nbd);
	atomic_dec(&config->recv_threads);
	wake_up(&config->recv_wq);
	kfree(args);
}

static bool nbd_clear_req(struct request *req, void *data, bool reserved)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);

	/* don't abort one completed request */
	if (blk_mq_request_completed(req))
		return true;

	mutex_lock(&cmd->lock);
	cmd->status = BLK_STS_IOERR;
	mutex_unlock(&cmd->lock);

	blk_mq_complete_request(req);
	return true;
}

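/*
 * Fail every request that is still outstanding, e.g. after all sockets
 * have been shut down.
 */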
static void nbd_clear_que(struct nbd_device *nbd)
{
	blk_mq_quiesce_queue(nbd->disk->queue);
	blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
	blk_mq_unquiesce_queue(nbd->disk->queue);
	dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
}

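/*
 * Pick another live connection to resend a request on when the socket it
 * was originally sent on has died.  Returns -1 if no live connection is
 * available.
 */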
static int find_fallback(struct nbd_device *nbd, int index)
{
	struct nbd_config *config = nbd->config;
	int new_index = -1;
	struct nbd_sock *nsock = config->socks[index];
	int fallback = nsock->fallback_index;

	if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
		return new_index;

	if (config->num_connections <= 1) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Dead connection, failed to find a fallback\n");
		return new_index;
	}

	if (fallback >= 0 && fallback < config->num_connections &&
	    !config->socks[fallback]->dead)
		return fallback;

	if (nsock->fallback_index < 0 ||
	    nsock->fallback_index >= config->num_connections ||
	    config->socks[nsock->fallback_index]->dead) {
		int i;
		for (i = 0; i < config->num_connections; i++) {
			if (i == index)
				continue;
			if (!config->socks[i]->dead) {
				new_index = i;
				break;
			}
		}
		nsock->fallback_index = new_index;
		if (new_index < 0) {
			dev_err_ratelimited(disk_to_dev(nbd->disk),
					    "Dead connection, failed to find a fallback\n");
			return new_index;
		}
	}
	new_index = nsock->fallback_index;
	return new_index;
}

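/*
 * Wait up to dead_conn_timeout for a connection to come back.  Returns
 * nonzero if a live connection showed up and the request can be retried,
 * zero if we should give up.
 */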
static int wait_for_reconnect(struct nbd_device *nbd)
{
	struct nbd_config *config = nbd->config;
	if (!config->dead_conn_timeout)
		return 0;

	if (!wait_event_timeout(config->conn_wait,
				test_bit(NBD_RT_DISCONNECTED,
					 &config->runtime_flags) ||
				atomic_read(&config->live_connections) > 0,
				config->dead_conn_timeout))
		return 0;

	return !test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
}

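/*
 * Send one request on the given connection, falling back to another live
 * socket (or waiting for a reconnect) if that connection is dead.  Called
 * from the blk-mq queue_rq path with cmd->lock held.
 */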
static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);
	struct nbd_device *nbd = cmd->nbd;
	struct nbd_config *config;
	struct nbd_sock *nsock;
	int ret;

	if (!refcount_inc_not_zero(&nbd->config_refs)) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Socks array is empty\n");
		blk_mq_start_request(req);
		return -EINVAL;
	}
	config = nbd->config;

	if (index >= config->num_connections) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Attempted send on invalid socket\n");
		nbd_config_put(nbd);
		blk_mq_start_request(req);
		return -EINVAL;
	}
	cmd->status = BLK_STS_OK;
again:
	nsock = config->socks[index];
	mutex_lock(&nsock->tx_lock);
	if (nsock->dead) {
		int old_index = index;
		index = find_fallback(nbd, index);
		mutex_unlock(&nsock->tx_lock);
		if (index < 0) {
			if (wait_for_reconnect(nbd)) {
				index = old_index;
				goto again;
			}
			/* All the sockets should already be down at this
			 * point, we just want to make sure that DISCONNECTED
			 * is set so any requests that come in that were
			 * queued waiting for the reconnect timer don't
			 * trigger the timer again and instead just error out.
			 */
			sock_shutdown(nbd);
			nbd_config_put(nbd);
			blk_mq_start_request(req);
			return -EIO;
		}
		goto again;
	}

	/* Handle the case that we have a pending request that was partially
	 * transmitted that _has_ to be serviced first. We need to call requeue
	 * here so that it gets put _after_ the request that is already on the
	 * dispatch list.
	 */
	blk_mq_start_request(req);
	if (unlikely(nsock->pending && nsock->pending != req)) {
		nbd_requeue_cmd(cmd);
		ret = 0;
		goto out;
	}
	/*
	 * Some failures are related to the link going down, so anything that
	 * returns EAGAIN can be retried on a different socket.
	 */
	ret = nbd_send_cmd(nbd, cmd, index);
	if (ret == -EAGAIN) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Request send failed, requeueing\n");
		nbd_mark_nsock_dead(nbd, nsock, 1);
		nbd_requeue_cmd(cmd);
		ret = 0;
	}
out:
	mutex_unlock(&nsock->tx_lock);
	nbd_config_put(nbd);
	return ret;
}

static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
				 const struct blk_mq_queue_data *bd)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
	int ret;

	/*
	 * Since we look at the bio's to send the request over the network we
	 * need to make sure the completion work doesn't mark this request done
	 * before we are done doing our send. This keeps us from dereferencing
	 * freed data if we have particularly fast completions (ie we get the
	 * completion before we exit sock_xmit on the last bvec) or in the case
	 * that the server is misbehaving (or there was an error) before we're
	 * done sending everything over the wire.
	 */
	mutex_lock(&cmd->lock);
	clear_bit(NBD_CMD_REQUEUED, &cmd->flags);

	/* We can be called directly from the user space process, which means we
	 * could possibly have signals pending so our sendmsg will fail. In
	 * this case we need to return that we are busy, otherwise error out as
	 * appropriate.
	 */
	ret = nbd_handle_cmd(cmd, hctx->queue_num);
	if (ret < 0)
		ret = BLK_STS_IOERR;
	else if (!ret)
		ret = BLK_STS_OK;
	mutex_unlock(&cmd->lock);

	return ret;
}

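/*
 * Look up the socket behind a user-supplied file descriptor and reject
 * sockets that cannot be shut down, since nbd relies on shutdown to kill
 * stuck connections.
 */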
static struct socket *nbd_get_socket(struct nbd_device *nbd, unsigned long fd,
				     int *err)
{
	struct socket *sock;

	*err = 0;
	sock = sockfd_lookup(fd, err);
	if (!sock)
		return NULL;

	if (sock->ops->shutdown == sock_no_shutdown) {
		dev_err(disk_to_dev(nbd->disk), "Unsupported socket: shutdown callout must be supported.\n");
		*err = -EINVAL;
		sockfd_put(sock);
		return NULL;
	}

	return sock;
}

static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
			  bool netlink)
{
	struct nbd_config *config = nbd->config;
	struct socket *sock;
	struct nbd_sock **socks;
	struct nbd_sock *nsock;
	int err;

	sock = nbd_get_socket(nbd, arg, &err);
	if (!sock)
		return err;

	/*
	 * We need to make sure we don't get any errant requests while we're
	 * reallocating the ->socks array.
	 */
	blk_mq_freeze_queue(nbd->disk->queue);

	if (!netlink && !nbd->task_setup &&
	    !test_bit(NBD_RT_BOUND, &config->runtime_flags))
		nbd->task_setup = current;

	if (!netlink &&
	    (nbd->task_setup != current ||
	     test_bit(NBD_RT_BOUND, &config->runtime_flags))) {
		dev_err(disk_to_dev(nbd->disk),
			"Device being setup by another task");
		err = -EBUSY;
		goto put_socket;
	}

	nsock = kzalloc(sizeof(*nsock), GFP_KERNEL);
	if (!nsock) {
		err = -ENOMEM;
		goto put_socket;
	}

	socks = krealloc(config->socks, (config->num_connections + 1) *
			 sizeof(struct nbd_sock *), GFP_KERNEL);
	if (!socks) {
		kfree(nsock);
		err = -ENOMEM;
		goto put_socket;
	}

	config->socks = socks;

	nsock->fallback_index = -1;
	nsock->dead = false;
	mutex_init(&nsock->tx_lock);
	nsock->sock = sock;
	nsock->pending = NULL;
	nsock->sent = 0;
	nsock->cookie = 0;
	socks[config->num_connections++] = nsock;
	atomic_inc(&config->live_connections);
	blk_mq_unfreeze_queue(nbd->disk->queue);

	return 0;

put_socket:
	blk_mq_unfreeze_queue(nbd->disk->queue);
	sockfd_put(sock);
	return err;
}

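/*
 * Replace the first dead connection with a freshly supplied socket and
 * start a new receive worker for it.
 */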
static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
{
	struct nbd_config *config = nbd->config;
	struct socket *sock, *old;
	struct recv_thread_args *args;
	int i;
	int err;

	sock = nbd_get_socket(nbd, arg, &err);
	if (!sock)
		return err;

	args = kzalloc(sizeof(*args), GFP_KERNEL);
	if (!args) {
		sockfd_put(sock);
		return -ENOMEM;
	}

	for (i = 0; i < config->num_connections; i++) {
		struct nbd_sock *nsock = config->socks[i];

		if (!nsock->dead)
			continue;

		mutex_lock(&nsock->tx_lock);
		if (!nsock->dead) {
			mutex_unlock(&nsock->tx_lock);
			continue;
		}
		sk_set_memalloc(sock->sk);
		if (nbd->tag_set.timeout)
			sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
		atomic_inc(&config->recv_threads);
		refcount_inc(&nbd->config_refs);
		old = nsock->sock;
		nsock->fallback_index = -1;
		nsock->sock = sock;
		nsock->dead = false;
		INIT_WORK(&args->work, recv_work);
		args->index = i;
		args->nbd = nbd;
		nsock->cookie++;
		mutex_unlock(&nsock->tx_lock);
		sockfd_put(old);

		clear_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);

		/* We take the tx_lock in an error path in the recv_work, so
		 * we need to queue_work outside of the tx_lock.
		 */
1143*4882a593Smuzhiyun queue_work(nbd->recv_workq, &args->work);
1144*4882a593Smuzhiyun
1145*4882a593Smuzhiyun atomic_inc(&config->live_connections);
1146*4882a593Smuzhiyun wake_up(&config->conn_wait);
1147*4882a593Smuzhiyun return 0;
1148*4882a593Smuzhiyun }
1149*4882a593Smuzhiyun sockfd_put(sock);
1150*4882a593Smuzhiyun kfree(args);
1151*4882a593Smuzhiyun return -ENOSPC;
1152*4882a593Smuzhiyun }
1153*4882a593Smuzhiyun
nbd_bdev_reset(struct block_device * bdev)1154*4882a593Smuzhiyun static void nbd_bdev_reset(struct block_device *bdev)
1155*4882a593Smuzhiyun {
1156*4882a593Smuzhiyun if (bdev->bd_openers > 1)
1157*4882a593Smuzhiyun return;
1158*4882a593Smuzhiyun bd_set_nr_sectors(bdev, 0);
1159*4882a593Smuzhiyun }
1160*4882a593Smuzhiyun
nbd_parse_flags(struct nbd_device * nbd)1161*4882a593Smuzhiyun static void nbd_parse_flags(struct nbd_device *nbd)
1162*4882a593Smuzhiyun {
1163*4882a593Smuzhiyun struct nbd_config *config = nbd->config;
1164*4882a593Smuzhiyun if (config->flags & NBD_FLAG_READ_ONLY)
1165*4882a593Smuzhiyun set_disk_ro(nbd->disk, true);
1166*4882a593Smuzhiyun else
1167*4882a593Smuzhiyun set_disk_ro(nbd->disk, false);
1168*4882a593Smuzhiyun if (config->flags & NBD_FLAG_SEND_TRIM)
1169*4882a593Smuzhiyun blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue);
1170*4882a593Smuzhiyun if (config->flags & NBD_FLAG_SEND_FLUSH) {
1171*4882a593Smuzhiyun if (config->flags & NBD_FLAG_SEND_FUA)
1172*4882a593Smuzhiyun blk_queue_write_cache(nbd->disk->queue, true, true);
1173*4882a593Smuzhiyun else
1174*4882a593Smuzhiyun blk_queue_write_cache(nbd->disk->queue, true, false);
1175*4882a593Smuzhiyun }
1176*4882a593Smuzhiyun else
1177*4882a593Smuzhiyun blk_queue_write_cache(nbd->disk->queue, false, false);
1178*4882a593Smuzhiyun }
1179*4882a593Smuzhiyun
send_disconnects(struct nbd_device * nbd)1180*4882a593Smuzhiyun static void send_disconnects(struct nbd_device *nbd)
1181*4882a593Smuzhiyun {
1182*4882a593Smuzhiyun struct nbd_config *config = nbd->config;
1183*4882a593Smuzhiyun struct nbd_request request = {
1184*4882a593Smuzhiyun .magic = htonl(NBD_REQUEST_MAGIC),
1185*4882a593Smuzhiyun .type = htonl(NBD_CMD_DISC),
1186*4882a593Smuzhiyun };
1187*4882a593Smuzhiyun struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
1188*4882a593Smuzhiyun struct iov_iter from;
1189*4882a593Smuzhiyun int i, ret;
1190*4882a593Smuzhiyun
1191*4882a593Smuzhiyun for (i = 0; i < config->num_connections; i++) {
1192*4882a593Smuzhiyun struct nbd_sock *nsock = config->socks[i];
1193*4882a593Smuzhiyun
1194*4882a593Smuzhiyun iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
1195*4882a593Smuzhiyun mutex_lock(&nsock->tx_lock);
1196*4882a593Smuzhiyun ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
1197*4882a593Smuzhiyun if (ret <= 0)
1198*4882a593Smuzhiyun dev_err(disk_to_dev(nbd->disk),
1199*4882a593Smuzhiyun "Send disconnect failed %d\n", ret);
1200*4882a593Smuzhiyun mutex_unlock(&nsock->tx_lock);
1201*4882a593Smuzhiyun }
1202*4882a593Smuzhiyun }
1203*4882a593Smuzhiyun
1204*4882a593Smuzhiyun static int nbd_disconnect(struct nbd_device *nbd)
1205*4882a593Smuzhiyun {
1206*4882a593Smuzhiyun struct nbd_config *config = nbd->config;
1207*4882a593Smuzhiyun
1208*4882a593Smuzhiyun dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
1209*4882a593Smuzhiyun set_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
1210*4882a593Smuzhiyun set_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags);
1211*4882a593Smuzhiyun send_disconnects(nbd);
1212*4882a593Smuzhiyun return 0;
1213*4882a593Smuzhiyun }
1214*4882a593Smuzhiyun
1215*4882a593Smuzhiyun static void nbd_clear_sock(struct nbd_device *nbd)
1216*4882a593Smuzhiyun {
1217*4882a593Smuzhiyun sock_shutdown(nbd);
1218*4882a593Smuzhiyun nbd_clear_que(nbd);
1219*4882a593Smuzhiyun nbd->task_setup = NULL;
1220*4882a593Smuzhiyun }
1221*4882a593Smuzhiyun
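/*
 * Drop a reference on the current configuration. The final put tears
 * down the sockets, debugfs entries, recv workqueue and queue limits
 * under config_lock.
 */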
1222*4882a593Smuzhiyun static void nbd_config_put(struct nbd_device *nbd)
1223*4882a593Smuzhiyun {
1224*4882a593Smuzhiyun if (refcount_dec_and_mutex_lock(&nbd->config_refs,
1225*4882a593Smuzhiyun &nbd->config_lock)) {
1226*4882a593Smuzhiyun struct nbd_config *config = nbd->config;
1227*4882a593Smuzhiyun nbd_dev_dbg_close(nbd);
1228*4882a593Smuzhiyun nbd_size_clear(nbd);
1229*4882a593Smuzhiyun if (test_and_clear_bit(NBD_RT_HAS_PID_FILE,
1230*4882a593Smuzhiyun &config->runtime_flags))
1231*4882a593Smuzhiyun device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
1232*4882a593Smuzhiyun nbd->task_recv = NULL;
1233*4882a593Smuzhiyun nbd_clear_sock(nbd);
1234*4882a593Smuzhiyun if (config->num_connections) {
1235*4882a593Smuzhiyun int i;
1236*4882a593Smuzhiyun for (i = 0; i < config->num_connections; i++) {
1237*4882a593Smuzhiyun sockfd_put(config->socks[i]->sock);
1238*4882a593Smuzhiyun kfree(config->socks[i]);
1239*4882a593Smuzhiyun }
1240*4882a593Smuzhiyun kfree(config->socks);
1241*4882a593Smuzhiyun }
1242*4882a593Smuzhiyun kfree(nbd->config);
1243*4882a593Smuzhiyun nbd->config = NULL;
1244*4882a593Smuzhiyun
1245*4882a593Smuzhiyun if (nbd->recv_workq)
1246*4882a593Smuzhiyun destroy_workqueue(nbd->recv_workq);
1247*4882a593Smuzhiyun nbd->recv_workq = NULL;
1248*4882a593Smuzhiyun
1249*4882a593Smuzhiyun nbd->tag_set.timeout = 0;
1250*4882a593Smuzhiyun nbd->disk->queue->limits.discard_granularity = 0;
1251*4882a593Smuzhiyun nbd->disk->queue->limits.discard_alignment = 0;
1252*4882a593Smuzhiyun blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
1253*4882a593Smuzhiyun blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue);
1254*4882a593Smuzhiyun
1255*4882a593Smuzhiyun mutex_unlock(&nbd->config_lock);
1256*4882a593Smuzhiyun nbd_put(nbd);
1257*4882a593Smuzhiyun module_put(THIS_MODULE);
1258*4882a593Smuzhiyun }
1259*4882a593Smuzhiyun }
1260*4882a593Smuzhiyun
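/*
 * Bring the device up: allocate the recv workqueue, apply the server
 * flags and queue one recv_work per configured connection. Called with
 * config_lock held.
 */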
1261*4882a593Smuzhiyun static int nbd_start_device(struct nbd_device *nbd)
1262*4882a593Smuzhiyun {
1263*4882a593Smuzhiyun struct nbd_config *config = nbd->config;
1264*4882a593Smuzhiyun int num_connections = config->num_connections;
1265*4882a593Smuzhiyun int error = 0, i;
1266*4882a593Smuzhiyun
1267*4882a593Smuzhiyun if (nbd->task_recv)
1268*4882a593Smuzhiyun return -EBUSY;
1269*4882a593Smuzhiyun if (!config->socks)
1270*4882a593Smuzhiyun return -EINVAL;
1271*4882a593Smuzhiyun if (num_connections > 1 &&
1272*4882a593Smuzhiyun !(config->flags & NBD_FLAG_CAN_MULTI_CONN)) {
1273*4882a593Smuzhiyun dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
1274*4882a593Smuzhiyun return -EINVAL;
1275*4882a593Smuzhiyun }
1276*4882a593Smuzhiyun
1277*4882a593Smuzhiyun nbd->recv_workq = alloc_workqueue("knbd%d-recv",
1278*4882a593Smuzhiyun WQ_MEM_RECLAIM | WQ_HIGHPRI |
1279*4882a593Smuzhiyun WQ_UNBOUND, 0, nbd->index);
1280*4882a593Smuzhiyun if (!nbd->recv_workq) {
1281*4882a593Smuzhiyun dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n");
1282*4882a593Smuzhiyun return -ENOMEM;
1283*4882a593Smuzhiyun }
1284*4882a593Smuzhiyun
1285*4882a593Smuzhiyun blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
1286*4882a593Smuzhiyun nbd->task_recv = current;
1287*4882a593Smuzhiyun
1288*4882a593Smuzhiyun nbd_parse_flags(nbd);
1289*4882a593Smuzhiyun
1290*4882a593Smuzhiyun error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
1291*4882a593Smuzhiyun if (error) {
1292*4882a593Smuzhiyun dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
1293*4882a593Smuzhiyun return error;
1294*4882a593Smuzhiyun }
1295*4882a593Smuzhiyun set_bit(NBD_RT_HAS_PID_FILE, &config->runtime_flags);
1296*4882a593Smuzhiyun
1297*4882a593Smuzhiyun nbd_dev_dbg_init(nbd);
1298*4882a593Smuzhiyun for (i = 0; i < num_connections; i++) {
1299*4882a593Smuzhiyun struct recv_thread_args *args;
1300*4882a593Smuzhiyun
1301*4882a593Smuzhiyun args = kzalloc(sizeof(*args), GFP_KERNEL);
1302*4882a593Smuzhiyun if (!args) {
1303*4882a593Smuzhiyun sock_shutdown(nbd);
1304*4882a593Smuzhiyun /*
1305*4882a593Smuzhiyun * If num_connections is m and only the first n (1 <= n < m)
1306*4882a593Smuzhiyun * recv_thread_args kzallocs succeeded before allocation
1307*4882a593Smuzhiyun * n + 1 failed, we still have n recv threads running.
1308*4882a593Smuzhiyun * So, add flush_workqueue here to prevent those threads
1309*4882a593Smuzhiyun * from dropping the last config_refs and trying to destroy
1310*4882a593Smuzhiyun * the workqueue from inside the workqueue.
1311*4882a593Smuzhiyun */
1312*4882a593Smuzhiyun if (i)
1313*4882a593Smuzhiyun flush_workqueue(nbd->recv_workq);
1314*4882a593Smuzhiyun return -ENOMEM;
1315*4882a593Smuzhiyun }
1316*4882a593Smuzhiyun sk_set_memalloc(config->socks[i]->sock->sk);
1317*4882a593Smuzhiyun if (nbd->tag_set.timeout)
1318*4882a593Smuzhiyun config->socks[i]->sock->sk->sk_sndtimeo =
1319*4882a593Smuzhiyun nbd->tag_set.timeout;
1320*4882a593Smuzhiyun atomic_inc(&config->recv_threads);
1321*4882a593Smuzhiyun refcount_inc(&nbd->config_refs);
1322*4882a593Smuzhiyun INIT_WORK(&args->work, recv_work);
1323*4882a593Smuzhiyun args->nbd = nbd;
1324*4882a593Smuzhiyun args->index = i;
1325*4882a593Smuzhiyun queue_work(nbd->recv_workq, &args->work);
1326*4882a593Smuzhiyun }
1327*4882a593Smuzhiyun nbd_size_update(nbd, true);
1328*4882a593Smuzhiyun return error;
1329*4882a593Smuzhiyun }
1330*4882a593Smuzhiyun
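/*
 * NBD_DO_IT path: start the device, then drop config_lock and block
 * until every recv thread has exited or the caller is interrupted.
 */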
1331*4882a593Smuzhiyun static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
1332*4882a593Smuzhiyun {
1333*4882a593Smuzhiyun struct nbd_config *config = nbd->config;
1334*4882a593Smuzhiyun int ret;
1335*4882a593Smuzhiyun
1336*4882a593Smuzhiyun ret = nbd_start_device(nbd);
1337*4882a593Smuzhiyun if (ret)
1338*4882a593Smuzhiyun return ret;
1339*4882a593Smuzhiyun
1340*4882a593Smuzhiyun if (max_part)
1341*4882a593Smuzhiyun set_bit(GD_NEED_PART_SCAN, &nbd->disk->state);
1342*4882a593Smuzhiyun mutex_unlock(&nbd->config_lock);
1343*4882a593Smuzhiyun ret = wait_event_interruptible(config->recv_wq,
1344*4882a593Smuzhiyun atomic_read(&config->recv_threads) == 0);
1345*4882a593Smuzhiyun if (ret) {
1346*4882a593Smuzhiyun sock_shutdown(nbd);
1347*4882a593Smuzhiyun nbd_clear_que(nbd);
1348*4882a593Smuzhiyun }
1349*4882a593Smuzhiyun
1350*4882a593Smuzhiyun flush_workqueue(nbd->recv_workq);
1351*4882a593Smuzhiyun mutex_lock(&nbd->config_lock);
1352*4882a593Smuzhiyun nbd_bdev_reset(bdev);
1353*4882a593Smuzhiyun /* user requested, ignore socket errors */
1354*4882a593Smuzhiyun if (test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags))
1355*4882a593Smuzhiyun ret = 0;
1356*4882a593Smuzhiyun if (test_bit(NBD_RT_TIMEDOUT, &config->runtime_flags))
1357*4882a593Smuzhiyun ret = -ETIMEDOUT;
1358*4882a593Smuzhiyun return ret;
1359*4882a593Smuzhiyun }
1360*4882a593Smuzhiyun
1361*4882a593Smuzhiyun static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
1362*4882a593Smuzhiyun struct block_device *bdev)
1363*4882a593Smuzhiyun {
1364*4882a593Smuzhiyun nbd_clear_sock(nbd);
1365*4882a593Smuzhiyun __invalidate_device(bdev, true);
1366*4882a593Smuzhiyun nbd_bdev_reset(bdev);
1367*4882a593Smuzhiyun if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
1368*4882a593Smuzhiyun &nbd->config->runtime_flags))
1369*4882a593Smuzhiyun nbd_config_put(nbd);
1370*4882a593Smuzhiyun }
1371*4882a593Smuzhiyun
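/* A valid block size is a power of two between 512 and PAGE_SIZE. */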
1372*4882a593Smuzhiyun static bool nbd_is_valid_blksize(unsigned long blksize)
1373*4882a593Smuzhiyun {
1374*4882a593Smuzhiyun if (!blksize || !is_power_of_2(blksize) || blksize < 512 ||
1375*4882a593Smuzhiyun blksize > PAGE_SIZE)
1376*4882a593Smuzhiyun return false;
1377*4882a593Smuzhiyun return true;
1378*4882a593Smuzhiyun }
1379*4882a593Smuzhiyun
1380*4882a593Smuzhiyun static void nbd_set_cmd_timeout(struct nbd_device *nbd, u64 timeout)
1381*4882a593Smuzhiyun {
1382*4882a593Smuzhiyun nbd->tag_set.timeout = timeout * HZ;
1383*4882a593Smuzhiyun if (timeout)
1384*4882a593Smuzhiyun blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
1385*4882a593Smuzhiyun else
1386*4882a593Smuzhiyun blk_queue_rq_timeout(nbd->disk->queue, 30 * HZ);
1387*4882a593Smuzhiyun }
1388*4882a593Smuzhiyun
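/*
 * Legacy ioctl interface. A userspace client typically drives it
 * roughly like this (sketch only, error handling omitted):
 *
 *	fd = open("/dev/nbd0", O_RDWR);
 *	ioctl(fd, NBD_SET_SOCK, sock_fd);
 *	ioctl(fd, NBD_SET_BLKSIZE, 4096);
 *	ioctl(fd, NBD_SET_SIZE_BLOCKS, nr_blocks);
 *	ioctl(fd, NBD_DO_IT);		(blocks until disconnect)
 */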
1389*4882a593Smuzhiyun /* Must be called with config_lock held */
1390*4882a593Smuzhiyun static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
1391*4882a593Smuzhiyun unsigned int cmd, unsigned long arg)
1392*4882a593Smuzhiyun {
1393*4882a593Smuzhiyun struct nbd_config *config = nbd->config;
1394*4882a593Smuzhiyun
1395*4882a593Smuzhiyun switch (cmd) {
1396*4882a593Smuzhiyun case NBD_DISCONNECT:
1397*4882a593Smuzhiyun return nbd_disconnect(nbd);
1398*4882a593Smuzhiyun case NBD_CLEAR_SOCK:
1399*4882a593Smuzhiyun nbd_clear_sock_ioctl(nbd, bdev);
1400*4882a593Smuzhiyun return 0;
1401*4882a593Smuzhiyun case NBD_SET_SOCK:
1402*4882a593Smuzhiyun return nbd_add_socket(nbd, arg, false);
1403*4882a593Smuzhiyun case NBD_SET_BLKSIZE:
1404*4882a593Smuzhiyun if (!arg)
1405*4882a593Smuzhiyun arg = NBD_DEF_BLKSIZE;
1406*4882a593Smuzhiyun if (!nbd_is_valid_blksize(arg))
1407*4882a593Smuzhiyun return -EINVAL;
1408*4882a593Smuzhiyun nbd_size_set(nbd, arg,
1409*4882a593Smuzhiyun div_s64(config->bytesize, arg));
1410*4882a593Smuzhiyun return 0;
1411*4882a593Smuzhiyun case NBD_SET_SIZE:
1412*4882a593Smuzhiyun nbd_size_set(nbd, config->blksize,
1413*4882a593Smuzhiyun div_s64(arg, config->blksize));
1414*4882a593Smuzhiyun return 0;
1415*4882a593Smuzhiyun case NBD_SET_SIZE_BLOCKS:
1416*4882a593Smuzhiyun nbd_size_set(nbd, config->blksize, arg);
1417*4882a593Smuzhiyun return 0;
1418*4882a593Smuzhiyun case NBD_SET_TIMEOUT:
1419*4882a593Smuzhiyun nbd_set_cmd_timeout(nbd, arg);
1420*4882a593Smuzhiyun return 0;
1421*4882a593Smuzhiyun
1422*4882a593Smuzhiyun case NBD_SET_FLAGS:
1423*4882a593Smuzhiyun config->flags = arg;
1424*4882a593Smuzhiyun return 0;
1425*4882a593Smuzhiyun case NBD_DO_IT:
1426*4882a593Smuzhiyun return nbd_start_device_ioctl(nbd, bdev);
1427*4882a593Smuzhiyun case NBD_CLEAR_QUE:
1428*4882a593Smuzhiyun /*
1429*4882a593Smuzhiyun * This is for compatibility only. The queue is always cleared
1430*4882a593Smuzhiyun * by NBD_DO_IT or NBD_CLEAR_SOCK.
1431*4882a593Smuzhiyun */
1432*4882a593Smuzhiyun return 0;
1433*4882a593Smuzhiyun case NBD_PRINT_DEBUG:
1434*4882a593Smuzhiyun /*
1435*4882a593Smuzhiyun * For compatibility only, we no longer keep a list of
1436*4882a593Smuzhiyun * outstanding requests.
1437*4882a593Smuzhiyun */
1438*4882a593Smuzhiyun return 0;
1439*4882a593Smuzhiyun }
1440*4882a593Smuzhiyun return -ENOTTY;
1441*4882a593Smuzhiyun }
1442*4882a593Smuzhiyun
1443*4882a593Smuzhiyun static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
1444*4882a593Smuzhiyun unsigned int cmd, unsigned long arg)
1445*4882a593Smuzhiyun {
1446*4882a593Smuzhiyun struct nbd_device *nbd = bdev->bd_disk->private_data;
1447*4882a593Smuzhiyun struct nbd_config *config = nbd->config;
1448*4882a593Smuzhiyun int error = -EINVAL;
1449*4882a593Smuzhiyun
1450*4882a593Smuzhiyun if (!capable(CAP_SYS_ADMIN))
1451*4882a593Smuzhiyun return -EPERM;
1452*4882a593Smuzhiyun
1453*4882a593Smuzhiyun /* The block layer will pass back some non-nbd ioctls in case we have
1454*4882a593Smuzhiyun * special handling for them, but we don't, so just return an error.
1455*4882a593Smuzhiyun */
1456*4882a593Smuzhiyun if (_IOC_TYPE(cmd) != 0xab)
1457*4882a593Smuzhiyun return -EINVAL;
1458*4882a593Smuzhiyun
1459*4882a593Smuzhiyun mutex_lock(&nbd->config_lock);
1460*4882a593Smuzhiyun
1461*4882a593Smuzhiyun /* Don't allow ioctl operations on an nbd device that was created with
1462*4882a593Smuzhiyun * netlink, unless it's DISCONNECT or CLEAR_SOCK, which are fine.
1463*4882a593Smuzhiyun */
1464*4882a593Smuzhiyun if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
1465*4882a593Smuzhiyun (cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK))
1466*4882a593Smuzhiyun error = __nbd_ioctl(bdev, nbd, cmd, arg);
1467*4882a593Smuzhiyun else
1468*4882a593Smuzhiyun dev_err(nbd_to_dev(nbd), "Cannot use ioctl interface on a netlink controlled device.\n");
1469*4882a593Smuzhiyun mutex_unlock(&nbd->config_lock);
1470*4882a593Smuzhiyun return error;
1471*4882a593Smuzhiyun }
1472*4882a593Smuzhiyun
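/*
 * Allocate a fresh configuration and pin the module for its lifetime;
 * the matching module_put() happens when the last config ref is dropped.
 */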
1473*4882a593Smuzhiyun static struct nbd_config *nbd_alloc_config(void)
1474*4882a593Smuzhiyun {
1475*4882a593Smuzhiyun struct nbd_config *config;
1476*4882a593Smuzhiyun
1477*4882a593Smuzhiyun if (!try_module_get(THIS_MODULE))
1478*4882a593Smuzhiyun return ERR_PTR(-ENODEV);
1479*4882a593Smuzhiyun
1480*4882a593Smuzhiyun config = kzalloc(sizeof(struct nbd_config), GFP_NOFS);
1481*4882a593Smuzhiyun if (!config) {
1482*4882a593Smuzhiyun module_put(THIS_MODULE);
1483*4882a593Smuzhiyun return ERR_PTR(-ENOMEM);
1484*4882a593Smuzhiyun }
1485*4882a593Smuzhiyun
1486*4882a593Smuzhiyun atomic_set(&config->recv_threads, 0);
1487*4882a593Smuzhiyun init_waitqueue_head(&config->recv_wq);
1488*4882a593Smuzhiyun init_waitqueue_head(&config->conn_wait);
1489*4882a593Smuzhiyun config->blksize = NBD_DEF_BLKSIZE;
1490*4882a593Smuzhiyun atomic_set(&config->live_connections, 0);
1491*4882a593Smuzhiyun return config;
1492*4882a593Smuzhiyun }
1493*4882a593Smuzhiyun
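/*
 * Open handler: take device and config references, allocating an empty
 * config for a device that does not have one yet.
 */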
1494*4882a593Smuzhiyun static int nbd_open(struct block_device *bdev, fmode_t mode)
1495*4882a593Smuzhiyun {
1496*4882a593Smuzhiyun struct nbd_device *nbd;
1497*4882a593Smuzhiyun int ret = 0;
1498*4882a593Smuzhiyun
1499*4882a593Smuzhiyun mutex_lock(&nbd_index_mutex);
1500*4882a593Smuzhiyun nbd = bdev->bd_disk->private_data;
1501*4882a593Smuzhiyun if (!nbd) {
1502*4882a593Smuzhiyun ret = -ENXIO;
1503*4882a593Smuzhiyun goto out;
1504*4882a593Smuzhiyun }
1505*4882a593Smuzhiyun if (!refcount_inc_not_zero(&nbd->refs)) {
1506*4882a593Smuzhiyun ret = -ENXIO;
1507*4882a593Smuzhiyun goto out;
1508*4882a593Smuzhiyun }
1509*4882a593Smuzhiyun if (!refcount_inc_not_zero(&nbd->config_refs)) {
1510*4882a593Smuzhiyun struct nbd_config *config;
1511*4882a593Smuzhiyun
1512*4882a593Smuzhiyun mutex_lock(&nbd->config_lock);
1513*4882a593Smuzhiyun if (refcount_inc_not_zero(&nbd->config_refs)) {
1514*4882a593Smuzhiyun mutex_unlock(&nbd->config_lock);
1515*4882a593Smuzhiyun goto out;
1516*4882a593Smuzhiyun }
1517*4882a593Smuzhiyun config = nbd_alloc_config();
1518*4882a593Smuzhiyun if (IS_ERR(config)) {
1519*4882a593Smuzhiyun ret = PTR_ERR(config);
1520*4882a593Smuzhiyun mutex_unlock(&nbd->config_lock);
1521*4882a593Smuzhiyun goto out;
1522*4882a593Smuzhiyun }
1523*4882a593Smuzhiyun nbd->config = config;
1524*4882a593Smuzhiyun refcount_set(&nbd->config_refs, 1);
1525*4882a593Smuzhiyun refcount_inc(&nbd->refs);
1526*4882a593Smuzhiyun mutex_unlock(&nbd->config_lock);
1527*4882a593Smuzhiyun set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
1528*4882a593Smuzhiyun } else if (nbd_disconnected(nbd->config)) {
1529*4882a593Smuzhiyun set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
1530*4882a593Smuzhiyun }
1531*4882a593Smuzhiyun out:
1532*4882a593Smuzhiyun mutex_unlock(&nbd_index_mutex);
1533*4882a593Smuzhiyun return ret;
1534*4882a593Smuzhiyun }
1535*4882a593Smuzhiyun
1536*4882a593Smuzhiyun static void nbd_release(struct gendisk *disk, fmode_t mode)
1537*4882a593Smuzhiyun {
1538*4882a593Smuzhiyun struct nbd_device *nbd = disk->private_data;
1539*4882a593Smuzhiyun struct block_device *bdev = bdget_disk(disk, 0);
1540*4882a593Smuzhiyun
1541*4882a593Smuzhiyun if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
1542*4882a593Smuzhiyun bdev->bd_openers == 0)
1543*4882a593Smuzhiyun nbd_disconnect_and_put(nbd);
1544*4882a593Smuzhiyun bdput(bdev);
1545*4882a593Smuzhiyun
1546*4882a593Smuzhiyun nbd_config_put(nbd);
1547*4882a593Smuzhiyun nbd_put(nbd);
1548*4882a593Smuzhiyun }
1549*4882a593Smuzhiyun
1550*4882a593Smuzhiyun static const struct block_device_operations nbd_fops =
1551*4882a593Smuzhiyun {
1552*4882a593Smuzhiyun .owner = THIS_MODULE,
1553*4882a593Smuzhiyun .open = nbd_open,
1554*4882a593Smuzhiyun .release = nbd_release,
1555*4882a593Smuzhiyun .ioctl = nbd_ioctl,
1556*4882a593Smuzhiyun .compat_ioctl = nbd_ioctl,
1557*4882a593Smuzhiyun };
1558*4882a593Smuzhiyun
1559*4882a593Smuzhiyun #if IS_ENABLED(CONFIG_DEBUG_FS)
1560*4882a593Smuzhiyun
1561*4882a593Smuzhiyun static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
1562*4882a593Smuzhiyun {
1563*4882a593Smuzhiyun struct nbd_device *nbd = s->private;
1564*4882a593Smuzhiyun
1565*4882a593Smuzhiyun if (nbd->task_recv)
1566*4882a593Smuzhiyun seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv));
1567*4882a593Smuzhiyun
1568*4882a593Smuzhiyun return 0;
1569*4882a593Smuzhiyun }
1570*4882a593Smuzhiyun
1571*4882a593Smuzhiyun static int nbd_dbg_tasks_open(struct inode *inode, struct file *file)
1572*4882a593Smuzhiyun {
1573*4882a593Smuzhiyun return single_open(file, nbd_dbg_tasks_show, inode->i_private);
1574*4882a593Smuzhiyun }
1575*4882a593Smuzhiyun
1576*4882a593Smuzhiyun static const struct file_operations nbd_dbg_tasks_ops = {
1577*4882a593Smuzhiyun .open = nbd_dbg_tasks_open,
1578*4882a593Smuzhiyun .read = seq_read,
1579*4882a593Smuzhiyun .llseek = seq_lseek,
1580*4882a593Smuzhiyun .release = single_release,
1581*4882a593Smuzhiyun };
1582*4882a593Smuzhiyun
1583*4882a593Smuzhiyun static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
1584*4882a593Smuzhiyun {
1585*4882a593Smuzhiyun struct nbd_device *nbd = s->private;
1586*4882a593Smuzhiyun u32 flags = nbd->config->flags;
1587*4882a593Smuzhiyun
1588*4882a593Smuzhiyun seq_printf(s, "Hex: 0x%08x\n\n", flags);
1589*4882a593Smuzhiyun
1590*4882a593Smuzhiyun seq_puts(s, "Known flags:\n");
1591*4882a593Smuzhiyun
1592*4882a593Smuzhiyun if (flags & NBD_FLAG_HAS_FLAGS)
1593*4882a593Smuzhiyun seq_puts(s, "NBD_FLAG_HAS_FLAGS\n");
1594*4882a593Smuzhiyun if (flags & NBD_FLAG_READ_ONLY)
1595*4882a593Smuzhiyun seq_puts(s, "NBD_FLAG_READ_ONLY\n");
1596*4882a593Smuzhiyun if (flags & NBD_FLAG_SEND_FLUSH)
1597*4882a593Smuzhiyun seq_puts(s, "NBD_FLAG_SEND_FLUSH\n");
1598*4882a593Smuzhiyun if (flags & NBD_FLAG_SEND_FUA)
1599*4882a593Smuzhiyun seq_puts(s, "NBD_FLAG_SEND_FUA\n");
1600*4882a593Smuzhiyun if (flags & NBD_FLAG_SEND_TRIM)
1601*4882a593Smuzhiyun seq_puts(s, "NBD_FLAG_SEND_TRIM\n");
1602*4882a593Smuzhiyun
1603*4882a593Smuzhiyun return 0;
1604*4882a593Smuzhiyun }
1605*4882a593Smuzhiyun
1606*4882a593Smuzhiyun static int nbd_dbg_flags_open(struct inode *inode, struct file *file)
1607*4882a593Smuzhiyun {
1608*4882a593Smuzhiyun return single_open(file, nbd_dbg_flags_show, inode->i_private);
1609*4882a593Smuzhiyun }
1610*4882a593Smuzhiyun
1611*4882a593Smuzhiyun static const struct file_operations nbd_dbg_flags_ops = {
1612*4882a593Smuzhiyun .open = nbd_dbg_flags_open,
1613*4882a593Smuzhiyun .read = seq_read,
1614*4882a593Smuzhiyun .llseek = seq_lseek,
1615*4882a593Smuzhiyun .release = single_release,
1616*4882a593Smuzhiyun };
1617*4882a593Smuzhiyun
1618*4882a593Smuzhiyun static int nbd_dev_dbg_init(struct nbd_device *nbd)
1619*4882a593Smuzhiyun {
1620*4882a593Smuzhiyun struct dentry *dir;
1621*4882a593Smuzhiyun struct nbd_config *config = nbd->config;
1622*4882a593Smuzhiyun
1623*4882a593Smuzhiyun if (!nbd_dbg_dir)
1624*4882a593Smuzhiyun return -EIO;
1625*4882a593Smuzhiyun
1626*4882a593Smuzhiyun dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir);
1627*4882a593Smuzhiyun if (!dir) {
1628*4882a593Smuzhiyun dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n",
1629*4882a593Smuzhiyun nbd_name(nbd));
1630*4882a593Smuzhiyun return -EIO;
1631*4882a593Smuzhiyun }
1632*4882a593Smuzhiyun config->dbg_dir = dir;
1633*4882a593Smuzhiyun
1634*4882a593Smuzhiyun debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops);
1635*4882a593Smuzhiyun debugfs_create_u64("size_bytes", 0444, dir, &config->bytesize);
1636*4882a593Smuzhiyun debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout);
1637*4882a593Smuzhiyun debugfs_create_u64("blocksize", 0444, dir, &config->blksize);
1638*4882a593Smuzhiyun debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops);
1639*4882a593Smuzhiyun
1640*4882a593Smuzhiyun return 0;
1641*4882a593Smuzhiyun }
1642*4882a593Smuzhiyun
1643*4882a593Smuzhiyun static void nbd_dev_dbg_close(struct nbd_device *nbd)
1644*4882a593Smuzhiyun {
1645*4882a593Smuzhiyun debugfs_remove_recursive(nbd->config->dbg_dir);
1646*4882a593Smuzhiyun }
1647*4882a593Smuzhiyun
1648*4882a593Smuzhiyun static int nbd_dbg_init(void)
1649*4882a593Smuzhiyun {
1650*4882a593Smuzhiyun struct dentry *dbg_dir;
1651*4882a593Smuzhiyun
1652*4882a593Smuzhiyun dbg_dir = debugfs_create_dir("nbd", NULL);
1653*4882a593Smuzhiyun if (!dbg_dir)
1654*4882a593Smuzhiyun return -EIO;
1655*4882a593Smuzhiyun
1656*4882a593Smuzhiyun nbd_dbg_dir = dbg_dir;
1657*4882a593Smuzhiyun
1658*4882a593Smuzhiyun return 0;
1659*4882a593Smuzhiyun }
1660*4882a593Smuzhiyun
1661*4882a593Smuzhiyun static void nbd_dbg_close(void)
1662*4882a593Smuzhiyun {
1663*4882a593Smuzhiyun debugfs_remove_recursive(nbd_dbg_dir);
1664*4882a593Smuzhiyun }
1665*4882a593Smuzhiyun
1666*4882a593Smuzhiyun #else /* IS_ENABLED(CONFIG_DEBUG_FS) */
1667*4882a593Smuzhiyun
1668*4882a593Smuzhiyun static int nbd_dev_dbg_init(struct nbd_device *nbd)
1669*4882a593Smuzhiyun {
1670*4882a593Smuzhiyun return 0;
1671*4882a593Smuzhiyun }
1672*4882a593Smuzhiyun
1673*4882a593Smuzhiyun static void nbd_dev_dbg_close(struct nbd_device *nbd)
1674*4882a593Smuzhiyun {
1675*4882a593Smuzhiyun }
1676*4882a593Smuzhiyun
1677*4882a593Smuzhiyun static int nbd_dbg_init(void)
1678*4882a593Smuzhiyun {
1679*4882a593Smuzhiyun return 0;
1680*4882a593Smuzhiyun }
1681*4882a593Smuzhiyun
1682*4882a593Smuzhiyun static void nbd_dbg_close(void)
1683*4882a593Smuzhiyun {
1684*4882a593Smuzhiyun }
1685*4882a593Smuzhiyun
1686*4882a593Smuzhiyun #endif
1687*4882a593Smuzhiyun
1688*4882a593Smuzhiyun static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq,
1689*4882a593Smuzhiyun unsigned int hctx_idx, unsigned int numa_node)
1690*4882a593Smuzhiyun {
1691*4882a593Smuzhiyun struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq);
1692*4882a593Smuzhiyun cmd->nbd = set->driver_data;
1693*4882a593Smuzhiyun cmd->flags = 0;
1694*4882a593Smuzhiyun mutex_init(&cmd->lock);
1695*4882a593Smuzhiyun return 0;
1696*4882a593Smuzhiyun }
1697*4882a593Smuzhiyun
1698*4882a593Smuzhiyun static const struct blk_mq_ops nbd_mq_ops = {
1699*4882a593Smuzhiyun .queue_rq = nbd_queue_rq,
1700*4882a593Smuzhiyun .complete = nbd_complete_rq,
1701*4882a593Smuzhiyun .init_request = nbd_init_request,
1702*4882a593Smuzhiyun .timeout = nbd_xmit_timeout,
1703*4882a593Smuzhiyun };
1704*4882a593Smuzhiyun
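/*
 * Allocate and register a new nbd device. A negative index asks the idr
 * for the first free slot; on success the index that was used is
 * returned.
 */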
1705*4882a593Smuzhiyun static int nbd_dev_add(int index)
1706*4882a593Smuzhiyun {
1707*4882a593Smuzhiyun struct nbd_device *nbd;
1708*4882a593Smuzhiyun struct gendisk *disk;
1709*4882a593Smuzhiyun struct request_queue *q;
1710*4882a593Smuzhiyun int err = -ENOMEM;
1711*4882a593Smuzhiyun
1712*4882a593Smuzhiyun nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL);
1713*4882a593Smuzhiyun if (!nbd)
1714*4882a593Smuzhiyun goto out;
1715*4882a593Smuzhiyun
1716*4882a593Smuzhiyun disk = alloc_disk(1 << part_shift);
1717*4882a593Smuzhiyun if (!disk)
1718*4882a593Smuzhiyun goto out_free_nbd;
1719*4882a593Smuzhiyun
1720*4882a593Smuzhiyun if (index >= 0) {
1721*4882a593Smuzhiyun err = idr_alloc(&nbd_index_idr, nbd, index, index + 1,
1722*4882a593Smuzhiyun GFP_KERNEL);
1723*4882a593Smuzhiyun if (err == -ENOSPC)
1724*4882a593Smuzhiyun err = -EEXIST;
1725*4882a593Smuzhiyun } else {
1726*4882a593Smuzhiyun err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL);
1727*4882a593Smuzhiyun if (err >= 0)
1728*4882a593Smuzhiyun index = err;
1729*4882a593Smuzhiyun }
1730*4882a593Smuzhiyun if (err < 0)
1731*4882a593Smuzhiyun goto out_free_disk;
1732*4882a593Smuzhiyun
1733*4882a593Smuzhiyun nbd->index = index;
1734*4882a593Smuzhiyun nbd->disk = disk;
1735*4882a593Smuzhiyun nbd->tag_set.ops = &nbd_mq_ops;
1736*4882a593Smuzhiyun nbd->tag_set.nr_hw_queues = 1;
1737*4882a593Smuzhiyun nbd->tag_set.queue_depth = 128;
1738*4882a593Smuzhiyun nbd->tag_set.numa_node = NUMA_NO_NODE;
1739*4882a593Smuzhiyun nbd->tag_set.cmd_size = sizeof(struct nbd_cmd);
1740*4882a593Smuzhiyun nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
1741*4882a593Smuzhiyun BLK_MQ_F_BLOCKING;
1742*4882a593Smuzhiyun nbd->tag_set.driver_data = nbd;
1743*4882a593Smuzhiyun nbd->destroy_complete = NULL;
1744*4882a593Smuzhiyun
1745*4882a593Smuzhiyun err = blk_mq_alloc_tag_set(&nbd->tag_set);
1746*4882a593Smuzhiyun if (err)
1747*4882a593Smuzhiyun goto out_free_idr;
1748*4882a593Smuzhiyun
1749*4882a593Smuzhiyun q = blk_mq_init_queue(&nbd->tag_set);
1750*4882a593Smuzhiyun if (IS_ERR(q)) {
1751*4882a593Smuzhiyun err = PTR_ERR(q);
1752*4882a593Smuzhiyun goto out_free_tags;
1753*4882a593Smuzhiyun }
1754*4882a593Smuzhiyun disk->queue = q;
1755*4882a593Smuzhiyun
1756*4882a593Smuzhiyun /*
1757*4882a593Smuzhiyun * Tell the block layer that we are not a rotational device
1758*4882a593Smuzhiyun */
1759*4882a593Smuzhiyun blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
1760*4882a593Smuzhiyun blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
1761*4882a593Smuzhiyun disk->queue->limits.discard_granularity = 0;
1762*4882a593Smuzhiyun disk->queue->limits.discard_alignment = 0;
1763*4882a593Smuzhiyun blk_queue_max_discard_sectors(disk->queue, 0);
1764*4882a593Smuzhiyun blk_queue_max_segment_size(disk->queue, UINT_MAX);
1765*4882a593Smuzhiyun blk_queue_max_segments(disk->queue, USHRT_MAX);
1766*4882a593Smuzhiyun blk_queue_max_hw_sectors(disk->queue, 65536);
1767*4882a593Smuzhiyun disk->queue->limits.max_sectors = 256;
1768*4882a593Smuzhiyun
1769*4882a593Smuzhiyun mutex_init(&nbd->config_lock);
1770*4882a593Smuzhiyun refcount_set(&nbd->config_refs, 0);
1771*4882a593Smuzhiyun refcount_set(&nbd->refs, 1);
1772*4882a593Smuzhiyun INIT_LIST_HEAD(&nbd->list);
1773*4882a593Smuzhiyun disk->major = NBD_MAJOR;
1774*4882a593Smuzhiyun disk->first_minor = index << part_shift;
1775*4882a593Smuzhiyun disk->fops = &nbd_fops;
1776*4882a593Smuzhiyun disk->private_data = nbd;
1777*4882a593Smuzhiyun sprintf(disk->disk_name, "nbd%d", index);
1778*4882a593Smuzhiyun add_disk(disk);
1779*4882a593Smuzhiyun nbd_total_devices++;
1780*4882a593Smuzhiyun return index;
1781*4882a593Smuzhiyun
1782*4882a593Smuzhiyun out_free_tags:
1783*4882a593Smuzhiyun blk_mq_free_tag_set(&nbd->tag_set);
1784*4882a593Smuzhiyun out_free_idr:
1785*4882a593Smuzhiyun idr_remove(&nbd_index_idr, index);
1786*4882a593Smuzhiyun out_free_disk:
1787*4882a593Smuzhiyun put_disk(disk);
1788*4882a593Smuzhiyun out_free_nbd:
1789*4882a593Smuzhiyun kfree(nbd);
1790*4882a593Smuzhiyun out:
1791*4882a593Smuzhiyun return err;
1792*4882a593Smuzhiyun }
1793*4882a593Smuzhiyun
1794*4882a593Smuzhiyun static int find_free_cb(int id, void *ptr, void *data)
1795*4882a593Smuzhiyun {
1796*4882a593Smuzhiyun struct nbd_device *nbd = ptr;
1797*4882a593Smuzhiyun struct nbd_device **found = data;
1798*4882a593Smuzhiyun
1799*4882a593Smuzhiyun if (!refcount_read(&nbd->config_refs)) {
1800*4882a593Smuzhiyun *found = nbd;
1801*4882a593Smuzhiyun return 1;
1802*4882a593Smuzhiyun }
1803*4882a593Smuzhiyun return 0;
1804*4882a593Smuzhiyun }
1805*4882a593Smuzhiyun
1806*4882a593Smuzhiyun /* Netlink interface. */
1807*4882a593Smuzhiyun static const struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = {
1808*4882a593Smuzhiyun [NBD_ATTR_INDEX] = { .type = NLA_U32 },
1809*4882a593Smuzhiyun [NBD_ATTR_SIZE_BYTES] = { .type = NLA_U64 },
1810*4882a593Smuzhiyun [NBD_ATTR_BLOCK_SIZE_BYTES] = { .type = NLA_U64 },
1811*4882a593Smuzhiyun [NBD_ATTR_TIMEOUT] = { .type = NLA_U64 },
1812*4882a593Smuzhiyun [NBD_ATTR_SERVER_FLAGS] = { .type = NLA_U64 },
1813*4882a593Smuzhiyun [NBD_ATTR_CLIENT_FLAGS] = { .type = NLA_U64 },
1814*4882a593Smuzhiyun [NBD_ATTR_SOCKETS] = { .type = NLA_NESTED},
1815*4882a593Smuzhiyun [NBD_ATTR_DEAD_CONN_TIMEOUT] = { .type = NLA_U64 },
1816*4882a593Smuzhiyun [NBD_ATTR_DEVICE_LIST] = { .type = NLA_NESTED},
1817*4882a593Smuzhiyun };
1818*4882a593Smuzhiyun
1819*4882a593Smuzhiyun static const struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = {
1820*4882a593Smuzhiyun [NBD_SOCK_FD] = { .type = NLA_U32 },
1821*4882a593Smuzhiyun };
1822*4882a593Smuzhiyun
1823*4882a593Smuzhiyun /* We don't use this right now since we don't parse the incoming list, but we
1824*4882a593Smuzhiyun * still want it here so userspace knows what to expect.
1825*4882a593Smuzhiyun */
1826*4882a593Smuzhiyun static const struct nla_policy __attribute__((unused))
1827*4882a593Smuzhiyun nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = {
1828*4882a593Smuzhiyun [NBD_DEVICE_INDEX] = { .type = NLA_U32 },
1829*4882a593Smuzhiyun [NBD_DEVICE_CONNECTED] = { .type = NLA_U8 },
1830*4882a593Smuzhiyun };
1831*4882a593Smuzhiyun
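/*
 * Apply the size and block-size attributes of a netlink request,
 * validating the block size before resizing the device.
 */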
1832*4882a593Smuzhiyun static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
1833*4882a593Smuzhiyun {
1834*4882a593Smuzhiyun struct nbd_config *config = nbd->config;
1835*4882a593Smuzhiyun u64 bsize = config->blksize;
1836*4882a593Smuzhiyun u64 bytes = config->bytesize;
1837*4882a593Smuzhiyun
1838*4882a593Smuzhiyun if (info->attrs[NBD_ATTR_SIZE_BYTES])
1839*4882a593Smuzhiyun bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]);
1840*4882a593Smuzhiyun
1841*4882a593Smuzhiyun if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) {
1842*4882a593Smuzhiyun bsize = nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
1843*4882a593Smuzhiyun if (!bsize)
1844*4882a593Smuzhiyun bsize = NBD_DEF_BLKSIZE;
1845*4882a593Smuzhiyun if (!nbd_is_valid_blksize(bsize)) {
1846*4882a593Smuzhiyun printk(KERN_ERR "Invalid block size %llu\n", bsize);
1847*4882a593Smuzhiyun return -EINVAL;
1848*4882a593Smuzhiyun }
1849*4882a593Smuzhiyun }
1850*4882a593Smuzhiyun
1851*4882a593Smuzhiyun if (bytes != config->bytesize || bsize != config->blksize)
1852*4882a593Smuzhiyun nbd_size_set(nbd, bsize, div64_u64(bytes, bsize));
1853*4882a593Smuzhiyun return 0;
1854*4882a593Smuzhiyun }
1855*4882a593Smuzhiyun
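/*
 * NBD_CMD_CONNECT handler: find or create the requested device, build
 * its configuration from the netlink attributes, add the supplied
 * sockets and start the device.
 */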
1856*4882a593Smuzhiyun static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
1857*4882a593Smuzhiyun {
1858*4882a593Smuzhiyun DECLARE_COMPLETION_ONSTACK(destroy_complete);
1859*4882a593Smuzhiyun struct nbd_device *nbd = NULL;
1860*4882a593Smuzhiyun struct nbd_config *config;
1861*4882a593Smuzhiyun int index = -1;
1862*4882a593Smuzhiyun int ret;
1863*4882a593Smuzhiyun bool put_dev = false;
1864*4882a593Smuzhiyun
1865*4882a593Smuzhiyun if (!netlink_capable(skb, CAP_SYS_ADMIN))
1866*4882a593Smuzhiyun return -EPERM;
1867*4882a593Smuzhiyun
1868*4882a593Smuzhiyun if (info->attrs[NBD_ATTR_INDEX])
1869*4882a593Smuzhiyun index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
1870*4882a593Smuzhiyun if (!info->attrs[NBD_ATTR_SOCKETS]) {
1871*4882a593Smuzhiyun printk(KERN_ERR "nbd: must specify at least one socket\n");
1872*4882a593Smuzhiyun return -EINVAL;
1873*4882a593Smuzhiyun }
1874*4882a593Smuzhiyun if (!info->attrs[NBD_ATTR_SIZE_BYTES]) {
1875*4882a593Smuzhiyun printk(KERN_ERR "nbd: must specify a size in bytes for the device\n");
1876*4882a593Smuzhiyun return -EINVAL;
1877*4882a593Smuzhiyun }
1878*4882a593Smuzhiyun again:
1879*4882a593Smuzhiyun mutex_lock(&nbd_index_mutex);
1880*4882a593Smuzhiyun if (index == -1) {
1881*4882a593Smuzhiyun ret = idr_for_each(&nbd_index_idr, &find_free_cb, &nbd);
1882*4882a593Smuzhiyun if (ret == 0) {
1883*4882a593Smuzhiyun int new_index;
1884*4882a593Smuzhiyun new_index = nbd_dev_add(-1);
1885*4882a593Smuzhiyun if (new_index < 0) {
1886*4882a593Smuzhiyun mutex_unlock(&nbd_index_mutex);
1887*4882a593Smuzhiyun printk(KERN_ERR "nbd: failed to add new device\n");
1888*4882a593Smuzhiyun return new_index;
1889*4882a593Smuzhiyun }
1890*4882a593Smuzhiyun nbd = idr_find(&nbd_index_idr, new_index);
1891*4882a593Smuzhiyun }
1892*4882a593Smuzhiyun } else {
1893*4882a593Smuzhiyun nbd = idr_find(&nbd_index_idr, index);
1894*4882a593Smuzhiyun if (!nbd) {
1895*4882a593Smuzhiyun ret = nbd_dev_add(index);
1896*4882a593Smuzhiyun if (ret < 0) {
1897*4882a593Smuzhiyun mutex_unlock(&nbd_index_mutex);
1898*4882a593Smuzhiyun printk(KERN_ERR "nbd: failed to add new device\n");
1899*4882a593Smuzhiyun return ret;
1900*4882a593Smuzhiyun }
1901*4882a593Smuzhiyun nbd = idr_find(&nbd_index_idr, index);
1902*4882a593Smuzhiyun }
1903*4882a593Smuzhiyun }
1904*4882a593Smuzhiyun if (!nbd) {
1905*4882a593Smuzhiyun printk(KERN_ERR "nbd: couldn't find device at index %d\n",
1906*4882a593Smuzhiyun index);
1907*4882a593Smuzhiyun mutex_unlock(&nbd_index_mutex);
1908*4882a593Smuzhiyun return -EINVAL;
1909*4882a593Smuzhiyun }
1910*4882a593Smuzhiyun
1911*4882a593Smuzhiyun if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
1912*4882a593Smuzhiyun test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) {
1913*4882a593Smuzhiyun nbd->destroy_complete = &destroy_complete;
1914*4882a593Smuzhiyun mutex_unlock(&nbd_index_mutex);
1915*4882a593Smuzhiyun
1916*4882a593Smuzhiyun /* Wait until the nbd device is totally destroyed */
1917*4882a593Smuzhiyun wait_for_completion(&destroy_complete);
1918*4882a593Smuzhiyun goto again;
1919*4882a593Smuzhiyun }
1920*4882a593Smuzhiyun
1921*4882a593Smuzhiyun if (!refcount_inc_not_zero(&nbd->refs)) {
1922*4882a593Smuzhiyun mutex_unlock(&nbd_index_mutex);
1923*4882a593Smuzhiyun if (index == -1)
1924*4882a593Smuzhiyun goto again;
1925*4882a593Smuzhiyun printk(KERN_ERR "nbd: device at index %d is going down\n",
1926*4882a593Smuzhiyun index);
1927*4882a593Smuzhiyun return -EINVAL;
1928*4882a593Smuzhiyun }
1929*4882a593Smuzhiyun mutex_unlock(&nbd_index_mutex);
1930*4882a593Smuzhiyun
1931*4882a593Smuzhiyun mutex_lock(&nbd->config_lock);
1932*4882a593Smuzhiyun if (refcount_read(&nbd->config_refs)) {
1933*4882a593Smuzhiyun mutex_unlock(&nbd->config_lock);
1934*4882a593Smuzhiyun nbd_put(nbd);
1935*4882a593Smuzhiyun if (index == -1)
1936*4882a593Smuzhiyun goto again;
1937*4882a593Smuzhiyun printk(KERN_ERR "nbd: nbd%d already in use\n", index);
1938*4882a593Smuzhiyun return -EBUSY;
1939*4882a593Smuzhiyun }
1940*4882a593Smuzhiyun if (WARN_ON(nbd->config)) {
1941*4882a593Smuzhiyun mutex_unlock(&nbd->config_lock);
1942*4882a593Smuzhiyun nbd_put(nbd);
1943*4882a593Smuzhiyun return -EINVAL;
1944*4882a593Smuzhiyun }
1945*4882a593Smuzhiyun config = nbd_alloc_config();
1946*4882a593Smuzhiyun if (IS_ERR(config)) {
1947*4882a593Smuzhiyun mutex_unlock(&nbd->config_lock);
1948*4882a593Smuzhiyun nbd_put(nbd);
1949*4882a593Smuzhiyun printk(KERN_ERR "nbd: couldn't allocate config\n");
1950*4882a593Smuzhiyun return PTR_ERR(config);
1951*4882a593Smuzhiyun }
1952*4882a593Smuzhiyun nbd->config = config;
1953*4882a593Smuzhiyun refcount_set(&nbd->config_refs, 1);
1954*4882a593Smuzhiyun set_bit(NBD_RT_BOUND, &config->runtime_flags);
1955*4882a593Smuzhiyun
1956*4882a593Smuzhiyun ret = nbd_genl_size_set(info, nbd);
1957*4882a593Smuzhiyun if (ret)
1958*4882a593Smuzhiyun goto out;
1959*4882a593Smuzhiyun
1960*4882a593Smuzhiyun if (info->attrs[NBD_ATTR_TIMEOUT])
1961*4882a593Smuzhiyun nbd_set_cmd_timeout(nbd,
1962*4882a593Smuzhiyun nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]));
1963*4882a593Smuzhiyun if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
1964*4882a593Smuzhiyun config->dead_conn_timeout =
1965*4882a593Smuzhiyun nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
1966*4882a593Smuzhiyun config->dead_conn_timeout *= HZ;
1967*4882a593Smuzhiyun }
1968*4882a593Smuzhiyun if (info->attrs[NBD_ATTR_SERVER_FLAGS])
1969*4882a593Smuzhiyun config->flags =
1970*4882a593Smuzhiyun nla_get_u64(info->attrs[NBD_ATTR_SERVER_FLAGS]);
1971*4882a593Smuzhiyun if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
1972*4882a593Smuzhiyun u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
1973*4882a593Smuzhiyun if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
1974*4882a593Smuzhiyun /*
1975*4882a593Smuzhiyun * We have 1 ref to keep the device around, and then 1
1976*4882a593Smuzhiyun * ref for our current operation here, which will be
1977*4882a593Smuzhiyun * inherited by the config. If we already have
1978*4882a593Smuzhiyun * DESTROY_ON_DISCONNECT set then we know we don't have
1979*4882a593Smuzhiyun * that extra ref already held so we don't need the
1980*4882a593Smuzhiyun * put_dev.
1981*4882a593Smuzhiyun */
1982*4882a593Smuzhiyun if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
1983*4882a593Smuzhiyun &nbd->flags))
1984*4882a593Smuzhiyun put_dev = true;
1985*4882a593Smuzhiyun } else {
1986*4882a593Smuzhiyun if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
1987*4882a593Smuzhiyun &nbd->flags))
1988*4882a593Smuzhiyun refcount_inc(&nbd->refs);
1989*4882a593Smuzhiyun }
1990*4882a593Smuzhiyun if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
1991*4882a593Smuzhiyun set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
1992*4882a593Smuzhiyun &config->runtime_flags);
1993*4882a593Smuzhiyun }
1994*4882a593Smuzhiyun }
1995*4882a593Smuzhiyun
1996*4882a593Smuzhiyun if (info->attrs[NBD_ATTR_SOCKETS]) {
1997*4882a593Smuzhiyun struct nlattr *attr;
1998*4882a593Smuzhiyun int rem, fd;
1999*4882a593Smuzhiyun
2000*4882a593Smuzhiyun nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS],
2001*4882a593Smuzhiyun rem) {
2002*4882a593Smuzhiyun struct nlattr *socks[NBD_SOCK_MAX+1];
2003*4882a593Smuzhiyun
2004*4882a593Smuzhiyun if (nla_type(attr) != NBD_SOCK_ITEM) {
2005*4882a593Smuzhiyun printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n");
2006*4882a593Smuzhiyun ret = -EINVAL;
2007*4882a593Smuzhiyun goto out;
2008*4882a593Smuzhiyun }
2009*4882a593Smuzhiyun ret = nla_parse_nested_deprecated(socks, NBD_SOCK_MAX,
2010*4882a593Smuzhiyun attr,
2011*4882a593Smuzhiyun nbd_sock_policy,
2012*4882a593Smuzhiyun info->extack);
2013*4882a593Smuzhiyun if (ret != 0) {
2014*4882a593Smuzhiyun printk(KERN_ERR "nbd: error processing sock list\n");
2015*4882a593Smuzhiyun ret = -EINVAL;
2016*4882a593Smuzhiyun goto out;
2017*4882a593Smuzhiyun }
2018*4882a593Smuzhiyun if (!socks[NBD_SOCK_FD])
2019*4882a593Smuzhiyun continue;
2020*4882a593Smuzhiyun fd = (int)nla_get_u32(socks[NBD_SOCK_FD]);
2021*4882a593Smuzhiyun ret = nbd_add_socket(nbd, fd, true);
2022*4882a593Smuzhiyun if (ret)
2023*4882a593Smuzhiyun goto out;
2024*4882a593Smuzhiyun }
2025*4882a593Smuzhiyun }
2026*4882a593Smuzhiyun ret = nbd_start_device(nbd);
2027*4882a593Smuzhiyun out:
2028*4882a593Smuzhiyun mutex_unlock(&nbd->config_lock);
2029*4882a593Smuzhiyun if (!ret) {
2030*4882a593Smuzhiyun set_bit(NBD_RT_HAS_CONFIG_REF, &config->runtime_flags);
2031*4882a593Smuzhiyun refcount_inc(&nbd->config_refs);
2032*4882a593Smuzhiyun nbd_connect_reply(info, nbd->index);
2033*4882a593Smuzhiyun }
2034*4882a593Smuzhiyun nbd_config_put(nbd);
2035*4882a593Smuzhiyun if (put_dev)
2036*4882a593Smuzhiyun nbd_put(nbd);
2037*4882a593Smuzhiyun return ret;
2038*4882a593Smuzhiyun }
2039*4882a593Smuzhiyun
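/*
 * Disconnect the device and drop the config reference taken at connect
 * time (NBD_RT_HAS_CONFIG_REF). Used by netlink disconnect and by
 * disconnect-on-close.
 */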
2040*4882a593Smuzhiyun static void nbd_disconnect_and_put(struct nbd_device *nbd)
2041*4882a593Smuzhiyun {
2042*4882a593Smuzhiyun mutex_lock(&nbd->config_lock);
2043*4882a593Smuzhiyun nbd_disconnect(nbd);
2044*4882a593Smuzhiyun sock_shutdown(nbd);
2045*4882a593Smuzhiyun wake_up(&nbd->config->conn_wait);
2046*4882a593Smuzhiyun /*
2047*4882a593Smuzhiyun * Make sure the recv thread has finished, so it does not drop the last
2048*4882a593Smuzhiyun * config ref and try to destroy the workqueue from inside the work
2049*4882a593Smuzhiyun * queue. This also ensures that we can safely call nbd_clear_que()
2050*4882a593Smuzhiyun * to cancel the in-flight I/Os.
2051*4882a593Smuzhiyun */
2052*4882a593Smuzhiyun if (nbd->recv_workq)
2053*4882a593Smuzhiyun flush_workqueue(nbd->recv_workq);
2054*4882a593Smuzhiyun nbd_clear_que(nbd);
2055*4882a593Smuzhiyun nbd->task_setup = NULL;
2056*4882a593Smuzhiyun mutex_unlock(&nbd->config_lock);
2057*4882a593Smuzhiyun
2058*4882a593Smuzhiyun if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
2059*4882a593Smuzhiyun &nbd->config->runtime_flags))
2060*4882a593Smuzhiyun nbd_config_put(nbd);
2061*4882a593Smuzhiyun }
2062*4882a593Smuzhiyun
2063*4882a593Smuzhiyun static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
2064*4882a593Smuzhiyun {
2065*4882a593Smuzhiyun struct nbd_device *nbd;
2066*4882a593Smuzhiyun int index;
2067*4882a593Smuzhiyun
2068*4882a593Smuzhiyun if (!netlink_capable(skb, CAP_SYS_ADMIN))
2069*4882a593Smuzhiyun return -EPERM;
2070*4882a593Smuzhiyun
2071*4882a593Smuzhiyun if (!info->attrs[NBD_ATTR_INDEX]) {
2072*4882a593Smuzhiyun printk(KERN_ERR "nbd: must specify an index to disconnect\n");
2073*4882a593Smuzhiyun return -EINVAL;
2074*4882a593Smuzhiyun }
2075*4882a593Smuzhiyun index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
2076*4882a593Smuzhiyun mutex_lock(&nbd_index_mutex);
2077*4882a593Smuzhiyun nbd = idr_find(&nbd_index_idr, index);
2078*4882a593Smuzhiyun if (!nbd) {
2079*4882a593Smuzhiyun mutex_unlock(&nbd_index_mutex);
2080*4882a593Smuzhiyun printk(KERN_ERR "nbd: couldn't find device at index %d\n",
2081*4882a593Smuzhiyun index);
2082*4882a593Smuzhiyun return -EINVAL;
2083*4882a593Smuzhiyun }
2084*4882a593Smuzhiyun if (!refcount_inc_not_zero(&nbd->refs)) {
2085*4882a593Smuzhiyun mutex_unlock(&nbd_index_mutex);
2086*4882a593Smuzhiyun printk(KERN_ERR "nbd: device at index %d is going down\n",
2087*4882a593Smuzhiyun index);
2088*4882a593Smuzhiyun return -EINVAL;
2089*4882a593Smuzhiyun }
2090*4882a593Smuzhiyun mutex_unlock(&nbd_index_mutex);
2091*4882a593Smuzhiyun if (!refcount_inc_not_zero(&nbd->config_refs)) {
2092*4882a593Smuzhiyun nbd_put(nbd);
2093*4882a593Smuzhiyun return 0;
2094*4882a593Smuzhiyun }
2095*4882a593Smuzhiyun nbd_disconnect_and_put(nbd);
2096*4882a593Smuzhiyun nbd_config_put(nbd);
2097*4882a593Smuzhiyun nbd_put(nbd);
2098*4882a593Smuzhiyun return 0;
2099*4882a593Smuzhiyun }
2100*4882a593Smuzhiyun
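/*
 * NBD_CMD_RECONFIGURE handler: update timeouts and flags and reconnect
 * replacement sockets on an already running device.
 */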
2101*4882a593Smuzhiyun static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
2102*4882a593Smuzhiyun {
2103*4882a593Smuzhiyun struct nbd_device *nbd = NULL;
2104*4882a593Smuzhiyun struct nbd_config *config;
2105*4882a593Smuzhiyun int index;
2106*4882a593Smuzhiyun int ret = 0;
2107*4882a593Smuzhiyun bool put_dev = false;
2108*4882a593Smuzhiyun
2109*4882a593Smuzhiyun if (!netlink_capable(skb, CAP_SYS_ADMIN))
2110*4882a593Smuzhiyun return -EPERM;
2111*4882a593Smuzhiyun
2112*4882a593Smuzhiyun if (!info->attrs[NBD_ATTR_INDEX]) {
2113*4882a593Smuzhiyun printk(KERN_ERR "nbd: must specify a device to reconfigure\n");
2114*4882a593Smuzhiyun return -EINVAL;
2115*4882a593Smuzhiyun }
2116*4882a593Smuzhiyun index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
2117*4882a593Smuzhiyun mutex_lock(&nbd_index_mutex);
2118*4882a593Smuzhiyun nbd = idr_find(&nbd_index_idr, index);
2119*4882a593Smuzhiyun if (!nbd) {
2120*4882a593Smuzhiyun mutex_unlock(&nbd_index_mutex);
2121*4882a593Smuzhiyun printk(KERN_ERR "nbd: couldn't find a device at index %d\n",
2122*4882a593Smuzhiyun index);
2123*4882a593Smuzhiyun return -EINVAL;
2124*4882a593Smuzhiyun }
2125*4882a593Smuzhiyun if (!refcount_inc_not_zero(&nbd->refs)) {
2126*4882a593Smuzhiyun mutex_unlock(&nbd_index_mutex);
2127*4882a593Smuzhiyun printk(KERN_ERR "nbd: device at index %d is going down\n",
2128*4882a593Smuzhiyun index);
2129*4882a593Smuzhiyun return -EINVAL;
2130*4882a593Smuzhiyun }
2131*4882a593Smuzhiyun mutex_unlock(&nbd_index_mutex);
2132*4882a593Smuzhiyun
2133*4882a593Smuzhiyun if (!refcount_inc_not_zero(&nbd->config_refs)) {
2134*4882a593Smuzhiyun dev_err(nbd_to_dev(nbd),
2135*4882a593Smuzhiyun "not configured, cannot reconfigure\n");
2136*4882a593Smuzhiyun nbd_put(nbd);
2137*4882a593Smuzhiyun return -EINVAL;
2138*4882a593Smuzhiyun }
2139*4882a593Smuzhiyun
2140*4882a593Smuzhiyun mutex_lock(&nbd->config_lock);
2141*4882a593Smuzhiyun config = nbd->config;
2142*4882a593Smuzhiyun if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
2143*4882a593Smuzhiyun !nbd->task_recv) {
2144*4882a593Smuzhiyun dev_err(nbd_to_dev(nbd),
2145*4882a593Smuzhiyun "not configured, cannot reconfigure\n");
2146*4882a593Smuzhiyun ret = -EINVAL;
2147*4882a593Smuzhiyun goto out;
2148*4882a593Smuzhiyun }
2149*4882a593Smuzhiyun
2150*4882a593Smuzhiyun ret = nbd_genl_size_set(info, nbd);
2151*4882a593Smuzhiyun if (ret)
2152*4882a593Smuzhiyun goto out;
2153*4882a593Smuzhiyun
2154*4882a593Smuzhiyun if (info->attrs[NBD_ATTR_TIMEOUT])
2155*4882a593Smuzhiyun nbd_set_cmd_timeout(nbd,
2156*4882a593Smuzhiyun nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]));
2157*4882a593Smuzhiyun if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
2158*4882a593Smuzhiyun config->dead_conn_timeout =
2159*4882a593Smuzhiyun nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
2160*4882a593Smuzhiyun config->dead_conn_timeout *= HZ;
2161*4882a593Smuzhiyun }
2162*4882a593Smuzhiyun if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
2163*4882a593Smuzhiyun u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
2164*4882a593Smuzhiyun if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
2165*4882a593Smuzhiyun if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
2166*4882a593Smuzhiyun &nbd->flags))
2167*4882a593Smuzhiyun put_dev = true;
2168*4882a593Smuzhiyun } else {
2169*4882a593Smuzhiyun if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
2170*4882a593Smuzhiyun &nbd->flags))
2171*4882a593Smuzhiyun refcount_inc(&nbd->refs);
2172*4882a593Smuzhiyun }
2173*4882a593Smuzhiyun
2174*4882a593Smuzhiyun if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
2175*4882a593Smuzhiyun set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
2176*4882a593Smuzhiyun &config->runtime_flags);
2177*4882a593Smuzhiyun } else {
2178*4882a593Smuzhiyun clear_bit(NBD_RT_DISCONNECT_ON_CLOSE,
2179*4882a593Smuzhiyun &config->runtime_flags);
2180*4882a593Smuzhiyun }
2181*4882a593Smuzhiyun }
2182*4882a593Smuzhiyun
2183*4882a593Smuzhiyun if (info->attrs[NBD_ATTR_SOCKETS]) {
2184*4882a593Smuzhiyun struct nlattr *attr;
2185*4882a593Smuzhiyun int rem, fd;
2186*4882a593Smuzhiyun
2187*4882a593Smuzhiyun nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS],
2188*4882a593Smuzhiyun rem) {
2189*4882a593Smuzhiyun struct nlattr *socks[NBD_SOCK_MAX+1];
2190*4882a593Smuzhiyun
2191*4882a593Smuzhiyun if (nla_type(attr) != NBD_SOCK_ITEM) {
2192*4882a593Smuzhiyun printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n");
2193*4882a593Smuzhiyun ret = -EINVAL;
2194*4882a593Smuzhiyun goto out;
2195*4882a593Smuzhiyun }
2196*4882a593Smuzhiyun ret = nla_parse_nested_deprecated(socks, NBD_SOCK_MAX,
2197*4882a593Smuzhiyun attr,
2198*4882a593Smuzhiyun nbd_sock_policy,
2199*4882a593Smuzhiyun info->extack);
2200*4882a593Smuzhiyun if (ret != 0) {
2201*4882a593Smuzhiyun printk(KERN_ERR "nbd: error processing sock list\n");
2202*4882a593Smuzhiyun ret = -EINVAL;
2203*4882a593Smuzhiyun goto out;
2204*4882a593Smuzhiyun }
2205*4882a593Smuzhiyun if (!socks[NBD_SOCK_FD])
2206*4882a593Smuzhiyun continue;
2207*4882a593Smuzhiyun fd = (int)nla_get_u32(socks[NBD_SOCK_FD]);
2208*4882a593Smuzhiyun ret = nbd_reconnect_socket(nbd, fd);
2209*4882a593Smuzhiyun if (ret) {
2210*4882a593Smuzhiyun if (ret == -ENOSPC)
2211*4882a593Smuzhiyun ret = 0;
2212*4882a593Smuzhiyun goto out;
2213*4882a593Smuzhiyun }
2214*4882a593Smuzhiyun dev_info(nbd_to_dev(nbd), "reconnected socket\n");
2215*4882a593Smuzhiyun }
2216*4882a593Smuzhiyun }
2217*4882a593Smuzhiyun out:
2218*4882a593Smuzhiyun mutex_unlock(&nbd->config_lock);
2219*4882a593Smuzhiyun nbd_config_put(nbd);
2220*4882a593Smuzhiyun nbd_put(nbd);
2221*4882a593Smuzhiyun if (put_dev)
2222*4882a593Smuzhiyun nbd_put(nbd);
2223*4882a593Smuzhiyun return ret;
2224*4882a593Smuzhiyun }
2225*4882a593Smuzhiyun
2226*4882a593Smuzhiyun static const struct genl_small_ops nbd_connect_genl_ops[] = {
2227*4882a593Smuzhiyun {
2228*4882a593Smuzhiyun .cmd = NBD_CMD_CONNECT,
2229*4882a593Smuzhiyun .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2230*4882a593Smuzhiyun .doit = nbd_genl_connect,
2231*4882a593Smuzhiyun },
2232*4882a593Smuzhiyun {
2233*4882a593Smuzhiyun .cmd = NBD_CMD_DISCONNECT,
2234*4882a593Smuzhiyun .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2235*4882a593Smuzhiyun .doit = nbd_genl_disconnect,
2236*4882a593Smuzhiyun },
2237*4882a593Smuzhiyun {
2238*4882a593Smuzhiyun .cmd = NBD_CMD_RECONFIGURE,
2239*4882a593Smuzhiyun .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2240*4882a593Smuzhiyun .doit = nbd_genl_reconfigure,
2241*4882a593Smuzhiyun },
2242*4882a593Smuzhiyun {
2243*4882a593Smuzhiyun .cmd = NBD_CMD_STATUS,
2244*4882a593Smuzhiyun .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2245*4882a593Smuzhiyun .doit = nbd_genl_status,
2246*4882a593Smuzhiyun },
2247*4882a593Smuzhiyun };
2248*4882a593Smuzhiyun
2249*4882a593Smuzhiyun static const struct genl_multicast_group nbd_mcast_grps[] = {
2250*4882a593Smuzhiyun { .name = NBD_GENL_MCAST_GROUP_NAME, },
2251*4882a593Smuzhiyun };
2252*4882a593Smuzhiyun
2253*4882a593Smuzhiyun static struct genl_family nbd_genl_family __ro_after_init = {
2254*4882a593Smuzhiyun .hdrsize = 0,
2255*4882a593Smuzhiyun .name = NBD_GENL_FAMILY_NAME,
2256*4882a593Smuzhiyun .version = NBD_GENL_VERSION,
2257*4882a593Smuzhiyun .module = THIS_MODULE,
2258*4882a593Smuzhiyun .small_ops = nbd_connect_genl_ops,
2259*4882a593Smuzhiyun .n_small_ops = ARRAY_SIZE(nbd_connect_genl_ops),
2260*4882a593Smuzhiyun .maxattr = NBD_ATTR_MAX,
2261*4882a593Smuzhiyun .policy = nbd_attr_policy,
2262*4882a593Smuzhiyun .mcgrps = nbd_mcast_grps,
2263*4882a593Smuzhiyun .n_mcgrps = ARRAY_SIZE(nbd_mcast_grps),
2264*4882a593Smuzhiyun };
2265*4882a593Smuzhiyun
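/*
 * Emit one NBD_DEVICE_ITEM nest (index plus connected flag) for the
 * given device into a status reply.
 */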
2266*4882a593Smuzhiyun static int populate_nbd_status(struct nbd_device *nbd, struct sk_buff *reply)
2267*4882a593Smuzhiyun {
2268*4882a593Smuzhiyun struct nlattr *dev_opt;
2269*4882a593Smuzhiyun u8 connected = 0;
2270*4882a593Smuzhiyun int ret;
2271*4882a593Smuzhiyun
2272*4882a593Smuzhiyun /* This is a little racy, but for status it's ok. The
2273*4882a593Smuzhiyun * reason we don't take a ref here is because we can't
2274*4882a593Smuzhiyun * take a ref in the index == -1 case as we would need
2275*4882a593Smuzhiyun * to put under the nbd_index_mutex, which could
2276*4882a593Smuzhiyun * deadlock if we are configured to remove ourselves
2277*4882a593Smuzhiyun * once we're disconnected.
2278*4882a593Smuzhiyun */
2279*4882a593Smuzhiyun if (refcount_read(&nbd->config_refs))
2280*4882a593Smuzhiyun connected = 1;
2281*4882a593Smuzhiyun dev_opt = nla_nest_start_noflag(reply, NBD_DEVICE_ITEM);
2282*4882a593Smuzhiyun if (!dev_opt)
2283*4882a593Smuzhiyun return -EMSGSIZE;
2284*4882a593Smuzhiyun ret = nla_put_u32(reply, NBD_DEVICE_INDEX, nbd->index);
2285*4882a593Smuzhiyun if (ret)
2286*4882a593Smuzhiyun return -EMSGSIZE;
2287*4882a593Smuzhiyun ret = nla_put_u8(reply, NBD_DEVICE_CONNECTED,
2288*4882a593Smuzhiyun connected);
2289*4882a593Smuzhiyun if (ret)
2290*4882a593Smuzhiyun return -EMSGSIZE;
2291*4882a593Smuzhiyun nla_nest_end(reply, dev_opt);
2292*4882a593Smuzhiyun return 0;
2293*4882a593Smuzhiyun }
2294*4882a593Smuzhiyun
2295*4882a593Smuzhiyun static int status_cb(int id, void *ptr, void *data)
2296*4882a593Smuzhiyun {
2297*4882a593Smuzhiyun struct nbd_device *nbd = ptr;
2298*4882a593Smuzhiyun return populate_nbd_status(nbd, (struct sk_buff *)data);
2299*4882a593Smuzhiyun }
2300*4882a593Smuzhiyun
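/*
 * NBD_CMD_STATUS handler: report a single device, or every registered
 * device when no index is given.
 */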
2301*4882a593Smuzhiyun static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info)
2302*4882a593Smuzhiyun {
2303*4882a593Smuzhiyun struct nlattr *dev_list;
2304*4882a593Smuzhiyun struct sk_buff *reply;
2305*4882a593Smuzhiyun void *reply_head;
2306*4882a593Smuzhiyun size_t msg_size;
2307*4882a593Smuzhiyun int index = -1;
2308*4882a593Smuzhiyun int ret = -ENOMEM;
2309*4882a593Smuzhiyun
2310*4882a593Smuzhiyun if (info->attrs[NBD_ATTR_INDEX])
2311*4882a593Smuzhiyun index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
2312*4882a593Smuzhiyun
2313*4882a593Smuzhiyun mutex_lock(&nbd_index_mutex);
2314*4882a593Smuzhiyun
2315*4882a593Smuzhiyun msg_size = nla_total_size(nla_attr_size(sizeof(u32)) +
2316*4882a593Smuzhiyun nla_attr_size(sizeof(u8)));
2317*4882a593Smuzhiyun msg_size *= (index == -1) ? nbd_total_devices : 1;
2318*4882a593Smuzhiyun
2319*4882a593Smuzhiyun reply = genlmsg_new(msg_size, GFP_KERNEL);
2320*4882a593Smuzhiyun if (!reply)
2321*4882a593Smuzhiyun goto out;
2322*4882a593Smuzhiyun reply_head = genlmsg_put_reply(reply, info, &nbd_genl_family, 0,
2323*4882a593Smuzhiyun NBD_CMD_STATUS);
2324*4882a593Smuzhiyun if (!reply_head) {
2325*4882a593Smuzhiyun nlmsg_free(reply);
2326*4882a593Smuzhiyun goto out;
2327*4882a593Smuzhiyun }
2328*4882a593Smuzhiyun
2329*4882a593Smuzhiyun dev_list = nla_nest_start_noflag(reply, NBD_ATTR_DEVICE_LIST);
2330*4882a593Smuzhiyun if (index == -1) {
2331*4882a593Smuzhiyun ret = idr_for_each(&nbd_index_idr, &status_cb, reply);
2332*4882a593Smuzhiyun if (ret) {
2333*4882a593Smuzhiyun nlmsg_free(reply);
2334*4882a593Smuzhiyun goto out;
2335*4882a593Smuzhiyun }
2336*4882a593Smuzhiyun } else {
2337*4882a593Smuzhiyun struct nbd_device *nbd;
2338*4882a593Smuzhiyun nbd = idr_find(&nbd_index_idr, index);
2339*4882a593Smuzhiyun if (nbd) {
2340*4882a593Smuzhiyun ret = populate_nbd_status(nbd, reply);
2341*4882a593Smuzhiyun if (ret) {
2342*4882a593Smuzhiyun nlmsg_free(reply);
2343*4882a593Smuzhiyun goto out;
2344*4882a593Smuzhiyun }
2345*4882a593Smuzhiyun }
2346*4882a593Smuzhiyun }
2347*4882a593Smuzhiyun nla_nest_end(reply, dev_list);
2348*4882a593Smuzhiyun genlmsg_end(reply, reply_head);
2349*4882a593Smuzhiyun ret = genlmsg_reply(reply, info);
2350*4882a593Smuzhiyun out:
2351*4882a593Smuzhiyun mutex_unlock(&nbd_index_mutex);
2352*4882a593Smuzhiyun return ret;
2353*4882a593Smuzhiyun }
2354*4882a593Smuzhiyun
2355*4882a593Smuzhiyun static void nbd_connect_reply(struct genl_info *info, int index)
2356*4882a593Smuzhiyun {
2357*4882a593Smuzhiyun struct sk_buff *skb;
2358*4882a593Smuzhiyun void *msg_head;
2359*4882a593Smuzhiyun int ret;
2360*4882a593Smuzhiyun
2361*4882a593Smuzhiyun skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL);
2362*4882a593Smuzhiyun if (!skb)
2363*4882a593Smuzhiyun return;
2364*4882a593Smuzhiyun msg_head = genlmsg_put_reply(skb, info, &nbd_genl_family, 0,
2365*4882a593Smuzhiyun NBD_CMD_CONNECT);
2366*4882a593Smuzhiyun if (!msg_head) {
2367*4882a593Smuzhiyun nlmsg_free(skb);
2368*4882a593Smuzhiyun return;
2369*4882a593Smuzhiyun }
2370*4882a593Smuzhiyun ret = nla_put_u32(skb, NBD_ATTR_INDEX, index);
2371*4882a593Smuzhiyun if (ret) {
2372*4882a593Smuzhiyun nlmsg_free(skb);
2373*4882a593Smuzhiyun return;
2374*4882a593Smuzhiyun }
2375*4882a593Smuzhiyun genlmsg_end(skb, msg_head);
2376*4882a593Smuzhiyun genlmsg_reply(skb, info);
2377*4882a593Smuzhiyun }
2378*4882a593Smuzhiyun
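/*
 * Multicast an NBD_CMD_LINK_DEAD notification carrying the device index so
 * that listening user space can react to the dead connection.
 */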
static void nbd_mcast_index(int index)
{
	struct sk_buff *skb;
	void *msg_head;
	int ret;

	skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL);
	if (!skb)
		return;
	msg_head = genlmsg_put(skb, 0, 0, &nbd_genl_family, 0,
			       NBD_CMD_LINK_DEAD);
	if (!msg_head) {
		nlmsg_free(skb);
		return;
	}
	ret = nla_put_u32(skb, NBD_ATTR_INDEX, index);
	if (ret) {
		nlmsg_free(skb);
		return;
	}
	genlmsg_end(skb, msg_head);
	genlmsg_multicast(&nbd_genl_family, skb, 0, 0, GFP_KERNEL);
}

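/*
 * Work item: send the NBD_CMD_LINK_DEAD multicast for a failed link and
 * free the queued arguments.
 */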
static void nbd_dead_link_work(struct work_struct *work)
{
	struct link_dead_args *args = container_of(work, struct link_dead_args,
						   work);
	nbd_mcast_index(args->index);
	kfree(args);
}

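/*
 * Module init: validate the max_part and nbds_max parameters, register the
 * NBD block major and the netlink family, then create the initial devices.
 */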
static int __init nbd_init(void)
{
	int i;

	BUILD_BUG_ON(sizeof(struct nbd_request) != 28);

	if (max_part < 0) {
		printk(KERN_ERR "nbd: max_part must be >= 0\n");
		return -EINVAL;
	}

	part_shift = 0;
	if (max_part > 0) {
		part_shift = fls(max_part);

		/*
		 * Adjust max_part according to part_shift, as it is exported
		 * to user space so that users can know the maximum number of
		 * partitions the kernel should be able to manage.
		 *
		 * Note that -1 is required because partition 0 is reserved
		 * for the whole disk.
		 */
		max_part = (1UL << part_shift) - 1;
	}

	if ((1UL << part_shift) > DISK_MAX_PARTS)
		return -EINVAL;

	if (nbds_max > 1UL << (MINORBITS - part_shift))
		return -EINVAL;

	if (register_blkdev(NBD_MAJOR, "nbd"))
		return -EIO;

	if (genl_register_family(&nbd_genl_family)) {
		unregister_blkdev(NBD_MAJOR, "nbd");
		return -EINVAL;
	}
	nbd_dbg_init();

	mutex_lock(&nbd_index_mutex);
	for (i = 0; i < nbds_max; i++)
		nbd_dev_add(i);
	mutex_unlock(&nbd_index_mutex);
	return 0;
}

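/* idr_for_each() callback: collect every device onto the removal list. */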
static int nbd_exit_cb(int id, void *ptr, void *data)
{
	struct list_head *list = (struct list_head *)data;
	struct nbd_device *nbd = ptr;

	list_add_tail(&nbd->list, list);
	return 0;
}

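/*
 * Module exit: tear down the netlink interface and debugfs entries, then
 * drop the remaining reference on every device, warning if configs or
 * devices appear to be leaked.
 */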
static void __exit nbd_cleanup(void)
{
	struct nbd_device *nbd;
	LIST_HEAD(del_list);

	/*
	 * Unregister netlink interface prior to waiting
	 * for the completion of netlink commands.
	 */
	genl_unregister_family(&nbd_genl_family);

	nbd_dbg_close();

	mutex_lock(&nbd_index_mutex);
	idr_for_each(&nbd_index_idr, &nbd_exit_cb, &del_list);
	mutex_unlock(&nbd_index_mutex);

	while (!list_empty(&del_list)) {
		nbd = list_first_entry(&del_list, struct nbd_device, list);
		list_del_init(&nbd->list);
		if (refcount_read(&nbd->config_refs))
			printk(KERN_ERR "nbd: possibly leaking nbd_config (ref %d)\n",
			       refcount_read(&nbd->config_refs));
		if (refcount_read(&nbd->refs) != 1)
			printk(KERN_ERR "nbd: possibly leaking a device\n");
		nbd_put(nbd);
	}

	idr_destroy(&nbd_index_idr);
	unregister_blkdev(NBD_MAJOR, "nbd");
}

module_init(nbd_init);
module_exit(nbd_cleanup);

MODULE_DESCRIPTION("Network Block Device");
MODULE_LICENSE("GPL");

module_param(nbds_max, int, 0444);
MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "number of partitions per device (default: 16)");