// SPDX-License-Identifier: GPL-2.0
/*
 * Common code for the NVMe target.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/pci-p2pdma.h>
#include <linux/scatterlist.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

#include "nvmet.h"

struct workqueue_struct *buffered_io_wq;
static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);

/*
 * This read/write semaphore is used to synchronize access to configuration
 * information on a target system that will result in discovery log page
 * information change for at least one host.
 * The full list of resources protected by this semaphore is:
 *
 *  - subsystems list
 *  - per-subsystem allowed hosts list
 *  - allow_any_host subsystem attribute
 *  - nvmet_genctr
 *  - the nvmet_transports array
 *
 * When updating any of those lists/structures, the write lock should be
 * obtained, while when reading (populating the discovery log page or checking
 * a host-subsystem link) the read lock is obtained to allow concurrent reads.
 */
DECLARE_RWSEM(nvmet_config_sem);

u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
u64 nvmet_ana_chgcnt;
DECLARE_RWSEM(nvmet_ana_sem);

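/*
 * Translate a Linux errno from the backend into an NVMe status code and
 * record the byte offset of the offending command field in req->error_loc.
 */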
inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
{
	u16 status;

	switch (errno) {
	case 0:
		status = NVME_SC_SUCCESS;
		break;
	case -ENOSPC:
		req->error_loc = offsetof(struct nvme_rw_command, length);
		status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
		break;
	case -EREMOTEIO:
		req->error_loc = offsetof(struct nvme_rw_command, slba);
		status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
		break;
	case -EOPNOTSUPP:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		switch (req->cmd->common.opcode) {
		case nvme_cmd_dsm:
		case nvme_cmd_write_zeroes:
			status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
			break;
		default:
			status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
		}
		break;
	case -ENODATA:
		req->error_loc = offsetof(struct nvme_rw_command, nsid);
		status = NVME_SC_ACCESS_DENIED;
		break;
	case -EIO:
		fallthrough;
	default:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		status = NVME_SC_INTERNAL | NVME_SC_DNR;
	}

	return status;
}

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn);

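/*
 * Helpers for moving data between the request's scatter-gather list and a
 * linear buffer.  A short copy indicates a malformed data buffer and is
 * reported as an SGL error with the Do Not Retry bit set.
 */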
u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
		size_t len)
{
	if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	}
	return 0;
}

u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
	if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	}
	return 0;
}

u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
{
	if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	}
	return 0;
}

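/*
 * xa_for_each() visits entries in ascending index order, so the last
 * namespace seen carries the highest NSID in the subsystem.
 */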
static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
{
	unsigned long nsid = 0;
	struct nvmet_ns *cur;
	unsigned long idx;

	xa_for_each(&subsys->namespaces, idx, cur)
		nsid = cur->nsid;

	return nsid;
}

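/*
 * Build the Asynchronous Event completion result dword: event type in
 * byte 0, event information in byte 1, associated log page in byte 2.
 */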
static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
{
	return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}

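/*
 * The two helpers below pop outstanding AER commands under ctrl->lock but
 * drop the lock around nvmet_req_complete(), so the transport completion
 * path never runs with the controller lock held.
 */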
static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl)
{
	u16 status = NVME_SC_INTERNAL | NVME_SC_DNR;
	struct nvmet_req *req;

	mutex_lock(&ctrl->lock);
	while (ctrl->nr_async_event_cmds) {
		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, status);
		mutex_lock(&ctrl->lock);
	}
	mutex_unlock(&ctrl->lock);
}

static void nvmet_async_events_process(struct nvmet_ctrl *ctrl)
{
	struct nvmet_async_event *aen;
	struct nvmet_req *req;

	mutex_lock(&ctrl->lock);
	while (ctrl->nr_async_event_cmds && !list_empty(&ctrl->async_events)) {
		aen = list_first_entry(&ctrl->async_events,
				       struct nvmet_async_event, entry);
		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		nvmet_set_result(req, nvmet_async_event_result(aen));

		list_del(&aen->entry);
		kfree(aen);

		mutex_unlock(&ctrl->lock);
		trace_nvmet_async_event(ctrl, req->cqe->result.u32);
		nvmet_req_complete(req, 0);
		mutex_lock(&ctrl->lock);
	}
	mutex_unlock(&ctrl->lock);
}

static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
	struct nvmet_async_event *aen, *tmp;

	mutex_lock(&ctrl->lock);
	list_for_each_entry_safe(aen, tmp, &ctrl->async_events, entry) {
		list_del(&aen->entry);
		kfree(aen);
	}
	mutex_unlock(&ctrl->lock);
}

static void nvmet_async_event_work(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, async_event_work);

	nvmet_async_events_process(ctrl);
}

void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
		u8 event_info, u8 log_page)
{
	struct nvmet_async_event *aen;

	aen = kmalloc(sizeof(*aen), GFP_KERNEL);
	if (!aen)
		return;

	aen->event_type = event_type;
	aen->event_info = event_info;
	aen->log_page = log_page;

	mutex_lock(&ctrl->lock);
	list_add_tail(&aen->entry, &ctrl->async_events);
	mutex_unlock(&ctrl->lock);

	schedule_work(&ctrl->async_event_work);
}

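/*
 * Track changed NSIDs for the Changed Namespace List log page.  Once more
 * than NVME_MAX_CHANGED_NAMESPACES distinct namespaces have changed, the
 * list is collapsed to the single sentinel entry 0xffffffff.
 */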
static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	u32 i;

	mutex_lock(&ctrl->lock);
	if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES)
		goto out_unlock;

	for (i = 0; i < ctrl->nr_changed_ns; i++) {
		if (ctrl->changed_ns_list[i] == nsid)
			goto out_unlock;
	}

	if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) {
		ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff);
		ctrl->nr_changed_ns = U32_MAX;
		goto out_unlock;
	}

	ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid;
out_unlock:
	mutex_unlock(&ctrl->lock);
}

void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ctrl *ctrl;

	lockdep_assert_held(&subsys->lock);

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
				NVME_AER_NOTICE_NS_CHANGED,
				NVME_LOG_CHANGED_NS);
	}
}

void nvmet_send_ana_event(struct nvmet_subsys *subsys,
		struct nvmet_port *port)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (port && ctrl->port != port)
			continue;
		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
				NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
	}
	mutex_unlock(&subsys->lock);
}

void nvmet_port_send_ana_event(struct nvmet_port *port)
{
	struct nvmet_subsys_link *p;

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry)
		nvmet_send_ana_event(p->subsys, port);
	up_read(&nvmet_config_sem);
}

int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
{
	int ret = 0;

	down_write(&nvmet_config_sem);
	if (nvmet_transports[ops->type])
		ret = -EINVAL;
	else
		nvmet_transports[ops->type] = ops;
	up_write(&nvmet_config_sem);

	return ret;
}
EXPORT_SYMBOL_GPL(nvmet_register_transport);

void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
{
	down_write(&nvmet_config_sem);
	nvmet_transports[ops->type] = NULL;
	up_write(&nvmet_config_sem);
}
EXPORT_SYMBOL_GPL(nvmet_unregister_transport);

void nvmet_port_del_ctrls(struct nvmet_port *port, struct nvmet_subsys *subsys)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->port == port)
			ctrl->ops->delete_ctrl(ctrl);
	}
	mutex_unlock(&subsys->lock);
}

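/*
 * Note: nvmet_config_sem is dropped around request_module() below, because
 * a freshly loaded transport registers itself via nvmet_register_transport(),
 * which takes the same semaphore for writing.
 */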
int nvmet_enable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;
	int ret;

	lockdep_assert_held(&nvmet_config_sem);

	ops = nvmet_transports[port->disc_addr.trtype];
	if (!ops) {
		up_write(&nvmet_config_sem);
		request_module("nvmet-transport-%d", port->disc_addr.trtype);
		down_write(&nvmet_config_sem);
		ops = nvmet_transports[port->disc_addr.trtype];
		if (!ops) {
			pr_err("transport type %d not supported\n",
				port->disc_addr.trtype);
			return -EINVAL;
		}
	}

	if (!try_module_get(ops->owner))
		return -EINVAL;

	/*
	 * If the user requested PI support and the transport isn't pi capable,
	 * don't enable the port.
	 */
	if (port->pi_enable && !(ops->flags & NVMF_METADATA_SUPPORTED)) {
		pr_err("T10-PI is not supported by transport type %d\n",
		       port->disc_addr.trtype);
		ret = -EINVAL;
		goto out_put;
	}

	ret = ops->add_port(port);
	if (ret)
		goto out_put;

	/* If the transport didn't set inline_data_size, then disable it. */
	if (port->inline_data_size < 0)
		port->inline_data_size = 0;

	port->enabled = true;
	port->tr_ops = ops;
	return 0;

out_put:
	module_put(ops->owner);
	return ret;
}

void nvmet_disable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;

	lockdep_assert_held(&nvmet_config_sem);

	port->enabled = false;
	port->tr_ops = NULL;

	ops = nvmet_transports[port->disc_addr.trtype];
	ops->remove_port(port);
	module_put(ops->owner);
}

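/*
 * Traffic-based keep-alive: any command processed in nvmet_req_init() sets
 * ctrl->reset_tbkas, and the timer treats such traffic as an implicit
 * keep-alive instead of declaring the host dead.
 */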
static void nvmet_keep_alive_timer(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvmet_ctrl, ka_work);
	bool reset_tbkas = ctrl->reset_tbkas;

	ctrl->reset_tbkas = false;
	if (reset_tbkas) {
		pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
			ctrl->cntlid);
		schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
		return;
	}

	pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
		ctrl->cntlid, ctrl->kato);

	nvmet_ctrl_fatal_error(ctrl);
}

void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	if (unlikely(ctrl->kato == 0))
		return;

	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
		ctrl->cntlid, ctrl->kato);

	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}

void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	if (unlikely(ctrl->kato == 0))
		return;

	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);

	cancel_delayed_work_sync(&ctrl->ka_work);
}

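/*
 * Look up a namespace by NSID and take a reference on it; the caller must
 * drop the reference with nvmet_put_namespace().
 */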
struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	struct nvmet_ns *ns;

	ns = xa_load(&ctrl->subsys->namespaces, le32_to_cpu(nsid));
	if (ns)
		percpu_ref_get(&ns->ref);

	return ns;
}

static void nvmet_destroy_namespace(struct percpu_ref *ref)
{
	struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);

	complete(&ns->disable_done);
}

void nvmet_put_namespace(struct nvmet_ns *ns)
{
	percpu_ref_put(&ns->ref);
}

static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
{
	nvmet_bdev_ns_disable(ns);
	nvmet_file_ns_disable(ns);
}

static int nvmet_p2pmem_ns_enable(struct nvmet_ns *ns)
{
	int ret;
	struct pci_dev *p2p_dev;

	if (!ns->use_p2pmem)
		return 0;

	if (!ns->bdev) {
		pr_err("peer-to-peer DMA is not supported by non-block device namespaces\n");
		return -EINVAL;
	}

	if (!blk_queue_pci_p2pdma(ns->bdev->bd_disk->queue)) {
		pr_err("peer-to-peer DMA is not supported by the driver of %s\n",
		       ns->device_path);
		return -EINVAL;
	}

	if (ns->p2p_dev) {
		ret = pci_p2pdma_distance(ns->p2p_dev, nvmet_ns_dev(ns), true);
		if (ret < 0)
			return -EINVAL;
	} else {
		/*
		 * Right now we just check that there is p2pmem available so
		 * we can report an error to the user right away if there
		 * is not. We'll find the actual device to use once we
		 * setup the controller when the port's device is available.
		 */

		p2p_dev = pci_p2pmem_find(nvmet_ns_dev(ns));
		if (!p2p_dev) {
			pr_err("no peer-to-peer memory is available for %s\n",
			       ns->device_path);
			return -EINVAL;
		}

		pci_dev_put(p2p_dev);
	}

	return 0;
}

/*
 * Note: ctrl->subsys->lock should be held when calling this function
 */
static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
				    struct nvmet_ns *ns)
{
	struct device *clients[2];
	struct pci_dev *p2p_dev;
	int ret;

	if (!ctrl->p2p_client || !ns->use_p2pmem)
		return;

	if (ns->p2p_dev) {
		ret = pci_p2pdma_distance(ns->p2p_dev, ctrl->p2p_client, true);
		if (ret < 0)
			return;

		p2p_dev = pci_dev_get(ns->p2p_dev);
	} else {
		clients[0] = ctrl->p2p_client;
		clients[1] = nvmet_ns_dev(ns);

		p2p_dev = pci_p2pmem_find_many(clients, ARRAY_SIZE(clients));
		if (!p2p_dev) {
			pr_err("no peer-to-peer memory is available that's supported by %s and %s\n",
			       dev_name(ctrl->p2p_client), ns->device_path);
			return;
		}
	}

	ret = radix_tree_insert(&ctrl->p2p_ns_map, ns->nsid, p2p_dev);
	if (ret < 0) {
		/* Don't claim we are using p2pmem when the mapping failed. */
		pci_dev_put(p2p_dev);
		return;
	}

	pr_info("using p2pmem on %s for nsid %d\n", pci_name(p2p_dev),
		ns->nsid);
}

void nvmet_ns_revalidate(struct nvmet_ns *ns)
{
	loff_t oldsize = ns->size;

	if (ns->bdev)
		nvmet_bdev_ns_revalidate(ns);
	else
		nvmet_file_ns_revalidate(ns);

	if (oldsize != ns->size)
		nvmet_ns_changed(ns->subsys, ns->nsid);
}

int nvmet_ns_enable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;
	int ret;

	mutex_lock(&subsys->lock);
	ret = 0;

	if (nvmet_passthru_ctrl(subsys)) {
		pr_info("cannot enable both passthru and regular namespaces for a single subsystem");
		goto out_unlock;
	}

	if (ns->enabled)
		goto out_unlock;

	ret = -EMFILE;
	if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
		goto out_unlock;

	ret = nvmet_bdev_ns_enable(ns);
	if (ret == -ENOTBLK)
		ret = nvmet_file_ns_enable(ns);
	if (ret)
		goto out_unlock;

	ret = nvmet_p2pmem_ns_enable(ns);
	if (ret)
		goto out_dev_disable;

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		nvmet_p2pmem_ns_add_p2p(ctrl, ns);

	ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
				0, GFP_KERNEL);
	if (ret)
		goto out_dev_put;

	if (ns->nsid > subsys->max_nsid)
		subsys->max_nsid = ns->nsid;

	ret = xa_insert(&subsys->namespaces, ns->nsid, ns, GFP_KERNEL);
	if (ret)
		goto out_restore_subsys_maxnsid;

	subsys->nr_namespaces++;

	nvmet_ns_changed(subsys, ns->nsid);
	ns->enabled = true;
	ret = 0;
out_unlock:
	mutex_unlock(&subsys->lock);
	return ret;

out_restore_subsys_maxnsid:
	subsys->max_nsid = nvmet_max_nsid(subsys);
	percpu_ref_exit(&ns->ref);
out_dev_put:
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
out_dev_disable:
	nvmet_ns_dev_disable(ns);
	goto out_unlock;
}

void nvmet_ns_disable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	if (!ns->enabled)
		goto out_unlock;

	ns->enabled = false;
	xa_erase(&ns->subsys->namespaces, ns->nsid);
	if (ns->nsid == subsys->max_nsid)
		subsys->max_nsid = nvmet_max_nsid(subsys);

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));

	mutex_unlock(&subsys->lock);

	/*
	 * Now that we removed the namespaces from the lookup list, we
	 * can kill the per_cpu ref and wait for any remaining references
	 * to be dropped, as well as a RCU grace period for anyone only
	 * using the namespace under rcu_read_lock().  Note that we can't
	 * use call_rcu here as we need to ensure the namespaces have
	 * been fully destroyed before unloading the module.
	 */
	percpu_ref_kill(&ns->ref);
	synchronize_rcu();
	wait_for_completion(&ns->disable_done);
	percpu_ref_exit(&ns->ref);

	mutex_lock(&subsys->lock);

	subsys->nr_namespaces--;
	nvmet_ns_changed(subsys, ns->nsid);
	nvmet_ns_dev_disable(ns);
out_unlock:
	mutex_unlock(&subsys->lock);
}

void nvmet_ns_free(struct nvmet_ns *ns)
{
	nvmet_ns_disable(ns);

	down_write(&nvmet_ana_sem);
	nvmet_ana_group_enabled[ns->anagrpid]--;
	up_write(&nvmet_ana_sem);

	kfree(ns->device_path);
	kfree(ns);
}

struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ns *ns;

	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		return NULL;

	init_completion(&ns->disable_done);

	ns->nsid = nsid;
	ns->subsys = subsys;

	down_write(&nvmet_ana_sem);
	ns->anagrpid = NVMET_DEFAULT_ANA_GRPID;
	nvmet_ana_group_enabled[ns->anagrpid]++;
	up_write(&nvmet_ana_sem);

	uuid_gen(&ns->uuid);
	ns->buffered_io = false;

	return ns;
}

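/*
 * Advance the submission queue head with a cmpxchg() loop, since
 * completions for the same queue may race; the value wraps modulo the
 * queue size.
 */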
static void nvmet_update_sq_head(struct nvmet_req *req)
{
	if (req->sq->size) {
		u32 old_sqhd, new_sqhd;

		do {
			old_sqhd = req->sq->sqhd;
			new_sqhd = (old_sqhd + 1) % req->sq->size;
		} while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) !=
					old_sqhd);
	}
	req->cqe->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF);
}

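/*
 * Record the failure in the controller's error log; err_counter modulo
 * NVMET_ERROR_LOG_SLOTS turns the slot array into a circular log backing
 * the Error Information log page.
 */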
static void nvmet_set_error(struct nvmet_req *req, u16 status)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvme_error_slot *new_error_slot;
	unsigned long flags;

	req->cqe->status = cpu_to_le16(status << 1);

	if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC)
		return;

	spin_lock_irqsave(&ctrl->error_lock, flags);
	ctrl->err_counter++;
	new_error_slot =
		&ctrl->slots[ctrl->err_counter % NVMET_ERROR_LOG_SLOTS];

	new_error_slot->error_count = cpu_to_le64(ctrl->err_counter);
	new_error_slot->sqid = cpu_to_le16(req->sq->qid);
	new_error_slot->cmdid = cpu_to_le16(req->cmd->common.command_id);
	new_error_slot->status_field = cpu_to_le16(status << 1);
	new_error_slot->param_error_location = cpu_to_le16(req->error_loc);
	new_error_slot->lba = cpu_to_le64(req->error_slba);
	new_error_slot->nsid = req->cmd->common.nsid;
	spin_unlock_irqrestore(&ctrl->error_lock, flags);

	/* set the more bit for this request */
	req->cqe->status |= cpu_to_le16(1 << 14);
}

static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	struct nvmet_ns *ns = req->ns;

	if (!req->sq->sqhd_disabled)
		nvmet_update_sq_head(req);
	req->cqe->sq_id = cpu_to_le16(req->sq->qid);
	req->cqe->command_id = req->cmd->common.command_id;

	if (unlikely(status))
		nvmet_set_error(req, status);

	trace_nvmet_req_complete(req);

	req->ops->queue_response(req);
	if (ns)
		nvmet_put_namespace(ns);
}

void nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	__nvmet_req_complete(req, status);
	percpu_ref_put(&req->sq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_req_complete);

void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
		u16 qid, u16 size)
{
	cq->qid = qid;
	cq->size = size;
}

void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
		u16 qid, u16 size)
{
	sq->sqhd = 0;
	sq->qid = qid;
	sq->size = size;

	ctrl->sqs[qid] = sq;
}

static void nvmet_confirm_sq(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->confirm_done);
}

void nvmet_sq_destroy(struct nvmet_sq *sq)
{
	struct nvmet_ctrl *ctrl = sq->ctrl;

	/*
	 * If this is the admin queue, complete all AERs so that our
	 * queue doesn't have outstanding requests on it.
	 */
	if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq)
		nvmet_async_events_failall(ctrl);
	percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
	wait_for_completion(&sq->confirm_done);
	wait_for_completion(&sq->free_done);
	percpu_ref_exit(&sq->ref);

	if (ctrl) {
		/*
		 * The teardown flow may take some time, and the host may not
		 * send us keep-alive during this period, hence reset the
		 * traffic based keep-alive timer so we don't trigger a
		 * controller teardown as a result of a keep-alive expiration.
		 */
		ctrl->reset_tbkas = true;
		nvmet_ctrl_put(ctrl);
		sq->ctrl = NULL; /* allows reusing the queue later */
	}
}
EXPORT_SYMBOL_GPL(nvmet_sq_destroy);

static void nvmet_sq_free(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->free_done);
}

int nvmet_sq_init(struct nvmet_sq *sq)
{
	int ret;

	ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
	if (ret) {
		pr_err("percpu_ref init failed!\n");
		return ret;
	}
	init_completion(&sq->free_done);
	init_completion(&sq->confirm_done);

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);

static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
		struct nvmet_ns *ns)
{
	enum nvme_ana_state state = port->ana_state[ns->anagrpid];

	if (unlikely(state == NVME_ANA_INACCESSIBLE))
		return NVME_SC_ANA_INACCESSIBLE;
	if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
		return NVME_SC_ANA_PERSISTENT_LOSS;
	if (unlikely(state == NVME_ANA_CHANGE))
		return NVME_SC_ANA_TRANSITION;
	return 0;
}

static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
{
	if (unlikely(req->ns->readonly)) {
		switch (req->cmd->common.opcode) {
		case nvme_cmd_read:
		case nvme_cmd_flush:
			break;
		default:
			return NVME_SC_NS_WRITE_PROTECTED;
		}
	}

	return 0;
}

static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;
	u16 ret;

	ret = nvmet_check_ctrl_status(req, cmd);
	if (unlikely(ret))
		return ret;

	if (nvmet_req_passthru_ctrl(req))
		return nvmet_parse_passthru_io_cmd(req);

	req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
	if (unlikely(!req->ns)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return NVME_SC_INVALID_NS | NVME_SC_DNR;
	}
	ret = nvmet_check_ana_state(req->port, req->ns);
	if (unlikely(ret)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return ret;
	}
	ret = nvmet_io_cmd_check_access(req);
	if (unlikely(ret)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return ret;
	}

	if (req->ns->file)
		return nvmet_file_parse_io_cmd(req);
	else
		return nvmet_bdev_parse_io_cmd(req);
}

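/*
 * Common command initialization called by the transport drivers for each
 * newly received command, before the command is parsed and executed.
 */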
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
		struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
{
	u8 flags = req->cmd->common.flags;
	u16 status;

	req->cq = cq;
	req->sq = sq;
	req->ops = ops;
	req->sg = NULL;
	req->metadata_sg = NULL;
	req->sg_cnt = 0;
	req->metadata_sg_cnt = 0;
	req->transfer_len = 0;
	req->metadata_len = 0;
	req->cqe->status = 0;
	req->cqe->sq_head = 0;
	req->ns = NULL;
	req->error_loc = NVMET_NO_ERROR_LOC;
	req->error_slba = 0;

	/* no support for fused commands yet */
	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
		req->error_loc = offsetof(struct nvme_common_command, flags);
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	/*
	 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
	 * contains an address of a single contiguous physical buffer that is
	 * byte aligned.
	 */
	if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
		req->error_loc = offsetof(struct nvme_common_command, flags);
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	if (unlikely(!req->sq->ctrl))
		/* will return an error for any non-connect command: */
		status = nvmet_parse_connect_cmd(req);
	else if (likely(req->sq->qid != 0))
		status = nvmet_parse_io_cmd(req);
	else
		status = nvmet_parse_admin_cmd(req);

	if (status)
		goto fail;

	trace_nvmet_req_init(req, req->cmd);

	if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	if (sq->ctrl)
		sq->ctrl->reset_tbkas = true;

	return true;

fail:
	__nvmet_req_complete(req, status);
	return false;
}
EXPORT_SYMBOL_GPL(nvmet_req_init);

void nvmet_req_uninit(struct nvmet_req *req)
{
	percpu_ref_put(&req->sq->ref);
	if (req->ns)
		nvmet_put_namespace(req->ns);
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);

bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len)
{
	if (unlikely(len != req->transfer_len)) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
		return false;
	}

	return true;
}
EXPORT_SYMBOL_GPL(nvmet_check_transfer_len);

bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
{
	if (unlikely(data_len > req->transfer_len)) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
		return false;
	}

	return true;
}

static unsigned int nvmet_data_transfer_len(struct nvmet_req *req)
{
	return req->transfer_len - req->metadata_len;
}

static int nvmet_req_alloc_p2pmem_sgls(struct pci_dev *p2p_dev,
		struct nvmet_req *req)
{
	req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
			nvmet_data_transfer_len(req));
	if (!req->sg)
		goto out_err;

	if (req->metadata_len) {
		req->metadata_sg = pci_p2pmem_alloc_sgl(p2p_dev,
				&req->metadata_sg_cnt, req->metadata_len);
		if (!req->metadata_sg)
			goto out_free_sg;
	}

	req->p2p_dev = p2p_dev;

	return 0;
out_free_sg:
	/* req->p2p_dev is only set on success; free with the local device. */
	pci_p2pmem_free_sgl(p2p_dev, req->sg);
out_err:
	return -ENOMEM;
}

static struct pci_dev *nvmet_req_find_p2p_dev(struct nvmet_req *req)
{
	if (!IS_ENABLED(CONFIG_PCI_P2PDMA) ||
	    !req->sq->ctrl || !req->sq->qid || !req->ns)
		return NULL;
	return radix_tree_lookup(&req->sq->ctrl->p2p_ns_map, req->ns->nsid);
}

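/*
 * Allocate the data (and optional metadata) SGLs, preferring peer-to-peer
 * memory when the namespace has a P2P device mapped for this controller,
 * and falling back to regular page allocations otherwise.
 */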
int nvmet_req_alloc_sgls(struct nvmet_req *req)
{
	struct pci_dev *p2p_dev = nvmet_req_find_p2p_dev(req);

	if (p2p_dev && !nvmet_req_alloc_p2pmem_sgls(p2p_dev, req))
		return 0;

	req->sg = sgl_alloc(nvmet_data_transfer_len(req), GFP_KERNEL,
			    &req->sg_cnt);
	if (unlikely(!req->sg))
		goto out;

	if (req->metadata_len) {
		req->metadata_sg = sgl_alloc(req->metadata_len, GFP_KERNEL,
					     &req->metadata_sg_cnt);
		if (unlikely(!req->metadata_sg))
			goto out_free;
	}

	return 0;
out_free:
	sgl_free(req->sg);
out:
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgls);

void nvmet_req_free_sgls(struct nvmet_req *req)
{
	if (req->p2p_dev) {
		pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
		if (req->metadata_sg)
			pci_p2pmem_free_sgl(req->p2p_dev, req->metadata_sg);
		req->p2p_dev = NULL;
	} else {
		sgl_free(req->sg);
		if (req->metadata_sg)
			sgl_free(req->metadata_sg);
	}

	req->sg = NULL;
	req->metadata_sg = NULL;
	req->sg_cnt = 0;
	req->metadata_sg_cnt = 0;
}
EXPORT_SYMBOL_GPL(nvmet_req_free_sgls);

static inline bool nvmet_cc_en(u32 cc)
{
	return (cc >> NVME_CC_EN_SHIFT) & 0x1;
}

static inline u8 nvmet_cc_css(u32 cc)
{
	return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_mps(u32 cc)
{
	return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_ams(u32 cc)
{
	return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_shn(u32 cc)
{
	return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
}

static inline u8 nvmet_cc_iosqes(u32 cc)
{
	return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_iocqes(u32 cc)
{
	return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
}

static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	/*
	 * Only I/O controllers should verify iosqes,iocqes.
	 * Strictly speaking, the spec says a discovery controller
	 * should verify iosqes,iocqes are zeroed, however that
	 * would break backwards compatibility, so don't enforce it.
	 */
	if (ctrl->subsys->type != NVME_NQN_DISC &&
	    (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
	     nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES)) {
		ctrl->csts = NVME_CSTS_CFS;
		return;
	}

	if (nvmet_cc_mps(ctrl->cc) != 0 ||
	    nvmet_cc_ams(ctrl->cc) != 0 ||
	    nvmet_cc_css(ctrl->cc) != 0) {
		ctrl->csts = NVME_CSTS_CFS;
		return;
	}

	ctrl->csts = NVME_CSTS_RDY;

	/*
	 * Controllers that are not yet enabled should not really enforce the
	 * keep alive timeout, but we still want to track a timeout and cleanup
	 * in case a host died before it enabled the controller.  Hence, simply
	 * reset the keep alive timer when the controller is enabled.
	 */
	if (ctrl->kato)
		mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	/* XXX: tear down queues? */
	ctrl->csts &= ~NVME_CSTS_RDY;
	ctrl->cc = 0;
}

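/*
 * Handle a host write to the CC register: an EN 0->1 transition starts the
 * controller, EN 1->0 resets it, and a shutdown request tears the
 * controller down and reports shutdown completion in CSTS.
 */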
nvmet_update_cc(struct nvmet_ctrl * ctrl,u32 new)1159*4882a593Smuzhiyun void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
1160*4882a593Smuzhiyun {
1161*4882a593Smuzhiyun 	u32 old;
1162*4882a593Smuzhiyun 
1163*4882a593Smuzhiyun 	mutex_lock(&ctrl->lock);
1164*4882a593Smuzhiyun 	old = ctrl->cc;
1165*4882a593Smuzhiyun 	ctrl->cc = new;
1166*4882a593Smuzhiyun 
1167*4882a593Smuzhiyun 	if (nvmet_cc_en(new) && !nvmet_cc_en(old))
1168*4882a593Smuzhiyun 		nvmet_start_ctrl(ctrl);
1169*4882a593Smuzhiyun 	if (!nvmet_cc_en(new) && nvmet_cc_en(old))
1170*4882a593Smuzhiyun 		nvmet_clear_ctrl(ctrl);
1171*4882a593Smuzhiyun 	if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
1172*4882a593Smuzhiyun 		nvmet_clear_ctrl(ctrl);
1173*4882a593Smuzhiyun 		ctrl->csts |= NVME_CSTS_SHST_CMPLT;
1174*4882a593Smuzhiyun 	}
1175*4882a593Smuzhiyun 	if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
1176*4882a593Smuzhiyun 		ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
1177*4882a593Smuzhiyun 	mutex_unlock(&ctrl->lock);
1178*4882a593Smuzhiyun }
1179*4882a593Smuzhiyun 
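/*
 * Editorial summary of the CC transitions handled above, derived from the
 * code rather than from normative spec text:
 *
 *	EN   0 -> 1	nvmet_start_ctrl(): validate queue entry sizes,
 *			set CSTS.RDY, re-arm the keep-alive timer
 *	EN   1 -> 0	nvmet_clear_ctrl(): drop CSTS.RDY, zero CC
 *	SHN  0 -> !0	nvmet_clear_ctrl(), then set CSTS.SHST complete
 *	SHN !0 -> 0	clear CSTS.SHST
 */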
1180*4882a593Smuzhiyun static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
1181*4882a593Smuzhiyun {
1182*4882a593Smuzhiyun 	/* command sets supported: NVMe command set: */
1183*4882a593Smuzhiyun 	ctrl->cap = (1ULL << 37);
1184*4882a593Smuzhiyun 	/* CC.EN timeout in 500msec units: */
1185*4882a593Smuzhiyun 	ctrl->cap |= (15ULL << 24);
1186*4882a593Smuzhiyun 	/* maximum queue entries supported: */
1187*4882a593Smuzhiyun 	ctrl->cap |= NVMET_QUEUE_SIZE - 1;
1188*4882a593Smuzhiyun }
1189*4882a593Smuzhiyun 
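/*
 * Worked example (editorial sketch): assuming NVMET_QUEUE_SIZE is 1024 as
 * defined in nvmet.h, the CAP value built above is composed of
 *
 *	(1ULL << 37)	CSS: NVM command set supported
 *	(15ULL << 24)	TO:  CC.EN timeout of 15 * 500ms = 7.5s
 *	1023		MQES: maximum queue entries, zero's based
 *
 * i.e. CAP == 0x200f0003ff.
 */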
1190*4882a593Smuzhiyun u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
1191*4882a593Smuzhiyun 		struct nvmet_req *req, struct nvmet_ctrl **ret)
1192*4882a593Smuzhiyun {
1193*4882a593Smuzhiyun 	struct nvmet_subsys *subsys;
1194*4882a593Smuzhiyun 	struct nvmet_ctrl *ctrl;
1195*4882a593Smuzhiyun 	u16 status = 0;
1196*4882a593Smuzhiyun 
1197*4882a593Smuzhiyun 	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
1198*4882a593Smuzhiyun 	if (!subsys) {
1199*4882a593Smuzhiyun 		pr_warn("connect request for invalid subsystem %s!\n",
1200*4882a593Smuzhiyun 			subsysnqn);
1201*4882a593Smuzhiyun 		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
1202*4882a593Smuzhiyun 		return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
1203*4882a593Smuzhiyun 	}
1204*4882a593Smuzhiyun 
1205*4882a593Smuzhiyun 	mutex_lock(&subsys->lock);
1206*4882a593Smuzhiyun 	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
1207*4882a593Smuzhiyun 		if (ctrl->cntlid == cntlid) {
1208*4882a593Smuzhiyun 			if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
1209*4882a593Smuzhiyun 				pr_warn("hostnqn mismatch.\n");
1210*4882a593Smuzhiyun 				continue;
1211*4882a593Smuzhiyun 			}
1212*4882a593Smuzhiyun 			if (!kref_get_unless_zero(&ctrl->ref))
1213*4882a593Smuzhiyun 				continue;
1214*4882a593Smuzhiyun 
1215*4882a593Smuzhiyun 			*ret = ctrl;
1216*4882a593Smuzhiyun 			goto out;
1217*4882a593Smuzhiyun 		}
1218*4882a593Smuzhiyun 	}
1219*4882a593Smuzhiyun 
1220*4882a593Smuzhiyun 	pr_warn("could not find controller %d for subsys %s / host %s\n",
1221*4882a593Smuzhiyun 		cntlid, subsysnqn, hostnqn);
1222*4882a593Smuzhiyun 	req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
1223*4882a593Smuzhiyun 	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
1224*4882a593Smuzhiyun 
1225*4882a593Smuzhiyun out:
1226*4882a593Smuzhiyun 	mutex_unlock(&subsys->lock);
1227*4882a593Smuzhiyun 	nvmet_subsys_put(subsys);
1228*4882a593Smuzhiyun 	return status;
1229*4882a593Smuzhiyun }
1230*4882a593Smuzhiyun 
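/*
 * Hypothetical caller sketch (editorial illustration only): on success the
 * controller comes back with an elevated refcount taken via
 * kref_get_unless_zero(), so the caller owns a reference it must drop:
 *
 *	struct nvmet_ctrl *ctrl;
 *	u16 status;
 *
 *	status = nvmet_ctrl_find_get(subsysnqn, hostnqn, cntlid, req, &ctrl);
 *	if (status)
 *		return status;
 *	... use ctrl ...
 *	nvmet_ctrl_put(ctrl);
 */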
1231*4882a593Smuzhiyun u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
1232*4882a593Smuzhiyun {
1233*4882a593Smuzhiyun 	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
1234*4882a593Smuzhiyun 		pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
1235*4882a593Smuzhiyun 		       cmd->common.opcode, req->sq->qid);
1236*4882a593Smuzhiyun 		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
1237*4882a593Smuzhiyun 	}
1238*4882a593Smuzhiyun 
1239*4882a593Smuzhiyun 	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
1240*4882a593Smuzhiyun 		pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
1241*4882a593Smuzhiyun 		       cmd->common.opcode, req->sq->qid);
1242*4882a593Smuzhiyun 		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
1243*4882a593Smuzhiyun 	}
1244*4882a593Smuzhiyun 	return 0;
1245*4882a593Smuzhiyun }
1246*4882a593Smuzhiyun 
1247*4882a593Smuzhiyun bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn)
1248*4882a593Smuzhiyun {
1249*4882a593Smuzhiyun 	struct nvmet_host_link *p;
1250*4882a593Smuzhiyun 
1251*4882a593Smuzhiyun 	lockdep_assert_held(&nvmet_config_sem);
1252*4882a593Smuzhiyun 
1253*4882a593Smuzhiyun 	if (subsys->allow_any_host)
1254*4882a593Smuzhiyun 		return true;
1255*4882a593Smuzhiyun 
1256*4882a593Smuzhiyun 	if (subsys->type == NVME_NQN_DISC) /* allow all access to disc subsys */
1257*4882a593Smuzhiyun 		return true;
1258*4882a593Smuzhiyun 
1259*4882a593Smuzhiyun 	list_for_each_entry(p, &subsys->hosts, entry) {
1260*4882a593Smuzhiyun 		if (!strcmp(nvmet_host_name(p->host), hostnqn))
1261*4882a593Smuzhiyun 			return true;
1262*4882a593Smuzhiyun 	}
1263*4882a593Smuzhiyun 
1264*4882a593Smuzhiyun 	return false;
1265*4882a593Smuzhiyun }
1266*4882a593Smuzhiyun 
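/*
 * Editorial note: both inputs to this check are driven from configfs.
 * Under the usual nvmet configfs layout (an assumption about the mount
 * point, not something defined in this file) they correspond to
 * /sys/kernel/config/nvmet/subsystems/<nqn>/attr_allow_any_host and to
 * the symlinks in .../subsystems/<nqn>/allowed_hosts/, both of which are
 * updated under nvmet_config_sem, hence the lockdep assertion above.
 */
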
1267*4882a593Smuzhiyun /*
1268*4882a593Smuzhiyun  * Note: ctrl->subsys->lock should be held when calling this function
1269*4882a593Smuzhiyun  */
1270*4882a593Smuzhiyun static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl,
1271*4882a593Smuzhiyun 		struct nvmet_req *req)
1272*4882a593Smuzhiyun {
1273*4882a593Smuzhiyun 	struct nvmet_ns *ns;
1274*4882a593Smuzhiyun 	unsigned long idx;
1275*4882a593Smuzhiyun 
1276*4882a593Smuzhiyun 	if (!req->p2p_client)
1277*4882a593Smuzhiyun 		return;
1278*4882a593Smuzhiyun 
1279*4882a593Smuzhiyun 	ctrl->p2p_client = get_device(req->p2p_client);
1280*4882a593Smuzhiyun 
1281*4882a593Smuzhiyun 	xa_for_each(&ctrl->subsys->namespaces, idx, ns)
1282*4882a593Smuzhiyun 		nvmet_p2pmem_ns_add_p2p(ctrl, ns);
1283*4882a593Smuzhiyun }
1284*4882a593Smuzhiyun 
1285*4882a593Smuzhiyun /*
1286*4882a593Smuzhiyun  * Note: ctrl->subsys->lock should be held when calling this function
1287*4882a593Smuzhiyun  */
1288*4882a593Smuzhiyun static void nvmet_release_p2p_ns_map(struct nvmet_ctrl *ctrl)
1289*4882a593Smuzhiyun {
1290*4882a593Smuzhiyun 	struct radix_tree_iter iter;
1291*4882a593Smuzhiyun 	void __rcu **slot;
1292*4882a593Smuzhiyun 
1293*4882a593Smuzhiyun 	radix_tree_for_each_slot(slot, &ctrl->p2p_ns_map, &iter, 0)
1294*4882a593Smuzhiyun 		pci_dev_put(radix_tree_deref_slot(slot));
1295*4882a593Smuzhiyun 
1296*4882a593Smuzhiyun 	put_device(ctrl->p2p_client);
1297*4882a593Smuzhiyun }
1298*4882a593Smuzhiyun 
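/*
 * Editorial note on the asymmetry above: the setup side walks the
 * subsystem's namespace xarray and lets nvmet_p2pmem_ns_add_p2p() insert
 * pci_dev entries into ctrl->p2p_ns_map, while the release side walks the
 * radix tree directly, dropping one pci_dev reference per slot before
 * releasing the p2p_client device reference taken in
 * nvmet_setup_p2p_ns_map().
 */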
1299*4882a593Smuzhiyun static void nvmet_fatal_error_handler(struct work_struct *work)
1300*4882a593Smuzhiyun {
1301*4882a593Smuzhiyun 	struct nvmet_ctrl *ctrl =
1302*4882a593Smuzhiyun 			container_of(work, struct nvmet_ctrl, fatal_err_work);
1303*4882a593Smuzhiyun 
1304*4882a593Smuzhiyun 	pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
1305*4882a593Smuzhiyun 	ctrl->ops->delete_ctrl(ctrl);
1306*4882a593Smuzhiyun }
1307*4882a593Smuzhiyun 
1308*4882a593Smuzhiyun u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
1309*4882a593Smuzhiyun 		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
1310*4882a593Smuzhiyun {
1311*4882a593Smuzhiyun 	struct nvmet_subsys *subsys;
1312*4882a593Smuzhiyun 	struct nvmet_ctrl *ctrl;
1313*4882a593Smuzhiyun 	int ret;
1314*4882a593Smuzhiyun 	u16 status;
1315*4882a593Smuzhiyun 
1316*4882a593Smuzhiyun 	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
1317*4882a593Smuzhiyun 	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
1318*4882a593Smuzhiyun 	if (!subsys) {
1319*4882a593Smuzhiyun 		pr_warn("connect request for invalid subsystem %s!\n",
1320*4882a593Smuzhiyun 			subsysnqn);
1321*4882a593Smuzhiyun 		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
1322*4882a593Smuzhiyun 		goto out;
1323*4882a593Smuzhiyun 	}
1324*4882a593Smuzhiyun 
1325*4882a593Smuzhiyun 	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
1326*4882a593Smuzhiyun 	down_read(&nvmet_config_sem);
1327*4882a593Smuzhiyun 	if (!nvmet_host_allowed(subsys, hostnqn)) {
1328*4882a593Smuzhiyun 		pr_info("connect by host %s for subsystem %s not allowed\n",
1329*4882a593Smuzhiyun 			hostnqn, subsysnqn);
1330*4882a593Smuzhiyun 		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
1331*4882a593Smuzhiyun 		up_read(&nvmet_config_sem);
1332*4882a593Smuzhiyun 		status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
1333*4882a593Smuzhiyun 		goto out_put_subsystem;
1334*4882a593Smuzhiyun 	}
1335*4882a593Smuzhiyun 	up_read(&nvmet_config_sem);
1336*4882a593Smuzhiyun 
1337*4882a593Smuzhiyun 	status = NVME_SC_INTERNAL;
1338*4882a593Smuzhiyun 	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
1339*4882a593Smuzhiyun 	if (!ctrl)
1340*4882a593Smuzhiyun 		goto out_put_subsystem;
1341*4882a593Smuzhiyun 	mutex_init(&ctrl->lock);
1342*4882a593Smuzhiyun 
1343*4882a593Smuzhiyun 	nvmet_init_cap(ctrl);
1344*4882a593Smuzhiyun 
1345*4882a593Smuzhiyun 	ctrl->port = req->port;
1346*4882a593Smuzhiyun 
1347*4882a593Smuzhiyun 	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
1348*4882a593Smuzhiyun 	INIT_LIST_HEAD(&ctrl->async_events);
1349*4882a593Smuzhiyun 	INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
1350*4882a593Smuzhiyun 	INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
1351*4882a593Smuzhiyun 
1352*4882a593Smuzhiyun 	memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
1353*4882a593Smuzhiyun 	memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
1354*4882a593Smuzhiyun 
1355*4882a593Smuzhiyun 	kref_init(&ctrl->ref);
1356*4882a593Smuzhiyun 	ctrl->subsys = subsys;
1357*4882a593Smuzhiyun 	WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);
1358*4882a593Smuzhiyun 
1359*4882a593Smuzhiyun 	ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
1360*4882a593Smuzhiyun 			sizeof(__le32), GFP_KERNEL);
1361*4882a593Smuzhiyun 	if (!ctrl->changed_ns_list)
1362*4882a593Smuzhiyun 		goto out_free_ctrl;
1363*4882a593Smuzhiyun 
1364*4882a593Smuzhiyun 	ctrl->sqs = kcalloc(subsys->max_qid + 1,
1365*4882a593Smuzhiyun 			sizeof(struct nvmet_sq *),
1366*4882a593Smuzhiyun 			GFP_KERNEL);
1367*4882a593Smuzhiyun 	if (!ctrl->sqs)
1368*4882a593Smuzhiyun 		goto out_free_changed_ns_list;
1369*4882a593Smuzhiyun 
1370*4882a593Smuzhiyun 	if (subsys->cntlid_min > subsys->cntlid_max)
1371*4882a593Smuzhiyun 		goto out_free_sqs;
1372*4882a593Smuzhiyun 
1373*4882a593Smuzhiyun 	ret = ida_simple_get(&cntlid_ida,
1374*4882a593Smuzhiyun 			     subsys->cntlid_min, subsys->cntlid_max,
1375*4882a593Smuzhiyun 			     GFP_KERNEL);
1376*4882a593Smuzhiyun 	if (ret < 0) {
1377*4882a593Smuzhiyun 		status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
1378*4882a593Smuzhiyun 		goto out_free_sqs;
1379*4882a593Smuzhiyun 	}
1380*4882a593Smuzhiyun 	ctrl->cntlid = ret;
1381*4882a593Smuzhiyun 
1382*4882a593Smuzhiyun 	ctrl->ops = req->ops;
1383*4882a593Smuzhiyun 
1384*4882a593Smuzhiyun 	/*
1385*4882a593Smuzhiyun 	 * Discovery controllers may use an arbitrarily high value
1386*4882a593Smuzhiyun 	 * in order to clean up stale discovery sessions
1387*4882a593Smuzhiyun 	 */
1388*4882a593Smuzhiyun 	if ((ctrl->subsys->type == NVME_NQN_DISC) && !kato)
1389*4882a593Smuzhiyun 		kato = NVMET_DISC_KATO_MS;
1390*4882a593Smuzhiyun 
1391*4882a593Smuzhiyun 	/* keep-alive timeout in seconds */
1392*4882a593Smuzhiyun 	ctrl->kato = DIV_ROUND_UP(kato, 1000);
1393*4882a593Smuzhiyun 
1394*4882a593Smuzhiyun 	ctrl->err_counter = 0;
1395*4882a593Smuzhiyun 	spin_lock_init(&ctrl->error_lock);
1396*4882a593Smuzhiyun 
1397*4882a593Smuzhiyun 	nvmet_start_keep_alive_timer(ctrl);
1398*4882a593Smuzhiyun 
1399*4882a593Smuzhiyun 	mutex_lock(&subsys->lock);
1400*4882a593Smuzhiyun 	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
1401*4882a593Smuzhiyun 	nvmet_setup_p2p_ns_map(ctrl, req);
1402*4882a593Smuzhiyun 	mutex_unlock(&subsys->lock);
1403*4882a593Smuzhiyun 
1404*4882a593Smuzhiyun 	*ctrlp = ctrl;
1405*4882a593Smuzhiyun 	return 0;
1406*4882a593Smuzhiyun 
1407*4882a593Smuzhiyun out_free_sqs:
1408*4882a593Smuzhiyun 	kfree(ctrl->sqs);
1409*4882a593Smuzhiyun out_free_changed_ns_list:
1410*4882a593Smuzhiyun 	kfree(ctrl->changed_ns_list);
1411*4882a593Smuzhiyun out_free_ctrl:
1412*4882a593Smuzhiyun 	kfree(ctrl);
1413*4882a593Smuzhiyun out_put_subsystem:
1414*4882a593Smuzhiyun 	nvmet_subsys_put(subsys);
1415*4882a593Smuzhiyun out:
1416*4882a593Smuzhiyun 	return status;
1417*4882a593Smuzhiyun }
1418*4882a593Smuzhiyun 
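/*
 * Editorial note: the unwind labels above release resources in strict
 * reverse order of acquisition (sqs, then changed_ns_list, then the ctrl
 * allocation, then the subsystem reference), the usual kernel goto-ladder
 * idiom.  A worked example of the keep-alive rounding: a host-supplied
 * kato of 15000ms becomes DIV_ROUND_UP(15000, 1000) == 15, stored in
 * ctrl->kato as 15 seconds.
 */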
1419*4882a593Smuzhiyun static void nvmet_ctrl_free(struct kref *ref)
1420*4882a593Smuzhiyun {
1421*4882a593Smuzhiyun 	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
1422*4882a593Smuzhiyun 	struct nvmet_subsys *subsys = ctrl->subsys;
1423*4882a593Smuzhiyun 
1424*4882a593Smuzhiyun 	mutex_lock(&subsys->lock);
1425*4882a593Smuzhiyun 	nvmet_release_p2p_ns_map(ctrl);
1426*4882a593Smuzhiyun 	list_del(&ctrl->subsys_entry);
1427*4882a593Smuzhiyun 	mutex_unlock(&subsys->lock);
1428*4882a593Smuzhiyun 
1429*4882a593Smuzhiyun 	nvmet_stop_keep_alive_timer(ctrl);
1430*4882a593Smuzhiyun 
1431*4882a593Smuzhiyun 	flush_work(&ctrl->async_event_work);
1432*4882a593Smuzhiyun 	cancel_work_sync(&ctrl->fatal_err_work);
1433*4882a593Smuzhiyun 
1434*4882a593Smuzhiyun 	ida_simple_remove(&cntlid_ida, ctrl->cntlid);
1435*4882a593Smuzhiyun 
1436*4882a593Smuzhiyun 	nvmet_async_events_free(ctrl);
1437*4882a593Smuzhiyun 	kfree(ctrl->sqs);
1438*4882a593Smuzhiyun 	kfree(ctrl->changed_ns_list);
1439*4882a593Smuzhiyun 	kfree(ctrl);
1440*4882a593Smuzhiyun 
1441*4882a593Smuzhiyun 	nvmet_subsys_put(subsys);
1442*4882a593Smuzhiyun }
1443*4882a593Smuzhiyun 
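/*
 * Editorial note on the teardown order above: the controller is first
 * unlinked from the subsystem's ctrls list under subsys->lock, then the
 * keep-alive timer is stopped and the async-event and fatal-error work
 * items are flushed or cancelled, and only then are the cntlid and the
 * memory released.  This is the reverse of the setup order in
 * nvmet_alloc_ctrl().
 */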
1444*4882a593Smuzhiyun void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
1445*4882a593Smuzhiyun {
1446*4882a593Smuzhiyun 	kref_put(&ctrl->ref, nvmet_ctrl_free);
1447*4882a593Smuzhiyun }
1448*4882a593Smuzhiyun 
1449*4882a593Smuzhiyun void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
1450*4882a593Smuzhiyun {
1451*4882a593Smuzhiyun 	mutex_lock(&ctrl->lock);
1452*4882a593Smuzhiyun 	if (!(ctrl->csts & NVME_CSTS_CFS)) {
1453*4882a593Smuzhiyun 		ctrl->csts |= NVME_CSTS_CFS;
1454*4882a593Smuzhiyun 		schedule_work(&ctrl->fatal_err_work);
1455*4882a593Smuzhiyun 	}
1456*4882a593Smuzhiyun 	mutex_unlock(&ctrl->lock);
1457*4882a593Smuzhiyun }
1458*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);
1459*4882a593Smuzhiyun 
1460*4882a593Smuzhiyun static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
1461*4882a593Smuzhiyun 		const char *subsysnqn)
1462*4882a593Smuzhiyun {
1463*4882a593Smuzhiyun 	struct nvmet_subsys_link *p;
1464*4882a593Smuzhiyun 
1465*4882a593Smuzhiyun 	if (!port)
1466*4882a593Smuzhiyun 		return NULL;
1467*4882a593Smuzhiyun 
1468*4882a593Smuzhiyun 	if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) {
1469*4882a593Smuzhiyun 		if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
1470*4882a593Smuzhiyun 			return NULL;
1471*4882a593Smuzhiyun 		return nvmet_disc_subsys;
1472*4882a593Smuzhiyun 	}
1473*4882a593Smuzhiyun 
1474*4882a593Smuzhiyun 	down_read(&nvmet_config_sem);
1475*4882a593Smuzhiyun 	list_for_each_entry(p, &port->subsystems, entry) {
1476*4882a593Smuzhiyun 		if (!strncmp(p->subsys->subsysnqn, subsysnqn,
1477*4882a593Smuzhiyun 				NVMF_NQN_SIZE)) {
1478*4882a593Smuzhiyun 			if (!kref_get_unless_zero(&p->subsys->ref))
1479*4882a593Smuzhiyun 				break;
1480*4882a593Smuzhiyun 			up_read(&nvmet_config_sem);
1481*4882a593Smuzhiyun 			return p->subsys;
1482*4882a593Smuzhiyun 		}
1483*4882a593Smuzhiyun 	}
1484*4882a593Smuzhiyun 	up_read(&nvmet_config_sem);
1485*4882a593Smuzhiyun 	return NULL;
1486*4882a593Smuzhiyun }
1487*4882a593Smuzhiyun 
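/*
 * Editorial note: NVME_DISC_SUBSYS_NAME is the well-known discovery NQN
 * "nqn.2014-08.org.nvmexpress.discovery" (defined in include/linux/nvme.h),
 * which is why the function above bypasses the per-port subsystem list and
 * resolves that name straight to nvmet_disc_subsys.
 */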
1488*4882a593Smuzhiyun struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
1489*4882a593Smuzhiyun 		enum nvme_subsys_type type)
1490*4882a593Smuzhiyun {
1491*4882a593Smuzhiyun 	struct nvmet_subsys *subsys;
1492*4882a593Smuzhiyun 
1493*4882a593Smuzhiyun 	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
1494*4882a593Smuzhiyun 	if (!subsys)
1495*4882a593Smuzhiyun 		return ERR_PTR(-ENOMEM);
1496*4882a593Smuzhiyun 
1497*4882a593Smuzhiyun 	subsys->ver = NVMET_DEFAULT_VS;
1498*4882a593Smuzhiyun 	/* generate a random serial number as our controllers are ephemeral: */
1499*4882a593Smuzhiyun 	get_random_bytes(&subsys->serial, sizeof(subsys->serial));
1500*4882a593Smuzhiyun 
1501*4882a593Smuzhiyun 	switch (type) {
1502*4882a593Smuzhiyun 	case NVME_NQN_NVME:
1503*4882a593Smuzhiyun 		subsys->max_qid = NVMET_NR_QUEUES;
1504*4882a593Smuzhiyun 		break;
1505*4882a593Smuzhiyun 	case NVME_NQN_DISC:
1506*4882a593Smuzhiyun 		subsys->max_qid = 0;
1507*4882a593Smuzhiyun 		break;
1508*4882a593Smuzhiyun 	default:
1509*4882a593Smuzhiyun 		pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
1510*4882a593Smuzhiyun 		kfree(subsys);
1511*4882a593Smuzhiyun 		return ERR_PTR(-EINVAL);
1512*4882a593Smuzhiyun 	}
1513*4882a593Smuzhiyun 	subsys->type = type;
1514*4882a593Smuzhiyun 	subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
1515*4882a593Smuzhiyun 			GFP_KERNEL);
1516*4882a593Smuzhiyun 	if (!subsys->subsysnqn) {
1517*4882a593Smuzhiyun 		kfree(subsys);
1518*4882a593Smuzhiyun 		return ERR_PTR(-ENOMEM);
1519*4882a593Smuzhiyun 	}
1520*4882a593Smuzhiyun 	subsys->cntlid_min = NVME_CNTLID_MIN;
1521*4882a593Smuzhiyun 	subsys->cntlid_max = NVME_CNTLID_MAX;
1522*4882a593Smuzhiyun 	kref_init(&subsys->ref);
1523*4882a593Smuzhiyun 
1524*4882a593Smuzhiyun 	mutex_init(&subsys->lock);
1525*4882a593Smuzhiyun 	xa_init(&subsys->namespaces);
1526*4882a593Smuzhiyun 	INIT_LIST_HEAD(&subsys->ctrls);
1527*4882a593Smuzhiyun 	INIT_LIST_HEAD(&subsys->hosts);
1528*4882a593Smuzhiyun 
1529*4882a593Smuzhiyun 	return subsys;
1530*4882a593Smuzhiyun }
1531*4882a593Smuzhiyun 
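/*
 * Hypothetical usage sketch (editorial illustration, with an invented
 * example NQN): callers must use the ERR_PTR convention rather than
 * checking for NULL:
 *
 *	struct nvmet_subsys *subsys;
 *
 *	subsys = nvmet_subsys_alloc("nqn.2016-06.io.example:sub1",
 *			NVME_NQN_NVME);
 *	if (IS_ERR(subsys))
 *		return PTR_ERR(subsys);
 *	...
 *	nvmet_subsys_put(subsys);
 */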
1532*4882a593Smuzhiyun static void nvmet_subsys_free(struct kref *ref)
1533*4882a593Smuzhiyun {
1534*4882a593Smuzhiyun 	struct nvmet_subsys *subsys =
1535*4882a593Smuzhiyun 		container_of(ref, struct nvmet_subsys, ref);
1536*4882a593Smuzhiyun 
1537*4882a593Smuzhiyun 	WARN_ON_ONCE(!xa_empty(&subsys->namespaces));
1538*4882a593Smuzhiyun 
1539*4882a593Smuzhiyun 	xa_destroy(&subsys->namespaces);
1540*4882a593Smuzhiyun 	nvmet_passthru_subsys_free(subsys);
1541*4882a593Smuzhiyun 
1542*4882a593Smuzhiyun 	kfree(subsys->subsysnqn);
1543*4882a593Smuzhiyun 	kfree_rcu(subsys->model, rcuhead);
1544*4882a593Smuzhiyun 	kfree(subsys);
1545*4882a593Smuzhiyun }
1546*4882a593Smuzhiyun 
1547*4882a593Smuzhiyun void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
1548*4882a593Smuzhiyun {
1549*4882a593Smuzhiyun 	struct nvmet_ctrl *ctrl;
1550*4882a593Smuzhiyun 
1551*4882a593Smuzhiyun 	mutex_lock(&subsys->lock);
1552*4882a593Smuzhiyun 	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
1553*4882a593Smuzhiyun 		ctrl->ops->delete_ctrl(ctrl);
1554*4882a593Smuzhiyun 	mutex_unlock(&subsys->lock);
1555*4882a593Smuzhiyun }
1556*4882a593Smuzhiyun 
1557*4882a593Smuzhiyun void nvmet_subsys_put(struct nvmet_subsys *subsys)
1558*4882a593Smuzhiyun {
1559*4882a593Smuzhiyun 	kref_put(&subsys->ref, nvmet_subsys_free);
1560*4882a593Smuzhiyun }
1561*4882a593Smuzhiyun 
1562*4882a593Smuzhiyun static int __init nvmet_init(void)
1563*4882a593Smuzhiyun {
1564*4882a593Smuzhiyun 	int error;
1565*4882a593Smuzhiyun 
1566*4882a593Smuzhiyun 	nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;
1567*4882a593Smuzhiyun 
1568*4882a593Smuzhiyun 	buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
1569*4882a593Smuzhiyun 			WQ_MEM_RECLAIM, 0);
1570*4882a593Smuzhiyun 	if (!buffered_io_wq) {
1571*4882a593Smuzhiyun 		error = -ENOMEM;
1572*4882a593Smuzhiyun 		goto out;
1573*4882a593Smuzhiyun 	}
1574*4882a593Smuzhiyun 
1575*4882a593Smuzhiyun 	error = nvmet_init_discovery();
1576*4882a593Smuzhiyun 	if (error)
1577*4882a593Smuzhiyun 		goto out_free_work_queue;
1578*4882a593Smuzhiyun 
1579*4882a593Smuzhiyun 	error = nvmet_init_configfs();
1580*4882a593Smuzhiyun 	if (error)
1581*4882a593Smuzhiyun 		goto out_exit_discovery;
1582*4882a593Smuzhiyun 	return 0;
1583*4882a593Smuzhiyun 
1584*4882a593Smuzhiyun out_exit_discovery:
1585*4882a593Smuzhiyun 	nvmet_exit_discovery();
1586*4882a593Smuzhiyun out_free_work_queue:
1587*4882a593Smuzhiyun 	destroy_workqueue(buffered_io_wq);
1588*4882a593Smuzhiyun out:
1589*4882a593Smuzhiyun 	return error;
1590*4882a593Smuzhiyun }
1591*4882a593Smuzhiyun 
1592*4882a593Smuzhiyun static void __exit nvmet_exit(void)
1593*4882a593Smuzhiyun {
1594*4882a593Smuzhiyun 	nvmet_exit_configfs();
1595*4882a593Smuzhiyun 	nvmet_exit_discovery();
1596*4882a593Smuzhiyun 	ida_destroy(&cntlid_ida);
1597*4882a593Smuzhiyun 	destroy_workqueue(buffered_io_wq);
1598*4882a593Smuzhiyun 
1599*4882a593Smuzhiyun 	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
1600*4882a593Smuzhiyun 	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
1601*4882a593Smuzhiyun }
1602*4882a593Smuzhiyun 
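/*
 * Editorial note: the two BUILD_BUG_ON() checks above are compile-time
 * assertions on the discovery log page wire format (1024-byte header and
 * 1024-byte entries); they generate no runtime code, so their placement
 * in the __exit path is incidental.
 */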
1603*4882a593Smuzhiyun module_init(nvmet_init);
1604*4882a593Smuzhiyun module_exit(nvmet_exit);
1605*4882a593Smuzhiyun 
1606*4882a593Smuzhiyun MODULE_LICENSE("GPL v2");
1607*4882a593Smuzhiyun MODULE_IMPORT_NS(VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver);
1608