1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * scsi_error.c Copyright (C) 1997 Eric Youngdale
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * SCSI error/timeout handling
6*4882a593Smuzhiyun * Initial versions: Eric Youngdale. Based upon conversations with
7*4882a593Smuzhiyun * Leonard Zubkoff and David Miller at Linux Expo,
8*4882a593Smuzhiyun * ideas originating from all over the place.
9*4882a593Smuzhiyun *
10*4882a593Smuzhiyun * Restructured scsi_unjam_host and associated functions.
11*4882a593Smuzhiyun * September 04, 2002 Mike Anderson (andmike@us.ibm.com)
12*4882a593Smuzhiyun *
13*4882a593Smuzhiyun * Forward port of Russell King's (rmk@arm.linux.org.uk) changes and
14*4882a593Smuzhiyun * minor cleanups.
15*4882a593Smuzhiyun * September 30, 2002 Mike Anderson (andmike@us.ibm.com)
16*4882a593Smuzhiyun */
17*4882a593Smuzhiyun
18*4882a593Smuzhiyun #include <linux/module.h>
19*4882a593Smuzhiyun #include <linux/sched.h>
20*4882a593Smuzhiyun #include <linux/gfp.h>
21*4882a593Smuzhiyun #include <linux/timer.h>
22*4882a593Smuzhiyun #include <linux/string.h>
23*4882a593Smuzhiyun #include <linux/kernel.h>
24*4882a593Smuzhiyun #include <linux/freezer.h>
25*4882a593Smuzhiyun #include <linux/kthread.h>
26*4882a593Smuzhiyun #include <linux/interrupt.h>
27*4882a593Smuzhiyun #include <linux/blkdev.h>
28*4882a593Smuzhiyun #include <linux/delay.h>
29*4882a593Smuzhiyun #include <linux/jiffies.h>
30*4882a593Smuzhiyun
31*4882a593Smuzhiyun #include <scsi/scsi.h>
32*4882a593Smuzhiyun #include <scsi/scsi_cmnd.h>
33*4882a593Smuzhiyun #include <scsi/scsi_dbg.h>
34*4882a593Smuzhiyun #include <scsi/scsi_device.h>
35*4882a593Smuzhiyun #include <scsi/scsi_driver.h>
36*4882a593Smuzhiyun #include <scsi/scsi_eh.h>
37*4882a593Smuzhiyun #include <scsi/scsi_common.h>
38*4882a593Smuzhiyun #include <scsi/scsi_transport.h>
39*4882a593Smuzhiyun #include <scsi/scsi_host.h>
40*4882a593Smuzhiyun #include <scsi/scsi_ioctl.h>
41*4882a593Smuzhiyun #include <scsi/scsi_dh.h>
42*4882a593Smuzhiyun #include <scsi/scsi_devinfo.h>
43*4882a593Smuzhiyun #include <scsi/sg.h>
44*4882a593Smuzhiyun
45*4882a593Smuzhiyun #include "scsi_priv.h"
46*4882a593Smuzhiyun #include "scsi_logging.h"
47*4882a593Smuzhiyun #include "scsi_transport_api.h"
48*4882a593Smuzhiyun
49*4882a593Smuzhiyun #include <trace/events/scsi.h>
50*4882a593Smuzhiyun
51*4882a593Smuzhiyun #include <asm/unaligned.h>
52*4882a593Smuzhiyun
/* Forward declarations of static helpers used before their definitions. */
static int scsi_try_to_abort_cmd(struct scsi_host_template *,
				 struct scsi_cmnd *);
static int scsi_eh_try_stu(struct scsi_cmnd *scmd);
static void scsi_eh_done(struct scsi_cmnd *scmd);

/*
 * Reset settle times.
 *
 * Arguably the host itself should take care of these; it is allowed to
 * sleep, so it probably should.
 */
#define BUS_RESET_SETTLE_TIME   (10)
#define HOST_RESET_SETTLE_TIME  (10)
65*4882a593Smuzhiyun
scsi_eh_wakeup(struct Scsi_Host * shost)66*4882a593Smuzhiyun void scsi_eh_wakeup(struct Scsi_Host *shost)
67*4882a593Smuzhiyun {
68*4882a593Smuzhiyun lockdep_assert_held(shost->host_lock);
69*4882a593Smuzhiyun
70*4882a593Smuzhiyun if (scsi_host_busy(shost) == shost->host_failed) {
71*4882a593Smuzhiyun trace_scsi_eh_wakeup(shost);
72*4882a593Smuzhiyun wake_up_process(shost->ehandler);
73*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(5, shost_printk(KERN_INFO, shost,
74*4882a593Smuzhiyun "Waking error handler thread\n"));
75*4882a593Smuzhiyun }
76*4882a593Smuzhiyun }
77*4882a593Smuzhiyun
78*4882a593Smuzhiyun /**
79*4882a593Smuzhiyun * scsi_schedule_eh - schedule EH for SCSI host
80*4882a593Smuzhiyun * @shost: SCSI host to invoke error handling on.
81*4882a593Smuzhiyun *
82*4882a593Smuzhiyun * Schedule SCSI EH without scmd.
83*4882a593Smuzhiyun */
scsi_schedule_eh(struct Scsi_Host * shost)84*4882a593Smuzhiyun void scsi_schedule_eh(struct Scsi_Host *shost)
85*4882a593Smuzhiyun {
86*4882a593Smuzhiyun unsigned long flags;
87*4882a593Smuzhiyun
88*4882a593Smuzhiyun spin_lock_irqsave(shost->host_lock, flags);
89*4882a593Smuzhiyun
90*4882a593Smuzhiyun if (scsi_host_set_state(shost, SHOST_RECOVERY) == 0 ||
91*4882a593Smuzhiyun scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY) == 0) {
92*4882a593Smuzhiyun shost->host_eh_scheduled++;
93*4882a593Smuzhiyun scsi_eh_wakeup(shost);
94*4882a593Smuzhiyun }
95*4882a593Smuzhiyun
96*4882a593Smuzhiyun spin_unlock_irqrestore(shost->host_lock, flags);
97*4882a593Smuzhiyun }
98*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(scsi_schedule_eh);
99*4882a593Smuzhiyun
scsi_host_eh_past_deadline(struct Scsi_Host * shost)100*4882a593Smuzhiyun static int scsi_host_eh_past_deadline(struct Scsi_Host *shost)
101*4882a593Smuzhiyun {
102*4882a593Smuzhiyun if (!shost->last_reset || shost->eh_deadline == -1)
103*4882a593Smuzhiyun return 0;
104*4882a593Smuzhiyun
105*4882a593Smuzhiyun /*
106*4882a593Smuzhiyun * 32bit accesses are guaranteed to be atomic
107*4882a593Smuzhiyun * (on all supported architectures), so instead
108*4882a593Smuzhiyun * of using a spinlock we can as well double check
109*4882a593Smuzhiyun * if eh_deadline has been set to 'off' during the
110*4882a593Smuzhiyun * time_before call.
111*4882a593Smuzhiyun */
112*4882a593Smuzhiyun if (time_before(jiffies, shost->last_reset + shost->eh_deadline) &&
113*4882a593Smuzhiyun shost->eh_deadline > -1)
114*4882a593Smuzhiyun return 0;
115*4882a593Smuzhiyun
116*4882a593Smuzhiyun return 1;
117*4882a593Smuzhiyun }
118*4882a593Smuzhiyun
scsi_cmd_retry_allowed(struct scsi_cmnd * cmd)119*4882a593Smuzhiyun static bool scsi_cmd_retry_allowed(struct scsi_cmnd *cmd)
120*4882a593Smuzhiyun {
121*4882a593Smuzhiyun if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
122*4882a593Smuzhiyun return true;
123*4882a593Smuzhiyun
124*4882a593Smuzhiyun return ++cmd->retries <= cmd->allowed;
125*4882a593Smuzhiyun }
126*4882a593Smuzhiyun
127*4882a593Smuzhiyun /**
128*4882a593Smuzhiyun * scmd_eh_abort_handler - Handle command aborts
129*4882a593Smuzhiyun * @work: command to be aborted.
130*4882a593Smuzhiyun *
131*4882a593Smuzhiyun * Note: this function must be called only for a command that has timed out.
132*4882a593Smuzhiyun * Because the block layer marks a request as complete before it calls
133*4882a593Smuzhiyun * scsi_times_out(), a .scsi_done() call from the LLD for a command that has
134*4882a593Smuzhiyun * timed out do not have any effect. Hence it is safe to call
135*4882a593Smuzhiyun * scsi_finish_command() from this function.
136*4882a593Smuzhiyun */
137*4882a593Smuzhiyun void
scmd_eh_abort_handler(struct work_struct * work)138*4882a593Smuzhiyun scmd_eh_abort_handler(struct work_struct *work)
139*4882a593Smuzhiyun {
140*4882a593Smuzhiyun struct scsi_cmnd *scmd =
141*4882a593Smuzhiyun container_of(work, struct scsi_cmnd, abort_work.work);
142*4882a593Smuzhiyun struct scsi_device *sdev = scmd->device;
143*4882a593Smuzhiyun int rtn;
144*4882a593Smuzhiyun
145*4882a593Smuzhiyun if (scsi_host_eh_past_deadline(sdev->host)) {
146*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
147*4882a593Smuzhiyun scmd_printk(KERN_INFO, scmd,
148*4882a593Smuzhiyun "eh timeout, not aborting\n"));
149*4882a593Smuzhiyun } else {
150*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
151*4882a593Smuzhiyun scmd_printk(KERN_INFO, scmd,
152*4882a593Smuzhiyun "aborting command\n"));
153*4882a593Smuzhiyun rtn = scsi_try_to_abort_cmd(sdev->host->hostt, scmd);
154*4882a593Smuzhiyun if (rtn == SUCCESS) {
155*4882a593Smuzhiyun set_host_byte(scmd, DID_TIME_OUT);
156*4882a593Smuzhiyun if (scsi_host_eh_past_deadline(sdev->host)) {
157*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
158*4882a593Smuzhiyun scmd_printk(KERN_INFO, scmd,
159*4882a593Smuzhiyun "eh timeout, not retrying "
160*4882a593Smuzhiyun "aborted command\n"));
161*4882a593Smuzhiyun } else if (!scsi_noretry_cmd(scmd) &&
162*4882a593Smuzhiyun scsi_cmd_retry_allowed(scmd)) {
163*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
164*4882a593Smuzhiyun scmd_printk(KERN_WARNING, scmd,
165*4882a593Smuzhiyun "retry aborted command\n"));
166*4882a593Smuzhiyun scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
167*4882a593Smuzhiyun return;
168*4882a593Smuzhiyun } else {
169*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
170*4882a593Smuzhiyun scmd_printk(KERN_WARNING, scmd,
171*4882a593Smuzhiyun "finish aborted command\n"));
172*4882a593Smuzhiyun scsi_finish_command(scmd);
173*4882a593Smuzhiyun return;
174*4882a593Smuzhiyun }
175*4882a593Smuzhiyun } else {
176*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
177*4882a593Smuzhiyun scmd_printk(KERN_INFO, scmd,
178*4882a593Smuzhiyun "cmd abort %s\n",
179*4882a593Smuzhiyun (rtn == FAST_IO_FAIL) ?
180*4882a593Smuzhiyun "not send" : "failed"));
181*4882a593Smuzhiyun }
182*4882a593Smuzhiyun }
183*4882a593Smuzhiyun
184*4882a593Smuzhiyun scsi_eh_scmd_add(scmd);
185*4882a593Smuzhiyun }
186*4882a593Smuzhiyun
187*4882a593Smuzhiyun /**
188*4882a593Smuzhiyun * scsi_abort_command - schedule a command abort
189*4882a593Smuzhiyun * @scmd: scmd to abort.
190*4882a593Smuzhiyun *
191*4882a593Smuzhiyun * We only need to abort commands after a command timeout
192*4882a593Smuzhiyun */
193*4882a593Smuzhiyun static int
scsi_abort_command(struct scsi_cmnd * scmd)194*4882a593Smuzhiyun scsi_abort_command(struct scsi_cmnd *scmd)
195*4882a593Smuzhiyun {
196*4882a593Smuzhiyun struct scsi_device *sdev = scmd->device;
197*4882a593Smuzhiyun struct Scsi_Host *shost = sdev->host;
198*4882a593Smuzhiyun unsigned long flags;
199*4882a593Smuzhiyun
200*4882a593Smuzhiyun if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) {
201*4882a593Smuzhiyun /*
202*4882a593Smuzhiyun * Retry after abort failed, escalate to next level.
203*4882a593Smuzhiyun */
204*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
205*4882a593Smuzhiyun scmd_printk(KERN_INFO, scmd,
206*4882a593Smuzhiyun "previous abort failed\n"));
207*4882a593Smuzhiyun BUG_ON(delayed_work_pending(&scmd->abort_work));
208*4882a593Smuzhiyun return FAILED;
209*4882a593Smuzhiyun }
210*4882a593Smuzhiyun
211*4882a593Smuzhiyun spin_lock_irqsave(shost->host_lock, flags);
212*4882a593Smuzhiyun if (shost->eh_deadline != -1 && !shost->last_reset)
213*4882a593Smuzhiyun shost->last_reset = jiffies;
214*4882a593Smuzhiyun spin_unlock_irqrestore(shost->host_lock, flags);
215*4882a593Smuzhiyun
216*4882a593Smuzhiyun scmd->eh_eflags |= SCSI_EH_ABORT_SCHEDULED;
217*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
218*4882a593Smuzhiyun scmd_printk(KERN_INFO, scmd, "abort scheduled\n"));
219*4882a593Smuzhiyun queue_delayed_work(shost->tmf_work_q, &scmd->abort_work, HZ / 100);
220*4882a593Smuzhiyun return SUCCESS;
221*4882a593Smuzhiyun }
222*4882a593Smuzhiyun
223*4882a593Smuzhiyun /**
224*4882a593Smuzhiyun * scsi_eh_reset - call into ->eh_action to reset internal counters
225*4882a593Smuzhiyun * @scmd: scmd to run eh on.
226*4882a593Smuzhiyun *
227*4882a593Smuzhiyun * The scsi driver might be carrying internal state about the
228*4882a593Smuzhiyun * devices, so we need to call into the driver to reset the
229*4882a593Smuzhiyun * internal state once the error handler is started.
230*4882a593Smuzhiyun */
scsi_eh_reset(struct scsi_cmnd * scmd)231*4882a593Smuzhiyun static void scsi_eh_reset(struct scsi_cmnd *scmd)
232*4882a593Smuzhiyun {
233*4882a593Smuzhiyun if (!blk_rq_is_passthrough(scmd->request)) {
234*4882a593Smuzhiyun struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd);
235*4882a593Smuzhiyun if (sdrv->eh_reset)
236*4882a593Smuzhiyun sdrv->eh_reset(scmd);
237*4882a593Smuzhiyun }
238*4882a593Smuzhiyun }
239*4882a593Smuzhiyun
scsi_eh_inc_host_failed(struct rcu_head * head)240*4882a593Smuzhiyun static void scsi_eh_inc_host_failed(struct rcu_head *head)
241*4882a593Smuzhiyun {
242*4882a593Smuzhiyun struct scsi_cmnd *scmd = container_of(head, typeof(*scmd), rcu);
243*4882a593Smuzhiyun struct Scsi_Host *shost = scmd->device->host;
244*4882a593Smuzhiyun unsigned long flags;
245*4882a593Smuzhiyun
246*4882a593Smuzhiyun spin_lock_irqsave(shost->host_lock, flags);
247*4882a593Smuzhiyun shost->host_failed++;
248*4882a593Smuzhiyun scsi_eh_wakeup(shost);
249*4882a593Smuzhiyun spin_unlock_irqrestore(shost->host_lock, flags);
250*4882a593Smuzhiyun }
251*4882a593Smuzhiyun
252*4882a593Smuzhiyun /**
253*4882a593Smuzhiyun * scsi_eh_scmd_add - add scsi cmd to error handling.
254*4882a593Smuzhiyun * @scmd: scmd to run eh on.
255*4882a593Smuzhiyun */
scsi_eh_scmd_add(struct scsi_cmnd * scmd)256*4882a593Smuzhiyun void scsi_eh_scmd_add(struct scsi_cmnd *scmd)
257*4882a593Smuzhiyun {
258*4882a593Smuzhiyun struct Scsi_Host *shost = scmd->device->host;
259*4882a593Smuzhiyun unsigned long flags;
260*4882a593Smuzhiyun int ret;
261*4882a593Smuzhiyun
262*4882a593Smuzhiyun WARN_ON_ONCE(!shost->ehandler);
263*4882a593Smuzhiyun
264*4882a593Smuzhiyun spin_lock_irqsave(shost->host_lock, flags);
265*4882a593Smuzhiyun if (scsi_host_set_state(shost, SHOST_RECOVERY)) {
266*4882a593Smuzhiyun ret = scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY);
267*4882a593Smuzhiyun WARN_ON_ONCE(ret);
268*4882a593Smuzhiyun }
269*4882a593Smuzhiyun if (shost->eh_deadline != -1 && !shost->last_reset)
270*4882a593Smuzhiyun shost->last_reset = jiffies;
271*4882a593Smuzhiyun
272*4882a593Smuzhiyun scsi_eh_reset(scmd);
273*4882a593Smuzhiyun list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
274*4882a593Smuzhiyun spin_unlock_irqrestore(shost->host_lock, flags);
275*4882a593Smuzhiyun /*
276*4882a593Smuzhiyun * Ensure that all tasks observe the host state change before the
277*4882a593Smuzhiyun * host_failed change.
278*4882a593Smuzhiyun */
279*4882a593Smuzhiyun call_rcu(&scmd->rcu, scsi_eh_inc_host_failed);
280*4882a593Smuzhiyun }
281*4882a593Smuzhiyun
282*4882a593Smuzhiyun /**
283*4882a593Smuzhiyun * scsi_times_out - Timeout function for normal scsi commands.
284*4882a593Smuzhiyun * @req: request that is timing out.
285*4882a593Smuzhiyun *
286*4882a593Smuzhiyun * Notes:
287*4882a593Smuzhiyun * We do not need to lock this. There is the potential for a race
288*4882a593Smuzhiyun * only in that the normal completion handling might run, but if the
289*4882a593Smuzhiyun * normal completion function determines that the timer has already
290*4882a593Smuzhiyun * fired, then it mustn't do anything.
291*4882a593Smuzhiyun */
scsi_times_out(struct request * req)292*4882a593Smuzhiyun enum blk_eh_timer_return scsi_times_out(struct request *req)
293*4882a593Smuzhiyun {
294*4882a593Smuzhiyun struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req);
295*4882a593Smuzhiyun enum blk_eh_timer_return rtn = BLK_EH_DONE;
296*4882a593Smuzhiyun struct Scsi_Host *host = scmd->device->host;
297*4882a593Smuzhiyun
298*4882a593Smuzhiyun trace_scsi_dispatch_cmd_timeout(scmd);
299*4882a593Smuzhiyun scsi_log_completion(scmd, TIMEOUT_ERROR);
300*4882a593Smuzhiyun
301*4882a593Smuzhiyun if (host->eh_deadline != -1 && !host->last_reset)
302*4882a593Smuzhiyun host->last_reset = jiffies;
303*4882a593Smuzhiyun
304*4882a593Smuzhiyun if (host->hostt->eh_timed_out)
305*4882a593Smuzhiyun rtn = host->hostt->eh_timed_out(scmd);
306*4882a593Smuzhiyun
307*4882a593Smuzhiyun if (rtn == BLK_EH_DONE) {
308*4882a593Smuzhiyun /*
309*4882a593Smuzhiyun * Set the command to complete first in order to prevent a real
310*4882a593Smuzhiyun * completion from releasing the command while error handling
311*4882a593Smuzhiyun * is using it. If the command was already completed, then the
312*4882a593Smuzhiyun * lower level driver beat the timeout handler, and it is safe
313*4882a593Smuzhiyun * to return without escalating error recovery.
314*4882a593Smuzhiyun *
315*4882a593Smuzhiyun * If timeout handling lost the race to a real completion, the
316*4882a593Smuzhiyun * block layer may ignore that due to a fake timeout injection,
317*4882a593Smuzhiyun * so return RESET_TIMER to allow error handling another shot
318*4882a593Smuzhiyun * at this command.
319*4882a593Smuzhiyun */
320*4882a593Smuzhiyun if (test_and_set_bit(SCMD_STATE_COMPLETE, &scmd->state))
321*4882a593Smuzhiyun return BLK_EH_RESET_TIMER;
322*4882a593Smuzhiyun if (scsi_abort_command(scmd) != SUCCESS) {
323*4882a593Smuzhiyun set_host_byte(scmd, DID_TIME_OUT);
324*4882a593Smuzhiyun scsi_eh_scmd_add(scmd);
325*4882a593Smuzhiyun }
326*4882a593Smuzhiyun }
327*4882a593Smuzhiyun
328*4882a593Smuzhiyun return rtn;
329*4882a593Smuzhiyun }
330*4882a593Smuzhiyun
331*4882a593Smuzhiyun /**
332*4882a593Smuzhiyun * scsi_block_when_processing_errors - Prevent cmds from being queued.
333*4882a593Smuzhiyun * @sdev: Device on which we are performing recovery.
334*4882a593Smuzhiyun *
335*4882a593Smuzhiyun * Description:
336*4882a593Smuzhiyun * We block until the host is out of error recovery, and then check to
337*4882a593Smuzhiyun * see whether the host or the device is offline.
338*4882a593Smuzhiyun *
339*4882a593Smuzhiyun * Return value:
340*4882a593Smuzhiyun * 0 when dev was taken offline by error recovery. 1 OK to proceed.
341*4882a593Smuzhiyun */
scsi_block_when_processing_errors(struct scsi_device * sdev)342*4882a593Smuzhiyun int scsi_block_when_processing_errors(struct scsi_device *sdev)
343*4882a593Smuzhiyun {
344*4882a593Smuzhiyun int online;
345*4882a593Smuzhiyun
346*4882a593Smuzhiyun wait_event(sdev->host->host_wait, !scsi_host_in_recovery(sdev->host));
347*4882a593Smuzhiyun
348*4882a593Smuzhiyun online = scsi_device_online(sdev);
349*4882a593Smuzhiyun
350*4882a593Smuzhiyun return online;
351*4882a593Smuzhiyun }
352*4882a593Smuzhiyun EXPORT_SYMBOL(scsi_block_when_processing_errors);
353*4882a593Smuzhiyun
#ifdef CONFIG_SCSI_LOGGING
/**
 * scsi_eh_prt_fail_stats - Log info on failures.
 * @shost: scsi host being recovered.
 * @work_q: Queue of scsi cmds to process.
 *
 * For every device of @shost that has commands on @work_q, logs how many
 * of them had an abort scheduled ("cancel") and how many did not
 * ("failed"), followed by a grand total for the host.
 */
static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost,
					  struct list_head *work_q)
{
	struct scsi_device *sdev;
	struct scsi_cmnd *scmd;
	int devices_failed = 0;
	int total_failures = 0;

	shost_for_each_device(sdev, shost) {
		/* Per-device tallies, restarted for every device. */
		int nr_failed = 0;
		int nr_cancel = 0;

		list_for_each_entry(scmd, work_q, eh_entry) {
			if (scmd->device != sdev)
				continue;

			if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED)
				nr_cancel++;
			else
				nr_failed++;
		}

		/* Devices without any queued command are not reported. */
		if (!nr_cancel && !nr_failed)
			continue;

		total_failures += nr_cancel + nr_failed;
		devices_failed++;

		SCSI_LOG_ERROR_RECOVERY(3,
			shost_printk(KERN_INFO, shost,
				     "%s: cmds failed: %d, cancel: %d\n",
				     __func__, nr_failed, nr_cancel));
	}

	SCSI_LOG_ERROR_RECOVERY(2,
		shost_printk(KERN_INFO, shost,
			     "Total of %d commands on %d devices require eh work\n",
			     total_failures, devices_failed));
}
#endif
399*4882a593Smuzhiyun
400*4882a593Smuzhiyun /**
401*4882a593Smuzhiyun * scsi_report_lun_change - Set flag on all *other* devices on the same target
402*4882a593Smuzhiyun * to indicate that a UNIT ATTENTION is expected.
403*4882a593Smuzhiyun * @sdev: Device reporting the UNIT ATTENTION
404*4882a593Smuzhiyun */
scsi_report_lun_change(struct scsi_device * sdev)405*4882a593Smuzhiyun static void scsi_report_lun_change(struct scsi_device *sdev)
406*4882a593Smuzhiyun {
407*4882a593Smuzhiyun sdev->sdev_target->expecting_lun_change = 1;
408*4882a593Smuzhiyun }
409*4882a593Smuzhiyun
410*4882a593Smuzhiyun /**
411*4882a593Smuzhiyun * scsi_report_sense - Examine scsi sense information and log messages for
412*4882a593Smuzhiyun * certain conditions, also issue uevents for some of them.
413*4882a593Smuzhiyun * @sdev: Device reporting the sense code
414*4882a593Smuzhiyun * @sshdr: sshdr to be examined
415*4882a593Smuzhiyun */
scsi_report_sense(struct scsi_device * sdev,struct scsi_sense_hdr * sshdr)416*4882a593Smuzhiyun static void scsi_report_sense(struct scsi_device *sdev,
417*4882a593Smuzhiyun struct scsi_sense_hdr *sshdr)
418*4882a593Smuzhiyun {
419*4882a593Smuzhiyun enum scsi_device_event evt_type = SDEV_EVT_MAXBITS; /* i.e. none */
420*4882a593Smuzhiyun
421*4882a593Smuzhiyun if (sshdr->sense_key == UNIT_ATTENTION) {
422*4882a593Smuzhiyun if (sshdr->asc == 0x3f && sshdr->ascq == 0x03) {
423*4882a593Smuzhiyun evt_type = SDEV_EVT_INQUIRY_CHANGE_REPORTED;
424*4882a593Smuzhiyun sdev_printk(KERN_WARNING, sdev,
425*4882a593Smuzhiyun "Inquiry data has changed");
426*4882a593Smuzhiyun } else if (sshdr->asc == 0x3f && sshdr->ascq == 0x0e) {
427*4882a593Smuzhiyun evt_type = SDEV_EVT_LUN_CHANGE_REPORTED;
428*4882a593Smuzhiyun scsi_report_lun_change(sdev);
429*4882a593Smuzhiyun sdev_printk(KERN_WARNING, sdev,
430*4882a593Smuzhiyun "Warning! Received an indication that the "
431*4882a593Smuzhiyun "LUN assignments on this target have "
432*4882a593Smuzhiyun "changed. The Linux SCSI layer does not "
433*4882a593Smuzhiyun "automatically remap LUN assignments.\n");
434*4882a593Smuzhiyun } else if (sshdr->asc == 0x3f)
435*4882a593Smuzhiyun sdev_printk(KERN_WARNING, sdev,
436*4882a593Smuzhiyun "Warning! Received an indication that the "
437*4882a593Smuzhiyun "operating parameters on this target have "
438*4882a593Smuzhiyun "changed. The Linux SCSI layer does not "
439*4882a593Smuzhiyun "automatically adjust these parameters.\n");
440*4882a593Smuzhiyun
441*4882a593Smuzhiyun if (sshdr->asc == 0x38 && sshdr->ascq == 0x07) {
442*4882a593Smuzhiyun evt_type = SDEV_EVT_SOFT_THRESHOLD_REACHED_REPORTED;
443*4882a593Smuzhiyun sdev_printk(KERN_WARNING, sdev,
444*4882a593Smuzhiyun "Warning! Received an indication that the "
445*4882a593Smuzhiyun "LUN reached a thin provisioning soft "
446*4882a593Smuzhiyun "threshold.\n");
447*4882a593Smuzhiyun }
448*4882a593Smuzhiyun
449*4882a593Smuzhiyun if (sshdr->asc == 0x29) {
450*4882a593Smuzhiyun evt_type = SDEV_EVT_POWER_ON_RESET_OCCURRED;
451*4882a593Smuzhiyun sdev_printk(KERN_WARNING, sdev,
452*4882a593Smuzhiyun "Power-on or device reset occurred\n");
453*4882a593Smuzhiyun }
454*4882a593Smuzhiyun
455*4882a593Smuzhiyun if (sshdr->asc == 0x2a && sshdr->ascq == 0x01) {
456*4882a593Smuzhiyun evt_type = SDEV_EVT_MODE_PARAMETER_CHANGE_REPORTED;
457*4882a593Smuzhiyun sdev_printk(KERN_WARNING, sdev,
458*4882a593Smuzhiyun "Mode parameters changed");
459*4882a593Smuzhiyun } else if (sshdr->asc == 0x2a && sshdr->ascq == 0x06) {
460*4882a593Smuzhiyun evt_type = SDEV_EVT_ALUA_STATE_CHANGE_REPORTED;
461*4882a593Smuzhiyun sdev_printk(KERN_WARNING, sdev,
462*4882a593Smuzhiyun "Asymmetric access state changed");
463*4882a593Smuzhiyun } else if (sshdr->asc == 0x2a && sshdr->ascq == 0x09) {
464*4882a593Smuzhiyun evt_type = SDEV_EVT_CAPACITY_CHANGE_REPORTED;
465*4882a593Smuzhiyun sdev_printk(KERN_WARNING, sdev,
466*4882a593Smuzhiyun "Capacity data has changed");
467*4882a593Smuzhiyun } else if (sshdr->asc == 0x2a)
468*4882a593Smuzhiyun sdev_printk(KERN_WARNING, sdev,
469*4882a593Smuzhiyun "Parameters changed");
470*4882a593Smuzhiyun }
471*4882a593Smuzhiyun
472*4882a593Smuzhiyun if (evt_type != SDEV_EVT_MAXBITS) {
473*4882a593Smuzhiyun set_bit(evt_type, sdev->pending_events);
474*4882a593Smuzhiyun schedule_work(&sdev->event_work);
475*4882a593Smuzhiyun }
476*4882a593Smuzhiyun }
477*4882a593Smuzhiyun
478*4882a593Smuzhiyun /**
479*4882a593Smuzhiyun * scsi_check_sense - Examine scsi cmd sense
480*4882a593Smuzhiyun * @scmd: Cmd to have sense checked.
481*4882a593Smuzhiyun *
482*4882a593Smuzhiyun * Return value:
483*4882a593Smuzhiyun * SUCCESS or FAILED or NEEDS_RETRY or ADD_TO_MLQUEUE
484*4882a593Smuzhiyun *
485*4882a593Smuzhiyun * Notes:
486*4882a593Smuzhiyun * When a deferred error is detected the current command has
487*4882a593Smuzhiyun * not been executed and needs retrying.
488*4882a593Smuzhiyun */
scsi_check_sense(struct scsi_cmnd * scmd)489*4882a593Smuzhiyun int scsi_check_sense(struct scsi_cmnd *scmd)
490*4882a593Smuzhiyun {
491*4882a593Smuzhiyun struct scsi_device *sdev = scmd->device;
492*4882a593Smuzhiyun struct scsi_sense_hdr sshdr;
493*4882a593Smuzhiyun
494*4882a593Smuzhiyun if (! scsi_command_normalize_sense(scmd, &sshdr))
495*4882a593Smuzhiyun return FAILED; /* no valid sense data */
496*4882a593Smuzhiyun
497*4882a593Smuzhiyun scsi_report_sense(sdev, &sshdr);
498*4882a593Smuzhiyun
499*4882a593Smuzhiyun if (scsi_sense_is_deferred(&sshdr))
500*4882a593Smuzhiyun return NEEDS_RETRY;
501*4882a593Smuzhiyun
502*4882a593Smuzhiyun if (sdev->handler && sdev->handler->check_sense) {
503*4882a593Smuzhiyun int rc;
504*4882a593Smuzhiyun
505*4882a593Smuzhiyun rc = sdev->handler->check_sense(sdev, &sshdr);
506*4882a593Smuzhiyun if (rc != SCSI_RETURN_NOT_HANDLED)
507*4882a593Smuzhiyun return rc;
508*4882a593Smuzhiyun /* handler does not care. Drop down to default handling */
509*4882a593Smuzhiyun }
510*4882a593Smuzhiyun
511*4882a593Smuzhiyun if (scmd->cmnd[0] == TEST_UNIT_READY && scmd->scsi_done != scsi_eh_done)
512*4882a593Smuzhiyun /*
513*4882a593Smuzhiyun * nasty: for mid-layer issued TURs, we need to return the
514*4882a593Smuzhiyun * actual sense data without any recovery attempt. For eh
515*4882a593Smuzhiyun * issued ones, we need to try to recover and interpret
516*4882a593Smuzhiyun */
517*4882a593Smuzhiyun return SUCCESS;
518*4882a593Smuzhiyun
519*4882a593Smuzhiyun /*
520*4882a593Smuzhiyun * Previous logic looked for FILEMARK, EOM or ILI which are
521*4882a593Smuzhiyun * mainly associated with tapes and returned SUCCESS.
522*4882a593Smuzhiyun */
523*4882a593Smuzhiyun if (sshdr.response_code == 0x70) {
524*4882a593Smuzhiyun /* fixed format */
525*4882a593Smuzhiyun if (scmd->sense_buffer[2] & 0xe0)
526*4882a593Smuzhiyun return SUCCESS;
527*4882a593Smuzhiyun } else {
528*4882a593Smuzhiyun /*
529*4882a593Smuzhiyun * descriptor format: look for "stream commands sense data
530*4882a593Smuzhiyun * descriptor" (see SSC-3). Assume single sense data
531*4882a593Smuzhiyun * descriptor. Ignore ILI from SBC-2 READ LONG and WRITE LONG.
532*4882a593Smuzhiyun */
533*4882a593Smuzhiyun if ((sshdr.additional_length > 3) &&
534*4882a593Smuzhiyun (scmd->sense_buffer[8] == 0x4) &&
535*4882a593Smuzhiyun (scmd->sense_buffer[11] & 0xe0))
536*4882a593Smuzhiyun return SUCCESS;
537*4882a593Smuzhiyun }
538*4882a593Smuzhiyun
539*4882a593Smuzhiyun switch (sshdr.sense_key) {
540*4882a593Smuzhiyun case NO_SENSE:
541*4882a593Smuzhiyun return SUCCESS;
542*4882a593Smuzhiyun case RECOVERED_ERROR:
543*4882a593Smuzhiyun return /* soft_error */ SUCCESS;
544*4882a593Smuzhiyun
545*4882a593Smuzhiyun case ABORTED_COMMAND:
546*4882a593Smuzhiyun if (sshdr.asc == 0x10) /* DIF */
547*4882a593Smuzhiyun return SUCCESS;
548*4882a593Smuzhiyun
549*4882a593Smuzhiyun if (sshdr.asc == 0x44 && sdev->sdev_bflags & BLIST_RETRY_ITF)
550*4882a593Smuzhiyun return ADD_TO_MLQUEUE;
551*4882a593Smuzhiyun if (sshdr.asc == 0xc1 && sshdr.ascq == 0x01 &&
552*4882a593Smuzhiyun sdev->sdev_bflags & BLIST_RETRY_ASC_C1)
553*4882a593Smuzhiyun return ADD_TO_MLQUEUE;
554*4882a593Smuzhiyun
555*4882a593Smuzhiyun return NEEDS_RETRY;
556*4882a593Smuzhiyun case NOT_READY:
557*4882a593Smuzhiyun case UNIT_ATTENTION:
558*4882a593Smuzhiyun /*
559*4882a593Smuzhiyun * if we are expecting a cc/ua because of a bus reset that we
560*4882a593Smuzhiyun * performed, treat this just as a retry. otherwise this is
561*4882a593Smuzhiyun * information that we should pass up to the upper-level driver
562*4882a593Smuzhiyun * so that we can deal with it there.
563*4882a593Smuzhiyun */
564*4882a593Smuzhiyun if (scmd->device->expecting_cc_ua) {
565*4882a593Smuzhiyun /*
566*4882a593Smuzhiyun * Because some device does not queue unit
567*4882a593Smuzhiyun * attentions correctly, we carefully check
568*4882a593Smuzhiyun * additional sense code and qualifier so as
569*4882a593Smuzhiyun * not to squash media change unit attention.
570*4882a593Smuzhiyun */
571*4882a593Smuzhiyun if (sshdr.asc != 0x28 || sshdr.ascq != 0x00) {
572*4882a593Smuzhiyun scmd->device->expecting_cc_ua = 0;
573*4882a593Smuzhiyun return NEEDS_RETRY;
574*4882a593Smuzhiyun }
575*4882a593Smuzhiyun }
576*4882a593Smuzhiyun /*
577*4882a593Smuzhiyun * we might also expect a cc/ua if another LUN on the target
578*4882a593Smuzhiyun * reported a UA with an ASC/ASCQ of 3F 0E -
579*4882a593Smuzhiyun * REPORTED LUNS DATA HAS CHANGED.
580*4882a593Smuzhiyun */
581*4882a593Smuzhiyun if (scmd->device->sdev_target->expecting_lun_change &&
582*4882a593Smuzhiyun sshdr.asc == 0x3f && sshdr.ascq == 0x0e)
583*4882a593Smuzhiyun return NEEDS_RETRY;
584*4882a593Smuzhiyun /*
585*4882a593Smuzhiyun * if the device is in the process of becoming ready, we
586*4882a593Smuzhiyun * should retry.
587*4882a593Smuzhiyun */
588*4882a593Smuzhiyun if ((sshdr.asc == 0x04) && (sshdr.ascq == 0x01))
589*4882a593Smuzhiyun return NEEDS_RETRY;
590*4882a593Smuzhiyun /*
591*4882a593Smuzhiyun * if the device is not started, we need to wake
592*4882a593Smuzhiyun * the error handler to start the motor
593*4882a593Smuzhiyun */
594*4882a593Smuzhiyun if (scmd->device->allow_restart &&
595*4882a593Smuzhiyun (sshdr.asc == 0x04) && (sshdr.ascq == 0x02))
596*4882a593Smuzhiyun return FAILED;
597*4882a593Smuzhiyun /*
598*4882a593Smuzhiyun * Pass the UA upwards for a determination in the completion
599*4882a593Smuzhiyun * functions.
600*4882a593Smuzhiyun */
601*4882a593Smuzhiyun return SUCCESS;
602*4882a593Smuzhiyun
603*4882a593Smuzhiyun /* these are not supported */
604*4882a593Smuzhiyun case DATA_PROTECT:
605*4882a593Smuzhiyun if (sshdr.asc == 0x27 && sshdr.ascq == 0x07) {
606*4882a593Smuzhiyun /* Thin provisioning hard threshold reached */
607*4882a593Smuzhiyun set_host_byte(scmd, DID_ALLOC_FAILURE);
608*4882a593Smuzhiyun return SUCCESS;
609*4882a593Smuzhiyun }
610*4882a593Smuzhiyun fallthrough;
611*4882a593Smuzhiyun case COPY_ABORTED:
612*4882a593Smuzhiyun case VOLUME_OVERFLOW:
613*4882a593Smuzhiyun case MISCOMPARE:
614*4882a593Smuzhiyun case BLANK_CHECK:
615*4882a593Smuzhiyun set_host_byte(scmd, DID_TARGET_FAILURE);
616*4882a593Smuzhiyun return SUCCESS;
617*4882a593Smuzhiyun
618*4882a593Smuzhiyun case MEDIUM_ERROR:
619*4882a593Smuzhiyun if (sshdr.asc == 0x11 || /* UNRECOVERED READ ERR */
620*4882a593Smuzhiyun sshdr.asc == 0x13 || /* AMNF DATA FIELD */
621*4882a593Smuzhiyun sshdr.asc == 0x14) { /* RECORD NOT FOUND */
622*4882a593Smuzhiyun set_host_byte(scmd, DID_MEDIUM_ERROR);
623*4882a593Smuzhiyun return SUCCESS;
624*4882a593Smuzhiyun }
625*4882a593Smuzhiyun return NEEDS_RETRY;
626*4882a593Smuzhiyun
627*4882a593Smuzhiyun case HARDWARE_ERROR:
628*4882a593Smuzhiyun if (scmd->device->retry_hwerror)
629*4882a593Smuzhiyun return ADD_TO_MLQUEUE;
630*4882a593Smuzhiyun else
631*4882a593Smuzhiyun set_host_byte(scmd, DID_TARGET_FAILURE);
632*4882a593Smuzhiyun fallthrough;
633*4882a593Smuzhiyun
634*4882a593Smuzhiyun case ILLEGAL_REQUEST:
635*4882a593Smuzhiyun if (sshdr.asc == 0x20 || /* Invalid command operation code */
636*4882a593Smuzhiyun sshdr.asc == 0x21 || /* Logical block address out of range */
637*4882a593Smuzhiyun sshdr.asc == 0x22 || /* Invalid function */
638*4882a593Smuzhiyun sshdr.asc == 0x24 || /* Invalid field in cdb */
639*4882a593Smuzhiyun sshdr.asc == 0x26 || /* Parameter value invalid */
640*4882a593Smuzhiyun sshdr.asc == 0x27) { /* Write protected */
641*4882a593Smuzhiyun set_host_byte(scmd, DID_TARGET_FAILURE);
642*4882a593Smuzhiyun }
643*4882a593Smuzhiyun return SUCCESS;
644*4882a593Smuzhiyun
645*4882a593Smuzhiyun default:
646*4882a593Smuzhiyun return SUCCESS;
647*4882a593Smuzhiyun }
648*4882a593Smuzhiyun }
649*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(scsi_check_sense);
650*4882a593Smuzhiyun
scsi_handle_queue_ramp_up(struct scsi_device * sdev)651*4882a593Smuzhiyun static void scsi_handle_queue_ramp_up(struct scsi_device *sdev)
652*4882a593Smuzhiyun {
653*4882a593Smuzhiyun struct scsi_host_template *sht = sdev->host->hostt;
654*4882a593Smuzhiyun struct scsi_device *tmp_sdev;
655*4882a593Smuzhiyun
656*4882a593Smuzhiyun if (!sht->track_queue_depth ||
657*4882a593Smuzhiyun sdev->queue_depth >= sdev->max_queue_depth)
658*4882a593Smuzhiyun return;
659*4882a593Smuzhiyun
660*4882a593Smuzhiyun if (time_before(jiffies,
661*4882a593Smuzhiyun sdev->last_queue_ramp_up + sdev->queue_ramp_up_period))
662*4882a593Smuzhiyun return;
663*4882a593Smuzhiyun
664*4882a593Smuzhiyun if (time_before(jiffies,
665*4882a593Smuzhiyun sdev->last_queue_full_time + sdev->queue_ramp_up_period))
666*4882a593Smuzhiyun return;
667*4882a593Smuzhiyun
668*4882a593Smuzhiyun /*
669*4882a593Smuzhiyun * Walk all devices of a target and do
670*4882a593Smuzhiyun * ramp up on them.
671*4882a593Smuzhiyun */
672*4882a593Smuzhiyun shost_for_each_device(tmp_sdev, sdev->host) {
673*4882a593Smuzhiyun if (tmp_sdev->channel != sdev->channel ||
674*4882a593Smuzhiyun tmp_sdev->id != sdev->id ||
675*4882a593Smuzhiyun tmp_sdev->queue_depth == sdev->max_queue_depth)
676*4882a593Smuzhiyun continue;
677*4882a593Smuzhiyun
678*4882a593Smuzhiyun scsi_change_queue_depth(tmp_sdev, tmp_sdev->queue_depth + 1);
679*4882a593Smuzhiyun sdev->last_queue_ramp_up = jiffies;
680*4882a593Smuzhiyun }
681*4882a593Smuzhiyun }
682*4882a593Smuzhiyun
scsi_handle_queue_full(struct scsi_device * sdev)683*4882a593Smuzhiyun static void scsi_handle_queue_full(struct scsi_device *sdev)
684*4882a593Smuzhiyun {
685*4882a593Smuzhiyun struct scsi_host_template *sht = sdev->host->hostt;
686*4882a593Smuzhiyun struct scsi_device *tmp_sdev;
687*4882a593Smuzhiyun
688*4882a593Smuzhiyun if (!sht->track_queue_depth)
689*4882a593Smuzhiyun return;
690*4882a593Smuzhiyun
691*4882a593Smuzhiyun shost_for_each_device(tmp_sdev, sdev->host) {
692*4882a593Smuzhiyun if (tmp_sdev->channel != sdev->channel ||
693*4882a593Smuzhiyun tmp_sdev->id != sdev->id)
694*4882a593Smuzhiyun continue;
695*4882a593Smuzhiyun /*
696*4882a593Smuzhiyun * We do not know the number of commands that were at
697*4882a593Smuzhiyun * the device when we got the queue full so we start
698*4882a593Smuzhiyun * from the highest possible value and work our way down.
699*4882a593Smuzhiyun */
700*4882a593Smuzhiyun scsi_track_queue_full(tmp_sdev, tmp_sdev->queue_depth - 1);
701*4882a593Smuzhiyun }
702*4882a593Smuzhiyun }
703*4882a593Smuzhiyun
704*4882a593Smuzhiyun /**
705*4882a593Smuzhiyun * scsi_eh_completed_normally - Disposition a eh cmd on return from LLD.
706*4882a593Smuzhiyun * @scmd: SCSI cmd to examine.
707*4882a593Smuzhiyun *
708*4882a593Smuzhiyun * Notes:
709*4882a593Smuzhiyun * This is *only* called when we are examining the status of commands
710*4882a593Smuzhiyun * queued during error recovery. the main difference here is that we
711*4882a593Smuzhiyun * don't allow for the possibility of retries here, and we are a lot
712*4882a593Smuzhiyun * more restrictive about what we consider acceptable.
713*4882a593Smuzhiyun */
scsi_eh_completed_normally(struct scsi_cmnd * scmd)714*4882a593Smuzhiyun static int scsi_eh_completed_normally(struct scsi_cmnd *scmd)
715*4882a593Smuzhiyun {
716*4882a593Smuzhiyun /*
717*4882a593Smuzhiyun * first check the host byte, to see if there is anything in there
718*4882a593Smuzhiyun * that would indicate what we need to do.
719*4882a593Smuzhiyun */
720*4882a593Smuzhiyun if (host_byte(scmd->result) == DID_RESET) {
721*4882a593Smuzhiyun /*
722*4882a593Smuzhiyun * rats. we are already in the error handler, so we now
723*4882a593Smuzhiyun * get to try and figure out what to do next. if the sense
724*4882a593Smuzhiyun * is valid, we have a pretty good idea of what to do.
725*4882a593Smuzhiyun * if not, we mark it as FAILED.
726*4882a593Smuzhiyun */
727*4882a593Smuzhiyun return scsi_check_sense(scmd);
728*4882a593Smuzhiyun }
729*4882a593Smuzhiyun if (host_byte(scmd->result) != DID_OK)
730*4882a593Smuzhiyun return FAILED;
731*4882a593Smuzhiyun
732*4882a593Smuzhiyun /*
733*4882a593Smuzhiyun * next, check the message byte.
734*4882a593Smuzhiyun */
735*4882a593Smuzhiyun if (msg_byte(scmd->result) != COMMAND_COMPLETE)
736*4882a593Smuzhiyun return FAILED;
737*4882a593Smuzhiyun
738*4882a593Smuzhiyun /*
739*4882a593Smuzhiyun * now, check the status byte to see if this indicates
740*4882a593Smuzhiyun * anything special.
741*4882a593Smuzhiyun */
742*4882a593Smuzhiyun switch (status_byte(scmd->result)) {
743*4882a593Smuzhiyun case GOOD:
744*4882a593Smuzhiyun scsi_handle_queue_ramp_up(scmd->device);
745*4882a593Smuzhiyun fallthrough;
746*4882a593Smuzhiyun case COMMAND_TERMINATED:
747*4882a593Smuzhiyun return SUCCESS;
748*4882a593Smuzhiyun case CHECK_CONDITION:
749*4882a593Smuzhiyun return scsi_check_sense(scmd);
750*4882a593Smuzhiyun case CONDITION_GOOD:
751*4882a593Smuzhiyun case INTERMEDIATE_GOOD:
752*4882a593Smuzhiyun case INTERMEDIATE_C_GOOD:
753*4882a593Smuzhiyun /*
754*4882a593Smuzhiyun * who knows? FIXME(eric)
755*4882a593Smuzhiyun */
756*4882a593Smuzhiyun return SUCCESS;
757*4882a593Smuzhiyun case RESERVATION_CONFLICT:
758*4882a593Smuzhiyun if (scmd->cmnd[0] == TEST_UNIT_READY)
759*4882a593Smuzhiyun /* it is a success, we probed the device and
760*4882a593Smuzhiyun * found it */
761*4882a593Smuzhiyun return SUCCESS;
762*4882a593Smuzhiyun /* otherwise, we failed to send the command */
763*4882a593Smuzhiyun return FAILED;
764*4882a593Smuzhiyun case QUEUE_FULL:
765*4882a593Smuzhiyun scsi_handle_queue_full(scmd->device);
766*4882a593Smuzhiyun fallthrough;
767*4882a593Smuzhiyun case BUSY:
768*4882a593Smuzhiyun return NEEDS_RETRY;
769*4882a593Smuzhiyun default:
770*4882a593Smuzhiyun return FAILED;
771*4882a593Smuzhiyun }
772*4882a593Smuzhiyun return FAILED;
773*4882a593Smuzhiyun }
774*4882a593Smuzhiyun
775*4882a593Smuzhiyun /**
776*4882a593Smuzhiyun * scsi_eh_done - Completion function for error handling.
777*4882a593Smuzhiyun * @scmd: Cmd that is done.
778*4882a593Smuzhiyun */
scsi_eh_done(struct scsi_cmnd * scmd)779*4882a593Smuzhiyun static void scsi_eh_done(struct scsi_cmnd *scmd)
780*4882a593Smuzhiyun {
781*4882a593Smuzhiyun struct completion *eh_action;
782*4882a593Smuzhiyun
783*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd,
784*4882a593Smuzhiyun "%s result: %x\n", __func__, scmd->result));
785*4882a593Smuzhiyun
786*4882a593Smuzhiyun eh_action = scmd->device->host->eh_action;
787*4882a593Smuzhiyun if (eh_action)
788*4882a593Smuzhiyun complete(eh_action);
789*4882a593Smuzhiyun }
790*4882a593Smuzhiyun
791*4882a593Smuzhiyun /**
792*4882a593Smuzhiyun * scsi_try_host_reset - ask host adapter to reset itself
793*4882a593Smuzhiyun * @scmd: SCSI cmd to send host reset.
794*4882a593Smuzhiyun */
scsi_try_host_reset(struct scsi_cmnd * scmd)795*4882a593Smuzhiyun static int scsi_try_host_reset(struct scsi_cmnd *scmd)
796*4882a593Smuzhiyun {
797*4882a593Smuzhiyun unsigned long flags;
798*4882a593Smuzhiyun int rtn;
799*4882a593Smuzhiyun struct Scsi_Host *host = scmd->device->host;
800*4882a593Smuzhiyun struct scsi_host_template *hostt = host->hostt;
801*4882a593Smuzhiyun
802*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
803*4882a593Smuzhiyun shost_printk(KERN_INFO, host, "Snd Host RST\n"));
804*4882a593Smuzhiyun
805*4882a593Smuzhiyun if (!hostt->eh_host_reset_handler)
806*4882a593Smuzhiyun return FAILED;
807*4882a593Smuzhiyun
808*4882a593Smuzhiyun rtn = hostt->eh_host_reset_handler(scmd);
809*4882a593Smuzhiyun
810*4882a593Smuzhiyun if (rtn == SUCCESS) {
811*4882a593Smuzhiyun if (!hostt->skip_settle_delay)
812*4882a593Smuzhiyun ssleep(HOST_RESET_SETTLE_TIME);
813*4882a593Smuzhiyun spin_lock_irqsave(host->host_lock, flags);
814*4882a593Smuzhiyun scsi_report_bus_reset(host, scmd_channel(scmd));
815*4882a593Smuzhiyun spin_unlock_irqrestore(host->host_lock, flags);
816*4882a593Smuzhiyun }
817*4882a593Smuzhiyun
818*4882a593Smuzhiyun return rtn;
819*4882a593Smuzhiyun }
820*4882a593Smuzhiyun
821*4882a593Smuzhiyun /**
822*4882a593Smuzhiyun * scsi_try_bus_reset - ask host to perform a bus reset
823*4882a593Smuzhiyun * @scmd: SCSI cmd to send bus reset.
824*4882a593Smuzhiyun */
scsi_try_bus_reset(struct scsi_cmnd * scmd)825*4882a593Smuzhiyun static int scsi_try_bus_reset(struct scsi_cmnd *scmd)
826*4882a593Smuzhiyun {
827*4882a593Smuzhiyun unsigned long flags;
828*4882a593Smuzhiyun int rtn;
829*4882a593Smuzhiyun struct Scsi_Host *host = scmd->device->host;
830*4882a593Smuzhiyun struct scsi_host_template *hostt = host->hostt;
831*4882a593Smuzhiyun
832*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd,
833*4882a593Smuzhiyun "%s: Snd Bus RST\n", __func__));
834*4882a593Smuzhiyun
835*4882a593Smuzhiyun if (!hostt->eh_bus_reset_handler)
836*4882a593Smuzhiyun return FAILED;
837*4882a593Smuzhiyun
838*4882a593Smuzhiyun rtn = hostt->eh_bus_reset_handler(scmd);
839*4882a593Smuzhiyun
840*4882a593Smuzhiyun if (rtn == SUCCESS) {
841*4882a593Smuzhiyun if (!hostt->skip_settle_delay)
842*4882a593Smuzhiyun ssleep(BUS_RESET_SETTLE_TIME);
843*4882a593Smuzhiyun spin_lock_irqsave(host->host_lock, flags);
844*4882a593Smuzhiyun scsi_report_bus_reset(host, scmd_channel(scmd));
845*4882a593Smuzhiyun spin_unlock_irqrestore(host->host_lock, flags);
846*4882a593Smuzhiyun }
847*4882a593Smuzhiyun
848*4882a593Smuzhiyun return rtn;
849*4882a593Smuzhiyun }
850*4882a593Smuzhiyun
__scsi_report_device_reset(struct scsi_device * sdev,void * data)851*4882a593Smuzhiyun static void __scsi_report_device_reset(struct scsi_device *sdev, void *data)
852*4882a593Smuzhiyun {
853*4882a593Smuzhiyun sdev->was_reset = 1;
854*4882a593Smuzhiyun sdev->expecting_cc_ua = 1;
855*4882a593Smuzhiyun }
856*4882a593Smuzhiyun
857*4882a593Smuzhiyun /**
858*4882a593Smuzhiyun * scsi_try_target_reset - Ask host to perform a target reset
859*4882a593Smuzhiyun * @scmd: SCSI cmd used to send a target reset
860*4882a593Smuzhiyun *
861*4882a593Smuzhiyun * Notes:
862*4882a593Smuzhiyun * There is no timeout for this operation. if this operation is
863*4882a593Smuzhiyun * unreliable for a given host, then the host itself needs to put a
864*4882a593Smuzhiyun * timer on it, and set the host back to a consistent state prior to
865*4882a593Smuzhiyun * returning.
866*4882a593Smuzhiyun */
scsi_try_target_reset(struct scsi_cmnd * scmd)867*4882a593Smuzhiyun static int scsi_try_target_reset(struct scsi_cmnd *scmd)
868*4882a593Smuzhiyun {
869*4882a593Smuzhiyun unsigned long flags;
870*4882a593Smuzhiyun int rtn;
871*4882a593Smuzhiyun struct Scsi_Host *host = scmd->device->host;
872*4882a593Smuzhiyun struct scsi_host_template *hostt = host->hostt;
873*4882a593Smuzhiyun
874*4882a593Smuzhiyun if (!hostt->eh_target_reset_handler)
875*4882a593Smuzhiyun return FAILED;
876*4882a593Smuzhiyun
877*4882a593Smuzhiyun rtn = hostt->eh_target_reset_handler(scmd);
878*4882a593Smuzhiyun if (rtn == SUCCESS) {
879*4882a593Smuzhiyun spin_lock_irqsave(host->host_lock, flags);
880*4882a593Smuzhiyun __starget_for_each_device(scsi_target(scmd->device), NULL,
881*4882a593Smuzhiyun __scsi_report_device_reset);
882*4882a593Smuzhiyun spin_unlock_irqrestore(host->host_lock, flags);
883*4882a593Smuzhiyun }
884*4882a593Smuzhiyun
885*4882a593Smuzhiyun return rtn;
886*4882a593Smuzhiyun }
887*4882a593Smuzhiyun
888*4882a593Smuzhiyun /**
889*4882a593Smuzhiyun * scsi_try_bus_device_reset - Ask host to perform a BDR on a dev
890*4882a593Smuzhiyun * @scmd: SCSI cmd used to send BDR
891*4882a593Smuzhiyun *
892*4882a593Smuzhiyun * Notes:
893*4882a593Smuzhiyun * There is no timeout for this operation. if this operation is
894*4882a593Smuzhiyun * unreliable for a given host, then the host itself needs to put a
895*4882a593Smuzhiyun * timer on it, and set the host back to a consistent state prior to
896*4882a593Smuzhiyun * returning.
897*4882a593Smuzhiyun */
scsi_try_bus_device_reset(struct scsi_cmnd * scmd)898*4882a593Smuzhiyun static int scsi_try_bus_device_reset(struct scsi_cmnd *scmd)
899*4882a593Smuzhiyun {
900*4882a593Smuzhiyun int rtn;
901*4882a593Smuzhiyun struct scsi_host_template *hostt = scmd->device->host->hostt;
902*4882a593Smuzhiyun
903*4882a593Smuzhiyun if (!hostt->eh_device_reset_handler)
904*4882a593Smuzhiyun return FAILED;
905*4882a593Smuzhiyun
906*4882a593Smuzhiyun rtn = hostt->eh_device_reset_handler(scmd);
907*4882a593Smuzhiyun if (rtn == SUCCESS)
908*4882a593Smuzhiyun __scsi_report_device_reset(scmd->device, NULL);
909*4882a593Smuzhiyun return rtn;
910*4882a593Smuzhiyun }
911*4882a593Smuzhiyun
912*4882a593Smuzhiyun /**
913*4882a593Smuzhiyun * scsi_try_to_abort_cmd - Ask host to abort a SCSI command
914*4882a593Smuzhiyun * @hostt: SCSI driver host template
915*4882a593Smuzhiyun * @scmd: SCSI cmd used to send a target reset
916*4882a593Smuzhiyun *
917*4882a593Smuzhiyun * Return value:
918*4882a593Smuzhiyun * SUCCESS, FAILED, or FAST_IO_FAIL
919*4882a593Smuzhiyun *
920*4882a593Smuzhiyun * Notes:
921*4882a593Smuzhiyun * SUCCESS does not necessarily indicate that the command
922*4882a593Smuzhiyun * has been aborted; it only indicates that the LLDDs
923*4882a593Smuzhiyun * has cleared all references to that command.
924*4882a593Smuzhiyun * LLDDs should return FAILED only if an abort was required
925*4882a593Smuzhiyun * but could not be executed. LLDDs should return FAST_IO_FAIL
926*4882a593Smuzhiyun * if the device is temporarily unavailable (eg due to a
927*4882a593Smuzhiyun * link down on FibreChannel)
928*4882a593Smuzhiyun */
scsi_try_to_abort_cmd(struct scsi_host_template * hostt,struct scsi_cmnd * scmd)929*4882a593Smuzhiyun static int scsi_try_to_abort_cmd(struct scsi_host_template *hostt,
930*4882a593Smuzhiyun struct scsi_cmnd *scmd)
931*4882a593Smuzhiyun {
932*4882a593Smuzhiyun if (!hostt->eh_abort_handler)
933*4882a593Smuzhiyun return FAILED;
934*4882a593Smuzhiyun
935*4882a593Smuzhiyun return hostt->eh_abort_handler(scmd);
936*4882a593Smuzhiyun }
937*4882a593Smuzhiyun
scsi_abort_eh_cmnd(struct scsi_cmnd * scmd)938*4882a593Smuzhiyun static void scsi_abort_eh_cmnd(struct scsi_cmnd *scmd)
939*4882a593Smuzhiyun {
940*4882a593Smuzhiyun if (scsi_try_to_abort_cmd(scmd->device->host->hostt, scmd) != SUCCESS)
941*4882a593Smuzhiyun if (scsi_try_bus_device_reset(scmd) != SUCCESS)
942*4882a593Smuzhiyun if (scsi_try_target_reset(scmd) != SUCCESS)
943*4882a593Smuzhiyun if (scsi_try_bus_reset(scmd) != SUCCESS)
944*4882a593Smuzhiyun scsi_try_host_reset(scmd);
945*4882a593Smuzhiyun }
946*4882a593Smuzhiyun
947*4882a593Smuzhiyun /**
948*4882a593Smuzhiyun * scsi_eh_prep_cmnd - Save a scsi command info as part of error recovery
949*4882a593Smuzhiyun * @scmd: SCSI command structure to hijack
950*4882a593Smuzhiyun * @ses: structure to save restore information
951*4882a593Smuzhiyun * @cmnd: CDB to send. Can be NULL if no new cmnd is needed
952*4882a593Smuzhiyun * @cmnd_size: size in bytes of @cmnd (must be <= BLK_MAX_CDB)
953*4882a593Smuzhiyun * @sense_bytes: size of sense data to copy. or 0 (if != 0 @cmnd is ignored)
954*4882a593Smuzhiyun *
955*4882a593Smuzhiyun * This function is used to save a scsi command information before re-execution
956*4882a593Smuzhiyun * as part of the error recovery process. If @sense_bytes is 0 the command
957*4882a593Smuzhiyun * sent must be one that does not transfer any data. If @sense_bytes != 0
958*4882a593Smuzhiyun * @cmnd is ignored and this functions sets up a REQUEST_SENSE command
959*4882a593Smuzhiyun * and cmnd buffers to read @sense_bytes into @scmd->sense_buffer.
960*4882a593Smuzhiyun */
scsi_eh_prep_cmnd(struct scsi_cmnd * scmd,struct scsi_eh_save * ses,unsigned char * cmnd,int cmnd_size,unsigned sense_bytes)961*4882a593Smuzhiyun void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses,
962*4882a593Smuzhiyun unsigned char *cmnd, int cmnd_size, unsigned sense_bytes)
963*4882a593Smuzhiyun {
964*4882a593Smuzhiyun struct scsi_device *sdev = scmd->device;
965*4882a593Smuzhiyun
966*4882a593Smuzhiyun /*
967*4882a593Smuzhiyun * We need saved copies of a number of fields - this is because
968*4882a593Smuzhiyun * error handling may need to overwrite these with different values
969*4882a593Smuzhiyun * to run different commands, and once error handling is complete,
970*4882a593Smuzhiyun * we will need to restore these values prior to running the actual
971*4882a593Smuzhiyun * command.
972*4882a593Smuzhiyun */
973*4882a593Smuzhiyun ses->cmd_len = scmd->cmd_len;
974*4882a593Smuzhiyun ses->cmnd = scmd->cmnd;
975*4882a593Smuzhiyun ses->data_direction = scmd->sc_data_direction;
976*4882a593Smuzhiyun ses->sdb = scmd->sdb;
977*4882a593Smuzhiyun ses->result = scmd->result;
978*4882a593Smuzhiyun ses->resid_len = scmd->req.resid_len;
979*4882a593Smuzhiyun ses->underflow = scmd->underflow;
980*4882a593Smuzhiyun ses->prot_op = scmd->prot_op;
981*4882a593Smuzhiyun ses->eh_eflags = scmd->eh_eflags;
982*4882a593Smuzhiyun
983*4882a593Smuzhiyun scmd->prot_op = SCSI_PROT_NORMAL;
984*4882a593Smuzhiyun scmd->eh_eflags = 0;
985*4882a593Smuzhiyun scmd->cmnd = ses->eh_cmnd;
986*4882a593Smuzhiyun memset(scmd->cmnd, 0, BLK_MAX_CDB);
987*4882a593Smuzhiyun memset(&scmd->sdb, 0, sizeof(scmd->sdb));
988*4882a593Smuzhiyun scmd->result = 0;
989*4882a593Smuzhiyun scmd->req.resid_len = 0;
990*4882a593Smuzhiyun
991*4882a593Smuzhiyun if (sense_bytes) {
992*4882a593Smuzhiyun scmd->sdb.length = min_t(unsigned, SCSI_SENSE_BUFFERSIZE,
993*4882a593Smuzhiyun sense_bytes);
994*4882a593Smuzhiyun sg_init_one(&ses->sense_sgl, scmd->sense_buffer,
995*4882a593Smuzhiyun scmd->sdb.length);
996*4882a593Smuzhiyun scmd->sdb.table.sgl = &ses->sense_sgl;
997*4882a593Smuzhiyun scmd->sc_data_direction = DMA_FROM_DEVICE;
998*4882a593Smuzhiyun scmd->sdb.table.nents = scmd->sdb.table.orig_nents = 1;
999*4882a593Smuzhiyun scmd->cmnd[0] = REQUEST_SENSE;
1000*4882a593Smuzhiyun scmd->cmnd[4] = scmd->sdb.length;
1001*4882a593Smuzhiyun scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
1002*4882a593Smuzhiyun } else {
1003*4882a593Smuzhiyun scmd->sc_data_direction = DMA_NONE;
1004*4882a593Smuzhiyun if (cmnd) {
1005*4882a593Smuzhiyun BUG_ON(cmnd_size > BLK_MAX_CDB);
1006*4882a593Smuzhiyun memcpy(scmd->cmnd, cmnd, cmnd_size);
1007*4882a593Smuzhiyun scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
1008*4882a593Smuzhiyun }
1009*4882a593Smuzhiyun }
1010*4882a593Smuzhiyun
1011*4882a593Smuzhiyun scmd->underflow = 0;
1012*4882a593Smuzhiyun
1013*4882a593Smuzhiyun if (sdev->scsi_level <= SCSI_2 && sdev->scsi_level != SCSI_UNKNOWN)
1014*4882a593Smuzhiyun scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) |
1015*4882a593Smuzhiyun (sdev->lun << 5 & 0xe0);
1016*4882a593Smuzhiyun
1017*4882a593Smuzhiyun /*
1018*4882a593Smuzhiyun * Zero the sense buffer. The scsi spec mandates that any
1019*4882a593Smuzhiyun * untransferred sense data should be interpreted as being zero.
1020*4882a593Smuzhiyun */
1021*4882a593Smuzhiyun memset(scmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
1022*4882a593Smuzhiyun }
1023*4882a593Smuzhiyun EXPORT_SYMBOL(scsi_eh_prep_cmnd);
1024*4882a593Smuzhiyun
1025*4882a593Smuzhiyun /**
1026*4882a593Smuzhiyun * scsi_eh_restore_cmnd - Restore a scsi command info as part of error recovery
1027*4882a593Smuzhiyun * @scmd: SCSI command structure to restore
1028*4882a593Smuzhiyun * @ses: saved information from a coresponding call to scsi_eh_prep_cmnd
1029*4882a593Smuzhiyun *
1030*4882a593Smuzhiyun * Undo any damage done by above scsi_eh_prep_cmnd().
1031*4882a593Smuzhiyun */
scsi_eh_restore_cmnd(struct scsi_cmnd * scmd,struct scsi_eh_save * ses)1032*4882a593Smuzhiyun void scsi_eh_restore_cmnd(struct scsi_cmnd* scmd, struct scsi_eh_save *ses)
1033*4882a593Smuzhiyun {
1034*4882a593Smuzhiyun /*
1035*4882a593Smuzhiyun * Restore original data
1036*4882a593Smuzhiyun */
1037*4882a593Smuzhiyun scmd->cmd_len = ses->cmd_len;
1038*4882a593Smuzhiyun scmd->cmnd = ses->cmnd;
1039*4882a593Smuzhiyun scmd->sc_data_direction = ses->data_direction;
1040*4882a593Smuzhiyun scmd->sdb = ses->sdb;
1041*4882a593Smuzhiyun scmd->result = ses->result;
1042*4882a593Smuzhiyun scmd->req.resid_len = ses->resid_len;
1043*4882a593Smuzhiyun scmd->underflow = ses->underflow;
1044*4882a593Smuzhiyun scmd->prot_op = ses->prot_op;
1045*4882a593Smuzhiyun scmd->eh_eflags = ses->eh_eflags;
1046*4882a593Smuzhiyun }
1047*4882a593Smuzhiyun EXPORT_SYMBOL(scsi_eh_restore_cmnd);
1048*4882a593Smuzhiyun
1049*4882a593Smuzhiyun /**
1050*4882a593Smuzhiyun * scsi_send_eh_cmnd - submit a scsi command as part of error recovery
1051*4882a593Smuzhiyun * @scmd: SCSI command structure to hijack
1052*4882a593Smuzhiyun * @cmnd: CDB to send
1053*4882a593Smuzhiyun * @cmnd_size: size in bytes of @cmnd
1054*4882a593Smuzhiyun * @timeout: timeout for this request
1055*4882a593Smuzhiyun * @sense_bytes: size of sense data to copy or 0
1056*4882a593Smuzhiyun *
1057*4882a593Smuzhiyun * This function is used to send a scsi command down to a target device
1058*4882a593Smuzhiyun * as part of the error recovery process. See also scsi_eh_prep_cmnd() above.
1059*4882a593Smuzhiyun *
1060*4882a593Smuzhiyun * Return value:
1061*4882a593Smuzhiyun * SUCCESS or FAILED or NEEDS_RETRY
1062*4882a593Smuzhiyun */
scsi_send_eh_cmnd(struct scsi_cmnd * scmd,unsigned char * cmnd,int cmnd_size,int timeout,unsigned sense_bytes)1063*4882a593Smuzhiyun static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd,
1064*4882a593Smuzhiyun int cmnd_size, int timeout, unsigned sense_bytes)
1065*4882a593Smuzhiyun {
1066*4882a593Smuzhiyun struct scsi_device *sdev = scmd->device;
1067*4882a593Smuzhiyun struct Scsi_Host *shost = sdev->host;
1068*4882a593Smuzhiyun DECLARE_COMPLETION_ONSTACK(done);
1069*4882a593Smuzhiyun unsigned long timeleft = timeout, delay;
1070*4882a593Smuzhiyun struct scsi_eh_save ses;
1071*4882a593Smuzhiyun const unsigned long stall_for = msecs_to_jiffies(100);
1072*4882a593Smuzhiyun int rtn;
1073*4882a593Smuzhiyun
1074*4882a593Smuzhiyun retry:
1075*4882a593Smuzhiyun scsi_eh_prep_cmnd(scmd, &ses, cmnd, cmnd_size, sense_bytes);
1076*4882a593Smuzhiyun shost->eh_action = &done;
1077*4882a593Smuzhiyun
1078*4882a593Smuzhiyun scsi_log_send(scmd);
1079*4882a593Smuzhiyun scmd->scsi_done = scsi_eh_done;
1080*4882a593Smuzhiyun
1081*4882a593Smuzhiyun /*
1082*4882a593Smuzhiyun * Lock sdev->state_mutex to avoid that scsi_device_quiesce() can
1083*4882a593Smuzhiyun * change the SCSI device state after we have examined it and before
1084*4882a593Smuzhiyun * .queuecommand() is called.
1085*4882a593Smuzhiyun */
1086*4882a593Smuzhiyun mutex_lock(&sdev->state_mutex);
1087*4882a593Smuzhiyun while (sdev->sdev_state == SDEV_BLOCK && timeleft > 0) {
1088*4882a593Smuzhiyun mutex_unlock(&sdev->state_mutex);
1089*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(5, sdev_printk(KERN_DEBUG, sdev,
1090*4882a593Smuzhiyun "%s: state %d <> %d\n", __func__, sdev->sdev_state,
1091*4882a593Smuzhiyun SDEV_BLOCK));
1092*4882a593Smuzhiyun delay = min(timeleft, stall_for);
1093*4882a593Smuzhiyun timeleft -= delay;
1094*4882a593Smuzhiyun msleep(jiffies_to_msecs(delay));
1095*4882a593Smuzhiyun mutex_lock(&sdev->state_mutex);
1096*4882a593Smuzhiyun }
1097*4882a593Smuzhiyun if (sdev->sdev_state != SDEV_BLOCK)
1098*4882a593Smuzhiyun rtn = shost->hostt->queuecommand(shost, scmd);
1099*4882a593Smuzhiyun else
1100*4882a593Smuzhiyun rtn = SCSI_MLQUEUE_DEVICE_BUSY;
1101*4882a593Smuzhiyun mutex_unlock(&sdev->state_mutex);
1102*4882a593Smuzhiyun
1103*4882a593Smuzhiyun if (rtn) {
1104*4882a593Smuzhiyun if (timeleft > stall_for) {
1105*4882a593Smuzhiyun scsi_eh_restore_cmnd(scmd, &ses);
1106*4882a593Smuzhiyun timeleft -= stall_for;
1107*4882a593Smuzhiyun msleep(jiffies_to_msecs(stall_for));
1108*4882a593Smuzhiyun goto retry;
1109*4882a593Smuzhiyun }
1110*4882a593Smuzhiyun /* signal not to enter either branch of the if () below */
1111*4882a593Smuzhiyun timeleft = 0;
1112*4882a593Smuzhiyun rtn = FAILED;
1113*4882a593Smuzhiyun } else {
1114*4882a593Smuzhiyun timeleft = wait_for_completion_timeout(&done, timeout);
1115*4882a593Smuzhiyun rtn = SUCCESS;
1116*4882a593Smuzhiyun }
1117*4882a593Smuzhiyun
1118*4882a593Smuzhiyun shost->eh_action = NULL;
1119*4882a593Smuzhiyun
1120*4882a593Smuzhiyun scsi_log_completion(scmd, rtn);
1121*4882a593Smuzhiyun
1122*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd,
1123*4882a593Smuzhiyun "%s timeleft: %ld\n",
1124*4882a593Smuzhiyun __func__, timeleft));
1125*4882a593Smuzhiyun
1126*4882a593Smuzhiyun /*
1127*4882a593Smuzhiyun * If there is time left scsi_eh_done got called, and we will examine
1128*4882a593Smuzhiyun * the actual status codes to see whether the command actually did
1129*4882a593Smuzhiyun * complete normally, else if we have a zero return and no time left,
1130*4882a593Smuzhiyun * the command must still be pending, so abort it and return FAILED.
1131*4882a593Smuzhiyun * If we never actually managed to issue the command, because
1132*4882a593Smuzhiyun * ->queuecommand() kept returning non zero, use the rtn = FAILED
1133*4882a593Smuzhiyun * value above (so don't execute either branch of the if)
1134*4882a593Smuzhiyun */
1135*4882a593Smuzhiyun if (timeleft) {
1136*4882a593Smuzhiyun rtn = scsi_eh_completed_normally(scmd);
1137*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd,
1138*4882a593Smuzhiyun "%s: scsi_eh_completed_normally %x\n", __func__, rtn));
1139*4882a593Smuzhiyun
1140*4882a593Smuzhiyun switch (rtn) {
1141*4882a593Smuzhiyun case SUCCESS:
1142*4882a593Smuzhiyun case NEEDS_RETRY:
1143*4882a593Smuzhiyun case FAILED:
1144*4882a593Smuzhiyun break;
1145*4882a593Smuzhiyun case ADD_TO_MLQUEUE:
1146*4882a593Smuzhiyun rtn = NEEDS_RETRY;
1147*4882a593Smuzhiyun break;
1148*4882a593Smuzhiyun default:
1149*4882a593Smuzhiyun rtn = FAILED;
1150*4882a593Smuzhiyun break;
1151*4882a593Smuzhiyun }
1152*4882a593Smuzhiyun } else if (rtn != FAILED) {
1153*4882a593Smuzhiyun scsi_abort_eh_cmnd(scmd);
1154*4882a593Smuzhiyun rtn = FAILED;
1155*4882a593Smuzhiyun }
1156*4882a593Smuzhiyun
1157*4882a593Smuzhiyun scsi_eh_restore_cmnd(scmd, &ses);
1158*4882a593Smuzhiyun
1159*4882a593Smuzhiyun return rtn;
1160*4882a593Smuzhiyun }
1161*4882a593Smuzhiyun
1162*4882a593Smuzhiyun /**
1163*4882a593Smuzhiyun * scsi_request_sense - Request sense data from a particular target.
1164*4882a593Smuzhiyun * @scmd: SCSI cmd for request sense.
1165*4882a593Smuzhiyun *
1166*4882a593Smuzhiyun * Notes:
1167*4882a593Smuzhiyun * Some hosts automatically obtain this information, others require
1168*4882a593Smuzhiyun * that we obtain it on our own. This function will *not* return until
1169*4882a593Smuzhiyun * the command either times out, or it completes.
1170*4882a593Smuzhiyun */
scsi_request_sense(struct scsi_cmnd * scmd)1171*4882a593Smuzhiyun static int scsi_request_sense(struct scsi_cmnd *scmd)
1172*4882a593Smuzhiyun {
1173*4882a593Smuzhiyun return scsi_send_eh_cmnd(scmd, NULL, 0, scmd->device->eh_timeout, ~0);
1174*4882a593Smuzhiyun }
1175*4882a593Smuzhiyun
scsi_eh_action(struct scsi_cmnd * scmd,int rtn)1176*4882a593Smuzhiyun static int scsi_eh_action(struct scsi_cmnd *scmd, int rtn)
1177*4882a593Smuzhiyun {
1178*4882a593Smuzhiyun if (!blk_rq_is_passthrough(scmd->request)) {
1179*4882a593Smuzhiyun struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd);
1180*4882a593Smuzhiyun if (sdrv->eh_action)
1181*4882a593Smuzhiyun rtn = sdrv->eh_action(scmd, rtn);
1182*4882a593Smuzhiyun }
1183*4882a593Smuzhiyun return rtn;
1184*4882a593Smuzhiyun }
1185*4882a593Smuzhiyun
1186*4882a593Smuzhiyun /**
1187*4882a593Smuzhiyun * scsi_eh_finish_cmd - Handle a cmd that eh is finished with.
1188*4882a593Smuzhiyun * @scmd: Original SCSI cmd that eh has finished.
1189*4882a593Smuzhiyun * @done_q: Queue for processed commands.
1190*4882a593Smuzhiyun *
1191*4882a593Smuzhiyun * Notes:
1192*4882a593Smuzhiyun * We don't want to use the normal command completion while we are are
1193*4882a593Smuzhiyun * still handling errors - it may cause other commands to be queued,
1194*4882a593Smuzhiyun * and that would disturb what we are doing. Thus we really want to
1195*4882a593Smuzhiyun * keep a list of pending commands for final completion, and once we
1196*4882a593Smuzhiyun * are ready to leave error handling we handle completion for real.
1197*4882a593Smuzhiyun */
scsi_eh_finish_cmd(struct scsi_cmnd * scmd,struct list_head * done_q)1198*4882a593Smuzhiyun void scsi_eh_finish_cmd(struct scsi_cmnd *scmd, struct list_head *done_q)
1199*4882a593Smuzhiyun {
1200*4882a593Smuzhiyun list_move_tail(&scmd->eh_entry, done_q);
1201*4882a593Smuzhiyun }
1202*4882a593Smuzhiyun EXPORT_SYMBOL(scsi_eh_finish_cmd);
1203*4882a593Smuzhiyun
1204*4882a593Smuzhiyun /**
1205*4882a593Smuzhiyun * scsi_eh_get_sense - Get device sense data.
1206*4882a593Smuzhiyun * @work_q: Queue of commands to process.
1207*4882a593Smuzhiyun * @done_q: Queue of processed commands.
1208*4882a593Smuzhiyun *
1209*4882a593Smuzhiyun * Description:
1210*4882a593Smuzhiyun * See if we need to request sense information. if so, then get it
1211*4882a593Smuzhiyun * now, so we have a better idea of what to do.
1212*4882a593Smuzhiyun *
1213*4882a593Smuzhiyun * Notes:
1214*4882a593Smuzhiyun * This has the unfortunate side effect that if a shost adapter does
1215*4882a593Smuzhiyun * not automatically request sense information, we end up shutting
1216*4882a593Smuzhiyun * it down before we request it.
1217*4882a593Smuzhiyun *
1218*4882a593Smuzhiyun * All drivers should request sense information internally these days,
1219*4882a593Smuzhiyun * so for now all I have to say is tough noogies if you end up in here.
1220*4882a593Smuzhiyun *
1221*4882a593Smuzhiyun * XXX: Long term this code should go away, but that needs an audit of
1222*4882a593Smuzhiyun * all LLDDs first.
1223*4882a593Smuzhiyun */
scsi_eh_get_sense(struct list_head * work_q,struct list_head * done_q)1224*4882a593Smuzhiyun int scsi_eh_get_sense(struct list_head *work_q,
1225*4882a593Smuzhiyun struct list_head *done_q)
1226*4882a593Smuzhiyun {
1227*4882a593Smuzhiyun struct scsi_cmnd *scmd, *next;
1228*4882a593Smuzhiyun struct Scsi_Host *shost;
1229*4882a593Smuzhiyun int rtn;
1230*4882a593Smuzhiyun
1231*4882a593Smuzhiyun /*
1232*4882a593Smuzhiyun * If SCSI_EH_ABORT_SCHEDULED has been set, it is timeout IO,
1233*4882a593Smuzhiyun * should not get sense.
1234*4882a593Smuzhiyun */
1235*4882a593Smuzhiyun list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
1236*4882a593Smuzhiyun if ((scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) ||
1237*4882a593Smuzhiyun SCSI_SENSE_VALID(scmd))
1238*4882a593Smuzhiyun continue;
1239*4882a593Smuzhiyun
1240*4882a593Smuzhiyun shost = scmd->device->host;
1241*4882a593Smuzhiyun if (scsi_host_eh_past_deadline(shost)) {
1242*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
1243*4882a593Smuzhiyun scmd_printk(KERN_INFO, scmd,
1244*4882a593Smuzhiyun "%s: skip request sense, past eh deadline\n",
1245*4882a593Smuzhiyun current->comm));
1246*4882a593Smuzhiyun break;
1247*4882a593Smuzhiyun }
1248*4882a593Smuzhiyun if (status_byte(scmd->result) != CHECK_CONDITION)
1249*4882a593Smuzhiyun /*
1250*4882a593Smuzhiyun * don't request sense if there's no check condition
1251*4882a593Smuzhiyun * status because the error we're processing isn't one
1252*4882a593Smuzhiyun * that has a sense code (and some devices get
1253*4882a593Smuzhiyun * confused by sense requests out of the blue)
1254*4882a593Smuzhiyun */
1255*4882a593Smuzhiyun continue;
1256*4882a593Smuzhiyun
1257*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd,
1258*4882a593Smuzhiyun "%s: requesting sense\n",
1259*4882a593Smuzhiyun current->comm));
1260*4882a593Smuzhiyun rtn = scsi_request_sense(scmd);
1261*4882a593Smuzhiyun if (rtn != SUCCESS)
1262*4882a593Smuzhiyun continue;
1263*4882a593Smuzhiyun
1264*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd,
1265*4882a593Smuzhiyun "sense requested, result %x\n", scmd->result));
1266*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3, scsi_print_sense(scmd));
1267*4882a593Smuzhiyun
1268*4882a593Smuzhiyun rtn = scsi_decide_disposition(scmd);
1269*4882a593Smuzhiyun
1270*4882a593Smuzhiyun /*
1271*4882a593Smuzhiyun * if the result was normal, then just pass it along to the
1272*4882a593Smuzhiyun * upper level.
1273*4882a593Smuzhiyun */
1274*4882a593Smuzhiyun if (rtn == SUCCESS)
1275*4882a593Smuzhiyun /*
1276*4882a593Smuzhiyun * We don't want this command reissued, just finished
1277*4882a593Smuzhiyun * with the sense data, so set retries to the max
1278*4882a593Smuzhiyun * allowed to ensure it won't get reissued. If the user
1279*4882a593Smuzhiyun * has requested infinite retries, we also want to
1280*4882a593Smuzhiyun * finish this command, so force completion by setting
1281*4882a593Smuzhiyun * retries and allowed to the same value.
1282*4882a593Smuzhiyun */
1283*4882a593Smuzhiyun if (scmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
1284*4882a593Smuzhiyun scmd->retries = scmd->allowed = 1;
1285*4882a593Smuzhiyun else
1286*4882a593Smuzhiyun scmd->retries = scmd->allowed;
1287*4882a593Smuzhiyun else if (rtn != NEEDS_RETRY)
1288*4882a593Smuzhiyun continue;
1289*4882a593Smuzhiyun
1290*4882a593Smuzhiyun scsi_eh_finish_cmd(scmd, done_q);
1291*4882a593Smuzhiyun }
1292*4882a593Smuzhiyun
1293*4882a593Smuzhiyun return list_empty(work_q);
1294*4882a593Smuzhiyun }
1295*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(scsi_eh_get_sense);
1296*4882a593Smuzhiyun
1297*4882a593Smuzhiyun /**
1298*4882a593Smuzhiyun * scsi_eh_tur - Send TUR to device.
1299*4882a593Smuzhiyun * @scmd: &scsi_cmnd to send TUR
1300*4882a593Smuzhiyun *
1301*4882a593Smuzhiyun * Return value:
1302*4882a593Smuzhiyun * 0 - Device is ready. 1 - Device NOT ready.
1303*4882a593Smuzhiyun */
scsi_eh_tur(struct scsi_cmnd * scmd)1304*4882a593Smuzhiyun static int scsi_eh_tur(struct scsi_cmnd *scmd)
1305*4882a593Smuzhiyun {
1306*4882a593Smuzhiyun static unsigned char tur_command[6] = {TEST_UNIT_READY, 0, 0, 0, 0, 0};
1307*4882a593Smuzhiyun int retry_cnt = 1, rtn;
1308*4882a593Smuzhiyun
1309*4882a593Smuzhiyun retry_tur:
1310*4882a593Smuzhiyun rtn = scsi_send_eh_cmnd(scmd, tur_command, 6,
1311*4882a593Smuzhiyun scmd->device->eh_timeout, 0);
1312*4882a593Smuzhiyun
1313*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd,
1314*4882a593Smuzhiyun "%s return: %x\n", __func__, rtn));
1315*4882a593Smuzhiyun
1316*4882a593Smuzhiyun switch (rtn) {
1317*4882a593Smuzhiyun case NEEDS_RETRY:
1318*4882a593Smuzhiyun if (retry_cnt--)
1319*4882a593Smuzhiyun goto retry_tur;
1320*4882a593Smuzhiyun fallthrough;
1321*4882a593Smuzhiyun case SUCCESS:
1322*4882a593Smuzhiyun return 0;
1323*4882a593Smuzhiyun default:
1324*4882a593Smuzhiyun return 1;
1325*4882a593Smuzhiyun }
1326*4882a593Smuzhiyun }
1327*4882a593Smuzhiyun
1328*4882a593Smuzhiyun /**
1329*4882a593Smuzhiyun * scsi_eh_test_devices - check if devices are responding from error recovery.
1330*4882a593Smuzhiyun * @cmd_list: scsi commands in error recovery.
1331*4882a593Smuzhiyun * @work_q: queue for commands which still need more error recovery
1332*4882a593Smuzhiyun * @done_q: queue for commands which are finished
1333*4882a593Smuzhiyun * @try_stu: boolean on if a STU command should be tried in addition to TUR.
1334*4882a593Smuzhiyun *
1335*4882a593Smuzhiyun * Decription:
1336*4882a593Smuzhiyun * Tests if devices are in a working state. Commands to devices now in
1337*4882a593Smuzhiyun * a working state are sent to the done_q while commands to devices which
1338*4882a593Smuzhiyun * are still failing to respond are returned to the work_q for more
1339*4882a593Smuzhiyun * processing.
1340*4882a593Smuzhiyun **/
scsi_eh_test_devices(struct list_head * cmd_list,struct list_head * work_q,struct list_head * done_q,int try_stu)1341*4882a593Smuzhiyun static int scsi_eh_test_devices(struct list_head *cmd_list,
1342*4882a593Smuzhiyun struct list_head *work_q,
1343*4882a593Smuzhiyun struct list_head *done_q, int try_stu)
1344*4882a593Smuzhiyun {
1345*4882a593Smuzhiyun struct scsi_cmnd *scmd, *next;
1346*4882a593Smuzhiyun struct scsi_device *sdev;
1347*4882a593Smuzhiyun int finish_cmds;
1348*4882a593Smuzhiyun
1349*4882a593Smuzhiyun while (!list_empty(cmd_list)) {
1350*4882a593Smuzhiyun scmd = list_entry(cmd_list->next, struct scsi_cmnd, eh_entry);
1351*4882a593Smuzhiyun sdev = scmd->device;
1352*4882a593Smuzhiyun
1353*4882a593Smuzhiyun if (!try_stu) {
1354*4882a593Smuzhiyun if (scsi_host_eh_past_deadline(sdev->host)) {
1355*4882a593Smuzhiyun /* Push items back onto work_q */
1356*4882a593Smuzhiyun list_splice_init(cmd_list, work_q);
1357*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
1358*4882a593Smuzhiyun sdev_printk(KERN_INFO, sdev,
1359*4882a593Smuzhiyun "%s: skip test device, past eh deadline",
1360*4882a593Smuzhiyun current->comm));
1361*4882a593Smuzhiyun break;
1362*4882a593Smuzhiyun }
1363*4882a593Smuzhiyun }
1364*4882a593Smuzhiyun
1365*4882a593Smuzhiyun finish_cmds = !scsi_device_online(scmd->device) ||
1366*4882a593Smuzhiyun (try_stu && !scsi_eh_try_stu(scmd) &&
1367*4882a593Smuzhiyun !scsi_eh_tur(scmd)) ||
1368*4882a593Smuzhiyun !scsi_eh_tur(scmd);
1369*4882a593Smuzhiyun
1370*4882a593Smuzhiyun list_for_each_entry_safe(scmd, next, cmd_list, eh_entry)
1371*4882a593Smuzhiyun if (scmd->device == sdev) {
1372*4882a593Smuzhiyun if (finish_cmds &&
1373*4882a593Smuzhiyun (try_stu ||
1374*4882a593Smuzhiyun scsi_eh_action(scmd, SUCCESS) == SUCCESS))
1375*4882a593Smuzhiyun scsi_eh_finish_cmd(scmd, done_q);
1376*4882a593Smuzhiyun else
1377*4882a593Smuzhiyun list_move_tail(&scmd->eh_entry, work_q);
1378*4882a593Smuzhiyun }
1379*4882a593Smuzhiyun }
1380*4882a593Smuzhiyun return list_empty(work_q);
1381*4882a593Smuzhiyun }
1382*4882a593Smuzhiyun
1383*4882a593Smuzhiyun /**
1384*4882a593Smuzhiyun * scsi_eh_try_stu - Send START_UNIT to device.
1385*4882a593Smuzhiyun * @scmd: &scsi_cmnd to send START_UNIT
1386*4882a593Smuzhiyun *
1387*4882a593Smuzhiyun * Return value:
1388*4882a593Smuzhiyun * 0 - Device is ready. 1 - Device NOT ready.
1389*4882a593Smuzhiyun */
scsi_eh_try_stu(struct scsi_cmnd * scmd)1390*4882a593Smuzhiyun static int scsi_eh_try_stu(struct scsi_cmnd *scmd)
1391*4882a593Smuzhiyun {
1392*4882a593Smuzhiyun static unsigned char stu_command[6] = {START_STOP, 0, 0, 0, 1, 0};
1393*4882a593Smuzhiyun
1394*4882a593Smuzhiyun if (scmd->device->allow_restart) {
1395*4882a593Smuzhiyun int i, rtn = NEEDS_RETRY;
1396*4882a593Smuzhiyun
1397*4882a593Smuzhiyun for (i = 0; rtn == NEEDS_RETRY && i < 2; i++)
1398*4882a593Smuzhiyun rtn = scsi_send_eh_cmnd(scmd, stu_command, 6, scmd->device->request_queue->rq_timeout, 0);
1399*4882a593Smuzhiyun
1400*4882a593Smuzhiyun if (rtn == SUCCESS)
1401*4882a593Smuzhiyun return 0;
1402*4882a593Smuzhiyun }
1403*4882a593Smuzhiyun
1404*4882a593Smuzhiyun return 1;
1405*4882a593Smuzhiyun }
1406*4882a593Smuzhiyun
1407*4882a593Smuzhiyun /**
1408*4882a593Smuzhiyun * scsi_eh_stu - send START_UNIT if needed
1409*4882a593Smuzhiyun * @shost: &scsi host being recovered.
1410*4882a593Smuzhiyun * @work_q: &list_head for pending commands.
1411*4882a593Smuzhiyun * @done_q: &list_head for processed commands.
1412*4882a593Smuzhiyun *
1413*4882a593Smuzhiyun * Notes:
1414*4882a593Smuzhiyun * If commands are failing due to not ready, initializing command required,
1415*4882a593Smuzhiyun * try revalidating the device, which will end up sending a start unit.
1416*4882a593Smuzhiyun */
scsi_eh_stu(struct Scsi_Host * shost,struct list_head * work_q,struct list_head * done_q)1417*4882a593Smuzhiyun static int scsi_eh_stu(struct Scsi_Host *shost,
1418*4882a593Smuzhiyun struct list_head *work_q,
1419*4882a593Smuzhiyun struct list_head *done_q)
1420*4882a593Smuzhiyun {
1421*4882a593Smuzhiyun struct scsi_cmnd *scmd, *stu_scmd, *next;
1422*4882a593Smuzhiyun struct scsi_device *sdev;
1423*4882a593Smuzhiyun
1424*4882a593Smuzhiyun shost_for_each_device(sdev, shost) {
1425*4882a593Smuzhiyun if (scsi_host_eh_past_deadline(shost)) {
1426*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
1427*4882a593Smuzhiyun sdev_printk(KERN_INFO, sdev,
1428*4882a593Smuzhiyun "%s: skip START_UNIT, past eh deadline\n",
1429*4882a593Smuzhiyun current->comm));
1430*4882a593Smuzhiyun scsi_device_put(sdev);
1431*4882a593Smuzhiyun break;
1432*4882a593Smuzhiyun }
1433*4882a593Smuzhiyun stu_scmd = NULL;
1434*4882a593Smuzhiyun list_for_each_entry(scmd, work_q, eh_entry)
1435*4882a593Smuzhiyun if (scmd->device == sdev && SCSI_SENSE_VALID(scmd) &&
1436*4882a593Smuzhiyun scsi_check_sense(scmd) == FAILED ) {
1437*4882a593Smuzhiyun stu_scmd = scmd;
1438*4882a593Smuzhiyun break;
1439*4882a593Smuzhiyun }
1440*4882a593Smuzhiyun
1441*4882a593Smuzhiyun if (!stu_scmd)
1442*4882a593Smuzhiyun continue;
1443*4882a593Smuzhiyun
1444*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
1445*4882a593Smuzhiyun sdev_printk(KERN_INFO, sdev,
1446*4882a593Smuzhiyun "%s: Sending START_UNIT\n",
1447*4882a593Smuzhiyun current->comm));
1448*4882a593Smuzhiyun
1449*4882a593Smuzhiyun if (!scsi_eh_try_stu(stu_scmd)) {
1450*4882a593Smuzhiyun if (!scsi_device_online(sdev) ||
1451*4882a593Smuzhiyun !scsi_eh_tur(stu_scmd)) {
1452*4882a593Smuzhiyun list_for_each_entry_safe(scmd, next,
1453*4882a593Smuzhiyun work_q, eh_entry) {
1454*4882a593Smuzhiyun if (scmd->device == sdev &&
1455*4882a593Smuzhiyun scsi_eh_action(scmd, SUCCESS) == SUCCESS)
1456*4882a593Smuzhiyun scsi_eh_finish_cmd(scmd, done_q);
1457*4882a593Smuzhiyun }
1458*4882a593Smuzhiyun }
1459*4882a593Smuzhiyun } else {
1460*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
1461*4882a593Smuzhiyun sdev_printk(KERN_INFO, sdev,
1462*4882a593Smuzhiyun "%s: START_UNIT failed\n",
1463*4882a593Smuzhiyun current->comm));
1464*4882a593Smuzhiyun }
1465*4882a593Smuzhiyun }
1466*4882a593Smuzhiyun
1467*4882a593Smuzhiyun return list_empty(work_q);
1468*4882a593Smuzhiyun }
1469*4882a593Smuzhiyun
1470*4882a593Smuzhiyun
1471*4882a593Smuzhiyun /**
1472*4882a593Smuzhiyun * scsi_eh_bus_device_reset - send bdr if needed
1473*4882a593Smuzhiyun * @shost: scsi host being recovered.
1474*4882a593Smuzhiyun * @work_q: &list_head for pending commands.
1475*4882a593Smuzhiyun * @done_q: &list_head for processed commands.
1476*4882a593Smuzhiyun *
1477*4882a593Smuzhiyun * Notes:
1478*4882a593Smuzhiyun * Try a bus device reset. Still, look to see whether we have multiple
1479*4882a593Smuzhiyun * devices that are jammed or not - if we have multiple devices, it
1480*4882a593Smuzhiyun * makes no sense to try bus_device_reset - we really would need to try
1481*4882a593Smuzhiyun * a bus_reset instead.
1482*4882a593Smuzhiyun */
scsi_eh_bus_device_reset(struct Scsi_Host * shost,struct list_head * work_q,struct list_head * done_q)1483*4882a593Smuzhiyun static int scsi_eh_bus_device_reset(struct Scsi_Host *shost,
1484*4882a593Smuzhiyun struct list_head *work_q,
1485*4882a593Smuzhiyun struct list_head *done_q)
1486*4882a593Smuzhiyun {
1487*4882a593Smuzhiyun struct scsi_cmnd *scmd, *bdr_scmd, *next;
1488*4882a593Smuzhiyun struct scsi_device *sdev;
1489*4882a593Smuzhiyun int rtn;
1490*4882a593Smuzhiyun
1491*4882a593Smuzhiyun shost_for_each_device(sdev, shost) {
1492*4882a593Smuzhiyun if (scsi_host_eh_past_deadline(shost)) {
1493*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
1494*4882a593Smuzhiyun sdev_printk(KERN_INFO, sdev,
1495*4882a593Smuzhiyun "%s: skip BDR, past eh deadline\n",
1496*4882a593Smuzhiyun current->comm));
1497*4882a593Smuzhiyun scsi_device_put(sdev);
1498*4882a593Smuzhiyun break;
1499*4882a593Smuzhiyun }
1500*4882a593Smuzhiyun bdr_scmd = NULL;
1501*4882a593Smuzhiyun list_for_each_entry(scmd, work_q, eh_entry)
1502*4882a593Smuzhiyun if (scmd->device == sdev) {
1503*4882a593Smuzhiyun bdr_scmd = scmd;
1504*4882a593Smuzhiyun break;
1505*4882a593Smuzhiyun }
1506*4882a593Smuzhiyun
1507*4882a593Smuzhiyun if (!bdr_scmd)
1508*4882a593Smuzhiyun continue;
1509*4882a593Smuzhiyun
1510*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
1511*4882a593Smuzhiyun sdev_printk(KERN_INFO, sdev,
1512*4882a593Smuzhiyun "%s: Sending BDR\n", current->comm));
1513*4882a593Smuzhiyun rtn = scsi_try_bus_device_reset(bdr_scmd);
1514*4882a593Smuzhiyun if (rtn == SUCCESS || rtn == FAST_IO_FAIL) {
1515*4882a593Smuzhiyun if (!scsi_device_online(sdev) ||
1516*4882a593Smuzhiyun rtn == FAST_IO_FAIL ||
1517*4882a593Smuzhiyun !scsi_eh_tur(bdr_scmd)) {
1518*4882a593Smuzhiyun list_for_each_entry_safe(scmd, next,
1519*4882a593Smuzhiyun work_q, eh_entry) {
1520*4882a593Smuzhiyun if (scmd->device == sdev &&
1521*4882a593Smuzhiyun scsi_eh_action(scmd, rtn) != FAILED)
1522*4882a593Smuzhiyun scsi_eh_finish_cmd(scmd,
1523*4882a593Smuzhiyun done_q);
1524*4882a593Smuzhiyun }
1525*4882a593Smuzhiyun }
1526*4882a593Smuzhiyun } else {
1527*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
1528*4882a593Smuzhiyun sdev_printk(KERN_INFO, sdev,
1529*4882a593Smuzhiyun "%s: BDR failed\n", current->comm));
1530*4882a593Smuzhiyun }
1531*4882a593Smuzhiyun }
1532*4882a593Smuzhiyun
1533*4882a593Smuzhiyun return list_empty(work_q);
1534*4882a593Smuzhiyun }
1535*4882a593Smuzhiyun
1536*4882a593Smuzhiyun /**
1537*4882a593Smuzhiyun * scsi_eh_target_reset - send target reset if needed
1538*4882a593Smuzhiyun * @shost: scsi host being recovered.
1539*4882a593Smuzhiyun * @work_q: &list_head for pending commands.
1540*4882a593Smuzhiyun * @done_q: &list_head for processed commands.
1541*4882a593Smuzhiyun *
1542*4882a593Smuzhiyun * Notes:
1543*4882a593Smuzhiyun * Try a target reset.
1544*4882a593Smuzhiyun */
scsi_eh_target_reset(struct Scsi_Host * shost,struct list_head * work_q,struct list_head * done_q)1545*4882a593Smuzhiyun static int scsi_eh_target_reset(struct Scsi_Host *shost,
1546*4882a593Smuzhiyun struct list_head *work_q,
1547*4882a593Smuzhiyun struct list_head *done_q)
1548*4882a593Smuzhiyun {
1549*4882a593Smuzhiyun LIST_HEAD(tmp_list);
1550*4882a593Smuzhiyun LIST_HEAD(check_list);
1551*4882a593Smuzhiyun
1552*4882a593Smuzhiyun list_splice_init(work_q, &tmp_list);
1553*4882a593Smuzhiyun
1554*4882a593Smuzhiyun while (!list_empty(&tmp_list)) {
1555*4882a593Smuzhiyun struct scsi_cmnd *next, *scmd;
1556*4882a593Smuzhiyun int rtn;
1557*4882a593Smuzhiyun unsigned int id;
1558*4882a593Smuzhiyun
1559*4882a593Smuzhiyun if (scsi_host_eh_past_deadline(shost)) {
1560*4882a593Smuzhiyun /* push back on work queue for further processing */
1561*4882a593Smuzhiyun list_splice_init(&check_list, work_q);
1562*4882a593Smuzhiyun list_splice_init(&tmp_list, work_q);
1563*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
1564*4882a593Smuzhiyun shost_printk(KERN_INFO, shost,
1565*4882a593Smuzhiyun "%s: Skip target reset, past eh deadline\n",
1566*4882a593Smuzhiyun current->comm));
1567*4882a593Smuzhiyun return list_empty(work_q);
1568*4882a593Smuzhiyun }
1569*4882a593Smuzhiyun
1570*4882a593Smuzhiyun scmd = list_entry(tmp_list.next, struct scsi_cmnd, eh_entry);
1571*4882a593Smuzhiyun id = scmd_id(scmd);
1572*4882a593Smuzhiyun
1573*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
1574*4882a593Smuzhiyun shost_printk(KERN_INFO, shost,
1575*4882a593Smuzhiyun "%s: Sending target reset to target %d\n",
1576*4882a593Smuzhiyun current->comm, id));
1577*4882a593Smuzhiyun rtn = scsi_try_target_reset(scmd);
1578*4882a593Smuzhiyun if (rtn != SUCCESS && rtn != FAST_IO_FAIL)
1579*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
1580*4882a593Smuzhiyun shost_printk(KERN_INFO, shost,
1581*4882a593Smuzhiyun "%s: Target reset failed"
1582*4882a593Smuzhiyun " target: %d\n",
1583*4882a593Smuzhiyun current->comm, id));
1584*4882a593Smuzhiyun list_for_each_entry_safe(scmd, next, &tmp_list, eh_entry) {
1585*4882a593Smuzhiyun if (scmd_id(scmd) != id)
1586*4882a593Smuzhiyun continue;
1587*4882a593Smuzhiyun
1588*4882a593Smuzhiyun if (rtn == SUCCESS)
1589*4882a593Smuzhiyun list_move_tail(&scmd->eh_entry, &check_list);
1590*4882a593Smuzhiyun else if (rtn == FAST_IO_FAIL)
1591*4882a593Smuzhiyun scsi_eh_finish_cmd(scmd, done_q);
1592*4882a593Smuzhiyun else
1593*4882a593Smuzhiyun /* push back on work queue for further processing */
1594*4882a593Smuzhiyun list_move(&scmd->eh_entry, work_q);
1595*4882a593Smuzhiyun }
1596*4882a593Smuzhiyun }
1597*4882a593Smuzhiyun
1598*4882a593Smuzhiyun return scsi_eh_test_devices(&check_list, work_q, done_q, 0);
1599*4882a593Smuzhiyun }
1600*4882a593Smuzhiyun
1601*4882a593Smuzhiyun /**
1602*4882a593Smuzhiyun * scsi_eh_bus_reset - send a bus reset
1603*4882a593Smuzhiyun * @shost: &scsi host being recovered.
1604*4882a593Smuzhiyun * @work_q: &list_head for pending commands.
1605*4882a593Smuzhiyun * @done_q: &list_head for processed commands.
1606*4882a593Smuzhiyun */
scsi_eh_bus_reset(struct Scsi_Host * shost,struct list_head * work_q,struct list_head * done_q)1607*4882a593Smuzhiyun static int scsi_eh_bus_reset(struct Scsi_Host *shost,
1608*4882a593Smuzhiyun struct list_head *work_q,
1609*4882a593Smuzhiyun struct list_head *done_q)
1610*4882a593Smuzhiyun {
1611*4882a593Smuzhiyun struct scsi_cmnd *scmd, *chan_scmd, *next;
1612*4882a593Smuzhiyun LIST_HEAD(check_list);
1613*4882a593Smuzhiyun unsigned int channel;
1614*4882a593Smuzhiyun int rtn;
1615*4882a593Smuzhiyun
1616*4882a593Smuzhiyun /*
1617*4882a593Smuzhiyun * we really want to loop over the various channels, and do this on
1618*4882a593Smuzhiyun * a channel by channel basis. we should also check to see if any
1619*4882a593Smuzhiyun * of the failed commands are on soft_reset devices, and if so, skip
1620*4882a593Smuzhiyun * the reset.
1621*4882a593Smuzhiyun */
1622*4882a593Smuzhiyun
1623*4882a593Smuzhiyun for (channel = 0; channel <= shost->max_channel; channel++) {
1624*4882a593Smuzhiyun if (scsi_host_eh_past_deadline(shost)) {
1625*4882a593Smuzhiyun list_splice_init(&check_list, work_q);
1626*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
1627*4882a593Smuzhiyun shost_printk(KERN_INFO, shost,
1628*4882a593Smuzhiyun "%s: skip BRST, past eh deadline\n",
1629*4882a593Smuzhiyun current->comm));
1630*4882a593Smuzhiyun return list_empty(work_q);
1631*4882a593Smuzhiyun }
1632*4882a593Smuzhiyun
1633*4882a593Smuzhiyun chan_scmd = NULL;
1634*4882a593Smuzhiyun list_for_each_entry(scmd, work_q, eh_entry) {
1635*4882a593Smuzhiyun if (channel == scmd_channel(scmd)) {
1636*4882a593Smuzhiyun chan_scmd = scmd;
1637*4882a593Smuzhiyun break;
1638*4882a593Smuzhiyun /*
1639*4882a593Smuzhiyun * FIXME add back in some support for
1640*4882a593Smuzhiyun * soft_reset devices.
1641*4882a593Smuzhiyun */
1642*4882a593Smuzhiyun }
1643*4882a593Smuzhiyun }
1644*4882a593Smuzhiyun
1645*4882a593Smuzhiyun if (!chan_scmd)
1646*4882a593Smuzhiyun continue;
1647*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
1648*4882a593Smuzhiyun shost_printk(KERN_INFO, shost,
1649*4882a593Smuzhiyun "%s: Sending BRST chan: %d\n",
1650*4882a593Smuzhiyun current->comm, channel));
1651*4882a593Smuzhiyun rtn = scsi_try_bus_reset(chan_scmd);
1652*4882a593Smuzhiyun if (rtn == SUCCESS || rtn == FAST_IO_FAIL) {
1653*4882a593Smuzhiyun list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
1654*4882a593Smuzhiyun if (channel == scmd_channel(scmd)) {
1655*4882a593Smuzhiyun if (rtn == FAST_IO_FAIL)
1656*4882a593Smuzhiyun scsi_eh_finish_cmd(scmd,
1657*4882a593Smuzhiyun done_q);
1658*4882a593Smuzhiyun else
1659*4882a593Smuzhiyun list_move_tail(&scmd->eh_entry,
1660*4882a593Smuzhiyun &check_list);
1661*4882a593Smuzhiyun }
1662*4882a593Smuzhiyun }
1663*4882a593Smuzhiyun } else {
1664*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
1665*4882a593Smuzhiyun shost_printk(KERN_INFO, shost,
1666*4882a593Smuzhiyun "%s: BRST failed chan: %d\n",
1667*4882a593Smuzhiyun current->comm, channel));
1668*4882a593Smuzhiyun }
1669*4882a593Smuzhiyun }
1670*4882a593Smuzhiyun return scsi_eh_test_devices(&check_list, work_q, done_q, 0);
1671*4882a593Smuzhiyun }
1672*4882a593Smuzhiyun
1673*4882a593Smuzhiyun /**
1674*4882a593Smuzhiyun * scsi_eh_host_reset - send a host reset
1675*4882a593Smuzhiyun * @shost: host to be reset.
1676*4882a593Smuzhiyun * @work_q: &list_head for pending commands.
1677*4882a593Smuzhiyun * @done_q: &list_head for processed commands.
1678*4882a593Smuzhiyun */
scsi_eh_host_reset(struct Scsi_Host * shost,struct list_head * work_q,struct list_head * done_q)1679*4882a593Smuzhiyun static int scsi_eh_host_reset(struct Scsi_Host *shost,
1680*4882a593Smuzhiyun struct list_head *work_q,
1681*4882a593Smuzhiyun struct list_head *done_q)
1682*4882a593Smuzhiyun {
1683*4882a593Smuzhiyun struct scsi_cmnd *scmd, *next;
1684*4882a593Smuzhiyun LIST_HEAD(check_list);
1685*4882a593Smuzhiyun int rtn;
1686*4882a593Smuzhiyun
1687*4882a593Smuzhiyun if (!list_empty(work_q)) {
1688*4882a593Smuzhiyun scmd = list_entry(work_q->next,
1689*4882a593Smuzhiyun struct scsi_cmnd, eh_entry);
1690*4882a593Smuzhiyun
1691*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
1692*4882a593Smuzhiyun shost_printk(KERN_INFO, shost,
1693*4882a593Smuzhiyun "%s: Sending HRST\n",
1694*4882a593Smuzhiyun current->comm));
1695*4882a593Smuzhiyun
1696*4882a593Smuzhiyun rtn = scsi_try_host_reset(scmd);
1697*4882a593Smuzhiyun if (rtn == SUCCESS) {
1698*4882a593Smuzhiyun list_splice_init(work_q, &check_list);
1699*4882a593Smuzhiyun } else if (rtn == FAST_IO_FAIL) {
1700*4882a593Smuzhiyun list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
1701*4882a593Smuzhiyun scsi_eh_finish_cmd(scmd, done_q);
1702*4882a593Smuzhiyun }
1703*4882a593Smuzhiyun } else {
1704*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
1705*4882a593Smuzhiyun shost_printk(KERN_INFO, shost,
1706*4882a593Smuzhiyun "%s: HRST failed\n",
1707*4882a593Smuzhiyun current->comm));
1708*4882a593Smuzhiyun }
1709*4882a593Smuzhiyun }
1710*4882a593Smuzhiyun return scsi_eh_test_devices(&check_list, work_q, done_q, 1);
1711*4882a593Smuzhiyun }
1712*4882a593Smuzhiyun
1713*4882a593Smuzhiyun /**
1714*4882a593Smuzhiyun * scsi_eh_offline_sdevs - offline scsi devices that fail to recover
1715*4882a593Smuzhiyun * @work_q: &list_head for pending commands.
1716*4882a593Smuzhiyun * @done_q: &list_head for processed commands.
1717*4882a593Smuzhiyun */
scsi_eh_offline_sdevs(struct list_head * work_q,struct list_head * done_q)1718*4882a593Smuzhiyun static void scsi_eh_offline_sdevs(struct list_head *work_q,
1719*4882a593Smuzhiyun struct list_head *done_q)
1720*4882a593Smuzhiyun {
1721*4882a593Smuzhiyun struct scsi_cmnd *scmd, *next;
1722*4882a593Smuzhiyun struct scsi_device *sdev;
1723*4882a593Smuzhiyun
1724*4882a593Smuzhiyun list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
1725*4882a593Smuzhiyun sdev_printk(KERN_INFO, scmd->device, "Device offlined - "
1726*4882a593Smuzhiyun "not ready after error recovery\n");
1727*4882a593Smuzhiyun sdev = scmd->device;
1728*4882a593Smuzhiyun
1729*4882a593Smuzhiyun mutex_lock(&sdev->state_mutex);
1730*4882a593Smuzhiyun scsi_device_set_state(sdev, SDEV_OFFLINE);
1731*4882a593Smuzhiyun mutex_unlock(&sdev->state_mutex);
1732*4882a593Smuzhiyun
1733*4882a593Smuzhiyun scsi_eh_finish_cmd(scmd, done_q);
1734*4882a593Smuzhiyun }
1735*4882a593Smuzhiyun return;
1736*4882a593Smuzhiyun }
1737*4882a593Smuzhiyun
1738*4882a593Smuzhiyun /**
1739*4882a593Smuzhiyun * scsi_noretry_cmd - determine if command should be failed fast
1740*4882a593Smuzhiyun * @scmd: SCSI cmd to examine.
1741*4882a593Smuzhiyun */
scsi_noretry_cmd(struct scsi_cmnd * scmd)1742*4882a593Smuzhiyun int scsi_noretry_cmd(struct scsi_cmnd *scmd)
1743*4882a593Smuzhiyun {
1744*4882a593Smuzhiyun switch (host_byte(scmd->result)) {
1745*4882a593Smuzhiyun case DID_OK:
1746*4882a593Smuzhiyun break;
1747*4882a593Smuzhiyun case DID_TIME_OUT:
1748*4882a593Smuzhiyun goto check_type;
1749*4882a593Smuzhiyun case DID_BUS_BUSY:
1750*4882a593Smuzhiyun return (scmd->request->cmd_flags & REQ_FAILFAST_TRANSPORT);
1751*4882a593Smuzhiyun case DID_PARITY:
1752*4882a593Smuzhiyun return (scmd->request->cmd_flags & REQ_FAILFAST_DEV);
1753*4882a593Smuzhiyun case DID_ERROR:
1754*4882a593Smuzhiyun if (msg_byte(scmd->result) == COMMAND_COMPLETE &&
1755*4882a593Smuzhiyun status_byte(scmd->result) == RESERVATION_CONFLICT)
1756*4882a593Smuzhiyun return 0;
1757*4882a593Smuzhiyun fallthrough;
1758*4882a593Smuzhiyun case DID_SOFT_ERROR:
1759*4882a593Smuzhiyun return (scmd->request->cmd_flags & REQ_FAILFAST_DRIVER);
1760*4882a593Smuzhiyun }
1761*4882a593Smuzhiyun
1762*4882a593Smuzhiyun if (status_byte(scmd->result) != CHECK_CONDITION)
1763*4882a593Smuzhiyun return 0;
1764*4882a593Smuzhiyun
1765*4882a593Smuzhiyun check_type:
1766*4882a593Smuzhiyun /*
1767*4882a593Smuzhiyun * assume caller has checked sense and determined
1768*4882a593Smuzhiyun * the check condition was retryable.
1769*4882a593Smuzhiyun */
1770*4882a593Smuzhiyun if (scmd->request->cmd_flags & REQ_FAILFAST_DEV ||
1771*4882a593Smuzhiyun blk_rq_is_passthrough(scmd->request))
1772*4882a593Smuzhiyun return 1;
1773*4882a593Smuzhiyun
1774*4882a593Smuzhiyun return 0;
1775*4882a593Smuzhiyun }
1776*4882a593Smuzhiyun
1777*4882a593Smuzhiyun /**
1778*4882a593Smuzhiyun * scsi_decide_disposition - Disposition a cmd on return from LLD.
1779*4882a593Smuzhiyun * @scmd: SCSI cmd to examine.
1780*4882a593Smuzhiyun *
1781*4882a593Smuzhiyun * Notes:
1782*4882a593Smuzhiyun * This is *only* called when we are examining the status after sending
1783*4882a593Smuzhiyun * out the actual data command. any commands that are queued for error
1784*4882a593Smuzhiyun * recovery (e.g. test_unit_ready) do *not* come through here.
1785*4882a593Smuzhiyun *
1786*4882a593Smuzhiyun * When this routine returns failed, it means the error handler thread
1787*4882a593Smuzhiyun * is woken. In cases where the error code indicates an error that
1788*4882a593Smuzhiyun * doesn't require the error handler read (i.e. we don't need to
1789*4882a593Smuzhiyun * abort/reset), this function should return SUCCESS.
1790*4882a593Smuzhiyun */
scsi_decide_disposition(struct scsi_cmnd * scmd)1791*4882a593Smuzhiyun int scsi_decide_disposition(struct scsi_cmnd *scmd)
1792*4882a593Smuzhiyun {
1793*4882a593Smuzhiyun int rtn;
1794*4882a593Smuzhiyun
1795*4882a593Smuzhiyun /*
1796*4882a593Smuzhiyun * if the device is offline, then we clearly just pass the result back
1797*4882a593Smuzhiyun * up to the top level.
1798*4882a593Smuzhiyun */
1799*4882a593Smuzhiyun if (!scsi_device_online(scmd->device)) {
1800*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(5, scmd_printk(KERN_INFO, scmd,
1801*4882a593Smuzhiyun "%s: device offline - report as SUCCESS\n", __func__));
1802*4882a593Smuzhiyun return SUCCESS;
1803*4882a593Smuzhiyun }
1804*4882a593Smuzhiyun
1805*4882a593Smuzhiyun /*
1806*4882a593Smuzhiyun * first check the host byte, to see if there is anything in there
1807*4882a593Smuzhiyun * that would indicate what we need to do.
1808*4882a593Smuzhiyun */
1809*4882a593Smuzhiyun switch (host_byte(scmd->result)) {
1810*4882a593Smuzhiyun case DID_PASSTHROUGH:
1811*4882a593Smuzhiyun /*
1812*4882a593Smuzhiyun * no matter what, pass this through to the upper layer.
1813*4882a593Smuzhiyun * nuke this special code so that it looks like we are saying
1814*4882a593Smuzhiyun * did_ok.
1815*4882a593Smuzhiyun */
1816*4882a593Smuzhiyun scmd->result &= 0xff00ffff;
1817*4882a593Smuzhiyun return SUCCESS;
1818*4882a593Smuzhiyun case DID_OK:
1819*4882a593Smuzhiyun /*
1820*4882a593Smuzhiyun * looks good. drop through, and check the next byte.
1821*4882a593Smuzhiyun */
1822*4882a593Smuzhiyun break;
1823*4882a593Smuzhiyun case DID_ABORT:
1824*4882a593Smuzhiyun if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) {
1825*4882a593Smuzhiyun set_host_byte(scmd, DID_TIME_OUT);
1826*4882a593Smuzhiyun return SUCCESS;
1827*4882a593Smuzhiyun }
1828*4882a593Smuzhiyun fallthrough;
1829*4882a593Smuzhiyun case DID_NO_CONNECT:
1830*4882a593Smuzhiyun case DID_BAD_TARGET:
1831*4882a593Smuzhiyun /*
1832*4882a593Smuzhiyun * note - this means that we just report the status back
1833*4882a593Smuzhiyun * to the top level driver, not that we actually think
1834*4882a593Smuzhiyun * that it indicates SUCCESS.
1835*4882a593Smuzhiyun */
1836*4882a593Smuzhiyun return SUCCESS;
1837*4882a593Smuzhiyun case DID_SOFT_ERROR:
1838*4882a593Smuzhiyun /*
1839*4882a593Smuzhiyun * when the low level driver returns did_soft_error,
1840*4882a593Smuzhiyun * it is responsible for keeping an internal retry counter
1841*4882a593Smuzhiyun * in order to avoid endless loops (db)
1842*4882a593Smuzhiyun */
1843*4882a593Smuzhiyun goto maybe_retry;
1844*4882a593Smuzhiyun case DID_IMM_RETRY:
1845*4882a593Smuzhiyun return NEEDS_RETRY;
1846*4882a593Smuzhiyun
1847*4882a593Smuzhiyun case DID_REQUEUE:
1848*4882a593Smuzhiyun return ADD_TO_MLQUEUE;
1849*4882a593Smuzhiyun case DID_TRANSPORT_DISRUPTED:
1850*4882a593Smuzhiyun /*
1851*4882a593Smuzhiyun * LLD/transport was disrupted during processing of the IO.
1852*4882a593Smuzhiyun * The transport class is now blocked/blocking,
1853*4882a593Smuzhiyun * and the transport will decide what to do with the IO
1854*4882a593Smuzhiyun * based on its timers and recovery capablilities if
1855*4882a593Smuzhiyun * there are enough retries.
1856*4882a593Smuzhiyun */
1857*4882a593Smuzhiyun goto maybe_retry;
1858*4882a593Smuzhiyun case DID_TRANSPORT_FAILFAST:
1859*4882a593Smuzhiyun /*
1860*4882a593Smuzhiyun * The transport decided to failfast the IO (most likely
1861*4882a593Smuzhiyun * the fast io fail tmo fired), so send IO directly upwards.
1862*4882a593Smuzhiyun */
1863*4882a593Smuzhiyun return SUCCESS;
1864*4882a593Smuzhiyun case DID_ERROR:
1865*4882a593Smuzhiyun if (msg_byte(scmd->result) == COMMAND_COMPLETE &&
1866*4882a593Smuzhiyun status_byte(scmd->result) == RESERVATION_CONFLICT)
1867*4882a593Smuzhiyun /*
1868*4882a593Smuzhiyun * execute reservation conflict processing code
1869*4882a593Smuzhiyun * lower down
1870*4882a593Smuzhiyun */
1871*4882a593Smuzhiyun break;
1872*4882a593Smuzhiyun fallthrough;
1873*4882a593Smuzhiyun case DID_BUS_BUSY:
1874*4882a593Smuzhiyun case DID_PARITY:
1875*4882a593Smuzhiyun goto maybe_retry;
1876*4882a593Smuzhiyun case DID_TIME_OUT:
1877*4882a593Smuzhiyun /*
1878*4882a593Smuzhiyun * when we scan the bus, we get timeout messages for
1879*4882a593Smuzhiyun * these commands if there is no device available.
1880*4882a593Smuzhiyun * other hosts report did_no_connect for the same thing.
1881*4882a593Smuzhiyun */
1882*4882a593Smuzhiyun if ((scmd->cmnd[0] == TEST_UNIT_READY ||
1883*4882a593Smuzhiyun scmd->cmnd[0] == INQUIRY)) {
1884*4882a593Smuzhiyun return SUCCESS;
1885*4882a593Smuzhiyun } else {
1886*4882a593Smuzhiyun return FAILED;
1887*4882a593Smuzhiyun }
1888*4882a593Smuzhiyun case DID_RESET:
1889*4882a593Smuzhiyun return SUCCESS;
1890*4882a593Smuzhiyun default:
1891*4882a593Smuzhiyun return FAILED;
1892*4882a593Smuzhiyun }
1893*4882a593Smuzhiyun
1894*4882a593Smuzhiyun /*
1895*4882a593Smuzhiyun * next, check the message byte.
1896*4882a593Smuzhiyun */
1897*4882a593Smuzhiyun if (msg_byte(scmd->result) != COMMAND_COMPLETE)
1898*4882a593Smuzhiyun return FAILED;
1899*4882a593Smuzhiyun
1900*4882a593Smuzhiyun /*
1901*4882a593Smuzhiyun * check the status byte to see if this indicates anything special.
1902*4882a593Smuzhiyun */
1903*4882a593Smuzhiyun switch (status_byte(scmd->result)) {
1904*4882a593Smuzhiyun case QUEUE_FULL:
1905*4882a593Smuzhiyun scsi_handle_queue_full(scmd->device);
1906*4882a593Smuzhiyun /*
1907*4882a593Smuzhiyun * the case of trying to send too many commands to a
1908*4882a593Smuzhiyun * tagged queueing device.
1909*4882a593Smuzhiyun */
1910*4882a593Smuzhiyun fallthrough;
1911*4882a593Smuzhiyun case BUSY:
1912*4882a593Smuzhiyun /*
1913*4882a593Smuzhiyun * device can't talk to us at the moment. Should only
1914*4882a593Smuzhiyun * occur (SAM-3) when the task queue is empty, so will cause
1915*4882a593Smuzhiyun * the empty queue handling to trigger a stall in the
1916*4882a593Smuzhiyun * device.
1917*4882a593Smuzhiyun */
1918*4882a593Smuzhiyun return ADD_TO_MLQUEUE;
1919*4882a593Smuzhiyun case GOOD:
1920*4882a593Smuzhiyun if (scmd->cmnd[0] == REPORT_LUNS)
1921*4882a593Smuzhiyun scmd->device->sdev_target->expecting_lun_change = 0;
1922*4882a593Smuzhiyun scsi_handle_queue_ramp_up(scmd->device);
1923*4882a593Smuzhiyun fallthrough;
1924*4882a593Smuzhiyun case COMMAND_TERMINATED:
1925*4882a593Smuzhiyun return SUCCESS;
1926*4882a593Smuzhiyun case TASK_ABORTED:
1927*4882a593Smuzhiyun goto maybe_retry;
1928*4882a593Smuzhiyun case CHECK_CONDITION:
1929*4882a593Smuzhiyun rtn = scsi_check_sense(scmd);
1930*4882a593Smuzhiyun if (rtn == NEEDS_RETRY)
1931*4882a593Smuzhiyun goto maybe_retry;
1932*4882a593Smuzhiyun /* if rtn == FAILED, we have no sense information;
1933*4882a593Smuzhiyun * returning FAILED will wake the error handler thread
1934*4882a593Smuzhiyun * to collect the sense and redo the decide
1935*4882a593Smuzhiyun * disposition */
1936*4882a593Smuzhiyun return rtn;
1937*4882a593Smuzhiyun case CONDITION_GOOD:
1938*4882a593Smuzhiyun case INTERMEDIATE_GOOD:
1939*4882a593Smuzhiyun case INTERMEDIATE_C_GOOD:
1940*4882a593Smuzhiyun case ACA_ACTIVE:
1941*4882a593Smuzhiyun /*
1942*4882a593Smuzhiyun * who knows? FIXME(eric)
1943*4882a593Smuzhiyun */
1944*4882a593Smuzhiyun return SUCCESS;
1945*4882a593Smuzhiyun
1946*4882a593Smuzhiyun case RESERVATION_CONFLICT:
1947*4882a593Smuzhiyun sdev_printk(KERN_INFO, scmd->device,
1948*4882a593Smuzhiyun "reservation conflict\n");
1949*4882a593Smuzhiyun set_host_byte(scmd, DID_NEXUS_FAILURE);
1950*4882a593Smuzhiyun return SUCCESS; /* causes immediate i/o error */
1951*4882a593Smuzhiyun default:
1952*4882a593Smuzhiyun return FAILED;
1953*4882a593Smuzhiyun }
1954*4882a593Smuzhiyun return FAILED;
1955*4882a593Smuzhiyun
1956*4882a593Smuzhiyun maybe_retry:
1957*4882a593Smuzhiyun
1958*4882a593Smuzhiyun /* we requeue for retry because the error was retryable, and
1959*4882a593Smuzhiyun * the request was not marked fast fail. Note that above,
1960*4882a593Smuzhiyun * even if the request is marked fast fail, we still requeue
1961*4882a593Smuzhiyun * for queue congestion conditions (QUEUE_FULL or BUSY) */
1962*4882a593Smuzhiyun if (scsi_cmd_retry_allowed(scmd) && !scsi_noretry_cmd(scmd)) {
1963*4882a593Smuzhiyun return NEEDS_RETRY;
1964*4882a593Smuzhiyun } else {
1965*4882a593Smuzhiyun /*
1966*4882a593Smuzhiyun * no more retries - report this one back to upper level.
1967*4882a593Smuzhiyun */
1968*4882a593Smuzhiyun return SUCCESS;
1969*4882a593Smuzhiyun }
1970*4882a593Smuzhiyun }
1971*4882a593Smuzhiyun
/*
 * Completion callback handed to blk_execute_rq_nowait() by
 * scsi_eh_lock_door().  The outcome of the door-lock command is not
 * inspected (@status is unused); the request is simply released.
 */
static void eh_lock_door_done(struct request *req, blk_status_t status)
{
	blk_put_request(req);
}
1976*4882a593Smuzhiyun
1977*4882a593Smuzhiyun /**
1978*4882a593Smuzhiyun * scsi_eh_lock_door - Prevent medium removal for the specified device
1979*4882a593Smuzhiyun * @sdev: SCSI device to prevent medium removal
1980*4882a593Smuzhiyun *
1981*4882a593Smuzhiyun * Locking:
1982*4882a593Smuzhiyun * We must be called from process context.
1983*4882a593Smuzhiyun *
1984*4882a593Smuzhiyun * Notes:
1985*4882a593Smuzhiyun * We queue up an asynchronous "ALLOW MEDIUM REMOVAL" request on the
1986*4882a593Smuzhiyun * head of the devices request queue, and continue.
1987*4882a593Smuzhiyun */
scsi_eh_lock_door(struct scsi_device * sdev)1988*4882a593Smuzhiyun static void scsi_eh_lock_door(struct scsi_device *sdev)
1989*4882a593Smuzhiyun {
1990*4882a593Smuzhiyun struct request *req;
1991*4882a593Smuzhiyun struct scsi_request *rq;
1992*4882a593Smuzhiyun
1993*4882a593Smuzhiyun req = blk_get_request(sdev->request_queue, REQ_OP_SCSI_IN, 0);
1994*4882a593Smuzhiyun if (IS_ERR(req))
1995*4882a593Smuzhiyun return;
1996*4882a593Smuzhiyun rq = scsi_req(req);
1997*4882a593Smuzhiyun
1998*4882a593Smuzhiyun rq->cmd[0] = ALLOW_MEDIUM_REMOVAL;
1999*4882a593Smuzhiyun rq->cmd[1] = 0;
2000*4882a593Smuzhiyun rq->cmd[2] = 0;
2001*4882a593Smuzhiyun rq->cmd[3] = 0;
2002*4882a593Smuzhiyun rq->cmd[4] = SCSI_REMOVAL_PREVENT;
2003*4882a593Smuzhiyun rq->cmd[5] = 0;
2004*4882a593Smuzhiyun rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
2005*4882a593Smuzhiyun
2006*4882a593Smuzhiyun req->rq_flags |= RQF_QUIET;
2007*4882a593Smuzhiyun req->timeout = 10 * HZ;
2008*4882a593Smuzhiyun rq->retries = 5;
2009*4882a593Smuzhiyun
2010*4882a593Smuzhiyun blk_execute_rq_nowait(req->q, NULL, req, 1, eh_lock_door_done);
2011*4882a593Smuzhiyun }
2012*4882a593Smuzhiyun
2013*4882a593Smuzhiyun /**
2014*4882a593Smuzhiyun * scsi_restart_operations - restart io operations to the specified host.
2015*4882a593Smuzhiyun * @shost: Host we are restarting.
2016*4882a593Smuzhiyun *
2017*4882a593Smuzhiyun * Notes:
2018*4882a593Smuzhiyun * When we entered the error handler, we blocked all further i/o to
2019*4882a593Smuzhiyun * this device. we need to 'reverse' this process.
2020*4882a593Smuzhiyun */
scsi_restart_operations(struct Scsi_Host * shost)2021*4882a593Smuzhiyun static void scsi_restart_operations(struct Scsi_Host *shost)
2022*4882a593Smuzhiyun {
2023*4882a593Smuzhiyun struct scsi_device *sdev;
2024*4882a593Smuzhiyun unsigned long flags;
2025*4882a593Smuzhiyun
2026*4882a593Smuzhiyun /*
2027*4882a593Smuzhiyun * If the door was locked, we need to insert a door lock request
2028*4882a593Smuzhiyun * onto the head of the SCSI request queue for the device. There
2029*4882a593Smuzhiyun * is no point trying to lock the door of an off-line device.
2030*4882a593Smuzhiyun */
2031*4882a593Smuzhiyun shost_for_each_device(sdev, shost) {
2032*4882a593Smuzhiyun if (scsi_device_online(sdev) && sdev->was_reset && sdev->locked) {
2033*4882a593Smuzhiyun scsi_eh_lock_door(sdev);
2034*4882a593Smuzhiyun sdev->was_reset = 0;
2035*4882a593Smuzhiyun }
2036*4882a593Smuzhiyun }
2037*4882a593Smuzhiyun
2038*4882a593Smuzhiyun /*
2039*4882a593Smuzhiyun * next free up anything directly waiting upon the host. this
2040*4882a593Smuzhiyun * will be requests for character device operations, and also for
2041*4882a593Smuzhiyun * ioctls to queued block devices.
2042*4882a593Smuzhiyun */
2043*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
2044*4882a593Smuzhiyun shost_printk(KERN_INFO, shost, "waking up host to restart\n"));
2045*4882a593Smuzhiyun
2046*4882a593Smuzhiyun spin_lock_irqsave(shost->host_lock, flags);
2047*4882a593Smuzhiyun if (scsi_host_set_state(shost, SHOST_RUNNING))
2048*4882a593Smuzhiyun if (scsi_host_set_state(shost, SHOST_CANCEL))
2049*4882a593Smuzhiyun BUG_ON(scsi_host_set_state(shost, SHOST_DEL));
2050*4882a593Smuzhiyun spin_unlock_irqrestore(shost->host_lock, flags);
2051*4882a593Smuzhiyun
2052*4882a593Smuzhiyun wake_up(&shost->host_wait);
2053*4882a593Smuzhiyun
2054*4882a593Smuzhiyun /*
2055*4882a593Smuzhiyun * finally we need to re-initiate requests that may be pending. we will
2056*4882a593Smuzhiyun * have had everything blocked while error handling is taking place, and
2057*4882a593Smuzhiyun * now that error recovery is done, we will need to ensure that these
2058*4882a593Smuzhiyun * requests are started.
2059*4882a593Smuzhiyun */
2060*4882a593Smuzhiyun scsi_run_host_queues(shost);
2061*4882a593Smuzhiyun
2062*4882a593Smuzhiyun /*
2063*4882a593Smuzhiyun * if eh is active and host_eh_scheduled is pending we need to re-run
2064*4882a593Smuzhiyun * recovery. we do this check after scsi_run_host_queues() to allow
2065*4882a593Smuzhiyun * everything pent up since the last eh run a chance to make forward
2066*4882a593Smuzhiyun * progress before we sync again. Either we'll immediately re-run
2067*4882a593Smuzhiyun * recovery or scsi_device_unbusy() will wake us again when these
2068*4882a593Smuzhiyun * pending commands complete.
2069*4882a593Smuzhiyun */
2070*4882a593Smuzhiyun spin_lock_irqsave(shost->host_lock, flags);
2071*4882a593Smuzhiyun if (shost->host_eh_scheduled)
2072*4882a593Smuzhiyun if (scsi_host_set_state(shost, SHOST_RECOVERY))
2073*4882a593Smuzhiyun WARN_ON(scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY));
2074*4882a593Smuzhiyun spin_unlock_irqrestore(shost->host_lock, flags);
2075*4882a593Smuzhiyun }
2076*4882a593Smuzhiyun
/**
 * scsi_eh_ready_devs - check device ready state and recover if not.
 * @shost:	host to be recovered.
 * @work_q:	&list_head for pending commands.
 * @done_q:	&list_head for processed commands.
 *
 * Runs the recovery steps in order - start unit, device reset, target
 * reset, bus reset, host reset - and stops at the first step that
 * returns non-zero.  If every step returns zero, the devices of the
 * commands still on @work_q are handed to scsi_eh_offline_sdevs().
 */
void scsi_eh_ready_devs(struct Scsi_Host *shost,
			struct list_head *work_q,
			struct list_head *done_q)
{
	if (scsi_eh_stu(shost, work_q, done_q))
		return;
	if (scsi_eh_bus_device_reset(shost, work_q, done_q))
		return;
	if (scsi_eh_target_reset(shost, work_q, done_q))
		return;
	if (scsi_eh_bus_reset(shost, work_q, done_q))
		return;
	if (scsi_eh_host_reset(shost, work_q, done_q))
		return;

	scsi_eh_offline_sdevs(work_q, done_q);
}
EXPORT_SYMBOL_GPL(scsi_eh_ready_devs);
2096*4882a593Smuzhiyun
2097*4882a593Smuzhiyun /**
2098*4882a593Smuzhiyun * scsi_eh_flush_done_q - finish processed commands or retry them.
2099*4882a593Smuzhiyun * @done_q: list_head of processed commands.
2100*4882a593Smuzhiyun */
scsi_eh_flush_done_q(struct list_head * done_q)2101*4882a593Smuzhiyun void scsi_eh_flush_done_q(struct list_head *done_q)
2102*4882a593Smuzhiyun {
2103*4882a593Smuzhiyun struct scsi_cmnd *scmd, *next;
2104*4882a593Smuzhiyun
2105*4882a593Smuzhiyun list_for_each_entry_safe(scmd, next, done_q, eh_entry) {
2106*4882a593Smuzhiyun list_del_init(&scmd->eh_entry);
2107*4882a593Smuzhiyun if (scsi_device_online(scmd->device) &&
2108*4882a593Smuzhiyun !scsi_noretry_cmd(scmd) && scsi_cmd_retry_allowed(scmd)) {
2109*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
2110*4882a593Smuzhiyun scmd_printk(KERN_INFO, scmd,
2111*4882a593Smuzhiyun "%s: flush retry cmd\n",
2112*4882a593Smuzhiyun current->comm));
2113*4882a593Smuzhiyun scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
2114*4882a593Smuzhiyun } else {
2115*4882a593Smuzhiyun /*
2116*4882a593Smuzhiyun * If just we got sense for the device (called
2117*4882a593Smuzhiyun * scsi_eh_get_sense), scmd->result is already
2118*4882a593Smuzhiyun * set, do not set DRIVER_TIMEOUT.
2119*4882a593Smuzhiyun */
2120*4882a593Smuzhiyun if (!scmd->result)
2121*4882a593Smuzhiyun scmd->result |= (DRIVER_TIMEOUT << 24);
2122*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
2123*4882a593Smuzhiyun scmd_printk(KERN_INFO, scmd,
2124*4882a593Smuzhiyun "%s: flush finish cmd\n",
2125*4882a593Smuzhiyun current->comm));
2126*4882a593Smuzhiyun scsi_finish_command(scmd);
2127*4882a593Smuzhiyun }
2128*4882a593Smuzhiyun }
2129*4882a593Smuzhiyun }
2130*4882a593Smuzhiyun EXPORT_SYMBOL(scsi_eh_flush_done_q);
2131*4882a593Smuzhiyun
2132*4882a593Smuzhiyun /**
2133*4882a593Smuzhiyun * scsi_unjam_host - Attempt to fix a host which has a cmd that failed.
2134*4882a593Smuzhiyun * @shost: Host to unjam.
2135*4882a593Smuzhiyun *
2136*4882a593Smuzhiyun * Notes:
2137*4882a593Smuzhiyun * When we come in here, we *know* that all commands on the bus have
2138*4882a593Smuzhiyun * either completed, failed or timed out. we also know that no further
2139*4882a593Smuzhiyun * commands are being sent to the host, so things are relatively quiet
2140*4882a593Smuzhiyun * and we have freedom to fiddle with things as we wish.
2141*4882a593Smuzhiyun *
2142*4882a593Smuzhiyun * This is only the *default* implementation. it is possible for
2143*4882a593Smuzhiyun * individual drivers to supply their own version of this function, and
2144*4882a593Smuzhiyun * if the maintainer wishes to do this, it is strongly suggested that
2145*4882a593Smuzhiyun * this function be taken as a template and modified. this function
2146*4882a593Smuzhiyun * was designed to correctly handle problems for about 95% of the
2147*4882a593Smuzhiyun * different cases out there, and it should always provide at least a
2148*4882a593Smuzhiyun * reasonable amount of error recovery.
2149*4882a593Smuzhiyun *
2150*4882a593Smuzhiyun * Any command marked 'failed' or 'timeout' must eventually have
2151*4882a593Smuzhiyun * scsi_finish_cmd() called for it. we do all of the retry stuff
2152*4882a593Smuzhiyun * here, so when we restart the host after we return it should have an
2153*4882a593Smuzhiyun * empty queue.
2154*4882a593Smuzhiyun */
scsi_unjam_host(struct Scsi_Host * shost)2155*4882a593Smuzhiyun static void scsi_unjam_host(struct Scsi_Host *shost)
2156*4882a593Smuzhiyun {
2157*4882a593Smuzhiyun unsigned long flags;
2158*4882a593Smuzhiyun LIST_HEAD(eh_work_q);
2159*4882a593Smuzhiyun LIST_HEAD(eh_done_q);
2160*4882a593Smuzhiyun
2161*4882a593Smuzhiyun spin_lock_irqsave(shost->host_lock, flags);
2162*4882a593Smuzhiyun list_splice_init(&shost->eh_cmd_q, &eh_work_q);
2163*4882a593Smuzhiyun spin_unlock_irqrestore(shost->host_lock, flags);
2164*4882a593Smuzhiyun
2165*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(shost, &eh_work_q));
2166*4882a593Smuzhiyun
2167*4882a593Smuzhiyun if (!scsi_eh_get_sense(&eh_work_q, &eh_done_q))
2168*4882a593Smuzhiyun scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q);
2169*4882a593Smuzhiyun
2170*4882a593Smuzhiyun spin_lock_irqsave(shost->host_lock, flags);
2171*4882a593Smuzhiyun if (shost->eh_deadline != -1)
2172*4882a593Smuzhiyun shost->last_reset = 0;
2173*4882a593Smuzhiyun spin_unlock_irqrestore(shost->host_lock, flags);
2174*4882a593Smuzhiyun scsi_eh_flush_done_q(&eh_done_q);
2175*4882a593Smuzhiyun }
2176*4882a593Smuzhiyun
2177*4882a593Smuzhiyun /**
2178*4882a593Smuzhiyun * scsi_error_handler - SCSI error handler thread
2179*4882a593Smuzhiyun * @data: Host for which we are running.
2180*4882a593Smuzhiyun *
2181*4882a593Smuzhiyun * Notes:
2182*4882a593Smuzhiyun * This is the main error handling loop. This is run as a kernel thread
2183*4882a593Smuzhiyun * for every SCSI host and handles all error handling activity.
2184*4882a593Smuzhiyun */
scsi_error_handler(void * data)2185*4882a593Smuzhiyun int scsi_error_handler(void *data)
2186*4882a593Smuzhiyun {
2187*4882a593Smuzhiyun struct Scsi_Host *shost = data;
2188*4882a593Smuzhiyun
2189*4882a593Smuzhiyun /*
2190*4882a593Smuzhiyun * We use TASK_INTERRUPTIBLE so that the thread is not
2191*4882a593Smuzhiyun * counted against the load average as a running process.
2192*4882a593Smuzhiyun * We never actually get interrupted because kthread_run
2193*4882a593Smuzhiyun * disables signal delivery for the created thread.
2194*4882a593Smuzhiyun */
2195*4882a593Smuzhiyun while (true) {
2196*4882a593Smuzhiyun /*
2197*4882a593Smuzhiyun * The sequence in kthread_stop() sets the stop flag first
2198*4882a593Smuzhiyun * then wakes the process. To avoid missed wakeups, the task
2199*4882a593Smuzhiyun * should always be in a non running state before the stop
2200*4882a593Smuzhiyun * flag is checked
2201*4882a593Smuzhiyun */
2202*4882a593Smuzhiyun set_current_state(TASK_INTERRUPTIBLE);
2203*4882a593Smuzhiyun if (kthread_should_stop())
2204*4882a593Smuzhiyun break;
2205*4882a593Smuzhiyun
2206*4882a593Smuzhiyun if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) ||
2207*4882a593Smuzhiyun shost->host_failed != scsi_host_busy(shost)) {
2208*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(1,
2209*4882a593Smuzhiyun shost_printk(KERN_INFO, shost,
2210*4882a593Smuzhiyun "scsi_eh_%d: sleeping\n",
2211*4882a593Smuzhiyun shost->host_no));
2212*4882a593Smuzhiyun schedule();
2213*4882a593Smuzhiyun continue;
2214*4882a593Smuzhiyun }
2215*4882a593Smuzhiyun
2216*4882a593Smuzhiyun __set_current_state(TASK_RUNNING);
2217*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(1,
2218*4882a593Smuzhiyun shost_printk(KERN_INFO, shost,
2219*4882a593Smuzhiyun "scsi_eh_%d: waking up %d/%d/%d\n",
2220*4882a593Smuzhiyun shost->host_no, shost->host_eh_scheduled,
2221*4882a593Smuzhiyun shost->host_failed,
2222*4882a593Smuzhiyun scsi_host_busy(shost)));
2223*4882a593Smuzhiyun
2224*4882a593Smuzhiyun /*
2225*4882a593Smuzhiyun * We have a host that is failing for some reason. Figure out
2226*4882a593Smuzhiyun * what we need to do to get it up and online again (if we can).
2227*4882a593Smuzhiyun * If we fail, we end up taking the thing offline.
2228*4882a593Smuzhiyun */
2229*4882a593Smuzhiyun if (!shost->eh_noresume && scsi_autopm_get_host(shost) != 0) {
2230*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(1,
2231*4882a593Smuzhiyun shost_printk(KERN_ERR, shost,
2232*4882a593Smuzhiyun "scsi_eh_%d: unable to autoresume\n",
2233*4882a593Smuzhiyun shost->host_no));
2234*4882a593Smuzhiyun continue;
2235*4882a593Smuzhiyun }
2236*4882a593Smuzhiyun
2237*4882a593Smuzhiyun if (shost->transportt->eh_strategy_handler)
2238*4882a593Smuzhiyun shost->transportt->eh_strategy_handler(shost);
2239*4882a593Smuzhiyun else
2240*4882a593Smuzhiyun scsi_unjam_host(shost);
2241*4882a593Smuzhiyun
2242*4882a593Smuzhiyun /* All scmds have been handled */
2243*4882a593Smuzhiyun shost->host_failed = 0;
2244*4882a593Smuzhiyun
2245*4882a593Smuzhiyun /*
2246*4882a593Smuzhiyun * Note - if the above fails completely, the action is to take
2247*4882a593Smuzhiyun * individual devices offline and flush the queue of any
2248*4882a593Smuzhiyun * outstanding requests that may have been pending. When we
2249*4882a593Smuzhiyun * restart, we restart any I/O to any other devices on the bus
2250*4882a593Smuzhiyun * which are still online.
2251*4882a593Smuzhiyun */
2252*4882a593Smuzhiyun scsi_restart_operations(shost);
2253*4882a593Smuzhiyun if (!shost->eh_noresume)
2254*4882a593Smuzhiyun scsi_autopm_put_host(shost);
2255*4882a593Smuzhiyun }
2256*4882a593Smuzhiyun __set_current_state(TASK_RUNNING);
2257*4882a593Smuzhiyun
2258*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(1,
2259*4882a593Smuzhiyun shost_printk(KERN_INFO, shost,
2260*4882a593Smuzhiyun "Error handler scsi_eh_%d exiting\n",
2261*4882a593Smuzhiyun shost->host_no));
2262*4882a593Smuzhiyun shost->ehandler = NULL;
2263*4882a593Smuzhiyun return 0;
2264*4882a593Smuzhiyun }
2265*4882a593Smuzhiyun
2266*4882a593Smuzhiyun /*
2267*4882a593Smuzhiyun * Function: scsi_report_bus_reset()
2268*4882a593Smuzhiyun *
2269*4882a593Smuzhiyun * Purpose: Utility function used by low-level drivers to report that
2270*4882a593Smuzhiyun * they have observed a bus reset on the bus being handled.
2271*4882a593Smuzhiyun *
2272*4882a593Smuzhiyun * Arguments: shost - Host in question
2273*4882a593Smuzhiyun * channel - channel on which reset was observed.
2274*4882a593Smuzhiyun *
2275*4882a593Smuzhiyun * Returns: Nothing
2276*4882a593Smuzhiyun *
2277*4882a593Smuzhiyun * Lock status: Host lock must be held.
2278*4882a593Smuzhiyun *
2279*4882a593Smuzhiyun * Notes: This only needs to be called if the reset is one which
2280*4882a593Smuzhiyun * originates from an unknown location. Resets originated
2281*4882a593Smuzhiyun * by the mid-level itself don't need to call this, but there
2282*4882a593Smuzhiyun * should be no harm.
2283*4882a593Smuzhiyun *
2284*4882a593Smuzhiyun * The main purpose of this is to make sure that a CHECK_CONDITION
2285*4882a593Smuzhiyun * is properly treated.
2286*4882a593Smuzhiyun */
scsi_report_bus_reset(struct Scsi_Host * shost,int channel)2287*4882a593Smuzhiyun void scsi_report_bus_reset(struct Scsi_Host *shost, int channel)
2288*4882a593Smuzhiyun {
2289*4882a593Smuzhiyun struct scsi_device *sdev;
2290*4882a593Smuzhiyun
2291*4882a593Smuzhiyun __shost_for_each_device(sdev, shost) {
2292*4882a593Smuzhiyun if (channel == sdev_channel(sdev))
2293*4882a593Smuzhiyun __scsi_report_device_reset(sdev, NULL);
2294*4882a593Smuzhiyun }
2295*4882a593Smuzhiyun }
2296*4882a593Smuzhiyun EXPORT_SYMBOL(scsi_report_bus_reset);
2297*4882a593Smuzhiyun
2298*4882a593Smuzhiyun /*
2299*4882a593Smuzhiyun * Function: scsi_report_device_reset()
2300*4882a593Smuzhiyun *
2301*4882a593Smuzhiyun * Purpose: Utility function used by low-level drivers to report that
2302*4882a593Smuzhiyun * they have observed a device reset on the device being handled.
2303*4882a593Smuzhiyun *
2304*4882a593Smuzhiyun * Arguments: shost - Host in question
2305*4882a593Smuzhiyun * channel - channel on which reset was observed
2306*4882a593Smuzhiyun * target - target on which reset was observed
2307*4882a593Smuzhiyun *
2308*4882a593Smuzhiyun * Returns: Nothing
2309*4882a593Smuzhiyun *
2310*4882a593Smuzhiyun * Lock status: Host lock must be held
2311*4882a593Smuzhiyun *
2312*4882a593Smuzhiyun * Notes: This only needs to be called if the reset is one which
2313*4882a593Smuzhiyun * originates from an unknown location. Resets originated
2314*4882a593Smuzhiyun * by the mid-level itself don't need to call this, but there
2315*4882a593Smuzhiyun * should be no harm.
2316*4882a593Smuzhiyun *
2317*4882a593Smuzhiyun * The main purpose of this is to make sure that a CHECK_CONDITION
2318*4882a593Smuzhiyun * is properly treated.
2319*4882a593Smuzhiyun */
scsi_report_device_reset(struct Scsi_Host * shost,int channel,int target)2320*4882a593Smuzhiyun void scsi_report_device_reset(struct Scsi_Host *shost, int channel, int target)
2321*4882a593Smuzhiyun {
2322*4882a593Smuzhiyun struct scsi_device *sdev;
2323*4882a593Smuzhiyun
2324*4882a593Smuzhiyun __shost_for_each_device(sdev, shost) {
2325*4882a593Smuzhiyun if (channel == sdev_channel(sdev) &&
2326*4882a593Smuzhiyun target == sdev_id(sdev))
2327*4882a593Smuzhiyun __scsi_report_device_reset(sdev, NULL);
2328*4882a593Smuzhiyun }
2329*4882a593Smuzhiyun }
2330*4882a593Smuzhiyun EXPORT_SYMBOL(scsi_report_device_reset);
2331*4882a593Smuzhiyun
/*
 * Deliberately empty completion hook.  scsi_ioctl_reset() installs it as
 * ->scsi_done on the command it builds for the reset helpers.
 */
static void scsi_reset_provider_done_command(struct scsi_cmnd *scmd)
{
}
2336*4882a593Smuzhiyun
2337*4882a593Smuzhiyun /**
2338*4882a593Smuzhiyun * scsi_ioctl_reset: explicitly reset a host/bus/target/device
2339*4882a593Smuzhiyun * @dev: scsi_device to operate on
2340*4882a593Smuzhiyun * @arg: reset type (see sg.h)
2341*4882a593Smuzhiyun */
2342*4882a593Smuzhiyun int
scsi_ioctl_reset(struct scsi_device * dev,int __user * arg)2343*4882a593Smuzhiyun scsi_ioctl_reset(struct scsi_device *dev, int __user *arg)
2344*4882a593Smuzhiyun {
2345*4882a593Smuzhiyun struct scsi_cmnd *scmd;
2346*4882a593Smuzhiyun struct Scsi_Host *shost = dev->host;
2347*4882a593Smuzhiyun struct request *rq;
2348*4882a593Smuzhiyun unsigned long flags;
2349*4882a593Smuzhiyun int error = 0, rtn, val;
2350*4882a593Smuzhiyun
2351*4882a593Smuzhiyun if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
2352*4882a593Smuzhiyun return -EACCES;
2353*4882a593Smuzhiyun
2354*4882a593Smuzhiyun error = get_user(val, arg);
2355*4882a593Smuzhiyun if (error)
2356*4882a593Smuzhiyun return error;
2357*4882a593Smuzhiyun
2358*4882a593Smuzhiyun if (scsi_autopm_get_host(shost) < 0)
2359*4882a593Smuzhiyun return -EIO;
2360*4882a593Smuzhiyun
2361*4882a593Smuzhiyun error = -EIO;
2362*4882a593Smuzhiyun rq = kzalloc(sizeof(struct request) + sizeof(struct scsi_cmnd) +
2363*4882a593Smuzhiyun shost->hostt->cmd_size, GFP_KERNEL);
2364*4882a593Smuzhiyun if (!rq)
2365*4882a593Smuzhiyun goto out_put_autopm_host;
2366*4882a593Smuzhiyun blk_rq_init(NULL, rq);
2367*4882a593Smuzhiyun
2368*4882a593Smuzhiyun scmd = (struct scsi_cmnd *)(rq + 1);
2369*4882a593Smuzhiyun scsi_init_command(dev, scmd);
2370*4882a593Smuzhiyun scmd->request = rq;
2371*4882a593Smuzhiyun scmd->cmnd = scsi_req(rq)->cmd;
2372*4882a593Smuzhiyun
2373*4882a593Smuzhiyun scmd->scsi_done = scsi_reset_provider_done_command;
2374*4882a593Smuzhiyun memset(&scmd->sdb, 0, sizeof(scmd->sdb));
2375*4882a593Smuzhiyun
2376*4882a593Smuzhiyun scmd->cmd_len = 0;
2377*4882a593Smuzhiyun
2378*4882a593Smuzhiyun scmd->sc_data_direction = DMA_BIDIRECTIONAL;
2379*4882a593Smuzhiyun
2380*4882a593Smuzhiyun spin_lock_irqsave(shost->host_lock, flags);
2381*4882a593Smuzhiyun shost->tmf_in_progress = 1;
2382*4882a593Smuzhiyun spin_unlock_irqrestore(shost->host_lock, flags);
2383*4882a593Smuzhiyun
2384*4882a593Smuzhiyun switch (val & ~SG_SCSI_RESET_NO_ESCALATE) {
2385*4882a593Smuzhiyun case SG_SCSI_RESET_NOTHING:
2386*4882a593Smuzhiyun rtn = SUCCESS;
2387*4882a593Smuzhiyun break;
2388*4882a593Smuzhiyun case SG_SCSI_RESET_DEVICE:
2389*4882a593Smuzhiyun rtn = scsi_try_bus_device_reset(scmd);
2390*4882a593Smuzhiyun if (rtn == SUCCESS || (val & SG_SCSI_RESET_NO_ESCALATE))
2391*4882a593Smuzhiyun break;
2392*4882a593Smuzhiyun fallthrough;
2393*4882a593Smuzhiyun case SG_SCSI_RESET_TARGET:
2394*4882a593Smuzhiyun rtn = scsi_try_target_reset(scmd);
2395*4882a593Smuzhiyun if (rtn == SUCCESS || (val & SG_SCSI_RESET_NO_ESCALATE))
2396*4882a593Smuzhiyun break;
2397*4882a593Smuzhiyun fallthrough;
2398*4882a593Smuzhiyun case SG_SCSI_RESET_BUS:
2399*4882a593Smuzhiyun rtn = scsi_try_bus_reset(scmd);
2400*4882a593Smuzhiyun if (rtn == SUCCESS || (val & SG_SCSI_RESET_NO_ESCALATE))
2401*4882a593Smuzhiyun break;
2402*4882a593Smuzhiyun fallthrough;
2403*4882a593Smuzhiyun case SG_SCSI_RESET_HOST:
2404*4882a593Smuzhiyun rtn = scsi_try_host_reset(scmd);
2405*4882a593Smuzhiyun if (rtn == SUCCESS)
2406*4882a593Smuzhiyun break;
2407*4882a593Smuzhiyun fallthrough;
2408*4882a593Smuzhiyun default:
2409*4882a593Smuzhiyun rtn = FAILED;
2410*4882a593Smuzhiyun break;
2411*4882a593Smuzhiyun }
2412*4882a593Smuzhiyun
2413*4882a593Smuzhiyun error = (rtn == SUCCESS) ? 0 : -EIO;
2414*4882a593Smuzhiyun
2415*4882a593Smuzhiyun spin_lock_irqsave(shost->host_lock, flags);
2416*4882a593Smuzhiyun shost->tmf_in_progress = 0;
2417*4882a593Smuzhiyun spin_unlock_irqrestore(shost->host_lock, flags);
2418*4882a593Smuzhiyun
2419*4882a593Smuzhiyun /*
2420*4882a593Smuzhiyun * be sure to wake up anyone who was sleeping or had their queue
2421*4882a593Smuzhiyun * suspended while we performed the TMF.
2422*4882a593Smuzhiyun */
2423*4882a593Smuzhiyun SCSI_LOG_ERROR_RECOVERY(3,
2424*4882a593Smuzhiyun shost_printk(KERN_INFO, shost,
2425*4882a593Smuzhiyun "waking up host to restart after TMF\n"));
2426*4882a593Smuzhiyun
2427*4882a593Smuzhiyun wake_up(&shost->host_wait);
2428*4882a593Smuzhiyun scsi_run_host_queues(shost);
2429*4882a593Smuzhiyun
2430*4882a593Smuzhiyun kfree(rq);
2431*4882a593Smuzhiyun
2432*4882a593Smuzhiyun out_put_autopm_host:
2433*4882a593Smuzhiyun scsi_autopm_put_host(shost);
2434*4882a593Smuzhiyun return error;
2435*4882a593Smuzhiyun }
2436*4882a593Smuzhiyun
scsi_command_normalize_sense(const struct scsi_cmnd * cmd,struct scsi_sense_hdr * sshdr)2437*4882a593Smuzhiyun bool scsi_command_normalize_sense(const struct scsi_cmnd *cmd,
2438*4882a593Smuzhiyun struct scsi_sense_hdr *sshdr)
2439*4882a593Smuzhiyun {
2440*4882a593Smuzhiyun return scsi_normalize_sense(cmd->sense_buffer,
2441*4882a593Smuzhiyun SCSI_SENSE_BUFFERSIZE, sshdr);
2442*4882a593Smuzhiyun }
2443*4882a593Smuzhiyun EXPORT_SYMBOL(scsi_command_normalize_sense);
2444*4882a593Smuzhiyun
2445*4882a593Smuzhiyun /**
2446*4882a593Smuzhiyun * scsi_get_sense_info_fld - get information field from sense data (either fixed or descriptor format)
2447*4882a593Smuzhiyun * @sense_buffer: byte array of sense data
2448*4882a593Smuzhiyun * @sb_len: number of valid bytes in sense_buffer
2449*4882a593Smuzhiyun * @info_out: pointer to 64 integer where 8 or 4 byte information
2450*4882a593Smuzhiyun * field will be placed if found.
2451*4882a593Smuzhiyun *
2452*4882a593Smuzhiyun * Return value:
2453*4882a593Smuzhiyun * true if information field found, false if not found.
2454*4882a593Smuzhiyun */
scsi_get_sense_info_fld(const u8 * sense_buffer,int sb_len,u64 * info_out)2455*4882a593Smuzhiyun bool scsi_get_sense_info_fld(const u8 *sense_buffer, int sb_len,
2456*4882a593Smuzhiyun u64 *info_out)
2457*4882a593Smuzhiyun {
2458*4882a593Smuzhiyun const u8 * ucp;
2459*4882a593Smuzhiyun
2460*4882a593Smuzhiyun if (sb_len < 7)
2461*4882a593Smuzhiyun return false;
2462*4882a593Smuzhiyun switch (sense_buffer[0] & 0x7f) {
2463*4882a593Smuzhiyun case 0x70:
2464*4882a593Smuzhiyun case 0x71:
2465*4882a593Smuzhiyun if (sense_buffer[0] & 0x80) {
2466*4882a593Smuzhiyun *info_out = get_unaligned_be32(&sense_buffer[3]);
2467*4882a593Smuzhiyun return true;
2468*4882a593Smuzhiyun }
2469*4882a593Smuzhiyun return false;
2470*4882a593Smuzhiyun case 0x72:
2471*4882a593Smuzhiyun case 0x73:
2472*4882a593Smuzhiyun ucp = scsi_sense_desc_find(sense_buffer, sb_len,
2473*4882a593Smuzhiyun 0 /* info desc */);
2474*4882a593Smuzhiyun if (ucp && (0xa == ucp[1])) {
2475*4882a593Smuzhiyun *info_out = get_unaligned_be64(&ucp[4]);
2476*4882a593Smuzhiyun return true;
2477*4882a593Smuzhiyun }
2478*4882a593Smuzhiyun return false;
2479*4882a593Smuzhiyun default:
2480*4882a593Smuzhiyun return false;
2481*4882a593Smuzhiyun }
2482*4882a593Smuzhiyun }
2483*4882a593Smuzhiyun EXPORT_SYMBOL(scsi_get_sense_info_fld);
2484