--- orig/drivers/scsi/scsi_error.c Wed Sep 25 23:06:34 2002 +++ linux-rpc/drivers/scsi/scsi_error.c Wed Sep 25 23:09:12 2002 @@ -1363,96 +1363,12 @@ spin_unlock_irqrestore(&io_request_lock, flags); } -/* - * Function: scsi_unjam_host - * - * Purpose: Attempt to fix a host which has a command that failed for - * some reason. - * - * Arguments: host - host that needs unjamming. - * - * Returns: Nothing - * - * Notes: When we come in here, we *know* that all commands on the - * bus have either completed, failed or timed out. We also - * know that no further commands are being sent to the host, - * so things are relatively quiet and we have freedom to - * fiddle with things as we wish. - * - * Additional note: This is only the *default* implementation. It is possible - * for individual drivers to supply their own version of this - * function, and if the maintainer wishes to do this, it is - * strongly suggested that this function be taken as a template - * and modified. This function was designed to correctly handle - * problems for about 95% of the different cases out there, and - * it should always provide at least a reasonable amount of error - * recovery. - * - * Note3: Any command marked 'FAILED' or 'TIMEOUT' must eventually - * have scsi_finish_command() called for it. We do all of - * the retry stuff here, so when we restart the host after we - * return it should have an empty queue. - */ -STATIC int scsi_unjam_host(struct Scsi_Host *host) +static void scsi_unjam_request_sense(struct Scsi_Host *host, Scsi_Cmnd **done) { - int devices_failed; - int numfailed; - int ourrtn; - int rtn = FALSE; + int rtn; int result; - Scsi_Cmnd *SCloop; Scsi_Cmnd *SCpnt; Scsi_Device *SDpnt; - Scsi_Device *SDloop; - Scsi_Cmnd *SCdone; - int timed_out; - - ASSERT_LOCK(&io_request_lock, 0); - - SCdone = NULL; - - /* - * First, protect against any sort of race condition. If any of the outstanding - * commands are in states that indicate that we are not yet blocked (i.e. 
we are - * not in a quiet state) then we got woken up in error. If we ever end up here, - * we need to re-examine some of the assumptions. - */ - for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) { - for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) { - if (SCpnt->state == SCSI_STATE_FAILED - || SCpnt->state == SCSI_STATE_TIMEOUT - || SCpnt->state == SCSI_STATE_INITIALIZING - || SCpnt->state == SCSI_STATE_UNUSED) { - continue; - } - /* - * Rats. Something is still floating around out there. This could - * be the result of the fact that the upper level drivers are still frobbing - * commands that might have succeeded. There are two outcomes. One is that - * the command block will eventually be freed, and the other one is that - * the command will be queued and will be finished along the way. - */ - SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler prematurely woken - commands still active (%p %x %d)\n", SCpnt, SCpnt->state, SCpnt->target)); - -/* - * panic("SCSI Error handler woken too early\n"); - * - * This is no longer a problem, since now the code cares only about - * SCSI_STATE_TIMEOUT and SCSI_STATE_FAILED. - * Other states are useful only to release active commands when devices are - * set offline. If (host->host_active == host->host_busy) we can safely assume - * that there are no commands in state other then TIMEOUT od FAILED. (DB) - * - * FIXME: - * It is not easy to release correctly commands according to their state when - * devices are set offline, when the state is neither TIMEOUT nor FAILED. - * When a device is set offline, we can have some command with - * rq_status=RQ_SCSY_BUSY, owner=SCSI_STATE_HIGHLEVEL, - * state=SCSI_STATE_INITIALIZING and the driver module cannot be released. - * (DB, 17 May 1998) - */ - } - } /* * Next, see if we need to request sense information. 
if so, @@ -1489,7 +1405,7 @@ */ if (result == SUCCESS) { SCpnt->host->host_failed--; - scsi_eh_finish_command(&SCdone, SCpnt); + scsi_eh_finish_command(done, SCpnt); } if (result != NEEDS_RETRY) { continue; @@ -1510,9 +1426,18 @@ * We eventually hand this one back to the top level. */ SCpnt->host->host_failed--; - scsi_eh_finish_command(&SCdone, SCpnt); + scsi_eh_finish_command(done, SCpnt); } } +} + +static void scsi_unjam_count(struct Scsi_Host *host, Scsi_Cmnd **done) +{ + Scsi_Device *SDpnt; + Scsi_Cmnd *SCpnt; + int devices_failed; + int numfailed; + int timed_out; /* * Go through the list of commands and figure out where we stand and how bad things @@ -1545,11 +1470,14 @@ SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d+%d commands on %d devices require eh work\n", numfailed, timed_out, devices_failed)); +} + +static void scsi_unjam_abort(struct Scsi_Host *host, Scsi_Cmnd **done) +{ + Scsi_Device *SDpnt; + Scsi_Cmnd *SCloop; + int rtn; - if (host->host_failed == 0) { - ourrtn = TRUE; - goto leave; - } /* * Next, try and see whether or not it makes sense to try and abort * the running command. This only works out to be the case if we have @@ -1574,20 +1502,20 @@ if (rtn == SUCCESS) { SCloop->host->host_failed--; - scsi_eh_finish_command(&SCdone, SCloop); + scsi_eh_finish_command(done, SCloop); } } } } } +} + +static void scsi_unjam_device_reset(struct Scsi_Host *host, Scsi_Cmnd **done) +{ + Scsi_Device *SDpnt; + Scsi_Cmnd *SCloop; + int rtn; - /* - * If we have corrected all of the problems, then we are done. - */ - if (host->host_failed == 0) { - ourrtn = TRUE; - goto leave; - } /* * Either the abort wasn't appropriate, or it didn't succeed. * Now try a bus device reset. 
Still, look to see whether we have @@ -1629,16 +1557,19 @@ if (rtn == SUCCESS) { SCloop->host->host_failed--; - scsi_eh_finish_command(&SCdone, SCloop); + scsi_eh_finish_command(done, SCloop); } } } } +} + +static void scsi_unjam_bus_reset(struct Scsi_Host *host, Scsi_Cmnd **done) +{ + Scsi_Device *SDpnt, *SDloop; + Scsi_Cmnd *SCpnt, *SCloop; + int rtn; - if (host->host_failed == 0) { - ourrtn = TRUE; - goto leave; - } /* * If we ended up here, we have serious problems. The only thing left * to try is a full bus reset. If someone has grabbed the bus and isn't @@ -1717,7 +1648,7 @@ if (rtn == SUCCESS) { SCpnt->host->host_failed--; - scsi_eh_finish_command(&SCdone, SCloop); + scsi_eh_finish_command(done, SCloop); } } /* @@ -1730,18 +1661,21 @@ SDloop->online = FALSE; SDloop->host->host_failed--; - scsi_eh_finish_command(&SCdone, SCloop); + scsi_eh_finish_command(done, SCloop); } } } } } } +} + +static void scsi_unjam_host_reset(struct Scsi_Host *host, Scsi_Cmnd **done) +{ + Scsi_Device *SDpnt, *SDloop; + Scsi_Cmnd *SCpnt, *SCloop; + int rtn; - if (host->host_failed == 0) { - ourrtn = TRUE; - goto leave; - } /* * If we ended up here, we have serious problems. 
The only thing left * to try is a full host reset - perhaps the firmware on the device @@ -1815,28 +1749,27 @@ if (rtn == SUCCESS) { SCpnt->host->host_failed--; - scsi_eh_finish_command(&SCdone, SCloop); + scsi_eh_finish_command(done, SCloop); } } if (rtn != SUCCESS) { printk(KERN_INFO "scsi: device set offline - not ready or command retry failed after host reset: host %d channel %d id %d lun %d\n", SDloop->host->host_no, SDloop->channel, SDloop->id, SDloop->lun); SDloop->online = FALSE; SDloop->host->host_failed--; - scsi_eh_finish_command(&SCdone, SCloop); + scsi_eh_finish_command(done, SCloop); } } } } } } +} + +static void scsi_unjam_failure(struct Scsi_Host *host, Scsi_Cmnd **done) +{ + Scsi_Device *SDpnt, *SDloop; + Scsi_Cmnd *SCloop; - /* - * If we solved all of the problems, then let's rev up the engines again. - */ - if (host->host_failed == 0) { - ourrtn = TRUE; - goto leave; - } /* * If the HOST RESET failed, then for now we assume that the entire host * adapter is too hosed to be of any use. For our purposes, however, it is @@ -1866,7 +1799,7 @@ SCSI_LOG_ERROR_RECOVERY(3, printk("Finishing command for device %d %x\n", SDloop->id, SCloop->result)); - scsi_eh_finish_command(&SCdone, SCloop); + scsi_eh_finish_command(done, SCloop); } } } @@ -1874,11 +1807,115 @@ if (host->host_failed != 0) { panic("scsi_unjam_host: Miscount of number of failed commands.\n"); } + SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Returning\n")); +} + +static void (*unjam_method[])(struct Scsi_Host *, Scsi_Cmnd **) = { + scsi_unjam_request_sense, + scsi_unjam_count, + scsi_unjam_abort, + scsi_unjam_device_reset, + scsi_unjam_bus_reset, + scsi_unjam_host_reset, + scsi_unjam_failure, +}; + +/* + * Function: scsi_unjam_host + * + * Purpose: Attempt to fix a host which has a command that failed for + * some reason. + * + * Arguments: host - host that needs unjamming. 
+ * + * Returns: Nothing + * + * Notes: When we come in here, we *know* that all commands on the + * bus have either completed, failed or timed out. We also + * know that no further commands are being sent to the host, + * so things are relatively quiet and we have freedom to + * fiddle with things as we wish. + * + * Additional note: This is only the *default* implementation. It is possible + * for individual drivers to supply their own version of this + * function, and if the maintainer wishes to do this, it is + * strongly suggested that this function be taken as a template + * and modified. This function was designed to correctly handle + * problems for about 95% of the different cases out there, and + * it should always provide at least a reasonable amount of error + * recovery. + * + * Note3: Any command marked 'FAILED' or 'TIMEOUT' must eventually + * have scsi_finish_command() called for it. We do all of + * the retry stuff here, so when we restart the host after we + * return it should have an empty queue. + */ +STATIC int scsi_unjam_host(struct Scsi_Host *host) +{ + Scsi_Cmnd *SCdone = NULL; + Scsi_Cmnd *SCpnt; + Scsi_Device *SDpnt; + int ourrtn = FALSE; + int i; - ourrtn = FALSE; + ASSERT_LOCK(&io_request_lock, 0); - leave: + /* + * First, protect against any sort of race condition. If any of the outstanding + * commands are in states that indicate that we are not yet blocked (i.e. we are + * not in a quiet state) then we got woken up in error. If we ever end up here, + * we need to re-examine some of the assumptions. + */ + for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) { + for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) { + if (SCpnt->state == SCSI_STATE_FAILED + || SCpnt->state == SCSI_STATE_TIMEOUT + || SCpnt->state == SCSI_STATE_INITIALIZING + || SCpnt->state == SCSI_STATE_UNUSED) { + continue; + } + /* + * Rats. Something is still floating around out there. 
This could + * be the result of the fact that the upper level drivers are still frobbing + * commands that might have succeeded. There are two outcomes. One is that + * the command block will eventually be freed, and the other one is that + * the command will be queued and will be finished along the way. + */ + SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler prematurely woken - commands still active (%p %x %d)\n", SCpnt, SCpnt->state, SCpnt->target)); + +/* + * panic("SCSI Error handler woken too early\n"); + * + * This is no longer a problem, since now the code cares only about + * SCSI_STATE_TIMEOUT and SCSI_STATE_FAILED. + * Other states are useful only to release active commands when devices are + * set offline. If (host->host_active == host->host_busy) we can safely assume + * that there are no commands in state other than TIMEOUT or FAILED. (DB) + * + * FIXME: + * It is not easy to release correctly commands according to their state when + * devices are set offline, when the state is neither TIMEOUT nor FAILED. + * When a device is set offline, we can have some command with + * rq_status=RQ_SCSI_BUSY, owner=SCSI_STATE_HIGHLEVEL, + * state=SCSI_STATE_INITIALIZING and the driver module cannot be released. + * (DB, 17 May 1998) + */ + } + } + + for (i = 0; i < ARRAY_SIZE(unjam_method); i++) { + unjam_method[i](host, &SCdone); + + /* + * If we solved all of the problems, then + * let's rev up the engines again.
+ */ + if (host->host_failed == 0) { + ourrtn = TRUE; + break; + } + } /* * We should have a list of commands that we 'finished' during the course of @@ -2118,3 +2155,17 @@ * tab-width: 8 * End: */ + +EXPORT_SYMBOL(scsi_eh_times_out); +EXPORT_SYMBOL(scsi_eh_retry_command); +EXPORT_SYMBOL(scsi_request_sense); +EXPORT_SYMBOL(scsi_test_unit_ready); +EXPORT_SYMBOL(scsi_unit_is_ready); +EXPORT_SYMBOL(scsi_eh_finish_command); +EXPORT_SYMBOL(scsi_try_to_abort_command); +EXPORT_SYMBOL(scsi_try_bus_device_reset); +EXPORT_SYMBOL(scsi_try_bus_reset); +EXPORT_SYMBOL(scsi_try_host_reset); +EXPORT_SYMBOL(scsi_sense_valid); +EXPORT_SYMBOL(scsi_done); +EXPORT_SYMBOL(scsi_decide_disposition);