--- orig/drivers/scsi/scsi_error.c Wed Sep 25 23:16:50 2002 +++ linux-rpc/drivers/scsi/scsi_error.c Wed Sep 25 23:18:51 2002 @@ -1415,6 +1415,36 @@ /* + * Function: scsi_eh_restart_device + * + * Purpose: Retry all failed or timed out commands for a device + * + * Arguments: SDpnt - SCSI device to retry + * done - list of commands that have been successfully + * completed. + * + * Returns: SUCCESS or failure code + */ +STATIC int scsi_eh_restart_device(Scsi_Device *SDpnt, Scsi_Cmnd **done) +{ + Scsi_Cmnd *SCpnt, *SCnext; + int rtn = SUCCESS; + + for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCnext) { + SCnext = SCpnt->next; + + if (SCpnt->state == SCSI_STATE_FAILED || + SCpnt->state == SCSI_STATE_TIMEOUT) { + rtn = scsi_eh_test_and_retry(SCpnt, done); + if (rtn != SUCCESS) + break; + } + } + + return rtn; +} + +/* * Function: scsi_eh_set_device_offline * * Purpose: set a device off line @@ -1624,18 +1654,27 @@ if (SCloop == NULL) { continue; } + /* - * OK, we have a device that is having problems. Try and send - * a bus device reset to it. - * - * FIXME(eric) - make sure we handle the case where multiple - * commands to the same device have failed. They all must - * get properly restarted. + * OK, we have a device that is having problems. + * Try and send a bus device reset to it. */ rtn = scsi_try_bus_device_reset(SCloop, RESET_TIMEOUT); + /* + * A successful bus device reset causes all commands + * currently executing on the device to terminate. + * We expect the HBA driver to "forget" all commands + * associated with this device. + * + * Retry each failed or timed out command currently + * outstanding for this device. + * + * If any command fails, bail out. We will try a + * bus reset instead. + */ if (rtn == SUCCESS) - scsi_eh_test_and_retry(SCloop, done); + scsi_eh_restart_device(SDpnt, done); } } @@ -1708,29 +1747,13 @@ rtn = scsi_try_bus_reset(SCpnt); if (rtn == SUCCESS) { for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) { - for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) { - if (SCloop->channel != SCpnt->channel) { - continue; - } - if (SCloop->state != SCSI_STATE_FAILED - && SCloop->state != SCSI_STATE_TIMEOUT) { - continue; - } - - rtn = scsi_eh_test_and_retry(SCloop, done); - - /* - * If the bus reset worked, but we are still unable to - * talk to the device, take it offline. - * FIXME(eric) - is this really the correct thing to do? - */ - if (rtn != SUCCESS) - break; - } + rtn = scsi_eh_restart_device(SDloop, done); + if (rtn != SUCCESS) scsi_eh_set_device_offline(SDloop, done, "not ready or command retry failed after bus reset"); } } + break; } } } @@ -1738,7 +1761,7 @@ static void scsi_unjam_host_reset(struct Scsi_Host *host, Scsi_Cmnd **done) { Scsi_Device *SDpnt, *SDloop; - Scsi_Cmnd *SCpnt, *SCloop; + Scsi_Cmnd *SCpnt; int rtn; /* @@ -1802,20 +1825,13 @@ * tested. */ for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) { - for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) { - if (SCloop->state != SCSI_STATE_FAILED - && SCloop->state != SCSI_STATE_TIMEOUT) { - continue; - } - - rtn = scsi_eh_test_and_retry(SCloop, done); - if (rtn != SUCCESS) - break; - } + rtn = scsi_eh_restart_device(SDloop, done); + if (rtn != SUCCESS) scsi_eh_set_device_offline(SDloop, done, "not ready or command retry failed after host reset"); } } + return; } } }