[SCSI] zfcp: minor erp bug fixes
Bug fixes for zfcp's erp:
- trigger adapter reopen if do_QDIO fails
- avoid erp deadlock if registration of scsi target or remote port hang
- do not treat as error if exchange port data fails
- decrease timeout for target reset and aborts
- mark unit failed if slave_destroy is called
Additionally some code cleanup was done:
- made some functions void when retval is not of interest
- shortened initialization of zfcp's host_template
- corrected some comments
Signed-off-by: Andreas Herrmann <aherrman@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h
index 72293f3..904d6a7 100644
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -80,7 +80,7 @@
#define REQUEST_LIST_SIZE 128
/********************* SCSI SPECIFIC DEFINES *********************************/
-#define ZFCP_SCSI_ER_TIMEOUT (100*HZ)
+#define ZFCP_SCSI_ER_TIMEOUT (10*HZ)
/********************* CIO/QDIO SPECIFIC DEFINES *****************************/
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index f74412b..7f60b6f 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -64,8 +64,8 @@
static int zfcp_erp_adapter_strategy(struct zfcp_erp_action *);
static int zfcp_erp_adapter_strategy_generic(struct zfcp_erp_action *, int);
static int zfcp_erp_adapter_strategy_close(struct zfcp_erp_action *);
-static int zfcp_erp_adapter_strategy_close_qdio(struct zfcp_erp_action *);
-static int zfcp_erp_adapter_strategy_close_fsf(struct zfcp_erp_action *);
+static void zfcp_erp_adapter_strategy_close_qdio(struct zfcp_erp_action *);
+static void zfcp_erp_adapter_strategy_close_fsf(struct zfcp_erp_action *);
static int zfcp_erp_adapter_strategy_open(struct zfcp_erp_action *);
static int zfcp_erp_adapter_strategy_open_qdio(struct zfcp_erp_action *);
static int zfcp_erp_adapter_strategy_open_fsf(struct zfcp_erp_action *);
@@ -93,10 +93,9 @@
static int zfcp_erp_unit_strategy_close(struct zfcp_erp_action *);
static int zfcp_erp_unit_strategy_open(struct zfcp_erp_action *);
-static int zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *);
-static int zfcp_erp_action_dismiss_port(struct zfcp_port *);
-static int zfcp_erp_action_dismiss_unit(struct zfcp_unit *);
-static int zfcp_erp_action_dismiss(struct zfcp_erp_action *);
+static void zfcp_erp_action_dismiss_port(struct zfcp_port *);
+static void zfcp_erp_action_dismiss_unit(struct zfcp_unit *);
+static void zfcp_erp_action_dismiss(struct zfcp_erp_action *);
static int zfcp_erp_action_enqueue(int, struct zfcp_adapter *,
struct zfcp_port *, struct zfcp_unit *);
@@ -135,29 +134,39 @@
zfcp_erp_adapter_reopen(adapter, 0);
}
-/*
- * function: zfcp_fsf_scsi_er_timeout_handler
+/**
+ * zfcp_fsf_scsi_er_timeout_handler - timeout handler for scsi eh tasks
*
- * purpose: This function needs to be called whenever a SCSI error recovery
- * action (abort/reset) does not return.
- * Re-opening the adapter means that the command can be returned
- * by zfcp (it is guarranteed that it does not return via the
- * adapter anymore). The buffer can then be used again.
- *
- * returns: sod all
+ * This function needs to be called whenever a SCSI error recovery
+ * action (abort/reset) does not return. Re-opening the adapter means
+ * that the abort/reset command can be returned by zfcp. It won't complete
+ * via the adapter anymore (because qdio queues are closed). If ERP is
+ * already running on this adapter it will be stopped.
*/
-void
-zfcp_fsf_scsi_er_timeout_handler(unsigned long data)
+void zfcp_fsf_scsi_er_timeout_handler(unsigned long data)
{
struct zfcp_adapter *adapter = (struct zfcp_adapter *) data;
+ unsigned long flags;
ZFCP_LOG_NORMAL("warning: SCSI error recovery timed out. "
"Restarting all operations on the adapter %s\n",
zfcp_get_busid_by_adapter(adapter));
debug_text_event(adapter->erp_dbf, 1, "eh_lmem_tout");
- zfcp_erp_adapter_reopen(adapter, 0);
- return;
+ write_lock_irqsave(&adapter->erp_lock, flags);
+ if (atomic_test_mask(ZFCP_STATUS_ADAPTER_ERP_PENDING,
+ &adapter->status)) {
+ zfcp_erp_modify_adapter_status(adapter,
+ ZFCP_STATUS_COMMON_UNBLOCKED|ZFCP_STATUS_COMMON_OPEN,
+ ZFCP_CLEAR);
+ zfcp_erp_action_dismiss_adapter(adapter);
+ write_unlock_irqrestore(&adapter->erp_lock, flags);
+ /* dismiss all pending requests including requests for ERP */
+ zfcp_fsf_req_dismiss_all(adapter);
+ adapter->fsf_req_seq_no = 0;
+ } else
+ write_unlock_irqrestore(&adapter->erp_lock, flags);
+ zfcp_erp_adapter_reopen(adapter, 0);
}
/*
@@ -670,17 +679,10 @@
return retval;
}
-/*
- * function:
- *
- * purpose: disable I/O,
- * return any open requests and clean them up,
- * aim: no pending and incoming I/O
- *
- * returns:
+/**
+ * zfcp_erp_adapter_block - mark adapter as blocked, block scsi requests
*/
-static void
-zfcp_erp_adapter_block(struct zfcp_adapter *adapter, int clear_mask)
+static void zfcp_erp_adapter_block(struct zfcp_adapter *adapter, int clear_mask)
{
debug_text_event(adapter->erp_dbf, 6, "a_bl");
zfcp_erp_modify_adapter_status(adapter,
@@ -688,15 +690,10 @@
clear_mask, ZFCP_CLEAR);
}
-/*
- * function:
- *
- * purpose: enable I/O
- *
- * returns:
+/**
+ * zfcp_erp_adapter_unblock - mark adapter as unblocked, allow scsi requests
*/
-static void
-zfcp_erp_adapter_unblock(struct zfcp_adapter *adapter)
+static void zfcp_erp_adapter_unblock(struct zfcp_adapter *adapter)
{
debug_text_event(adapter->erp_dbf, 6, "a_ubl");
atomic_set_mask(ZFCP_STATUS_COMMON_UNBLOCKED, &adapter->status);
@@ -897,23 +894,15 @@
return retval;
}
-/*
- * purpose: generic handler for asynchronous events related to erp_action events
- * (normal completion, time-out, dismissing, retry after
- * low memory condition)
+/**
+ * zfcp_erp_async_handler_nolock - complete erp_action
*
- * note: deletion of timer is not required (e.g. in case of a time-out),
- * but a second try does no harm,
- * we leave it in here to allow for greater simplification
- *
- * returns: 0 - there was an action to handle
- * !0 - otherwise
+ * Used for normal completion, time-out, dismissal and failure after
+ * low memory condition.
*/
-static int
-zfcp_erp_async_handler_nolock(struct zfcp_erp_action *erp_action,
- unsigned long set_mask)
+static void zfcp_erp_async_handler_nolock(struct zfcp_erp_action *erp_action,
+ unsigned long set_mask)
{
- int retval;
struct zfcp_adapter *adapter = erp_action->adapter;
if (zfcp_erp_action_exists(erp_action) == ZFCP_ERP_ACTION_RUNNING) {
@@ -924,43 +913,26 @@
del_timer(&erp_action->timer);
erp_action->status |= set_mask;
zfcp_erp_action_ready(erp_action);
- retval = 0;
} else {
/* action is ready or gone - nothing to do */
debug_text_event(adapter->erp_dbf, 3, "a_asyh_gone");
debug_event(adapter->erp_dbf, 3, &erp_action->action,
sizeof (int));
- retval = 1;
}
-
- return retval;
}
-/*
- * purpose: generic handler for asynchronous events related to erp_action
- * events (normal completion, time-out, dismissing, retry after
- * low memory condition)
- *
- * note: deletion of timer is not required (e.g. in case of a time-out),
- * but a second try does no harm,
- * we leave it in here to allow for greater simplification
- *
- * returns: 0 - there was an action to handle
- * !0 - otherwise
+/**
+ * zfcp_erp_async_handler - wrapper for erp_async_handler_nolock w/ locking
*/
-int
-zfcp_erp_async_handler(struct zfcp_erp_action *erp_action,
- unsigned long set_mask)
+void zfcp_erp_async_handler(struct zfcp_erp_action *erp_action,
+ unsigned long set_mask)
{
struct zfcp_adapter *adapter = erp_action->adapter;
unsigned long flags;
- int retval;
write_lock_irqsave(&adapter->erp_lock, flags);
- retval = zfcp_erp_async_handler_nolock(erp_action, set_mask);
+ zfcp_erp_async_handler_nolock(erp_action, set_mask);
write_unlock_irqrestore(&adapter->erp_lock, flags);
-
- return retval;
}
/*
@@ -997,17 +969,15 @@
zfcp_erp_async_handler(erp_action, ZFCP_STATUS_ERP_TIMEDOUT);
}
-/*
- * purpose: is called for an erp_action which needs to be ended
- * though not being done,
- * this is usually required if an higher is generated,
- * action gets an appropriate flag and will be processed
- * accordingly
+/**
+ * zfcp_erp_action_dismiss - dismiss an erp_action
*
- * locks: erp_lock held (thus we need to call another handler variant)
+ * adapter->erp_lock must be held
+ *
+ * Dismissal of an erp_action is usually required if an erp_action of
+ * higher priority is generated.
*/
-static int
-zfcp_erp_action_dismiss(struct zfcp_erp_action *erp_action)
+static void zfcp_erp_action_dismiss(struct zfcp_erp_action *erp_action)
{
struct zfcp_adapter *adapter = erp_action->adapter;
@@ -1015,8 +985,6 @@
debug_event(adapter->erp_dbf, 2, &erp_action->action, sizeof (int));
zfcp_erp_async_handler_nolock(erp_action, ZFCP_STATUS_ERP_DISMISSED);
-
- return 0;
}
int
@@ -2072,18 +2040,12 @@
return retval;
}
-/*
- * function: zfcp_qdio_cleanup
- *
- * purpose: cleans up QDIO operation for the specified adapter
- *
- * returns: 0 - successful cleanup
- * !0 - failed cleanup
+/**
+ * zfcp_erp_adapter_strategy_close_qdio - close qdio queues for an adapter
*/
-int
+static void
zfcp_erp_adapter_strategy_close_qdio(struct zfcp_erp_action *erp_action)
{
- int retval = ZFCP_ERP_SUCCEEDED;
int first_used;
int used_count;
struct zfcp_adapter *adapter = erp_action->adapter;
@@ -2092,15 +2054,13 @@
ZFCP_LOG_DEBUG("error: attempt to shut down inactive QDIO "
"queues on adapter %s\n",
zfcp_get_busid_by_adapter(adapter));
- retval = ZFCP_ERP_FAILED;
- goto out;
+ return;
}
/*
* Get queue_lock and clear QDIOUP flag. Thus it's guaranteed that
* do_QDIO won't be called while qdio_shutdown is in progress.
*/
-
write_lock_irq(&adapter->request_queue.queue_lock);
atomic_clear_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status);
write_unlock_irq(&adapter->request_queue.queue_lock);
@@ -2132,8 +2092,6 @@
adapter->request_queue.free_index = 0;
atomic_set(&adapter->request_queue.free_count, 0);
adapter->request_queue.distance_from_int = 0;
- out:
- return retval;
}
static int
@@ -2256,11 +2214,11 @@
"%s)\n", zfcp_get_busid_by_adapter(adapter));
ret = ZFCP_ERP_FAILED;
}
- if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_XPORT_OK, &adapter->status)) {
- ZFCP_LOG_INFO("error: exchange port data failed (adapter "
+
+ /* don't treat as error for the sake of compatibility */
+ if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_XPORT_OK, &adapter->status))
+ ZFCP_LOG_INFO("warning: exchange port data failed (adapter "
"%s\n", zfcp_get_busid_by_adapter(adapter));
- ret = ZFCP_ERP_FAILED;
- }
return ret;
}
@@ -2290,18 +2248,12 @@
return retval;
}
-/*
- * function: zfcp_fsf_cleanup
- *
- * purpose: cleanup FSF operation for specified adapter
- *
- * returns: 0 - FSF operation successfully cleaned up
- * !0 - failed to cleanup FSF operation for this adapter
+/**
+ * zfcp_erp_adapter_strategy_close_fsf - stop FSF operations for an adapter
*/
-static int
+static void
zfcp_erp_adapter_strategy_close_fsf(struct zfcp_erp_action *erp_action)
{
- int retval = ZFCP_ERP_SUCCEEDED;
struct zfcp_adapter *adapter = erp_action->adapter;
/*
@@ -2315,8 +2267,6 @@
/* all ports and units are closed */
zfcp_erp_modify_adapter_status(adapter,
ZFCP_STATUS_COMMON_OPEN, ZFCP_CLEAR);
-
- return retval;
}
/*
@@ -3291,10 +3241,8 @@
}
-static int
-zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter)
+void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter)
{
- int retval = 0;
struct zfcp_port *port;
debug_text_event(adapter->erp_dbf, 5, "a_actab");
@@ -3303,14 +3251,10 @@
else
list_for_each_entry(port, &adapter->port_list_head, list)
zfcp_erp_action_dismiss_port(port);
-
- return retval;
}
-static int
-zfcp_erp_action_dismiss_port(struct zfcp_port *port)
+static void zfcp_erp_action_dismiss_port(struct zfcp_port *port)
{
- int retval = 0;
struct zfcp_unit *unit;
struct zfcp_adapter *adapter = port->adapter;
@@ -3321,22 +3265,16 @@
else
list_for_each_entry(unit, &port->unit_list_head, list)
zfcp_erp_action_dismiss_unit(unit);
-
- return retval;
}
-static int
-zfcp_erp_action_dismiss_unit(struct zfcp_unit *unit)
+static void zfcp_erp_action_dismiss_unit(struct zfcp_unit *unit)
{
- int retval = 0;
struct zfcp_adapter *adapter = unit->port->adapter;
debug_text_event(adapter->erp_dbf, 5, "u_actab");
debug_event(adapter->erp_dbf, 5, &unit->fcp_lun, sizeof (fcp_lun_t));
if (atomic_test_mask(ZFCP_STATUS_COMMON_ERP_INUSE, &unit->status))
zfcp_erp_action_dismiss(&unit->erp_action);
-
- return retval;
}
static inline void
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h
index 04bb3a9..146d7a2 100644
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -139,6 +139,7 @@
extern int zfcp_erp_adapter_reopen(struct zfcp_adapter *, int);
extern int zfcp_erp_adapter_shutdown(struct zfcp_adapter *, int);
extern void zfcp_erp_adapter_failed(struct zfcp_adapter *);
+extern void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *);
extern void zfcp_erp_modify_port_status(struct zfcp_port *, u32, int);
extern int zfcp_erp_port_reopen(struct zfcp_port *, int);
@@ -155,7 +156,7 @@
extern int zfcp_erp_thread_setup(struct zfcp_adapter *);
extern int zfcp_erp_thread_kill(struct zfcp_adapter *);
extern int zfcp_erp_wait(struct zfcp_adapter *);
-extern int zfcp_erp_async_handler(struct zfcp_erp_action *, unsigned long);
+extern void zfcp_erp_async_handler(struct zfcp_erp_action *, unsigned long);
extern int zfcp_test_link(struct zfcp_port *);
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index 671f4a6..1bb5508 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -30,7 +30,6 @@
void (*done) (struct scsi_cmnd *));
static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *);
static int zfcp_scsi_eh_device_reset_handler(struct scsi_cmnd *);
-static int zfcp_scsi_eh_bus_reset_handler(struct scsi_cmnd *);
static int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *);
static int zfcp_task_management_function(struct zfcp_unit *, u8,
struct scsi_cmnd *);
@@ -46,30 +45,22 @@
.scsi_host_template = {
.name = ZFCP_NAME,
.proc_name = "zfcp",
- .proc_info = NULL,
- .detect = NULL,
.slave_alloc = zfcp_scsi_slave_alloc,
.slave_configure = zfcp_scsi_slave_configure,
.slave_destroy = zfcp_scsi_slave_destroy,
.queuecommand = zfcp_scsi_queuecommand,
.eh_abort_handler = zfcp_scsi_eh_abort_handler,
.eh_device_reset_handler = zfcp_scsi_eh_device_reset_handler,
- .eh_bus_reset_handler = zfcp_scsi_eh_bus_reset_handler,
+ .eh_bus_reset_handler = zfcp_scsi_eh_host_reset_handler,
.eh_host_reset_handler = zfcp_scsi_eh_host_reset_handler,
.can_queue = 4096,
.this_id = -1,
- /*
- * FIXME:
- * one less? can zfcp_create_sbale cope with it?
- */
.sg_tablesize = ZFCP_MAX_SBALES_PER_REQ,
.cmd_per_lun = 1,
- .unchecked_isa_dma = 0,
.use_clustering = 1,
.sdev_attrs = zfcp_sysfs_sdev_attrs,
},
.driver_version = ZFCP_VERSION,
- /* rest initialised with zeros */
};
/* Find start of Response Information in FCP response unit*/
@@ -176,8 +167,14 @@
return retval;
}
-static void
-zfcp_scsi_slave_destroy(struct scsi_device *sdpnt)
+/**
+ * zfcp_scsi_slave_destroy - called when scsi device is removed
+ *
+ * Remove reference to associated scsi device for an zfcp_unit.
+ * Mark zfcp_unit as failed. The scsi device might be deleted via sysfs
+ * or a scan for this device might have failed.
+ */
+static void zfcp_scsi_slave_destroy(struct scsi_device *sdpnt)
{
struct zfcp_unit *unit = (struct zfcp_unit *) sdpnt->hostdata;
@@ -185,6 +182,7 @@
atomic_clear_mask(ZFCP_STATUS_UNIT_REGISTERED, &unit->status);
sdpnt->hostdata = NULL;
unit->device = NULL;
+ zfcp_erp_unit_failed(unit);
zfcp_unit_put(unit);
} else {
ZFCP_LOG_NORMAL("bug: no unit associated with SCSI device at "
@@ -549,35 +547,38 @@
}
/**
- * zfcp_scsi_eh_bus_reset_handler - reset bus (reopen adapter)
+ * zfcp_scsi_eh_host_reset_handler - handler for host and bus reset
+ *
+ * If ERP is already running it will be stopped.
*/
-int
-zfcp_scsi_eh_bus_reset_handler(struct scsi_cmnd *scpnt)
+int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt)
{
- struct zfcp_unit *unit = (struct zfcp_unit*) scpnt->device->hostdata;
- struct zfcp_adapter *adapter = unit->port->adapter;
+ struct zfcp_unit *unit;
+ struct zfcp_adapter *adapter;
+ unsigned long flags;
- ZFCP_LOG_NORMAL("bus reset because of problems with "
+ unit = (struct zfcp_unit*) scpnt->device->hostdata;
+ adapter = unit->port->adapter;
+
+ ZFCP_LOG_NORMAL("host/bus reset because of problems with "
"unit 0x%016Lx\n", unit->fcp_lun);
- zfcp_erp_adapter_reopen(adapter, 0);
- zfcp_erp_wait(adapter);
- return SUCCESS;
-}
-
-/**
- * zfcp_scsi_eh_host_reset_handler - reset host (reopen adapter)
- */
-int
-zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt)
-{
- struct zfcp_unit *unit = (struct zfcp_unit*) scpnt->device->hostdata;
- struct zfcp_adapter *adapter = unit->port->adapter;
-
- ZFCP_LOG_NORMAL("host reset because of problems with "
- "unit 0x%016Lx\n", unit->fcp_lun);
- zfcp_erp_adapter_reopen(adapter, 0);
- zfcp_erp_wait(adapter);
+ write_lock_irqsave(&adapter->erp_lock, flags);
+ if (atomic_test_mask(ZFCP_STATUS_ADAPTER_ERP_PENDING,
+ &adapter->status)) {
+ zfcp_erp_modify_adapter_status(adapter,
+ ZFCP_STATUS_COMMON_UNBLOCKED|ZFCP_STATUS_COMMON_OPEN,
+ ZFCP_CLEAR);
+ zfcp_erp_action_dismiss_adapter(adapter);
+ write_unlock_irqrestore(&adapter->erp_lock, flags);
+ zfcp_fsf_req_dismiss_all(adapter);
+ adapter->fsf_req_seq_no = 0;
+ zfcp_erp_adapter_reopen(adapter, 0);
+ } else {
+ write_unlock_irqrestore(&adapter->erp_lock, flags);
+ zfcp_erp_adapter_reopen(adapter, 0);
+ zfcp_erp_wait(adapter);
+ }
return SUCCESS;
}