Aic7xxx and Aic79xx driver Update

o Avoid pre-2.5.X mid-layer deadlock due to SCSI malloc fragmentation For pre-2.5.X kernels, attempt to calculate a safe value for our S/G list length. In these kernels, the midlayer allocates an S/G array dynamically when a command is issued using SCSI malloc. This list, which is in an OS dependent format that must later be copied to our private S/G list, is sized to house just the number of segments needed for the current transfer. Since the code that sizes the SCSI malloc pool does not take into consideration fragmentation of the pool, executing transactions numbering just a fraction of our concurrent transaction limit with list lengths aproaching AH?_NSEG in length will quickly depleat the SCSI malloc pool of usable space. Unfortunately, the mid-layer does not properly handle this scsi malloc failure. In kernels prior to 2.4.20, should the device that experienced the malloc failure be idle and never have any new I/O initiated (block queue is not "kicked"), the process will hang indefinitely. In 2.4.20 and beyond, the disk experiencing the failure is marked as a "starved device", but this only helps if I/O is initiated to or completes on that HBA. If the failure was induced by another HBA, and no other I/O is pending on the HBA and no new transactions are queued, we are still succeptible to the hang. (Also note that many 2.4.X kernels do not properly lock the "some_device_starved" and "device_starved" fields calling into question their overall effectiveness). By sizing our S/G list to avoid SCSI malloc pool fragmentation, we will hopefully avoid this deadlock at least for configurations where our own HBAs are the only ones using the SCSI subsystem.

Aic7xxx and Aic79xx driver Update
o Avoid pre-2.5.X mid-layer deadlock due to SCSI malloc fragmentation For pre-2.5.X kernels, attempt to calculate a safe value for our S/G list length. In these kernels, the midlayer allocates an S/G array dynamically when a command is issued using SCSI malloc. This list, which is in an OS dependent format that must later be copied to our private S/G list, is sized to house just the number of segments needed for the current transfer. Since the code that sizes the SCSI malloc pool does not take into consideration fragmentation of the pool, executing transactions numbering just a fraction of our concurrent transaction limit with list lengths aproaching AH?_NSEG in length will quickly depleat the SCSI malloc pool of usable space. Unfortunately, the mid-layer does not properly handle this scsi malloc failure. In kernels prior to 2.4.20, should the device that experienced the malloc failure be idle and never have any new I/O initiated (block queue is not "kicked"), the process will hang indefinitely. In 2.4.20 and beyond, the disk experiencing the failure is marked as a "starved device", but this only helps if I/O is initiated to or completes on that HBA. If the failure was induced by another HBA, and no other I/O is pending on the HBA and no new transactions are queued, we are still succeptible to the hang. (Also note that many 2.4.X kernels do not properly lock the "some_device_starved" and "device_starved" fields calling into question their overall effectiveness). By sizing our S/G list to avoid SCSI malloc pool fragmentation, we will hopefully avoid this deadlock at least for configurations where our own HBAs are the only ones using the SCSI subsystem.
36da50bc · Justin T. Gibbs · 3bcc0df7 · 36da50bc · 36da50bc · 36da50bc
Commit 36da50bc authored Apr 23, 2003 by Justin T. Gibbs
6 changed files
--- a/drivers/scsi/aic7xxx/aic79xx_osm.c
+++ b/drivers/scsi/aic7xxx/aic79xx_osm.c
 /*
 * Adaptec AIC79xx device driver for Linux.
 *
- * $Id: //depot/aic7xxx/linux/drivers/scsi/aic7xxx/aic79xx_osm.c#147 $
+ * $Id: //depot/aic7xxx/linux/drivers/scsi/aic7xxx/aic79xx_osm.c#148 $
 *
 * --------------------------------------------------------------------------
 * Copyright (c) 1994-2000 Justin T. Gibbs.
@@ -82,6 +82,11 @@ struct proc_dir_entry proc_scsi_aic79xx = {
 };
 #endif

+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+/* For dynamic sglist size calculation. */
+u_int ahd_linux_nseg;
+#endif
+
 /*
 * Bucket size for counting good commands in between bad ones.
 */
@@ -478,6 +483,7 @@ static void ahd_linux_filter_inquiry(struct ahd_softc *ahd,
 static void ahd_linux_dev_timed_unfreeze(u_long arg);
 static void ahd_linux_sem_timeout(u_long arg);
 static void ahd_linux_initialize_scsi_bus(struct ahd_softc *ahd);
+static void ahd_linux_size_nseg(void);
 static void ahd_linux_thread_run_complete_queue(struct ahd_softc *ahd);
 static void ahd_linux_start_dv(struct ahd_softc *ahd);
 static void ahd_linux_dv_timeout(struct scsi_cmnd *cmd);
@@ -823,6 +829,67 @@ static int	   ahd_linux_bus_reset(Scsi_Cmnd *);
 static int	   ahd_linux_dev_reset(Scsi_Cmnd *);
 static int	   ahd_linux_abort(Scsi_Cmnd *);

+/*
+ * Calculate a safe value for AHD_NSEG (as expressed through ahd_linux_nseg).
+ *
+ * In pre-2.5.X...
+ * The midlayer allocates an S/G array dynamically when a command is issued
+ * using SCSI malloc.  This array, which is in an OS dependent format that
+ * must later be copied to our private S/G list, is sized to house just the
+ * number of segments needed for the current transfer.  Since the code that
+ * sizes the SCSI malloc pool does not take into consideration fragmentation
+ * of the pool, executing transactions numbering just a fraction of our
+ * concurrent transaction limit with SG list lengths aproaching AHC_NSEG will
+ * quickly depleat the SCSI malloc pool of usable space.  Unfortunately, the
+ * mid-layer does not properly handle this scsi malloc failures for the S/G
+ * array and the result can be a lockup of the I/O subsystem.  We try to size
+ * our S/G list so that it satisfies our drivers allocation requirements in
+ * addition to avoiding fragmentation of the SCSI malloc pool.
+ */
+static void
+ahd_linux_size_nseg(void)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+	u_int cur_size;
+	u_int best_size;
+
+	/*
+	 * The SCSI allocator rounds to the nearest 512 bytes
+	 * an cannot allocate across a page boundary.  Our algorithm
+	 * is to start at 1K of scsi malloc space per-command and
+	 * loop through all factors of the PAGE_SIZE and pick the best.
+	 */
+	best_size = 0;
+	for (cur_size = 1024; cur_size <= PAGE_SIZE; cur_size *= 2) {
+		u_int nseg;
+
+		nseg = cur_size / sizeof(struct scatterlist);
+		if (nseg < AHD_LINUX_MIN_NSEG)
+			continue;
+
+		if (best_size == 0) {
+			best_size = cur_size;
+			ahd_linux_nseg = nseg;
+		} else {
+			u_int best_rem;
+			u_int cur_rem;
+
+			/*
+			 * Compare the traits of the current "best_size"
+			 * with the current size to determine if the
+			 * current size is a better size.
+			 */
+			best_rem = best_size % sizeof(struct scatterlist);
+			cur_rem = cur_size % sizeof(struct scatterlist);
+			if (cur_rem < best_rem) {
+				best_size = cur_size;
+				ahd_linux_nseg = nseg;
+			}
+		}
+	}
+#endif
+}
+
 /*
 * Try to detect an Adaptec 79XX controller.
 */
@@ -851,6 +918,10 @@ ahd_linux_detect(Scsi_Host_Template *template)
 		printf("ahd_linux_detect: Unable to attach\n");
 		return (0);
 	}
+	/*
+	 * Determine an appropriate size for our Scatter Gatther lists.
+	 */
+	ahd_linux_size_nseg();
 #ifdef MODULE
 	/*
 	 * If we've been passed any parameters, process them now.
@@ -1650,7 +1721,6 @@ Scsi_Host_Template aic79xx_driver_template = {
 #endif
 	.can_queue		= AHD_MAX_QUEUE,
 	.this_id		= -1,
-	.sg_tablesize		= AHD_NSEG,
 	.cmd_per_lun		= 2,
 	.use_clustering		= ENABLE_CLUSTERING,
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,7)
@@ -2089,6 +2159,7 @@ ahd_linux_register_host(struct ahd_softc *ahd, Scsi_Host_Template *template)
 	host->max_id = (ahd->features & AHD_WIDE) ? 16 : 8;
 	host->max_lun = AHD_NUM_LUNS;
 	host->max_channel = 0;
+	host->sg_tablesize = AHD_NSEG;
 	ahd_set_unit(ahd, ahd_linux_next_unit());
 	sprintf(buf, "scsi%d", host->host_no);
 	new_name = malloc(strlen(buf) + 1, M_DEVBUF, M_NOWAIT);

--- a/drivers/scsi/aic7xxx/aic79xx_osm.h
+++ b/drivers/scsi/aic7xxx/aic79xx_osm.h
@@ -36,7 +36,7 @@
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
- * $Id: //depot/aic7xxx/linux/drivers/scsi/aic7xxx/aic79xx_osm.h#123 $
+ * $Id: //depot/aic7xxx/linux/drivers/scsi/aic7xxx/aic79xx_osm.h#124 $
 *
 */
 #ifndef _AIC79XX_LINUX_H_
@@ -488,7 +488,18 @@ struct ahd_linux_target {
 * manner and are allocated below 4GB, the number of S/G segments is
 * unrestricted.
 */
-#define        AHD_NSEG 128
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+/*
+ * We dynamically adjust the number of segments in pre-2.5 kernels to
+ * avoid fragmentation issues in the SCSI mid-layer's private memory
+ * allocator.  See aic79xx_osm.c ahd_linux_size_nseg() for details.
+ */
+extern u_int ahd_linux_nseg;
+#define	AHD_NSEG ahd_linux_nseg
+#define	AHD_LINUX_MIN_NSEG 64
+#else
+#define	AHD_NSEG 128
+#endif

 /*
 * Per-SCB OSM storage.

--- a/drivers/scsi/aic7xxx/aic79xx_proc.c
+++ b/drivers/scsi/aic7xxx/aic79xx_proc.c
@@ -37,7 +37,7 @@
 * String handling code courtesy of Gerard Roudier's <groudier@club-internet.fr>
 * sym driver.
 *
- * $Id: //depot/aic7xxx/linux/drivers/scsi/aic7xxx/aic79xx_proc.c#14 $
+ * $Id: //depot/aic7xxx/linux/drivers/scsi/aic7xxx/aic79xx_proc.c#15 $
 */
 #include "aic79xx_osm.h"
 #include "aic79xx_inline.h"
@@ -173,7 +173,7 @@ ahd_dump_target_state(struct ahd_softc *ahd, struct info_str *info,

 	tinfo = ahd_fetch_transinfo(ahd, channel, our_id,
 				    target_id, &tstate);
-	copy_info(info, "Channel %c Target %d Negotiation Settings\n",
+	copy_info(info, "Channel %c, Target %d Negotiation Settings\n",
 		  channel, target_id);
 	copy_info(info, "\tUser: ");
 	ahd_format_transinfo(info, &tinfo->user);
@@ -318,7 +318,11 @@ ahd_linux_proc_info(char *buffer, char **start, off_t offset,
 		  AIC79XX_DRIVER_VERSION);
 	copy_info(&info, "%s\n", ahd->description);
 	ahd_controller_info(ahd, ahd_info);
-	copy_info(&info, "%s\n\n", ahd_info);
+	copy_info(&info, "%s\n", ahd_info);
+	copy_info(&info, "Allocated SCBs: %d, SG List Length: %d\n\n",
+		  ahd->scb_data.numscbs, ahd_linux_nseg);
+
+	max_targ = 15;

 	if (ahd->seep_config == NULL)
 		copy_info(&info, "No Serial EEPROM\n");
@@ -335,7 +339,6 @@ ahd_linux_proc_info(char *buffer, char **start, off_t offset,
 	}
 	copy_info(&info, "\n");

-	max_targ = 15;
 	if ((ahd->features & AHD_WIDE) == 0)
 		max_targ = 7;


--- a/drivers/scsi/aic7xxx/aic7xxx_osm.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c
 /*
 * Adaptec AIC7xxx device driver for Linux.
 *
- * $Id: //depot/aic7xxx/linux/drivers/scsi/aic7xxx/aic7xxx_osm.c#210 $
+ * $Id: //depot/aic7xxx/linux/drivers/scsi/aic7xxx/aic7xxx_osm.c#211 $
 *
 * Copyright (c) 1994 John Aycock
 *   The University of Calgary Department of Computer Science.
@@ -153,11 +153,10 @@ static int errno;
 spinlock_t ahc_list_spinlock;
 #endif

-/*
- * To generate the correct addresses for the controller to issue
- * on the bus.  Originally added for DEC Alpha support.
- */
-#define VIRT_TO_BUS(a) (uint32_t)virt_to_bus((void *)(a))
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+/* For dynamic sglist size calculation. */
+u_int ahc_linux_nseg;
+#endif

 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,0)
 struct proc_dir_entry proc_scsi_aic7xxx = {
@@ -508,6 +507,7 @@ static void ahc_linux_release_simq(u_long arg);
 static void ahc_linux_dev_timed_unfreeze(u_long arg);
 static int  ahc_linux_queue_recovery_cmd(Scsi_Cmnd *cmd, scb_flag flag);
 static void ahc_linux_initialize_scsi_bus(struct ahc_softc *ahc);
+static void ahc_linux_size_nseg(void);
 static void ahc_linux_thread_run_complete_queue(struct ahc_softc *ahc);
 static void ahc_linux_start_dv(struct ahc_softc *ahc);
 static void ahc_linux_dv_timeout(struct scsi_cmnd *cmd);
@@ -823,6 +823,67 @@ static int	   ahc_linux_bus_reset(Scsi_Cmnd *);
 static int	   ahc_linux_dev_reset(Scsi_Cmnd *);
 static int	   ahc_linux_abort(Scsi_Cmnd *);

+/*
+ * Calculate a safe value for AHC_NSEG (as expressed through ahc_linux_nseg).
+ *
+ * In pre-2.5.X...
+ * The midlayer allocates an S/G array dynamically when a command is issued
+ * using SCSI malloc.  This array, which is in an OS dependent format that
+ * must later be copied to our private S/G list, is sized to house just the
+ * number of segments needed for the current transfer.  Since the code that
+ * sizes the SCSI malloc pool does not take into consideration fragmentation
+ * of the pool, executing transactions numbering just a fraction of our
+ * concurrent transaction limit with list lengths aproaching AHC_NSEG will
+ * quickly depleat the SCSI malloc pool of usable space.  Unfortunately, the
+ * mid-layer does not properly handle this scsi malloc failures for the S/G
+ * array and the result can be a lockup of the I/O subsystem.  We try to size
+ * our S/G list so that it satisfies our drivers allocation requirements in
+ * addition to avoiding fragmentation of the SCSI malloc pool.
+ */
+static void
+ahc_linux_size_nseg(void)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+	u_int cur_size;
+	u_int best_size;
+
+	/*
+	 * The SCSI allocator rounds to the nearest 512 bytes
+	 * an cannot allocate across a page boundary.  Our algorithm
+	 * is to start at 1K of scsi malloc space per-command and
+	 * loop through all factors of the PAGE_SIZE and pick the best.
+	 */
+	best_size = 0;
+	for (cur_size = 1024; cur_size <= PAGE_SIZE; cur_size *= 2) {
+		u_int nseg;
+
+		nseg = cur_size / sizeof(struct scatterlist);
+		if (nseg < AHC_LINUX_MIN_NSEG)
+			continue;
+
+		if (best_size == 0) {
+			best_size = cur_size;
+			ahc_linux_nseg = nseg;
+		} else {
+			u_int best_rem;
+			u_int cur_rem;
+
+			/*
+			 * Compare the traits of the current "best_size"
+			 * with the current size to determine if the
+			 * current size is a better size.
+			 */
+			best_rem = best_size % sizeof(struct scatterlist);
+			cur_rem = cur_size % sizeof(struct scatterlist);
+			if (cur_rem < best_rem) {
+				best_size = cur_size;
+				ahc_linux_nseg = nseg;
+			}
+		}
+	}
+#endif
+}
+
 /*
 * Try to detect an Adaptec 7XXX controller.
 */
@@ -851,6 +912,7 @@ ahc_linux_detect(Scsi_Host_Template *template)
 		printf("ahc_linux_detect: Unable to attach\n");
 		return (0);
 	}
+	ahc_linux_size_nseg();
 #ifdef MODULE
 	/*
 	 * If we've been passed any parameters, process them now.
@@ -1280,7 +1342,6 @@ Scsi_Host_Template aic7xxx_driver_template = {
 #endif
 	.can_queue		= AHC_MAX_QUEUE,
 	.this_id		= -1,
-	.sg_tablesize		= AHC_NSEG,
 	.cmd_per_lun		= 2,
 	.use_clustering		= ENABLE_CLUSTERING,
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,7)
@@ -1454,6 +1515,7 @@ ahc_dmamap_load(struct ahc_softc *ahc, bus_dma_tag_t dmat, bus_dmamap_t map,
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0)
 	stack_sg.ds_addr = map->bus_addr;
 #else
+#define VIRT_TO_BUS(a) (uint32_t)virt_to_bus((void *)(a))
 	stack_sg.ds_addr = VIRT_TO_BUS(buf);
 #endif
 	stack_sg.ds_len = dmat->maxsize;
@@ -1711,13 +1773,13 @@ ahc_linux_register_host(struct ahc_softc *ahc, Scsi_Host_Template *template)
 	ahc->platform_data->host = host;
 	host->can_queue = AHC_MAX_QUEUE;
 	host->cmd_per_lun = 2;
-	host->sg_tablesize = AHC_NSEG;
 	/* XXX No way to communicate the ID for multiple channels */
 	host->this_id = ahc->our_id;
 	host->irq = ahc->platform_data->irq;
 	host->max_id = (ahc->features & AHC_WIDE) ? 16 : 8;
 	host->max_lun = AHC_NUM_LUNS;
 	host->max_channel = (ahc->features & AHC_TWIN) ? 1 : 0;
+	host->sg_tablesize = AHC_NSEG;
 	ahc_set_unit(ahc, ahc_linux_next_unit());
 	sprintf(buf, "scsi%d", host->host_no);
 	new_name = malloc(strlen(buf) + 1, M_DEVBUF, M_NOWAIT);

--- a/drivers/scsi/aic7xxx/aic7xxx_osm.h
+++ b/drivers/scsi/aic7xxx/aic7xxx_osm.h
@@ -53,7 +53,7 @@
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
- * $Id: //depot/aic7xxx/linux/drivers/scsi/aic7xxx/aic7xxx_osm.h#133 $
+ * $Id: //depot/aic7xxx/linux/drivers/scsi/aic7xxx/aic7xxx_osm.h#134 $
 *
 */
 #ifndef _AIC7XXX_LINUX_H_
@@ -494,7 +494,18 @@ struct ahc_linux_target {
 * manner and are allocated below 4GB, the number of S/G segments is
 * unrestricted.
 */
-#define        AHC_NSEG 128
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+/*
+ * We dynamically adjust the number of segments in pre-2.5 kernels to
+ * avoid fragmentation issues in the SCSI mid-layer's private memory
+ * allocator.  See aic7xxx_osm.c ahc_linux_size_nseg() for details.
+ */
+extern u_int ahc_linux_nseg;
+#define	AHC_NSEG ahc_linux_nseg
+#define	AHC_LINUX_MIN_NSEG 64
+#else
+#define	AHC_NSEG 128
+#endif

 /*
 * Per-SCB OSM storage.

--- a/drivers/scsi/aic7xxx/aic7xxx_proc.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_proc.c
@@ -37,7 +37,7 @@
 * String handling code courtesy of Gerard Roudier's <groudier@club-internet.fr>
 * sym driver.
 *
- * $Id: //depot/aic7xxx/linux/drivers/scsi/aic7xxx/aic7xxx_proc.c#24 $
+ * $Id: //depot/aic7xxx/linux/drivers/scsi/aic7xxx/aic7xxx_proc.c#25 $
 */
 #include "aic7xxx_osm.h"
 #include "aic7xxx_inline.h"
@@ -327,7 +327,10 @@ ahc_linux_proc_info(char *buffer, char **start, off_t offset,
 		  AIC7XXX_DRIVER_VERSION);
 	copy_info(&info, "%s\n", ahc->description);
 	ahc_controller_info(ahc, ahc_info);
-	copy_info(&info, "%s\n\n", ahc_info);
+	copy_info(&info, "%s\n", ahc_info);
+	copy_info(&info, "Allocated SCBs: %d, SG List Length: %d\n\n",
+		  ahc->scb_data->numscbs, ahc_linux_nseg);
+

 	if (ahc->seep_config == NULL)
 		copy_info(&info, "No Serial EEPROM\n");