Possible improvement for ATA resume problems

From: Ian Dowse <iedowse_at_maths.tcd.ie>
Date: Wed, 07 Jan 2004 09:57:23 +0000
I've had some success with the following patch to avoid ATA-related
hangs at resume time and also resume-time errors such as:

  ad0: FAILURE - READ_MUL status=59<READY,DSC,DRQ,ERROR> error=4<ABORTED>
  ad0: FAILURE - WRITE_MUL status=51<READY,DSC,ERROR> error=4<ABORTED>

The patch does two things:

 o Re-issue the ATA_SET_MULTI command to ATA disks when reinitialising
   the channel. This appears to be a simple bug, and explains the
   ABORTED errors above.  It doesn't explain why things used to
   recover for me after one or two of these errors though. Also
   reissue the ATA_SETFEATURES commands relating to caching. Non-disk
   devices might need to do something similar too.

 o Ensure that no normal requests can get queued while a channel
   is being reinitialised. This stops regular I/O operations from
   happening before a channel has been fully set up after a resume.
   It works by putting any such requests on a "deferred" list, and
   moving them back when the reinit is complete. Note that the root
   cause here is probably the fact that the ATA resume method blocks
   while waiting for some operations to complete, giving normal
   processes a chance to run before the resume has fully completed.

The deferred queue approach is fairly gross - I guess a much better
way would be a mechanism to allow a single request to be executed
synchronously while the ATA channel remains locked (optionally using
DELAY() calls and polling when sleeping is not permitted).

I've no idea if this will help much in general, but it has certainly
made suspend/resume work more reliably for me.

Ian


Index: ata-all.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/dev/ata/ata-all.c,v
retrieving revision 1.198
diff -u -r1.198 ata-all.c
--- ata-all.c	30 Nov 2003 16:27:58 -0000	1.198
+++ ata-all.c	7 Jan 2004 08:55:20 -0000
_at__at_ -122,6 +122,7 _at__at_
     bzero(&ch->queue_mtx, sizeof(struct mtx));
     mtx_init(&ch->queue_mtx, "ATA queue lock", MTX_DEF, 0);
     TAILQ_INIT(&ch->ata_queue);
+    TAILQ_INIT(&ch->ata_deferred_queue);
 
     /* initialise device(s) on this channel */
     ch->locking(ch, ATA_LF_LOCK);
_at__at_ -231,7 +232,7 _at__at_
 ata_reinit(struct ata_channel *ch)
 {
     struct ata_request *request = ch->running;
-    int devices, misdev, newdev;
+    int devices, keptdev, misdev, newdev;
     
     if (!ch->r_irq)
 	return ENXIO;
_at__at_ -239,6 +240,8 _at__at_
     /* reset the HW */
     ata_printf(ch, -1, "resetting devices ..\n");
     ATA_FORCELOCK_CH(ch, ATA_CONTROL);
+    ch->flags |= ATA_REINIT_ACTIVE;
+    KASSERT(TAILQ_EMPTY(&ch->ata_queue), ("ata_reinit: queue not empty!"));
     ch->running = NULL;
     devices = ch->devices;
     ch->hw.reset(ch);
_at__at_ -275,6 +278,16 _at__at_
     /* identify whats present on this channel now */
     ata_identify_devices(ch);
 
+    /* reset/reconfigure devices that are still there */
+    if ((keptdev = devices & ch->devices)) {
+	if ((keptdev & (ATA_ATA_MASTER | ATA_ATAPI_MASTER)) &&
+	    ch->device[MASTER].reset)
+	    ch->device[MASTER].reset(&ch->device[MASTER]);
+	if ((keptdev & (ATA_ATA_SLAVE | ATA_ATAPI_SLAVE)) &&
+	    ch->device[SLAVE].reset)
+	    ch->device[SLAVE].reset(&ch->device[SLAVE]);
+    }
+
     /* attach new devices that appeared during reset */
     if ((newdev = ~devices & ch->devices)) {
 	if ((newdev & (ATA_ATA_MASTER | ATA_ATAPI_MASTER)) &&
_at__at_ -295,6 +308,15 _at__at_
     atapi_cam_reinit_bus(ch);
 #endif
 
+    /* Now it's safe to execute any normal requests that were deferred. */
+    mtx_lock(&ch->queue_mtx);
+    while (!TAILQ_EMPTY(&ch->ata_deferred_queue)) {
+	request = TAILQ_FIRST(&ch->ata_deferred_queue);
+	TAILQ_REMOVE(&ch->ata_deferred_queue, request, chain);
+	TAILQ_INSERT_TAIL(&ch->ata_queue, request, chain);
+    }
+    mtx_unlock(&ch->queue_mtx);
+    ch->flags &= ~ATA_REINIT_ACTIVE;
     printf("done\n");
     return 0;
 }
_at__at_ -556,7 +578,7 _at__at_
 	if (request) {
 	    request->device = atadev;
 	    request->u.ata.command = command;
-	    request->flags = (ATA_R_READ | ATA_R_QUIET);
+	    request->flags = (ATA_R_READ | ATA_R_QUIET | ATA_R_DONTDEFER);
 	    request->data = (caddr_t)atacap;
 	    request->timeout = 2;
 	    request->retries = 3;
Index: ata-all.h
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/dev/ata/ata-all.h,v
retrieving revision 1.68
diff -u -r1.68 ata-all.h
--- ata-all.h	28 Nov 2003 19:01:28 -0000	1.68
+++ ata-all.h	7 Jan 2004 02:46:06 -0000
_at__at_ -195,6 +195,7 _at__at_
 #define		ATA_R_AT_HEAD		0x0200
 #define		ATA_R_REQUEUE		0x0400
 #define		ATA_R_SKIPSTART		0x0800
+#define		ATA_R_DONTDEFER		0x1000
 
     void			(*callback)(struct ata_request *request);
     int				retries;	/* retry count */
_at__at_ -218,6 +219,7 _at__at_
     void			*softc;		/* ptr to softc for device */
     void			(*attach)(struct ata_device *atadev);
     void			(*detach)(struct ata_device *atadev);
+    void			(*reset)(struct ata_device *atadev);
     void			(*start)(struct ata_device *atadev);
     int				flags;
 #define		ATA_D_USE_CHS		0x0001
_at__at_ -274,6 +276,8 _at__at_
     int				offset;
 };
 
+TAILQ_HEAD(ata_reqhead, ata_request);
+
 /* structure describing an ATA channel */
 struct ata_channel {
     struct device		*dev;		/* device handle */
_at__at_ -289,6 +293,7 _at__at_
 #define		ATA_USE_PC98GEOM	0x04
 #define		ATA_ATAPI_DMA_RO	0x08
 #define		ATA_48BIT_ACTIVE	0x10
+#define		ATA_REINIT_ACTIVE	0x20
 
     struct ata_device		device[2];	/* devices on this channel */
 #define		MASTER			0x00
_at__at_ -310,7 +315,8 _at__at_
 #define		ATA_LF_UNLOCK		0x0002
 
     struct mtx			queue_mtx;
-    TAILQ_HEAD(, ata_request)	ata_queue;	/* head of ATA queue */
+    struct ata_reqhead		ata_queue;	/* head of ATA queue */
+    struct ata_reqhead		ata_deferred_queue;
     void			*running;	/* currently running request */
 };
 
Index: ata-disk.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/dev/ata/ata-disk.c,v
retrieving revision 1.164
diff -u -r1.164 ata-disk.c
--- ata-disk.c	11 Nov 2003 14:55:35 -0000	1.164
+++ ata-disk.c	7 Jan 2004 02:05:59 -0000
_at__at_ -55,6 +55,7 _at__at_
 
 /* prototypes */
 static void ad_detach(struct ata_device *);
+static void ad_reset(struct ata_device *);
 static void ad_start(struct ata_device *);
 static void ad_done(struct ata_request *);
 static disk_open_t adopen;
_at__at_ -119,28 +120,13 _at__at_
 	lbasize48 > 268435455)
 	adp->total_secs = lbasize48;
 
-    /* enable read caching */
-    ata_controlcmd(atadev, ATA_SETFEATURES, ATA_SF_ENAB_RCACHE, 0, 0);
-
-    /* enable write caching if enabled */
-    if (ata_wc)
-	ata_controlcmd(atadev, ATA_SETFEATURES, ATA_SF_ENAB_WCACHE, 0, 0);
-    else
-	ata_controlcmd(atadev, ATA_SETFEATURES, ATA_SF_DIS_WCACHE, 0, 0);
-
-    /* use multiple sectors/interrupt if device supports it */
-    adp->max_iosize = DEV_BSIZE;
-    if (ad_version(atadev->param->version_major)) {
-	int secsperint = max(1, min(atadev->param->sectors_intr, 16));
-
-	if (!ata_controlcmd(atadev, ATA_SET_MULTI, 0, 0, secsperint))
-	    adp->max_iosize = secsperint * DEV_BSIZE;
-    }
+    atadev->softc = adp;
+    ad_reset(atadev);
 
     /* setup the function ptrs */
     atadev->detach = ad_detach;
+    atadev->reset = ad_reset;
     atadev->start = ad_start;
-    atadev->softc = adp;
 
     /* lets create the disk device */
     adp->disk.d_open = adopen;
_at__at_ -167,6 +153,30 _at__at_
 }
 
 static void
+ad_reset(struct ata_device *atadev)
+{
+    struct ad_softc *adp = atadev->softc;
+
+    /* enable read caching */
+    ata_controlcmd(atadev, ATA_SETFEATURES, ATA_SF_ENAB_RCACHE, 0, 0);
+
+    /* enable write caching if enabled */
+    if (ata_wc)
+	ata_controlcmd(atadev, ATA_SETFEATURES, ATA_SF_ENAB_WCACHE, 0, 0);
+    else
+	ata_controlcmd(atadev, ATA_SETFEATURES, ATA_SF_DIS_WCACHE, 0, 0);
+
+    /* use multiple sectors/interrupt if device supports it */
+    adp->max_iosize = DEV_BSIZE;
+    if (ad_version(atadev->param->version_major)) {
+	int secsperint = max(1, min(atadev->param->sectors_intr, 16));
+
+	if (!ata_controlcmd(atadev, ATA_SET_MULTI, 0, 0, secsperint))
+	    adp->max_iosize = secsperint * DEV_BSIZE;
+    }
+}
+
+static void
 ad_detach(struct ata_device *atadev)
 {
     struct ad_softc *adp = atadev->softc;
_at__at_ -185,6 +195,7 _at__at_
     ata_free_lun(&adp_lun_map, adp->lun);
     atadev->attach = NULL;
     atadev->detach = NULL;
+    atadev->reset = NULL;
     atadev->start = NULL;
     atadev->softc = NULL;
     atadev->flags = 0;
Index: ata-queue.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/dev/ata/ata-queue.c,v
retrieving revision 1.13
diff -u -r1.13 ata-queue.c
--- ata-queue.c	16 Dec 2003 19:41:38 -0000	1.13
+++ ata-queue.c	7 Jan 2004 08:55:21 -0000
_at__at_ -76,16 +76,25 _at__at_
 void
 ata_queue_request(struct ata_request *request)
 {
+    struct ata_reqhead *qhead;
+
     /* mark request as virgin (it might be a reused one) */
     request->result = request->status = request->error = 0;
     request->flags &= ~ATA_R_DONE;
 
+    qhead = &request->device->channel->ata_queue;
+    /* Don't let normal requests interfere with reinitialisation. */
+    if ((request->device->channel->flags & ATA_REINIT_ACTIVE) &&
+      (request->flags & ATA_R_DONTDEFER) == 0) {
+	qhead = &request->device->channel->ata_deferred_queue;
+    }
+
     /* put request on the locked queue at the specified location */
     mtx_lock(&request->device->channel->queue_mtx);
     if (request->flags & ATA_R_AT_HEAD)
-	TAILQ_INSERT_HEAD(&request->device->channel->ata_queue, request, chain);
+	TAILQ_INSERT_HEAD(qhead, request, chain);
     else
-	TAILQ_INSERT_TAIL(&request->device->channel->ata_queue, request, chain);
+	TAILQ_INSERT_TAIL(qhead, request, chain);
     mtx_unlock(&request->device->channel->queue_mtx);
 
     /* should we skip start ? */
_at__at_ -116,7 +125,7 _at__at_
 	request->u.ata.lba = lba;
 	request->u.ata.count = count;
 	request->u.ata.feature = feature;
-	request->flags = ATA_R_CONTROL;
+	request->flags = ATA_R_CONTROL | ATA_R_DONTDEFER;
 	request->timeout = 5;
 	ata_queue_request(request);
 	error = request->result;
Index: atapi-cd.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/dev/ata/atapi-cd.c,v
retrieving revision 1.157
diff -u -r1.157 atapi-cd.c
--- atapi-cd.c	7 Dec 2003 23:15:22 -0000	1.157
+++ atapi-cd.c	7 Jan 2004 08:55:21 -0000
_at__at_ -208,6 +208,7 _at__at_
     ata_free_lun(&acd_lun_map, cdp->lun);
     atadev->attach = NULL;
     atadev->detach = NULL;
+    atadev->reset = NULL;
     atadev->start = NULL;
     atadev->softc = NULL;
     atadev->flags = 0;
Index: atapi-fd.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/dev/ata/atapi-fd.c,v
retrieving revision 1.89
diff -u -r1.89 atapi-fd.c
--- atapi-fd.c	11 Nov 2003 14:55:35 -0000	1.89
+++ atapi-fd.c	7 Jan 2004 01:46:37 -0000
_at__at_ -126,6 +126,7 _at__at_
     ata_free_lun(&afd_lun_map, fdp->lun);
     atadev->attach = NULL;
     atadev->detach = NULL;
+    atadev->reset = NULL;
     atadev->start = NULL;
     atadev->softc = NULL;
     atadev->flags = 0;
Index: atapi-tape.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/dev/ata/atapi-tape.c,v
retrieving revision 1.84
diff -u -r1.84 atapi-tape.c
--- atapi-tape.c	11 Nov 2003 14:55:36 -0000	1.84
+++ atapi-tape.c	7 Jan 2004 01:46:55 -0000
_at__at_ -174,6 +174,7 _at__at_
     ata_free_lun(&ast_lun_map, stp->lun);
     atadev->attach = NULL;
     atadev->detach = NULL;
+    atadev->reset = NULL;
     atadev->start = NULL;
     atadev->softc = NULL;
     atadev->flags = 0;
Received on Wed Jan 07 2004 - 00:57:27 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:37:36 UTC