Re: ad WRITE_DMA timing out frequently

From: Ian Dowse <iedowse_at_maths.tcd.ie>
Date: Sat, 19 Feb 2005 15:56:35 +0000
In message <1108752445.1105.34.camel_at_zappa.Chelsea-Ct.Org>, Paul Mather writes:
>The "TIMEOUT - WRITE_DMA" issue has been a recurring problem for me
>since somewhere in the 5.2.1--5.3 release range.  (It's been so long now
>that I don't remember whether it first started plaguing me in 5.2.1 or
>5.3.  I do know for definite I never got this problem in 5.1 and it only
>crept in during an "upgrade.")

On a recent -CURRENT you could try the following patch. It attempts
to clean up the handling of timeouts in the ATA code by using the
new callout_init_mtx() function, and appeared to cure fairly frequent
WRITE_DMA timeout messages for me.

Ian

Index: sys/dev/ata/ata-all.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/dev/ata/ata-all.c,v
retrieving revision 1.235
diff -u -r1.235 ata-all.c
--- sys/dev/ata/ata-all.c	7 Feb 2005 17:14:42 -0000	1.235
+++ sys/dev/ata/ata-all.c	17 Feb 2005 20:52:27 -0000
_at__at_ -429,6 +429,7 _at__at_
 		ch->state = ATA_ACTIVE;
 	    else
 		ch->state = ATA_IDLE;
+	    callout_stop(&request->callout);
 	    mtx_unlock(&ch->state_mtx);
 	    ch->locking(ch, ATA_LF_UNLOCK);
 	    ata_finish(request);
Index: sys/dev/ata/ata-queue.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/dev/ata/ata-queue.c,v
retrieving revision 1.41
diff -u -r1.41 ata-queue.c
--- sys/dev/ata/ata-queue.c	8 Dec 2004 11:16:33 -0000	1.41
+++ sys/dev/ata/ata-queue.c	18 Feb 2005 04:27:18 -0000
_at__at_ -56,7 +56,7 _at__at_
 
     /* mark request as virgin */
     request->result = request->status = request->error = 0;
-    callout_init(&request->callout, 1);
+    callout_init_mtx(&request->callout, &ch->state_mtx, CALLOUT_RETURNUNLOCKED);
 
     if (!request->callback && !(request->flags & ATA_R_REQUEUE))
 	sema_init(&request->done, 0, "ATA request done");
_at__at_ -67,6 +67,7 _at__at_
 	(request->flags & (ATA_R_CONTROL | ATA_R_IMMEDIATE))) {
 
 	/* arm timeout */
+	mtx_lock(&ch->state_mtx);
 	if (!dumping)
 	    callout_reset(&request->callout, request->timeout * hz,
 			  (timeout_t*)ata_timeout, request);
_at__at_ -75,11 +76,13 _at__at_
 	ch->running = request;
 	if (ch->hw.begin_transaction(request) == ATA_OP_FINISHED) {
 	    ch->running = NULL;
-	    callout_drain(&request->callout);
+	    callout_stop(&request->callout);
 	    if (!request->callback) 
 		sema_destroy(&request->done);
+	    mtx_unlock(&ch->state_mtx);
 	    return;
 	}
+	mtx_unlock(&ch->state_mtx);
     }
     else  {
 	/* put request on the locked queue at the specified location */
_at__at_ -193,6 +196,7 _at__at_
 		    ch->running = NULL;
 		    ch->state = ATA_IDLE;
 		    mtx_unlock(&ch->queue_mtx);
+		    callout_stop(&request->callout);
 		    mtx_unlock(&ch->state_mtx);
 		    ch->locking(ch, ATA_LF_UNLOCK);
 		    ata_finish(request);
_at__at_ -216,9 +220,6 _at__at_
 	ata_completed(request, 0);
     }
     else {
-	if (!dumping)
-	    callout_reset(&request->callout, request->timeout * hz,
-			  (timeout_t*)ata_timeout, request);
 	if (request->bio && !(request->flags & ATA_R_TIMEOUT)) {
 	    ATA_DEBUG_RQ(request, "finish bio_taskqueue");
 	    bio_taskqueue(request->bio, (bio_task_t *)ata_completed, request);
_at__at_ -263,9 +264,6 _at__at_
 	    request->result = EIO;
     }
     else {
-	/* untimeout request now we have control back */
-	callout_drain(&request->callout);
-
 	/* if this is a soft ECC error warn about it */
 	if ((request->status & (ATA_S_CORR | ATA_S_ERROR)) == ATA_S_CORR) {
 	    ata_prtdev(request->device,
_at__at_ -408,9 +406,8 _at__at_
 {
     struct ata_channel *ch = request->device->channel;
 
-    mtx_lock(&ch->state_mtx);
-
     ATA_DEBUG_RQ(request, "timeout");
+    mtx_assert(&ch->state_mtx, MA_OWNED);
 
     /* if interrupt has been seen, shout and just rearm timeout */
     if (request->flags & ATA_R_INTR_SEEN) {
_at__at_ -426,6 +423,7 _at__at_
 	    callout_reset(&request->callout, request->timeout * hz,
 			  (timeout_t*)ata_timeout, request);
 	mtx_unlock(&ch->state_mtx);
+	/* CALLOUT_RETURNUNLOCKED */
 	return;
     }
 
_at__at_ -454,6 +452,7 _at__at_
 	mtx_unlock(&ch->state_mtx);
 	ata_prtdev(request->device, "timeout state=%d unexpected\n", ch->state);
     }
+    /* CALLOUT_RETURNUNLOCKED */
 }
 
 void
_at__at_ -464,10 +463,11 _at__at_
     mtx_lock(&ch->state_mtx);
     request = ch->running;
     ch->running = NULL;
+    if (request)
+	callout_stop(&request->callout);
     mtx_unlock(&ch->state_mtx);
 
     if (request) {
-        callout_drain(&request->callout);
         ata_prtdev(request->device,
                    "WARNING - %s requeued due to channel reset",
                    ata_cmd2str(request));
_at__at_ -501,11 +501,12 _at__at_
     mtx_lock(&ch->state_mtx);
     request = ch->running;
     ch->running = NULL;
+    if (request)
+	callout_stop(&request->callout);
     mtx_unlock(&ch->state_mtx);
 
     /* if we have a request "in flight" fail it as well */
     if (request && (!device || request->device == device)) {
-	callout_drain(&request->callout);
 	request->result = ENXIO;
 	if (request->callback)
 	    (request->callback)(request);
Received on Sat Feb 19 2005 - 14:56:38 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:38:28 UTC