On Wed, 10 Nov 2004, Zoltan Frombach wrote: > I'm on FreeBSD 5.3-RELEASE. I get this when I try to patch the file: <snip> > Is your patch were produce for HEAD? Or what is wrong? Indeed, where the contents of struct task have changed slightly to allow for safe draining while shutting down a task. Try this variation; the timeout is also modified to intentionally be 5/100 of a second per Andrea Campi's keen bug searchign eyes :-). Index: kern/subr_taskqueue.c =================================================================== RCS file: /home/ncvs/src/sys/kern/subr_taskqueue.c,v retrieving revision 1.24 diff -u -r1.24 subr_taskqueue.c --- kern/subr_taskqueue.c 8 Aug 2004 02:37:21 -0000 1.24 +++ kern/subr_taskqueue.c 10 Nov 2004 10:22:41 -0000 _at__at_ -36,9 +36,16 _at__at_ #include <sys/lock.h> #include <sys/malloc.h> #include <sys/mutex.h> +#include <sys/sysctl.h> #include <sys/taskqueue.h> +#include <sys/time.h> #include <sys/unistd.h> +int tq_in; +SYSCTL_INT(_kern, OID_AUTO, tq_in, CTLFLAG_RD, &tq_in, 0, ""); +int tq_out; +SYSCTL_INT(_kern, OID_AUTO, tq_out, CTLFLAG_RD, &tq_out, 0, ""); + static MALLOC_DEFINE(M_TASKQUEUE, "taskqueue", "Task Queues"); static void *taskqueue_giant_ih; static void *taskqueue_ih; _at__at_ -140,6 +147,9 _at__at_ return 0; } + getnanotime(&task->ta_queuetime); + tq_in++; + /* * Optimise the case when all tasks have the same priority. */ _at__at_ -172,6 +182,7 _at__at_ taskqueue_run(struct taskqueue *queue) { struct task *task; + struct timespec tv; int owned, pending; owned = mtx_owned(&queue->tq_mutex); _at__at_ -186,8 +197,16 _at__at_ STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link); pending = task->ta_pending; task->ta_pending = 0; + tq_out++; mtx_unlock(&queue->tq_mutex); + getnanotime(&tv); + timespecsub(&tv, &task->ta_queuetime); + if (tv.tv_nsec >= 50000000) { + printf("taskqueue_run: warning, queue time of %d.%09ld " + "for context %p\n", tv.tv_sec, tv.tv_nsec, + task->ta_func); + } task->ta_func(task->ta_context, pending); mtx_lock(&queue->tq_mutex); Index: sys/_task.h =================================================================== RCS file: /home/ncvs/src/sys/sys/_task.h,v retrieving revision 1.2 diff -u -r1.2 _task.h --- sys/_task.h 28 Jul 2004 09:12:54 -0000 1.2 +++ sys/_task.h 10 Nov 2004 10:22:19 -0000 _at__at_ -45,6 +45,7 _at__at_ int ta_priority; /* priority of task in queue */ task_fn_t *ta_func; /* task handler */ void *ta_context; /* argument for handler */ + struct timespec ta_queuetime; /* time enqueued */ }; #endif /* !_SYS__TASK_H_ */ > > Zoltan > > On Wed, 10 Nov 2004, Søren Schmidt wrote: > > > Zoltan Frombach wrote: > > > I just upgraded to 5.3-RELEASE a few days ago. This morning this line > > > got into my system log file: > > > Nov 9 06:14:03 www kernel: ad0: WARNING - WRITE_DMA interrupt was seen > > > but timeout fired LBA=2491143 > > > > > > I've never seen this message before. Can someone please explain what it > > > means? With Thanks, > > > > It means that the disk has processed the write request (interrupt seen), > > but that the system (the bio_taskqueue) hasn't been able to get the > > result returned to the kernel. > > > > Your disk is not involved in this problem since it has done its part, > > but the rest of the system is either busy with something else, or there > > are bugs lurking that prohibits the bio_taskqueue from running. > > > > Either way its a WARNING not a FAILURE :) > > I'm still a bit skeptical that the task queue is at fault -- I run my > notebook with continuous measurement of the latency to schedule tasks, > generating a warning for any latency > .5 seconds, and the only time I > ever see that sort of latency is during the boot process when ACPI has > scheduled a task to run, but the task queue thread has not yet been > allowed to run: > > ipfw2 initialized, divert loadable, rule-based forwarding disabled, > default to deny, logging disabled > taskqueue_run: warning, queue time of 0.900011314 for context 0xc045638c > ad0: 19077MB <FUJITSU MHS2020AT E/8307> [38760/16/63] at ata0-master > UDMA33 > > It would be quite interesting to have someone run with this timing code on > a system reporting the warning to see what they find. What is the > threshold to generate the ATA timeout warning? My impression was that it > was several seconds, which is quite a long time. The attached patch > checks for .5 second or above latency. If the task queue is really > getting stalled for several seconds, we should figure out what task it is. > There's KTR tracing in the taskqueue code that we can use to dump a trace > stream to see what task is taking so long. > > Robert N M Watson FreeBSD Core Team, TrustedBSD Projects > robert_at_fledge.watson.org Principal Research Scientist, McAfee Research > > Index: kern/subr_taskqueue.c > =================================================================== > RCS file: /home/ncvs/src/sys/kern/subr_taskqueue.c,v > retrieving revision 1.25 > diff -u -r1.25 subr_taskqueue.c > --- kern/subr_taskqueue.c 5 Oct 2004 04:16:00 -0000 1.25 > +++ kern/subr_taskqueue.c 23 Oct 2004 19:18:26 -0000 > _at__at_ -36,9 +36,16 _at__at_ > #include <sys/lock.h> > #include <sys/malloc.h> > #include <sys/mutex.h> > +#include <sys/sysctl.h> > #include <sys/taskqueue.h> > +#include <sys/time.h> > #include <sys/unistd.h> > > +int tq_in; > +SYSCTL_INT(_kern, OID_AUTO, tq_in, CTLFLAG_RD, &tq_in, 0, ""); > +int tq_out; > +SYSCTL_INT(_kern, OID_AUTO, tq_out, CTLFLAG_RD, &tq_out, 0, ""); > + > static MALLOC_DEFINE(M_TASKQUEUE, "taskqueue", "Task Queues"); > static void *taskqueue_giant_ih; > static void *taskqueue_ih; > _at__at_ -140,6 +147,9 _at__at_ > return 0; > } > > + getnanotime(&task->ta_queuetime); > + tq_in++; > + > /* > * Optimise the case when all tasks have the same priority. > */ > _at__at_ -172,6 +182,7 _at__at_ > taskqueue_run(struct taskqueue *queue) > { > struct task *task; > + struct timespec tv; > int owned, pending; > > owned = mtx_owned(&queue->tq_mutex); > _at__at_ -187,8 +198,16 _at__at_ > pending = task->ta_pending; > task->ta_pending = 0; > task->ta_flags |= TAF_PENDING; > + tq_out++; > mtx_unlock(&queue->tq_mutex); > > + getnanotime(&tv); > + timespecsub(&tv, &task->ta_queuetime); > + if (tv.tv_nsec >= 0500000000) { > + printf("taskqueue_run: warning, queue time of %d.%09ld " > + "for context %p\n", tv.tv_sec, tv.tv_nsec, > + task->ta_func); > + } > task->ta_func(task->ta_context, pending); > > mtx_lock(&queue->tq_mutex); > Index: sys/_task.h > =================================================================== > RCS file: /home/ncvs/src/sys/sys/_task.h,v > retrieving revision 1.3 > diff -u -r1.3 _task.h > --- sys/_task.h 5 Oct 2004 04:16:01 -0000 1.3 > +++ sys/_task.h 23 Oct 2004 19:38:16 -0000 > _at__at_ -46,6 +46,7 _at__at_ > task_fn_t *ta_func; /* task handler */ > void *ta_context; /* argument for handler */ > int ta_flags; /* Flags */ > + struct timespec ta_queuetime; /* time enqueued */ > }; > > #define TAF_PENDING 0x1 /* Task is being run now */ >Received on Wed Nov 10 2004 - 09:26:26 UTC
This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:38:21 UTC