On Thu, Nov 08, 2018 at 05:05:03PM +0100, Sylvain GALLIANO wrote: > Hi, > > I replaced > << printf("XXX knote %p already in tailq status:%x kq_count:%d [%p %p] > %u\n",kn,kn->kn_status,kq->kq_count,kn->kn_tqe.tqe_next,kn->kn_tqe.tqe_prev,__LINE__); > by > >> panic("XXX knote %p already in tailq status:%x kq_count:%d [%p %p] > %u\n",kn,kn->kn_status,kq->kq_count,kn->kn_tqe.tqe_next,kn->kn_tqe.tqe_prev,__LINE__); > > Here is the stack during panic: > panic: XXX knote 0xfffff801e1c6ddc0 already in tailq status:1 kq_count:2 > [0 0xfffff8000957a978] 2671 > > cpuid = 0 > time = 1541688832 > KDB: stack backtrace: > db_trace_self_wrapper() at db_trace_self_wrapper+0x2c/frame > 0xfffffe0412258fd0 > kdb_backtrace() at kdb_backtrace+0x53/frame 0xfffffe04122590a0 > vpanic() at vpanic+0x277/frame 0xfffffe0412259170 > doadump() at doadump/frame 0xfffffe04122591d0 > knote_enqueue() at knote_enqueue+0xf2/frame 0xfffffe0412259210 > kqueue_register() at kqueue_register+0xaed/frame 0xfffffe0412259340 > kqueue_kevent() at kqueue_kevent+0x13c/frame 0xfffffe04122595b0 > kern_kevent_fp() at kern_kevent_fp+0x66/frame 0xfffffe0412259610 > kern_kevent() at kern_kevent+0x17f/frame 0xfffffe0412259700 > kern_kevent_generic() at kern_kevent_generic+0xfe/frame 0xfffffe0412259780 > sys_kevent() at sys_kevent+0xaa/frame 0xfffffe0412259810 > syscallenter() at syscallenter+0x4e3/frame 0xfffffe04122598f0 > amd64_syscall() at amd64_syscall+0x1b/frame 0xfffffe04122599b0 > fast_syscall_common() at fast_syscall_common+0x101/frame 0xfffffe04122599b0 > --- syscall (560, FreeBSD ELF64, sys_kevent), rip = 0x406e3bfa, rsp = > 0x7fffdf7e9db8, rbp = 0x7fffdf7e9e00 --- > KDB: enter: panic > > > you can get kernel.debug + vmcore at: > https://drive.google.com/drive/folders/1MbqJQm12-KOYDbb4-9uNRTnAdsNqLaIP?usp=sharing Could you please give the following patch a try? If possible, could you also ktrace one of the active syslog-ng processes for some time, perhaps 15 seconds, and share the kdump? 
I have been trying to reproduce the problem without any luck. diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index d9c670e29d60..9ef7c53361bf 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -224,6 +224,7 @@ SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW, #define KQ_LOCK(kq) do { \ mtx_lock(&(kq)->kq_lock); \ } while (0) +#define KQ_LOCKPTR(kq) (&(kq)->kq_lock) #define KQ_FLUX_WAKEUP(kq) do { \ if (((kq)->kq_state & KQ_FLUXWAIT) == KQ_FLUXWAIT) { \ (kq)->kq_state &= ~KQ_FLUXWAIT; \ @@ -687,8 +688,11 @@ filt_timerexpire(void *knx) struct kq_timer_cb_data *kc; kn = knx; + KQ_OWNED(kn->kn_kq); kn->kn_data++; - KNOTE_ACTIVATE(kn, 0); /* XXX - handle locking */ + + if (!kn_in_flux(kn) || (kn->kn_status & KN_SCAN) != 0) + KNOTE_ACTIVATE(kn, 1); if ((kn->kn_flags & EV_ONESHOT) != 0) return; @@ -753,7 +757,7 @@ filt_timerattach(struct knote *kn) kn->kn_flags |= EV_CLEAR; /* automatically set */ kn->kn_status &= ~KN_DETACHED; /* knlist_add clears it */ kn->kn_ptr.p_v = kc = malloc(sizeof(*kc), M_KQUEUE, M_WAITOK); - callout_init(&kc->c, 1); + callout_init_mtx(&kc->c, KQ_LOCKPTR(kn->kn_kq), 0); filt_timerstart(kn, to); return (0); @@ -772,8 +776,10 @@ filt_timerstart(struct knote *kn, sbintime_t to) kc->next = to + sbinuptime(); kc->to = to; } + KQ_LOCK(kn->kn_kq); callout_reset_sbt_on(&kc->c, kc->next, 0, filt_timerexpire, kn, PCPU_GET(cpuid), C_ABSOLUTE); + KQ_UNLOCK(kn->kn_kq); } static void @@ -826,7 +832,6 @@ filt_timertouch(struct knote *kn, struct kevent *kev, u_long type) KQ_LOCK(kq); if (kn->kn_status & KN_QUEUED) knote_dequeue(kn); - kn->kn_status &= ~KN_ACTIVE; kn->kn_data = 0; KQ_UNLOCK(kq); @@ -1843,6 +1848,7 @@ kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops, } TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe); + marker->kn_status |= KN_QUEUED; influx = 0; while (count) { KQ_OWNED(kq); @@ -1861,8 +1867,10 @@
kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops, } TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); + KASSERT((kn->kn_status & KN_QUEUED) != 0, + ("knote %p not queued", kn)); + kn->kn_status &= ~KN_QUEUED; if ((kn->kn_status & KN_DISABLED) == KN_DISABLED) { - kn->kn_status &= ~KN_QUEUED; kq->kq_count--; continue; } @@ -1876,7 +1884,6 @@ kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops, ("knote %p is unexpectedly in flux", kn)); if ((kn->kn_flags & EV_DROP) == EV_DROP) { - kn->kn_status &= ~KN_QUEUED; kn_enter_flux(kn); kq->kq_count--; KQ_UNLOCK(kq); @@ -1888,7 +1895,6 @@ kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops, KQ_LOCK(kq); continue; } else if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) { - kn->kn_status &= ~KN_QUEUED; kn_enter_flux(kn); kq->kq_count--; KQ_UNLOCK(kq); @@ -1910,8 +1916,8 @@ kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops, if (kn->kn_fop->f_event(kn, 0) == 0) { KQ_LOCK(kq); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); - kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE | - KN_SCAN); + /* XXXMJ races with EVFILT_TIMER activation. */ + kn->kn_status &= ~(KN_ACTIVE | KN_SCAN); kn_leave_flux(kn); kq->kq_count--; kn_list_unlock(knl); @@ -1937,11 +1943,13 @@ kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops, } if (kn->kn_flags & EV_DISPATCH) kn->kn_status |= KN_DISABLED; - kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE); + kn->kn_status &= ~KN_ACTIVE; kq->kq_count--; - } else + } else if ((kn->kn_status & KN_QUEUED) == 0) { + kn->kn_status |= KN_QUEUED; TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); - + } + kn->kn_status &= ~KN_SCAN; kn_leave_flux(kn); kn_list_unlock(knl);
Received on Thu Nov 15 2018 - 21:10:26 UTC
This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:41:19 UTC