With this latest patch, after stressing syslog-ng few minutes, it do not log anymore and a simple kill do not work (I have to do kill -9) Le dim. 18 nov. 2018 à 01:35, Mark Johnston <markj_at_freebsd.org> a écrit : > On Fri, Nov 16, 2018 at 05:56:54PM +0100, Sylvain GALLIANO wrote: > > Le ven. 16 nov. 2018 à 16:42, Mark Johnston <markj_at_freebsd.org> a écrit > : > > > > > On Fri, Nov 16, 2018 at 03:47:39PM +0100, Sylvain GALLIANO wrote: > > > > Le jeu. 15 nov. 2018 à 23:10, Mark Johnston <markj_at_freebsd.org> a > écrit > > > : > > > > > > > > > On Thu, Nov 08, 2018 at 05:05:03PM +0100, Sylvain GALLIANO wrote: > > > > > > Hi, > > > > > > > > > > > > I replaced > > > > > > << printf("XXX knote %p already in tailq status:%x kq_count:%d > [%p > > > %p] > > > > > > > > > > > > > > > %u\n",kn,kn->kn_status,kq->kq_count,kn->kn_tqe.tqe_next,kn->kn_tqe.tqe_prev,__LINE__); > > > > > > by > > > > > > >> panic("XXX knote %p already in tailq status:%x kq_count:%d > [%p > > > %p] > > > > > > > > > > > > > > > %u\n",kn,kn->kn_status,kq->kq_count,kn->kn_tqe.tqe_next,kn->kn_tqe.tqe_prev,__LINE__); > > > > > > > > > > > > Here is the stack during panic: > > > > > > panic: XXX knote 0xfffff801e1c6ddc0 already in tailq status:1 > > > kq_count:2 > > > > > > [0 0xfffff8000957a978] 2671 > > > > > > > > > > > Could you please give the following patch a try? > > > > > > > > > > If possible, could you also ktrace one of the active syslog-ng > > > processes > > > > > for some time, perhaps 15 seconds, and share the kdump? I have > been > > > > > trying to reproduce the problem without any luck. > > > > > > > > > Unfortunately patched kernel is not stable: > > > > - some processes run at 100% CPU (STOP state) and cannot be killed > > > > - sometime the system completely freeze (need a hard reboot) > > Could you please give this updated a patch a try? > > diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c > index d9c670e29d60..f989d612bac4 100644 > --- a/sys/kern/kern_event.c > +++ b/sys/kern/kern_event.c > _at__at_ -224,6 +224,7 _at__at_ SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW, > #define KQ_LOCK(kq) do { \ > mtx_lock(&(kq)->kq_lock); \ > } while (0) > +#define KQ_LOCKPTR(kq) (&(kq)->kq_lock) > #define KQ_FLUX_WAKEUP(kq) do { > \ > if (((kq)->kq_state & KQ_FLUXWAIT) == KQ_FLUXWAIT) { \ > (kq)->kq_state &= ~KQ_FLUXWAIT; \ > _at__at_ -687,8 +688,11 _at__at_ filt_timerexpire(void *knx) > struct kq_timer_cb_data *kc; > > kn = knx; > + KQ_OWNED(kn->kn_kq); > kn->kn_data++; > - KNOTE_ACTIVATE(kn, 0); /* XXX - handle locking */ > + > + if (!kn_in_flux(kn) || (kn->kn_status & KN_SCAN) != 0) > + KNOTE_ACTIVATE(kn, 1); > > if ((kn->kn_flags & EV_ONESHOT) != 0) > return; > _at__at_ -753,7 +757,7 _at__at_ filt_timerattach(struct knote *kn) > kn->kn_flags |= EV_CLEAR; /* automatically set */ > kn->kn_status &= ~KN_DETACHED; /* knlist_add clears it */ > kn->kn_ptr.p_v = kc = malloc(sizeof(*kc), M_KQUEUE, M_WAITOK); > - callout_init(&kc->c, 1); > + callout_init_mtx(&kc->c, KQ_LOCKPTR(kn->kn_kq), 0); > filt_timerstart(kn, to); > > return (0); > _at__at_ -772,8 +776,10 _at__at_ filt_timerstart(struct knote *kn, sbintime_t to) > kc->next = to + sbinuptime(); > kc->to = to; > } > + KQ_LOCK(kn->kn_kq); > callout_reset_sbt_on(&kc->c, kc->next, 0, filt_timerexpire, kn, > PCPU_GET(cpuid), C_ABSOLUTE); > + KQ_UNLOCK(kn->kn_kq); > } > > static void > _at__at_ -826,7 +832,6 _at__at_ filt_timertouch(struct knote *kn, struct kevent *kev, > u_long type) > KQ_LOCK(kq); > if (kn->kn_status & KN_QUEUED) > knote_dequeue(kn); > - > kn->kn_status &= ~KN_ACTIVE; > kn->kn_data = 0; > KQ_UNLOCK(kq); > _at__at_ -1843,12 +1848,13 _at__at_ kqueue_scan(struct kqueue *kq, int maxevents, > struct kevent_copyops *k_ops, > } > > TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe); > + marker->kn_status |= KN_QUEUED; > influx = 0; > while (count) { > KQ_OWNED(kq); > kn = TAILQ_FIRST(&kq->kq_head); > > - if ((kn->kn_status == KN_MARKER && kn != marker) || > + if (((kn->kn_status & KN_MARKER) != 0 && kn != marker) || > kn_in_flux(kn)) { > if (influx) { > influx = 0; > _at__at_ -1861,24 +1867,21 _at__at_ kqueue_scan(struct kqueue *kq, int maxevents, > struct kevent_copyops *k_ops, > } > > TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); > - if ((kn->kn_status & KN_DISABLED) == KN_DISABLED) { > - kn->kn_status &= ~KN_QUEUED; > - kq->kq_count--; > + kn->kn_status &= ~KN_QUEUED; > + if ((kn->kn_status & KN_DISABLED) == KN_DISABLED) > continue; > - } > if (kn == marker) { > KQ_FLUX_WAKEUP(kq); > if (count == maxevents) > goto retry; > goto done; > } > + kq->kq_count--; > KASSERT(!kn_in_flux(kn), > ("knote %p is unexpectedly in flux", kn)); > > if ((kn->kn_flags & EV_DROP) == EV_DROP) { > - kn->kn_status &= ~KN_QUEUED; > kn_enter_flux(kn); > - kq->kq_count--; > KQ_UNLOCK(kq); > /* > * We don't need to lock the list since we've > _at__at_ -1888,9 +1891,7 _at__at_ kqueue_scan(struct kqueue *kq, int maxevents, struct > kevent_copyops *k_ops, > KQ_LOCK(kq); > continue; > } else if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) { > - kn->kn_status &= ~KN_QUEUED; > kn_enter_flux(kn); > - kq->kq_count--; > KQ_UNLOCK(kq); > /* > * We don't need to lock the list since we've > _at__at_ -1910,10 +1911,9 _at__at_ kqueue_scan(struct kqueue *kq, int maxevents, > struct kevent_copyops *k_ops, > if (kn->kn_fop->f_event(kn, 0) == 0) { > KQ_LOCK(kq); > KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); > - kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE | > - KN_SCAN); > + /* XXXMJ races with EVFILT_TIMER > activation. */ > + kn->kn_status &= ~(KN_ACTIVE | KN_SCAN); > kn_leave_flux(kn); > - kq->kq_count--; > kn_list_unlock(knl); > influx = 1; > continue; > _at__at_ -1937,11 +1937,10 _at__at_ kqueue_scan(struct kqueue *kq, int maxevents, > struct kevent_copyops *k_ops, > } > if (kn->kn_flags & EV_DISPATCH) > kn->kn_status |= KN_DISABLED; > - kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE); > - kq->kq_count--; > - } else > - TAILQ_INSERT_TAIL(&kq->kq_head, kn, > kn_tqe); > - > + kn->kn_status &= ~KN_ACTIVE; > + } else if ((kn->kn_status & KN_QUEUED) == 0) > + knote_enqueue(kn); > + > kn->kn_status &= ~KN_SCAN; > kn_leave_flux(kn); > kn_list_unlock(knl); >Received on Mon Nov 19 2018 - 08:27:07 UTC
This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:41:19 UTC