On Wed, Apr 02, 2014 at 09:45:43AM -0700, John-Mark Gurney wrote: > Konstantin Belousov wrote this message on Wed, Apr 02, 2014 at 15:07 +0300: > Well, it's not that its preventing waking up the waiter, but failing to > register the event on the knote because of the _INFLUX flag... Yes, I used the wrong terminology. > > > Patch below fixed your test case for me, also tools/regression/kqueue did > > not noticed a breakage. I tried to describe the situation in the > > comment in knote(). Also, I removed unlocked check for the KN_INFLUX > > in knote, since it seems to be an optimization for rare case, and is > > the race on its own. > > Comments below... > > > diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c > > index b3fb23d..380f1ff 100644 > > --- a/sys/kern/kern_event.c > > +++ b/sys/kern/kern_event.c > > [...] > > > _at__at_ -1506,7 +1506,7 _at__at_ retry: > > KQ_LOCK(kq); > > kn = NULL; > > } else { > > - kn->kn_status |= KN_INFLUX; > > + kn->kn_status |= KN_INFLUX | KN_SCAN; > > KQ_UNLOCK(kq); > > if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE) > > KQ_GLOBAL_LOCK(&kq_global, haskqglobal); > > Is there a reason you don't add the KN_SCAN to the other cases in > kqueue_scan that set the _INFLUX flag? Other cases in kqueue_scan() which set influx do the detach and drop, so I do not see a need to ensure that note is registered. Except I missed one case, which you pointed out. > > [...] > > > _at__at_ -1865,28 +1866,33 _at__at_ knote(struct knlist *list, long hint, int lockflags) > > */ > > SLIST_FOREACH(kn, &list->kl_list, kn_selnext) { > > kq = kn->kn_kq; > > - if ((kn->kn_status & KN_INFLUX) != KN_INFLUX) { > > + KQ_LOCK(kq); > > + if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) { > > + /* > > + * Do not process the influx notes, except for > > + * the influx coming from the kq unlock in the > > + * kqueue_scan(). In the later case, we do > > + * not interfere with the scan, since the code > > + * fragment in kqueue_scan() locks the knlist, > > + * and cannot proceed until we finished. > > + */ > > We might want to add a marker node, and reprocess the list from the > marker node, because this might introduce other races in the code too... > but the problem with that is that knote is expected to keep the list > locked throughout the call if called w/ it already locked, and so we > can't do that, without major work... :( Why ? If the knlist lock is not dropped, I do not see a need for the marker. The patch does not introduce the sleep point for the KN_SCAN knotes anyway. > > I added a similar comment in knote_fork: > * XXX - Why do we skip the kn if it is _INFLUX? Does this > * mean we will not properly wake up some notes? > > and it looks like it was true... > > So, upon reading the other _INFLUX cases, it looks like we should change > _SCAN to be, _CHANGING or something similar, and any place we don't end > up dropping the knote, we set this flag also... Once such case is at > the end of kqueue_register, just before the label done_ev_add, where we > update the knote w/ new udata and other fields.. Or change the logic > of the flag, and set it for all the cases we are about to drop the > knote.. So do you prefer KN_CHANGING instead of KN_SCAN ? I do not have any objections against renaming the flag, but _CHANGING seems to not say anything about the flag intent. I would say that KN_STABLE is more useful, or KN_INFLUX_NODEL, or whatever. The done_ev_add case is indeed missed in my patch, thank you for noting. The case of EV_ADD does not need the KN_SCAN workaround, IMO, since the race is possible just by the nature of adding the knote. > > > + KQ_UNLOCK(kq); > > + } else if ((lockflags & KNF_NOKQLOCK) != 0) { > > + kn->kn_status |= KN_INFLUX; > > + KQ_UNLOCK(kq); > > + error = kn->kn_fop->f_event(kn, hint); > > KQ_LOCK(kq); > > I believe we can drop this unlock/lock pair as it's safe to hold the > KQ lock over f_event, we do that in knote_fork... The knote_fork() is for the special kinds of knote only, where we indeed know in advance that having the kqueue locked around f_event does not break things. Updated patch below. diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index b3fb23d..fadb8fd 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c _at__at_ -474,7 +474,7 _at__at_ knote_fork(struct knlist *list, int pid) continue; kq = kn->kn_kq; KQ_LOCK(kq); - if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) { + if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) { KQ_UNLOCK(kq); continue; } _at__at_ -1174,7 +1174,7 _at__at_ findkn: * but doing so will not reset any filter which has already been * triggered. */ - kn->kn_status |= KN_INFLUX; + kn->kn_status |= KN_INFLUX | KN_SCAN; KQ_UNLOCK(kq); KN_LIST_LOCK(kn); kn->kn_kevent.udata = kev->udata; _at__at_ -1197,7 +1197,7 _at__at_ done_ev_add: KQ_LOCK(kq); if (event) KNOTE_ACTIVATE(kn, 1); - kn->kn_status &= ~KN_INFLUX; + kn->kn_status &= ~(KN_INFLUX | KN_SCAN); KN_LIST_UNLOCK(kn); if ((kev->flags & EV_DISABLE) && _at__at_ -1506,7 +1506,7 _at__at_ retry: KQ_LOCK(kq); kn = NULL; } else { - kn->kn_status |= KN_INFLUX; + kn->kn_status |= KN_INFLUX | KN_SCAN; KQ_UNLOCK(kq); if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE) KQ_GLOBAL_LOCK(&kq_global, haskqglobal); _at__at_ -1515,7 +1515,8 _at__at_ retry: KQ_LOCK(kq); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); kn->kn_status &= - ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX); + ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX | + KN_SCAN); kq->kq_count--; KN_LIST_UNLOCK(kn); influx = 1; _at__at_ -1545,7 +1546,7 _at__at_ retry: } else TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); - kn->kn_status &= ~(KN_INFLUX); + kn->kn_status &= ~(KN_INFLUX | KN_SCAN); KN_LIST_UNLOCK(kn); influx = 1; } _at__at_ -1865,28 +1866,33 _at__at_ knote(struct knlist *list, long hint, int lockflags) */ SLIST_FOREACH(kn, &list->kl_list, kn_selnext) { kq = kn->kn_kq; - if ((kn->kn_status & KN_INFLUX) != KN_INFLUX) { + KQ_LOCK(kq); + if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) { + /* + * Do not process the influx notes, except for + * the influx coming from the kq unlock in the + * kqueue_scan(). In the later case, we do + * not interfere with the scan, since the code + * fragment in kqueue_scan() locks the knlist, + * and cannot proceed until we finished. + */ + KQ_UNLOCK(kq); + } else if ((lockflags & KNF_NOKQLOCK) != 0) { + kn->kn_status |= KN_INFLUX; + KQ_UNLOCK(kq); + error = kn->kn_fop->f_event(kn, hint); KQ_LOCK(kq); - if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) { - KQ_UNLOCK(kq); - } else if ((lockflags & KNF_NOKQLOCK) != 0) { - kn->kn_status |= KN_INFLUX; - KQ_UNLOCK(kq); - error = kn->kn_fop->f_event(kn, hint); - KQ_LOCK(kq); - kn->kn_status &= ~KN_INFLUX; - if (error) - KNOTE_ACTIVATE(kn, 1); - KQ_UNLOCK_FLUX(kq); - } else { - kn->kn_status |= KN_HASKQLOCK; - if (kn->kn_fop->f_event(kn, hint)) - KNOTE_ACTIVATE(kn, 1); - kn->kn_status &= ~KN_HASKQLOCK; - KQ_UNLOCK(kq); - } + kn->kn_status &= ~KN_INFLUX; + if (error) + KNOTE_ACTIVATE(kn, 1); + KQ_UNLOCK_FLUX(kq); + } else { + kn->kn_status |= KN_HASKQLOCK; + if (kn->kn_fop->f_event(kn, hint)) + KNOTE_ACTIVATE(kn, 1); + kn->kn_status &= ~KN_HASKQLOCK; + KQ_UNLOCK(kq); } - kq = NULL; } if ((lockflags & KNF_LISTLOCKED) == 0) list->kl_unlock(list->kl_lockarg); diff --git a/sys/sys/event.h b/sys/sys/event.h index bad8c9e..3b765c0 100644 --- a/sys/sys/event.h +++ b/sys/sys/event.h _at__at_ -207,6 +207,7 _at__at_ struct knote { #define KN_MARKER 0x20 /* ignore this knote */ #define KN_KQUEUE 0x40 /* this knote belongs to a kq */ #define KN_HASKQLOCK 0x80 /* for _inevent */ +#define KN_SCAN 0x100 /* flux set in kqueue_scan() */ int kn_sfflags; /* saved filter flags */ intptr_t kn_sdata; /* saved data field */ union {
This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:40:48 UTC