Re: proposed smp_rendezvous change

From: John Baldwin <jhb@freebsd.org>
Date: Tue, 17 May 2011 16:35:17 -0400
On Tuesday, May 17, 2011 3:16:45 pm Max Laier wrote:
> On 05/17/2011 09:56 AM, John Baldwin wrote:
> > On Tuesday, May 17, 2011 12:20:40 pm Max Laier wrote:
> >> On 05/17/2011 05:16 AM, John Baldwin wrote:
> >> ...
> >>> Index: kern/kern_switch.c
> >>> ===================================================================
> >>> --- kern/kern_switch.c (revision 221536)
> >>> +++ kern/kern_switch.c (working copy)
> >>> @@ -192,15 +192,22 @@
> >>> critical_exit(void)
> >>> {
> >>> struct thread *td;
> >>> - int flags;
> >>> + int flags, owepreempt;
> >>>
> >>> td = curthread;
> >>> KASSERT(td->td_critnest != 0,
> >>> ("critical_exit: td_critnest == 0"));
> >>>
> >>> if (td->td_critnest == 1) {
> >>> + owepreempt = td->td_owepreempt;
> >>> + td->td_owepreempt = 0;
> >>> + /*
> >>> + * XXX: Should move compiler_memory_barrier() from
> >>> + * rmlock to a header.
> >>> + */
> >>
> >> XXX: If we get an interrupt at this point and td_owepreempt was zero,
> >> the new interrupt will re-set it, because td_critnest is still non-zero.
> >>
> >> So we still end up with a thread that is leaking an owepreempt *and*
> >> lose a preemption.
> >
> > I don't see how this can still leak owepreempt.  The nested interrupt 
should
> > do nothing (except for possibly set owepreempt) until td_critnest is 0.
> 
> Exactly.  The interrupt sets owepreempt and after we return here, we set 
> td_critnest to 0 and exit without clearing owepreempt.  Hence we leak 
> the owepreempt.
> 
> > However, we can certainly lose preemptions.
> >
> > I wonder if we can abuse the high bit of td_critnest for the owepreempt 
flag
> > so it is all stored in one cookie.  We only set owepreempt while holding
> > thread_lock() (so interrupts are disabled), so I think we would be ok and 
not
> > need atomic ops.
> >
> > Hmm, actually, the top-half code would have to use atomic ops.  Nuts.  Let 
me
> > think some more.
> 
> I think these two really belong into one single variable.  Setting the 
> owepreempt flag can be a normal RMW.  Increasing and decreasing critnest 
> must be atomic (otherwise we could lose the flag) and dropping the final 
> reference would work like this:
> 
>    if ((curthread->td_critnest & TD_CRITNEST_MASK) == 1) {
>      unsigned int owe;
>      owe = atomic_readandclear(&curthread->td_critnest);
>      if (owe & TD_OWEPREEMPT_FLAG) {
>        /* do the switch */
>    }
> 
> That should do it ... I can put that into a patch, if we agree that's 
> the right thing to do.

Yeah, I already have a patch to do that, but hadn't added atomic ops to 
critical_enter() and critical_exit().  But it also wasn't as fancy in the 
critical_exit() case.  Here is what I have and I think it might actually
be ok (it doesn't use an atomic read and clear, but I think it is safe).
Hmm, actually, it will need to use the read and clear:

Index: kern/sched_ule.c
===================================================================
--- kern/sched_ule.c	(revision 222024)
+++ kern/sched_ule.c	(working copy)
@@ -1785,7 +1785,6 @@
 	td->td_oncpu = NOCPU;
 	if (!(flags & SW_PREEMPT))
 		td->td_flags &= ~TDF_NEEDRESCHED;
-	td->td_owepreempt = 0;
 	tdq->tdq_switchcnt++;
 	/*
 	 * The lock pointer in an idle thread should never change.  Reset it
@@ -2066,7 +2065,7 @@
 
 		flags = SW_INVOL | SW_PREEMPT;
 		if (td->td_critnest > 1)
-			td->td_owepreempt = 1;
+			td->td_critnest |= TD_OWEPREEMPT;
 		else if (TD_IS_IDLETHREAD(td))
 			mi_switch(flags | SWT_REMOTEWAKEIDLE, NULL);
 		else
@@ -2261,7 +2260,7 @@
 		return;
 	if (!sched_shouldpreempt(pri, cpri, 0))
 		return;
-	ctd->td_owepreempt = 1;
+	ctd->td_critnest |= TD_OWEPREEMPT;
 }
 
 /*
Index: kern/kern_switch.c
===================================================================
--- kern/kern_switch.c	(revision 222024)
+++ kern/kern_switch.c	(working copy)
@@ -183,7 +183,7 @@
 	struct thread *td;
 
 	td = curthread;
-	td->td_critnest++;
+	atomic_add_int(&td->td_critnest, 1);
 	CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
 	    (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
 }
@@ -192,18 +192,16 @@
 critical_exit(void)
 {
 	struct thread *td;
-	int flags;
+	int flags, owepreempt;
 
 	td = curthread;
 	KASSERT(td->td_critnest != 0,
 	    ("critical_exit: td_critnest == 0"));
 
-	if (td->td_critnest == 1) {
-		td->td_critnest = 0;
-		if (td->td_owepreempt) {
-			td->td_critnest = 1;
+	if ((td->td_critnest & ~TD_OWEPREEMPT) == 1) {
+		owepreempt = atomic_readandclear_int(&td->td_critnest);
+		if (owepreempt & TD_OWEPREEMPT) {
 			thread_lock(td);
-			td->td_critnest--;
 			flags = SW_INVOL | SW_PREEMPT;
 			if (TD_IS_IDLETHREAD(td))
 				flags |= SWT_IDLE;
@@ -213,7 +211,7 @@
 			thread_unlock(td);
 		}
 	} else
-		td->td_critnest--;
+		atomic_subtract_int(&td->td_critnest, 1);
 
 	CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
 	    (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
Index: kern/kern_synch.c
===================================================================
--- kern/kern_synch.c	(revision 222024)
+++ kern/kern_synch.c	(working copy)
@@ -400,9 +400,7 @@
 	if (!TD_ON_LOCK(td) && !TD_IS_RUNNING(td))
 		mtx_assert(&Giant, MA_NOTOWNED);
 #endif
-	KASSERT(td->td_critnest == 1 || (td->td_critnest == 2 &&
-	    (td->td_owepreempt) && (flags & SW_INVOL) != 0 &&
-	    newtd == NULL) || panicstr,
+	KASSERT(td->td_critnest == 1 || panicstr,
 	    ("mi_switch: switch in a critical section"));
 	KASSERT((flags & (SW_INVOL | SW_VOL)) != 0,
 	    ("mi_switch: switch must be voluntary or involuntary"));
Index: kern/sched_4bsd.c
===================================================================
--- kern/sched_4bsd.c	(revision 222024)
+++ kern/sched_4bsd.c	(working copy)
@@ -335,7 +335,7 @@
 	if (ctd->td_critnest > 1) {
 		CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
 		    ctd->td_critnest);
-		ctd->td_owepreempt = 1;
+		ctd->td_critnest |= TD_OWEPREEMPT;
 		return (0);
 	}
 	/*
@@ -949,7 +949,6 @@
 	td->td_lastcpu = td->td_oncpu;
 	if (!(flags & SW_PREEMPT))
 		td->td_flags &= ~TDF_NEEDRESCHED;
-	td->td_owepreempt = 0;
 	td->td_oncpu = NOCPU;
 
 	/*
@@ -1437,7 +1436,7 @@
 {
 	thread_lock(td);
 	if (td->td_critnest > 1)
-		td->td_owepreempt = 1;
+		td->td_critnest |= TD_OWEPREEMPT;
 	else
 		mi_switch(SW_INVOL | SW_PREEMPT | SWT_PREEMPT, NULL);
 	thread_unlock(td);
Index: kern/kern_rmlock.c
===================================================================
--- kern/kern_rmlock.c	(revision 222024)
+++ kern/kern_rmlock.c	(working copy)
@@ -366,7 +366,8 @@
 	 * Fast path to combine two common conditions into a single
 	 * conditional jump.
 	 */
-	if (0 == (td->td_owepreempt | (rm->rm_writecpus & pc->pc_cpumask)))
+	if (0 == ((td->td_critnest & TD_OWEPREEMPT) |
+	    (rm->rm_writecpus & pc->pc_cpumask)))
 		return (1);
 
 	/* We do not have a read token and need to acquire one. */
@@ -377,7 +378,7 @@
 _rm_unlock_hard(struct thread *td,struct rm_priotracker *tracker)
 {
 
-	if (td->td_owepreempt) {
+	if (td->td_critnest & TD_OWEPREEMPT) {
 		td->td_critnest++;
 		critical_exit();
 	}
@@ -418,7 +419,7 @@
 	td->td_critnest--;
 	sched_unpin();
 
-	if (0 == (td->td_owepreempt | tracker->rmp_flags))
+	if (0 == ((td->td_critnest & TD_OWEPREEMPT) | tracker->rmp_flags))
 		return;
 
 	_rm_unlock_hard(td, tracker);
Index: sys/proc.h
===================================================================
--- sys/proc.h	(revision 222024)
+++ sys/proc.h	(working copy)
@@ -228,7 +228,6 @@
 	const char	*td_wmesg;	/* (t) Reason for sleep. */
 	u_char		td_lastcpu;	/* (t) Last cpu we were on. */
 	u_char		td_oncpu;	/* (t) Which cpu we are on. */
-	volatile u_char td_owepreempt;  /* (k*) Preempt on last critical_exit */
 	u_char		td_tsqueue;	/* (t) Turnstile queue blocked on. */
 	short		td_locks;	/* (k) Count of non-spin locks. */
 	short		td_rw_rlocks;	/* (k) Count of rwlock read locks. */
@@ -465,6 +464,12 @@
 #define	TD_SET_CAN_RUN(td)	(td)->td_state = TDS_CAN_RUN
 
 /*
+ * The high bit of td_critnest is used to determine if a preemption is
+ * pending.
+ */
+#define	TD_OWEPREEMPT		((u_int)INT_MAX + 1)
+
+/*
  * Process structure.
  */
 struct proc {

-- 
John Baldwin
Received on Tue May 17 2011 - 18:35:19 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:40:14 UTC