Index: sys/kern/kern_timeout.c =================================================================== --- sys/kern/kern_timeout.c (revision 276240) +++ sys/kern/kern_timeout.c (working copy) @@ -125,36 +125,56 @@ u_int callwheelsize, callwheelmask; /* - * The callout cpu exec entities represent informations necessary for - * describing the state of callouts currently running on the CPU and the ones - * necessary for migrating callouts to the new callout cpu. In particular, - * the first entry of the array cc_exec_entity holds informations for callout - * running in SWI thread context, while the second one holds informations - * for callout running directly from hardware interrupt context. - * The cached informations are very important for deferring migration when - * the migrating callout is already running. + * The callout CPU exec structure represents information necessary for + * describing the state of callouts currently running on the CPU and + * for handling deferred callout restarts. + * + * In particular, the first entry of the array cc_exec_entity holds + * information for callouts running from the SWI thread context, while + * the second one holds information for callouts running directly from + * the hardware interrupt context. */ struct cc_exec { - struct callout *cc_next; + /* + * The "cc_curr" points to the currently executing callout and + * is write protected by the "cc_lock" spinlock. If no + * callback is currently executing, it is equal to "NULL". + */ struct callout *cc_curr; -#ifdef SMP - void (*ce_migration_func)(void *); - void *ce_migration_arg; - int ce_migration_cpu; - sbintime_t ce_migration_time; - sbintime_t ce_migration_prec; -#endif - bool cc_cancel; - bool cc_waiting; + /* + * The "cc_restart_args" structure holds the arguments for a + * deferred callback restart and is write protected by the + * "cc_lock" spinlock. The structure is only valid if + * "cc_restart" is "true". If "cc_restart" is "false" the + * information in the "cc_restart_args" structure shall be + * ignored. + */ + struct callout_args cc_restart_args; + bool cc_restart; + /* + * The "cc_cancel" variable allows the currently pending + * callback to be atomically cancelled. This field is write + * protected by the "cc_lock" spinlock. + */ + bool cc_cancel; + /* + * The "cc_drain_fn" points to a function which shall be + * called with the argument stored in "cc_drain_arg" when an + * asynchronous drain is performed. This field is write + * protected by the "cc_lock" spinlock. + */ + callout_func_t *cc_drain_fn; + void *cc_drain_arg; }; /* - * There is one struct callout_cpu per cpu, holding all relevant + * There is one "struct callout_cpu" per CPU, holding all relevant * state for the callout processing thread on the individual CPU.
*/ struct callout_cpu { struct mtx_padalign cc_lock; struct cc_exec cc_exec_entity[2]; + struct callout *cc_exec_next_dir; struct callout *cc_callout; struct callout_list *cc_callwheel; struct callout_tailq cc_expireq; @@ -166,27 +186,23 @@ char cc_ktr_event_name[20]; }; -#define cc_exec_curr cc_exec_entity[0].cc_curr -#define cc_exec_next cc_exec_entity[0].cc_next -#define cc_exec_cancel cc_exec_entity[0].cc_cancel -#define cc_exec_waiting cc_exec_entity[0].cc_waiting -#define cc_exec_curr_dir cc_exec_entity[1].cc_curr -#define cc_exec_next_dir cc_exec_entity[1].cc_next -#define cc_exec_cancel_dir cc_exec_entity[1].cc_cancel -#define cc_exec_waiting_dir cc_exec_entity[1].cc_waiting +/* + * When "CALLOUT_RDONLY_C_CPU(c)" returns "true" it indicates the + * given callback doesn't have any lock associated with it, or is + * using the Giant lock or a shared lock. For these kinds of + * callbacks the "c_cpu" field must remain constant, hence there is no + * lock to protect updates to this field. + * + * When "CALLOUT_RDONLY_C_CPU(c)" returns "false" it indicates that + * "c->c_lock" is protecting any changes to "c->c_cpu" for the + * specified callback. + */ +#define CALLOUT_RDONLY_C_CPU(c) \ + ((c)->c_lock == NULL || (c)->c_lock == &Giant.lock_object || \ + ((c)->c_flags & (CALLOUT_LOCAL_ALLOC | CALLOUT_RETURNUNLOCKED | \ + CALLOUT_SHAREDLOCK)) != 0) #ifdef SMP -#define cc_migration_func cc_exec_entity[0].ce_migration_func -#define cc_migration_arg cc_exec_entity[0].ce_migration_arg -#define cc_migration_cpu cc_exec_entity[0].ce_migration_cpu -#define cc_migration_time cc_exec_entity[0].ce_migration_time -#define cc_migration_prec cc_exec_entity[0].ce_migration_prec -#define cc_migration_func_dir cc_exec_entity[1].ce_migration_func -#define cc_migration_arg_dir cc_exec_entity[1].ce_migration_arg -#define cc_migration_cpu_dir cc_exec_entity[1].ce_migration_cpu -#define cc_migration_time_dir cc_exec_entity[1].ce_migration_time -#define cc_migration_prec_dir cc_exec_entity[1].ce_migration_prec - struct callout_cpu cc_cpu[MAXCPU]; #define CPUBLOCK MAXCPU #define CC_CPU(cpu) (&cc_cpu[(cpu)]) @@ -211,62 +227,11 @@ static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); -/** - * Locked by cc_lock: - * cc_curr - If a callout is in progress, it is cc_curr. - * If cc_curr is non-NULL, threads waiting in - * callout_drain() will be woken up as soon as the - * relevant callout completes. - * cc_cancel - Changing to 1 with both callout_lock and cc_lock held - * guarantees that the current callout will not run. - * The softclock() function sets this to 0 before it - * drops callout_lock to acquire c_lock, and it calls - * the handler only if curr_cancelled is still 0 after - * cc_lock is successfully acquired. - * cc_waiting - If a thread is waiting in callout_drain(), then - * callout_wait is nonzero. Set only when - * cc_curr is non-NULL. - */ - /* - * Resets the execution entity tied to a specific callout cpu.
+ * Kernel low level callwheel initialization called from cpu0 during + * kernel startup: */ static void -cc_cce_cleanup(struct callout_cpu *cc, int direct) -{ - - cc->cc_exec_entity[direct].cc_curr = NULL; - cc->cc_exec_entity[direct].cc_next = NULL; - cc->cc_exec_entity[direct].cc_cancel = false; - cc->cc_exec_entity[direct].cc_waiting = false; -#ifdef SMP - cc->cc_exec_entity[direct].ce_migration_cpu = CPUBLOCK; - cc->cc_exec_entity[direct].ce_migration_time = 0; - cc->cc_exec_entity[direct].ce_migration_prec = 0; - cc->cc_exec_entity[direct].ce_migration_func = NULL; - cc->cc_exec_entity[direct].ce_migration_arg = NULL; -#endif -} - -/* - * Checks if migration is requested by a specific callout cpu. - */ -static int -cc_cce_migrating(struct callout_cpu *cc, int direct) -{ - -#ifdef SMP - return (cc->cc_exec_entity[direct].ce_migration_cpu != CPUBLOCK); -#else - return (0); -#endif -} - -/* - * Kernel low level callwheel initialization - * called on cpu0 during kernel startup. - */ -static void callout_callwheel_init(void *dummy) { struct callout_cpu *cc; @@ -324,8 +289,6 @@ LIST_INIT(&cc->cc_callwheel[i]); TAILQ_INIT(&cc->cc_expireq); cc->cc_firstevent = SBT_MAX; - for (i = 0; i < 2; i++) - cc_cce_cleanup(cc, i); snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name), "callwheel cpu %d", cpu); if (cc->cc_callout == NULL) /* Only cpu0 handles timeout(9) */ @@ -338,37 +301,7 @@ } } -#ifdef SMP /* - * Switches the cpu tied to a specific callout. - * The function expects a locked incoming callout cpu and returns with - * locked outcoming callout cpu. - */ -static struct callout_cpu * -callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu) -{ - struct callout_cpu *new_cc; - - MPASS(c != NULL && cc != NULL); - CC_LOCK_ASSERT(cc); - - /* - * Avoid interrupts and preemption firing after the callout cpu - * is blocked in order to avoid deadlocks as the new thread - * may be willing to acquire the callout cpu lock. - */ - c->c_cpu = CPUBLOCK; - spinlock_enter(); - CC_UNLOCK(cc); - new_cc = CC_CPU(new_cpu); - CC_LOCK(new_cc); - spinlock_exit(); - c->c_cpu = new_cpu; - return (new_cc); -} -#endif - -/* * Start standard softclock thread. 
*/ static void @@ -562,49 +495,54 @@ callout_lock(struct callout *c) { struct callout_cpu *cc; - int cpu; - - for (;;) { - cpu = c->c_cpu; -#ifdef SMP - if (cpu == CPUBLOCK) { - while (c->c_cpu == CPUBLOCK) - cpu_spinwait(); - continue; - } -#endif - cc = CC_CPU(cpu); - CC_LOCK(cc); - if (cpu == c->c_cpu) - break; - CC_UNLOCK(cc); - } + cc = CC_CPU(c->c_cpu); + CC_LOCK(cc); return (cc); } -static void -callout_cc_add(struct callout *c, struct callout_cpu *cc, - sbintime_t sbt, sbintime_t precision, void (*func)(void *), - void *arg, int cpu, int flags) +static struct callout_cpu * +callout_cc_add_locked(struct callout *c, struct callout_cpu *cc, + struct callout_args *coa) { +#ifndef NO_EVENTTIMERS + sbintime_t sbt; +#endif int bucket; CC_LOCK_ASSERT(cc); - if (sbt < cc->cc_lastscan) - sbt = cc->cc_lastscan; - c->c_arg = arg; - c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); - if (flags & C_DIRECT_EXEC) - c->c_flags |= CALLOUT_DIRECT; - c->c_flags &= ~CALLOUT_PROCESSED; - c->c_func = func; - c->c_time = sbt; - c->c_precision = precision; + + /* update flags before swapping locks, if any */ + c->c_flags &= ~(CALLOUT_PROCESSED | CALLOUT_DIRECT | CALLOUT_DEFRESTART); + if (coa->flags & C_DIRECT_EXEC) + c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING | CALLOUT_DIRECT); + else + c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); + +#ifdef SMP + /* + * Check if we are changing the CPU on which the callback + * should be executed: + */ + if (coa->cpu != c->c_cpu) { + CC_UNLOCK(cc); + c->c_cpu = coa->cpu; + cc = callout_lock(c); + } +#endif + if (coa->time < cc->cc_lastscan) + coa->time = cc->cc_lastscan; + c->c_arg = coa->arg; + c->c_func = coa->func; + c->c_time = coa->time; + c->c_precision = coa->precision; + bucket = callout_get_bucket(c->c_time); CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x", c, (int)(c->c_precision >> 32), (u_int)(c->c_precision & 0xffffffff)); LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le); + + /* Ensure we are first to be scanned, if called via a callback */ if (cc->cc_bucket == bucket) cc->cc_exec_next_dir = c; #ifndef NO_EVENTTIMERS @@ -617,9 +555,10 @@ sbt = c->c_time + c->c_precision; if (sbt < cc->cc_firstevent) { cc->cc_firstevent = sbt; - cpu_new_callout(cpu, sbt, c->c_time); + cpu_new_callout(coa->cpu, sbt, c->c_time); } #endif + return (cc); } static void @@ -626,8 +565,6 @@ callout_cc_del(struct callout *c, struct callout_cpu *cc) { - if ((c->c_flags & CALLOUT_LOCAL_ALLOC) == 0) - return; c->c_func = NULL; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } @@ -640,19 +577,13 @@ int direct) { struct rm_priotracker tracker; - void (*c_func)(void *); + struct callout_cpu *cc_orig = cc; + callout_func_t *c_func; void *c_arg; struct lock_class *class; struct lock_object *c_lock; uintptr_t lock_status; int c_flags; -#ifdef SMP - struct callout_cpu *new_cc; - void (*new_func)(void *); - void *new_arg; - int flags, new_cpu; - sbintime_t new_prec, new_time; -#endif #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) sbintime_t sbt1, sbt2; struct timespec ts2; @@ -675,25 +606,36 @@ c_func = c->c_func; c_arg = c->c_arg; c_flags = c->c_flags; - if (c->c_flags & CALLOUT_LOCAL_ALLOC) + if (c_flags & CALLOUT_LOCAL_ALLOC) c->c_flags = CALLOUT_LOCAL_ALLOC; else c->c_flags &= ~CALLOUT_PENDING; + + /* reset our local state */ cc->cc_exec_entity[direct].cc_curr = c; cc->cc_exec_entity[direct].cc_cancel = false; + cc->cc_exec_entity[direct].cc_restart = false; + cc->cc_exec_entity[direct].cc_drain_fn = NULL; + cc->cc_exec_entity[direct].cc_drain_arg = NULL; 
CC_UNLOCK(cc); + + /* unlocked region for switching locks, if any */ + if (c_lock != NULL) { class->lc_lock(c_lock, lock_status); /* - * The callout may have been cancelled - * while we switched locks. + * Check if the callout may have been cancelled while + * we were switching locks: */ if (cc->cc_exec_entity[direct].cc_cancel) { - class->lc_unlock(c_lock); - goto skip; + if ((c_flags & CALLOUT_RETURNUNLOCKED) != 0) + class->lc_unlock(c_lock); + goto skip_callback; } - /* The callout cannot be stopped now. */ + + /* The callout cannot be stopped now! */ cc->cc_exec_entity[direct].cc_cancel = true; + if (c_lock == &Giant.lock_object) { #ifdef CALLOUT_PROFILING (*gcalls)++; @@ -740,85 +682,49 @@ #endif KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle"); CTR1(KTR_CALLOUT, "callout %p finished", c); - if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0) - class->lc_unlock(c_lock); -skip: + +skip_callback: CC_LOCK(cc); KASSERT(cc->cc_exec_entity[direct].cc_curr == c, ("mishandled cc_curr")); cc->cc_exec_entity[direct].cc_curr = NULL; - if (cc->cc_exec_entity[direct].cc_waiting) { - /* - * There is someone waiting for the - * callout to complete. - * If the callout was scheduled for - * migration just cancel it. - */ - if (cc_cce_migrating(cc, direct)) { - cc_cce_cleanup(cc, direct); - /* - * It should be assert here that the callout is not - * destroyed but that is not easy. - */ - c->c_flags &= ~CALLOUT_DFRMIGRATION; - } - cc->cc_exec_entity[direct].cc_waiting = false; - CC_UNLOCK(cc); - wakeup(&cc->cc_exec_entity[direct].cc_waiting); - CC_LOCK(cc); - } else if (cc_cce_migrating(cc, direct)) { - KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0, - ("Migrating legacy callout %p", c)); -#ifdef SMP - /* - * If the callout was scheduled for - * migration just perform it now. - */ - new_cpu = cc->cc_exec_entity[direct].ce_migration_cpu; - new_time = cc->cc_exec_entity[direct].ce_migration_time; - new_prec = cc->cc_exec_entity[direct].ce_migration_prec; - new_func = cc->cc_exec_entity[direct].ce_migration_func; - new_arg = cc->cc_exec_entity[direct].ce_migration_arg; - cc_cce_cleanup(cc, direct); + /* + * At this point the callback structure might have been freed, + * so we need to check the copied "c->c_flags" to figure out + * if the callback is preallocated or not! + */ + if (c_flags & CALLOUT_LOCAL_ALLOC) { + /* return callout back to freelist */ + callout_cc_del(c, cc); + } else if (cc->cc_exec_entity[direct].cc_restart) { + /* [re-]schedule callout, if any */ + cc = callout_cc_add_locked(c, cc, + &cc->cc_exec_entity[direct].cc_restart_args); + } - /* - * It should be assert here that the callout is not destroyed - * but that is not easy. - * - * As first thing, handle deferred callout stops. - */ - if ((c->c_flags & CALLOUT_DFRMIGRATION) == 0) { - CTR3(KTR_CALLOUT, - "deferred cancelled %p func %p arg %p", - c, new_func, new_arg); - callout_cc_del(c, cc); - return; - } - c->c_flags &= ~CALLOUT_DFRMIGRATION; + /* + * Unlock the callback lock at the end, if any, so that the + * "c_cpu" field in the callback structure gets write + * protected by this lock. + */ + if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0) + class->lc_unlock(c_lock); - new_cc = callout_cpu_switch(c, cc, new_cpu); - flags = (direct) ? 
C_DIRECT_EXEC : 0; - callout_cc_add(c, new_cc, new_time, new_prec, new_func, - new_arg, new_cpu, flags); - CC_UNLOCK(new_cc); - CC_LOCK(cc); -#else - panic("migration should not happen"); -#endif - } /* - * If the current callout is locally allocated (from - * timeout(9)) then put it on the freelist. - * - * Note: we need to check the cached copy of c_flags because - * if it was not local, then it's not safe to deref the - * callout pointer. + * Check if there is anything which needs draining. There is + * no need to lock any locks here, hence the calling thread is + * the only one writing these fields! */ - KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0 || - c->c_flags == CALLOUT_LOCAL_ALLOC, - ("corrupted callout")); - if (c_flags & CALLOUT_LOCAL_ALLOC) - callout_cc_del(c, cc); + if (cc->cc_exec_entity[direct].cc_drain_fn != NULL) { + CC_UNLOCK(cc); + cc->cc_exec_entity[direct].cc_drain_fn( + cc->cc_exec_entity[direct].cc_drain_arg); + CC_LOCK(cc_orig); + } else if (cc_orig != cc) { + /* switch back locks again, must return locked */ + CC_UNLOCK(cc); + CC_LOCK(cc_orig); + } } /* @@ -899,10 +805,11 @@ /* XXX Attempt to malloc first */ panic("timeout table full"); SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle); - callout_reset(new, to_ticks, ftn, arg); handle.callout = new; CC_UNLOCK(cc); + callout_reset(new, to_ticks, ftn, arg); + return (handle); } @@ -910,6 +817,7 @@ untimeout(timeout_t *ftn, void *arg, struct callout_handle handle) { struct callout_cpu *cc; + bool match; /* * Check for a handle that was initialized @@ -920,9 +828,11 @@ return; cc = callout_lock(handle.callout); - if (handle.callout->c_func == ftn && handle.callout->c_arg == arg) + match = (handle.callout->c_func == ftn && handle.callout->c_arg == arg); + CC_UNLOCK(cc); + + if (match) callout_stop(handle.callout); - CC_UNLOCK(cc); } void @@ -931,6 +841,143 @@ handle->callout = NULL; } +#if defined(DIAGNOSTIC) +static void +callout_assert_locks(struct callout *c) +{ + struct lock_class *class; + /* + * Some old subsystems don't hold Giant while stopping and + * starting callouts. Discard any asserts if the Giant lock is + * specified. + */ + if (c->c_lock == NULL || c->c_lock == &Giant.lock_object) + return; + class = LOCK_CLASS(c->c_lock); + class->lc_assert(c->c_lock, LA_XLOCKED); +} +#endif + +static int +callout_restart_async(struct callout *c, struct callout_args *coa, + callout_func_t *drain_fn, void *drain_arg) +{ + struct callout_cpu *cc; + int cancelled; + int direct; + +#if defined(DIAGNOSTIC) + callout_assert_locks(c); +#endif + cc = callout_lock(c); + +#ifdef SMP + /* don't change CPU, if any */ + if (coa != NULL && CALLOUT_RDONLY_C_CPU(c)) + coa->cpu = c->c_cpu; +#endif + /* Figure out if the callout is direct or not */ + direct = ((c->c_flags & CALLOUT_DIRECT) != 0); + + /* + * Check if the callback is currently scheduled for + * completion: + */ + if (cc->cc_exec_entity[direct].cc_curr == c) { + /* + * Try to prevent the callback from running by setting + * the "cc_cancel" variable to "true". 
Also check if + * the callout was previously subject to a deferred + * callout restart: + */ + if (cc->cc_exec_entity[direct].cc_cancel == false || + (c->c_flags & CALLOUT_DEFRESTART) != 0) { + cc->cc_exec_entity[direct].cc_cancel = true; + cancelled = 1; + } else { + cancelled = 0; + } + + /* + * Prevent callback restart if "callout_drain_xxx()" + * is being called or we are stopping the callout or + * the callback was preallocated by us: + */ + if (cc->cc_exec_entity[direct].cc_drain_fn != NULL || + coa == NULL || (c->c_flags & CALLOUT_LOCAL_ALLOC) != 0) { + CTR4(KTR_CALLOUT, "%s %p func %p arg %p", + cancelled ? "cancelled and draining" : "draining", + c, c->c_func, c->c_arg); + + /* set drain function, if any */ + if (drain_fn != NULL) { + cc->cc_exec_entity[direct].cc_drain_fn = drain_fn; + cc->cc_exec_entity[direct].cc_drain_arg = drain_arg; + cancelled |= 2; /* XXX define the value */ + } + /* clear old flags, if any */ + c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING | + CALLOUT_DEFRESTART | CALLOUT_PROCESSED); + + /* clear restart flag, if any */ + cc->cc_exec_entity[direct].cc_restart = false; + } else { + CTR4(KTR_CALLOUT, "%s %p func %p arg %p", + cancelled ? "cancelled and restarting" : "restarting", + c, c->c_func, c->c_arg); + + /* get us back into the game */ + c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING | + CALLOUT_DEFRESTART); + c->c_flags &= ~CALLOUT_PROCESSED; + + /* enable deferred restart */ + cc->cc_exec_entity[direct].cc_restart = true; + + /* store arguments for the deferred restart, if any */ + cc->cc_exec_entity[direct].cc_restart_args = *coa; + } + } else { + /* stop callout */ + if (c->c_flags & CALLOUT_PENDING) { + /* + * The callback has not yet been executed, and + * we simply just need to unlink it: + */ + if ((c->c_flags & CALLOUT_PROCESSED) == 0) { + if (cc->cc_exec_next_dir == c) + cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le); + LIST_REMOVE(c, c_links.le); + } else { + TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); + } + cancelled = 1; + } else { + cancelled = 0; + } + + /* [re-]schedule callout, if any */ + if (coa != NULL) { + cc = callout_cc_add_locked(c, cc, coa); + } else { + /* clear old flags, if any */ + c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING | + CALLOUT_DEFRESTART | CALLOUT_PROCESSED); + + /* return callback to pre-allocated list, if any */ + if ((c->c_flags & CALLOUT_LOCAL_ALLOC) && cancelled != 0) { + callout_cc_del(c, cc); + } + } + + CTR4(KTR_CALLOUT, "%s %p func %p arg %p", + cancelled ? "rescheduled" : "scheduled", + c, c->c_func, c->c_arg); + } + CC_UNLOCK(cc); + return (cancelled); +} + /* * New interface; clients allocate their own callout structures. 
* @@ -949,25 +996,32 @@ */ int callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision, - void (*ftn)(void *), void *arg, int cpu, int flags) + callout_func_t *ftn, void *arg, int cpu, int flags) { - sbintime_t to_sbt, pr; - struct callout_cpu *cc; - int cancelled, direct; + struct callout_args coa; - cancelled = 0; - if (flags & C_ABSOLUTE) { - to_sbt = sbt; + /* store arguments for callout add function */ + coa.func = ftn; + coa.arg = arg; + coa.precision = precision; + coa.flags = flags; + coa.cpu = cpu; + + /* compute the rest of the arguments needed */ + if (coa.flags & C_ABSOLUTE) { + coa.time = sbt; } else { - if ((flags & C_HARDCLOCK) && (sbt < tick_sbt)) + sbintime_t pr; + + if ((coa.flags & C_HARDCLOCK) && (sbt < tick_sbt)) sbt = tick_sbt; - if ((flags & C_HARDCLOCK) || + if ((coa.flags & C_HARDCLOCK) || #ifdef NO_EVENTTIMERS sbt >= sbt_timethreshold) { - to_sbt = getsbinuptime(); + coa.time = getsbinuptime(); /* Add safety belt for the case of hz > 1000. */ - to_sbt += tc_tick_sbt - tick_sbt; + coa.time += tc_tick_sbt - tick_sbt; #else sbt >= sbt_tickthreshold) { /* @@ -977,101 +1031,29 @@ * active ones. */ #ifdef __LP64__ - to_sbt = DPCPU_GET(hardclocktime); + coa.time = DPCPU_GET(hardclocktime); #else spinlock_enter(); - to_sbt = DPCPU_GET(hardclocktime); + coa.time = DPCPU_GET(hardclocktime); spinlock_exit(); #endif #endif - if ((flags & C_HARDCLOCK) == 0) - to_sbt += tick_sbt; + if ((coa.flags & C_HARDCLOCK) == 0) + coa.time += tick_sbt; } else - to_sbt = sbinuptime(); - if (SBT_MAX - to_sbt < sbt) - to_sbt = SBT_MAX; + coa.time = sbinuptime(); + if (SBT_MAX - coa.time < sbt) + coa.time = SBT_MAX; else - to_sbt += sbt; - pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp : - sbt >> C_PRELGET(flags)); - if (pr > precision) - precision = pr; + coa.time += sbt; + pr = ((C_PRELGET(coa.flags) < 0) ? sbt >> tc_precexp : + sbt >> C_PRELGET(coa.flags)); + if (pr > coa.precision) + coa.precision = pr; } - /* - * Don't allow migration of pre-allocated callouts lest they - * become unbalanced. - */ - if (c->c_flags & CALLOUT_LOCAL_ALLOC) - cpu = c->c_cpu; - direct = (c->c_flags & CALLOUT_DIRECT) != 0; - KASSERT(!direct || c->c_lock == NULL, - ("%s: direct callout %p has lock", __func__, c)); - cc = callout_lock(c); - if (cc->cc_exec_entity[direct].cc_curr == c) { - /* - * We're being asked to reschedule a callout which is - * currently in progress. If there is a lock then we - * can cancel the callout if it has not really started. - */ - if (c->c_lock != NULL && !cc->cc_exec_entity[direct].cc_cancel) - cancelled = cc->cc_exec_entity[direct].cc_cancel = true; - if (cc->cc_exec_entity[direct].cc_waiting) { - /* - * Someone has called callout_drain to kill this - * callout. Don't reschedule. - */ - CTR4(KTR_CALLOUT, "%s %p func %p arg %p", - cancelled ? "cancelled" : "failed to cancel", - c, c->c_func, c->c_arg); - CC_UNLOCK(cc); - return (cancelled); - } - } - if (c->c_flags & CALLOUT_PENDING) { - if ((c->c_flags & CALLOUT_PROCESSED) == 0) { - if (cc->cc_exec_next_dir == c) - cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le); - LIST_REMOVE(c, c_links.le); - } else - TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); - cancelled = 1; - c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); - } -#ifdef SMP - /* - * If the callout must migrate try to perform it immediately. - * If the callout is currently running, just defer the migration - * to a more appropriate moment. 
- */ - if (c->c_cpu != cpu) { - if (cc->cc_exec_entity[direct].cc_curr == c) { - cc->cc_exec_entity[direct].ce_migration_cpu = cpu; - cc->cc_exec_entity[direct].ce_migration_time - = to_sbt; - cc->cc_exec_entity[direct].ce_migration_prec - = precision; - cc->cc_exec_entity[direct].ce_migration_func = ftn; - cc->cc_exec_entity[direct].ce_migration_arg = arg; - c->c_flags |= CALLOUT_DFRMIGRATION; - CTR6(KTR_CALLOUT, - "migration of %p func %p arg %p in %d.%08x to %u deferred", - c, c->c_func, c->c_arg, (int)(to_sbt >> 32), - (u_int)(to_sbt & 0xffffffff), cpu); - CC_UNLOCK(cc); - return (cancelled); - } - cc = callout_cpu_switch(c, cc, cpu); - } -#endif - - callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags); - CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x", - cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32), - (u_int)(to_sbt & 0xffffffff)); - CC_UNLOCK(cc); - - return (cancelled); + /* get callback started, if any */ + return (callout_restart_async(c, &coa, NULL, NULL)); } /* @@ -1090,175 +1072,74 @@ } int -_callout_stop_safe(struct callout *c, int safe) +callout_stop(struct callout *c) { - struct callout_cpu *cc, *old_cc; + /* get callback stopped, if any */ + return (callout_restart_async(c, NULL, NULL, NULL)); +} + +static void +callout_drain_function(void *arg) +{ + wakeup(arg); +} + +int +callout_drain_async(struct callout *c, callout_func_t *fn, void *arg) +{ + /* get callback stopped, if any */ + return (callout_restart_async(c, NULL, fn, arg) & 2); +} + +int +callout_drain(struct callout *c) +{ struct lock_class *class; - int direct, sq_locked, use_lock; + int cancelled; - /* - * Some old subsystems don't hold Giant while running a callout_stop(), - * so just discard this check for the moment. - */ - if (!safe && c->c_lock != NULL) { - if (c->c_lock == &Giant.lock_object) - use_lock = mtx_owned(&Giant); - else { - use_lock = 1; - class = LOCK_CLASS(c->c_lock); - class->lc_assert(c->c_lock, LA_XLOCKED); - } - } else - use_lock = 0; - direct = (c->c_flags & CALLOUT_DIRECT) != 0; - sq_locked = 0; - old_cc = NULL; -again: - cc = callout_lock(c); - - /* - * If the callout was migrating while the callout cpu lock was - * dropped, just drop the sleepqueue lock and check the states - * again. - */ - if (sq_locked != 0 && cc != old_cc) { -#ifdef SMP - CC_UNLOCK(cc); - sleepq_release(&old_cc->cc_exec_entity[direct].cc_waiting); - sq_locked = 0; - old_cc = NULL; - goto again; -#else - panic("migration should not happen"); -#endif + if (c->c_lock != NULL && c->c_lock != &Giant.lock_object) { + class = LOCK_CLASS(c->c_lock); + class->lc_lock(c->c_lock, 0); + } else { + class = NULL; } - /* - * If the callout isn't pending, it's not on the queue, so - * don't attempt to remove it from the queue. We can try to - * stop it by other means however. - */ - if (!(c->c_flags & CALLOUT_PENDING)) { - c->c_flags &= ~CALLOUT_ACTIVE; + /* at this point the "c->c_cpu" field is not changing */ + cancelled = callout_drain_async(c, &callout_drain_function, c); + + if (cancelled & 2) { + struct callout_cpu *cc; + int direct; + + CTR3(KTR_CALLOUT, "need to drain %p func %p arg %p", + c, c->c_func, c->c_arg); + + cc = callout_lock(c); + direct = ((c->c_flags & CALLOUT_DIRECT) != 0); + /* - * If it wasn't on the queue and it isn't the current - * callout, then we can't stop it, so just bail. 
+ * We've gotten our callout CPU lock, it is safe to + * drop the initial lock: */ - if (cc->cc_exec_entity[direct].cc_curr != c) { - CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", - c, c->c_func, c->c_arg); - CC_UNLOCK(cc); - if (sq_locked) - sleepq_release( - &cc->cc_exec_entity[direct].cc_waiting); - return (0); - } + if (class != NULL) + class->lc_unlock(c->c_lock); - if (safe) { - /* - * The current callout is running (or just - * about to run) and blocking is allowed, so - * just wait for the current invocation to - * finish. - */ - while (cc->cc_exec_entity[direct].cc_curr == c) { - /* - * Use direct calls to sleepqueue interface - * instead of cv/msleep in order to avoid - * a LOR between cc_lock and sleepqueue - * chain spinlocks. This piece of code - * emulates a msleep_spin() call actually. - * - * If we already have the sleepqueue chain - * locked, then we can safely block. If we - * don't already have it locked, however, - * we have to drop the cc_lock to lock - * it. This opens several races, so we - * restart at the beginning once we have - * both locks. If nothing has changed, then - * we will end up back here with sq_locked - * set. - */ - if (!sq_locked) { - CC_UNLOCK(cc); - sleepq_lock( - &cc->cc_exec_entity[direct].cc_waiting); - sq_locked = 1; - old_cc = cc; - goto again; - } + /* Wait for drain to complete */ - /* - * Migration could be cancelled here, but - * as long as it is still not sure when it - * will be packed up, just let softclock() - * take care of it. - */ - cc->cc_exec_entity[direct].cc_waiting = true; - DROP_GIANT(); - CC_UNLOCK(cc); - sleepq_add( - &cc->cc_exec_entity[direct].cc_waiting, - &cc->cc_lock.lock_object, "codrain", - SLEEPQ_SLEEP, 0); - sleepq_wait( - &cc->cc_exec_entity[direct].cc_waiting, - 0); - sq_locked = 0; - old_cc = NULL; + while (cc->cc_exec_entity[direct].cc_curr == c) + msleep_spin(c, (struct mtx *)&cc->cc_lock, "codrain", 0); - /* Reacquire locks previously released. */ - PICKUP_GIANT(); - CC_LOCK(cc); - } - } else if (use_lock && - !cc->cc_exec_entity[direct].cc_cancel) { - /* - * The current callout is waiting for its - * lock which we hold. Cancel the callout - * and return. After our caller drops the - * lock, the callout will be skipped in - * softclock(). 
- */ - cc->cc_exec_entity[direct].cc_cancel = true; - CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", - c, c->c_func, c->c_arg); - KASSERT(!cc_cce_migrating(cc, direct), - ("callout wrongly scheduled for migration")); - CC_UNLOCK(cc); - KASSERT(!sq_locked, ("sleepqueue chain locked")); - return (1); - } else if ((c->c_flags & CALLOUT_DFRMIGRATION) != 0) { - c->c_flags &= ~CALLOUT_DFRMIGRATION; - CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p", - c, c->c_func, c->c_arg); - CC_UNLOCK(cc); - return (1); - } - CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", - c, c->c_func, c->c_arg); CC_UNLOCK(cc); - KASSERT(!sq_locked, ("sleepqueue chain still locked")); - return (0); + } else { + if (class != NULL) + class->lc_unlock(c->c_lock); } - if (sq_locked) - sleepq_release(&cc->cc_exec_entity[direct].cc_waiting); - c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); - CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); - if ((c->c_flags & CALLOUT_PROCESSED) == 0) { - if (cc->cc_exec_next_dir == c) - cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le); - LIST_REMOVE(c, c_links.le); - } else - TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); - callout_cc_del(c, cc); - CC_UNLOCK(cc); - return (1); + return (cancelled & 1); } void Index: sys/sys/_callout.h =================================================================== --- sys/sys/_callout.h (revision 276240) +++ sys/sys/_callout.h (working copy) @@ -46,6 +46,17 @@ SLIST_HEAD(callout_slist, callout); TAILQ_HEAD(callout_tailq, callout); +typedef void callout_func_t(void *); + +struct callout_args { + sbintime_t time; /* absolute time for the event */ + sbintime_t precision; /* delta allowed wrt opt */ + void *arg; /* function argument */ + callout_func_t *func; /* function to call */ + int flags; /* flags passed to callout_reset() */ + int cpu; /* CPU we're scheduled on */ +}; + struct callout { union { LIST_ENTRY(callout) le; @@ -52,13 +63,13 @@ SLIST_ENTRY(callout) sle; TAILQ_ENTRY(callout) tqe; } c_links; - sbintime_t c_time; /* ticks to the event */ + sbintime_t c_time; /* absolute time for the event */ sbintime_t c_precision; /* delta allowed wrt opt */ void *c_arg; /* function argument */ - void (*c_func)(void *); /* function to call */ - struct lock_object *c_lock; /* lock to handle */ + callout_func_t *c_func; /* function to call */ + struct lock_object *c_lock; /* callback lock */ int c_flags; /* state of this entry */ - volatile int c_cpu; /* CPU we're scheduled on */ + int c_cpu; /* CPU we're scheduled on */ }; #endif Index: sys/sys/callout.h =================================================================== --- sys/sys/callout.h (revision 276240) +++ sys/sys/callout.h (working copy) @@ -46,7 +46,7 @@ #define CALLOUT_MPSAFE 0x0008 /* callout handler is mp safe */ #define CALLOUT_RETURNUNLOCKED 0x0010 /* handler returns with mtx unlocked */ #define CALLOUT_SHAREDLOCK 0x0020 /* callout lock held in shared mode */ -#define CALLOUT_DFRMIGRATION 0x0040 /* callout in deferred migration mode */ +#define CALLOUT_DEFRESTART 0x0040 /* callout restart is deferred */ #define CALLOUT_PROCESSED 0x0080 /* callout in wheel or processing list? 
*/ #define CALLOUT_DIRECT 0x0100 /* allow exec from hw int context */ @@ -65,7 +65,8 @@ #ifdef _KERNEL #define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE) #define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE) -#define callout_drain(c) _callout_stop_safe(c, 1) +int callout_drain(struct callout *); +int callout_drain_async(struct callout *, callout_func_t *, void *); void callout_init(struct callout *, int); void _callout_init_lock(struct callout *, struct lock_object *, int); #define callout_init_mtx(c, mtx, flags) \ @@ -79,7 +80,7 @@ NULL, (flags)) #define callout_pending(c) ((c)->c_flags & CALLOUT_PENDING) int callout_reset_sbt_on(struct callout *, sbintime_t, sbintime_t, - void (*)(void *), void *, int, int); + callout_func_t *, void *, int, int); #define callout_reset_sbt(c, sbt, pr, fn, arg, flags) \ callout_reset_sbt_on((c), (sbt), (pr), (fn), (arg), (c)->c_cpu, (flags)) #define callout_reset_sbt_curcpu(c, sbt, pr, fn, arg, flags) \ @@ -103,8 +104,7 @@ int callout_schedule_on(struct callout *, int, int); #define callout_schedule_curcpu(c, on_tick) \ callout_schedule_on((c), (on_tick), PCPU_GET(cpuid)) -#define callout_stop(c) _callout_stop_safe(c, 0) -int _callout_stop_safe(struct callout *, int); +int callout_stop(struct callout *); void callout_process(sbintime_t now); #endif
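
Usage sketch (not part of the patch): the fragment below shows how the proposed callout_drain_async() interface could be used by a driver to tear down a mutex-protected callout without sleeping. All mydev_* identifiers are hypothetical. Per the patch, callout_drain_async() returns non-zero when the callback is still executing and the supplied drain function will be called once it has finished, and zero when the callout has already been stopped, in which case the drain function is never called.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/callout.h>

struct mydev_softc {
	struct mtx	sc_mtx;
	struct callout	sc_timer;
};

static void
mydev_timeout(void *arg)
{
	struct mydev_softc *sc = arg;

	/* sc_mtx is held here because of callout_init_mtx(). */
	callout_reset(&sc->sc_timer, hz, mydev_timeout, sc);
}

static void
mydev_attach(struct mydev_softc *sc)
{
	mtx_init(&sc->sc_mtx, "mydev", NULL, MTX_DEF);
	callout_init_mtx(&sc->sc_timer, &sc->sc_mtx, 0);

	mtx_lock(&sc->sc_mtx);
	callout_reset(&sc->sc_timer, hz, mydev_timeout, sc);
	mtx_unlock(&sc->sc_mtx);
}

/*
 * Invoked by the callout subsystem after the last callback invocation
 * has returned and sc_mtx has been dropped again.
 */
static void
mydev_timer_drained(void *arg)
{
	struct mydev_softc *sc = arg;

	mtx_destroy(&sc->sc_mtx);
	free(sc, M_DEVBUF);
}

static void
mydev_detach(struct mydev_softc *sc)
{
	mtx_lock(&sc->sc_mtx);
	if (callout_drain_async(&sc->sc_timer, mydev_timer_drained, sc) == 0) {
		/* The callback is not running; finish the teardown here. */
		mtx_unlock(&sc->sc_mtx);
		mtx_destroy(&sc->sc_mtx);
		free(sc, M_DEVBUF);
	} else {
		/* mydev_timer_drained() completes the teardown later. */
		mtx_unlock(&sc->sc_mtx);
	}
}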
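
A second sketch, also not part of the patch and with hypothetical poll_* names, illustrating the new c_cpu locking rule: a callout initialized with an exclusive lock does not match CALLOUT_RDONLY_C_CPU(), so the CPU passed to callout_reset_sbt_on() is honoured and the c_cpu update happens while that lock is held; callouts with no lock, the Giant lock, a shared lock, CALLOUT_RETURNUNLOCKED, or a timeout(9) preallocation keep their original CPU.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/time.h>
#include <sys/callout.h>

static struct mtx poll_mtx;
static struct callout poll_timer;

static void
poll_timer_cb(void *arg)
{
	/* poll_mtx is held here; rearm 50ms later on the current CPU. */
	callout_reset_sbt(&poll_timer, 50 * SBT_1MS, 0, poll_timer_cb, NULL, 0);
}

static void
poll_timer_init(void)
{
	mtx_init(&poll_mtx, "poll_timer", NULL, MTX_DEF);
	callout_init_mtx(&poll_timer, &poll_mtx, 0);
}

static void
poll_timer_start(int cpu)
{
	mtx_lock(&poll_mtx);
	/*
	 * poll_mtx is an exclusive lock, so the requested CPU is
	 * written into c_cpu while the lock is held.
	 */
	callout_reset_sbt_on(&poll_timer, 50 * SBT_1MS, 0,
	    poll_timer_cb, NULL, cpu, 0);
	mtx_unlock(&poll_mtx);
}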
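
Finally, for context on the timeout()/untimeout() hunks above (the patch only reorders their internal locking, it does not change their interface), a minimal sketch of the legacy timeout(9) API they implement; mydrv_* names are hypothetical.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/callout.h>

static struct callout_handle mydrv_handle;

static void
mydrv_tick(void *arg)
{
	/* Runs once, roughly one second after timeout() was called. */
}

static void
mydrv_start(void *arg)
{
	callout_handle_init(&mydrv_handle);
	mydrv_handle = timeout(mydrv_tick, arg, hz);
}

static void
mydrv_cancel(void *arg)
{
	/* Only cancels if the handle still refers to this func/arg pair. */
	untimeout(mydrv_tick, arg, mydrv_handle);
}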