On Fri, Oct 02, 2009 at 10:12:13PM +0200, Jilles Tjoelker wrote: > On Thu, Oct 01, 2009 at 03:07:30PM +0300, Kostik Belousov wrote: > > On Wed, Sep 30, 2009 at 11:02:19AM -0700, Justin Teller wrote: > > > We're trying to control one process from another process through > > > signals (I know, revolutionary ;-), and we've found that a signal > > > occasionally gets lost. šThe process we're signaling is > > > multi-threaded. šIt looks like the signal is lost when the kernel > > > decides to post the signal to a thread that is in the process of dying > > > (calling pthread_exit, etc). > > > > Is this expected behavior that we should just handle, or is it a race > > > in the kernel that should be/will be/already is fixed? > > > > It may be that a fix is already in current, and I just haven't found > > > it in my searches through the source code (I'm working off of source > > > code for an older 8.0 image). If it is fixed, I'd appreciate a > > > pointer to the code that fixes it. > > > When thread enters the kernel last time to be killed, it is very much > > bad idea to allow it to return to usermode to handle directed signal. > > And, there would always be window between entering the kernel and > > marking the thread as exiting. > > > Moving the thread-directed signals back to the process queue is hard > > and there is no way to distinguish which signals were sent to process > > or to the thread. > > > Possibly, we could clear the thread signal mask while still in user mode. > > I think it would still leave a very narrow window when a process > > signal could be directed to the dying thread and not be delivered to > > usermode; this happens when signal is generated while sigsetmask already > > entered the kernel, but did not changed the mask yet. This is worked > > around by rechecking the pending signals after setting the block mask > > and releasing it if needed. > > SIGKILL cannot be masked. Is it possible that a kill(SIGKILL) is > assigned to a dying thread and lost? > > (SIGSTOP cannot be masked either, but its processing is done by the > sending thread, so it should be safe.) > > I suppose that race can also occur in other uses of pthread_sigmask(). > If a thread masks a signal for a while, and that signal is assigned to > that thread just as it is executing pthread_sigmask(), it will only be > processed when that thread unblocks or accepts it, even though other > threads may have the signal unmasked or be in a sigwait() for it. > Signals sent after pthread_sigmask() has changed the signal mask are > likely processed sooner because they will be assigned to a different > thread or left in the process queue. > > POSIX seems to say that signals generated for the process should remain > queued for the process and should only be assigned to a thread at time > of delivery. > > This could be implemented by leaving signals in the process queue or by > tracking for each signal in the thread queue whether it was directed at > the thread and moving the process signals back at sigmask/thr_exit. > Either way I am not sure of all the consequences at this time. I agree that postponing assignment of the thread for signal delivery till the actual delivery occurs is the proper fix. I tried to cheat in my previous patch. Below is an experimental change that did very minimal testing. diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 0386fc4..abd9714 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c _at__at_ -581,20 +581,11 _at__at_ void signotify(struct thread *td) { struct proc *p; - sigset_t set; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); - /* - * If our mask changed we may have to move signal that were - * previously masked by all threads to our sigqueue. - */ - set = p->p_sigqueue.sq_signals; - SIGSETNAND(set, td->td_sigmask); - if (! SIGISEMPTY(set)) - sigqueue_move_set(&p->p_sigqueue, &td->td_sigqueue, &set); if (SIGPENDING(td)) { thread_lock(td); td->td_flags |= TDF_NEEDSIGCHK | TDF_ASTPENDING; _at__at_ -1985,17 +1976,9 _at__at_ tdsignal(struct proc *p, struct thread *td, int sig, ksiginfo_t *ksi) KNOTE_LOCKED(&p->p_klist, NOTE_SIGNAL | sig); prop = sigprop(sig); - /* - * If the signal is blocked and not destined for this thread, then - * assign it to the process so that we can find it later in the first - * thread that unblocks it. Otherwise, assign it to this thread now. - */ if (td == NULL) { td = sigtd(p, sig, prop); - if (SIGISMEMBER(td->td_sigmask, sig)) - sigqueue = &p->p_sigqueue; - else - sigqueue = &td->td_sigqueue; + sigqueue = &p->p_sigqueue; } else { KASSERT(td->td_proc == p, ("invalid thread")); sigqueue = &td->td_sigqueue; _at__at_ -2409,8 +2392,9 _at__at_ issignal(struct thread *td, int stop_allowed) { struct proc *p; struct sigacts *ps; + struct sigqueue *queue; sigset_t sigpending; - int sig, prop, newsig; + int sig, prop, newsig, signo; p = td->td_proc; ps = p->p_sigacts; _at__at_ -2420,6 +2404,7 _at__at_ issignal(struct thread *td, int stop_allowed) int traced = (p->p_flag & P_TRACED) || (p->p_stops & S_SIG); sigpending = td->td_sigqueue.sq_signals; + SIGSETOR(sigpending, p->p_sigqueue.sq_signals); SIGSETNAND(sigpending, td->td_sigmask); if (p->p_flag & P_PPWAIT) _at__at_ -2440,6 +2425,7 _at__at_ issignal(struct thread *td, int stop_allowed) */ if (SIGISMEMBER(ps->ps_sigignore, sig) && (traced == 0)) { sigqueue_delete(&td->td_sigqueue, sig); + sigqueue_delete(&p->p_sigqueue, sig); continue; } if (p->p_flag & P_TRACED && (p->p_flag & P_PPWAIT) == 0) { _at__at_ -2452,12 +2438,18 _at__at_ issignal(struct thread *td, int stop_allowed) if (sig != newsig) { ksiginfo_t ksi; + + queue = &td->td_sigqueue; /* * clear old signal. * XXX shrug off debugger, it causes siginfo to * be thrown away. */ - sigqueue_get(&td->td_sigqueue, sig, &ksi); + if (sigqueue_get(queue, sig, &ksi) == 0) { + queue = &p->p_sigqueue; + signo = sigqueue_get(queue, sig, &ksi); + KASSERT(signo == sig, ("signo != sig")); + } /* * If parent wants us to take the signal, _at__at_ -2472,7 +2464,7 _at__at_ issignal(struct thread *td, int stop_allowed) * Put the new signal into td_sigqueue. If the * signal is being masked, look for other signals. */ - SIGADDSET(td->td_sigqueue.sq_signals, sig); + SIGADDSET(queue->sq_signals, sig); if (SIGISMEMBER(td->td_sigmask, sig)) continue; signotify(td); _at__at_ -2567,6 +2559,7 _at__at_ issignal(struct thread *td, int stop_allowed) return (sig); } sigqueue_delete(&td->td_sigqueue, sig); /* take the signal! */ + sigqueue_delete(&p->p_sigqueue, sig); } /* NOTREACHED */ } _at__at_ -2613,7 +2606,9 _at__at_ postsig(sig) ps = p->p_sigacts; mtx_assert(&ps->ps_mtx, MA_OWNED); ksiginfo_init(&ksi); - sigqueue_get(&td->td_sigqueue, sig, &ksi); + if (sigqueue_get(&td->td_sigqueue, sig, &ksi) == 0 && + sigqueue_get(&p->p_sigqueue, sig, &ksi) == 0) + return; ksi.ksi_signo = sig; if (ksi.ksi_code == SI_TIMER) itimer_accept(p, ksi.ksi_timerid, &ksi); diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index 6d60ddb..ccf6479 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c _at__at_ -90,6 +90,7 _at__at_ userret(struct thread *td, struct trapframe *frame) CTR3(KTR_SYSC, "userret: thread %p (pid %d, %s)", td, p->p_pid, td->td_name); +#if 0 #ifdef DIAGNOSTIC /* Check that we called signotify() enough. */ PROC_LOCK(p); _at__at_ -100,6 +101,7 _at__at_ userret(struct thread *td, struct trapframe *frame) thread_unlock(td); PROC_UNLOCK(p); #endif +#endif #ifdef KTRACE KTRUSERRET(td); #endif _at__at_ -218,7 +220,14 _at__at_ ast(struct trapframe *framep) ktrcsw(0, 1); #endif } - if (flags & TDF_NEEDSIGCHK) { + + /* + * Check for signals. Unlocked reads of p_pendingcnt or + * p_siglist might cause process-directed signal to be handled + * later. + */ + if (flags & TDF_NEEDSIGCHK || p->p_pendingcnt > 0 || + !SIGISEMPTY(p->p_siglist)) { PROC_LOCK(p); mtx_lock(&p->p_sigacts->ps_mtx); while ((sig = cursig(td, SIG_STOP_ALLOWED)) != 0) diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h index 89b40f0..680da40 100644 --- a/sys/sys/signalvar.h +++ b/sys/sys/signalvar.h _at__at_ -252,9 +252,10 _at__at_ typedef struct sigqueue { /* Return nonzero if process p has an unmasked pending signal. */ #define SIGPENDING(td) \ - (!SIGISEMPTY((td)->td_siglist) && \ - !sigsetmasked(&(td)->td_siglist, &(td)->td_sigmask)) - + ((!SIGISEMPTY((td)->td_siglist) && \ + !sigsetmasked(&(td)->td_siglist, &(td)->td_sigmask)) || \ + (!SIGISEMPTY((td)->td_proc->p_siglist) && \ + !sigsetmasked(&(td)->td_proc->p_siglist, &(td)->td_sigmask))) /* * Return the value of the pseudo-expression ((*set & ~*mask) != 0). This * is an optimized version of SIGISEMPTY() on a temporary variable diff --git a/tools/regression/sigqueue/sigqtest1/sigqtest1.c b/tools/regression/sigqueue/sigqtest1/sigqtest1.c index 0f40021..f0201c3 100644 --- a/tools/regression/sigqueue/sigqtest1/sigqtest1.c +++ b/tools/regression/sigqueue/sigqtest1/sigqtest1.c _at__at_ -1,12 +1,14 _at__at_ /* $FreeBSD$ */ -#include <signal.h> -#include <stdio.h> #include <err.h> #include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <unistd.h> int received; -void handler(int sig, siginfo_t *si, void *ctx) +void +handler(int sig, siginfo_t *si, void *ctx) { if (si->si_code != SI_QUEUE) errx(1, "si_code != SI_QUEUE"); _at__at_ -15,7 +17,8 _at__at_ void handler(int sig, siginfo_t *si, void *ctx) received++; } -int main() +int +main() { struct sigaction sa; union sigval val; diff --git a/tools/regression/sigqueue/sigqtest2/sigqtest2.c b/tools/regression/sigqueue/sigqtest2/sigqtest2.c index 078ea81..50b579d 100644 --- a/tools/regression/sigqueue/sigqtest2/sigqtest2.c +++ b/tools/regression/sigqueue/sigqtest2/sigqtest2.c _at__at_ -1,24 +1,29 _at__at_ /* $FreeBSD$ */ -#include <signal.h> -#include <stdio.h> -#include <err.h> -#include <errno.h> #include <sys/types.h> #include <sys/wait.h> +#include <err.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> int stop_received; int exit_received; int cont_received; -void job_handler(int sig, siginfo_t *si, void *ctx) +void +job_handler(int sig, siginfo_t *si, void *ctx) { int status; int ret; if (si->si_code == CLD_STOPPED) { + printf("%d: stop received\n", si->si_pid); stop_received = 1; kill(si->si_pid, SIGCONT); } else if (si->si_code == CLD_EXITED) { + printf("%d: exit received\n", si->si_pid); ret = waitpid(si->si_pid, &status, 0); if (ret == -1) errx(1, "waitpid"); _at__at_ -26,11 +31,13 _at__at_ void job_handler(int sig, siginfo_t *si, void *ctx) errx(1, "!WIFEXITED(status)"); exit_received = 1; } else if (si->si_code == CLD_CONTINUED) { + printf("%d: cont received\n", si->si_pid); cont_received = 1; } } -void job_control_test() +void +job_control_test(void) { struct sigaction sa; pid_t pid; _at__at_ -43,9 +50,12 _at__at_ void job_control_test() stop_received = 0; cont_received = 0; exit_received = 0; + fflush(stdout); pid = fork(); if (pid == 0) { + printf("child %d\n", getpid()); kill(getpid(), SIGSTOP); + sleep(2); exit(1); } _at__at_ -60,11 +70,13 _at__at_ void job_control_test() printf("job control test OK.\n"); } -void rtsig_handler(int sig, siginfo_t *si, void *ctx) +void +rtsig_handler(int sig, siginfo_t *si, void *ctx) { } -int main() +int +main() { struct sigaction sa; sigset_t set;
This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:39:56 UTC