On Wed, Jul 08, 2015 at 02:15:38AM +0200, Mateusz Guzik wrote: > First off note the patch below is a total hack with the easiest solution > possible so that it can be MFCed for 10.2. > > The issue: > > Closing the socket involves: > > if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) > (*pr->pr_domain->dom_dispose)(so->so_rcv.sb_mb); > if (pr->pr_usrreqs->pru_detach != NULL) > (*pr->pr_usrreqs->pru_detach)(so); > > unp_dispose which gets rid of file descriptors stored in mbufs attached > to the socket. It leaves the mbuf chain in place. > > uipc_detach actually unlinks the socket from global unp list. > > In particular, this means there is a socket with unusable mbufs visible > to unp garbage collector (unp_gc). Also there is no synchronisation of > any form performed here, so it can inspect mbufs as fds are getting > freed (or afterwards) leading to panics. Note that uipc_detach waits for > unp_gc to finish due to UNP_LIST_LOCK, it's the dispose func which > causes trouble. > > Given that stuff should not be accessed after unp_dispose, and the > socket is about to die I figured it would be best to mark the socket so > that unp_gc can ignore it. > > Note that unp_dispose only gets a pointer to mbuf. I have not found any > way to obtain a socket from this, which in turn results in the hack > below. > > I added a new func - unp_dispose2, which is not a part of struct domain. > dom_dispose consumers check for PR_DISPOSE2 flag and call the function > passing the socket as an argument. > > unp_dispose2(struct socket *so) > { > struct unpcb *unp; > > unp = sotounpcb(so); > UNP_LIST_LOCK(); > unp->unp_gcflag |= UNPGC_IGNORE; > UNP_LIST_UNLOCK(); > unp_dispose(so->so_rcv.sb_mb); > } > > The UNP_LIST_LOCK + UNLOCK synchronizes against unp_gc - either it sees the > flag and ignores the socket, or gets to inspect it and unp_dispose2 waits for > it to finish. AFAICT it is completely harmless to proceed with freeing after > unp_gc had a look. > > There is a similar problem with shutdown(), but the race has a smaller window > due to it clearing mbufs just after dispose call. > > In general, it feels like something else is also broken, but I don't see what. > > The issue can be reproduced by running this program in a loop: > https://people.freebsd.org/~mjg/reproducers/unp-gc-panic.c > > With the patch below the issue seems to be fixed: > Deuglified the patch a little. PR_DISPOSE2 flag is introduced. When set on a protocol, unp_dispose2 is called insted of dom_dispose. Also fixed a copy-pasto in sorflush case. It used to partially construct a sockbuf and clear stuff in the original. It is changed to partialy construct the socket instead, as that's what we obtain in unp_dispose2. sockbuf is reconstructed to the same extent, socket also gets pcb so that unp_dispose2 can botain relevant unp pointer. diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index a431b4b..e78c81d 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c _at__at_ -804,8 +804,13 _at__at_ sofree(struct socket *so) ACCEPT_UNLOCK(); VNET_SO_ASSERT(so); - if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) - (*pr->pr_domain->dom_dispose)(so->so_rcv.sb_mb); + if (pr->pr_flags & PR_RIGHTS) { + if (pr->pr_flags & PR_DISPOSE2) { + unp_dispose2(so); + } else if (pr->pr_domain->dom_dispose != NULL) { + (*pr->pr_domain->dom_dispose)(so->so_rcv.sb_mb); + } + } if (pr->pr_usrreqs->pru_detach != NULL) (*pr->pr_usrreqs->pru_detach)(so); _at__at_ -2356,7 +2361,7 _at__at_ sorflush(struct socket *so) { struct sockbuf *sb = &so->so_rcv; struct protosw *pr = so->so_proto; - struct sockbuf asb; + struct socket aso; VNET_SO_ASSERT(so); _at__at_ -2381,8 +2386,9 _at__at_ sorflush(struct socket *so) * and mutex data unchanged. */ SOCKBUF_LOCK(sb); - bzero(&asb, offsetof(struct sockbuf, sb_startzero)); - bcopy(&sb->sb_startzero, &asb.sb_startzero, + bzero(&aso, sizeof(aso)); + aso.so_pcb = so->so_pcb; + bcopy(&sb->sb_startzero, &aso.so_rcv.sb_startzero, sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); bzero(&sb->sb_startzero, sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); _at__at_ -2393,9 +2399,14 _at__at_ sorflush(struct socket *so) * Dispose of special rights and flush the socket buffer. Don't call * any unsafe routines (that rely on locks being initialized) on asb. */ - if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) - (*pr->pr_domain->dom_dispose)(asb.sb_mb); - sbrelease_internal(&asb, so); + if (pr->pr_flags & PR_RIGHTS) { + if (pr->pr_flags & PR_DISPOSE2) { + unp_dispose2(&aso); + } else if (pr->pr_domain->dom_dispose != NULL) { + (*pr->pr_domain->dom_dispose)(aso.so_rcv.sb_mb); + } + } + sbrelease_internal(&aso.so_rcv, so); } /* diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index acf9fe9..98ef6dc 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c _at__at_ -302,14 +302,15 _at__at_ static struct protosw localsw[] = { { .pr_type = SOCK_STREAM, .pr_domain = &localdomain, - .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS, + .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS| + PR_DISPOSE2, .pr_ctloutput = &uipc_ctloutput, .pr_usrreqs = &uipc_usrreqs_stream }, { .pr_type = SOCK_DGRAM, .pr_domain = &localdomain, - .pr_flags = PR_ATOMIC|PR_ADDR|PR_RIGHTS, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_RIGHTS|PR_DISPOSE2, .pr_ctloutput = &uipc_ctloutput, .pr_usrreqs = &uipc_usrreqs_dgram }, _at__at_ -323,7 +324,7 _at__at_ static struct protosw localsw[] = { * that supports both atomic record writes and control data. */ .pr_flags = PR_ADDR|PR_ATOMIC|PR_CONNREQUIRED|PR_WANTRCVD| - PR_RIGHTS, + PR_RIGHTS|PR_DISPOSE2, .pr_ctloutput = &uipc_ctloutput, .pr_usrreqs = &uipc_usrreqs_seqpacket, }, _at__at_ -2193,8 +2194,7 _at__at_ unp_gc_process(struct unpcb *unp) struct socket *so; struct file *fp; - /* Already processed. */ - if (unp->unp_gcflag & UNPGC_SCANNED) + if (unp->unp_gcflag & (UNPGC_SCANNED | UNPGC_IGNORE)) return; fp = unp->unp_file; _at__at_ -2252,11 +2252,11 _at__at_ unp_gc(__unused void *arg, int pending) unp_taskcount++; UNP_LIST_LOCK(); /* - * First clear all gc flags from previous runs. + * First clear all gc flags from previous runs, apart from UNPGC_IGNORE. */ for (head = heads; *head != NULL; head++) LIST_FOREACH(unp, *head, unp_link) - unp->unp_gcflag = 0; + unp->unp_gcflag = unp->unp_gcflag & UNPGC_IGNORE; /* * Scan marking all reachable sockets with UNPGC_REF. Once a socket _at__at_ -2333,6 +2333,24 _at__at_ unp_dispose(struct mbuf *m) unp_scan(m, unp_freerights); } +/* + * XXX A hack working around a difenciency in domain API. + * dom_dispose handler does not get the socket it is supposed to operate on, + * which makes it very problematic to synchronize against unp_gc, which in turn + * can trip over data as we are freeing it. + */ +void +unp_dispose2(struct socket *so) +{ + struct unpcb *unp; + + unp = sotounpcb(so); + UNP_LIST_LOCK(); + unp->unp_gcflag |= UNPGC_IGNORE; + UNP_LIST_UNLOCK(); + unp_dispose(so->so_rcv.sb_mb); +} + static void unp_scan(struct mbuf *m0, void (*op)(struct filedescent **, int)) { diff --git a/sys/sys/protosw.h b/sys/sys/protosw.h index 55db0e3..af45532 100644 --- a/sys/sys/protosw.h +++ b/sys/sys/protosw.h _at__at_ -120,6 +120,7 _at__at_ struct protosw { #define PR_RIGHTS 0x10 /* passes capabilities */ #define PR_IMPLOPCL 0x20 /* implied open/close */ #define PR_LASTHDR 0x40 /* enforce ipsec policy; last header */ +#define PR_DISPOSE2 0x80 /* call unp_dispose2 */ /* * In earlier BSD network stacks, a single pr_usrreq() function pointer was diff --git a/sys/sys/socket.h b/sys/sys/socket.h index 18e2de1..e927cdc 100644 --- a/sys/sys/socket.h +++ b/sys/sys/socket.h _at__at_ -666,6 +666,8 _at__at_ void so_unlock(struct socket *so); void so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *), void *arg); +void unp_dispose2(struct socket *so); + #endif diff --git a/sys/sys/unpcb.h b/sys/sys/unpcb.h index ba63f30..ead9f0a 100644 --- a/sys/sys/unpcb.h +++ b/sys/sys/unpcb.h _at__at_ -106,6 +106,7 _at__at_ struct unpcb { #define UNPGC_REF 0x1 /* unpcb has external ref. */ #define UNPGC_DEAD 0x2 /* unpcb might be dead. */ #define UNPGC_SCANNED 0x4 /* Has been scanned. */ +#define UNPGC_IGNORE 0x4 /* Someone will clear it. */ /* * These flags are used to handle non-atomicity in connect() and bind() -- Mateusz Guzik <mjguzik gmail.com>Received on Thu Jul 09 2015 - 22:17:46 UTC
This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:40:58 UTC