[PATCH] devfs / tty races solved

From: Martin Blapp <mb_at_imp.ch>
Date: Sat, 23 Sep 2006 13:42:28 +0200 (CEST)
Hi all,

J. Porter Clark sent me some traces and dumps to analyze, but I was
not able to reproduce it until yesterday. I guess I do know now
what's going on.

A patch to solve this problem is at the end.

I'm hunted down the same panic Doug White tried to fix one year ago
in FreeBSD 5. I've added some debug output to see what was going on:

ptsopen(): tty with state 131112 has refcnt 3 (ttyp9) dev refcount = 2
ptsopen(): tty with state 131112 has refcnt 2 (ttyp8) dev refcount = 2
ptsopen(): tty with state 0 has refcnt 1 (ttyp5) dev refcount = 2
ptsopen(): tty with state 131112 has refcnt 3 (ttyp9) dev refcount = 2
ptsopen(): tty with state 131112 has refcnt 2 (ttyp2) dev refcount = 2
ptsopen(): tty with state 0 has refcnt 1 (ttyp6) dev refcount = 2
ptsopen(): tty with state 0 has refcnt 1 (ttyp3) dev refcount = 2
ptsopen(): tty with state 0 has refcnt 1 (ttyp5) dev refcount = 2
ptsopen(): tty with state 131112 has refcnt 3 (ttyp0) dev refcount = 2
ptsopen(): tty with state 131112 has refcnt 2 (ttyp8) dev refcount = 2
ptsopen(): tty with state 0 has refcnt 1 (ttyp6) dev refcount = 2
ttyrel(): tty refcnt is now 0 (ttyp3)
ptsopen(): tty with state 0 has refcnt 1 (ttyp4) dev refcount = 2
ptsopen(): tty with state 0 has refcnt 0 (ttyp3) dev refcount = 2
panic()

Interesting. This shouldn't happen in any case. ptsopen() is called with a tty 
which has a refcount of 0 which means ttyrel() has freed our struct tty before.
Interesting is also that I've called ttyrel() and also ptsopen() with 
mtx_assert(&Giant, MA_OWNED); So it's not a missing GIANT lock at all.

The box recovers if refcount > 0 and state is set to 0, but all tty/devfs
operations are locked then for 2-3 minutes.

In the meantime I wrote a perl script to crash a box in 5-10 minutes
without significant load, just doing tty operations and writing stuff at the
same time to 'closed' ttys.

Devfs open doesn't check if the tty is still in use in some cases, I've added 
now checks for ptcopen(), ptsopen(), ttyopen() to check if we have been called
with a refcount of zero. this should solve the panics with ttwakeup() we have
in RELENG_5 and RELENG_6. A patch is attached at the end.

Unfortunatly the backtrace in kgdb is broken because there is a null
frame in between. I'll write another mail about this and I'll add later some 
ASSERTS in ptsopen() to get a usable trace.

Fatal trap 12: page fault while in kernel mode
cpuid = 1; apic id = 01
fault virtual address   = 0x0
fault code              = supervisor read, page not present
instruction pointer     = 0x20:0x0
stack pointer           = 0x28:0xe8dd8974
frame pointer           = 0x28:0xe8dd8988
code segment            = base 0x0, limit 0xfffff, type 0x1b
                         = DPL 0, pres 1, def32 1, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 70635 (perl)
trap number             = 12
panic: page fault
cpuid = 1
KDB: stack backtrace:
kdb_backtrace(100,c6bf2c00,28,e8dd8934,c,...) at kdb_backtrace+0x29
panic(c08a9756,c08fd779,0,fffff,c6bfd89b,...) at panic+0x114
trap_fatal(e8dd8934,0,c6bf2c00,c71f6cb8,c,...) at trap_fatal+0x2ce
trap_pfault(e8dd8934,0,0) at trap_pfault+0x1d7
trap(c6710008,e8dd0028,c06c0028,c6bf2c00,c643ec00,...) at trap+0x2fd
calltrap() at calltrap+0x5
--- trap 0xc, eip = 0, esp = 0xe8dd8974, ebp = 0xe8dd8988 ---
(null)(c643ec88,0,0,c643ec10,c643ec00,...) at 0
ttwakeup(c643ec00,c643ec00,c6dd7980,c643ec00,e8dd89d8,...) at ttwakeup+0x65
ttymodem(c643ec00,1) at ttymodem+0x178
ptsopen(c6bf5200,3,2000,c6bf2c00) at ptsopen+0x99
giant_open(c6bf5200,3,2000,c6bf2c00) at giant_open+0x4f
devfs_open(e8dd8a64) at devfs_open+0x20f
VOP_OPEN_APV(c094ca20,e8dd8a64) at VOP_OPEN_APV+0x38
vn_open_cred(e8dd8bcc,e8dd8ccc,0,c6e43d00,1,...) at vn_open_cred+0x434
vn_open(e8dd8bcc,e8dd8ccc,0,1) at vn_open+0x1e
kern_open(c6bf2c00,bfbfe8b0,0,3,0,...) at kern_open+0xb6
open(c6bf2c00,e8dd8d04) at open+0x1a
syscall(808003b,3b,bfbf003b,2,0,...) at syscall+0x2bf
Xint0x80_syscall() at Xint0x80_syscall+0x1f
--- syscall (5, FreeBSD ELF32, open), eip = 0x28256aeb, esp = 0xbfbfe87c, ebp = 
0xbfbfe8d8 ---

(kgdb) p *(struct cdev *)0xc6bf5200
$9 = {si_priv = 0xc6bf5200, si_flags = 4, si_atime = {tv_sec = 1158948685, 
tv_nsec = 0}, si_ctime = {tv_sec = 1158968040, tv_nsec = 0},
   si_mtime = {tv_sec = 1158948685, tv_nsec = 0}, si_uid = 0, si_gid = 0, si_mode 
= 438, si_cred = 0xc67f2900, si_drv0 = 6,
   si_refcount = 2, si_list = {le_next = 0xc680dc00, le_prev = 0xc6b57738}, 
si_clone = {le_next = 0x0, le_prev = 0x0}, si_children = {
     lh_first = 0x0}, si_siblings = {le_next = 0x0, le_prev = 0x0}, si_parent = 
0x0, si_name = 0xc6bf5278 "ttyp6", si_drv1 = 0xc6dd7980,
   si_drv2 = 0x0, si_devsw = 0xc0959460, si_iosize_max = 65536, si_usecount = 1, 
si_threadcount = 4, __si_u = {__sit_tty = 0xc643ec00,
     __sid_snapdata = 0xc643ec00}, __si_namebuf = "ttyp3", '\0' <repeats 58 
times>}

$36 = {pt_flags = -960540160, pt_selr = {si_thrlist = {tqe_next = 0x4, tqe_prev 
= 0x4514274d}, si_thread = 0x0, si_note = {kl_list = {
         slh_first = 0x451472e8}, kl_lock = 0, kl_unlock = 0x4514274d, kl_locked 
= 0, kl_lockarg = 0x0}, si_flags = 0}, pt_selw = {
     si_thrlist = {tqe_next = 0x1b6, tqe_prev = 0xc67f2900}, si_thread = 0x6, 
si_note = {kl_list = {slh_first = 0x2},
       kl_lock = 0xc680dc00, kl_unlock = 0xc6b57738, kl_locked = 0, kl_lockarg = 
0x0}, si_flags = 0}, pt_send = 0 '\0',
   pt_ucntl = 0 '\0', pt_tty = 0x0, devs = 0x0, devc = 0xc6bf5278, pt_prison = 
0xc6dd7980}

kgdb) p *(struct tty *)0xc643ec00
$1 = {t_rawq = {c_cc = 0, c_cbcount = 0, c_cbmax = 0, c_cbreserved = 0, c_cf = 
0x0, c_cl = 0x0}, t_rawcc = 288210, t_canq = {c_cc = 0,
     c_cbcount = 0, c_cbmax = 0, c_cbreserved = 0, c_cf = 0x0, c_cl = 0x0}, 
t_cancc = 30, t_outq = {c_cc = 0, c_cbcount = 0,
     c_cbmax = 0, c_cbreserved = 0, c_cf = 0x0, c_cl = 0x0}, t_outcc = 288974, 
t_line = 0, t_dev = 0xc6bf5200, t_mdev = 0x0,
   t_devunit = 0, t_state = 131080, t_flags = 0, t_timeout = -1, t_pgrp = 0x0, 
t_session = 0x0, t_sigio = 0x0, t_rsel = {si_thrlist = {
       tqe_next = 0x0, tqe_prev = 0x0}, si_thread = 0x0, si_note = {kl_list = 
{slh_first = 0x0}, kl_lock = 0, kl_unlock = 0,
       kl_locked = 0xc064f070 <knlist_mtx_locked>, kl_lockarg = 0x0}, si_flags = 
0}, t_wsel = {si_thrlist = {tqe_next = 0x0,
       tqe_prev = 0x0}, si_thread = 0x0, si_note = {kl_list = {slh_first = 0x0}, 
kl_lock = 0, kl_unlock = 0,
       kl_locked = 0xc064f070 <knlist_mtx_locked>, kl_lockarg = 0x0}, si_flags = 
0}, t_termios = {c_iflag = 11010, c_oflag = 3,
     c_cflag = 19200, c_lflag = 1483, c_cc = 
"\004ÿÿ\177\027\025\022\b\003\034\032\031\021\023\026\017\001\000\024ÿ", 
c_ispeed = 9600,
     c_ospeed = 9600}, t_init_in = {c_iflag = 11010, c_oflag = 3, c_cflag = 
19200, c_lflag = 1483,
     c_cc = 
"\004ÿÿ\177\027\025\022\b\003\034\032\031\021\023\026\017\001\000\024ÿ", 
c_ispeed = 9600, c_ospeed = 9600}, t_init_out = {
     c_iflag = 11010, c_oflag = 3, c_cflag = 19200, c_lflag = 1483,
     c_cc = 
"\004ÿÿ\177\027\025\022\b\003\034\032\031\021\023\026\017\001\000\024ÿ", 
c_ispeed = 9600, c_ospeed = 9600}, t_lock_in = {
     c_iflag = 0, c_oflag = 0, c_cflag = 0, c_lflag = 0, c_cc = '\0' <repeats 19 
times>, c_ispeed = 0, c_ospeed = 0}, t_lock_out = {
     c_iflag = 0, c_oflag = 0, c_cflag = 0, c_lflag = 0, c_cc = '\0' <repeats 19 
times>, c_ispeed = 0, c_ospeed = 0}, t_winsize = {
     ws_row = 0, ws_col = 0, ws_xpixel = 0, ws_ypixel = 0}, t_sc = 0xc6dd7980, 
t_lsc = 0x0, t_column = 131840, t_rocount = 0,
   t_rocol = 0, t_ififosize = 512, t_ihiwat = 7680, t_ilowat = 6720, t_ispeedwat 
= 0, t_ohiwat = 1296, t_olowat = 256, t_ospeedwat = 0,
   t_gen = 15, t_list = {tqe_next = 0xc66f9c00, tqe_prev = 0xc657d5dc}, t_actout 
= 0, t_wopeners = 0, t_mtx = {mtx_object = {
       lo_class = 0xc0953f44, lo_name = 0xc08daf1c "tty", lo_type = 0xc08daf1c 
"tty", lo_flags = 131072, lo_list = {tqe_next = 0x0,
         tqe_prev = 0x0}, lo_witness = 0x0}, mtx_lock = 6, mtx_recurse = 0}, 
t_refcnt = 0, t_hotchar = 0, t_dtr_wait = 3000,
   t_do_timestamp = 0, t_timestamp = {tv_sec = 0, tv_usec = 0}, t_pps = 0x0, 
t_oproc = 0xc06a25d4 <ptsstart>,
   t_stop = 0xc06a28c4 <ptsstop>, t_param = 0, t_modem = 0, t_break = 0, t_ioctl 
= 0, t_open = 0, t_purge = 0, t_close = 0, t_cioctl = 0}

--- sys/kern/tty.c      Sun Nov  6 16:09:32 2005
+++ sys/kern/tty.c      Sat Sep 23 13:16:51 2006
_at__at_ -3101,6 +3101,12 _at__at_
         struct tty      *tp;

         tp = dev->si_tty;
+
+       /* XXX It can happen that devfs_open() calls us with tp->t_refcnt == 0 */
+       if (tp == NULL || tp->t_refcnt == 0) {
+               return (ENXIO);
+       }
+
         s = spltty();
         /*
          * We jump to this label after all non-interrupted sleeps to pick
--- sys/kern/tty_pty.c  Thu Mar 30 16:46:56 2006
+++ sys/kern/tty_pty.c  Sat Sep 23 13:17:36 2006
_at__at_ -170,6 +170,12 _at__at_
                 return(ENXIO);
         pt = dev->si_drv1;
         tp = dev->si_tty;
+
+        /* XXX It can happen that devfs_open() calls us with tp->t_refcnt == 0 
*/
+       if (tp == NULL || tp->t_refcnt == 0) {
+               return (ENXIO);
+        }
+
         if ((tp->t_state & TS_ISOPEN) == 0) {
                 ttyinitmode(tp, 1, 0);
         } else if (tp->t_state & TS_XCLUDE && suser(td))
_at__at_ -276,6 +282,12 _at__at_
         if (!dev->si_drv1)
                 return(ENXIO);
         tp = dev->si_tty;
+
+       /* XXX It can happen that devfs_open() calls us with tp->t_refcnt == 0 */
+        if (tp == NULL || tp->t_refcnt == 0) {
+                return (ENXIO);
+        }
+
         if (tp->t_oproc)
                 return (EIO);
         tp->t_timeout = -1;

With this patch the system recovers fine:

# ttyrel(): tty refcnt is now 1 (ttyp6)
ttyref(): tty refcnt is now 2 (ttyp6)
ttyref(): tty refcnt is now 2 (ttyp1)
ttyrel(): tty refcnt is now 0 (ttyp3)
ttyrel(): tty refcnt is now 0 (ttyp7)
ttyrel(): tty refcnt is now 0 (ttyp8)
ttyrel(): tty refcnt is now 0 (ttyp9)
ttyref(): tty refcnt is now 1 (ttyp9)

[...]

Martin Blapp, <mb_at_imp.ch> <mbr_at_FreeBSD.org>
------------------------------------------------------------------
ImproWare AG, UNIXSP & ISP, Zurlindenstrasse 29, 4133 Pratteln, CH
Phone: +41 61 826 93 00 Fax: +41 61 826 93 01
PGP: <finger -l mbr_at_freebsd.org>
PGP Fingerprint: B434 53FC C87C FE7B 0A18 B84C 8686 EF22 D300 551E
------------------------------------------------------------------
Received on Sat Sep 23 2006 - 09:42:34 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:39:00 UTC