Re: panic: VM object not locked in vm_page_ps_test()

From: John Baldwin <jhb@freebsd.org>
Date: Tue, 17 Apr 2018 10:03:55 -0700
On Tuesday, April 17, 2018 10:01:41 AM John Baldwin wrote:
> My laptop running recent head panicked this morning, apparently from hitting
> a key to stop the screensaver (at which point xscreensaver prompts for a
> password to unlock).

(Sorry, buggy mail client sent this early)

panic: Lock vm object not locked @ /usr/src/sys/vm/vm_page.c:4135

#4  0xffffffff805e4893 in panic (fmt=<unavailable>)
    at /usr/src/sys/kern/kern_shutdown.c:764
#5  0xffffffff805dff22 in __rw_assert (c=<optimized out>, 
    what=<optimized out>, file=<unavailable>, line=<unavailable>)
    at /usr/src/sys/kern/kern_rwlock.c:1397
#6  0xffffffff80882723 in vm_page_ps_test (m=0xfffff80431c2e980, flags=7, 
    skip_m=0xfffff80431c34890) at /usr/src/sys/vm/vm_page.c:4135
#7  0xffffffff80867d84 in vm_fault_soft_fast (vaddr=<optimized out>, 
    prot=<optimized out>, fault_type=<optimized out>, 
    fault_flags=<optimized out>, wired=0, fs=<optimized out>, 
    m_hold=<optimized out>) at /usr/src/sys/vm/vm_fault.c:307
#8  vm_fault_hold (map=0xfffff8000832a000, vaddr=<optimized out>, 
    fault_type=<optimized out>, fault_flags=<optimized out>, m_hold=0x0)
    at /usr/src/sys/vm/vm_fault.c:610
#9  0xffffffff80866cf5 in vm_fault (map=0xfffff8000832a000, 
    vaddr=<optimized out>, fault_type=2 '\002', fault_flags=0)
    at /usr/src/sys/vm/vm_fault.c:514
#10 0xffffffff808bc64c in trap_pfault (frame=0xfffffe008b1dbac0, usermode=1)
    at /usr/src/sys/amd64/amd64/trap.c:728
#11 0xffffffff808bbe1e in trap (frame=0xfffffe008b1dbac0)
#12 <signal handler called>
#13 0x0000000805b51556 in ?? ()

(kgdb) frame 6
#6  0xffffffff80882723 in vm_page_ps_test (m=0xfffff80431c2e980, flags=7, 
    skip_m=0xfffff80431c34890) at /usr/src/sys/vm/vm_page.c:4135
(kgdb) l
4130    {
4131            vm_object_t object;
4132            int i, npages;
4133
4134            object = m->object;
4135            VM_OBJECT_ASSERT_LOCKED(object);
4136            npages = atop(pagesizes[m->psind]);
4137
4138            /*
4139             * The physically contiguous pages that make up a superpage, i.e., a
(kgdb) p m->object
$1 = (vm_object_t) 0xfffff80190785900
(kgdb) p pagesizes[m->psind]
$3 = 2097152
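
So vm_page_ps_test() here is checking a 2MB superpage: npages =
atop(2097152) = 2097152 / 4096 = 512 constituent 4KB pages.  (512 also
matches the reservation's popcnt further down.)
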
(kgdb) up
#7  0xffffffff80867d84 in vm_fault_soft_fast (vaddr=<optimized out>, 
    prot=<optimized out>, fault_type=<optimized out>, 
    fault_flags=<optimized out>, wired=0, fs=<optimized out>, 
    m_hold=<optimized out>) at /usr/src/sys/vm/vm_fault.c:307
307                     if (vm_page_ps_test(m_super, flags, m)) {
(kgdb) p m->object
$4 = (vm_object_t) 0xfffff80190116a00
(kgdb) p/x m->flags
$5 = 0x0

So 'm' (the original page-fault page) and 'm_super' belong to different
VM objects.  Why are they part of the same reservation?
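
For reference, reservations are keyed purely by physical address.
vm_reserv_from_page() in sys/vm/vm_reserv.c is roughly the following
(paraphrased from memory, so treat it as a sketch):

static __inline vm_reserv_t
vm_reserv_from_page(vm_page_t m)
{

	/*
	 * One vm_reserv per 2MB of physical memory: index the array by
	 * the page's physical address shifted down by VM_LEVEL_0_SHIFT,
	 * i.e. PAGE_SHIFT + VM_LEVEL_0_ORDER = 12 + 9 on amd64.  Hence
	 * the ">> (9 + 12)" in the kgdb command below.
	 */
	return (&vm_reserv_array[VM_PAGE_TO_PHYS(m) >> VM_LEVEL_0_SHIFT]);
}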

(kgdb) p m->phys_addr >> (9 + 12)
$7 = 4514
(kgdb) p vm_reserv_array[$7]
$8 = {lock = {lock_object = {lo_name = 0xffffffff8099112c "vm reserv", 
      lo_flags = 16973824, lo_data = 0, lo_witness = 0x0}, mtx_lock = 0}, 
  partpopq = {tqe_next = 0x0, tqe_prev = 0xfffff80423656680}, objq = {
    le_next = 0xfffff8042365b0c0, le_prev = 0xfffff80190116ab8}, 
  object = 0xfffff80190116a00, pindex = 1760, pages = 0xfffff80431c2e980, 
  domain = 0, popcnt = 512, inpartpopq = 0 '\000', popmap = {
    18446744073709551615, 18446744073709551615, 18446744073709551615, 
    18446744073709551615, 18446744073709551615, 18446744073709551615, 
    18446744073709551615, 18446744073709551615}}
(kgdb) set $rv = vm_reserv_array[$7]
(kgdb) p $rv.object
$9 = (vm_object_t) 0xfffff80190116a00

So rv->object matches m->object ($4) but not m_super->object ($1).
Double-checking:

(kgdb) p m_super->object
$10 = (vm_object_t) 0xfffff80190785900

The other conditions in vm_reserv_to_superpage() hold as well (a
paraphrase of the function follows the transcript):

(kgdb) p $rv.pages
$11 = (vm_page_t) 0xfffff80431c2e980
(kgdb) p m_super
$12 = (vm_page_t) 0xfffff80431c2e980
(kgdb) p $rv.popcnt
$13 = 512
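
For reference, vm_reserv_to_superpage() in sys/vm/vm_reserv.c reads
roughly like this (again paraphrased from memory):

vm_page_t
vm_reserv_to_superpage(vm_page_t m)
{
	vm_reserv_t rv;

	VM_OBJECT_ASSERT_LOCKED(m->object);
	rv = vm_reserv_from_page(m);
	/*
	 * rv->object and rv->popcnt are checked against m, but nothing
	 * verifies that rv->pages (the page handed back as m_super)
	 * still belongs to the same object.
	 */
	return (rv->object == m->object && rv->popcnt == VM_LEVEL_0_NPAGES ?
	    rv->pages : NULL);
}

All of those conditions pass here, yet rv->pages sits in a different
object.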

Both objects are OBJT_DEFAULT objects (type = 0 '\000' in the dumps
below):

(kgdb) p *m->object
$14 = {lock = {lock_object = {lo_name = 0xffffffff8095e7ce "vm object", 
      lo_flags = 627245056, lo_data = 0, lo_witness = 0x0}, rw_lock = 41}, 
  object_list = {tqe_next = 0xfffff80190116b00, 
    tqe_prev = 0xfffff80190116920}, shadow_head = {lh_first = 0x0}, 
  shadow_list = {le_next = 0x0, le_prev = 0xfffff80190785930}, memq = {
    tqh_first = 0xfffff80431ddf878, tqh_last = 0xfffff80431e2a900}, rtree = {
    rt_root = 18446735284333515328}, size = 2829, domain = {dr_policy = 0x0, 
    dr_iterator = 0}, generation = 1, ref_count = 3, shadow_count = 0, 
  memattr = 6 '\006', type = 0 '\000', flags = 12352, pg_color = 1824, 
  paging_in_progress = 1, resident_page_count = 1024, 
  backing_object = 0xfffff80190785900, backing_object_offset = 0, 
  pager_object_list = {tqe_next = 0x0, tqe_prev = 0x0}, rvq = {
    lh_first = 0xfffff80423659540}, handle = 0x0, un_pager = {vnp = {
      vnp_size = 0, writemappings = 0}, devp = {devp_pglist = {
        tqh_first = 0x0, tqh_last = 0x0}, ops = 0x0, dev = 0x0}, sgp = {
      sgp_pglist = {tqh_first = 0x0, tqh_last = 0x0}}, swp = {swp_tmpfs = 0x0, 
      swp_blks = {pt_root = 0}}}, cred = 0xfffff80008d99500, 
  charge = 11587584, umtx_data = 0x0}
(kgdb) p *m_super->object
$15 = {lock = {lock_object = {lo_name = 0xffffffff8095e7ce "vm object", 
      lo_flags = 627245056, lo_data = 0, lo_witness = 0x0}, rw_lock = 1}, 
  object_list = {tqe_next = 0xfffff80190785a00, 
    tqe_prev = 0xfffff80190785820}, shadow_head = {
    lh_first = 0xfffff80190116a00}, shadow_list = {
    le_next = 0xfffff801902b2100, le_prev = 0xfffff80008d61d30}, memq = {
    tqh_first = 0xfffff80431bd0720, tqh_last = 0xfffff80431c49b70}, rtree = {
    rt_root = 18446735284336303488}, size = 2829, domain = {dr_policy = 0x0, 
    dr_iterator = 0}, generation = 1, ref_count = 1, shadow_count = 1, 
  memattr = 6 '\006', type = 0 '\000', flags = 4096, pg_color = 42784, 
  paging_in_progress = 0, resident_page_count = 2820, backing_object = 0x0, 
  backing_object_offset = 0, pager_object_list = {tqe_next = 0x0, 
    tqe_prev = 0x0}, rvq = {lh_first = 0xfffff80423659680}, handle = 0x0, 
  un_pager = {vnp = {vnp_size = 0, writemappings = 0}, devp = {devp_pglist = {
        tqh_first = 0x0, tqh_last = 0x0}, ops = 0x0, dev = 0x0}, sgp = {
      sgp_pglist = {tqh_first = 0x0, tqh_last = 0x0}}, swp = {swp_tmpfs = 0x0, 
      swp_blks = {pt_root = 0}}}, cred = 0xfffff80008d99500, 
  charge = 11587584, umtx_data = 0x0}

Huh, m_super's object is the backing object of the object for 'm' and
'rv':

(kgdb) p m->object->backing_object
$18 = (struct vm_object *) 0xfffff80190785900
(kgdb) p m_super->object
$19 = (vm_object_t) 0xfffff80190785900

The program that was running was some X screen saver process (esper):

(kgdb) p td->td_tid
$24 = 100260
(kgdb) ps
  pid  ppid  pgrp   uid   state   wmesg         wchan        cmd
 1768  1767  1618  1001  D+      objde1   0xfffff80190116a00 esper
 1767  1639  1618  1001  R+      (threaded)          esper
    100217   145         S       uwait    0xfffff800026e7880 esper
    100259   143         Run     CPU  2                      esper
    100260   142         Run     CPU  3                      esper
    100261   143         Run     CPU  1                      esper
    100262   143         RunQ                                esper

The other 'esper' process (1768) is waiting in "objde1" on m->object. (!)
It is trying to tear down a forked address space during a call to exec:

(kgdb) proc 1768
(kgdb) where
#0  sched_switch (td=0xfffff800086ff000, newtd=0xfffff801904e8560, 
    flags=<optimized out>) at /usr/src/sys/kern/sched_ule.c:2115
#1  0xffffffff805ef02c in mi_switch (flags=260, newtd=0x0)
    at /usr/src/sys/kern/kern_synch.c:437
#2  0xffffffff80637ded in sleepq_switch (wchan=0xfffff80190116a00, pri=84)
    at /usr/src/sys/kern/subr_sleepqueue.c:613
#3  0xffffffff80637c93 in sleepq_wait (wchan=0xfffff80190116a00, pri=84)
    at /usr/src/sys/kern/subr_sleepqueue.c:692
#4  0xffffffff805ee973 in _sleep (ident=0xfffff80190116a00, 
    lock=<optimized out>, priority=84, wmesg=0xffffffff809558da "objde1", 
    sbt=0, pr=0, flags=256) at /usr/src/sys/kern/kern_synch.c:216
#5  0xffffffff808779bb in vm_object_pip_wait (object=<optimized out>, 
    waitid=<unavailable>) at /usr/src/sys/vm/vm_object.c:417
#6  vm_object_deallocate (object=0xfffff80190116a00)
    at /usr/src/sys/vm/vm_object.c:630
#7  0xffffffff8086cd99 in vm_map_entry_deallocate (
    system_map=<error reading variable: Cannot access memory at address 0x0>, 
    entry=<optimized out>) at /usr/src/sys/vm/vm_map.c:2997
#8  vm_map_process_deferred () at /usr/src/sys/vm/vm_map.c:541
#9  0xffffffff80871e76 in _vm_map_unlock (map=<optimized out>, 
    file=<optimized out>, line=3189) at /usr/src/sys/vm/vm_map.c:554
#10 vm_map_remove (map=<optimized out>, start=4096, end=140737488355328)
    at /usr/src/sys/vm/vm_map.c:3189
#11 0xffffffff8059efe5 in exec_new_vmspace (imgp=0xfffffe008b1ea770, 
    sv=<optimized out>) at /usr/src/sys/kern/kern_exec.c:1099
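
Presumably it is waiting out the paging_in_progress = 1 visible in the
m->object dump above (our faulting thread's PIP reference), since
vm_object_pip_wait() is roughly (paraphrased from memory):

void
vm_object_pip_wait(vm_object_t object, char *waitid)
{

	VM_OBJECT_ASSERT_WLOCKED(object);
	while (object->paging_in_progress > 0) {
		object->flags |= OBJ_PIPWNT;
		VM_OBJECT_SLEEP(object, object, PVM, waitid, 0);
	}
}

so 1768 won't make progress until the faulting thread drops its PIP
reference.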

At this point I'm not sure what to look at next, but I have the core if
you all have suggestions.

-- 
John Baldwin