Re: unkillable firefox

From: Konstantin Belousov <kostikbel_at_gmail.com>
Date: Thu, 29 Dec 2016 00:28:36 +0200
On Wed, Dec 28, 2016 at 12:54:53PM -0800, Steve Kargl wrote:
> On Tue, Dec 20, 2016 at 01:29:20PM -0800, Steve Kargl wrote:
> > Anyone know how to kill firefox?
> > 
> > last pid: 69652;  load averages:  0.49,  0.27,  0.24      up 1+02:40:06  13:16:02
> > 126 processes: 1 running, 121 sleeping, 4 stopped
> > CPU:  0.8% user,  0.0% nice,  0.0% system,  0.0% interrupt,  100% idle
> > Mem: 2049M Active, 3739M Inact, 496M Laundry, 1365M Wired, 783M Buf, 239M Free
> > Swap: 16G Total, 1772K Used, 16G Free
> > 
> >   PID USERNAME   PRI NICE SIZE    RES STATE   C   TIME    WCPU COMMAND
> > 63902 kargl      40   0  3157M  2302M STOP    1  10:50   0.00% firefox{firefox}
> > 63902 kargl     -16   0  3157M  2302M STOP    2   5:46   0.00% firefox{Composit
> > 16874 kargl      40   0   740M   330M STOP    1   0:07   0.00% firefox{firefox}
> > 16874 kargl     -16   0   740M   330M STOP    1   0:00   0.00% firefox{Composit
> > 
> > It seems that firefox is wedged in the thread firefox{Compositor},
> > and slowly eating up memory.  This is on an amd64 system at
> > r310125 and latest firefox from ports.  procstat suggests that it's
> > stuck in a vm sleep queue.
> > 
> > % procstat -k 63902
> >   PID    TID COMM       TDNAME       KSTACK                       
> > 63902 100504 firefox    -            mi_switch thread_suspend_switch
> >                                      thread_single exit1 sigexit postsig ast
> >                                      Xfast_syscall 
> > 63902 101494 firefox    Compositor   mi_switch sleepq_wait _sleep 
> >                                      vm_page_busy_sleep vm_page_sleep_if_busy
> >                                      vm_fault_hold vm_fault trap_pfault trap
> >                                      calltrap 
> > 
> 
> Firefox is still fubar on freebsd-current.  Following kib's
> instructions in the thread "Mozilla firefox freezes/zombie on FreeBSD
> current", here's the requested kernel info:
>   PID USERNAME    THR PRI NICE   SIZE    RES STATE   C   TIME    WCPU COMMAND
> 96285 kargl        64  40    0  1008M   678M select  0   2:01  18.10% firefox
> 96256 kargl         2  47    0   834M   304M STOP    0   0:04   0.00% firefox
> 96276 kargl         2  40    0   593M   307M STOP    1   0:03   0.00% firefox
> 96265 kargl         2  40    0   592M   307M STOP    1   0:03   0.00% firefox
> 
> % ps -H -o pid,lwp,mwchan,nwchan 96265
>   PID    LWP MWCHAN NWCHAN
> 96265 100137 -      -
> 96265 100737 vmpfw  fffff80232a1a980
> 
> % kgdb /usr/obj/usr/src/sys/SPEW/kernel.full /dev/mem
> (kgdb) p/x *(struct vm_page *)0xfffff80232a1a980
> $1 = {plinks = {q = {tqe_next = 0xfffff8023467d980, 
>       tqe_prev = 0xffffffff80d9e1b8}, s = {ss = {
>         sle_next = 0xfffff8023467d980}, pv = 0xffffffff80d9e1b8}, memguard = {
>       p = 0xfffff8023467d980, v = 0xffffffff80d9e1b8}}, listq = {
>     tqe_next = 0xfffff80232a1a9e8, tqe_prev = 0xfffff8010c0614f8}, 
>   object = 0xfffff8010c0614b0, pindex = 0x0, phys_addr = 0x59600000, md = {
>     pv_list = {tqh_first = 0x0, tqh_last = 0xfffff80232a1a9b8}, 
>     pv_gen = 0x31c, pat_mode = 0x6}, wire_count = 0x0, busy_lock = 0x6, 
>   hold_count = 0x0, flags = 0x0, aflags = 0x2, oflags = 0x4, queue = 0xff, 
>   psind = 0x1, segind = 0x3, order = 0xd, pool = 0x0, act_count = 0x0, 
>   valid = 0xff, dirty = 0x0}
> (kgdb) p/x *(struct vm_object *)0xfffff8010c0614b0
> $2 = {lock = {lock_object = {lo_name = 0xffffffff80937e16, 
>       lo_flags = 0x25630000, lo_data = 0x0, lo_witness = 0x0}, rw_lock = 0x1}, 
>   object_list = {tqe_next = 0xfffff8010c0615a0, 
>     tqe_prev = 0xfffff8010c0613e0}, shadow_head = {lh_first = 0x0}, 
>   shadow_list = {le_next = 0x0, le_prev = 0x0}, memq = {
>     tqh_first = 0xfffff80232a1a980, tqh_last = 0xfffff80234685e00}, rtree = {
>     rt_root = 0xfffff80138b253f0}, size = 0x347, generation = 0x1, 
>   ref_count = 0x3, shadow_count = 0x0, memattr = 0x6, type = 0x4, 
>   flags = 0x1016, pg_color = 0x0, paging_in_progress = 0x1, 
>   resident_page_count = 0x347, backing_object = 0x0, 
>   backing_object_offset = 0x0, pager_object_list = {
>     tqe_next = 0xfffff801d68e93c0, tqe_prev = 0xfffff801058c8cc8}, rvq = {
>     lh_first = 0xfffff80230563d00}, handle = 0x0, un_pager = {vnp = {
>       vnp_size = 0x0, writemappings = 0xfffff8010c061568}, devp = {
>       devp_pglist = {tqh_first = 0x0, tqh_last = 0xfffff8010c061568}, 
>       ops = 0xffffffff81367fa0, dev = 0xfffff80153397048}, sgp = {
>       sgp_pglist = {tqh_first = 0x0, tqh_last = 0xfffff8010c061568}}, swp = {
>       swp_tmpfs = 0x0, swp_bcount = 0xc061568}}, cred = 0x0, charge = 0x0, 
>   umtx_data = 0x0}

Do you have kern.ipc.shm_use_phys set to 1?

Try the following patch, please.

diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index e8fb5d00408..98bd7bae3db 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -304,13 +304,48 @@ vm_fault_restore_map_lock(struct faultstate *fs)
 	fs->lookup_still_valid = true;
 }
 
+static void
+vm_fault_populate_check_page(vm_page_t m, vm_object_t object, vm_pindex_t pidx)
+{
+
+	/*
+	 * Check each page to ensure that the driver is
+	 * obeying the interface: the page must be installed
+	 * in the object, fully valid, and exclusively busied.
+	 */
+	MPASS(m != NULL);
+	MPASS(vm_page_xbusied(m));
+	MPASS(m->valid == VM_PAGE_BITS_ALL);
+	MPASS(m->object == object);
+	MPASS(m->pindex == pidx);
+}
+
+static void
+vm_fault_populate_cleanup(vm_object_t object, vm_pindex_t first,
+    vm_pindex_t last)
+{
+	vm_page_t m;
+	vm_pindex_t pidx;
+
+	VM_OBJECT_ASSERT_WLOCKED(object);
+	if (first > last) /* micro-op: avoid page lookup */
+		return;
+	for (pidx = first, m = vm_page_lookup(object, pidx);
+	    pidx <= last; pidx++, m = vm_page_next(m)) {
+		vm_fault_populate_check_page(m, object, pidx);
+		vm_page_lock(m);
+		vm_page_activate(m);
+		vm_page_unlock(m);
+		vm_page_xunbusy(m);
+	}
+}
 
 static int
 vm_fault_populate(struct faultstate *fs, vm_offset_t vaddr, vm_prot_t prot,
     int fault_type, int fault_flags, boolean_t wired, vm_page_t *m_hold)
 {
 	vm_page_t m;
-	vm_pindex_t f_first, f_last, pidx;
+	vm_pindex_t f_first, f_last, f1_first, f1_last, pidx;
 	int rv;
 
 	MPASS(fs->object == fs->first_object);
@@ -357,28 +392,21 @@ vm_fault_populate(struct faultstate *fs, vm_offset_t vaddr, vm_prot_t prot,
 	MPASS(f_last < fs->first_object->size);
 
 	vm_fault_restore_map_lock(fs);
-	if (fs->map->timestamp != fs->map_generation)
+	if (fs->map->timestamp != fs->map_generation) {
+		vm_fault_populate_cleanup(fs->first_object, f_first, f_last);
 		return (KERN_RESOURCE_SHORTAGE); /* RetryFault */
+	}
 
 	/* Clip pager response to fit into the vm_map_entry. */
-	f_first = MAX(OFF_TO_IDX(fs->entry->offset), f_first);
-	f_last = MIN(OFF_TO_IDX(fs->entry->end - fs->entry->start +
+	f1_first = MAX(OFF_TO_IDX(fs->entry->offset), f_first);
+	vm_fault_populate_cleanup(fs->first_object, f_first, f1_first - 1);
+	f1_last = MIN(OFF_TO_IDX(fs->entry->end - fs->entry->start +
 	    fs->entry->offset), f_last);
+	vm_fault_populate_cleanup(fs->first_object, f1_last + 1, f_last);
 
-	pidx = f_first;
-	for (m = vm_page_lookup(fs->first_object, pidx); pidx <= f_last;
-	    pidx++, m = vm_page_next(m)) {
-		/*
-		 * Check each page to ensure that the driver is
-		 * obeying the interface: the page must be installed
-		 * in the object, fully valid, and exclusively busied.
-		 */
-		MPASS(m != NULL);
-		MPASS(vm_page_xbusied(m));
-		MPASS(m->valid == VM_PAGE_BITS_ALL);
-		MPASS(m->object == fs->first_object);
-		MPASS(m->pindex == pidx);
-
+	for (pidx = f1_first, m = vm_page_lookup(fs->first_object, pidx);
+	    pidx <= f1_last; pidx++, m = vm_page_next(m)) {
+		vm_fault_populate_check_page(m, fs->first_object, pidx);
 		vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags,
 		    true);
 		VM_OBJECT_WUNLOCK(fs->first_object);
Received on Wed Dec 28 2016 - 21:28:43 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:41:09 UTC