Bad link elm in vm_object_terminate [Was: crash on process exit.. current at about r332467]

From: Andriy Gapon <avg_at_FreeBSD.org> Date: Tue, 29 May 2018 16:50:14 +0300 · This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:41:16 UTC

On 23/04/2018 17:50, Julian Elischer wrote:
> back trace at:  http://www.freebsd.org/~julian/bob-crash.png
> 
> If anyone wants to take a look..
> 
> In the exit syscall, while deallocating a vm object.
> 
> I haven't see references to a similar crash in the last 10 days or so.. But if
> it rings any bells...

We have just got another one:
panic: Bad link elm 0xfffff80cc3938360 prev->next != elm

Matching disassembled code to C code, it seems that the crash is somewhere in
vm_object_terminate_pages (inlined into vm_object_terminate), probably in one of
TAILQ_REMOVE-s there:
		if (p->queue != PQ_NONE) {
			KASSERT(p->queue < PQ_COUNT, ("vm_object_terminate: "
			    "page %p is not queued", p));
			pq1 = vm_page_pagequeue(p);
			if (pq != pq1) {
				if (pq != NULL) {
					vm_pagequeue_cnt_add(pq, dequeued);
					vm_pagequeue_unlock(pq);
				}
				pq = pq1;
				vm_pagequeue_lock(pq);
				dequeued = 0;
			}
			p->queue = PQ_NONE;
			TAILQ_REMOVE(&pq->pq_pl, p, plinks.q);
			dequeued--;
		}
		if (vm_page_free_prep(p, true))
			continue;
unlist:
		TAILQ_REMOVE(&object->memq, p, listq);
	}

Please note that this is the code before r332974 Improve VM page queue scalability.
I am not sure if r332974 + r333256 would fix the problem or if it just would get
moved to a different place.

Does this ring a bell to anyone who tinkered with that part of the VM code recently?

Looking a little bit further, I think that object->memq somehow got corrupted.
memq contains just two elements and the reported element is not there.

(kgdb) p *(struct vm_page *)0xfffff80cc3938360
$22 = {
  plinks = {
    q = {
      tqe_next = 0xfffff80cd7175398,
      tqe_prev = 0xfffff80cb9f69170
    },
    s = {
      ss = {
        sle_next = 0xfffff80cd7175398
      },
      pv = 0xfffff80cb9f69170
    },
    memguard = {
      p = 18446735332764767128,
      v = 18446735332276081008
    }
  },
  listq = {
    tqe_next = 0xfffff80cc3938568,	<=============
    tqe_prev = 0xfffff8078c11b848	<=============
  },
  object = 0x0,
  pindex = 1548,
  phys_addr = 14695911424,
  md = {
    pv_list = {
      tqh_first = 0x0,
      tqh_last = 0xfffff80cc3938398
    },
    pv_gen = 1205766,
    pat_mode = 6
  },
  wire_count = 0,
  busy_lock = 1,
  hold_count = 0,
  flags = 0,
  aflags = 0 '\000',
  oflags = 0 '\000',
  queue = 255 '\377',
  psind = 0 '\000',
  segind = 5 '\005',
  order = 13 '\r',
  pool = 0 '\000',
  act_count = 5 '\005',
  valid = 0 '\000',
  dirty = 0 '\000'
}

(kgdb) p object->memq
$11 = {
  tqh_first = 0xfffff80cb861cfb8,
  tqh_last = 0xfffff80cc3938780
}

(kgdb) p *object->memq.tqh_first
$25 = {
  plinks = {
    q = {
      tqe_next = 0xfffff80cb9f69108,
      tqe_prev = 0xfffff80cd7175398
    },
    s = {
      ss = {
        sle_next = 0xfffff80cb9f69108
      },
      pv = 0xfffff80cd7175398
    },
    memguard = {
      p = 18446735332276080904,
      v = 18446735332764767128
    }
  },
  listq = {
    tqe_next = 0xfffff80cb56eafb0,	<=============
    tqe_prev = 0xfffff8078c11b848	<=============
  },
  object = 0xfffff8078c11b800,
  pindex = 515,
  phys_addr = 7299219456,
  md = {
    pv_list = {
      tqh_first = 0xfffff80b99e4ff88,
      tqh_last = 0xfffff80b99e4ff90
    },
    pv_gen = 466177,
    pat_mode = 6
  },
  wire_count = 0,
  busy_lock = 2,
  hold_count = 0,
  flags = 0,
  aflags = 0 '\000',
  oflags = 0 '\000',
  queue = 255 '\377',
  psind = 0 '\000',
  segind = 5 '\005',
  order = 13 '\r',
  pool = 0 '\000',
  act_count = 5 '\005',
  valid = 255 '\377',
  dirty = 0 '\000'
}
(kgdb) p *object->memq.tqh_first->listq.tqe_next
$26 = {
  plinks = {
    q = {
      tqe_next = 0x0,
      tqe_prev = 0xfffff80cc92e1d18
    },
    s = {
      ss = {
        sle_next = 0x0
      },
      pv = 0xfffff80cc92e1d18
    },
    memguard = {
      p = 0,
      v = 18446735332531379480
    }
  },
  listq = {
    tqe_next = 0x0,			<=============
    tqe_prev = 0xfffff80cb861cfc8	<=============
  },
  object = 0xfffff8078c11b800,
  pindex = 1548,
  phys_addr = 5350158336,
  md = {
    pv_list = {
      tqh_first = 0xfffff80a07222808,
      tqh_last = 0xfffff80a07222810
    },
    pv_gen = 7085,
    pat_mode = 6
  },
  wire_count = 0,
  busy_lock = 1,
  hold_count = 0,
  flags = 0,
  aflags = 1 '\001',
  oflags = 0 '\000',
  queue = 1 '\001',
  psind = 0 '\000',
  segind = 5 '\005',
  order = 13 '\r',
  pool = 0 '\000',
  act_count = 5 '\005',
  valid = 255 '\377',
  dirty = 255 '\377'
}

Pages 0xfffff80cc3938360 (the reported one) and 0xfffff80cb56eafb0 (the last one
on memq) have the same index 1548.  Also, memq.tqh_last points to the reported
page, but it is not reachable via tqe_next pointers.
It's also potentially interesting is that the reported page looks like it's
already freed and the replacement page is both valid and dirty.

The object, just in case:
(kgdb) p *object
$34 = {
  lock = {
    lock_object = {
      lo_name = 0xffffffff81202c27 "vm object",
      lo_flags = 627245056,
      lo_data = 0,
      lo_witness = 0xfffff80cffd6a700
    },
    rw_lock = 18446735286009226592
  },
  object_list = {
    tqe_next = 0xfffff80b2481e200,
    tqe_prev = 0xfffff80b2481e020
  },
  shadow_head = {
    lh_first = 0x0
  },
  shadow_list = {
    le_next = 0xfffff809c070f900,
    le_prev = 0xfffff80869c06c30
  },
  memq = {
    tqh_first = 0xfffff80cb861cfb8,
    tqh_last = 0xfffff80cc3938780
  },
  rtree = {
    rt_root = 18446735279843613792
  },
  size = 1561,
  domain = {
    dr_policy = 0x0,
    dr_iterator = 0
  },
  generation = 1,
  ref_count = 0,
  shadow_count = 0,
  memattr = 6 '\006',
  type = 0 '\000',
  flags = 12296,
  pg_color = 1809,
  paging_in_progress = 0,
  resident_page_count = 5,
  backing_object = 0x0,
  backing_object_offset = 0,
  pager_object_list = {
    tqe_next = 0x0,
    tqe_prev = 0x0
  },
  rvq = {
    lh_first = 0xfffff80cad278b60
  },
  handle = 0x0,
  un_pager = {
    vnp = {
      vnp_size = 19444,
      writemappings = 0
    },
    devp = {
      devp_pglist = {
        tqh_first = 0x4bf4,
        tqh_last = 0x0
      },
      ops = 0x0,
      dev = 0x0
    },
    sgp = {
      sgp_pglist = {
        tqh_first = 0x4bf4,
        tqh_last = 0x0
      }
    },
    swp = {
      swp_tmpfs = 0x4bf4,
      swp_blks = {
        pt_root = 0
      }
    }
  },
  cred = 0xfffff806811adc00,
  charge = 6393856,
  umtx_data = 0x0
}

Interesting that it is on a shadow list.

-- 
Andriy Gapon