Re: panic in uma_startup for many-core amd64 system

From: John Baldwin <jhb_at_freebsd.org>
Date: Mon, 1 Nov 2010 15:14:50 -0400
On Monday, November 01, 2010 1:09:22 pm Giovanni Trematerra wrote:
> On Tue, Oct 19, 2010 at 8:55 AM, Andriy Gapon <avg_at_freebsd.org> wrote:
> > on 19/10/2010 00:01 Giovanni Trematerra said the following:
> >>
> >> Your patch seems just a work around about initial slab size where the
> >> keg is backed.
> >
> > Well, setting aside my confusion with the terminology - yes, the patch is 
just
> > that, and precisely because I only tried to solve that particular problem.
> >
> >> Having dynamic slab sizes would allow to have the keg backed on a larger 
slab
> >> without going OFFPAGE.
> >
> > I agree in principle.
> > But without seeing code that implements that I can't guess if it would 
really be
> > more efficient or more maintainable, i.e. more useful in general.
> > Still a very good idea.
> >
> 
> Here the patch that was in my mind.
> The patch doesn't implement dynamic slab size just allow
> to have a multipage slab to back uma_zone objects.
> I'm going to work more on the topic "dynamic slab size" soon.
> I tested the patch on qemu with -smp 32.
> I'm looking for real hw to test the patch on.
> 
> here some interesting output:
> qemulab# vmstat -z | more
> ITEM                   SIZE  LIMIT     USED     FREE      REQ FAIL SLEEP
> 
> UMA Kegs:               208,      0,     149,       4,     149,   0,   0
> UMA Zones:             4480,      0,     149,       0,     149,   0,   0
> UMA Slabs:              568,      0,     836,       4,    1187,   0,   0
> UMA RCntSlabs:          568,      0,     202,       1,     202,   0,   0
> UMA Hash:               256,      0,       2,      13,       3,   0,   0
> 
> qemulab# sysctl kern | grep cpu
> kern.ccpu: 0
>   <cpu count="32" mask="0xffffffff">0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
> 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
> 28, 29, 30, 31</cpu>
> kern.smp.cpus: 32
> kern.smp.maxcpus: 32
> 
> Any feedback will be appreciate.
> 
> --
> Giovanni Trematerra
> 
> 
> ==============================================================
> diff -r 36572cbc6817 sys/vm/uma_core.c
> --- a/sys/vm/uma_core.c	Tue Oct 05 04:49:04 2010 -0400
> +++ b/sys/vm/uma_core.c	Mon Nov 01 11:54:38 2010 -0400
> _at__at_ -930,27 +930,36 _at__at_ startup_alloc(uma_zone_t zone, int bytes
>  {
>  	uma_keg_t keg;
>  	uma_slab_t tmps;
> -
> -	keg = zone_first_keg(zone);
> +	u_int16_t pages;
> +
> +	keg   = zone_first_keg(zone);
> +	pages = bytes / PAGE_SIZE;
> +
> +	/* Account for remainder */
> +	if ((pages * PAGE_SIZE) < bytes)
> +		pages++;
> +	KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));

You can use 'pages = howmany(bytes, PAGE_SIZE)' here.  Also, why did you make 
pages a uint16_t instead of an int?  An int is generally more convenient 
unless you really need a uint16_t (and C99 spells it without an _ after the 
leading 'u'.. FYI).

>  	/*
>  	 * Check our small startup cache to see if it has pages remaining.
>  	 */
>  	mtx_lock(&uma_boot_pages_mtx);
> -	if ((tmps = LIST_FIRST(&uma_boot_pages)) != NULL) {
> -		LIST_REMOVE(tmps, us_link);
> +	do {
> +		if ((tmps = LIST_FIRST(&uma_boot_pages)) != NULL)
> +			LIST_REMOVE(tmps, us_link);
> +	} while (--pages && tmps != NULL);
> +	if (tmps != NULL) {
>  		mtx_unlock(&uma_boot_pages_mtx);
>  		*pflag = tmps->us_flags;
>  		return (tmps->us_data);
> -	}
> +	} else if (booted == 0)
> +		panic("UMA: Increase vm.boot_pages");
>  	mtx_unlock(&uma_boot_pages_mtx);
> -	if (booted == 0)
> -		panic("UMA: Increase vm.boot_pages");

Probably best to make the pages test here explicit.  Also, is there any reason 
you can't do this as:

	while (--pages > 0) {
		tmps = LIST_FIRST(&uma_boot_pages);
		if (tmps != NULL)
			LIST_REMOVE(tmps, us_link);
		else if (booted == 0)
			panic(...);
	}

One question btw, how does this work since if you need to allocate more than 1
page it seems that the 'tmps' values for all but the last are simply ignored
and leaked?

Is there some unwritten assumption that the free pages are all virtually
contiguous (and in order), so you can just pull off a run of X and use
the address from X?

>  	/*
>  	 * Now that we've booted reset these users to their real allocator.
>  	 */
>  #ifdef UMA_MD_SMALL_ALLOC
> -	keg->uk_allocf = uma_small_alloc;
> +	keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
>  #else
>  	keg->uk_allocf = page_alloc;
>  #endif
> _at__at_ -1163,7 +1172,7 _at__at_ keg_small_init(uma_keg_t keg)
>  static void
>  keg_large_init(uma_keg_t keg)
>  {
> -	int pages;
> +	u_int16_t pages;
> 
>  	KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
>  	KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
> _at__at_ -1177,12 +1186,15 _at__at_ keg_large_init(uma_keg_t keg)
> 
>  	keg->uk_ppera = pages;
>  	keg->uk_ipers = 1;
> +	keg->uk_rsize = keg->uk_size;
> +
> +	/* We can't do OFFPAGE if we're internal, bail out here. */
> +	if (keg->uk_flags & UMA_ZFLAG_INTERNAL)
> +		return;
> 
>  	keg->uk_flags |= UMA_ZONE_OFFPAGE;
>  	if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
>  		keg->uk_flags |= UMA_ZONE_HASH;
> -
> -	keg->uk_rsize = keg->uk_size;
>  }
> 
>  static void
> _at__at_ -1301,7 +1313,8 _at__at_ keg_ctor(void *mem, int size, void *udat
>  #endif
>  		if (booted == 0)
>  			keg->uk_allocf = startup_alloc;
> -	}
> +	} else if (booted == 0 && (keg->uk_flags & UMA_ZFLAG_INTERNAL))
> +		keg->uk_allocf = startup_alloc;
> 
>  	/*
>  	 * Initialize keg's lock (shared among zones).
> _at__at_ -1330,7 +1343,7 _at__at_ keg_ctor(void *mem, int size, void *udat
>  		if (totsize & UMA_ALIGN_PTR)
>  			totsize = (totsize & ~UMA_ALIGN_PTR) +
>  			    (UMA_ALIGN_PTR + 1);
> -		keg->uk_pgoff = UMA_SLAB_SIZE - totsize;
> +		keg->uk_pgoff = (UMA_SLAB_SIZE * keg->uk_ppera) - totsize;
> 
>  		if (keg->uk_flags & UMA_ZONE_REFCNT)
>  			totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
> _at__at_ -1346,7 +1359,7 _at__at_ keg_ctor(void *mem, int size, void *udat
>  		 * mathematically possible for all cases, so we make
>  		 * sure here anyway.
>  		 */
> -		if (totsize > UMA_SLAB_SIZE) {
> +		if (totsize > UMA_SLAB_SIZE * keg->uk_ppera) {
>  			printf("zone %s ipers %d rsize %d size %d\n",
>  			    zone->uz_name, keg->uk_ipers, keg->uk_rsize,
>  			    keg->uk_size);
> _______________________________________________
> freebsd-current_at_freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-current
> To unsubscribe, send any mail to "freebsd-current-unsubscribe_at_freebsd.org"
> 

-- 
John Baldwin
Received on Mon Nov 01 2010 - 18:16:15 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:40:08 UTC