On Fri, Feb 14, 2014 at 03:55:47PM +0200, Andriy Gapon wrote: > > I am using "radeonkms" on a machine with quite mixed, diverse and varying loads. > Sometimes I get an X server crash like the following: > > kernel: [TTM] Unable to allocate page > kernel: error: [drm:pid1815:radeon_gem_object_create] *ERROR* Failed to allocate > GEM object (25591808, 2, 4096, -12) > kernel: [TTM] Unable to allocate page > kernel: [TTM] Buffer eviction failed > kernel: vm_fault: pager read error, pid 1815 (Xorg) > kernel: pid 1815 (Xorg), uid 0: exited on signal 11 (core dumped) > > At the same time there was the following in the X server's stderr: > Failed to allocate : > size : 25589760 bytes > alignment : 256 bytes > domains : 2 > > I wonder if this is a generic problem for example caused by severe resource > exhaustion or if this is something where FreeBSD specific code does not do its best. > In particular, it caught my attention that ttm_get_pages() effectively has > semantics of M_NOWAIT as it never retries allocation failures in > vm_page_alloc_contig(). It seems you are right that Linux tries much harder to allocate the page than the current FreeBSD TTM port does. Can you try the following patch? I have not tested it, only compiled it. Please keep witness enabled. diff --git a/sys/dev/drm2/ttm/ttm_bo.c b/sys/dev/drm2/ttm/ttm_bo.c index d87940c..748c969 100644 --- a/sys/dev/drm2/ttm/ttm_bo.c +++ b/sys/dev/drm2/ttm/ttm_bo.c @@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$"); #include <dev/drm2/ttm/ttm_module.h> #include <dev/drm2/ttm/ttm_bo_driver.h> #include <dev/drm2/ttm/ttm_placement.h> +#include <vm/vm_pageout.h> #define TTM_ASSERT_LOCKED(param) #define TTM_DEBUG(fmt, arg...) 
@@ -1489,15 +1490,23 @@ int ttm_bo_global_init(struct drm_global_reference *ref) container_of(ref, struct ttm_bo_global_ref, ref); struct ttm_bo_global *glob = ref->object; int ret; + int tries; sx_init(&glob->device_list_mutex, "ttmdlm"); mtx_init(&glob->lru_lock, "ttmlru", NULL, MTX_DEF); glob->mem_glob = bo_ref->mem_glob; + tries = 0; +retry: glob->dummy_read_page = vm_page_alloc_contig(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ, 1, 0, VM_MAX_ADDRESS, PAGE_SIZE, 0, VM_MEMATTR_UNCACHEABLE); if (unlikely(glob->dummy_read_page == NULL)) { + if (tries < 1) { + vm_pageout_grow_cache(tries, 0, VM_MAX_ADDRESS); + tries++; + goto retry; + } ret = -ENOMEM; goto out_no_drp; } diff --git a/sys/dev/drm2/ttm/ttm_page_alloc.c b/sys/dev/drm2/ttm/ttm_page_alloc.c index 3c0f18a..29a3621 100644 --- a/sys/dev/drm2/ttm/ttm_page_alloc.c +++ b/sys/dev/drm2/ttm/ttm_page_alloc.c @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #include <dev/drm2/drmP.h> #include <dev/drm2/ttm/ttm_bo_driver.h> #include <dev/drm2/ttm/ttm_page_alloc.h> +#include <vm/vm_pageout.h> #ifdef TTM_HAS_AGP #include <asm/agp.h> @@ -476,6 +477,14 @@ static void ttm_handle_caching_state_failure(struct pglist *pages, } } +static vm_paddr_t +ttm_alloc_high_bound(int ttm_alloc_flags) +{ + + return ((ttm_alloc_flags & TTM_PAGE_FLAG_DMA32) ? 0xffffffff : + VM_MAX_ADDRESS); +} + /** * Allocate new pages with correct caching. * @@ -491,6 +500,7 @@ static int ttm_alloc_new_pages(struct pglist *pages, int ttm_alloc_flags, unsigned i, cpages, aflags; unsigned max_cpages = min(count, (unsigned)(PAGE_SIZE/sizeof(vm_page_t))); + int tries; aflags = VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ | ((ttm_alloc_flags & TTM_PAGE_FLAG_ZERO_ALLOC) != 0 ? 
@@ -501,11 +511,18 @@ static int ttm_alloc_new_pages(struct pglist *pages, int ttm_alloc_flags, M_WAITOK | M_ZERO); for (i = 0, cpages = 0; i < count; ++i) { + tries = 0; +retry: p = vm_page_alloc_contig(NULL, 0, aflags, 1, 0, - (ttm_alloc_flags & TTM_PAGE_FLAG_DMA32) ? 0xffffffff : - VM_MAX_ADDRESS, PAGE_SIZE, 0, - ttm_caching_state_to_vm(cstate)); + ttm_alloc_high_bound(ttm_alloc_flags), + PAGE_SIZE, 0, ttm_caching_state_to_vm(cstate)); if (!p) { + if (tries < 3) { + vm_pageout_grow_cache(tries, 0, + ttm_alloc_high_bound(ttm_alloc_flags)); + tries++; + goto retry; + } printf("[TTM] Unable to get page %u\n", i); /* store already allocated pages in the pool after @@ -707,6 +724,7 @@ static int ttm_get_pages(vm_page_t *pages, unsigned npages, int flags, int gfp_flags, aflags; unsigned count; int r; + int tries; aflags = VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | ((flags & TTM_PAGE_FLAG_ZERO_ALLOC) != 0 ? VM_ALLOC_ZERO : 0); @@ -714,11 +732,18 @@ static int ttm_get_pages(vm_page_t *pages, unsigned npages, int flags, /* No pool for cached pages */ if (pool == NULL) { for (r = 0; r < npages; ++r) { + tries = 0; +retry: p = vm_page_alloc_contig(NULL, 0, aflags, 1, 0, - (flags & TTM_PAGE_FLAG_DMA32) ? 0xffffffff : - VM_MAX_ADDRESS, PAGE_SIZE, + ttm_alloc_high_bound(flags), PAGE_SIZE, 0, ttm_caching_state_to_vm(cstate)); if (!p) { + if (tries < 3) { + vm_pageout_grow_cache(tries, 0, + ttm_alloc_high_bound(flags)); + tries++; + goto retry; + } printf("[TTM] Unable to allocate page\n"); return -ENOMEM; }
This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:40:46 UTC