Adding support for WC (write-combining) memory to bus_dma

From: John Baldwin <jhb@freebsd.org>
Date: Thu, 12 Jul 2012 10:40:27 -0400
I have a need to allocate static DMA memory via bus_dmamem_alloc() that is 
also WC (for a PCI-e device so it can use "nosnoop" transactions).  This is 
similar to what the nvidia driver needs, but in my case it is much cleaner to 
allocate the memory via bus_dma since all of the existing code I am extending 
already uses busdma.

I have a patch to implement this on 8.x for amd64 that I can port to HEAD if 
folks don't object.  What I would really like to do is add a new parameter to 
bus_dmamem_alloc() to specify the memory attribute to use, but I am hesitant 
to break that API.  Instead, I added a new flag similar to the existing 
BUS_DMA_NOCACHE used to allocate UC memory.
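
To make that concrete, here is roughly how a driver would use the new flag.
This is only an illustrative sketch: the tag parameters, DESC_RING_SIZE, and
the sc->ring_* fields are made up; BUS_DMA_WRITE_COMBINING is the flag added
by the patch below.

    /* Hypothetical tag and allocation for a WC descriptor ring. */
    error = bus_dma_tag_create(bus_get_dma_tag(dev),
        PAGE_SIZE, 0,                   /* alignment, boundary */
        BUS_SPACE_MAXADDR,              /* lowaddr */
        BUS_SPACE_MAXADDR,              /* highaddr */
        NULL, NULL,                     /* filter, filterarg */
        DESC_RING_SIZE, 1,              /* maxsize, nsegments */
        DESC_RING_SIZE, 0,              /* maxsegsize, flags */
        NULL, NULL,                     /* lockfunc, lockarg */
        &sc->ring_tag);
    if (error == 0)
        error = bus_dmamem_alloc(sc->ring_tag, &sc->ring_vaddr,
            BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_WRITE_COMBINING,
            &sc->ring_map);

The same sc->ring_map pointer then has to be handed back to bus_dmamem_free(),
which is where the dummy map described below comes in.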

While doing this, I ran into an old bug: if you called bus_dmamem_alloc() with 
BUS_DMA_NOCACHE on a tag that otherwise fell through to using malloc() instead 
of contigmalloc(), bus_dmamem_alloc() would change the memory attribute of the 
entire page via pmap_change_attr(), even though malloc() may only own part of 
that page.  This seems wrong.  Instead, I think that any request for a 
non-default memory attribute should always use contigmalloc().  In fact, even 
better is to call kmem_alloc_contig() directly rather than going through 
contigmalloc().  However, if you change this, then bus_dmamem_free() won't 
always DTRT, as it doesn't have enough state to know whether a small allocation 
should be freed via free() or contigfree() (the latter is required if the 
allocation used a non-default memory attribute).  The fix I used for this was 
to create a new dummy dmamap that bus_dmamem_alloc() returns when it uses 
contigmalloc().  bus_dmamem_free() then checks the passed-in map pointer to 
decide which type of free to perform.  Once this is fixed, the actual WC 
support is rather trivial: it merely consists of passing a different memory 
attribute argument to kmem_alloc_contig().
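
Distilled down (this just restates the free path from the patch below, where
contig_dmamap is a static sentinel that is never used as a real map), the
bus_dmamem_free() logic becomes:

    if (map == NULL)
        free(vaddr, M_DEVBUF);          /* small, default-attribute case */
    else if (map == &contig_dmamap)
        kmem_free(kernel_map, (vm_offset_t)vaddr, dmat->maxsize);
    else
        panic("bus_dmamem_free: Invalid map freed");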

Oh, and using kmem_alloc_contig() instead of the pmap_change_attr() hack is
required if you want to be able to export the same pages to userland via
mmap (e.g. using an OBJT_SG object). :)
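
In case it helps to see that end of things, here is a rough sketch of such an
export via a d_mmap_single method.  This is not part of the patch and is only
illustrative: mydev_mmap_single, struct mydev_softc, and the error handling
are invented, and the vm_pager_allocate() prototype shown is the one with the
ucred argument (as in HEAD).

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/conf.h>
    #include <sys/malloc.h>
    #include <sys/proc.h>
    #include <sys/sglist.h>
    #include <vm/vm.h>
    #include <vm/pmap.h>
    #include <vm/vm_object.h>
    #include <vm/vm_pager.h>

    struct mydev_softc {
        void        *dma_vaddr;     /* from bus_dmamem_alloc() with WC */
        vm_size_t    dma_size;
    };

    static int
    mydev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
        struct vm_object **object, int nprot)
    {
        struct mydev_softc *sc = cdev->si_drv1;
        struct sglist *sg;
        vm_object_t obj;

        if (*offset + size > sc->dma_size)
            return (EINVAL);

        /* Describe the physically contiguous buffer as one segment. */
        sg = sglist_alloc(1, M_WAITOK);
        sglist_append_phys(sg, vtophys(sc->dma_vaddr), sc->dma_size);

        /* Wrap the segment list in an OBJT_SG object. */
        obj = vm_pager_allocate(OBJT_SG, sg, sc->dma_size, nprot, 0,
            curthread->td_ucred);
        if (obj == NULL) {
            sglist_free(sg);
            return (ENOMEM);
        }
        /* The SG pager now holds its own reference to the sglist. */
        sglist_free(sg);

        /* Userland mappings of this object inherit the WC attribute. */
        VM_OBJECT_LOCK(obj);
        vm_object_set_memattr(obj, VM_MEMATTR_WRITE_COMBINING);
        VM_OBJECT_UNLOCK(obj);

        *object = obj;
        return (0);
    }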

Peter, this is somewhat orthogonal (but related) to your bus_dma patch, which is
what prompted me to post this.

Patch for 8 is below.  Porting it to HEAD should be fairly trivial and direct.

Index: amd64/amd64/busdma_machdep.c
===================================================================
--- amd64/amd64/busdma_machdep.c	(revision 225604)
+++ amd64/amd64/busdma_machdep.c	(working copy)
@@ -42,6 +42,8 @@
 #include <sys/sysctl.h>
 
 #include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 
@@ -125,7 +127,7 @@
 
 static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist;
 static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist;
-static struct bus_dmamap nobounce_dmamap;
+static struct bus_dmamap nobounce_dmamap, contig_dmamap;
 
 static void init_bounce_pages(void *dummy);
 static int alloc_bounce_zone(bus_dma_tag_t dmat);
@@ -455,7 +457,7 @@
 int
 bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
-	if (map != NULL && map != &nobounce_dmamap) {
+	if (map != NULL && map != &nobounce_dmamap && map != &contig_dmamap) {
 		if (STAILQ_FIRST(&map->bpages) != NULL) {
 			CTR3(KTR_BUSDMA, "%s: tag %p error %d",
 			    __func__, dmat, EBUSY);
@@ -480,6 +482,7 @@
 bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
 		 bus_dmamap_t *mapp)
 {
+	vm_memattr_t attr;
 	int mflags;
 
 	if (flags & BUS_DMA_NOWAIT)
@@ -502,6 +505,12 @@
 	}
 	if (flags & BUS_DMA_ZERO)
 		mflags |= M_ZERO;
+	if (flags & BUS_DMA_WRITE_COMBINING)
+		attr = VM_MEMATTR_WRITE_COMBINING;
+	else if (flags & BUS_DMA_NOCACHE)
+		attr = VM_MEMATTR_UNCACHEABLE;
+	else
+		attr = VM_MEMATTR_DEFAULT;
 
 	/* 
 	 * XXX:
@@ -513,7 +522,8 @@
 	 */
 	if ((dmat->maxsize <= PAGE_SIZE) &&
 	   (dmat->alignment < dmat->maxsize) &&
-	    dmat->lowaddr >= ptoa((vm_paddr_t)Maxmem)) {
+	    dmat->lowaddr >= ptoa((vm_paddr_t)Maxmem) &&
+	    attr == VM_MEMATTR_DEFAULT) {
 		*vaddr = malloc(dmat->maxsize, M_DEVBUF, mflags);
 	} else {
 		/*
@@ -522,9 +532,10 @@
 		 *     multi-seg allocations yet though.
 		 * XXX Certain AGP hardware does.
 		 */
-		*vaddr = contigmalloc(dmat->maxsize, M_DEVBUF, mflags,
-		    0ul, dmat->lowaddr, dmat->alignment? dmat->alignment : 1ul,
-		    dmat->boundary);
+		*vaddr = (void *)kmem_alloc_contig(kernel_map, dmat->maxsize,
+		    mflags, 0ul, dmat->lowaddr, dmat->alignment ?
+		    dmat->alignment : 1ul, dmat->boundary, attr);
+		*mapp = &contig_dmamap;
 	}
 	if (*vaddr == NULL) {
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
@@ -533,9 +544,6 @@
 	} else if ((uintptr_t)*vaddr & (dmat->alignment - 1)) {
 		printf("bus_dmamem_alloc failed to align memory properly.\n");
 	}
-	if (flags & BUS_DMA_NOCACHE)
-		pmap_change_attr((vm_offset_t)*vaddr, dmat->maxsize,
-		    PAT_UNCACHEABLE);
 	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 	    __func__, dmat, dmat->flags, 0);
 	return (0);
@@ -550,18 +558,15 @@
 {
 	/*
 	 * dmamem does not need to be bounced, so the map should be
-	 * NULL
+	 * NULL if malloc() was used and contig_dmamap if
+	 * contigmalloc() was used.
 	 */
-	if (map != NULL)
+	if (!(map == NULL || map == &contig_dmamap))
 		panic("bus_dmamem_free: Invalid map freed\n");
-	pmap_change_attr((vm_offset_t)vaddr, dmat->maxsize, PAT_WRITE_BACK);
-	if ((dmat->maxsize <= PAGE_SIZE) &&
-	   (dmat->alignment < dmat->maxsize) &&
-	    dmat->lowaddr >= ptoa((vm_paddr_t)Maxmem))
+	if (map == NULL)
 		free(vaddr, M_DEVBUF);
-	else {
-		contigfree(vaddr, dmat->maxsize, M_DEVBUF);
-	}
+	else
+		kmem_free(kernel_map, (vm_offset_t)vaddr, dmat->maxsize);
 	CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags);
 }
 
@@ -588,7 +593,7 @@
 	bus_addr_t paddr;
 	int seg;
 
-	if (map == NULL)
+	if (map == NULL || map == &contig_dmamap)
 		map = &nobounce_dmamap;
 
 	if ((map != &nobounce_dmamap && map->pagesneeded == 0) 
@@ -1119,7 +1124,7 @@
 	struct bounce_page *bpage;
 
 	KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag"));
-	KASSERT(map != NULL && map != &nobounce_dmamap,
+	KASSERT(map != NULL && map != &nobounce_dmamap && map != &contig_dmamap,
 	    ("add_bounce_page: bad map %p", map));
 
 	bz = dmat->bounce_zone;
Index: sys/bus_dma.h
===================================================================
--- sys/bus_dma.h	(revision 225604)
+++ sys/bus_dma.h	(working copy)
@@ -101,6 +101,7 @@
  */
 #define	BUS_DMA_NOWRITE		0x100
 #define	BUS_DMA_NOCACHE		0x200
+#define	BUS_DMA_WRITE_COMBINING	0x400
 
 /*
  * The following flag is a DMA tag hint that the page offset of the
 
-- 
John Baldwin