Re: [CFT/CFR] machine independent sf_bufs

From: Michael Tuexen <tuexen_at_freebsd.org>
Date: Tue, 29 Jul 2014 13:37:18 +0200
On 29 Jul 2014, at 12:41, Gleb Smirnoff <glebius_at_FreeBSD.org> wrote:

> Hi!
> 
> Sorry for top quoting, this is to annoy you :) I got zero
> replies on the below email during a week. I'd really appreciate
> testing on different platforms. Any takers?
I can try to test it on a Raspberry Pi; I am building a patched kernel
right now.

Best regards
Michael
> 
> On Sat, Jul 19, 2014 at 10:27:25AM +0400, Gleb Smirnoff wrote:
> T>   Hi!
> T> 
> T>   we've got a lot of common code in sys/*/*/vm_machdep.c wrt the
> T> sf_buf allocation. I have gathered it into kern/subr_sfbuf.c.
> T> 
> T> o No MD code left in sys/*/*/vm_machdep.c.
> T> o The arches that have a physical map have their implementation in
> T>   machine/sf_buf.h
> T> o The arches that need sf_bufs use subr_sfbuf.c, optionally having
> T>   some stuff in machine/sf_buf.h
> T> 
> T> I can test only i386. I'd be grateful for testing:
> T> 
> T> arm
> T> mips
> T> mips64
> T> sparc64
> T> powerpc
> T> i386 XEN
> T> 
> T> The test is a simple use of any application or test that uses sendfile(2).
> T> The box shouldn't crash :) of course, and after the end of a test there
> T> should be no evidence of an sf_buf leak (observed via netstat -m).
> T> 
> T> -- 
> T> Totus tuus, Glebius.
> 
> T> Index: sys/amd64/include/sf_buf.h
> T> ===================================================================
> T> --- sys/amd64/include/sf_buf.h	(revision 268750)
> T> +++ sys/amd64/include/sf_buf.h	(working copy)
> T> @@ -29,10 +29,6 @@
> T>  #ifndef _MACHINE_SF_BUF_H_
> T>  #define _MACHINE_SF_BUF_H_
> T>  
> T> -#include <vm/vm.h>
> T> -#include <vm/vm_param.h>
> T> -#include <vm/vm_page.h>
> T> -
> T>  /*
> T>   * On this machine, the only purpose for which sf_buf is used is to implement
> T>   * an opaque pointer required by the machine-independent parts of the kernel.
> T> @@ -39,21 +35,7 @@
> T>   * That pointer references the vm_page that is "mapped" by the sf_buf.  The
> T>   * actual mapping is provided by the direct virtual-to-physical mapping.  
> T>   */
> T> -struct sf_buf;
> T> -
> T> -static inline struct sf_buf *
> T> -sf_buf_alloc(struct vm_page *m, int pri)
> T> -{
> T> -
> T> -	return ((struct sf_buf *)m);
> T> -}
> T> -
> T> -static inline void
> T> -sf_buf_free(struct sf_buf *sf)
> T> -{
> T> -}
> T> -
> T> -static __inline vm_offset_t
> T> +static inline vm_offset_t
> T>  sf_buf_kva(struct sf_buf *sf)
> T>  {
> T>  
> T> @@ -60,11 +42,10 @@ sf_buf_kva(struct sf_buf *sf)
> T>  	return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS((vm_page_t)sf)));
> T>  }
> T>  
> T> -static __inline vm_page_t
> T> +static inline vm_page_t
> T>  sf_buf_page(struct sf_buf *sf)
> T>  {
> T>  
> T>  	return ((vm_page_t)sf);
> T>  }
> T> -
> T>  #endif /* !_MACHINE_SF_BUF_H_ */
> T> Index: sys/arm/arm/vm_machdep.c
> T> ===================================================================
> T> --- sys/arm/arm/vm_machdep.c	(revision 268750)
> T> +++ sys/arm/arm/vm_machdep.c	(working copy)
> T> @@ -50,7 +50,6 @@ __FBSDID("$FreeBSD$");
> T>  #include <sys/mbuf.h>
> T>  #include <sys/proc.h>
> T>  #include <sys/socketvar.h>
> T> -#include <sys/sf_buf.h>
> T>  #include <sys/syscall.h>
> T>  #include <sys/sysctl.h>
> T>  #include <sys/sysent.h>
> T> @@ -83,43 +82,7 @@ __FBSDID("$FreeBSD$");
> T>  CTASSERT(sizeof(struct switchframe) == 24);
> T>  CTASSERT(sizeof(struct trapframe) == 80);
> T>  
> T> -#ifndef NSFBUFS
> T> -#define NSFBUFS		(512 + maxusers * 16)
> T> -#endif
> T> -
> T> -static int nsfbufs;
> T> -static int nsfbufspeak;
> T> -static int nsfbufsused;
> T> -
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
> T> -    "Maximum number of sendfile(2) sf_bufs available");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
> T> -    "Number of sendfile(2) sf_bufs at peak usage");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
> T> -    "Number of sendfile(2) sf_bufs in use");
> T> -
> T> -static void     sf_buf_init(void *arg);
> T> -SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
> T> -
> T> -LIST_HEAD(sf_head, sf_buf);
> T> -
> T>  /*
> T> - * A hash table of active sendfile(2) buffers
> T> - */
> T> -static struct sf_head *sf_buf_active;
> T> -static u_long sf_buf_hashmask;
> T> -
> T> -#define SF_BUF_HASH(m)  (((m) - vm_page_array) & sf_buf_hashmask)
> T> -
> T> -static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
> T> -static u_int    sf_buf_alloc_want;
> T> -
> T> -/*
> T> - * A lock used to synchronize access to the hash table and free list
> T> - */
> T> -static struct mtx sf_buf_lock;
> T> -
> T> -/*
> T>   * Finish a fork operation, with process p2 nearly set up.
> T>   * Copy and update the pcb, set up the stack so that the child
> T>   * ready to run and return to user mode.
> T> @@ -184,107 +147,7 @@ cpu_thread_swapout(struct thread *td)
> T>  {
> T>  }
> T>  
> T> -/*
> T> - * Detatch mapped page and release resources back to the system.
> T> - */
> T>  void
> T> -sf_buf_free(struct sf_buf *sf)
> T> -{
> T> -
> T> -	 mtx_lock(&sf_buf_lock);
> T> -	 sf->ref_count--;
> T> -	 if (sf->ref_count == 0) {
> T> -		 TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
> T> -		 nsfbufsused--;
> T> -		 pmap_kremove(sf->kva);
> T> -		 sf->m = NULL;
> T> -		 LIST_REMOVE(sf, list_entry);
> T> -		 if (sf_buf_alloc_want > 0)
> T> -			 wakeup(&sf_buf_freelist);
> T> -	 }
> T> -	 mtx_unlock(&sf_buf_lock);
> T> -}
> T> -
> T> -/*
> T> - * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
> T> - */
> T> -static void
> T> -sf_buf_init(void *arg)
> T> -{
> T> -	struct sf_buf *sf_bufs;
> T> -	vm_offset_t sf_base;
> T> -	int i;
> T> -
> T> -	nsfbufs = NSFBUFS;
> T> -	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
> T> -		
> T> -	sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
> T> -	TAILQ_INIT(&sf_buf_freelist);
> T> -	sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
> T> -	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
> T> -	    M_NOWAIT | M_ZERO);
> T> -	for (i = 0; i < nsfbufs; i++) {
> T> -		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
> T> -		TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
> T> -	}
> T> -	sf_buf_alloc_want = 0;
> T> -	mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
> T> -}
> T> -
> T> -/*
> T> - * Get an sf_buf from the freelist. Will block if none are available.
> T> - */
> T> -struct sf_buf *
> T> -sf_buf_alloc(struct vm_page *m, int flags)
> T> -{
> T> -	struct sf_head *hash_list;
> T> -	struct sf_buf *sf;
> T> -	int error;
> T> -
> T> -	hash_list = &sf_buf_active[SF_BUF_HASH(m)];
> T> -	mtx_lock(&sf_buf_lock);
> T> -	LIST_FOREACH(sf, hash_list, list_entry) {
> T> -		if (sf->m == m) {
> T> -			sf->ref_count++;
> T> -			if (sf->ref_count == 1) {
> T> -				TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> -				nsfbufsused++;
> T> -				nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -			}
> T> -			goto done;
> T> -		}
> T> -	}
> T> -	while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
> T> -		if (flags & SFB_NOWAIT)
> T> -			goto done;
> T> -		sf_buf_alloc_want++;
> T> -		SFSTAT_INC(sf_allocwait);
> T> -		error = msleep(&sf_buf_freelist, &sf_buf_lock,
> T> -		    (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
> T> -		sf_buf_alloc_want--;
> T> -	
> T> -
> T> -		/*
> T> -		 * If we got a signal, don't risk going back to sleep.
> T> -		 */
> T> -		if (error)
> T> -			goto done;
> T> -	}
> T> -	TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> -	if (sf->m != NULL)
> T> -		LIST_REMOVE(sf, list_entry);
> T> -	LIST_INSERT_HEAD(hash_list, sf, list_entry);
> T> -	sf->ref_count = 1;
> T> -	sf->m = m;
> T> -	nsfbufsused++;
> T> -	nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -	pmap_kenter(sf->kva, VM_PAGE_TO_PHYS(sf->m));
> T> -done:
> T> -	mtx_unlock(&sf_buf_lock);
> T> -	return (sf);
> T> -}
> T> -
> T> -void
> T>  cpu_set_syscall_retval(struct thread *td, int error)
> T>  {
> T>  	struct trapframe *frame;
> T> Index: sys/arm/include/sf_buf.h
> T> ===================================================================
> T> --- sys/arm/include/sf_buf.h	(revision 268750)
> T> +++ sys/arm/include/sf_buf.h	(working copy)
> T> @@ -29,33 +29,18 @@
> T>  #ifndef _MACHINE_SF_BUF_H_
> T>  #define _MACHINE_SF_BUF_H_
> T>  
> T> -#include <sys/queue.h>
> T> -
> T> -struct vm_page;
> T> -
> T> -struct sf_buf {
> T> -	LIST_ENTRY(sf_buf) list_entry;	/* list of buffers */
> T> -	TAILQ_ENTRY(sf_buf) free_entry;	/* list of buffers */
> T> -	struct		vm_page *m;	/* currently mapped page */
> T> -	vm_offset_t	kva;		/* va of mapping */
> T> -	int		ref_count;	/* usage of this mapping */
> T> -};
> T> -
> T> -static __inline vm_offset_t
> T> -sf_buf_kva(struct sf_buf *sf)
> T> +static inline void
> T> +sf_buf_map(struct sf_buf *sf, int flags)
> T>  {
> T>  
> T> -	return (sf->kva);
> T> +	pmap_kenter(sf->kva, VM_PAGE_TO_PHYS(sf->m));
> T>  }
> T>  
> T> -static __inline struct vm_page *
> T> -sf_buf_page(struct sf_buf *sf)
> T> +static inline int
> T> +sf_buf_unmap(struct sf_buf *sf)
> T>  {
> T>  
> T> -	return (sf->m);
> T> +	pmap_kremove(sf->kva);
> T> +	return (1);
> T>  }
> T> -
> T> -struct sf_buf *	sf_buf_alloc(struct vm_page *m, int flags);
> T> -void sf_buf_free(struct sf_buf *sf);
> T> -
> T>  #endif /* !_MACHINE_SF_BUF_H_ */
> T> Index: sys/arm/include/vmparam.h
> T> ===================================================================
> T> --- sys/arm/include/vmparam.h	(revision 268750)
> T> +++ sys/arm/include/vmparam.h	(working copy)
> T> @@ -170,4 +170,7 @@ extern vm_offset_t vm_max_kernel_address;
> T>  #define	VM_MAX_AUTOTUNE_MAXUSERS	384
> T>  #endif
> T>  
> T> +#define	SFBUF
> T> +#define	SFBUF_MAP
> T> +
> T>  #endif	/* _MACHINE_VMPARAM_H_ */
> T> Index: sys/conf/files.arm
> T> ===================================================================
> T> --- sys/conf/files.arm	(revision 268750)
> T> +++ sys/conf/files.arm	(working copy)
> T> @@ -77,6 +77,7 @@ font.h				optional	sc			\
> T>  	clean	"font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8"
> T>  kern/subr_busdma_bufalloc.c	standard
> T>  kern/subr_dummy_vdso_tc.c	standard
> T> +kern/subr_sfbuf.c		standard
> T>  libkern/arm/aeabi_unwind.c	standard
> T>  libkern/arm/divsi3.S		standard
> T>  libkern/arm/ffs.S		standard
> T> Index: sys/conf/files.i386
> T> ===================================================================
> T> --- sys/conf/files.i386	(revision 268750)
> T> +++ sys/conf/files.i386	(working copy)
> T> @@ -520,6 +520,7 @@ isa/vga_isa.c			optional vga
> T>  kern/kern_clocksource.c		standard
> T>  kern/imgact_aout.c		optional compat_aout
> T>  kern/imgact_gzip.c		optional gzip
> T> +kern/subr_sfbuf.c		standard
> T>  libkern/divdi3.c		standard
> T>  libkern/flsll.c			standard
> T>  libkern/memmove.c		standard
> T> Index: sys/conf/files.mips
> T> ===================================================================
> T> --- sys/conf/files.mips	(revision 268750)
> T> +++ sys/conf/files.mips	(working copy)
> T> @@ -51,6 +51,7 @@ mips/mips/vm_machdep.c			standard
> T>  kern/kern_clocksource.c			standard
> T>  kern/link_elf_obj.c			standard
> T>  kern/subr_dummy_vdso_tc.c		standard
> T> +kern/subr_sfbuf.c			optional	mips | mipsel | mipsn32
> T>  
> T>  # gcc/clang runtime
> T>  libkern/ffsl.c				standard
> T> Index: sys/conf/files.pc98
> T> ===================================================================
> T> --- sys/conf/files.pc98	(revision 268750)
> T> +++ sys/conf/files.pc98	(working copy)
> T> @@ -205,6 +205,7 @@ i386/svr4/svr4_machdep.c	optional compat_svr4
> T>  kern/kern_clocksource.c		standard
> T>  kern/imgact_aout.c		optional compat_aout
> T>  kern/imgact_gzip.c		optional gzip
> T> +kern/subr_sfbuf.c		standard
> T>  libkern/divdi3.c		standard
> T>  libkern/flsll.c			standard
> T>  libkern/memmove.c		standard
> T> Index: sys/conf/files.powerpc
> T> ===================================================================
> T> --- sys/conf/files.powerpc	(revision 268750)
> T> +++ sys/conf/files.powerpc	(working copy)
> T> @@ -71,6 +71,7 @@ dev/vt/hw/ofwfb/ofwfb.c		optional	vt aim
> T>  kern/kern_clocksource.c		standard
> T>  kern/subr_dummy_vdso_tc.c	standard
> T>  kern/syscalls.c			optional	ktr
> T> +kern/subr_sfbuf.c		standard
> T>  libkern/ashldi3.c		optional	powerpc
> T>  libkern/ashrdi3.c		optional	powerpc
> T>  libkern/bcmp.c			standard
> T> Index: sys/conf/files.sparc64
> T> ===================================================================
> T> --- sys/conf/files.sparc64	(revision 268750)
> T> +++ sys/conf/files.sparc64	(working copy)
> T> @@ -63,6 +63,7 @@ dev/uart/uart_kbd_sun.c		optional	uart sc | vt
> T>  kern/kern_clocksource.c		standard
> T>  kern/subr_dummy_vdso_tc.c	standard
> T>  kern/syscalls.c			optional	ktr
> T> +kern/subr_sfbuf.c		standard
> T>  libkern/ffs.c			standard
> T>  libkern/ffsl.c			standard
> T>  libkern/fls.c			standard
> T> Index: sys/i386/i386/vm_machdep.c
> T> ===================================================================
> T> --- sys/i386/i386/vm_machdep.c	(revision 268750)
> T> +++ sys/i386/i386/vm_machdep.c	(working copy)
> T> @@ -118,38 +118,6 @@ static u_int	cpu_reset_proxyid;
> T>  static volatile u_int	cpu_reset_proxy_active;
> T>  #endif
> T>  
> T> -static int nsfbufs;
> T> -static int nsfbufspeak;
> T> -static int nsfbufsused;
> T> -
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
> T> -    "Maximum number of sendfile(2) sf_bufs available");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
> T> -    "Number of sendfile(2) sf_bufs at peak usage");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
> T> -    "Number of sendfile(2) sf_bufs in use");
> T> -
> T> -static void	sf_buf_init(void *arg);
> T> -SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
> T> -
> T> -LIST_HEAD(sf_head, sf_buf);
> T> -
> T> -/*
> T> - * A hash table of active sendfile(2) buffers
> T> - */
> T> -static struct sf_head *sf_buf_active;
> T> -static u_long sf_buf_hashmask;
> T> -
> T> -#define	SF_BUF_HASH(m)	(((m) - vm_page_array) & sf_buf_hashmask)
> T> -
> T> -static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
> T> -static u_int	sf_buf_alloc_want;
> T> -
> T> -/*
> T> - * A lock used to synchronize access to the hash table and free list
> T> - */
> T> -static struct mtx sf_buf_lock;
> T> -
> T>  extern int	_ucodesel, _udatasel;
> T>  
> T>  /*
> T> @@ -750,122 +718,13 @@ cpu_reset_real()
> T>  }
> T>  
> T>  /*
> T> - * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
> T> - */
> T> -static void
> T> -sf_buf_init(void *arg)
> T> -{
> T> -	struct sf_buf *sf_bufs;
> T> -	vm_offset_t sf_base;
> T> -	int i;
> T> -
> T> -	nsfbufs = NSFBUFS;
> T> -	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
> T> -
> T> -	sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
> T> -	TAILQ_INIT(&sf_buf_freelist);
> T> -	sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
> T> -	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
> T> -	    M_NOWAIT | M_ZERO);
> T> -	for (i = 0; i < nsfbufs; i++) {
> T> -		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
> T> -		TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
> T> -	}
> T> -	sf_buf_alloc_want = 0;
> T> -	mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
> T> -}
> T> -
> T> -/*
> T> - * Invalidate the cache lines that may belong to the page, if
> T> - * (possibly old) mapping of the page by sf buffer exists.  Returns
> T> - * TRUE when mapping was found and cache invalidated.
> T> - */
> T> -boolean_t
> T> -sf_buf_invalidate_cache(vm_page_t m)
> T> -{
> T> -	struct sf_head *hash_list;
> T> -	struct sf_buf *sf;
> T> -	boolean_t ret;
> T> -
> T> -	hash_list = &sf_buf_active[SF_BUF_HASH(m)];
> T> -	ret = FALSE;
> T> -	mtx_lock(&sf_buf_lock);
> T> -	LIST_FOREACH(sf, hash_list, list_entry) {
> T> -		if (sf->m == m) {
> T> -			/*
> T> -			 * Use pmap_qenter to update the pte for
> T> -			 * existing mapping, in particular, the PAT
> T> -			 * settings are recalculated.
> T> -			 */
> T> -			pmap_qenter(sf->kva, &m, 1);
> T> -			pmap_invalidate_cache_range(sf->kva, sf->kva +
> T> -			    PAGE_SIZE);
> T> -			ret = TRUE;
> T> -			break;
> T> -		}
> T> -	}
> T> -	mtx_unlock(&sf_buf_lock);
> T> -	return (ret);
> T> -}
> T> -
> T> -/*
> T>   * Get an sf_buf from the freelist.  May block if none are available.
> T>   */
> T> -struct sf_buf *
> T> -sf_buf_alloc(struct vm_page *m, int flags)
> T> +void
> T> +sf_buf_map(struct sf_buf *sf, int flags)
> T>  {
> T>  	pt_entry_t opte, *ptep;
> T> -	struct sf_head *hash_list;
> T> -	struct sf_buf *sf;
> T> -#ifdef SMP
> T> -	cpuset_t other_cpus;
> T> -	u_int cpuid;
> T> -#endif
> T> -	int error;
> T>  
> T> -	KASSERT(curthread->td_pinned > 0 || (flags & SFB_CPUPRIVATE) == 0,
> T> -	    ("sf_buf_alloc(SFB_CPUPRIVATE): curthread not pinned"));
> T> -	hash_list = &sf_buf_active[SF_BUF_HASH(m)];
> T> -	mtx_lock(&sf_buf_lock);
> T> -	LIST_FOREACH(sf, hash_list, list_entry) {
> T> -		if (sf->m == m) {
> T> -			sf->ref_count++;
> T> -			if (sf->ref_count == 1) {
> T> -				TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> -				nsfbufsused++;
> T> -				nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -			}
> T> -#ifdef SMP
> T> -			goto shootdown;	
> T> -#else
> T> -			goto done;
> T> -#endif
> T> -		}
> T> -	}
> T> -	while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
> T> -		if (flags & SFB_NOWAIT)
> T> -			goto done;
> T> -		sf_buf_alloc_want++;
> T> -		SFSTAT_INC(sf_allocwait);
> T> -		error = msleep(&sf_buf_freelist, &sf_buf_lock,
> T> -		    (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
> T> -		sf_buf_alloc_want--;
> T> -
> T> -		/*
> T> -		 * If we got a signal, don't risk going back to sleep. 
> T> -		 */
> T> -		if (error)
> T> -			goto done;
> T> -	}
> T> -	TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> -	if (sf->m != NULL)
> T> -		LIST_REMOVE(sf, list_entry);
> T> -	LIST_INSERT_HEAD(hash_list, sf, list_entry);
> T> -	sf->ref_count = 1;
> T> -	sf->m = m;
> T> -	nsfbufsused++;
> T> -	nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -
> T>  	/*
> T>  	 * Update the sf_buf's virtual-to-physical mapping, flushing the
> T>  	 * virtual address from the TLB.  Since the reference count for 
> T> @@ -876,11 +735,11 @@ cpu_reset_real()
> T>  	ptep = vtopte(sf->kva);
> T>  	opte = *ptep;
> T>  #ifdef XEN
> T> -       PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(m)) | pgeflag
> T> -	   | PG_RW | PG_V | pmap_cache_bits(m->md.pat_mode, 0));
> T> +       PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(sf->m)) | pgeflag
> T> +	   | PG_RW | PG_V | pmap_cache_bits(sf->m->md.pat_mode, 0));
> T>  #else
> T> -	*ptep = VM_PAGE_TO_PHYS(m) | pgeflag | PG_RW | PG_V |
> T> -	    pmap_cache_bits(m->md.pat_mode, 0);
> T> +	*ptep = VM_PAGE_TO_PHYS(sf->m) | pgeflag | PG_RW | PG_V |
> T> +	    pmap_cache_bits(sf->m->md.pat_mode, 0);
> T>  #endif
> T>  
> T>  	/*
> T> @@ -892,7 +751,21 @@ cpu_reset_real()
> T>  #ifdef SMP
> T>  	if ((opte & (PG_V | PG_A)) ==  (PG_V | PG_A))
> T>  		CPU_ZERO(&sf->cpumask);
> T> -shootdown:
> T> +
> T> +	sf_buf_shootdown(sf, flags);
> T> +#else
> T> +	if ((opte & (PG_V | PG_A)) ==  (PG_V | PG_A))
> T> +		pmap_invalidate_page(kernel_pmap, sf->kva);
> T> +#endif
> T> +}
> T> +
> T> +#ifdef SMP
> T> +void
> T> +sf_buf_shootdown(struct sf_buf *sf, int flags)
> T> +{
> T> +	cpuset_t other_cpus;
> T> +	u_int cpuid;
> T> +
> T>  	sched_pin();
> T>  	cpuid = PCPU_GET(cpuid);
> T>  	if (!CPU_ISSET(cpuid, &sf->cpumask)) {
> T> @@ -909,42 +782,50 @@ cpu_reset_real()
> T>  		}
> T>  	}
> T>  	sched_unpin();
> T> +}
> T> +#endif
> T> +
> T> +/*
> T> + * MD part of sf_buf_free().
> T> + */
> T> +int
> T> +sf_buf_unmap(struct sf_buf *sf)
> T> +{
> T> +#ifdef XEN
> T> +	/*
> T> +	 * Xen doesn't like having dangling R/W mappings
> T> +	 */
> T> +	pmap_qremove(sf->kva, 1);
> T> +	return (1);
> T>  #else
> T> -	if ((opte & (PG_V | PG_A)) ==  (PG_V | PG_A))
> T> -		pmap_invalidate_page(kernel_pmap, sf->kva);
> T> +	return (0);
> T>  #endif
> T> -done:
> T> -	mtx_unlock(&sf_buf_lock);
> T> -	return (sf);
> T>  }
> T>  
> T> +static void
> T> +sf_buf_invalidate(struct sf_buf *sf)
> T> +{
> T> +	vm_page_t m = sf->m;
> T> +
> T> +	/*
> T> +	 * Use pmap_qenter to update the pte for
> T> +	 * existing mapping, in particular, the PAT
> T> +	 * settings are recalculated.
> T> +	 */
> T> +	pmap_qenter(sf->kva, &m, 1);
> T> +	pmap_invalidate_cache_range(sf->kva, sf->kva + PAGE_SIZE);
> T> +}
> T> +
> T>  /*
> T> - * Remove a reference from the given sf_buf, adding it to the free
> T> - * list when its reference count reaches zero.  A freed sf_buf still,
> T> - * however, retains its virtual-to-physical mapping until it is
> T> - * recycled or reactivated by sf_buf_alloc(9).
> T> + * Invalidate the cache lines that may belong to the page, if
> T> + * (possibly old) mapping of the page by sf buffer exists.  Returns
> T> + * TRUE when mapping was found and cache invalidated.
> T>   */
> T> -void
> T> -sf_buf_free(struct sf_buf *sf)
> T> +boolean_t
> T> +sf_buf_invalidate_cache(vm_page_t m)
> T>  {
> T>  
> T> -	mtx_lock(&sf_buf_lock);
> T> -	sf->ref_count--;
> T> -	if (sf->ref_count == 0) {
> T> -		TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
> T> -		nsfbufsused--;
> T> -#ifdef XEN
> T> -/*
> T> - * Xen doesn't like having dangling R/W mappings
> T> - */
> T> -		pmap_qremove(sf->kva, 1);
> T> -		sf->m = NULL;
> T> -		LIST_REMOVE(sf, list_entry);
> T> -#endif
> T> -		if (sf_buf_alloc_want > 0)
> T> -			wakeup(&sf_buf_freelist);
> T> -	}
> T> -	mtx_unlock(&sf_buf_lock);
> T> +	return (sf_buf_process_page(m, sf_buf_invalidate));
> T>  }
> T>  
> T>  /*
> T> Index: sys/i386/include/sf_buf.h
> T> ===================================================================
> T> --- sys/i386/include/sf_buf.h	(revision 268750)
> T> +++ sys/i386/include/sf_buf.h	(working copy)
> T> @@ -1,5 +1,5 @@
> T>  /*-
> T> - * Copyright (c) 2003, 2005 Alan L. Cox <alc_at_cs.rice.edu>
> T> + * Copyright (c) 2014 Gleb Smirnoff <glebius_at_FreeBSD.org>
> T>   * All rights reserved.
> T>   *
> T>   * Redistribution and use in source and binary forms, with or without
> T> @@ -29,39 +29,8 @@
> T>  #ifndef _MACHINE_SF_BUF_H_
> T>  #define _MACHINE_SF_BUF_H_
> T>  
> T> -#include <sys/_cpuset.h>
> T> -#include <sys/queue.h>
> T> +void sf_buf_map(struct sf_buf *, int);
> T> +int sf_buf_unmap(struct sf_buf *);
> T> +boolean_t sf_buf_invalidate_cache(vm_page_t);
> T>  
> T> -struct vm_page;
> T> -
> T> -struct sf_buf {
> T> -	LIST_ENTRY(sf_buf) list_entry;	/* list of buffers */
> T> -	TAILQ_ENTRY(sf_buf) free_entry;	/* list of buffers */
> T> -	struct		vm_page *m;	/* currently mapped page */
> T> -	vm_offset_t	kva;		/* va of mapping */
> T> -	int		ref_count;	/* usage of this mapping */
> T> -#ifdef SMP
> T> -	cpuset_t	cpumask;	/* cpus on which mapping is valid */
> T> -#endif
> T> -};
> T> -
> T> -struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags);
> T> -void sf_buf_free(struct sf_buf *sf);
> T> -
> T> -static __inline vm_offset_t
> T> -sf_buf_kva(struct sf_buf *sf)
> T> -{
> T> -
> T> -	return (sf->kva);
> T> -}
> T> -
> T> -static __inline struct vm_page *
> T> -sf_buf_page(struct sf_buf *sf)
> T> -{
> T> -
> T> -	return (sf->m);
> T> -}
> T> -
> T> -boolean_t sf_buf_invalidate_cache(vm_page_t m);
> T> -
> T>  #endif /* !_MACHINE_SF_BUF_H_ */
> T> Index: sys/i386/include/vmparam.h
> T> ===================================================================
> T> --- sys/i386/include/vmparam.h	(revision 268750)
> T> +++ sys/i386/include/vmparam.h	(working copy)
> T> @@ -198,4 +198,9 @@
> T>  #define VM_MAX_AUTOTUNE_MAXUSERS 384
> T>  #endif
> T>  
> T> +#define	SFBUF
> T> +#define	SFBUF_MAP
> T> +#define	SFBUF_CPUSET
> T> +#define	SFBUF_PROCESS_PAGE
> T> +
> T>  #endif /* _MACHINE_VMPARAM_H_ */
> T> Index: sys/kern/subr_sfbuf.c
> T> ===================================================================
> T> --- sys/kern/subr_sfbuf.c	(revision 0)
> T> +++ sys/kern/subr_sfbuf.c	(working copy)
> T> @@ -0,0 +1,226 @@
> T> +/*-
> T> + * Copyright (c) 2014 Gleb Smirnoff <glebius_at_FreeBSD.org>
> T> + * Copyright (c) 2003, 2005 Alan L. Cox <alc_at_cs.rice.edu>
> T> + * All rights reserved.
> T> + *
> T> + * Redistribution and use in source and binary forms, with or without
> T> + * modification, are permitted provided that the following conditions
> T> + * are met:
> T> + * 1. Redistributions of source code must retain the above copyright
> T> + *    notice, this list of conditions and the following disclaimer.
> T> + * 2. Redistributions in binary form must reproduce the above copyright
> T> + *    notice, this list of conditions and the following disclaimer in the
> T> + *    documentation and/or other materials provided with the distribution.
> T> + *
> T> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> T> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> T> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> T> + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> T> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> T> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> T> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> T> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> T> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> T> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> T> + * SUCH DAMAGE.
> T> + */
> T> +
> T> +#include <sys/cdefs.h>
> T> +__FBSDID("$FreeBSD$");
> T> +
> T> +#include <sys/param.h>
> T> +#include <sys/kernel.h>
> T> +#include <sys/lock.h>
> T> +#include <sys/malloc.h>
> T> +#include <sys/mutex.h>
> T> +#include <sys/sf_buf.h>
> T> +#include <sys/smp.h>
> T> +#include <sys/sysctl.h>
> T> +
> T> +#include <vm/vm.h>
> T> +#include <vm/vm_extern.h>
> T> +#include <vm/vm_page.h>
> T> +
> T> +#ifndef NSFBUFS
> T> +#define	NSFBUFS		(512 + maxusers * 16)
> T> +#endif
> T> +
> T> +static int nsfbufs;
> T> +static int nsfbufspeak;
> T> +static int nsfbufsused;
> T> +
> T> +SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
> T> +    "Maximum number of sendfile(2) sf_bufs available");
> T> +SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
> T> +    "Number of sendfile(2) sf_bufs at peak usage");
> T> +SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
> T> +    "Number of sendfile(2) sf_bufs in use");
> T> +
> T> +static void	sf_buf_init(void *arg);
> T> +SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
> T> +
> T> +LIST_HEAD(sf_head, sf_buf);
> T> +
> T> +/*
> T> + * A hash table of active sendfile(2) buffers
> T> + */
> T> +static struct sf_head *sf_buf_active;
> T> +static u_long sf_buf_hashmask;
> T> +
> T> +#define	SF_BUF_HASH(m)	(((m) - vm_page_array) & sf_buf_hashmask)
> T> +
> T> +static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
> T> +static u_int	sf_buf_alloc_want;
> T> +
> T> +/*
> T> + * A lock used to synchronize access to the hash table and free list
> T> + */
> T> +static struct mtx sf_buf_lock;
> T> +
> T> +/*
> T> + * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
> T> + */
> T> +static void
> T> +sf_buf_init(void *arg)
> T> +{
> T> +	struct sf_buf *sf_bufs;
> T> +	vm_offset_t sf_base;
> T> +	int i;
> T> +
> T> +#ifdef SFBUF_OPTIONAL_DIRECT_MAP
> T> +	if (SFBUF_OPTIONAL_DIRECT_MAP)
> T> +		return;
> T> +#endif
> T> +
> T> +	nsfbufs = NSFBUFS;
> T> +	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
> T> +
> T> +	sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
> T> +	TAILQ_INIT(&sf_buf_freelist);
> T> +	sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
> T> +	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
> T> +	    M_NOWAIT | M_ZERO);
> T> +	KASSERT(sf_bufs, ("%s: malloc failure", __func__));
> T> +	for (i = 0; i < nsfbufs; i++) {
> T> +		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
> T> +		TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
> T> +	}
> T> +	sf_buf_alloc_want = 0;
> T> +	mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
> T> +}
> T> +
> T> +/*
> T> + * Get an sf_buf from the freelist.  May block if none are available.
> T> + */
> T> +struct sf_buf *
> T> +sf_buf_alloc(struct vm_page *m, int flags)
> T> +{
> T> +	struct sf_head *hash_list;
> T> +	struct sf_buf *sf;
> T> +	int error;
> T> +
> T> +#ifdef SFBUF_OPTIONAL_DIRECT_MAP
> T> +	if (SFBUF_OPTIONAL_DIRECT_MAP)
> T> +		return ((struct sf_buf *)m);
> T> +#endif
> T> +
> T> +	KASSERT(curthread->td_pinned > 0 || (flags & SFB_CPUPRIVATE) == 0,
> T> +	    ("sf_buf_alloc(SFB_CPUPRIVATE): curthread not pinned"));
> T> +	hash_list = &sf_buf_active[SF_BUF_HASH(m)];
> T> +	mtx_lock(&sf_buf_lock);
> T> +	LIST_FOREACH(sf, hash_list, list_entry) {
> T> +		if (sf->m == m) {
> T> +			sf->ref_count++;
> T> +			if (sf->ref_count == 1) {
> T> +				TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> +				nsfbufsused++;
> T> +				nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> +			}
> T> +#if defined(SMP) && defined(SFBUF_CPUSET)
> T> +			sf_buf_shootdown(sf, flags);
> T> +#endif
> T> +			goto done;
> T> +		}
> T> +	}
> T> +	while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
> T> +		if (flags & SFB_NOWAIT)
> T> +			goto done;
> T> +		sf_buf_alloc_want++;
> T> +		SFSTAT_INC(sf_allocwait);
> T> +		error = msleep(&sf_buf_freelist, &sf_buf_lock,
> T> +		    (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
> T> +		sf_buf_alloc_want--;
> T> +
> T> +		/*
> T> +		 * If we got a signal, don't risk going back to sleep. 
> T> +		 */
> T> +		if (error)
> T> +			goto done;
> T> +	}
> T> +	TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> +	if (sf->m != NULL)
> T> +		LIST_REMOVE(sf, list_entry);
> T> +	LIST_INSERT_HEAD(hash_list, sf, list_entry);
> T> +	sf->ref_count = 1;
> T> +	sf->m = m;
> T> +	nsfbufsused++;
> T> +	nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> +	sf_buf_map(sf, flags);
> T> +done:
> T> +	mtx_unlock(&sf_buf_lock);
> T> +	return (sf);
> T> +}
> T> +
> T> +/*
> T> + * Remove a reference from the given sf_buf, adding it to the free
> T> + * list when its reference count reaches zero.  A freed sf_buf still,
> T> + * however, retains its virtual-to-physical mapping until it is
> T> + * recycled or reactivated by sf_buf_alloc(9).
> T> + */
> T> +void
> T> +sf_buf_free(struct sf_buf *sf)
> T> +{
> T> +
> T> +#ifdef SFBUF_OPTIONAL_DIRECT_MAP
> T> +	if (SFBUF_OPTIONAL_DIRECT_MAP)
> T> +		return;
> T> +#endif
> T> +
> T> +	mtx_lock(&sf_buf_lock);
> T> +	sf->ref_count--;
> T> +	if (sf->ref_count == 0) {
> T> +		TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
> T> +		nsfbufsused--;
> T> +		if (sf_buf_unmap(sf)) {
> T> +			sf->m = NULL;
> T> +			LIST_REMOVE(sf, list_entry);
> T> +		}
> T> +		if (sf_buf_alloc_want > 0)
> T> +			wakeup(&sf_buf_freelist);
> T> +	}
> T> +	mtx_unlock(&sf_buf_lock);
> T> +}
> T> +
> T> +#ifdef SFBUF_PROCESS_PAGE
> T> +/*
> T> + * Run callback function on sf_buf that holds a certain page.
> T> + */
> T> +boolean_t
> T> +sf_buf_process_page(vm_page_t m, void (*cb)(struct sf_buf *))
> T> +{
> T> +	struct sf_head *hash_list;
> T> +	struct sf_buf *sf;
> T> +
> T> +	hash_list = &sf_buf_active[SF_BUF_HASH(m)];
> T> +	mtx_lock(&sf_buf_lock);
> T> +	LIST_FOREACH(sf, hash_list, list_entry) {
> T> +		if (sf->m == m) {
> T> +			cb(sf);
> T> +			mtx_unlock(&sf_buf_lock);
> T> +			return (TRUE);
> T> +		}
> T> +	}
> T> +	mtx_unlock(&sf_buf_lock);
> T> +	return (FALSE);
> T> +}
> T> +#endif	/* SFBUF_PROCESS_PAGE */
> T> 
> T> Property changes on: sys/kern/subr_sfbuf.c
> T> ___________________________________________________________________
> T> Added: svn:mime-type
> T> ## -0,0 +1 ##
> T> +text/plain
> T> \ No newline at end of property
> T> Added: svn:keywords
> T> ## -0,0 +1 ##
> T> +FreeBSD=%H
> T> \ No newline at end of property
> T> Added: svn:eol-style
> T> ## -0,0 +1 ##
> T> +native
> T> \ No newline at end of property
> T> Index: sys/mips/include/sf_buf.h
> T> ===================================================================
> T> --- sys/mips/include/sf_buf.h	(revision 268750)
> T> +++ sys/mips/include/sf_buf.h	(working copy)
> T> @@ -29,31 +29,9 @@
> T>  #ifndef _MACHINE_SF_BUF_H_
> T>  #define _MACHINE_SF_BUF_H_
> T>  
> T> -#ifdef __mips_n64
> T> -#include <vm/vm.h>
> T> -#include <vm/vm_param.h>
> T> -#include <vm/vm_page.h>
> T> -#else
> T> -#include <sys/queue.h>
> T> -#endif
> T> +#ifdef __mips_n64	/* In 64 bit the whole memory is directly mapped */
> T>  
> T> -#ifdef __mips_n64
> T> -/* In 64 bit the whole memory is directly mapped */
> T> -struct	sf_buf;
> T> -
> T> -static inline struct sf_buf *
> T> -sf_buf_alloc(struct vm_page *m, int pri)
> T> -{
> T> -
> T> -	return ((struct sf_buf *)m);
> T> -}
> T> -
> T> -static inline void
> T> -sf_buf_free(struct sf_buf *sf)
> T> -{
> T> -}
> T> -
> T> -static __inline vm_offset_t
> T> +static inline vm_offset_t
> T>  sf_buf_kva(struct sf_buf *sf)
> T>  {
> T>  	vm_page_t	m;
> T> @@ -62,7 +40,7 @@ sf_buf_kva(struct sf_buf *sf)
> T>  	return (MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m)));
> T>  }
> T>  
> T> -static __inline struct vm_page *
> T> +static inline struct vm_page *
> T>  sf_buf_page(struct sf_buf *sf)
> T>  {
> T>  
> T> @@ -69,31 +47,5 @@ sf_buf_page(struct sf_buf *sf)
> T>  	return ((vm_page_t)sf);
> T>  }
> T>  
> T> -#else /* ! __mips_n64 */
> T> -struct vm_page;
> T> -
> T> -struct sf_buf {
> T> -	SLIST_ENTRY(sf_buf) free_list;	/* list of free buffer slots */
> T> -	struct		vm_page *m;	/* currently mapped page */
> T> -	vm_offset_t	kva;		/* va of mapping */
> T> -};
> T> -
> T> -struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags);
> T> -void sf_buf_free(struct sf_buf *sf);
> T> -
> T> -static __inline vm_offset_t
> T> -sf_buf_kva(struct sf_buf *sf)
> T> -{
> T> -
> T> -	return (sf->kva);
> T> -}
> T> -
> T> -static __inline struct vm_page *
> T> -sf_buf_page(struct sf_buf *sf)
> T> -{
> T> -
> T> -	return (sf->m);
> T> -}
> T>  #endif /* __mips_n64 */
> T> -
> T>  #endif /* !_MACHINE_SF_BUF_H_ */
> T> Index: sys/mips/include/vmparam.h
> T> ===================================================================
> T> --- sys/mips/include/vmparam.h	(revision 268750)
> T> +++ sys/mips/include/vmparam.h	(working copy)
> T> @@ -187,4 +187,8 @@
> T>  
> T>  #define	ZERO_REGION_SIZE	(64 * 1024)	/* 64KB */
> T>  
> T> +#ifndef __mips_n64
> T> +#define	SFBUF
> T> +#endif
> T> +
> T>  #endif /* !_MACHINE_VMPARAM_H_ */
> T> Index: sys/mips/mips/vm_machdep.c
> T> ===================================================================
> T> --- sys/mips/mips/vm_machdep.c	(revision 268750)
> T> +++ sys/mips/mips/vm_machdep.c	(working copy)
> T> @@ -76,9 +76,6 @@ __FBSDID("$FreeBSD$");
> T>  
> T>  #include <sys/user.h>
> T>  #include <sys/mbuf.h>
> T> -#ifndef __mips_n64
> T> -#include <sys/sf_buf.h>
> T> -#endif
> T>  
> T>  /* Duplicated from asm.h */
> T>  #if defined(__mips_o32)
> T> @@ -92,39 +89,7 @@ __FBSDID("$FreeBSD$");
> T>  #define	CALLFRAME_SIZ	(SZREG * 4)
> T>  #endif
> T>  
> T> -#ifndef __mips_n64
> T> -
> T> -#ifndef NSFBUFS
> T> -#define	NSFBUFS		(512 + maxusers * 16)
> T> -#endif
> T> -
> T> -static int nsfbufs;
> T> -static int nsfbufspeak;
> T> -static int nsfbufsused;
> T> -
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
> T> -    "Maximum number of sendfile(2) sf_bufs available");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
> T> -    "Number of sendfile(2) sf_bufs at peak usage");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
> T> -    "Number of sendfile(2) sf_bufs in use");
> T> -
> T> -static void	sf_buf_init(void *arg);
> T> -SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
> T> -
> T>  /*
> T> - * Expanded sf_freelist head.  Really an SLIST_HEAD() in disguise, with the
> T> - * sf_freelist head with the sf_lock mutex.
> T> - */
> T> -static struct {
> T> -	SLIST_HEAD(, sf_buf) sf_head;
> T> -	struct mtx sf_lock;
> T> -} sf_freelist;
> T> -
> T> -static u_int	sf_buf_alloc_want;
> T> -#endif /* !__mips_n64 */
> T> -
> T> -/*
> T>   * Finish a fork operation, with process p2 nearly set up.
> T>   * Copy and update the pcb, set up the stack so that the child
> T>   * ready to run and return to user mode.
> T> @@ -513,84 +478,6 @@ cpu_set_upcall_kse(struct thread *td, void (*entry
> T>  #define	ZIDLE_HI(v)	((v) * 4 / 5)
> T>  
> T>  /*
> T> - * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
> T> - */
> T> -#ifndef __mips_n64
> T> -static void
> T> -sf_buf_init(void *arg)
> T> -{
> T> -	struct sf_buf *sf_bufs;
> T> -	vm_offset_t sf_base;
> T> -	int i;
> T> -
> T> -	nsfbufs = NSFBUFS;
> T> -	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
> T> -
> T> -	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
> T> -	SLIST_INIT(&sf_freelist.sf_head);
> T> -	sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
> T> -	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
> T> -	    M_NOWAIT | M_ZERO);
> T> -	for (i = 0; i < nsfbufs; i++) {
> T> -		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
> T> -		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
> T> -	}
> T> -	sf_buf_alloc_want = 0;
> T> -}
> T> -
> T> -/*
> T> - * Get an sf_buf from the freelist.  Will block if none are available.
> T> - */
> T> -struct sf_buf *
> T> -sf_buf_alloc(struct vm_page *m, int flags)
> T> -{
> T> -	struct sf_buf *sf;
> T> -	int error;
> T> -
> T> -	mtx_lock(&sf_freelist.sf_lock);
> T> -	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
> T> -		if (flags & SFB_NOWAIT)
> T> -			break;
> T> -		sf_buf_alloc_want++;
> T> -		SFSTAT_INC(sf_allocwait);
> T> -		error = msleep(&sf_freelist, &sf_freelist.sf_lock,
> T> -		    (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
> T> -		sf_buf_alloc_want--;
> T> -
> T> -		/*
> T> -		 * If we got a signal, don't risk going back to sleep.
> T> -		 */
> T> -		if (error)
> T> -			break;
> T> -	}
> T> -	if (sf != NULL) {
> T> -		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
> T> -		sf->m = m;
> T> -		nsfbufsused++;
> T> -		nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -		pmap_qenter(sf->kva, &sf->m, 1);
> T> -	}
> T> -	mtx_unlock(&sf_freelist.sf_lock);
> T> -	return (sf);
> T> -}
> T> -
> T> -/*
> T> - * Release resources back to the system.
> T> - */
> T> -void
> T> -sf_buf_free(struct sf_buf *sf)
> T> -{
> T> -	pmap_qremove(sf->kva, 1);
> T> -	mtx_lock(&sf_freelist.sf_lock);
> T> -	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
> T> -	nsfbufsused--;
> T> -	if (sf_buf_alloc_want > 0)
> T> -		wakeup(&sf_freelist);
> T> -	mtx_unlock(&sf_freelist.sf_lock);
> T> -}
> T> -#endif	/* !__mips_n64 */
> T> -
> T> -/*
> T>   * Software interrupt handler for queued VM system processing.
> T>   */
> T>  void
> T> Index: sys/powerpc/include/sf_buf.h
> T> ===================================================================
> T> --- sys/powerpc/include/sf_buf.h	(revision 268750)
> T> +++ sys/powerpc/include/sf_buf.h	(working copy)
> T> @@ -1,80 +0,0 @@
> T> -/*-
> T> - * Copyright (c) 2003 Alan L. Cox <alc_at_cs.rice.edu>
> T> - * All rights reserved.
> T> - *
> T> - * Redistribution and use in source and binary forms, with or without
> T> - * modification, are permitted provided that the following conditions
> T> - * are met:
> T> - * 1. Redistributions of source code must retain the above copyright
> T> - *    notice, this list of conditions and the following disclaimer.
> T> - * 2. Redistributions in binary form must reproduce the above copyright
> T> - *    notice, this list of conditions and the following disclaimer in the
> T> - *    documentation and/or other materials provided with the distribution.
> T> - *
> T> - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> T> - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> T> - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> T> - * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> T> - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> T> - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> T> - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> T> - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> T> - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> T> - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> T> - * SUCH DAMAGE.
> T> - *
> T> - * $FreeBSD$
> T> - */
> T> -
> T> -#ifndef _MACHINE_SF_BUF_H_
> T> -#define _MACHINE_SF_BUF_H_
> T> -
> T> -#include <vm/vm.h>
> T> -#include <vm/vm_param.h>
> T> -#include <vm/vm_page.h>
> T> -#include <machine/md_var.h>
> T> -#include <sys/queue.h>
> T> -
> T> -struct vm_page;
> T> -
> T> -struct sf_buf {
> T> -	LIST_ENTRY(sf_buf) list_entry;	/* list of buffers */
> T> -	TAILQ_ENTRY(sf_buf) free_entry;	/* list of buffers */
> T> -	struct		vm_page *m;	/* currently mapped page */
> T> -	vm_offset_t	kva;		/* va of mapping */
> T> -	int		ref_count;	/* usage of this mapping */
> T> -};
> T> -
> T> -struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags);
> T> -void sf_buf_free(struct sf_buf *sf);
> T> -
> T> -/*
> T> - * On 32-bit OEA, the only purpose for which sf_buf is used is to implement
> T> - * an opaque pointer required by the machine-independent parts of the kernel.
> T> - * That pointer references the vm_page that is "mapped" by the sf_buf.  The
> T> - * actual mapping is provided by the direct virtual-to-physical mapping.  
> T> - *
> T> - * On OEA64 and Book-E, we need to do something a little more complicated. Use
> T> - * the runtime-detected hw_direct_map to pick between the two cases. Our
> T> - * friends in vm_machdep.c will do the same to ensure nothing gets confused.
> T> - */
> T> -
> T> -static __inline vm_offset_t
> T> -sf_buf_kva(struct sf_buf *sf)
> T> -{
> T> -	if (hw_direct_map)
> T> -		return (VM_PAGE_TO_PHYS((vm_page_t)sf));
> T> -
> T> -	return (sf->kva);
> T> -}
> T> -
> T> -static __inline struct vm_page *
> T> -sf_buf_page(struct sf_buf *sf)
> T> -{
> T> -	if (hw_direct_map)
> T> -		return ((vm_page_t)sf);
> T> -
> T> -	return (sf->m);
> T> -}
> T> -
> T> -#endif /* !_MACHINE_SF_BUF_H_ */
> T> Index: sys/powerpc/include/vmparam.h
> T> ===================================================================
> T> --- sys/powerpc/include/vmparam.h	(revision 268750)
> T> +++ sys/powerpc/include/vmparam.h	(working copy)
> T> @@ -197,4 +197,18 @@ struct pmap_physseg {
> T>  
> T>  #define	ZERO_REGION_SIZE	(64 * 1024)	/* 64KB */
> T>  
> T> +/*
> T> + * On 32-bit OEA, the only purpose for which sf_buf is used is to implement
> T> + * an opaque pointer required by the machine-independent parts of the kernel.
> T> + * That pointer references the vm_page that is "mapped" by the sf_buf.  The
> T> + * actual mapping is provided by the direct virtual-to-physical mapping.
> T> + *
> T> + * On OEA64 and Book-E, we need to do something a little more complicated. Use
> T> + * the runtime-detected hw_direct_map to pick between the two cases. Our
> T> + * friends in vm_machdep.c will do the same to ensure nothing gets confused.
> T> + */
> T> +#define	SFBUF
> T> +#define	SFBUF_NOMD
> T> +#define	SFBUF_OPTIONAL_DIRECT_MAP	hw_direct_map
> T> + 
> T>  #endif /* _MACHINE_VMPARAM_H_ */
> T> Index: sys/powerpc/powerpc/vm_machdep.c
> T> ===================================================================
> T> --- sys/powerpc/powerpc/vm_machdep.c	(revision 268750)
> T> +++ sys/powerpc/powerpc/vm_machdep.c	(working copy)
> T> @@ -80,7 +80,6 @@
> T>  #include <sys/vmmeter.h>
> T>  #include <sys/kernel.h>
> T>  #include <sys/mbuf.h>
> T> -#include <sys/sf_buf.h>
> T>  #include <sys/sysctl.h>
> T>  #include <sys/sysent.h>
> T>  #include <sys/unistd.h>
> T> @@ -100,47 +99,6 @@
> T>  #include <vm/vm_map.h>
> T>  #include <vm/vm_extern.h>
> T>  
> T> -/*
> T> - * On systems without a direct mapped region (e.g. PPC64),
> T> - * we use the same code as the Book E implementation. Since
> T> - * we need to have runtime detection of this, define some machinery
> T> - * for sf_bufs in this case, and ignore it on systems with direct maps.
> T> - */
> T> -
> T> -#ifndef NSFBUFS
> T> -#define NSFBUFS		(512 + maxusers * 16)
> T> -#endif
> T> -
> T> -static int nsfbufs;
> T> -static int nsfbufspeak;
> T> -static int nsfbufsused;
> T> -
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
> T> -    "Maximum number of sendfile(2) sf_bufs available");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
> T> -    "Number of sendfile(2) sf_bufs at peak usage");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
> T> -    "Number of sendfile(2) sf_bufs in use");
> T> -
> T> -static void sf_buf_init(void *arg);
> T> -SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
> T> - 
> T> -LIST_HEAD(sf_head, sf_buf);
> T> - 
> T> -/* A hash table of active sendfile(2) buffers */
> T> -static struct sf_head *sf_buf_active;
> T> -static u_long sf_buf_hashmask;
> T> -
> T> -#define SF_BUF_HASH(m)  (((m) - vm_page_array) & sf_buf_hashmask)
> T> -
> T> -static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
> T> -static u_int sf_buf_alloc_want;
> T> -
> T> -/*
> T> - * A lock used to synchronize access to the hash table and free list
> T> - */
> T> -static struct mtx sf_buf_lock;
> T> -
> T>  #ifdef __powerpc64__
> T>  extern uintptr_t tocbase;
> T>  #endif
> T> @@ -245,124 +203,6 @@ cpu_exit(struct thread *td)
> T>  }
> T>  
> T>  /*
> T> - * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
> T> - */
> T> -static void
> T> -sf_buf_init(void *arg)
> T> -{
> T> -	struct sf_buf *sf_bufs;
> T> -	vm_offset_t sf_base;
> T> -	int i;
> T> -
> T> -	/* Don't bother on systems with a direct map */
> T> -	if (hw_direct_map)
> T> -		return;
> T> -
> T> -	nsfbufs = NSFBUFS;
> T> -	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
> T> -
> T> -	sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
> T> -	TAILQ_INIT(&sf_buf_freelist);
> T> -	sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
> T> -	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
> T> -	    M_NOWAIT | M_ZERO);
> T> -
> T> -	for (i = 0; i < nsfbufs; i++) {
> T> -		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
> T> -		TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
> T> -	}
> T> -	sf_buf_alloc_want = 0;
> T> -	mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
> T> -}
> T> -
> T> -/*
> T> - * Get an sf_buf from the freelist. Will block if none are available.
> T> - */
> T> -struct sf_buf *
> T> -sf_buf_alloc(struct vm_page *m, int flags)
> T> -{
> T> -	struct sf_head *hash_list;
> T> -	struct sf_buf *sf;
> T> -	int error;
> T> -
> T> -	if (hw_direct_map) {
> T> -		/* Shortcut the direct mapped case */
> T> -		return ((struct sf_buf *)m);
> T> -	}
> T> -
> T> -	hash_list = &sf_buf_active[SF_BUF_HASH(m)];
> T> -	mtx_lock(&sf_buf_lock);
> T> -	LIST_FOREACH(sf, hash_list, list_entry) {
> T> -		if (sf->m == m) {
> T> -			sf->ref_count++;
> T> -			if (sf->ref_count == 1) {
> T> -				TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> -				nsfbufsused++;
> T> -				nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -			}
> T> -			goto done;
> T> -		}
> T> -	}
> T> -
> T> -	while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
> T> -		if (flags & SFB_NOWAIT)
> T> -			goto done;
> T> -
> T> -		sf_buf_alloc_want++;
> T> -		SFSTAT_INC(sf_allocwait);
> T> -		error = msleep(&sf_buf_freelist, &sf_buf_lock,
> T> -		    (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
> T> -		sf_buf_alloc_want--;
> T> -
> T> -		/*
> T> -		 * If we got a signal, don't risk going back to sleep.
> T> -		 */
> T> -		if (error)
> T> -			goto done;
> T> -	}
> T> -
> T> -	TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> -	if (sf->m != NULL)
> T> -		LIST_REMOVE(sf, list_entry);
> T> -
> T> -	LIST_INSERT_HEAD(hash_list, sf, list_entry);
> T> -	sf->ref_count = 1;
> T> -	sf->m = m;
> T> -	nsfbufsused++;
> T> -	nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -	pmap_qenter(sf->kva, &sf->m, 1);
> T> -done:
> T> -	mtx_unlock(&sf_buf_lock);
> T> -	return (sf);
> T> -}
> T> -
> T> -/*
> T> - * Detach mapped page and release resources back to the system.
> T> - *
> T> - * Remove a reference from the given sf_buf, adding it to the free
> T> - * list when its reference count reaches zero. A freed sf_buf still,
> T> - * however, retains its virtual-to-physical mapping until it is
> T> - * recycled or reactivated by sf_buf_alloc(9).
> T> - */
> T> -void
> T> -sf_buf_free(struct sf_buf *sf)
> T> -{
> T> -	if (hw_direct_map)
> T> -		return;
> T> -
> T> -	mtx_lock(&sf_buf_lock);
> T> -	sf->ref_count--;
> T> -	if (sf->ref_count == 0) {
> T> -		TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
> T> -		nsfbufsused--;
> T> -
> T> -		if (sf_buf_alloc_want > 0)
> T> -			wakeup(&sf_buf_freelist);
> T> -	}
> T> -	mtx_unlock(&sf_buf_lock);
> T> -}
> T> -
> T> -/*
> T>   * Software interrupt handler for queued VM system processing.
> T>   */
> T>  void
> T> Index: sys/sparc64/include/sf_buf.h
> T> ===================================================================
> T> --- sys/sparc64/include/sf_buf.h	(revision 268750)
> T> +++ sys/sparc64/include/sf_buf.h	(working copy)
> T> @@ -1,59 +0,0 @@
> T> -/*-
> T> - * Copyright (c) 2003 Alan L. Cox <alc_at_cs.rice.edu>
> T> - * All rights reserved.
> T> - *
> T> - * Redistribution and use in source and binary forms, with or without
> T> - * modification, are permitted provided that the following conditions
> T> - * are met:
> T> - * 1. Redistributions of source code must retain the above copyright
> T> - *    notice, this list of conditions and the following disclaimer.
> T> - * 2. Redistributions in binary form must reproduce the above copyright
> T> - *    notice, this list of conditions and the following disclaimer in the
> T> - *    documentation and/or other materials provided with the distribution.
> T> - *
> T> - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> T> - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> T> - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> T> - * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> T> - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> T> - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> T> - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> T> - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> T> - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> T> - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> T> - * SUCH DAMAGE.
> T> - *
> T> - * $FreeBSD$
> T> - */
> T> -
> T> -#ifndef _MACHINE_SF_BUF_H_
> T> -#define _MACHINE_SF_BUF_H_
> T> -
> T> -#include <sys/queue.h>
> T> -
> T> -struct vm_page;
> T> -
> T> -struct sf_buf {
> T> -	SLIST_ENTRY(sf_buf) free_list;	/* list of free buffer slots */
> T> -	struct		vm_page *m;	/* currently mapped page */
> T> -	vm_offset_t	kva;		/* va of mapping */
> T> -};
> T> -
> T> -struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags);
> T> -void sf_buf_free(struct sf_buf *sf);
> T> -
> T> -static __inline vm_offset_t
> T> -sf_buf_kva(struct sf_buf *sf)
> T> -{
> T> -
> T> -	return (sf->kva);
> T> -}
> T> -
> T> -static __inline struct vm_page *
> T> -sf_buf_page(struct sf_buf *sf)
> T> -{
> T> -
> T> -	return (sf->m);
> T> -}
> T> -
> T> -#endif /* !_MACHINE_SF_BUF_H_ */
> T> Index: sys/sparc64/include/vmparam.h
> T> ===================================================================
> T> --- sys/sparc64/include/vmparam.h	(revision 268750)
> T> +++ sys/sparc64/include/vmparam.h	(working copy)
> T> @@ -239,4 +239,7 @@ extern vm_offset_t vm_max_kernel_address;
> T>   */
> T>  #define	ZERO_REGION_SIZE	PAGE_SIZE
> T>  
> T> +#define	SFBUF
> T> +#define	SFBUF_NOMD
> T> +
> T>  #endif /* !_MACHINE_VMPARAM_H_ */
> T> Index: sys/sparc64/sparc64/vm_machdep.c
> T> ===================================================================
> T> --- sys/sparc64/sparc64/vm_machdep.c	(revision 268750)
> T> +++ sys/sparc64/sparc64/vm_machdep.c	(working copy)
> T> @@ -53,7 +53,6 @@ __FBSDID("$FreeBSD$");
> T>  #include <sys/mutex.h>
> T>  #include <sys/proc.h>
> T>  #include <sys/sysent.h>
> T> -#include <sys/sf_buf.h>
> T>  #include <sys/sched.h>
> T>  #include <sys/sysctl.h>
> T>  #include <sys/unistd.h>
> T> @@ -84,35 +83,6 @@ __FBSDID("$FreeBSD$");
> T>  #include <machine/tlb.h>
> T>  #include <machine/tstate.h>
> T>  
> T> -#ifndef NSFBUFS
> T> -#define	NSFBUFS		(512 + maxusers * 16)
> T> -#endif
> T> -
> T> -static int nsfbufs;
> T> -static int nsfbufspeak;
> T> -static int nsfbufsused;
> T> -
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
> T> -    "Maximum number of sendfile(2) sf_bufs available");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
> T> -    "Number of sendfile(2) sf_bufs at peak usage");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
> T> -    "Number of sendfile(2) sf_bufs in use");
> T> -
> T> -static void	sf_buf_init(void *arg);
> T> -SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
> T> -
> T> -/*
> T> - * Expanded sf_freelist head.  Really an SLIST_HEAD() in disguise, with the
> T> - * sf_freelist head with the sf_lock mutex.
> T> - */
> T> -static struct {
> T> -	SLIST_HEAD(, sf_buf) sf_head;
> T> -	struct mtx sf_lock;
> T> -} sf_freelist;
> T> -
> T> -static u_int	sf_buf_alloc_want;
> T> -
> T>  PMAP_STATS_VAR(uma_nsmall_alloc);
> T>  PMAP_STATS_VAR(uma_nsmall_alloc_oc);
> T>  PMAP_STATS_VAR(uma_nsmall_free);
> T> @@ -417,84 +387,7 @@ is_physical_memory(vm_paddr_t addr)
> T>  	return (0);
> T>  }
> T>  
> T> -/*
> T> - * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
> T> - */
> T> -static void
> T> -sf_buf_init(void *arg)
> T> -{
> T> -	struct sf_buf *sf_bufs;
> T> -	vm_offset_t sf_base;
> T> -	int i;
> T> -
> T> -	nsfbufs = NSFBUFS;
> T> -	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
> T> -
> T> -	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
> T> -	SLIST_INIT(&sf_freelist.sf_head);
> T> -	sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
> T> -	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
> T> -	    M_NOWAIT | M_ZERO);
> T> -	for (i = 0; i < nsfbufs; i++) {
> T> -		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
> T> -		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
> T> -	}
> T> -	sf_buf_alloc_want = 0;
> T> -}
> T> -
> T> -/*
> T> - * Get an sf_buf from the freelist.  Will block if none are available.
> T> - */
> T> -struct sf_buf *
> T> -sf_buf_alloc(struct vm_page *m, int flags)
> T> -{
> T> -	struct sf_buf *sf;
> T> -	int error;
> T> -
> T> -	mtx_lock(&sf_freelist.sf_lock);
> T> -	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
> T> -		if (flags & SFB_NOWAIT)
> T> -			break;
> T> -		sf_buf_alloc_want++;
> T> -		SFSTAT_INC(sf_allocwait);
> T> -		error = msleep(&sf_freelist, &sf_freelist.sf_lock,
> T> -		    (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
> T> -		sf_buf_alloc_want--;
> T> -
> T> -		/*
> T> -		 * If we got a signal, don't risk going back to sleep.
> T> -		 */
> T> -		if (error)
> T> -			break;
> T> -	}
> T> -	if (sf != NULL) {
> T> -		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
> T> -		sf->m = m;
> T> -		nsfbufsused++;
> T> -		nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -		pmap_qenter(sf->kva, &sf->m, 1);
> T> -	}
> T> -	mtx_unlock(&sf_freelist.sf_lock);
> T> -	return (sf);
> T> -}
> T> -
> T> -/*
> T> - * Release resources back to the system.
> T> - */
> T>  void
> T> -sf_buf_free(struct sf_buf *sf)
> T> -{
> T> -
> T> -	pmap_qremove(sf->kva, 1);
> T> -	mtx_lock(&sf_freelist.sf_lock);
> T> -	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
> T> -	nsfbufsused--;
> T> -	if (sf_buf_alloc_want > 0)
> T> -		wakeup(&sf_freelist);
> T> -	mtx_unlock(&sf_freelist.sf_lock);
> T> -}
> T> -
> T> -void
> T>  swi_vm(void *v)
> T>  {
> T>  
> T> Index: sys/sys/sf_buf.h
> T> ===================================================================
> T> --- sys/sys/sf_buf.h	(revision 268750)
> T> +++ sys/sys/sf_buf.h	(working copy)
> T> @@ -29,6 +29,114 @@
> T>  #ifndef _SYS_SF_BUF_H_
> T>  #define _SYS_SF_BUF_H_
> T>  
> T> +struct sfstat {				/* sendfile statistics */
> T> +	uint64_t	sf_iocnt;	/* times sendfile had to do disk I/O */
> T> +	uint64_t	sf_allocfail;	/* times sfbuf allocation failed */
> T> +	uint64_t	sf_allocwait;	/* times sfbuf allocation had to wait */
> T> +};
> T> +
> T> +#ifdef _KERNEL
> T> +#include <sys/types.h>
> T> +#include <sys/systm.h>
> T> +#include <sys/counter.h>
> T> +#include <vm/vm.h>
> T> +#include <vm/vm_param.h>
> T> +#include <vm/vm_page.h>
> T> +
> T> +#ifdef SFBUF
> T> +#if defined(SMP) && defined(SFBUF_CPUSET)
> T> +#include <sys/_cpuset.h>
> T> +#endif
> T> +#include <sys/queue.h>
> T> +
> T> +struct sf_buf {
> T> +	LIST_ENTRY(sf_buf)	list_entry;	/* list of buffers */
> T> +	TAILQ_ENTRY(sf_buf)	free_entry;	/* list of buffers */
> T> +	vm_page_t		m;		/* currently mapped page */
> T> +	vm_offset_t		kva;		/* va of mapping */
> T> +	int			ref_count;	/* usage of this mapping */
> T> +#if defined(SMP) && defined(SFBUF_CPUSET)
> T> +	cpuset_t		cpumask;	/* where mapping is valid */
> T> +#endif
> T> +};
> T> +#else /* ! SFBUF */
> T> +struct sf_buf;
> T> +#endif /* SFBUF */
> T> +
> T> +#ifndef SFBUF_NOMD
> T> +#include <machine/sf_buf.h>
> T> +#endif
> T> +#ifdef SFBUF_OPTIONAL_DIRECT_MAP
> T> +#include <machine/md_var.h>
> T> +#endif
> T> +
> T> +#ifdef SFBUF
> T> +struct sf_buf *sf_buf_alloc(struct vm_page *, int);
> T> +void sf_buf_free(struct sf_buf *);
> T> +
> T> +static inline vm_offset_t
> T> +sf_buf_kva(struct sf_buf *sf)
> T> +{
> T> +#ifdef SFBUF_OPTIONAL_DIRECT_MAP
> T> +	if (SFBUF_OPTIONAL_DIRECT_MAP)
> T> +		return (VM_PAGE_TO_PHYS((vm_page_t)sf));
> T> +#endif
> T> +
> T> +        return (sf->kva);
> T> +}
> T> +
> T> +static inline vm_page_t
> T> +sf_buf_page(struct sf_buf *sf)
> T> +{
> T> +#ifdef SFBUF_OPTIONAL_DIRECT_MAP
> T> +	if (SFBUF_OPTIONAL_DIRECT_MAP)
> T> +		return ((vm_page_t)sf);
> T> +#endif
> T> +
> T> +        return (sf->m);
> T> +}
> T> +
> T> +#ifndef SFBUF_MAP
> T> +#include <vm/pmap.h>
> T> +
> T> +static inline void
> T> +sf_buf_map(struct sf_buf *sf, int flags)
> T> +{
> T> +
> T> +	pmap_qenter(sf->kva, &sf->m, 1);
> T> +}
> T> +
> T> +static inline int
> T> +sf_buf_unmap(struct sf_buf *sf)
> T> +{
> T> +
> T> +	return (0);
> T> +}
> T> +#endif /* SFBUF_MAP */
> T> +
> T> +#if defined(SMP) && defined(SFBUF_CPUSET)
> T> +void sf_buf_shootdown(struct sf_buf *, int);
> T> +#endif
> T> +
> T> +#ifdef SFBUF_PROCESS_PAGE
> T> +boolean_t sf_buf_process_page(vm_page_t, void (*)(struct sf_buf *));
> T> +#endif
> T> +
> T> +#else /* ! SFBUF */
> T> +
> T> +static inline struct sf_buf *
> T> +sf_buf_alloc(struct vm_page *m, int pri)
> T> +{
> T> +
> T> +	return ((struct sf_buf *)m);
> T> +}
> T> +
> T> +static inline void
> T> +sf_buf_free(struct sf_buf *sf)
> T> +{
> T> +}
> T> +#endif /* SFBUF */
> T> +
> T>  /*
> T>   * Options to sf_buf_alloc() are specified through its flags argument.  This
> T>   * argument's value should be the result of a bitwise or'ing of one or more
> T> @@ -40,19 +148,6 @@
> T>  #define	SFB_DEFAULT	0
> T>  #define	SFB_NOWAIT	4		/* Return NULL if all bufs are used. */
> T>  
> T> -struct vm_page;
> T> -
> T> -struct sfstat {				/* sendfile statistics */
> T> -	uint64_t	sf_iocnt;	/* times sendfile had to do disk I/O */
> T> -	uint64_t	sf_allocfail;	/* times sfbuf allocation failed */
> T> -	uint64_t	sf_allocwait;	/* times sfbuf allocation had to wait */
> T> -};
> T> -
> T> -#ifdef _KERNEL
> T> -#include <machine/sf_buf.h>
> T> -#include <sys/systm.h>
> T> -#include <sys/counter.h>
> T> -
> T>  extern counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)];
> T>  #define	SFSTAT_ADD(name, val)	\
> T>      counter_u64_add(sfstat[offsetof(struct sfstat, name) / sizeof(uint64_t)],\
> 
> T> _______________________________________________
> T> freebsd-current_at_freebsd.org mailing list
> T> http://lists.freebsd.org/mailman/listinfo/freebsd-current
> T> To unsubscribe, send any mail to "freebsd-current-unsubscribe_at_freebsd.org"
> 
> 
> -- 
> Totus tuus, Glebius.
> _______________________________________________
> freebsd-current_at_freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-current
> To unsubscribe, send any mail to "freebsd-current-unsubscribe_at_freebsd.org"
> 
Received on Tue Jul 29 2014 - 09:37:26 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:40:51 UTC