Re: [CFT/CFR] machine independent sf_bufs

From: Michael Tuexen <tuexen_at_freebsd.org>
Date: Tue, 29 Jul 2014 19:29:43 +0200
On 29 Jul 2014, at 12:41, Gleb Smirnoff <glebius_at_FreeBSD.org> wrote:

>  Hi!
> 
>  Sorry for top quoting, this is to annoy you :) I got zero
> replies on the below email during a week. I'd really appreciate
> testing on different platforms. Any takers?
OK, it works on a Raspberry Pi running r269231 with your patch.
The only suspicious thing I observed was that the number of
'requests for I/O initiated by sendfile' in netstat -m doesn't
always increase. I would expect it to. However, I'm not sure if
this is ARM related (I would not think so) or is related to your
patch at all.

Let me know if you need more information.

Best regards
Michael
> 
> On Sat, Jul 19, 2014 at 10:27:25AM +0400, Gleb Smirnoff wrote:
> T>   Hi!
> T> 
> T>   we've got a lot of common code in sys/*/*/vm_machdep.c wrt the
> T> sf_buf allocation. I have gathered it into kern/subr_sfbuf.c.
> T> 
> T> o No MD code left in sys/*/*/vm_machdep.c.
> T> o The arches that have physical map have their implementation in
> T>   machine/sf_buf.h
> T> o The arches that needs sf_bufs use subr_sfbuf.c, optionally having
> T>   some stuff in machine/sf_buf.h
> T> 
> T> I can test only i386. I'd be grateful for testing:
> T> 
> T> arm
> T> mips
> T> mips64
> T> sparc64
> T> powerpc
> T> i386 XEN
> T> 
> T> The test is a simple use of any application or test that uses sendfile(2).
> T> The box shouldn't crash :) of course, and after end of a test there
> T> should be no evidence of sf_buf leak (observed via netstat -m).
> T> 
> T> -- 
> T> Totus tuus, Glebius.
> 
> T> Index: sys/amd64/include/sf_buf.h
> T> ===================================================================
> T> --- sys/amd64/include/sf_buf.h	(revision 268750)
> T> +++ sys/amd64/include/sf_buf.h	(working copy)
> T> _at__at_ -29,10 +29,6 _at__at_
> T>  #ifndef _MACHINE_SF_BUF_H_
> T>  #define _MACHINE_SF_BUF_H_
> T>  
> T> -#include <vm/vm.h>
> T> -#include <vm/vm_param.h>
> T> -#include <vm/vm_page.h>
> T> -
> T>  /*
> T>   * On this machine, the only purpose for which sf_buf is used is to implement
> T>   * an opaque pointer required by the machine-independent parts of the kernel.
> T> _at__at_ -39,21 +35,7 _at__at_
> T>   * That pointer references the vm_page that is "mapped" by the sf_buf.  The
> T>   * actual mapping is provided by the direct virtual-to-physical mapping.  
> T>   */
> T> -struct sf_buf;
> T> -
> T> -static inline struct sf_buf *
> T> -sf_buf_alloc(struct vm_page *m, int pri)
> T> -{
> T> -
> T> -	return ((struct sf_buf *)m);
> T> -}
> T> -
> T> -static inline void
> T> -sf_buf_free(struct sf_buf *sf)
> T> -{
> T> -}
> T> -
> T> -static __inline vm_offset_t
> T> +static inline vm_offset_t
> T>  sf_buf_kva(struct sf_buf *sf)
> T>  {
> T>  
> T> _at__at_ -60,11 +42,10 _at__at_ sf_buf_kva(struct sf_buf *sf)
> T>  	return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS((vm_page_t)sf)));
> T>  }
> T>  
> T> -static __inline vm_page_t
> T> +static inline vm_page_t
> T>  sf_buf_page(struct sf_buf *sf)
> T>  {
> T>  
> T>  	return ((vm_page_t)sf);
> T>  }
> T> -
> T>  #endif /* !_MACHINE_SF_BUF_H_ */
> T> Index: sys/arm/arm/vm_machdep.c
> T> ===================================================================
> T> --- sys/arm/arm/vm_machdep.c	(revision 268750)
> T> +++ sys/arm/arm/vm_machdep.c	(working copy)
> T> _at__at_ -50,7 +50,6 _at__at_ __FBSDID("$FreeBSD$");
> T>  #include <sys/mbuf.h>
> T>  #include <sys/proc.h>
> T>  #include <sys/socketvar.h>
> T> -#include <sys/sf_buf.h>
> T>  #include <sys/syscall.h>
> T>  #include <sys/sysctl.h>
> T>  #include <sys/sysent.h>
> T> _at__at_ -83,43 +82,7 _at__at_ __FBSDID("$FreeBSD$");
> T>  CTASSERT(sizeof(struct switchframe) == 24);
> T>  CTASSERT(sizeof(struct trapframe) == 80);
> T>  
> T> -#ifndef NSFBUFS
> T> -#define NSFBUFS		(512 + maxusers * 16)
> T> -#endif
> T> -
> T> -static int nsfbufs;
> T> -static int nsfbufspeak;
> T> -static int nsfbufsused;
> T> -
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
> T> -    "Maximum number of sendfile(2) sf_bufs available");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
> T> -    "Number of sendfile(2) sf_bufs at peak usage");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
> T> -    "Number of sendfile(2) sf_bufs in use");
> T> -
> T> -static void     sf_buf_init(void *arg);
> T> -SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
> T> -
> T> -LIST_HEAD(sf_head, sf_buf);
> T> -
> T>  /*
> T> - * A hash table of active sendfile(2) buffers
> T> - */
> T> -static struct sf_head *sf_buf_active;
> T> -static u_long sf_buf_hashmask;
> T> -
> T> -#define SF_BUF_HASH(m)  (((m) - vm_page_array) & sf_buf_hashmask)
> T> -
> T> -static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
> T> -static u_int    sf_buf_alloc_want;
> T> -
> T> -/*
> T> - * A lock used to synchronize access to the hash table and free list
> T> - */
> T> -static struct mtx sf_buf_lock;
> T> -
> T> -/*
> T>   * Finish a fork operation, with process p2 nearly set up.
> T>   * Copy and update the pcb, set up the stack so that the child
> T>   * ready to run and return to user mode.
> T> _at__at_ -184,107 +147,7 _at__at_ cpu_thread_swapout(struct thread *td)
> T>  {
> T>  }
> T>  
> T> -/*
> T> - * Detatch mapped page and release resources back to the system.
> T> - */
> T>  void
> T> -sf_buf_free(struct sf_buf *sf)
> T> -{
> T> -
> T> -	 mtx_lock(&sf_buf_lock);
> T> -	 sf->ref_count--;
> T> -	 if (sf->ref_count == 0) {
> T> -		 TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
> T> -		 nsfbufsused--;
> T> -		 pmap_kremove(sf->kva);
> T> -		 sf->m = NULL;
> T> -		 LIST_REMOVE(sf, list_entry);
> T> -		 if (sf_buf_alloc_want > 0)
> T> -			 wakeup(&sf_buf_freelist);
> T> -	 }
> T> -	 mtx_unlock(&sf_buf_lock);
> T> -}
> T> -
> T> -/*
> T> - * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
> T> - */
> T> -static void
> T> -sf_buf_init(void *arg)
> T> -{
> T> -	struct sf_buf *sf_bufs;
> T> -	vm_offset_t sf_base;
> T> -	int i;
> T> -
> T> -	nsfbufs = NSFBUFS;
> T> -	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
> T> -		
> T> -	sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
> T> -	TAILQ_INIT(&sf_buf_freelist);
> T> -	sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
> T> -	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
> T> -	    M_NOWAIT | M_ZERO);
> T> -	for (i = 0; i < nsfbufs; i++) {
> T> -		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
> T> -		TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
> T> -	}
> T> -	sf_buf_alloc_want = 0;
> T> -	mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
> T> -}
> T> -
> T> -/*
> T> - * Get an sf_buf from the freelist. Will block if none are available.
> T> - */
> T> -struct sf_buf *
> T> -sf_buf_alloc(struct vm_page *m, int flags)
> T> -{
> T> -	struct sf_head *hash_list;
> T> -	struct sf_buf *sf;
> T> -	int error;
> T> -
> T> -	hash_list = &sf_buf_active[SF_BUF_HASH(m)];
> T> -	mtx_lock(&sf_buf_lock);
> T> -	LIST_FOREACH(sf, hash_list, list_entry) {
> T> -		if (sf->m == m) {
> T> -			sf->ref_count++;
> T> -			if (sf->ref_count == 1) {
> T> -				TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> -				nsfbufsused++;
> T> -				nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -			}
> T> -			goto done;
> T> -		}
> T> -	}
> T> -	while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
> T> -		if (flags & SFB_NOWAIT)
> T> -			goto done;
> T> -		sf_buf_alloc_want++;
> T> -		SFSTAT_INC(sf_allocwait);
> T> -		error = msleep(&sf_buf_freelist, &sf_buf_lock,
> T> -		    (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
> T> -		sf_buf_alloc_want--;
> T> -	
> T> -
> T> -		/*
> T> -		 * If we got a signal, don't risk going back to sleep.
> T> -		 */
> T> -		if (error)
> T> -			goto done;
> T> -	}
> T> -	TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> -	if (sf->m != NULL)
> T> -		LIST_REMOVE(sf, list_entry);
> T> -	LIST_INSERT_HEAD(hash_list, sf, list_entry);
> T> -	sf->ref_count = 1;
> T> -	sf->m = m;
> T> -	nsfbufsused++;
> T> -	nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -	pmap_kenter(sf->kva, VM_PAGE_TO_PHYS(sf->m));
> T> -done:
> T> -	mtx_unlock(&sf_buf_lock);
> T> -	return (sf);
> T> -}
> T> -
> T> -void
> T>  cpu_set_syscall_retval(struct thread *td, int error)
> T>  {
> T>  	struct trapframe *frame;
> T> Index: sys/arm/include/sf_buf.h
> T> ===================================================================
> T> --- sys/arm/include/sf_buf.h	(revision 268750)
> T> +++ sys/arm/include/sf_buf.h	(working copy)
> T> _at__at_ -29,33 +29,18 _at__at_
> T>  #ifndef _MACHINE_SF_BUF_H_
> T>  #define _MACHINE_SF_BUF_H_
> T>  
> T> -#include <sys/queue.h>
> T> -
> T> -struct vm_page;
> T> -
> T> -struct sf_buf {
> T> -	LIST_ENTRY(sf_buf) list_entry;	/* list of buffers */
> T> -	TAILQ_ENTRY(sf_buf) free_entry;	/* list of buffers */
> T> -	struct		vm_page *m;	/* currently mapped page */
> T> -	vm_offset_t	kva;		/* va of mapping */
> T> -	int		ref_count;	/* usage of this mapping */
> T> -};
> T> -
> T> -static __inline vm_offset_t
> T> -sf_buf_kva(struct sf_buf *sf)
> T> +static inline void
> T> +sf_buf_map(struct sf_buf *sf, int flags)
> T>  {
> T>  
> T> -	return (sf->kva);
> T> +	pmap_kenter(sf->kva, VM_PAGE_TO_PHYS(sf->m));
> T>  }
> T>  
> T> -static __inline struct vm_page *
> T> -sf_buf_page(struct sf_buf *sf)
> T> +static inline int
> T> +sf_buf_unmap(struct sf_buf *sf)
> T>  {
> T>  
> T> -	return (sf->m);
> T> +	pmap_kremove(sf->kva);
> T> +	return (1);
> T>  }
> T> -
> T> -struct sf_buf *	sf_buf_alloc(struct vm_page *m, int flags);
> T> -void sf_buf_free(struct sf_buf *sf);
> T> -
> T>  #endif /* !_MACHINE_SF_BUF_H_ */
> T> Index: sys/arm/include/vmparam.h
> T> ===================================================================
> T> --- sys/arm/include/vmparam.h	(revision 268750)
> T> +++ sys/arm/include/vmparam.h	(working copy)
> T> _at__at_ -170,4 +170,7 _at__at_ extern vm_offset_t vm_max_kernel_address;
> T>  #define	VM_MAX_AUTOTUNE_MAXUSERS	384
> T>  #endif
> T>  
> T> +#define	SFBUF
> T> +#define	SFBUF_MAP
> T> +
> T>  #endif	/* _MACHINE_VMPARAM_H_ */
> T> Index: sys/conf/files.arm
> T> ===================================================================
> T> --- sys/conf/files.arm	(revision 268750)
> T> +++ sys/conf/files.arm	(working copy)
> T> _at__at_ -77,6 +77,7 _at__at_ font.h				optional	sc			\
> T>  	clean	"font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8"
> T>  kern/subr_busdma_bufalloc.c	standard
> T>  kern/subr_dummy_vdso_tc.c	standard
> T> +kern/subr_sfbuf.c		standard
> T>  libkern/arm/aeabi_unwind.c	standard
> T>  libkern/arm/divsi3.S		standard
> T>  libkern/arm/ffs.S		standard
> T> Index: sys/conf/files.i386
> T> ===================================================================
> T> --- sys/conf/files.i386	(revision 268750)
> T> +++ sys/conf/files.i386	(working copy)
> T> _at__at_ -520,6 +520,7 _at__at_ isa/vga_isa.c			optional vga
> T>  kern/kern_clocksource.c		standard
> T>  kern/imgact_aout.c		optional compat_aout
> T>  kern/imgact_gzip.c		optional gzip
> T> +kern/subr_sfbuf.c		standard
> T>  libkern/divdi3.c		standard
> T>  libkern/flsll.c			standard
> T>  libkern/memmove.c		standard
> T> Index: sys/conf/files.mips
> T> ===================================================================
> T> --- sys/conf/files.mips	(revision 268750)
> T> +++ sys/conf/files.mips	(working copy)
> T> _at__at_ -51,6 +51,7 _at__at_ mips/mips/vm_machdep.c			standard
> T>  kern/kern_clocksource.c			standard
> T>  kern/link_elf_obj.c			standard
> T>  kern/subr_dummy_vdso_tc.c		standard
> T> +kern/subr_sfbuf.c			optional	mips | mipsel | mipsn32
> T>  
> T>  # gcc/clang runtime
> T>  libkern/ffsl.c				standard
> T> Index: sys/conf/files.pc98
> T> ===================================================================
> T> --- sys/conf/files.pc98	(revision 268750)
> T> +++ sys/conf/files.pc98	(working copy)
> T> _at__at_ -205,6 +205,7 _at__at_ i386/svr4/svr4_machdep.c	optional compat_svr4
> T>  kern/kern_clocksource.c		standard
> T>  kern/imgact_aout.c		optional compat_aout
> T>  kern/imgact_gzip.c		optional gzip
> T> +kern/subr_sfbuf.c		standard
> T>  libkern/divdi3.c		standard
> T>  libkern/flsll.c			standard
> T>  libkern/memmove.c		standard
> T> Index: sys/conf/files.powerpc
> T> ===================================================================
> T> --- sys/conf/files.powerpc	(revision 268750)
> T> +++ sys/conf/files.powerpc	(working copy)
> T> _at__at_ -71,6 +71,7 _at__at_ dev/vt/hw/ofwfb/ofwfb.c		optional	vt aim
> T>  kern/kern_clocksource.c		standard
> T>  kern/subr_dummy_vdso_tc.c	standard
> T>  kern/syscalls.c			optional	ktr
> T> +kern/subr_sfbuf.c		standard
> T>  libkern/ashldi3.c		optional	powerpc
> T>  libkern/ashrdi3.c		optional	powerpc
> T>  libkern/bcmp.c			standard
> T> Index: sys/conf/files.sparc64
> T> ===================================================================
> T> --- sys/conf/files.sparc64	(revision 268750)
> T> +++ sys/conf/files.sparc64	(working copy)
> T> _at__at_ -63,6 +63,7 _at__at_ dev/uart/uart_kbd_sun.c		optional	uart sc | vt
> T>  kern/kern_clocksource.c		standard
> T>  kern/subr_dummy_vdso_tc.c	standard
> T>  kern/syscalls.c			optional	ktr
> T> +kern/subr_sfbuf.c		standard
> T>  libkern/ffs.c			standard
> T>  libkern/ffsl.c			standard
> T>  libkern/fls.c			standard
> T> Index: sys/i386/i386/vm_machdep.c
> T> ===================================================================
> T> --- sys/i386/i386/vm_machdep.c	(revision 268750)
> T> +++ sys/i386/i386/vm_machdep.c	(working copy)
> T> _at__at_ -118,38 +118,6 _at__at_ static u_int	cpu_reset_proxyid;
> T>  static volatile u_int	cpu_reset_proxy_active;
> T>  #endif
> T>  
> T> -static int nsfbufs;
> T> -static int nsfbufspeak;
> T> -static int nsfbufsused;
> T> -
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
> T> -    "Maximum number of sendfile(2) sf_bufs available");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
> T> -    "Number of sendfile(2) sf_bufs at peak usage");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
> T> -    "Number of sendfile(2) sf_bufs in use");
> T> -
> T> -static void	sf_buf_init(void *arg);
> T> -SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
> T> -
> T> -LIST_HEAD(sf_head, sf_buf);
> T> -
> T> -/*
> T> - * A hash table of active sendfile(2) buffers
> T> - */
> T> -static struct sf_head *sf_buf_active;
> T> -static u_long sf_buf_hashmask;
> T> -
> T> -#define	SF_BUF_HASH(m)	(((m) - vm_page_array) & sf_buf_hashmask)
> T> -
> T> -static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
> T> -static u_int	sf_buf_alloc_want;
> T> -
> T> -/*
> T> - * A lock used to synchronize access to the hash table and free list
> T> - */
> T> -static struct mtx sf_buf_lock;
> T> -
> T>  extern int	_ucodesel, _udatasel;
> T>  
> T>  /*
> T> _at__at_ -750,122 +718,13 _at__at_ cpu_reset_real()
> T>  }
> T>  
> T>  /*
> T> - * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
> T> - */
> T> -static void
> T> -sf_buf_init(void *arg)
> T> -{
> T> -	struct sf_buf *sf_bufs;
> T> -	vm_offset_t sf_base;
> T> -	int i;
> T> -
> T> -	nsfbufs = NSFBUFS;
> T> -	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
> T> -
> T> -	sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
> T> -	TAILQ_INIT(&sf_buf_freelist);
> T> -	sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
> T> -	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
> T> -	    M_NOWAIT | M_ZERO);
> T> -	for (i = 0; i < nsfbufs; i++) {
> T> -		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
> T> -		TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
> T> -	}
> T> -	sf_buf_alloc_want = 0;
> T> -	mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
> T> -}
> T> -
> T> -/*
> T> - * Invalidate the cache lines that may belong to the page, if
> T> - * (possibly old) mapping of the page by sf buffer exists.  Returns
> T> - * TRUE when mapping was found and cache invalidated.
> T> - */
> T> -boolean_t
> T> -sf_buf_invalidate_cache(vm_page_t m)
> T> -{
> T> -	struct sf_head *hash_list;
> T> -	struct sf_buf *sf;
> T> -	boolean_t ret;
> T> -
> T> -	hash_list = &sf_buf_active[SF_BUF_HASH(m)];
> T> -	ret = FALSE;
> T> -	mtx_lock(&sf_buf_lock);
> T> -	LIST_FOREACH(sf, hash_list, list_entry) {
> T> -		if (sf->m == m) {
> T> -			/*
> T> -			 * Use pmap_qenter to update the pte for
> T> -			 * existing mapping, in particular, the PAT
> T> -			 * settings are recalculated.
> T> -			 */
> T> -			pmap_qenter(sf->kva, &m, 1);
> T> -			pmap_invalidate_cache_range(sf->kva, sf->kva +
> T> -			    PAGE_SIZE);
> T> -			ret = TRUE;
> T> -			break;
> T> -		}
> T> -	}
> T> -	mtx_unlock(&sf_buf_lock);
> T> -	return (ret);
> T> -}
> T> -
> T> -/*
> T>   * Get an sf_buf from the freelist.  May block if none are available.
> T>   */
> T> -struct sf_buf *
> T> -sf_buf_alloc(struct vm_page *m, int flags)
> T> +void
> T> +sf_buf_map(struct sf_buf *sf, int flags)
> T>  {
> T>  	pt_entry_t opte, *ptep;
> T> -	struct sf_head *hash_list;
> T> -	struct sf_buf *sf;
> T> -#ifdef SMP
> T> -	cpuset_t other_cpus;
> T> -	u_int cpuid;
> T> -#endif
> T> -	int error;
> T>  
> T> -	KASSERT(curthread->td_pinned > 0 || (flags & SFB_CPUPRIVATE) == 0,
> T> -	    ("sf_buf_alloc(SFB_CPUPRIVATE): curthread not pinned"));
> T> -	hash_list = &sf_buf_active[SF_BUF_HASH(m)];
> T> -	mtx_lock(&sf_buf_lock);
> T> -	LIST_FOREACH(sf, hash_list, list_entry) {
> T> -		if (sf->m == m) {
> T> -			sf->ref_count++;
> T> -			if (sf->ref_count == 1) {
> T> -				TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> -				nsfbufsused++;
> T> -				nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -			}
> T> -#ifdef SMP
> T> -			goto shootdown;	
> T> -#else
> T> -			goto done;
> T> -#endif
> T> -		}
> T> -	}
> T> -	while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
> T> -		if (flags & SFB_NOWAIT)
> T> -			goto done;
> T> -		sf_buf_alloc_want++;
> T> -		SFSTAT_INC(sf_allocwait);
> T> -		error = msleep(&sf_buf_freelist, &sf_buf_lock,
> T> -		    (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
> T> -		sf_buf_alloc_want--;
> T> -
> T> -		/*
> T> -		 * If we got a signal, don't risk going back to sleep. 
> T> -		 */
> T> -		if (error)
> T> -			goto done;
> T> -	}
> T> -	TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> -	if (sf->m != NULL)
> T> -		LIST_REMOVE(sf, list_entry);
> T> -	LIST_INSERT_HEAD(hash_list, sf, list_entry);
> T> -	sf->ref_count = 1;
> T> -	sf->m = m;
> T> -	nsfbufsused++;
> T> -	nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -
> T>  	/*
> T>  	 * Update the sf_buf's virtual-to-physical mapping, flushing the
> T>  	 * virtual address from the TLB.  Since the reference count for 
> T> _at__at_ -876,11 +735,11 _at__at_ cpu_reset_real()
> T>  	ptep = vtopte(sf->kva);
> T>  	opte = *ptep;
> T>  #ifdef XEN
> T> -       PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(m)) | pgeflag
> T> -	   | PG_RW | PG_V | pmap_cache_bits(m->md.pat_mode, 0));
> T> +       PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(sf->m)) | pgeflag
> T> +	   | PG_RW | PG_V | pmap_cache_bits(sf->m->md.pat_mode, 0));
> T>  #else
> T> -	*ptep = VM_PAGE_TO_PHYS(m) | pgeflag | PG_RW | PG_V |
> T> -	    pmap_cache_bits(m->md.pat_mode, 0);
> T> +	*ptep = VM_PAGE_TO_PHYS(sf->m) | pgeflag | PG_RW | PG_V |
> T> +	    pmap_cache_bits(sf->m->md.pat_mode, 0);
> T>  #endif
> T>  
> T>  	/*
> T> _at__at_ -892,7 +751,21 _at__at_ cpu_reset_real()
> T>  #ifdef SMP
> T>  	if ((opte & (PG_V | PG_A)) ==  (PG_V | PG_A))
> T>  		CPU_ZERO(&sf->cpumask);
> T> -shootdown:
> T> +
> T> +	sf_buf_shootdown(sf, flags);
> T> +#else
> T> +	if ((opte & (PG_V | PG_A)) ==  (PG_V | PG_A))
> T> +		pmap_invalidate_page(kernel_pmap, sf->kva);
> T> +#endif
> T> +}
> T> +
> T> +#ifdef SMP
> T> +void
> T> +sf_buf_shootdown(struct sf_buf *sf, int flags)
> T> +{
> T> +	cpuset_t other_cpus;
> T> +	u_int cpuid;
> T> +
> T>  	sched_pin();
> T>  	cpuid = PCPU_GET(cpuid);
> T>  	if (!CPU_ISSET(cpuid, &sf->cpumask)) {
> T> _at__at_ -909,42 +782,50 _at__at_ cpu_reset_real()
> T>  		}
> T>  	}
> T>  	sched_unpin();
> T> +}
> T> +#endif
> T> +
> T> +/*
> T> + * MD part of sf_buf_free().
> T> + */
> T> +int
> T> +sf_buf_unmap(struct sf_buf *sf)
> T> +{
> T> +#ifdef XEN
> T> +	/*
> T> +	 * Xen doesn't like having dangling R/W mappings
> T> +	 */
> T> +	pmap_qremove(sf->kva, 1);
> T> +	return (1);
> T>  #else
> T> -	if ((opte & (PG_V | PG_A)) ==  (PG_V | PG_A))
> T> -		pmap_invalidate_page(kernel_pmap, sf->kva);
> T> +	return (0);
> T>  #endif
> T> -done:
> T> -	mtx_unlock(&sf_buf_lock);
> T> -	return (sf);
> T>  }
> T>  
> T> +static void
> T> +sf_buf_invalidate(struct sf_buf *sf)
> T> +{
> T> +	vm_page_t m = sf->m;
> T> +
> T> +	/*
> T> +	 * Use pmap_qenter to update the pte for
> T> +	 * existing mapping, in particular, the PAT
> T> +	 * settings are recalculated.
> T> +	 */
> T> +	pmap_qenter(sf->kva, &m, 1);
> T> +	pmap_invalidate_cache_range(sf->kva, sf->kva + PAGE_SIZE);
> T> +}
> T> +
> T>  /*
> T> - * Remove a reference from the given sf_buf, adding it to the free
> T> - * list when its reference count reaches zero.  A freed sf_buf still,
> T> - * however, retains its virtual-to-physical mapping until it is
> T> - * recycled or reactivated by sf_buf_alloc(9).
> T> + * Invalidate the cache lines that may belong to the page, if
> T> + * (possibly old) mapping of the page by sf buffer exists.  Returns
> T> + * TRUE when mapping was found and cache invalidated.
> T>   */
> T> -void
> T> -sf_buf_free(struct sf_buf *sf)
> T> +boolean_t
> T> +sf_buf_invalidate_cache(vm_page_t m)
> T>  {
> T>  
> T> -	mtx_lock(&sf_buf_lock);
> T> -	sf->ref_count--;
> T> -	if (sf->ref_count == 0) {
> T> -		TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
> T> -		nsfbufsused--;
> T> -#ifdef XEN
> T> -/*
> T> - * Xen doesn't like having dangling R/W mappings
> T> - */
> T> -		pmap_qremove(sf->kva, 1);
> T> -		sf->m = NULL;
> T> -		LIST_REMOVE(sf, list_entry);
> T> -#endif
> T> -		if (sf_buf_alloc_want > 0)
> T> -			wakeup(&sf_buf_freelist);
> T> -	}
> T> -	mtx_unlock(&sf_buf_lock);
> T> +	return (sf_buf_process_page(m, sf_buf_invalidate));
> T>  }
> T>  
> T>  /*
> T> Index: sys/i386/include/sf_buf.h
> T> ===================================================================
> T> --- sys/i386/include/sf_buf.h	(revision 268750)
> T> +++ sys/i386/include/sf_buf.h	(working copy)
> T> _at__at_ -1,5 +1,5 _at__at_
> T>  /*-
> T> - * Copyright (c) 2003, 2005 Alan L. Cox <alc_at_cs.rice.edu>
> T> + * Copyright (c) 2014 Gleb Smirnoff <glebius_at_FreeBSD.org>
> T>   * All rights reserved.
> T>   *
> T>   * Redistribution and use in source and binary forms, with or without
> T> _at__at_ -29,39 +29,8 _at__at_
> T>  #ifndef _MACHINE_SF_BUF_H_
> T>  #define _MACHINE_SF_BUF_H_
> T>  
> T> -#include <sys/_cpuset.h>
> T> -#include <sys/queue.h>
> T> +void sf_buf_map(struct sf_buf *, int);
> T> +int sf_buf_unmap(struct sf_buf *);
> T> +boolean_t sf_buf_invalidate_cache(vm_page_t);
> T>  
> T> -struct vm_page;
> T> -
> T> -struct sf_buf {
> T> -	LIST_ENTRY(sf_buf) list_entry;	/* list of buffers */
> T> -	TAILQ_ENTRY(sf_buf) free_entry;	/* list of buffers */
> T> -	struct		vm_page *m;	/* currently mapped page */
> T> -	vm_offset_t	kva;		/* va of mapping */
> T> -	int		ref_count;	/* usage of this mapping */
> T> -#ifdef SMP
> T> -	cpuset_t	cpumask;	/* cpus on which mapping is valid */
> T> -#endif
> T> -};
> T> -
> T> -struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags);
> T> -void sf_buf_free(struct sf_buf *sf);
> T> -
> T> -static __inline vm_offset_t
> T> -sf_buf_kva(struct sf_buf *sf)
> T> -{
> T> -
> T> -	return (sf->kva);
> T> -}
> T> -
> T> -static __inline struct vm_page *
> T> -sf_buf_page(struct sf_buf *sf)
> T> -{
> T> -
> T> -	return (sf->m);
> T> -}
> T> -
> T> -boolean_t sf_buf_invalidate_cache(vm_page_t m);
> T> -
> T>  #endif /* !_MACHINE_SF_BUF_H_ */
> T> Index: sys/i386/include/vmparam.h
> T> ===================================================================
> T> --- sys/i386/include/vmparam.h	(revision 268750)
> T> +++ sys/i386/include/vmparam.h	(working copy)
> T> _at__at_ -198,4 +198,9 _at__at_
> T>  #define VM_MAX_AUTOTUNE_MAXUSERS 384
> T>  #endif
> T>  
> T> +#define	SFBUF
> T> +#define	SFBUF_MAP
> T> +#define	SFBUF_CPUSET
> T> +#define	SFBUF_PROCESS_PAGE
> T> +
> T>  #endif /* _MACHINE_VMPARAM_H_ */
> T> Index: sys/kern/subr_sfbuf.c
> T> ===================================================================
> T> --- sys/kern/subr_sfbuf.c	(revision 0)
> T> +++ sys/kern/subr_sfbuf.c	(working copy)
> T> _at__at_ -0,0 +1,226 _at__at_
> T> +/*-
> T> + * Copyright (c) 2014 Gleb Smirnoff <glebius_at_FreeBSD.org>
> T> + * Copyright (c) 2003, 2005 Alan L. Cox <alc_at_cs.rice.edu>
> T> + * All rights reserved.
> T> + *
> T> + * Redistribution and use in source and binary forms, with or without
> T> + * modification, are permitted provided that the following conditions
> T> + * are met:
> T> + * 1. Redistributions of source code must retain the above copyright
> T> + *    notice, this list of conditions and the following disclaimer.
> T> + * 2. Redistributions in binary form must reproduce the above copyright
> T> + *    notice, this list of conditions and the following disclaimer in the
> T> + *    documentation and/or other materials provided with the distribution.
> T> + *
> T> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> T> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> T> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> T> + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> T> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> T> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> T> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> T> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> T> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> T> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> T> + * SUCH DAMAGE.
> T> + */
> T> +
> T> +#include <sys/cdefs.h>
> T> +__FBSDID("$FreeBSD$");
> T> +
> T> +#include <sys/param.h>
> T> +#include <sys/kernel.h>
> T> +#include <sys/lock.h>
> T> +#include <sys/malloc.h>
> T> +#include <sys/mutex.h>
> T> +#include <sys/sf_buf.h>
> T> +#include <sys/smp.h>
> T> +#include <sys/sysctl.h>
> T> +
> T> +#include <vm/vm.h>
> T> +#include <vm/vm_extern.h>
> T> +#include <vm/vm_page.h>
> T> +
> T> +#ifndef NSFBUFS
> T> +#define	NSFBUFS		(512 + maxusers * 16)
> T> +#endif
> T> +
> T> +static int nsfbufs;
> T> +static int nsfbufspeak;
> T> +static int nsfbufsused;
> T> +
> T> +SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
> T> +    "Maximum number of sendfile(2) sf_bufs available");
> T> +SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
> T> +    "Number of sendfile(2) sf_bufs at peak usage");
> T> +SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
> T> +    "Number of sendfile(2) sf_bufs in use");
> T> +
> T> +static void	sf_buf_init(void *arg);
> T> +SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
> T> +
> T> +LIST_HEAD(sf_head, sf_buf);
> T> +
> T> +/*
> T> + * A hash table of active sendfile(2) buffers
> T> + */
> T> +static struct sf_head *sf_buf_active;
> T> +static u_long sf_buf_hashmask;
> T> +
> T> +#define	SF_BUF_HASH(m)	(((m) - vm_page_array) & sf_buf_hashmask)
> T> +
> T> +static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
> T> +static u_int	sf_buf_alloc_want;
> T> +
> T> +/*
> T> + * A lock used to synchronize access to the hash table and free list
> T> + */
> T> +static struct mtx sf_buf_lock;
> T> +
> T> +/*
> T> + * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
> T> + */
> T> +static void
> T> +sf_buf_init(void *arg)
> T> +{
> T> +	struct sf_buf *sf_bufs;
> T> +	vm_offset_t sf_base;
> T> +	int i;
> T> +
> T> +#ifdef SFBUF_OPTIONAL_DIRECT_MAP
> T> +	if (SFBUF_OPTIONAL_DIRECT_MAP)
> T> +		return;
> T> +#endif
> T> +
> T> +	nsfbufs = NSFBUFS;
> T> +	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
> T> +
> T> +	sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
> T> +	TAILQ_INIT(&sf_buf_freelist);
> T> +	sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
> T> +	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
> T> +	    M_NOWAIT | M_ZERO);
> T> +	KASSERT(sf_bufs, ("%s: malloc failure", __func__));
> T> +	for (i = 0; i < nsfbufs; i++) {
> T> +		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
> T> +		TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
> T> +	}
> T> +	sf_buf_alloc_want = 0;
> T> +	mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
> T> +}
> T> +
> T> +/*
> T> + * Get an sf_buf from the freelist.  May block if none are available.
> T> + */
> T> +struct sf_buf *
> T> +sf_buf_alloc(struct vm_page *m, int flags)
> T> +{
> T> +	struct sf_head *hash_list;
> T> +	struct sf_buf *sf;
> T> +	int error;
> T> +
> T> +#ifdef SFBUF_OPTIONAL_DIRECT_MAP
> T> +	if (SFBUF_OPTIONAL_DIRECT_MAP)
> T> +		return ((struct sf_buf *)m);
> T> +#endif
> T> +
> T> +	KASSERT(curthread->td_pinned > 0 || (flags & SFB_CPUPRIVATE) == 0,
> T> +	    ("sf_buf_alloc(SFB_CPUPRIVATE): curthread not pinned"));
> T> +	hash_list = &sf_buf_active[SF_BUF_HASH(m)];
> T> +	mtx_lock(&sf_buf_lock);
> T> +	LIST_FOREACH(sf, hash_list, list_entry) {
> T> +		if (sf->m == m) {
> T> +			sf->ref_count++;
> T> +			if (sf->ref_count == 1) {
> T> +				TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> +				nsfbufsused++;
> T> +				nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> +			}
> T> +#if defined(SMP) && defined(SFBUF_CPUSET)
> T> +			sf_buf_shootdown(sf, flags);
> T> +#endif
> T> +			goto done;
> T> +		}
> T> +	}
> T> +	while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
> T> +		if (flags & SFB_NOWAIT)
> T> +			goto done;
> T> +		sf_buf_alloc_want++;
> T> +		SFSTAT_INC(sf_allocwait);
> T> +		error = msleep(&sf_buf_freelist, &sf_buf_lock,
> T> +		    (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
> T> +		sf_buf_alloc_want--;
> T> +
> T> +		/*
> T> +		 * If we got a signal, don't risk going back to sleep. 
> T> +		 */
> T> +		if (error)
> T> +			goto done;
> T> +	}
> T> +	TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> +	if (sf->m != NULL)
> T> +		LIST_REMOVE(sf, list_entry);
> T> +	LIST_INSERT_HEAD(hash_list, sf, list_entry);
> T> +	sf->ref_count = 1;
> T> +	sf->m = m;
> T> +	nsfbufsused++;
> T> +	nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> +	sf_buf_map(sf, flags);
> T> +done:
> T> +	mtx_unlock(&sf_buf_lock);
> T> +	return (sf);
> T> +}
> T> +
> T> +/*
> T> + * Remove a reference from the given sf_buf, adding it to the free
> T> + * list when its reference count reaches zero.  A freed sf_buf still,
> T> + * however, retains its virtual-to-physical mapping until it is
> T> + * recycled or reactivated by sf_buf_alloc(9).
> T> + */
> T> +void
> T> +sf_buf_free(struct sf_buf *sf)
> T> +{
> T> +
> T> +#ifdef SFBUF_OPTIONAL_DIRECT_MAP
> T> +	if (SFBUF_OPTIONAL_DIRECT_MAP)
> T> +		return;
> T> +#endif
> T> +
> T> +	mtx_lock(&sf_buf_lock);
> T> +	sf->ref_count--;
> T> +	if (sf->ref_count == 0) {
> T> +		TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
> T> +		nsfbufsused--;
> T> +		if (sf_buf_unmap(sf)) {
> T> +			sf->m = NULL;
> T> +			LIST_REMOVE(sf, list_entry);
> T> +		}
> T> +		if (sf_buf_alloc_want > 0)
> T> +			wakeup(&sf_buf_freelist);
> T> +	}
> T> +	mtx_unlock(&sf_buf_lock);
> T> +}
> T> +
> T> +#ifdef SFBUF_PROCESS_PAGE
> T> +/*
> T> + * Run callback function on sf_buf that holds a certain page.
> T> + */
> T> +boolean_t
> T> +sf_buf_process_page(vm_page_t m, void (*cb)(struct sf_buf *))
> T> +{
> T> +	struct sf_head *hash_list;
> T> +	struct sf_buf *sf;
> T> +
> T> +	hash_list = &sf_buf_active[SF_BUF_HASH(m)];
> T> +	mtx_lock(&sf_buf_lock);
> T> +	LIST_FOREACH(sf, hash_list, list_entry) {
> T> +		if (sf->m == m) {
> T> +			cb(sf);
> T> +			mtx_unlock(&sf_buf_lock);
> T> +			return (TRUE);
> T> +		}
> T> +	}
> T> +	mtx_unlock(&sf_buf_lock);
> T> +	return (FALSE);
> T> +}
> T> +#endif	/* SFBUF_PROCESS_PAGE */
> T> 
> T> Property changes on: sys/kern/subr_sfbuf.c
> T> ___________________________________________________________________
> T> Added: svn:mime-type
> T> ## -0,0 +1 ##
> T> +text/plain
> T> \ No newline at end of property
> T> Added: svn:keywords
> T> ## -0,0 +1 ##
> T> +FreeBSD=%H
> T> \ No newline at end of property
> T> Added: svn:eol-style
> T> ## -0,0 +1 ##
> T> +native
> T> \ No newline at end of property
> T> Index: sys/mips/include/sf_buf.h
> T> ===================================================================
> T> --- sys/mips/include/sf_buf.h	(revision 268750)
> T> +++ sys/mips/include/sf_buf.h	(working copy)
> T> _at__at_ -29,31 +29,9 _at__at_
> T>  #ifndef _MACHINE_SF_BUF_H_
> T>  #define _MACHINE_SF_BUF_H_
> T>  
> T> -#ifdef __mips_n64
> T> -#include <vm/vm.h>
> T> -#include <vm/vm_param.h>
> T> -#include <vm/vm_page.h>
> T> -#else
> T> -#include <sys/queue.h>
> T> -#endif
> T> +#ifdef __mips_n64	/* In 64 bit the whole memory is directly mapped */
> T>  
> T> -#ifdef __mips_n64
> T> -/* In 64 bit the whole memory is directly mapped */
> T> -struct	sf_buf;
> T> -
> T> -static inline struct sf_buf *
> T> -sf_buf_alloc(struct vm_page *m, int pri)
> T> -{
> T> -
> T> -	return ((struct sf_buf *)m);
> T> -}
> T> -
> T> -static inline void
> T> -sf_buf_free(struct sf_buf *sf)
> T> -{
> T> -}
> T> -
> T> -static __inline vm_offset_t
> T> +static inline vm_offset_t
> T>  sf_buf_kva(struct sf_buf *sf)
> T>  {
> T>  	vm_page_t	m;
> T> _at__at_ -62,7 +40,7 _at__at_ sf_buf_kva(struct sf_buf *sf)
> T>  	return (MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m)));
> T>  }
> T>  
> T> -static __inline struct vm_page *
> T> +static inline struct vm_page *
> T>  sf_buf_page(struct sf_buf *sf)
> T>  {
> T>  
> T> _at__at_ -69,31 +47,5 _at__at_ sf_buf_page(struct sf_buf *sf)
> T>  	return ((vm_page_t)sf);
> T>  }
> T>  
> T> -#else /* ! __mips_n64 */
> T> -struct vm_page;
> T> -
> T> -struct sf_buf {
> T> -	SLIST_ENTRY(sf_buf) free_list;	/* list of free buffer slots */
> T> -	struct		vm_page *m;	/* currently mapped page */
> T> -	vm_offset_t	kva;		/* va of mapping */
> T> -};
> T> -
> T> -struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags);
> T> -void sf_buf_free(struct sf_buf *sf);
> T> -
> T> -static __inline vm_offset_t
> T> -sf_buf_kva(struct sf_buf *sf)
> T> -{
> T> -
> T> -	return (sf->kva);
> T> -}
> T> -
> T> -static __inline struct vm_page *
> T> -sf_buf_page(struct sf_buf *sf)
> T> -{
> T> -
> T> -	return (sf->m);
> T> -}
> T>  #endif /* __mips_n64 */
> T> -
> T>  #endif /* !_MACHINE_SF_BUF_H_ */
> T> Index: sys/mips/include/vmparam.h
> T> ===================================================================
> T> --- sys/mips/include/vmparam.h	(revision 268750)
> T> +++ sys/mips/include/vmparam.h	(working copy)
> T> _at__at_ -187,4 +187,8 _at__at_
> T>  
> T>  #define	ZERO_REGION_SIZE	(64 * 1024)	/* 64KB */
> T>  
> T> +#ifndef __mips_n64
> T> +#define	SFBUF
> T> +#endif
> T> +
> T>  #endif /* !_MACHINE_VMPARAM_H_ */
> T> Index: sys/mips/mips/vm_machdep.c
> T> ===================================================================
> T> --- sys/mips/mips/vm_machdep.c	(revision 268750)
> T> +++ sys/mips/mips/vm_machdep.c	(working copy)
> T> _at__at_ -76,9 +76,6 _at__at_ __FBSDID("$FreeBSD$");
> T>  
> T>  #include <sys/user.h>
> T>  #include <sys/mbuf.h>
> T> -#ifndef __mips_n64
> T> -#include <sys/sf_buf.h>
> T> -#endif
> T>  
> T>  /* Duplicated from asm.h */
> T>  #if defined(__mips_o32)
> T> _at__at_ -92,39 +89,7 _at__at_ __FBSDID("$FreeBSD$");
> T>  #define	CALLFRAME_SIZ	(SZREG * 4)
> T>  #endif
> T>  
> T> -#ifndef __mips_n64
> T> -
> T> -#ifndef NSFBUFS
> T> -#define	NSFBUFS		(512 + maxusers * 16)
> T> -#endif
> T> -
> T> -static int nsfbufs;
> T> -static int nsfbufspeak;
> T> -static int nsfbufsused;
> T> -
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
> T> -    "Maximum number of sendfile(2) sf_bufs available");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
> T> -    "Number of sendfile(2) sf_bufs at peak usage");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
> T> -    "Number of sendfile(2) sf_bufs in use");
> T> -
> T> -static void	sf_buf_init(void *arg);
> T> -SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
> T> -
> T>  /*
> T> - * Expanded sf_freelist head.  Really an SLIST_HEAD() in disguise, with the
> T> - * sf_freelist head with the sf_lock mutex.
> T> - */
> T> -static struct {
> T> -	SLIST_HEAD(, sf_buf) sf_head;
> T> -	struct mtx sf_lock;
> T> -} sf_freelist;
> T> -
> T> -static u_int	sf_buf_alloc_want;
> T> -#endif /* !__mips_n64 */
> T> -
> T> -/*
> T>   * Finish a fork operation, with process p2 nearly set up.
> T>   * Copy and update the pcb, set up the stack so that the child
> T>   * ready to run and return to user mode.
> T> _at__at_ -513,84 +478,6 _at__at_ cpu_set_upcall_kse(struct thread *td, void (*entry
> T>  #define	ZIDLE_HI(v)	((v) * 4 / 5)
> T>  
> T>  /*
> T> - * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
> T> - */
> T> -#ifndef __mips_n64
> T> -static void
> T> -sf_buf_init(void *arg)
> T> -{
> T> -	struct sf_buf *sf_bufs;
> T> -	vm_offset_t sf_base;
> T> -	int i;
> T> -
> T> -	nsfbufs = NSFBUFS;
> T> -	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
> T> -
> T> -	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
> T> -	SLIST_INIT(&sf_freelist.sf_head);
> T> -	sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
> T> -	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
> T> -	    M_NOWAIT | M_ZERO);
> T> -	for (i = 0; i < nsfbufs; i++) {
> T> -		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
> T> -		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
> T> -	}
> T> -	sf_buf_alloc_want = 0;
> T> -}
> T> -
> T> -/*
> T> - * Get an sf_buf from the freelist.  Will block if none are available.
> T> - */
> T> -struct sf_buf *
> T> -sf_buf_alloc(struct vm_page *m, int flags)
> T> -{
> T> -	struct sf_buf *sf;
> T> -	int error;
> T> -
> T> -	mtx_lock(&sf_freelist.sf_lock);
> T> -	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
> T> -		if (flags & SFB_NOWAIT)
> T> -			break;
> T> -		sf_buf_alloc_want++;
> T> -		SFSTAT_INC(sf_allocwait);
> T> -		error = msleep(&sf_freelist, &sf_freelist.sf_lock,
> T> -		    (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
> T> -		sf_buf_alloc_want--;
> T> -
> T> -		/*
> T> -		 * If we got a signal, don't risk going back to sleep.
> T> -		 */
> T> -		if (error)
> T> -			break;
> T> -	}
> T> -	if (sf != NULL) {
> T> -		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
> T> -		sf->m = m;
> T> -		nsfbufsused++;
> T> -		nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -		pmap_qenter(sf->kva, &sf->m, 1);
> T> -	}
> T> -	mtx_unlock(&sf_freelist.sf_lock);
> T> -	return (sf);
> T> -}
> T> -
> T> -/*
> T> - * Release resources back to the system.
> T> - */
> T> -void
> T> -sf_buf_free(struct sf_buf *sf)
> T> -{
> T> -	pmap_qremove(sf->kva, 1);
> T> -	mtx_lock(&sf_freelist.sf_lock);
> T> -	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
> T> -	nsfbufsused--;
> T> -	if (sf_buf_alloc_want > 0)
> T> -		wakeup(&sf_freelist);
> T> -	mtx_unlock(&sf_freelist.sf_lock);
> T> -}
> T> -#endif	/* !__mips_n64 */
> T> -
> T> -/*
> T>   * Software interrupt handler for queued VM system processing.
> T>   */
> T>  void
> T> Index: sys/powerpc/include/sf_buf.h
> T> ===================================================================
> T> --- sys/powerpc/include/sf_buf.h	(revision 268750)
> T> +++ sys/powerpc/include/sf_buf.h	(working copy)
> T> _at__at_ -1,80 +0,0 _at__at_
> T> -/*-
> T> - * Copyright (c) 2003 Alan L. Cox <alc_at_cs.rice.edu>
> T> - * All rights reserved.
> T> - *
> T> - * Redistribution and use in source and binary forms, with or without
> T> - * modification, are permitted provided that the following conditions
> T> - * are met:
> T> - * 1. Redistributions of source code must retain the above copyright
> T> - *    notice, this list of conditions and the following disclaimer.
> T> - * 2. Redistributions in binary form must reproduce the above copyright
> T> - *    notice, this list of conditions and the following disclaimer in the
> T> - *    documentation and/or other materials provided with the distribution.
> T> - *
> T> - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> T> - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> T> - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> T> - * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> T> - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> T> - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> T> - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> T> - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> T> - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> T> - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> T> - * SUCH DAMAGE.
> T> - *
> T> - * $FreeBSD$
> T> - */
> T> -
> T> -#ifndef _MACHINE_SF_BUF_H_
> T> -#define _MACHINE_SF_BUF_H_
> T> -
> T> -#include <vm/vm.h>
> T> -#include <vm/vm_param.h>
> T> -#include <vm/vm_page.h>
> T> -#include <machine/md_var.h>
> T> -#include <sys/queue.h>
> T> -
> T> -struct vm_page;
> T> -
> T> -struct sf_buf {
> T> -	LIST_ENTRY(sf_buf) list_entry;	/* list of buffers */
> T> -	TAILQ_ENTRY(sf_buf) free_entry;	/* list of buffers */
> T> -	struct		vm_page *m;	/* currently mapped page */
> T> -	vm_offset_t	kva;		/* va of mapping */
> T> -	int		ref_count;	/* usage of this mapping */
> T> -};
> T> -
> T> -struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags);
> T> -void sf_buf_free(struct sf_buf *sf);
> T> -
> T> -/*
> T> - * On 32-bit OEA, the only purpose for which sf_buf is used is to implement
> T> - * an opaque pointer required by the machine-independent parts of the kernel.
> T> - * That pointer references the vm_page that is "mapped" by the sf_buf.  The
> T> - * actual mapping is provided by the direct virtual-to-physical mapping.  
> T> - *
> T> - * On OEA64 and Book-E, we need to do something a little more complicated. Use
> T> - * the runtime-detected hw_direct_map to pick between the two cases. Our
> T> - * friends in vm_machdep.c will do the same to ensure nothing gets confused.
> T> - */
> T> -
> T> -static __inline vm_offset_t
> T> -sf_buf_kva(struct sf_buf *sf)
> T> -{
> T> -	if (hw_direct_map)
> T> -		return (VM_PAGE_TO_PHYS((vm_page_t)sf));
> T> -
> T> -	return (sf->kva);
> T> -}
> T> -
> T> -static __inline struct vm_page *
> T> -sf_buf_page(struct sf_buf *sf)
> T> -{
> T> -	if (hw_direct_map)
> T> -		return ((vm_page_t)sf);
> T> -
> T> -	return (sf->m);
> T> -}
> T> -
> T> -#endif /* !_MACHINE_SF_BUF_H_ */
> T> Index: sys/powerpc/include/vmparam.h
> T> ===================================================================
> T> --- sys/powerpc/include/vmparam.h	(revision 268750)
> T> +++ sys/powerpc/include/vmparam.h	(working copy)
> T> _at__at_ -197,4 +197,18 _at__at_ struct pmap_physseg {
> T>  
> T>  #define	ZERO_REGION_SIZE	(64 * 1024)	/* 64KB */
> T>  
> T> +/*
> T> + * On 32-bit OEA, the only purpose for which sf_buf is used is to implement
> T> + * an opaque pointer required by the machine-independent parts of the kernel.
> T> + * That pointer references the vm_page that is "mapped" by the sf_buf.  The
> T> + * actual mapping is provided by the direct virtual-to-physical mapping.
> T> + *
> T> + * On OEA64 and Book-E, we need to do something a little more complicated. Use
> T> + * the runtime-detected hw_direct_map to pick between the two cases. Our
> T> + * friends in vm_machdep.c will do the same to ensure nothing gets confused.
> T> + */
> T> +#define	SFBUF
> T> +#define	SFBUF_NOMD
> T> +#define	SFBUF_OPTIONAL_DIRECT_MAP	hw_direct_map
> T> + 
> T>  #endif /* _MACHINE_VMPARAM_H_ */
> T> Index: sys/powerpc/powerpc/vm_machdep.c
> T> ===================================================================
> T> --- sys/powerpc/powerpc/vm_machdep.c	(revision 268750)
> T> +++ sys/powerpc/powerpc/vm_machdep.c	(working copy)
> T> _at__at_ -80,7 +80,6 _at__at_
> T>  #include <sys/vmmeter.h>
> T>  #include <sys/kernel.h>
> T>  #include <sys/mbuf.h>
> T> -#include <sys/sf_buf.h>
> T>  #include <sys/sysctl.h>
> T>  #include <sys/sysent.h>
> T>  #include <sys/unistd.h>
> T> _at__at_ -100,47 +99,6 _at__at_
> T>  #include <vm/vm_map.h>
> T>  #include <vm/vm_extern.h>
> T>  
> T> -/*
> T> - * On systems without a direct mapped region (e.g. PPC64),
> T> - * we use the same code as the Book E implementation. Since
> T> - * we need to have runtime detection of this, define some machinery
> T> - * for sf_bufs in this case, and ignore it on systems with direct maps.
> T> - */
> T> -
> T> -#ifndef NSFBUFS
> T> -#define NSFBUFS		(512 + maxusers * 16)
> T> -#endif
> T> -
> T> -static int nsfbufs;
> T> -static int nsfbufspeak;
> T> -static int nsfbufsused;
> T> -
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
> T> -    "Maximum number of sendfile(2) sf_bufs available");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
> T> -    "Number of sendfile(2) sf_bufs at peak usage");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
> T> -    "Number of sendfile(2) sf_bufs in use");
> T> -
> T> -static void sf_buf_init(void *arg);
> T> -SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
> T> - 
> T> -LIST_HEAD(sf_head, sf_buf);
> T> - 
> T> -/* A hash table of active sendfile(2) buffers */
> T> -static struct sf_head *sf_buf_active;
> T> -static u_long sf_buf_hashmask;
> T> -
> T> -#define SF_BUF_HASH(m)  (((m) - vm_page_array) & sf_buf_hashmask)
> T> -
> T> -static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
> T> -static u_int sf_buf_alloc_want;
> T> -
> T> -/*
> T> - * A lock used to synchronize access to the hash table and free list
> T> - */
> T> -static struct mtx sf_buf_lock;
> T> -
> T>  #ifdef __powerpc64__
> T>  extern uintptr_t tocbase;
> T>  #endif
> T> _at__at_ -245,124 +203,6 _at__at_ cpu_exit(struct thread *td)
> T>  }
> T>  
> T>  /*
> T> - * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
> T> - */
> T> -static void
> T> -sf_buf_init(void *arg)
> T> -{
> T> -	struct sf_buf *sf_bufs;
> T> -	vm_offset_t sf_base;
> T> -	int i;
> T> -
> T> -	/* Don't bother on systems with a direct map */
> T> -	if (hw_direct_map)
> T> -		return;
> T> -
> T> -	nsfbufs = NSFBUFS;
> T> -	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
> T> -
> T> -	sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
> T> -	TAILQ_INIT(&sf_buf_freelist);
> T> -	sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
> T> -	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
> T> -	    M_NOWAIT | M_ZERO);
> T> -
> T> -	for (i = 0; i < nsfbufs; i++) {
> T> -		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
> T> -		TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
> T> -	}
> T> -	sf_buf_alloc_want = 0;
> T> -	mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
> T> -}
> T> -
> T> -/*
> T> - * Get an sf_buf from the freelist. Will block if none are available.
> T> - */
> T> -struct sf_buf *
> T> -sf_buf_alloc(struct vm_page *m, int flags)
> T> -{
> T> -	struct sf_head *hash_list;
> T> -	struct sf_buf *sf;
> T> -	int error;
> T> -
> T> -	if (hw_direct_map) {
> T> -		/* Shortcut the direct mapped case */
> T> -		return ((struct sf_buf *)m);
> T> -	}
> T> -
> T> -	hash_list = &sf_buf_active[SF_BUF_HASH(m)];
> T> -	mtx_lock(&sf_buf_lock);
> T> -	LIST_FOREACH(sf, hash_list, list_entry) {
> T> -		if (sf->m == m) {
> T> -			sf->ref_count++;
> T> -			if (sf->ref_count == 1) {
> T> -				TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> -				nsfbufsused++;
> T> -				nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -			}
> T> -			goto done;
> T> -		}
> T> -	}
> T> -
> T> -	while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
> T> -		if (flags & SFB_NOWAIT)
> T> -			goto done;
> T> -
> T> -		sf_buf_alloc_want++;
> T> -		SFSTAT_INC(sf_allocwait);
> T> -		error = msleep(&sf_buf_freelist, &sf_buf_lock,
> T> -		    (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
> T> -		sf_buf_alloc_want--;
> T> -
> T> -		/*
> T> -		 * If we got a signal, don't risk going back to sleep.
> T> -		 */
> T> -		if (error)
> T> -			goto done;
> T> -	}
> T> -
> T> -	TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> -	if (sf->m != NULL)
> T> -		LIST_REMOVE(sf, list_entry);
> T> -
> T> -	LIST_INSERT_HEAD(hash_list, sf, list_entry);
> T> -	sf->ref_count = 1;
> T> -	sf->m = m;
> T> -	nsfbufsused++;
> T> -	nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -	pmap_qenter(sf->kva, &sf->m, 1);
> T> -done:
> T> -	mtx_unlock(&sf_buf_lock);
> T> -	return (sf);
> T> -}
> T> -
> T> -/*
> T> - * Detach mapped page and release resources back to the system.
> T> - *
> T> - * Remove a reference from the given sf_buf, adding it to the free
> T> - * list when its reference count reaches zero. A freed sf_buf still,
> T> - * however, retains its virtual-to-physical mapping until it is
> T> - * recycled or reactivated by sf_buf_alloc(9).
> T> - */
> T> -void
> T> -sf_buf_free(struct sf_buf *sf)
> T> -{
> T> -	if (hw_direct_map)
> T> -		return;
> T> -
> T> -	mtx_lock(&sf_buf_lock);
> T> -	sf->ref_count--;
> T> -	if (sf->ref_count == 0) {
> T> -		TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
> T> -		nsfbufsused--;
> T> -
> T> -		if (sf_buf_alloc_want > 0)
> T> -			wakeup(&sf_buf_freelist);
> T> -	}
> T> -	mtx_unlock(&sf_buf_lock);
> T> -}
> T> -
> T> -/*
> T>   * Software interrupt handler for queued VM system processing.
> T>   */
> T>  void
> T> Index: sys/sparc64/include/sf_buf.h
> T> ===================================================================
> T> --- sys/sparc64/include/sf_buf.h	(revision 268750)
> T> +++ sys/sparc64/include/sf_buf.h	(working copy)
> T> _at__at_ -1,59 +0,0 _at__at_
> T> -/*-
> T> - * Copyright (c) 2003 Alan L. Cox <alc_at_cs.rice.edu>
> T> - * All rights reserved.
> T> - *
> T> - * Redistribution and use in source and binary forms, with or without
> T> - * modification, are permitted provided that the following conditions
> T> - * are met:
> T> - * 1. Redistributions of source code must retain the above copyright
> T> - *    notice, this list of conditions and the following disclaimer.
> T> - * 2. Redistributions in binary form must reproduce the above copyright
> T> - *    notice, this list of conditions and the following disclaimer in the
> T> - *    documentation and/or other materials provided with the distribution.
> T> - *
> T> - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> T> - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> T> - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> T> - * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> T> - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> T> - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> T> - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> T> - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> T> - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> T> - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> T> - * SUCH DAMAGE.
> T> - *
> T> - * $FreeBSD$
> T> - */
> T> -
> T> -#ifndef _MACHINE_SF_BUF_H_
> T> -#define _MACHINE_SF_BUF_H_
> T> -
> T> -#include <sys/queue.h>
> T> -
> T> -struct vm_page;
> T> -
> T> -struct sf_buf {
> T> -	SLIST_ENTRY(sf_buf) free_list;	/* list of free buffer slots */
> T> -	struct		vm_page *m;	/* currently mapped page */
> T> -	vm_offset_t	kva;		/* va of mapping */
> T> -};
> T> -
> T> -struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags);
> T> -void sf_buf_free(struct sf_buf *sf);
> T> -
> T> -static __inline vm_offset_t
> T> -sf_buf_kva(struct sf_buf *sf)
> T> -{
> T> -
> T> -	return (sf->kva);
> T> -}
> T> -
> T> -static __inline struct vm_page *
> T> -sf_buf_page(struct sf_buf *sf)
> T> -{
> T> -
> T> -	return (sf->m);
> T> -}
> T> -
> T> -#endif /* !_MACHINE_SF_BUF_H_ */
> T> Index: sys/sparc64/include/vmparam.h
> T> ===================================================================
> T> --- sys/sparc64/include/vmparam.h	(revision 268750)
> T> +++ sys/sparc64/include/vmparam.h	(working copy)
> T> _at__at_ -239,4 +239,7 _at__at_ extern vm_offset_t vm_max_kernel_address;
> T>   */
> T>  #define	ZERO_REGION_SIZE	PAGE_SIZE
> T>  
> T> +#define	SFBUF
> T> +#define	SFBUF_NOMD
> T> +
> T>  #endif /* !_MACHINE_VMPARAM_H_ */
> T> Index: sys/sparc64/sparc64/vm_machdep.c
> T> ===================================================================
> T> --- sys/sparc64/sparc64/vm_machdep.c	(revision 268750)
> T> +++ sys/sparc64/sparc64/vm_machdep.c	(working copy)
> T> _at__at_ -53,7 +53,6 _at__at_ __FBSDID("$FreeBSD$");
> T>  #include <sys/mutex.h>
> T>  #include <sys/proc.h>
> T>  #include <sys/sysent.h>
> T> -#include <sys/sf_buf.h>
> T>  #include <sys/sched.h>
> T>  #include <sys/sysctl.h>
> T>  #include <sys/unistd.h>
> T> _at__at_ -84,35 +83,6 _at__at_ __FBSDID("$FreeBSD$");
> T>  #include <machine/tlb.h>
> T>  #include <machine/tstate.h>
> T>  
> T> -#ifndef NSFBUFS
> T> -#define	NSFBUFS		(512 + maxusers * 16)
> T> -#endif
> T> -
> T> -static int nsfbufs;
> T> -static int nsfbufspeak;
> T> -static int nsfbufsused;
> T> -
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
> T> -    "Maximum number of sendfile(2) sf_bufs available");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
> T> -    "Number of sendfile(2) sf_bufs at peak usage");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
> T> -    "Number of sendfile(2) sf_bufs in use");
> T> -
> T> -static void	sf_buf_init(void *arg);
> T> -SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
> T> -
> T> -/*
> T> - * Expanded sf_freelist head.  Really an SLIST_HEAD() in disguise, with the
> T> - * sf_freelist head with the sf_lock mutex.
> T> - */
> T> -static struct {
> T> -	SLIST_HEAD(, sf_buf) sf_head;
> T> -	struct mtx sf_lock;
> T> -} sf_freelist;
> T> -
> T> -static u_int	sf_buf_alloc_want;
> T> -
> T>  PMAP_STATS_VAR(uma_nsmall_alloc);
> T>  PMAP_STATS_VAR(uma_nsmall_alloc_oc);
> T>  PMAP_STATS_VAR(uma_nsmall_free);
> T> _at__at_ -417,84 +387,7 _at__at_ is_physical_memory(vm_paddr_t addr)
> T>  	return (0);
> T>  }
> T>  
> T> -/*
> T> - * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
> T> - */
> T> -static void
> T> -sf_buf_init(void *arg)
> T> -{
> T> -	struct sf_buf *sf_bufs;
> T> -	vm_offset_t sf_base;
> T> -	int i;
> T> -
> T> -	nsfbufs = NSFBUFS;
> T> -	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
> T> -
> T> -	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
> T> -	SLIST_INIT(&sf_freelist.sf_head);
> T> -	sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
> T> -	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
> T> -	    M_NOWAIT | M_ZERO);
> T> -	for (i = 0; i < nsfbufs; i++) {
> T> -		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
> T> -		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
> T> -	}
> T> -	sf_buf_alloc_want = 0;
> T> -}
> T> -
> T> -/*
> T> - * Get an sf_buf from the freelist.  Will block if none are available.
> T> - */
> T> -struct sf_buf *
> T> -sf_buf_alloc(struct vm_page *m, int flags)
> T> -{
> T> -	struct sf_buf *sf;
> T> -	int error;
> T> -
> T> -	mtx_lock(&sf_freelist.sf_lock);
> T> -	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
> T> -		if (flags & SFB_NOWAIT)
> T> -			break;
> T> -		sf_buf_alloc_want++;
> T> -		SFSTAT_INC(sf_allocwait);
> T> -		error = msleep(&sf_freelist, &sf_freelist.sf_lock,
> T> -		    (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
> T> -		sf_buf_alloc_want--;
> T> -
> T> -		/*
> T> -		 * If we got a signal, don't risk going back to sleep.
> T> -		 */
> T> -		if (error)
> T> -			break;
> T> -	}
> T> -	if (sf != NULL) {
> T> -		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
> T> -		sf->m = m;
> T> -		nsfbufsused++;
> T> -		nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -		pmap_qenter(sf->kva, &sf->m, 1);
> T> -	}
> T> -	mtx_unlock(&sf_freelist.sf_lock);
> T> -	return (sf);
> T> -}
> T> -
> T> -/*
> T> - * Release resources back to the system.
> T> - */
> T>  void
> T> -sf_buf_free(struct sf_buf *sf)
> T> -{
> T> -
> T> -	pmap_qremove(sf->kva, 1);
> T> -	mtx_lock(&sf_freelist.sf_lock);
> T> -	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
> T> -	nsfbufsused--;
> T> -	if (sf_buf_alloc_want > 0)
> T> -		wakeup(&sf_freelist);
> T> -	mtx_unlock(&sf_freelist.sf_lock);
> T> -}
> T> -
> T> -void
> T>  swi_vm(void *v)
> T>  {
> T>  
> T> Index: sys/sys/sf_buf.h
> T> ===================================================================
> T> --- sys/sys/sf_buf.h	(revision 268750)
> T> +++ sys/sys/sf_buf.h	(working copy)
> T> _at__at_ -29,6 +29,114 _at__at_
> T>  #ifndef _SYS_SF_BUF_H_
> T>  #define _SYS_SF_BUF_H_
> T>  
> T> +struct sfstat {				/* sendfile statistics */
> T> +	uint64_t	sf_iocnt;	/* times sendfile had to do disk I/O */
> T> +	uint64_t	sf_allocfail;	/* times sfbuf allocation failed */
> T> +	uint64_t	sf_allocwait;	/* times sfbuf allocation had to wait */
> T> +};
> T> +
> T> +#ifdef _KERNEL
> T> +#include <sys/types.h>
> T> +#include <sys/systm.h>
> T> +#include <sys/counter.h>
> T> +#include <vm/vm.h>
> T> +#include <vm/vm_param.h>
> T> +#include <vm/vm_page.h>
> T> +
> T> +#ifdef SFBUF
> T> +#if defined(SMP) && defined(SFBUF_CPUSET)
> T> +#include <sys/_cpuset.h>
> T> +#endif
> T> +#include <sys/queue.h>
> T> +
> T> +struct sf_buf {
> T> +	LIST_ENTRY(sf_buf)	list_entry;	/* list of buffers */
> T> +	TAILQ_ENTRY(sf_buf)	free_entry;	/* list of buffers */
> T> +	vm_page_t		m;		/* currently mapped page */
> T> +	vm_offset_t		kva;		/* va of mapping */
> T> +	int			ref_count;	/* usage of this mapping */
> T> +#if defined(SMP) && defined(SFBUF_CPUSET)
> T> +	cpuset_t		cpumask;	/* where mapping is valid */
> T> +#endif
> T> +};
> T> +#else /* ! SFBUF */
> T> +struct sf_buf;
> T> +#endif /* SFBUF */
> T> +
> T> +#ifndef SFBUF_NOMD
> T> +#include <machine/sf_buf.h>
> T> +#endif
> T> +#ifdef SFBUF_OPTIONAL_DIRECT_MAP
> T> +#include <machine/md_var.h>
> T> +#endif
> T> +
> T> +#ifdef SFBUF
> T> +struct sf_buf *sf_buf_alloc(struct vm_page *, int);
> T> +void sf_buf_free(struct sf_buf *);
> T> +
> T> +static inline vm_offset_t
> T> +sf_buf_kva(struct sf_buf *sf)
> T> +{
> T> +#ifdef SFBUF_OPTIONAL_DIRECT_MAP
> T> +	if (SFBUF_OPTIONAL_DIRECT_MAP)
> T> +		return (VM_PAGE_TO_PHYS((vm_page_t)sf));
> T> +#endif
> T> +
> T> +        return (sf->kva);
> T> +}
> T> +
> T> +static inline vm_page_t
> T> +sf_buf_page(struct sf_buf *sf)
> T> +{
> T> +#ifdef SFBUF_OPTIONAL_DIRECT_MAP
> T> +	if (SFBUF_OPTIONAL_DIRECT_MAP)
> T> +		return ((vm_page_t)sf);
> T> +#endif
> T> +
> T> +        return (sf->m);
> T> +}
> T> +
> T> +#ifndef SFBUF_MAP
> T> +#include <vm/pmap.h>
> T> +
> T> +static inline void
> T> +sf_buf_map(struct sf_buf *sf, int flags)
> T> +{
> T> +
> T> +	pmap_qenter(sf->kva, &sf->m, 1);
> T> +}
> T> +
> T> +static inline int
> T> +sf_buf_unmap(struct sf_buf *sf)
> T> +{
> T> +
> T> +	return (0);
> T> +}
> T> +#endif /* SFBUF_MAP */
> T> +
> T> +#if defined(SMP) && defined(SFBUF_CPUSET)
> T> +void sf_buf_shootdown(struct sf_buf *, int);
> T> +#endif
> T> +
> T> +#ifdef SFBUF_PROCESS_PAGE
> T> +boolean_t sf_buf_process_page(vm_page_t, void (*)(struct sf_buf *));
> T> +#endif
> T> +
> T> +#else /* ! SFBUF */
> T> +
> T> +static inline struct sf_buf *
> T> +sf_buf_alloc(struct vm_page *m, int pri)
> T> +{
> T> +
> T> +	return ((struct sf_buf *)m);
> T> +}
> T> +
> T> +static inline void
> T> +sf_buf_free(struct sf_buf *sf)
> T> +{
> T> +}
> T> +#endif /* SFBUF */
> T> +
> T>  /*
> T>   * Options to sf_buf_alloc() are specified through its flags argument.  This
> T>   * argument's value should be the result of a bitwise or'ing of one or more
> T> _at__at_ -40,19 +148,6 _at__at_
> T>  #define	SFB_DEFAULT	0
> T>  #define	SFB_NOWAIT	4		/* Return NULL if all bufs are used. */
> T>  
> T> -struct vm_page;
> T> -
> T> -struct sfstat {				/* sendfile statistics */
> T> -	uint64_t	sf_iocnt;	/* times sendfile had to do disk I/O */
> T> -	uint64_t	sf_allocfail;	/* times sfbuf allocation failed */
> T> -	uint64_t	sf_allocwait;	/* times sfbuf allocation had to wait */
> T> -};
> T> -
> T> -#ifdef _KERNEL
> T> -#include <machine/sf_buf.h>
> T> -#include <sys/systm.h>
> T> -#include <sys/counter.h>
> T> -
> T>  extern counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)];
> T>  #define	SFSTAT_ADD(name, val)	\
> T>      counter_u64_add(sfstat[offsetof(struct sfstat, name) / sizeof(uint64_t)],\
> 
> T> _______________________________________________
> T> freebsd-current_at_freebsd.org mailing list
> T> http://lists.freebsd.org/mailman/listinfo/freebsd-current
> T> To unsubscribe, send any mail to "freebsd-current-unsubscribe_at_freebsd.org"
> 
> 
> -- 
> Totus tuus, Glebius.
> _______________________________________________
> freebsd-current_at_freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-current
> To unsubscribe, send any mail to "freebsd-current-unsubscribe_at_freebsd.org"
> 
Received on Tue Jul 29 2014 - 15:29:53 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:40:51 UTC