Re: panic: vm_fault: fault on nofault entry, addr: fffffe00873d8000

From: Konstantin Belousov <kostikbel_at_gmail.com>
Date: Sun, 6 Dec 2015 20:57:36 +0200
On Sun, Dec 06, 2015 at 06:51:36PM +0100, Fabian Keil wrote:
> > > #16 0xffffffff80877d5a in bcopy () at /usr/src/sys/amd64/amd64/support.S:118
> > > #17 0xffffffff805f64e8 in uiomove_faultflag (cp=<value optimized out>, n=<value optimized out>, uio=0xfffffe009444aae0, nofault=<value optimized out>) at /usr/src/sys/kern/subr_uio.c:208
> > > #18 0xffffffff8046236f in msdosfs_read (ap=<value optimized out>) at /usr/src/sys/fs/msdosfs/msdosfs_vnops.c:596
> > > #19 0xffffffff808feb20 in VOP_READ_APV (vop=<value optimized out>, a=<value optimized out>) at vnode_if.c:930
> > > #20 0xffffffff8039bf3a in mdstart_vnode (sc=0xfffff8004c7ce000, bp=0xfffff80028fc81f0) at vnode_if.h:384  
> > From the frame 20, do 'p *bp' in kgdb and mail the result.  Do you have
> > any non-standard values for buffer cache knobs, esp. for MAXPHYS ?
> 
> (kgdb) p *bp
> $1 = {bio_cmd = 1 '\001', bio_flags = 16 '\020', bio_cflags = 0 '\0', bio_pflags = 0 '\0', bio_dev = 0x0, bio_disk = 0x0, bio_offset = 0, bio_bcount = 0, 
>   bio_data = 0xfffffe0077d94000 <Address 0xfffffe0077d94000 out of bounds>, bio_ma = 0xfffff8000275bc00, bio_ma_offset = 960,

bio_ma_n = 33,
This is the issue.  The upper layer (ZFS?) passed down a request
which is max-sized (see bio_length == 32 pages) but not page-aligned
(bio_ma_offset == 960), so it spans 33 pages (bio_ma_n == 33).
The physical buffer used for the transient mapping cannot handle this.

bio_error = 0, bio_resid = 0, 
>   bio_done = 0xffffffff804e51d0 <g_std_done>, bio_driver1 = 0x0, bio_driver2 = 0x0, bio_caller1 = 0x0, bio_caller2 = 0x0, bio_queue = {tqe_next = 0x0, tqe_prev = 0xfffff8004c7ce018}, bio_attribute = 0x0, 
>   bio_from = 0xfffff80010131d80, bio_to = 0xfffff800694f2a00, bio_length = 131072, bio_completed = 0, bio_children = 0, bio_inbed = 0, bio_parent = 0xfffff8000628bd90, bio_t0 = {sec = 33029, 
>     frac = 13163670047247984455}, bio_task = 0, bio_task_arg = 0x0, bio_classifier1 = 0x0, bio_classifier2 = 0x0, bio_pblkno = 0}
>  
> I don't use non-standard values for MAXPHYS or other buffer cache settings.
> 

Try the following patch.

diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c
index a47066e..52142ed 100644
--- a/sys/dev/md/md.c
+++ b/sys/dev/md/md.c
@@ -836,8 +836,8 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
 	struct buf *pb;
 	bus_dma_segment_t *vlist;
 	struct thread *td;
-	off_t len, zerosize;
-	int ma_offs;
+	off_t iolen, len, zerosize;
+	int ma_offs, npages;
 
 	switch (bp->bio_cmd) {
 	case BIO_READ:
@@ -858,6 +858,7 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
 	pb = NULL;
 	piov = NULL;
 	ma_offs = bp->bio_ma_offset;
+	len = bp->bio_length;
 
 	/*
 	 * VNODE I/O
@@ -890,7 +891,6 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
 		auio.uio_iovcnt = howmany(bp->bio_length, zerosize);
 		piov = malloc(sizeof(*piov) * auio.uio_iovcnt, M_MD, M_WAITOK);
 		auio.uio_iov = piov;
-		len = bp->bio_length;
 		while (len > 0) {
 			piov->iov_base = __DECONST(void *, zero_region);
 			piov->iov_len = len;
@@ -904,7 +904,6 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
 		piov = malloc(sizeof(*piov) * bp->bio_ma_n, M_MD, M_WAITOK);
 		auio.uio_iov = piov;
 		vlist = (bus_dma_segment_t *)bp->bio_data;
-		len = bp->bio_length;
 		while (len > 0) {
 			piov->iov_base = (void *)(uintptr_t)(vlist->ds_addr +
 			    ma_offs);
@@ -920,11 +919,20 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
 		piov = auio.uio_iov;
 	} else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
 		pb = getpbuf(&md_vnode_pbuf_freecnt);
-		pmap_qenter((vm_offset_t)pb->b_data, bp->bio_ma, bp->bio_ma_n);
-		aiov.iov_base = (void *)((vm_offset_t)pb->b_data + ma_offs);
-		aiov.iov_len = bp->bio_length;
+		bp->bio_resid = len;
+unmapped_step:
+		npages = min(MAXPHYS, roundup2(len + ma_offs, PAGE_SIZE)) /
+		    PAGE_SIZE;
+		iolen = min(npages * PAGE_SIZE - ma_offs, len);
+		KASSERT(iolen > 0, ("zero iolen"));
+		pmap_qenter((vm_offset_t)pb->b_data,
+		    &bp->bio_ma[ma_offs / PAGE_SIZE], npages);
+		aiov.iov_base = (void *)((vm_offset_t)pb->b_data +
+		    ma_offs % PAGE_SIZE);
+		aiov.iov_len = iolen;
 		auio.uio_iov = &aiov;
 		auio.uio_iovcnt = 1;
+		auio.uio_resid = aiov.iov_len;
 	} else {
 		aiov.iov_base = bp->bio_data;
 		aiov.iov_len = bp->bio_length;
@@ -948,15 +956,21 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
 		vn_finished_write(mp);
 	}
 
-	if (pb) {
-		pmap_qremove((vm_offset_t)pb->b_data, bp->bio_ma_n);
+	if (pb != NULL) {
+		pmap_qremove((vm_offset_t)pb->b_data, npages);
+		if (error == 0) {
+			len -= iolen;
+			bp->bio_resid -= iolen;
+			ma_offs += iolen;
+			if (len > 0)
+				goto unmapped_step;
+		}
 		relpbuf(pb, &md_vnode_pbuf_freecnt);
 	}
 
-	if (piov != NULL)
-		free(piov, M_MD);
-
-	bp->bio_resid = auio.uio_resid;
+	free(piov, M_MD);
+	if (pb == NULL)
+		bp->bio_resid = auio.uio_resid;
 	return (error);
 }
 
Received on Sun Dec 06 2015 - 17:57:43 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:41:01 UTC