[TEST(/review)] major/minor/devname fix

From: Poul-Henning Kamp <phk_at_phk.freebsd.dk>
Date: Wed, 09 Mar 2005 22:40:18 +0100
For the people with devname(3) and ssh trouble:  Please try out
this patch.  It is not quite commit ready yet, but it should
make your system usable again.


Part I:	Divorce the userland notion of "major/minor" from the kernel.

Change dev2udev() to DTRT, so that kvm/sysctl grubbing programs see
the userland values.  Make sysctl_devname() do the right thing as
well.  Move both to devfs where they belong (now).

Add a small frustation in case anybody tries to shortcut us again.



Part II: Reclaim and clean up kernel notion of major/minor.

Change the kernel side of the minor concept into cdev->si_drv0 and
make things work accordingly.

Remove the kernel side notion of major in toto, the real handle is
the cdevsw{} of which any given driver can have multiple.

Incomplete: only drivers using SI_CHEAPCLONE should be required to
use unique values in si_drv0.


Index: conf/kern.post.mk
===================================================================
RCS file: /home/ncvs/src/sys/conf/kern.post.mk,v
retrieving revision 1.76
diff -u -r1.76 kern.post.mk
--- conf/kern.post.mk	25 Feb 2005 05:34:45 -0000	1.76
+++ conf/kern.post.mk	9 Mar 2005 21:31:00 -0000
_at__at_ -110,7 +110,7 _at__at_
 kernel-clean:
 	rm -f *.o *.so *.So *.ko *.s eddep errs \
 	    ${FULLKERNEL} ${KERNEL_KO} linterrs makelinks tags \
-	    vers.c vnode_if.c vnode_if.h majors.c \
+	    vers.c vnode_if.c vnode_if.h \
 	    ${MFILES:T:S/.m$/.c/} ${MFILES:T:S/.m$/.h/} \
 	    ${CLEAN}
 
_at__at_ -227,15 +227,12 _at__at_
 	${INSTALL} -p -m 555 -o root -g wheel ${KERNEL_KO} ${DESTDIR}${KODIR}
 .endif
 
-config.o env.o hints.o majors.o vers.o vnode_if.o:
+config.o env.o hints.o vers.o vnode_if.o:
 	${NORMAL_C}
 
-config.ln env.ln hints.ln majors.ln vers.ln vnode_if.ln:
+config.ln env.ln hints.ln vers.ln vnode_if.ln:
 	${NORMAL_LINT}
 
-majors.c: $S/conf/majors $S/conf/majors.awk
-	${AWK} -f $S/conf/majors.awk $S/conf/majors > ${.TARGET}
-
 vers.c: $S/conf/newvers.sh $S/sys/param.h ${SYSTEM_DEP}
 	MAKE=${MAKE} sh $S/conf/newvers.sh ${KERN_IDENT}
 
Index: conf/kern.pre.mk
===================================================================
RCS file: /home/ncvs/src/sys/conf/kern.pre.mk,v
retrieving revision 1.63
diff -u -r1.63 kern.pre.mk
--- conf/kern.pre.mk	13 Feb 2005 05:58:40 -0000	1.63
+++ conf/kern.pre.mk	9 Mar 2005 21:31:00 -0000
_at__at_ -121,7 +121,7 _at__at_
 NORMAL_LINT=	${LINT} ${LINTFLAGS} ${CFLAGS:M-[DIU]*} ${.IMPSRC}
 
 GEN_CFILES= $S/$M/$M/genassym.c ${MFILES:T:S/.m$/.c/}
-SYSTEM_CFILES= config.c env.c hints.c majors.c vnode_if.c
+SYSTEM_CFILES= config.c env.c hints.c vnode_if.c
 SYSTEM_DEP= Makefile ${SYSTEM_OBJS}
 SYSTEM_OBJS= locore.o ${MDOBJS} ${OBJS}
 SYSTEM_OBJS+= ${SYSTEM_CFILES:.c=.o}
Index: fs/devfs/devfs_devs.c
===================================================================
RCS file: /home/ncvs/src/sys/fs/devfs/devfs_devs.c,v
retrieving revision 1.33
diff -u -r1.33 devfs_devs.c
--- fs/devfs/devfs_devs.c	10 Feb 2005 12:22:17 -0000	1.33
+++ fs/devfs/devfs_devs.c	9 Mar 2005 21:31:00 -0000
_at__at_ -114,6 +114,8 _at__at_
 devfs_itode (struct devfs_mount *dm, int inode)
 {
 
+	if (inode < 0)
+		return (NULL);
 	if (inode < NDEVFSINO)
 		return (&dm->dm_dirent[inode]);
 	if (devfs_overflow == NULL)
_at__at_ -127,6 +129,8 _at__at_
 devfs_itod (int inode)
 {
 
+	if (inode < 0)
+		return (NULL);
 	if (inode < NDEVFSINO)
 		return (&devfs_inot[inode]);
 	if (devfs_overflow == NULL)
_at__at_ -270,10 +274,7 _at__at_
 			if (dev == NULL && de != NULL) {
 				dd = de->de_dir;
 				*dep = NULL;
-				TAILQ_REMOVE(&dd->de_dlist, de, de_list);
-				if (de->de_vnode)
-					de->de_vnode->v_data = NULL;
-				FREE(de, M_DEVFS);
+				devfs_delete(dd, de);
 				devfs_dropref(i);
 				continue;
 			}
Index: fs/devfs/devfs_vnops.c
===================================================================
RCS file: /home/ncvs/src/sys/fs/devfs/devfs_vnops.c,v
retrieving revision 1.105
diff -u -r1.105 devfs_vnops.c
--- fs/devfs/devfs_vnops.c	22 Feb 2005 18:17:31 -0000	1.105
+++ fs/devfs/devfs_vnops.c	9 Mar 2005 21:31:00 -0000
_at__at_ -61,6 +61,7 _at__at_
 #include <sys/proc.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
+#include <sys/sysctl.h>
 #include <sys/time.h>
 #include <sys/ttycom.h>
 #include <sys/unistd.h>
_at__at_ -113,6 +114,25 _at__at_
 extern struct vop_vector devfs_vnodeops;
 extern struct vop_vector devfs_specops;
 
+static u_int
+devfs_random(void)
+{
+	static u_int devfs_seed;
+
+	while (devfs_seed == 0) {
+		/*
+		 * Make sure people don't make stupid assumptions
+		 * about device major/minor numbers in userspace.
+		 * We do this late to get entropy and for the same
+		 * reason we force a reseed, but it may not be
+		 * late enough for entropy to be available.
+		 */
+		arc4rand(&devfs_seed, sizeof devfs_seed, 1);
+		devfs_seed &= 0xf0f;
+	}
+	return (devfs_seed);
+}
+
 static int
 devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp)
 {
_at__at_ -394,8 +414,10 _at__at_
 	int error = 0;
 	struct devfs_dirent *de;
 	struct cdev *dev;
+	struct devfs_mount *dmp;
 
 	de = vp->v_data;
+	dmp = VFSTODEVFS(vp->v_mount);
 	KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp));
 	if (vp->v_type == VDIR) {
 		de = de->de_dir;
_at__at_ -441,7 +463,8 _at__at_
 		vap->va_mtime = dev->si_mtime;
 		fix(dev->si_ctime);
 		vap->va_ctime = dev->si_ctime;
-		vap->va_rdev = dev->si_udev;
+
+		vap->va_rdev = de->de_inode ^ devfs_random();
 	}
 	vap->va_gen = 0;
 	vap->va_flags = 0;
_at__at_ -462,7 +485,9 _at__at_
 	struct cdevsw *dsw;
 	struct vnode *vp;
 	struct vnode *vpold;
-	int error;
+	int error, i;
+	const char *p;
+	struct fiodgname_arg *fgn;
 
 	error = devfs_fp_check(fp, &dev, &dsw);
 	if (error)
_at__at_ -472,6 +497,13 _at__at_
 		*(int *)data = dsw->d_flags & D_TYPEMASK;
 		dev_relthread(dev);
 		return (0);
+	} else if (com == FIODGNAME) {
+		fgn = data;
+		p = devtoname(dev);
+		i = strlen(p) + 1;
+		if (i > fgn->len)
+			return (EINVAL);
+		return (copyout(p, fgn->buf, i));
 	}
 	if (dsw->d_flags & D_NEEDGIANT)
 		mtx_lock(&Giant);
_at__at_ -1432,6 +1464,43 _at__at_
 	.vop_write =		VOP_PANIC,
 };
 
+dev_t
+dev2udev(struct cdev *x)
+{
+	if (x == NULL)
+		return (NODEV);
+	return (x->si_inode ^ devfs_random());
+}
+
+/*
+ * Helper sysctl for devname(3).  We're given a struct cdev * and return
+ * the name, if any, registered by the device driver.
+ */
+static int
+sysctl_devname(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	dev_t ud;
+	struct cdev *dev, **dp;
+
+	error = SYSCTL_IN(req, &ud, sizeof (ud));
+	if (error)
+		return (error);
+	if (ud == NODEV)
+		return(EINVAL);
+	dp = devfs_itod(ud ^ devfs_random());
+	if (dp == NULL)
+		return(ENOENT);
+	dev = *dp;
+	if (dev == NULL)
+		return(ENOENT);
+	return(SYSCTL_OUT(req, dev->si_name, strlen(dev->si_name) + 1));
+	return (error);
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, devname, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_ANYBODY,
+	NULL, 0, sysctl_devname, "", "devname(3) handler");
+
 /*
  * Our calling convention to the device drivers used to be that we passed
  * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_ 
Index: kern/kern_conf.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_conf.c,v
retrieving revision 1.176
diff -u -r1.176 kern_conf.c
--- kern/kern_conf.c	8 Mar 2005 10:40:03 -0000	1.176
+++ kern/kern_conf.c	9 Mar 2005 21:31:00 -0000
_at__at_ -46,21 +46,8 _at__at_
 
 static MALLOC_DEFINE(M_DEVT, "cdev", "cdev storage");
 
-/* Built at compile time from sys/conf/majors */
-extern unsigned char reserved_majors[256];
-
-/*
- * This is the number of hash-buckets.  Experiments with 'real-life'
- * dev_t's show that a prime halfway between two powers of two works
- * best.
- */
-#define DEVT_HASH 83
-
-static LIST_HEAD(, cdev) dev_hash[DEVT_HASH];
-
 static struct mtx devmtx;
 static void freedev(struct cdev *dev);
-static struct cdev *newdev(int x, int y, struct cdev *);
 static void destroy_devl(struct cdev *dev);
 
 void
_at__at_ -189,7 +176,6 _at__at_
 	.d_mmap =	dead_mmap,
 	.d_strategy =	dead_strategy,
 	.d_name =	"dead",
-	.d_maj =	255,
 	.d_dump =	dead_dump,
 	.d_kqfilter =	dead_kqfilter
 };
_at__at_ -230,16 +216,12 _at__at_
 
 #define no_dump		(dumper_t *)enodev
 
-/*
- * struct cdev * and u_dev_t primitives
- */
-
 int
 major(struct cdev *x)
 {
 	if (x == NULL)
 		return NODEV;
-	return((x->si_udev >> 8) & 0xff);
+	return((x->si_drv0 >> 8) & 0xff);
 }
 
 int
_at__at_ -247,7 +229,7 _at__at_
 {
 	if (x == NULL)
 		return NODEV;
-	return(x->si_udev & MAXMINOR);
+	return(x->si_drv0 & MAXMINOR);
 }
 
 int
_at__at_ -287,29 +269,6 _at__at_
 	return (si);
 }
 
-static struct cdev *
-newdev(int x, int y, struct cdev *si)
-{
-	struct cdev *si2;
-	dev_t	udev;
-	int hash;
-
-	mtx_assert(&devmtx, MA_OWNED);
-	if (x == umajor(NODEV) && y == uminor(NODEV))
-		panic("newdev of NODEV");
-	udev = (x << 8) | y;
-	hash = udev % DEVT_HASH;
-	LIST_FOREACH(si2, &dev_hash[hash], si_hash) {
-		if (si2->si_udev == udev) {
-			freedev(si);
-			return (si2);
-		}
-	}
-	si->si_udev = udev;
-	LIST_INSERT_HEAD(&dev_hash[hash], si, si_hash);
-	return (si);
-}
-
 static void
 freedev(struct cdev *dev)
 {
_at__at_ -317,31 +276,10 _at__at_
 	free(dev, M_DEVT);
 }
 
-dev_t
-dev2udev(struct cdev *x)
-{
-	if (x == NULL)
-		return (NODEV);
-	return (x->si_udev);
-}
-
 struct cdev *
 findcdev(dev_t udev)
 {
-	struct cdev *si;
-	int hash;
-
-	mtx_assert(&devmtx, MA_NOTOWNED);
-	if (udev == NODEV)
-		return (NULL);
-	dev_lock();
-	hash = udev % DEVT_HASH;
-	LIST_FOREACH(si, &dev_hash[hash], si_hash) {
-		if (si->si_udev == udev)
-			break;
-	}
-	dev_unlock();
-	return (si);
+	return (NULL);
 }
 
 int
_at__at_ -357,55 +295,27 _at__at_
 }
 
 static void
-find_major(struct cdevsw *devsw)
-{
-	int i;
-
-	if (devsw->d_maj != MAJOR_AUTO) {
-		printf("NOTICE: Ignoring d_maj hint from driver \"%s\", %s",
-		    devsw->d_name, "driver should be updated/fixed\n");
-		devsw->d_maj = MAJOR_AUTO;
-	}
-	for (i = NUMCDEVSW - 1; i > 0; i--)
-		if (reserved_majors[i] != i)
-			break;
-	KASSERT(i > 0, ("Out of major numbers (%s)", devsw->d_name));
-	devsw->d_maj = i;
-	reserved_majors[i] = i;
-	devsw->d_flags |= D_ALLOCMAJ;
-}
-
-static void
 fini_cdevsw(struct cdevsw *devsw)
 {
-	if (devsw->d_flags & D_ALLOCMAJ) {
-		reserved_majors[devsw->d_maj] = 0;
-		devsw->d_maj = MAJOR_AUTO;
-		devsw->d_flags &= ~D_ALLOCMAJ;
-	}
+
+	mtx_assert(&devmtx, MA_OWNED);
 	devsw->d_flags &= ~D_INIT;
 }
 
-static void
+static struct cdevsw *
 prep_cdevsw(struct cdevsw *devsw)
 {
 
+	if (devsw->d_flags & D_INIT)
+		return (devsw);
 	dev_lock();
 
 	if (devsw->d_version != D_VERSION_00) {
 		printf(
 		    "WARNING: Device driver \"%s\" has wrong version %s\n",
 		    devsw->d_name, "and is disabled.  Recompile KLD module.");
-		devsw->d_open = dead_open;
-		devsw->d_close = dead_close;
-		devsw->d_read = dead_read;
-		devsw->d_write = dead_write;
-		devsw->d_ioctl = dead_ioctl;
-		devsw->d_poll = dead_poll;
-		devsw->d_mmap = dead_mmap;
-		devsw->d_strategy = dead_strategy;
-		devsw->d_dump = dead_dump;
-		devsw->d_kqfilter = dead_kqfilter;
+		dev_unlock();
+		return (&dead_cdevsw);
 	}
 	
 	if (devsw->d_flags & D_TTY) {
_at__at_ -431,57 +341,64 _at__at_
 
 	devsw->d_flags |= D_INIT;
 
-	if (!(devsw->d_flags & D_ALLOCMAJ))
-		find_major(devsw);
 	dev_unlock();
+	return (devsw);
 }
 
-struct cdev *
-make_dev(struct cdevsw *devsw, int minornr, uid_t uid, gid_t gid, int perms, const char *fmt, ...)
+static void
+imake_dev(struct cdev *dev, u_int drv0, uid_t uid, gid_t gid, int perms, const char *fmt, va_list ap)
 {
-	struct cdev *dev;
-	va_list ap;
 	int i;
 
-	KASSERT((minornr & ~MAXMINOR) == 0,
-	    ("Invalid minor (0x%x) in make_dev", minornr));
-
-	if (!(devsw->d_flags & D_INIT))
-		prep_cdevsw(devsw);
-	dev = allocdev();
-	dev_lock();
-	dev = newdev(devsw->d_maj, minornr, dev);
+	dev->si_drv0 = drv0;
 	if (dev->si_flags & SI_CHEAPCLONE &&
-	    dev->si_flags & SI_NAMED &&
-	    dev->si_devsw == devsw) {
+	    dev->si_flags & SI_NAMED) {
 		/*
 		 * This is allowed as it removes races and generally
 		 * simplifies cloning devices.
 		 * XXX: still ??
 		 */
-		dev_unlock();
-		return (dev);
+		return;
 	}
 	KASSERT(!(dev->si_flags & SI_NAMED),
 	    ("make_dev() by driver %s on pre-existing device (maj=%d, min=%x, name=%s)",
-	    devsw->d_name, major(dev), minor(dev), devtoname(dev)));
+	    dev->si_devsw->d_name, major(dev), minor(dev), devtoname(dev)));
 
-	va_start(ap, fmt);
 	i = vsnrprintf(dev->__si_namebuf, sizeof dev->__si_namebuf, 32, fmt, ap);
 	if (i > (sizeof dev->__si_namebuf - 1)) {
 		printf("WARNING: Device name truncated! (%s)\n", 
 		    dev->__si_namebuf);
 	}
-	va_end(ap);
 		
-	dev->si_devsw = devsw;
 	dev->si_uid = uid;
 	dev->si_gid = gid;
 	dev->si_mode = perms;
 	dev->si_flags |= SI_NAMED;
 
-	LIST_INSERT_HEAD(&devsw->d_devs, dev, si_list);
 	devfs_create(dev);
+}
+
+
+struct cdev *
+make_dev(struct cdevsw *devsw, u_int drv0, uid_t uid, gid_t gid, int perms, const char *fmt, ...)
+{
+	struct cdev *dev;
+	va_list ap;
+
+	devsw = prep_cdevsw(devsw);
+	LIST_FOREACH(dev, &devsw->d_devs, si_list) {
+		if (dev->si_drv0 == drv0)
+			break;
+	}
+	if (dev == NULL) {
+		dev = allocdev();
+		LIST_INSERT_HEAD(&devsw->d_devs, dev, si_list);
+	}
+	dev->si_devsw = devsw;
+	dev_lock();
+	va_start(ap, fmt);
+	imake_dev(dev, drv0, uid, gid, perms, fmt, ap);
+	va_end(ap);
 	dev_unlock();
 	return (dev);
 }
_at__at_ -589,7 +506,6 _at__at_
 		if (LIST_EMPTY(&csw->d_devs))
 			fini_cdevsw(csw);
 
-		LIST_REMOVE(dev, si_hash);
 	}
 	dev->si_flags &= ~SI_ALIAS;
 
_at__at_ -705,8 +621,7 _at__at_
 	KASSERT(*up <= CLONE_UNITMASK,
 	    ("Too high unit (0x%x) in clone_create", *up));
 
-	if (!(csw->d_flags & D_INIT))
-		prep_cdevsw(csw);
+	csw = prep_cdevsw(csw);
 
 	/*
 	 * Search the list for a lot of things in one go:
_at__at_ -744,7 +659,8 _at__at_
 	}
 	if (unit == -1)
 		unit = low & CLONE_UNITMASK;
-	dev = newdev(csw->d_maj, unit2minor(unit | extra), ndev);
+	dev = ndev;
+	dev->si_drv0 = unit2minor(unit | extra);
 	if (dev->si_flags & SI_CLONELIST) {
 		printf("dev %p (%s) is on clonelist\n", dev, dev->si_name);
 		printf("unit=%d\n", unit);
_at__at_ -755,6 +671,8 _at__at_
 	}
 	KASSERT(!(dev->si_flags & SI_CLONELIST),
 	    ("Dev %p(%s) should not be on clonelist", dev, dev->si_name));
+	dev->si_devsw = csw;
+	LIST_INSERT_HEAD(&csw->d_devs, dev, si_list);
 	if (dl != NULL)
 		LIST_INSERT_BEFORE(dl, dev, si_clone);
 	else if (de != NULL)
_at__at_ -785,37 +703,11 _at__at_
 		KASSERT(dev->si_flags & SI_CLONELIST,
 		    ("Dev %p(%s) should be on clonelist", dev, dev->si_name));
 		KASSERT(dev->si_flags & SI_NAMED,
-		    ("Driver has goofed in cloning underways udev %x", dev->si_udev));
+		    ("Driver has goofed in cloning underways drv0 %x %s",
+		    dev->si_drv0, devtoname(dev)));
 		destroy_devl(dev);
 	}
 	dev_unlock();
 	free(cd, M_DEVBUF);
 	*cdp = NULL;
 }
-
-/*
- * Helper sysctl for devname(3).  We're given a struct cdev * and return
- * the name, if any, registered by the device driver.
- */
-static int
-sysctl_devname(SYSCTL_HANDLER_ARGS)
-{
-	int error;
-	dev_t ud;
-	struct cdev *dev;
-
-	error = SYSCTL_IN(req, &ud, sizeof (ud));
-	if (error)
-		return (error);
-	if (ud == NODEV)
-		return(EINVAL);
-	dev = findcdev(ud);
-	if (dev == NULL)
-		error = ENOENT;
-	else
-		error = SYSCTL_OUT(req, dev->si_name, strlen(dev->si_name) + 1);
-	return (error);
-}
-
-SYSCTL_PROC(_kern, OID_AUTO, devname, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_ANYBODY,
-	NULL, 0, sysctl_devname, "", "devname(3) handler");
Index: sys/conf.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/conf.h,v
retrieving revision 1.212
diff -u -r1.212 conf.h
--- sys/conf.h	8 Mar 2005 10:40:03 -0000	1.212
+++ sys/conf.h	9 Mar 2005 21:31:00 -0000
_at__at_ -67,7 +67,6 _at__at_
 	struct timespec	si_atime;
 	struct timespec	si_ctime;
 	struct timespec	si_mtime;
-	dev_t		si_udev;
 	int		si_refcount;
 	LIST_ENTRY(cdev)	si_list;
 	LIST_ENTRY(cdev)	si_clone;
_at__at_ -78,6 +77,7 _at__at_
 	struct cdev *si_parent;
 	u_int		si_inode;
 	char		*si_name;
+	u_int		si_drv0;
 	void		*si_drv1, *si_drv2;
 	struct cdevsw	*si_devsw;
 	int		si_iosize_max;	/* maximum I/O size (for physio &al) */
_at__at_ -222,12 +222,6 _at__at_
 
 #define MAXMINOR	0xffff00ffU
 
-/*
- * XXX: do not use MAJOR_AUTO unless you have no choice.  In general drivers
- * should just not initialize .d_maj and that will DTRT.
- */
-#define	MAJOR_AUTO	0	/* XXX: Not GM */
-
 struct module;
 
 struct devsw_module_data {
_at__at_ -262,7 +256,7 _at__at_
 void	dev_rel(struct cdev *dev);
 void	dev_strategy(struct cdev *dev, struct buf *bp);
 struct cdev *makebdev(int _maj, int _min);
-struct cdev *make_dev(struct cdevsw *_devsw, int _minor, uid_t _uid, gid_t _gid,
+struct cdev *make_dev(struct cdevsw *_devsw, u_int drv0, uid_t _uid, gid_t _gid,
 		int _perms, const char *_fmt, ...) __printflike(6, 7);
 struct cdev *make_dev_alias(struct cdev *_pdev, const char *_fmt, ...) __printflike(2, 3);
 int	dev2unit(struct cdev *_dev);
-- 
Poul-Henning Kamp       | UNIX since Zilog Zeus 3.20
phk_at_FreeBSD.ORG         | TCP/IP since RFC 956
FreeBSD committer       | BSD since 4.3-tahoe
Never attribute to malice what can adequately be explained by incompetence.
Received on Wed Mar 09 2005 - 20:40:20 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:38:29 UTC