Re: Bug about devfs?

From: Kohji Okuno <okuno.kohji_at_jp.panasonic.com>
Date: Wed, 13 Jul 2011 13:15:14 +0900 (JST)
Hello,

From: Kostik Belousov <kostikbel_at_gmail.com>
Date: Tue, 12 Jul 2011 17:57:53 +0300
> On Tue, Jul 12, 2011 at 03:02:44PM +0200, Attilio Rao wrote:
>> 2011/7/12 Kostik Belousov <kostikbel_at_gmail.com>:
>> > On Tue, Jul 12, 2011 at 07:10:28PM +0900, Kohji Okuno wrote:
>> >> Hello,
>> >>
>> >> I think that devfs has a problem.
>> >> I encountered the problem that open("/dev/AAA") returned ENOENT.
>> >> Of course, /dev/AAA exists.
>> >>
>> >> ENOENT was created by the point(***) in devfs_allocv().
>> >> I think that the race condition had occurred between process A and
>> >> vnlru kernel thread.
>> >>
>> >> Please check the following.
>> >>
>> >> If vnlru set VI_DOOMED to vp->v_iflag but vnlru didn't still execute
>> >> VOP_RECLAIM(), process A cat get valid vp from de->de_vnode.
>> >> But, vget() will return ENOENT, because vp->v_iflag has VI_DOOMED.
>> >>
>> >> When I set the break point to (***), I checked that de->de_vnode and
>> >> vp->v_data were NULL.
>> >>
>> >>
>> >> process A:                            vnlru:
>> >>
>> >> devfs_allocv() {
>> >>                                       vgonel(vp) {
>> >>    ...                                           ...
>> >>                                         vp->v_iflag |= VI_DOOMED;
>> >>   mtx_lock(&devfs_de_interlock);         ...
>> >>   vp = de->de_vnode;
>> >>   if (vp != NULL) {                     VI_UNLOCK(vp);
>> >>                           _____________/ ...
>> >>   VI_LOCK(vp); ____________/              if (VOP_RECLAIM(vp, td))
>> >>   mtx_unlock(&devfs_de_interlock);       ...
>> >>    ...                                \         devfs_reclaim(ap) {
>> >>   error = vget(vp,...);                \
>> >>    ...                                  \______   mtx_lock(&devfs_de_interlock);
>> >>   if (devfs_allocv_drop_refs(...)) {        de = vp->v_data;
>> >>     ...                                           if (de != NULL) {
>> >>   }                                         de->de_vnode = NULL;
>> >>   else if (error) {                         vp->v_data = NULL;
>> >>     ...                                           }
>> >>     rturn (error); (***)                  mtx_unlock(&devfs_de_interlock);
>> >>   }                                         ...
>> >>                                         }
>> >>
>> >>
>> >>
>> >> I think that devfs_allocv() should be fixed as below.
>> >> How do you think?
>> >>
>> >> devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode **vpp)
>> >> {
>> >>         int error;
>> >>       struct vnode *vp;
>> >>       struct cdev *dev;
>> >>       struct devfs_mount *dmp;
>> >>
>> >>       dmp = VFSTODEVFS(mp);
>> >> +#if 1
>> >> + retry:
>> >> +#endif
>> >>         if (de->de_flags & DE_DOOMED) {
>> >>
>> >>            ...
>> >>
>> >>         mtx_lock(&devfs_de_interlock);
>> >>         vp = de->de_vnode;
>> >>         if (vp != NULL) {
>> >>                 VI_LOCK(vp);
>> >>                 mtx_unlock(&devfs_de_interlock);
>> >>                 sx_xunlock(&dmp->dm_lock);
>> >>                 error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, curthread);
>> >>                 sx_xlock(&dmp->dm_lock);
>> >>                 if (devfs_allocv_drop_refs(0, dmp, de)) {
>> >>                         if (error == 0)
>> >>                                 vput(vp);
>> >>                         return (ENOENT);
>> >>                 }
>> >>                 else if (error) {
>> >> +#if 1
>> >> +                     if (error == ENOENT)
>> >> +                             goto retry;
>> >> +#endif
>> >>                       sx_xunlock(&dmp->dm_lock);
>> >>                       return (error);
>> >>               }
>> >>
>> > Thank you for the report.
>> >
>> > The proposed change would revert r179247, which also caused some issues.
>> > Are you able to reproduce the problem ?
>> >
>> > Could you try the following patch ? I cannot reproduce your situation,
>> > so the patch is untested by me.
>> >
>> > diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c
>> > index bf6dab8..bbbfff4 100644
>> > --- a/sys/fs/devfs/devfs_vnops.c
>> > +++ b/sys/fs/devfs/devfs_vnops.c
>> > _at__at_ -397,6 +397,7 _at__at_ devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
>> >                sx_xunlock(&dmp->dm_lock);
>> >                return (ENOENT);
>> >        }
>> > +loop:
>> >        DEVFS_DE_HOLD(de);
>> >        DEVFS_DMP_HOLD(dmp);
>> >        mtx_lock(&devfs_de_interlock);
>> > _at__at_ -412,7 +413,16 _at__at_ devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
>> >                                vput(vp);
>> >                        return (ENOENT);
>> >                }
>> > -               else if (error) {
>> > +               else if (error != 0) {
>> > +                       if (error == ENOENT) {
>> > +                               mtx_lock(&devfs_de_interlock);
>> > +                               while (de->de_vnode != NULL) {
>> > +                                       msleep(&de->de_vnode,
>> > +                                           &devfs_de_interlock, 0, "dvall", 0);
>> > +                               }
>> > +                               mtx_unlock(&devfs_de_interlock);
>> > +                               goto loop;
>> > +                       }
>> >                        sx_xunlock(&dmp->dm_lock);
>> >                        return (error);
>> >                }
>> > _at__at_ -1259,6 +1269,7 _at__at_ devfs_reclaim(struct vop_reclaim_args *ap)
>> >        if (de != NULL) {
>> >                de->de_vnode = NULL;
>> >                vp->v_data = NULL;
>> > +               wakeup(&de->de_vnode);
>> >        }
>> >        mtx_unlock(&devfs_de_interlock);
>> 
>> I think that this approach may starve the thread for a while.
>> As I told you privately I was able to see on field a livelock because
>> of this check. In my case, it was a thread running for 63seconds (at
>> least, at that point the watchdog was tripping over).
> Feasible explanation was not found at the time, AFAIR. I could believe
> that this is possible with r179247 and driver stuck in the close cdevsw
> method.
> 
> More risky change would be to clear de_vnode early. All devfs code shall
> be already safe by checking for VI_DOOMED, and if VI_DOOMED, v_data may
> be NULL.
> 
> Again, I am unable to test.
> 
> diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c
> index bf6dab8..955bd8b 100644
> --- a/sys/fs/devfs/devfs_vnops.c
> +++ b/sys/fs/devfs/devfs_vnops.c
> _at__at_ -397,6 +397,7 _at__at_ devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
>  		sx_xunlock(&dmp->dm_lock);
>  		return (ENOENT);
>  	}
> +loop:
>  	DEVFS_DE_HOLD(de);
>  	DEVFS_DMP_HOLD(dmp);
>  	mtx_lock(&devfs_de_interlock);
> _at__at_ -405,16 +406,21 _at__at_ devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
>  		VI_LOCK(vp);
>  		mtx_unlock(&devfs_de_interlock);
>  		sx_xunlock(&dmp->dm_lock);
> -		error = vget(vp, lockmode | LK_INTERLOCK, curthread);
> +		vget(vp, lockmode | LK_INTERLOCK | LK_RETRY, curthread);
>  		sx_xlock(&dmp->dm_lock);
>  		if (devfs_allocv_drop_refs(0, dmp, de)) {
> -			if (error == 0)
> -				vput(vp);
> +			vput(vp);
>  			return (ENOENT);
>  		}
> -		else if (error) {
> -			sx_xunlock(&dmp->dm_lock);
> -			return (error);
> +		else if ((vp->v_iflag & VI_DOOMED) != 0) {
> +			mtx_lock(&devfs_de_interlock);
> +			if (de->de_vnode == vp) {
> +				de->de_vnode = NULL;
> +				vp->v_data = NULL;
> +			}
> +			mtx_unlock(&devfs_de_interlock);
> +			vput(vp);
> +			goto loop;
>  		}
>  		sx_xunlock(&dmp->dm_lock);
>  		*vpp = vp;

I tried Kostik's last patch, and I checked that open() successed twice
by retry. It is difficult for me to reproduce this situation.

At least, my problem is solved by this patch.

Thanks,
 Kohji Okuno.
Received on Wed Jul 13 2011 - 02:15:20 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:40:15 UTC