Re: Bug about devfs?

From: Kohji Okuno <okuno.kohji_at_jp.panasonic.com>
Date: Wed, 13 Jul 2011 22:11:15 +0900 (JST)
Hello,

> From: Kostik Belousov <kostikbel_at_gmail.com>
> Date: Tue, 12 Jul 2011 17:57:53 +0300
>> On Tue, Jul 12, 2011 at 03:02:44PM +0200, Attilio Rao wrote:
>>> 2011/7/12 Kostik Belousov <kostikbel_at_gmail.com>:
>>> > On Tue, Jul 12, 2011 at 07:10:28PM +0900, Kohji Okuno wrote:
>>> >> Hello,
>>> >>
>>> >> I think that devfs has a problem.
>>> >> I encountered the problem that open("/dev/AAA") returned ENOENT.
>>> >> Of course, /dev/AAA exists.
>>> >>
>>> >> ENOENT was created by the point(***) in devfs_allocv().
>>> >> I think that the race condition had occurred between process A and
>>> >> vnlru kernel thread.
>>> >>
>>> >> Please check the following.
>>> >>
>>> >> If vnlru set VI_DOOMED to vp->v_iflag but vnlru didn't still execute
>>> >> VOP_RECLAIM(), process A cat get valid vp from de->de_vnode.
>>> >> But, vget() will return ENOENT, because vp->v_iflag has VI_DOOMED.
>>> >>
>>> >> When I set the break point to (***), I checked that de->de_vnode and
>>> >> vp->v_data were NULL.
>>> >>
>>> >>
>>> >> process A:                            vnlru:
>>> >>
>>> >> devfs_allocv() {
>>> >>                                       vgonel(vp) {
>>> >>    ...                                           ...
>>> >>                                         vp->v_iflag |= VI_DOOMED;
>>> >>   mtx_lock(&devfs_de_interlock);         ...
>>> >>   vp = de->de_vnode;
>>> >>   if (vp != NULL) {                     VI_UNLOCK(vp);
>>> >>                           _____________/ ...
>>> >>   VI_LOCK(vp); ____________/              if (VOP_RECLAIM(vp, td))
>>> >>   mtx_unlock(&devfs_de_interlock);       ...
>>> >>    ...                                \         devfs_reclaim(ap) {
>>> >>   error = vget(vp,...);                \
>>> >>    ...                                  \______   mtx_lock(&devfs_de_interlock);
>>> >>   if (devfs_allocv_drop_refs(...)) {        de = vp->v_data;
>>> >>     ...                                           if (de != NULL) {
>>> >>   }                                         de->de_vnode = NULL;
>>> >>   else if (error) {                         vp->v_data = NULL;
>>> >>     ...                                           }
>>> >>     rturn (error); (***)                  mtx_unlock(&devfs_de_interlock);
>>> >>   }                                         ...
>>> >>                                         }
>>> >>
>>> >>
>>> >>
>>> >> I think that devfs_allocv() should be fixed as below.
>>> >> How do you think?
>>> >>
>>> >> devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode **vpp)
>>> >> {
>>> >>         int error;
>>> >>       struct vnode *vp;
>>> >>       struct cdev *dev;
>>> >>       struct devfs_mount *dmp;
>>> >>
>>> >>       dmp = VFSTODEVFS(mp);
>>> >> +#if 1
>>> >> + retry:
>>> >> +#endif
>>> >>         if (de->de_flags & DE_DOOMED) {
>>> >>
>>> >>            ...
>>> >>
>>> >>         mtx_lock(&devfs_de_interlock);
>>> >>         vp = de->de_vnode;
>>> >>         if (vp != NULL) {
>>> >>                 VI_LOCK(vp);
>>> >>                 mtx_unlock(&devfs_de_interlock);
>>> >>                 sx_xunlock(&dmp->dm_lock);
>>> >>                 error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, curthread);
>>> >>                 sx_xlock(&dmp->dm_lock);
>>> >>                 if (devfs_allocv_drop_refs(0, dmp, de)) {
>>> >>                         if (error == 0)
>>> >>                                 vput(vp);
>>> >>                         return (ENOENT);
>>> >>                 }
>>> >>                 else if (error) {
>>> >> +#if 1
>>> >> +                     if (error == ENOENT)
>>> >> +                             goto retry;
>>> >> +#endif
>>> >>                       sx_xunlock(&dmp->dm_lock);
>>> >>                       return (error);
>>> >>               }
>>> >>
>>> > Thank you for the report.
>>> >
>>> > The proposed change would revert r179247, which also caused some issues.
>>> > Are you able to reproduce the problem ?
>>> >
>>> > Could you try the following patch ? I cannot reproduce your situation,
>>> > so the patch is untested by me.
>>> >
>>> > diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c
>>> > index bf6dab8..bbbfff4 100644
>>> > --- a/sys/fs/devfs/devfs_vnops.c
>>> > +++ b/sys/fs/devfs/devfs_vnops.c
>>> > _at__at_ -397,6 +397,7 _at__at_ devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
>>> >                sx_xunlock(&dmp->dm_lock);
>>> >                return (ENOENT);
>>> >        }
>>> > +loop:
>>> >        DEVFS_DE_HOLD(de);
>>> >        DEVFS_DMP_HOLD(dmp);
>>> >        mtx_lock(&devfs_de_interlock);
>>> > _at__at_ -412,7 +413,16 _at__at_ devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
>>> >                                vput(vp);
>>> >                        return (ENOENT);
>>> >                }
>>> > -               else if (error) {
>>> > +               else if (error != 0) {
>>> > +                       if (error == ENOENT) {
>>> > +                               mtx_lock(&devfs_de_interlock);
>>> > +                               while (de->de_vnode != NULL) {
>>> > +                                       msleep(&de->de_vnode,
>>> > +                                           &devfs_de_interlock, 0, "dvall", 0);
>>> > +                               }
>>> > +                               mtx_unlock(&devfs_de_interlock);
>>> > +                               goto loop;
>>> > +                       }
>>> >                        sx_xunlock(&dmp->dm_lock);
>>> >                        return (error);
>>> >                }
>>> > _at__at_ -1259,6 +1269,7 _at__at_ devfs_reclaim(struct vop_reclaim_args *ap)
>>> >        if (de != NULL) {
>>> >                de->de_vnode = NULL;
>>> >                vp->v_data = NULL;
>>> > +               wakeup(&de->de_vnode);
>>> >        }
>>> >        mtx_unlock(&devfs_de_interlock);
>>> 
>>> I think that this approach may starve the thread for a while.
>>> As I told you privately I was able to see on field a livelock because
>>> of this check. In my case, it was a thread running for 63seconds (at
>>> least, at that point the watchdog was tripping over).
>> Feasible explanation was not found at the time, AFAIR. I could believe
>> that this is possible with r179247 and driver stuck in the close cdevsw
>> method.
>> 
>> More risky change would be to clear de_vnode early. All devfs code shall
>> be already safe by checking for VI_DOOMED, and if VI_DOOMED, v_data may
>> be NULL.
>> 
>> Again, I am unable to test.
>> 
>> diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c
>> index bf6dab8..955bd8b 100644
>> --- a/sys/fs/devfs/devfs_vnops.c
>> +++ b/sys/fs/devfs/devfs_vnops.c
>> _at__at_ -397,6 +397,7 _at__at_ devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
>>  		sx_xunlock(&dmp->dm_lock);
>>  		return (ENOENT);
>>  	}
>> +loop:
>>  	DEVFS_DE_HOLD(de);
>>  	DEVFS_DMP_HOLD(dmp);
>>  	mtx_lock(&devfs_de_interlock);
>> _at__at_ -405,16 +406,21 _at__at_ devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
>>  		VI_LOCK(vp);
>>  		mtx_unlock(&devfs_de_interlock);
>>  		sx_xunlock(&dmp->dm_lock);
>> -		error = vget(vp, lockmode | LK_INTERLOCK, curthread);
>> +		vget(vp, lockmode | LK_INTERLOCK | LK_RETRY, curthread);
>>  		sx_xlock(&dmp->dm_lock);
>>  		if (devfs_allocv_drop_refs(0, dmp, de)) {
>> -			if (error == 0)
>> -				vput(vp);
>> +			vput(vp);
>>  			return (ENOENT);
>>  		}
>> -		else if (error) {
>> -			sx_xunlock(&dmp->dm_lock);
>> -			return (error);
>> +		else if ((vp->v_iflag & VI_DOOMED) != 0) {
>> +			mtx_lock(&devfs_de_interlock);
>> +			if (de->de_vnode == vp) {
>> +				de->de_vnode = NULL;
>> +				vp->v_data = NULL;
>> +			}
>> +			mtx_unlock(&devfs_de_interlock);
>> +			vput(vp);
>> +			goto loop;
>>  		}
>>  		sx_xunlock(&dmp->dm_lock);
>>  		*vpp = vp;
> 
> I tried Kostik's last patch, and I checked that open() successed twice
> by retry. It is difficult for me to reproduce this situation.
> 
> At least, my problem is solved by this patch.

I checked ten times or more. And, I didn't encounter all problems.

Thanks,
 Kohji Okuno.
Received on Wed Jul 13 2011 - 11:11:18 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:40:15 UTC