Re: Bug about devfs?

From: Attilio Rao <attilio_at_freebsd.org>
Date: Tue, 12 Jul 2011 15:02:44 +0200
2011/7/12 Kostik Belousov <kostikbel_at_gmail.com>:
> On Tue, Jul 12, 2011 at 07:10:28PM +0900, Kohji Okuno wrote:
>> Hello,
>>
>> I think that devfs has a problem.
>> I encountered the problem that open("/dev/AAA") returned ENOENT.
>> Of course, /dev/AAA exists.
>>
>> ENOENT was created by the point(***) in devfs_allocv().
>> I think that the race condition had occurred between process A and
>> vnlru kernel thread.
>>
>> Please check the following.
>>
>> If vnlru set VI_DOOMED to vp->v_iflag but vnlru didn't still execute
>> VOP_RECLAIM(), process A cat get valid vp from de->de_vnode.
>> But, vget() will return ENOENT, because vp->v_iflag has VI_DOOMED.
>>
>> When I set the break point to (***), I checked that de->de_vnode and
>> vp->v_data were NULL.
>>
>>
>> process A:                            vnlru:
>>
>> devfs_allocv() {
>>                                       vgonel(vp) {
>>    ...                                           ...
>>                                         vp->v_iflag |= VI_DOOMED;
>>   mtx_lock(&devfs_de_interlock);         ...
>>   vp = de->de_vnode;
>>   if (vp != NULL) {                     VI_UNLOCK(vp);
>>                           _____________/ ...
>>   VI_LOCK(vp); ____________/              if (VOP_RECLAIM(vp, td))
>>   mtx_unlock(&devfs_de_interlock);       ...
>>    ...                                \         devfs_reclaim(ap) {
>>   error = vget(vp,...);                \
>>    ...                                  \______   mtx_lock(&devfs_de_interlock);
>>   if (devfs_allocv_drop_refs(...)) {        de = vp->v_data;
>>     ...                                           if (de != NULL) {
>>   }                                         de->de_vnode = NULL;
>>   else if (error) {                         vp->v_data = NULL;
>>     ...                                           }
>>     rturn (error); (***)                  mtx_unlock(&devfs_de_interlock);
>>   }                                         ...
>>                                         }
>>
>>
>>
>> I think that devfs_allocv() should be fixed as below.
>> How do you think?
>>
>> devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode **vpp)
>> {
>>         int error;
>>       struct vnode *vp;
>>       struct cdev *dev;
>>       struct devfs_mount *dmp;
>>
>>       dmp = VFSTODEVFS(mp);
>> +#if 1
>> + retry:
>> +#endif
>>         if (de->de_flags & DE_DOOMED) {
>>
>>            ...
>>
>>         mtx_lock(&devfs_de_interlock);
>>         vp = de->de_vnode;
>>         if (vp != NULL) {
>>                 VI_LOCK(vp);
>>                 mtx_unlock(&devfs_de_interlock);
>>                 sx_xunlock(&dmp->dm_lock);
>>                 error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, curthread);
>>                 sx_xlock(&dmp->dm_lock);
>>                 if (devfs_allocv_drop_refs(0, dmp, de)) {
>>                         if (error == 0)
>>                                 vput(vp);
>>                         return (ENOENT);
>>                 }
>>                 else if (error) {
>> +#if 1
>> +                     if (error == ENOENT)
>> +                             goto retry;
>> +#endif
>>                       sx_xunlock(&dmp->dm_lock);
>>                       return (error);
>>               }
>>
> Thank you for the report.
>
> The proposed change would revert r179247, which also caused some issues.
> Are you able to reproduce the problem ?
>
> Could you try the following patch ? I cannot reproduce your situation,
> so the patch is untested by me.
>
> diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c
> index bf6dab8..bbbfff4 100644
> --- a/sys/fs/devfs/devfs_vnops.c
> +++ b/sys/fs/devfs/devfs_vnops.c
> _at__at_ -397,6 +397,7 _at__at_ devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
>                sx_xunlock(&dmp->dm_lock);
>                return (ENOENT);
>        }
> +loop:
>        DEVFS_DE_HOLD(de);
>        DEVFS_DMP_HOLD(dmp);
>        mtx_lock(&devfs_de_interlock);
> _at__at_ -412,7 +413,16 _at__at_ devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
>                                vput(vp);
>                        return (ENOENT);
>                }
> -               else if (error) {
> +               else if (error != 0) {
> +                       if (error == ENOENT) {
> +                               mtx_lock(&devfs_de_interlock);
> +                               while (de->de_vnode != NULL) {
> +                                       msleep(&de->de_vnode,
> +                                           &devfs_de_interlock, 0, "dvall", 0);
> +                               }
> +                               mtx_unlock(&devfs_de_interlock);
> +                               goto loop;
> +                       }
>                        sx_xunlock(&dmp->dm_lock);
>                        return (error);
>                }
> _at__at_ -1259,6 +1269,7 _at__at_ devfs_reclaim(struct vop_reclaim_args *ap)
>        if (de != NULL) {
>                de->de_vnode = NULL;
>                vp->v_data = NULL;
> +               wakeup(&de->de_vnode);
>        }
>        mtx_unlock(&devfs_de_interlock);

I think that this approach may starve the thread for a while.
As I told you privately I was able to see on field a livelock because
of this check. In my case, it was a thread running for 63seconds (at
least, at that point the watchdog was tripping over).

Attilio


-- 
Peace can only be achieved by understanding - A. Einstein
Received on Tue Jul 12 2011 - 11:02:45 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:40:15 UTC