Re: "panic: malloc(M_WAITOK) in non-sleepable context" after r364296 -> r364341

From: Mateusz Guzik <mjguzik_at_gmail.com>
Date: Tue, 18 Aug 2020 15:55:15 +0200
On 8/18/20, Mark Johnston <markj_at_freebsd.org> wrote:
> On Tue, Aug 18, 2020 at 03:24:52PM +0200, Mateusz Guzik wrote:
>> Try this:
>>
>> diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
>> index 46eb1c4347c..7b94ca7b880 100644
>> --- a/sys/kern/kern_malloc.c
>> +++ b/sys/kern/kern_malloc.c
>> _at__at_ -618,8 +618,8 _at__at_ void *
>>         unsigned long osize = size;
>>  #endif
>>
>> -       KASSERT((flags & M_WAITOK) == 0 || THREAD_CAN_SLEEP(),
>> -           ("malloc(M_WAITOK) in non-sleepable context"));
>> +       if ((flags & M_WAITOK) != 0)
>> +               WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
>> __func__);
>>
>>  #ifdef MALLOC_DEBUG
>>         va = NULL;
>> diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
>> index 37d78354200..2e1267ec02f 100644
>> --- a/sys/vm/uma_core.c
>> +++ b/sys/vm/uma_core.c
>> _at__at_ -3355,8 +3355,8 _at__at_ uma_zalloc_arg(uma_zone_t zone, void *udata, int
>> flags)
>>         uma_cache_bucket_t bucket;
>>         uma_cache_t cache;
>>
>> -       KASSERT((flags & M_WAITOK) == 0 || THREAD_CAN_SLEEP(),
>> -           ("uma_zalloc(M_WAITOK) in non-sleepable context"));
>> +       if ((flags & M_WAITOK) != 0)
>> +               WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
>> __func__);
>>
>>         /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option
>> */
>>         random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
>
> Doesn't it only print a warning if non-sleepable locks are held?
> THREAD_CAN_SLEEP() catches other cases, like epoch sections and worker
> threads that are not allowed to sleep (CAM doneq threads in this case).
> Otherwise uma_zalloc_debug() already checks with WITNESS.
>
> I'm inclined to simply revert the commit for now.  Alternately,
> disk_alloc() could be modified to handle M_NOWAIT/M_WAITOK flags, and
> that could be used in daregister() and other CAM periph drivers.
> daregister() already uses M_NOWAIT to allocate the driver softc itself.
>

If WITNESS_WARN(.., WARN_SLEEPOK) does not check for all possible
blockers for going off CPU that's a bug. I do support reverting the
patch for now until the dust settles. I don't propose the above hack
as the final fix.

As for the culrpit at hand, given the backtrace:
devfs_alloc() at devfs_alloc+0x28/frame 0xffffffff8218d570
make_dev_sv() at make_dev_sv+0x97/frame 0xffffffff8218d600
make_dev_s() at make_dev_s+0x3b/frame 0xffffffff8218d660
passregister() at passregister+0x3e7/frame 0xffffffff8218d8b0

I think this is missing wait flags, resulting in M_WAITOK later, i.e.:
diff --git a/sys/cam/scsi/scsi_pass.c b/sys/cam/scsi/scsi_pass.c
index b299fbddd84..c6d6a5da403 100644
--- a/sys/cam/scsi/scsi_pass.c
+++ b/sys/cam/scsi/scsi_pass.c
_at__at_ -652,6 +652,7 _at__at_ passregister(struct cam_periph *periph, void *arg)
        args.mda_gid = GID_OPERATOR;
        args.mda_mode = 0600;
        args.mda_si_drv1 = periph;
+       args.mda_flags = MAKEDEV_NOWAIT;
        error = make_dev_s(&args, &softc->dev, "%s%d", periph->periph_name,
            periph->unit_number);
        if (error != 0) {


> diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c
> index d648d511b1a2..d940e7db24e0 100644
> --- a/sys/cam/scsi/scsi_da.c
> +++ b/sys/cam/scsi/scsi_da.c
> _at__at_ -2767,18 +2767,23 _at__at_ daregister(struct cam_periph *periph, void *arg)
>  		return(CAM_REQ_CMP_ERR);
>  	}
>
> -	softc = (struct da_softc *)malloc(sizeof(*softc), M_DEVBUF,
> -	    M_NOWAIT|M_ZERO);
> -
> +	softc = malloc(sizeof(*softc), M_DEVBUF, M_NOWAIT | M_ZERO);
>  	if (softc == NULL) {
>  		printf("daregister: Unable to probe new device. "
>  		       "Unable to allocate softc\n");
>  		return(CAM_REQ_CMP_ERR);
>  	}
> +	softc->disk = disk_alloc_flags(M_NOWAIT);
> +	if (softc->disk == NULL) {
> +		printf("daregister: Unable to probe new device. "
> +		       "Unable to allocate disk structure\n");
> +		return (CAM_REQ_CMP_ERR);
> +	}
>
>  	if (cam_iosched_init(&softc->cam_iosched, periph) != 0) {
>  		printf("daregister: Unable to probe new device. "
>  		       "Unable to allocate iosched memory\n");
> +		disk_destroy(softc->disk);
>  		free(softc, M_DEVBUF);
>  		return(CAM_REQ_CMP_ERR);
>  	}
> _at__at_ -2898,7 +2903,6 _at__at_ daregister(struct cam_periph *periph, void *arg)
>  	/*
>  	 * Register this media as a disk.
>  	 */
> -	softc->disk = disk_alloc();
>  	softc->disk->d_devstat = devstat_new_entry(periph->periph_name,
>  			  periph->unit_number, 0,
>  			  DEVSTAT_BS_UNAVAILABLE,
> diff --git a/sys/geom/geom_disk.c b/sys/geom/geom_disk.c
> index eaba770828d0..c07494ab4ec0 100644
> --- a/sys/geom/geom_disk.c
> +++ b/sys/geom/geom_disk.c
> _at__at_ -850,15 +850,22 _at__at_ g_disk_ident_adjust(char *ident, size_t size)
>  }
>
>  struct disk *
> -disk_alloc(void)
> +disk_alloc_flags(int mflag)
>  {
>  	struct disk *dp;
>
> -	dp = g_malloc(sizeof(struct disk), M_WAITOK | M_ZERO);
> -	LIST_INIT(&dp->d_aliases);
> +	dp = g_malloc(sizeof(struct disk), mflag | M_ZERO);
> +	if (dp != NULL)
> +		LIST_INIT(&dp->d_aliases);
>  	return (dp);
>  }
>
> +struct disk *
> +disk_alloc(void)
> +{
> +	return (disk_alloc_flags(M_WAITOK));
> +}
> +
>  void
>  disk_create(struct disk *dp, int version)
>  {
> diff --git a/sys/geom/geom_disk.h b/sys/geom/geom_disk.h
> index 8e2282a09a3a..794ce2cc38bd 100644
> --- a/sys/geom/geom_disk.h
> +++ b/sys/geom/geom_disk.h
> _at__at_ -137,6 +137,7 _at__at_ struct disk {
>  #define	DISKFLAG_WRITE_PROTECT		0x0100
>
>  struct disk *disk_alloc(void);
> +struct disk *disk_alloc_flags(int mflag);
>  void disk_create(struct disk *disk, int version);
>  void disk_destroy(struct disk *disk);
>  void disk_gone(struct disk *disk);
>


-- 
Mateusz Guzik <mjguzik gmail.com>
Received on Tue Aug 18 2020 - 11:55:19 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:41:24 UTC