Re: panic in arptimer in r289937

From: Hans Petter Selasky <hps_at_selasky.org>
Date: Thu, 10 Dec 2015 16:13:07 +0100
Hi,

Here is the backtrace for a reproducable panic seen with arptimer():

> #0  doadump (textdump=0) at pcpu.h:221
> #1  0xffffffff80385afb in db_dump (dummy=<value optimized out>, dummy2=false, dummy3=0, dummy4=0x0) at /usr/img/freebsd/sys/ddb/db_command.c:533
> #2  0xffffffff803858ee in db_command (cmd_table=0x0) at /usr/img/freebsd/sys/ddb/db_command.c:440
> #3  0xffffffff80385684 in db_command_loop () at /usr/img/freebsd/sys/ddb/db_command.c:493
> #4  0xffffffff8038818b in db_trap (type=<value optimized out>, code=0) at /usr/img/freebsd/sys/ddb/db_main.c:251
> #5  0xffffffff80ae0973 in kdb_trap (type=12, code=0, tf=<value optimized out>) at /usr/img/freebsd/sys/kern/subr_kdb.c:654
> #6  0xffffffff80f276f1 in trap_fatal (frame=0xfffffe00f59f4950, eva=<value optimized out>) at /usr/img/freebsd/sys/amd64/amd64/trap.c:829
> #7  0xffffffff80f27924 in trap_pfault (frame=0xfffffe00f59f4950, usermode=<value optimized out>) at /usr/img/freebsd/sys/amd64/amd64/trap.c:684
> #8  0xffffffff80f270de in trap (frame=0xfffffe00f59f4950) at /usr/img/freebsd/sys/amd64/amd64/trap.c:435
> #9  0xffffffff80f0a347 in calltrap () at /usr/img/freebsd/sys/amd64/amd64/exception.S:234
> #10 0xffffffff80be9e3d in arptimer (arg=0xfffff8011d0fda00) at atomic.h:184
> #11 0xffffffff80ab54f1 in softclock_call_cc (c=0xfffff8011d0fdaa8, cc=0xffffffff81ccd600, direct=<value optimized out>)
>     at /usr/img/freebsd/sys/kern/kern_timeout.c:832
> #12 0xffffffff80ab5814 in softclock (arg=0xffffffff81ccd600) at /usr/img/freebsd/sys/kern/kern_timeout.c:921
> #13 0xffffffff80a5d7f6 in intr_event_execute_handlers (p=<value optimized out>, ie=0xfffff80003998b00) at /usr/img/freebsd/sys/kern/kern_intr.c:1262
> #14 0xffffffff80a5de06 in ithread_loop (arg=0xfffff8000396cde0) at /usr/img/freebsd/sys/kern/kern_intr.c:1275
> #15 0xffffffff80a5a87c in fork_exit (callout=0xffffffff80a5dd60 <ithread_loop>, arg=0xfffff8000396cde0, frame=0xfffffe00f59f4c00)
>     at /usr/img/freebsd/sys/kern/kern_fork.c:1011
> #16 0xffffffff80f0a87e in fork_trampoline () at /usr/img/freebsd/sys/amd64/amd64/exception.S:609
> #17 0x0000000000000000 in ?? ()

(kgdb) print  ((struct llentry *)arg)[0]
$5 = {
   lle_next = {
     le_next = 0x0,
     le_prev = 0xfffff80069b5fa98
   },
   r_l3addr = {
     addr4 = {
       s_addr = 1563742475
     },
     addr6 = {
       __u6_addr = {
         __u6_addr8 = 0xfffff8011d0fda10 "\v�4]",
         __u6_addr16 = 0xfffff8011d0fda10,
         __u6_addr32 = 0xfffff8011d0fda10
       }
     }
   },
   ll_addr = {
     mac_aligned = 121984137371108,
     mac16 = 0xfffff8011d0fda20,
     mac8 = 0xfffff8011d0fda20 "�\035-��n"
   },
   r_flags = 1,
   r_skip_req = 1,
   spare1 = 0,
   lle_tbl = 0xfffff80005653300,
   lle_head = 0xfffff80069b5fa98,
   lle_free = 0xffffffff80bf2270 <in_lltable_destroy_lle>,
   la_hold = 0x0,
   la_numheld = 0,
   la_expire = 12422,
   la_flags = 1,
   la_asked = 0,
   la_preempt = 5,
   ln_state = 2,
   ln_router = 0,
   ln_ntick = 0,
   lle_refcnt = 1,
   lle_chain = {
     le_next = 0x0,
     le_prev = 0x0
   },
   lle_timer = {
     c_links = {
       le = {
         le_next = 0x0,
         le_prev = 0xffffffff81ccd718
       },
       sle = {
         sle_next = 0x0
       },
       tqe = {
         tqe_next = 0x0,
         tqe_prev = 0xffffffff81ccd718
       }
     },
     c_time = 53354272998546,
     c_precision = 268435437,
     c_arg = 0xfffff8011d0fda00,
     c_func = 0xffffffff80be9950 <arptimer>,
     c_lock = 0x0,
     c_flags = 16,
     c_cpu = 0
   },
   lle_lock = {
     lock_object = {
       lo_name = 0xffffffff8144abf0 "lle",
       lo_flags = 90374144,
       lo_data = 0,
       lo_witness = 0x0
     },
     rw_lock = 1
   },
   req_mtx = {
     lock_object = {
       lo_name = 0xffffffff8144abf4 "lle req",
       lo_flags = 16973824,
       lo_data = 0,
       lo_witness = 0x0
     },
     mtx_lock = 4
   }
}

(kgdb) print /x ((struct llentry *)arg)->lle_tbl[0]
$6 = {
   llt_link = {
     sle_next = 0xfffff8004be51900
   },
   llt_af = 0x1d243aa0,
   llt_hsize = 0xfffff801,
   lle_head = 0xfffff800053e5330,
   llt_ifp = 0xd04,
   llt_lookup = 0x0,
   llt_alloc_entry = 0xfffffe0001a472f0,
   llt_delete_entry = 0xfffff800504a5100,
   llt_prefix_free = 0xfffff800503d8c88,
   llt_dump_entry = 0x0,
   llt_hash = 0x0,
   llt_match_prefix = 0x0,
   llt_free_entry = 0x0,
   llt_foreach_entry = 0x0,
   llt_link_entry = 0x0,
   llt_unlink_entry = 0x38e425e,
   llt_fill_sa_entry = 0x0,
   llt_free_tbl = 0xfffff8011d243ab0
}

It appears arptimer() was called after lltable_unlink_entry() was 
called, because la_flags does not have the LLE_LINKED bit set, which can 
happen!! If arptimer() is firing exactly when we call 
lltable_unlink_entry(), then arptimer() will refer to freed memory. Does 
the following patch make sense?

> Index: netinet/if_ether.c
> ===================================================================
> --- netinet/if_ether.c  (revision 291256)
> +++ netinet/if_ether.c  (working copy)
> _at__at_ -185,7 +185,13 _at__at_
>                 LLE_WUNLOCK(lle);
>                 return;
>         }
> -       ifp = lle->lle_tbl->llt_ifp;
> +       if (lle->la_flags & LLE_LINKED) {
> +               ifp = lle->lle_tbl->llt_ifp;
> +       } else {
> +               /* XXX RACE entry has been freed */
> +               llentry_free(lle);
> +               return;
> +       }
>         CURVNET_SET(ifp->if_vnet);
>
>         if ((lle->la_flags & LLE_DELETED) == 0) {

If you need more information from the dump, let me know.

--HPS
Received on Thu Dec 10 2015 - 14:11:28 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:41:01 UTC