arm64 fork/swap data corruptions: A ~110 line C program demonstrating an example (Pine64+ 2GB context)

From: Mark Millard <markmi_at_dsl-only.net>
Date: Mon, 13 Mar 2017 23:52:41 -0700
I'm still at a loss about how to figure out what stages are messed
up. (Memory coherency? Some memory not swapped out? Bad data swapped
out? Wrong data swapped in?)

But at least I've found a much smaller/simpler example to demonstrate
some problem in my Pine64+ 2GB context.

The Pine64+ 2GB is the only arm64 context that I have access to.


The following program fails its check for data
having its expected byte pattern in dynamically
allocated memory after a fork/swap-out/swap-in
sequence.

I'll note that the program sleeps for 60s after
forking to give time to do something else to
cause the parent and child processes to swap
out (RES=0 as seen in top).

Note the source code line:

    // test_check(); // Adding this line prevents failure.

It seems that accessing the region contents before forking
and swapping avoids the problem. But there is a problem
if the region was only written-to before the fork/swap.

Another point is that the size of the region matters: <= 14K Bytes
fails and > 14K Bytes works, for as much as I have tested.


# more swap_testing.c
// swap_testing.c

// Built via (cc was clang 4.0 in my case):
//
// cc -g -std=c11 -Wpedantic swap_testing.c
// -O0 and -O2 also get the problem.

#include <unistd.h>     // for fork(), sleep(.)
#include <sys/types.h>  // for pid_t
#include <sys/wait.h>   // for wait(.)

extern void test_setup(void); // Sets up the memory byte pattern.
extern void test_check(void); // Tests the memory byte pattern.

// Driver for the fork/swap test.
//
// Sequence:
//   1. test_setup() writes the byte pattern.
//   2. fork().
//   3. The child sleeps for 60s so that both processes can be forced
//      out to swap by hand; the parent blocks in wait() for the child.
//   4. Each process then runs test_check() on its own view of the memory.
//
// Returns 0 when the sequence ran to completion (test_check() reports a
// bad pattern by raising SIGABRT, not through the return value) and 1
// when fork() or wait() failed, in which case no check is made.
int main(void)
{
    test_setup();
    // test_check(); // Adding this line prevents failure.

    const pid_t pid = fork();

    if (pid < 0)
    {
        return 1; // fork() failed: there is no child to run the test with.
    }

    if (0==pid)
    {
        // Child process.
        sleep(60);

        // During this sleep, manually force this process to
        // swap out. I use something like:

        // stress -m 1 --vm-bytes 1800M

        // in another shell and ^C'ing it after top
        // shows the swapped status desired. 1800M
        // just happened to work on the Pine64+ 2GB
        // that I was using.
    }
    else
    {
        // Parent process: stay blocked until the child is done. Failure
        // of wait() is signalled by its return value (-1), not by the
        // status word, so it is the return value that gets tested. The
        // child's exit status itself is deliberately not examined: the
        // parent checks its own memory even if the child aborted.
        int wait_status = 0;

        if (-1==wait(&wait_status))
        {
            return 1;
        }
    }

    test_check();
    return 0;
}

// The memory and test code follows.

#include <stdbool.h>    // for bool, true, false
#include <stddef.h>     // for size_t, NULL
#include <stdlib.h>     // for malloc(.), free(.)

#include <signal.h>     // for raise(.), SIGABRT

// Size, in bytes, of each test region.
//
// Sizes seen to give a bad dyn_region pattern (parent and child
// processes):
//     256u, 4u*1024u, 8u*1024u, 9u*1024u, 12u*1024u, 14u*1024u
//
// Sizes seen to work:
//     14u*1024u+1u, 15u*1024u, 16u*1024u, 32u*1024u, 256u*1024u*1024u
#define region_size (14u*1024u)

// One byte of a test region. Volatile-qualified, so every access written
// in the source is an access the compiler has to perform.
typedef volatile unsigned char value_type;

// A test region: region_size pattern bytes wrapped in a struct.
typedef struct region_struct {
    value_type array[region_size];
} region;

// Statically allocated copy of the pattern.
static region gbl_region;

// Heap-allocated copy of the pattern; NULL until it has been allocated.
static region * volatile dyn_region = NULL;

// Expected pattern byte for index v: v converted to unsigned char
// (v modulo 256 with 8-bit chars).
static value_type value(size_t v)
{
    const unsigned char pattern_byte = (unsigned char)v;
    return pattern_byte;
}

// Allocates the dynamic region and fills both regions with the expected
// byte pattern: byte i of gbl_region and of *dyn_region is set to value(i).
//
// Takes no arguments and returns nothing. A failed allocation is reported
// by raising SIGABRT.
void test_setup(void) {
    dyn_region = malloc(sizeof(region));
    // NOTE(review): if raise() were to return here (e.g. SIGABRT ignored
    // or handled), the loop below would store through a null pointer.
    if (!dyn_region) raise(SIGABRT);

    // The pattern is only stored here, never read back. The chained
    // assignment writes value(i) to gbl_region first and then to
    // *dyn_region (two byte stores in the -O0 build). Keep it that way:
    // the failure being demonstrated is reported for a region that was
    // only written to before the fork.
    for(size_t i=0u; i<region_size; i++) {
        (*dyn_region).array[i] = gbl_region.array[i] = value(i);
    }
}

// Scan state of the most recent test_check() call. These live at file
// scope, which lets them be examined after the SIGABRT, e.g. with
// "print dyn_pos" on the core file.
//   *_failed: true once a byte differing from value(index) was found.
//   *_pos:    index at which the scan stopped; after a failure this is
//             one past the index of the mismatching byte.
static volatile bool gbl_failed = false; // Until potentially disproved
static volatile size_t gbl_pos = 0u;

static volatile bool dyn_failed = false; // Until potentially disproved
static volatile size_t dyn_pos = 0u;

// Checks that gbl_region and *dyn_region both still hold the expected
// pattern, i.e. that byte i equals value(i) for every i below region_size.
//
// Takes no arguments and returns nothing. Each region is scanned up to
// its first mismatch; if either scan found one, SIGABRT is raised, with
// the scan state above left describing where.
void test_check(void) {
    // Start every call from a clean state. Without this reset the
    // positions left at region_size by an earlier complete scan would
    // make both loops below exit immediately, so a second call (for
    // example one check before fork() and one after) would check nothing.
    gbl_failed = false;
    gbl_pos = 0u;
    dyn_failed = false;
    dyn_pos = 0u;

    while (!gbl_failed && gbl_pos<region_size) {
        gbl_failed = (value(gbl_pos) != gbl_region.array[gbl_pos]);
        gbl_pos++;
    }

    while (!dyn_failed && dyn_pos<region_size) {
        dyn_failed = (value(dyn_pos) != (*dyn_region).array[dyn_pos]);
        // Note: When the memory pattern fails, this is the case that
        //       records the failure.
        dyn_pos++;
    }

    if (gbl_failed) raise(SIGABRT);
    if (dyn_failed) raise(SIGABRT); // lldb reports this line for the __raise call
                                    // when it fails (both parent and child processes).
}


Other details from lldb (not using -O2 so things are
simpler, not presented in the order examined):

# lldb a.out -c /var/crash/a.out.11575.core
(lldb) target create "a.out" --core "/var/crash/a.out.11575.core"
Core file '/var/crash/a.out.11575.core' (aarch64) was loaded.
(lldb) bt
* thread #1, name = 'a.out', stop reason = signal SIGABRT
  * frame #0: 0x0000000040113d38 libc.so.7`_thr_kill + 8
    frame #1: libc.so.7`__raise(s=6) at raise.c:52
    frame #2: a.out`test_check at swap_testing.c:103
    frame #3: a.out`main at swap_testing.c:42
    frame #4: 0x0000000000020184 a.out`__start + 364
    frame #5: ld-elf.so.1`.rtld_start at rtld_start.S:41

(lldb) up 2
frame #2: a.out`test_check at swap_testing.c:103
   100 	    }
   101 	
   102 	    if (gbl_failed) raise(SIGABRT);
-> 103 	    if (dyn_failed) raise(SIGABRT); // lldb reports this line for the __raise call.
   104 	                                    // when it fails (both parent and child processes).
   105 	}

(lldb) print dyn_pos
(size_t) $0 = 2

(That is one after the failure position.)


(lldb) print dyn_region
(region *volatile) $3 = 0x0000000040616000

(lldb) print *dyn_region
(region) $1 = {
  array = {
    [0] = '\0'
    [1] = '\0'
    [2] = '\0'
. . . (all '\0' bytes) . . .
    [251] = '\0'
    [252] = '\0'
    [253] = '\0'
    [254] = '\0'
    [255] = '\0'
    ...
  }
}

(lldb) print gbl_region
(region) $2 = {
  array = {
    [0] = '\0'
    [1] = '\x01'
    [2] = '\x02'
. . .
    [251] = '\xfb'
    [252] = '\xfc'
    [253] = '\xfd'
    [254] = '\xfe'
    [255] = '\xff'
    ...
  }
}

(lldb) disass -n main
a.out`main:
    0x2022c <+0>:   sub    sp, sp, #0x30             ; =0x30 
    0x20230 <+4>:   stp    x29, x30, [sp, #0x20]
    0x20234 <+8>:   add    x29, sp, #0x20            ; =0x20 
    0x20238 <+12>:  stur   wzr, [x29, #-0x4]
    0x2023c <+16>:  bl     0x202b0                   ; test_setup at swap_testing.c:74
    0x20240 <+20>:  bl     0x20580                   ; symbol stub for: fork
    0x20244 <+24>:  mov    w8, wzr
    0x20248 <+28>:  stur   w0, [x29, #-0x8]
    0x2024c <+32>:  stur   wzr, [x29, #-0xc]
    0x20250 <+36>:  ldur   w0, [x29, #-0x8]
    0x20254 <+40>:  cmp    w8, w0
    0x20258 <+44>:  b.ge   0x20268                   ; <+60> at swap_testing.c
    0x2025c <+48>:  sub    x0, x29, #0xc             ; =0xc 
    0x20260 <+52>:  bl     0x20590                   ; symbol stub for: wait
    0x20264 <+56>:  str    w0, [sp, #0x10]
    0x20268 <+60>:  mov    w8, #-0x1
    0x2026c <+64>:  ldur   w9, [x29, #-0xc]
    0x20270 <+68>:  cmp    w8, w9
    0x20274 <+72>:  b.eq   0x202a0                   ; <+116> at swap_testing.c:44
    0x20278 <+76>:  mov    w8, wzr
    0x2027c <+80>:  ldur   w9, [x29, #-0x8]
    0x20280 <+84>:  cmp    w8, w9
    0x20284 <+88>:  b.gt   0x202a0                   ; <+116> at swap_testing.c:44
    0x20288 <+92>:  ldur   w8, [x29, #-0x8]
    0x2028c <+96>:  cbnz   w8, 0x2029c               ; <+112> at swap_testing.c:42
    0x20290 <+100>: orr    w0, wzr, #0x3c
    0x20294 <+104>: bl     0x205a0                   ; symbol stub for: sleep
    0x20298 <+108>: str    w0, [sp, #0xc]
    0x2029c <+112>: bl     0x20348                   ; test_check at swap_testing.c:89
    0x202a0 <+116>: ldur   w0, [x29, #-0x4]
    0x202a4 <+120>: ldp    x29, x30, [sp, #0x20]
    0x202a8 <+124>: add    sp, sp, #0x30             ; =0x30 
    0x202ac <+128>: ret    

(lldb) disass -n value
a.out`value:
    0x204cc <+0>:  sub    sp, sp, #0x10             ; =0x10 
    0x204d0 <+4>:  str    x0, [sp, #0x8]
    0x204d4 <+8>:  ldrb   w8, [sp, #0x8]
    0x204d8 <+12>: mov    w1, w8
    0x204dc <+16>: mov    w0, w8
    0x204e0 <+20>: str    w1, [sp, #0x4]
    0x204e4 <+24>: add    sp, sp, #0x10             ; =0x10 
    0x204e8 <+28>: ret    

(lldb) disass -n test_setup
a.out`test_setup:
    0x202b0 <+0>:   sub    sp, sp, #0x20             ; =0x20 
    0x202b4 <+4>:   stp    x29, x30, [sp, #0x10]
    0x202b8 <+8>:   add    x29, sp, #0x10            ; =0x10 
    0x202bc <+12>:  orr    x0, xzr, #0x3800
    0x202c0 <+16>:  bl     0x205b0                   ; symbol stub for: malloc
    0x202c4 <+20>:  adrp   x30, 48
    0x202c8 <+24>:  add    x30, x30, #0x0            ; =0x0 
    0x202cc <+28>:  str    x0, [x30]
    0x202d0 <+32>:  ldr    x0, [x30]
    0x202d4 <+36>:  cbnz   x0, 0x202e4               ; <+52> at swap_testing.c:78
    0x202d8 <+40>:  orr    w0, wzr, #0x6
    0x202dc <+44>:  bl     0x205c0                   ; symbol stub for: raise
    0x202e0 <+48>:  str    w0, [sp, #0x4]
    0x202e4 <+52>:  str    xzr, [sp, #0x8]
    0x202e8 <+56>:  orr    x8, xzr, #0x3800
    0x202ec <+60>:  ldr    x9, [sp, #0x8]
    0x202f0 <+64>:  cmp    x9, x8
    0x202f4 <+68>:  b.hs   0x2033c                   ; <+140> at swap_testing.c:81
    0x202f8 <+72>:  ldr    x0, [sp, #0x8]
    0x202fc <+76>:  bl     0x204cc                   ; value at swap_testing.c:72
    0x20300 <+80>:  adrp   x30, 48
    0x20304 <+84>:  add    x30, x30, #0x0            ; =0x0 
    0x20308 <+88>:  adrp   x8, 48
    0x2030c <+92>:  add    x8, x8, #0x8              ; =0x8 
    0x20310 <+96>:  ldr    x9, [sp, #0x8]
    0x20314 <+100>: add    x8, x8, x9
    0x20318 <+104>: strb   w0, [x8]
    0x2031c <+108>: ldr    x8, [x30]
    0x20320 <+112>: ldr    x9, [sp, #0x8]
    0x20324 <+116>: add    x8, x8, x9
    0x20328 <+120>: strb   w0, [x8]
    0x2032c <+124>: ldr    x8, [sp, #0x8]
    0x20330 <+128>: add    x8, x8, #0x1              ; =0x1 
    0x20334 <+132>: str    x8, [sp, #0x8]
    0x20338 <+136>: b      0x202e8                   ; <+56> at swap_testing.c
    0x2033c <+140>: ldp    x29, x30, [sp, #0x10]
    0x20340 <+144>: add    sp, sp, #0x20             ; =0x20 
    0x20344 <+148>: ret    

(lldb) disass -n test_check
a.out`test_check:
    0x20348 <+0>:   sub    sp, sp, #0x20             ; =0x20 
    0x2034c <+4>:   stp    x29, x30, [sp, #0x10]
    0x20350 <+8>:   add    x29, sp, #0x10            ; =0x10 
    0x20354 <+12>:  b      0x20358                   ; <+16> at swap_testing.c
    0x20358 <+16>:  mov    w8, wzr
    0x2035c <+20>:  adrp   x9, 51
    0x20360 <+24>:  add    x9, x9, #0x808            ; =0x808 
    0x20364 <+28>:  ldrb   w10, [x9]
    0x20368 <+32>:  stur   w8, [x29, #-0x4]
    0x2036c <+36>:  tbnz   w10, #0x0, 0x2038c        ; <+68> at swap_testing.c
    0x20370 <+40>:  orr    x8, xzr, #0x3800
    0x20374 <+44>:  adrp   x9, 51
    0x20378 <+48>:  add    x9, x9, #0x810            ; =0x810 
    0x2037c <+52>:  ldr    x9, [x9]
    0x20380 <+56>:  cmp    x9, x8
    0x20384 <+60>:  cset   w10, lo
    0x20388 <+64>:  stur   w10, [x29, #-0x4]
    0x2038c <+68>:  ldur   w8, [x29, #-0x4]
    0x20390 <+72>:  tbz    w8, #0x0, 0x203ec         ; <+164> at swap_testing.c:95
    0x20394 <+76>:  adrp   x8, 51
    0x20398 <+80>:  add    x8, x8, #0x810            ; =0x810 
    0x2039c <+84>:  ldr    x0, [x8]
    0x203a0 <+88>:  bl     0x204cc                   ; value at swap_testing.c:72
    0x203a4 <+92>:  adrp   x8, 51
    0x203a8 <+96>:  add    x8, x8, #0x810            ; =0x810 
    0x203ac <+100>: adrp   x30, 51
    0x203b0 <+104>: add    x30, x30, #0x808          ; =0x808 
    0x203b4 <+108>: adrp   x9, 48
    0x203b8 <+112>: add    x9, x9, #0x8              ; =0x8 
    0x203bc <+116>: uxtb   w0, w0
    0x203c0 <+120>: ldr    x10, [x8]
    0x203c4 <+124>: add    x9, x9, x10
    0x203c8 <+128>: ldrb   w11, [x9]
    0x203cc <+132>: cmp    w0, w11
    0x203d0 <+136>: cset   w11, ne
    0x203d4 <+140>: and    w11, w11, #0x1
    0x203d8 <+144>: strb   w11, [x30]
    0x203dc <+148>: ldr    x9, [x8]
    0x203e0 <+152>: add    x9, x9, #0x1              ; =0x1 
    0x203e4 <+156>: str    x9, [x8]
    0x203e8 <+160>: b      0x20358                   ; <+16> at swap_testing.c
    0x203ec <+164>: b      0x203f0                   ; <+168> at swap_testing.c
    0x203f0 <+168>: mov    w8, wzr
    0x203f4 <+172>: adrp   x9, 51
    0x203f8 <+176>: add    x9, x9, #0x818            ; =0x818 
    0x203fc <+180>: ldrb   w10, [x9]
    0x20400 <+184>: str    w8, [sp, #0x8]
    0x20404 <+188>: tbnz   w10, #0x0, 0x20424        ; <+220> at swap_testing.c
    0x20408 <+192>: orr    x8, xzr, #0x3800
    0x2040c <+196>: adrp   x9, 51
    0x20410 <+200>: add    x9, x9, #0x820            ; =0x820 
    0x20414 <+204>: ldr    x9, [x9]
    0x20418 <+208>: cmp    x9, x8
    0x2041c <+212>: cset   w10, lo
    0x20420 <+216>: str    w10, [sp, #0x8]
    0x20424 <+220>: ldr    w8, [sp, #0x8]
    0x20428 <+224>: tbz    w8, #0x0, 0x20488         ; <+320> at swap_testing.c
    0x2042c <+228>: adrp   x8, 51
    0x20430 <+232>: add    x8, x8, #0x820            ; =0x820 
    0x20434 <+236>: ldr    x0, [x8]
    0x20438 <+240>: bl     0x204cc                   ; value at swap_testing.c:72
    0x2043c <+244>: adrp   x8, 51
    0x20440 <+248>: add    x8, x8, #0x820            ; =0x820 
    0x20444 <+252>: adrp   x30, 51
    0x20448 <+256>: add    x30, x30, #0x818          ; =0x818 
    0x2044c <+260>: adrp   x9, 48
    0x20450 <+264>: add    x9, x9, #0x0              ; =0x0 
    0x20454 <+268>: uxtb   w0, w0
    0x20458 <+272>: ldr    x9, [x9]
    0x2045c <+276>: ldr    x10, [x8]
    0x20460 <+280>: add    x9, x9, x10
    0x20464 <+284>: ldrb   w11, [x9]
    0x20468 <+288>: cmp    w0, w11
    0x2046c <+292>: cset   w11, ne
    0x20470 <+296>: and    w11, w11, #0x1
    0x20474 <+300>: strb   w11, [x30]
    0x20478 <+304>: ldr    x9, [x8]
    0x2047c <+308>: add    x9, x9, #0x1              ; =0x1 
    0x20480 <+312>: str    x9, [x8]
    0x20484 <+316>: b      0x203f0                   ; <+168> at swap_testing.c
    0x20488 <+320>: adrp   x8, 51
    0x2048c <+324>: add    x8, x8, #0x808            ; =0x808 
    0x20490 <+328>: ldrb   w9, [x8]
    0x20494 <+332>: tbz    w9, #0x0, 0x204a4         ; <+348> at swap_testing.c
    0x20498 <+336>: orr    w0, wzr, #0x6
    0x2049c <+340>: bl     0x205c0                   ; symbol stub for: raise
    0x204a0 <+344>: str    w0, [sp, #0x4]
    0x204a4 <+348>: adrp   x8, 51
    0x204a8 <+352>: add    x8, x8, #0x818            ; =0x818 
    0x204ac <+356>: ldrb   w9, [x8]
    0x204b0 <+360>: tbz    w9, #0x0, 0x204c0         ; <+376> at swap_testing.c:105
    0x204b4 <+364>: orr    w0, wzr, #0x6
    0x204b8 <+368>: bl     0x205c0                   ; symbol stub for: raise
->  0x204bc <+372>: str    w0, [sp]
    0x204c0 <+376>: ldp    x29, x30, [sp, #0x10]
    0x204c4 <+380>: add    sp, sp, #0x20             ; =0x20 
    0x204c8 <+384>: ret    

# uname -apKU
FreeBSD pine64 12.0-CURRENT FreeBSD 12.0-CURRENT  r314638M  arm64 aarch64 1200023 1200023

buildworld buildkernel did not have MALLOC_PRODUCTION= defined. The kernel is a
non-debug kernel. (Previous to these experiments my other corruption examples
were not caught by a debug kernel. I'm not hopeful that this simpler context
would either.)



===
Mark Millard
markmi at dsl-only.net
Received on Tue Mar 14 2017 - 06:19:25 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:41:10 UTC