Re: head -r320521 (e.g.): another powerpc64 problem: programs using fgets crash trying to store address over code instead of into __cleanup_info__

From: Mark Millard <markmi_at_dsl-only.net>
Date: Sat, 1 Jul 2017 20:33:49 -0700
[I've now got a route to get information from
the old PowerMac so-called "Quad Core" despite
sshd being broken. So I add gdb output
material as evidence to go with the more
source code level analysis from before.]

On 2017-Jul-1, at 7:42 PM, Mark Millard <markmi at dsl-only.net> wrote:

> [Note: this is from a amd64 -> powerpc64 cross-build based
> on system clang 4 instead of gcc 4.2.1. I'm building a
> gcc 4.2.1 based system currently so that I can test
> a more standard configuration. But I'm one of the ones
> that experiments with finding things to report for
> clang targeting powerpc64 and powerpc.]
> 
> powerpc64 is having programs crash with an attempt
> to store addresses over code instead of into
> __cleanup_info__ when fgets is used. ntpd is an
> example. As is sshd (although I've looked at
> its details less).
> 
> Building up the context for this claim. . .
> 
> public declaration:
> 
> struct _pthread_cleanup_info {
>        __uintptr_t     pthread_cleanup_pad[8];
> };
> 
> private declaration:
> 
> struct pthread_cleanup {
>        struct pthread_cleanup  *prev;
>        void                    (*routine)(void *);
>        void                    *routine_arg;
>        int                     onheap;
> };
> 
> ntpd and sshd die with segmentation faults in:
> 
> void
> __pthread_cleanup_push_imp(void (*routine)(void *), void *arg,
>        struct _pthread_cleanup_info *info)
> {
>        struct pthread  *curthread = _get_curthread();
>        struct pthread_cleanup *newbuf;
> 
>        newbuf = (void *)info;
>        newbuf->routine = routine;
>        newbuf->routine_arg = arg;
>        newbuf->onheap = 0;
>        newbuf->prev = curthread->cleanup;
>        curthread->cleanup = newbuf;
> }
> 
> at the statement: newbuf->routine = routine;
> 
> But it turns out that the bt is like:
> 
> __pthread_cleanup_push_imp(routine=0x507b1248 <__stdio_cancel_cleanup>, arg=0x0, info=0x509eaaf4)
> __pthread_cleanup_push_imp_int(p0=0x507b1248,p1=0x0)
> fgets (buf=0x51415000 "", n=511, fp=0x507d4c40)
> . . .
> 
> Note the 2 arguments to __pthread_cleanup_push_imp_int when called
> from fgets but the 3 arguemnts to __pthread_cleanup_push_imp . . .
> 
> fgets uses FLOCK_CANCELSAFE(fp) :
> 
> #define FLOCKFILE_CANCELSAFE(fp)                                        \
>        {                                                               \
>                struct _pthread_cleanup_info __cleanup_info__;          \
>                if (__isthreaded) {                                     \
>                        _FLOCKFILE(fp);                                 \
>                        ___pthread_cleanup_push_imp(                    \
>                            __stdio_cancel_cleanup, (fp),               \
>                            &__cleanup_info__);                         \
>                } else {                                                \
>                        ___pthread_cleanup_push_imp(                    \
>                            __stdio_cancel_cleanup, NULL,               \
>                            &__cleanup_info__);                         \
>                }                                                       \
>                {
> #define FUNLOCKFILE_CANCELSAFE()                                        \
>                        (void)0;                                        \
>                }                                                       \
>                ___pthread_cleanup_pop_imp(1);                          \
>        }
> 
> where here the NULL case is in use. 3 arguments.
> 
> But:
> 
> STUB_FUNC2(__pthread_cleanup_push_imp, PJT_CLEANUP_PUSH_IMP, void, void*, void *);
> 
> which is use of:
> 
> #define STUB_FUNC2(name, idx, ret, p0_type, p1_type)            \
>        static ret FUNC_EXP(name)(p0_type, p1_type) __used;     \
>        static ret FUNC_INT(name)(p0_type, p1_type) __used;     \
>        WEAK_REF(FUNC_EXP(name), name);                         \
>        WEAK_REF(FUNC_INT(name), __CONCAT(_, name));            \
>        typedef ret (*FUNC_TYPE(name))(p0_type, p1_type);       \
>        static ret FUNC_EXP(name)(p0_type p0, p1_type p1)       \
>        {                                                       \
>                FUNC_TYPE(name) func;                           \
>                func = (FUNC_TYPE(name))__thr_jtable[idx][0];   \
>                return (func(p0, p1));                          \
>        }                                                       \
>        static ret FUNC_INT(name)(p0_type p0, p1_type p1)       \
>        {                                                       \
>                FUNC_TYPE(name) func;                           \
>                func = (FUNC_TYPE(name))__thr_jtable[idx][1];   \
>                return (func(p0, p1));                          \
>        }
> 
> so only 2 arguments to func (i.e., __pthread_cleanup_push_imp
> here).
> 
> Compared to:
> 
>                        ___pthread_cleanup_push_imp(                    \
>                            __stdio_cancel_cleanup, NULL,               \
>                            &__cleanup_info__);                         \
> 
> As a result junk is used instead of &__cleanup_info__.
> On powerpc64 it happens to be the address of
> ___pthread_cleanup_push_imp that happens to be
> in r5 (normally the third argument) at the time.
> 
> So:
> 
>        newbuf->routine = routine;
> 
> tries to replace the first instruction of
> ___pthread_cleanup_push_imp .
> 
> As far as I can tell what should be used is:
> 
> #define STUB_FUNC3(name, idx, ret, p0_type, p1_type, p2_type)   \
>        static ret FUNC_EXP(name)(p0_type, p1_type, p2_type) __used; \
>        static ret FUNC_INT(name)(p0_type, p1_type, p2_type) __used; \
>        WEAK_REF(FUNC_EXP(name), name);                         \
>        WEAK_REF(FUNC_INT(name), __CONCAT(_, name));            \
>        typedef ret (*FUNC_TYPE(name))(p0_type, p1_type, p2_type); \
>        static ret FUNC_EXP(name)(p0_type p0, p1_type p1, p2_type p2) \
>        {                                                       \
>                FUNC_TYPE(name) func;                           \
>                func = (FUNC_TYPE(name))__thr_jtable[idx][0];   \
>                return (func(p0, p1, p2));                      \
>        }                                                       \
>        static ret FUNC_INT(name)(p0_type p0, p1_type p1, p2_type p2) \
>        {                                                       \
>                FUNC_TYPE(name) func;                           \
>                func = (FUNC_TYPE(name))__thr_jtable[idx][1];   \
>                return (func(p0, p1, p2));                      \
>        }
> 
> with the p2_type being: struct _pthread_cleanup_info *
> 
> but I'm not expert in this code.

The backtrace for ntpd :

Loaded symbols for /libexec/ld-elf.so.1
#0  __pthread_cleanup_push_imp (routine=0x50649248 <__stdio_cancel_cleanup>, arg=0x0, info=0x50403af4) at /usr/src/lib/libthr/thread/thr_clean.c:57
57              newbuf->routine = routine;
(gdb) bt
#0  __pthread_cleanup_push_imp (routine=0x50649248 <__stdio_cancel_cleanup>, arg=0x0, info=0x50403af4) at /usr/src/lib/libthr/thread/thr_clean.c:57
#1  0x0000000050603cbc in __pthread_cleanup_push_imp_int (p0=0x50649248, p1=0x0) at /usr/src/lib/libc/gen/_pthread_stubs.c:283
#2  0x00000000505850a0 in vfprintf_l (fp=0x5066cc40, locale=<value optimized out>, fmt0=0x50618523 "<%d>", ap=0xffffffffffffc150 "") at printfcommon.h:75
#3  0x000000005058b034 in fprintf (fp=0x50649248, fmt=0x50403af4 "`") at /usr/src/lib/libc/stdio/fprintf.c:55
#4  0x00000000505d22e0 in vsyslog (pri=<value optimized out>, fmt=0x50618523 "<%d>", ap=0xffffffffffffc150 "") at /usr/src/lib/libc/gen/syslog.c:171
#5  0x00000000505d2138 in syslog (pri=1348768328, fmt=0x0) at /usr/src/lib/libc/gen/syslog.c:128
#6  0x0000000010095eb8 in 00000016.plt_call.OBJ_sn2nid ()
#7  0x00000000100965e0 in 00000016.plt_call.OBJ_sn2nid ()
#8  0x000000001004624c in ntpdmain (argc=0, argv=<value optimized out>) at /usr/src/contrib/ntp/ntpd/ntpd.c:602
#9  0x0000000010046020 in main (argc=1348768328, argv=0x0) at /usr/src/contrib/ntp/ntpd/ntpd.c:394
#10 0x0000000010008b5c in 00000016.plt_call.OBJ_sn2nid ()
#11 0x00000000500e2b70 in .text () at /usr/src/libexec/rtld-elf/powerpc64/rtld_start.S:104

Note the expected use of r5 in the below.

0x50403af4 <__pthread_cleanup_push_imp>:        nop
0x50403af8 <__pthread_cleanup_push_imp+4>:      ld      r6,18288(r2)
0x50403afc <__pthread_cleanup_push_imp+8>:      cmpldi  r6,0
0x50403b00 <__pthread_cleanup_push_imp+12>:     beq-    0x50403b10 <__pthread_cleanup_push_imp+28>
0x50403b04 <__pthread_cleanup_push_imp+16>:     mr      r6,r13
0x50403b08 <__pthread_cleanup_push_imp+20>:     ld      r6,-28680(r6)
0x50403b0c <__pthread_cleanup_push_imp+24>:     b       0x50403b14 <__pthread_cleanup_push_imp+32>
0x50403b10 <__pthread_cleanup_push_imp+28>:     li      r6,0
0x50403b14 <__pthread_cleanup_push_imp+32>:     std     r3,8(r5)
0x50403b18 <__pthread_cleanup_push_imp+36>:     li      r3,0
0x50403b1c <__pthread_cleanup_push_imp+40>:     std     r4,16(r5)
0x50403b20 <__pthread_cleanup_push_imp+44>:     stw     r3,24(r5)
0x50403b24 <__pthread_cleanup_push_imp+48>:     ld      r3,552(r6)
0x50403b28 <__pthread_cleanup_push_imp+52>:     std     r3,0(r5)
0x50403b2c <__pthread_cleanup_push_imp+56>:     std     r5,552(r6)
0x50403b30 <__pthread_cleanup_push_imp+60>:     blr
0x50403b34 <__pthread_cleanup_push_imp+64>:     .long 0x0
0x50403b38 <__pthread_cleanup_push_imp+68>:     .long 0x0
0x50403b3c <__pthread_cleanup_push_imp+72>:     .long 0x0

But note the use of r5 in the below: r5==0x50403af4
results, replacing any potential r5 from the call
to __pthread_cleanup_push_imp_int . Also r5's value
is replaced without accessing or recording the value
it the __pthread_cleanup_push_imp_int's start.

r5             0x50403af4       1346386676

0x50603c80 <__pthread_cleanup_push_imp_int>:    mflr    r0
0x50603c84 <__pthread_cleanup_push_imp_int+4>:  std     r31,-8(r1)
0x50603c88 <__pthread_cleanup_push_imp_int+8>:  std     r0,16(r1)
0x50603c8c <__pthread_cleanup_push_imp_int+12>: stdu    r1,-128(r1)
0x50603c90 <__pthread_cleanup_push_imp_int+16>: mr      r31,r1
0x50603c94 <__pthread_cleanup_push_imp_int+20>: nop
0x50603c98 <__pthread_cleanup_push_imp_int+24>: std     r2,40(r1)
0x50603c9c <__pthread_cleanup_push_imp_int+28>: ld      r5,-22408(r2)
0x50603ca0 <__pthread_cleanup_push_imp_int+32>: ld      r5,968(r5)
0x50603ca4 <__pthread_cleanup_push_imp_int+36>: ld      r6,8(r5)
0x50603ca8 <__pthread_cleanup_push_imp_int+40>: ld      r11,16(r5)
0x50603cac <__pthread_cleanup_push_imp_int+44>: ld      r5,0(r5)
0x50603cb0 <__pthread_cleanup_push_imp_int+48>: mr      r2,r6
0x50603cb4 <__pthread_cleanup_push_imp_int+52>: mtctr   r5
0x50603cb8 <__pthread_cleanup_push_imp_int+56>: bctrl
0x50603cbc <__pthread_cleanup_push_imp_int+60>: ld      r2,40(r1)
0x50603cc0 <__pthread_cleanup_push_imp_int+64>: addi    r1,r1,128
0x50603cc4 <__pthread_cleanup_push_imp_int+68>: ld      r0,16(r1)
0x50603cc8 <__pthread_cleanup_push_imp_int+72>: ld      r31,-8(r1)
0x50603ccc <__pthread_cleanup_push_imp_int+76>: mtlr    r0
0x50603cd0 <__pthread_cleanup_push_imp_int+80>: blr
0x50603cd4 <__pthread_cleanup_push_imp_int+84>: .long 0x0
0x50603cd8 <__pthread_cleanup_push_imp_int+88>: .long 0x0
0x50603cdc <__pthread_cleanup_push_imp_int+92>: .long 0x0

As the above code is set up for passing 2 arguments
to __pthread_cleanup_push_imp (in r3 and r4) the above
code uses r5 as a scratch register to hold the computed
address of __pthread_cleanup_push_imp.

This is not what __pthread_cleanup_push_imp is designed
for in its use of r5.

But in the below r5 was expected to be passed in
to __pthread_cleanup_push_imp_int even though
__pthread_cleanup_push_imp_int ignores the r5
value that is supplied.

0x505da750 <fgets>:     mflr    r0
0x505da754 <fgets+4>:   std     r31,-8(r1)
0x505da758 <fgets+8>:   std     r0,16(r1)
0x505da75c <fgets+12>:  stdu    r1,-256(r1)
0x505da760 <fgets+16>:  mr      r31,r1
. . .
0x505da7b8 <fgets+104>: bl      0x5048a9a0 <00000017.plt_call._flockfile>
0x505da7bc <fgets+108>: ld      r2,40(r1)
0x505da7c0 <fgets+112>: nop
0x505da7c4 <fgets+116>: addi    r5,r31,112
0x505da7c8 <fgets+120>: mr      r4,r29
0x505da7cc <fgets+124>: b       0x505da7dc <fgets+140>
0x505da7d0 <fgets+128>: nop
0x505da7d4 <fgets+132>: addi    r5,r31,112
0x505da7d8 <fgets+136>: li      r4,0
0x505da7dc <fgets+140>: ld      r3,-23664(r2)
0x505da7e0 <fgets+144>: bl      0x50603c80 <__pthread_cleanup_push_imp_int>
0x505da7e4 <fgets+148>: nop
. . .

Conclusion: __pthread_cleanup_push_imp_int should expect
and handle 3 arguments and propagate all 3 to
__pthread_cleanup_push_imp, not using r5 for the
address of __pthread_cleanup_push_imp .


I'll only give the backtrace for sshd. Also I've removed
the garbage text shown for appname= in #8 and just used
". . .".

Note that #0 through #2 are very similar to the above
for ntpd: same problem for sshd.

#0  __pthread_cleanup_push_imp (routine=0x507b1248 <__stdio_cancel_cleanup>, arg=0x0, info=0x509eaaf4) at /usr/src/lib/libthr/thread/thr_clean.c:57
#1  0x000000005076bcbc in __pthread_cleanup_push_imp_int (p0=0x507b1248, p1=0x0) at /usr/src/lib/libc/gen/_pthread_stubs.c:283
#2  0x00000000507427e4 in fgets (buf=0x51415000 "", n=511, fp=0x507d4c40) from /lib/libc.so.7
#3  0x00000000504e5268 in file_gets (bp=<value optimized out>, buf=0x507d4c40 "", size=0) at /usr/src/crypto/openssl/crypto/bio/bss_file.c:461
#4  0x00000000504f0678 in BIO_gets (b=<value optimized out>, in=0x51415000 "", inl=<value optimized out>) at /usr/src/crypto/openssl/crypto/bio/bio_lib.c:303
#5  0x000000005047eaa8 in def_load_bio (conf=0x51415000, in=0x5140e000, line=0x507d4c40) at /usr/src/crypto/openssl/crypto/conf/conf_def.c:260
#6  0x000000005047f5b4 in def_load (conf=<value optimized out>, name=<value optimized out>, line=0x51415000) at /usr/src/crypto/openssl/crypto/conf/conf_def.c:207
#7  0x000000005047e518 in NCONF_load (conf=0x507b1248, file=0x0, eline=0x509eaaf4) at /usr/src/crypto/openssl/crypto/conf/conf_lib.c:265
#8  0x000000005041e97c in CONF_modules_load_file (filename=<value optimized out>, appname=0x507c3da0 ". . .", 
    flags=1363206144) at /usr/src/crypto/openssl/crypto/conf/conf_mod.c:179
#9  0x00000000503ce39c in OPENSSL_config (config_name=0x51415000 "") at /usr/src/crypto/openssl/crypto/conf/conf_sap.c:90
#10 0x00000000500e6200 in Fssh_ssh_OpenSSL_add_all_algorithms () at /usr/src/crypto/openssh/openbsd-compat/openssl-compat.c:78
#11 0x000000001000e0e0 in main (ac=0, av=<value optimized out>) at /usr/src/crypto/openssh/sshd.c:1553
#12 0x000000001000d4bc in 00000016.plt_call.Fssh_scan_scaled ()
#13 0x0000000050057b70 in .text () at /usr/src/libexec/rtld-elf/powerpc64/rtld_start.S:104



===
Mark Millard
markmi at dsl-only.net
Received on Sun Jul 02 2017 - 01:33:52 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:41:12 UTC