From 646b51833f0397d56f3018a56340098172bde35b Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Tue, 13 Nov 2012 00:34:16 +0400 Subject: [PATCH 1/3] lib/bcheck: Prevent libc_malloc/libc_free etc from being miscompiled On i386 and gcc-4.7 I found that libc_malloc was miscompiled - look: static void *libc_malloc(size_t size) { void *ptr; restore_malloc_hooks(); // __malloc_hook = saved_malloc_hook ptr = malloc(size); install_malloc_hooks(); // saved_malloc_hook = __malloc_hook, __malloc_hook = __bound_malloc return ptr; } .type libc_malloc, @function libc_malloc: .LFB56: .cfi_startproc pushl %edx .cfi_def_cfa_offset 8 movl %eax, (%esp) call malloc movl $__bound_malloc, __malloc_hook movl $__bound_free, __free_hook movl $__bound_realloc, __realloc_hook movl $__bound_memalign, __memalign_hook popl %ecx .cfi_def_cfa_offset 4 ret Here gcc inlined both restore_malloc_hooks() and install_malloc_hooks() and decided that saved_malloc_hook -> __malloc_hook -> saved_malloc_hook stores are not needed and could be ommitted. Only it did not know __molloc_hook affects malloc()... So add compiler barrier to both install and restore hooks functions and be done with it - the code is now ok: diff --git a/bcheck0.s b/bcheck1.s index 5f50293..4c02a5f 100644 --- a/bcheck0.s +++ b/bcheck1.s @@ -42,8 +42,24 @@ libc_malloc: .cfi_startproc pushl %edx .cfi_def_cfa_offset 8 + movl saved_malloc_hook, %edx + movl %edx, __malloc_hook + movl saved_free_hook, %edx + movl %edx, __free_hook + movl saved_realloc_hook, %edx + movl %edx, __realloc_hook + movl saved_memalign_hook, %edx + movl %edx, __memalign_hook movl %eax, (%esp) call malloc + movl __malloc_hook, %edx + movl %edx, saved_malloc_hook + movl __free_hook, %edx + movl %edx, saved_free_hook + movl __realloc_hook, %edx + movl %edx, saved_realloc_hook + movl __memalign_hook, %edx + movl %edx, saved_memalign_hook movl $__bound_malloc, __malloc_hook movl $__bound_free, __free_hook movl $__bound_realloc, __realloc_hook For barrier I use __asm__ __volatile__ ("": : : "memory") which is used as compiler barrier by Linux kernel, and mentioned in gcc docs and in wikipedia [1]. Without this patch any program compiled with tcc -b crashes in startup because of infinite recursion in libc_malloc. [1] http://en.wikipedia.org/wiki/Memory_ordering#Compiler_memory_barrier --- lib/bcheck.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/bcheck.c b/lib/bcheck.c index 48d7606..700ceda 100644 --- a/lib/bcheck.c +++ b/lib/bcheck.c @@ -638,6 +638,9 @@ static unsigned long get_region_size(void *p) /* patched memory functions */ +/* force compiler to perform stores coded up to this point */ +#define barrier() __asm__ __volatile__ ("": : : "memory") + static void install_malloc_hooks(void) { #ifdef CONFIG_TCC_MALLOC_HOOKS @@ -649,6 +652,8 @@ static void install_malloc_hooks(void) __free_hook = __bound_free; __realloc_hook = __bound_realloc; __memalign_hook = __bound_memalign; + + barrier(); #endif } @@ -659,6 +664,8 @@ static void restore_malloc_hooks(void) __free_hook = saved_free_hook; __realloc_hook = saved_realloc_hook; __memalign_hook = saved_memalign_hook; + + barrier(); #endif } From cffb7af9f96834623ee0ff6b7fb10d56c91efb99 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Tue, 13 Nov 2012 13:14:26 +0400 Subject: [PATCH 2/3] lib/bcheck: Prevent __bound_local_new / __bound_local_delete from being miscompiled On i386 and gcc-4.7 I found that __bound_local_new was miscompiled - look: #ifdef __i386__ /* return the frame pointer of the caller */ #define GET_CALLER_FP(fp)\ {\ unsigned long *fp1;\ __asm__ __volatile__ ("movl %%ebp,%0" :"=g" (fp1));\ fp = fp1[0];\ } #endif /* called when entering a function to add all the local regions */ void FASTCALL __bound_local_new(void *p1) { unsigned long addr, size, fp, *p = p1; GET_CALLER_FP(fp); for(;;) { addr = p[0]; if (addr == 0) break; addr += fp; size = p[1]; p += 2; __bound_new_region((void *)addr, size); } } __bound_local_new: .LFB40: .cfi_startproc pushl %esi .cfi_def_cfa_offset 8 .cfi_offset 6, -8 pushl %ebx .cfi_def_cfa_offset 12 .cfi_offset 3, -12 subl $8, %esp // NOTE prologue does not touch %ebp .cfi_def_cfa_offset 20 #APP # 235 "lib/bcheck.c" 1 movl %ebp,%edx // %ebp -> fp1 # 0 "" 2 #NO_APP movl (%edx), %esi // fp1[0] -> fp movl (%eax), %edx movl %eax, %ebx testl %edx, %edx je .L167 .p2align 2,,3 .L173: movl 4(%ebx), %eax addl $8, %ebx movl %eax, 4(%esp) addl %esi, %edx movl %edx, (%esp) call __bound_new_region movl (%ebx), %edx testl %edx, %edx jne .L173 .L167: addl $8, %esp .cfi_def_cfa_offset 12 popl %ebx .cfi_restore 3 .cfi_def_cfa_offset 8 popl %esi .cfi_restore 6 .cfi_def_cfa_offset 4 ret here GET_CALLER_FP() assumed that its using function setups it's stack frame, i.e. first save, then set %ebp to stack frame start, and then it has to do perform two lookups: 1) to get current stack frame through %ebp, and 2) get caller stack frame through (%ebp). And here is the problem: gcc decided not to setup %ebp for __bound_local_new and in such case GET_CALLER_FP actually becomes GET_CALLER_CALLER_FP and oops, wrong regions are registered in bcheck tables... The solution is to stop using hand written assembly and rely on gcc's __builtin_frame_address(1) to get callers frame stack(*). I think for the builtin gcc should generate correct code, independent of whether it decides or not to omit frame pointer in using function - it knows it. (*) judging by gcc history, __builtin_frame_address was there almost from the beginning - at least it is present in 1992 as seen from the following commit: http://gcc.gnu.org/git/?p=gcc.git;a=commit;h=be07f7bdbac76d87d3006c89855491504d5d6202 so we can rely on it being supported by all versions of gcc. In my environment the assembly of __bound_local_new changes as follows: diff --git a/bcheck0.s b/bcheck1.s index 4c02a5f..ef68918 100644 --- a/bcheck0.s +++ b/bcheck1.s @@ -1409,20 +1409,17 @@ __bound_init: __bound_local_new: .LFB40: .cfi_startproc - pushl %esi + pushl %ebp // NOTE prologue saves %ebp ... .cfi_def_cfa_offset 8 - .cfi_offset 6, -8 + .cfi_offset 5, -8 + movl %esp, %ebp // ... and reset it to local stack frame + .cfi_def_cfa_register 5 + pushl %esi pushl %ebx - .cfi_def_cfa_offset 12 - .cfi_offset 3, -12 subl $8, %esp - .cfi_def_cfa_offset 20 -#APP -# 235 "lib/bcheck.c" 1 - movl %ebp,%edx -# 0 "" 2 -#NO_APP - movl (%edx), %esi + .cfi_offset 6, -12 + .cfi_offset 3, -16 + movl 0(%ebp), %esi // stkframe -> stkframe.parent -> fp movl (%eax), %edx movl %eax, %ebx testl %edx, %edx @@ -1440,13 +1437,13 @@ __bound_local_new: jne .L173 .L167: addl $8, %esp - .cfi_def_cfa_offset 12 popl %ebx .cfi_restore 3 - .cfi_def_cfa_offset 8 popl %esi .cfi_restore 6 - .cfi_def_cfa_offset 4 + popl %ebp + .cfi_restore 5 + .cfi_def_cfa 4, 4 ret .cfi_endproc i.e. now it compiles correctly. Though I do not have x86_64 to test, my guess is that __builtin_frame_address(1) should work there too. If not - please revert only x86_64 part of the patch. Thanks. Cc: Michael Matz --- lib/bcheck.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/lib/bcheck.c b/lib/bcheck.c index 700ceda..5ea2d0b 100644 --- a/lib/bcheck.c +++ b/lib/bcheck.c @@ -208,25 +208,11 @@ BOUND_PTR_INDIR(8) BOUND_PTR_INDIR(12) BOUND_PTR_INDIR(16) -#ifdef __i386__ /* return the frame pointer of the caller */ #define GET_CALLER_FP(fp)\ {\ - unsigned long *fp1;\ - __asm__ __volatile__ ("movl %%ebp,%0" :"=g" (fp1));\ - fp = fp1[0];\ + fp = (unsigned long)__builtin_frame_address(1);\ } -#elif defined(__x86_64__) -/* TCC always creates %rbp frames also on x86_64, so use them. */ -#define GET_CALLER_FP(fp)\ -{\ - unsigned long *fp1;\ - __asm__ __volatile__ ("movq %%rbp,%0" :"=g" (fp1));\ - fp = fp1[0];\ -} -#else -#error put code to extract the calling frame pointer -#endif /* called when entering a function to add all the local regions */ void FASTCALL __bound_local_new(void *p1) From 5d648485bdd8d1742b6711ce34069fccbfe5616f Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Tue, 13 Nov 2012 22:23:01 +0400 Subject: [PATCH 3/3] Now btest pass! Thanks to two previous commits now btest tests pass, at least on Linux. Signed-off-by: Kirill Smelkov --- tests/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/Makefile b/tests/Makefile index 4f057f4..2e7dd23 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -22,7 +22,6 @@ TESTS = libtest \ # some tests do not pass on all platforms, remove them for now ifeq ($(TARGETOS),Linux) - TESTS := $(filter-out btest,$(TESTS)) TESTS := $(filter-out weaktest,$(TESTS)) endif ifeq ($(TARGETOS),Darwin)