From dd57a348664d0fac46a0f4c822f70c80bbf97774 Mon Sep 17 00:00:00 2001 From: Michael Matz Date: Thu, 25 Aug 2016 16:40:50 +0200 Subject: [PATCH] tccasm: Don't ignore # in preprocessor directives Our preprocessor throws away # line-comments in asm mode. It did so also inside preprocessor directives, thereby removing stringification. Parse defines in non-asm mode (but retain '.' as identifier character inside macro definitions). --- tcc.h | 6 ++++++ tccasm.c | 2 ++ tccpp.c | 45 ++++++++++++++++++++++++--------------------- tests/asmtest.S | 21 +++++++++++++++++---- 4 files changed, 49 insertions(+), 25 deletions(-) diff --git a/tcc.h b/tcc.h index e4b5a0e..17c24a2 100644 --- a/tcc.h +++ b/tcc.h @@ -1175,10 +1175,16 @@ ST_DATA TokenSym **table_ident; #define PARSE_FLAG_ACCEPT_STRAYS 0x0020 /* next() returns '\\' token */ #define PARSE_FLAG_TOK_STR 0x0040 /* return parsed strings instead of TOK_PPSTR */ +/* isidnum_table flags: */ +#define IS_SPC 1 +#define IS_ID 2 +#define IS_NUM 4 + ST_FUNC TokenSym *tok_alloc(const char *str, int len); ST_FUNC const char *get_tok_str(int v, CValue *cv); ST_FUNC void begin_macro(TokenString *str, int alloc); ST_FUNC void end_macro(void); +ST_FUNC void set_idnum(int c, int val); ST_FUNC void save_parse_state(ParseState *s); ST_FUNC void restore_parse_state(ParseState *s); ST_INLN void tok_str_new(TokenString *s); diff --git a/tccasm.c b/tccasm.c index 192e485..1819950 100644 --- a/tccasm.c +++ b/tccasm.c @@ -916,6 +916,7 @@ static int tcc_assemble_internal(TCCState *s1, int do_preprocess) ch = file->buf_ptr[0]; tok_flags = TOK_FLAG_BOL | TOK_FLAG_BOF; parse_flags = PARSE_FLAG_ASM_FILE | PARSE_FLAG_TOK_STR; + set_idnum('.', IS_ID); if (do_preprocess) parse_flags |= PARSE_FLAG_PREPROCESS; next(); @@ -1036,6 +1037,7 @@ static void tcc_assemble_inline(TCCState *s1, char *str, int len) tcc_close(); parse_flags = saved_parse_flags; + set_idnum('.', (parse_flags & PARSE_FLAG_ASM_FILE) ? IS_ID : 0); macro_ptr = saved_macro_ptr; } diff --git a/tccpp.c b/tccpp.c index 5aaea1a..04d5722 100644 --- a/tccpp.c +++ b/tccpp.c @@ -53,11 +53,6 @@ static struct TinyAlloc *toksym_alloc; static struct TinyAlloc *tokstr_alloc; static struct TinyAlloc *cstr_alloc; -/* isidnum_table flags: */ -#define IS_SPC 1 -#define IS_ID 2 -#define IS_NUM 4 - static TokenString *macro_stack; static const char tcc_keywords[] = @@ -819,6 +814,11 @@ ST_FUNC uint8_t *parse_comment(uint8_t *p) return p; } +ST_FUNC void set_idnum(int c, int val) +{ + isidnum_table[c - CH_EOF] = val; +} + #define cinp minp static inline void skip_spaces(void) @@ -1453,13 +1453,15 @@ ST_FUNC void parse_define(void) /* XXX: should check if same macro (ANSI) */ first = NULL; t = MACRO_OBJ; + /* We have to parse the whole define as if not in asm mode, in particular + no line comment with '#' must be ignored. Also for function + macros the argument list must be parsed without '.' being an ID + character. */ + parse_flags = ((parse_flags & ~PARSE_FLAG_ASM_FILE) | PARSE_FLAG_SPACES); /* '(' must be just after macro definition for MACRO_FUNC */ - parse_flags |= PARSE_FLAG_SPACES; next_nomacro_spc(); if (tok == '(') { - /* must be able to parse TOK_DOTS (in asm mode '.' can be part of identifier) */ - parse_flags &= ~PARSE_FLAG_ASM_FILE; - isidnum_table['.' - CH_EOF] = 0; + set_idnum('.', 0); next_nomacro(); ps = &first; if (tok != ')') for (;;) { @@ -1487,14 +1489,17 @@ ST_FUNC void parse_define(void) } next_nomacro_spc(); t = MACRO_FUNC; - parse_flags |= (saved_parse_flags & PARSE_FLAG_ASM_FILE); - isidnum_table['.' - CH_EOF] = - (parse_flags & PARSE_FLAG_ASM_FILE) ? IS_ID : 0; } tokstr_buf.len = 0; spc = 2; parse_flags |= PARSE_FLAG_ACCEPT_STRAYS | PARSE_FLAG_SPACES | PARSE_FLAG_LINEFEED; + /* The body of a macro definition should be parsed such that identifiers + are parsed like the file mode determines (i.e. with '.' being an + ID character in asm mode). But '#' should be retained instead of + regarded as line comment leader, so still don't set ASM_FILE + in parse_flags. */ + set_idnum('.', (saved_parse_flags & PARSE_FLAG_ASM_FILE) ? IS_ID : 0); while (tok != TOK_LINEFEED && tok != TOK_EOF) { /* remove spaces around ## and after '#' */ if (TOK_TWOSHARPS == tok) { @@ -2673,7 +2678,7 @@ maybe_newline: if (isnum(c)) { t = '.'; goto parse_num; - } else if ((parse_flags & PARSE_FLAG_ASM_FILE) + } else if ((isidnum_table['.' - CH_EOF] & IS_ID) && (isidnum_table[c - CH_EOF] & (IS_ID|IS_NUM))) { *--p = c = '.'; goto parse_ident_fast; @@ -3470,10 +3475,8 @@ ST_FUNC void preprocess_start(TCCState *s1) s1->pack_stack[0] = 0; s1->pack_stack_ptr = s1->pack_stack; - isidnum_table['$' - CH_EOF] = - s1->dollars_in_identifiers ? IS_ID : 0; - isidnum_table['.' - CH_EOF] = - (parse_flags & PARSE_FLAG_ASM_FILE) ? IS_ID : 0; + set_idnum('$', s1->dollars_in_identifiers ? IS_ID : 0); + set_idnum('.', (parse_flags & PARSE_FLAG_ASM_FILE) ? IS_ID : 0); buf = tcc_malloc(3 + strlen(file->filename)); sprintf(buf, "\"%s\"", file->filename); tcc_undefine_symbol(s1, "__BASE_FILE__"); @@ -3505,14 +3508,14 @@ ST_FUNC void tccpp_new(TCCState *s) /* init isid table */ for(i = CH_EOF; i<128; i++) - isidnum_table[i - CH_EOF] - = is_space(i) ? IS_SPC + set_idnum(i, + is_space(i) ? IS_SPC : isid(i) ? IS_ID : isnum(i) ? IS_NUM - : 0; + : 0); for(i = 128; i<256; i++) - isidnum_table[i - CH_EOF] = IS_ID; + set_idnum(i, IS_ID); /* init allocators */ tal_new(&toksym_alloc, TOKSYM_TAL_LIMIT, TOKSYM_TAL_SIZE); diff --git a/tests/asmtest.S b/tests/asmtest.S index 59deb06..280aeaf 100644 --- a/tests/asmtest.S +++ b/tests/asmtest.S @@ -16,10 +16,17 @@ .skip 3 .skip 15, 0x90 .string "hello\0world" +/* Macro expansion should work like with C, the #n shouldn't be parsed + as asm line comment */ +#define __stringify(n) #n +#define stringify(n) __stringify(n) + .skip 8,0x90 + .asciz stringify(BLA) + .skip 8,0x90 +# 28 "asmtest.S" # a line directive (and a line comment) + movl %eax, %ebx # some more asm comment /* some label tests */ - - movl %eax, %ebx L1: movl %eax, %ebx mov 0x10000, %eax @@ -572,8 +579,8 @@ int $0x10 pusha popa #endif - clc - cld + clc # another comment + cld # a comment with embedded ' tick cli clts cmc @@ -676,8 +683,10 @@ int $0x10 lar %eax,%dx lar %ax,%edx lar %eax,%edx +#ifdef __x86_64__ lar %ax,%rdx lar %eax,%rdx +#endif emms movd %edx, %mm3 movd 0x1000, %mm2 @@ -820,7 +829,11 @@ nop .long 145 + 2b .word 164, 0 .org 2b+32 +#ifdef __x86_64__ .quad 1b +#else + .long 1b +#endif .popsection 3: mov %eax,%ecx 4: