parsing optimizations

tcc-xref
bellard 2002-11-23 18:15:17 +00:00
parent b81d4ba6b3
commit 8901fbeef1
2 changed files with 319 additions and 218 deletions

535
tcc.c
View File

@ -1118,11 +1118,13 @@ void test_lvalue(void)
TokenSym *tok_alloc(const char *str, int len) TokenSym *tok_alloc(const char *str, int len)
{ {
TokenSym *ts, **pts, **ptable; TokenSym *ts, **pts, **ptable;
int h, i; int i;
unsigned int h;
h = 1; h = 1;
for(i=0;i<len;i++) for(i=0;i<len;i++)
h = (h * 263 + ((unsigned char *)str)[i]) & (TOK_HASH_SIZE - 1); h = h * 263 + ((unsigned char *)str)[i];
h &= (TOK_HASH_SIZE - 1);
pts = &hash_ident[h]; pts = &hash_ident[h];
while (1) { while (1) {
@ -1522,7 +1524,12 @@ static int tcc_peekc_slow(BufferedFile *bf)
/* only tries to read if really end of buffer */ /* only tries to read if really end of buffer */
if (bf->buf_ptr >= bf->buf_end) { if (bf->buf_ptr >= bf->buf_end) {
if (bf->fd != -1) { if (bf->fd != -1) {
len = read(bf->fd, bf->buffer, IO_BUF_SIZE); #if defined(PARSE_DEBUG)
len = 8;
#else
len = IO_BUF_SIZE;
#endif
len = read(bf->fd, bf->buffer, len);
if (len < 0) if (len < 0)
len = 0; len = 0;
} else { } else {
@ -1541,13 +1548,11 @@ static int tcc_peekc_slow(BufferedFile *bf)
} }
} }
/* no need to put that inline */ /* return the current character, handling end of block if necessary
void handle_eob(void) (but not stray) */
static int handle_eob(void)
{ {
/* no need to do anything if not at EOB */ return tcc_peekc_slow(file);
if (file->buf_ptr < file->buf_end)
return;
ch = tcc_peekc_slow(file);
} }
/* read next char from current input file and handle end of input buffer */ /* read next char from current input file and handle end of input buffer */
@ -1556,7 +1561,7 @@ static inline void inp(void)
ch = *(++(file->buf_ptr)); ch = *(++(file->buf_ptr));
/* end of buffer/file handling */ /* end of buffer/file handling */
if (ch == CH_EOB) if (ch == CH_EOB)
handle_eob(); ch = handle_eob();
} }
/* handle '\[\r]\n' */ /* handle '\[\r]\n' */
@ -1580,6 +1585,40 @@ static void handle_stray(void)
} }
} }
/* skip the stray and handle the \\n case. Output an error if
incorrect char after the stray */
static int handle_stray1(uint8_t *p)
{
int c;
if (p >= file->buf_end) {
file->buf_ptr = p;
c = handle_eob();
p = file->buf_ptr;
if (c == '\\')
goto parse_stray;
} else {
parse_stray:
file->buf_ptr = p;
ch = *p;
handle_stray();
p = file->buf_ptr;
c = *p;
}
return c;
}
/* Advance p by one character and load that character into c.  When the
   character is a '\\', handle_stray1() resolves it and p is reloaded
   from file->buf_ptr.  The body is wrapped in do { } while (0) so the
   macro expands to a single statement and is safe inside an unbraced
   if/else; callers must terminate the invocation with ';'.  Both
   arguments must be lvalues and are evaluated more than once, so they
   must not have side effects. */
#define PEEKC(c, p)\
do {\
    (p)++;\
    (c) = *(p);\
    if ((c) == '\\') {\
        (c) = handle_stray1(p);\
        (p) = file->buf_ptr;\
    }\
} while (0)
/* input with '\[\r]\n' handling. Note that this function cannot /* input with '\[\r]\n' handling. Note that this function cannot
handle other characters after '\', so you cannot call it inside handle other characters after '\', so you cannot call it inside
strings or comments */ strings or comments */
@ -1606,8 +1645,8 @@ static void parse_comment(void)
int c; int c;
/* C comments */ /* C comments */
minp();
p = file->buf_ptr; p = file->buf_ptr;
p++;
for(;;) { for(;;) {
/* fast skip loop */ /* fast skip loop */
for(;;) { for(;;) {
@ -1628,49 +1667,49 @@ static void parse_comment(void)
p++; p++;
for(;;) { for(;;) {
c = *p; c = *p;
if (c == '/') { if (c == '*') {
p++;
} else if (c == '/') {
goto end_of_comment; goto end_of_comment;
} else if (c == '\\') { } else if (c == '\\') {
if (p >= file->buf_end) { file->buf_ptr = p;
file->buf_ptr = p; c = handle_eob();
handle_eob(); if (c == '\\') {
p = file->buf_ptr; /* skip '\\n', but if '\' followed by another
if (p >= file->buf_end) char, behave as if a stray was parsed */
goto eof_found; ch = file->buf_ptr[0];
continue; while (ch == '\\') {
inp();
if (ch == '\n') {
file->line_num++;
inp();
} else if (ch == '\r') {
inp();
if (ch == '\n') {
file->line_num++;
inp();
}
} else {
p = file->buf_ptr;
break;
}
}
} }
p++; p = file->buf_ptr;
c = *p;
if (c == '\n') {
file->line_num++;
p++;
} else if (c == '\r') {
p++;
c = *p;
if (c != '\n')
break;
file->line_num++;
p++;
} else {
break;
}
} else if (c == '*') {
p++;
} else { } else {
break; break;
} }
} }
} else if (p >= file->buf_end) {
file->buf_ptr = p;
handle_eob();
p = file->buf_ptr;
if (p >= file->buf_end) {
eof_found:
error("unexpected end of file in comment");
}
} else { } else {
/* stray */ /* stray, eob or eof */
p++; file->buf_ptr = p;
c = handle_eob();
p = file->buf_ptr;
if (c == CH_EOF) {
error("unexpected end of file in comment");
} else if (c == '\\') {
p++;
}
} }
} }
end_of_comment: end_of_comment:
@ -1697,63 +1736,98 @@ static inline void skip_spaces(void)
#if/#endif */ #if/#endif */
void preprocess_skip(void) void preprocess_skip(void)
{ {
int a, start_of_line, sep; int a, start_of_line, sep, c;
uint8_t *p;
p = file->buf_ptr;
start_of_line = 1; start_of_line = 1;
a = 0; a = 0;
for(;;) { for(;;) {
redo_no_start: redo_no_start:
switch(ch) { c = *p;
switch(c) {
case ' ': case ' ':
case '\t': case '\t':
case '\f': case '\f':
case '\v': case '\v':
case '\r': case '\r':
inp(); p++;
goto redo_no_start; goto redo_no_start;
case '\n': case '\n':
start_of_line = 1; start_of_line = 1;
file->line_num++; file->line_num++;
inp(); p++;
goto redo_no_start; goto redo_no_start;
case '\\': case '\\':
handle_stray(); file->buf_ptr = p;
c = handle_eob();
if (c == CH_EOF) {
expect("#endif");
} else if (c == '\\') {
/* XXX: incorrect: should not give an error */
ch = file->buf_ptr[0];
handle_stray();
}
p = file->buf_ptr;
goto redo_no_start; goto redo_no_start;
/* skip strings */ /* skip strings */
case '\"': case '\"':
case '\'': case '\'':
sep = ch; sep = c;
inp(); p++;
while (ch != sep) { for(;;) {
/* XXX: better error message */ c = *p;
if (ch == TOK_EOF) { if (c == sep) {
error("unterminated string"); break;
} else if (ch == '\n') { } else if (c == '\\') {
file->buf_ptr = p;
c = handle_eob();
p = file->buf_ptr;
if (c == CH_EOF) {
/* XXX: better error message */
error("unterminated string");
} else if (c == '\\') {
/* ignore next char */
p++;
c = *p;
if (c == '\\') {
file->buf_ptr = p;
c = handle_eob();
p = file->buf_ptr;
}
if (c == '\n')
file->line_num++;
else if (c != CH_EOF)
p++;
}
} else if (c == '\n') {
file->line_num++; file->line_num++;
} else if (ch == '\\') { p++;
/* ignore next char */ } else {
inp(); p++;
if (ch == '\n')
file->line_num++;
} }
inp();
} }
minp(); p++;
break; break;
/* skip comments */ /* skip comments */
case '/': case '/':
file->buf_ptr = p;
ch = *p;
minp(); minp();
if (ch == '*') { if (ch == '*') {
parse_comment(); parse_comment();
} else if (ch == '/') { } else if (ch == '/') {
parse_line_comment(); parse_line_comment();
} }
p = file->buf_ptr;
break; break;
case '#': case '#':
minp(); p++;
if (start_of_line) { if (start_of_line) {
file->buf_ptr = p;
next_nomacro(); next_nomacro();
p = file->buf_ptr;
if (a == 0 && if (a == 0 &&
(tok == TOK_ELSE || tok == TOK_ELIF || tok == TOK_ENDIF)) (tok == TOK_ELSE || tok == TOK_ELIF || tok == TOK_ENDIF))
goto the_end; goto the_end;
@ -1763,16 +1837,14 @@ void preprocess_skip(void)
a--; a--;
} }
break; break;
case CH_EOF:
expect("#endif");
break;
default: default:
inp(); p++;
break; break;
} }
start_of_line = 0; start_of_line = 0;
} }
the_end: ; the_end: ;
file->buf_ptr = p;
} }
/* ParseState handling */ /* ParseState handling */
@ -2040,10 +2112,10 @@ void tok_print(int *str)
#endif #endif
/* parse after #define */ /* parse after #define */
void parse_define(void) static void parse_define(void)
{ {
Sym *s, *first, **ps; Sym *s, *first, **ps;
int v, t, varg, is_vaargs; int v, t, varg, is_vaargs, c;
TokenString str; TokenString str;
v = tok; v = tok;
@ -2053,7 +2125,10 @@ void parse_define(void)
first = NULL; first = NULL;
t = MACRO_OBJ; t = MACRO_OBJ;
/* '(' must be just after macro definition for MACRO_FUNC */ /* '(' must be just after macro definition for MACRO_FUNC */
if (ch == '(') { c = file->buf_ptr[0];
if (c == '\\')
c = handle_stray1(file->buf_ptr);
if (c == '(') {
next_nomacro(); next_nomacro();
next_nomacro(); next_nomacro();
ps = &first; ps = &first;
@ -2156,6 +2231,8 @@ static void preprocess(int is_bof)
define_undef(s); define_undef(s);
break; break;
case TOK_INCLUDE: case TOK_INCLUDE:
ch = file->buf_ptr[0];
/* XXX: incorrect if comments : use next_nomacro with a special mode */
skip_spaces(); skip_spaces();
if (ch == '<') { if (ch == '<') {
c = '>'; c = '>';
@ -2781,32 +2858,55 @@ void parse_number(const char *p)
} }
} }
/* Expand to a complete switch case for an operator that is either the
   single character c1 (token tok1) or c1 immediately followed by c2
   (token tok2).  Relies on the enclosing function's local variables
   c and p and on the PEEKC macro; the second character is consumed
   only when it matches c2. */
#define PARSE2(c1, tok1, c2, tok2) \
    case c1: \
        PEEKC(c, p); \
        if (c != c2) { \
            tok = tok1; \
        } else { \
            p++; \
            tok = tok2; \
        } \
        break;
/* return next token without macro substitution */ /* return next token without macro substitution */
static inline void next_nomacro1(void) static inline void next_nomacro1(void)
{ {
int b, t; int b, t, c;
char *q;
TokenSym *ts; TokenSym *ts;
uint8_t *p, *p1;
p = file->buf_ptr;
redo_no_start: redo_no_start:
switch(ch) { c = *p;
switch(c) {
case ' ': case ' ':
case '\t': case '\t':
case '\f': case '\f':
case '\v': case '\v':
case '\r': case '\r':
inp(); p++;
goto redo_no_start; goto redo_no_start;
case '\\': case '\\':
/* first look if it is in fact an end of buffer */ /* first look if it is in fact an end of buffer */
handle_eob(); if (p >= file->buf_end) {
if (ch != '\\') file->buf_ptr = p;
handle_eob();
p = file->buf_ptr;
if (p >= file->buf_end)
goto parse_eof;
else
goto redo_no_start;
} else {
file->buf_ptr = p;
ch = *p;
handle_stray();
p = file->buf_ptr;
goto redo_no_start; goto redo_no_start;
handle_stray(); }
goto redo_no_start; parse_eof:
case CH_EOF:
{ {
TCCState *s1 = tcc_state; TCCState *s1 = tcc_state;
@ -2837,6 +2937,7 @@ static inline void next_nomacro1(void)
s1->include_stack_ptr--; s1->include_stack_ptr--;
file = *s1->include_stack_ptr; file = *s1->include_stack_ptr;
inp(); inp();
p = file->buf_ptr;
goto redo_no_start; goto redo_no_start;
} }
} }
@ -2848,19 +2949,22 @@ static inline void next_nomacro1(void)
tok = TOK_LINEFEED; tok = TOK_LINEFEED;
} else { } else {
tok_flags |= TOK_FLAG_BOL; tok_flags |= TOK_FLAG_BOL;
inp(); p++;
goto redo_no_start; goto redo_no_start;
} }
break; break;
case '#': case '#':
minp(); /* XXX: simplify */
PEEKC(c, p);
if (tok_flags & TOK_FLAG_BOL) { if (tok_flags & TOK_FLAG_BOL) {
file->buf_ptr = p;
preprocess(tok_flags & TOK_FLAG_BOF); preprocess(tok_flags & TOK_FLAG_BOF);
p = file->buf_ptr;
goto redo_no_start; goto redo_no_start;
} else { } else {
if (ch == '#') { if (c == '#') {
inp(); p++;
tok = TOK_TWOSHARPS; tok = TOK_TWOSHARPS;
} else { } else {
tok = '#'; tok = '#';
@ -2883,34 +2987,57 @@ static inline void next_nomacro1(void)
case 'U': case 'V': case 'W': case 'X': case 'U': case 'V': case 'W': case 'X':
case 'Y': case 'Z': case 'Y': case 'Z':
case '_': case '_':
q = token_buf; parse_ident_fast:
*q++ = ch; p1 = p;
cinp(); p++;
parse_ident: for(;;) {
while (isid(ch) || isnum(ch)) { c = *p;
if (q >= token_buf + STRING_MAX_SIZE) if (!isid(c) && !isnum(c))
error("ident too long"); break;
*q++ = ch; p++;
cinp(); }
if (c != '\\') {
/* fast case : no stray found, so we have the full token */
ts = tok_alloc(p1, p - p1);
} else {
/* slower case */
cstr_reset(&tokcstr);
while (p1 < p) {
cstr_ccat(&tokcstr, *p1);
p1++;
}
p--;
PEEKC(c, p);
parse_ident_slow:
while (isid(c) || isnum(c)) {
cstr_ccat(&tokcstr, c);
PEEKC(c, p);
}
ts = tok_alloc(tokcstr.data, tokcstr.size);
} }
*q = '\0';
ts = tok_alloc(token_buf, q - token_buf);
tok = ts->tok; tok = ts->tok;
break; break;
case 'L': case 'L':
minp(); c = p[1];
if (ch == '\'') { if (c != '\\' && c != '\'' && c != '\"') {
tok = TOK_LCHAR; /* fast case */
goto char_const; goto parse_ident_fast;
} else {
PEEKC(c, p);
if (c == '\'') {
tok = TOK_LCHAR;
goto char_const;
} else if (c == '\"') {
tok = TOK_LSTR;
goto str_const;
} else {
cstr_reset(&tokcstr);
cstr_ccat(&tokcstr, 'L');
goto parse_ident_slow;
}
} }
if (ch == '\"') { break;
tok = TOK_LSTR;
goto str_const;
}
q = token_buf;
*q++ = 'L';
goto parse_ident;
case '0': case '1': case '2': case '3': case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7': case '4': case '5': case '6': case '7':
case '8': case '9': case '8': case '9':
@ -2920,11 +3047,11 @@ static inline void next_nomacro1(void)
prefixed by 'eEpP' */ prefixed by 'eEpP' */
parse_num: parse_num:
for(;;) { for(;;) {
t = ch; t = c;
cstr_ccat(&tokcstr, ch); cstr_ccat(&tokcstr, c);
cinp(); PEEKC(c, p);
if (!(isnum(ch) || isid(ch) || ch == '.' || if (!(isnum(c) || isid(c) || c == '.' ||
((ch == '+' || ch == '-') && ((c == '+' || c == '-') &&
(t == 'e' || t == 'E' || t == 'p' || t == 'P')))) (t == 'e' || t == 'E' || t == 'p' || t == 'P'))))
break; break;
} }
@ -2935,17 +3062,16 @@ static inline void next_nomacro1(void)
break; break;
case '.': case '.':
/* special dot handling because it can also start a number */ /* special dot handling because it can also start a number */
cinp(); PEEKC(c, p);
if (isnum(ch)) { if (isnum(c)) {
cstr_reset(&tokcstr); cstr_reset(&tokcstr);
cstr_ccat(&tokcstr, '.'); cstr_ccat(&tokcstr, '.');
goto parse_num; goto parse_num;
} } else if (c == '.') {
if (ch == '.') { PEEKC(c, p);
cinp(); if (c != '.')
if (ch != '.')
expect("'.'"); expect("'.'");
cinp(); PEEKC(c, p);
tok = TOK_DOTS; tok = TOK_DOTS;
} else { } else {
tok = '.'; tok = '.';
@ -2954,6 +3080,7 @@ static inline void next_nomacro1(void)
case '\'': case '\'':
tok = TOK_CCHAR; tok = TOK_CCHAR;
char_const: char_const:
file->buf_ptr = p;
inp(); inp();
b = getq(); b = getq();
/* this cast is needed if >= 128 */ /* this cast is needed if >= 128 */
@ -2962,11 +3089,13 @@ static inline void next_nomacro1(void)
tokc.i = b; tokc.i = b;
if (ch != '\'') if (ch != '\'')
error("unterminated character constant"); error("unterminated character constant");
inp(); p = file->buf_ptr;
p++;
break; break;
case '\"': case '\"':
tok = TOK_STR; tok = TOK_STR;
str_const: str_const:
file->buf_ptr = p;
inp(); inp();
cstr_reset(&tokcstr); cstr_reset(&tokcstr);
while (ch != '\"') { while (ch != '\"') {
@ -2983,18 +3112,19 @@ static inline void next_nomacro1(void)
else else
cstr_wccat(&tokcstr, '\0'); cstr_wccat(&tokcstr, '\0');
tokc.cstr = &tokcstr; tokc.cstr = &tokcstr;
inp(); p = file->buf_ptr;
p++;
break; break;
case '<': case '<':
cinp(); PEEKC(c, p);
if (ch == '=') { if (c == '=') {
cinp(); p++;
tok = TOK_LE; tok = TOK_LE;
} else if (ch == '<') { } else if (c == '<') {
cinp(); PEEKC(c, p);
if (ch == '=') { if (c == '=') {
cinp(); p++;
tok = TOK_A_SHL; tok = TOK_A_SHL;
} else { } else {
tok = TOK_SHL; tok = TOK_SHL;
@ -3005,14 +3135,14 @@ static inline void next_nomacro1(void)
break; break;
case '>': case '>':
cinp(); PEEKC(c, p);
if (ch == '=') { if (c == '=') {
cinp(); p++;
tok = TOK_GE; tok = TOK_GE;
} else if (ch == '>') { } else if (c == '>') {
cinp(); PEEKC(c, p);
if (ch == '=') { if (c == '=') {
cinp(); p++;
tok = TOK_A_SAR; tok = TOK_A_SAR;
} else { } else {
tok = TOK_SAR; tok = TOK_SAR;
@ -3022,113 +3152,82 @@ static inline void next_nomacro1(void)
} }
break; break;
case '!':
tok = ch;
cinp();
if (ch == '=') {
cinp();
tok = TOK_NE;
}
break;
case '=':
tok = ch;
cinp();
if (ch == '=') {
cinp();
tok = TOK_EQ;
}
break;
case '&': case '&':
tok = ch; PEEKC(c, p);
cinp(); if (c == '&') {
if (ch == '&') { p++;
cinp();
tok = TOK_LAND; tok = TOK_LAND;
} else if (ch == '=') { } else if (c == '=') {
cinp(); p++;
tok = TOK_A_AND; tok = TOK_A_AND;
} else {
tok = '&';
} }
break; break;
case '|': case '|':
tok = ch; PEEKC(c, p);
cinp(); if (c == '|') {
if (ch == '|') { p++;
cinp();
tok = TOK_LOR; tok = TOK_LOR;
} else if (ch == '=') { } else if (c == '=') {
cinp(); p++;
tok = TOK_A_OR; tok = TOK_A_OR;
} else {
tok = '|';
} }
break; break;
case '+': case '+':
tok = ch; PEEKC(c, p);
cinp(); if (c == '+') {
if (ch == '+') { p++;
cinp();
tok = TOK_INC; tok = TOK_INC;
} else if (ch == '=') { } else if (c == '=') {
cinp(); p++;
tok = TOK_A_ADD; tok = TOK_A_ADD;
} else {
tok = '+';
} }
break; break;
case '-': case '-':
tok = ch; PEEKC(c, p);
cinp(); if (c == '-') {
if (ch == '-') { p++;
cinp();
tok = TOK_DEC; tok = TOK_DEC;
} else if (ch == '=') { } else if (c == '=') {
cinp(); p++;
tok = TOK_A_SUB; tok = TOK_A_SUB;
} else if (ch == '>') { } else if (c == '>') {
cinp(); p++;
tok = TOK_ARROW; tok = TOK_ARROW;
} else {
tok = '-';
} }
break; break;
case '*': PARSE2('!', '!', '=', TOK_NE)
tok = ch; PARSE2('=', '=', '=', TOK_EQ)
cinp(); PARSE2('*', '*', '=', TOK_A_MUL)
if (ch == '=') { PARSE2('%', '%', '=', TOK_A_MOD)
cinp(); PARSE2('^', '^', '=', TOK_A_XOR)
tok = TOK_A_MUL;
}
break;
case '%':
tok = ch;
cinp();
if (ch == '=') {
cinp();
tok = TOK_A_MOD;
}
break;
case '^':
tok = ch;
cinp();
if (ch == '=') {
cinp();
tok = TOK_A_XOR;
}
break;
/* comments or operator */ /* comments or operator */
case '/': case '/':
minp(); PEEKC(c, p);
if (ch == '*') { if (c == '*') {
file->buf_ptr = p;
parse_comment(); parse_comment();
p = file->buf_ptr;
goto redo_no_start; goto redo_no_start;
} else if (ch == '/') { } else if (c == '/') {
file->buf_ptr = p;
parse_line_comment(); parse_line_comment();
p = file->buf_ptr;
goto redo_no_start; goto redo_no_start;
} else if (ch == '=') { } else if (c == '=') {
cinp(); p++;
tok = TOK_A_DIV; tok = TOK_A_DIV;
} else { } else {
tok = '/'; tok = '/';
@ -3147,13 +3246,14 @@ static inline void next_nomacro1(void)
case ':': case ':':
case '?': case '?':
case '~': case '~':
tok = ch; tok = c;
cinp(); p++;
break; break;
default: default:
error("unrecognized character \\x%02x", ch); error("unrecognized character \\x%02x", c);
break; break;
} }
file->buf_ptr = p;
tok_flags = 0; tok_flags = 0;
#if defined(PARSE_DEBUG) #if defined(PARSE_DEBUG)
printf("token = %s\n", get_tok_str(tok, &tokc)); printf("token = %s\n", get_tok_str(tok, &tokc));
@ -3427,6 +3527,7 @@ static int macro_subst_tok(TokenString *tok_str,
t = *macro_ptr; t = *macro_ptr;
} else { } else {
/* XXX: incorrect with comments */ /* XXX: incorrect with comments */
ch = file->buf_ptr[0];
while (is_space(ch) || ch == '\n') while (is_space(ch) || ch == '\n')
cinp(); cinp();
t = ch; t = ch;

View File

@ -1891,7 +1891,7 @@ static int tcc_load_ldscript(TCCState *s1)
int t; int t;
ch = file->buf_ptr[0]; ch = file->buf_ptr[0];
handle_eob(); ch = handle_eob();
for(;;) { for(;;) {
t = ld_next(s1, cmd, sizeof(cmd)); t = ld_next(s1, cmd, sizeof(cmd));
if (t == LD_TOK_EOF) if (t == LD_TOK_EOF)