parsing optimizations

2002-11-23 18:15:17 +00:00 · 2002-11-23 18:15:17 +00:00 · 8901fbeef1
parent b81d4ba6b3
commit 8901fbeef1
2 changed files with 319 additions and 218 deletions
--- a/tcc.c
+++ b/tcc.c
@ -1118,11 +1118,13 @@ void test_lvalue(void)
 TokenSym *tok_alloc(const char *str, int len)
 {
    TokenSym *ts, **pts, **ptable;
-    int h, i;
+    int i;
    unsigned int h;
    h = 1;
    for(i=0;i<len;i++)
-        h = (h * 263 +  ((unsigned char *)str)[i]) & (TOK_HASH_SIZE - 1);
+        h = h * 263 +  ((unsigned char *)str)[i];
    h &= (TOK_HASH_SIZE - 1);
    pts = &hash_ident[h];
    while (1) {
@ -1522,7 +1524,12 @@ static int tcc_peekc_slow(BufferedFile *bf)
    /* only tries to read if really end of buffer */
    if (bf->buf_ptr >= bf->buf_end) {
        if (bf->fd != -1) {
-            len = read(bf->fd, bf->buffer, IO_BUF_SIZE);
+#if defined(PARSE_DEBUG)
            len = 8;
 #else
            len = IO_BUF_SIZE;
 #endif
            len = read(bf->fd, bf->buffer, len);
            if (len < 0)
                len = 0;
        } else {
@ -1541,13 +1548,11 @@ static int tcc_peekc_slow(BufferedFile *bf)
    }
 }
-/* no need to put that inline */
+/* return the current character, handling end of block if necessary
-void handle_eob(void)
+   (but not stray) */
 static int handle_eob(void)
 {
-    /* no need to do anything if not at EOB */
+    return tcc_peekc_slow(file);
    if (file->buf_ptr < file->buf_end)
        return;
    ch = tcc_peekc_slow(file);
 }
 /* read next char from current input file and handle end of input buffer */
@ -1556,7 +1561,7 @@ static inline void inp(void)
    ch = *(++(file->buf_ptr));
    /* end of buffer/file handling */
    if (ch == CH_EOB)
-        handle_eob();
+        ch = handle_eob();
 }
 /* handle '\[\r]\n' */
@ -1580,6 +1585,40 @@ static void handle_stray(void)
    }
 }
 /* Process a '\' found at position p and return the character that is
    current afterwards. When p is at or past the end of the buffered data,
    handle_eob() is consulted first and its result is returned directly
    unless it is again a '\'. The stray itself (including the \\n case) is
    delegated to handle_stray(), which is expected to report an error if
    an incorrect char follows the stray. On return file->buf_ptr holds the
    new position, so callers must reload their local pointer from it. */
 static int handle_stray1(uint8_t *p)
 {
    int c;
    if (p >= file->buf_end) {
        /* publish our position, then let handle_eob() look at the input */
        file->buf_ptr = p;
        c = handle_eob();
        p = file->buf_ptr;
        /* anything but a backslash is handed back unchanged */
        if (c != '\\')
            return c;
    }
    /* a backslash is current: handle_stray() works on the globals
       'ch' and file->buf_ptr, so synchronise them first */
    file->buf_ptr = p;
    ch = *p;
    handle_stray();
    p = file->buf_ptr;
    return *p;
 }
 /* handle the complicated stray case */
 /* PEEKC(c, p): advance the pointer 'p' by one and load the character it
    now points to into 'c'. If that character is a '\', handle_stray1(p)
    is called, its return value becomes 'c', and 'p' is reloaded from
    file->buf_ptr (which handle_stray1 updates).
    Notes for callers:
    - both arguments are written to and are expanded several times, so
      they must be plain lvalues without side effects;
    - the macro reads the global 'file';
    - it expands to a bare braced block, not a do { } while (0) statement. */
 #define PEEKC(c, p)\
 {\
    p++;\
    c = *p;\
    if (c == '\\') {\
        c = handle_stray1(p);\
        p = file->buf_ptr;\
    }\
 }
 /* input with '\[\r]\n' handling. Note that this function cannot
   handle other characters after '\', so you cannot call it inside
   strings or comments */
@ -1606,8 +1645,8 @@ static void parse_comment(void)
    int c;
    /* C comments */
    minp();
    p = file->buf_ptr;
    p++;
    for(;;) {
        /* fast skip loop */
        for(;;) {
@ -1628,49 +1667,49 @@ static void parse_comment(void)
            p++;
            for(;;) {
                c = *p;
-                if (c == '/') {
+                if (c == '*') {
                    p++;
                } else if (c == '/') {
                    goto end_of_comment;
                } else if (c == '\\') {
-                    if (p >= file->buf_end) {
+                    file->buf_ptr = p;
-                        file->buf_ptr = p;
+                    c = handle_eob();
-                        handle_eob();
+                    if (c == '\\') {
-                        p = file->buf_ptr;
+                        /* skip '\\n', but if '\' followed by another
-                        if (p >= file->buf_end)
+                           char, behave as if a stray was parsed */
-                            goto eof_found;
+                        ch = file->buf_ptr[0];
-                        continue;
+                        while (ch == '\\') {
                            inp();
                            if (ch == '\n') {
                                file->line_num++;
                                inp();
                            } else if (ch == '\r') {
                                inp();
                                if (ch == '\n') {
                                    file->line_num++;
                                    inp();
                                }
                            } else {
                                p = file->buf_ptr;
                                break;
                            }
                        }
                    }
-                    p++;
+                    p = file->buf_ptr;
                    c = *p;
                    if (c == '\n') {
                        file->line_num++;
                        p++;
                    } else if (c == '\r') {
                        p++;
                        c = *p;
                        if (c != '\n')
                            break;
                        file->line_num++;
                        p++;
                    } else {
                        break;
                    }
                } else if (c == '*') {
                    p++;
                } else {
                    break;
                }
            }
        } else if (p >= file->buf_end) {
            file->buf_ptr = p;
            handle_eob();
            p = file->buf_ptr;
            if (p >= file->buf_end) {
            eof_found:
                error("unexpected end of file in comment");
            }
        } else {
-            /* stray */
+            /* stray, eob or eof */
-            p++;
+            file->buf_ptr = p;
            c = handle_eob();
            p = file->buf_ptr;
            if (c == CH_EOF) {
                error("unexpected end of file in comment");
            } else if (c == '\\') {
                p++;
            }
        }
    }
 end_of_comment:
@ -1697,63 +1736,98 @@ static inline void skip_spaces(void)
   #if/#endif */
 void preprocess_skip(void)
 {
-    int a, start_of_line, sep;
+    int a, start_of_line, sep, c;
-    
+    uint8_t *p;
    p = file->buf_ptr;
    start_of_line = 1;
    a = 0;
    for(;;) {
    redo_no_start:
-        switch(ch) {
+        c = *p;
        switch(c) {
        case ' ':
        case '\t':
        case '\f':
        case '\v':
        case '\r':
-            inp();
+            p++;
            goto redo_no_start;
        case '\n':
            start_of_line = 1;
            file->line_num++;
-            inp();
+            p++;
            goto redo_no_start;
        case '\\':
-            handle_stray();
+            file->buf_ptr = p;
            c = handle_eob();
            if (c == CH_EOF) {
                expect("#endif");
            } else if (c == '\\') {
                /* XXX: incorrect: should not give an error */
                ch = file->buf_ptr[0];
                handle_stray();
            }
            p = file->buf_ptr;
            goto redo_no_start;
            /* skip strings */
        case '\"':
        case '\'':
-            sep = ch;
+            sep = c;
-            inp();
+            p++;
-            while (ch != sep) {
+            for(;;) {
-                /* XXX: better error message */
+                c = *p;
-                if (ch == TOK_EOF) {
+                if (c == sep) {
-                    error("unterminated string");
+                    break;
-                } else if (ch == '\n') {
+                } else if (c == '\\') {
                    file->buf_ptr = p;
                    c = handle_eob();
                    p = file->buf_ptr;
                    if (c == CH_EOF) {
                        /* XXX: better error message */
                        error("unterminated string");
                    } else if (c == '\\') {
                        /* ignore next char */
                        p++;
                        c = *p;
                        if (c == '\\') {
                            file->buf_ptr = p;
                            c = handle_eob();
                            p = file->buf_ptr;
                        }
                        if (c == '\n')
                            file->line_num++;
                        else if (c != CH_EOF)
                            p++;
                    }
                } else if (c == '\n') {
                    file->line_num++;
-                } else if (ch == '\\') {
+                    p++;
-                    /* ignore next char */
+                } else {
-                    inp();
+                    p++;
                    if (ch == '\n')
                        file->line_num++;
                }
                inp();
            }
-            minp();
+            p++;
            break;
            /* skip comments */
        case '/':
            file->buf_ptr = p;
            ch = *p;
            minp();
            if (ch == '*') {
                parse_comment();
            } else if (ch == '/') {
                parse_line_comment();
            }
            p = file->buf_ptr;
            break;
        case '#':
-            minp();
+            p++;
            if (start_of_line) {
                file->buf_ptr = p;
                next_nomacro();
                p = file->buf_ptr;
                if (a == 0 && 
                    (tok == TOK_ELSE || tok == TOK_ELIF || tok == TOK_ENDIF))
                    goto the_end;
@ -1763,16 +1837,14 @@ void preprocess_skip(void)
                    a--;
            }
            break;
        case CH_EOF:
            expect("#endif");
            break;
        default:
-            inp();
+            p++;
            break;
        }
        start_of_line = 0;
    }
 the_end: ;
    file->buf_ptr = p;
 }
 /* ParseState handling */
@ -2040,10 +2112,10 @@ void tok_print(int *str)
 #endif
 /* parse after #define */
-void parse_define(void)
+static void parse_define(void)
 {
    Sym *s, *first, **ps;
-    int v, t, varg, is_vaargs;
+    int v, t, varg, is_vaargs, c;
    TokenString str;
    v = tok;
@ -2053,7 +2125,10 @@ void parse_define(void)
    first = NULL;
    t = MACRO_OBJ;
    /* '(' must be just after macro definition for MACRO_FUNC */
-    if (ch == '(') {
+    c = file->buf_ptr[0];
    if (c == '\\')
        c = handle_stray1(file->buf_ptr);
    if (c == '(') {
        next_nomacro();
        next_nomacro();
        ps = &first;
@ -2156,6 +2231,8 @@ static void preprocess(int is_bof)
            define_undef(s);
        break;
    case TOK_INCLUDE:
        ch = file->buf_ptr[0];
        /* XXX: incorrect if comments : use next_nomacro with a special mode */
        skip_spaces();
        if (ch == '<') {
            c = '>';
@ -2781,32 +2858,55 @@ void parse_number(const char *p)
    }
 }
 /* PARSE2(c1, tok1, c2, tok2): expands to a complete 'case c1:' arm of a
    switch, terminated by 'break'. It peeks at the next character with
    PEEKC(c, p); if that character equals c2 it is consumed (p++) and
    'tok' is set to tok2, otherwise 'tok' is set to tok1 and the peeked
    character is left for the next token.
    The expansion relies on names from the enclosing scope: the variables
    'c' and 'p' and the assignable 'tok'. Do not put a ';' or a 'break'
    after an invocation: the macro supplies its own 'break'. */
 #define PARSE2(c1, tok1, c2, tok2)              \
    case c1:                                    \
        PEEKC(c, p);                            \
        if (c == c2) {                          \
            p++;                                \
            tok = tok2;                         \
        } else {                                \
            tok = tok1;                         \
        }                                       \
        break;
 /* return next token without macro substitution */
 static inline void next_nomacro1(void)
 {
-    int b, t;
+    int b, t, c;
    char *q;
    TokenSym *ts;
    uint8_t *p, *p1;
    p = file->buf_ptr;
 redo_no_start:
-    switch(ch) {
+    c = *p;
    switch(c) {
    case ' ':
    case '\t':
    case '\f':
    case '\v':
    case '\r':
-        inp();
+        p++;
        goto redo_no_start;
    case '\\':
        /* first look if it is in fact an end of buffer */
-        handle_eob();
+        if (p >= file->buf_end) {
-        if (ch != '\\')
+            file->buf_ptr = p;
            handle_eob();
            p = file->buf_ptr;
            if (p >= file->buf_end)
                goto parse_eof;
            else
                goto redo_no_start;
        } else {
            file->buf_ptr = p;
            ch = *p;
            handle_stray();
            p = file->buf_ptr;
            goto redo_no_start;
-        handle_stray();
+        }
-        goto redo_no_start;
+    parse_eof:
    case CH_EOF:
        {
            TCCState *s1 = tcc_state;
@ -2837,6 +2937,7 @@ static inline void next_nomacro1(void)
                s1->include_stack_ptr--;
                file = *s1->include_stack_ptr;
                inp();
                p = file->buf_ptr;
                goto redo_no_start;
            }
        }
@ -2848,19 +2949,22 @@ static inline void next_nomacro1(void)
            tok = TOK_LINEFEED;
        } else {
            tok_flags |= TOK_FLAG_BOL;
-            inp();
+            p++;
            goto redo_no_start;
        }
        break;
    case '#':
-        minp();
+        /* XXX: simplify */
        PEEKC(c, p);
        if (tok_flags & TOK_FLAG_BOL) {
            file->buf_ptr = p;
            preprocess(tok_flags & TOK_FLAG_BOF);
            p = file->buf_ptr;
            goto redo_no_start;
        } else {
-            if (ch == '#') {
+            if (c == '#') {
-                inp();
+                p++;
                tok = TOK_TWOSHARPS;
            } else {
                tok = '#';
@ -2883,34 +2987,57 @@ static inline void next_nomacro1(void)
    case 'U': case 'V': case 'W': case 'X':
    case 'Y': case 'Z': 
    case '_':
-        q = token_buf;
+    parse_ident_fast:
-        *q++ = ch;
+        p1 = p;
-        cinp();
+        p++;
-    parse_ident:
+        for(;;) {
-        while (isid(ch) || isnum(ch)) {
+            c = *p;
-            if (q >= token_buf + STRING_MAX_SIZE)
+            if (!isid(c) && !isnum(c))
-                error("ident too long");
+                break;
-            *q++ = ch;
+            p++;
-            cinp();
+        }
        if (c != '\\') {
            /* fast case : no stray found, so we have the full token */
            ts = tok_alloc(p1, p - p1);
        } else {
            /* slower case */
            cstr_reset(&tokcstr);
            while (p1 < p) {
                cstr_ccat(&tokcstr, *p1);
                p1++;
            }
            p--;
            PEEKC(c, p);
        parse_ident_slow:
            while (isid(c) || isnum(c)) {
                cstr_ccat(&tokcstr, c);
                PEEKC(c, p);
            }
            ts = tok_alloc(tokcstr.data, tokcstr.size);
        }
        *q = '\0';
        ts = tok_alloc(token_buf, q - token_buf);
        tok = ts->tok;
        break;
    case 'L':
-        minp();
+        c = p[1];
-        if (ch == '\'') {
+        if (c != '\\' && c != '\'' && c != '\"') {
-            tok = TOK_LCHAR;
+            /* fast case */
-            goto char_const;
+            goto parse_ident_fast;
        } else {
            PEEKC(c, p);
            if (c == '\'') {
                tok = TOK_LCHAR;
                goto char_const; 
            } else if (c == '\"') {
                tok = TOK_LSTR;
                goto str_const;
            } else {
                cstr_reset(&tokcstr);
                cstr_ccat(&tokcstr, 'L');
                goto parse_ident_slow;
            }
        }
-        if (ch == '\"') {
+        break;
            tok = TOK_LSTR;
            goto str_const;
        }
        q = token_buf;
        *q++ = 'L';
        goto parse_ident;
    case '0': case '1': case '2': case '3':
    case '4': case '5': case '6': case '7':
    case '8': case '9':
@ -2920,11 +3047,11 @@ static inline void next_nomacro1(void)
           prefixed by 'eEpP' */
    parse_num:
        for(;;) {
-            t = ch;
+            t = c;
-            cstr_ccat(&tokcstr, ch);
+            cstr_ccat(&tokcstr, c);
-            cinp();
+            PEEKC(c, p);
-            if (!(isnum(ch) || isid(ch) || ch == '.' ||
+            if (!(isnum(c) || isid(c) || c == '.' ||
-                  ((ch == '+' || ch == '-') && 
+                  ((c == '+' || c == '-') && 
                   (t == 'e' || t == 'E' || t == 'p' || t == 'P'))))
                break;
        }
@ -2935,17 +3062,16 @@ static inline void next_nomacro1(void)
        break;
    case '.':
        /* special dot handling because it can also start a number */
-        cinp();
+        PEEKC(c, p);
-        if (isnum(ch)) {
+        if (isnum(c)) {
            cstr_reset(&tokcstr);
            cstr_ccat(&tokcstr, '.');
            goto parse_num;
-        }
+        } else if (c == '.') {
-        if (ch == '.') {
+            PEEKC(c, p);
-            cinp();
+            if (c != '.')
            if (ch != '.')
                expect("'.'");
-            cinp();
+            PEEKC(c, p);
            tok = TOK_DOTS;
        } else {
            tok = '.';
@ -2954,6 +3080,7 @@ static inline void next_nomacro1(void)
    case '\'':
        tok = TOK_CCHAR;
    char_const:
        file->buf_ptr = p;
        inp();
        b = getq();
        /* this cast is needed if >= 128 */
@ -2962,11 +3089,13 @@ static inline void next_nomacro1(void)
        tokc.i = b;
        if (ch != '\'')
            error("unterminated character constant");
-        inp();
+        p = file->buf_ptr;
        p++;
        break;
    case '\"':
        tok = TOK_STR;
    str_const:
        file->buf_ptr = p;
        inp();
        cstr_reset(&tokcstr);
        while (ch != '\"') {
@ -2983,18 +3112,19 @@ static inline void next_nomacro1(void)
        else
            cstr_wccat(&tokcstr, '\0');
        tokc.cstr = &tokcstr;
-        inp();
+        p = file->buf_ptr;
        p++;
        break;
    case '<':
-        cinp();
+        PEEKC(c, p);
-        if (ch == '=') {
+        if (c == '=') {
-            cinp();
+            p++;
            tok = TOK_LE;
-        } else if (ch == '<') {
+        } else if (c == '<') {
-            cinp();
+            PEEKC(c, p);
-            if (ch == '=') {
+            if (c == '=') {
-                cinp();
+                p++;
                tok = TOK_A_SHL;
            } else {
                tok = TOK_SHL;
@ -3005,14 +3135,14 @@ static inline void next_nomacro1(void)
        break;
    case '>':
-        cinp();
+        PEEKC(c, p);
-        if (ch == '=') {
+        if (c == '=') {
-            cinp();
+            p++;
            tok = TOK_GE;
-        } else if (ch == '>') {
+        } else if (c == '>') {
-            cinp();
+            PEEKC(c, p);
-            if (ch == '=') {
+            if (c == '=') {
-                cinp();
+                p++;
                tok = TOK_A_SAR;
            } else {
                tok = TOK_SAR;
@ -3022,113 +3152,82 @@ static inline void next_nomacro1(void)
        }
        break;
    case '!':
        tok = ch;
        cinp();
        if (ch == '=') {
            cinp();
            tok = TOK_NE;
        }
        break;
    case '=':
        tok = ch;
        cinp();
        if (ch == '=') {
            cinp();
            tok = TOK_EQ;
        }
        break;
    case '&':
-        tok = ch;
+        PEEKC(c, p);
-        cinp();
+        if (c == '&') {
-        if (ch == '&') {
+            p++;
            cinp();
            tok = TOK_LAND;
-        } else if (ch == '=') {
+        } else if (c == '=') {
-            cinp();
+            p++;
            tok = TOK_A_AND;
        } else {
            tok = '&';
        }
        break;
    case '|':
-        tok = ch;
+        PEEKC(c, p);
-        cinp();
+        if (c == '|') {
-        if (ch == '|') {
+            p++;
            cinp();
            tok = TOK_LOR;
-        } else if (ch == '=') {
+        } else if (c == '=') {
-            cinp();
+            p++;
            tok = TOK_A_OR;
        } else {
            tok = '|';
        }
        break;
    case '+':
-        tok = ch;
+        PEEKC(c, p);
-        cinp();
+        if (c == '+') {
-        if (ch == '+') {
+            p++;
            cinp();
            tok = TOK_INC;
-        } else if (ch == '=') {
+        } else if (c == '=') {
-            cinp();
+            p++;
            tok = TOK_A_ADD;
        } else {
            tok = '+';
        }
        break;
    case '-':
-        tok = ch;
+        PEEKC(c, p);
-        cinp();
+        if (c == '-') {
-        if (ch == '-') {
+            p++;
            cinp();
            tok = TOK_DEC;
-        } else if (ch == '=') {
+        } else if (c == '=') {
-            cinp();
+            p++;
            tok = TOK_A_SUB;
-        } else if (ch == '>') {
+        } else if (c == '>') {
-            cinp();
+            p++;
            tok = TOK_ARROW;
        } else {
            tok = '-';
        }
        break;
-    case '*':
+    PARSE2('!', '!', '=', TOK_NE)
-        tok = ch;
+    PARSE2('=', '=', '=', TOK_EQ)
-        cinp();
+    PARSE2('*', '*', '=', TOK_A_MUL)
-        if (ch == '=') {
+    PARSE2('%', '%', '=', TOK_A_MOD)
-            cinp();
+    PARSE2('^', '^', '=', TOK_A_XOR)
            tok = TOK_A_MUL;
        }
        break;
    case '%':
        tok = ch;
        cinp();
        if (ch == '=') {
            cinp();
            tok = TOK_A_MOD;
        }
        break;
    case '^':
        tok = ch;
        cinp();
        if (ch == '=') {
            cinp();
            tok = TOK_A_XOR;
        }
        break;
        /* comments or operator */
    case '/':
-        minp();
+        PEEKC(c, p);
-        if (ch == '*') {
+        if (c == '*') {
            file->buf_ptr = p;
            parse_comment();
            p = file->buf_ptr;
            goto redo_no_start;
-        } else if (ch == '/') {
+        } else if (c == '/') {
            file->buf_ptr = p;
            parse_line_comment();
            p = file->buf_ptr;
            goto redo_no_start;
-        } else if (ch == '=') {
+        } else if (c == '=') {
-            cinp();
+            p++;
            tok = TOK_A_DIV;
        } else {
            tok = '/';
@ -3147,13 +3246,14 @@ static inline void next_nomacro1(void)
    case ':':
    case '?':
    case '~':
-        tok = ch;
+        tok = c;
-        cinp();
+        p++;
        break;
    default:
-        error("unrecognized character \\x%02x", ch);
+        error("unrecognized character \\x%02x", c);
        break;
    }
    file->buf_ptr = p;
    tok_flags = 0;
 #if defined(PARSE_DEBUG)
    printf("token = %s\n", get_tok_str(tok, &tokc));
@ -3427,6 +3527,7 @@ static int macro_subst_tok(TokenString *tok_str,
                t = *macro_ptr;
            } else {
                /* XXX: incorrect with comments */
                ch = file->buf_ptr[0];
                while (is_space(ch) || ch == '\n')
                    cinp();
                t = ch;
--- a/tccelf.c
+++ b/tccelf.c
@ -1891,7 +1891,7 @@ static int tcc_load_ldscript(TCCState *s1)
    int t;
    ch = file->buf_ptr[0];
-    handle_eob();
+    ch = handle_eob();
    for(;;) {
        t = ld_next(s1, cmd, sizeof(cmd));
        if (t == LD_TOK_EOF)