From 7893a9ecdf6fb70afb4a50ff73643e615e87a6fd Mon Sep 17 00:00:00 2001 From: bellard Date: Mon, 6 Jan 2003 20:21:42 +0000 Subject: [PATCH] asm support --- i386-asm.c | 1077 ++++++++++++++++++++++++++++++++++++++++++++++++++++ i386-asm.h | 444 ++++++++++++++++++++++ tccasm.c | 744 ++++++++++++++++++++++++++++++++++++ 3 files changed, 2265 insertions(+) create mode 100644 i386-asm.c create mode 100644 i386-asm.h create mode 100644 tccasm.c diff --git a/i386-asm.c b/i386-asm.c new file mode 100644 index 0000000..654ca76 --- /dev/null +++ b/i386-asm.c @@ -0,0 +1,1077 @@ +/* + * i386 specific functions for TCC assembler + * + * Copyright (c) 2001, 2002 Fabrice Bellard + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define MAX_OPERANDS 3 + +typedef struct ASMInstr { + uint16_t sym; + uint16_t opcode; + uint16_t instr_type; +#define OPC_JMP 0x01 /* jmp operand */ +#define OPC_B 0x02 /* only used zith OPC_WL */ +#define OPC_WL 0x04 /* accepts w, l or no suffix */ +#define OPC_BWL (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */ +#define OPC_REG 0x08 /* register is added to opcode */ +#define OPC_MODRM 0x10 /* modrm encoding */ +#define OPC_FWAIT 0x20 /* add fwait opcode */ +#define OPC_TEST 0x40 /* test opcodes */ +#define OPC_SHIFT 0x80 /* shift opcodes */ +#define OPC_D16 0x0100 /* generate data16 prefix */ +#define OPC_ARITH 0x0200 /* arithmetic opcodes */ +#define OPC_SHORTJMP 0x0400 /* short jmp operand */ +#define OPC_FARITH 0x0800 /* FPU arithmetic opcodes */ +#define OPC_GROUP_SHIFT 13 + +/* in order to compress the operand type, we use specific operands and + we or only with EA */ +#define OPT_REG8 0 /* warning: value is hardcoded from TOK_ASM_xxx */ +#define OPT_REG16 1 /* warning: value is hardcoded from TOK_ASM_xxx */ +#define OPT_REG32 2 /* warning: value is hardcoded from TOK_ASM_xxx */ +#define OPT_MMX 3 /* warning: value is hardcoded from TOK_ASM_xxx */ +#define OPT_SSE 4 /* warning: value is hardcoded from TOK_ASM_xxx */ +#define OPT_CR 5 /* warning: value is hardcoded from TOK_ASM_xxx */ +#define OPT_TR 6 /* warning: value is hardcoded from TOK_ASM_xxx */ +#define OPT_DB 7 /* warning: value is hardcoded from TOK_ASM_xxx */ +#define OPT_SEG 8 +#define OPT_ST 9 +#define OPT_IM8 10 +#define OPT_IM8S 11 +#define OPT_IM16 12 +#define OPT_IM32 13 +#define OPT_EAX 14 /* %al, %ax or %eax register */ +#define OPT_ST0 15 /* %st(0) register */ +#define OPT_CL 16 /* %cl register */ +#define OPT_DX 17 /* %dx register */ +#define OPT_ADDR 18 /* OP_EA with only offset */ +#define OPT_INDIR 19 /* *(expr) */ + +/* composite types */ +#define OPT_COMPOSITE_FIRST 20 +#define OPT_IM 20 /* IM8 | IM16 | IM32 */ +#define OPT_REG 21 /* REG8 | REG16 | REG32 */ +#define OPT_REGW 22 /* REG16 | REG32 */ +#define OPT_IMW 23 /* IM16 | IM32 */ + +/* can be ored with any OPT_xxx */ +#define OPT_EA 0x80 + + uint8_t nb_ops; + uint8_t op_type[MAX_OPERANDS]; /* see OP_xxx */ +} ASMInstr; + +typedef struct Operand { + uint32_t type; +#define OP_REG8 (1 << OPT_REG8) +#define OP_REG16 (1 << OPT_REG16) +#define OP_REG32 (1 << OPT_REG32) +#define OP_MMX (1 << OPT_MMX) +#define OP_SSE (1 << OPT_SSE) +#define OP_CR (1 << OPT_CR) +#define OP_TR (1 << OPT_TR) +#define OP_DB (1 << OPT_DB) +#define OP_SEG (1 << OPT_SEG) +#define OP_ST (1 << OPT_ST) +#define OP_IM8 (1 << OPT_IM8) +#define OP_IM8S (1 << OPT_IM8S) +#define OP_IM16 (1 << OPT_IM16) +#define OP_IM32 (1 << OPT_IM32) +#define OP_EAX (1 << OPT_EAX) +#define OP_ST0 (1 << OPT_ST0) +#define OP_CL (1 << OPT_CL) +#define OP_DX (1 << OPT_DX) +#define OP_ADDR (1 << OPT_ADDR) +#define OP_INDIR (1 << OPT_INDIR) + +#define OP_EA 0x40000000 +#define OP_REG (OP_REG8 | OP_REG16 | OP_REG32) +#define OP_IM OP_IM32 + int8_t reg; /* register, -1 if none */ + int8_t reg2; /* second register, -1 if none */ + uint8_t shift; + ExprValue e; +} Operand; + +static const uint8_t reg_to_size[5] = { + [OP_REG8] = 0, + [OP_REG16] = 1, + [OP_REG32] = 2, +}; + +#define WORD_PREFIX_OPCODE 0x66 + +#define NB_TEST_OPCODES 30 + +static const uint8_t test_bits[NB_TEST_OPCODES] = { + 0x00, /* o */ + 0x01, /* no */ + 0x02, /* b */ + 0x02, /* c */ + 0x02, /* nae */ + 0x03, /* nb */ + 0x03, /* nc */ + 0x03, /* ae */ + 0x04, /* e */ + 0x04, /* z */ + 0x05, /* ne */ + 0x05, /* nz */ + 0x06, /* be */ + 0x06, /* na */ + 0x07, /* nbe */ + 0x07, /* a */ + 0x08, /* s */ + 0x09, /* ns */ + 0x0a, /* p */ + 0x0a, /* pe */ + 0x0b, /* np */ + 0x0b, /* po */ + 0x0c, /* l */ + 0x0c, /* nge */ + 0x0d, /* nl */ + 0x0d, /* ge */ + 0x0e, /* le */ + 0x0e, /* ng */ + 0x0f, /* nle */ + 0x0f, /* g */ +}; + +static const ASMInstr asm_instrs[] = { +#define ALT(x) x +#define DEF_ASM_OP0(name, opcode) +#define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 0 }, +#define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 1, { op0 }}, +#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 2, { op0, op1 }}, +#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 3, { op0, op1, op2 }}, +#include "i386-asm.h" + + /* last operation */ + { 0, }, +}; + +static const uint16_t op0_codes[] = { +#define ALT(x) +#define DEF_ASM_OP0(x, opcode) opcode, +#define DEF_ASM_OP0L(name, opcode, group, instr_type) +#define DEF_ASM_OP1(name, opcode, group, instr_type, op0) +#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) +#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) +#include "i386-asm.h" +}; + +static inline int get_reg_shift(TCCState *s1) +{ + int shift, v; + + v = asm_int_expr(s1); + switch(v) { + case 1: + shift = 0; + break; + case 2: + shift = 1; + break; + case 4: + shift = 2; + break; + case 8: + shift = 3; + break; + default: + expect("1, 2, 4 or 8 constant"); + shift = 0; + break; + } + return shift; +} + +static int asm_parse_reg(void) +{ + int reg; + if (tok != '%') + goto error_32; + next(); + if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) { + reg = tok - TOK_ASM_eax; + next(); + return reg; + } else { + error_32: + expect("32 bit register"); + return 0; + } +} + +static void parse_operand(TCCState *s1, Operand *op) +{ + ExprValue e; + int reg, indir; + const char *p; + + indir = 0; + if (tok == '*') { + next(); + indir = OP_INDIR; + } + + if (tok == '%') { + next(); + if (tok >= TOK_ASM_al && tok <= TOK_ASM_db7) { + reg = tok - TOK_ASM_al; + op->type = 1 << (reg >> 3); /* WARNING: do not change constant order */ + op->reg = reg & 7; + if ((op->type & OP_REG) && op->reg == TREG_EAX) + op->type |= OP_EAX; + else if (op->type == OP_REG8 && op->reg == TREG_ECX) + op->type |= OP_CL; + else if (op->type == OP_REG16 && op->reg == TREG_EDX) + op->type |= OP_DX; + } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) { + op->type = OP_DB; + op->reg = tok - TOK_ASM_dr0; + } else if (tok >= TOK_ASM_es && tok <= TOK_ASM_gs) { + op->type = OP_SEG; + op->reg = tok - TOK_ASM_es; + } else if (tok == TOK_ASM_st) { + op->type = OP_ST; + op->reg = 0; + next(); + if (tok == '(') { + next(); + if (tok != TOK_PPNUM) + goto reg_error; + p = tokc.cstr->data; + reg = p[0] - '0'; + if ((unsigned)reg >= 8 || p[1] != '\0') + goto reg_error; + op->reg = reg; + next(); + skip(')'); + } + if (op->reg == 0) + op->type |= OP_ST0; + goto no_skip; + } else { + reg_error: + error("unknown register"); + } + next(); + no_skip: ; + } else if (tok == '$') { + /* constant value */ + next(); + asm_expr(s1, &e); + op->type = OP_IM32; + op->e.v = e.v; + op->e.sym = e.sym; + if (!op->e.sym) { + if (op->e.v == (uint8_t)op->e.v) + op->type |= OP_IM8; + if (op->e.v == (int8_t)op->e.v) + op->type |= OP_IM8S; + if (op->e.v == (uint16_t)op->e.v) + op->type |= OP_IM16; + } + } else { + /* address(reg,reg2,shift) with all variants */ + op->type = OP_EA; + op->reg = -1; + op->reg2 = -1; + op->shift = 0; + if (tok != '(') { + asm_expr(s1, &e); + op->e.v = e.v; + op->e.sym = e.sym; + } else { + op->e.v = 0; + op->e.sym = NULL; + } + if (tok == '(') { + next(); + if (tok != ',') { + op->reg = asm_parse_reg(); + } + if (tok == ',') { + next(); + if (tok != ',') { + op->reg2 = asm_parse_reg(); + } + skip(','); + op->shift = get_reg_shift(s1); + } + skip(')'); + } + if (op->reg == -1 && op->reg2 == -1) + op->type |= OP_ADDR; + } + op->type |= indir; +} + +/* XXX: unify with C code output ? */ +static void gen_expr32(ExprValue *pe) +{ + if (pe->sym) + greloc(cur_text_section, pe->sym, ind, R_386_32); + gen_le32(pe->v); +} + +/* XXX: unify with C code output ? */ +static void gen_disp32(ExprValue *pe) +{ + Sym *sym; + sym = pe->sym; + if (sym) { + if (sym->r == cur_text_section->sh_num) { + /* same section: we can output an absolute value. Note + that the TCC compiler behaves differently here because + it always outputs a relocation to ease (future) code + elimination in the linker */ + gen_le32(pe->v + (long)sym->next - ind - 4); + } else { + greloc(cur_text_section, sym, ind, R_386_PC32); + gen_le32(pe->v - 4); + } + } else { + /* put an empty PC32 relocation */ + put_elf_reloc(symtab_section, cur_text_section, + ind, R_386_PC32, 0); + gen_le32(pe->v - 4); + } +} + + +static void gen_le16(int v) +{ + g(v); + g(v >> 8); +} + +/* generate the modrm operand */ +static inline void asm_modrm(int reg, Operand *op) +{ + int mod, reg1, reg2; + + if (op->type & (OP_REG | OP_MMX | OP_SSE)) { + g(0xc0 + (reg << 3) + op->reg); + } else if (op->reg == -1 && op->reg2 == -1) { + /* displacement only */ + g(0x05 + (reg << 3)); + gen_expr32(&op->e); + } else { + /* fist compute displacement encoding */ + if (op->e.v == 0 && !op->e.sym && op->reg != 5) { + mod = 0x00; + } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) { + mod = 0x40; + } else { + mod = 0x80; + } + /* compute if sib byte needed */ + reg1 = op->reg; + if (op->reg2 != -1) + reg1 = 4; + g(mod + (reg << 3) + reg1); + if (reg1 == 4) { + /* add sib byte */ + reg2 = op->reg2; + if (reg2 == -1) + reg2 = 4; /* indicate no index */ + g((op->shift << 6) + (reg2 << 3) + op->reg); + } + + /* add offset */ + if (mod == 0x40) { + g(op->e.v); + } else if (mod == 0x80) { + gen_expr32(&op->e); + } + } +} + +static void asm_opcode(TCCState *s1, int opcode) +{ + const ASMInstr *pa; + int i, modrm_index, reg, v, op1, is_short_jmp; + int nb_ops, s, ss; + Operand ops[MAX_OPERANDS], *pop; + int op_type[3]; /* decoded op type */ + + /* get operands */ + pop = ops; + nb_ops = 0; + for(;;) { + if (tok == ';' || tok == TOK_LINEFEED) + break; + if (nb_ops >= MAX_OPERANDS) { + error("incorrect number of operands"); + } + parse_operand(s1, pop); + pop++; + nb_ops++; + if (tok != ',') + break; + next(); + } + + is_short_jmp = 0; + s = 0; /* avoid warning */ + + /* optimize matching by using a lookup table (no hashing is needed + !) */ + for(pa = asm_instrs; pa->sym != 0; pa++) { + s = 0; + if (pa->instr_type & OPC_FARITH) { + v = opcode - pa->sym; + if (!((unsigned)v < 8 * 6 && (v % 6) == 0)) + continue; + } else if (pa->instr_type & OPC_ARITH) { + if (!(opcode >= pa->sym && opcode < pa->sym + 8 * 4)) + continue; + goto compute_size; + } else if (pa->instr_type & OPC_SHIFT) { + if (!(opcode >= pa->sym && opcode < pa->sym + 7 * 4)) + continue; + goto compute_size; + } else if (pa->instr_type & OPC_TEST) { + if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES)) + continue; + } else if (pa->instr_type & OPC_B) { + if (!(opcode >= pa->sym && opcode <= pa->sym + 3)) + continue; + compute_size: + s = (opcode - pa->sym) & 3; + } else if (pa->instr_type & OPC_WL) { + if (!(opcode >= pa->sym && opcode <= pa->sym + 2)) + continue; + s = opcode - pa->sym + 1; + } else { + if (pa->sym != opcode) + continue; + } + if (pa->nb_ops != nb_ops) + continue; + /* now decode and check each operand */ + for(i = 0; i < nb_ops; i++) { + int op1, op2; + op1 = pa->op_type[i]; + op2 = op1 & 0x1f; + switch(op2) { + case OPT_IM: + v = OP_IM8 | OP_IM16 | OP_IM32; + break; + case OPT_REG: + v = OP_REG8 | OP_REG16 | OP_REG32; + break; + case OPT_REGW: + v = OP_REG16 | OP_REG32; + break; + case OPT_IMW: + v = OP_IM16 | OP_IM32; + break; + default: + v = 1 << op2; + break; + } + if (op1 & OPT_EA) + v |= OP_EA; + op_type[i] = v; + if ((ops[i].type & v) == 0) + goto next; + } + /* all is matching ! */ + break; + next: ; + } + if (pa->sym == 0) { + if (opcode >= TOK_ASM_pusha && opcode <= TOK_ASM_emms) { + int b; + b = op0_codes[opcode - TOK_ASM_pusha]; + if (b & 0xff00) + g(b >> 8); + g(b); + return; + } else { + error("unknown opcode '%s'", + get_tok_str(opcode, NULL)); + } + } + /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */ + if (s == 3) { + for(i = 0; s == 3 && i < nb_ops; i++) { + if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX))) + s = reg_to_size[ops[i].type & OP_REG]; + } + if (s == 3) { + error("cannot infer opcode suffix"); + } + } + + /* generate data16 prefix if needed */ + ss = s; + if (s == 1 || (pa->instr_type & OPC_D16)) + g(WORD_PREFIX_OPCODE); + else if (s == 2) + s = 1; + /* now generates the operation */ + if (pa->instr_type & OPC_FWAIT) + g(0x9b); + + v = pa->opcode; + if (v == 0x69 || v == 0x69) { + /* kludge for imul $im, %reg */ + nb_ops = 3; + ops[2] = ops[1]; + } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) { + v--; /* int $3 case */ + nb_ops = 0; + } else if ((v == 0x06 || v == 0x07)) { + if (ops[0].reg >= 4) { + /* push/pop %fs or %gs */ + v = 0x0fa0 + (v - 0x06) + ((ops[0].reg - 4) << 3); + } else { + v += ops[0].reg << 3; + } + nb_ops = 0; + } else if (v <= 0x05) { + /* arith case */ + v += ((opcode - TOK_ASM_addb) >> 2) << 3; + } else if ((pa->instr_type & (OPC_FARITH | OPC_MODRM)) == OPC_FARITH) { + /* fpu arith case */ + v += ((opcode - pa->sym) / 6) << 3; + } + if (pa->instr_type & OPC_REG) { + for(i = 0; i < nb_ops; i++) { + if (op_type[i] & (OP_REG | OP_ST)) { + v += ops[i].reg; + break; + } + } + /* mov $im, %reg case */ + if (pa->opcode == 0xb0 && s >= 1) + v += 7; + } + if (pa->instr_type & OPC_B) + v += s; + if (pa->instr_type & OPC_TEST) + v += test_bits[opcode - pa->sym]; + if (pa->instr_type & OPC_SHORTJMP) { + Sym *sym; + int jmp_disp; + + /* see if we can really generate the jump with a byte offset */ + sym = ops[0].e.sym; + if (!sym) + goto no_short_jump; + if (sym->r != cur_text_section->sh_num) + goto no_short_jump; + jmp_disp = ops[0].e.v + (long)sym->next - ind - 2; + if (jmp_disp == (int8_t)jmp_disp) { + /* OK to generate jump */ + is_short_jmp = 1; + ops[0].e.v = jmp_disp; + } else { + no_short_jump: + if (pa->instr_type & OPC_JMP) { + /* long jump will be allowed. need to modify the + opcode slightly */ + if (v == 0xeb) + v = 0xe9; + else + v += 0x0f10; + } else { + error("invalid displacement"); + } + } + } + op1 = v >> 8; + if (op1) + g(op1); + g(v); + + /* search which operand will used for modrm */ + modrm_index = 0; + if (pa->instr_type & OPC_SHIFT) { + reg = (opcode - pa->sym) >> 2; + if (reg == 6) + reg = 7; + } else if (pa->instr_type & OPC_ARITH) { + reg = (opcode - pa->sym) >> 2; + } else if (pa->instr_type & OPC_FARITH) { + reg = (opcode - pa->sym) / 6; + } else { + reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7; + } + if (pa->instr_type & OPC_MODRM) { + /* first look for an ea operand */ + for(i = 0;i < nb_ops; i++) { + if (op_type[i] & OP_EA) + goto modrm_found; + } + /* then if not found, a register or indirection (shift instructions) */ + for(i = 0;i < nb_ops; i++) { + if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR)) + goto modrm_found; + } +#ifdef ASM_DEBUG + error("bad op table"); +#endif + modrm_found: + modrm_index = i; + /* if a register is used in another operand then it is + used instead of group */ + for(i = 0;i < nb_ops; i++) { + v = op_type[i]; + if (i != modrm_index && + (v & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) { + reg = ops[i].reg; + break; + } + } + + asm_modrm(reg, &ops[modrm_index]); + } + + /* emit constants */ + if (pa->opcode == 0x9a || pa->opcode == 0xea) { + /* ljmp or lcall kludge */ + gen_expr32(&ops[1].e); + if (ops[0].e.sym) + error("cannot relocate"); + gen_le16(ops[0].e.v); + } else { + for(i = 0;i < nb_ops; i++) { + v = op_type[i]; + if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM8S | OP_ADDR)) { + /* if multiple sizes are given it means we must look + at the op size */ + if (v == (OP_IM8 | OP_IM16 | OP_IM32) || + v == (OP_IM16 | OP_IM32)) { + if (ss == 0) + v = OP_IM8; + else if (ss == 1) + v = OP_IM16; + else + v = OP_IM32; + } + if (v & (OP_IM8 | OP_IM8S)) { + if (ops[i].e.sym) + goto error_relocate; + g(ops[i].e.v); + } else if (v & OP_IM16) { + if (ops[i].e.sym) { + error_relocate: + error("cannot relocate"); + } + gen_le16(ops[i].e.v); + } else { + if (pa->instr_type & (OPC_JMP | OPC_SHORTJMP)) { + if (is_short_jmp) + g(ops[i].e.v); + else + gen_disp32(&ops[i].e); + } else { + gen_expr32(&ops[i].e); + } + } + } + } + } +} + +#define NB_SAVED_REGS 3 +#define NB_ASM_REGS 8 + +/* return the constraint priority (we allocate first the lowest + numbered constraints) */ +static inline int constraint_priority(const char *str) +{ + int priority, c, pr; + + /* we take the lowest priority */ + priority = 0; + for(;;) { + c = *str; + if (c == '\0') + break; + str++; + switch(c) { + case 'a': + case 'b': + case 'c': + case 'd': + case 'S': + case 'D': + pr = 0; + break; + case 'q': + pr = 1; + break; + case 'r': + pr = 2; + break; + case 'N': + case 'M': + case 'I': + case 'i': + case 'm': + case 'g': + pr = 3; + break; + default: + error("unknown constraint '%c'", c); + pr = 0; + } + if (pr > priority) + priority = pr; + } + return priority; +} + +static void asm_compute_constraints(uint8_t *regs_allocated, + ASMOperand *operands, + int nb_operands1, int nb_outputs, + int is_output, + uint8_t *input_regs_allocated) +{ + ASMOperand *op; + int sorted_op[MAX_ASM_OPERANDS]; + int i, j, k, p1, p2, tmp, reg, c, base, nb_operands; + const char *str; + + if (is_output) { + base = 0; + nb_operands = nb_outputs; + } else { + base = nb_outputs; + nb_operands = nb_operands1 - nb_outputs; + } + + /* compute constraint priority and evaluate references to output + constraints if input constraints */ + for(i=0;iconstraint; + op->ref_index = -1; + op->reg = -1; + if (!is_output && (isnum(*str) || *str == '[')) { + /* this is a reference to another constraint */ + k = find_constraint(operands, nb_operands1, str, NULL); + if ((unsigned)k >= j) + error("invalid reference in constraint %d ('%s')", + j, str); + op->ref_index = k; + str = operands[k].constraint; + } + while (*str == '=' || *str == '&' || *str == '+') + str++; + op->priority = constraint_priority(str); + } + + /* sort operands according to their priority */ + for(i=0;iconstraint; + + if (op->ref_index >= 0) { + str = operands[op->ref_index].constraint; + } + + while (*str == '=' || *str == '&' || *str == '+') + str++; + try_next: + c = *str++; + switch(c) { + case 'a': + reg = TREG_EAX; + goto alloc_reg; + case 'b': + reg = 3; + goto alloc_reg; + case 'c': + reg = TREG_ECX; + goto alloc_reg; + case 'd': + reg = TREG_EDX; + goto alloc_reg; + case 'S': + reg = 6; + goto alloc_reg; + case 'D': + reg = 7; + alloc_reg: + if (regs_allocated[reg]) + goto try_next; + goto reg_found; + case 'q': + /* eax, ebx, ecx or edx */ + for(reg = 0; reg < 4; reg++) { + if (!regs_allocated[reg]) + goto reg_found; + } + goto try_next; + case 'r': + /* any general register */ + for(reg = 0; reg < 8; reg++) { + if (!regs_allocated[reg]) + goto reg_found; + } + goto try_next; + reg_found: + /* now we can reload in the register */ + op->reg = reg; + regs_allocated[reg] = 1; + break; + case 'i': + if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST)) + goto try_next; + break; + case 'I': + case 'N': + case 'M': + if (!((op->vt->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST)) + goto try_next; + break; + case 'm': + case 'g': + /* nothing special to do because the operand is + already in memory */ + /* XXX: fix constant case */ + if (is_output) { + /* if it is a reference to a memory zone, it must lie + in a register, so we reserve the register in the + input registers and a load will be generated + later */ + if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) { + /* any general register */ + for(reg = 0; reg < 8; reg++) { + if (!input_regs_allocated[reg]) + goto reg_found1; + } + goto try_next; + reg_found1: + /* now we can reload in the register */ + input_regs_allocated[reg] = 1; + op->reg = reg; + } + } + break; + default: + error("asm constraint %d ('%s') could not be satisfied", + j, op->constraint); + break; + } + } + + /* print sorted constraints */ +#ifdef ASM_DEBUG + if (is_output) + printf("outputs=\n"); + else + printf("inputs=\n"); + for(i=0;iid ? get_tok_str(op->id, NULL) : "", + op->constraint, + op->vt->r, + op->reg); + } +#endif +} + +static void subst_asm_operand(CString *add_str, + SValue *sv, int modifier) +{ + int r, reg, size, val; + char buf[64]; + + r = sv->r; + if ((r & VT_VALMASK) == VT_CONST) { + if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n') + cstr_ccat(add_str, '$'); + if (r & VT_SYM) { + cstr_cat(add_str, get_tok_str(sv->sym->v, NULL)); + if (sv->c.i != 0) { + cstr_ccat(add_str, '+'); + } else { + return; + } + } + val = sv->c.i; + if (modifier == 'n') + val = -val; + snprintf(buf, sizeof(buf), "%d", sv->c.i); + cstr_cat(add_str, buf); + } else if ((r & VT_VALMASK) == VT_LOCAL) { + snprintf(buf, sizeof(buf), "%d(%%ebp)", sv->c.i); + cstr_cat(add_str, buf); + } else if (r & VT_LVAL) { + reg = r & VT_VALMASK; + if (reg >= VT_CONST) + error("internal compiler error"); + snprintf(buf, sizeof(buf), "(%%%s)", + get_tok_str(TOK_ASM_eax + reg, NULL)); + cstr_cat(add_str, buf); + } else { + /* register case */ + reg = r & VT_VALMASK; + if (reg >= VT_CONST) + error("internal compiler error"); + + /* choose register operand size */ + if ((sv->type.t & VT_BTYPE) == VT_BYTE) + size = 1; + else if ((sv->type.t & VT_BTYPE) == VT_SHORT) + size = 2; + else + size = 4; + if (size == 1 && reg >= 4) + size = 4; + + if (modifier == 'b') { + if (reg >= 4) + error("cannot use byte register"); + size = 1; + } else if (modifier == 'h') { + if (reg >= 4) + error("cannot use byte register"); + size = -1; + } else if (modifier == 'w') { + size = 2; + } + + switch(size) { + case -1: + reg = TOK_ASM_ah + reg; + break; + case 1: + reg = TOK_ASM_al + reg; + break; + case 2: + reg = TOK_ASM_ax + reg; + break; + default: + reg = TOK_ASM_eax + reg; + break; + } + snprintf(buf, sizeof(buf), "%%%s", get_tok_str(reg, NULL)); + cstr_cat(add_str, buf); + } +} + +/* generate prolog and epilog code for asm statment */ +static void asm_gen_code(ASMOperand *operands, int nb_operands, + int nb_outputs, int is_output, + uint8_t *clobber_regs) +{ + uint8_t regs_allocated[NB_ASM_REGS]; + ASMOperand *op; + int i, reg; + static uint8_t reg_saved[NB_SAVED_REGS] = { 3, 6, 7 }; + + /* mark all used registers */ + memcpy(regs_allocated, clobber_regs, sizeof(regs_allocated)); + for(i = 0; i < nb_operands;i++) { + op = &operands[i]; + if (op->reg >= 0) + regs_allocated[op->reg] = 1; + } + if (!is_output) { + /* generate reg save code */ + for(i = 0; i < NB_SAVED_REGS; i++) { + reg = reg_saved[i]; + if (regs_allocated[reg]) + g(0x50 + reg); + } + + /* generate load code */ + for(i = nb_outputs ; i < nb_operands; i++) { + op = &operands[i]; + if (op->reg >= 0) { + load(op->reg, op->vt); + } + } + /* generate load code for output memory references */ + for(i = 0 ; i < nb_outputs; i++) { + op = &operands[i]; + if (op->reg >= 0 && ((op->vt->r & VT_VALMASK) == VT_LLOCAL)) { + SValue sv; + sv = *op->vt; + sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL; + load(op->reg, &sv); + } + } + } else { + /* generate save code */ + for(i = 0 ; i < nb_outputs; i++) { + op = &operands[i]; + if (op->reg >= 0 && ((op->vt->r & VT_VALMASK) != VT_LLOCAL)) { + store(op->reg, op->vt); + } + } + /* generate reg restore code */ + for(i = NB_SAVED_REGS - 1; i >= 0; i--) { + reg = reg_saved[i]; + if (regs_allocated[reg]) + g(0x58 + reg); + } + } +} + +static void asm_clobber(uint8_t *clobber_regs, const char *str) +{ + int reg; + TokenSym *ts; + + if (!strcmp(str, "memory") || + !strcmp(str, "cc")) + return; + ts = tok_alloc(str, strlen(str)); + reg = ts->tok; + if (reg >= TOK_ASM_eax && reg <= TOK_ASM_edi) { + reg -= TOK_ASM_eax; + } else if (reg >= TOK_ASM_ax && reg <= TOK_ASM_di) { + reg -= TOK_ASM_ax; + } else { + error("invalid clobber register '%s'", str); + } + clobber_regs[reg] = 1; +} diff --git a/i386-asm.h b/i386-asm.h new file mode 100644 index 0000000..612105c --- /dev/null +++ b/i386-asm.h @@ -0,0 +1,444 @@ + DEF_ASM_OP0(pusha, 0x60) /* must be first OP0 */ + DEF_ASM_OP0(popa, 0x61) + DEF_ASM_OP0(clc, 0xf8) + DEF_ASM_OP0(cld, 0xfc) + DEF_ASM_OP0(cli, 0xfa) + DEF_ASM_OP0(clts, 0x0f06) + DEF_ASM_OP0(cmc, 0xf5) + DEF_ASM_OP0(lahf, 0x9f) + DEF_ASM_OP0(sahf, 0x9e) + DEF_ASM_OP0(pushfl, 0x9c) + DEF_ASM_OP0(popfl, 0x9d) + DEF_ASM_OP0(pushf, 0x9c) + DEF_ASM_OP0(popf, 0x9d) + DEF_ASM_OP0(stc, 0xf9) + DEF_ASM_OP0(std, 0xfd) + DEF_ASM_OP0(sti, 0xfb) + DEF_ASM_OP0(aaa, 0x37) + DEF_ASM_OP0(aas, 0x3f) + DEF_ASM_OP0(daa, 0x27) + DEF_ASM_OP0(das, 0x2f) + DEF_ASM_OP0(aad, 0xd50a) + DEF_ASM_OP0(aam, 0xd40a) + DEF_ASM_OP0(cbw, 0x6698) + DEF_ASM_OP0(cwd, 0x6699) + DEF_ASM_OP0(cwde, 0x98) + DEF_ASM_OP0(cdq, 0x99) + DEF_ASM_OP0(cbtw, 0x6698) + DEF_ASM_OP0(cwtl, 0x98) + DEF_ASM_OP0(cwtd, 0x6699) + DEF_ASM_OP0(cltd, 0x99) + DEF_ASM_OP0(int3, 0xcc) + DEF_ASM_OP0(into, 0xce) + DEF_ASM_OP0(iret, 0xcf) + DEF_ASM_OP0(rsm, 0x0faa) + DEF_ASM_OP0(hlt, 0xf4) + DEF_ASM_OP0(wait, 0x9b) + DEF_ASM_OP0(nop, 0x90) + DEF_ASM_OP0(xlat, 0xd7) + + /* strings */ +ALT(DEF_ASM_OP0L(cmpsb, 0xa6, 0, OPC_BWL)) +ALT(DEF_ASM_OP0L(scmpb, 0xa6, 0, OPC_BWL)) + +ALT(DEF_ASM_OP0L(insb, 0x6c, 0, OPC_BWL)) +ALT(DEF_ASM_OP0L(outsb, 0x6e, 0, OPC_BWL)) + +ALT(DEF_ASM_OP0L(lodsb, 0xac, 0, OPC_BWL)) +ALT(DEF_ASM_OP0L(slodb, 0xac, 0, OPC_BWL)) + +ALT(DEF_ASM_OP0L(movsb, 0xa4, 0, OPC_BWL)) +ALT(DEF_ASM_OP0L(smovb, 0xa4, 0, OPC_BWL)) + +ALT(DEF_ASM_OP0L(scasb, 0xae, 0, OPC_BWL)) +ALT(DEF_ASM_OP0L(sscab, 0xae, 0, OPC_BWL)) + +ALT(DEF_ASM_OP0L(stosb, 0xaa, 0, OPC_BWL)) +ALT(DEF_ASM_OP0L(sstob, 0xaa, 0, OPC_BWL)) + + /* bits */ + +ALT(DEF_ASM_OP2(bsfw, 0x0fbc, 0, OPC_MODRM | OPC_WL, OPT_REGW | OPT_EA, OPT_REGW)) +ALT(DEF_ASM_OP2(bsrw, 0x0fbd, 0, OPC_MODRM | OPC_WL, OPT_REGW | OPT_EA, OPT_REGW)) + +ALT(DEF_ASM_OP2(btw, 0x0fa3, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_REGW | OPT_EA)) +ALT(DEF_ASM_OP2(btw, 0x0fba, 4, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW | OPT_EA)) + +ALT(DEF_ASM_OP2(btsw, 0x0fab, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_REGW | OPT_EA)) +ALT(DEF_ASM_OP2(btsw, 0x0fba, 5, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW | OPT_EA)) + +ALT(DEF_ASM_OP2(btrw, 0x0fb3, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_REGW | OPT_EA)) +ALT(DEF_ASM_OP2(btrw, 0x0fba, 6, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW | OPT_EA)) + +ALT(DEF_ASM_OP2(btcw, 0x0fbb, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_REGW | OPT_EA)) +ALT(DEF_ASM_OP2(btcw, 0x0fba, 7, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW | OPT_EA)) + + /* prefixes */ + DEF_ASM_OP0(aword, 0x67) + DEF_ASM_OP0(addr16, 0x67) + DEF_ASM_OP0(word, 0x66) + DEF_ASM_OP0(data16, 0x66) + DEF_ASM_OP0(lock, 0xf0) + DEF_ASM_OP0(rep, 0xf3) + DEF_ASM_OP0(repe, 0xf3) + DEF_ASM_OP0(repz, 0xf3) + DEF_ASM_OP0(repne, 0xf2) + DEF_ASM_OP0(repnz, 0xf2) + + DEF_ASM_OP0(invd, 0x0f08) + DEF_ASM_OP0(wbinvd, 0x0f09) + DEF_ASM_OP0(cpuid, 0x0fa2) + DEF_ASM_OP0(wrmsr, 0x0f30) + DEF_ASM_OP0(rdtsc, 0x0f31) + DEF_ASM_OP0(rdmsr, 0x0f32) + DEF_ASM_OP0(rdpmc, 0x0f33) + DEF_ASM_OP0(ud2, 0x0f0b) + + /* NOTE: we took the same order as gas opcode definition order */ +ALT(DEF_ASM_OP2(movb, 0xa0, 0, OPC_BWL, OPT_ADDR, OPT_EAX)) +ALT(DEF_ASM_OP2(movb, 0xa2, 0, OPC_BWL, OPT_EAX, OPT_ADDR)) +ALT(DEF_ASM_OP2(movb, 0x88, 0, OPC_MODRM | OPC_BWL, OPT_REG, OPT_EA | OPT_REG)) +ALT(DEF_ASM_OP2(movb, 0x8a, 0, OPC_MODRM | OPC_BWL, OPT_EA | OPT_REG, OPT_REG)) +ALT(DEF_ASM_OP2(movb, 0xb0, 0, OPC_REG | OPC_BWL, OPT_IM, OPT_REG)) +ALT(DEF_ASM_OP2(movb, 0xc6, 0, OPC_MODRM | OPC_BWL, OPT_IM, OPT_REG | OPT_EA)) + +ALT(DEF_ASM_OP2(movw, 0x8c, 0, OPC_MODRM | OPC_WL, OPT_SEG, OPT_EA | OPT_REG)) +ALT(DEF_ASM_OP2(movw, 0x8e, 0, OPC_MODRM | OPC_WL, OPT_EA | OPT_REG, OPT_SEG)) + +ALT(DEF_ASM_OP2(movw, 0x0f20, 0, OPC_MODRM | OPC_WL, OPT_CR, OPT_REG32)) +ALT(DEF_ASM_OP2(movw, 0x0f21, 0, OPC_MODRM | OPC_WL, OPT_DB, OPT_REG32)) +ALT(DEF_ASM_OP2(movw, 0x0f24, 0, OPC_MODRM | OPC_WL, OPT_TR, OPT_REG32)) +ALT(DEF_ASM_OP2(movw, 0x0f22, 0, OPC_MODRM | OPC_WL, OPT_REG32, OPT_CR)) +ALT(DEF_ASM_OP2(movw, 0x0f23, 0, OPC_MODRM | OPC_WL, OPT_REG32, OPT_DB)) +ALT(DEF_ASM_OP2(movw, 0x0f26, 0, OPC_MODRM | OPC_WL, OPT_REG32, OPT_TR)) + +ALT(DEF_ASM_OP2(movsbl, 0x0fbe, 0, OPC_MODRM, OPT_REG8 | OPT_EA, OPT_REG32)) +ALT(DEF_ASM_OP2(movsbw, 0x0fbe, 0, OPC_MODRM | OPC_D16, OPT_REG8 | OPT_EA, OPT_REG16)) +ALT(DEF_ASM_OP2(movswl, 0x0fbf, 0, OPC_MODRM, OPT_REG16 | OPT_EA, OPT_REG32)) +ALT(DEF_ASM_OP2(movzbw, 0x0fb6, 0, OPC_MODRM | OPC_WL, OPT_REG8 | OPT_EA, OPT_REGW)) +ALT(DEF_ASM_OP2(movzwl, 0x0fb7, 0, OPC_MODRM, OPT_REG16 | OPT_EA, OPT_REG32)) + +ALT(DEF_ASM_OP1(pushw, 0x50, 0, OPC_REG | OPC_WL, OPT_REGW)) +ALT(DEF_ASM_OP1(pushw, 0xff, 6, OPC_MODRM | OPC_WL, OPT_REGW | OPT_EA)) +ALT(DEF_ASM_OP1(push, 0x6a, 0, 0, OPT_IM8S)) +ALT(DEF_ASM_OP1(push, 0x68, 0, 0, OPT_IM32)) +ALT(DEF_ASM_OP1(push, 0x06, 0, 0, OPT_SEG)) + +ALT(DEF_ASM_OP1(popw, 0x58, 0, OPC_REG | OPC_WL, OPT_REGW)) +ALT(DEF_ASM_OP1(popw, 0x8f, 0, OPC_MODRM | OPC_WL, OPT_REGW | OPT_EA)) +ALT(DEF_ASM_OP1(pop, 0x07, 0, 0, OPT_SEG)) + +ALT(DEF_ASM_OP2(xchgw, 0x90, 0, OPC_REG | OPC_WL, OPT_REG, OPT_EAX)) +ALT(DEF_ASM_OP2(xchgw, 0x90, 0, OPC_REG | OPC_WL, OPT_EAX, OPT_REG)) +ALT(DEF_ASM_OP2(xchgb, 0x86, 0, OPC_MODRM | OPC_BWL, OPT_REG, OPT_EA | OPT_REG)) +ALT(DEF_ASM_OP2(xchgb, 0x86, 0, OPC_MODRM | OPC_BWL, OPT_EA | OPT_REG, OPT_REG)) + +ALT(DEF_ASM_OP2(inb, 0xe4, 0, OPC_BWL, OPT_IM8, OPT_EAX)) +ALT(DEF_ASM_OP1(inb, 0xe4, 0, OPC_BWL, OPT_IM8)) +ALT(DEF_ASM_OP2(inb, 0xec, 0, OPC_BWL, OPT_DX, OPT_EAX)) +ALT(DEF_ASM_OP1(inb, 0xec, 0, OPC_BWL, OPT_DX)) + +ALT(DEF_ASM_OP2(outb, 0xe6, 0, OPC_BWL, OPT_EAX, OPT_IM8)) +ALT(DEF_ASM_OP1(outb, 0xe6, 0, OPC_BWL, OPT_IM8)) +ALT(DEF_ASM_OP2(outb, 0xee, 0, OPC_BWL, OPT_EAX, OPT_DX)) +ALT(DEF_ASM_OP1(outb, 0xee, 0, OPC_BWL, OPT_DX)) + +ALT(DEF_ASM_OP2(leaw, 0x8d, 0, OPC_MODRM | OPC_WL, OPT_EA, OPT_REG)) + +ALT(DEF_ASM_OP2(les, 0xc4, 0, OPC_MODRM, OPT_EA, OPT_REG32)) +ALT(DEF_ASM_OP2(lds, 0xc5, 0, OPC_MODRM, OPT_EA, OPT_REG32)) +ALT(DEF_ASM_OP2(lss, 0x0fb2, 0, OPC_MODRM, OPT_EA, OPT_REG32)) +ALT(DEF_ASM_OP2(lfs, 0x0fb4, 0, OPC_MODRM, OPT_EA, OPT_REG32)) +ALT(DEF_ASM_OP2(lgs, 0x0fb5, 0, OPC_MODRM, OPT_EA, OPT_REG32)) + + /* arith */ +ALT(DEF_ASM_OP2(addb, 0x00, 0, OPC_ARITH | OPC_MODRM | OPC_BWL, OPT_REG, OPT_EA | OPT_REG)) /* XXX: use D bit ? */ +ALT(DEF_ASM_OP2(addb, 0x02, 0, OPC_ARITH | OPC_MODRM | OPC_BWL, OPT_EA | OPT_REG, OPT_REG)) +ALT(DEF_ASM_OP2(addb, 0x04, 0, OPC_ARITH | OPC_BWL, OPT_IM, OPT_EAX)) +ALT(DEF_ASM_OP2(addb, 0x80, 0, OPC_ARITH | OPC_MODRM | OPC_BWL, OPT_IM, OPT_EA | OPT_REG)) +ALT(DEF_ASM_OP2(addw, 0x83, 0, OPC_ARITH | OPC_MODRM | OPC_WL, OPT_IM8S, OPT_EA | OPT_REG)) + +ALT(DEF_ASM_OP2(testb, 0x84, 0, OPC_MODRM | OPC_BWL, OPT_EA | OPT_REG, OPT_REG)) +ALT(DEF_ASM_OP2(testb, 0x84, 0, OPC_MODRM | OPC_BWL, OPT_REG, OPT_EA | OPT_REG)) +ALT(DEF_ASM_OP2(testb, 0xa8, 0, OPC_BWL, OPT_IM, OPT_EAX)) +ALT(DEF_ASM_OP2(testb, 0xf6, 0, OPC_MODRM | OPC_BWL, OPT_IM, OPT_EA | OPT_REG)) + +ALT(DEF_ASM_OP1(incw, 0x40, 0, OPC_REG | OPC_WL, OPT_REGW)) +ALT(DEF_ASM_OP1(incb, 0xfe, 0, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA)) +ALT(DEF_ASM_OP1(decw, 0x48, 0, OPC_REG | OPC_WL, OPT_REGW)) +ALT(DEF_ASM_OP1(decb, 0xfe, 1, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA)) + +ALT(DEF_ASM_OP1(notb, 0xf6, 2, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA)) +ALT(DEF_ASM_OP1(negb, 0xf6, 3, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA)) + +ALT(DEF_ASM_OP1(mulb, 0xf6, 4, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA)) +ALT(DEF_ASM_OP1(imulb, 0xf6, 5, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA)) + +ALT(DEF_ASM_OP2(imulw, 0x0faf, 0, OPC_MODRM | OPC_WL, OPT_REG | OPT_EA, OPT_REG)) +ALT(DEF_ASM_OP3(imulw, 0x6b, 0, OPC_MODRM | OPC_WL, OPT_IM8S, OPT_REGW | OPT_EA, OPT_REGW)) +ALT(DEF_ASM_OP2(imulw, 0x6b, 0, OPC_MODRM | OPC_WL, OPT_IM8S, OPT_REGW)) +ALT(DEF_ASM_OP3(imulw, 0x69, 0, OPC_MODRM | OPC_WL, OPT_IMW, OPT_REGW | OPT_EA, OPT_REGW)) +ALT(DEF_ASM_OP2(imulw, 0x69, 0, OPC_MODRM | OPC_WL, OPT_IMW, OPT_REGW)) + +ALT(DEF_ASM_OP1(divb, 0xf6, 6, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA)) +ALT(DEF_ASM_OP2(divb, 0xf6, 6, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA, OPT_EAX)) +ALT(DEF_ASM_OP1(idivb, 0xf6, 7, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA)) +ALT(DEF_ASM_OP2(idivb, 0xf6, 7, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA, OPT_EAX)) + + /* shifts */ +ALT(DEF_ASM_OP2(rolb, 0xc0, 0, OPC_MODRM | OPC_BWL | OPC_SHIFT, OPT_IM8, OPT_EA | OPT_REG)) +ALT(DEF_ASM_OP2(rolb, 0xd2, 0, OPC_MODRM | OPC_BWL | OPC_SHIFT, OPT_CL, OPT_EA | OPT_REG)) +ALT(DEF_ASM_OP1(rolb, 0xd0, 0, OPC_MODRM | OPC_BWL | OPC_SHIFT, OPT_EA | OPT_REG)) + +ALT(DEF_ASM_OP3(shldw, 0x0fa4, 0, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW, OPT_EA | OPT_REGW)) +ALT(DEF_ASM_OP3(shldw, 0x0fa5, 0, OPC_MODRM | OPC_WL, OPT_CL, OPT_REGW, OPT_EA | OPT_REGW)) +ALT(DEF_ASM_OP2(shldw, 0x0fa5, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_EA | OPT_REGW)) +ALT(DEF_ASM_OP3(shrdw, 0x0fac, 0, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW, OPT_EA | OPT_REGW)) +ALT(DEF_ASM_OP3(shrdw, 0x0fad, 0, OPC_MODRM | OPC_WL, OPT_CL, OPT_REGW, OPT_EA | OPT_REGW)) +ALT(DEF_ASM_OP2(shrdw, 0x0fad, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_EA | OPT_REGW)) + +ALT(DEF_ASM_OP1(call, 0xff, 2, OPC_MODRM, OPT_INDIR)) +ALT(DEF_ASM_OP1(call, 0xe8, 0, OPC_JMP, OPT_ADDR)) +ALT(DEF_ASM_OP1(jmp, 0xff, 4, OPC_MODRM, OPT_INDIR)) +ALT(DEF_ASM_OP1(jmp, 0xeb, 0, OPC_SHORTJMP | OPC_JMP, OPT_ADDR)) + +ALT(DEF_ASM_OP2(lcall, 0x9a, 0, 0, OPT_IM16, OPT_IM32)) +ALT(DEF_ASM_OP1(lcall, 0xff, 3, 0, OPT_EA)) +ALT(DEF_ASM_OP2(ljmp, 0xea, 0, 0, OPT_IM16, OPT_IM32)) +ALT(DEF_ASM_OP1(ljmp, 0xff, 5, 0, OPT_EA)) + +ALT(DEF_ASM_OP1(int, 0xcd, 0, 0, OPT_IM8)) +ALT(DEF_ASM_OP1(seto, 0x0f90, 0, OPC_MODRM | OPC_TEST, OPT_REG8 | OPT_EA)) + DEF_ASM_OP2(enter, 0xc8, 0, 0, OPT_IM16, OPT_IM8) + DEF_ASM_OP0(leave, 0xc9) + DEF_ASM_OP0(ret, 0xc3) +ALT(DEF_ASM_OP1(ret, 0xc2, 0, 0, OPT_IM16)) + DEF_ASM_OP0(lret, 0xcb) +ALT(DEF_ASM_OP1(lret, 0xca, 0, 0, OPT_IM16)) + +ALT(DEF_ASM_OP1(jo, 0x70, 0, OPC_SHORTJMP | OPC_JMP | OPC_TEST, OPT_ADDR)) + DEF_ASM_OP1(loopne, 0xe0, 0, OPC_SHORTJMP, OPT_ADDR) + DEF_ASM_OP1(loopnz, 0xe0, 0, OPC_SHORTJMP, OPT_ADDR) + DEF_ASM_OP1(loope, 0xe1, 0, OPC_SHORTJMP, OPT_ADDR) + DEF_ASM_OP1(loopz, 0xe1, 0, OPC_SHORTJMP, OPT_ADDR) + DEF_ASM_OP1(loop, 0xe2, 0, OPC_SHORTJMP, OPT_ADDR) + DEF_ASM_OP1(jecxz, 0xe3, 0, OPC_SHORTJMP, OPT_ADDR) + + /* float */ + /* specific fcomp handling */ +ALT(DEF_ASM_OP0L(fcomp, 0xd8d9, 0, 0)) + +ALT(DEF_ASM_OP1(fadd, 0xd8c0, 0, OPC_FARITH | OPC_REG, OPT_ST)) +ALT(DEF_ASM_OP2(fadd, 0xd8c0, 0, OPC_FARITH | OPC_REG, OPT_ST, OPT_ST0)) +ALT(DEF_ASM_OP0L(fadd, 0xdec1, 0, OPC_FARITH)) +ALT(DEF_ASM_OP1(faddp, 0xdec0, 0, OPC_FARITH | OPC_REG, OPT_ST)) +ALT(DEF_ASM_OP2(faddp, 0xdec0, 0, OPC_FARITH | OPC_REG, OPT_ST, OPT_ST0)) +ALT(DEF_ASM_OP2(faddp, 0xdec0, 0, OPC_FARITH | OPC_REG, OPT_ST0, OPT_ST)) +ALT(DEF_ASM_OP0L(faddp, 0xdec1, 0, OPC_FARITH)) +ALT(DEF_ASM_OP1(fadds, 0xd8, 0, OPC_FARITH | OPC_MODRM, OPT_EA)) +ALT(DEF_ASM_OP1(fiaddl, 0xda, 0, OPC_FARITH | OPC_MODRM, OPT_EA)) +ALT(DEF_ASM_OP1(faddl, 0xdc, 0, OPC_FARITH | OPC_MODRM, OPT_EA)) +ALT(DEF_ASM_OP1(fiadds, 0xde, 0, OPC_FARITH | OPC_MODRM, OPT_EA)) + + DEF_ASM_OP0(fucompp, 0xdae9) + DEF_ASM_OP0(ftst, 0xd9e4) + DEF_ASM_OP0(fxam, 0xd9e5) + DEF_ASM_OP0(fld1, 0xd9e8) + DEF_ASM_OP0(fldl2t, 0xd9e9) + DEF_ASM_OP0(fldl2e, 0xd9ea) + DEF_ASM_OP0(fldpi, 0xd9eb) + DEF_ASM_OP0(fldlg2, 0xd9ec) + DEF_ASM_OP0(fldln2, 0xd9ed) + DEF_ASM_OP0(fldz, 0xd9ee) + + DEF_ASM_OP0(f2xm1, 0xd9f0) + DEF_ASM_OP0(fyl2x, 0xd9f1) + DEF_ASM_OP0(fptan, 0xd9f2) + DEF_ASM_OP0(fpatan, 0xd9f3) + DEF_ASM_OP0(fxtract, 0xd9f4) + DEF_ASM_OP0(fprem1, 0xd9f5) + DEF_ASM_OP0(fdecstp, 0xd9f6) + DEF_ASM_OP0(fincstp, 0xd9f7) + DEF_ASM_OP0(fprem, 0xd9f8) + DEF_ASM_OP0(fyl2xp1, 0xd9f9) + DEF_ASM_OP0(fsqrt, 0xd9fa) + DEF_ASM_OP0(fsincos, 0xd9fb) + DEF_ASM_OP0(frndint, 0xd9fc) + DEF_ASM_OP0(fscale, 0xd9fd) + DEF_ASM_OP0(fsin, 0xd9fe) + DEF_ASM_OP0(fcos, 0xd9ff) + DEF_ASM_OP0(fchs, 0xd9e0) + DEF_ASM_OP0(fabs, 0xd9e1) + DEF_ASM_OP0(fninit, 0xdbe3) + DEF_ASM_OP0(fnclex, 0xdbe2) + DEF_ASM_OP0(fnop, 0xd9d0) + DEF_ASM_OP0(fwait, 0x9b) + + /* fp load */ + DEF_ASM_OP1(fld, 0xd9c0, 0, OPC_REG, OPT_ST) + DEF_ASM_OP1(fldl, 0xd9c0, 0, OPC_REG, OPT_ST) + DEF_ASM_OP1(flds, 0xd9, 0, OPC_MODRM, OPT_EA) +ALT(DEF_ASM_OP1(fldl, 0xdd, 0, OPC_MODRM, OPT_EA)) + DEF_ASM_OP1(fildl, 0xdb, 0, OPC_MODRM, OPT_EA) + DEF_ASM_OP1(fildq, 0xdf, 5, OPC_MODRM, OPT_EA) + DEF_ASM_OP1(fildll, 0xdf, 5, OPC_MODRM,OPT_EA) + DEF_ASM_OP1(fldt, 0xdb, 5, OPC_MODRM, OPT_EA) + DEF_ASM_OP1(fbld, 0xdf, 4, OPC_MODRM, OPT_EA) + + /* fp store */ + DEF_ASM_OP1(fst, 0xddd0, 0, OPC_REG, OPT_ST) + DEF_ASM_OP1(fstl, 0xddd0, 0, OPC_REG, OPT_ST) + DEF_ASM_OP1(fsts, 0xd9, 2, OPC_MODRM, OPT_EA) + DEF_ASM_OP1(fstps, 0xd9, 3, OPC_MODRM, OPT_EA) +ALT(DEF_ASM_OP1(fstl, 0xdd, 2, OPC_MODRM, OPT_EA)) + DEF_ASM_OP1(fstpl, 0xdd, 3, OPC_MODRM, OPT_EA) + DEF_ASM_OP1(fist, 0xdf, 2, OPC_MODRM, OPT_EA) + DEF_ASM_OP1(fistp, 0xdf, 3, OPC_MODRM, OPT_EA) + DEF_ASM_OP1(fistl, 0xdb, 2, OPC_MODRM, OPT_EA) + DEF_ASM_OP1(fistpl, 0xdb, 3, OPC_MODRM, OPT_EA) + + DEF_ASM_OP1(fstp, 0xddd8, 0, OPC_REG, OPT_ST) + DEF_ASM_OP1(fistpq, 0xdf, 7, OPC_MODRM, OPT_EA) + DEF_ASM_OP1(fistpll, 0xdf, 7, OPC_MODRM, OPT_EA) + DEF_ASM_OP1(fstpt, 0xdb, 7, OPC_MODRM, OPT_EA) + DEF_ASM_OP1(fbstp, 0xdf, 6, OPC_MODRM, OPT_EA) + + /* exchange */ + DEF_ASM_OP0(fxch, 0xd9c9) +ALT(DEF_ASM_OP1(fxch, 0xd9c8, 0, OPC_REG, OPT_ST)) + + /* misc FPU */ + DEF_ASM_OP1(fucom, 0xdde0, 0, OPC_REG, OPT_ST ) + DEF_ASM_OP1(fucomp, 0xdde8, 0, OPC_REG, OPT_ST ) + + DEF_ASM_OP0L(finit, 0xdbe3, 0, OPC_FWAIT) + DEF_ASM_OP1(fldcw, 0xd9, 5, OPC_MODRM, OPT_EA ) + DEF_ASM_OP1(fnstcw, 0xd9, 7, OPC_MODRM, OPT_EA ) + DEF_ASM_OP1(fstcw, 0xd9, 7, OPC_MODRM | OPC_FWAIT, OPT_EA ) + DEF_ASM_OP0(fnstsw, 0xdfe0) +ALT(DEF_ASM_OP1(fnstsw, 0xdfe0, 0, 0, OPT_EAX )) +ALT(DEF_ASM_OP1(fnstsw, 0xdd, 7, OPC_MODRM, OPT_EA )) + DEF_ASM_OP1(fstsw, 0xdfe0, 0, OPC_FWAIT, OPT_EAX ) +ALT(DEF_ASM_OP0L(fstsw, 0xdfe0, 0, OPC_FWAIT)) +ALT(DEF_ASM_OP1(fstsw, 0xdd, 7, OPC_MODRM | OPC_FWAIT, OPT_EA )) + DEF_ASM_OP0L(fclex, 0xdbe2, 0, OPC_FWAIT) + DEF_ASM_OP1(fnstenv, 0xd9, 6, OPC_MODRM, OPT_EA ) + DEF_ASM_OP1(fstenv, 0xd9, 6, OPC_MODRM | OPC_FWAIT, OPT_EA ) + DEF_ASM_OP1(fldenv, 0xd9, 4, OPC_MODRM, OPT_EA ) + DEF_ASM_OP1(fnsave, 0xdd, 6, OPC_MODRM, OPT_EA ) + DEF_ASM_OP1(fsave, 0xdd, 6, OPC_MODRM | OPC_FWAIT, OPT_EA ) + DEF_ASM_OP1(frstor, 0xdd, 4, OPC_MODRM, OPT_EA ) + DEF_ASM_OP1(ffree, 0xddc0, 4, OPC_REG, OPT_ST ) + DEF_ASM_OP1(ffreep, 0xdfc0, 4, OPC_REG, OPT_ST ) + + /* segments */ + DEF_ASM_OP2(arpl, 0x63, 0, OPC_MODRM, OPT_REG16, OPT_REG16 | OPT_EA) + DEF_ASM_OP2(lar, 0x0f02, 0, OPC_MODRM, OPT_REG32 | OPT_EA, OPT_REG32) + DEF_ASM_OP1(lgdt, 0x0f01, 2, OPC_MODRM, OPT_EA) + DEF_ASM_OP1(lidt, 0x0f01, 3, OPC_MODRM, OPT_EA) + DEF_ASM_OP1(lldt, 0x0f00, 2, OPC_MODRM, OPT_EA | OPT_REG) + DEF_ASM_OP1(lmsw, 0x0f01, 6, OPC_MODRM, OPT_EA | OPT_REG) +ALT(DEF_ASM_OP2(lslw, 0x0f03, 0, OPC_MODRM | OPC_WL, OPT_EA | OPT_REG, OPT_REG)) + DEF_ASM_OP1(ltr, 0x0f00, 3, OPC_MODRM, OPT_EA | OPT_REG) + DEF_ASM_OP1(sgdt, 0x0f01, 0, OPC_MODRM, OPT_EA) + DEF_ASM_OP1(sidt, 0x0f01, 1, OPC_MODRM, OPT_EA) + DEF_ASM_OP1(sldt, 0x0f00, 0, OPC_MODRM, OPT_REG | OPT_EA) + DEF_ASM_OP1(smsw, 0x0f01, 4, OPC_MODRM, OPT_REG | OPT_EA) + DEF_ASM_OP1(str, 0x0f00, 1, OPC_MODRM, OPT_REG16| OPT_EA) + DEF_ASM_OP1(verr, 0x0f00, 4, OPC_MODRM, OPT_REG | OPT_EA) + DEF_ASM_OP1(verw, 0x0f00, 5, OPC_MODRM, OPT_REG | OPT_EA) + + /* 486 */ + DEF_ASM_OP1(bswap, 0x0fc8, 0, OPC_REG, OPT_REG32 ) +ALT(DEF_ASM_OP2(xaddb, 0x0fc0, 0, OPC_MODRM | OPC_BWL, OPT_REG, OPT_REG | OPT_EA )) +ALT(DEF_ASM_OP2(cmpxchgb, 0x0fb0, 0, OPC_MODRM | OPC_BWL, OPT_REG, OPT_REG | OPT_EA )) + DEF_ASM_OP1(invlpg, 0x0f01, 7, OPC_MODRM, OPT_EA ) + + DEF_ASM_OP2(boundl, 0x62, 0, OPC_MODRM, OPT_REG32, OPT_EA) + DEF_ASM_OP2(boundw, 0x62, 0, OPC_MODRM | OPC_D16, OPT_REG16, OPT_EA) + + /* pentium */ + DEF_ASM_OP1(cmpxchg8b, 0x0fc7, 1, OPC_MODRM, OPT_EA ) + + /* pentium pro */ + ALT(DEF_ASM_OP2(cmovo, 0x0f40, 0, OPC_MODRM | OPC_TEST, OPT_REG32 | OPT_EA, OPT_REG32)) + + DEF_ASM_OP2(fcmovb, 0xdac0, 0, OPC_REG, OPT_ST, OPT_ST0 ) + DEF_ASM_OP2(fcmove, 0xdac8, 0, OPC_REG, OPT_ST, OPT_ST0 ) + DEF_ASM_OP2(fcmovbe, 0xdad0, 0, OPC_REG, OPT_ST, OPT_ST0 ) + DEF_ASM_OP2(fcmovu, 0xdad8, 0, OPC_REG, OPT_ST, OPT_ST0 ) + DEF_ASM_OP2(fcmovnb, 0xdbc0, 0, OPC_REG, OPT_ST, OPT_ST0 ) + DEF_ASM_OP2(fcmovne, 0xdbc8, 0, OPC_REG, OPT_ST, OPT_ST0 ) + DEF_ASM_OP2(fcmovnbe, 0xdbd0, 0, OPC_REG, OPT_ST, OPT_ST0 ) + DEF_ASM_OP2(fcmovnu, 0xdbd8, 0, OPC_REG, OPT_ST, OPT_ST0 ) + + DEF_ASM_OP2(fucomi, 0xdbe8, 0, OPC_REG, OPT_ST, OPT_ST0 ) + DEF_ASM_OP2(fcomi, 0xdbf0, 0, OPC_REG, OPT_ST, OPT_ST0 ) + DEF_ASM_OP2(fucomip, 0xdfe8, 0, OPC_REG, OPT_ST, OPT_ST0 ) + DEF_ASM_OP2(fcomip, 0xdff0, 0, OPC_REG, OPT_ST, OPT_ST0 ) + + /* mmx */ + DEF_ASM_OP0(emms, 0x0f77) /* must be last OP0 */ + DEF_ASM_OP2(movd, 0x0f6e, 0, OPC_MODRM, OPT_EA | OPT_REG32, OPT_MMX ) +ALT(DEF_ASM_OP2(movd, 0x0f7e, 0, OPC_MODRM, OPT_MMX, OPT_EA | OPT_REG32 )) + DEF_ASM_OP2(movq, 0x0f6f, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) +ALT(DEF_ASM_OP2(movq, 0x0f7f, 0, OPC_MODRM, OPT_MMX, OPT_EA | OPT_MMX )) + DEF_ASM_OP2(packssdw, 0x0f6b, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(packsswb, 0x0f63, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(packuswb, 0x0f67, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(paddb, 0x0ffc, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(paddw, 0x0ffd, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(paddd, 0x0ffe, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(paddsb, 0x0fec, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(paddsw, 0x0fed, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(paddusb, 0x0fdc, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(paddusw, 0x0fdd, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(pand, 0x0fdb, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(pandn, 0x0fdf, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(pcmpeqb, 0x0f74, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(pcmpeqw, 0x0f75, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(pcmpeqd, 0x0f76, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(pcmpgtb, 0x0f64, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(pcmpgtw, 0x0f65, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(pcmpgtd, 0x0f66, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(pmaddwd, 0x0ff5, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(pmulhw, 0x0fe5, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(pmullw, 0x0fd5, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(por, 0x0feb, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(psllw, 0x0ff1, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) +ALT(DEF_ASM_OP2(psllw, 0x0f71, 6, OPC_MODRM, OPT_IM8, OPT_MMX )) + DEF_ASM_OP2(pslld, 0x0ff2, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) +ALT(DEF_ASM_OP2(pslld, 0x0f72, 6, OPC_MODRM, OPT_IM8, OPT_MMX )) + DEF_ASM_OP2(psllq, 0x0ff3, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) +ALT(DEF_ASM_OP2(psllq, 0x0f73, 6, OPC_MODRM, OPT_IM8, OPT_MMX )) + DEF_ASM_OP2(psraw, 0x0fe1, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) +ALT(DEF_ASM_OP2(psraw, 0x0f71, 4, OPC_MODRM, OPT_IM8, OPT_MMX )) + DEF_ASM_OP2(psrad, 0x0fe2, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) +ALT(DEF_ASM_OP2(psrad, 0x0f72, 4, OPC_MODRM, OPT_IM8, OPT_MMX )) + DEF_ASM_OP2(psrlw, 0x0fd1, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) +ALT(DEF_ASM_OP2(psrlw, 0x0f71, 2, OPC_MODRM, OPT_IM8, OPT_MMX )) + DEF_ASM_OP2(psrld, 0x0fd2, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) +ALT(DEF_ASM_OP2(psrld, 0x0f72, 2, OPC_MODRM, OPT_IM8, OPT_MMX )) + DEF_ASM_OP2(psrlq, 0x0fd3, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) +ALT(DEF_ASM_OP2(psrlq, 0x0f73, 2, OPC_MODRM, OPT_IM8, OPT_MMX )) + DEF_ASM_OP2(psubb, 0x0ff8, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(psubw, 0x0ff9, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(psubd, 0x0ffa, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(psubsb, 0x0fe8, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(psubsw, 0x0fe9, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(psubusb, 0x0fd8, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(psubusw, 0x0fd9, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(punpckhbw, 0x0f68, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(punpckhwd, 0x0f69, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(punpckhdq, 0x0f6a, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(punpcklbw, 0x0f60, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(punpcklwd, 0x0f61, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(punpckldq, 0x0f62, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + DEF_ASM_OP2(pxor, 0x0fef, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ) + +#undef ALT +#undef DEF_ASM_OP0 +#undef DEF_ASM_OP0L +#undef DEF_ASM_OP1 +#undef DEF_ASM_OP2 +#undef DEF_ASM_OP3 diff --git a/tccasm.c b/tccasm.c new file mode 100644 index 0000000..5b3b359 --- /dev/null +++ b/tccasm.c @@ -0,0 +1,744 @@ +/* + * GAS like assembler for TCC + * + * Copyright (c) 2001, 2002 Fabrice Bellard + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +static int asm_get_local_label_name(TCCState *s1, unsigned int n) +{ + char buf[64]; + TokenSym *ts; + + snprintf(buf, sizeof(buf), "L..%u", n); + ts = tok_alloc(buf, strlen(buf)); + return ts->tok; +} + +/* We do not use the C expression parser to handle symbols. Maybe the + C expression parser could be tweaked to do so. */ + +static void asm_expr_unary(TCCState *s1, ExprValue *pe) +{ + Sym *sym; + int op, n, label; + const char *p; + + switch(tok) { + case TOK_PPNUM: + p = tokc.cstr->data; + n = strtol(p, (char **)&p, 0); + if (*p == 'b' || *p == 'f') { + /* backward or forward label */ + label = asm_get_local_label_name(s1, n); + sym = label_find(label); + if (*p == 'b') { + /* backward : find the last corresponding defined label */ + if (sym && sym->r == 0) + sym = sym->prev_tok; + if (!sym) + error("local label '%d' not found backward", n); + } else { + /* forward */ + if (!sym || sym->r) { + /* if the last label is defined, then define a new one */ + sym = label_push(&s1->asm_labels, label, 0); + sym->type.t = VT_STATIC | VT_VOID; + } + } + pe->v = 0; + pe->sym = sym; + } else if (*p == '\0') { + pe->v = n; + pe->sym = NULL; + } else { + error("invalid number syntax"); + } + next(); + break; + case '+': + next(); + asm_expr_unary(s1, pe); + break; + case '-': + case '~': + op = tok; + next(); + asm_expr_unary(s1, pe); + if (pe->sym) + error("invalid operation with label"); + if (op == '-') + pe->v = -pe->v; + else + pe->v = ~pe->v; + break; + default: + if (tok >= TOK_IDENT) { + /* label case : if the label was not found, add one */ + sym = label_find(tok); + if (!sym) { + sym = label_push(&s1->asm_labels, tok, 0); + /* NOTE: by default, the symbol is global */ + sym->type.t = VT_VOID; + } + pe->v = 0; + pe->sym = sym; + next(); + } else { + error("bad expression syntax [%s]", get_tok_str(tok, &tokc)); + } + break; + } +} + +static void asm_expr_prod(TCCState *s1, ExprValue *pe) +{ + int op; + ExprValue e2; + + asm_expr_unary(s1, pe); + for(;;) { + op = tok; + if (op != '*' && op != '/' && op != '%' && + op != TOK_SHL && op != TOK_SAR) + break; + next(); + asm_expr_unary(s1, &e2); + if (pe->sym || e2.sym) + error("invalid operation with label"); + switch(op) { + case '*': + pe->v *= e2.v; + break; + case '/': + if (e2.v == 0) { + div_error: + error("division by zero"); + } + pe->v /= e2.v; + break; + case '%': + if (e2.v == 0) + goto div_error; + pe->v %= e2.v; + break; + case TOK_SHL: + pe->v <<= e2.v; + break; + default: + case TOK_SAR: + pe->v >>= e2.v; + break; + } + } +} + +static void asm_expr_logic(TCCState *s1, ExprValue *pe) +{ + int op; + ExprValue e2; + + asm_expr_prod(s1, pe); + for(;;) { + op = tok; + if (op != '&' && op != '|' && op != '^') + break; + next(); + asm_expr_prod(s1, &e2); + if (pe->sym || e2.sym) + error("invalid operation with label"); + switch(op) { + case '&': + pe->v &= e2.v; + break; + case '|': + pe->v |= e2.v; + break; + default: + case '^': + pe->v ^= e2.v; + break; + } + } +} + +static inline void asm_expr_sum(TCCState *s1, ExprValue *pe) +{ + int op; + ExprValue e2; + + asm_expr_logic(s1, pe); + for(;;) { + op = tok; + if (op != '+' && op != '-') + break; + next(); + asm_expr_logic(s1, &e2); + if (op == '+') { + if (pe->sym != NULL && e2.sym != NULL) + goto cannot_relocate; + pe->v += e2.v; + if (pe->sym == NULL && e2.sym != NULL) + pe->sym = e2.sym; + } else { + pe->v -= e2.v; + /* NOTE: we are less powerful than gas in that case + because we store only one symbol in the expression */ + if (!pe->sym && !e2.sym) { + /* OK */ + } else if (pe->sym && !e2.sym) { + /* OK */ + } else if (pe->sym && e2.sym) { + if (pe->sym == e2.sym) { + /* OK */ + } else if (pe->sym->r == e2.sym->r && pe->sym->r != 0) { + /* we also accept defined symbols in the same section */ + pe->v += (long)pe->sym->next - (long)e2.sym->next; + } else { + goto cannot_relocate; + } + pe->sym = NULL; /* same symbols can be substracted to NULL */ + } else { + cannot_relocate: + error("invalid operation with label"); + } + } + } +} + +static void asm_expr(TCCState *s1, ExprValue *pe) +{ + asm_expr_sum(s1, pe); +} + +static int asm_int_expr(TCCState *s1) +{ + ExprValue e; + asm_expr(s1, &e); + if (e.sym) + expect("constant"); + return e.v; +} + +/* NOTE: the same name space as C labels is used to avoid using too + much memory when storing labels in TokenStrings */ +static void asm_new_label(TCCState *s1, int label, int is_local) +{ + Sym *sym; + + sym = label_find(label); + if (sym) { + if (sym->r) { + /* the label is already defined */ + if (!is_local) { + error("assembler label '%s' already defined", + get_tok_str(label, NULL)); + } else { + /* redefinition of local labels is possible */ + goto new_label; + } + } + } else { + new_label: + sym = label_push(&s1->asm_labels, label, 0); + sym->type.t = VT_STATIC | VT_VOID; + } + sym->r = cur_text_section->sh_num; + sym->next = (void *)ind; +} + +static void asm_free_labels(TCCState *st) +{ + Sym *s, *s1; + for(s = st->asm_labels; s != NULL; s = s1) { + s1 = s->prev; + /* define symbol value in object file */ + if (s->r) { + put_extern_sym(s, st->sections[s->r], (long)s->next, 0); + } + /* remove label */ + table_ident[s->v - TOK_IDENT]->sym_label = NULL; + tcc_free(s); + } + st->asm_labels = NULL; +} + +static void asm_parse_directive(TCCState *s1) +{ + int n, offset, v, size, tok1; + Section *sec; + uint8_t *ptr; + + /* assembler directive */ + next(); + sec = cur_text_section; + switch(tok) { + case TOK_ASM_align: + case TOK_ASM_skip: + case TOK_ASM_space: + tok1 = tok; + next(); + n = asm_int_expr(s1); + if (tok1 == TOK_ASM_align) { + if (n < 0 || (n & (n-1)) != 0) + error("alignment must be a positive power of two"); + offset = (ind + n - 1) & -n; + size = offset - ind; + } else { + size = n; + } + v = 0; + if (tok == ',') { + next(); + v = asm_int_expr(s1); + } + if (sec->sh_type != SHT_NOBITS) { + sec->data_offset = ind; + ptr = section_ptr_add(sec, size); + memset(ptr, v, size); + } + ind += size; + break; + case TOK_ASM_byte: + size = 1; + goto asm_data; + case TOK_ASM_word: + case TOK_SHORT: + size = 2; + goto asm_data; + case TOK_LONG: + case TOK_INT: + size = 4; + asm_data: + next(); + for(;;) { + ExprValue e; + asm_expr(s1, &e); + if (sec->sh_type != SHT_NOBITS) { + if (size == 4) { + gen_expr32(&e); + } else { + if (e.sym) + expect("constant"); + if (size == 1) + g(e.v); + else + gen_le16(e.v); + } + } else { + ind += size; + } + if (tok != ',') + break; + next(); + } + break; + default: + error("unknown assembler directive .%s", get_tok_str(tok, NULL)); + break; + } +} + + +/* assemble a file */ +static int tcc_assemble_internal(TCCState *s1, int do_preprocess) +{ + int opcode; + +#if 0 + /* print stats about opcodes */ + { + const ASMInstr *pa; + int freq[4]; + int op_vals[500]; + int nb_op_vals, i, j; + + nb_op_vals = 0; + memset(freq, 0, sizeof(freq)); + for(pa = asm_instrs; pa->sym != 0; pa++) { + freq[pa->nb_ops]++; + for(i=0;inb_ops;i++) { + for(j=0;jop_type[i] == op_vals[j]) + goto found; + } + op_vals[nb_op_vals++] = pa->op_type[i]; + found: ; + } + } + for(i=0;ibuf_ptr[0]; + tok_flags = TOK_FLAG_BOL | TOK_FLAG_BOF; + parse_flags = 0; + if (do_preprocess) + parse_flags |= PARSE_FLAG_PREPROCESS; + next(); + for(;;) { + if (tok == TOK_EOF) + break; + parse_flags |= PARSE_FLAG_LINEFEED; /* XXX: suppress that hack */ + redo: + if (tok == '#') { + /* horrible gas comment */ + while (tok != TOK_LINEFEED) + next(); + } else if (tok == '.') { + asm_parse_directive(s1); + } else if (tok == TOK_PPNUM) { + const char *p; + int n; + p = tokc.cstr->data; + n = strtol(p, (char **)&p, 10); + if (*p != '\0') + expect("':'"); + /* new local label */ + asm_new_label(s1, asm_get_local_label_name(s1, n), 1); + next(); + skip(':'); + goto redo; + } else if (tok >= TOK_IDENT) { + /* instruction or label */ + opcode = tok; + next(); + if (tok == ':') { + /* new label */ + asm_new_label(s1, opcode, 0); + next(); + goto redo; + } else { + asm_opcode(s1, opcode); + } + } + /* end of line */ + if (tok != ';' && tok != TOK_LINEFEED){ + expect("end of line"); + } + parse_flags &= ~PARSE_FLAG_LINEFEED; /* XXX: suppress that hack */ + next(); + } + + asm_free_labels(s1); + + return 0; +} + +/* Assemble the current file */ +static int tcc_assemble(TCCState *s1, int do_preprocess) +{ + int ret; + + preprocess_init(s1); + + /* default section is text */ + cur_text_section = text_section; + ind = cur_text_section->data_offset; + + ret = tcc_assemble_internal(s1, do_preprocess); + + cur_text_section->data_offset = ind; + return ret; +} + +/********************************************************************/ +/* GCC inline asm support */ + +/* assemble the string 'str' in the current C compilation unit without + C preprocessing. NOTE: str is modified by modifying the '\0' at the + end */ +static void tcc_assemble_inline(TCCState *s1, char *str, int len) +{ + BufferedFile *bf, *saved_file; + int saved_parse_flags, *saved_macro_ptr; + + bf = tcc_malloc(sizeof(BufferedFile)); + memset(bf, 0, sizeof(BufferedFile)); + bf->fd = -1; + bf->buf_ptr = str; + bf->buf_end = str + len; + str[len] = CH_EOB; + /* same name as current file so that errors are correctly + reported */ + pstrcpy(bf->filename, sizeof(bf->filename), file->filename); + bf->line_num = file->line_num; + saved_file = file; + file = bf; + saved_parse_flags = parse_flags; + saved_macro_ptr = macro_ptr; + macro_ptr = NULL; + + tcc_assemble_internal(s1, 0); + + parse_flags = saved_parse_flags; + macro_ptr = saved_macro_ptr; + file = saved_file; + tcc_free(bf); +} + +/* find a constraint by its number or id (gcc 3 extended + syntax). return -1 if not found. Return in *pp in char after the + constraint */ +static int find_constraint(ASMOperand *operands, int nb_operands, + const char *name, const char **pp) +{ + int index; + TokenSym *ts; + const char *p; + + if (isnum(*name)) { + index = 0; + while (isnum(*name)) { + index = (index * 10) + (*name) - '0'; + name++; + } + if ((unsigned)index >= nb_operands) + index = -1; + } else if (*name == '[') { + name++; + p = strchr(name, ']'); + if (p) { + ts = tok_alloc(name, p - name); + for(index = 0; index < nb_operands; index++) { + if (operands[index].id == ts->tok) + goto found; + } + index = -1; + found: + name = p + 1; + } else { + index = -1; + } + } else { + index = -1; + } + if (pp) + *pp = name; + return index; +} + +static void subst_asm_operands(ASMOperand *operands, int nb_operands, + int nb_outputs, + CString *out_str, CString *in_str) +{ + int c, index, modifier; + const char *str; + ASMOperand *op; + SValue sv; + + cstr_new(out_str); + str = in_str->data; + for(;;) { + c = *str++; + if (c == '%') { + if (*str == '%') { + str++; + goto add_char; + } + modifier = 0; + if (*str == 'c' || *str == 'n' || + *str == 'b' || *str == 'w' || *str == 'h') + modifier = *str++; + index = find_constraint(operands, nb_operands, str, &str); + if (index < 0) + error("invalid operand reference after %%"); + op = &operands[index]; + sv = *op->vt; + if (op->reg >= 0) { + sv.r = op->reg; + if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) + sv.r |= VT_LVAL; + } + subst_asm_operand(out_str, &sv, modifier); + } else { + add_char: + cstr_ccat(out_str, c); + if (c == '\0') + break; + } + } +} + + +static void parse_asm_operands(ASMOperand *operands, int *nb_operands_ptr, + int is_output) +{ + ASMOperand *op; + int nb_operands; + + if (tok != ':') { + nb_operands = *nb_operands_ptr; + for(;;) { + if (nb_operands >= MAX_ASM_OPERANDS) + error("too many asm operands"); + op = &operands[nb_operands++]; + op->id = 0; + if (tok == '[') { + next(); + if (tok < TOK_IDENT) + expect("identifier"); + op->id = tok; + next(); + skip(']'); + } + if (tok != TOK_STR) + expect("string constant"); + op->constraint = tcc_malloc(tokc.cstr->size); + strcpy(op->constraint, tokc.cstr->data); + next(); + skip('('); + gexpr(); + if (is_output) { + test_lvalue(); + } else { + /* we want to avoid LLOCAL case. note that it may come + from register storage, so we need to convert (reg) + case */ + if ((vtop->r & VT_LVAL) && + ((vtop->r & VT_VALMASK) == VT_LLOCAL || + (vtop->r & VT_VALMASK) < VT_CONST)) { + gv(RC_INT); + } + } + op->vt = vtop; + skip(')'); + if (tok == ',') { + next(); + } else { + break; + } + } + *nb_operands_ptr = nb_operands; + } +} + +/* parse the GCC asm() instruction */ +static void asm_instr(void) +{ + CString astr, astr1; + ASMOperand operands[MAX_ASM_OPERANDS]; + int nb_inputs, nb_outputs, nb_operands, i; + uint8_t input_regs_allocated[NB_ASM_REGS]; + uint8_t output_regs_allocated[NB_ASM_REGS]; + uint8_t clobber_regs[NB_ASM_REGS]; + + next(); + /* since we always generate the asm() instruction, we can ignore + volatile */ + if (tok == TOK_VOLATILE1 || tok == TOK_VOLATILE2 || tok == TOK_VOLATILE3) { + next(); + } + skip('('); + /* read the string */ + if (tok != TOK_STR) + expect("string constant"); + cstr_new(&astr); + while (tok == TOK_STR) { + /* XXX: add \0 handling too ? */ + cstr_cat(&astr, tokc.cstr->data); + next(); + } + cstr_ccat(&astr, '\0'); + nb_operands = 0; + nb_outputs = 0; + memset(clobber_regs, 0, sizeof(clobber_regs)); + if (tok == ':') { + next(); + /* output args */ + parse_asm_operands(operands, &nb_operands, 1); + nb_outputs = nb_operands; + if (tok == ':') { + next(); + /* input args */ + parse_asm_operands(operands, &nb_operands, 0); + if (tok == ':') { + /* clobber list */ + /* XXX: handle registers */ + next(); + for(;;) { + if (tok != TOK_STR) + expect("string constant"); + asm_clobber(clobber_regs, tokc.cstr->data); + next(); + if (tok == ',') { + next(); + } else { + break; + } + } + } + } + } + skip(')'); + /* NOTE: we do not eat the ';' so that we can restore the current + token after the assembler parsing */ + if (tok != ';') + expect("';'"); + nb_inputs = nb_operands - nb_outputs; + + /* save all values in the memory */ + save_regs(0); + + /* compute constraints */ + asm_compute_constraints(input_regs_allocated, + operands, nb_operands, nb_outputs, 0, + NULL); + asm_compute_constraints(output_regs_allocated, + operands, nb_operands, nb_outputs, 1, + input_regs_allocated); + + /* substitute the operands in the asm string */ +#ifdef ASM_DEBUG + printf("asm: \"%s\"\n", (char *)astr.data); +#endif + subst_asm_operands(operands, nb_operands, nb_outputs, &astr1, &astr); + cstr_free(&astr); +#ifdef ASM_DEBUG + printf("subst_asm: \"%s\"\n", (char *)astr1.data); +#endif + + /* generate loads */ + asm_gen_code(operands, nb_operands, nb_outputs, 0, clobber_regs); + + /* assemble the string with tcc internal assembler */ + tcc_assemble_inline(tcc_state, astr1.data, astr1.size - 1); + + /* restore the current C token */ + next(); + + /* store the output values if needed */ + asm_gen_code(operands, nb_operands, nb_outputs, 1, clobber_regs); + + /* free everything */ + for(i=0;iconstraint); + vpop(); + } + cstr_free(&astr1); +} +