diff --git a/i386-asm.c b/i386-asm.c
index 5011af5..0c0985e 100644
--- a/i386-asm.c
+++ b/i386-asm.c
@@ -42,6 +42,7 @@
 #define OPCT_IS(v,i) (((v) & OPCT_MASK) == (i))
 
 #define OPC_0F        0x100 /* Is secondary map (0x0f prefix) */
+#define OPC_48        0x200 /* Always has REX prefix */
 #ifdef TCC_TARGET_X86_64
 # define OPC_WLQ     0x1000  /* accepts w, l, q or no suffix */
 # define OPC_BWLQ    (OPC_B | OPC_WLQ) /* accepts b, w, l, q or no suffix */
@@ -785,7 +786,7 @@ ST_FUNC void asm_opcode(TCCState *s1, int opcode)
                should only be done if we really have an >32bit
                imm64, and that is hardcoded.  Ignore it here.  */
             if (pa->opcode == 0xb0 && ops[0].type != OP_IM64
-                && ops[1].type == OP_REG64
+                && (ops[1].type & OP_REG) == OP_REG64
                 && !(pa->instr_type & OPC_0F))
                 continue;
 #endif
@@ -901,14 +902,16 @@ ST_FUNC void asm_opcode(TCCState *s1, int opcode)
         g(0x66);
 #ifdef TCC_TARGET_X86_64
     rex64 = 0;
-    if (s == 3 || (alltypes & OP_REG64)) {
+    if (pa->instr_type & OPC_48)
+        rex64 = 1;
+    else if (s == 3 || (alltypes & OP_REG64)) {
         /* generate REX prefix */
         int default64 = 0;
         for(i = 0; i < nb_ops; i++) {
-            if (op_type[i] == OP_REG64) {
+            if (op_type[i] == OP_REG64 && pa->opcode != 0xb8) {
                 /* If only 64bit regs are accepted in one operand
                    this is a default64 instruction without need for
-                   REX prefixes.  */
+                   REX prefixes, except for movabs(0xb8).  */
                 default64 = 1;
                 break;
             }
diff --git a/tests/asmtest.S b/tests/asmtest.S
index 280aeaf..5578705 100644
--- a/tests/asmtest.S
+++ b/tests/asmtest.S
@@ -114,12 +114,21 @@ notl %r15d
 movzb 0x1000, %eax
 movzb 0x1000, %ax
 
+mov $0x12345678,%eax
+
 #ifdef __x86_64__
 movzb 0x1000, %rax
 movzbq 0x1000, %rbx
 movsbq 0x1000, %rdx
 movzwq 0x1000, %rdi
 movswq 0x1000, %rdx
+movslq %eax, %rcx
+mov $0x12345678,%rax
+mov $0x12345678,%rdx
+mov $0x12345678,%r10
+mov $0x123456789abcdef0,%rax
+mov $0x123456789abcdef0,%rcx
+mov $0x123456789abcdef0,%r11
 #endif
 
 #ifdef __i386__
@@ -546,6 +555,7 @@ invlpg 0x1000
 cmpxchg8b 0x1002
 #ifdef __x86_64__
 cmpxchg16b (%rax)
+cmpxchg16b (%r10,%r11)
 #endif
 
 fcmovb %st(5), %st
@@ -569,6 +579,7 @@ fucomip %st(5), %st
 cmovne %ax, %si
 #ifdef __x86_64__
 bswapq %rsi
+bswapq %r10
 cmovz %rdi,%rbx
 #endif
 
@@ -675,7 +686,9 @@ int $0x10
 prefetchw (%rdi)
 clflush 0x1000(%rax,%rcx)
 fxsaveq (%rdx)
+fxsaveq (%r11)
 fxrstorq (%rcx)
+fxrstorq (%r10)
 #endif
 
 
@@ -751,6 +764,9 @@ int $0x10
 sidtq 0x1000
 
 swapgs
+
+str %rdx
+str %r9
 #endif
 
 lmsw 0x1000
@@ -879,6 +895,7 @@ overrideme:
 #ifdef __x86_64__
 movq %rcx, %mm1
 movq %rdx, %xmm2
+movq %r13, %xmm3
 /* movq mem64->xmm is encoded as f30f7e by GAS, but as 660f6e by tcc
    (which really is a movd and would need a REX.W prefix to be
    movq).  */
diff --git a/x86_64-asm.h b/x86_64-asm.h
index 675e7df..cb9eb16 100644
--- a/x86_64-asm.h
+++ b/x86_64-asm.h
@@ -106,8 +106,8 @@ ALT(DEF_ASM_OP2(movb, 0x8a, 0, OPC_MODRM | OPC_BWLX, OPT_EA | OPT_REG, OPT_REG))
    the full movabs form (64bit immediate).  For IM32->REG64 we prefer
    the 0xc7 opcode.  So disallow all 64bit forms and code the rest by hand.  */
 ALT(DEF_ASM_OP2(movb, 0xb0, 0, OPC_REG | OPC_BWLX, OPT_IM, OPT_REG))
-ALT(DEF_ASM_OP2(mov, 0x48b8, 0, OPC_REG, OPT_IM64, OPT_REG64))
-ALT(DEF_ASM_OP2(movq, 0x48b8, 0, OPC_REG, OPT_IM64, OPT_REG64))
+ALT(DEF_ASM_OP2(mov, 0xb8, 0, OPC_REG, OPT_IM64, OPT_REG64))
+ALT(DEF_ASM_OP2(movq, 0xb8, 0, OPC_REG, OPT_IM64, OPT_REG64))
 ALT(DEF_ASM_OP2(movb, 0xc6, 0, OPC_MODRM | OPC_BWLX, OPT_IM, OPT_REG | OPT_EA))
 
 ALT(DEF_ASM_OP2(movw, 0x8c, 0, OPC_MODRM | OPC_WLX, OPT_SEG, OPT_EA | OPT_REG))
@@ -123,7 +123,7 @@ ALT(DEF_ASM_OP2(movsbl, 0x0fbe, 0, OPC_MODRM, OPT_REG8 | OPT_EA, OPT_REG32))
 ALT(DEF_ASM_OP2(movsbq, 0x0fbe, 0, OPC_MODRM, OPT_REG8 | OPT_EA, OPT_REGW))
 ALT(DEF_ASM_OP2(movswl, 0x0fbf, 0, OPC_MODRM, OPT_REG16 | OPT_EA, OPT_REG32))
 ALT(DEF_ASM_OP2(movswq, 0x0fbf, 0, OPC_MODRM, OPT_REG16 | OPT_EA, OPT_REG))
-ALT(DEF_ASM_OP2(movslq, 0x4863, 0, OPC_MODRM, OPT_REG32 | OPT_EA, OPT_REG))
+ALT(DEF_ASM_OP2(movslq, 0x63, 0, OPC_MODRM, OPT_REG32 | OPT_EA, OPT_REG))
 ALT(DEF_ASM_OP2(movzbw, 0x0fb6, 0, OPC_MODRM | OPC_WLX, OPT_REG8 | OPT_EA, OPT_REGW))
 ALT(DEF_ASM_OP2(movzwl, 0x0fb7, 0, OPC_MODRM, OPT_REG16 | OPT_EA, OPT_REG32))
 ALT(DEF_ASM_OP2(movzwq, 0x0fb7, 0, OPC_MODRM, OPT_REG16 | OPT_EA, OPT_REG))
@@ -354,8 +354,8 @@ ALT(DEF_ASM_OP1(fstsw, 0xdd, 7, OPC_MODRM | OPC_FWAIT, OPT_EA ))
    If the operand would use extended registers we would have to modify
    it instead of generating a second one.  Currently that's no problem
    with TCC, we don't use extended registers.  */
-    DEF_ASM_OP1(fxsaveq, 0x480fae, 0, OPC_MODRM, OPT_EA )
-    DEF_ASM_OP1(fxrstorq, 0x480fae, 1, OPC_MODRM, OPT_EA )
+    DEF_ASM_OP1(fxsaveq, 0x0fae, 0, OPC_MODRM | OPC_48, OPT_EA )
+    DEF_ASM_OP1(fxrstorq, 0x0fae, 1, OPC_MODRM | OPC_48, OPT_EA )
 
     /* segments */
     DEF_ASM_OP2(arpl, 0x63, 0, OPC_MODRM, OPT_REG16, OPT_REG16 | OPT_EA)
@@ -376,7 +376,7 @@ ALT(DEF_ASM_OP2(lslw, 0x0f03, 0, OPC_MODRM | OPC_WLX, OPT_EA | OPT_REG, OPT_REG)
     DEF_ASM_OP1(smsw, 0x0f01, 4, OPC_MODRM, OPT_REG | OPT_EA)
     DEF_ASM_OP1(str, 0x0f00, 1, OPC_MODRM, OPT_REG32 | OPT_EA)
 ALT(DEF_ASM_OP1(str, 0x660f00, 1, OPC_MODRM, OPT_REG16))
-ALT(DEF_ASM_OP1(str, 0x480f00, 1, OPC_MODRM, OPT_REG64))
+ALT(DEF_ASM_OP1(str, 0x0f00, 1, OPC_MODRM | OPC_48, OPT_REG64))
     DEF_ASM_OP1(verr, 0x0f00, 4, OPC_MODRM, OPT_REG | OPT_EA)
     DEF_ASM_OP1(verw, 0x0f00, 5, OPC_MODRM, OPT_REG | OPT_EA)
     DEF_ASM_OP0L(swapgs, 0x0f01, 7, OPC_MODRM)
@@ -385,7 +385,7 @@ ALT(DEF_ASM_OP1(str, 0x480f00, 1, OPC_MODRM, OPT_REG64))
     /* bswap can't be applied to 16bit regs */
     DEF_ASM_OP1(bswap, 0x0fc8, 0, OPC_REG, OPT_REG32 )
     DEF_ASM_OP1(bswapl, 0x0fc8, 0, OPC_REG, OPT_REG32 )
-    DEF_ASM_OP1(bswapq, 0x480fc8, 0, OPC_REG, OPT_REG64 )
+    DEF_ASM_OP1(bswapq, 0x0fc8, 0, OPC_REG | OPC_48, OPT_REG64 )
 
 ALT(DEF_ASM_OP2(xaddb, 0x0fc0, 0, OPC_MODRM | OPC_BWLX, OPT_REG, OPT_REG | OPT_EA ))
 ALT(DEF_ASM_OP2(cmpxchgb, 0x0fb0, 0, OPC_MODRM | OPC_BWLX, OPT_REG, OPT_REG | OPT_EA ))
@@ -395,7 +395,7 @@ ALT(DEF_ASM_OP2(cmpxchgb, 0x0fb0, 0, OPC_MODRM | OPC_BWLX, OPT_REG, OPT_REG | OP
     DEF_ASM_OP1(cmpxchg8b, 0x0fc7, 1, OPC_MODRM, OPT_EA )
 
     /* AMD 64 */
-    DEF_ASM_OP1(cmpxchg16b, 0x480fc7, 1, OPC_MODRM, OPT_EA )
+    DEF_ASM_OP1(cmpxchg16b, 0x0fc7, 1, OPC_MODRM | OPC_48, OPT_EA )
 
     /* pentium pro */
 ALT(DEF_ASM_OP2(cmovo, 0x0f40, 0, OPC_MODRM | OPC_TEST | OPC_WLX, OPT_REGW | OPT_EA, OPT_REGW))
@@ -420,7 +420,7 @@ ALT(DEF_ASM_OP2(cmovo, 0x0f40, 0, OPC_MODRM | OPC_TEST | OPC_WLX, OPT_REGW | OPT
 /* movd shouldn't accept REG64, but AMD64 spec uses it for 32 and
    64 bit moves, so let's be compatible.  */
 ALT(DEF_ASM_OP2(movd, 0x0f6e, 0, OPC_MODRM, OPT_EA | OPT_REG64, OPT_MMXSSE ))
-ALT(DEF_ASM_OP2(movq, 0x480f6e, 0, OPC_MODRM, OPT_REG64, OPT_MMXSSE ))
+ALT(DEF_ASM_OP2(movq, 0x0f6e, 0, OPC_MODRM | OPC_48, OPT_REG64, OPT_MMXSSE ))
 ALT(DEF_ASM_OP2(movq, 0x0f6f, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX ))
 ALT(DEF_ASM_OP2(movd, 0x0f7e, 0, OPC_MODRM, OPT_MMXSSE, OPT_EA | OPT_REG32 ))
 ALT(DEF_ASM_OP2(movd, 0x0f7e, 0, OPC_MODRM, OPT_MMXSSE, OPT_EA | OPT_REG64 ))
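
A note on the encoding detail this patch fixes: an x86-64 REX prefix is a
single byte of the form 0100WRXB, where W selects 64-bit operand size and
R, X, B supply the fourth register-number bit for ModRM.reg, SIB.index and
ModRM.rm (or the opcode-embedded register), which is what makes r8-r15
reachable.  The old table entries baked 0x48 (REX.W alone) into the opcode
itself (0x48b8, 0x480fae, 0x480fc8, ...), leaving no byte in which to set
R/X/B, so these instructions could not be assembled with extended
registers.  The new OPC_48 flag instead requests W=1 from the normal REX
emitter, which can merge in the extension bits.  A minimal sketch of that
prefix computation (illustrative only; rex_byte is a made-up name, not
TCC's actual helper):

    #include <stdint.h>

    /* Build a REX prefix byte, layout 0100WRXB. */
    static uint8_t rex_byte(int w, int r, int x, int b)
    {
        return 0x40
            | (w << 3)   /* REX.W: 64-bit operand size */
            | (r << 2)   /* REX.R: bit 3 of ModRM.reg */
            | (x << 1)   /* REX.X: bit 3 of SIB.index */
            | b;         /* REX.B: bit 3 of ModRM.rm / opcode register */
    }

For example, "bswapq %r10" (0x0f 0xc8+rd) needs W=1, now forced by OPC_48,
plus B=1 because r10 is register number 10 and its top bit travels in
REX.B; the emitted prefix is then 0x49 rather than a bare 0x48.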