From b9f01dffc61d41868157d3cc31f460d952502779 Mon Sep 17 00:00:00 2001
From: Michael Matz <matz@suse.de>
Date: Wed, 11 May 2016 23:47:02 +0200
Subject: [PATCH] x86-64-asm: Clean up 64bit immediate support

Fix it to actually be able to parse 64bit immediates (enlarge the
operand value type).  Then, generally there's no need to accept
IM64 anywhere except in the 0xb8+r mov opcodes, so OP_IM is
unnecessary, as is OPT_IMNO64.  Improve the generated code a bit
by preferring the 0xc7 opcode for im32->reg64 instead of the
im64->reg64 form (which we therefore hardcode).
---
 i386-asm.c      | 34 ++++++++++++++++------------------
 tcc.h           |  2 +-
 tccasm.c        |  3 ++-
 tests/asmtest.S | 10 ++++++++++
 x86_64-asm.h    | 14 ++++++++++----
 5 files changed, 39 insertions(+), 24 deletions(-)

diff --git a/i386-asm.c b/i386-asm.c
index bbc1995..b158e81 100644
--- a/i386-asm.c
+++ b/i386-asm.c
@@ -83,13 +83,10 @@ enum {
     OPT_INDIR,  /* *(expr) */
     /* composite types */
     OPT_COMPOSITE_FIRST,
-    OPT_IM,     /* IM8 | IM16 | IM32 | IM64 */
+    OPT_IM,     /* IM8 | IM16 | IM32 */
     OPT_REG,    /* REG8 | REG16 | REG32 | REG64 */
     OPT_REGW,   /* REG16 | REG32 | REG64 */
-    OPT_IMW,    /* IM16 | IM32 | IM64 */
-#ifdef TCC_TARGET_X86_64
-    OPT_IMNO64, /* IM16 | IM32 */
-#endif
+    OPT_IMW,    /* IM16 | IM32 */
     /* can be ored with any OPT_xxx */
     OPT_EA = 0x80
 };
@@ -128,12 +125,10 @@ enum {
 #define OP_REG    (OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64)
 
 #ifdef TCC_TARGET_X86_64
-# define OP_IM      OP_IM64
 # define TREG_XAX   TREG_RAX
 # define TREG_XCX   TREG_RCX
 # define TREG_XDX   TREG_RDX
 #else
-# define OP_IM      OP_IM32
 # define TREG_XAX   TREG_EAX
 # define TREG_XCX   TREG_ECX
 # define TREG_XDX   TREG_EDX
@@ -347,7 +342,7 @@ static void parse_operand(TCCState *s1, Operand *op)
         /* constant value */
         next();
         asm_expr(s1, &e);
-        op->type = OP_IM;
+        op->type = OP_IM32;
         op->e.v = e.v;
         op->e.sym = e.sym;
         if (!op->e.sym) {
@@ -358,8 +353,8 @@ static void parse_operand(TCCState *s1, Operand *op)
             if (op->e.v == (uint16_t)op->e.v)
                 op->type |= OP_IM16;
 #ifdef TCC_TARGET_X86_64
-            if (op->e.v == (uint32_t)op->e.v)
-                op->type |= OP_IM32;
+            if (op->e.v != (uint32_t)op->e.v)
+                op->type = OP_IM64;
 #endif
         }
     } else {
@@ -587,6 +582,14 @@ ST_FUNC void asm_opcode(TCCState *s1, int opcode)
         }
         if (pa->nb_ops != nb_ops)
             continue;
+#ifdef TCC_TARGET_X86_64
+	/* Special case for moves.  The IM64->REG64 form should only be
+	   selected if we really have a >32bit imm64, and that form is
+	   hardcoded.  Skip the generic 0xb0 entry here.  */
+	if (pa->opcode == 0xb0 && ops[0].type != OP_IM64
+	    && ops[1].type == OP_REG64)
+	    continue;
+#endif
         /* now decode and check each operand */
 	alltypes = 0;
         for(i = 0; i < nb_ops; i++) {
@@ -595,7 +598,7 @@ ST_FUNC void asm_opcode(TCCState *s1, int opcode)
             op2 = op1 & 0x1f;
             switch(op2) {
             case OPT_IM:
-                v = OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64;
+                v = OP_IM8 | OP_IM16 | OP_IM32;
                 break;
             case OPT_REG:
                 v = OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64;
@@ -604,13 +607,8 @@ ST_FUNC void asm_opcode(TCCState *s1, int opcode)
                 v = OP_REG16 | OP_REG32 | OP_REG64;
                 break;
             case OPT_IMW:
-                v = OP_IM16 | OP_IM32 | OP_IM64;
-                break;
-#ifdef TCC_TARGET_X86_64
-            case OPT_IMNO64:
                 v = OP_IM16 | OP_IM32;
                 break;
-#endif
             default:
                 v = 1 << op2;
                 break;
@@ -647,7 +645,7 @@ ST_FUNC void asm_opcode(TCCState *s1, int opcode)
 #ifdef TCC_TARGET_X86_64
     /* XXX the autosize should rather be zero, to not have to adjust this
        all the time.  */
-    if ((pa->instr_type & OPC_WLQ) != OPC_WLQ)
+    if ((pa->instr_type & OPC_BWLQ) == OPC_B)
         autosize = NBWLX-2;
 #endif
     if (s == autosize) {
@@ -657,7 +655,7 @@ ST_FUNC void asm_opcode(TCCState *s1, int opcode)
         }
         if (s == autosize) {
             if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
-                (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32 | OP_IM64)))
+                (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32)))
                 s = 2;
             else
                 tcc_error("cannot infer opcode suffix");
diff --git a/tcc.h b/tcc.h
index 361ac9f..7b4481e 100644
--- a/tcc.h
+++ b/tcc.h
@@ -610,7 +610,7 @@ typedef struct CachedInclude {
 
 #ifdef CONFIG_TCC_ASM
 typedef struct ExprValue {
-    uint32_t v;
+    uint64_t v;
     Sym *sym;
 } ExprValue;
 
diff --git a/tccasm.c b/tccasm.c
index 3fd69f7..b494398 100644
--- a/tccasm.c
+++ b/tccasm.c
@@ -41,7 +41,8 @@ static Sym sym_dot;
 static void asm_expr_unary(TCCState *s1, ExprValue *pe)
 {
     Sym *sym;
-    int op, n, label;
+    int op, label;
+    long n;
     const char *p;
 
     switch(tok) {
diff --git a/tests/asmtest.S b/tests/asmtest.S
index 985d824..3589f33 100644
--- a/tests/asmtest.S
+++ b/tests/asmtest.S
@@ -46,6 +46,16 @@ movl %eax, 0x100(%ebx,%edx,2)
 movl 0x100(%ebx,%edx,2), %edx
 movw %ax, 0x100(%ebx,%edx,2)
 
+movw $0x1122,%si
+movl $0x112233,%edx
+#ifdef __x86_64__
+mov $0x11223344,%rbx
+movq $0x11223344,%rbx
+mov $0x1122334455,%rbx
+movq $0x1122334455,%rbx
+movl $0x11334455,(%rbx)
+#endif
+
 mov %eax, 0x12(,%edx,2)
         
 #ifdef __i386__
diff --git a/x86_64-asm.h b/x86_64-asm.h
index 4da5c0b..f047e81 100644
--- a/x86_64-asm.h
+++ b/x86_64-asm.h
@@ -97,7 +97,13 @@ ALT(DEF_ASM_OP2(movb, 0xa0, 0, OPC_BWLQ, OPT_ADDR, OPT_EAX))
 ALT(DEF_ASM_OP2(movb, 0xa2, 0, OPC_BWLQ, OPT_EAX, OPT_ADDR)) */
 ALT(DEF_ASM_OP2(movb, 0x88, 0, OPC_MODRM | OPC_BWLQ, OPT_REG, OPT_EA | OPT_REG))
 ALT(DEF_ASM_OP2(movb, 0x8a, 0, OPC_MODRM | OPC_BWLQ, OPT_EA | OPT_REG, OPT_REG))
+/* The moves are special: the 0xb8 form supports IM64 (the only insn that
+   does) with REG64.  It doesn't support IM32 with REG64; it would use
+   the full movabs form (64bit immediate).  For IM32->REG64 we prefer
+   the 0xc7 opcode.  So disallow all 64bit forms and code the rest by hand. */
 ALT(DEF_ASM_OP2(movb, 0xb0, 0, OPC_REG | OPC_BWLQ, OPT_IM, OPT_REG))
+ALT(DEF_ASM_OP2(mov,  0x48b8, 0, OPC_REG, OPT_IM64, OPT_REG64))
+ALT(DEF_ASM_OP2(movq, 0x48b8, 0, OPC_REG, OPT_IM64, OPT_REG64))
 ALT(DEF_ASM_OP2(movb, 0xc6, 0, OPC_MODRM | OPC_BWLQ, OPT_IM, OPT_REG | OPT_EA))
 
 ALT(DEF_ASM_OP2(movw, 0x8c, 0, OPC_MODRM | OPC_WLQ, OPT_SEG, OPT_EA | OPT_REG))
@@ -156,14 +162,14 @@ ALT(DEF_ASM_OP2(lgs, 0x0fb5, 0, OPC_MODRM, OPT_EA, OPT_REG32))
      /* arith */
 ALT(DEF_ASM_OP2(addb, 0x00, 0, OPC_ARITH | OPC_MODRM | OPC_BWLQ, OPT_REG, OPT_EA | OPT_REG)) /* XXX: use D bit ? */
 ALT(DEF_ASM_OP2(addb, 0x02, 0, OPC_ARITH | OPC_MODRM | OPC_BWLQ, OPT_EA | OPT_REG, OPT_REG))
-ALT(DEF_ASM_OP2(addb, 0x04, 0, OPC_ARITH | OPC_BWLQ, OPT_IMNO64, OPT_EAX))
-ALT(DEF_ASM_OP2(addb, 0x80, 0, OPC_ARITH | OPC_MODRM | OPC_BWLQ, OPT_IMNO64, OPT_EA | OPT_REG))
+ALT(DEF_ASM_OP2(addb, 0x04, 0, OPC_ARITH | OPC_BWLQ, OPT_IM, OPT_EAX))
+ALT(DEF_ASM_OP2(addb, 0x80, 0, OPC_ARITH | OPC_MODRM | OPC_BWLQ, OPT_IM, OPT_EA | OPT_REG))
 ALT(DEF_ASM_OP2(addw, 0x83, 0, OPC_ARITH | OPC_MODRM | OPC_WLQ, OPT_IM8S, OPT_EA | OPT_REG))
 
 ALT(DEF_ASM_OP2(testb, 0x84, 0, OPC_MODRM | OPC_BWLQ, OPT_REG, OPT_EA | OPT_REG))
 ALT(DEF_ASM_OP2(testb, 0x84, 0, OPC_MODRM | OPC_BWLQ, OPT_EA | OPT_REG, OPT_REG))
-ALT(DEF_ASM_OP2(testb, 0xa8, 0, OPC_BWLQ, OPT_IMNO64, OPT_EAX))
-ALT(DEF_ASM_OP2(testb, 0xf6, 0, OPC_MODRM | OPC_BWLQ, OPT_IMNO64, OPT_EA | OPT_REG))
+ALT(DEF_ASM_OP2(testb, 0xa8, 0, OPC_BWLQ, OPT_IM, OPT_EAX))
+ALT(DEF_ASM_OP2(testb, 0xf6, 0, OPC_MODRM | OPC_BWLQ, OPT_IM, OPT_EA | OPT_REG))
 
 ALT(DEF_ASM_OP1(incb, 0xfe, 0, OPC_MODRM | OPC_BWLQ, OPT_REG | OPT_EA))
 ALT(DEF_ASM_OP1(decb, 0xfe, 1, OPC_MODRM | OPC_BWLQ, OPT_REG | OPT_EA))