x86-64: use r8/r9 as generic integer registers

master
grischka 2009-12-20 02:19:51 +01:00
parent 0e5c0ee045
commit 070b86a870
1 changed file with 98 additions and 151 deletions

View File

@ -114,8 +114,8 @@ ST_DATA const int reg_classes[NB_REGS] = {
0, 0,
0, 0,
0, 0,
RC_R8, RC_INT | RC_R8,
RC_R9, RC_INT | RC_R9,
#endif #endif
}; };
@ -167,6 +167,17 @@ void gen_le64(int64_t c)
g(c >> 56); g(c >> 56);
} }
/* Emit opcode 'b', preceded by a REX prefix byte when one is needed.
 *
 *  ll : non-zero sets bit 3 of the prefix (callers pass 1 or an
 *       is64_type() result to request a 64-bit operation)
 *  r  : register/value whose REX_BASE() bit goes into bit 0 of the prefix
 *  r2 : register/value whose REX_BASE() bit goes into bit 2 of the prefix
 *  b  : opcode handed to o() after the optional prefix
 *
 * 'r' and 'r2' may be non-register values: anything whose VT_VALMASK part
 * is >= VT_CONST is replaced by 0 before REX_BASE() is applied.  The
 * prefix is emitted only if 'll' is non-zero or one of the two REX_BASE()
 * results is non-zero; otherwise just the opcode is output. */
void orex(int ll, int r, int r2, int b)
{
    int rm = (r & VT_VALMASK) >= VT_CONST ? 0 : r;
    int reg = (r2 & VT_VALMASK) >= VT_CONST ? 0 : r2;
    int rex = 0x40 | REX_BASE(rm) | (REX_BASE(reg) << 2) | (ll << 3);

    if (ll || REX_BASE(rm) || REX_BASE(reg))
        o(rex);
    o(b);
}
/* output a symbol and patch all calls to it */ /* output a symbol and patch all calls to it */
void gsym_addr(int t, int a) void gsym_addr(int t, int a)
{ {
@ -201,6 +212,7 @@ static int is_sse_float(int t) {
return bt == VT_DOUBLE || bt == VT_FLOAT; return bt == VT_DOUBLE || bt == VT_FLOAT;
} }
/* instruction + 4 bytes data. Return the address of the data */ /* instruction + 4 bytes data. Return the address of the data */
ST_FUNC int oad(int c, int s) ST_FUNC int oad(int c, int s)
{ {
@ -261,8 +273,7 @@ static void gen_gotpcrel(int r, Sym *sym, int c)
if (c) { if (c) {
/* we use add c, %xxx for displacement */ /* we use add c, %xxx for displacement */
o(0x48 + REX_BASE(r)); orex(1, r, 0, 0x81);
o(0x81);
o(0xc0 + REG_VALUE(r)); o(0xc0 + REG_VALUE(r));
gen_le32(c); gen_le32(c);
} }
@ -312,13 +323,7 @@ static void gen_modrm(int op_reg, int r, Sym *sym, int c)
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c) static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
{ {
int is_got; int is_got;
int rex = 0x48 | (REX_BASE(op_reg) << 2); orex(1, r, op_reg, opcode);
if ((r & VT_VALMASK) != VT_CONST &&
(r & VT_VALMASK) != VT_LOCAL) {
rex |= REX_BASE(VT_VALMASK & r);
}
o(rex);
o(opcode);
is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC); is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
gen_modrm_impl(op_reg, r, sym, c, is_got); gen_modrm_impl(op_reg, r, sym, c, is_got);
} }
@ -358,6 +363,7 @@ void load(int r, SValue *sv)
v = fr & VT_VALMASK; v = fr & VT_VALMASK;
if (fr & VT_LVAL) { if (fr & VT_LVAL) {
int b, ll;
if (v == VT_LLOCAL) { if (v == VT_LLOCAL) {
v1.type.t = VT_PTR; v1.type.t = VT_PTR;
v1.r = VT_LOCAL | VT_LVAL; v1.r = VT_LOCAL | VT_LVAL;
@ -365,86 +371,72 @@ void load(int r, SValue *sv)
load(r, &v1); load(r, &v1);
fr = r; fr = r;
} }
ll = 0;
if ((ft & VT_BTYPE) == VT_FLOAT) { if ((ft & VT_BTYPE) == VT_FLOAT) {
o(0x6e0f66); /* movd */ b = 0x6e0f66, r = 0; /* movd */
r = 0;
} else if ((ft & VT_BTYPE) == VT_DOUBLE) { } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
o(0x7e0ff3); /* movq */ b = 0x7e0ff3, r = 0; /* movq */
r = 0;
} else if ((ft & VT_BTYPE) == VT_LDOUBLE) { } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
o(0xdb); /* fldt */ b = 0xdb, r = 5; /* fldt */
r = 5; } else if ((ft & VT_TYPE) == VT_BYTE) {
b = 0xbe0f; /* movsbl */
} else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
b = 0xb60f; /* movzbl */
} else if ((ft & VT_TYPE) == VT_SHORT) {
b = 0xbf0f; /* movswl */
} else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
b = 0xb70f; /* movzwl */
} else { } else {
int x; ll = is64_type(ft);
if ((ft & VT_TYPE) == VT_BYTE) { b = 0x8b;
x = 0xbe0f; /* movsbl */ }
} else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) { if (ll) {
x = 0xb60f; /* movzbl */ gen_modrm64(b, r, fr, sv->sym, fc);
} else if ((ft & VT_TYPE) == VT_SHORT) { } else {
x = 0xbf0f; /* movswl */ orex(ll, fr, r, b);
} else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) { gen_modrm(r, fr, sv->sym, fc);
x = 0xb70f; /* movzwl */
} else if (is64_type(ft)) {
gen_modrm64(0x8b, r, fr, sv->sym, fc);
return;
} else {
x = 0x8b; /* movl */
}
if (REX_BASE(r) || (!(fr & ~0x0f) && REX_BASE(fr)))
o(0x40 + REX_BASE(fr) + (REX_BASE(r) << 2));
o(x);
} }
gen_modrm(r, fr, sv->sym, fc);
} else { } else {
if (v == VT_CONST) { if (v == VT_CONST) {
if (fr & VT_SYM) { if (fr & VT_SYM) {
#ifdef TCC_TARGET_PE #ifdef TCC_TARGET_PE
o(0x8d48 + (REX_BASE(r) << 2)); orex(1,0,r,0x8d);
o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */ o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
gen_addrpc32(fr, sv->sym, fc); gen_addrpc32(fr, sv->sym, fc);
#else #else
if (sv->sym->type.t & VT_STATIC) { if (sv->sym->type.t & VT_STATIC) {
o(0x8d48 + REX_BASE(r)); orex(1,0,r,0x8d);
o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */ o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
gen_addrpc32(fr, sv->sym, fc); gen_addrpc32(fr, sv->sym, fc);
} else { } else {
o(0x8b48 + REX_BASE(r)); orex(1,0,r,0x8b);
o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */ o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
gen_gotpcrel(r, sv->sym, fc); gen_gotpcrel(fr, sv->sym, fc);
} }
#endif #endif
} else if (is64_type(ft)) { } else if (is64_type(ft)) {
o(0x48 + REX_BASE(r)); orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
o(0xb8 + REG_VALUE(r)); /* mov $xx, r */
gen_le64(sv->c.ull); gen_le64(sv->c.ull);
} else { } else {
if (REX_BASE(r)) orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
o(0x41);
o(0xb8 + REG_VALUE(r)); /* mov $xx, r */
gen_le32(fc); gen_le32(fc);
} }
} else if (v == VT_LOCAL) { } else if (v == VT_LOCAL) {
o(0x48 | (REX_BASE(r) << 2)); orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
o(0x8d); /* lea xxx(%ebp), r */
gen_modrm(r, VT_LOCAL, sv->sym, fc); gen_modrm(r, VT_LOCAL, sv->sym, fc);
} else if (v == VT_CMP) { } else if (v == VT_CMP) {
if (REX_BASE(r)) orex(0,r,0,0);
o(0x41);
oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */ oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
if (REX_BASE(r)) orex(0,r,0, 0x0f); /* setxx %br */
o(0x41);
o(0x0f); /* setxx %br */
o(fc); o(fc);
o(0xc0 + REG_VALUE(r)); o(0xc0 + REG_VALUE(r));
} else if (v == VT_JMP || v == VT_JMPI) { } else if (v == VT_JMP || v == VT_JMPI) {
t = v & 1; t = v & 1;
if (REX_BASE(r)) orex(0,r,0,0);
o(0x41);
oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */ oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */ o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
gsym(fc); gsym(fc);
if (REX_BASE(r)) orex(0,r,0,0);
o(0x41);
oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */ oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
} else if (v != r) { } else if (v != r) {
if (r == TREG_XMM0) { if (r == TREG_XMM0) {
@ -462,8 +454,7 @@ void load(int r, SValue *sv)
o(0xf024); o(0xf024);
o(0xf02444dd); /* fldl -0x10(%rsp) */ o(0xf02444dd); /* fldl -0x10(%rsp) */
} else { } else {
o(0x48 | REX_BASE(r) | (REX_BASE(v) << 2)); orex(1,r,v, 0x89);
o(0x89);
o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */ o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
} }
} }
@ -519,11 +510,11 @@ void store(int r, SValue *v)
o(0x66); o(0x66);
o(pic); o(pic);
if (bt == VT_BYTE || bt == VT_BOOL) if (bt == VT_BYTE || bt == VT_BOOL)
o(0x88); orex(0, 0, r, 0x88);
else if (is64_type(bt)) else if (is64_type(bt))
op64 = 0x89; op64 = 0x89;
else else
o(0x89); orex(0, 0, r, 0x89);
} }
if (pic) { if (pic) {
/* xxx r, (%r11) where xxx is mov, movq, fld, or etc */ /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
@ -590,11 +581,12 @@ static int func_scratch;
void gen_offs_sp(int b, int r, int d) void gen_offs_sp(int b, int r, int d)
{ {
orex(1,0,r & 0x100 ? 0 : r, b);
if (d == (char)d) { if (d == (char)d) {
o(b | 0x4000 | (r << 11)); o(0x2444 | (REG_VALUE(r) << 3));
g(d); g(d);
} else { } else {
o(b | 0x8000 | (r << 11)); o(0x2484 | (REG_VALUE(r) << 3));
gen_le32(d); gen_le32(d);
} }
} }
@ -626,8 +618,7 @@ void gfunc_call(int nb_args)
size = (size + 15) & ~16; size = (size + 15) & ~16;
/* generate structure store */ /* generate structure store */
r = get_reg(RC_INT); r = get_reg(RC_INT);
o(0x48); gen_offs_sp(0x8d, r, args_size);
gen_offs_sp(0x24048d, r, args_size);
args_size += size; args_size += size;
/* generate memcpy call */ /* generate memcpy call */
@ -639,7 +630,7 @@ void gfunc_call(int nb_args)
} else if (bt == VT_LDOUBLE) { } else if (bt == VT_LDOUBLE) {
gv(RC_ST0); gv(RC_ST0);
gen_offs_sp(0x243cdb, 0, args_size); gen_offs_sp(0xdb, 0x107, args_size);
args_size += 16; args_size += 16;
} }
@ -660,9 +651,8 @@ void gfunc_call(int nb_args)
gv(RC_FLOAT); /* only one float register */ gv(RC_FLOAT); /* only one float register */
j = --gen_reg; j = --gen_reg;
if (j >= REGN) { if (j >= REGN) {
o(0x0f66),
/* movq %xmm0, j*8(%rsp) */ /* movq %xmm0, j*8(%rsp) */
gen_offs_sp(0x2444d6, 0, j*8); gen_offs_sp(0xd60f66, 0x100, j*8);
} else { } else {
/* movaps %xmm0, %xmmN */ /* movaps %xmm0, %xmmN */
o(0x280f); o(0x280f);
@ -670,15 +660,14 @@ void gfunc_call(int nb_args)
d = arg_regs[j]; d = arg_regs[j];
/* mov %xmm0, %rxx */ /* mov %xmm0, %rxx */
o(0x66); o(0x66);
o(0x7e0f48 + REX_BASE(d)); orex(1,d,0, 0x7e0f);
o(0xc0 + REG_VALUE(d)); o(0xc0 + REG_VALUE(d));
} }
} else { } else {
j = --gen_reg; j = --gen_reg;
if (j >= REGN) { if (j >= REGN) {
r = gv(RC_INT); r = gv(RC_INT);
o(0x48); gen_offs_sp(0x89, r, j*8);
gen_offs_sp(0x244489, r, j*8);
} else { } else {
d = arg_regs[j]; d = arg_regs[j];
if (d < NB_REGS) { if (d < NB_REGS) {
@ -686,8 +675,8 @@ void gfunc_call(int nb_args)
} else { } else {
r = gv(RC_INT); r = gv(RC_INT);
if (d != r) { if (d != r) {
o(0x8948 + REX_BASE(d)); orex(1,d,r, 0x89);
o(0xc0 + r*8 + REG_VALUE(d)); o(0xc0 + REG_VALUE(d) + REG_VALUE(r) * 8);
} }
} }
@ -695,7 +684,6 @@ void gfunc_call(int nb_args)
} }
vtop--; vtop--;
} }
save_regs(0); save_regs(0);
gcall_or_jmp(0); gcall_or_jmp(0);
vtop--; vtop--;
@ -845,6 +833,8 @@ void gfunc_call(int nb_args)
} }
} }
save_regs(0); /* save used temporary registers */
/* for struct arguments, we need to call memcpy and the function /* for struct arguments, we need to call memcpy and the function
call breaks register passing arguments we are preparing. call breaks register passing arguments we are preparing.
So, we process arguments which will be passed by stack first. */ So, we process arguments which will be passed by stack first. */
@ -866,9 +856,8 @@ void gfunc_call(int nb_args)
oad(0xec81, size); /* sub $xxx, %rsp */ oad(0xec81, size); /* sub $xxx, %rsp */
/* generate structure store */ /* generate structure store */
r = get_reg(RC_INT); r = get_reg(RC_INT);
o(0x48 + REX_BASE(r)); orex(1, r, 0, 0x89); /* mov %rsp, r */
o(0x89); /* mov %rsp, r */ o(0xe0 + REG_VALUE(r));
o(0xe0 + r);
{ {
/* following code breaks vtop[1] */ /* following code breaks vtop[1] */
SValue tmp = vtop[1]; SValue tmp = vtop[1];
@ -902,7 +891,7 @@ void gfunc_call(int nb_args)
/* XXX: implicit cast ? */ /* XXX: implicit cast ? */
if (j >= REGN) { if (j >= REGN) {
r = gv(RC_INT); r = gv(RC_INT);
o(0x50 + r); /* push r */ orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
args_size += 8; args_size += 8;
} }
} }
@ -910,6 +899,7 @@ void gfunc_call(int nb_args)
} }
vtop = orig_vtop; vtop = orig_vtop;
/* then, we prepare register passing arguments. /* then, we prepare register passing arguments.
Note that we cannot set RDX and RCX in this loop because gv() Note that we cannot set RDX and RCX in this loop because gv()
may break these temporary registers. Let's use R10 and R11 may break these temporary registers. Let's use R10 and R11
@ -950,8 +940,6 @@ void gfunc_call(int nb_args)
vtop--; vtop--;
} }
save_regs(0); /* save used temporary registers */
/* Copy R10 and R11 into RDX and RCX, respectively */ /* Copy R10 and R11 into RDX and RCX, respectively */
if (nb_reg_args > 2) { if (nb_reg_args > 2) {
o(0xd2894c); /* mov %r10, %rdx */ o(0xd2894c); /* mov %r10, %rdx */
@ -1194,41 +1182,37 @@ int gtst(int inv, int t)
void gen_opi(int op) void gen_opi(int op)
{ {
int r, fr, opc, c; int r, fr, opc, c;
int ll, uu, cc;
ll = is64_type(vtop[-1].type.t);
uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;
switch(op) { switch(op) {
case '+': case '+':
case TOK_ADDC1: /* add with carry generation */ case TOK_ADDC1: /* add with carry generation */
opc = 0; opc = 0;
gen_op8: gen_op8:
if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST && if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
!is64_type(vtop->type.t)) {
/* constant case */ /* constant case */
vswap(); vswap();
r = gv(RC_INT); r = gv(RC_INT);
if (is64_type(vtop->type.t)) {
o(0x48 | REX_BASE(r));
}
vswap(); vswap();
c = vtop->c.i; c = vtop->c.i;
if (c == (char)c) { if (c == (char)c) {
/* XXX: generate inc and dec for smaller code ? */ /* XXX: generate inc and dec for smaller code ? */
o(0x83); orex(ll, r, 0, 0x83);
o(0xc0 | (opc << 3) | REG_VALUE(r)); o(0xc0 | (opc << 3) | REG_VALUE(r));
g(c); g(c);
} else { } else {
o(0x81); orex(ll, r, 0, 0x81);
oad(0xc0 | (opc << 3) | REG_VALUE(r), c); oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
} }
} else { } else {
gv2(RC_INT, RC_INT); gv2(RC_INT, RC_INT);
r = vtop[-1].r; r = vtop[-1].r;
fr = vtop[0].r; fr = vtop[0].r;
if (opc != 7 || orex(ll, r, fr, (opc << 3) | 0x01);
is64_type(vtop[0].type.t) || (vtop[0].type.t & VT_UNSIGNED) ||
is64_type(vtop[-1].type.t) || (vtop[-1].type.t & VT_UNSIGNED)) {
o(0x48 | REX_BASE(r) | (REX_BASE(fr) << 2));
}
o((opc << 3) | 0x01);
o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8); o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
} }
vtop--; vtop--;
@ -1260,13 +1244,9 @@ void gen_opi(int op)
gv2(RC_INT, RC_INT); gv2(RC_INT, RC_INT);
r = vtop[-1].r; r = vtop[-1].r;
fr = vtop[0].r; fr = vtop[0].r;
if (is64_type(vtop[0].type.t) || (vtop[0].type.t & VT_UNSIGNED) || orex(ll, fr, r, 0xaf0f); /* imul fr, r */
is64_type(vtop[-1].type.t) || (vtop[-1].type.t & VT_UNSIGNED)) { o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
o(0x48 | REX_BASE(fr) | (REX_BASE(r) << 2));
}
vtop--; vtop--;
o(0xaf0f); /* imul fr, r */
o(0xc0 + fr + r * 8);
break; break;
case TOK_SHL: case TOK_SHL:
opc = 4; opc = 4;
@ -1278,39 +1258,32 @@ void gen_opi(int op)
opc = 7; opc = 7;
gen_shift: gen_shift:
opc = 0xc0 | (opc << 3); opc = 0xc0 | (opc << 3);
if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) { if (cc) {
/* constant case */ /* constant case */
vswap(); vswap();
r = gv(RC_INT); r = gv(RC_INT);
if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
o(0x48 | REX_BASE(r));
c = 0x3f;
} else {
c = 0x1f;
}
vswap(); vswap();
c &= vtop->c.i; orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
o(0xc1); /* shl/shr/sar $xxx, r */ o(opc | REG_VALUE(r));
o(opc | r); g(vtop->c.i & (ll ? 63 : 31));
g(c);
} else { } else {
/* we generate the shift in ecx */ /* we generate the shift in ecx */
gv2(RC_INT, RC_RCX); gv2(RC_INT, RC_RCX);
r = vtop[-1].r; r = vtop[-1].r;
if ((vtop[-1].type.t & VT_BTYPE) == VT_LLONG) { orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
o(0x48 | REX_BASE(r)); o(opc | REG_VALUE(r));
}
o(0xd3); /* shl/shr/sar %cl, r */
o(opc | r);
} }
vtop--; vtop--;
break; break;
case '/':
case TOK_UDIV: case TOK_UDIV:
case TOK_PDIV:
case '%':
case TOK_UMOD: case TOK_UMOD:
case TOK_UMULL: uu = 1;
goto divmod;
case '/':
case '%':
case TOK_PDIV:
uu = 0;
divmod:
/* first operand must be in eax */ /* first operand must be in eax */
/* XXX: need better constraint for second operand */ /* XXX: need better constraint for second operand */
gv2(RC_RAX, RC_RCX); gv2(RC_RAX, RC_RCX);
@ -1318,36 +1291,13 @@ void gen_opi(int op)
fr = vtop[0].r; fr = vtop[0].r;
vtop--; vtop--;
save_reg(TREG_RDX); save_reg(TREG_RDX);
if (op == TOK_UMULL) { orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
o(0xf7); /* mul fr */ orex(ll, fr, 0, 0xf7); /* div fr, %eax */
o(0xe0 + fr); o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
vtop->r2 = TREG_RDX; if (op == '%' || op == TOK_UMOD)
r = TREG_RDX;
else
r = TREG_RAX; r = TREG_RAX;
} else {
if (op == TOK_UDIV || op == TOK_UMOD) {
if ((vtop->type.t & VT_BTYPE) & VT_LLONG) {
o(0xd23148); /* xor %rdx, %rdx */
o(0x48 + REX_BASE(fr));
} else {
o(0xd231); /* xor %edx, %edx */
}
o(0xf7); /* div fr, %eax */
o(0xf0 + fr);
} else {
if ((vtop->type.t & VT_BTYPE) & VT_LLONG) {
o(0x9948); /* cqto */
o(0x48 + REX_BASE(fr));
} else {
o(0x99); /* cltd */
}
o(0xf7); /* idiv fr, %eax */
o(0xf8 + fr);
}
if (op == '%' || op == TOK_UMOD)
r = TREG_RDX;
else
r = TREG_RAX;
}
vtop->r = r; vtop->r = r;
break; break;
default: default:
@ -1687,10 +1637,7 @@ void gen_cvt_ftoi(int t)
} else { } else {
assert(0); assert(0);
} }
if (size == 8) { orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
o(0x48 + REX_BASE(r));
}
o(0x2c0f); /* cvttss2si or cvttsd2si */
o(0xc0 + (REG_VALUE(r) << 3)); o(0xc0 + (REG_VALUE(r) << 3));
vtop->r = r; vtop->r = r;
} }