Reduce the size of the machine code generated for x86_64

master
jiang 2014-04-29 23:57:22 +08:00
parent 2a8905c93b
commit 515169f21b
3 changed files with 244 additions and 251 deletions

8
tcc.h
View File

@ -738,19 +738,21 @@ struct TCCState {
#define VT_CMP 0x0033 /* the value is stored in processor flags (in vc) */ #define VT_CMP 0x0033 /* the value is stored in processor flags (in vc) */
#define VT_JMP 0x0034 /* value is the consequence of jmp true (even) */ #define VT_JMP 0x0034 /* value is the consequence of jmp true (even) */
#define VT_JMPI 0x0035 /* value is the consequence of jmp false (odd) */ #define VT_JMPI 0x0035 /* value is the consequence of jmp false (odd) */
#define VT_REF 0x0040 /* value is pointer to structure rather than address */ #define TREG_MEM 0x0040 /* x86_64-gen.c add for tcc.h: The current value can be */
#define VT_REF 0x0080 /* value is pointer to structure rather than address */
#define VT_LVAL 0x0100 /* var is an lvalue */ #define VT_LVAL 0x0100 /* var is an lvalue */
#define VT_SYM 0x0200 /* a symbol value is added */ #define VT_SYM 0x0200 /* a symbol value is added */
#define VT_MUSTCAST 0x0400 /* value must be casted to be correct (used for #define VT_MUSTCAST 0x0400 /* value must be casted to be correct (used for
char/short stored in integer registers) */ char/short stored in integer registers) */
#define VT_MUSTBOUND 0x0800 /* bound checking must be done before #define VT_MUSTBOUND 0x0800 /* bound checking must be done before
dereferencing value */ dereferencing value */
#define VT_BOUNDED 0x8000 /* value is bounded. The address of the
bounding function call point is in vc */
#define VT_LVAL_BYTE 0x1000 /* lvalue is a byte */ #define VT_LVAL_BYTE 0x1000 /* lvalue is a byte */
#define VT_LVAL_SHORT 0x2000 /* lvalue is a short */ #define VT_LVAL_SHORT 0x2000 /* lvalue is a short */
#define VT_LVAL_UNSIGNED 0x4000 /* lvalue is unsigned */ #define VT_LVAL_UNSIGNED 0x4000 /* lvalue is unsigned */
#define VT_LVAL_TYPE (VT_LVAL_BYTE | VT_LVAL_SHORT | VT_LVAL_UNSIGNED) #define VT_LVAL_TYPE (VT_LVAL_BYTE | VT_LVAL_SHORT | VT_LVAL_UNSIGNED)
#define VT_BOUNDED 0x8000 /* value is bounded. The address of the
bounding function call point is in vc */
#define VT_TMP 0x10000
/* types */ /* types */
#define VT_BTYPE 0x000f /* mask for basic type */ #define VT_BTYPE 0x000f /* mask for basic type */

View File

@ -909,8 +909,9 @@ ST_FUNC int gv(int rc)
/* one register type load */ /* one register type load */
load(r, vtop); load(r, vtop);
} }
vtop->r = r;
vtop->c.ptr_offset = 0;
} }
vtop->r = r;
#ifdef TCC_TARGET_C67 #ifdef TCC_TARGET_C67
/* uses register pairs for doubles */ /* uses register pairs for doubles */
if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)

View File

@ -29,28 +29,38 @@
/* a register can belong to several classes. The classes must be /* a register can belong to several classes. The classes must be
sorted from more general to more precise (see gv2() code which does sorted from more general to more precise (see gv2() code which does
assumptions on it). */ assumptions on it). */
#define RC_INT 0x0001 /* generic integer register */ #define RC_INT 0x0001 /* generic integer register */
#define RC_FLOAT 0x0002 /* generic float register */ #define RC_FLOAT 0x0002 /* generic float register */
#define RC_RAX 0x0004 #define RC_RAX 0x0004
#define RC_RCX 0x0008 #define RC_RCX 0x0008
#define RC_RDX 0x0010 #define RC_RDX 0x0010
#define RC_ST0 0x0080 /* only for long double */ #define RC_ST0 0x0020 /* only for long double */
#define RC_R8 0x0100 #define RC_R8 0x0040
#define RC_R9 0x0200 #define RC_R9 0x0080
#define RC_R10 0x0400 #define RC_XMM0 0x0100
#define RC_R11 0x0800 #define RC_XMM1 0x0200
#define RC_XMM0 0x1000 #define RC_XMM2 0x0400
#define RC_XMM1 0x2000 #define RC_XMM3 0x0800
#define RC_XMM2 0x4000 #define RC_XMM4 0x1000
#define RC_XMM3 0x8000 #define RC_XMM5 0x2000
#define RC_XMM4 0x10000 #define RC_XMM6 0x4000
#define RC_XMM5 0x20000 #define RC_XMM7 0x8000
#define RC_XMM6 0x40000 #define RC_RSI 0x10000
#define RC_XMM7 0x80000 #define RC_RDI 0x20000
#define RC_IRET RC_RAX /* function return: integer register */ #define RC_INT1 0x40000 /* function_pointer */
#define RC_LRET RC_RDX /* function return: second integer register */ #define RC_INT2 0x80000
#define RC_FRET RC_XMM0 /* function return: float register */ #define RC_RBX 0x100000
#define RC_QRET RC_XMM1 /* function return: second float register */ #define RC_R10 0x200000
#define RC_R11 0x400000
#define RC_R12 0x800000
#define RC_R13 0x1000000
#define RC_R14 0x2000000
#define RC_R15 0x4000000
#define RC_IRET RC_RAX /* function return: integer register */
#define RC_LRET RC_RDX /* function return: second integer register */
#define RC_FRET RC_XMM0 /* function return: float register */
#define RC_QRET RC_XMM1 /* function return: second float register */
#define RC_MASK (RC_INT|RC_INT1|RC_INT2|RC_FLOAT)
/* pretty names for the registers */ /* pretty names for the registers */
enum { enum {
@ -58,6 +68,7 @@ enum {
TREG_RCX = 1, TREG_RCX = 1,
TREG_RDX = 2, TREG_RDX = 2,
TREG_RSP = 4, TREG_RSP = 4,
TREG_ST0 = 5,
TREG_RSI = 6, TREG_RSI = 6,
TREG_RDI = 7, TREG_RDI = 7,
@ -75,13 +86,11 @@ enum {
TREG_XMM6 = 22, TREG_XMM6 = 22,
TREG_XMM7 = 23, TREG_XMM7 = 23,
TREG_ST0 = 24,
TREG_MEM = 0x20,
}; };
#define REX_BASE(reg) (((reg) >> 3) & 1) #define REX_BASE(reg) (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7) #define REG_VALUE(reg) ((reg) & 7)
#define FLAG_GOT 0X01
/* return registers for function */ /* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */ #define REG_IRET TREG_RAX /* single word int return register */
@ -122,34 +131,30 @@ enum {
#include <assert.h> #include <assert.h>
ST_DATA const int reg_classes[NB_REGS] = { ST_DATA const int reg_classes[NB_REGS] = {
/* eax */ RC_INT | RC_RAX, /* eax */ RC_INT|RC_RAX|RC_INT2,
/* ecx */ RC_INT | RC_RCX, /* ecx */ RC_INT|RC_RCX|RC_INT2,
/* edx */ RC_INT | RC_RDX, /* edx */ RC_INT|RC_RDX,
RC_INT|RC_INT1|RC_INT2|RC_RBX,
0, 0,
0, /* st0 */ RC_ST0,
0, RC_RSI|RC_INT2,
0, RC_RDI|RC_INT2,
0, RC_INT|RC_R8|RC_INT2,
RC_R8, RC_INT|RC_R9|RC_INT2,
RC_R9, RC_INT|RC_INT1|RC_INT2|RC_R10,
RC_R10, RC_INT|RC_INT1|RC_INT2|RC_R11,
RC_R11, RC_INT|RC_INT1|RC_INT2|RC_R12,
0, RC_INT|RC_INT1|RC_INT2|RC_R13,
0, RC_INT|RC_INT1|RC_INT2|RC_R14,
0, RC_INT|RC_INT1|RC_INT2|RC_R15,
0, /* xmm0 */ RC_FLOAT | RC_XMM0,
/* xmm0 */ RC_FLOAT | RC_XMM0, RC_FLOAT|RC_XMM1,
/* xmm1 */ RC_FLOAT | RC_XMM1, RC_FLOAT|RC_XMM2,
/* xmm2 */ RC_FLOAT | RC_XMM2, RC_FLOAT|RC_XMM3,
/* xmm3 */ RC_FLOAT | RC_XMM3, RC_FLOAT|RC_XMM4,
/* xmm4 */ RC_FLOAT | RC_XMM4, RC_FLOAT|RC_XMM5,
/* xmm5 */ RC_FLOAT | RC_XMM5, RC_FLOAT|RC_XMM6,
/* xmm6 and xmm7 are included so gv() can be used on them, RC_FLOAT|RC_XMM7,
but they are not tagged with RC_FLOAT because they are
callee saved on Windows */
RC_XMM6,
RC_XMM7,
/* st0 */ RC_ST0
}; };
static unsigned long func_sub_sp_offset; static unsigned long func_sub_sp_offset;
@ -324,7 +329,7 @@ static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
} else { } else {
oad(0x85 | op_reg, c); oad(0x85 | op_reg, c);
} }
} else if ((r & VT_VALMASK) >= TREG_MEM) { } else if (r & TREG_MEM) {
if (c) { if (c) {
g(0x80 | op_reg | REG_VALUE(r)); g(0x80 | op_reg | REG_VALUE(r));
gen_le32(c); gen_le32(c);
@ -1609,39 +1614,42 @@ int gtst(int inv, int t)
/* generate an integer binary operation */ /* generate an integer binary operation */
void gen_opi(int op) void gen_opi(int op)
{ {
int r, fr, opc, c; int r, fr, opc, fc, c, ll, uu, cc, tt2;
int ll, uu, cc;
fr = vtop[0].r;
fc = vtop->c.ul;
ll = is64_type(vtop[-1].type.t); ll = is64_type(vtop[-1].type.t);
uu = (vtop[-1].type.t & VT_UNSIGNED) != 0; cc = (fr & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;
cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST; tt2 = (fr & (VT_LVAL | VT_LVAL_TYPE)) == VT_LVAL;
switch(op) { switch(op) {
case '+': case '+':
case TOK_ADDC1: /* add with carry generation */ case TOK_ADDC1: /* add with carry generation */
opc = 0; opc = 0;
gen_op8: gen_op8:
vswap();
r = gv(RC_INT);
vswap();
if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) { if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
/* constant case */ /* constant case */
vswap();
r = gv(RC_INT);
vswap();
c = vtop->c.i; c = vtop->c.i;
if (c == (char)c) { if (c == (char)c) {
/* XXX: generate inc and dec for smaller code ? */ /* XXX: generate inc and dec for smaller code ? */
orex(ll, r, 0, 0x83); orex(ll, r, 0, 0x83);
o(0xc0 | (opc << 3) | REG_VALUE(r)); o(0xc0 + REG_VALUE(r) + opc*8);
g(c); g(c);
} else { } else {
orex(ll, r, 0, 0x81); orex(ll, r, 0, 0x81);
oad(0xc0 | (opc << 3) | REG_VALUE(r), c); oad(0xc0 + REG_VALUE(r) + opc*8, c);
} }
} else { } else {
gv2(RC_INT, RC_INT); if(!tt2)
r = vtop[-1].r; fr = gv(RC_INT);
fr = vtop[0].r; orex(ll, fr, r, 0x03 + opc*8);
orex(ll, r, fr, (opc << 3) | 0x01); if(fr >= VT_CONST)
o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8); gen_modrm(r, fr, vtop->sym, fc);
else
o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8);
} }
vtop--; vtop--;
if (op >= TOK_ULT && op <= TOK_GT) { if (op >= TOK_ULT && op <= TOK_GT) {
@ -1669,11 +1677,27 @@ void gen_opi(int op)
opc = 1; opc = 1;
goto gen_op8; goto gen_op8;
case '*': case '*':
gv2(RC_INT, RC_INT); opc = 5;
r = vtop[-1].r; vswap();
fr = vtop[0].r; r = gv(RC_INT);
orex(ll, fr, r, 0xaf0f); /* imul fr, r */ vswap();
o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8); if(!tt2)
fr = gv(RC_INT);
if(r == TREG_RAX){
if(fr != TREG_RDX)
save_reg(TREG_RDX);
orex(ll, fr, r, 0xf7);
if(fr >= VT_CONST)
gen_modrm(opc, fr, vtop->sym, fc);
else
o(0xc0 + REG_VALUE(fr) + opc*8);
}else{
orex(ll, fr, r, 0xaf0f); /* imul fr, r */
if(fr >= VT_CONST)
gen_modrm(r, fr, vtop->sym, fc);
else
o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8);
}
vtop--; vtop--;
break; break;
case TOK_SHL: case TOK_SHL:
@ -1685,47 +1709,62 @@ void gen_opi(int op)
case TOK_SAR: case TOK_SAR:
opc = 7; opc = 7;
gen_shift: gen_shift:
opc = 0xc0 | (opc << 3);
if (cc) { if (cc) {
/* constant case */ /* constant case */
vswap(); vswap();
r = gv(RC_INT); r = gv(RC_INT);
vswap(); vswap();
orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */ c = vtop->c.i;
o(opc | REG_VALUE(r)); if(c == 1){
g(vtop->c.i & (ll ? 63 : 31)); orex(ll, r, 0, 0xd1);
o(0xc0 + REG_VALUE(r) + opc*8);
}else{
orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
o(0xc0 + REG_VALUE(r) + opc*8);
g(c & (ll ? 0x3f : 0x1f));
}
} else { } else {
/* we generate the shift in ecx */ /* we generate the shift in ecx */
gv2(RC_INT, RC_RCX); gv2(RC_INT, RC_RCX);
r = vtop[-1].r; r = vtop[-1].r;
orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */ orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
o(opc | REG_VALUE(r)); o(0xc0 + REG_VALUE(r) + opc*8);
} }
vtop--; vtop--;
break; break;
case TOK_UDIV: case TOK_UDIV:
case TOK_UMOD: case TOK_UMOD:
opc = 6;
uu = 1; uu = 1;
goto divmod; goto divmod;
case '/': case '/':
case '%': case '%':
case TOK_PDIV: case TOK_PDIV:
opc = 7;
uu = 0; uu = 0;
divmod: divmod:
/* first operand must be in eax */ /* first operand must be in eax */
/* XXX: need better constraint for second operand */ /* XXX: need better constraint for second operand */
gv2(RC_RAX, RC_RCX); if(!tt2){
r = vtop[-1].r; gv2(RC_RAX, RC_INT2);
fr = vtop[0].r; fr = vtop[0].r;
vtop--; }else{
save_reg(TREG_RDX); vswap();
orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */ gv(RC_RAX);
orex(ll, fr, 0, 0xf7); /* div fr, %eax */ vswap();
o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr)); }
save_reg(TREG_RDX);
orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cdq RDX:RAX <- sign-extend of RAX. */
orex(ll, fr, 0, 0xf7); /* div fr, %eax */
if(fr >= VT_CONST)
gen_modrm(opc, fr, vtop->sym, fc);
else
o(0xc0 + REG_VALUE(fr) + opc*8);
if (op == '%' || op == TOK_UMOD) if (op == '%' || op == TOK_UMOD)
r = TREG_RDX; r = TREG_RDX;
else else
r = TREG_RAX; r = TREG_RAX;
vtop--;
vtop->r = r; vtop->r = r;
break; break;
default: default:
@ -1744,9 +1783,8 @@ void gen_opl(int op)
/* XXX: need to use ST1 too */ /* XXX: need to use ST1 too */
void gen_opf(int op) void gen_opf(int op)
{ {
int a, ft, fc, swapped, r; int a, ft, fc, swapped, fr, r;
int float_type = int float_type = (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
(vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
/* convert constants to memory references */ /* convert constants to memory references */
if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) { if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
@ -1757,21 +1795,23 @@ void gen_opf(int op)
if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
gv(float_type); gv(float_type);
/* must put at least one value in the floating point register */ swapped = 0;
if ((vtop[-1].r & VT_LVAL) && fc = vtop->c.ul;
(vtop[0].r & VT_LVAL)) { ft = vtop->type.t;
vswap();
gv(float_type); if ((ft & VT_BTYPE) == VT_LDOUBLE) {
vswap(); /* swap the stack if needed so that t1 is the register and t2 is
} the memory reference */
swapped = 0; /* must put at least one value in the floating point register */
/* swap the stack if needed so that t1 is the register and t2 is if ((vtop[-1].r & VT_LVAL) && (vtop[0].r & VT_LVAL)) {
the memory reference */ vswap();
if (vtop[-1].r & VT_LVAL) { gv(float_type);
vswap(); vswap();
swapped = 1; }
} if (vtop[-1].r & VT_LVAL) {
if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) { vswap();
swapped = 1;
}
if (op >= TOK_ULT && op <= TOK_GT) { if (op >= TOK_ULT && op <= TOK_GT) {
/* load on stack second operand */ /* load on stack second operand */
load(TREG_ST0, vtop); load(TREG_ST0, vtop);
@ -1782,10 +1822,10 @@ void gen_opf(int op)
swapped = 0; swapped = 0;
if (swapped) if (swapped)
o(0xc9d9); /* fxch %st(1) */ o(0xc9d9); /* fxch %st(1) */
if (op == TOK_EQ || op == TOK_NE) if (op == TOK_EQ || op == TOK_NE)
o(0xe9da); /* fucompp */ o(0xe9da); /* fucompp */
else else
o(0xd9de); /* fcompp */ o(0xd9de); /* fcompp */
o(0xe0df); /* fnstsw %ax */ o(0xe0df); /* fnstsw %ax */
if (op == TOK_EQ) { if (op == TOK_EQ) {
o(0x45e480); /* and $0x45, %ah */ o(0x45e480); /* and $0x45, %ah */
@ -1808,7 +1848,6 @@ void gen_opf(int op)
/* no memory reference possible for long double operations */ /* no memory reference possible for long double operations */
load(TREG_ST0, vtop); load(TREG_ST0, vtop);
swapped = !swapped; swapped = !swapped;
switch(op) { switch(op) {
default: default:
case '+': case '+':
@ -1828,63 +1867,45 @@ void gen_opf(int op)
a++; a++;
break; break;
} }
ft = vtop->type.t;
fc = vtop->c.ul;
o(0xde); /* fxxxp %st, %st(1) */ o(0xde); /* fxxxp %st, %st(1) */
o(0xc1 + (a << 3)); o(0xc1 + (a << 3));
vtop--; vtop--;
} }
} else { } else {
vswap();
gv(float_type);
vswap();
fr = vtop->r;
r = vtop[-1].r;
if (op >= TOK_ULT && op <= TOK_GT) { if (op >= TOK_ULT && op <= TOK_GT) {
/* if saved lvalue, then we must reload it */ switch(op){
r = vtop->r; case TOK_LE:
fc = vtop->c.ul; op = TOK_ULE; /* setae */
if ((r & VT_VALMASK) == VT_LLOCAL) { break;
SValue v1; case TOK_LT:
r = get_reg(RC_INT); op = TOK_ULT;
v1.type.t = VT_PTR; break;
v1.r = VT_LOCAL | VT_LVAL; case TOK_GE:
v1.c.ul = fc; op = TOK_UGE;
load(r, &v1); break;
fc = 0; case TOK_GT:
} op = TOK_UGT; /* seta */
break;
if (op == TOK_EQ || op == TOK_NE) { }
swapped = 0; assert(!(vtop[-1].r & VT_LVAL));
} else { if ((ft & VT_BTYPE) == VT_DOUBLE)
if (op == TOK_LE || op == TOK_LT) o(0x66);
swapped = !swapped; o(0x2e0f); /* ucomisd */
if (op == TOK_LE || op == TOK_GE) { if(fr >= VT_CONST)
op = 0x93; /* setae */ gen_modrm(r, fr, vtop->sym, fc);
} else { else
op = 0x97; /* seta */ o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8);
}
}
if (swapped) {
gv(RC_FLOAT);
vswap();
}
assert(!(vtop[-1].r & VT_LVAL));
if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
o(0x66);
if (op == TOK_EQ || op == TOK_NE)
o(0x2e0f); /* ucomisd */
else
o(0x2f0f); /* comisd */
if (vtop->r & VT_LVAL) {
gen_modrm(vtop[-1].r, r, vtop->sym, fc);
} else {
o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
}
vtop--; vtop--;
vtop->r = VT_CMP; vtop->r = VT_CMP;
vtop->c.i = op | 0x100; vtop->c.i = op | 0x100;
} else { } else {
assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE); assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
/* no memory reference possible for long double operations */
switch(op) { switch(op) {
default: default:
case '+': case '+':
@ -1900,44 +1921,20 @@ void gen_opf(int op)
a = 6; a = 6;
break; break;
} }
ft = vtop->type.t; assert((ft & VT_BTYPE) != VT_LDOUBLE);
fc = vtop->c.ul; assert(!(vtop[-1].r & VT_LVAL));
assert((ft & VT_BTYPE) != VT_LDOUBLE); if ((ft & VT_BTYPE) == VT_DOUBLE) {
o(0xf2);
r = vtop->r; } else {
/* if saved lvalue, then we must reload it */ o(0xf3);
if ((vtop->r & VT_VALMASK) == VT_LLOCAL) { }
SValue v1; o(0x0f);
r = get_reg(RC_INT); o(0x58 + a);
v1.type.t = VT_PTR; if(fr >= VT_CONST)
v1.r = VT_LOCAL | VT_LVAL; gen_modrm(r, fr, vtop->sym, fc);
v1.c.ul = fc; else
load(r, &v1); o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8);
fc = 0; vtop--;
}
assert(!(vtop[-1].r & VT_LVAL));
if (swapped) {
assert(vtop->r & VT_LVAL);
gv(RC_FLOAT);
vswap();
}
if ((ft & VT_BTYPE) == VT_DOUBLE) {
o(0xf2);
} else {
o(0xf3);
}
o(0x0f);
o(0x58 + a);
if (vtop->r & VT_LVAL) {
gen_modrm(vtop[-1].r, r, vtop->sym, fc);
} else {
o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
}
vtop--;
} }
} }
} }
@ -1946,103 +1943,96 @@ void gen_opf(int op)
and 'long long' cases. */ and 'long long' cases. */
void gen_cvt_itof(int t) void gen_cvt_itof(int t)
{ {
if ((t & VT_BTYPE) == VT_LDOUBLE) { int ft, bt, tbt, r;
ft = vtop->type.t;
bt = ft & VT_BTYPE;
tbt = t & VT_BTYPE;
r = gv(RC_INT);
if (tbt == VT_LDOUBLE) {
save_reg(TREG_ST0); save_reg(TREG_ST0);
gv(RC_INT); if ((ft & VT_BTYPE) == VT_LLONG) {
if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
/* signed long long to float/double/long double (unsigned case /* signed long long to float/double/long double (unsigned case
is handled generically) */ is handled generically) */
o(0x50 + (vtop->r & VT_VALMASK)); /* push r */ o(0x50 + REG_VALUE(r)); /* push r */
o(0x242cdf); /* fildll (%rsp) */ o(0x242cdf); /* fildll (%rsp) */
o(0x08c48348); /* add $8, %rsp */ o(0x08c48348); /* add $8, %rsp */
} else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) == } else if ((ft & (VT_BTYPE | VT_UNSIGNED)) == (VT_INT | VT_UNSIGNED)) {
(VT_INT | VT_UNSIGNED)) {
/* unsigned int to float/double/long double */ /* unsigned int to float/double/long double */
o(0x6a); /* push $0 */ o(0x6a); /* push $0 */
g(0x00); g(0x00);
o(0x50 + (vtop->r & VT_VALMASK)); /* push r */ o(0x50 + REG_VALUE(r)); /* push r */
o(0x242cdf); /* fildll (%rsp) */ o(0x242cdf); /* fildll (%rsp) */
o(0x10c48348); /* add $16, %rsp */ o(0x10c48348); /* add $16, %rsp */
} else { } else {
/* int to float/double/long double */ /* int to float/double/long double */
o(0x50 + (vtop->r & VT_VALMASK)); /* push r */ o(0x50 + REG_VALUE(r)); /* push r */
o(0x2404db); /* fildl (%rsp) */ o(0x2404db); /* fildl (%rsp) */
o(0x08c48348); /* add $8, %rsp */ o(0x08c48348); /* add $8, %rsp */
} }
vtop->r = TREG_ST0; vtop->r = TREG_ST0;
} else { } else {
int r = get_reg(RC_FLOAT); int r_xmm;
gv(RC_INT); r_xmm = get_reg(RC_FLOAT);
o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0)); o(0xf2 + (tbt == VT_FLOAT));
if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) == if ((ft & (VT_BTYPE | VT_UNSIGNED)) == (VT_INT | VT_UNSIGNED) || bt == VT_LLONG) {
(VT_INT | VT_UNSIGNED) ||
(vtop->type.t & VT_BTYPE) == VT_LLONG) {
o(0x48); /* REX */ o(0x48); /* REX */
} }
o(0x2a0f); o(0x2a0f);
o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */ o(0xc0 + REG_VALUE(r) + REG_VALUE(r_xmm)*8); /* cvtsi2sd or cvtsi2ss */
vtop->r = r; vtop->r = r_xmm;
} }
} }
/* convert from one floating point type to another */ /* convert from one floating point type to another */
void gen_cvt_ftof(int t) void gen_cvt_ftof(int t)
{ {
int ft, bt, tbt; int ft, bt, tbt, r;
ft = vtop->type.t; ft = vtop->type.t;
bt = ft & VT_BTYPE; bt = ft & VT_BTYPE;
tbt = t & VT_BTYPE; tbt = t & VT_BTYPE;
if(bt == VT_LDOUBLE)
r = get_reg(RC_FLOAT);
else
r = gv(RC_FLOAT);
if (bt == VT_FLOAT) { if (bt == VT_FLOAT) {
gv(RC_FLOAT);
if (tbt == VT_DOUBLE) { if (tbt == VT_DOUBLE) {
o(0x140f); /* unpcklps */
o(0xc0 + REG_VALUE(vtop->r)*9);
o(0x5a0f); /* cvtps2pd */ o(0x5a0f); /* cvtps2pd */
o(0xc0 + REG_VALUE(vtop->r)*9); o(0xc0 + REG_VALUE(r) + REG_VALUE(r) * 8);
} else if (tbt == VT_LDOUBLE) { } else if (tbt == VT_LDOUBLE) {
save_reg(RC_ST0); /* movss %xmm0-7,-0x10(%rsp) */
/* movss %xmm0,-0x10(%rsp) */
o(0x110ff3); o(0x110ff3);
o(0x44 + REG_VALUE(vtop->r)*8); o(0xf02444 + REG_VALUE(r)*8);
o(0xf024);
o(0xf02444d9); /* flds -0x10(%rsp) */ o(0xf02444d9); /* flds -0x10(%rsp) */
vtop->r = TREG_ST0; vtop->r = TREG_ST0;
} }
} else if (bt == VT_DOUBLE) { } else if (bt == VT_DOUBLE) {
gv(RC_FLOAT);
if (tbt == VT_FLOAT) { if (tbt == VT_FLOAT) {
o(0x140f66); /* unpcklpd */
o(0xc0 + REG_VALUE(vtop->r)*9);
o(0x5a0f66); /* cvtpd2ps */ o(0x5a0f66); /* cvtpd2ps */
o(0xc0 + REG_VALUE(vtop->r)*9); o(0xc0 + REG_VALUE(r) + REG_VALUE(r) * 8);
} else if (tbt == VT_LDOUBLE) { } else if (tbt == VT_LDOUBLE) {
save_reg(RC_ST0); /* movsd %xmm0-7,-0x10(%rsp) */
/* movsd %xmm0,-0x10(%rsp) */
o(0x110ff2); o(0x110ff2);
o(0x44 + REG_VALUE(vtop->r)*8); o(0xf02444 + REG_VALUE(r)*8);
o(0xf024);
o(0xf02444dd); /* fldl -0x10(%rsp) */ o(0xf02444dd); /* fldl -0x10(%rsp) */
vtop->r = TREG_ST0; vtop->r = TREG_ST0;
} }
} else { } else {
int r;
gv(RC_ST0); gv(RC_ST0);
r = get_reg(RC_FLOAT);
if (tbt == VT_DOUBLE) { if (tbt == VT_DOUBLE) {
o(0xf0245cdd); /* fstpl -0x10(%rsp) */ o(0xf0245cdd); /* fstpl -0x10(%rsp) */
/* movsd -0x10(%rsp),%xmm0 */ /* movsd -0x10(%rsp),%xmm0-7 */
o(0x100ff2); o(0x100ff2);
o(0x44 + REG_VALUE(r)*8); o(0xf02444 + REG_VALUE(r)*8);
o(0xf024);
vtop->r = r; vtop->r = r;
} else if (tbt == VT_FLOAT) { } else if (tbt == VT_FLOAT) {
o(0xf0245cd9); /* fstps -0x10(%rsp) */ o(0xf0245cd9); /* fstps -0x10(%rsp) */
/* movss -0x10(%rsp),%xmm0 */ /* movss -0x10(%rsp),%xmm0-7 */
o(0x100ff3); o(0x100ff3);
o(0x44 + REG_VALUE(r)*8); o(0xf02444 + REG_VALUE(r)*8);
o(0xf024);
vtop->r = r; vtop->r = r;
} }
} }
@ -2051,20 +2041,20 @@ void gen_cvt_ftof(int t)
/* convert fp to int 't' type */ /* convert fp to int 't' type */
void gen_cvt_ftoi(int t) void gen_cvt_ftoi(int t)
{ {
int ft, bt, size, r; int ft, bt, ll, r, r_xmm;
ft = vtop->type.t; ft = vtop->type.t;
bt = ft & VT_BTYPE; bt = ft & VT_BTYPE;
if (bt == VT_LDOUBLE) { if (bt == VT_LDOUBLE) {
gen_cvt_ftof(VT_DOUBLE); gen_cvt_ftof(VT_DOUBLE);
bt = VT_DOUBLE; bt = VT_DOUBLE;
} }
r_xmm = gv(RC_FLOAT);
gv(RC_FLOAT); if ((t & VT_BTYPE) == VT_INT)
if (t != VT_INT) ll = 0;
size = 8;
else else
size = 4; ll = 1;
r = get_reg(RC_INT); r = get_reg(RC_INT);
if (bt == VT_FLOAT) { if (bt == VT_FLOAT) {
o(0xf3); o(0xf3);
@ -2073,8 +2063,8 @@ void gen_cvt_ftoi(int t)
} else { } else {
assert(0); assert(0);
} }
orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */ orex(ll, r, r_xmm, 0x2c0f); /* cvttss2si or cvttsd2si */
o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8); o(0xc0 + REG_VALUE(r_xmm) + (REG_VALUE(r) << 3));
vtop->r = r; vtop->r = r;
} }