Reduce the amount of machine code generated for x86_64 (smaller output size)

master
jiang 2014-04-29 23:57:22 +08:00
parent 2a8905c93b
commit 515169f21b
3 changed files with 244 additions and 251 deletions

8
tcc.h
View File

@ -738,19 +738,21 @@ struct TCCState {
#define VT_CMP 0x0033 /* the value is stored in processor flags (in vc) */
#define VT_JMP 0x0034 /* value is the consequence of jmp true (even) */
#define VT_JMPI 0x0035 /* value is the consequence of jmp false (odd) */
#define VT_REF 0x0040 /* value is pointer to structure rather than address */
#define TREG_MEM 0x0040 /* x86_64-gen.c add for tcc.h: The current value can be */
#define VT_REF 0x0080 /* value is pointer to structure rather than address */
#define VT_LVAL 0x0100 /* var is an lvalue */
#define VT_SYM 0x0200 /* a symbol value is added */
#define VT_MUSTCAST 0x0400 /* value must be casted to be correct (used for
char/short stored in integer registers) */
#define VT_MUSTBOUND 0x0800 /* bound checking must be done before
dereferencing value */
#define VT_BOUNDED 0x8000 /* value is bounded. The address of the
bounding function call point is in vc */
#define VT_LVAL_BYTE 0x1000 /* lvalue is a byte */
#define VT_LVAL_SHORT 0x2000 /* lvalue is a short */
#define VT_LVAL_UNSIGNED 0x4000 /* lvalue is unsigned */
#define VT_LVAL_TYPE (VT_LVAL_BYTE | VT_LVAL_SHORT | VT_LVAL_UNSIGNED)
#define VT_BOUNDED 0x8000 /* value is bounded. The address of the
bounding function call point is in vc */
#define VT_TMP 0x10000
/* types */
#define VT_BTYPE 0x000f /* mask for basic type */

View File

@ -909,8 +909,9 @@ ST_FUNC int gv(int rc)
/* one register type load */
load(r, vtop);
}
vtop->r = r;
vtop->c.ptr_offset = 0;
}
vtop->r = r;
#ifdef TCC_TARGET_C67
/* uses register pairs for doubles */
if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)

View File

@ -29,28 +29,38 @@
/* a register can belong to several classes. The classes must be
sorted from more general to more precise (see gv2() code which does
assumptions on it). */
#define RC_INT 0x0001 /* generic integer register */
#define RC_FLOAT 0x0002 /* generic float register */
#define RC_RAX 0x0004
#define RC_RCX 0x0008
#define RC_RDX 0x0010
#define RC_ST0 0x0080 /* only for long double */
#define RC_R8 0x0100
#define RC_R9 0x0200
#define RC_R10 0x0400
#define RC_R11 0x0800
#define RC_XMM0 0x1000
#define RC_XMM1 0x2000
#define RC_XMM2 0x4000
#define RC_XMM3 0x8000
#define RC_XMM4 0x10000
#define RC_XMM5 0x20000
#define RC_XMM6 0x40000
#define RC_XMM7 0x80000
#define RC_IRET RC_RAX /* function return: integer register */
#define RC_LRET RC_RDX /* function return: second integer register */
#define RC_FRET RC_XMM0 /* function return: float register */
#define RC_QRET RC_XMM1 /* function return: second float register */
#define RC_INT 0x0001 /* generic integer register */
#define RC_FLOAT 0x0002 /* generic float register */
#define RC_RAX 0x0004
#define RC_RCX 0x0008
#define RC_RDX 0x0010
#define RC_ST0 0x0020 /* only for long double */
#define RC_R8 0x0040
#define RC_R9 0x0080
#define RC_XMM0 0x0100
#define RC_XMM1 0x0200
#define RC_XMM2 0x0400
#define RC_XMM3 0x0800
#define RC_XMM4 0x1000
#define RC_XMM5 0x2000
#define RC_XMM6 0x4000
#define RC_XMM7 0x8000
#define RC_RSI 0x10000
#define RC_RDI 0x20000
#define RC_INT1 0x40000 /* function_pointer */
#define RC_INT2 0x80000
#define RC_RBX 0x100000
#define RC_R10 0x200000
#define RC_R11 0x400000
#define RC_R12 0x800000
#define RC_R13 0x1000000
#define RC_R14 0x2000000
#define RC_R15 0x4000000
#define RC_IRET RC_RAX /* function return: integer register */
#define RC_LRET RC_RDX /* function return: second integer register */
#define RC_FRET RC_XMM0 /* function return: float register */
#define RC_QRET RC_XMM1 /* function return: second float register */
#define RC_MASK (RC_INT|RC_INT1|RC_INT2|RC_FLOAT)
/* pretty names for the registers */
enum {
@ -58,6 +68,7 @@ enum {
TREG_RCX = 1,
TREG_RDX = 2,
TREG_RSP = 4,
TREG_ST0 = 5,
TREG_RSI = 6,
TREG_RDI = 7,
@ -75,13 +86,11 @@ enum {
TREG_XMM6 = 22,
TREG_XMM7 = 23,
TREG_ST0 = 24,
TREG_MEM = 0x20,
};
#define REX_BASE(reg) (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)
#define FLAG_GOT 0X01
/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
@ -122,34 +131,30 @@ enum {
#include <assert.h>
ST_DATA const int reg_classes[NB_REGS] = {
/* eax */ RC_INT | RC_RAX,
/* ecx */ RC_INT | RC_RCX,
/* edx */ RC_INT | RC_RDX,
/* eax */ RC_INT|RC_RAX|RC_INT2,
/* ecx */ RC_INT|RC_RCX|RC_INT2,
/* edx */ RC_INT|RC_RDX,
RC_INT|RC_INT1|RC_INT2|RC_RBX,
0,
0,
0,
0,
0,
RC_R8,
RC_R9,
RC_R10,
RC_R11,
0,
0,
0,
0,
/* xmm0 */ RC_FLOAT | RC_XMM0,
/* xmm1 */ RC_FLOAT | RC_XMM1,
/* xmm2 */ RC_FLOAT | RC_XMM2,
/* xmm3 */ RC_FLOAT | RC_XMM3,
/* xmm4 */ RC_FLOAT | RC_XMM4,
/* xmm5 */ RC_FLOAT | RC_XMM5,
/* xmm6 and xmm7 are included so gv() can be used on them,
but they are not tagged with RC_FLOAT because they are
callee saved on Windows */
RC_XMM6,
RC_XMM7,
/* st0 */ RC_ST0
/* st0 */ RC_ST0,
RC_RSI|RC_INT2,
RC_RDI|RC_INT2,
RC_INT|RC_R8|RC_INT2,
RC_INT|RC_R9|RC_INT2,
RC_INT|RC_INT1|RC_INT2|RC_R10,
RC_INT|RC_INT1|RC_INT2|RC_R11,
RC_INT|RC_INT1|RC_INT2|RC_R12,
RC_INT|RC_INT1|RC_INT2|RC_R13,
RC_INT|RC_INT1|RC_INT2|RC_R14,
RC_INT|RC_INT1|RC_INT2|RC_R15,
/* xmm0 */ RC_FLOAT | RC_XMM0,
RC_FLOAT|RC_XMM1,
RC_FLOAT|RC_XMM2,
RC_FLOAT|RC_XMM3,
RC_FLOAT|RC_XMM4,
RC_FLOAT|RC_XMM5,
RC_FLOAT|RC_XMM6,
RC_FLOAT|RC_XMM7,
};
static unsigned long func_sub_sp_offset;
@ -324,7 +329,7 @@ static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
} else {
oad(0x85 | op_reg, c);
}
} else if ((r & VT_VALMASK) >= TREG_MEM) {
} else if (r & TREG_MEM) {
if (c) {
g(0x80 | op_reg | REG_VALUE(r));
gen_le32(c);
@ -1609,39 +1614,42 @@ int gtst(int inv, int t)
/* generate an integer binary operation */
void gen_opi(int op)
{
int r, fr, opc, c;
int ll, uu, cc;
int r, fr, opc, fc, c, ll, uu, cc, tt2;
fr = vtop[0].r;
fc = vtop->c.ul;
ll = is64_type(vtop[-1].type.t);
uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;
cc = (fr & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;
tt2 = (fr & (VT_LVAL | VT_LVAL_TYPE)) == VT_LVAL;
switch(op) {
case '+':
case TOK_ADDC1: /* add with carry generation */
opc = 0;
gen_op8:
vswap();
r = gv(RC_INT);
vswap();
if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
/* constant case */
vswap();
r = gv(RC_INT);
vswap();
c = vtop->c.i;
if (c == (char)c) {
/* XXX: generate inc and dec for smaller code ? */
orex(ll, r, 0, 0x83);
o(0xc0 | (opc << 3) | REG_VALUE(r));
g(c);
orex(ll, r, 0, 0x83);
o(0xc0 + REG_VALUE(r) + opc*8);
g(c);
} else {
orex(ll, r, 0, 0x81);
oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
oad(0xc0 + REG_VALUE(r) + opc*8, c);
}
} else {
gv2(RC_INT, RC_INT);
r = vtop[-1].r;
fr = vtop[0].r;
orex(ll, r, fr, (opc << 3) | 0x01);
o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
if(!tt2)
fr = gv(RC_INT);
orex(ll, fr, r, 0x03 + opc*8);
if(fr >= VT_CONST)
gen_modrm(r, fr, vtop->sym, fc);
else
o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8);
}
vtop--;
if (op >= TOK_ULT && op <= TOK_GT) {
@ -1669,11 +1677,27 @@ void gen_opi(int op)
opc = 1;
goto gen_op8;
case '*':
gv2(RC_INT, RC_INT);
r = vtop[-1].r;
fr = vtop[0].r;
orex(ll, fr, r, 0xaf0f); /* imul fr, r */
o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
opc = 5;
vswap();
r = gv(RC_INT);
vswap();
if(!tt2)
fr = gv(RC_INT);
if(r == TREG_RAX){
if(fr != TREG_RDX)
save_reg(TREG_RDX);
orex(ll, fr, r, 0xf7);
if(fr >= VT_CONST)
gen_modrm(opc, fr, vtop->sym, fc);
else
o(0xc0 + REG_VALUE(fr) + opc*8);
}else{
orex(ll, fr, r, 0xaf0f); /* imul fr, r */
if(fr >= VT_CONST)
gen_modrm(r, fr, vtop->sym, fc);
else
o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8);
}
vtop--;
break;
case TOK_SHL:
@ -1685,47 +1709,62 @@ void gen_opi(int op)
case TOK_SAR:
opc = 7;
gen_shift:
opc = 0xc0 | (opc << 3);
if (cc) {
/* constant case */
vswap();
r = gv(RC_INT);
vswap();
orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
o(opc | REG_VALUE(r));
g(vtop->c.i & (ll ? 63 : 31));
c = vtop->c.i;
if(c == 1){
orex(ll, r, 0, 0xd1);
o(0xc0 + REG_VALUE(r) + opc*8);
}else{
orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
o(0xc0 + REG_VALUE(r) + opc*8);
g(c & (ll ? 0x3f : 0x1f));
}
} else {
/* we generate the shift in ecx */
gv2(RC_INT, RC_RCX);
r = vtop[-1].r;
orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
o(opc | REG_VALUE(r));
o(0xc0 + REG_VALUE(r) + opc*8);
}
vtop--;
break;
case TOK_UDIV:
case TOK_UMOD:
opc = 6;
uu = 1;
goto divmod;
case '/':
case '%':
case TOK_PDIV:
opc = 7;
uu = 0;
divmod:
/* first operand must be in eax */
/* XXX: need better constraint for second operand */
gv2(RC_RAX, RC_RCX);
r = vtop[-1].r;
fr = vtop[0].r;
vtop--;
save_reg(TREG_RDX);
orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
orex(ll, fr, 0, 0xf7); /* div fr, %eax */
o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
if(!tt2){
gv2(RC_RAX, RC_INT2);
fr = vtop[0].r;
}else{
vswap();
gv(RC_RAX);
vswap();
}
save_reg(TREG_RDX);
orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cdq RDX:RAX <- sign-extend of RAX. */
orex(ll, fr, 0, 0xf7); /* div fr, %eax */
if(fr >= VT_CONST)
gen_modrm(opc, fr, vtop->sym, fc);
else
o(0xc0 + REG_VALUE(fr) + opc*8);
if (op == '%' || op == TOK_UMOD)
r = TREG_RDX;
else
r = TREG_RAX;
vtop--;
vtop->r = r;
break;
default:
@ -1744,9 +1783,8 @@ void gen_opl(int op)
/* XXX: need to use ST1 too */
void gen_opf(int op)
{
int a, ft, fc, swapped, r;
int float_type =
(vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
int a, ft, fc, swapped, fr, r;
int float_type = (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
/* convert constants to memory references */
if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
@ -1757,21 +1795,23 @@ void gen_opf(int op)
if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
gv(float_type);
/* must put at least one value in the floating point register */
if ((vtop[-1].r & VT_LVAL) &&
(vtop[0].r & VT_LVAL)) {
vswap();
gv(float_type);
vswap();
}
swapped = 0;
/* swap the stack if needed so that t1 is the register and t2 is
the memory reference */
if (vtop[-1].r & VT_LVAL) {
vswap();
swapped = 1;
}
if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
swapped = 0;
fc = vtop->c.ul;
ft = vtop->type.t;
if ((ft & VT_BTYPE) == VT_LDOUBLE) {
/* swap the stack if needed so that t1 is the register and t2 is
the memory reference */
/* must put at least one value in the floating point register */
if ((vtop[-1].r & VT_LVAL) && (vtop[0].r & VT_LVAL)) {
vswap();
gv(float_type);
vswap();
}
if (vtop[-1].r & VT_LVAL) {
vswap();
swapped = 1;
}
if (op >= TOK_ULT && op <= TOK_GT) {
/* load on stack second operand */
load(TREG_ST0, vtop);
@ -1782,10 +1822,10 @@ void gen_opf(int op)
swapped = 0;
if (swapped)
o(0xc9d9); /* fxch %st(1) */
if (op == TOK_EQ || op == TOK_NE)
o(0xe9da); /* fucompp */
else
o(0xd9de); /* fcompp */
if (op == TOK_EQ || op == TOK_NE)
o(0xe9da); /* fucompp */
else
o(0xd9de); /* fcompp */
o(0xe0df); /* fnstsw %ax */
if (op == TOK_EQ) {
o(0x45e480); /* and $0x45, %ah */
@ -1808,7 +1848,6 @@ void gen_opf(int op)
/* no memory reference possible for long double operations */
load(TREG_ST0, vtop);
swapped = !swapped;
switch(op) {
default:
case '+':
@ -1828,63 +1867,45 @@ void gen_opf(int op)
a++;
break;
}
ft = vtop->type.t;
fc = vtop->c.ul;
o(0xde); /* fxxxp %st, %st(1) */
o(0xc1 + (a << 3));
vtop--;
}
} else {
vswap();
gv(float_type);
vswap();
fr = vtop->r;
r = vtop[-1].r;
if (op >= TOK_ULT && op <= TOK_GT) {
/* if saved lvalue, then we must reload it */
r = vtop->r;
fc = vtop->c.ul;
if ((r & VT_VALMASK) == VT_LLOCAL) {
SValue v1;
r = get_reg(RC_INT);
v1.type.t = VT_PTR;
v1.r = VT_LOCAL | VT_LVAL;
v1.c.ul = fc;
load(r, &v1);
fc = 0;
}
if (op == TOK_EQ || op == TOK_NE) {
swapped = 0;
} else {
if (op == TOK_LE || op == TOK_LT)
swapped = !swapped;
if (op == TOK_LE || op == TOK_GE) {
op = 0x93; /* setae */
} else {
op = 0x97; /* seta */
}
}
if (swapped) {
gv(RC_FLOAT);
vswap();
}
assert(!(vtop[-1].r & VT_LVAL));
if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
o(0x66);
if (op == TOK_EQ || op == TOK_NE)
o(0x2e0f); /* ucomisd */
else
o(0x2f0f); /* comisd */
if (vtop->r & VT_LVAL) {
gen_modrm(vtop[-1].r, r, vtop->sym, fc);
} else {
o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
}
/* ucomisd reports its result in CF/ZF the way an unsigned integer
   compare does, so each signed comparison token is replaced by its
   unsigned counterpart before being stored in the VT_CMP value */
switch(op){
case TOK_LE:
op = TOK_ULE; /* setbe */
break;
case TOK_LT:
op = TOK_ULT; /* setb */
break;
case TOK_GE:
op = TOK_UGE; /* setae */
break;
case TOK_GT:
op = TOK_UGT; /* seta */
break;
}
assert(!(vtop[-1].r & VT_LVAL));
if ((ft & VT_BTYPE) == VT_DOUBLE)
o(0x66);
o(0x2e0f); /* ucomisd */
if(fr >= VT_CONST)
gen_modrm(r, fr, vtop->sym, fc);
else
o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8);
vtop--;
vtop->r = VT_CMP;
vtop->c.i = op | 0x100;
} else {
assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
/* no memory reference possible for long double operations */
switch(op) {
default:
case '+':
@ -1900,44 +1921,20 @@ void gen_opf(int op)
a = 6;
break;
}
ft = vtop->type.t;
fc = vtop->c.ul;
assert((ft & VT_BTYPE) != VT_LDOUBLE);
r = vtop->r;
/* if saved lvalue, then we must reload it */
if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
SValue v1;
r = get_reg(RC_INT);
v1.type.t = VT_PTR;
v1.r = VT_LOCAL | VT_LVAL;
v1.c.ul = fc;
load(r, &v1);
fc = 0;
}
assert(!(vtop[-1].r & VT_LVAL));
if (swapped) {
assert(vtop->r & VT_LVAL);
gv(RC_FLOAT);
vswap();
}
if ((ft & VT_BTYPE) == VT_DOUBLE) {
o(0xf2);
} else {
o(0xf3);
}
o(0x0f);
o(0x58 + a);
if (vtop->r & VT_LVAL) {
gen_modrm(vtop[-1].r, r, vtop->sym, fc);
} else {
o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
}
vtop--;
assert((ft & VT_BTYPE) != VT_LDOUBLE);
assert(!(vtop[-1].r & VT_LVAL));
if ((ft & VT_BTYPE) == VT_DOUBLE) {
o(0xf2);
} else {
o(0xf3);
}
o(0x0f);
o(0x58 + a);
if(fr >= VT_CONST)
gen_modrm(r, fr, vtop->sym, fc);
else
o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8);
vtop--;
}
}
}
@ -1946,103 +1943,96 @@ void gen_opf(int op)
and 'long long' cases. */
void gen_cvt_itof(int t)
{
if ((t & VT_BTYPE) == VT_LDOUBLE) {
int ft, bt, tbt, r;
ft = vtop->type.t;
bt = ft & VT_BTYPE;
tbt = t & VT_BTYPE;
r = gv(RC_INT);
if (tbt == VT_LDOUBLE) {
save_reg(TREG_ST0);
gv(RC_INT);
if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
if ((ft & VT_BTYPE) == VT_LLONG) {
/* signed long long to float/double/long double (unsigned case
is handled generically) */
o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
o(0x50 + REG_VALUE(r)); /* push r */
o(0x242cdf); /* fildll (%rsp) */
o(0x08c48348); /* add $8, %rsp */
} else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
(VT_INT | VT_UNSIGNED)) {
} else if ((ft & (VT_BTYPE | VT_UNSIGNED)) == (VT_INT | VT_UNSIGNED)) {
/* unsigned int to float/double/long double */
o(0x6a); /* push $0 */
g(0x00);
o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
o(0x50 + REG_VALUE(r)); /* push r */
o(0x242cdf); /* fildll (%rsp) */
o(0x10c48348); /* add $16, %rsp */
} else {
/* int to float/double/long double */
o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
o(0x50 + REG_VALUE(r)); /* push r */
o(0x2404db); /* fildl (%rsp) */
o(0x08c48348); /* add $8, %rsp */
}
vtop->r = TREG_ST0;
} else {
int r = get_reg(RC_FLOAT);
gv(RC_INT);
o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));
if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
(VT_INT | VT_UNSIGNED) ||
(vtop->type.t & VT_BTYPE) == VT_LLONG) {
int r_xmm;
r_xmm = get_reg(RC_FLOAT);
o(0xf2 + (tbt == VT_FLOAT));
if ((ft & (VT_BTYPE | VT_UNSIGNED)) == (VT_INT | VT_UNSIGNED) || bt == VT_LLONG) {
o(0x48); /* REX */
}
o(0x2a0f);
o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
vtop->r = r;
o(0xc0 + REG_VALUE(r) + REG_VALUE(r_xmm)*8); /* cvtsi2sd or cvtsi2ss */
vtop->r = r_xmm;
}
}
/* convert from one floating point type to another */
void gen_cvt_ftof(int t)
{
int ft, bt, tbt;
int ft, bt, tbt, r;
ft = vtop->type.t;
bt = ft & VT_BTYPE;
tbt = t & VT_BTYPE;
if (bt == VT_FLOAT) {
gv(RC_FLOAT);
if(bt == VT_LDOUBLE)
r = get_reg(RC_FLOAT);
else
r = gv(RC_FLOAT);
if (bt == VT_FLOAT) {
if (tbt == VT_DOUBLE) {
o(0x140f); /* unpcklps */
o(0xc0 + REG_VALUE(vtop->r)*9);
o(0x5a0f); /* cvtps2pd */
o(0xc0 + REG_VALUE(vtop->r)*9);
o(0xc0 + REG_VALUE(r) + REG_VALUE(r) * 8);
} else if (tbt == VT_LDOUBLE) {
save_reg(RC_ST0);
/* movss %xmm0,-0x10(%rsp) */
/* movss %xmm0-7,-0x10(%rsp) */
o(0x110ff3);
o(0x44 + REG_VALUE(vtop->r)*8);
o(0xf024);
o(0xf02444 + REG_VALUE(r)*8);
o(0xf02444d9); /* flds -0x10(%rsp) */
vtop->r = TREG_ST0;
}
} else if (bt == VT_DOUBLE) {
gv(RC_FLOAT);
if (tbt == VT_FLOAT) {
o(0x140f66); /* unpcklpd */
o(0xc0 + REG_VALUE(vtop->r)*9);
o(0x5a0f66); /* cvtpd2ps */
o(0xc0 + REG_VALUE(vtop->r)*9);
o(0xc0 + REG_VALUE(r) + REG_VALUE(r) * 8);
} else if (tbt == VT_LDOUBLE) {
save_reg(RC_ST0);
/* movsd %xmm0,-0x10(%rsp) */
/* movsd %xmm0-7,-0x10(%rsp) */
o(0x110ff2);
o(0x44 + REG_VALUE(vtop->r)*8);
o(0xf024);
o(0xf02444 + REG_VALUE(r)*8);
o(0xf02444dd); /* fldl -0x10(%rsp) */
vtop->r = TREG_ST0;
}
} else {
int r;
gv(RC_ST0);
r = get_reg(RC_FLOAT);
if (tbt == VT_DOUBLE) {
o(0xf0245cdd); /* fstpl -0x10(%rsp) */
/* movsd -0x10(%rsp),%xmm0 */
/* movsd -0x10(%rsp),%xmm0-7 */
o(0x100ff2);
o(0x44 + REG_VALUE(r)*8);
o(0xf024);
o(0xf02444 + REG_VALUE(r)*8);
vtop->r = r;
} else if (tbt == VT_FLOAT) {
o(0xf0245cd9); /* fstps -0x10(%rsp) */
/* movss -0x10(%rsp),%xmm0 */
/* movss -0x10(%rsp),%xmm0-7 */
o(0x100ff3);
o(0x44 + REG_VALUE(r)*8);
o(0xf024);
o(0xf02444 + REG_VALUE(r)*8);
vtop->r = r;
}
}
@ -2051,20 +2041,20 @@ void gen_cvt_ftof(int t)
/* convert fp to int 't' type */
void gen_cvt_ftoi(int t)
{
int ft, bt, size, r;
int ft, bt, ll, r, r_xmm;
ft = vtop->type.t;
bt = ft & VT_BTYPE;
if (bt == VT_LDOUBLE) {
gen_cvt_ftof(VT_DOUBLE);
bt = VT_DOUBLE;
}
gv(RC_FLOAT);
if (t != VT_INT)
size = 8;
r_xmm = gv(RC_FLOAT);
if ((t & VT_BTYPE) == VT_INT)
ll = 0;
else
size = 4;
ll = 1;
r = get_reg(RC_INT);
if (bt == VT_FLOAT) {
o(0xf3);
@ -2073,8 +2063,8 @@ void gen_cvt_ftoi(int t)
} else {
assert(0);
}
orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
orex(ll, r, r_xmm, 0x2c0f); /* cvttss2si or cvttsd2si */
o(0xc0 + REG_VALUE(r_xmm) + (REG_VALUE(r) << 3));
vtop->r = r;
}