diff --git a/tccgen.c b/tccgen.c index c084186..f8efb12 100644 --- a/tccgen.c +++ b/tccgen.c @@ -88,7 +88,7 @@ ST_INLN int is_float(int t) { int bt; bt = t & VT_BTYPE; - return bt == VT_LDOUBLE || bt == VT_DOUBLE || bt == VT_FLOAT; + return bt == VT_LDOUBLE || bt == VT_DOUBLE || bt == VT_FLOAT || bt == VT_QFLOAT; } /* we use our own 'finite' function to avoid potential problems with @@ -688,9 +688,7 @@ static void gbound(void) ST_FUNC int gv(int rc) { int r, bit_pos, bit_size, size, align, i; -#ifndef TCC_TARGET_X86_64 int rc2; -#endif /* NOTE: get_reg can modify vstack[] */ if (vtop->type.t & VT_BITFIELD) { @@ -765,11 +763,14 @@ ST_FUNC int gv(int rc) #endif r = vtop->r & VT_VALMASK; -#ifndef TCC_TARGET_X86_64 rc2 = RC_INT; if (rc == RC_IRET) rc2 = RC_LRET; +#ifdef TCC_TARGET_X86_64 + else if (rc == RC_FRET) + rc2 = RC_QRET; #endif + /* need to reload if: - constant - lvalue (need to dereference pointer) @@ -777,18 +778,25 @@ ST_FUNC int gv(int rc) if (r >= VT_CONST || (vtop->r & VT_LVAL) || !(reg_classes[r] & rc) -#ifndef TCC_TARGET_X86_64 +#ifdef TCC_TARGET_X86_64 + || ((vtop->type.t & VT_BTYPE) == VT_QLONG && !(reg_classes[vtop->r2] & rc2)) + || ((vtop->type.t & VT_BTYPE) == VT_QFLOAT && !(reg_classes[vtop->r2] & rc2)) +#else || ((vtop->type.t & VT_BTYPE) == VT_LLONG && !(reg_classes[vtop->r2] & rc2)) #endif ) { r = get_reg(rc); -#ifndef TCC_TARGET_X86_64 +#ifdef TCC_TARGET_X86_64 + if (((vtop->type.t & VT_BTYPE) == VT_QLONG) || ((vtop->type.t & VT_BTYPE) == VT_QFLOAT)) { +#else if ((vtop->type.t & VT_BTYPE) == VT_LLONG) { +#endif int r2; unsigned long long ll; /* two register type load : expand to two words temporarily */ +#ifndef TCC_TARGET_X86_64 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) { /* load constant */ ll = vtop->c.ull; @@ -796,23 +804,32 @@ ST_FUNC int gv(int rc) load(r, vtop); vtop->r = r; /* save register value */ vpushi(ll >> 32); /* second word */ - } else if (r >= VT_CONST || /* XXX: test to VT_CONST incorrect ? 
*/ + } else +#endif + if (r >= VT_CONST || /* XXX: test to VT_CONST incorrect ? */ (vtop->r & VT_LVAL)) { +#ifdef TCC_TARGET_X86_64 + int addr_type = VT_LLONG, load_size = 8, load_type = ((vtop->type.t & VT_BTYPE) == VT_QLONG) ? VT_LLONG : VT_DOUBLE; +#else + int addr_type = VT_INT, load_size = 4, load_type = VT_INT; +#endif /* We do not want to modifier the long long pointer here, so the safest (and less efficient) is to save all the other registers in the stack. XXX: totally inefficient. */ save_regs(1); /* load from memory */ + vtop->type.t = load_type; load(r, vtop); vdup(); vtop[-1].r = r; /* save register value */ /* increment pointer to get second word */ - vtop->type.t = VT_INT; + vtop->type.t = addr_type; gaddrof(); - vpushi(4); + vpushi(load_size); gen_op('+'); vtop->r |= VT_LVAL; + vtop->type.t = load_type; } else { /* move registers */ load(r, vtop); @@ -827,9 +844,7 @@ ST_FUNC int gv(int rc) vpop(); /* write second register */ vtop->r2 = r2; - } else -#endif - if ((vtop->r & VT_LVAL) && !is_float(vtop->type.t)) { + } else if ((vtop->r & VT_LVAL) && !is_float(vtop->type.t)) { int t1, t; /* lvalue of scalar type : need to use lvalue type because of possible cast */ @@ -2479,6 +2494,8 @@ ST_FUNC void vstore(void) #ifdef TCC_TARGET_X86_64 if ((ft & VT_BTYPE) == VT_LDOUBLE) { rc = RC_ST0; + } else if ((ft & VT_BTYPE) == VT_QFLOAT) { + rc = RC_FRET; } #endif } @@ -2497,29 +2514,29 @@ ST_FUNC void vstore(void) load(t, &sv); vtop[-1].r = t | VT_LVAL; } - store(r, vtop - 1); /* two word case handling : store second register at word + 4 (or +8 for x86-64) */ #ifdef TCC_TARGET_X86_64 - if ((ft & VT_BTYPE) == VT_QLONG) { + if (((ft & VT_BTYPE) == VT_QLONG) || ((ft & VT_BTYPE) == VT_QFLOAT)) { + int addr_type = VT_LLONG, load_size = 8, load_type = ((vtop->type.t & VT_BTYPE) == VT_QLONG) ? 
VT_LLONG : VT_DOUBLE; #else if ((ft & VT_BTYPE) == VT_LLONG) { + int addr_type = VT_INT, load_size = 4, load_type = VT_INT; #endif + vtop[-1].type.t = load_type; + store(r, vtop - 1); vswap(); /* convert to int to increment easily */ -#ifdef TCC_TARGET_X86_64 - vtop->type.t = VT_LLONG; + vtop->type.t = addr_type; gaddrof(); - vpushi(8); -#else - vtop->type.t = VT_INT; - gaddrof(); - vpushi(4); -#endif + vpushi(load_size); gen_op('+'); vtop->r |= VT_LVAL; vswap(); + vtop[-1].type.t = load_type; /* XXX: it works because r2 is spilled last ! */ store(vtop->r2, vtop - 1); + } else { + store(r, vtop - 1); } } vswap(); @@ -3897,8 +3914,16 @@ ST_FUNC void unary(void) /* return in register */ if (is_float(ret.type.t)) { ret.r = reg_fret(ret.type.t); +#ifdef TCC_TARGET_X86_64 + if ((ret.type.t & VT_BTYPE) == VT_QFLOAT) + ret.r2 = REG_QRET; +#endif } else { +#ifdef TCC_TARGET_X86_64 + if ((ret.type.t & VT_BTYPE) == VT_QLONG) +#else if ((ret.type.t & VT_BTYPE) == VT_LLONG) +#endif ret.r2 = REG_LRET; ret.r = REG_IRET; } diff --git a/tests/abitest.c b/tests/abitest.c index 8c1358f..aa11cc7 100644 --- a/tests/abitest.c +++ b/tests/abitest.c @@ -88,8 +88,8 @@ static int ret_2float_test(void) { /* * ret_2double_test: * - * On x86-64, a struct with 2 doubles should be packed into a single - * SSE register (this tests VT_QFLOAT). + * On x86-64, a struct with 2 doubles should be passed in two SSE + * registers. */ typedef struct ret_2double_test_type_s {double x, y;} ret_2double_test_type; typedef ret_2double_test_type (*ret_2double_test_function_type) (ret_2double_test_type); diff --git a/x86_64-gen.c b/x86_64-gen.c index 318384b..1c95b4a 100644 --- a/x86_64-gen.c +++ b/x86_64-gen.c @@ -23,7 +23,7 @@ #ifdef TARGET_DEFS_ONLY /* number of available registers */ -#define NB_REGS 5 +#define NB_REGS 6 #define NB_ASM_REGS 8 /* a register can belong to several classes. 
The classes must be @@ -39,10 +39,12 @@ #define RC_R10 0x0400 #define RC_R11 0x0800 #define RC_XMM0 0x0020 -#define RC_ST0 0x0040 /* only for long double */ +#define RC_XMM1 0x0040 +#define RC_ST0 0x0080 /* only for long double */ #define RC_IRET RC_RAX /* function return: integer register */ #define RC_LRET RC_RDX /* function return: second integer register */ #define RC_FRET RC_XMM0 /* function return: float register */ +#define RC_QRET RC_XMM1 /* function return: second float register */ /* pretty names for the registers */ enum { @@ -50,7 +52,8 @@ enum { TREG_RCX = 1, TREG_RDX = 2, TREG_XMM0 = 3, - TREG_ST0 = 4, + TREG_XMM1 = 4, + TREG_ST0 = 5, TREG_RSI = 6, TREG_RDI = 7, @@ -70,6 +73,7 @@ enum { #define REG_IRET TREG_RAX /* single word int return register */ #define REG_LRET TREG_RDX /* second word return register (for long long) */ #define REG_FRET TREG_XMM0 /* float return register */ +#define REG_QRET TREG_XMM1 /* second float return register */ /* defined if function parameters must be evaluated in reverse order */ #define INVERT_FUNC_PARAMS @@ -108,6 +112,7 @@ ST_DATA const int reg_classes[NB_REGS+7] = { /* ecx */ RC_INT | RC_RCX, /* edx */ RC_INT | RC_RDX, /* xmm0 */ RC_FLOAT | RC_XMM0, + /* xmm1 */ RC_FLOAT | RC_XMM1, /* st0 */ RC_ST0, 0, 0, @@ -375,7 +380,8 @@ void load(int r, SValue *sv) if ((ft & VT_BTYPE) == VT_FLOAT) { b = 0x6e0f66, r = 0; /* movd */ } else if ((ft & VT_BTYPE) == VT_DOUBLE) { - b = 0x7e0ff3, r = 0; /* movq */ + b = 0x7e0ff3; /* movq */ + r -= TREG_XMM0; } else if ((ft & VT_BTYPE) == VT_LDOUBLE) { b = 0xdb, r = 5; /* fldt */ } else if ((ft & VT_TYPE) == VT_BYTE) { @@ -387,6 +393,9 @@ void load(int r, SValue *sv) } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) { b = 0xb70f; /* movzwl */ } else { + assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG) + || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM) + || ((ft & VT_BTYPE) == VT_FUNC)); ll = is64_type(ft); b = 0x8b; } @@ -450,18 +459,30 @@ void 
load(int r, SValue *sv) orex(0,r,0,0); oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */ } else if (v != r) { - if (r == TREG_XMM0) { - assert(v == TREG_ST0); - /* gen_cvt_ftof(VT_DOUBLE); */ - o(0xf0245cdd); /* fstpl -0x10(%rsp) */ - /* movsd -0x10(%rsp),%xmm0 */ - o(0x44100ff2); - o(0xf024); + if ((r == TREG_XMM0) || (r == TREG_XMM1)) { + if (v == TREG_ST0) { + /* gen_cvt_ftof(VT_DOUBLE); */ + o(0xf0245cdd); /* fstpl -0x10(%rsp) */ + /* movsd -0x10(%rsp),%xmmN */ + o(0x100ff2); + o(0x44 + ((r - TREG_XMM0) << 3)); /* %xmmN */ + o(0xf024); + } else { + assert((v == TREG_XMM0) || (v == TREG_XMM1)); + if ((ft & VT_BTYPE) == VT_FLOAT) { + o(0x100ff3); + } else { + assert((ft & VT_BTYPE) == VT_DOUBLE); + o(0x100ff2); + } + o(0xc0 + (v - TREG_XMM0) + ((r - TREG_XMM0) << 3)); + } } else if (r == TREG_ST0) { - assert(v == TREG_XMM0); + assert((v == TREG_XMM0) || (v == TREG_XMM1)); /* gen_cvt_ftof(VT_LDOUBLE); */ /* movsd %xmm0,-0x10(%rsp) */ - o(0x44110ff2); + o(0x110ff2); + o(0x44 + ((v - TREG_XMM0) << 3)); /* %xmmN is the source v; r is TREG_ST0 in this branch */ o(0xf024); o(0xf02444dd); /* fldl -0x10(%rsp) */ } else { @@ -510,7 +531,7 @@ void store(int r, SValue *v) o(0x66); o(pic); o(0xd60f); /* movq */ - r = 0; + r -= TREG_XMM0; } else if (bt == VT_LDOUBLE) { o(0xc0d9); /* fld %st(0) */ o(pic); @@ -679,7 +700,7 @@ void gfunc_call(int nb_args) struct_size += size; } else if (is_sse_float(vtop->type.t)) { - gv(RC_FLOAT); /* only one float register */ + gv(RC_XMM0); /* only one float register */ j = --gen_reg; if (j >= REGN) { /* movq %xmm0, j*8(%rsp) */ @@ -861,6 +882,7 @@ static X86_64_Mode classify_x86_64_inner(CType *ty) { case VT_LLONG: case VT_BOOL: case VT_PTR: + case VT_FUNC: case VT_ENUM: return x86_64_mode_integer; case VT_FLOAT: @@ -881,6 +903,8 @@ static X86_64_Mode classify_x86_64_inner(CType *ty) { return mode; } + + assert(0); } static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *reg_count) { @@ -963,6 +987,14 @@ static const uint8_t arg_regs[REGN] = { TREG_RDI, TREG_RSI, 
TREG_RDX, TREG_RCX, TREG_R8, TREG_R9 }; +static int arg_prepare_reg(int idx) { + if (idx == 2 || idx == 3) + /* idx=2: r10, idx=3: r11 */ + return idx + 8; + else + return arg_regs[idx]; +} + /* Generate function call. The function address is pushed first, then all the parameters in call order. This functions pops all the parameters and the function address. */ @@ -1051,7 +1083,7 @@ void gfunc_call(int nb_args) case x86_64_mode_sse: if (sse_reg > 8) { - gv(RC_FLOAT); + gv(RC_XMM0); o(0x50); /* push $rax */ /* movq %xmm0, (%rsp) */ o(0x04d60f66); @@ -1097,15 +1129,18 @@ void gfunc_call(int nb_args) break; case x86_64_mode_sse: - if (sse_reg > 8) { - sse_reg -= reg_count; - } else { - for (j = 0; j < reg_count; ++j) { - --sse_reg; - gv(RC_FLOAT); /* only one float register */ + sse_reg -= reg_count; + if (sse_reg + reg_count <= 8) { + gv(RC_XMM0); /* value in xmm0 (second half in xmm1 when reg_count == 2) */ + if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */ + if (reg_count == 2) { + /* movaps %xmm1, %xmm(N+1): must come first, the xmm0 move below overwrites xmm1 when N == 1 */ + o(0x280f); + o(0xc1 + ((sse_reg+1) << 3)); + } /* movaps %xmm0, %xmmN */ o(0x280f); o(0xc0 + (sse_reg << 3)); } } break; @@ -1113,16 +1148,17 @@ void gfunc_call(int nb_args) case x86_64_mode_integer: /* simple type */ /* XXX: implicit cast ? */ - if (gen_reg > 8) { - gen_reg -= reg_count; - } else { - for (j = 0; j < reg_count; ++j) { - --gen_reg; - int d = arg_regs[gen_reg]; - r = gv(RC_INT); - if (gen_reg == 2 || gen_reg == 3) - /* gen_reg=2: r10, gen_reg=3: r11 */ - d = gen_reg + 8; + gen_reg -= reg_count; + if (gen_reg + reg_count <= REGN) { + r = gv((reg_count == 1) ? 
RC_INT : RC_IRET); + int d = arg_prepare_reg(gen_reg); + orex(1,d,r,0x89); /* mov */ + o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d)); + if (reg_count == 2) { + /* Second word of two-word value should always be in rdx + this case is handled via RC_IRET */ + r = TREG_RDX; + d = arg_prepare_reg(gen_reg+1); orex(1,d,r,0x89); /* mov */ o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d)); } @@ -1263,13 +1299,13 @@ void gfunc_prolog(CType *func_type) case x86_64_mode_sse: if (sse_param_index + reg_count <= 8) { /* save arguments passed by register */ + loc -= reg_count * 8; + param_addr = loc; for (i = 0; i < reg_count; ++i) { - loc -= 8; o(0xd60f66); /* movq */ - gen_modrm(sse_param_index, VT_LOCAL, NULL, loc); + gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8); ++sse_param_index; } - param_addr = loc; } else { param_addr = addr; addr += size; @@ -1286,11 +1322,12 @@ void gfunc_prolog(CType *func_type) case x86_64_mode_integer: { if (reg_param_index + reg_count <= REGN) { /* save arguments passed by register */ + loc -= reg_count * 8; + param_addr = loc; for (i = 0; i < reg_count; ++i) { - push_arg_reg(reg_param_index); + gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8); ++reg_param_index; } - param_addr = loc; } else { param_addr = addr; addr += size; @@ -1547,12 +1584,12 @@ void gen_opl(int op) /* generate a floating point operation 'v = t1 op t2' instruction. The two operands are guaranted to have the same floating point type */ -/* XXX: need to use ST1 too */ +/* XXX: need to use ST1 and XMM1 too */ void gen_opf(int op) { int a, ft, fc, swapped, r; int float_type = - (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT; + (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? 
RC_ST0 : RC_XMM0; /* to avoid xmm1 handling for now */ /* convert constants to memory references */ if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) { @@ -1803,9 +1840,9 @@ void gen_cvt_ftof(int t) ft = vtop->type.t; bt = ft & VT_BTYPE; tbt = t & VT_BTYPE; - + if (bt == VT_FLOAT) { - gv(RC_FLOAT); + gv(RC_XMM0); /* to avoid rewriting to handle xmm1 for now */ if (tbt == VT_DOUBLE) { o(0xc0140f); /* unpcklps */ o(0xc05a0f); /* cvtps2pd */ @@ -1817,7 +1854,7 @@ void gen_cvt_ftof(int t) vtop->r = TREG_ST0; } } else if (bt == VT_DOUBLE) { - gv(RC_FLOAT); + gv(RC_XMM0); /* to avoid rewriting to handle xmm1 for now */ if (tbt == VT_FLOAT) { o(0xc0140f66); /* unpcklpd */ o(0xc05a0f66); /* cvtpd2ps */ @@ -1857,7 +1894,7 @@ void gen_cvt_ftoi(int t) bt = VT_DOUBLE; } - gv(RC_FLOAT); + gv(RC_XMM0); if (t != VT_INT) size = 8; else