arm64: Optimise some integer operations with a constant operand.

master
Edmund Grimley Evans 2015-03-07 17:03:51 +00:00
parent ac70e6b840
commit d854dede03
3 changed files with 243 additions and 2 deletions

View File

@ -1331,9 +1331,131 @@ ST_FUNC int gtst(int inv, int t)
return gjmp(t);
}
static void arm64_gen_opil(int op, int l)
/*
 * Test whether 'sv' is a plain constant and optionally fetch its value.
 *
 * Returns 1 when (sv->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST,
 * i.e. the value class is VT_CONST with neither VT_LVAL nor VT_SYM set,
 * and 0 otherwise.
 *
 * When 'val' is non-null and the test succeeds, the constant is read from
 * the union member selected by the value's base type and signedness
 * (c.ull, c.ll, c.ui or c.i) and converted to uint64_t.  Pass val == 0 to
 * perform only the test.
 */
static int arm64_iconst(uint64_t *val, SValue *sv)
{
    if ((sv->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
        return 0;
    if (val) {
        // Only the base type and the unsigned flag decide which member holds
        // the value.
        int t = sv->type.t & (VT_BTYPE | VT_UNSIGNED);
        // It's crazy how TCC has all these alternatives for storing a value:
        if (t == (VT_LLONG | VT_UNSIGNED))
            *val = sv->c.ull;
        else if (t == VT_LLONG)
            *val = sv->c.ll;
        else if (t & VT_UNSIGNED)
            *val = sv->c.ui;
        else
            *val = sv->c.i;
    }
    return 1;
}
/*
 * Try to emit a single integer operation that has one constant operand.
 *
 *   op   operator: '+', '-', '^', '&', '|', TOK_SAR, TOK_SHL or TOK_SHR
 *   l    0 for a 32-bit operation, 1 for a 64-bit one (it is shifted into
 *        bit 31 of every emitted instruction word)
 *   rev  zero when the constant is the right-hand operand (reg OP const),
 *        non-zero when it is the left-hand operand (const OP reg)
 *   val  the constant
 *   x    destination register number
 *   a    number of the register holding the non-constant operand
 *
 * Returns 1 when code was emitted and 0 when no special form applies, in
 * which case nothing has been emitted and the caller must use the general
 * register-register path.  Register 30 may be overwritten as a scratch
 * register when the constant has to be materialised with arm64_movimm().
 */
static int arm64_gen_opic(int op, uint32_t l, int rev, uint64_t val,
                          uint32_t x, uint32_t a)
{
    if (op == '-' && !rev) {
        // reg - const is generated as reg + (-const).
        val = -val;
        op = '+';
    }
    // A 32-bit operation only looks at the low 32 bits of the constant.
    val = l ? val : (uint32_t)val;

    switch (op) {

    case '+': {
        // s is the sign bit of the constant at the operation width.  A
        // negative constant is negated and s << 30 is OR-ed into the opcode
        // so that the magnitude is used instead.
        uint32_t s = l ? val >> 63 : val >> 31;
        val = s ? -val : val;
        val = l ? val : (uint32_t)val;
        if (!(val & ~(uint64_t)0xfff))
            // Magnitude fits in the low 12 bits.
            o(0x11000000 | l << 31 | s << 30 | x | a << 5 | val << 10);
        else if (!(val & ~(uint64_t)0xfff000))
            // Magnitude fits in bits 12..23.
            o(0x11400000 | l << 31 | s << 30 | x | a << 5 | val >> 12 << 10);
        else {
            arm64_movimm(30, val); // use x30
            o(0x0b1e0000 | l << 31 | s << 30 | x | a << 5);
        }
        return 1;
    }

    case '-':
        // Only reached with rev set, i.e. const - reg.
        if (!val)
            o(0x4b0003e0 | l << 31 | x | a << 16); // neg
        else if (val == (l ? (uint64_t)-1 : (uint32_t)-1))
            o(0x2a2003e0 | l << 31 | x | a << 16); // mvn
        else {
            arm64_movimm(30, val); // use x30
            o(0x4b0003c0 | l << 31 | x | a << 16); // sub
        }
        return 1;

    case '^':
        // XOR with all ones is a bitwise NOT.
        if (val == -1 || (val == 0xffffffff && !l)) {
            o(0x2a2003e0 | l << 31 | x | a << 16); // mvn
            return 1;
        }
        // fall through
    case '&':
    case '|': {
        // For a 32-bit operation the pattern is replicated into both halves
        // before asking for an encoding.  A negative result is treated as
        // "not encodable".
        int e = arm64_encode_bimm64(l ? val : val | val << 32);
        if (e < 0)
            return 0;
        o((op == '&' ? 0x12000000 :
           op == '|' ? 0x32000000 : 0x52000000) |
          l << 31 | x | a << 5 | (uint32_t)e << 10);
        return 1;
    }

    case TOK_SAR:
    case TOK_SHL:
    case TOK_SHR: {
        uint32_t n = 32 << l; // operand width in bits
        val = val & (n - 1);  // shift count is taken modulo the width
        if (rev)
            return 0;
        if (!val)
            // The masked count is zero (count 0 or a multiple of the width),
            // so the result is the operand itself.  Emit a move rather than
            // assert: an assert would abort the compiler on input such as
            // "x << 32", and with NDEBUG it would vanish and leave x
            // unwritten while still reporting success.
            o(0x2a0003e0 | l << 31 | x | a << 16); // mov
        else if (op == TOK_SHL)
            o(0x53000000 | l << 31 | l << 22 | x | a << 5 |
              (n - val) << 16 | (n - 1 - val) << 10); // lsl
        else
            o(0x13000000 | (op == TOK_SHR) << 30 | l << 31 | l << 22 |
              x | a << 5 | val << 16 | (n - 1) << 10); // lsr/asr
        return 1;
    }

    default:
        break;
    }
    return 0;
}
static void arm64_gen_opil(int op, uint32_t l)
{
uint32_t x, a, b;
// Special treatment for operations with a constant operand:
{
uint64_t val;
int rev = 1;
if (arm64_iconst(0, &vtop[0])) {
vswap();
rev = 0;
}
if (arm64_iconst(&val, &vtop[-1])) {
gv(RC_INT);
a = intr(vtop[0].r);
--vtop;
x = get_reg(RC_INT);
++vtop;
if (arm64_gen_opic(op, l, rev, val, intr(x), a)) {
vtop[0].r = x;
vswap();
--vtop;
return;
}
}
if (!rev)
vswap();
}
gv2(RC_INT, RC_INT);
assert(vtop[-1].r < VT_CONST && vtop[0].r < VT_CONST);
a = intr(vtop[-1].r);

View File

@ -2,6 +2,7 @@
// calling convention, but should give the same results on any architecture.
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
struct s1 { char x[1]; } s1 = { "0" };
@ -429,12 +430,94 @@ void movi(void)
pll(0xabcdef0123456789);
}
// Helpers for opi(): each one applies exactly one integer operation with a
// constant operand to its argument.  The expression shape is the point of
// the test, so do not simplify the bodies.
// Naming, as far as it can be read off the bodies: 'i' marks a 32-bit and
// 'l' a 64-bit operand type, 'p'/'m' a positive/negative constant.

// Addition and subtraction with the constant on the right.
static uint32_t addip0(uint32_t x) { return x + 0; }
static uint64_t sublp0(uint64_t x) { return x - 0; }
static uint32_t addip123(uint32_t x) { return x + 123; }
static uint64_t addlm123(uint64_t x) { return x + -123; }
static uint64_t sublp4095(uint64_t x) { return x - 4095; }
static uint32_t subim503808(uint32_t x) { return x - -503808; }
static uint64_t addp12345(uint64_t x) { return x + 12345; }
static uint32_t subp12345(uint32_t x) { return x - 12345; }
// Subtraction with the constant on the left.
static uint32_t mvni(uint32_t x) { return 0xffffffff - x; }
static uint64_t negl(uint64_t x) { return 0 - x; }
static uint32_t rsbi123(uint32_t x) { return 123 - x; }
static uint64_t rsbl123(uint64_t x) { return 123 - x; }
// Bitwise and/or/xor with 0, with all ones, and with the mask 0xf0.
static uint32_t andi0(uint32_t x) { return x & 0; }
static uint64_t andlm1(uint64_t x) { return x & -1; }
static uint64_t orrl0(uint64_t x) { return x | 0; }
static uint32_t orrim1(uint32_t x) { return x | -1; }
static uint32_t eori0(uint32_t x) { return x ^ 0; }
static uint64_t eorlm1(uint64_t x) { return x ^ -1; }
static uint32_t and0xf0(uint32_t x) { return x & 0xf0; }
static uint64_t orr0xf0(uint64_t x) { return x | 0xf0; }
static uint64_t eor0xf0(uint64_t x) { return x ^ 0xf0; }
// Shifts by zero.
static uint32_t lsli0(uint32_t x) { return x << 0; }
static uint32_t lsri0(uint32_t x) { return x >> 0; }
static int64_t asrl0(int64_t x) { return x >> 0; }
// Left shifts by 1 and by (width - 1).
static uint32_t lsli1(uint32_t x) { return x << 1; }
static uint32_t lsli31(uint32_t x) { return x << 31; }
static uint64_t lsll1(uint64_t x) { return x << 1; }
static uint64_t lsll63(uint64_t x) { return x << 63; }
// Right shifts of unsigned values by 1 and by (width - 1).
static uint32_t lsri1(uint32_t x) { return x >> 1; }
static uint32_t lsri31(uint32_t x) { return x >> 31; }
static uint64_t lsrl1(uint64_t x) { return x >> 1; }
static uint64_t lsrl63(uint64_t x) { return x >> 63; }
// Right shifts of signed values by 1 and by (width - 1).
static int32_t asri1(int32_t x) { return x >> 1; }
static int32_t asri31(int32_t x) { return x >> 31; }
static int64_t asrl1(int64_t x) { return x >> 1; }
static int64_t asrl63(int64_t x) { return x >> 63; }
// Call every constant-operand helper once with the argument 1000 and hand
// each result to pll().
// NOTE(review): pll() is defined outside this view; it presumably prints
// its argument, and the output is presumably compared line by line with a
// reference listing, so keep the call order unchanged -- confirm.
void opi(void)
{
int x = 1000;
// Additions and subtractions.
pll(addip0(x));
pll(sublp0(x));
pll(addip123(x));
pll(addlm123(x));
pll(sublp4095(x));
pll(subim503808(x));
pll(addp12345(x));
pll(subp12345(x));
// Subtractions with the constant on the left.
pll(mvni(x));
pll(negl(x));
pll(rsbi123(x));
pll(rsbl123(x));
// Bitwise operations.
pll(andi0(x));
pll(andlm1(x));
pll(orrl0(x));
pll(orrim1(x));
pll(eori0(x));
pll(eorlm1(x));
pll(and0xf0(x));
pll(orr0xf0(x));
pll(eor0xf0(x));
// Shifts by zero.
pll(lsli0(x));
pll(lsri0(x));
pll(asrl0(x));
// Shifts by 1 and by (width - 1).
pll(lsli1(x));
pll(lsli31(x));
pll(lsll1(x));
pll(lsll63(x));
pll(lsri1(x));
pll(lsri31(x));
pll(lsrl1(x));
pll(lsrl63(x));
pll(asri1(x));
pll(asri31(x));
pll(asrl1(x));
pll(asrl63(x));
}
// Run the test groups one after another, in this fixed order:
// arg, ret, stdarg, movi, opi.
void pcs(void)
{
arg();
ret();
stdarg();
movi();
opi();
}
int main()

View File

@ -136,3 +136,39 @@ abcd1234ffffffff
ffffef0123456789
abcdef012345ffff
abcdef0123456789
3e8
3e8
463
36d
fffffffffffff3e9
7b3e8
3421
ffffd3af
fffffc17
fffffffffffffc18
fffffc93
fffffffffffffc93
0
3e8
3e8
ffffffff
3e8
fffffffffffffc17
e0
3f8
318
3e8
3e8
3e8
7d0
0
7d0
0
1f4
0
1f4
0
1f4
0
1f4
0