nixify
This commit is contained in:
+82
@@ -0,0 +1,82 @@
|
||||
#include "../all.h"
|
||||
|
||||
/* forward typedef; struct Amd64Op itself is defined further down in this header */
typedef struct Amd64Op Amd64Op;
|
||||
|
||||
enum Amd64Reg {
|
||||
RAX = RXX+1, /* caller-save */
|
||||
RCX, /* caller-save */
|
||||
RDX, /* caller-save */
|
||||
RSI, /* caller-save on sysv, callee-save on win */
|
||||
RDI, /* caller-save on sysv, callee-save on win */
|
||||
R8, /* caller-save */
|
||||
R9, /* caller-save */
|
||||
R10, /* caller-save */
|
||||
R11, /* caller-save */
|
||||
|
||||
RBX, /* callee-save */
|
||||
R12,
|
||||
R13,
|
||||
R14,
|
||||
R15,
|
||||
|
||||
RBP, /* globally live */
|
||||
RSP,
|
||||
|
||||
XMM0, /* sse */
|
||||
XMM1,
|
||||
XMM2,
|
||||
XMM3,
|
||||
XMM4,
|
||||
XMM5,
|
||||
XMM6,
|
||||
XMM7,
|
||||
XMM8,
|
||||
XMM9,
|
||||
XMM10,
|
||||
XMM11,
|
||||
XMM12,
|
||||
XMM13,
|
||||
XMM14,
|
||||
XMM15,
|
||||
|
||||
NFPR = XMM14 - XMM0 + 1, /* reserve XMM15 */
|
||||
NGPR = RSP - RAX + 1,
|
||||
NFPS = NFPR,
|
||||
|
||||
NGPS_SYSV = R11 - RAX + 1,
|
||||
NCLR_SYSV = R15 - RBX + 1,
|
||||
|
||||
NGPS_WIN = R11 - RAX + 1 - 2, /* -2 for RDI/RDI */
|
||||
NCLR_WIN = R15 - RBX + 1 + 2, /* +2 for RDI/RDI */
|
||||
};
|
||||
/* MAKESURE is defined elsewhere (presumably a compile-time assertion):
 * XMM15, the last register enumerator, must stay below Tmp0
 */
MAKESURE(reg_not_tmp, XMM15 < (int)Tmp0);
|
||||
|
||||
/* Element type of the amd64_op[] table declared below.
 * NOTE(review): the meaning of the three fields is not visible from
 * this header -- confirm against the table's definition in targ.c.
 */
struct Amd64Op {
	char nmem, zflag, lflag;
};
|
||||
|
||||
/* Declarations shared by the amd64 backend files; each group is
 * labelled with the source file that is expected to define it.
 */
/* targ.c */
|
||||
/* table of Amd64Op entries (presumably indexed by opcode -- confirm) */
extern Amd64Op amd64_op[];
|
||||
|
||||
/* sysv.c (abi) */
|
||||
/* register lists; emit.c walks amd64_sysv_rclob over NCLR_SYSV entries */
extern int amd64_sysv_rsave[];
|
||||
extern int amd64_sysv_rclob[];
|
||||
bits amd64_sysv_retregs(Ref, int[2]);
|
||||
bits amd64_sysv_argregs(Ref, int[2]);
|
||||
void amd64_sysv_abi(Fn *);
|
||||
|
||||
/* winabi.c */
|
||||
/* same interface as the sysv group, for the Windows calling convention */
extern int amd64_winabi_rsave[];
|
||||
extern int amd64_winabi_rclob[];
|
||||
bits amd64_winabi_retregs(Ref, int[2]);
|
||||
bits amd64_winabi_argregs(Ref, int[2]);
|
||||
void amd64_winabi_abi(Fn *);
|
||||
|
||||
/* isel.c */
|
||||
void amd64_isel(Fn *);
|
||||
|
||||
/* emit.c */
|
||||
/* write the assembly for one function to the given stream */
void amd64_sysv_emitfn(Fn *, FILE *);
|
||||
void amd64_winabi_emitfn(Fn *, FILE *);
|
||||
+881
@@ -0,0 +1,881 @@
|
||||
#include "all.h"
|
||||
|
||||
|
||||
typedef struct E E;
|
||||
|
||||
struct E {
|
||||
FILE *f;
|
||||
Fn *fn;
|
||||
int fp;
|
||||
uint64_t fsz;
|
||||
int nclob;
|
||||
};
|
||||
|
||||
/* X-macro over the comparison codes.  Each row is
 *
 *     X(code, suffix, inverse-suffix)
 *
 * where the suffixes are x86 condition mnemonics appended to
 * set/cmov/j.  The rows are used to build omap[], cmov[] and the
 * ctoa[] jump tables.
 * NOTE(review): the Cfle/Cflt rows carry "?" placeholders; presumably
 * these codes never reach emission because isel swaps the operands
 * (see cmpswap) -- confirm.
 */
#define CMP(X)                        \
	X(Ciule,      "be", "a")      \
	X(Ciult,      "b",  "ae")     \
	X(Cisle,      "le", "g")      \
	X(Cislt,      "l",  "ge")     \
	X(Cisgt,      "g",  "le")     \
	X(Cisge,      "ge", "l")      \
	X(Ciugt,      "a",  "be")     \
	X(Ciuge,      "ae", "b")      \
	X(Cieq,       "z",  "nz")     \
	X(Cine,       "nz", "z")      \
	X(NCmpI+Cfle, "?",  "?")      \
	X(NCmpI+Cflt, "?",  "?")      \
	X(NCmpI+Cfgt, "a",  "be")     \
	X(NCmpI+Cfge, "ae", "b")      \
	X(NCmpI+Cfo,  "np", "p")      \
	X(NCmpI+Cfuo, "p",  "np")
|
||||
|
||||
/* Operand sizes (the second index of rname[]) followed by the two
 * wildcard classes accepted in the cls column of omap[].
 */
enum {
	SLong,
	SWord,
	SShort,
	SByte,

	Ka = -2, /* matches all classes */
	Ki = -1, /* matches Kw and Kl */
};
|
||||
|
||||
/* Instruction format strings:
|
||||
*
|
||||
* if the format string starts with -, the instruction
|
||||
* is assumed to be 3-address and is put in 2-address
|
||||
* mode using an extra mov if necessary
|
||||
*
|
||||
* if the format string starts with +, the same as the
|
||||
* above applies, but commutativity is also assumed
|
||||
*
|
||||
* %k is used to set the class of the instruction,
|
||||
* it'll expand to "l", "q", "ss", "sd", depending
|
||||
* on the instruction class
|
||||
* %0 designates the first argument
|
||||
* %1 designates the second argument
|
||||
* %= designates the result
|
||||
*
|
||||
* if %k is not used, a prefix to 0, 1, or = must be
|
||||
* added, it can be:
|
||||
* M - memory reference
|
||||
* L - long (64 bits)
|
||||
* W - word (32 bits)
|
||||
* H - short (16 bits)
|
||||
* B - byte (8 bits)
|
||||
* S - single precision float
|
||||
* D - double precision float
|
||||
*/
|
||||
static struct {
|
||||
short op;
|
||||
short cls;
|
||||
char *fmt;
|
||||
} omap[] = {
|
||||
{ Oadd, Ka, "+add%k %1, %=" },
|
||||
{ Osub, Ka, "-sub%k %1, %=" },
|
||||
{ Oand, Ki, "+and%k %1, %=" },
|
||||
{ Oor, Ki, "+or%k %1, %=" },
|
||||
{ Oxor, Ki, "+xor%k %1, %=" },
|
||||
{ Osar, Ki, "-sar%k %B1, %=" },
|
||||
{ Oshr, Ki, "-shr%k %B1, %=" },
|
||||
{ Oshl, Ki, "-shl%k %B1, %=" },
|
||||
{ Omul, Ki, "+imul%k %1, %=" },
|
||||
{ Omul, Ks, "+mulss %1, %=" },
|
||||
{ Omul, Kd, "+mulsd %1, %=" },
|
||||
{ Odiv, Ka, "-div%k %1, %=" },
|
||||
{ Ostorel, Ka, "movq %L0, %M1" },
|
||||
{ Ostorew, Ka, "movl %W0, %M1" },
|
||||
{ Ostoreh, Ka, "movw %H0, %M1" },
|
||||
{ Ostoreb, Ka, "movb %B0, %M1" },
|
||||
{ Ostores, Ka, "movss %S0, %M1" },
|
||||
{ Ostored, Ka, "movsd %D0, %M1" },
|
||||
{ Oload, Ka, "mov%k %M0, %=" },
|
||||
{ Oloadsw, Kl, "movslq %M0, %L=" },
|
||||
{ Oloadsw, Kw, "movl %M0, %W=" },
|
||||
{ Oloaduw, Ki, "movl %M0, %W=" },
|
||||
{ Oloadsh, Ki, "movsw%k %M0, %=" },
|
||||
{ Oloaduh, Ki, "movzw%k %M0, %=" },
|
||||
{ Oloadsb, Ki, "movsb%k %M0, %=" },
|
||||
{ Oloadub, Ki, "movzb%k %M0, %=" },
|
||||
{ Oextsw, Kl, "movslq %W0, %L=" },
|
||||
{ Oextuw, Kl, "movl %W0, %W=" },
|
||||
{ Oextsh, Ki, "movsw%k %H0, %=" },
|
||||
{ Oextuh, Ki, "movzw%k %H0, %=" },
|
||||
{ Oextsb, Ki, "movsb%k %B0, %=" },
|
||||
{ Oextub, Ki, "movzb%k %B0, %=" },
|
||||
|
||||
{ Oexts, Kd, "cvtss2sd %0, %=" },
|
||||
{ Otruncd, Ks, "cvtsd2ss %0, %=" },
|
||||
{ Ostosi, Ki, "cvttss2si%k %0, %=" },
|
||||
{ Odtosi, Ki, "cvttsd2si%k %0, %=" },
|
||||
{ Oswtof, Ka, "cvtsi2%k %W0, %=" },
|
||||
{ Osltof, Ka, "cvtsi2%k %L0, %=" },
|
||||
{ Ocast, Ki, "movq %D0, %L=" },
|
||||
{ Ocast, Ka, "movq %L0, %D=" },
|
||||
|
||||
{ Oaddr, Ki, "lea%k %M0, %=" },
|
||||
{ Oswap, Ki, "xchg%k %0, %1" },
|
||||
{ Osign, Kl, "cqto" },
|
||||
{ Osign, Kw, "cltd" },
|
||||
{ Oxdiv, Ki, "div%k %0" },
|
||||
{ Oxidiv, Ki, "idiv%k %0" },
|
||||
{ Oxcmp, Ks, "ucomiss %S0, %S1" },
|
||||
{ Oxcmp, Kd, "ucomisd %D0, %D1" },
|
||||
{ Oxcmp, Ki, "cmp%k %0, %1" },
|
||||
{ Oxtest, Ki, "test%k %0, %1" },
|
||||
#define X(c, s, _) \
|
||||
{ Oflag+c, Ki, "set" s " %B=\n\tmovzb%k %B=, %=" },
|
||||
CMP(X)
|
||||
#undef X
|
||||
{ Oflagfeq, Ki, "setz %B=\n\tmovzb%k %B=, %=" },
|
||||
{ Oflagfne, Ki, "setnz %B=\n\tmovzb%k %B=, %=" },
|
||||
{ NOp, 0, 0 }
|
||||
};
|
||||
|
||||
static char cmov[][2][16] = {
|
||||
#define X(c, s0, s1) \
|
||||
[c] = { \
|
||||
"cmov" s0 " %0, %=", \
|
||||
"cmov" s1 " %1, %=", \
|
||||
},
|
||||
CMP(X)
|
||||
#undef X
|
||||
};
|
||||
|
||||
static char *rname[][4] = {
|
||||
[RAX] = {"rax", "eax", "ax", "al"},
|
||||
[RBX] = {"rbx", "ebx", "bx", "bl"},
|
||||
[RCX] = {"rcx", "ecx", "cx", "cl"},
|
||||
[RDX] = {"rdx", "edx", "dx", "dl"},
|
||||
[RSI] = {"rsi", "esi", "si", "sil"},
|
||||
[RDI] = {"rdi", "edi", "di", "dil"},
|
||||
[RBP] = {"rbp", "ebp", "bp", "bpl"},
|
||||
[RSP] = {"rsp", "esp", "sp", "spl"},
|
||||
[R8 ] = {"r8" , "r8d", "r8w", "r8b"},
|
||||
[R9 ] = {"r9" , "r9d", "r9w", "r9b"},
|
||||
[R10] = {"r10", "r10d", "r10w", "r10b"},
|
||||
[R11] = {"r11", "r11d", "r11w", "r11b"},
|
||||
[R12] = {"r12", "r12d", "r12w", "r12b"},
|
||||
[R13] = {"r13", "r13d", "r13w", "r13b"},
|
||||
[R14] = {"r14", "r14d", "r14w", "r14b"},
|
||||
[R15] = {"r15", "r15d", "r15w", "r15b"},
|
||||
};
|
||||
|
||||
|
||||
static int
|
||||
slot(Ref r, E *e)
|
||||
{
|
||||
int s;
|
||||
|
||||
s = rsval(r);
|
||||
assert(s <= e->fn->slot);
|
||||
/* specific to NAlign == 3 */
|
||||
if (s < 0) {
|
||||
if (e->fp == RSP)
|
||||
return 4*-s - 8 + e->fsz + e->nclob*8;
|
||||
else
|
||||
return 4*-s;
|
||||
}
|
||||
else if (e->fp == RSP)
|
||||
return 4*s + e->nclob*8;
|
||||
else if (e->fn->vararg) {
|
||||
if (T.windows)
|
||||
return -4 * (e->fn->slot - s);
|
||||
else
|
||||
return -176 + -4 * (e->fn->slot - s);
|
||||
} else
|
||||
return -4 * (e->fn->slot - s);
|
||||
}
|
||||
|
||||
static void
|
||||
emitcon(Con *con, E *e)
|
||||
{
|
||||
char *p, *l;
|
||||
|
||||
switch (con->type) {
|
||||
case CAddr:
|
||||
l = str(con->sym.id);
|
||||
p = l[0] == '"' ? "" : T.assym;
|
||||
if (con->sym.type == SThr) {
|
||||
assert(!T.apple);
|
||||
fprintf(e->f, "%%fs:%s%s@tpoff", p, l);
|
||||
} else {
|
||||
assert((con->sym.type & ~SExt) == SGlo);
|
||||
fprintf(e->f, "%s%s", p, l);
|
||||
}
|
||||
if (con->bits.i)
|
||||
fprintf(e->f, "%+"PRId64, con->bits.i);
|
||||
break;
|
||||
case CBits:
|
||||
fprintf(e->f, "%"PRId64, con->bits.i);
|
||||
break;
|
||||
default:
|
||||
die("unreachable");
|
||||
}
|
||||
}
|
||||
|
||||
static char *
|
||||
regtoa(int reg, int sz)
|
||||
{
|
||||
static char buf[6];
|
||||
|
||||
assert(reg <= XMM15);
|
||||
if (reg >= XMM0) {
|
||||
sprintf(buf, "xmm%d", reg-XMM0);
|
||||
return buf;
|
||||
} else
|
||||
return rname[reg][sz];
|
||||
}
|
||||
|
||||
static Ref
|
||||
getarg(char c, Ins *i)
|
||||
{
|
||||
switch (c) {
|
||||
case '0':
|
||||
return i->arg[0];
|
||||
case '1':
|
||||
return i->arg[1];
|
||||
case '=':
|
||||
return i->to;
|
||||
default:
|
||||
die("invalid arg letter %c", c);
|
||||
}
|
||||
}
|
||||
|
||||
/* forward declaration: emitcopy() and emitf() below call emitins() before its definition */
static void emitins(Ins, E *);
|
||||
|
||||
static void
|
||||
emitcopy(Ref r1, Ref r2, int k, E *e)
|
||||
{
|
||||
Ins icp;
|
||||
|
||||
icp.op = Ocopy;
|
||||
icp.arg[0] = r2;
|
||||
icp.to = r1;
|
||||
icp.cls = k;
|
||||
emitins(icp, e);
|
||||
}
|
||||
|
||||
/* Expand the format string s for instruction i and write the
 * resulting assembly line (tab, text, newline) to e->f.
 *
 * A leading '+' or '-' first puts the instruction in 2-address
 * form by copying arg[0] into the result; '+' additionally swaps
 * the two arguments when arg[1] is the result.  The swap is done
 * on *i, so the caller's instruction is modified.
 */
static void
|
||||
emitf(char *s, Ins *i, E *e)
|
||||
{
|
||||
static char clstoa[][3] = {"l", "q", "ss", "sd"};
|
||||
char c;
|
||||
int sz;
|
||||
Ref ref;
|
||||
Mem *m;
|
||||
Con off;
|
||||
|
||||
switch (*s) {
|
||||
case '+':
|
||||
if (req(i->arg[1], i->to)) {
|
||||
ref = i->arg[0];
|
||||
i->arg[0] = i->arg[1];
|
||||
i->arg[1] = ref;
|
||||
}
|
||||
/* fall through */
|
||||
case '-':
|
||||
assert((!req(i->arg[1], i->to) || req(i->arg[0], i->to)) &&
|
||||
"cannot convert to 2-address");
|
||||
emitcopy(i->to, i->arg[0], i->cls, e);
|
||||
s++;
|
||||
break;
|
||||
}
|
||||
|
||||
fputc('\t', e->f);
|
||||
Next:
|
||||
/* copy literal text up to the next '%'; the terminating
 * NUL ends the line and returns */
while ((c = *s++) != '%')
|
||||
if (!c) {
|
||||
fputc('\n', e->f);
|
||||
return;
|
||||
} else
|
||||
fputc(c, e->f);
|
||||
switch ((c = *s++)) {
|
||||
case '%':
|
||||
fputc('%', e->f);
|
||||
break;
|
||||
case 'k':
|
||||
fputs(clstoa[i->cls], e->f);
|
||||
break;
|
||||
case '0':
|
||||
case '1':
|
||||
case '=':
|
||||
/* no size prefix: the size follows the instruction class */
sz = KWIDE(i->cls) ? SLong : SWord;
|
||||
/* step back so that Ref re-reads the argument letter */
s--;
|
||||
goto Ref;
|
||||
case 'D':
|
||||
case 'S':
|
||||
sz = SLong; /* does not matter for floats */
|
||||
Ref:
|
||||
c = *s++;
|
||||
ref = getarg(c, i);
|
||||
switch (rtype(ref)) {
|
||||
case RTmp:
|
||||
assert(isreg(ref));
|
||||
fprintf(e->f, "%%%s", regtoa(ref.val, sz));
|
||||
break;
|
||||
case RSlot:
|
||||
fprintf(e->f, "%d(%%%s)",
|
||||
slot(ref, e),
|
||||
regtoa(e->fp, SLong)
|
||||
);
|
||||
break;
|
||||
case RMem:
|
||||
Mem:
|
||||
/* also reached by goto from the 'M' case below */
m = &e->fn->mem[ref.val];
|
||||
/* a slot base is rewritten in place: its displacement is
 * combined into m->offset via addcon() and the base
 * becomes the frame register */
if (rtype(m->base) == RSlot) {
|
||||
off.type = CBits;
|
||||
off.bits.i = slot(m->base, e);
|
||||
addcon(&m->offset, &off, 1);
|
||||
m->base = TMP(e->fp);
|
||||
}
|
||||
if (m->offset.type != CUndef)
|
||||
emitcon(&m->offset, e);
|
||||
fputc('(', e->f);
|
||||
if (!req(m->base, R))
|
||||
fprintf(e->f, "%%%s",
|
||||
regtoa(m->base.val, SLong)
|
||||
);
|
||||
else if (m->offset.type == CAddr)
|
||||
fprintf(e->f, "%%rip");
|
||||
if (!req(m->index, R))
|
||||
fprintf(e->f, ", %%%s, %d",
|
||||
regtoa(m->index.val, SLong),
|
||||
m->scale
|
||||
);
|
||||
fputc(')', e->f);
|
||||
break;
|
||||
case RCon:
|
||||
fputc('$', e->f);
|
||||
emitcon(&e->fn->con[ref.val], e);
|
||||
break;
|
||||
default:
|
||||
die("unreachable");
|
||||
}
|
||||
break;
|
||||
case 'L':
|
||||
sz = SLong;
|
||||
goto Ref;
|
||||
case 'W':
|
||||
sz = SWord;
|
||||
goto Ref;
|
||||
case 'H':
|
||||
sz = SShort;
|
||||
goto Ref;
|
||||
case 'B':
|
||||
sz = SByte;
|
||||
goto Ref;
|
||||
case 'M':
|
||||
/* memory operand: the argument is printed as an address */
c = *s++;
|
||||
ref = getarg(c, i);
|
||||
switch (rtype(ref)) {
|
||||
case RMem:
|
||||
goto Mem;
|
||||
case RSlot:
|
||||
fprintf(e->f, "%d(%%%s)",
|
||||
slot(ref, e),
|
||||
regtoa(e->fp, SLong)
|
||||
);
|
||||
break;
|
||||
case RCon:
|
||||
off = e->fn->con[ref.val];
|
||||
emitcon(&off, e);
|
||||
/* non-thread-local symbols are addressed relative to %rip */
if (off.type == CAddr)
|
||||
if (off.sym.type != SThr)
|
||||
fprintf(e->f, "(%%rip)");
|
||||
break;
|
||||
case RTmp:
|
||||
assert(isreg(ref));
|
||||
fprintf(e->f, "(%%%s)", regtoa(ref.val, SLong));
|
||||
break;
|
||||
default:
|
||||
die("unreachable");
|
||||
}
|
||||
break;
|
||||
default:
|
||||
die("invalid format specifier %%%c", c);
|
||||
}
|
||||
goto Next;
|
||||
}
|
||||
|
||||
static bits negmask[4] = {
|
||||
[Ks] = 0x80000000,
|
||||
[Kd] = 0x8000000000000000,
|
||||
};
|
||||
|
||||
/* Emit the assembly for a single instruction.
 *
 * Most opcodes are looked up in omap[] (label Table) and expanded
 * by emitf(); the explicit cases handle opcodes that need more
 * than a table lookup and fall back to Table when they do not
 * apply.  i is received by value, so the rewrites done here do
 * not touch the caller's instruction.
 */
static void
|
||||
emitins(Ins i, E *e)
|
||||
{
|
||||
Ref r;
|
||||
int64_t val;
|
||||
int o, t0;
|
||||
Ins ineg;
|
||||
Con *con;
|
||||
char *sym;
|
||||
|
||||
switch (i.op) {
|
||||
default:
|
||||
if (isxsel(i.op))
|
||||
goto case_Oxsel;
|
||||
Table:
|
||||
/* most instructions are just pulled out of
|
||||
* the table omap[], some special cases are
|
||||
* detailed below */
|
||||
for (o=0;; o++) {
|
||||
/* this linear search should really be a binary
|
||||
* search */
|
||||
if (omap[o].op == NOp)
|
||||
die("no match for %s(%c)",
|
||||
optab[i.op].name, "wlsd"[i.cls]);
|
||||
/* a row matches on exact class, on Ki for integer
 * classes, or on Ka for any class */
if (omap[o].op == i.op)
|
||||
if (omap[o].cls == i.cls
|
||||
|| (omap[o].cls == Ki && KBASE(i.cls) == 0)
|
||||
|| (omap[o].cls == Ka))
|
||||
break;
|
||||
}
|
||||
emitf(omap[o].fmt, &i, e);
|
||||
break;
|
||||
case Onop:
|
||||
/* just do nothing for nops, they are inserted
|
||||
* by some passes */
|
||||
break;
|
||||
case Omul:
|
||||
/* here, we try to use the 3-address form
|
||||
* of multiplication when possible */
|
||||
if (rtype(i.arg[1]) == RCon) {
|
||||
r = i.arg[0];
|
||||
i.arg[0] = i.arg[1];
|
||||
i.arg[1] = r;
|
||||
}
|
||||
if (KBASE(i.cls) == 0 /* only available for ints */
|
||||
&& rtype(i.arg[0]) == RCon
|
||||
&& rtype(i.arg[1]) == RTmp) {
|
||||
emitf("imul%k %0, %1, %=", &i, e);
|
||||
break;
|
||||
}
|
||||
goto Table;
|
||||
case Osub:
|
||||
/* we have to use the negation trick to handle
|
||||
* some 3-address subtractions */
|
||||
if (req(i.to, i.arg[1]) && !req(i.arg[0], i.to)) {
|
||||
ineg = (Ins){Oneg, i.cls, i.to, {i.to}};
|
||||
emitins(ineg, e);
|
||||
emitf("add%k %0, %=", &i, e);
|
||||
break;
|
||||
}
|
||||
goto Table;
|
||||
case Oneg:
|
||||
if (!req(i.to, i.arg[0]))
|
||||
emitf("mov%k %0, %=", &i, e);
|
||||
if (KBASE(i.cls) == 0)
|
||||
emitf("neg%k %=", &i, e);
|
||||
else
|
||||
/* floats: xor with the negmask[] constant for the class,
 * referenced through the label number from stashbits() */
fprintf(e->f,
|
||||
"\txorp%c %sfp%d(%%rip), %%%s\n",
|
||||
"xxsd"[i.cls],
|
||||
T.asloc,
|
||||
stashbits(negmask[i.cls], 16),
|
||||
regtoa(i.to.val, SLong)
|
||||
);
|
||||
break;
|
||||
case Odiv:
|
||||
/* use xmm15 to adjust the instruction when the
|
||||
* conversion to 2-address in emitf() would fail */
|
||||
if (req(i.to, i.arg[1])) {
|
||||
i.arg[1] = TMP(XMM0+15);
|
||||
emitf("mov%k %=, %1", &i, e);
|
||||
emitf("mov%k %0, %=", &i, e);
|
||||
i.arg[0] = i.to;
|
||||
}
|
||||
goto Table;
|
||||
case Ocopy:
|
||||
/* copies are used for many things; see my note
|
||||
* to understand how to load big constants:
|
||||
* https://c9x.me/notes/2015-09-19.html */
|
||||
assert(rtype(i.to) != RMem);
|
||||
if (req(i.to, R) || req(i.arg[0], R))
|
||||
break;
|
||||
if (req(i.to, i.arg[0]))
|
||||
break;
|
||||
t0 = rtype(i.arg[0]);
|
||||
if (i.cls == Kl
|
||||
&& t0 == RCon
|
||||
&& e->fn->con[i.arg[0].val].type == CBits) {
|
||||
val = e->fn->con[i.arg[0].val].bits.i;
|
||||
/* a 64-bit constant that fits in 32 unsigned bits is
 * loaded into a register with a 32-bit move */
if (isreg(i.to))
|
||||
if (val >= 0 && val <= UINT32_MAX) {
|
||||
emitf("movl %W0, %W=", &i, e);
|
||||
break;
|
||||
}
|
||||
/* a constant outside the signed 32-bit range is stored
 * to a slot as two 32-bit halves */
if (rtype(i.to) == RSlot)
|
||||
if (val < INT32_MIN || val > INT32_MAX) {
|
||||
emitf("movl %0, %=", &i, e);
|
||||
emitf("movl %0>>32, 4+%=", &i, e);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (isreg(i.to)
|
||||
&& t0 == RCon
|
||||
&& e->fn->con[i.arg[0].val].type == CAddr) {
|
||||
emitf("lea%k %M0, %=", &i, e);
|
||||
break;
|
||||
}
|
||||
/* memory-to-slot copies go through xmm15 */
if (rtype(i.to) == RSlot
|
||||
&& (t0 == RSlot || t0 == RMem)) {
|
||||
i.cls = KWIDE(i.cls) ? Kd : Ks;
|
||||
i.arg[1] = TMP(XMM0+15);
|
||||
emitf("mov%k %0, %1", &i, e);
|
||||
emitf("mov%k %1, %=", &i, e);
|
||||
break;
|
||||
}
|
||||
/* conveniently, the assembler knows if it
|
||||
* should use movabsq when reading movq */
|
||||
emitf("mov%k %0, %=", &i, e);
|
||||
break;
|
||||
case Oaddr:
|
||||
if (rtype(i.arg[0]) != RCon)
|
||||
goto Table;
|
||||
con = &e->fn->con[i.arg[0].val];
|
||||
assert(isreg(i.to) && con->type == CAddr);
|
||||
sym = str(con->sym.id);
|
||||
if (T.apple && (con->sym.type & SThr)) {
|
||||
fprintf(e->f,
|
||||
"\tmovq %s%s@tlvp(%%rip), %%%s\n",
|
||||
sym[0] == '"' ? "" : T.assym, sym,
|
||||
regtoa(i.to.val, SLong));
|
||||
break;
|
||||
}
|
||||
if (T.windows && con->sym.type != SGlo)
|
||||
die("extern/thread unsupported on amd64_win");
|
||||
switch (con->sym.type) {
|
||||
case SThr:
|
||||
/* derive the symbol address from the TCB
|
||||
* address at offset 0 of %fs */
|
||||
emitf("movq %%fs:0, %L=", &i, e);
|
||||
fprintf(e->f, "\tleaq %s%s@tpoff",
|
||||
sym[0] == '"' ? "" : T.assym, sym);
|
||||
if (con->bits.i)
|
||||
fprintf(e->f, "%+"PRId64,
|
||||
con->bits.i);
|
||||
fprintf(e->f, "(%%%s), %%%s\n",
|
||||
regtoa(i.to.val, SLong),
|
||||
regtoa(i.to.val, SLong));
|
||||
break;
|
||||
case SExtThr:
|
||||
/* initial-exec TLS: load offset from
|
||||
* GOT, add to thread-base register */
|
||||
assert(!con->bits.i);
|
||||
emitf("movq %%fs:0, %L=", &i, e);
|
||||
fprintf(e->f,
|
||||
"\taddq %s%s@gottpoff(%%rip), %%%s\n",
|
||||
sym[0] == '"' ? "" : T.assym, sym,
|
||||
regtoa(i.to.val, SLong));
|
||||
break;
|
||||
case SExt:
|
||||
/* load address from the GOT */
|
||||
assert(!con->bits.i);
|
||||
fprintf(e->f,
|
||||
"\tmovq %s%s@gotpcrel(%%rip), %%%s\n",
|
||||
sym[0] == '"' ? "" : T.assym, sym,
|
||||
regtoa(i.to.val, SLong));
|
||||
break;
|
||||
default:
|
||||
goto Table;
|
||||
}
|
||||
break;
|
||||
case Ocall:
|
||||
/* calls simply have a weird syntax in AT&T
|
||||
* assembly... */
|
||||
switch (rtype(i.arg[0])) {
|
||||
case RCon:
|
||||
con = &e->fn->con[i.arg[0].val];
|
||||
fprintf(e->f, "\tcallq ");
|
||||
emitcon(con, e);
|
||||
/* external symbols are called through the PLT,
 * except on the apple target */
if (con->type == CAddr
|
||||
&& (con->sym.type & SExt)
|
||||
&& !T.apple)
|
||||
fprintf(e->f, "@plt");
|
||||
fprintf(e->f, "\n");
|
||||
break;
|
||||
case RTmp:
|
||||
emitf("callq *%L0", &i, e);
|
||||
break;
|
||||
default:
|
||||
die("invalid call argument");
|
||||
}
|
||||
break;
|
||||
case Osalloc:
|
||||
/* there is no good reason why this is here
|
||||
* maybe we should split Osalloc in 2 different
|
||||
* instructions depending on the result
|
||||
*/
|
||||
assert(e->fp == RBP);
|
||||
emitf("subq %L0, %%rsp", &i, e);
|
||||
if (!req(i.to, R))
|
||||
emitcopy(i.to, TMP(RSP), Kl, e);
|
||||
break;
|
||||
case Oswap:
|
||||
if (KBASE(i.cls) == 0)
|
||||
goto Table;
|
||||
/* for floats, there is no swap instruction
|
||||
* so we use xmm15 as a temporary
|
||||
*/
|
||||
emitcopy(TMP(XMM0+15), i.arg[0], i.cls, e);
|
||||
emitcopy(i.arg[0], i.arg[1], i.cls, e);
|
||||
emitcopy(i.arg[1], TMP(XMM0+15), i.cls, e);
|
||||
break;
|
||||
case Odbgloc:
|
||||
emitdbgloc(i.arg[0].val, i.arg[1].val, e->f);
|
||||
break;
|
||||
case_Oxsel:
|
||||
/* conditional select, reached from the default case: when
 * the result already holds arg[1], move arg[0] in under the
 * condition; otherwise put arg[0] in the result and move
 * arg[1] in under the inverse condition */
if (req(i.to, i.arg[1]))
|
||||
emitf(cmov[i.op-Oxsel][0], &i, e);
|
||||
else {
|
||||
if (!req(i.to, i.arg[0]))
|
||||
emitf("mov %0, %=", &i, e);
|
||||
emitf(cmov[i.op-Oxsel][1], &i, e);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
sysv_framesz(E *e)
|
||||
{
|
||||
uint64_t i, o, f;
|
||||
|
||||
/* specific to NAlign == 3 */
|
||||
o = 0;
|
||||
if (!e->fn->leaf) {
|
||||
for (i=0, o=0; i<NCLR_SYSV; i++)
|
||||
o ^= e->fn->reg >> amd64_sysv_rclob[i];
|
||||
o &= 1;
|
||||
}
|
||||
f = e->fn->slot;
|
||||
f = (f + 3) & -4;
|
||||
if (f > 0
|
||||
&& e->fp == RSP
|
||||
&& e->fn->salign == 4)
|
||||
f += 2;
|
||||
e->fsz = 4*f + 8*o + 176*e->fn->vararg;
|
||||
}
|
||||
|
||||
/* Write the assembly for fn to f (System V variant).
 *
 * Emits the linkage header, the prologue (frame pointer setup,
 * frame allocation, vararg register saves, callee-save pushes),
 * then every block's instructions followed by its jump.
 * id0 is added to each block id when forming labels and is
 * advanced by fn->nblk at the end; being static, it carries over
 * to the next call.
 */
void
|
||||
amd64_sysv_emitfn(Fn *fn, FILE *f)
|
||||
{
|
||||
/* jump suffix per comparison code: [0] as is, [1] inverted */
static char *ctoa[][2] = {
|
||||
#define X(c, s, n) [c] = {s, n},
|
||||
CMP(X)
|
||||
#undef X
|
||||
};
|
||||
static int id0;
|
||||
Blk *b, *s;
|
||||
Ins *i, itmp;
|
||||
int *r, c, o, n, lbl;
|
||||
uint p;
|
||||
E *e;
|
||||
|
||||
e = &(E){.f = f, .fn = fn};
|
||||
emitfnlnk(fn->name, &fn->lnk, f);
|
||||
fputs("\tendbr64\n", f);
|
||||
/* a frame pointer is set up unless the function is a leaf
 * with neither varargs nor dynamic allocation */
if (!fn->leaf || fn->vararg || fn->dynalloc) {
|
||||
e->fp = RBP;
|
||||
fputs("\tpushq %rbp\n\tmovq %rsp, %rbp\n", f);
|
||||
} else
|
||||
e->fp = RSP;
|
||||
sysv_framesz(e);
|
||||
if (e->fsz)
|
||||
fprintf(f, "\tsubq $%"PRIu64", %%rsp\n", e->fsz);
|
||||
/* vararg functions save the first 6 entries of
 * amd64_sysv_rsave and xmm0-xmm7 in the 176 bytes
 * just below %rbp */
if (fn->vararg) {
|
||||
o = -176;
|
||||
for (r=amd64_sysv_rsave; r<&amd64_sysv_rsave[6]; r++, o+=8)
|
||||
fprintf(f, "\tmovq %%%s, %d(%%rbp)\n", rname[*r][0], o);
|
||||
for (n=0; n<8; ++n, o+=16)
|
||||
fprintf(f, "\tmovaps %%xmm%d, %d(%%rbp)\n", n, o);
|
||||
}
|
||||
/* push the callee-save registers that fn uses; only
 * itmp.arg[0] is set because "pushq %L0" reads nothing else */
for (r=amd64_sysv_rclob; r<&amd64_sysv_rclob[NCLR_SYSV]; r++)
|
||||
if (fn->reg & BIT(*r)) {
|
||||
itmp.arg[0] = TMP(*r);
|
||||
emitf("pushq %L0", &itmp, e);
|
||||
e->nclob++;
|
||||
}
|
||||
|
||||
for (lbl=0, b=fn->start; b; b=b->link) {
|
||||
if (lbl || b->npred > 1) {
|
||||
/* align blocks that have a predecessor whose id is
 * not smaller than their own */
for (p=0; p<b->npred; p++)
|
||||
if (b->pred[p]->id >= b->id)
|
||||
break;
|
||||
if (p != b->npred)
|
||||
fprintf(f, ".p2align 4\n");
|
||||
fprintf(f, "%sbb%d:\n", T.asloc, id0+b->id);
|
||||
}
|
||||
for (i=b->ins; i!=&b->ins[b->nins]; i++)
|
||||
emitins(*i, e);
|
||||
lbl = 1;
|
||||
switch (b->jmp.type) {
|
||||
case Jhlt:
|
||||
fprintf(f, "\tud2\n");
|
||||
break;
|
||||
case Jret0:
|
||||
/* with dynamic allocation, %rsp is recomputed from
 * %rbp before the callee-save registers are popped */
if (fn->dynalloc)
|
||||
fprintf(f,
|
||||
"\tmovq %%rbp, %%rsp\n"
|
||||
"\tsubq $%"PRIu64", %%rsp\n",
|
||||
e->fsz + e->nclob * 8);
|
||||
for (r=&amd64_sysv_rclob[NCLR_SYSV]; r>amd64_sysv_rclob;)
|
||||
if (fn->reg & BIT(*--r)) {
|
||||
itmp.arg[0] = TMP(*r);
|
||||
emitf("popq %L0", &itmp, e);
|
||||
}
|
||||
if (e->fp == RBP)
|
||||
fputs("\tleave\n", f);
|
||||
else if (e->fsz)
|
||||
fprintf(f,
|
||||
"\taddq $%"PRIu64", %%rsp\n",
|
||||
e->fsz);
|
||||
fputs("\tret\n", f);
|
||||
break;
|
||||
case Jjmp:
|
||||
Jmp:
|
||||
/* a jump to the next block in layout order is omitted,
 * and lbl is cleared in that case */
if (b->s1 != b->link)
|
||||
fprintf(f, "\tjmp %sbb%d\n",
|
||||
T.asloc, id0+b->s1->id);
|
||||
else
|
||||
lbl = 0;
|
||||
break;
|
||||
default:
|
||||
c = b->jmp.type - Jjf;
|
||||
/* NOTE(review): the bound is inclusive (c <= NCmp) while
 * ctoa[] is sized by the CMP() rows -- confirm that
 * c == NCmp cannot occur */
if (0 <= c && c <= NCmp) {
|
||||
/* when s2 is the next block, swap the successors and
 * jump on the condition itself; otherwise jump to s2
 * on the inverse condition */
if (b->link == b->s2) {
|
||||
s = b->s1;
|
||||
b->s1 = b->s2;
|
||||
b->s2 = s;
|
||||
n = 0;
|
||||
} else
|
||||
n = 1;
|
||||
fprintf(f, "\tj%s %sbb%d\n", ctoa[c][n],
|
||||
T.asloc, id0+b->s2->id);
|
||||
goto Jmp;
|
||||
}
|
||||
die("unhandled jump %d", b->jmp.type);
|
||||
}
|
||||
}
|
||||
id0 += fn->nblk;
|
||||
if (!T.apple)
|
||||
elf_emitfnfin(fn->name, f);
|
||||
}
|
||||
|
||||
static void
|
||||
winabi_framesz(E *e)
|
||||
{
|
||||
uint64_t i, o, f;
|
||||
|
||||
/* specific to NAlign == 3 */
|
||||
o = 0;
|
||||
if (!e->fn->leaf) {
|
||||
for (i=0, o=0; i<NCLR_WIN; i++)
|
||||
o ^= e->fn->reg >> amd64_winabi_rclob[i];
|
||||
o &= 1;
|
||||
}
|
||||
f = e->fn->slot;
|
||||
f = (f + 3) & -4;
|
||||
if (f > 0
|
||||
&& e->fp == RSP
|
||||
&& e->fn->salign == 4)
|
||||
f += 2;
|
||||
e->fsz = 4*f + 8*o;
|
||||
}
|
||||
|
||||
/* Write the assembly for fn to f (Windows-ABI variant).
 *
 * Mirrors amd64_sysv_emitfn(): linkage header, prologue, then
 * every block's instructions followed by its jump.  Differences
 * visible here: vararg functions store rcx, rdx, r8 and r9 above
 * %rsp before the frame is set up, loop blocks are not aligned,
 * and no ELF function trailer is written.
 */
void
|
||||
amd64_winabi_emitfn(Fn *fn, FILE *f)
|
||||
{
|
||||
/* jump suffix per comparison code: [0] as is, [1] inverted */
static char *ctoa[][2] = {
|
||||
#define X(c, s, n) [c] = {s, n},
|
||||
CMP(X)
|
||||
#undef X
|
||||
};
|
||||
static int id0;
|
||||
Blk *b, *s;
|
||||
Ins *i, itmp;
|
||||
int *r, c, n, lbl;
|
||||
E *e;
|
||||
|
||||
e = &(E){.f = f, .fn = fn};
|
||||
emitfnlnk(fn->name, &fn->lnk, f);
|
||||
fputs("\tendbr64\n", f);
|
||||
/* store the four register arguments at 0x8..0x20 above %rsp */
if (fn->vararg) {
|
||||
fprintf(f, "\tmovq %%rcx, 0x8(%%rsp)\n");
|
||||
fprintf(f, "\tmovq %%rdx, 0x10(%%rsp)\n");
|
||||
fprintf(f, "\tmovq %%r8, 0x18(%%rsp)\n");
|
||||
fprintf(f, "\tmovq %%r9, 0x20(%%rsp)\n");
|
||||
}
|
||||
if (!fn->leaf || fn->vararg || fn->dynalloc) {
|
||||
e->fp = RBP;
|
||||
fputs("\tpushq %rbp\n\tmovq %rsp, %rbp\n", f);
|
||||
} else
|
||||
e->fp = RSP;
|
||||
winabi_framesz(e);
|
||||
if (e->fsz)
|
||||
fprintf(f, "\tsubq $%"PRIu64", %%rsp\n", e->fsz);
|
||||
/* push the callee-save registers that fn uses */
for (r=amd64_winabi_rclob; r<&amd64_winabi_rclob[NCLR_WIN]; r++)
|
||||
if (fn->reg & BIT(*r)) {
|
||||
itmp.arg[0] = TMP(*r);
|
||||
emitf("pushq %L0", &itmp, e);
|
||||
e->nclob++;
|
||||
}
|
||||
|
||||
for (lbl=0, b=fn->start; b; b=b->link) {
|
||||
if (lbl || b->npred > 1)
|
||||
fprintf(f, "%sbb%d:\n", T.asloc, id0+b->id);
|
||||
for (i=b->ins; i!=&b->ins[b->nins]; i++)
|
||||
emitins(*i, e);
|
||||
lbl = 1;
|
||||
switch (b->jmp.type) {
|
||||
case Jhlt:
|
||||
fprintf(f, "\tud2\n");
|
||||
break;
|
||||
case Jret0:
|
||||
if (fn->dynalloc)
|
||||
fprintf(f,
|
||||
"\tmovq %%rbp, %%rsp\n"
|
||||
"\tsubq $%"PRIu64", %%rsp\n",
|
||||
e->fsz + e->nclob * 8);
|
||||
for (r=&amd64_winabi_rclob[NCLR_WIN]; r>amd64_winabi_rclob;)
|
||||
if (fn->reg & BIT(*--r)) {
|
||||
itmp.arg[0] = TMP(*r);
|
||||
emitf("popq %L0", &itmp, e);
|
||||
}
|
||||
if (e->fp == RBP)
|
||||
fputs("\tleave\n", f);
|
||||
else if (e->fsz)
|
||||
fprintf(f,
|
||||
"\taddq $%"PRIu64", %%rsp\n",
|
||||
e->fsz);
|
||||
fputs("\tret\n", f);
|
||||
break;
|
||||
case Jjmp:
|
||||
Jmp:
|
||||
if (b->s1 != b->link)
|
||||
fprintf(f, "\tjmp %sbb%d\n",
|
||||
T.asloc, id0+b->s1->id);
|
||||
else
|
||||
lbl = 0;
|
||||
break;
|
||||
default:
|
||||
c = b->jmp.type - Jjf;
|
||||
if (0 <= c && c <= NCmp) {
|
||||
/* NOTE(review): unlike the sysv emitter, float
 * comparisons (c >= NCmpI) always take the swap path
 * here, so their inverse suffix is never used --
 * confirm that this divergence is intended */
if (b->link == b->s2 || c >= NCmpI) {
|
||||
s = b->s1;
|
||||
b->s1 = b->s2;
|
||||
b->s2 = s;
|
||||
n = 0;
|
||||
} else
|
||||
n = 1;
|
||||
fprintf(f, "\tj%s %sbb%d\n", ctoa[c][n],
|
||||
T.asloc, id0+b->s2->id);
|
||||
goto Jmp;
|
||||
}
|
||||
die("unhandled jump %d", b->jmp.type);
|
||||
}
|
||||
}
|
||||
id0 += fn->nblk;
|
||||
}
|
||||
+944
@@ -0,0 +1,944 @@
|
||||
#include "all.h"
|
||||
#include <limits.h>
|
||||
|
||||
/* For x86_64, do the following:
|
||||
*
|
||||
* - check that constants are used only in
|
||||
* places allowed
|
||||
* - ensure immediates always fit in 32b
|
||||
* - expose machine register constraints
|
||||
* on instructions like division.
|
||||
* - implement fast locals (the streak of
|
||||
* constant allocX in the first basic block)
|
||||
* - recognize complex addressing modes
|
||||
*
|
||||
* Invariant: the use counts that are used
|
||||
* in sel() must be sound. This
|
||||
* is not so trivial, maybe the
|
||||
* dce should be moved out...
|
||||
*/
|
||||
|
||||
/* forward declaration: seladdr() below calls amatch() before its definition */
static int amatch(Addr *, Num *, Ref, Fn *);
|
||||
|
||||
static int
|
||||
noimm(Ref r, Fn *fn)
|
||||
{
|
||||
int64_t val;
|
||||
|
||||
if (rtype(r) != RCon)
|
||||
return 0;
|
||||
switch (fn->con[r.val].type) {
|
||||
case CAddr:
|
||||
/* we only support the 'small'
|
||||
* code model of the ABI, this
|
||||
* means that we can always
|
||||
* address data with 32bits
|
||||
*/
|
||||
return 0;
|
||||
case CBits:
|
||||
val = fn->con[r.val].bits.i;
|
||||
return (val < INT32_MIN || val > INT32_MAX);
|
||||
default:
|
||||
die("invalid constant");
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
rslot(Ref r, Fn *fn)
|
||||
{
|
||||
if (rtype(r) != RTmp)
|
||||
return -1;
|
||||
return fn->tmp[r.val].slot;
|
||||
}
|
||||
|
||||
static int
|
||||
hascon(Ref r, Con **pc, Fn *fn)
|
||||
{
|
||||
switch (rtype(r)) {
|
||||
case RCon:
|
||||
*pc = &fn->con[r.val];
|
||||
return 1;
|
||||
case RMem:
|
||||
*pc = &fn->mem[r.val].offset;
|
||||
return 1;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Rewrite the operand *r (of class k) of instruction i into a
 * form that can be emitted, emitting helper instructions through
 * emit() where a temporary is needed.  i may be null, in which
 * case the operand is treated as belonging to a copy.
 * At most one of the rewrites below applies; *r is updated last.
 */
static void
|
||||
fixarg(Ref *r, int k, Ins *i, Fn *fn)
|
||||
{
|
||||
char buf[32];
|
||||
Addr a, *m;
|
||||
Con cc, *c;
|
||||
Ref r0, r1, r2, r3;
|
||||
int s, n, op;
|
||||
|
||||
r1 = r0 = *r;
|
||||
s = rslot(r0, fn);
|
||||
op = i ? i->op : Ocopy;
|
||||
if (KBASE(k) == 1 && rtype(r0) == RCon) {
|
||||
/* load floating points from memory
|
||||
* slots, they can't be used as
|
||||
* immediates
|
||||
*/
|
||||
r1 = MEM(fn->nmem);
|
||||
vgrow(&fn->mem, ++fn->nmem);
|
||||
memset(&a, 0, sizeof a);
|
||||
a.offset.type = CAddr;
|
||||
n = stashbits(fn->con[r0.val].bits.i, KWIDE(k) ? 8 : 4);
|
||||
/* quote the name so that we do not
|
||||
* add symbol prefixes on the apple
|
||||
* target variant
|
||||
*/
|
||||
sprintf(buf, "\"%sfp%d\"", T.asloc, n);
|
||||
a.offset.sym.id = intern(buf);
|
||||
fn->mem[fn->nmem-1] = a;
|
||||
}
|
||||
else if (op == Ocall && r == &i->arg[0]
|
||||
&& rtype(r0) == RCon && fn->con[r0.val].type != CAddr) {
|
||||
/* use a temporary register so that we
|
||||
* produce an indirect call
|
||||
*/
|
||||
r1 = newtmp("isel", Kl, fn);
|
||||
emit(Ocopy, Kl, r1, r0, R);
|
||||
}
|
||||
else if (op != Ocopy && k == Kl && noimm(r0, fn)) {
|
||||
/* load constants that do not fit in
|
||||
* a 32bit signed integer into a
|
||||
* long temporary
|
||||
*/
|
||||
r1 = newtmp("isel", Kl, fn);
|
||||
emit(Ocopy, Kl, r1, r0, R);
|
||||
}
|
||||
else if (s != -1) {
|
||||
/* load fast locals' addresses into
|
||||
* temporaries right before the
|
||||
* instruction
|
||||
*/
|
||||
r1 = newtmp("isel", Kl, fn);
|
||||
emit(Oaddr, Kl, r1, SLOT(s), R);
|
||||
}
|
||||
else if (op != Ocall && hascon(r0, &c, fn)
|
||||
&& c->type == CAddr && ((c->sym.type & SExt)
|
||||
|| (T.apple && c->sym.type == SThr))) {
|
||||
/* external symbols (and thread-local symbols on apple):
 * take the symbol address into a temporary with Oaddr and
 * add the constant offset, if any, separately.
 * NOTE(review): the emit() calls appear in the reverse of
 * execution order -- presumably emit() fills the buffer
 * backwards; confirm.
 */
r1 = newtmp("isel", Kl, fn);
|
||||
if (c->bits.i) {
|
||||
r2 = newtmp("isel", Kl, fn);
|
||||
cc = (Con){.type = CBits};
|
||||
cc.bits.i = c->bits.i;
|
||||
r3 = newcon(&cc, fn);
|
||||
emit(Oadd, Kl, r1, r2, r3);
|
||||
} else
|
||||
r2 = r1;
|
||||
if (T.apple && (c->sym.type & SThr)) {
|
||||
/* apple thread-local access: a call through the value
 * loaded from the symbol address, with that address in
 * RDI and the result taken from RAX */
emit(Ocopy, Kl, r2, TMP(RAX), R);
|
||||
r2 = newtmp("isel", Kl, fn);
|
||||
r3 = newtmp("isel", Kl, fn);
|
||||
emit(Ocall, 0, R, r3, CALL(17));
|
||||
emit(Ocopy, Kl, TMP(RDI), r2, R);
|
||||
emit(Oload, Kl, r3, r2, R);
|
||||
}
|
||||
cc = *c;
|
||||
cc.bits.i = 0;
|
||||
r3 = newcon(&cc, fn);
|
||||
emit(Oaddr, Kl, r2, r3, R);
|
||||
/* a memory operand keeps its Mem entry: the symbolic offset
 * is dropped and the new temporary becomes the base */
if (rtype(r0) == RMem) {
|
||||
m = &fn->mem[r0.val];
|
||||
m->offset.type = CUndef;
|
||||
m->base = r1;
|
||||
r1 = r0;
|
||||
}
|
||||
}
|
||||
else if (!(isstore(op) && r == &i->arg[1])
|
||||
&& !isload(op) && op != Ocall && rtype(r0) == RCon
|
||||
&& fn->con[r0.val].type == CAddr) {
|
||||
/* turn address operands into
|
||||
* lea/mov instructions
|
||||
*/
|
||||
r1 = newtmp("isel", Kl, fn);
|
||||
emit(Oaddr, Kl, r1, r0, R);
|
||||
}
|
||||
else if (rtype(r0) == RMem) {
|
||||
/* eliminate memory operands of
|
||||
* the form $foo(%rip, ...)
|
||||
*/
|
||||
m = &fn->mem[r0.val];
|
||||
if (req(m->base, R))
|
||||
if (m->offset.type == CAddr) {
|
||||
r0 = newtmp("isel", Kl, fn);
|
||||
emit(Oaddr, Kl, r0, newcon(&m->offset, fn), R);
|
||||
m->offset.type = CUndef;
|
||||
m->base = r0;
|
||||
}
|
||||
}
|
||||
else if (isxsel(op) && rtype(*r) == RCon) {
|
||||
/* constants used by a select are first copied to a temporary */
r1 = newtmp("isel", i->cls, fn);
|
||||
emit(Ocopy, i->cls, r1, *r, R);
|
||||
}
|
||||
*r = r1;
|
||||
}
|
||||
|
||||
static void
|
||||
seladdr(Ref *r, Num *tn, Fn *fn)
|
||||
{
|
||||
Addr a;
|
||||
Ref r0;
|
||||
|
||||
r0 = *r;
|
||||
if (rtype(r0) == RTmp) {
|
||||
memset(&a, 0, sizeof a);
|
||||
if (!amatch(&a, tn, r0, fn))
|
||||
return;
|
||||
if (!req(a.base, R))
|
||||
if (a.offset.type == CAddr) {
|
||||
/* apple as does not support
|
||||
* $foo(%r0, %r1, M); try to
|
||||
* rewrite it or bail out if
|
||||
* impossible
|
||||
*/
|
||||
if (!req(a.index, R) || rtype(a.base) != RTmp)
|
||||
return;
|
||||
else {
|
||||
a.index = a.base;
|
||||
a.scale = 1;
|
||||
a.base = R;
|
||||
}
|
||||
}
|
||||
chuse(r0, -1, fn);
|
||||
vgrow(&fn->mem, ++fn->nmem);
|
||||
fn->mem[fn->nmem-1] = a;
|
||||
chuse(a.base, +1, fn);
|
||||
chuse(a.index, +1, fn);
|
||||
*r = MEM(fn->nmem-1);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
cmpswap(Ref arg[2], int op)
|
||||
{
|
||||
switch (op) {
|
||||
case NCmpI+Cflt:
|
||||
case NCmpI+Cfle:
|
||||
return 1;
|
||||
case NCmpI+Cfgt:
|
||||
case NCmpI+Cfge:
|
||||
return 0;
|
||||
}
|
||||
return rtype(arg[0]) == RCon;
|
||||
}
|
||||
|
||||
static void
|
||||
selcmp(Ref arg[2], int k, int swap, Fn *fn)
|
||||
{
|
||||
Ref r;
|
||||
Ins *icmp;
|
||||
|
||||
if (swap) {
|
||||
r = arg[1];
|
||||
arg[1] = arg[0];
|
||||
arg[0] = r;
|
||||
}
|
||||
emit(Oxcmp, k, R, arg[1], arg[0]);
|
||||
icmp = curi;
|
||||
if (rtype(arg[0]) == RCon) {
|
||||
assert(k != Kw);
|
||||
icmp->arg[1] = newtmp("isel", k, fn);
|
||||
emit(Ocopy, k, icmp->arg[1], arg[0], R);
|
||||
fixarg(&curi->arg[0], k, curi, fn);
|
||||
}
|
||||
fixarg(&icmp->arg[0], k, icmp, fn);
|
||||
fixarg(&icmp->arg[1], k, icmp, fn);
|
||||
}
|
||||
|
||||
static void
|
||||
sel(Ins i, Num *tn, Fn *fn)
|
||||
{
|
||||
Ref r0, r1, tmp[7];
|
||||
int x, j, k, kc, sh, swap;
|
||||
Ins *i0, *i1;
|
||||
|
||||
if (rtype(i.to) == RTmp)
|
||||
if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1]))
|
||||
if (fn->tmp[i.to.val].nuse == 0) {
|
||||
chuse(i.arg[0], -1, fn);
|
||||
chuse(i.arg[1], -1, fn);
|
||||
return;
|
||||
}
|
||||
i0 = curi;
|
||||
k = i.cls;
|
||||
switch (i.op) {
|
||||
case Odiv:
|
||||
case Orem:
|
||||
case Oudiv:
|
||||
case Ourem:
|
||||
if (KBASE(k) == 1)
|
||||
goto Emit;
|
||||
if (i.op == Odiv || i.op == Oudiv)
|
||||
r0 = TMP(RAX), r1 = TMP(RDX);
|
||||
else
|
||||
r0 = TMP(RDX), r1 = TMP(RAX);
|
||||
emit(Ocopy, k, i.to, r0, R);
|
||||
emit(Ocopy, k, R, r1, R);
|
||||
if (rtype(i.arg[1]) == RCon) {
|
||||
/* immediates not allowed for
|
||||
* divisions in x86
|
||||
*/
|
||||
r0 = newtmp("isel", k, fn);
|
||||
} else
|
||||
r0 = i.arg[1];
|
||||
if (fn->tmp[r0.val].slot != -1)
|
||||
err("unlikely argument %%%s in %s",
|
||||
fn->tmp[r0.val].name, optab[i.op].name);
|
||||
if (i.op == Odiv || i.op == Orem) {
|
||||
emit(Oxidiv, k, R, r0, R);
|
||||
emit(Osign, k, TMP(RDX), TMP(RAX), R);
|
||||
} else {
|
||||
emit(Oxdiv, k, R, r0, R);
|
||||
emit(Ocopy, k, TMP(RDX), CON_Z, R);
|
||||
}
|
||||
emit(Ocopy, k, TMP(RAX), i.arg[0], R);
|
||||
fixarg(&curi->arg[0], k, curi, fn);
|
||||
if (rtype(i.arg[1]) == RCon)
|
||||
emit(Ocopy, k, r0, i.arg[1], R);
|
||||
break;
|
||||
case Osar:
|
||||
case Oshr:
|
||||
case Oshl:
|
||||
r0 = i.arg[1];
|
||||
if (rtype(r0) == RCon)
|
||||
goto Emit;
|
||||
if (fn->tmp[r0.val].slot != -1)
|
||||
err("unlikely argument %%%s in %s",
|
||||
fn->tmp[r0.val].name, optab[i.op].name);
|
||||
i.arg[1] = TMP(RCX);
|
||||
emit(Ocopy, Kw, R, TMP(RCX), R);
|
||||
emiti(i);
|
||||
i1 = curi;
|
||||
emit(Ocopy, Kw, TMP(RCX), r0, R);
|
||||
fixarg(&i1->arg[0], argcls(&i, 0), i1, fn);
|
||||
break;
|
||||
case Ouwtof:
|
||||
r0 = newtmp("utof", Kl, fn);
|
||||
emit(Osltof, k, i.to, r0, R);
|
||||
emit(Oextuw, Kl, r0, i.arg[0], R);
|
||||
fixarg(&curi->arg[0], k, curi, fn);
|
||||
break;
|
||||
case Oultof:
|
||||
/* %mask =l and %arg.0, 1
|
||||
* %isbig =l shr %arg.0, 63
|
||||
* %divided =l shr %arg.0, %isbig
|
||||
* %or =l or %mask, %divided
|
||||
* %float =d sltof %or
|
||||
* %cast =l cast %float
|
||||
* %addend =l shl %isbig, 52
|
||||
* %sum =l add %cast, %addend
|
||||
* %result =d cast %sum
|
||||
*/
|
||||
r0 = newtmp("utof", k, fn);
|
||||
if (k == Ks)
|
||||
kc = Kw, sh = 23;
|
||||
else
|
||||
kc = Kl, sh = 52;
|
||||
for (j=0; j<4; j++)
|
||||
tmp[j] = newtmp("utof", Kl, fn);
|
||||
for (; j<7; j++)
|
||||
tmp[j] = newtmp("utof", kc, fn);
|
||||
emit(Ocast, k, i.to, tmp[6], R);
|
||||
emit(Oadd, kc, tmp[6], tmp[4], tmp[5]);
|
||||
emit(Oshl, kc, tmp[5], tmp[1], getcon(sh, fn));
|
||||
emit(Ocast, kc, tmp[4], r0, R);
|
||||
emit(Osltof, k, r0, tmp[3], R);
|
||||
emit(Oor, Kl, tmp[3], tmp[0], tmp[2]);
|
||||
emit(Oshr, Kl, tmp[2], i.arg[0], tmp[1]);
|
||||
sel(*curi++, 0, fn);
|
||||
emit(Oshr, Kl, tmp[1], i.arg[0], getcon(63, fn));
|
||||
fixarg(&curi->arg[0], Kl, curi, fn);
|
||||
emit(Oand, Kl, tmp[0], i.arg[0], getcon(1, fn));
|
||||
fixarg(&curi->arg[0], Kl, curi, fn);
|
||||
break;
|
||||
case Ostoui:
|
||||
i.op = Ostosi;
|
||||
kc = Ks;
|
||||
tmp[4] = getcon(0xdf000000, fn);
|
||||
goto Oftoui;
|
||||
case Odtoui:
|
||||
i.op = Odtosi;
|
||||
kc = Kd;
|
||||
tmp[4] = getcon(0xc3e0000000000000, fn);
|
||||
Oftoui:
|
||||
if (k == Kw) {
|
||||
r0 = newtmp("ftou", Kl, fn);
|
||||
emit(Ocopy, Kw, i.to, r0, R);
|
||||
i.cls = Kl;
|
||||
i.to = r0;
|
||||
goto Emit;
|
||||
}
|
||||
/* %try0 =l {s,d}tosi %fp
|
||||
* %mask =l sar %try0, 63
|
||||
*
|
||||
* mask is all ones if the first
|
||||
* try was oob, all zeroes o.w.
|
||||
*
|
||||
* %fps ={s,d} sub %fp, (1<<63)
|
||||
* %try1 =l {s,d}tosi %fps
|
||||
*
|
||||
* %tmp =l and %mask, %try1
|
||||
* %res =l or %tmp, %try0
|
||||
*/
|
||||
r0 = newtmp("ftou", kc, fn);
|
||||
for (j=0; j<4; j++)
|
||||
tmp[j] = newtmp("ftou", Kl, fn);
|
||||
emit(Oor, Kl, i.to, tmp[0], tmp[3]);
|
||||
emit(Oand, Kl, tmp[3], tmp[2], tmp[1]);
|
||||
emit(i.op, Kl, tmp[2], r0, R);
|
||||
emit(Oadd, kc, r0, tmp[4], i.arg[0]);
|
||||
i1 = curi; /* fixarg() can change curi */
|
||||
fixarg(&i1->arg[0], kc, i1, fn);
|
||||
fixarg(&i1->arg[1], kc, i1, fn);
|
||||
emit(Osar, Kl, tmp[1], tmp[0], getcon(63, fn));
|
||||
emit(i.op, Kl, tmp[0], i.arg[0], R);
|
||||
fixarg(&curi->arg[0], Kl, curi, fn);
|
||||
break;
|
||||
case Onop:
|
||||
break;
|
||||
case Ostored:
|
||||
case Ostores:
|
||||
case Ostorel:
|
||||
case Ostorew:
|
||||
case Ostoreh:
|
||||
case Ostoreb:
|
||||
if (rtype(i.arg[0]) == RCon) {
|
||||
if (i.op == Ostored)
|
||||
i.op = Ostorel;
|
||||
if (i.op == Ostores)
|
||||
i.op = Ostorew;
|
||||
}
|
||||
seladdr(&i.arg[1], tn, fn);
|
||||
goto Emit;
|
||||
case_Oload:
|
||||
seladdr(&i.arg[0], tn, fn);
|
||||
goto Emit;
|
||||
case Odbgloc:
|
||||
case Ocall:
|
||||
case Osalloc:
|
||||
case Ocopy:
|
||||
case Oadd:
|
||||
case Osub:
|
||||
case Oneg:
|
||||
case Omul:
|
||||
case Oand:
|
||||
case Oor:
|
||||
case Oxor:
|
||||
case Oxtest:
|
||||
case Ostosi:
|
||||
case Odtosi:
|
||||
case Oswtof:
|
||||
case Osltof:
|
||||
case Oexts:
|
||||
case Otruncd:
|
||||
case Ocast:
|
||||
case_Oxsel:
|
||||
case_Oext:
|
||||
Emit:
|
||||
emiti(i);
|
||||
i1 = curi; /* fixarg() can change curi */
|
||||
fixarg(&i1->arg[0], argcls(&i, 0), i1, fn);
|
||||
fixarg(&i1->arg[1], argcls(&i, 1), i1, fn);
|
||||
break;
|
||||
case Oalloc4:
|
||||
case Oalloc8:
|
||||
case Oalloc16:
|
||||
salloc(i.to, i.arg[0], fn);
|
||||
break;
|
||||
default:
|
||||
if (isext(i.op))
|
||||
goto case_Oext;
|
||||
if (isxsel(i.op))
|
||||
goto case_Oxsel;
|
||||
if (isload(i.op))
|
||||
goto case_Oload;
|
||||
if (iscmp(i.op, &kc, &x)) {
|
||||
switch (x) {
|
||||
case NCmpI+Cfeq:
|
||||
/* zf is set when operands are
|
||||
* unordered, so we may have to
|
||||
* check pf
|
||||
*/
|
||||
r0 = newtmp("isel", Kw, fn);
|
||||
r1 = newtmp("isel", Kw, fn);
|
||||
emit(Oand, Kw, i.to, r0, r1);
|
||||
emit(Oflagfo, k, r1, R, R);
|
||||
i.to = r0;
|
||||
break;
|
||||
case NCmpI+Cfne:
|
||||
r0 = newtmp("isel", Kw, fn);
|
||||
r1 = newtmp("isel", Kw, fn);
|
||||
emit(Oor, Kw, i.to, r0, r1);
|
||||
emit(Oflagfuo, k, r1, R, R);
|
||||
i.to = r0;
|
||||
break;
|
||||
}
|
||||
swap = cmpswap(i.arg, x);
|
||||
if (swap)
|
||||
x = cmpop(x);
|
||||
emit(Oflag+x, k, i.to, R, R);
|
||||
selcmp(i.arg, kc, swap, fn);
|
||||
break;
|
||||
}
|
||||
die("unknown instruction %s", optab[i.op].name);
|
||||
}
|
||||
|
||||
while (i0>curi && --i0) {
|
||||
assert(rslot(i0->arg[0], fn) == -1);
|
||||
assert(rslot(i0->arg[1], fn) == -1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Scan backwards from i (exclusive) down to i0 (inclusive)
 * and return the first instruction whose amd64_op entry has
 * zflag set.  Instructions with lflag set are stepped over;
 * any other instruction stops the search.  Returns 0 when
 * nothing is found.
 */
static Ins *
flagi(Ins *i0, Ins *i)
{
	while (i > i0) {
		i--;
		if (amd64_op[i->op].zflag)
			return i;
		if (!amd64_op[i->op].lflag)
			return 0;
	}
	return 0;
}
|
||||
|
||||
static Ins*
|
||||
selsel(Fn *fn, Blk *b, Ins *i, Num *tn)
|
||||
{
|
||||
Ref r, cr[2];
|
||||
int c, k, swap, gencmp, gencpy;
|
||||
Ins *isel0, *isel1, *fi;
|
||||
Tmp *t;
|
||||
|
||||
assert(i->op == Osel1);
|
||||
for (isel0=i; b->ins<isel0; isel0--) {
|
||||
if (isel0->op == Osel0)
|
||||
break;
|
||||
assert(isel0->op == Osel1);
|
||||
}
|
||||
assert(isel0->op == Osel0);
|
||||
r = isel0->arg[0];
|
||||
assert(rtype(r) == RTmp);
|
||||
t = &fn->tmp[r.val];
|
||||
fi = flagi(b->ins, isel0);
|
||||
cr[0] = cr[1] = R;
|
||||
gencmp = gencpy = swap = 0;
|
||||
k = Kw;
|
||||
c = Cine;
|
||||
if (!fi || !req(fi->to, r)) {
|
||||
gencmp = 1;
|
||||
cr[0] = r;
|
||||
cr[1] = CON_Z;
|
||||
}
|
||||
else if (iscmp(fi->op, &k, &c)) {
|
||||
if (c == NCmpI+Cfeq
|
||||
|| c == NCmpI+Cfne) {
|
||||
/* these are selected as 'and'
|
||||
* or 'or', so we check their
|
||||
* result with Cine
|
||||
*/
|
||||
c = Cine;
|
||||
goto Other;
|
||||
}
|
||||
swap = cmpswap(fi->arg, c);
|
||||
if (swap)
|
||||
c = cmpop(c);
|
||||
if (t->nuse == 1) {
|
||||
gencmp = 1;
|
||||
cr[0] = fi->arg[0];
|
||||
cr[1] = fi->arg[1];
|
||||
*fi = (Ins){.op = Onop};
|
||||
}
|
||||
}
|
||||
else if (fi->op == Oand && t->nuse == 1
|
||||
&& (rtype(fi->arg[0]) == RTmp ||
|
||||
rtype(fi->arg[1]) == RTmp)) {
|
||||
fi->op = Oxtest;
|
||||
fi->to = R;
|
||||
if (rtype(fi->arg[1]) == RCon) {
|
||||
r = fi->arg[1];
|
||||
fi->arg[1] = fi->arg[0];
|
||||
fi->arg[0] = r;
|
||||
}
|
||||
}
|
||||
else {
|
||||
Other:
|
||||
/* since flags are not tracked in liveness,
|
||||
* the result of the flag-setting instruction
|
||||
* has to be marked as live
|
||||
*/
|
||||
if (t->nuse == 1)
|
||||
gencpy = 1;
|
||||
}
|
||||
/* generate conditional moves */
|
||||
for (isel1=i; isel0<isel1; --isel1) {
|
||||
isel1->op = Oxsel+c;
|
||||
sel(*isel1, tn, fn);
|
||||
}
|
||||
assert(!gencmp || !gencpy);
|
||||
if (gencmp)
|
||||
selcmp(cr, k, swap, fn);
|
||||
if (gencpy)
|
||||
emit(Ocopy, Kw, R, r, R);
|
||||
*isel0 = (Ins){.op = Onop};
|
||||
return isel0;
|
||||
}
|
||||
|
||||
static void
|
||||
seljmp(Blk *b, Fn *fn)
|
||||
{
|
||||
Ref r;
|
||||
int c, k, swap;
|
||||
Ins *fi;
|
||||
Tmp *t;
|
||||
|
||||
if (b->jmp.type == Jret0
|
||||
|| b->jmp.type == Jjmp
|
||||
|| b->jmp.type == Jhlt)
|
||||
return;
|
||||
assert(b->jmp.type == Jjnz);
|
||||
r = b->jmp.arg;
|
||||
t = &fn->tmp[r.val];
|
||||
b->jmp.arg = R;
|
||||
assert(rtype(r) == RTmp);
|
||||
if (b->s1 == b->s2) {
|
||||
chuse(r, -1, fn);
|
||||
b->jmp.type = Jjmp;
|
||||
b->s2 = 0;
|
||||
return;
|
||||
}
|
||||
fi = flagi(b->ins, &b->ins[b->nins]);
|
||||
if (!fi || !req(fi->to, r)) {
|
||||
selcmp((Ref[2]){r, CON_Z}, Kw, 0, fn);
|
||||
b->jmp.type = Jjf + Cine;
|
||||
}
|
||||
else if (iscmp(fi->op, &k, &c)
|
||||
&& c != NCmpI+Cfeq /* see sel(), selsel() */
|
||||
&& c != NCmpI+Cfne) {
|
||||
swap = cmpswap(fi->arg, c);
|
||||
if (swap)
|
||||
c = cmpop(c);
|
||||
if (t->nuse == 1) {
|
||||
selcmp(fi->arg, k, swap, fn);
|
||||
*fi = (Ins){.op = Onop};
|
||||
}
|
||||
b->jmp.type = Jjf + c;
|
||||
}
|
||||
else if (fi->op == Oand && t->nuse == 1
|
||||
&& (rtype(fi->arg[0]) == RTmp ||
|
||||
rtype(fi->arg[1]) == RTmp)) {
|
||||
fi->op = Oxtest;
|
||||
fi->to = R;
|
||||
b->jmp.type = Jjf + Cine;
|
||||
if (rtype(fi->arg[1]) == RCon) {
|
||||
r = fi->arg[1];
|
||||
fi->arg[1] = fi->arg[0];
|
||||
fi->arg[0] = r;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* since flags are not tracked in liveness,
|
||||
* the result of the flag-setting instruction
|
||||
* has to be marked as live
|
||||
*/
|
||||
if (t->nuse == 1)
|
||||
emit(Ocopy, Kw, R, r, R);
|
||||
b->jmp.type = Jjf + Cine;
|
||||
}
|
||||
}
|
||||
|
||||
/* identifiers of the address patterns; they index
 * matcher[] and are used as bit positions in match[]
 * (o: constant offset, b: base, i: index, s: scale)
 */
enum {
	Pob,   /* o + b */
	Pbis,  /* b + i*s */
	Pois,  /* o + i*s */
	Pobis, /* o + b + i*s */
	Pbi1,  /* b + i */
	Pobi1, /* o + b + i */
};
|
||||
|
||||
/* mgen generated code
|
||||
*
|
||||
* (with-vars (o b i s)
|
||||
* (patterns
|
||||
* (ob (add (con o) (tmp b)))
|
||||
* (bis (add (tmp b) (mul (tmp i) (con s 1 2 4 8))))
|
||||
* (ois (add (con o) (mul (tmp i) (con s 1 2 4 8))))
|
||||
* (obis (add (con o) (tmp b) (mul (tmp i) (con s 1 2 4 8))))
|
||||
* (bi1 (add (tmp b) (tmp i)))
|
||||
* (obi1 (add (con o) (tmp b) (tmp i)))
|
||||
* ))
|
||||
*/
|
||||
|
||||
static int
|
||||
opn(int op, int l, int r)
|
||||
{
|
||||
static uchar Oaddtbl[91] = {
|
||||
2,
|
||||
2,2,
|
||||
4,4,5,
|
||||
6,6,8,8,
|
||||
4,4,9,10,9,
|
||||
7,7,5,8,9,5,
|
||||
4,4,12,10,12,12,12,
|
||||
4,4,9,10,9,9,12,9,
|
||||
11,11,5,8,9,5,12,9,5,
|
||||
7,7,5,8,9,5,12,9,5,5,
|
||||
11,11,5,8,9,5,12,9,5,5,5,
|
||||
4,4,9,10,9,9,12,9,9,9,9,9,
|
||||
7,7,5,8,9,5,12,9,5,5,5,9,5,
|
||||
};
|
||||
int t;
|
||||
|
||||
if (l < r)
|
||||
t = l, l = r, r = t;
|
||||
switch (op) {
|
||||
case Omul:
|
||||
if (2 <= l)
|
||||
if (r == 0) {
|
||||
return 3;
|
||||
}
|
||||
return 2;
|
||||
case Oadd:
|
||||
return Oaddtbl[(l + l*l)/2 + r];
|
||||
default:
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
refn(Ref r, Num *tn, Con *con)
|
||||
{
|
||||
int64_t n;
|
||||
|
||||
switch (rtype(r)) {
|
||||
case RTmp:
|
||||
if (!tn[r.val].n)
|
||||
tn[r.val].n = 2;
|
||||
return tn[r.val].n;
|
||||
case RCon:
|
||||
if (con[r.val].type != CBits)
|
||||
return 1;
|
||||
n = con[r.val].bits.i;
|
||||
if (n == 8 || n == 4 || n == 2 || n == 1)
|
||||
return 0;
|
||||
return 1;
|
||||
default:
|
||||
return INT_MIN;
|
||||
}
|
||||
}
|
||||
|
||||
static bits match[13] = {
|
||||
[4] = BIT(Pob),
|
||||
[5] = BIT(Pbi1),
|
||||
[6] = BIT(Pob) | BIT(Pois),
|
||||
[7] = BIT(Pob) | BIT(Pobi1),
|
||||
[8] = BIT(Pbi1) | BIT(Pbis),
|
||||
[9] = BIT(Pbi1) | BIT(Pobi1),
|
||||
[10] = BIT(Pbi1) | BIT(Pbis) | BIT(Pobi1) | BIT(Pobis),
|
||||
[11] = BIT(Pob) | BIT(Pobi1) | BIT(Pobis),
|
||||
[12] = BIT(Pbi1) | BIT(Pobi1) | BIT(Pobis),
|
||||
};
|
||||
|
||||
static uchar *matcher[] = {
|
||||
[Pbi1] = (uchar[]){
|
||||
1,3,1,3,2,0
|
||||
},
|
||||
[Pbis] = (uchar[]){
|
||||
5,1,8,5,27,1,5,1,2,5,13,3,1,1,3,3,3,2,0,1,
|
||||
3,3,3,2,3,1,0,1,29
|
||||
},
|
||||
[Pob] = (uchar[]){
|
||||
1,3,0,3,1,0
|
||||
},
|
||||
[Pobi1] = (uchar[]){
|
||||
5,3,9,9,10,33,12,35,45,1,5,3,11,9,7,9,4,9,
|
||||
17,1,3,0,3,1,3,2,0,3,1,1,3,0,34,1,37,1,5,2,
|
||||
5,7,2,7,8,37,29,1,3,0,1,32
|
||||
},
|
||||
[Pobis] = (uchar[]){
|
||||
5,2,10,7,11,19,49,1,1,3,3,3,2,1,3,0,3,1,0,
|
||||
1,3,0,5,1,8,5,25,1,5,1,2,5,13,3,1,1,3,3,3,
|
||||
2,0,1,3,3,3,2,26,1,51,1,5,1,6,5,9,1,3,0,51,
|
||||
3,1,1,3,0,45
|
||||
},
|
||||
[Pois] = (uchar[]){
|
||||
1,3,0,1,3,3,3,2,0
|
||||
},
|
||||
};
|
||||
|
||||
/* end of generated code */
|
||||
|
||||
static void
|
||||
anumber(Num *tn, Blk *b, Con *con)
|
||||
{
|
||||
Ins *i;
|
||||
Num *n;
|
||||
|
||||
for (i=b->ins; i<&b->ins[b->nins]; i++) {
|
||||
if (rtype(i->to) != RTmp)
|
||||
continue;
|
||||
n = &tn[i->to.val];
|
||||
n->l = i->arg[0];
|
||||
n->r = i->arg[1];
|
||||
n->nl = refn(n->l, tn, con);
|
||||
n->nr = refn(n->r, tn, con);
|
||||
n->n = opn(i->op, n->nl, n->nr);
|
||||
}
|
||||
}
|
||||
|
||||
static Ref
|
||||
adisp(Con *c, Num *tn, Ref r, Fn *fn, int s)
|
||||
{
|
||||
Ref v[2];
|
||||
int n;
|
||||
|
||||
while (!req(r, R)) {
|
||||
assert(rtype(r) == RTmp);
|
||||
n = refn(r, tn, fn->con);
|
||||
if (!(match[n] & BIT(Pob)))
|
||||
break;
|
||||
runmatch(matcher[Pob], tn, r, v);
|
||||
assert(rtype(v[0]) == RCon);
|
||||
addcon(c, &fn->con[v[0].val], s);
|
||||
r = v[1];
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static int
|
||||
amatch(Addr *a, Num *tn, Ref r, Fn *fn)
|
||||
{
|
||||
static int pat[] = {Pobis, Pobi1, Pbis, Pois, Pbi1, -1};
|
||||
Ref ro, rb, ri, rs, v[4];
|
||||
Con *c, co;
|
||||
int s, n, *p;
|
||||
|
||||
if (rtype(r) != RTmp)
|
||||
return 0;
|
||||
|
||||
n = refn(r, tn, fn->con);
|
||||
memset(v, 0, sizeof v);
|
||||
for (p=pat; *p>=0; p++)
|
||||
if (match[n] & BIT(*p)) {
|
||||
runmatch(matcher[*p], tn, r, v);
|
||||
break;
|
||||
}
|
||||
if (*p < 0)
|
||||
v[1] = r;
|
||||
|
||||
memset(&co, 0, sizeof co);
|
||||
ro = v[0];
|
||||
rb = adisp(&co, tn, v[1], fn, 1);
|
||||
ri = v[2];
|
||||
rs = v[3];
|
||||
s = 1;
|
||||
|
||||
if (*p < 0 && co.type != CUndef)
|
||||
if (amatch(a, tn, rb, fn))
|
||||
return addcon(&a->offset, &co, 1);
|
||||
if (!req(ro, R)) {
|
||||
assert(rtype(ro) == RCon);
|
||||
c = &fn->con[ro.val];
|
||||
if (!addcon(&co, c, 1))
|
||||
return 0;
|
||||
}
|
||||
if (!req(rs, R)) {
|
||||
assert(rtype(rs) == RCon);
|
||||
c = &fn->con[rs.val];
|
||||
assert(c->type == CBits);
|
||||
s = c->bits.i;
|
||||
}
|
||||
ri = adisp(&co, tn, ri, fn, s);
|
||||
*a = (Addr){co, rb, ri, s};
|
||||
|
||||
if (rtype(ri) == RTmp)
|
||||
if (fn->tmp[ri.val].slot != -1) {
|
||||
if (a->scale != 1
|
||||
|| fn->tmp[rb.val].slot != -1)
|
||||
return 0;
|
||||
a->base = ri;
|
||||
a->index = rb;
|
||||
}
|
||||
if (!req(a->base, R)) {
|
||||
assert(rtype(a->base) == RTmp);
|
||||
s = fn->tmp[a->base.val].slot;
|
||||
if (s != -1)
|
||||
a->base = SLOT(s);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* instruction selection
|
||||
* requires use counts (as given by parsing)
|
||||
*/
|
||||
void
|
||||
amd64_isel(Fn *fn)
|
||||
{
|
||||
Blk *b, **sb;
|
||||
Ins *i;
|
||||
Phi *p;
|
||||
uint a;
|
||||
int n, al;
|
||||
int64_t sz;
|
||||
Num *num;
|
||||
|
||||
/* assign slots to fast allocs */
|
||||
b = fn->start;
|
||||
/* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
|
||||
for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
|
||||
for (i=b->ins; i<&b->ins[b->nins]; i++)
|
||||
if (i->op == al) {
|
||||
if (rtype(i->arg[0]) != RCon)
|
||||
break;
|
||||
sz = fn->con[i->arg[0].val].bits.i;
|
||||
if (sz < 0 || sz >= INT_MAX-15)
|
||||
err("invalid alloc size %"PRId64, sz);
|
||||
sz = (sz + n-1) & -n;
|
||||
sz /= 4;
|
||||
if (sz > INT_MAX - fn->slot)
|
||||
die("alloc too large");
|
||||
fn->tmp[i->to.val].slot = fn->slot;
|
||||
fn->slot += sz;
|
||||
fn->salign = 2 + al - Oalloc;
|
||||
*i = (Ins){.op = Onop};
|
||||
}
|
||||
|
||||
/* process basic blocks */
|
||||
n = fn->ntmp;
|
||||
num = emalloc(n * sizeof num[0]);
|
||||
for (b=fn->start; b; b=b->link) {
|
||||
curi = &insb[NIns];
|
||||
for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
|
||||
for (p=(*sb)->phi; p; p=p->link) {
|
||||
for (a=0; p->blk[a] != b; a++)
|
||||
assert(a+1 < p->narg);
|
||||
fixarg(&p->arg[a], p->cls, 0, fn);
|
||||
}
|
||||
memset(num, 0, n * sizeof num[0]);
|
||||
anumber(num, b, fn->con);
|
||||
seljmp(b, fn);
|
||||
for (i=&b->ins[b->nins]; i!=b->ins;) {
|
||||
--i;
|
||||
assert(i->op != Osel0);
|
||||
if (i->op == Osel1)
|
||||
i = selsel(fn, b, i, num);
|
||||
else
|
||||
sel(*i, num, fn);
|
||||
}
|
||||
idup(b, curi, &insb[NIns]-curi);
|
||||
}
|
||||
free(num);
|
||||
|
||||
if (debug['I']) {
|
||||
fprintf(stderr, "\n> After instruction selection:\n");
|
||||
printfn(fn, stderr);
|
||||
}
|
||||
}
|
||||
+721
@@ -0,0 +1,721 @@
|
||||
#include "all.h"
|
||||
|
||||
typedef struct AClass AClass;
|
||||
typedef struct RAlloc RAlloc;
|
||||
|
||||
struct AClass {
|
||||
Typ *type;
|
||||
int inmem;
|
||||
int align;
|
||||
uint size;
|
||||
int cls[2];
|
||||
Ref ref[2];
|
||||
};
|
||||
|
||||
struct RAlloc {
|
||||
Ins i;
|
||||
RAlloc *link;
|
||||
};
|
||||
|
||||
static void
|
||||
classify(AClass *a, Typ *t, uint s)
|
||||
{
|
||||
Field *f;
|
||||
int *cls;
|
||||
uint n, s1;
|
||||
|
||||
for (n=0, s1=s; n<t->nunion; n++, s=s1)
|
||||
for (f=t->fields[n]; f->type!=FEnd; f++) {
|
||||
assert(s <= 16);
|
||||
cls = &a->cls[s/8];
|
||||
switch (f->type) {
|
||||
case FEnd:
|
||||
die("unreachable");
|
||||
case FPad:
|
||||
/* don't change anything */
|
||||
s += f->len;
|
||||
break;
|
||||
case Fs:
|
||||
case Fd:
|
||||
if (*cls == Kx)
|
||||
*cls = Kd;
|
||||
s += f->len;
|
||||
break;
|
||||
case Fb:
|
||||
case Fh:
|
||||
case Fw:
|
||||
case Fl:
|
||||
*cls = Kl;
|
||||
s += f->len;
|
||||
break;
|
||||
case FTyp:
|
||||
classify(a, &typ[f->len], s);
|
||||
s += typ[f->len].size;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
typclass(AClass *a, Typ *t)
|
||||
{
|
||||
uint sz, al;
|
||||
|
||||
sz = t->size;
|
||||
al = 1u << t->align;
|
||||
|
||||
/* the ABI requires sizes to be rounded
|
||||
* up to the nearest multiple of 8, moreover
|
||||
* it makes it easy load and store structures
|
||||
* in registers
|
||||
*/
|
||||
if (al < 8)
|
||||
al = 8;
|
||||
sz = (sz + al-1) & -al;
|
||||
|
||||
a->type = t;
|
||||
a->size = sz;
|
||||
a->align = t->align;
|
||||
|
||||
if (t->isdark || sz > 16 || sz == 0) {
|
||||
/* large or unaligned structures are
|
||||
* required to be passed in memory
|
||||
*/
|
||||
a->inmem = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
a->cls[0] = Kx;
|
||||
a->cls[1] = Kx;
|
||||
a->inmem = 0;
|
||||
classify(a, t, 0);
|
||||
}
|
||||
|
||||
static int
|
||||
retr(Ref reg[2], AClass *aret)
|
||||
{
|
||||
static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}};
|
||||
int n, k, ca, nr[2];
|
||||
|
||||
nr[0] = nr[1] = 0;
|
||||
ca = 0;
|
||||
for (n=0; (uint)n*8<aret->size; n++) {
|
||||
k = KBASE(aret->cls[n]);
|
||||
reg[n] = TMP(retreg[k][nr[k]++]);
|
||||
ca += 1 << (2 * k);
|
||||
}
|
||||
return ca;
|
||||
}
|
||||
|
||||
static void
|
||||
selret(Blk *b, Fn *fn)
|
||||
{
|
||||
int j, k, ca;
|
||||
Ref r, r0, reg[2];
|
||||
AClass aret;
|
||||
|
||||
j = b->jmp.type;
|
||||
|
||||
if (!isret(j) || j == Jret0)
|
||||
return;
|
||||
|
||||
r0 = b->jmp.arg;
|
||||
b->jmp.type = Jret0;
|
||||
|
||||
if (j == Jretc) {
|
||||
typclass(&aret, &typ[fn->retty]);
|
||||
if (aret.inmem) {
|
||||
assert(rtype(fn->retr) == RTmp);
|
||||
emit(Ocopy, Kl, TMP(RAX), fn->retr, R);
|
||||
emit(Oblit1, 0, R, INT(aret.type->size), R);
|
||||
emit(Oblit0, 0, R, r0, fn->retr);
|
||||
ca = 1;
|
||||
} else {
|
||||
ca = retr(reg, &aret);
|
||||
if (aret.size > 8) {
|
||||
r = newtmp("abi", Kl, fn);
|
||||
emit(Oload, Kl, reg[1], r, R);
|
||||
emit(Oadd, Kl, r, r0, getcon(8, fn));
|
||||
}
|
||||
emit(Oload, Kl, reg[0], r0, R);
|
||||
}
|
||||
} else {
|
||||
k = j - Jretw;
|
||||
if (KBASE(k) == 0) {
|
||||
emit(Ocopy, k, TMP(RAX), r0, R);
|
||||
ca = 1;
|
||||
} else {
|
||||
emit(Ocopy, k, TMP(XMM0), r0, R);
|
||||
ca = 1 << 2;
|
||||
}
|
||||
}
|
||||
|
||||
b->jmp.arg = CALL(ca);
|
||||
}
|
||||
|
||||
static int
|
||||
argsclass(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret, Ref *env)
|
||||
{
|
||||
int varc, envc, nint, ni, nsse, ns, n, *pn;
|
||||
AClass *a;
|
||||
Ins *i;
|
||||
|
||||
if (aret && aret->inmem)
|
||||
nint = 5; /* hidden argument */
|
||||
else
|
||||
nint = 6;
|
||||
nsse = 8;
|
||||
varc = 0;
|
||||
envc = 0;
|
||||
for (i=i0, a=ac; i<i1; i++, a++)
|
||||
switch (i->op - op + Oarg) {
|
||||
case Oarg:
|
||||
if (KBASE(i->cls) == 0)
|
||||
pn = &nint;
|
||||
else
|
||||
pn = &nsse;
|
||||
if (*pn > 0) {
|
||||
--*pn;
|
||||
a->inmem = 0;
|
||||
} else
|
||||
a->inmem = 2;
|
||||
a->align = 3;
|
||||
a->size = 8;
|
||||
a->cls[0] = i->cls;
|
||||
break;
|
||||
case Oargc:
|
||||
n = i->arg[0].val;
|
||||
typclass(a, &typ[n]);
|
||||
if (a->inmem)
|
||||
continue;
|
||||
ni = ns = 0;
|
||||
for (n=0; (uint)n*8<a->size; n++)
|
||||
if (KBASE(a->cls[n]) == 0)
|
||||
ni++;
|
||||
else
|
||||
ns++;
|
||||
if (nint >= ni && nsse >= ns) {
|
||||
nint -= ni;
|
||||
nsse -= ns;
|
||||
} else
|
||||
a->inmem = 1;
|
||||
break;
|
||||
case Oarge:
|
||||
envc = 1;
|
||||
if (op == Opar)
|
||||
*env = i->to;
|
||||
else
|
||||
*env = i->arg[0];
|
||||
break;
|
||||
case Oargv:
|
||||
varc = 1;
|
||||
break;
|
||||
default:
|
||||
die("unreachable");
|
||||
}
|
||||
|
||||
if (varc && envc)
|
||||
err("sysv abi does not support variadic env calls");
|
||||
|
||||
return ((varc|envc) << 12) | ((6-nint) << 4) | ((8-nsse) << 8);
|
||||
}
|
||||
|
||||
int amd64_sysv_rsave[] = {
|
||||
RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX,
|
||||
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
|
||||
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, -1
|
||||
};
|
||||
int amd64_sysv_rclob[] = {RBX, R12, R13, R14, R15, -1};
|
||||
|
||||
MAKESURE(sysv_arrays_ok,
|
||||
sizeof amd64_sysv_rsave == (NGPS_SYSV+NFPS+1) * sizeof(int) &&
|
||||
sizeof amd64_sysv_rclob == (NCLR_SYSV+1) * sizeof(int)
|
||||
);
|
||||
|
||||
/* layout of call's second argument (RCall)
 *
 *  29     12    8    4  3  0
 *  |0...00|x|xxxx|xxxx|xx|xx|                  range
 *          |    |    |  |  ` gp regs returned (0..2)
 *          |    |    |  ` sse regs returned   (0..2)
 *          |    |    ` gp regs passed         (0..6)
 *          |    ` sse regs passed             (0..8)
 *          ` 1 if rax is used to pass data    (0..1)
 */
|
||||
|
||||
bits
|
||||
amd64_sysv_retregs(Ref r, int p[2])
|
||||
{
|
||||
bits b;
|
||||
int ni, nf;
|
||||
|
||||
assert(rtype(r) == RCall);
|
||||
b = 0;
|
||||
ni = r.val & 3;
|
||||
nf = (r.val >> 2) & 3;
|
||||
if (ni >= 1)
|
||||
b |= BIT(RAX);
|
||||
if (ni >= 2)
|
||||
b |= BIT(RDX);
|
||||
if (nf >= 1)
|
||||
b |= BIT(XMM0);
|
||||
if (nf >= 2)
|
||||
b |= BIT(XMM1);
|
||||
if (p) {
|
||||
p[0] = ni;
|
||||
p[1] = nf;
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
bits
|
||||
amd64_sysv_argregs(Ref r, int p[2])
|
||||
{
|
||||
bits b;
|
||||
int j, ni, nf, ra;
|
||||
|
||||
assert(rtype(r) == RCall);
|
||||
b = 0;
|
||||
ni = (r.val >> 4) & 15;
|
||||
nf = (r.val >> 8) & 15;
|
||||
ra = (r.val >> 12) & 1;
|
||||
for (j=0; j<ni; j++)
|
||||
b |= BIT(amd64_sysv_rsave[j]);
|
||||
for (j=0; j<nf; j++)
|
||||
b |= BIT(XMM0+j);
|
||||
if (p) {
|
||||
p[0] = ni + ra;
|
||||
p[1] = nf;
|
||||
}
|
||||
return b | (ra ? BIT(RAX) : 0);
|
||||
}
|
||||
|
||||
static Ref
|
||||
rarg(int ty, int *ni, int *ns)
|
||||
{
|
||||
if (KBASE(ty) == 0)
|
||||
return TMP(amd64_sysv_rsave[(*ni)++]);
|
||||
else
|
||||
return TMP(XMM0 + (*ns)++);
|
||||
}
|
||||
|
||||
static void
|
||||
selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap)
|
||||
{
|
||||
Ins *i;
|
||||
AClass *ac, *a, aret;
|
||||
int ca, ni, ns, al;
|
||||
uint stk, off;
|
||||
Ref r, r1, r2, reg[2], env;
|
||||
RAlloc *ra;
|
||||
|
||||
env = R;
|
||||
ac = alloc((i1-i0) * sizeof ac[0]);
|
||||
|
||||
if (!req(i1->arg[1], R)) {
|
||||
assert(rtype(i1->arg[1]) == RType);
|
||||
typclass(&aret, &typ[i1->arg[1].val]);
|
||||
ca = argsclass(i0, i1, ac, Oarg, &aret, &env);
|
||||
} else
|
||||
ca = argsclass(i0, i1, ac, Oarg, 0, &env);
|
||||
|
||||
for (stk=0, a=&ac[i1-i0]; a>ac;)
|
||||
if ((--a)->inmem) {
|
||||
if (a->align > 4)
|
||||
err("sysv abi requires alignments of 16 or less");
|
||||
stk += a->size;
|
||||
if (a->align == 4)
|
||||
stk += stk & 15;
|
||||
}
|
||||
stk += stk & 15;
|
||||
if (stk) {
|
||||
r = getcon(-(int64_t)stk, fn);
|
||||
emit(Osalloc, Kl, R, r, R);
|
||||
}
|
||||
|
||||
if (!req(i1->arg[1], R)) {
|
||||
if (aret.inmem) {
|
||||
/* get the return location from eax
|
||||
* it saves one callee-save reg */
|
||||
r1 = newtmp("abi", Kl, fn);
|
||||
emit(Ocopy, Kl, i1->to, TMP(RAX), R);
|
||||
ca += 1;
|
||||
} else {
|
||||
/* todo, may read out of bounds.
|
||||
* gcc did this up until 5.2, but
|
||||
* this should still be fixed.
|
||||
*/
|
||||
if (aret.size > 8) {
|
||||
r = newtmp("abi", Kl, fn);
|
||||
aret.ref[1] = newtmp("abi", aret.cls[1], fn);
|
||||
emit(Ostorel, 0, R, aret.ref[1], r);
|
||||
emit(Oadd, Kl, r, i1->to, getcon(8, fn));
|
||||
}
|
||||
aret.ref[0] = newtmp("abi", aret.cls[0], fn);
|
||||
emit(Ostorel, 0, R, aret.ref[0], i1->to);
|
||||
ca += retr(reg, &aret);
|
||||
if (aret.size > 8)
|
||||
emit(Ocopy, aret.cls[1], aret.ref[1], reg[1], R);
|
||||
emit(Ocopy, aret.cls[0], aret.ref[0], reg[0], R);
|
||||
r1 = i1->to;
|
||||
}
|
||||
/* allocate return pad */
|
||||
ra = alloc(sizeof *ra);
|
||||
/* specific to NAlign == 3 */
|
||||
al = aret.align >= 2 ? aret.align - 2 : 0;
|
||||
ra->i = (Ins){Oalloc+al, Kl, r1, {getcon(aret.size, fn)}};
|
||||
ra->link = (*rap);
|
||||
*rap = ra;
|
||||
} else {
|
||||
ra = 0;
|
||||
if (KBASE(i1->cls) == 0) {
|
||||
emit(Ocopy, i1->cls, i1->to, TMP(RAX), R);
|
||||
ca += 1;
|
||||
} else {
|
||||
emit(Ocopy, i1->cls, i1->to, TMP(XMM0), R);
|
||||
ca += 1 << 2;
|
||||
}
|
||||
}
|
||||
|
||||
emit(Ocall, i1->cls, R, i1->arg[0], CALL(ca));
|
||||
|
||||
if (!req(R, env))
|
||||
emit(Ocopy, Kl, TMP(RAX), env, R);
|
||||
else if ((ca >> 12) & 1) /* vararg call */
|
||||
emit(Ocopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R);
|
||||
|
||||
ni = ns = 0;
|
||||
if (ra && aret.inmem)
|
||||
emit(Ocopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */
|
||||
|
||||
for (i=i0, a=ac; i<i1; i++, a++) {
|
||||
if (i->op >= Oarge || a->inmem)
|
||||
continue;
|
||||
r1 = rarg(a->cls[0], &ni, &ns);
|
||||
if (i->op == Oargc) {
|
||||
if (a->size > 8) {
|
||||
r2 = rarg(a->cls[1], &ni, &ns);
|
||||
r = newtmp("abi", Kl, fn);
|
||||
emit(Oload, a->cls[1], r2, r, R);
|
||||
emit(Oadd, Kl, r, i->arg[1], getcon(8, fn));
|
||||
}
|
||||
emit(Oload, a->cls[0], r1, i->arg[1], R);
|
||||
} else
|
||||
emit(Ocopy, i->cls, r1, i->arg[0], R);
|
||||
}
|
||||
|
||||
if (!stk)
|
||||
return;
|
||||
|
||||
r = newtmp("abi", Kl, fn);
|
||||
for (i=i0, a=ac, off=0; i<i1; i++, a++) {
|
||||
if (i->op >= Oarge || !a->inmem)
|
||||
continue;
|
||||
r1 = newtmp("abi", Kl, fn);
|
||||
if (i->op == Oargc) {
|
||||
if (a->align == 4)
|
||||
off += off & 15;
|
||||
emit(Oblit1, 0, R, INT(a->type->size), R);
|
||||
emit(Oblit0, 0, R, i->arg[1], r1);
|
||||
} else
|
||||
emit(Ostorel, 0, R, i->arg[0], r1);
|
||||
emit(Oadd, Kl, r1, r, getcon(off, fn));
|
||||
off += a->size;
|
||||
}
|
||||
emit(Osalloc, Kl, r, getcon(stk, fn), R);
|
||||
}
|
||||
|
||||
static int
|
||||
selpar(Fn *fn, Ins *i0, Ins *i1)
|
||||
{
|
||||
AClass *ac, *a, aret;
|
||||
Ins *i;
|
||||
int ni, ns, s, al, fa;
|
||||
Ref r, env;
|
||||
|
||||
env = R;
|
||||
ac = alloc((i1-i0) * sizeof ac[0]);
|
||||
curi = &insb[NIns];
|
||||
ni = ns = 0;
|
||||
|
||||
if (fn->retty >= 0) {
|
||||
typclass(&aret, &typ[fn->retty]);
|
||||
fa = argsclass(i0, i1, ac, Opar, &aret, &env);
|
||||
} else
|
||||
fa = argsclass(i0, i1, ac, Opar, 0, &env);
|
||||
fn->reg = amd64_sysv_argregs(CALL(fa), 0);
|
||||
|
||||
for (i=i0, a=ac; i<i1; i++, a++) {
|
||||
if (i->op != Oparc || a->inmem)
|
||||
continue;
|
||||
if (a->size > 8) {
|
||||
r = newtmp("abi", Kl, fn);
|
||||
a->ref[1] = newtmp("abi", Kl, fn);
|
||||
emit(Ostorel, 0, R, a->ref[1], r);
|
||||
emit(Oadd, Kl, r, i->to, getcon(8, fn));
|
||||
}
|
||||
a->ref[0] = newtmp("abi", Kl, fn);
|
||||
emit(Ostorel, 0, R, a->ref[0], i->to);
|
||||
/* specific to NAlign == 3 */
|
||||
al = a->align >= 2 ? a->align - 2 : 0;
|
||||
emit(Oalloc+al, Kl, i->to, getcon(a->size, fn), R);
|
||||
}
|
||||
|
||||
if (fn->retty >= 0 && aret.inmem) {
|
||||
r = newtmp("abi", Kl, fn);
|
||||
emit(Ocopy, Kl, r, rarg(Kl, &ni, &ns), R);
|
||||
fn->retr = r;
|
||||
}
|
||||
|
||||
for (i=i0, a=ac, s=4; i<i1; i++, a++) {
|
||||
switch (a->inmem) {
|
||||
case 1:
|
||||
if (a->align > 4)
|
||||
err("sysv abi requires alignments of 16 or less");
|
||||
if (a->align == 4)
|
||||
s = (s+3) & -4;
|
||||
fn->tmp[i->to.val].slot = -s;
|
||||
s += a->size / 4;
|
||||
continue;
|
||||
case 2:
|
||||
emit(Oload, i->cls, i->to, SLOT(-s), R);
|
||||
s += 2;
|
||||
continue;
|
||||
}
|
||||
if (i->op == Opare)
|
||||
continue;
|
||||
r = rarg(a->cls[0], &ni, &ns);
|
||||
if (i->op == Oparc) {
|
||||
emit(Ocopy, a->cls[0], a->ref[0], r, R);
|
||||
if (a->size > 8) {
|
||||
r = rarg(a->cls[1], &ni, &ns);
|
||||
emit(Ocopy, a->cls[1], a->ref[1], r, R);
|
||||
}
|
||||
} else
|
||||
emit(Ocopy, i->cls, i->to, r, R);
|
||||
}
|
||||
|
||||
if (!req(R, env))
|
||||
emit(Ocopy, Kl, env, TMP(RAX), R);
|
||||
|
||||
return fa | (s*4)<<12;
|
||||
}
|
||||
|
||||
static Blk *
|
||||
split(Fn *fn, Blk *b)
|
||||
{
|
||||
Blk *bn;
|
||||
|
||||
++fn->nblk;
|
||||
bn = newblk();
|
||||
idup(bn, curi, &insb[NIns]-curi);
|
||||
curi = &insb[NIns];
|
||||
bn->visit = ++b->visit;
|
||||
bn->name = strf(PFn, "%s.%d", b->name, b->visit);
|
||||
bn->loop = b->loop;
|
||||
bn->link = b->link;
|
||||
b->link = bn;
|
||||
return bn;
|
||||
}
|
||||
|
||||
static void
|
||||
chpred(Blk *b, Blk *bp, Blk *bp1)
|
||||
{
|
||||
Phi *p;
|
||||
uint a;
|
||||
|
||||
for (p=b->phi; p; p=p->link) {
|
||||
for (a=0; p->blk[a]!=bp; a++)
|
||||
assert(a+1<p->narg);
|
||||
p->blk[a] = bp1;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
selvaarg(Fn *fn, Blk *b, Ins *i)
|
||||
{
|
||||
Ref loc, lreg, lstk, nr, r0, r1, c4, c8, c16, c, ap;
|
||||
Blk *b0, *bstk, *breg;
|
||||
int isint;
|
||||
|
||||
c4 = getcon(4, fn);
|
||||
c8 = getcon(8, fn);
|
||||
c16 = getcon(16, fn);
|
||||
ap = i->arg[0];
|
||||
isint = KBASE(i->cls) == 0;
|
||||
|
||||
/* @b [...]
|
||||
r0 =l add ap, (0 or 4)
|
||||
nr =l loadsw r0
|
||||
r1 =w cultw nr, (48 or 176)
|
||||
jnz r1, @breg, @bstk
|
||||
@breg
|
||||
r0 =l add ap, 16
|
||||
r1 =l loadl r0
|
||||
lreg =l add r1, nr
|
||||
r0 =w add nr, (8 or 16)
|
||||
r1 =l add ap, (0 or 4)
|
||||
storew r0, r1
|
||||
@bstk
|
||||
r0 =l add ap, 8
|
||||
lstk =l loadl r0
|
||||
r1 =l add lstk, 8
|
||||
storel r1, r0
|
||||
@b0
|
||||
%loc =l phi @breg %lreg, @bstk %lstk
|
||||
i->to =(i->cls) load %loc
|
||||
*/
|
||||
|
||||
loc = newtmp("abi", Kl, fn);
|
||||
emit(Oload, i->cls, i->to, loc, R);
|
||||
b0 = split(fn, b);
|
||||
b0->jmp = b->jmp;
|
||||
b0->s1 = b->s1;
|
||||
b0->s2 = b->s2;
|
||||
if (b->s1)
|
||||
chpred(b->s1, b, b0);
|
||||
if (b->s2 && b->s2 != b->s1)
|
||||
chpred(b->s2, b, b0);
|
||||
|
||||
lreg = newtmp("abi", Kl, fn);
|
||||
nr = newtmp("abi", Kl, fn);
|
||||
r0 = newtmp("abi", Kw, fn);
|
||||
r1 = newtmp("abi", Kl, fn);
|
||||
emit(Ostorew, Kw, R, r0, r1);
|
||||
emit(Oadd, Kl, r1, ap, isint ? CON_Z : c4);
|
||||
emit(Oadd, Kw, r0, nr, isint ? c8 : c16);
|
||||
r0 = newtmp("abi", Kl, fn);
|
||||
r1 = newtmp("abi", Kl, fn);
|
||||
emit(Oadd, Kl, lreg, r1, nr);
|
||||
emit(Oload, Kl, r1, r0, R);
|
||||
emit(Oadd, Kl, r0, ap, c16);
|
||||
breg = split(fn, b);
|
||||
breg->jmp.type = Jjmp;
|
||||
breg->s1 = b0;
|
||||
|
||||
lstk = newtmp("abi", Kl, fn);
|
||||
r0 = newtmp("abi", Kl, fn);
|
||||
r1 = newtmp("abi", Kl, fn);
|
||||
emit(Ostorel, Kw, R, r1, r0);
|
||||
emit(Oadd, Kl, r1, lstk, c8);
|
||||
emit(Oload, Kl, lstk, r0, R);
|
||||
emit(Oadd, Kl, r0, ap, c8);
|
||||
bstk = split(fn, b);
|
||||
bstk->jmp.type = Jjmp;
|
||||
bstk->s1 = b0;
|
||||
|
||||
b0->phi = alloc(sizeof *b0->phi);
|
||||
*b0->phi = (Phi){
|
||||
.cls = Kl, .to = loc,
|
||||
.narg = 2,
|
||||
.blk = vnew(2, sizeof b0->phi->blk[0], PFn),
|
||||
.arg = vnew(2, sizeof b0->phi->arg[0], PFn),
|
||||
};
|
||||
b0->phi->blk[0] = bstk;
|
||||
b0->phi->blk[1] = breg;
|
||||
b0->phi->arg[0] = lstk;
|
||||
b0->phi->arg[1] = lreg;
|
||||
r0 = newtmp("abi", Kl, fn);
|
||||
r1 = newtmp("abi", Kw, fn);
|
||||
b->jmp.type = Jjnz;
|
||||
b->jmp.arg = r1;
|
||||
b->s1 = breg;
|
||||
b->s2 = bstk;
|
||||
c = getcon(isint ? 48 : 176, fn);
|
||||
emit(Ocmpw+Ciult, Kw, r1, nr, c);
|
||||
emit(Oloadsw, Kl, nr, r0, R);
|
||||
emit(Oadd, Kl, r0, ap, isint ? CON_Z : c4);
|
||||
}
|
||||
|
||||
static void
|
||||
selvastart(Fn *fn, int fa, Ref ap)
|
||||
{
|
||||
Ref r0, r1;
|
||||
int gp, fp, sp;
|
||||
|
||||
gp = ((fa >> 4) & 15) * 8;
|
||||
fp = 48 + ((fa >> 8) & 15) * 16;
|
||||
sp = fa >> 12;
|
||||
r0 = newtmp("abi", Kl, fn);
|
||||
r1 = newtmp("abi", Kl, fn);
|
||||
emit(Ostorel, Kw, R, r1, r0);
|
||||
emit(Oadd, Kl, r1, TMP(RBP), getcon(-176, fn));
|
||||
emit(Oadd, Kl, r0, ap, getcon(16, fn));
|
||||
r0 = newtmp("abi", Kl, fn);
|
||||
r1 = newtmp("abi", Kl, fn);
|
||||
emit(Ostorel, Kw, R, r1, r0);
|
||||
emit(Oadd, Kl, r1, TMP(RBP), getcon(sp, fn));
|
||||
emit(Oadd, Kl, r0, ap, getcon(8, fn));
|
||||
r0 = newtmp("abi", Kl, fn);
|
||||
emit(Ostorew, Kw, R, getcon(fp, fn), r0);
|
||||
emit(Oadd, Kl, r0, ap, getcon(4, fn));
|
||||
emit(Ostorew, Kw, R, getcon(gp, fn), ap);
|
||||
}
|
||||
|
||||
void
|
||||
amd64_sysv_abi(Fn *fn)
|
||||
{
|
||||
Blk *b;
|
||||
Ins *i, *i0;
|
||||
RAlloc *ral;
|
||||
int n0, n1, ioff, fa;
|
||||
|
||||
for (b=fn->start; b; b=b->link)
|
||||
b->visit = 0;
|
||||
|
||||
/* lower parameters */
|
||||
for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
|
||||
if (!ispar(i->op))
|
||||
break;
|
||||
fa = selpar(fn, b->ins, i);
|
||||
n0 = &insb[NIns] - curi;
|
||||
ioff = i - b->ins;
|
||||
n1 = b->nins - ioff;
|
||||
vgrow(&b->ins, n0+n1);
|
||||
icpy(b->ins+n0, b->ins+ioff, n1);
|
||||
icpy(b->ins, curi, n0);
|
||||
b->nins = n0+n1;
|
||||
|
||||
/* lower calls, returns, and vararg instructions */
|
||||
ral = 0;
|
||||
b = fn->start;
|
||||
do {
|
||||
if (!(b = b->link))
|
||||
b = fn->start; /* do it last */
|
||||
if (b->visit)
|
||||
continue;
|
||||
curi = &insb[NIns];
|
||||
selret(b, fn);
|
||||
for (i=&b->ins[b->nins]; i!=b->ins;)
|
||||
switch ((--i)->op) {
|
||||
default:
|
||||
emiti(*i);
|
||||
break;
|
||||
case Ocall:
|
||||
for (i0=i; i0>b->ins; i0--)
|
||||
if (!isarg((i0-1)->op))
|
||||
break;
|
||||
selcall(fn, i0, i, &ral);
|
||||
i = i0;
|
||||
break;
|
||||
case Ovastart:
|
||||
selvastart(fn, fa, i->arg[0]);
|
||||
break;
|
||||
case Ovaarg:
|
||||
selvaarg(fn, b, i);
|
||||
break;
|
||||
case Oarg:
|
||||
case Oargc:
|
||||
die("unreachable");
|
||||
}
|
||||
if (b == fn->start)
|
||||
for (; ral; ral=ral->link)
|
||||
emiti(ral->i);
|
||||
idup(b, curi, &insb[NIns]-curi);
|
||||
} while (b != fn->start);
|
||||
|
||||
if (debug['A']) {
|
||||
fprintf(stderr, "\n> After ABI lowering:\n");
|
||||
printfn(fn, stderr);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
#include "all.h"
|
||||
|
||||
/* Per-opcode amd64 attributes, indexed by opcode.  The entries are
 * produced by expanding the O()/X() lines of ../ops.h: O selects the
 * array slot, X supplies the three Amd64Op fields. */
Amd64Op amd64_op[NOp] = {
#define O(op, t, x) [O##op] =
#define X(nm, zf, lf) { .nmem = nm, .zflag = zf, .lflag = lf },
#include "../ops.h"
};
|
||||
|
||||
static int
|
||||
amd64_memargs(int op)
|
||||
{
|
||||
return amd64_op[op].nmem;
|
||||
}
|
||||
|
||||
/* Target fields shared by all amd64 targets defined in this file.
 * Expands to a list of designated initializers ending in a comma. */
#define AMD64_COMMON \
	.abi0 = elimsb, \
	.isel = amd64_isel, \
	.memargs = amd64_memargs, \
	.cansel = 1, \
	.gpr0 = RAX, .ngpr = NGPR, \
	.fpr0 = XMM0, .nfpr = NFPR, \
	.rglob = BIT(RBP) | BIT(RSP), .nrglob = 2,
|
||||
|
||||
Target T_amd64_sysv = {
|
||||
.name = "amd64_sysv",
|
||||
.emitfin = elf_emitfin,
|
||||
.asloc = ".L",
|
||||
.abi1 = amd64_sysv_abi,
|
||||
.rsave = amd64_sysv_rsave,
|
||||
.nrsave = {NGPS_SYSV, NFPS},
|
||||
.retregs = amd64_sysv_retregs,
|
||||
.argregs = amd64_sysv_argregs,
|
||||
.emitfn = amd64_sysv_emitfn,
|
||||
AMD64_COMMON
|
||||
};
|
||||
|
||||
Target T_amd64_apple = {
|
||||
.name = "amd64_apple",
|
||||
.apple = 1,
|
||||
.emitfin = macho_emitfin,
|
||||
.asloc = "L",
|
||||
.assym = "_",
|
||||
.abi1 = amd64_sysv_abi,
|
||||
.rsave = amd64_sysv_rsave,
|
||||
.nrsave = {NGPS_SYSV, NFPS},
|
||||
.retregs = amd64_sysv_retregs,
|
||||
.argregs = amd64_sysv_argregs,
|
||||
.emitfn = amd64_sysv_emitfn,
|
||||
AMD64_COMMON
|
||||
};
|
||||
|
||||
Target T_amd64_win = {
|
||||
.name = "amd64_win",
|
||||
.windows = 1,
|
||||
.emitfin = pe_emitfin,
|
||||
.asloc = "L",
|
||||
.abi1 = amd64_winabi_abi,
|
||||
.rsave = amd64_winabi_rsave,
|
||||
.nrsave = {NGPS_WIN, NFPS},
|
||||
.retregs = amd64_winabi_retregs,
|
||||
.argregs = amd64_winabi_argregs,
|
||||
.emitfn = amd64_winabi_emitfn,
|
||||
AMD64_COMMON
|
||||
};
|
||||
Executable
+763
@@ -0,0 +1,763 @@
|
||||
#include "all.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
// How a single argument or parameter instruction is passed.
typedef enum ArgPassStyle {
  // Zero value of a freshly cleared ArgClass; never valid when lowering.
  APS_Invalid = 0,
  // The value itself travels in an argument register.
  APS_Register = 1,
  // No register left: the value itself is placed on the stack.
  APS_InlineOnStack = 2,
  // By-copy aggregate whose pointer travels in an argument register.
  APS_CopyAndPointerInRegister = 3,
  // By-copy aggregate whose pointer is placed on the stack.
  APS_CopyAndPointerOnStack = 4,
  // Marker for an Oargv instruction (variadic call).
  APS_VarargsTag = 5,
  // Marker for an Oarge/Opare instruction (env value).
  APS_EnvTag = 6,
} ArgPassStyle;
|
||||
|
||||
typedef struct ArgClass {
|
||||
Typ* type;
|
||||
ArgPassStyle style;
|
||||
int align;
|
||||
uint size;
|
||||
int cls;
|
||||
Ref ref;
|
||||
} ArgClass;
|
||||
|
||||
typedef struct ExtraAlloc ExtraAlloc;
|
||||
struct ExtraAlloc {
|
||||
Ins instr;
|
||||
ExtraAlloc* link;
|
||||
};
|
||||
|
||||
// Round n down / up to a multiple of a. The mask trick only works when a
// is a power of two.
#define ALIGN_DOWN(n, a) ((n) & ~((a) - 1))
#define ALIGN_UP(n, a) (((n) + (a) - 1) & ~(((a)) - 1))
|
||||
|
||||
// Number of stack bytes required to be reserved for the callee.
|
||||
#define SHADOW_SPACE_SIZE 32
|
||||
|
||||
int amd64_winabi_rsave[] = {RCX, RDX, R8, R9, R10, R11, RAX, XMM0,
|
||||
XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8,
|
||||
XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, -1};
|
||||
int amd64_winabi_rclob[] = {RBX, R12, R13, R14, R15, RSI, RDI, -1};
|
||||
|
||||
MAKESURE(winabi_arrays_ok,
|
||||
sizeof amd64_winabi_rsave == (NGPS_WIN + NFPS + 1) * sizeof(int) &&
|
||||
sizeof amd64_winabi_rclob == (NCLR_WIN + 1) * sizeof(int));
|
||||
|
||||
// layout of call's second argument (RCall)
|
||||
//
|
||||
// bit 0: rax returned
|
||||
// bit 1: xmm0 returned
|
||||
// bits 23: 0
|
||||
// bits 4567: rcx, rdx, r8, r9 passed
|
||||
// bits 89ab: xmm0,1,2,3 passed
|
||||
// bit c: env call (rax passed)
|
||||
// bits d..1f: 0
|
||||
|
||||
// Decode the return-register part of an RCall value.
//
// r: an RCall reference; bit 0 means rax is returned, bit 1 means xmm0 is
//    returned.
// p: optional; when non-NULL, p[0] receives the number of integer return
//    registers and p[1] the number of float return registers (0 or 1 each).
//
// Returns the bit set of registers that carry the return value.
bits amd64_winabi_retregs(Ref r, int p[2]) {
  assert(rtype(r) == RCall);

  // Both values are counts. Bit 1 must be shifted down, otherwise a float
  // return would be reported as two registers.
  int num_int_returns = r.val & 1;
  int num_float_returns = (r.val >> 1) & 1;

  bits b = 0;
  if (num_int_returns) {
    b |= BIT(RAX);
  }
  // Only report xmm0 when its bit is actually set, so the returned register
  // set always agrees with the counts stored in p.
  if (num_float_returns) {
    b |= BIT(XMM0);
  }
  if (p) {
    p[0] = num_int_returns;
    p[1] = num_float_returns;
  }
  return b;
}
|
||||
|
||||
// Return the number of set bits in b.
static uint popcnt(bits b) {
  uint count = 0;
  // Each iteration clears the lowest set bit.
  while (b) {
    b &= b - 1;
    ++count;
  }
  return count;
}
|
||||
|
||||
// Decode the argument-register part of an RCall value.
//
// Bits 4-7 flag rcx, rdx, r8, r9; bits 8-11 flag xmm0-xmm3; bit 12 flags
// an env value passed in rax. Unlike SysV these are individual flags
// rather than counts, because the registers in use need not be contiguous.
//
// Returns the set of registers used; when p is non-NULL, p[0]/p[1] receive
// the number of flagged integer/float registers.
bits amd64_winabi_argregs(Ref r, int p[2]) {
  static const int int_regs[4] = {RCX, RDX, R8, R9};
  static const int float_regs[4] = {XMM0, XMM1, XMM2, XMM3};

  assert(rtype(r) == RCall);

  int int_mask = (r.val >> 4) & 15;
  int float_mask = (r.val >> 8) & 15;
  bool has_env = (r.val >> 12) & 1;

  bits used = 0;
  for (int i = 0; i < 4; ++i) {
    if (int_mask & (1 << i)) {
      used |= BIT(int_regs[i]);
    }
    if (float_mask & (1 << i)) {
      used |= BIT(float_regs[i]);
    }
  }
  if (has_env) {
    used |= BIT(RAX);
  }

  if (p) {
    // TODO: The only consumer of these counts is live.c. It is unclear what
    // should be reported given that int and float registers share a single
    // position counter on Windows; for now report how many registers of
    // each kind are in use, even though they may not be contiguous.
    p[0] = popcnt(int_mask);
    p[1] = popcnt(float_mask);
  }
  return used;
}
|
||||
|
||||
// Bookkeeping gathered while classifying the arguments of a call or the
// parameters of a function.
typedef struct RegisterUsage {
  // Number of register positions consumed so far. Integer and float
  // arguments share this single counter; regs_passed records which
  // register file each position actually used.
  int num_regs_passed;

  // First index: 0 = integer, 1 = float (KBASE of the class).
  // Second index: position in the calling convention, i.e. RCX, RDX, R8,
  // R9 for integers and XMM0..XMM3 for floats.
  bool regs_passed[2][4];

  // Which register carries the return value.
  bool rax_returned;
  bool xmm0_returned;

  // Count of named arguments seen. lower_vastart uses it to locate the
  // first variadic argument (there is no 'Oparv' to mark that position).
  int num_named_args_passed;

  // Set when an Oargv is seen, which only happens when classifying the
  // arguments of a call (not the parameters of a definition).
  bool is_varargs_call;

  // Set when an env argument/parameter is present.
  bool has_env;
} RegisterUsage;
|
||||
|
||||
// Pack a RegisterUsage into the integer stored in an RCall reference:
// bit 0 rax returned, bit 1 xmm0 returned, bits 4-7 integer argument
// registers, bits 8-11 float argument registers, bit 12 env.
static int register_usage_to_call_arg_value(RegisterUsage reg_usage) {
  int value = 0;
  value |= reg_usage.rax_returned << 0;
  value |= reg_usage.xmm0_returned << 1;
  for (int kind = 0; kind < 2; ++kind) {
    for (int i = 0; i < 4; ++i) {
      value |= reg_usage.regs_passed[kind][i] << (4 + 4 * kind + i);
    }
  }
  value |= reg_usage.has_env << 12;
  return value;
}
|
||||
|
||||
// Assigns the argument to a register if there's any left according to the
|
||||
// calling convention, and updates the regs_passed bools. Otherwise marks the
|
||||
// value as needing stack space to be passed.
|
||||
static void assign_register_or_stack(RegisterUsage* reg_usage,
|
||||
ArgClass* arg,
|
||||
bool is_float,
|
||||
bool by_copy) {
|
||||
if (reg_usage->num_regs_passed == 4) {
|
||||
arg->style = by_copy ? APS_CopyAndPointerOnStack : APS_InlineOnStack;
|
||||
} else {
|
||||
reg_usage->regs_passed[is_float][reg_usage->num_regs_passed] = true;
|
||||
++reg_usage->num_regs_passed;
|
||||
arg->style = by_copy ? APS_CopyAndPointerInRegister : APS_Register;
|
||||
}
|
||||
++reg_usage->num_named_args_passed;
|
||||
}
|
||||
|
||||
// Return true when a value of this aggregate type is passed by
// copy-and-pointer rather than directly.
//
// Only sizes 1, 2, 4 and 8 are passed directly; e.g. a 5-byte struct
// would "fit" in a register but is still passed by pointer. Dark types are
// always passed by pointer.
static bool type_is_by_copy(Typ* type) {
  if (type->isdark) {
    return true;
  }
  switch (type->size) {
    case 1:
    case 2:
    case 4:
    case 8:
      return false;
    default:
      return true;
  }
}
|
||||
|
||||
// This function is used for both arguments and parameters.
|
||||
// begin_instr should either point at the first Oarg or Opar, and end_instr
|
||||
// should point past the last one (so to the Ocall for arguments, or to the
|
||||
// first 'real' instruction of the function for parameters).
|
||||
static void classify_arguments(RegisterUsage* reg_usage,
|
||||
Ins* begin_instr,
|
||||
Ins* end_instr,
|
||||
ArgClass* arg_classes,
|
||||
Ref* env) {
|
||||
ArgClass* arg = arg_classes;
|
||||
// For each argument, determine how it will be passed (int, float, stack)
|
||||
// and update the `reg_usage` counts. Additionally, fill out arg_classes for
|
||||
// each argument.
|
||||
for (Ins* instr = begin_instr; instr < end_instr; ++instr, ++arg) {
|
||||
switch (instr->op) {
|
||||
case Oarg:
|
||||
case Opar:
|
||||
assign_register_or_stack(reg_usage, arg, KBASE(instr->cls),
|
||||
/*by_copy=*/false);
|
||||
arg->cls = instr->cls;
|
||||
arg->align = 3;
|
||||
arg->size = 8;
|
||||
break;
|
||||
case Oargc:
|
||||
case Oparc: {
|
||||
int typ_index = instr->arg[0].val;
|
||||
Typ* type = &typ[typ_index];
|
||||
bool by_copy = type_is_by_copy(type);
|
||||
assign_register_or_stack(reg_usage, arg, /*is_float=*/false, by_copy);
|
||||
arg->cls = Kl;
|
||||
if (!by_copy && type->size <= 4) {
|
||||
arg->cls = Kw;
|
||||
}
|
||||
arg->align = 3;
|
||||
arg->size = type->size;
|
||||
break;
|
||||
}
|
||||
case Oarge:
|
||||
*env = instr->arg[0];
|
||||
arg->style = APS_EnvTag;
|
||||
reg_usage->has_env = true;
|
||||
break;
|
||||
case Opare:
|
||||
*env = instr->to;
|
||||
arg->style = APS_EnvTag;
|
||||
reg_usage->has_env = true;
|
||||
break;
|
||||
case Oargv:
|
||||
reg_usage->is_varargs_call = true;
|
||||
arg->style = APS_VarargsTag;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (reg_usage->has_env && reg_usage->is_varargs_call) {
|
||||
die("can't use env with varargs");
|
||||
}
|
||||
|
||||
// During a varargs call, float arguments have to be duplicated to their
|
||||
// associated integer register, so mark them as in-use too.
|
||||
if (reg_usage->is_varargs_call) {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (reg_usage->regs_passed[/*float*/ 1][i]) {
|
||||
reg_usage->regs_passed[/*int*/ 0][i] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Return true when the class ty (one of Kw, Kl, Ks, Kd) is an integer
// class, i.e. its KBASE is 0.
static bool is_integer_type(int ty) {
  assert(ty >= 0 && ty < 4 && "expecting Kw Kl Ks Kd");
  bool is_float = KBASE(ty) != 0;
  return !is_float;
}
|
||||
|
||||
static Ref register_for_arg(int cls, int counter) {
|
||||
assert(counter < 4);
|
||||
if (is_integer_type(cls)) {
|
||||
return TMP(amd64_winabi_rsave[counter]);
|
||||
} else {
|
||||
return TMP(XMM0 + counter);
|
||||
}
|
||||
}
|
||||
|
||||
static Ins* lower_call(Fn* func,
|
||||
Blk* block,
|
||||
Ins* call_instr,
|
||||
ExtraAlloc** pextra_alloc) {
|
||||
// Call arguments are instructions. Walk through them to find the end of the
|
||||
// call+args that we need to process (and return the instruction past the body
|
||||
// of the instruction for continuing processing).
|
||||
Ins* instr_past_args = call_instr - 1;
|
||||
for (; instr_past_args >= block->ins; --instr_past_args) {
|
||||
if (!isarg(instr_past_args->op)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ins* earliest_arg_instr = instr_past_args + 1;
|
||||
|
||||
// Don't need an ArgClass for the call itself, so one less than the total
|
||||
// number of instructions we're dealing with.
|
||||
uint num_args = call_instr - earliest_arg_instr;
|
||||
ArgClass* arg_classes = alloc(num_args * sizeof(ArgClass));
|
||||
|
||||
RegisterUsage reg_usage = {0};
|
||||
ArgClass ret_arg_class = {0};
|
||||
|
||||
// Ocall's two arguments are the the function to be called in 0, and, if the
|
||||
// the function returns a non-basic type, then arg[1] is a reference to the
|
||||
// type of the return. req checks if Refs are equal; `R` is 0.
|
||||
bool il_has_struct_return = !req(call_instr->arg[1], R);
|
||||
bool is_struct_return = false;
|
||||
if (il_has_struct_return) {
|
||||
Typ* ret_type = &typ[call_instr->arg[1].val];
|
||||
is_struct_return = type_is_by_copy(ret_type);
|
||||
if (is_struct_return) {
|
||||
assign_register_or_stack(®_usage, &ret_arg_class, /*is_float=*/false,
|
||||
/*by_copy=*/true);
|
||||
}
|
||||
ret_arg_class.size = ret_type->size;
|
||||
}
|
||||
Ref env = R;
|
||||
classify_arguments(®_usage, earliest_arg_instr, call_instr, arg_classes,
|
||||
&env);
|
||||
|
||||
// We now know which arguments are on the stack and which are in registers, so
|
||||
// we can allocate the correct amount of space to stash the stack-located ones
|
||||
// into.
|
||||
uint stack_usage = 0;
|
||||
for (uint i = 0; i < num_args; ++i) {
|
||||
ArgClass* arg = &arg_classes[i];
|
||||
// stack_usage only accounts for pushes that are for values that don't have
|
||||
// enough registers. Large struct copies are alloca'd separately, and then
|
||||
// only have (potentially) 8 bytes to add to stack_usage here.
|
||||
if (arg->style == APS_InlineOnStack) {
|
||||
if (arg->align > 4) {
|
||||
err("win abi cannot pass alignments > 16");
|
||||
}
|
||||
stack_usage += arg->size;
|
||||
} else if (arg->style == APS_CopyAndPointerOnStack) {
|
||||
stack_usage += 8;
|
||||
}
|
||||
}
|
||||
stack_usage = ALIGN_UP(stack_usage, 16);
|
||||
|
||||
// Note that here we're logically 'after' the call (due to emitting
|
||||
// instructions in reverse order), so we're doing a negative stack
|
||||
// allocation to clean up after the call.
|
||||
Ref stack_size_ref =
|
||||
getcon(-(int64_t)(stack_usage + SHADOW_SPACE_SIZE), func);
|
||||
emit(Osalloc, Kl, R, stack_size_ref, R);
|
||||
|
||||
ExtraAlloc* return_pad = NULL;
|
||||
if (is_struct_return) {
|
||||
return_pad = alloc(sizeof(ExtraAlloc));
|
||||
Ref ret_pad_ref = newtmp("abi.ret_pad", Kl, func);
|
||||
return_pad->instr =
|
||||
(Ins){Oalloc8, Kl, ret_pad_ref, {getcon(ret_arg_class.size, func)}};
|
||||
return_pad->link = (*pextra_alloc);
|
||||
*pextra_alloc = return_pad;
|
||||
reg_usage.rax_returned = true;
|
||||
emit(Ocopy, call_instr->cls, call_instr->to, TMP(RAX), R);
|
||||
} else {
|
||||
if (il_has_struct_return) {
|
||||
// In the case that at the IL level, a struct return was specified, but as
|
||||
// far as the calling convention is concerned it's not actually by
|
||||
// pointer, we need to store the return value into an alloca because
|
||||
// subsequent IL will still be treating the function return as a pointer.
|
||||
ExtraAlloc* return_copy = alloc(sizeof(ExtraAlloc));
|
||||
return_copy->instr =
|
||||
(Ins){Oalloc8, Kl, call_instr->to, {getcon(8, func)}};
|
||||
return_copy->link = (*pextra_alloc);
|
||||
*pextra_alloc = return_copy;
|
||||
Ref copy = newtmp("abi.copy", Kl, func);
|
||||
emit(Ostorel, 0, R, copy, call_instr->to);
|
||||
emit(Ocopy, Kl, copy, TMP(RAX), R);
|
||||
reg_usage.rax_returned = true;
|
||||
} else if (is_integer_type(call_instr->cls)) {
|
||||
// Only a basic type returned from the call, integer.
|
||||
emit(Ocopy, call_instr->cls, call_instr->to, TMP(RAX), R);
|
||||
reg_usage.rax_returned = true;
|
||||
} else {
|
||||
// Basic type, floating point.
|
||||
emit(Ocopy, call_instr->cls, call_instr->to, TMP(XMM0), R);
|
||||
reg_usage.xmm0_returned = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Emit the actual call instruction. There's no 'to' value by this point
|
||||
// because we've lowered it into register manipulation (that's the `R`),
|
||||
// arg[0] of the call is the function, and arg[1] is register usage is
|
||||
// documented as above (copied from SysV).
|
||||
emit(Ocall, call_instr->cls, R, call_instr->arg[0],
|
||||
CALL(register_usage_to_call_arg_value(reg_usage)));
|
||||
|
||||
if (!req(R, env)) {
|
||||
// If there's an env arg to be passed, it gets stashed in RAX.
|
||||
emit(Ocopy, Kl, TMP(RAX), env, R);
|
||||
}
|
||||
|
||||
if (reg_usage.is_varargs_call) {
|
||||
// Any float arguments need to be duplicated to integer registers. This is
|
||||
// required by the calling convention so that dumping to shadow space can be
|
||||
// done without a prototype and for varargs.
|
||||
#define DUP_IF_USED(index, floatreg, intreg) \
|
||||
if (reg_usage.regs_passed[/*float*/ 1][index]) { \
|
||||
emit(Ocast, Kl, TMP(intreg), TMP(floatreg), R); \
|
||||
}
|
||||
DUP_IF_USED(0, XMM0, RCX);
|
||||
DUP_IF_USED(1, XMM1, RDX);
|
||||
DUP_IF_USED(2, XMM2, R8);
|
||||
DUP_IF_USED(3, XMM3, R9);
|
||||
#undef DUP_IF_USED
|
||||
}
|
||||
|
||||
int reg_counter = 0;
|
||||
if (is_struct_return) {
|
||||
Ref first_reg = register_for_arg(Kl, reg_counter++);
|
||||
emit(Ocopy, Kl, first_reg, return_pad->instr.to, R);
|
||||
}
|
||||
|
||||
// This is where we actually do the load of values into registers or into
|
||||
// stack slots.
|
||||
Ref arg_stack_slots = newtmp("abi.args", Kl, func);
|
||||
uint slot_offset = SHADOW_SPACE_SIZE;
|
||||
ArgClass* arg = arg_classes;
|
||||
for (Ins* instr = earliest_arg_instr; instr != call_instr; ++instr, ++arg) {
|
||||
switch (arg->style) {
|
||||
case APS_Register: {
|
||||
Ref into = register_for_arg(arg->cls, reg_counter++);
|
||||
if (instr->op == Oargc) {
|
||||
// If this is a small struct being passed by value. The value in the
|
||||
// instruction in this case is a pointer, but it needs to be loaded
|
||||
// into the register.
|
||||
emit(Oload, arg->cls, into, instr->arg[1], R);
|
||||
} else {
|
||||
// Otherwise, a normal value passed in a register.
|
||||
emit(Ocopy, instr->cls, into, instr->arg[0], R);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case APS_InlineOnStack: {
|
||||
Ref slot = newtmp("abi.off", Kl, func);
|
||||
if (instr->op == Oargc) {
|
||||
// This is a small struct, so it's not passed by copy, but the
|
||||
// instruction is a pointer. So we need to copy it into the stack
|
||||
// slot. (And, remember that these are emitted backwards, so store,
|
||||
// then load.)
|
||||
Ref smalltmp = newtmp("abi.smalltmp", arg->cls, func);
|
||||
emit(Ostorel, 0, R, smalltmp, slot);
|
||||
emit(Oload, arg->cls, smalltmp, instr->arg[1], R);
|
||||
} else {
|
||||
// Stash the value into the stack slot.
|
||||
emit(Ostorel, 0, R, instr->arg[0], slot);
|
||||
}
|
||||
emit(Oadd, Kl, slot, arg_stack_slots, getcon(slot_offset, func));
|
||||
slot_offset += arg->size;
|
||||
break;
|
||||
}
|
||||
case APS_CopyAndPointerInRegister:
|
||||
case APS_CopyAndPointerOnStack: {
|
||||
// Alloca a space to copy into, and blit the value from the instr to the
|
||||
// copied location.
|
||||
ExtraAlloc* arg_copy = alloc(sizeof(ExtraAlloc));
|
||||
Ref copy_ref = newtmp("abi.copy", Kl, func);
|
||||
arg_copy->instr =
|
||||
(Ins){Oalloc8, Kl, copy_ref, {getcon(arg->size, func)}};
|
||||
arg_copy->link = (*pextra_alloc);
|
||||
*pextra_alloc = arg_copy;
|
||||
emit(Oblit1, 0, R, INT(arg->size), R);
|
||||
emit(Oblit0, 0, R, instr->arg[1], copy_ref);
|
||||
|
||||
// Now load the pointer into the correct register or stack slot.
|
||||
if (arg->style == APS_CopyAndPointerInRegister) {
|
||||
Ref into = register_for_arg(arg->cls, reg_counter++);
|
||||
emit(Ocopy, Kl, into, copy_ref, R);
|
||||
} else {
|
||||
assert(arg->style == APS_CopyAndPointerOnStack);
|
||||
Ref slot = newtmp("abi.off", Kl, func);
|
||||
emit(Ostorel, 0, R, copy_ref, slot);
|
||||
emit(Oadd, Kl, slot, arg_stack_slots, getcon(slot_offset, func));
|
||||
slot_offset += 8;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case APS_EnvTag:
|
||||
case APS_VarargsTag:
|
||||
// Nothing to do here, see right before the call for reg dupe.
|
||||
break;
|
||||
case APS_Invalid:
|
||||
die("unreachable");
|
||||
}
|
||||
}
|
||||
|
||||
if (stack_usage) {
|
||||
// The last (first in call order) thing we do is allocate the the stack
|
||||
// space we're going to fill with temporaries.
|
||||
emit(Osalloc, Kl, arg_stack_slots,
|
||||
getcon(stack_usage + SHADOW_SPACE_SIZE, func), R);
|
||||
} else {
|
||||
// When there's no usage for temporaries, we can add this into the other
|
||||
// alloca, but otherwise emit it separately (not storing into a reference)
|
||||
// so that it doesn't get removed later for being useless.
|
||||
emit(Osalloc, Kl, R, getcon(SHADOW_SPACE_SIZE, func), R);
|
||||
}
|
||||
|
||||
return instr_past_args;
|
||||
}
|
||||
|
||||
// Lower the return jump of a block, if it has one that carries a value.
//
// The block's jump becomes a plain Jret0 and the value is moved into rax
// or xmm0 by emitted instructions; the jump argument is replaced by an
// RCall value recording which register is used.
static void lower_block_return(Fn* func, Blk* block) {
  int jmp_type = block->jmp.type;
  bool returns_value = isret(jmp_type) && jmp_type != Jret0;
  if (!returns_value) {
    return;
  }

  // Remember the returned value before turning the jump into a void return.
  Ref ret_arg = block->jmp.arg;
  block->jmp.type = Jret0;

  RegisterUsage reg_usage = {0};

  if (jmp_type != Jretc) {
    // Scalar return: the class is encoded in the jump type.
    int k = jmp_type - Jretw;
    if (is_integer_type(k)) {
      emit(Ocopy, k, TMP(RAX), ret_arg, R);
      reg_usage.rax_returned = true;
    } else {
      emit(Ocopy, k, TMP(XMM0), ret_arg, R);
      reg_usage.xmm0_returned = true;
    }
  } else {
    Typ* type = &typ[func->retty];
    if (!type_is_by_copy(type)) {
      // Small aggregate: ret_arg points at it, load it into rax.
      emit(Oload, Kl, TMP(RAX), ret_arg, R);
    } else {
      // Aggregate returned through the hidden pointer in func->retr: blit
      // the value there and return the pointer in rax (emitted backwards).
      assert(rtype(func->retr) == RTmp);
      emit(Ocopy, Kl, TMP(RAX), func->retr, R);
      emit(Oblit1, 0, R, INT(type->size), R);
      emit(Oblit0, 0, R, ret_arg, func->retr);
    }
    reg_usage.rax_returned = true;
  }

  block->jmp.arg = CALL(register_usage_to_call_arg_value(reg_usage));
}
|
||||
|
||||
static void lower_vastart(Fn* func,
|
||||
RegisterUsage* param_reg_usage,
|
||||
Ref valist) {
|
||||
assert(func->vararg);
|
||||
// In varargs functions:
|
||||
// 1. the int registers are already dumped to the shadow stack space;
|
||||
// 2. any parameters passed in floating point registers have
|
||||
// been duplicated to the integer registers
|
||||
// 3. we ensure (later) that for varargs functions we're always using an rbp
|
||||
// frame pointer.
|
||||
// So, the ... argument is just indexed past rbp by the number of named values
|
||||
// that were actually passed.
|
||||
|
||||
Ref offset = newtmp("abi.vastart", Kl, func);
|
||||
emit(Ostorel, 0, R, offset, valist);
|
||||
|
||||
// *8 for sizeof(u64), +16 because the return address and rbp have been pushed
|
||||
// by the time we get to the body of the function.
|
||||
emit(Oadd, Kl, offset, TMP(RBP),
|
||||
getcon(param_reg_usage->num_named_args_passed * 8 + 16, func));
|
||||
}
|
||||
|
||||
// Lower a vaarg: the va_list holds a single pointer to the next argument.
//
// In program order (the emits below are in reverse):
//   cur  = load valist
//   to   = load cur
//   next = cur + 8
//   store next, valist
static void lower_vaarg(Fn* func, Ins* vaarg_instr) {
  Ref valist = vaarg_instr->arg[0];
  Ref next = newtmp("abi.vaarg.inc", Kl, func);
  Ref cur = newtmp("abi.vaarg.ptr", Kl, func);
  emit(Ostorel, 0, R, next, valist);
  emit(Oadd, Kl, next, cur, getcon(8, func));
  emit(Oload, vaarg_instr->cls, vaarg_instr->to, cur, R);
  emit(Oload, Kl, cur, valist, R);
}
|
||||
|
||||
static void lower_args_for_block(Fn* func,
|
||||
Blk* block,
|
||||
RegisterUsage* param_reg_usage,
|
||||
ExtraAlloc** pextra_alloc) {
|
||||
// global temporary buffer used by emit. Reset to the end, and predecremented
|
||||
// when adding to it.
|
||||
curi = &insb[NIns];
|
||||
|
||||
lower_block_return(func, block);
|
||||
|
||||
if (block->nins) {
|
||||
// Work backwards through the instructions, either copying them unchanged,
|
||||
// or modifying as necessary.
|
||||
for (Ins* instr = &block->ins[block->nins - 1]; instr >= block->ins;) {
|
||||
switch (instr->op) {
|
||||
case Ocall:
|
||||
instr = lower_call(func, block, instr, pextra_alloc);
|
||||
break;
|
||||
case Ovastart:
|
||||
lower_vastart(func, param_reg_usage, instr->arg[0]);
|
||||
--instr;
|
||||
break;
|
||||
case Ovaarg:
|
||||
lower_vaarg(func, instr);
|
||||
--instr;
|
||||
break;
|
||||
case Oarg:
|
||||
case Oargc:
|
||||
die("unreachable");
|
||||
default:
|
||||
emiti(*instr);
|
||||
--instr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This it the start block, which is processed last. Add any allocas that
|
||||
// other blocks needed.
|
||||
bool is_start_block = block == func->start;
|
||||
if (is_start_block) {
|
||||
for (ExtraAlloc* ea = *pextra_alloc; ea; ea = ea->link) {
|
||||
emiti(ea->instr);
|
||||
}
|
||||
}
|
||||
|
||||
// emit/emiti add instructions from the end to the beginning of the temporary
|
||||
// global buffer. dup the final version into the final block storage.
|
||||
block->nins = &insb[NIns] - curi;
|
||||
idup(block, curi, block->nins);
|
||||
}
|
||||
|
||||
// Return a pointer to the first instruction of start_block that is not a
// parameter instruction (one past the last instruction if all of them are).
static Ins* find_end_of_func_parameters(Blk* start_block) {
  Ins* limit = &start_block->ins[start_block->nins];
  Ins* cursor = start_block->ins;
  while (cursor < limit && ispar(cursor->op)) {
    ++cursor;
  }
  return cursor;
}
|
||||
|
||||
// Copy from registers/stack into values.
|
||||
static RegisterUsage lower_func_parameters(Fn* func) {
|
||||
// This is half-open, so end points after the last Opar.
|
||||
Blk* start_block = func->start;
|
||||
Ins* start_of_params = start_block->ins;
|
||||
Ins* end_of_params = find_end_of_func_parameters(start_block);
|
||||
|
||||
size_t num_params = end_of_params - start_of_params;
|
||||
ArgClass* arg_classes = alloc(num_params * sizeof(ArgClass));
|
||||
ArgClass arg_ret = {0};
|
||||
|
||||
// global temporary buffer used by emit. Reset to the end, and predecremented
|
||||
// when adding to it.
|
||||
curi = &insb[NIns];
|
||||
|
||||
int reg_counter = 0;
|
||||
RegisterUsage reg_usage = {0};
|
||||
if (func->retty >= 0) {
|
||||
bool by_copy = type_is_by_copy(&typ[func->retty]);
|
||||
if (by_copy) {
|
||||
assign_register_or_stack(®_usage, &arg_ret, /*is_float=*/false,
|
||||
by_copy);
|
||||
Ref ret_ref = newtmp("abi.ret", Kl, func);
|
||||
emit(Ocopy, Kl, ret_ref, TMP(RCX), R);
|
||||
func->retr = ret_ref;
|
||||
++reg_counter;
|
||||
}
|
||||
}
|
||||
Ref env = R;
|
||||
classify_arguments(®_usage, start_of_params, end_of_params, arg_classes,
|
||||
&env);
|
||||
func->reg = amd64_winabi_argregs(
|
||||
CALL(register_usage_to_call_arg_value(reg_usage)), NULL);
|
||||
|
||||
// Copy from the registers or stack slots into the named parameters. Depending
|
||||
// on how they're passed, they either need to be copied or loaded.
|
||||
ArgClass* arg = arg_classes;
|
||||
uint slot_offset = SHADOW_SPACE_SIZE / 4 + 4;
|
||||
for (Ins* instr = start_of_params; instr < end_of_params; ++instr, ++arg) {
|
||||
switch (arg->style) {
|
||||
case APS_Register: {
|
||||
Ref from = register_for_arg(arg->cls, reg_counter++);
|
||||
// If it's a struct at the IL level, we need to copy the register into
|
||||
// an alloca so we have something to point at (same for InlineOnStack).
|
||||
if (instr->op == Oparc) {
|
||||
arg->ref = newtmp("abi", Kl, func);
|
||||
emit(Ostorel, 0, R, arg->ref, instr->to);
|
||||
emit(Ocopy, instr->cls, arg->ref, from, R);
|
||||
emit(Oalloc8, Kl, instr->to, getcon(arg->size, func), R);
|
||||
} else {
|
||||
emit(Ocopy, instr->cls, instr->to, from, R);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case APS_InlineOnStack:
|
||||
if (instr->op == Oparc) {
|
||||
arg->ref = newtmp("abi", Kl, func);
|
||||
emit(Ostorel, 0, R, arg->ref, instr->to);
|
||||
emit(Ocopy, instr->cls, arg->ref, SLOT(-slot_offset), R);
|
||||
emit(Oalloc8, Kl, instr->to, getcon(arg->size, func), R);
|
||||
} else {
|
||||
emit(Ocopy, Kl, instr->to, SLOT(-slot_offset), R);
|
||||
}
|
||||
slot_offset += 2;
|
||||
break;
|
||||
case APS_CopyAndPointerOnStack:
|
||||
emit(Oload, Kl, instr->to, SLOT(-slot_offset), R);
|
||||
slot_offset += 2;
|
||||
break;
|
||||
case APS_CopyAndPointerInRegister: {
|
||||
// Because this has to be a copy (that we own), it is sufficient to just
|
||||
// copy the register to the target.
|
||||
Ref from = register_for_arg(Kl, reg_counter++);
|
||||
emit(Ocopy, Kl, instr->to, from, R);
|
||||
break;
|
||||
}
|
||||
case APS_EnvTag:
|
||||
break;
|
||||
case APS_VarargsTag:
|
||||
case APS_Invalid:
|
||||
die("unreachable");
|
||||
}
|
||||
}
|
||||
|
||||
// If there was an `env`, it was passed in RAX, so copy it into the env ref.
|
||||
if (!req(R, env)) {
|
||||
emit(Ocopy, Kl, env, TMP(RAX), R);
|
||||
}
|
||||
|
||||
int num_created_instrs = &insb[NIns] - curi;
|
||||
int num_other_after_instrs = (int)(start_block->nins - num_params);
|
||||
int new_total_instrs = num_other_after_instrs + num_created_instrs;
|
||||
Ins* new_instrs = vnew(new_total_instrs, sizeof(Ins), PFn);
|
||||
Ins* instr_p = icpy(new_instrs, curi, num_created_instrs);
|
||||
icpy(instr_p, end_of_params, num_other_after_instrs);
|
||||
start_block->nins = new_total_instrs;
|
||||
start_block->ins = new_instrs;
|
||||
|
||||
return reg_usage;
|
||||
}
|
||||
|
||||
// The main job of this function is to lower generic instructions into the
|
||||
// specific details of how arguments are passed, and parameters are
|
||||
// interpreted for win x64. A useful reference is
|
||||
// https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention .
|
||||
//
|
||||
// Some of the major differences from SysV if you're comparing the code
|
||||
// (non-exhaustive):
|
||||
// - only 4 int and 4 float regs are used
|
||||
// - when an int register is assigned a value, its associated float register is
|
||||
// left unused (and vice versa). i.e. there's only one counter as you assign
|
||||
// arguments to registers.
|
||||
// - any structs that aren't 1/2/4/8 bytes in size are passed by pointer, not
|
||||
// by copying them into the stack. So e.g. if you pass something like
|
||||
// `struct { void*, int64_t }` by value, it first needs to be copied to
|
||||
// another alloca (in order to maintain value semantics at the language
|
||||
// level), then the pointer to that copy is treated as a regular integer
|
||||
// argument (which then itself may *also* be copied to the stack in the case
|
||||
// there's no integer register remaining.)
|
||||
// - when calling a varargs functions, floating point values must be duplicated
|
||||
// integer registers. Along with the above restrictions, this makes varargs
|
||||
// handling simpler for the callee than SysV.
|
||||
void amd64_winabi_abi(Fn* func) {
|
||||
// The first thing to do is lower incoming parameters to this function.
|
||||
RegisterUsage param_reg_usage = lower_func_parameters(func);
|
||||
|
||||
// This is the second larger part of the job. We walk all blocks, and rewrite
|
||||
// instructions returns, calls, and handling of varargs into their win x64
|
||||
// specific versions. Any other instructions are just passed through unchanged
|
||||
// by using `emiti`.
|
||||
|
||||
// Skip over the entry block, and do it at the end so that our later
|
||||
// modifications can add allocations to the start block. In particular, we
|
||||
// need to add stack allocas for copies when structs are passed or returned by
|
||||
// value.
|
||||
ExtraAlloc* extra_alloc = NULL;
|
||||
for (Blk* block = func->start->link; block; block = block->link) {
|
||||
lower_args_for_block(func, block, ¶m_reg_usage, &extra_alloc);
|
||||
}
|
||||
lower_args_for_block(func, func->start, ¶m_reg_usage, &extra_alloc);
|
||||
|
||||
if (debug['A']) {
|
||||
fprintf(stderr, "\n> After ABI lowering:\n");
|
||||
printfn(func, stderr);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user