This commit is contained in:
2026-06-11 10:59:54 -06:00
commit 8650a71f67
159 changed files with 78653 additions and 0 deletions
+852
View File
@@ -0,0 +1,852 @@
#include "all.h"
/* forward declarations of the helper types used by the ABI lowering */
typedef struct Abi Abi;
typedef struct Class Class;
typedef struct Insl Insl;
typedef struct Params Params;
/* flag bits stored in Class.class */
enum {
Cstk = 1, /* pass on the stack */
Cptr = 2, /* replaced by a pointer */
};
/* How one argument, parameter or return value is passed. */
struct Class {
char class; /* bit set of Cstk and Cptr */
char ishfa; /* set by typclass() when the type is passed as a float aggregate */
struct {
char base; /* element class found by isfloatv() (Ks or Kd); Kx before the scan */
uchar size; /* type size divided by the element width */
} hfa;
uint size; /* size in bytes as passed (8 when Cptr is set) */
uint align; /* alignment in bytes, used when laying out stack arguments */
Typ *t; /* aggregate type; only set by typclass() */
uchar nreg; /* entries of reg[]/cls[] filled by typclass() for register-passed aggregates */
uchar ngp; /* general-purpose registers consumed */
uchar nfp; /* floating-point registers consumed */
int reg[4]; /* register assigned to each piece */
int cls[4]; /* class of each piece */
};
/* Singly-linked list node holding one instruction; stkblob() pushes
 * stack allocations onto such a list so they can be emitted later. */
struct Insl {
Ins i;
Insl *link;
};
/* Summary of a function's parameters, as returned by selpar(). */
struct Params {
uint ngp; /* gp argument registers used (bits 5..8 of the RCall word) */
uint nfp; /* fp argument registers used (bits 9..12 of the RCall word) */
uint stk; /* bytes of stack-passed parameters, rounded up to 8 */
};
/* Argument registers in assignment order.  Only the first 8 entries are
 * initialized; the remaining 4 are zero.  NOTE(review): presumably the
 * slack exists because typclass() reads up to 4 entries starting at the
 * current cursor before argsclass() decides whether the aggregate fits
 * -- confirm. */
static int gpreg[12] = {R0, R1, R2, R3, R4, R5, R6, R7};
static int fpreg[12] = {V0, V1, V2, V3, V4, V5, V6, V7};
/* store opcode for each value class */
static int store[] = {
[Kw] = Ostorew, [Kl] = Ostorel,
[Ks] = Ostores, [Kd] = Ostored
};
/* layout of call's second argument (RCall)
*
* 13
* 29 14 | 9 5 2 0
* |0.00|x|x|xxxx|xxxx|xxx|xx| range
* | | | | | ` gp regs returned (0..2)
* | | | | ` fp regs returned (0..4)
* | | | ` gp regs passed (0..8)
* | | ` fp regs passed (0..8)
* | ` indirect result register x8 used (0..1)
* ` env pointer passed in x9 (0..1)
*/
/* Check whether every field of t (recursing into nested types) is a
 * float of one single width.  *cls carries the width seen so far and
 * is updated to Ks or Kd as fields are visited.  Returns 1 when the
 * type is homogeneous, 0 as soon as a mismatch or a non-float field
 * is found.
 */
static int
isfloatv(Typ *t, char *cls)
{
	Field *fld;
	uint u;

	for (u=0; u<t->nunion; u++) {
		for (fld=t->fields[u]; fld->type != FEnd; fld++) {
			if (fld->type == Fs) {
				if (*cls == Kd)
					return 0;
				*cls = Ks;
			} else if (fld->type == Fd) {
				if (*cls == Ks)
					return 0;
				*cls = Kd;
			} else if (fld->type == FTyp) {
				/* nested aggregate: must itself be homogeneous */
				if (!isfloatv(&typ[fld->len], cls))
					return 0;
			} else
				return 0;
		}
	}
	return 1;
}
/* Classify aggregate type t into c.  gp and fp point at the next free
 * general-purpose and floating-point argument registers; they are only
 * read here, the caller decides whether they are actually consumed.
 *
 *  - homogeneous float aggregates of at most 4 elements get one fp
 *    register per element;
 *  - types flagged isdark, empty types and anything whose rounded size
 *    exceeds 16 bytes are replaced by a pointer (Cptr) held in one gp
 *    register;
 *  - everything else is split in 8-byte pieces, one gp register each.
 */
static void
typclass(Class *c, Typ *t, int *gp, int *fp)
{
	uint64_t rounded, nelem;
	uint k;

	rounded = (t->size + 7) & -8;
	c->t = t;
	c->class = 0;
	c->ngp = 0;
	c->nfp = 0;
	c->align = 8;

	if (t->align > 3)
		err("alignments larger than 8 are not supported");

	c->size = rounded;
	c->hfa.base = Kx;
	c->ishfa = isfloatv(t, &c->hfa.base);
	nelem = t->size/(KWIDE(c->hfa.base) ? 8 : 4);
	c->ishfa &= !t->isdark && nelem <= 4;
	c->hfa.size = nelem;

	if (c->ishfa) {
		/* one fp register per float element */
		for (k=0; k<nelem; k++) {
			c->reg[k] = fp[k];
			c->cls[k] = c->hfa.base;
			c->nfp++;
		}
		c->nreg = k;
		return;
	}

	if (t->isdark || rounded > 16 || rounded == 0) {
		/* large structs are replaced by a
		 * pointer to some caller-allocated
		 * memory */
		c->class |= Cptr;
		c->size = 8;
		c->ngp = 1;
		c->reg[0] = *gp;
		c->cls[0] = Kl;
		return;
	}

	/* small aggregate: one gp register per 8 bytes */
	for (k=0; k<rounded/8; k++) {
		c->reg[k] = gp[k];
		c->cls[k] = Kl;
		c->ngp++;
	}
	c->nreg = k;
}
/* Create one fresh temporary per register piece (returned in tmp[])
 * and emit the stores that write those temporaries to consecutive
 * offsets from mem.  Each piece advances the offset by 8 bytes for
 * wide classes and 4 bytes otherwise.
 */
static void
sttmps(Ref tmp[], int cls[], uint nreg, Ref mem, Fn *fn)
{
	uint64_t disp;
	uint k;
	int kind;
	Ref addr, con;

	assert(nreg <= 4);
	disp = 0;
	k = 0;
	while (k < nreg) {
		kind = cls[k];
		tmp[k] = newtmp("abi", kind, fn);
		addr = newtmp("abi", Kl, fn);
		emit(store[kind], 0, R, tmp[k], addr);
		con = getcon(disp, fn);
		emit(Oadd, Kl, addr, mem, con);
		if (KWIDE(kind))
			disp += 8;
		else
			disp += 4;
		k++;
	}
}
/* Emit loads that fill the n registers reg[] (of classes cls[]) from
 * consecutive offsets starting at mem.
 *
 * todo, may read out of bounds
 */
static void
ldregs(int reg[], int cls[], int n, Ref mem, Fn *fn)
{
	uint64_t disp;
	int k;
	Ref addr, con;

	for (k=0, disp=0; k<n; k++) {
		addr = newtmp("abi", Kl, fn);
		emit(Oload, cls[k], TMP(reg[k]), addr, R);
		con = getcon(disp, fn);
		emit(Oadd, Kl, addr, mem, con);
		/* wide pieces occupy 8 bytes, narrow ones 4 */
		disp += KWIDE(cls[k]) ? 8 : 4;
	}
}
/* Lower the return jump of block b to Jret0.  The returned value is
 * moved to R0/V0 (scalars), loaded into the registers chosen by
 * typclass() (small aggregates), or copied to the memory designated
 * by fn->retr (aggregates returned by pointer).  The jump argument is
 * replaced by an RCall word recording which registers carry the
 * result.
 */
static void
selret(Blk *b, Fn *fn)
{
	int jt, k, cty, isfp;
	Ref val;
	Class cr;

	jt = b->jmp.type;
	if (!isret(jt))
		return;
	if (jt == Jret0)
		return;

	val = b->jmp.arg;
	b->jmp.type = Jret0;

	if (jt != Jretc) {
		/* scalar return: one gp or one fp register */
		k = jt - Jretw;
		isfp = KBASE(k) != 0;
		emit(Ocopy, k, TMP(isfp ? V0 : R0), val, R);
		cty = isfp ? 1 << 2 : 1;
	} else {
		typclass(&cr, &typ[fn->retty], gpreg, fpreg);
		if (cr.class & Cptr) {
			assert(rtype(fn->retr) == RTmp);
			emit(Oblit1, 0, R, INT(cr.t->size), R);
			emit(Oblit0, 0, R, val, fn->retr);
			cty = 0;
		} else {
			ldregs(cr.reg, cr.cls, cr.nreg, val, fn);
			cty = (cr.nfp << 2) | cr.ngp;
		}
	}

	b->jmp.arg = CALL(cty);
}
/* Classify the argument/parameter instructions in [i0, i1), filling one
 * Class per instruction in carg[].  Scalars take the next free gp or fp
 * register, or are flagged Cstk when none is left; aggregates are
 * classified by typclass() and flagged Cstk when they do not fit in the
 * remaining registers.  Returns the argument part of the RCall word:
 * env flag in bit 14, gp registers used from bit 5, fp registers used
 * from bit 9.
 */
static int
argsclass(Ins *i0, Ins *i1, Class *carg)
{
int va, envc, ngp, nfp, *gp, *fp;
Class *c;
Ins *i;
va = 0;
envc = 0;
/* gp/fp are cursors into the register tables, ngp/nfp count what is left */
gp = gpreg;
fp = fpreg;
ngp = 8;
nfp = 8;
for (i=i0, c=carg; i<i1; i++, c++)
switch (i->op) {
case Oargsb:
case Oargub:
case Oparsb:
case Oparub:
c->size = 1;
goto Scalar;
case Oargsh:
case Oarguh:
case Oparsh:
case Oparuh:
c->size = 2;
goto Scalar;
case Opar:
case Oarg:
c->size = 8;
/* on apple targets narrow scalars only take 4 bytes */
if (T.apple && !KWIDE(i->cls))
c->size = 4;
Scalar:
c->align = c->size;
*c->cls = i->cls;
/* once va is set, every scalar goes to an 8-byte stack slot */
if (va) {
c->class |= Cstk;
c->size = 8;
c->align = 8;
break;
}
if (KBASE(i->cls) == 0 && ngp > 0) {
ngp--;
*c->reg = *gp++;
break;
}
if (KBASE(i->cls) == 1 && nfp > 0) {
nfp--;
*c->reg = *fp++;
break;
}
c->class |= Cstk;
break;
case Oparc:
case Oargc:
typclass(c, &typ[i->arg[0].val], gp, fp);
if (c->ngp <= ngp) {
if (c->nfp <= nfp) {
ngp -= c->ngp;
nfp -= c->nfp;
gp += c->ngp;
fp += c->nfp;
break;
} else
/* does not fit: no fp register may be used by later arguments */
nfp = 0;
} else
/* does not fit: no gp register may be used by later arguments */
ngp = 0;
c->class |= Cstk;
break;
case Opare:
case Oarge:
/* the environment pointer always travels in R9 */
*c->reg = R9;
*c->cls = Kl;
envc = 1;
break;
case Oargv:
/* only apple targets switch to stack passing after this marker */
va = T.apple != 0;
break;
default:
die("unreachable");
}
return envc << 14 | (gp-gpreg) << 5 | (fp-fpreg) << 9;
}
/* Decode the "registers returned" fields of RCall word r.  Returns the
 * bit set of registers holding the result (R0.. and V0..); when p is
 * non-null, p[0] and p[1] receive the gp and fp register counts.
 */
bits
arm64_retregs(Ref r, int p[2])
{
	bits mask;
	int ngp, nfp, k;

	assert(rtype(r) == RCall);
	ngp = r.val & 3;
	nfp = (r.val >> 2) & 7;
	if (p != 0) {
		p[0] = ngp;
		p[1] = nfp;
	}
	mask = 0;
	for (k=0; k<ngp; k++)
		mask |= BIT(R0+k);
	for (k=0; k<nfp; k++)
		mask |= BIT(V0+k);
	return mask;
}
/* Decode the "registers passed" fields of RCall word r.  Returns the
 * bit set of argument registers in use, including R8 (indirect result)
 * and R9 (environment) when their flags are set.  When p is non-null,
 * p[0] receives the gp count (R8 and R9 included) and p[1] the fp
 * count.
 */
bits
arm64_argregs(Ref r, int p[2])
{
	bits mask;
	int ngp, nfp, x8, x9, k;

	assert(rtype(r) == RCall);
	ngp = (r.val >> 5) & 15;
	nfp = (r.val >> 9) & 15;
	x8 = (r.val >> 13) & 1;
	x9 = (r.val >> 14) & 1;
	if (p != 0) {
		p[0] = ngp + x8 + x9;
		p[1] = nfp;
	}
	mask = 0;
	for (k=0; k<ngp; k++)
		mask |= BIT(R0+k);
	for (k=0; k<nfp; k++)
		mask |= BIT(V0+k);
	mask |= (bits)x8 << R8;
	mask |= (bits)x9 << R9;
	return mask;
}
/* Queue on *ilp an alloc instruction defining r as a stack blob big
 * enough for the value described by c: the full type size when the
 * value is passed by pointer, the classified size otherwise.
 */
static void
stkblob(Ref r, Class *c, Fn *fn, Insl **ilp)
{
	Insl *node;
	uint64_t nbytes;
	int lg;

	node = alloc(sizeof *node);
	lg = c->t->align - 2; /* NAlign == 3 */
	if (lg < 0)
		lg = 0;
	if (c->class & Cptr)
		nbytes = c->t->size;
	else
		nbytes = c->size;
	node->i = (Ins){Oalloc+lg, Kl, r, {getcon(nbytes, fn)}};
	/* push in front of the list */
	node->link = *ilp;
	*ilp = node;
}
/* Round x up to the next multiple of al; al must be a power of two. */
static uint
align(uint x, uint al)
{
	uint low;

	low = al - 1;
	return (x + low) & ~low;
}
/* Lower one call.  i1 is the Ocall instruction and [i0, i1) are its
 * argument instructions.  Stack blobs needed for by-pointer arguments
 * and aggregate results are queued on *ilp for the caller to emit.
 *
 * The caller walks each block from its last instruction to its first,
 * so the emit() calls below are written in the reverse of the order in
 * which the resulting instructions execute.
 */
static void
selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
{
	Ins *i;
	Class *ca, *c, cr;
	int op, cty;
	uint n, stk, off;
	Ref r, rstk, tmp[4];

	ca = alloc((i1-i0) * sizeof ca[0]);
	cty = argsclass(i0, i1, ca);

	/* turn by-pointer aggregates into plain pointer arguments
	 * and compute the size of the outgoing stack area */
	stk = 0;
	for (i=i0, c=ca; i<i1; i++, c++) {
		if (c->class & Cptr) {
			i->arg[0] = newtmp("abi", Kl, fn);
			stkblob(i->arg[0], c, fn, ilp);
			i->op = Oarg;
		}
		if (c->class & Cstk) {
			stk = align(stk, c->align);
			stk += c->size;
		}
	}
	stk = align(stk, 16);
	rstk = getcon(stk, fn);
	if (stk)
		emit(Oadd, Kl, TMP(SP), TMP(SP), rstk);

	/* fetch the result */
	if (!req(i1->arg[1], R)) {
		typclass(&cr, &typ[i1->arg[1].val], gpreg, fpreg);
		stkblob(i1->to, &cr, fn, ilp);
		cty |= (cr.nfp << 2) | cr.ngp;
		if (cr.class & Cptr) {
			/* spill & rega expect calls to be
			 * followed by copies from regs,
			 * so we emit a dummy
			 */
			cty |= 1 << 13 | 1;
			emit(Ocopy, Kw, R, TMP(R0), R);
		} else {
			sttmps(tmp, cr.cls, cr.nreg, i1->to, fn);
			for (n=0; n<cr.nreg; n++) {
				r = TMP(cr.reg[n]);
				emit(Ocopy, cr.cls[n], tmp[n], r, R);
			}
		}
	} else {
		if (KBASE(i1->cls) == 0) {
			emit(Ocopy, i1->cls, i1->to, TMP(R0), R);
			cty |= 1;
		} else {
			emit(Ocopy, i1->cls, i1->to, TMP(V0), R);
			cty |= 1 << 2;
		}
	}

	emit(Ocall, 0, R, i1->arg[0], CALL(cty));

	if (cty & (1 << 13))
		/* struct return argument */
		emit(Ocopy, Kl, TMP(R8), i1->to, R);

	/* move register arguments in place */
	for (i=i0, c=ca; i<i1; i++, c++) {
		if ((c->class & Cstk) != 0)
			continue;
		if (i->op == Oarg || i->op == Oarge || isargbh(i->op))
			emit(Ocopy, *c->cls, TMP(*c->reg), i->arg[0], R);
		if (i->op == Oargc)
			ldregs(c->reg, c->cls, c->nreg, i->arg[1], fn);
	}

	/* populate the stack */
	off = 0;
	for (i=i0, c=ca; i<i1; i++, c++) {
		if ((c->class & Cstk) == 0)
			continue;
		off = align(off, c->align);
		r = newtmp("abi", Kl, fn);
		if (i->op == Oarg || isargbh(i->op)) {
			switch (c->size) {
			case 1: op = Ostoreb; break;
			case 2: op = Ostoreh; break;
			case 4:
			case 8: op = store[*c->cls]; break;
			default: die("unreachable");
			}
			emit(op, 0, R, i->arg[0], r);
		} else {
			assert(i->op == Oargc);
			emit(Oblit1, 0, R, INT(c->size), R);
			emit(Oblit0, 0, R, i->arg[1], r);
		}
		emit(Oadd, Kl, r, TMP(SP), getcon(off, fn));
		off += c->size;
	}
	if (stk)
		emit(Osub, Kl, TMP(SP), TMP(SP), rstk);

	/* copy by-pointer aggregates into their blobs */
	for (i=i0, c=ca; i<i1; i++, c++)
		if (c->class & Cptr) {
			emit(Oblit1, 0, R, INT(c->t->size), R);
			emit(Oblit0, 0, R, i->arg[1], i->arg[0]);
		}
}
/* Lower the parameter instructions [i0, i1).  Resets the instruction
 * buffer cursor, records the incoming argument registers in fn->reg,
 * and emits the code that moves every parameter from its register or
 * stack location to its temporary.  Returns the register counts and
 * the size of the stack-passed parameters.
 */
static Params
selpar(Fn *fn, Ins *i0, Ins *i1)
{
Class *ca, *c, cr;
Insl *il;
Ins *i;
int op, n, cty;
uint off;
/* NOTE(review): 16 presumably covers 8 gp + 8 fp argument registers -- confirm */
Ref r, tmp[16], *t;
ca = alloc((i1-i0) * sizeof ca[0]);
curi = &insb[NIns];
cty = argsclass(i0, i1, ca);
fn->reg = arm64_argregs(CALL(cty), 0);
il = 0;
t = tmp;
/* aggregates received in registers are stored to a fresh stack blob */
for (i=i0, c=ca; i<i1; i++, c++) {
if (i->op != Oparc || (c->class & (Cptr|Cstk)))
continue;
sttmps(t, c->cls, c->nreg, i->to, fn);
stkblob(i->to, c, fn, &il);
t += c->nreg;
}
for (; il; il=il->link)
emiti(il->i);
/* a result returned through memory: remember the pointer received in R8 */
if (fn->retty >= 0) {
typclass(&cr, &typ[fn->retty], gpreg, fpreg);
if (cr.class & Cptr) {
fn->retr = newtmp("abi", Kl, fn);
emit(Ocopy, Kl, fn->retr, TMP(R8), R);
fn->reg |= BIT(R8);
}
}
t = tmp;
off = 0;
for (i=i0, c=ca; i<i1; i++, c++)
if (i->op == Oparc && !(c->class & Cptr)) {
if (c->class & Cstk) {
/* aggregate already in memory: bind the temporary to
 * the incoming stack slot, encoded as -(offset+2) */
off = align(off, c->align);
fn->tmp[i->to.val].slot = -(off+2);
off += c->size;
} else
/* copy the registers into the temporaries made by sttmps() above */
for (n=0; n<c->nreg; n++) {
r = TMP(c->reg[n]);
emit(Ocopy, c->cls[n], *t++, r, R);
}
} else if (c->class & Cstk) {
/* scalar (or aggregate pointer) passed on the stack */
off = align(off, c->align);
if (isparbh(i->op))
op = Oloadsb + (i->op - Oparsb);
else
op = Oload;
emit(op, *c->cls, i->to, SLOT(-(off+2)), R);
off += c->size;
} else {
emit(Ocopy, *c->cls, i->to, TMP(*c->reg), R);
}
return (Params){
.stk = align(off, 8),
.ngp = (cty >> 5) & 15,
.nfp = (cty >> 9) & 15
};
}
/* Move the instructions accumulated so far in the emission buffer to
 * a new block linked right after b, reset the buffer, and return the
 * new block.  The new block is named after b with a numeric suffix
 * taken from b's (incremented) visit counter.
 */
static Blk *
split(Fn *fn, Blk *b)
{
	Blk *nb;
	Ins *end;

	fn->nblk += 1;
	nb = newblk();
	end = &insb[NIns];
	idup(nb, curi, end-curi);
	curi = end;
	b->visit += 1;
	nb->visit = b->visit;
	nb->name = strf(PFn, "%s.%d", b->name, b->visit);
	nb->loop = b->loop;
	/* insert nb after b in the block list */
	nb->link = b->link;
	b->link = nb;
	return nb;
}
/* In every phi of block b, replace the predecessor bp by bp1.  Each
 * phi is expected to have an argument coming from bp.
 */
static void
chpred(Blk *b, Blk *bp, Blk *bp1)
{
	Phi *p;
	uint a;

	p = b->phi;
	while (p) {
		/* locate the argument that flows in from bp */
		a = 0;
		while (p->blk[a] != bp) {
			assert(a+1<p->narg);
			a++;
		}
		p->blk[a] = bp1;
		p = p->link;
	}
}
/* Lower a vaarg instruction for apple targets: the va_list at
 * i->arg[0] holds a single pointer.  The emitted code loads that
 * pointer, loads the value it designates into i->to, and stores the
 * pointer advanced by 8 back.  (The emit() calls are listed last
 * instruction first.)
 */
static void
apple_selvaarg(Fn *fn, Blk *b, Ins *i)
{
	Ref ap, cur, next, eight;

	(void)b;
	eight = getcon(8, fn);
	ap = i->arg[0];
	next = newtmp("abi", Kl, fn);
	cur = newtmp("abi", Kl, fn);

	emit(Ostorel, 0, R, next, ap);
	emit(Oadd, Kl, next, cur, eight);
	emit(Oload, i->cls, i->to, cur, R);
	emit(Oload, Kl, cur, ap, R);
}
/* Lower a vaarg instruction for non-apple targets.  Block b is split
 * in four: the head (still b) tests the register-area offset stored in
 * the va_list, @breg fetches the argument from the register save area,
 * @bstk fetches it from the stack, and @b0 merges both addresses with
 * a phi and performs the load.  Instructions are emitted bottom-up, so
 * the code below builds @b0 first and the head of b last.
 */
static void
arm64_selvaarg(Fn *fn, Blk *b, Ins *i)
{
Ref loc, lreg, lstk, nr, r0, r1, c8, c16, c24, c28, ap;
Blk *b0, *bstk, *breg;
int isgp;
c8 = getcon(8, fn);
c16 = getcon(16, fn);
c24 = getcon(24, fn);
c28 = getcon(28, fn);
ap = i->arg[0];
isgp = KBASE(i->cls) == 0;
/* @b [...]
r0 =l add ap, (24 or 28)
nr =l loadsw r0
r1 =w csltw nr, 0
jnz r1, @breg, @bstk
@breg
r0 =l add ap, (8 or 16)
r1 =l loadl r0
lreg =l add r1, nr
r0 =w add nr, (8 or 16)
r1 =l add ap, (24 or 28)
storew r0, r1
@bstk
lstk =l loadl ap
r0 =l add lstk, 8
storel r0, ap
@b0
%loc =l phi @breg %lreg, @bstk %lstk
i->to =(i->cls) load %loc
*/
/* @b0: the final load plus everything that followed the vaarg in b */
loc = newtmp("abi", Kl, fn);
emit(Oload, i->cls, i->to, loc, R);
b0 = split(fn, b);
b0->jmp = b->jmp;
b0->s1 = b->s1;
b0->s2 = b->s2;
/* b's successors now have b0 as predecessor */
if (b->s1)
chpred(b->s1, b, b0);
if (b->s2 && b->s2 != b->s1)
chpred(b->s2, b, b0);
/* @breg: argument taken from the register save area */
lreg = newtmp("abi", Kl, fn);
nr = newtmp("abi", Kl, fn);
r0 = newtmp("abi", Kw, fn);
r1 = newtmp("abi", Kl, fn);
emit(Ostorew, Kw, R, r0, r1);
emit(Oadd, Kl, r1, ap, isgp ? c24 : c28);
emit(Oadd, Kw, r0, nr, isgp ? c8 : c16);
r0 = newtmp("abi", Kl, fn);
r1 = newtmp("abi", Kl, fn);
emit(Oadd, Kl, lreg, r1, nr);
emit(Oload, Kl, r1, r0, R);
emit(Oadd, Kl, r0, ap, isgp ? c8 : c16);
breg = split(fn, b);
breg->jmp.type = Jjmp;
breg->s1 = b0;
/* @bstk: argument taken from the stack area */
lstk = newtmp("abi", Kl, fn);
r0 = newtmp("abi", Kl, fn);
emit(Ostorel, Kw, R, r0, ap);
emit(Oadd, Kl, r0, lstk, c8);
emit(Oload, Kl, lstk, ap, R);
bstk = split(fn, b);
bstk->jmp.type = Jjmp;
bstk->s1 = b0;
/* merge the two candidate addresses */
b0->phi = alloc(sizeof *b0->phi);
*b0->phi = (Phi){
.cls = Kl, .to = loc,
.narg = 2,
.blk = vnew(2, sizeof b0->phi->blk[0], PFn),
.arg = vnew(2, sizeof b0->phi->arg[0], PFn),
};
b0->phi->blk[0] = bstk;
b0->phi->blk[1] = breg;
b0->phi->arg[0] = lstk;
b0->phi->arg[1] = lreg;
/* head of b: branch to @breg when the stored offset is negative */
r0 = newtmp("abi", Kl, fn);
r1 = newtmp("abi", Kw, fn);
b->jmp.type = Jjnz;
b->jmp.arg = r1;
b->s1 = breg;
b->s2 = bstk;
emit(Ocmpw+Cislt, Kw, r1, nr, CON_Z);
emit(Oloadsw, Kl, nr, r0, R);
emit(Oadd, Kl, r0, ap, isgp ? c24 : c28);
}
/* Lower vastart for apple targets: store in the va_list at ap the
 * address of SLOT(-1) advanced by the size of the named stack
 * parameters (p.stk).  (The emit() calls are listed last instruction
 * first.)
 */
static void
apple_selvastart(Fn *fn, Params p, Ref ap)
{
	Ref skip, base, first;

	skip = getcon(p.stk, fn);
	base = newtmp("abi", Kl, fn);
	first = newtmp("abi", Kl, fn);

	emit(Ostorel, 0, R, first, ap);
	emit(Oadd, Kl, first, base, skip);
	emit(Oaddr, Kl, base, SLOT(-1), R);
}
/* Lower vastart for non-apple targets by filling the five fields of
 * the va_list at ap.  With rsave = address of SLOT(-1), the emitted
 * code stores (in execution order, i.e. bottom-up in this listing):
 *   ap+28 <- (p.nfp-8)*16     (32-bit)
 *   ap+24 <- (p.ngp-8)*8      (32-bit)
 *   ap+16 <- rsave + 192
 *   ap+8  <- rsave + 64
 *   ap+0  <- rsave + p.stk + 192
 * NOTE(review): these offsets look like the AAPCS64 va_list layout
 * (__stack, __gr_top, __vr_top, __gr_offs, __vr_offs) -- confirm.
 */
static void
arm64_selvastart(Fn *fn, Params p, Ref ap)
{
Ref r0, r1, rsave;
rsave = newtmp("abi", Kl, fn);
r0 = newtmp("abi", Kl, fn);
emit(Ostorel, Kw, R, r0, ap);
emit(Oadd, Kl, r0, rsave, getcon(p.stk + 192, fn));
r0 = newtmp("abi", Kl, fn);
r1 = newtmp("abi", Kl, fn);
emit(Ostorel, Kw, R, r1, r0);
emit(Oadd, Kl, r1, rsave, getcon(64, fn));
emit(Oadd, Kl, r0, ap, getcon(8, fn));
r0 = newtmp("abi", Kl, fn);
r1 = newtmp("abi", Kl, fn);
emit(Ostorel, Kw, R, r1, r0);
emit(Oadd, Kl, r1, rsave, getcon(192, fn));
/* rsave is defined here, i.e. before all of its uses at run time */
emit(Oaddr, Kl, rsave, SLOT(-1), R);
emit(Oadd, Kl, r0, ap, getcon(16, fn));
r0 = newtmp("abi", Kl, fn);
emit(Ostorew, Kw, R, getcon((p.ngp-8)*8, fn), r0);
emit(Oadd, Kl, r0, ap, getcon(24, fn));
r0 = newtmp("abi", Kl, fn);
emit(Ostorew, Kw, R, getcon((p.nfp-8)*16, fn), r0);
emit(Oadd, Kl, r0, ap, getcon(28, fn));
}
/* ABI lowering entry point: rewrites the parameters of fn, then every
 * call, return and variadic instruction of every block, into
 * register-level code.  Each block is rebuilt bottom-up in the global
 * instruction buffer (insb/curi) and copied back with idup().
 */
void
arm64_abi(Fn *fn)
{
Blk *b;
Ins *i, *i0;
Insl *il;
int n0, n1, ioff;
Params p;
/* visit is used both as a "created by split()" marker and as a name counter */
for (b=fn->start; b; b=b->link)
b->visit = 0;
/* lower parameters */
for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
if (!ispar(i->op))
break;
p = selpar(fn, b->ins, i);
/* replace the par instructions by the n0 instructions selpar() emitted */
n0 = &insb[NIns] - curi;
ioff = i - b->ins;
n1 = b->nins - ioff;
vgrow(&b->ins, n0+n1);
icpy(b->ins+n0, b->ins+ioff, n1);
icpy(b->ins, curi, n0);
b->nins = n0+n1;
/* lower calls, returns, and vararg instructions */
il = 0;
b = fn->start;
do {
if (!(b = b->link))
b = fn->start; /* do it last */
/* blocks created by split() are already lowered */
if (b->visit)
continue;
curi = &insb[NIns];
selret(b, fn);
for (i=&b->ins[b->nins]; i!=b->ins;)
switch ((--i)->op) {
default:
emiti(*i);
break;
case Ocall:
/* find the first argument instruction of this call */
for (i0=i; i0>b->ins; i0--)
if (!isarg((i0-1)->op))
break;
selcall(fn, i0, i, &il);
i = i0;
break;
case Ovastart:
if (T.apple)
apple_selvastart(fn, p, i->arg[0]);
else
arm64_selvastart(fn, p, i->arg[0]);
break;
case Ovaarg:
if (T.apple)
apple_selvaarg(fn, b, i);
else
arm64_selvaarg(fn, b, i);
break;
case Oarg:
case Oargc:
/* arguments are consumed by the Ocall case above */
die("unreachable");
}
/* the start block is processed last and receives the queued stack blobs */
if (b == fn->start)
for (; il; il=il->link)
emiti(il->i);
idup(b, curi, &insb[NIns]-curi);
} while (b != fn->start);
if (debug['A']) {
fprintf(stderr, "\n> After ABI lowering:\n");
printfn(fn, stderr);
}
}
/* abi0 for apple target; introduces
 * necessary sign extensions in calls
 * and returns
 *
 * Sub-word returns become a Jretw of an explicitly extended value;
 * sub-word call arguments are redirected to fresh temporaries that
 * are defined by extension instructions placed before the argument
 * list.
 */
void
apple_extsb(Fn *fn)
{
Blk *b;
Ins *i0, *i1, *i;
int j, op;
Ref r;
for (b=fn->start; b; b=b->link) {
curi = &insb[NIns];
j = b->jmp.type;
if (isretbh(j)) {
r = newtmp("abi", Kw, fn);
op = Oextsb + (j - Jretsb);
emit(op, Kw, r, b->jmp.arg, R);
b->jmp.arg = r;
b->jmp.type = Jretw;
}
/* the block is rebuilt bottom-up */
for (i=&b->ins[b->nins]; i>b->ins;) {
emiti(*--i);
if (i->op != Ocall)
continue;
/* [i0, i1) are the argument instructions of this call */
for (i0=i1=i; i0>b->ins; i0--)
if (!isarg((i0-1)->op))
break;
/* first pass: re-emit the arguments, pointing sub-word
 * ones at a new temporary (remembered in i->to) */
for (i=i1; i>i0;) {
emiti(*--i);
if (isargbh(i->op)) {
i->to = newtmp("abi", Kl, fn);
curi->arg[0] = i->to;
}
}
/* second pass: emit the extensions defining those temporaries */
for (i=i1; i>i0;)
if (isargbh((--i)->op)) {
op = Oextsb + (i->op - Oargsb);
emit(op, Kw, i->to, i->arg[0], R);
}
}
idup(b, curi, &insb[NIns]-curi);
}
if (debug['A']) {
fprintf(stderr, "\n> After Apple pre-ABI:\n");
printfn(fn, stderr);
}
}
+38
View File
@@ -0,0 +1,38 @@
#include "../all.h"
/* Register numbering for the arm64 backend: general-purpose registers
 * first (R0 .. SP), then vector registers (V0 .. V30), followed by
 * derived counts. */
enum Arm64Reg {
R0 = RXX + 1,
R1, R2, R3, R4, R5, R6, R7,
R8, R9, R10, R11, R12, R13, R14, R15,
IP0, IP1, R18, R19, R20, R21, R22, R23,
R24, R25, R26, R27, R28, FP, LR, SP,
V0, V1, V2, V3, V4, V5, V6, V7,
V8, V9, V10, V11, V12, V13, V14, V15,
V16, V17, V18, V19, V20, V21, V22, V23,
V24, V25, V26, V27, V28, V29, V30, /* V31, */
NFPR = V30 - V0 + 1, /* vector registers numbered here (V31 is left out) */
NGPR = SP - R0 + 1, /* general-purpose registers, SP included */
NGPS = R18 - R0 + 1 /* LR */ + 1, /* R0..R18 plus LR */
NFPS = (V7 - V0 + 1) + (V30 - V16 + 1), /* V0..V7 and V16..V30 */
NCLR = (R28 - R19 + 1) + (V15 - V8 + 1), /* R19..R28 and V8..V15 */
};
/* every machine register number must stay below the first temporary */
MAKESURE(reg_not_tmp, V30 < (int)Tmp0);
/* targ.c */
/* register lists, each terminated by -1 */
extern int arm64_rsave[];
extern int arm64_rclob[];
/* abi.c */
/* decode an RCall word into a register bit set (and counts, if the array is non-null) */
bits arm64_retregs(Ref, int[2]);
bits arm64_argregs(Ref, int[2]);
void arm64_abi(Fn *);
void apple_extsb(Fn *);
/* isel.c */
/* nonzero when the value can be encoded as a logical immediate for the given class */
int arm64_logimm(uint64_t, int);
void arm64_isel(Fn *);
/* emit.c */
void arm64_emitfn(Fn *, FILE *);
+692
View File
@@ -0,0 +1,692 @@
#include "all.h"
/* Emission state threaded through the emitter for one function. */
typedef struct E E;
struct E {
FILE *f; /* output stream */
Fn *fn; /* function being emitted */
uint64_t frame; /* frame size in bytes, computed by framelayout() */
uint padding; /* bytes added by framelayout() when rounding the slot count up */
};
/* X-macro listing, for each comparison code, the arm64 condition
 * mnemonic and the mnemonic of its negation:
 *   X(comparison, condition, negated condition)
 */
#define CMP(X) \
X(Cieq, "eq", "ne") \
X(Cine, "ne", "eq") \
X(Cisge, "ge", "lt") \
X(Cisgt, "gt", "le") \
X(Cisle, "le", "gt") \
X(Cislt, "lt", "ge") \
X(Ciuge, "cs", "cc") \
X(Ciugt, "hi", "ls") \
X(Ciule, "ls", "hi") \
X(Ciult, "cc", "cs") \
X(NCmpI+Cfeq, "eq", "ne") \
X(NCmpI+Cfge, "ge", "lt") \
X(NCmpI+Cfgt, "gt", "le") \
X(NCmpI+Cfle, "ls", "hi") \
X(NCmpI+Cflt, "mi", "pl") \
X(NCmpI+Cfne, "ne", "eq") \
X(NCmpI+Cfo, "vc", "vs") \
X(NCmpI+Cfuo, "vs", "vc")
/* pseudo-classes usable in the cls column of omap[] */
enum {
Ki = -1, /* matches Kw and Kl */
Ka = -2, /* matches all classes */
};
/* Opcode -> assembly template table, searched linearly by emitins();
 * the first entry whose op and class match wins, so an integer-only
 * (Ki) entry must precede the catch-all (Ka) entry of the same op.
 * The templates are interpreted by emitf(): %= is the destination,
 * %0/%1 the arguments, %M<n> a memory operand, %? a scratch register,
 * and a W/L/S/D prefix forces the class of the next operand.
 * The table ends with an entry whose op is NOp.
 */
static struct {
short op;
short cls;
char *fmt;
} omap[] = {
{ Oadd, Ki, "add %=, %0, %1" },
{ Oadd, Ka, "fadd %=, %0, %1" },
{ Osub, Ki, "sub %=, %0, %1" },
{ Osub, Ka, "fsub %=, %0, %1" },
{ Oneg, Ki, "neg %=, %0" },
{ Oneg, Ka, "fneg %=, %0" },
{ Oand, Ki, "and %=, %0, %1" },
{ Oor, Ki, "orr %=, %0, %1" },
{ Oxor, Ki, "eor %=, %0, %1" },
{ Osar, Ki, "asr %=, %0, %1" },
{ Oshr, Ki, "lsr %=, %0, %1" },
{ Oshl, Ki, "lsl %=, %0, %1" },
{ Omul, Ki, "mul %=, %0, %1" },
{ Omul, Ka, "fmul %=, %0, %1" },
{ Odiv, Ki, "sdiv %=, %0, %1" },
{ Odiv, Ka, "fdiv %=, %0, %1" },
{ Oudiv, Ki, "udiv %=, %0, %1" },
/* remainders: quotient in the scratch register, then multiply-subtract */
{ Orem, Ki, "sdiv %?, %0, %1\n\tmsub\t%=, %?, %1, %0" },
{ Ourem, Ki, "udiv %?, %0, %1\n\tmsub\t%=, %?, %1, %0" },
{ Ocopy, Ki, "mov %=, %0" },
{ Ocopy, Ka, "fmov %=, %0" },
{ Oswap, Ki, "mov %?, %0\n\tmov\t%0, %1\n\tmov\t%1, %?" },
{ Oswap, Ka, "fmov %?, %0\n\tfmov\t%0, %1\n\tfmov\t%1, %?" },
{ Ostoreb, Kw, "strb %W0, %M1" },
{ Ostoreh, Kw, "strh %W0, %M1" },
{ Ostorew, Kw, "str %W0, %M1" },
{ Ostorel, Kw, "str %L0, %M1" },
{ Ostores, Kw, "str %S0, %M1" },
{ Ostored, Kw, "str %D0, %M1" },
{ Oloadsb, Ki, "ldrsb %=, %M0" },
{ Oloadub, Ki, "ldrb %W=, %M0" },
{ Oloadsh, Ki, "ldrsh %=, %M0" },
{ Oloaduh, Ki, "ldrh %W=, %M0" },
{ Oloadsw, Kw, "ldr %=, %M0" },
{ Oloadsw, Kl, "ldrsw %=, %M0" },
{ Oloaduw, Ki, "ldr %W=, %M0" },
{ Oload, Ka, "ldr %=, %M0" },
{ Oextsb, Ki, "sxtb %=, %W0" },
{ Oextub, Ki, "uxtb %W=, %W0" },
{ Oextsh, Ki, "sxth %=, %W0" },
{ Oextuh, Ki, "uxth %W=, %W0" },
{ Oextsw, Ki, "sxtw %L=, %W0" },
{ Oextuw, Ki, "mov %W=, %W0" },
{ Oexts, Kd, "fcvt %=, %S0" },
{ Otruncd, Ks, "fcvt %=, %D0" },
{ Ocast, Kw, "fmov %=, %S0" },
{ Ocast, Kl, "fmov %=, %D0" },
{ Ocast, Ks, "fmov %=, %W0" },
{ Ocast, Kd, "fmov %=, %L0" },
{ Ostosi, Ka, "fcvtzs %=, %S0" },
{ Ostoui, Ka, "fcvtzu %=, %S0" },
{ Odtosi, Ka, "fcvtzs %=, %D0" },
{ Odtoui, Ka, "fcvtzu %=, %D0" },
{ Oswtof, Ka, "scvtf %=, %W0" },
{ Ouwtof, Ka, "ucvtf %=, %W0" },
{ Osltof, Ka, "scvtf %=, %L0" },
{ Oultof, Ka, "ucvtf %=, %L0" },
{ Ocall, Kw, "blr %L0" },
{ Oacmp, Ki, "cmp %0, %1" },
{ Oacmn, Ki, "cmn %0, %1" },
{ Oafcmp, Ka, "fcmpe %0, %1" },
/* one "cset" entry per comparison listed in CMP() */
#define X(c, str, _) \
{ Oflag+c, Ki, "cset %=, " str },
CMP(X)
#undef X
{ NOp, 0, 0 }
};
/* V31 is not part of the register enumeration; the emitter uses it as
 * its floating-point scratch register under this file-local number. */
enum {
V31 = 0x1fffffff, /* local name for V31 */
};
/* Return the assembly name of register r when used with class k:
 * "sp", wN/xN for general-purpose registers, sN/dN for vector
 * registers (including the file-local V31).  Kx selects the 64-bit
 * name.  Dies on an invalid register or class.
 *
 * The result lives in a static buffer that is overwritten by the
 * next call, so it must be consumed before calling rname() again.
 * The longest names ("x30", "d31") fill the buffer exactly; the
 * writes are bounded with snprintf() so that a future wider name
 * cannot overrun it.
 */
static char *
rname(int r, int k)
{
	static char buf[4];

	if (r == SP) {
		assert(k == Kl);
		snprintf(buf, sizeof buf, "sp");
	}
	else if (R0 <= r && r <= LR)
		switch (k) {
		default: die("invalid class");
		case Kw: snprintf(buf, sizeof buf, "w%d", r-R0); break;
		case Kx:
		case Kl: snprintf(buf, sizeof buf, "x%d", r-R0); break;
		}
	else if (V0 <= r && r <= V30)
		switch (k) {
		default: die("invalid class");
		case Ks: snprintf(buf, sizeof buf, "s%d", r-V0); break;
		case Kx:
		case Kd: snprintf(buf, sizeof buf, "d%d", r-V0); break;
		}
	else if (r == V31)
		switch (k) {
		default: die("invalid class");
		case Ks: snprintf(buf, sizeof buf, "s31"); break;
		case Kd: snprintf(buf, sizeof buf, "d31"); break;
		}
	else
		die("invalid register");
	return buf;
}
/* Translate slot reference r into a byte offset from x29.
 *  - slot -1 designates the first byte above the frame;
 *  - other negative slots encode an offset above the frame as
 *    -(offset+2), shifted by 192 bytes in variadic functions on
 *    non-apple targets;
 *  - non-negative slots are 4-byte units above the padding.
 */
static uint64_t
slot(Ref r, E *e)
{
	uint64_t above;
	int s;

	s = rsval(r);
	if (s >= 0)
		return 16 + e->padding + 4 * s;

	above = 16 + e->frame;
	if (s == -1)
		return above;
	if (e->fn->vararg && !T.apple)
		above += 192;
	return above - (s+2);
}
/* Print instruction i following template s.  Plain characters are
 * copied, except that the first space becomes a tab.  Escapes:
 *   %W %L %S %D  force the class of the operand that follows
 *   %?           scratch register (IP1 for integers, V31 for floats)
 *   %=  %0       destination / first argument, must be a register
 *   %1           second argument, a register or an immediate
 *   %M0 %M1 %M=  memory operand: [reg] or [x29, offset] for a slot
 * A line starts with a tab and ends with a newline.
 */
static void
emitf(char *s, Ins *i, E *e)
{
Ref r;
int k, c;
Con *pc;
uint64_t n;
uint sp;
fputc('\t', e->f);
sp = 0;
for (;;) {
/* the class falls back to the instruction's class for every escape */
k = i->cls;
while ((c = *s++) != '%')
if (c == ' ' && !sp) {
fputc('\t', e->f);
sp = 1;
} else if (!c) {
fputc('\n', e->f);
return;
} else
fputc(c, e->f);
Switch:
switch ((c = *s++)) {
default:
die("invalid escape");
case 'W':
k = Kw;
goto Switch;
case 'L':
k = Kl;
goto Switch;
case 'S':
k = Ks;
goto Switch;
case 'D':
k = Kd;
goto Switch;
case '?':
if (KBASE(k) == 0)
fputs(rname(IP1, k), e->f);
else
fputs(rname(V31, k), e->f);
break;
case '=':
case '0':
r = c == '=' ? i->to : i->arg[0];
assert(isreg(r) || req(r, TMP(V31)));
fputs(rname(r.val, k), e->f);
break;
case '1':
r = i->arg[1];
switch (rtype(r)) {
default:
die("invalid second argument");
case RTmp:
assert(isreg(r));
fputs(rname(r.val, k), e->f);
break;
case RCon:
pc = &e->fn->con[r.val];
n = pc->bits.i;
assert(pc->type == CBits);
/* three immediate shapes: logical immediate, 12 bits shifted
 * left by 12, or plain 12 bits */
if (n >> 24) {
assert(arm64_logimm(n, k));
fprintf(e->f, "#%"PRIu64, n);
} else if (n & 0xfff000) {
assert(!(n & ~0xfff000ull));
fprintf(e->f, "#%"PRIu64", lsl #12",
n>>12);
} else {
assert(!(n & ~0xfffull));
fprintf(e->f, "#%"PRIu64, n);
}
break;
}
break;
case 'M':
c = *s++;
assert(c == '0' || c == '1' || c == '=');
r = c == '=' ? i->to : i->arg[c - '0'];
switch (rtype(r)) {
default:
die("todo (arm emit): unhandled ref");
case RTmp:
assert(isreg(r));
fprintf(e->f, "[%s]", rname(r.val, Kl));
break;
case RSlot:
fprintf(e->f, "[x29, %"PRIu64"]", slot(r, e));
break;
}
break;
}
}
}
/* Emit the instruction sequence loading the address denoted by c into
 * the register named rn.  The sequence is chosen by symbol kind and
 * target, and written as a small template in which
 *   R  expands to the register name,
 *   S  expands to the symbol (with the target's symbol prefix unless
 *      the name starts with a double quote),
 *   O  expands to "+offset" when the constant has a non-zero offset.
 */
static void
loadaddr(Con *c, char *rn, E *e)
{
char *p, *l, *s;
switch (c->sym.type) {
default:
die("unreachable");
case SGlo:
if (T.apple)
s = "\tadrp\tR, S@pageO\n"
"\tadd\tR, R, S@pageoffO\n";
else
s = "\tadrp\tR, SO\n"
"\tadd\tR, R, #:lo12:SO\n";
break;
case SExtThr:
if (!T.apple)
die("extern thread unavailable on arm64");
/* fall through */
case SThr:
if (T.apple)
s = "\tadrp\tR, S@tlvppage\n"
"\tldr\tR, [R, S@tlvppageoff]\n";
else
s = "\tmrs\tR, tpidr_el0\n"
"\tadd\tR, R, #:tprel_hi12:SO, lsl #12\n"
"\tadd\tR, R, #:tprel_lo12_nc:SO\n";
break;
case SExt:
if (T.apple)
s = "\tadrp\tR, S@gotpageO\n"
"\tldr\tR, [R, S@gotpageoffO]\n";
else
s = "\tadrp\tR, :got:SO\n"
"\tldr\tR, [R, #:got_lo12:SO]\n";
break;
}
l = str(c->sym.id);
/* quoted names are emitted verbatim, without the symbol prefix */
p = l[0] == '"' ? "" : T.assym;
for (; *s; s++)
switch (*s) {
default:
fputc(*s, e->f);
break;
case 'R':
fputs(rn, e->f);
break;
case 'S':
fputs(p, e->f);
fputs(l, e->f);
break;
case 'O':
if (c->bits.i)
/* todo, handle large offsets */
fprintf(e->f, "+%"PRIi64, c->bits.i);
break;
}
}
/* Emit code that materializes constant c in register r with class k.
 * Addresses are delegated to loadaddr().  Integer constants use a
 * single mov when the value has all bits above the low 16 set or is
 * a valid logical immediate; otherwise the low 16 bits are moved
 * first and the remaining 16-bit chunks are inserted with movk until
 * the rest of the value is zero or the register width is reached.
 */
static void
loadcon(Con *c, int r, int k, E *e)
{
	char *rn;
	int64_t n;
	int w, sh, lim;

	w = KWIDE(k);
	rn = rname(r, k);
	n = c->bits.i;
	if (c->type == CAddr) {
		/* addresses always use the 64-bit register name */
		loadaddr(c, rname(r, Kl), e);
		return;
	}
	assert(c->type == CBits);
	if (!w)
		n = (int32_t)n;
	if ((n | 0xffff) == -1 || arm64_logimm(n, k)) {
		fprintf(e->f, "\tmov\t%s, #%"PRIi64"\n", rn, n);
		return;
	}

	fprintf(e->f, "\tmov\t%s, #%d\n",
		rn, (int)(n & 0xffff));
	lim = w ? 64 : 32;
	sh = 16;
	while ((n >>= 16) != 0 && sh != lim) {
		fprintf(e->f, "\tmovk\t%s, #0x%x, lsl #%d\n",
			rn, (uint)(n & 0xffff), sh);
		sh += 16;
	}
}
static void emitins(Ins *, E *);

/* Make sure the slot operand *pr can be addressed by a load or store
 * of sz bytes.  When its offset exceeds sz * 4095, the address is
 * computed into register t and *pr is rewritten to that register.
 * Returns 1 only when the offset is too large and no register was
 * offered (t < 0); the operand is then left untouched.
 */
static int
fixarg(Ref *pr, int sz, int t, E *e)
{
	Ins addr;
	Ref ref;

	ref = *pr;
	if (rtype(ref) != RSlot)
		return 0;
	if (slot(ref, e) <= sz * 4095u)
		return 0;
	if (t < 0)
		return 1;
	addr = (Ins){Oaddr, Kl, TMP(t), {ref}};
	emitins(&addr, e);
	*pr = TMP(t);
	return 0;
}
/* Emit the assembly for one instruction.  Most opcodes are printed
 * from their omap[] template; copies, address computations, calls to
 * constants, stack allocations and debug locations are special-cased.
 * The instruction may be modified in place.
 */
static void
emitins(Ins *i, E *e)
{
char *l, *p, *rn;
uint64_t s;
int o, t;
Ref r;
Con *c;
switch (i->op) {
default:
/* slot operands that are too far get their address computed first */
if (isload(i->op))
fixarg(&i->arg[0], loadsz(i), IP1, e);
if (isstore(i->op)) {
/* R18 is tried first as address register, except on apple targets */
t = T.apple ? -1 : R18;
if (fixarg(&i->arg[1], storesz(i), t, e)) {
/* IP1 is needed for the address: if it also holds the value
 * to store, move the value to V31 and store from there */
if (req(i->arg[0], TMP(IP1))) {
fprintf(e->f,
"\tfmov\t%c31, %c17\n",
"ds"[i->cls == Kw],
"xw"[i->cls == Kw]);
i->arg[0] = TMP(V31);
i->op = Ostores + (i->cls-Kw);
}
fixarg(&i->arg[1], storesz(i), IP1, e);
}
}
Table:
/* most instructions are just pulled out of
 * the table omap[], some special cases are
 * detailed below */
for (o=0;; o++) {
/* this linear search should really be a binary
 * search */
if (omap[o].op == NOp)
die("no match for %s(%c)",
optab[i->op].name, "wlsd"[i->cls]);
if (omap[o].op == i->op)
if (omap[o].cls == i->cls || omap[o].cls == Ka
|| (omap[o].cls == Ki && KBASE(i->cls) == 0))
break;
}
emitf(omap[o].fmt, i, e);
break;
case Onop:
break;
case Ocopy:
if (req(i->to, i->arg[0]))
break;
/* copy to a slot: becomes a store, going through IP1 when
 * the source is not a register */
if (rtype(i->to) == RSlot) {
r = i->to;
if (!isreg(i->arg[0])) {
i->to = TMP(IP1);
emitins(i, e);
i->arg[0] = i->to;
}
i->op = Ostorew + i->cls;
i->cls = Kw;
i->arg[1] = r;
emitins(i, e);
break;
}
assert(isreg(i->to));
switch (rtype(i->arg[0])) {
case RCon:
c = &e->fn->con[i->arg[0].val];
loadcon(c, i->to.val, i->cls, e);
break;
case RSlot:
i->op = Oload;
emitins(i, e);
break;
default:
assert(i->to.val != IP1);
goto Table;
}
break;
case Oaddr:
assert(rtype(i->arg[0]) == RSlot);
rn = rname(i->to.val, Kl);
s = slot(i->arg[0], e);
/* pick the shortest sequence able to hold the offset */
if (s <= 4095)
fprintf(e->f, "\tadd\t%s, x29, #%"PRIu64"\n", rn, s);
else if (s <= 65535)
fprintf(e->f,
"\tmov\t%s, #%"PRIu64"\n"
"\tadd\t%s, x29, %s\n",
rn, s, rn, rn
);
else
fprintf(e->f,
"\tmov\t%s, #%"PRIu64"\n"
"\tmovk\t%s, #%"PRIu64", lsl #16\n"
"\tadd\t%s, x29, %s\n",
rn, s & 0xFFFF, rn, s >> 16, rn, rn
);
break;
case Ocall:
/* indirect calls go through the table ("blr") */
if (rtype(i->arg[0]) != RCon)
goto Table;
c = &e->fn->con[i->arg[0].val];
if (c->type != CAddr
|| (c->sym.type & SThr)
|| c->bits.i)
die("invalid call argument");
l = str(c->sym.id);
p = l[0] == '"' ? "" : T.assym;
fprintf(e->f, "\tbl\t%s%s\n", p, l);
break;
case Osalloc:
emitf("sub sp, sp, %0", i, e);
if (!req(i->to, R))
emitf("mov %=, sp", i, e);
break;
case Odbgloc:
emitdbgloc(i->arg[0].val, i->arg[1].val, e->f);
break;
}
}
/* Compute e->frame and e->padding for the current function.  The
 * number of slots (4-byte units) is rounded up to a multiple of 4 and
 * the number of used callee-save registers to an even count; the
 * frame holds both, and padding is the number of bytes added by the
 * slot rounding.
 */
static void
framelayout(E *e)
{
	int *reg;
	uint nsave;
	uint64_t nslot;

	/* count the callee-save registers the function uses */
	nsave = 0;
	for (reg=arm64_rclob; *reg>=0; reg++)
		if (e->fn->reg & BIT(*reg))
			nsave++;
	if (nsave & 1)
		nsave++;

	nslot = e->fn->slot;
	nslot = (nslot + 3) & -4;
	e->padding = 4*(nslot-e->fn->slot);
	e->frame = 4*nslot + 8*nsave;
}
/*
Stack-frame layout:
+=============+
| varargs |
| save area |
+-------------+
| callee-save | ^
| registers | |
+-------------+ |
| ... | |
| spill slots | |
| ... | | e->frame
+-------------+ |
| ... | |
| locals | |
| ... | |
+-------------+ |
| e->padding | v
+-------------+
| saved x29 |
| saved x30 |
+=============+ <- x29
*/
/* Emit the assembly of function fn to out: linkage directives,
 * prologue (including the 192-byte register save area of variadic
 * functions on non-apple targets: 8 q registers and 8 x registers),
 * callee-save spills, every block with its terminating jump, and an
 * epilogue for each return.  Block labels are numbered from a static
 * counter so that they stay unique across functions.
 */
void
arm64_emitfn(Fn *fn, FILE *out)
{
	/* condition mnemonic and its negation, indexed by comparison */
	static char *ctoa[][2] = {
	#define X(c, s, n) [c] = {s, n},
		CMP(X)
	#undef X
	};
	static int id0;
	int s, n, c, lbl, *r;
	uint64_t o;
	Blk *b, *t;
	Ins *i;
	E *e;

	e = &(E){.f = out, .fn = fn};
	if (T.apple)
		e->fn->lnk.align = 4;
	emitfnlnk(e->fn->name, &e->fn->lnk, e->f);
	fputs("\thint\t#34\n", e->f);
	framelayout(e);

	if (e->fn->vararg && !T.apple) {
		for (n=7; n>=0; n--)
			fprintf(e->f, "\tstr\tq%d, [sp, -16]!\n", n);
		for (n=7; n>=0; n-=2)
			fprintf(e->f, "\tstp\tx%d, x%d, [sp, -16]!\n", n-1, n);
	}

	/* allocate the frame and save x29/x30, using the shortest
	 * sequence able to encode the frame size */
	if (e->frame + 16 <= 512)
		fprintf(e->f,
			"\tstp\tx29, x30, [sp, -%"PRIu64"]!\n",
			e->frame + 16
		);
	else if (e->frame <= 4095)
		fprintf(e->f,
			"\tsub\tsp, sp, #%"PRIu64"\n"
			"\tstp\tx29, x30, [sp, -16]!\n",
			e->frame
		);
	else if (e->frame <= 65535)
		fprintf(e->f,
			"\tmov\tx16, #%"PRIu64"\n"
			"\tsub\tsp, sp, x16\n"
			"\tstp\tx29, x30, [sp, -16]!\n",
			e->frame
		);
	else
		fprintf(e->f,
			"\tmov\tx16, #%"PRIu64"\n"
			"\tmovk\tx16, #%"PRIu64", lsl #16\n"
			"\tsub\tsp, sp, x16\n"
			"\tstp\tx29, x30, [sp, -16]!\n",
			e->frame & 0xFFFF, e->frame >> 16
		);
	fputs("\tmov\tx29, sp\n", e->f);

	/* save the callee-save registers at the top of the frame */
	s = (e->frame - e->padding) / 4;
	for (r=arm64_rclob; *r>=0; r++)
		if (e->fn->reg & BIT(*r)) {
			s -= 2;
			i = &(Ins){.arg = {TMP(*r), SLOT(s)}};
			i->op = *r >= V0 ? Ostored : Ostorel;
			emitins(i, e);
		}

	for (lbl=0, b=e->fn->start; b; b=b->link) {
		if (lbl || b->npred > 1)
			fprintf(e->f, "%s%d:\n", T.asloc, id0+b->id);
		for (i=b->ins; i!=&b->ins[b->nins]; i++)
			emitins(i, e);
		lbl = 1;
		switch (b->jmp.type) {
		case Jhlt:
			fprintf(e->f, "\tbrk\t#1000\n");
			break;
		case Jret0:
			/* restore in the same slot order as the saves above */
			s = (e->frame - e->padding) / 4;
			for (r=arm64_rclob; *r>=0; r++)
				if (e->fn->reg & BIT(*r)) {
					s -= 2;
					i = &(Ins){Oload, 0, TMP(*r), {SLOT(s)}};
					i->cls = *r >= V0 ? Kd : Kl;
					emitins(i, e);
				}
			if (e->fn->dynalloc)
				fputs("\tmov sp, x29\n", e->f);
			o = e->frame + 16;
			if (e->fn->vararg && !T.apple)
				o += 192;
			if (o <= 504)
				fprintf(e->f,
					"\tldp\tx29, x30, [sp], %"PRIu64"\n",
					o
				);
			else if (o - 16 <= 4095)
				fprintf(e->f,
					"\tldp\tx29, x30, [sp], 16\n"
					"\tadd\tsp, sp, #%"PRIu64"\n",
					o - 16
				);
			else if (o - 16 <= 65535)
				fprintf(e->f,
					"\tldp\tx29, x30, [sp], 16\n"
					"\tmov\tx16, #%"PRIu64"\n"
					"\tadd\tsp, sp, x16\n",
					o - 16
				);
			else
				fprintf(e->f,
					"\tldp\tx29, x30, [sp], 16\n"
					"\tmov\tx16, #%"PRIu64"\n"
					"\tmovk\tx16, #%"PRIu64", lsl #16\n"
					"\tadd\tsp, sp, x16\n",
					(o - 16) & 0xFFFF, (o - 16) >> 16
				);
			fprintf(e->f, "\tret\n");
			break;
		case Jjmp:
		Jmp:
			/* a jump to the next block is a fall-through; the
			 * next block then needs no label on its account */
			if (b->s1 != b->link)
				fprintf(e->f,
					"\tb\t%s%d\n",
					T.asloc, id0+b->s1->id
				);
			else
				lbl = 0;
			break;
		default:
			c = b->jmp.type - Jjf;
			/* ctoa[] only has entries for the comparisons
			 * listed in CMP(); NOTE(review): this assumes
			 * NCmp is the number of those comparisons, so
			 * that NCmp itself is not a valid index --
			 * confirm against all.h */
			if (c < 0 || c >= NCmp)
				die("unhandled jump %d", b->jmp.type);
			/* when the false target follows, swap the targets
			 * and branch on the original condition; otherwise
			 * branch to s2 on the negated condition */
			if (b->link == b->s2) {
				t = b->s1;
				b->s1 = b->s2;
				b->s2 = t;
				n = 0;
			} else
				n = 1;
			fprintf(e->f,
				"\tb%s\t%s%d\n",
				ctoa[c][n], T.asloc, id0+b->s2->id
			);
			goto Jmp;
		}
	}
	id0 += e->fn->nblk;
	if (!T.apple)
		elf_emitfnfin(fn->name, out);
}
+316
View File
@@ -0,0 +1,316 @@
#include "all.h"
/* Shape of an integer constant as computed by imm(): the sign of the
 * value (p: non-negative, n: negative, reported by magnitude) and
 * which bits of the magnitude are set.  The three members of each
 * group must stay consecutive, imm() relies on it. */
enum Imm {
Iother, /* anything else */
Iplo12, /* fits in bits 0..11 */
Iphi12, /* fits in bits 12..23 */
Iplo24, /* fits in bits 0..23 */
Inlo12, /* negative, magnitude fits in bits 0..11 */
Inhi12, /* negative, magnitude fits in bits 12..23 */
Inlo24 /* negative, magnitude fits in bits 0..23 */
};
/* Classify constant c, taken with class k, by the shape of its
 * magnitude (see enum Imm).  For CBits constants *pn receives the
 * magnitude, whatever the classification; for other constants *pn is
 * left untouched and Iother is returned.
 */
static enum Imm
imm(Con *c, int k, int64_t *pn)
{
	int64_t mag;
	int base;

	if (c->type != CBits)
		return Iother;
	mag = c->bits.i;
	if (k == Kw)
		mag = (int32_t)mag;
	if (mag < 0) {
		base = Inlo12;
		mag = -(uint64_t)mag;
	} else
		base = Iplo12;
	*pn = mag;
	if ((mag & ~(int64_t)0x000fff) == 0)
		return base;
	if ((mag & ~(int64_t)0xfff000) == 0)
		return base + 1;
	if ((mag & ~(int64_t)0xffffff) == 0)
		return base + 2;
	return Iother;
}
/* Return non-zero when x, taken with class k, can be encoded as the
 * immediate of a logical instruction.  For Kw the low 32 bits are
 * replicated in the high half first.  The value (complemented when
 * its lowest bit is set) must be the repetition of a 4, 8, 16, 32 or
 * 64-bit unit that holds a single run of ones; the 2-bit pattern
 * 0xaaaa... is accepted directly.
 */
int
arm64_logimm(uint64_t x, int k)
{
	static const uint64_t rep[4] = {
		0x1111111111111111, 0x0101010101010101,
		0x0001000100010001, 0x0000000100000001,
	};
	static const uint64_t msk[4] = {
		0xf, 0xff, 0xffff, 0xffffffff,
	};
	uint64_t unit;
	int j;

	if (k == Kw)
		x = (x & 0xffffffff) | x << 32;
	if (x & 1)
		x = ~x;
	if (x == 0)
		return 0;
	if (x == 0xaaaaaaaaaaaaaaaa)
		return 1;

	/* smallest unit whose repetition gives x, the whole value
	 * when there is none */
	unit = x;
	for (j=0; j<4; j++)
		if (rep[j] * (x & msk[j]) == x) {
			unit = x & msk[j];
			break;
		}

	/* adding the lowest set bit clears a contiguous run of ones */
	return (unit & (unit + (unit & -unit))) == 0;
}
/* Rewrite operand *pr, used with class k, into something the emitter
 * accepts, emitting the instructions that compute the replacement
 * (bottom-up, like the rest of instruction selection):
 *  - thread-local addresses on apple targets are resolved by a call;
 *  - integer constants feeding a phi (phi != 0) are left alone;
 *  - other integer constants are copied into a fresh temporary;
 *  - float constants are stashed as data and loaded from there;
 *  - temporaries bound to a stack slot are replaced by the address
 *    of that slot.
 */
static void
fixarg(Ref *pr, int k, int phi, Fn *fn)
{
char buf[32];
Con *c, cc;
Ref r0, r1, r2, r3;
int s, n;
r0 = *pr;
switch (rtype(r0)) {
case RCon:
c = &fn->con[r0.val];
if (T.apple
&& c->type == CAddr
&& (c->sym.type & SThr)) {
r1 = newtmp("isel", Kl, fn);
*pr = r1;
/* a non-zero offset is added after the call returns */
if (c->bits.i) {
r2 = newtmp("isel", Kl, fn);
cc = (Con){.type = CBits};
cc.bits.i = c->bits.i;
r3 = newcon(&cc, fn);
emit(Oadd, Kl, r1, r2, r3);
r1 = r2;
}
/* at run time: r2 = symbol; r1 = load r2; R0 = r2; call r1;
 * result = R0 */
emit(Ocopy, Kl, r1, TMP(R0), R);
r1 = newtmp("isel", Kl, fn);
r2 = newtmp("isel", Kl, fn);
emit(Ocall, 0, R, r1, CALL(33));
emit(Ocopy, Kl, TMP(R0), r2, R);
emit(Oload, Kl, r1, r2, R);
cc = *c;
cc.bits.i = 0;
r3 = newcon(&cc, fn);
emit(Ocopy, Kl, r2, r3, R);
break;
}
if (KBASE(k) == 0 && phi)
return;
r1 = newtmp("isel", k, fn);
if (KBASE(k) == 0) {
emit(Ocopy, k, r1, r0, R);
} else {
/* the float is stored as data and referenced through a
 * quoted symbol built from the local-label prefix */
n = stashbits(c->bits.i, KWIDE(k) ? 8 : 4);
vgrow(&fn->con, ++fn->ncon);
c = &fn->con[fn->ncon-1];
sprintf(buf, "\"%sfp%d\"", T.asloc, n);
*c = (Con){.type = CAddr};
c->sym.id = intern(buf);
r2 = newtmp("isel", Kl, fn);
emit(Oload, k, r1, r2, R);
emit(Ocopy, Kl, r2, CON(c-fn->con), R);
}
*pr = r1;
break;
case RTmp:
s = fn->tmp[r0.val].slot;
/* -1 means the temporary is not bound to a slot */
if (s == -1)
break;
r1 = newtmp("isel", Kl, fn);
emit(Oaddr, Kl, r1, SLOT(s), R);
*pr = r1;
break;
}
}
/* Emit the comparison of arg[0] with arg[1] for class k.  Float
 * comparisons use Oafcmp.  For integers, a constant first operand is
 * swapped to the second position, and a constant second operand is
 * kept as an immediate when imm() says it fits: as is with Oacmp, or
 * by its magnitude with Oacmn when it is negative.  Returns 1 when
 * the operands were swapped, so that the caller can adjust the
 * condition; arg[] is modified by the swap.
 */
static int
selcmp(Ref arg[2], int k, Fn *fn)
{
Ref r, *iarg;
Con *c;
int swap, cmp, fix;
int64_t n;
if (KBASE(k) == 1) {
emit(Oafcmp, k, R, arg[0], arg[1]);
iarg = curi->arg;
fixarg(&iarg[0], k, 0, fn);
fixarg(&iarg[1], k, 0, fn);
return 0;
}
swap = rtype(arg[0]) == RCon;
if (swap) {
r = arg[1];
arg[1] = arg[0];
arg[0] = r;
}
/* fix tells whether the second operand still needs fixarg() */
fix = 1;
cmp = Oacmp;
r = arg[1];
if (rtype(r) == RCon) {
c = &fn->con[r.val];
switch (imm(c, k, &n)) {
default:
break;
case Iplo12:
case Iphi12:
fix = 0;
break;
case Inlo12:
case Inhi12:
cmp = Oacmn;
r = getcon(n, fn);
fix = 0;
break;
}
}
emit(cmp, k, R, arg[0], r);
/* keep a pointer to the operands: fixarg() moves curi */
iarg = curi->arg;
fixarg(&iarg[0], k, 0, fn);
if (fix)
fixarg(&iarg[1], k, 0, fn);
return swap;
}
/* Tell whether r can be used directly as the target of a call: any
 * temporary, or an address constant without offset.
 */
static int
callable(Ref r, Fn *fn)
{
	Con *c;

	switch (rtype(r)) {
	case RTmp:
		return 1;
	case RCon:
		c = &fn->con[r.val];
		return c->type == CAddr && c->bits.i == 0;
	default:
		return 0;
	}
}
/* Select code for one instruction (instructions are processed from
 * the end of the block to its start).  Allocations go through
 * salloc(), comparisons become a flag-setting compare followed by an
 * Oflag instruction, calls with a directly callable target are kept
 * as is, and every other instruction is re-emitted with its operands
 * passed through fixarg().
 */
static void
sel(Ins i, Fn *fn)
{
Ref *iarg;
Ins *i0;
int ck, cc;
if (INRANGE(i.op, Oalloc, Oalloc1)) {
/* NOTE(review): i0 presumably designates the instruction that
 * salloc() emits next -- confirm against salloc() */
i0 = curi - 1;
salloc(i.to, i.arg[0], fn);
fixarg(&i0->arg[0], Kl, 0, fn);
return;
}
if (iscmp(i.op, &ck, &cc)) {
emit(Oflag, i.cls, i.to, R, R);
i0 = curi;
/* selcmp() reports a swap of the operands: mirror the condition */
if (selcmp(i.arg, ck, fn))
i0->op += cmpop(cc);
else
i0->op += cc;
return;
}
if (i.op == Ocall)
if (callable(i.arg[0], fn)) {
emiti(i);
return;
}
if (i.op != Onop) {
emiti(i);
iarg = curi->arg; /* fixarg() can change curi */
fixarg(&iarg[0], argcls(&i, 0), 0, fn);
fixarg(&iarg[1], argcls(&i, 1), 0, fn);
}
}
/* Select the jump of block b.  Returns, plain jumps and halts need
 * nothing.  A jnz whose argument is defined in this block by a
 * comparison and used only once is fused with it: the comparison is
 * turned into a flag-setting compare and the jump into the matching
 * conditional jump.  Otherwise the argument is compared with zero and
 * the jump is taken when they differ.
 */
static void
seljmp(Blk *b, Fn *fn)
{
Ref r;
Ins *i, *ir;
int ck, cc, use;
if (b->jmp.type == Jret0
|| b->jmp.type == Jjmp
|| b->jmp.type == Jhlt)
return;
assert(b->jmp.type == Jjnz);
r = b->jmp.arg;
use = -1;
b->jmp.arg = R;
ir = 0;
/* look for the last definition of r in this block */
i = &b->ins[b->nins];
while (i > b->ins)
if (req((--i)->to, r)) {
use = fn->tmp[r.val].nuse;
ir = i;
break;
}
if (ir && use == 1
&& iscmp(ir->op, &ck, &cc)) {
if (selcmp(ir->arg, ck, fn))
cc = cmpop(cc);
b->jmp.type = Jjf + cc;
/* the comparison now lives in the jump */
*ir = (Ins){.op = Onop};
}
else {
selcmp((Ref[]){r, CON_Z}, Kw, fn);
b->jmp.type = Jjfine;
}
}
/* Instruction selection entry point.  First, constant-size
 * allocations of the start block are turned into stack slots; then
 * every block is rebuilt bottom-up: phi arguments flowing out of the
 * block, the jump, and finally the instructions.
 */
void
arm64_isel(Fn *fn)
{
Blk *b, **sb;
Ins *i;
Phi *p;
uint n, al;
int64_t sz;
/* assign slots to fast allocs */
b = fn->start;
/* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
/* one pass per alignment class: n is the alignment in bytes (4, 8,
 * 16); the scan for a class stops at its first non-constant size */
for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
for (i=b->ins; i<&b->ins[b->nins]; i++)
if (i->op == al) {
if (rtype(i->arg[0]) != RCon)
break;
sz = fn->con[i->arg[0].val].bits.i;
if (sz < 0 || sz >= INT_MAX-15)
err("invalid alloc size %"PRId64, sz);
/* round up to the alignment, then convert to 4-byte slot units */
sz = (sz + n-1) & -n;
sz /= 4;
fn->tmp[i->to.val].slot = fn->slot;
fn->slot += sz;
*i = (Ins){.op = Onop};
}
for (b=fn->start; b; b=b->link) {
curi = &insb[NIns];
/* fix the phi arguments that successors receive from this block */
for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
for (p=(*sb)->phi; p; p=p->link) {
for (n=0; p->blk[n] != b; n++)
assert(n+1 < p->narg);
fixarg(&p->arg[n], p->cls, 1, fn);
}
seljmp(b, fn);
for (i=&b->ins[b->nins]; i!=b->ins;)
sel(*--i, fn);
idup(b, curi, &insb[NIns]-curi);
}
if (debug['I']) {
fprintf(stderr, "\n> After instruction selection:\n");
printfn(fn, stderr);
}
}
+69
View File
@@ -0,0 +1,69 @@
#include "all.h"
/* Registers listed in the target's rsave field, terminated by -1:
 * NGPS general-purpose registers followed by NFPS vector registers
 * (the sizes are checked by a MAKESURE at the end of the file). */
int arm64_rsave[] = {
R0, R1, R2, R3, R4, R5, R6, R7,
R8, R9, R10, R11, R12, R13, R14, R15,
IP0, IP1, R18, LR,
V0, V1, V2, V3, V4, V5, V6, V7,
V16, V17, V18, V19, V20, V21, V22, V23,
V24, V25, V26, V27, V28, V29, V30,
-1
};
/* Registers the emitter saves in the prologue and restores before
 * returning when the function uses them, terminated by -1. */
int arm64_rclob[] = {
R19, R20, R21, R22, R23, R24, R25, R26,
R27, R28,
V8, V9, V10, V11, V12, V13, V14, V15,
-1
};
/* bit set of the registers listed in the target's rglob field */
#define RGLOB (BIT(FP) | BIT(SP) | BIT(IP1) | BIT(R18))
/* Target hook: always returns 0, whatever the opcode. */
static int
arm64_memargs(int op)
{
	return (void)op, 0;
}
/* Designated initializers shared by the two arm64 targets.  The last
 * entry carries no line continuation, so the macro ends with it no
 * matter what follows on the next line (a trailing backslash there
 * would splice the following line into the macro unless that line is
 * blank).
 */
#define ARM64_COMMON \
	.gpr0 = R0, \
	.ngpr = NGPR, \
	.fpr0 = V0, \
	.nfpr = NFPR, \
	.rglob = RGLOB, \
	.nrglob = 4, \
	.rsave = arm64_rsave, \
	.nrsave = {NGPS, NFPS}, \
	.retregs = arm64_retregs, \
	.argregs = arm64_argregs, \
	.memargs = arm64_memargs, \
	.isel = arm64_isel, \
	.abi1 = arm64_abi, \
	.emitfn = arm64_emitfn, \
	.cansel = 0,
/* Default arm64 target: ELF output, ".L" local labels, no symbol prefix. */
Target T_arm64 = {
.name = "arm64",
.abi0 = elimsb,
.emitfin = elf_emitfin,
.asloc = ".L",
ARM64_COMMON
};
/* Apple arm64 target: Mach-O output, "L" local labels, "_" symbol
 * prefix, and apple_extsb as pre-ABI pass. */
Target T_arm64_apple = {
.name = "arm64_apple",
.apple = 1,
.abi0 = apple_extsb,
.emitfin = macho_emitfin,
.asloc = "L",
.assym = "_",
ARM64_COMMON
};
/* none of the RGLOB registers may be numbered R8 or below */
MAKESURE(globals_are_not_arguments,
(RGLOB & (BIT(R8+1) - 1)) == 0
);
/* the register lists must match the counts declared in all.h
 * (the extra entry is the -1 terminator) */
MAKESURE(arrays_size_ok,
sizeof arm64_rsave == (NGPS+NFPS+1) * sizeof(int) &&
sizeof arm64_rclob == (NCLR+1) * sizeof(int)
);