forked from GitHub/gf-core
report
This commit is contained in:
@@ -1,16 +1,30 @@
|
||||
abstract Imper = {
|
||||
|
||||
cat
|
||||
Stm ;
|
||||
Program ;
|
||||
Typ ;
|
||||
ListTyp ;
|
||||
Fun ListTyp Typ ;
|
||||
Body ListTyp ;
|
||||
Stm ;
|
||||
Exp Typ ;
|
||||
Var Typ ;
|
||||
ListExp ListTyp ;
|
||||
|
||||
fun
|
||||
Empty : Program ;
|
||||
Funct : (AS : ListTyp) -> (V : Typ) ->
|
||||
(Body AS) -> (Fun AS V -> Program) -> Program ;
|
||||
|
||||
BodyNil : Stm -> Body NilTyp ;
|
||||
BodyCons : (A : Typ) -> (AS : ListTyp) ->
|
||||
(Var A -> Body AS) -> Body (ConsTyp A AS) ;
|
||||
|
||||
Decl : (A : Typ) -> (Var A -> Stm) -> Stm ;
|
||||
Assign : (A : Typ) -> Var A -> Exp A -> Stm -> Stm ;
|
||||
Return : (A : Typ) -> Exp A -> Stm ;
|
||||
While : Exp TInt -> Stm -> Stm -> Stm ;
|
||||
IfElse : Exp TInt -> Stm -> Stm -> Stm -> Stm ;
|
||||
Block : Stm -> Stm -> Stm ;
|
||||
End : Stm ;
|
||||
|
||||
@@ -19,37 +33,22 @@ abstract Imper = {
|
||||
EFloat : Int -> Int -> Exp TFloat ;
|
||||
EAddI : Exp TInt -> Exp TInt -> Exp TInt ;
|
||||
EAddF : Exp TFloat -> Exp TFloat -> Exp TFloat ;
|
||||
ESubI : Exp TInt -> Exp TInt -> Exp TInt ;
|
||||
ESubF : Exp TFloat -> Exp TFloat -> Exp TFloat ;
|
||||
EMulI : Exp TInt -> Exp TInt -> Exp TInt ;
|
||||
EMulF : Exp TFloat -> Exp TFloat -> Exp TFloat ;
|
||||
ELtI : Exp TInt -> Exp TInt -> Exp TInt ;
|
||||
ELtF : Exp TFloat -> Exp TFloat -> Exp TInt ;
|
||||
EApp : (AS : ListTyp) -> (V : Typ) -> Fun AS V -> ListExp AS -> Exp V ;
|
||||
|
||||
TInt : Typ ;
|
||||
TFloat : Typ ;
|
||||
|
||||
cat
|
||||
Program ;
|
||||
Typs ;
|
||||
Fun Typs Typ ;
|
||||
Body Typs ;
|
||||
Exps Typs ;
|
||||
NilTyp : ListTyp ;
|
||||
ConsTyp : Typ -> ListTyp -> ListTyp ;
|
||||
|
||||
fun
|
||||
Empty : Program ;
|
||||
Funct : (AS : Typs) -> (V : Typ) ->
|
||||
(Body AS) -> (Fun V AS -> Program) -> Program ;
|
||||
|
||||
NilTyp : Typs ;
|
||||
ConsTyp : Typ -> Typs -> Typs ;
|
||||
|
||||
BodyNil : Stm -> Body NilTyp ;
|
||||
BodyCons : (A : Typ) -> (AS : Typs) ->
|
||||
(Var A -> Body AS) -> Body (ConsTyp A AS) ;
|
||||
|
||||
EApp : (AS : Typs) -> (V : Typ) -> Fun AS V -> Exps AS -> Exp V ;
|
||||
|
||||
NilExp : Exps NilTyp ;
|
||||
ConsExp : (A : Typ) -> (AS : Typs) ->
|
||||
Exp A -> Exps AS -> Exps (ConsExp A AS) ;
|
||||
NilExp : ListExp NilTyp ;
|
||||
ConsExp : (A : Typ) -> (AS : ListTyp) ->
|
||||
Exp A -> ListExp AS -> ListExp (ConsExp A AS) ;
|
||||
|
||||
}
|
||||
|
||||
@@ -1,58 +1,16 @@
|
||||
--# -path=.:../prelude
|
||||
|
||||
concrete ImperC of Imper = open Prelude, Precedence, ResImper in {
|
||||
|
||||
flags lexer=codevars ; unlexer=code ; startcat=Stm ;
|
||||
|
||||
-- code inside function bodies
|
||||
concrete ImperC of Imper = open ResImper in {
|
||||
flags lexer=codevars ; unlexer=code ; startcat=Stm ;
|
||||
|
||||
lincat
|
||||
Stm = SS ;
|
||||
Typ = SS ;
|
||||
Exp = PrecExp ;
|
||||
Var = SS ;
|
||||
|
||||
lin
|
||||
Decl typ cont = continue (typ.s ++ cont.$0) cont ;
|
||||
Assign _ x exp = continue (x.s ++ "=" ++ ex exp) ;
|
||||
Return _ exp = statement ("return" ++ ex exp) ;
|
||||
While exp loop = continue ("while" ++ paren (ex exp) ++ loop.s) ;
|
||||
Block stm = continue ("{" ++ stm.s ++ "}") ;
|
||||
End = statement [] ;
|
||||
|
||||
EVar _ x = constant x.s ;
|
||||
EInt n = constant n.s ;
|
||||
EFloat a b = constant (a.s ++ "." ++ b.s) ;
|
||||
EMulI = infixL p3 "*" ;
|
||||
EMulF = infixL p3 "*" ;
|
||||
EAddI = infixL p2 "+" ;
|
||||
EAddF = infixL p2 "+" ;
|
||||
ELtI = infixN p1 "<" ;
|
||||
ELtF = infixN p1 "<" ;
|
||||
|
||||
TInt = ss "int" ;
|
||||
TFloat = ss "float" ;
|
||||
|
||||
-- top-level code consisting of function definitions
|
||||
|
||||
lincat
|
||||
Program = SS ;
|
||||
Typs = SS ;
|
||||
Fun = SS ;
|
||||
Body = {s,s2 : Str ; size : Size} ;
|
||||
Exps = {s : Str ; size : Size} ;
|
||||
ListExp = {s : Str ; size : Size} ;
|
||||
|
||||
lin
|
||||
Empty = ss [] ;
|
||||
Funct args val body cont = ss (
|
||||
val.s ++ cont.$0 ++ paren body.s2 ++ "{" ++
|
||||
body.s ++
|
||||
"}" ++ ";" ++
|
||||
cont.s
|
||||
) ;
|
||||
|
||||
NilTyp = ss [] ;
|
||||
ConsTyp = cc2 ;
|
||||
body.s ++ "}" ++ ";" ++ cont.s) ;
|
||||
|
||||
BodyNil stm = stm ** {s2 = [] ; size = Zero} ;
|
||||
BodyCons typ _ body = {
|
||||
@@ -61,8 +19,29 @@ flags lexer=codevars ; unlexer=code ; startcat=Stm ;
|
||||
size = nextSize body.size
|
||||
} ;
|
||||
|
||||
Decl typ cont = continues (typ.s ++ cont.$0) cont ;
|
||||
Assign _ x exp = continues (x.s ++ "=" ++ ex exp) ;
|
||||
Return _ exp = statement ("return" ++ ex exp) ;
|
||||
While exp loop = continue ("while" ++ paren (ex exp) ++ loop.s) ;
|
||||
IfElse exp t f = continue ("if" ++ paren (ex exp) ++ t.s ++ "else" ++ f.s) ;
|
||||
Block stm = continue ("{" ++ stm.s ++ "}") ;
|
||||
End = ss [] ;
|
||||
|
||||
EVar _ x = constant x.s ;
|
||||
EInt n = constant n.s ;
|
||||
EFloat a b = constant (a.s ++ "." ++ b.s) ;
|
||||
EMulI, EMulF = infixL P2 "*" ;
|
||||
EAddI, EAddF = infixL P1 "+" ;
|
||||
ESubI, ESubF = infixL P1 "-" ;
|
||||
ELtI, ELtF = infixN P0 "<" ;
|
||||
|
||||
EApp args val f exps = constant (f.s ++ paren exps.s) ;
|
||||
|
||||
TInt = ss "int" ;
|
||||
TFloat = ss "float" ;
|
||||
NilTyp = ss [] ;
|
||||
ConsTyp = cc2 ;
|
||||
|
||||
NilExp = ss [] ** {size = Zero} ;
|
||||
ConsExp _ _ e es = {
|
||||
s = ex e ++ separator "," es.size ++ es.s ;
|
||||
|
||||
@@ -1,17 +1,27 @@
|
||||
--# -path=.:../prelude
|
||||
|
||||
concrete ImperJVM of Imper = open Prelude, Precedence, ResImper in {
|
||||
concrete ImperJVM of Imper = open ResImper in {
|
||||
|
||||
flags lexer=codevars ; unlexer=code ; startcat=Stm ;
|
||||
|
||||
lincat
|
||||
Body = {s,s2 : Str} ; -- code, storage for locals
|
||||
Stm = Instr ;
|
||||
Typ = SS ;
|
||||
Exp = SS ;
|
||||
Var = SS ;
|
||||
|
||||
lin
|
||||
Decl typ cont = instrc (
|
||||
"alloc_" ++ typ.s ++ cont.$0
|
||||
Empty = ss [] ;
|
||||
Funct args val body rest = ss (
|
||||
".method" ++ rest.$0 ++ paren args.s ++ val.s ++ ";" ++
|
||||
".limit" ++ "locals" ++ body.s2 ++ ";" ++
|
||||
".limit" ++ "stack" ++ "1000" ++ ";" ++
|
||||
body.s ++
|
||||
".end" ++ "method" ++ ";" ++
|
||||
rest.s
|
||||
) ;
|
||||
BodyNil stm = stm ;
|
||||
BodyCons a as body = instrb a.s (
|
||||
"alloc" ++ a.s ++ body.$0 ++ body.s2) (body ** {s3 = []});
|
||||
|
||||
Decl typ cont = instrb typ.s (
|
||||
"alloc" ++ typ.s ++ cont.$0
|
||||
) cont ;
|
||||
Assign t x exp = instrc (
|
||||
exp.s ++
|
||||
@@ -20,26 +30,56 @@ flags lexer=codevars ; unlexer=code ; startcat=Stm ;
|
||||
Return t exp = instr (
|
||||
exp.s ++
|
||||
t.s ++ "_return") ;
|
||||
While exp loop = instrc (
|
||||
"TEST:" ++ exp.s ++
|
||||
"ifzero_goto" ++ "END" ++ ";" ++
|
||||
loop.s ++
|
||||
"END"
|
||||
) ;
|
||||
While exp loop =
|
||||
let
|
||||
test = "TEST_" ++ loop.s2 ;
|
||||
end = "END_" ++ loop.s2
|
||||
in instrl (
|
||||
test ++ ";" ++
|
||||
exp.s ++
|
||||
"ifzero" ++ end ++ ";" ++
|
||||
loop.s ++
|
||||
"goto" ++ test ++ ";" ++
|
||||
end
|
||||
) ;
|
||||
IfElse exp t f =
|
||||
let
|
||||
false = "FALSE_" ++ t.s2 ++ f.s2 ;
|
||||
true = "TRUE_" ++ t.s2 ++ f.s2
|
||||
in instrl (
|
||||
exp.s ++
|
||||
"ifzero" ++ false ++ ";" ++
|
||||
t.s ++
|
||||
"goto" ++ true ++ ";" ++
|
||||
false ++ ";" ++
|
||||
f.s ++
|
||||
true
|
||||
) ;
|
||||
Block stm = instrc stm.s ;
|
||||
End = ss [] ** {s3 = []} ;
|
||||
End = ss [] ** {s2,s3 = []} ;
|
||||
|
||||
EVar t x = instr (t.s ++ "_load" ++ x.s) ;
|
||||
EInt n = instr ("ipush" ++ n.s) ;
|
||||
EFloat a b = instr ("fpush" ++ a.s ++ "." ++ b.s) ;
|
||||
EAddI = binop "iadd" ;
|
||||
EAddF = binop "fadd" ;
|
||||
ESubI = binop "isub" ;
|
||||
ESubF = binop "fsub" ;
|
||||
EMulI = binop "imul" ;
|
||||
EMulF = binop "fmul" ;
|
||||
ELtI = binop ("call" ++ "ilt") ;
|
||||
ELtF = binop ("call" ++ "flt") ;
|
||||
EApp args val f exps = instr (
|
||||
exps.s ++
|
||||
"invoke" ++ f.s ++ paren args.s ++ val.s
|
||||
) ;
|
||||
|
||||
TInt = ss "i" ;
|
||||
TFloat = ss "f" ;
|
||||
TInt = ss "i" ;
|
||||
TFloat = ss "f" ;
|
||||
|
||||
NilTyp = ss [] ;
|
||||
ConsTyp = cc2 ;
|
||||
|
||||
NilExp = ss [] ;
|
||||
ConsExp _ _ = cc2 ;
|
||||
}
|
||||
|
||||
20
examples/gfcc/JVM.hs
Normal file
20
examples/gfcc/JVM.hs
Normal file
@@ -0,0 +1,20 @@
|
||||
module JVM where
|
||||
|
||||
mkJVM :: String -> String
|
||||
mkJVM = unlines . reverse . fst . foldl trans ([],([],0)) . lines where
|
||||
trans (code,(env,v)) s = case words s of
|
||||
".method":f:ns -> ((".method " ++ f ++ concat ns):code,([],0))
|
||||
"alloc":t:x:_ -> (code, ((x,v):env, v + size t))
|
||||
".limit":"locals":ns -> chCode (".limit locals " ++ show (length ns - 1))
|
||||
t:"_load" :x:_ -> chCode (t ++ "load " ++ look x)
|
||||
t:"_store":x:_ -> chCode (t ++ "store " ++ look x)
|
||||
t:"_return":_ -> chCode (t ++ "return")
|
||||
"goto":ns -> chCode ("goto " ++ concat ns)
|
||||
"ifzero":ns -> chCode ("ifzero " ++ concat ns)
|
||||
_ -> chCode s
|
||||
where
|
||||
chCode c = (c:code,(env,v))
|
||||
look x = maybe (x ++ show env) show $ lookup x env
|
||||
size t = case t of
|
||||
"d" -> 2
|
||||
_ -> 1
|
||||
@@ -1,16 +1,46 @@
|
||||
resource ResImper = open Prelude, Precedence in {
|
||||
resource ResImper = {
|
||||
|
||||
-- precedence
|
||||
|
||||
param
|
||||
Prec = P0 | P1 | P2 | P3 ;
|
||||
oper
|
||||
PrecExp : Type = {s : PrecTerm} ;
|
||||
continue : Str -> SS -> SS = \s -> infixSS ";" (ss s);
|
||||
statement : Str -> SS = \s -> postfixSS ";" (ss s);
|
||||
ex : PrecExp -> Str = \exp -> exp.s ! p0 ;
|
||||
infixL : Prec -> Str -> PrecExp -> PrecExp -> PrecExp =
|
||||
\p,h,x,y -> {s = mkInfixL h p x.s y.s} ;
|
||||
infixN : Prec -> Str -> PrecExp -> PrecExp -> PrecExp =
|
||||
\p,h,x,y -> {s = mkInfix h p x.s y.s} ;
|
||||
PrecExp : Type = {s : Prec => Str} ;
|
||||
ex : PrecExp -> Str = \exp -> exp.s ! P0 ;
|
||||
constant : Str -> PrecExp = \c -> {s = \\_ => c} ;
|
||||
infixN : Prec -> Str -> PrecExp -> PrecExp -> PrecExp = \p,f,x,y ->
|
||||
{s = \\k => mkPrec (x.s ! (nextPrec ! p) ++ f ++ y.s ! (nextPrec ! p)) ! p ! k} ;
|
||||
infixL : Prec -> Str -> PrecExp -> PrecExp -> PrecExp = \p,f,x,y ->
|
||||
{s = mkPrec (x.s ! p ++ f ++ y.s ! (nextPrec ! p)) ! p} ;
|
||||
|
||||
constant : Str -> PrecExp = \c -> {s = mkConst c} ;
|
||||
nextPrec : Prec => Prec = table {P0 => P1 ; P1 => P2 ; _ => P3} ;
|
||||
mkPrec : Str -> Prec => Prec => Str = \str -> table {
|
||||
P3 => table { -- use the term of precedence P3...
|
||||
_ => str} ; -- ...always without parentheses
|
||||
P2 => table { -- use the term of precedence P2...
|
||||
P3 => paren str ; -- ...in parentheses if P3 is expected...
|
||||
_ => str} ; -- ...otherwise without parentheses
|
||||
P1 => table {
|
||||
P3 | P2 => paren str ;
|
||||
_ => str} ;
|
||||
P0 => table {
|
||||
P0 => str ;
|
||||
_ => paren str}
|
||||
} ;
|
||||
|
||||
-- string operations
|
||||
|
||||
SS : Type = {s : Str} ;
|
||||
ss : Str -> SS = \s -> {s = s} ;
|
||||
cc2 : (_,_ : SS) -> SS = \x,y -> ss (x.s ++ y.s) ;
|
||||
|
||||
paren : Str -> Str = \str -> "(" ++ str ++ ")" ;
|
||||
|
||||
continues : Str -> SS -> SS = \s,t -> ss (s ++ ";" ++ t.s) ;
|
||||
continue : Str -> SS -> SS = \s,t -> ss (s ++ t.s) ;
|
||||
statement : Str -> SS = \s -> ss (s ++ ";");
|
||||
|
||||
-- taking cases of list size
|
||||
|
||||
param
|
||||
Size = Zero | One | More ;
|
||||
@@ -24,11 +54,17 @@ resource ResImper = open Prelude, Precedence in {
|
||||
_ => t
|
||||
} ;
|
||||
|
||||
-- for JVM
|
||||
Instr : Type = {s, s3 : Str} ; -- code, labels
|
||||
instr : Str -> Instr = \s -> statement s ** {s3 = []} ; ----
|
||||
instrc : Str -> Instr -> Instr = \s,i -> statement (s ++ i.s) ** {s3 = i.s3} ; ----
|
||||
-- operations for JVM
|
||||
|
||||
Instr : Type = {s,s2,s3 : Str} ; -- code, variables, labels
|
||||
instr : Str -> Instr = \s ->
|
||||
statement s ** {s2,s3 = []} ;
|
||||
instrc : Str -> Instr -> Instr = \s,i ->
|
||||
ss (s ++ ";" ++ i.s) ** {s2 = i.s2 ; s3 = i.s3} ;
|
||||
instrl : Str -> Instr -> Instr = \s,i ->
|
||||
ss (s ++ ";" ++ i.s) ** {s2 = i.s2 ; s3 = "L" ++ i.s3} ;
|
||||
instrb : Str -> Str -> Instr -> Instr = \v,s,i ->
|
||||
ss (s ++ ";" ++ i.s) ** {s2 = v ++ i.s2 ; s3 = i.s3} ;
|
||||
binop : Str -> SS -> SS -> SS = \op, x, y ->
|
||||
ss (x.s ++ y.s ++ op ++ ";") ;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,11 +20,12 @@
|
||||
\newcommand{\heading}[1]{\subsection{#1}}
|
||||
\newcommand{\explanation}[1]{{\small #1}}
|
||||
\newcommand{\empha}[1]{{\em #1}}
|
||||
\newcommand{\rarrow}{\; \rightarrow\;}
|
||||
|
||||
\newcommand{\nocolor}{} %% {\color[rgb]{0,0,0}}
|
||||
|
||||
|
||||
\title{{\bf Single-Source Language Definitions and Compilation as Linearization}}
|
||||
\title{{\bf Single-Source Language Definitions and Code Generation as Linearization}}
|
||||
|
||||
\author{Aarne Ranta \\
|
||||
Department of Computing Science \\
|
||||
@@ -38,9 +39,72 @@
|
||||
|
||||
\section{Introduction}
|
||||
|
||||
In this paper, we will describe a compiler that translates a
|
||||
subset of C into JVM-like byte code. The compiler has a number of
|
||||
unusual, yet attractive features:
|
||||
The experiment reported in this paper was prompted by a challenge
|
||||
posted by Lennart Augustsson to the participants of the workshop
|
||||
on Dependent Types in Programming held at Dagstuhl in September 2004.
|
||||
The challenge was to use dependent types to write a compiler from
|
||||
C to bytecode. This paper does not meet the challenge quite literally,
|
||||
since our compiler is for a different subset of C than Augustsson's
|
||||
specification, and since the bytecode that we generate is JVM instead
|
||||
of his format.
|
||||
|
||||
Augustsson's challenge did not specify \textit{how} dependent
|
||||
types are to be used, and the first of the two points we make in this
|
||||
paper is how we interpreted the use of dependent types:
|
||||
we use dependent types in combination with higher-order abstract syntax (HOAS)
|
||||
to define the grammar of the source language (here, the fragment of C).
|
||||
This grammar constitutes a single, declarative source, from which
|
||||
the compiler front end is derived, comprising both parser and type
|
||||
checker.
|
||||
|
||||
The complete code of the compiler is 300 lines. It is found in
|
||||
the appendices of this paper.
|
||||
|
||||
|
||||
\section{The Grammatical Framework}
|
||||
|
||||
The second point mentioned in the title,
|
||||
code generation as linearization, was suggested by
|
||||
the tool we have used for implementing the grammar:
|
||||
the \empha{Grammatical Framework} \cite{gf-jfp}. GF
|
||||
is similar to a Logical Framework (LF) extended with
|
||||
a notation for defining concrete syntax. GF was originally
|
||||
designed to help building multilingual
|
||||
translation systems for natural languages and also
|
||||
between formal and natural languages. The translation model
|
||||
implemented by GF is very simple:
|
||||
\begin{verbatim}
|
||||
linearization
|
||||
--------------->
|
||||
Abstract Syntax Language_i
|
||||
<---------------
|
||||
parsing
|
||||
\end{verbatim}
|
||||
An abstract syntax is similar to a \empha{theory} ,or a
|
||||
\empha{signature} in a logical framework. A
|
||||
concrete syntax defines, in a declarative way,
|
||||
a translation of abstract syntax trees (well-formed terms)
|
||||
into concrete language structures, and this definition can
|
||||
be used for both linearization and parsing.
|
||||
The number of languages related to one abstract syntax in
|
||||
this way is unlimited. If just one language is involved, GF
|
||||
works much the same way as any grammar formalism. The largest
|
||||
application known to us links 88 languages and translates
|
||||
numeral expressions between them.
|
||||
|
||||
From the GF point of view, the goal of the experiment
|
||||
is to investigate if GF is capable of implementing
|
||||
compilers using the ideas of single-source language definition
|
||||
and code generation as linearization. The working hypothesis
|
||||
was that it \textit{is} capable but inconvenient, and that,
|
||||
working out a complete example, we would find out what
|
||||
should be done to extend GF into a compiler construction tool.
|
||||
|
||||
|
||||
\subsection{Advantages and disadvantages}
|
||||
|
||||
Due to the way in which it is built, our compiler has
|
||||
a number of unusual, yet attractive features:
|
||||
\bequ
|
||||
The front end is defined by a grammar of C as its single source.
|
||||
|
||||
@@ -59,45 +123,7 @@ JVM code back into C code.
|
||||
The language has an interactive editor that also supports incremental
|
||||
compilation.
|
||||
\enqu
|
||||
The theoretical ideas making this kind of a compiler possible
|
||||
are familiar from various sources.
|
||||
The grammar that is
|
||||
powerful enough to enable a single-source language definition
|
||||
uses \empha{dependent types} and \empha{higher-order abstract syntax}
|
||||
in the same way as \empha{logical frameworks} \cite{harper,ALF,twelf}.
|
||||
The very idea of using a common abstract syntax for different
|
||||
languages was clearly exposed in \cite{landin}. The view of
|
||||
code generation as linearization is a central aspect of
|
||||
the classic compiler textbook \cite{aho-ullman}. The use
|
||||
of the same grammar both for parsing and linearization
|
||||
is a guiding principle of unification-based linguistic grammar
|
||||
formalisms \cite{pereira}. Interactive editors derived from
|
||||
grammars have been used in various programming and proof
|
||||
assistants \cite{teitelbaum-reps,metal,ALF}.
|
||||
|
||||
Even though the different ideas are well-known, they are
|
||||
applied less in practive than in theory. In particular,
|
||||
we have not seen them used together to construct a complete
|
||||
compiler. In our view, putting these ideas together is
|
||||
a satisfactory approach to compiling, since a compiler written
|
||||
in this way is completele declarative and therefore easy to
|
||||
modify and to port. It is also self-documenting. since the
|
||||
human-readable grammar defines the syntax and static
|
||||
semantics that is actually used in the implementation.
|
||||
|
||||
The tool that we have used for writing our compiler is GF, the
|
||||
\empha{Grammatical Framework} \cite{gf-jfp}. GF
|
||||
is a grammar formalism designed to help building multilingual
|
||||
translation systems for natural languages and also
|
||||
between formal and natural languages. One goal of this work
|
||||
has been to investigate if GF is capable of implementing
|
||||
compilers using the ideas of single-source language definition
|
||||
and code generation as linearization. The working hypothesis
|
||||
was that it \textit{is} capable but inconvenient, and that,
|
||||
working out a complete example, we would find out what
|
||||
should be done to extend GF into a compiler construction tool.
|
||||
|
||||
The various shortcomings and their causes will be explained in
|
||||
The problems that we have encountered and their causes will be explained in
|
||||
the relevant sections of this report. To summarize,
|
||||
\bequ
|
||||
The scoping conditions resulting from HOAS are slightly different
|
||||
@@ -110,14 +136,827 @@ Using HOAS to encode bindings of functions is somewhat cumbersome.
|
||||
The C parser derived from the GF grammar does not recognize all
|
||||
valid programs.
|
||||
\enqu
|
||||
The first two shortcomings seem to us inherent to the techniques
|
||||
The first two shortcomings seem to be inevitable with the technique
|
||||
we use. The real JVM syntax, however, is easy to obtain by simple
|
||||
string processing from our one. The latter two shortcomings
|
||||
suggest that GF should be fine-tuned to give better support
|
||||
to compiler construction, which, after all is not an intended
|
||||
to compiler construction, which, after all, is not an intended
|
||||
use of GF as it is now.
|
||||
|
||||
|
||||
\section{The abstract syntax}
|
||||
|
||||
An abstract syntax in GF consists of \texttt{cat} judgements
|
||||
declaring basic types, and \texttt{fun} judgements declaring
|
||||
functions. Syntax trees are well-formed terms of basic
|
||||
types. As for notation, each judgement form is recognized by
|
||||
its keyword, and the same keyword governs all judgements
|
||||
until the next keyword is encountered.
|
||||
|
||||
|
||||
\subsection{Statements}
|
||||
|
||||
Statements in C may involve variables, expressions, and
|
||||
other statements.
|
||||
The following \texttt{cat} judgements of GF define the syntactic categories
|
||||
that are needed to construct statements
|
||||
\begin{verbatim}
|
||||
cat
|
||||
Stm ;
|
||||
Typ ;
|
||||
Exp Typ ;
|
||||
Var Typ ;
|
||||
\end{verbatim}
|
||||
The type \texttt{Typ} is the type of C's datatypes.
|
||||
Expressions (\texttt{Exp}) is a dependent type: we
|
||||
will give rules to construct well-typed expressions of
|
||||
a given type. \texttt{Var}\ is the type of variables,
|
||||
of a given type, that get bound in C's variable
|
||||
declarations.
|
||||
|
||||
Let us start with the simplest kind of statements:
|
||||
declarations and assignments. The following \texttt{fun}
|
||||
rules define their abstract syntax:
|
||||
\begin{verbatim}
|
||||
fun
|
||||
Decl : (A : Typ) -> (Var A -> Stm) -> Stm ;
|
||||
Assign : (A : Typ) -> Var A -> Exp A -> Stm -> Stm ;
|
||||
\end{verbatim}
|
||||
Dependent types are used in \texttt{Assign} to
|
||||
control that a variable is always assigned a value of proper
|
||||
type. The \texttt{Decl}\ function captures the rule that
|
||||
a variable must be declared before it can be assigned to:
|
||||
its second argument is a \empha{continuation}, which is
|
||||
the sequence of statements that depend on (= may refer to)
|
||||
the declared variable.
|
||||
|
||||
We will treat all statements, except
|
||||
\texttt{return}s, in terms of continuations. A sequence of
|
||||
statements (which always has the type \texttt{Stm}) thus
|
||||
always ends in a \texttt{return}, or, abruptly, in
|
||||
an empty statement. Here are rules for some other
|
||||
statement forms:
|
||||
\begin{verbatim}
|
||||
Return : (A : Typ) -> Exp A -> Stm ;
|
||||
While : Exp TInt -> Stm -> Stm -> Stm ;
|
||||
IfElse : Exp TInt -> Stm -> Stm -> Stm -> Stm ;
|
||||
Block : Stm -> Stm -> Stm ;
|
||||
End : Stm ;
|
||||
\end{verbatim}
|
||||
Here is an example of a piece of code and its abstract syntax.
|
||||
\begin{verbatim}
|
||||
int x ; Decl TInt (\x ->
|
||||
x = 5 ; Assign TInt x (EInt 5) (
|
||||
return x ; Return TInt (EVar TInt x)))
|
||||
\end{verbatim}
|
||||
(Expression syntax is explained in the next section.)
|
||||
|
||||
|
||||
\subsection{Expressions and types}
|
||||
|
||||
We consider two C types: integers and floats.
|
||||
\begin{verbatim}
|
||||
TInt : Typ ;
|
||||
TFloat : Typ ;
|
||||
\end{verbatim}
|
||||
Well-typed expressions can be built from constants,
|
||||
from variables, and by means of binary operations.
|
||||
\begin{verbatim}
|
||||
EVar : (A : Typ) -> Var A -> Exp A ;
|
||||
EInt : Int -> Exp TInt ;
|
||||
EFloat : Int -> Int -> Exp TFloat ;
|
||||
EAddI : Exp TInt -> Exp TInt -> Exp TInt ;
|
||||
EAddF : Exp TFloat -> Exp TFloat -> Exp TFloat ;
|
||||
ESubI : Exp TInt -> Exp TInt -> Exp TInt ;
|
||||
ESubF : Exp TFloat -> Exp TFloat -> Exp TFloat ;
|
||||
EMulI : Exp TInt -> Exp TInt -> Exp TInt ;
|
||||
EMulF : Exp TFloat -> Exp TFloat -> Exp TFloat ;
|
||||
ELtI : Exp TInt -> Exp TInt -> Exp TInt ;
|
||||
ELtF : Exp TFloat -> Exp TFloat -> Exp TInt ;
|
||||
\end{verbatim}
|
||||
Notice that GF has a built-in type \texttt{Int} of
|
||||
integer literals, but floats are constructed by hand.
|
||||
|
||||
Yet another expression for are function calls. To this
|
||||
end, we need a notions of (user-defined) functions and
|
||||
argument lists. The type of functions depends on an
|
||||
argument type list and a value type. Expression lists
|
||||
are formed to match type lists.
|
||||
\begin{verbatim}
|
||||
cat
|
||||
ListTyp ;
|
||||
Fun ListTyp Typ ;
|
||||
ListExp ListTyp ;
|
||||
|
||||
fun
|
||||
EApp : (AS : ListTyp) -> (V : Typ) ->
|
||||
Fun AS V -> ListExp AS -> Exp V ;
|
||||
|
||||
NilTyp : ListTyp ;
|
||||
ConsTyp : Typ -> ListTyp -> ListTyp ;
|
||||
|
||||
NilExp : ListExp NilTyp ;
|
||||
ConsExp : (A : Typ) -> (AS : ListTyp) ->
|
||||
Exp A -> ListExp AS -> ListExp (ConsExp A AS) ;
|
||||
\end{verbatim}
|
||||
|
||||
|
||||
|
||||
\subsection{Functions}
|
||||
|
||||
On the top level, a program is a sequence of functions.
|
||||
Each function may refer to functions defined earlier
|
||||
in the program. The idea to express the binding of
|
||||
function symbols with HOAS is analogous to the binding
|
||||
of variables in statements, using a continuation.
|
||||
\begin{verbatim}
|
||||
cat
|
||||
Program ;
|
||||
Body ListTyp ;
|
||||
|
||||
fun
|
||||
Empty : Program ;
|
||||
Funct : (AS : ListTyp) -> (V : Typ) ->
|
||||
(Body AS) -> (Fun AS V -> Program) -> Program ;
|
||||
\end{verbatim}
|
||||
However, here we must also account for the binding of
|
||||
a function's parameters in its body. We could use
|
||||
vectors of variables, in the same way as vectors of
|
||||
expressions are used to give arguments to functions.
|
||||
However, this would lead to the need of cumbersome
|
||||
projection functions when dereferencing the parameters
|
||||
in the function body. A more elegant solution is
|
||||
to use HOAS to build function bodies:
|
||||
\begin{verbatim}
|
||||
BodyNil : Stm -> Body NilTyp ;
|
||||
BodyCons : (A : Typ) -> (AS : ListTyp) ->
|
||||
(Var A -> Body AS) -> Body (ConsTyp A AS) ;
|
||||
\end{verbatim}
|
||||
The end result is an abstract syntax whose relation
|
||||
to concrete syntax is somewhat remote. Here is an example of
|
||||
the code of a function and its abstract syntax:
|
||||
\begin{verbatim}
|
||||
Funct (
|
||||
int abs (int x){ ConsTyp TInt NilTyp) TInt
|
||||
(BodyCons TInt NilTyp (\x ->
|
||||
BodyNil (
|
||||
if (x < 0){ IfElse (ELtI (EVar TInt x) (EInt 0))
|
||||
return 0 - x ; (Block (Return TInt
|
||||
(ESubI (EInt 0) (EVar TInt x)))
|
||||
} End)
|
||||
else return x ; (Return TInt (EVar TInt x)) End)))
|
||||
} (\abs -> Empty)
|
||||
\end{verbatim}
|
||||
Notice, in particular, how far from the function header the
|
||||
name of the function appears in the syntax trees.
|
||||
|
||||
A more serious shortcoming of our way of defining functions
|
||||
is that it does not allow recursion. The reason is simple:
|
||||
the function symbol is only bound in the continuation
|
||||
of the program, not in the function body.
|
||||
|
||||
|
||||
\section{The concrete syntax of C}
|
||||
|
||||
A concrete syntax, for a given abstract syntax,
|
||||
consists of \texttt{lincat} judgements
|
||||
defining the \empha{linearization types} of each category,
|
||||
and \texttt{lin} judgements
|
||||
defining the \empha{linearization functions} of each function
|
||||
in the abstract syntax. The linearization functions are
|
||||
checked to be well-typed with respect the \texttt{lincat}
|
||||
definitions, and the syntax of GF forces them to be \empha{compositional}
|
||||
in the sense that the linearization of a complex tree is always
|
||||
a function of the linearizations of the subtrees. Schematically, if
|
||||
\[
|
||||
f \colon A_{1} \rarrow \cdots \rarrow A_{n} \rarrow A
|
||||
\]
|
||||
then
|
||||
\[
|
||||
\sugmap{f} \colon
|
||||
\sugmap{A_{1}} \rarrow \cdots
|
||||
\rarrow \sugmap{A_{n}} \rarrow \sugmap{A}
|
||||
\]
|
||||
and the linearization algorithm is simply
|
||||
\[
|
||||
\sugmap{(f \; a_{1} \; \ldots \; a_{n})} \; = \;
|
||||
\sugmap{f} \; \sugmap{a_{1}} \; \ldots \; \sugmap{a_{n}}
|
||||
\]
|
||||
using the \sugmap{} notation for both linearization types,
|
||||
linearization functions, and linearizations of trees.
|
||||
|
||||
Because of compositionality, no case analysis on expressions
|
||||
is possible in linearization rules. The values of linearization
|
||||
therefore have to carry information on how they are used in
|
||||
different situations, which means that linearization
|
||||
types are record types instead of just the string type.
|
||||
The simplest record type that is used is
|
||||
\begin{verbatim}
|
||||
{s : Str}
|
||||
\end{verbatim}
|
||||
If the linearization type of a category is not explicitly
|
||||
given by a \texttt{lincat} judgement, this type is
|
||||
used by default.
|
||||
|
||||
|
||||
|
||||
\subsection{Resource modules}
|
||||
|
||||
Resource modules define auxiliary notions that can be
|
||||
used in concrete syntax. These notions include
|
||||
\empha{parameter types} defined by \texttt{param}
|
||||
judgements, and \empha{operations} defined by
|
||||
\texttt{oper} judgements. These judgements are
|
||||
similar to datatype and function definitions
|
||||
in functional programming, with the restriction
|
||||
that parameter types must be finite and operations
|
||||
may not be recursive. It is due to these restrictions that
|
||||
the parsing algorithms are possible to derive from
|
||||
linearization rules.
|
||||
|
||||
The following string operations are useful in almost
|
||||
all grammars. They are actually included in a GF \texttt{Prelude},
|
||||
but are here defined from scratch to make the code shown in
|
||||
the Appendices complete.
|
||||
\begin{verbatim}
|
||||
oper
|
||||
SS : Type = {s : Str} ;
|
||||
ss : Str -> SS = \s -> {s = s} ;
|
||||
cc2 : (_,_ : SS) -> SS = \x,y -> ss (x.s ++ y.s) ;
|
||||
\end{verbatim}
|
||||
|
||||
|
||||
|
||||
\subsection{Expressions}
|
||||
|
||||
We want to be able to recognixe and generate one and the same expression with
|
||||
or without parentheses, depending on whether its precedence level
|
||||
is lower or higher than expected. For instance, a sum used as
|
||||
an operand of multiplication should be in parentheses. We
|
||||
capture this by defining a parameter type of
|
||||
precedence levels. Four levels are enough for the present
|
||||
fragment of C, so we define the auxiliary notions
|
||||
\begin{verbatim}
|
||||
param Prec = P0 | P1 | P2 | P3 ;
|
||||
oper PrecExp : Type = {s : Prec => Str} ;
|
||||
\end{verbatim}
|
||||
in a resource module (see Appendix D), and
|
||||
\begin{verbatim}
|
||||
lincat Exp = PrecExp ;
|
||||
\end{verbatim}
|
||||
in the concrete syntax of C itself. The following auxiliary notions
|
||||
are also used in the concrete syntax of C:
|
||||
\begin{verbatim}
|
||||
oper
|
||||
paren : Str -> Str = \str -> "(" ++ str ++ ")" ;
|
||||
ex : PrecExp -> Str = \exp -> exp.s ! P0 ;
|
||||
constant : Str -> PrecExp = \c -> {s = \\_ => c} ;
|
||||
infixN : Prec -> Str -> PrecExp -> PrecExp -> PrecExp = \p,f,x,y ->
|
||||
{s = mkPrec (x.s ! (nextPrec ! p) ++ f ++ y.s ! (nextPrec ! p)) ! p} ;
|
||||
infixL : Prec -> Str -> PrecExp -> PrecExp -> PrecExp = \p,f,x,y ->
|
||||
{s = mkPrec (x.s ! p ++ f ++ y.s ! (nextPrec ! p)) ! p} ;
|
||||
\end{verbatim}
|
||||
Here are the linearization rules of expressions:
|
||||
\begin{verbatim}
|
||||
lin
|
||||
EVar _ x = constant x.s ;
|
||||
EInt n = constant n.s ;
|
||||
EFloat a b = constant (a.s ++ "." ++ b.s) ;
|
||||
EMulI, EMulF = infixL P2 "*" ;
|
||||
EAddI, EAddF = infixL P1 "+" ;
|
||||
ESubI, ESubF = infixL P1 "-" ;
|
||||
ELtI, ELtF = infixN P0 "<" ;
|
||||
EApp args val f exps = constant (f.s ++ paren exps.s) ;
|
||||
\end{verbatim}
|
||||
A useful addition to GF when fine-tuned for compiler
|
||||
construction might be a hard-coded treatment of
|
||||
precedence---in particular, to permit the addition
|
||||
of superfluous parentheses, which is not allowed by
|
||||
the present grammar.
|
||||
|
||||
|
||||
\subsection{Statements}
|
||||
|
||||
Statements in C have
|
||||
the simplest linearization type, \verb6{s : Str}6.
|
||||
We use a handful of auxiliary operations to regulate
|
||||
the use of semicolons on a high level.
|
||||
\begin{verbatim}
|
||||
oper
|
||||
continues : Str -> SS -> SS = \s,t -> ss (s ++ ";" ++ t.s) ;
|
||||
continue : Str -> SS -> SS = \s,t -> ss (s ++ t.s) ;
|
||||
statement : Str -> SS = \s -> ss (s ++ ";");
|
||||
\end{verbatim}
|
||||
As for declarations, which bind variables, we notice the
|
||||
special projection \verb6.$06 to refet to the bound variable.
|
||||
Technically, terms that are linearized must be in $\eta$-long
|
||||
form, which guarantees that a variable symbol can always be
|
||||
found, and that a field representing it can be added to the
|
||||
linearization record.
|
||||
\begin{verbatim}
|
||||
lin
|
||||
Decl typ cont = continues (typ.s ++ cont.$0) cont ;
|
||||
Assign _ x exp = continues (x.s ++ "=" ++ ex exp) ;
|
||||
Return _ exp = statement ("return" ++ ex exp) ;
|
||||
While exp loop = continue ("while" ++ paren (ex exp) ++ loop.s) ;
|
||||
IfElse exp t f = continue ("if" ++ paren (ex exp) ++ t.s ++ "else" ++ f.s) ;
|
||||
Block stm = continue ("{" ++ stm.s ++ "}") ;
|
||||
End = ss [] ;
|
||||
\end{verbatim}
|
||||
|
||||
|
||||
\subsection{Functions}
|
||||
|
||||
The only new problem presented by functions is the proper
|
||||
distribution of commas in parameter and argument lists.
|
||||
Since compositionality prevents taking cases of list forms, e.g.
|
||||
\begin{verbatim}
|
||||
-- NilExp = ss [] ;
|
||||
-- ConsExp _ _ e NilExp = e ;
|
||||
-- ConsExp _ _ e es = ss (e.s ++ "," ++ es.s) ;
|
||||
\end{verbatim}
|
||||
we have to encode the size of a list in a parameter:
|
||||
\begin{verbatim}
|
||||
param
|
||||
Size = Zero | One | More ;
|
||||
oper
|
||||
nextSize : Size -> Size = \n -> case n of {
|
||||
Zero => One ;
|
||||
_ => More
|
||||
} ;
|
||||
separator : Str -> Size -> Str = \t,n -> case n of {
|
||||
Zero => [] ;
|
||||
_ => t
|
||||
} ;
|
||||
\end{verbatim}
|
||||
For expression lists, we now have
|
||||
\begin{verbatim}
|
||||
lincat
|
||||
ListExp = {s : Str ; size : Size} ;
|
||||
lin
|
||||
NilExp = ss [] ;
|
||||
ConsExp _ _ e es = {
|
||||
s = ex e ++ separator "," es.size ++ es.s ;
|
||||
size = nextSize es.size
|
||||
} ;
|
||||
\end{verbatim}
|
||||
Parameter lists are collected as components of function bodies
|
||||
(because of HOAS), and their size is encoded as yet another
|
||||
component.
|
||||
\begin{verbatim}
|
||||
lincat
|
||||
Body = {s,s2 : Str ; size : Size} ;
|
||||
lin
|
||||
Empty = ss [] ;
|
||||
Funct args val body cont = ss (
|
||||
val.s ++ cont.$0 ++ paren body.s2 ++ "{" ++
|
||||
body.s ++ "}" ++ ";" ++ cont.s) ;
|
||||
|
||||
BodyNil stm = stm ** {s2 = [] ; size = Zero} ;
|
||||
BodyCons typ _ body = {
|
||||
s = body.s ;
|
||||
s2 = typ.s ++ body.$0 ++ separator "," body.size ++ body.s2 ;
|
||||
size = nextSize body.size
|
||||
} ;
|
||||
\end{verbatim}
|
||||
|
||||
|
||||
|
||||
\section{The concrete syntax of JVM}
|
||||
|
||||
JVM syntax is, linguistically, more straightforward than
|
||||
the syntax of C, and could be described by a regular
|
||||
expression. The transition of our abstract syntax to JVM,
|
||||
however, is tricky because variables are replaced by
|
||||
their addresses (relative to the frame pointer), and
|
||||
linearization must maintain a symbol table that permits
|
||||
the lookup of a variable address. As shown in the code
|
||||
in Appendix C, we have not quite succeeded to do this
|
||||
in the generated code. Instead, we generate (non-JVM)
|
||||
\texttt{alloc} instructions and use another pass,
|
||||
written in Haskell (Appendix E), to replace variable
|
||||
symbols by their addresses.
|
||||
|
||||
A related problem is the generation of fresh labels for
|
||||
jumps. We solve this by maintaining a growing label
|
||||
as a field of the linearization of statements into
|
||||
instructions. The problem remains that the two branches
|
||||
in an \texttt{if-else} statement can use the same
|
||||
labels. Making them unique will have to be
|
||||
added to the post-processing pass. This is, however,
|
||||
always possible, because labels are nested in a
|
||||
disciplined way.
|
||||
|
||||
As it turned out too laborious to thread the label counter
|
||||
to expressions, we decided to compile comparison
|
||||
expressions into method calls, which should be provided
|
||||
by a run-time library.
|
||||
|
||||
The JVM syntax used is from the Jasmin assembler
|
||||
\cite{jasmin}, with small deviation which will
|
||||
be removed shortly.
|
||||
|
||||
|
||||
\subsection{A code example}
|
||||
|
||||
Here is a C source program, the JVM code obtained by linearization, and
|
||||
the postprocessed JVM code.
|
||||
\small
|
||||
\begin{verbatim}
|
||||
int abs (int x){ .method abs (i)i ;
|
||||
if (x < 0){ .limit locals i ; .method abs(i)i;
|
||||
return 0 - x ; .limit stack 1000 ; .limit locals 1
|
||||
} alloc i x ; .limit stack 1000 ;
|
||||
else return x ; i _load x ; iload 0
|
||||
} ; ipush 0 ; ipush 0 ;
|
||||
int main () { call ilt ; call ilt ;
|
||||
int i ; ifzero FALSE_ ; ifzero FALSE_;
|
||||
i = abs (16); ipush 0 ; ipush 0 ;
|
||||
} ; i _load x ; iload 0
|
||||
isub ; isub ;
|
||||
i _return ; ireturn
|
||||
; ;
|
||||
goto TRUE_ ; goto TRUE_;
|
||||
FALSE_ ; FALSE_ ;
|
||||
i _load x ; iload 0
|
||||
i _return ; ireturn
|
||||
TRUE_ ; TRUE_ ;
|
||||
.end method ; .end method ;
|
||||
.method main () i ; .method main()i;
|
||||
.limit locals i ; .limit locals 1
|
||||
.limit stack 1000 ; .limit stack 1000 ;
|
||||
alloc i i ; ipush 16 ;
|
||||
ipush 16 ; invoke abs (i)i ;
|
||||
invoke abs (i)i ; istore 0
|
||||
i _store i ; .end method ;
|
||||
.end method ;
|
||||
\end{verbatim}
|
||||
\normalsize
|
||||
|
||||
|
||||
\section{Related work}
|
||||
|
||||
The theoretical ideas behind our compiler experiment
|
||||
are familiar from various sources.
|
||||
Building single-source language definitions with
|
||||
dependent types and higher-order abstract syntax
|
||||
has been studied in various logical frameworks
|
||||
\cite{harper-honsell,magnusson-nordstr,twelf}.
|
||||
The idea of using a common abstract syntax for different
|
||||
languages was clearly exposed in \cite{landin}. The view of
|
||||
code generation as linearization is a central aspect of
|
||||
the classic compiler textbook \cite{aho-ullman}. The use
|
||||
of the same grammar both for parsing and linearization
|
||||
is a guiding principle of unification-based linguistic grammar
|
||||
formalisms \cite{pereira}. Interactive editors derived from
|
||||
grammars have been used in various programming and proof
|
||||
assistants \cite{teitelbaum,metal,ALF}.
|
||||
|
||||
Even though the different ideas are well-known, they are
|
||||
applied less in practice than in theory. In particular,
|
||||
we have not seen them used together to construct a complete
|
||||
compiler. In our view, putting these ideas together is
|
||||
an attractive approach to compiling, since a compiler written
|
||||
in this way is completely declarative, and therefore concise,
|
||||
and therefore easy to modify and extend. For instance, if
|
||||
a new language construct is added, the GF type checker
|
||||
verifies that the addition is propagated to all components
|
||||
of the compiler. As the implementation is declarative,
|
||||
it is also self-documenting, since a human-readable
|
||||
grammar defines the syntax and static
|
||||
semantics that is actually used in the implementation.
|
||||
|
||||
|
||||
\section{Conclusion}
|
||||
|
||||
We managed to compile a large subset of C, and growing it
|
||||
does not necessarily pose any new kinds of problems.
|
||||
|
||||
|
||||
\bibliographystyle{plain}
|
||||
|
||||
\bibliography{gf-bib}
|
||||
|
||||
|
||||
\newpage
|
||||
\section*{Appendix A: The abstract syntax}
|
||||
|
||||
\small
|
||||
\begin{verbatim}
|
||||
abstract Imper = {
|
||||
|
||||
cat
|
||||
Program ;
|
||||
Typ ;
|
||||
ListTyp ;
|
||||
Fun ListTyp Typ ;
|
||||
Body ListTyp ;
|
||||
Stm ;
|
||||
Exp Typ ;
|
||||
Var Typ ;
|
||||
ListExp ListTyp ;
|
||||
|
||||
fun
|
||||
Empty : Program ;
|
||||
Funct : (AS : ListTyp) -> (V : Typ) ->
|
||||
(Body AS) -> (Fun AS V -> Program) -> Program ;
|
||||
BodyNil : Stm -> Body NilTyp ;
|
||||
BodyCons : (A : Typ) -> (AS : ListTyp) ->
|
||||
(Var A -> Body AS) -> Body (ConsTyp A AS) ;
|
||||
|
||||
Decl : (A : Typ) -> (Var A -> Stm) -> Stm ;
|
||||
Assign : (A : Typ) -> Var A -> Exp A -> Stm -> Stm ;
|
||||
Return : (A : Typ) -> Exp A -> Stm ;
|
||||
While : Exp TInt -> Stm -> Stm -> Stm ;
|
||||
IfElse : Exp TInt -> Stm -> Stm -> Stm -> Stm ;
|
||||
Block : Stm -> Stm -> Stm ;
|
||||
End : Stm ;
|
||||
|
||||
EVar : (A : Typ) -> Var A -> Exp A ;
|
||||
EInt : Int -> Exp TInt ;
|
||||
EFloat : Int -> Int -> Exp TFloat ;
|
||||
ELtI : Exp TInt -> Exp TInt -> Exp TInt ;
|
||||
ELtF : Exp TFloat -> Exp TFloat -> Exp TInt ;
|
||||
EApp : (AS : ListTyp) -> (V : Typ) -> Fun AS V -> ListExp AS -> Exp V ;
|
||||
EAddI, EMulI, ESubI : Exp TInt -> Exp TInt -> Exp TInt ;
|
||||
EAddF, EMulF, ESubF : Exp TFloat -> Exp TFloat -> Exp TFloat ;
|
||||
|
||||
TInt, TFloat : Typ ;
|
||||
|
||||
NilTyp : ListTyp ;
|
||||
ConsTyp : Typ -> ListTyp -> ListTyp ;
|
||||
|
||||
NilExp : ListExp NilTyp ;
|
||||
ConsExp : (A : Typ) -> (AS : ListTyp) ->
|
||||
Exp A -> ListExp AS -> ListExp (ConsExp A AS) ;
|
||||
}
|
||||
\end{verbatim}
|
||||
\normalsize
|
||||
\newpage
|
||||
|
||||
|
||||
\section*{Appendix B: The concrete syntax of C}
|
||||
|
||||
\small
|
||||
\begin{verbatim}
|
||||
concrete ImperC of Imper = open ResImper in {
|
||||
flags lexer=codevars ; unlexer=code ; startcat=Stm ;
|
||||
|
||||
lincat
|
||||
Exp = PrecExp ;
|
||||
Body = {s,s2 : Str ; size : Size} ;
|
||||
ListExp = {s : Str ; size : Size} ;
|
||||
|
||||
lin
|
||||
Empty = ss [] ;
|
||||
Funct args val body cont = ss (
|
||||
val.s ++ cont.$0 ++ paren body.s2 ++ "{" ++
|
||||
body.s ++ "}" ++ ";" ++ cont.s) ;
|
||||
BodyNil stm = stm ** {s2 = [] ; size = Zero} ;
|
||||
BodyCons typ _ body = {
|
||||
s = body.s ;
|
||||
s2 = typ.s ++ body.$0 ++ separator "," body.size ++ body.s2 ;
|
||||
size = nextSize body.size
|
||||
} ;
|
||||
|
||||
Decl typ cont = continues (typ.s ++ cont.$0) cont ;
|
||||
Assign _ x exp = continues (x.s ++ "=" ++ ex exp) ;
|
||||
Return _ exp = statement ("return" ++ ex exp) ;
|
||||
While exp loop = continue ("while" ++ paren (ex exp) ++ loop.s) ;
|
||||
IfElse exp t f = continue ("if" ++ paren (ex exp) ++ t.s ++ "else" ++ f.s) ;
|
||||
Block stm = continue ("{" ++ stm.s ++ "}") ;
|
||||
End = ss [] ;
|
||||
|
||||
EVar _ x = constant x.s ;
|
||||
EInt n = constant n.s ;
|
||||
EFloat a b = constant (a.s ++ "." ++ b.s) ;
|
||||
EMulI, EMulF = infixL P2 "*" ;
|
||||
EAddI, EAddF = infixL P1 "+" ;
|
||||
ESubI, ESubF = infixL P1 "-" ;
|
||||
ELtI, ELtF = infixN P0 "<" ;
|
||||
EApp args val f exps = constant (f.s ++ paren exps.s) ;
|
||||
|
||||
TInt = ss "int" ;
|
||||
TFloat = ss "float" ;
|
||||
NilTyp = ss [] ;
|
||||
ConsTyp = cc2 ;
|
||||
|
||||
NilExp = ss [] ** {size = Zero} ;
|
||||
ConsExp _ _ e es = {
|
||||
s = ex e ++ separator "," es.size ++ es.s ;
|
||||
size = nextSize es.size
|
||||
} ;
|
||||
}
|
||||
\end{verbatim}
|
||||
\normalsize
|
||||
\newpage
|
||||
|
||||
|
||||
\section*{Appendix C: The concrete syntax of JVM}
|
||||
|
||||
\small
|
||||
\begin{verbatim}
|
||||
concrete ImperJVM of Imper = open ResImper in {
|
||||
|
||||
flags lexer=codevars ; unlexer=code ; startcat=Stm ;
|
||||
|
||||
lincat
|
||||
Body = {s,s2 : Str} ; -- code, storage for locals
|
||||
Stm = Instr ;
|
||||
|
||||
lin
|
||||
Empty = ss [] ;
|
||||
Funct args val body rest = ss (
|
||||
".method" ++ rest.$0 ++ paren args.s ++ val.s ++ ";" ++
|
||||
".limit" ++ "locals" ++ body.s2 ++ ";" ++
|
||||
".limit" ++ "stack" ++ "1000" ++ ";" ++
|
||||
body.s ++
|
||||
".end" ++ "method" ++ ";" ++
|
||||
rest.s
|
||||
) ;
|
||||
BodyNil stm = stm ;
|
||||
BodyCons a as body = instrb a.s [] (body ** {s3 = []});
|
||||
|
||||
Decl typ cont = instrb typ.s (
|
||||
"alloc_" ++ typ.s ++ cont.$0
|
||||
) cont ;
|
||||
Assign t x exp = instrc (
|
||||
exp.s ++
|
||||
t.s ++ "_store" ++ x.s
|
||||
) ;
|
||||
Return t exp = instr (
|
||||
exp.s ++
|
||||
t.s ++ "_return") ;
|
||||
While exp loop =
|
||||
let
|
||||
test = "TEST_" ++ loop.s2 ;
|
||||
end = "END_" ++ loop.s2
|
||||
in instrl (
|
||||
test ++ ";" ++
|
||||
exp.s ++
|
||||
"ifzero" ++ end ++ ";" ++
|
||||
loop.s ++
|
||||
"goto" ++ test ++ ";" ++
|
||||
end
|
||||
) ;
|
||||
IfElse exp t f =
|
||||
let
|
||||
false = "FALSE_" ++ t.s2 ++ f.s2 ;
|
||||
true = "TRUE_" ++ t.s2 ++ f.s2
|
||||
in instrl (
|
||||
exp.s ++
|
||||
"ifzero" ++ false ++ ";" ++
|
||||
t.s ++
|
||||
"goto" ++ true ++ ";" ++
|
||||
false ++ ";" ++
|
||||
f.s ++
|
||||
true
|
||||
) ;
|
||||
Block stm = instrc stm.s ;
|
||||
End = ss [] ** {s2,s3 = []} ;
|
||||
|
||||
EVar t x = instr (t.s ++ "_load" ++ x.s) ;
|
||||
EInt n = instr ("ipush" ++ n.s) ;
|
||||
EFloat a b = instr ("fpush" ++ a.s ++ "." ++ b.s) ;
|
||||
EAddI = binop "iadd" ;
|
||||
EAddF = binop "fadd" ;
|
||||
ESubI = binop "isub" ;
|
||||
ESubF = binop "fsub" ;
|
||||
EMulI = binop "imul" ;
|
||||
EMulF = binop "fmul" ;
|
||||
ELtI = binop ("call" ++ "ilt") ;
|
||||
ELtF = binop ("call" ++ "flt") ;
|
||||
EApp args val f exps = instr (
|
||||
exps.s ++
|
||||
"invoke" ++ f.s ++ paren args.s ++ val.s
|
||||
) ;
|
||||
|
||||
TInt = ss "i" ;
|
||||
TFloat = ss "f" ;
|
||||
|
||||
NilTyp = ss [] ;
|
||||
ConsTyp = cc2 ;
|
||||
|
||||
NilExp = ss [] ;
|
||||
ConsExp _ _ = cc2 ;
|
||||
}
|
||||
|
||||
\end{verbatim}
|
||||
\normalsize
|
||||
\newpage
|
||||
|
||||
\section*{Appendix D: Auxiliary operations for concrete syntax}
|
||||
|
||||
\small
|
||||
\begin{verbatim}
|
||||
resource ResImper = {
|
||||
|
||||
-- precedence
|
||||
|
||||
param
|
||||
Prec = P0 | P1 | P2 | P3 ;
|
||||
oper
|
||||
PrecExp : Type = {s : Prec => Str} ;
|
||||
ex : PrecExp -> Str = \exp -> exp.s ! P0 ;
|
||||
constant : Str -> PrecExp = \c -> {s = \\_ => c} ;
|
||||
infixN : Prec -> Str -> PrecExp -> PrecExp -> PrecExp = \p,f,x,y ->
|
||||
{s = \\k => mkPrec (x.s ! (nextPrec ! p) ++ f ++ y.s ! (nextPrec ! p)) ! p ! k} ;
|
||||
infixL : Prec -> Str -> PrecExp -> PrecExp -> PrecExp = \p,f,x,y ->
|
||||
{s = mkPrec (x.s ! p ++ f ++ y.s ! (nextPrec ! p)) ! p} ;
|
||||
|
||||
nextPrec : Prec => Prec = table {P0 => P1 ; P1 => P2 ; _ => P3} ;
|
||||
mkPrec : Str -> Prec => Prec => Str = \str -> table {
|
||||
P3 => table { -- use the term of precedence P3...
|
||||
_ => str} ; -- ...always without parentheses
|
||||
P2 => table { -- use the term of precedence P2...
|
||||
P3 => paren str ; -- ...in parentheses if P3 is expected...
|
||||
_ => str} ; -- ...otherwise without parentheses
|
||||
P1 => table {
|
||||
P3 | P2 => paren str ;
|
||||
_ => str} ;
|
||||
P0 => table {
|
||||
P0 => str ;
|
||||
_ => paren str}
|
||||
} ;
|
||||
|
||||
-- string operations
|
||||
|
||||
SS : Type = {s : Str} ;
|
||||
ss : Str -> SS = \s -> {s = s} ;
|
||||
cc2 : (_,_ : SS) -> SS = \x,y -> ss (x.s ++ y.s) ;
|
||||
|
||||
paren : Str -> Str = \str -> "(" ++ str ++ ")" ;
|
||||
|
||||
continues : Str -> SS -> SS = \s,t -> ss (s ++ ";" ++ t.s) ;
|
||||
continue : Str -> SS -> SS = \s,t -> ss (s ++ t.s) ;
|
||||
statement : Str -> SS = \s -> ss (s ++ ";");
|
||||
|
||||
-- taking cases of list size
|
||||
|
||||
param
|
||||
Size = Zero | One | More ;
|
||||
oper
|
||||
nextSize : Size -> Size = \n -> case n of {
|
||||
Zero => One ;
|
||||
_ => More
|
||||
} ;
|
||||
separator : Str -> Size -> Str = \t,n -> case n of {
|
||||
Zero => [] ;
|
||||
_ => t
|
||||
} ;
|
||||
|
||||
-- operations for JVM
|
||||
|
||||
Instr : Type = {s,s2,s3 : Str} ; -- code, variables, labels
|
||||
instr : Str -> Instr = \s ->
|
||||
statement s ** {s2,s3 = []} ;
|
||||
instrc : Str -> Instr -> Instr = \s,i ->
|
||||
ss (s ++ ";" ++ i.s) ** {s2 = i.s2 ; s3 = i.s3} ;
|
||||
instrl : Str -> Instr -> Instr = \s,i ->
|
||||
ss (s ++ ";" ++ i.s) ** {s2 = i.s2 ; s3 = "L" ++ i.s3} ;
|
||||
instrb : Str -> Str -> Instr -> Instr = \v,s,i ->
|
||||
ss (s ++ ";" ++ i.s) ** {s2 = v ++ i.s2 ; s3 = i.s3} ;
|
||||
binop : Str -> SS -> SS -> SS = \op, x, y ->
|
||||
ss (x.s ++ y.s ++ op ++ ";") ;
|
||||
}
|
||||
\end{verbatim}
|
||||
\normalsize
|
||||
\newpage
|
||||
|
||||
|
||||
\section*{Appendix E: Translation to real JVM}
|
||||
|
||||
This program is written in Haskell. Most of the changes concern
|
||||
spacing and could be done line by line; the really substantial
|
||||
change is due to the need to build a symbol table of variables
|
||||
stored relative to the frame pointer and look up variable
|
||||
addresses at each load and store.
|
||||
\small
|
||||
\begin{verbatim}
|
||||
module JVM where
|
||||
|
||||
mkJVM :: String -> String
|
||||
mkJVM = unlines . reverse . fst . foldl trans ([],([],0)) . lines where
|
||||
trans (code,(env,v)) s = case words s of
|
||||
".method":f:ns -> ((".method " ++ f ++ concat ns):code,([],0))
|
||||
"alloc":t:x:_ -> (code, ((x,v):env, v + size t))
|
||||
".limit":"locals":ns -> chCode (".limit locals " ++ show (length ns - 1))
|
||||
t:"_load" :x:_ -> chCode (t ++ "load " ++ look x)
|
||||
t:"_store":x:_ -> chCode (t ++ "store " ++ look x)
|
||||
t:"_return":_ -> chCode (t ++ "return")
|
||||
"goto":ns -> chCode ("goto " ++ concat ns)
|
||||
"ifzero":ns -> chCode ("ifzero " ++ concat ns)
|
||||
_ -> chCode s
|
||||
where
|
||||
chCode c = (c:code,(env,v))
|
||||
look x = maybe (x ++ show env) show $ lookup x env
|
||||
size t = case t of
|
||||
"d" -> 2
|
||||
_ -> 1
|
||||
\end{verbatim}
|
||||
\normalsize
|
||||
\newpage
|
||||
|
||||
|
||||
|
||||
\end{document}
|
||||
|
||||
72
examples/gfcc/even.c
Normal file
72
examples/gfcc/even.c
Normal file
@@ -0,0 +1,72 @@
|
||||
Funct
|
||||
(ConsTyp
|
||||
TInt
|
||||
NilTyp
|
||||
)
|
||||
TInt
|
||||
(BodyCons
|
||||
TInt
|
||||
NilTyp
|
||||
(\x -> BodyNil
|
||||
(IfElse
|
||||
(ELtI
|
||||
(EVar
|
||||
TInt
|
||||
x
|
||||
)
|
||||
(EInt
|
||||
0
|
||||
)
|
||||
)
|
||||
(Block
|
||||
(Return
|
||||
TInt
|
||||
(ESubI
|
||||
(EInt
|
||||
0
|
||||
)
|
||||
(EVar
|
||||
TInt
|
||||
x
|
||||
)
|
||||
)
|
||||
)
|
||||
End
|
||||
)
|
||||
(Return
|
||||
TInt
|
||||
(EVar
|
||||
TInt
|
||||
x
|
||||
)
|
||||
)
|
||||
End
|
||||
)
|
||||
)
|
||||
)
|
||||
(\abs -> Funct
|
||||
NilTyp
|
||||
TInt
|
||||
(BodyNil
|
||||
(Decl
|
||||
TInt
|
||||
(\i -> Assign
|
||||
TInt
|
||||
i
|
||||
(EApp
|
||||
(ConsTyp
|
||||
TInt
|
||||
NilTyp
|
||||
)
|
||||
TInt
|
||||
abs
|
||||
(ConsExp ? ? (EInt 16) NilExp)
|
||||
)
|
||||
End
|
||||
)
|
||||
)
|
||||
)
|
||||
(\main -> Empty
|
||||
)
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user