diff --git a/examples/gfcc/CleanJVM.hs b/examples/gfcc/CleanJVM.hs new file mode 100644 index 000000000..7c4c1bb54 --- /dev/null +++ b/examples/gfcc/CleanJVM.hs @@ -0,0 +1,59 @@ +module Main where + +import Char +import System + +--- translation from Symbolic JVM to real Jasmin code + +main :: IO () +main = do + jvm:src:_ <- getArgs + s <- readFile jvm + let cls = takeWhile (/='.') src + let obj = cls ++ ".j" + writeFile obj $ boilerplate cls + appendFile obj $ mkJVM cls s + putStrLn $ "wrote file " ++ obj + system $ "java -jar jasmin.jar " ++ obj + return () + +mkJVM :: String -> String -> String +mkJVM cls = unlines . reverse . fst . foldl trans ([],([],0)) . liness where + trans (code,(env,v)) s = case words s of + ".method":p:s:f:ns + | f == "main" -> + (".method public static main([Ljava/lang/String;)V":code,([],1)) + | otherwise -> + (unwords [".method",p,s, f ++ glue ns] : code,([],0)) + "alloc":t:x:_ -> (("; " ++ s):code, ((x,v):env, v + size t)) + ".limit":"locals":ns -> chCode (".limit locals " ++ show (length ns + 1)) + "runtime":f:ns -> chCode $ "invokestatic " ++ "runtime/" ++ f ++ glue ns + "static":f:ns -> chCode $ "invokestatic " ++ cls ++ "/" ++ f ++ glue ns + "alloc":ns -> chCode $ "; " ++ s + ins:x:_ | symb ins -> chCode $ ins ++ " " ++ look x + "goto":ns -> chCode $ "goto " ++ glue ns + "ifeq":ns -> chCode $ "ifeq " ++ glue ns + "label":ns -> chCode $ glue ns ++ ":" + ";":[] -> chCode "" + _ -> chCode s + where + chCode c = (c:code,(env,v)) + look x = maybe (error $ x ++ show env) show $ lookup x env + glue = concat --init . concat + symb = flip elem ["load","store"] . tail + size t = case t of + "d" -> 2 + _ -> 1 + liness = lines . 
map (\c -> if c==';' then '\n' else c) + + +boilerplate :: String -> String +boilerplate cls = unlines [ + ".class public " ++ cls, + ".super java/lang/Object", + ".method public ()V", + "aload_0", + "invokenonvirtual java/lang/Object/()V", + "return", + ".end method" + ] diff --git a/examples/gfcc/Imper.gf b/examples/gfcc/Imper.gf new file mode 100644 index 000000000..a207759b7 --- /dev/null +++ b/examples/gfcc/Imper.gf @@ -0,0 +1,53 @@ +abstract Imper = { + + flags startcat = Program ; + + cat + Program ; + Rec ListTyp ; + Typ ; + IsNum Typ ; + ListTyp ; + Fun ListTyp Typ ; + Stm ; + Exp Typ ; + Var Typ ; + ListExp ListTyp ; + + fun + Empty : Program ; + Funct : (AS : ListTyp) -> (V : Typ) -> + (Fun AS V -> Rec AS) -> Program ; + FunctNil : (V : Typ) -> + Stm -> (Fun NilTyp V -> Program) -> Program ; + RecOne : (A : Typ) -> (Var A -> Stm) -> Program -> Rec (ConsTyp A NilTyp) ; + RecCons : (A : Typ) -> (AS : ListTyp) -> + (Var A -> Rec AS) -> Program -> Rec (ConsTyp A AS) ; + + Decl : (A : Typ) -> (Var A -> Stm) -> Stm ; + Assign : (A : Typ) -> Var A -> Exp A -> Stm -> Stm ; + While : Exp TInt -> Stm -> Stm -> Stm ; + IfElse : Exp TInt -> Stm -> Stm -> Stm -> Stm ; + Block : Stm -> Stm -> Stm ; + Printf : (A : Typ) -> Exp A -> Stm -> Stm ; + Return : (A : Typ) -> Exp A -> Stm ; + Returnv : Stm ; + End : Stm ; + + EVar : (A : Typ) -> Var A -> Exp A ; + EInt : Int -> Exp TInt ; + EFloat : Int -> Int -> Exp TFloat ; + ELt : (n : Typ) -> IsNum n -> Exp n -> Exp n -> Exp TInt ; + EAdd, EMul, ESub : (n : Typ) -> IsNum n -> Exp n -> Exp n -> Exp n ; + EAppNil : (V : Typ) -> Fun NilTyp V -> Exp V ; + EApp : (AS : ListTyp) -> (V : Typ) -> Fun AS V -> ListExp AS -> Exp V ; + + TInt, TFloat : Typ ; + isNumInt : IsNum TInt ; isNumFloat : IsNum TFloat ; + NilTyp : ListTyp ; + ConsTyp : Typ -> ListTyp -> ListTyp ; + + OneExp : (A : Typ) -> Exp A -> ListExp (ConsTyp A NilTyp) ; + ConsExp : (A : Typ) -> (AS : ListTyp) -> + Exp A -> ListExp AS -> ListExp (ConsTyp A AS) ; +} diff 
--git a/examples/gfcc/ImperC.gf b/examples/gfcc/ImperC.gf new file mode 100644 index 000000000..57f9d9f9e --- /dev/null +++ b/examples/gfcc/ImperC.gf @@ -0,0 +1,56 @@ +--# -path=.:../../lib/prelude +concrete ImperC of Imper = open ResImper in { + flags lexer=codevars ; unlexer=code ; startcat=Program ; + + lincat + Exp = PrecExp ; + Typ = {s,s2 : Str} ; + Rec = {s,s2,s3 : Str} ; + + lin + Empty = ss [] ; + FunctNil val stm cont = ss ( + val.s ++ cont.$0 ++ paren [] ++ "{" ++ + stm.s ++ "}" ++ ";" ++ cont.s) ; + Funct args val rec = ss ( + val.s ++ rec.$0 ++ paren rec.s2 ++ "{" ++ + rec.s ++ "}" ++ ";" ++ rec.s3) ; + + RecOne typ stm prg = stm ** { + s2 = typ.s ++ stm.$0 ; + s3 = prg.s + } ; + RecCons typ _ body prg = { + s = body.s ; + s2 = typ.s ++ body.$0 ++ "," ++ body.s2 ; + s3 = prg.s + } ; + + Decl typ cont = continues (typ.s ++ cont.$0) cont ; + Assign _ x exp = continues (x.s ++ "=" ++ exp.s) ; + While exp loop = continue ("while" ++ paren exp.s ++ loop.s) ; + IfElse exp t f = continue ("if" ++ paren exp.s ++ t.s ++ "else" ++ f.s) ; + Block stm = continue ("{" ++ stm.s ++ "}") ; + Printf t e = continues ("printf" ++ paren (t.s2 ++ "," ++ e.s)) ; + Return _ exp = statement ("return" ++ exp.s) ; + Returnv = statement "return" ; + End = ss [] ; + + EVar _ x = constant x.s ; + EInt n = constant n.s ; + EFloat a b = constant (a.s ++ "." 
++ b.s) ; + EMul _ _ = infixL 3 "*" ; + EAdd _ _ = infixL 2 "+" ; + ESub _ _ = infixL 2 "-" ; + ELt _ _ = infixN 1 "<" ; + + EAppNil val f = constant (f.s ++ paren []) ; + EApp args val f exps = constant (f.s ++ paren exps.s) ; + + TInt = {s = "int" ; s2 = "\"%d\""} ; + TFloat = {s = "float" ; s2 = "\"%f\""} ; + NilTyp = ss [] ; + ConsTyp = cc2 ; + OneExp _ e = e ; + ConsExp _ _ e es = ss (e.s ++ "," ++ es.s) ; +} diff --git a/examples/gfcc/ImperEng.gf b/examples/gfcc/ImperEng.gf new file mode 100644 index 000000000..6a2a10e7c --- /dev/null +++ b/examples/gfcc/ImperEng.gf @@ -0,0 +1,71 @@ +-- # -path=.:prelude +--# -path=.:../../lib/prelude + +-- Toy English phrasing of C programs. Intended use is with +-- speech synthesis. Printed code should use HTML formatting. +-- AR 5/10/2005. + +concrete ImperEng of Imper = open Prelude, ResImperEng in { + flags lexer=textvars ; unlexer=text ; startcat=Program ; + + lincat + Rec = {s,s2,s3 : Str} ; + + lin + Empty = ss [] ; + FunctNil val stm cont = ss ( + ["The function"] ++ cont.$0 ++ + "returns" ++ indef ++ val.s ++ "." ++ + ["It is defined as follows :"] ++ + stm.s ++ + PARA ++ + cont.s) ; + Funct args val rec = ss ( + ["The function"] ++ rec.$0 ++ + "takes" ++ rec.s2 ++ + "and" ++ "returns" ++ indef ++ val.s ++ "." 
++ + ["It is defined as follows:"] ++ + rec.s ++ + PARA ++ + rec.s3) ; + + RecOne typ stm prg = stm ** { + s2 = indef ++ typ.s ++ stm.$0 ; + s3 = prg.s + } ; + RecCons typ _ body prg = { + s = body.s ; + s2 = indef ++ typ.s ++ body.$0 ++ "and" ++ body.s2 ; + s3 = prg.s + } ; + + Decl typ cont = continues ("let" ++ cont.$0 ++ "be" ++ indef ++ typ.s) cont ; + Assign _ x exp = continues ("set" ++ x.s ++ "to" ++ exp.s) ; + While exp loop = continues (["if"] ++ exp.s ++ + [", do the following :"] ++ loop.s ++ + ["test the condition and repeat the loop if the condition holds"]) ; + IfElse exp t f = continue ("if" ++ exp.s ++ [", then"] ++ t.s ++ "Else" ++ f.s) ; + Block stm = continue (stm.s) ; + Printf t e = continues ("print" ++ e.s) ; + Return _ exp = statement ("return" ++ exp.s) ; + Returnv = statement ["return from the function"] ; + End = ss [] ; + + EVar _ x = constant x.s ; + EInt n = constant n.s ; + EFloat a b = constant (a.s ++ "." ++ b.s) ; + EMul _ _ = prefix "product" ; + EAdd _ _ = prefix "sum" ; + ESub _ _ x y = ss (["the subtraction of"] ++ y.s ++ "from" ++ x.s) ; + ELt _ _ = comparison "smaller" ; + + EAppNil val f = constant f.s ; + EApp args val f exps = constant (f.s ++ ["applied to"] ++ exps.s) ; + + TInt = {s = "integer"} ; + TFloat = {s = "float"} ; + NilTyp = ss [] ; + ConsTyp = cc2 ; + OneExp _ e = e ; + ConsExp _ _ e es = ss (e.s ++ "and" ++ es.s) ; +} diff --git a/examples/gfcc/ImperJVM.gf b/examples/gfcc/ImperJVM.gf new file mode 100644 index 000000000..d5d390727 --- /dev/null +++ b/examples/gfcc/ImperJVM.gf @@ -0,0 +1,93 @@ +--# -path=.:../../lib/prelude +concrete ImperJVM of Imper = open ResImper in { + +flags lexer=codevars ; unlexer=code ; startcat=Stm ; + + lincat + Rec = {s,s2,s3 : Str} ; -- code, storage for locals, continuation + Typ = {s : Str ; t : TypIdent} ; + Stm = Instr ; + + lin + Empty = ss [] ; + FunctNil val stm cont = ss ( + ".method" ++ "public" ++ "static" ++ cont.$0 ++ paren [] ++ val.s ++ ";" ++ + ".limit" ++ "locals" 
++ stm.s2 ++ ";" ++ + ".limit" ++ "stack" ++ "1000" ++ ";" ++ + stm.s ++ + ".end" ++ "method" ++ ";" ++ ";" ++ + cont.s + ) ; + Funct args val rec = ss ( + ".method" ++ "public" ++ "static" ++ rec.$0 ++ paren args.s ++ val.s ++ ";" ++ + ".limit" ++ "locals" ++ rec.s2 ++ ";" ++ + ".limit" ++ "stack" ++ "1000" ++ ";" ++ + rec.s ++ + ".end" ++ "method" ++ ";" ++ ";" ++ + rec.s3 + ) ; + + RecOne typ stm prg = instrb typ.s ( + ["alloc"] ++ typ.s ++ stm.$0 ++ stm.s2) {s = stm.s ; s2 = stm.s2 ; s3 = prg.s}; + + RecCons typ _ body prg = instrb typ.s ( + ["alloc"] ++ typ.s ++ body.$0 ++ body.s2) + {s = body.s ; s2 = body.s2 ; s3 = prg.s}; + + Decl typ cont = instrb typ.s ( + ["alloc"] ++ typ.s ++ cont.$0 + ) cont ; + Assign t x exp = instrc (exp.s ++ typInstr "store" t.t ++ x.s) ; + While exp loop = + let + test = "TEST_" ++ loop.s2 ; + end = "END_" ++ loop.s2 + in instrl ( + "label" ++ test ++ ";" ++ + exp.s ++ + "ifeq" ++ end ++ ";" ++ + loop.s ++ + "goto" ++ test ++ ";" ++ + "label" ++ end + ) ; + IfElse exp t f = + let + false = "FALSE_" ++ t.s2 ++ f.s2 ; + true = "TRUE_" ++ t.s2 ++ f.s2 + in instrl ( + exp.s ++ + "ifeq" ++ false ++ ";" ++ + t.s ++ + "goto" ++ true ++ ";" ++ + "label" ++ false ++ ";" ++ + f.s ++ + "label" ++ true + ) ; + Block stm = instrc stm.s ; + Printf t e = instrc (e.s ++ "runtime" ++ typInstr "printf" t.t ++ paren (t.s) ++ "V") ; + Return t exp = instr (exp.s ++ typInstr "return" t.t) ; + Returnv = instr "return" ; + End = ss [] ** {s2,s3 = []} ; + + EVar t x = instr (typInstr "load" t.t ++ x.s) ; + EInt n = instr ("ldc" ++ n.s) ; + EFloat a b = instr ("ldc" ++ a.s ++ "." 
++ b.s) ; + EAdd t _ = binopt "add" t.t ; + ESub t _ = binopt "sub" t.t ; + EMul t _ = binopt "mul" t.t ; + ELt t _ = binop ("runtime" ++ typInstr "lt" t.t ++ paren (t.s ++ t.s) ++ "I") ; + EAppNil val f = instr ( + "static" ++ f.s ++ paren [] ++ val.s + ) ; + EApp args val f exps = instr ( + exps.s ++ + "static" ++ f.s ++ paren args.s ++ val.s + ) ; + + TInt = {s = "I" ; t = TIInt} ; + TFloat = {s = "F" ; t = TIFloat} ; + NilTyp = ss [] ; + ConsTyp = cc2 ; + OneExp _ e = e ; + ConsExp _ _ = cc2 ; +} diff --git a/examples/gfcc/JVM.hs b/examples/gfcc/JVM.hs new file mode 100644 index 000000000..380570049 --- /dev/null +++ b/examples/gfcc/JVM.hs @@ -0,0 +1,20 @@ +module JVM where + +mkJVM :: String -> String +mkJVM = unlines . reverse . fst . foldl trans ([],([],0)) . lines where + trans (code,(env,v)) s = case words s of + ".method":f:ns -> ((".method " ++ f ++ concat ns):code,([],0)) + "alloc":t:x:_ -> (code, ((x,v):env, v + size t)) + ".limit":"locals":ns -> chCode (".limit locals " ++ show (length ns - 1)) + t:"_load" :x:_ -> chCode (t ++ "load " ++ look x) + t:"_store":x:_ -> chCode (t ++ "store " ++ look x) + t:"_return":_ -> chCode (t ++ "return") + "goto":ns -> chCode ("goto " ++ concat ns) + "ifzero":ns -> chCode ("ifzero " ++ concat ns) + _ -> chCode s + where + chCode c = (c:code,(env,v)) + look x = maybe (x ++ show env) show $ lookup x env + size t = case t of + "d" -> 2 + _ -> 1 diff --git a/examples/gfcc/Makefile b/examples/gfcc/Makefile new file mode 100644 index 000000000..d260049fa --- /dev/null +++ b/examples/gfcc/Makefile @@ -0,0 +1,12 @@ +all: pgf runtime + +pgf: + gf -make ImperC.gf ImperJVM.gf + +runtime: + java -jar jasmin.jar runtime.j + +doc: + pdflatex complin.tex + pdflatex complin.tex + diff --git a/examples/gfcc/ResImper.gf b/examples/gfcc/ResImper.gf new file mode 100644 index 000000000..57cdf9434 --- /dev/null +++ b/examples/gfcc/ResImper.gf @@ -0,0 +1,85 @@ +resource ResImper = open Predef in { + + -- precedence + + param PAssoc = PN | 
PL | PR ; + + oper + Prec : PType = Predef.Ints 4 ; + PrecExp : Type = {s : Str ; p : Prec ; a : PAssoc} ; + + mkPrec : Prec -> PAssoc -> Str -> PrecExp = \p,a,f -> + {s = f ; p = p ; a = a} ; + + usePrec : PrecExp -> Prec -> Str = \x,p -> + case < : Prec * Prec> of { + <3,4> | <2,3> | <2,4> => paren x.s ; + <1,1> | <1,0> | <0,0> => x.s ; + <1,_> | <0,_> => paren x.s ; + _ => x.s + } ; + + constant : Str -> PrecExp = mkPrec 4 PN ; + + infixN : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y -> + mkPrec p PN (usePrec x (nextPrec p) ++ f ++ usePrec y (nextPrec p)) ; + infixL : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y -> + mkPrec p PL (usePrec x p ++ f ++ usePrec y (nextPrec p)) ; + infixR : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y -> + mkPrec p PR (usePrec x (nextPrec p) ++ f ++ usePrec y p) ; + + nextPrec : Prec -> Prec = \p -> case
<p : Prec>
of { + 4 => 4 ; + n => Predef.plus n 1 + } ; + + -- string operations + + SS : Type = {s : Str} ; + ss : Str -> SS = \s -> {s = s} ; + cc2 : (_,_ : SS) -> SS = \x,y -> ss (x.s ++ y.s) ; + + paren : Str -> Str = \str -> "(" ++ str ++ ")" ; + + continues : Str -> SS -> SS = \s,t -> ss (s ++ ";" ++ t.s) ; + continue : Str -> SS -> SS = \s,t -> ss (s ++ t.s) ; + statement : Str -> SS = \s -> ss (s ++ ";"); + + -- taking cases of list size + + param + Size = Zero | One | More ; + oper + nextSize : Size -> Size = \n -> case n of { + Zero => One ; + _ => More + } ; + separator : Str -> Size -> Str = \t,n -> case n of { + Zero => [] ; + _ => t + } ; + + -- operations for JVM + + param TypIdent = TIInt | TIFloat ; -- to be continued + + oper + typInstr : Str -> TypIdent -> Str = \instr,t -> case t of { + TIInt => "i" + instr ; + TIFloat => "f" + instr + } ; + + Instr : Type = {s,s2,s3 : Str} ; -- code, variables, labels + instr : Str -> Instr = \s -> + statement s ** {s2,s3 = []} ; + instrc : Str -> Instr -> Instr = \s,i -> + ss (s ++ ";" ++ i.s) ** {s2 = i.s2 ; s3 = i.s3} ; + instrl : Str -> Instr -> Instr = \s,i -> + ss (s ++ ";" ++ i.s) ** {s2 = i.s2 ; s3 = "L" ++ i.s3} ; + instrb : Str -> Str -> Instr -> Instr = \v,s,i -> + ss (s ++ ";" ++ i.s) ** {s2 = v ++ i.s2 ; s3 = i.s3} ; + binop : Str -> SS -> SS -> SS = \op, x, y -> + ss (x.s ++ y.s ++ op ++ ";") ; + binopt : Str -> TypIdent -> SS -> SS -> SS = \op, t -> + binop (typInstr op t) ; +} diff --git a/examples/gfcc/ResImperEng.gf b/examples/gfcc/ResImperEng.gf new file mode 100644 index 000000000..87cb30a54 --- /dev/null +++ b/examples/gfcc/ResImperEng.gf @@ -0,0 +1,16 @@ +resource ResImperEng = open Predef, Prelude in { + + oper + indef = pre {"a" ; + "an" / strs {"a" ; "e" ; "i" ; "o" ; "A" ; "E" ; "I" ; "O" }} ; + + constant : Str -> SS = ss ; + prefix : Str -> SS -> SS -> SS = \f,x,y -> + ss ("the" ++ f ++ "of" ++ x.s ++ "and" ++ y.s) ; + comparison : Str -> SS -> SS -> SS = \f,x,y -> + ss (x.s ++ "is" ++ f ++ 
"than" ++ y.s) ; + continues : Str -> SS -> SS = \s,t -> ss (s ++ "." ++ t.s) ; + continue : Str -> SS -> SS = \s,t -> ss (s ++ t.s) ; + statement : Str -> SS = \s -> ss (s ++ "."); + +} diff --git a/examples/gfcc/abs.c b/examples/gfcc/abs.c new file mode 100644 index 000000000..947711c13 --- /dev/null +++ b/examples/gfcc/abs.c @@ -0,0 +1,20 @@ +int abs (int x){ + int y ; + { + if (x < 0){ + y = 0 - x ; + } + else { + y = x ; + } + } + return y ; + } ; +int main () { + int i ; + i = abs (16); + printf ("%d",i) ; + return ; + } ; + + diff --git a/examples/gfcc/complin.bbl b/examples/gfcc/complin.bbl new file mode 100644 index 000000000..cf18b704f --- /dev/null +++ b/examples/gfcc/complin.bbl @@ -0,0 +1,96 @@ +\begin{thebibliography}{10} + +\bibitem{aho-ullman} +A.~Aho, R.~Sethi, and J.~Ullman. +\newblock {\em {Compilers: Principles, Techniques, and Tools}}. +\newblock {Addison-Wesley}, 1988. + +\bibitem{cayenne} +L.~Augustsson. +\newblock {Cayenne---a language with dependent types}. +\newblock In {\em Proc. of {ICFP'98}}. ACM Press, September 1998. + +\bibitem{bnfc} +M.~Forsberg and A.~Ranta. +\newblock {\mbox{BNF Converter Homepage}}. +\newblock \verb!http://www.cs.chalmers.se/~markus/BNFC/!, 2002--2004. + +\bibitem{harper-honsell} +R.~Harper, F.~Honsell, and G.~Plotkin. +\newblock {A Framework for Defining Logics}. +\newblock {\em {JACM}}, 40(1):143--184, 1993. + +\bibitem{metal} +G.~Kahn, B.~Lang, B.~Mélèse, and E.~Morcos. +\newblock Metal: a formalism to specify formalisms. +\newblock {\em Science of {C}omputer {P}rogramming}, 3:151--188, 1983. + +\bibitem{khegai} +J.\ Khegai, B.\ Nordström, and A.\ Ranta. +\newblock {Multilingual Syntax Editing in GF}. +\newblock In A.~Gelbukh, editor, {\em {Intelligent Text Processing and + Computational Linguistics (CICLing-2003), Mexico City, February 2003}}, + volume 2588 of {\em LNCS}, pages 453--464. Springer-Verlag, {2003}. +%\newblock URL \url{http://www.cs.chalmers.se/~aarne/articles/mexico.ps.gz}. 
+ +\bibitem{knuth-attr} +D.~Knuth. +\newblock Semantics of context-free languages. +\newblock {\em Mathematical {Systems} {Theory}}, 2:127--145, 1968. + +\bibitem{landin} +P.~Landin. +\newblock The next 700 programming languages. +\newblock {\em {Communications of the ACM}}, 9:157--166, 1966. + +\bibitem{magnusson-nordstr} +L.~Magnusson and B.~Nordstr\"{o}m. +\newblock The {ALF} proof editor and its proof engine. +\newblock In {\em {Types for Proofs and Programs}}, LNCS 806, pages 213--237. + Springer, 1994. + +\bibitem{happy} +S.~Marlow. +\newblock {Happy, The Parser Generator for Haskell}, 2001. +\newblock \verb6http://www.haskell.org/happy/6. + +\bibitem{jasmin} +J.~Meyer and T.~Downing. +\newblock {\em {Java Virtual Machine}}. +\newblock O'Reilly, 1997. + +\bibitem{semBNF} +{P. M\"aenp\"a\"a}. +\newblock {Semantic BNF}. +\newblock In E.~Gimenez and C.~Paulin-Mohring, editors, {\em Types for Proofs + and Programs, TYPES'96}, volume 1512 of {\em LNCS}, pages 196--215. + Springer-Verlag, 1998. + +\bibitem{pereira-shieber} +F.~Pereira and S.~Shieber. +\newblock {\em {Prolog and Natural-Language Analysis}}. +\newblock {CSLI}, Stanford, 1987. + +\bibitem{twelf} +F.~Pfenning. +\newblock {The Twelf Project}. +\newblock \verb!http://www-2.cs.cmu.edu/~twelf!, 2002. + +\bibitem{gf-jfp} +A.~Ranta. +\newblock {Grammatical Framework: A Type-Theoretical Grammar Formalism}. +\newblock {\em {The Journal of Functional Programming}}, 14(2):145--189, 2004. +%\newblock URL \url{http://www.cs.chalmers.se/~aarne/articles/gf-jfp.ps.gz}. + +\bibitem{gf-homepage} +A.~Ranta, K.~Angelov, and T.~Hallgren. +\newblock {\mbox{Grammatical Framework Homepage}}. +\newblock \verb!http://grammaticalframework.org!, 2000--2009. + +\bibitem{teitelbaum} +T.~Teitelbaum and T.~Reps. +\newblock The {Cornell} {Program} {Synthesizer}: a syntax-directed programming + environment. +\newblock {\em Commun. {ACM}}, 24(9):563--573, 1981. 
+ +\end{thebibliography} diff --git a/examples/gfcc/complin.tex b/examples/gfcc/complin.tex new file mode 100644 index 000000000..9e624a846 --- /dev/null +++ b/examples/gfcc/complin.tex @@ -0,0 +1,1477 @@ +\documentclass[12pt]{article} + +\usepackage[latin1]{inputenc} +%\input{psfig.sty} + +\setlength{\oddsidemargin}{0mm} +%\setlength{\evensidemargin}{0mm} +\setlength{\evensidemargin}{-2mm} +\setlength{\topmargin}{-16mm} +\setlength{\textheight}{240mm} +\setlength{\textwidth}{158mm} + +%\setlength{\parskip}{2mm} +%\setlength{\parindent}{0mm} + +\input{macros} + +\newcommand{\begit}{\begin{itemize}} +\newcommand{\enit}{\end{itemize}} +\newcommand{\HOAS}{higher-order abstract syntax} +\newcommand{\newone}{} %%{\newpage} +\newcommand{\heading}[1]{\subsection{#1}} +\newcommand{\explanation}[1]{{\small #1}} +\newcommand{\empha}[1]{{\em #1}} +\newcommand{\commentOut}[1]{} +\newcommand{\rarrow}{\; \rightarrow\;} + +\newcommand{\nocolor}{} %% {\color[rgb]{0,0,0}} + + +\title{{\bf Declarative Language Definitions and Code Generation as Linearization}} + +\author{Aarne Ranta \\ + Department of Computing Science \\ + Chalmers University of Technology and the University of Gothenburg\\ + {\tt aarne@cs.chalmers.se}} + +\date{30 November 2004} + +\begin{document} + +\maketitle + + +\subsection*{Abstract} + +{\em +This paper presents a compiler for a fragment of the C programming +language, with JVM (Java Virtual Machine) as target language. +The compiler is implemented in a purely declarative way: +its definition consists of an abstract syntax of program +structures and two concrete syntaxes matching the abstract +syntax: one for C and one for JVM. From these grammar components, +the compiler is derived by using the GF (Grammatical Framework) +grammat tool: the front end consists of parsing and semantic +checking in accordance to the C grammar, and the back end consists +of linearization in accordance to the JVM grammar. 
The tool provides +other functionalities as well, such as decompilation and interactive +editing. +} + +\section{Introduction} + +The experiment reported in this paper was prompted by a challenge +posted by Lennart Augustsson to the participants of the workshop +on Dependent Types in Programming held at Dagstuhl in September 2004. +The challenge was to use dependent types to write a compiler from +C to bytecode. This paper does not meet the challenge quite literally, +since our compiler is for a different subset of C than Augustsson's +specification, and since the bytecode that we generate is JVM instead +of his format. But it definitely makes use of dependent types. + +Augustsson's challenge did not specify \textit{how} dependent +types are to be used, and the first of the two points we make in this +paper (and its title) reflects our interpretation: +we use dependent types, in combination with higher-order abstract syntax (HOAS), +to define the grammar of the source language (here, the fragment of C). +The grammar constitutes the single, declarative source from which +the compiler front end is derived, comprising both parser and type +checker. + +The second point, code generation by linearization, means that the +back end is likewise implemented by a grammar of the target +language (in this case, a fragment of JVM). This grammar is the +declarative source from which the compiler back end is derived. +In addition, some postprocessing is needed to +make the code conform to Jasmin assembler requirements. + +The complete code of the compiler is 300 lines: 250 lines for the grammars, +50 lines for the postprocessor. The code +is presented in the appendices of this paper. + + + +\section{The Grammatical Framework} + +The tool we have used for implementing the compiler is +GF, the \empha{Grammatical Framework} \cite{gf-jfp}. GF +is similar to a Logical Framework (LF) +\cite{harper-honsell} +extended with +a notation for defining concrete syntax. 
GF was originally +designed to help building multilingual +translation systems for natural languages and also +between formal and natural languages. The translation model +implemented by GF is very simple: +\begin{verbatim} + parsing linearization + ------------> ------------> + Language_1 Abstract Syntax Language_2 + <------------ <------------ + linearization parsing +\end{verbatim} +An abstract syntax is similar to a \empha{theory}, or a +\empha{signature} in a logical framework. A +concrete syntax defines, in a declarative way, +a translation of abstract syntax trees (well-formed terms) +into concrete language structures, and from this definition, one can +derive both linearization and parsing. + +To give an example, +a (somewhat simplified) translator for addition expressions +consists of the abstract syntax rule +\begin{verbatim} + fun EAdd : (A : Typ) -> Exp A -> Exp A -> Exp A ; +\end{verbatim} +the C concrete syntax rule +\begin{verbatim} + lin EAdd _ x y = {s = x.s ++ "+" ++ y.s ; prec = 2} ; +\end{verbatim} +and the JVM concrete syntax rule +\begin{verbatim} + lin EAdd t x y = {s = x.s ++ y.s ++ t.s ++ "_add"} ; +\end{verbatim} +The abstract syntax rule uses a type argument to capture +the fact that addition is polymorphic (which is a simplification, +because we will restrict the rule to numeric types only) +and that both operands have the same type as the value. +The C rule shows that the type information is suppressed, +and that the expression has precedence level 2 (which is a simplification, +since we will also treat associativity). +The JVM rule shows how addition is translated to stack machine +instructions, where the type of the postfixed addition instruction has to +be made explicit. Our compiler, like any GF translation system, will +consist of rules like these. + +The number of languages related to one abstract syntax in +a translation system is of course not limited to two. 
+Sometimes just one language is involved; +GF then works much the same way as any grammar +formalism or parser generator. +The largest number of languages in an application known to us is 88; +its domain are numeral expressions from 1 to 999,999 \cite{gf-homepage}. + +In addition to linearization and parsing, GF supports grammar-based +\empha{multilingual authoring} \cite{khegai}: interactive editing +of abstract syntax trees with immediate feedback as linearized texts, +and the possibility to textual through the parsers. + +From the GF point of view, the goal of the compiler experiment +is to investigate if GF is capable of implementing +compilers using the ideas of single-source language definition +and code generation as linearization. The working hypothesis +was that it \textit{is} capable but inconvenient, and that, +working out a complete example, we would find out what +should be done to extend GF into a compiler construction tool. + + +\subsection{Advantages and disadvantages} + +Due to the way in which it is built, our compiler has +a number of unusual, yet attractive features: +\bequ +The front end is defined by a grammar of C as its single source. + +The grammar defines both abstract and concrete syntax, and also +semantic well-formedness (types, variable scopes). + +The back end is implemented by means of a grammar of JVM providing +another concrete syntax to the abstract syntax of C. + +As a result of the way JVM is defined, only semantically well formed +JVM programs are generated. + +The JVM grammar can also be used as a decompiler, which translates +JVM code back into C code. + +The language has an interactive editor that also supports incremental +compilation. +\enqu +The problems that we encountered and their causes will be explained in +the relevant sections of this report. To summarize, +\bequ +The scoping conditions resulting from \HOAS\ are slightly different +from the standard ones of C. 
+ +Our JVM syntax is slightly different from the specification, and +hence needs some postprocessing. + +Using \HOAS\ to encode all bindings is sometimes cumbersome. +\enqu +The first shortcoming seems to be inevitable with the technique +we use: just like lambda calculus, our C semantics allows +overshadowing of earlier bindings by later ones. +The second problem is systematically solved by using +an intermediate JVM format, where symbolic variable addresses +are used instead of numeric stack addresses. +The last shortcoming is partly inherent in the problem of binding: +to spell out, in any formal notation, +what happens in complex binding structures \textit{is} +complicated. But it also suggests ways in which GF could be +tuned to give better support +to compiler construction, which, after all, is not an intended +use of GF as it is now. + + + + + + + + +\section{The abstract syntax} + +An \empha{abstract syntax} in GF consists of \texttt{cat} judgements +\[ +\mbox{\texttt{cat}} \; C \; \Gamma +\] +declaring basic types (depending on a context $\Gamma$), +and \texttt{fun} judgements +\[ +\mbox{\texttt{fun}} \; f \; \mbox{\texttt{:}} \; A +\] +declaring functions $f$ of any type $A$, which can be a basic type or +a function type. +\empha{Syntax trees} are well-formed terms of basic +types, in $\eta$-long normal form. + +As for notation, each judgement form is recognized by +its keyword (\texttt{cat}, \texttt{fun}, etc), +and the same keyword governs all judgements +until the next keyword is encountered. + +The abstract syntax that we will present is no doubt closer +to C than to JVM. One reason is that what we are building is a +\textit{C compiler}, and match with the target language is a +secondary consideration. Another, more general reason is that +C is a higher-level language and JVM which means, among +other things, that C makes more semantic distinctions. 
+In general, the abstract syntax of a translation system +must reflect all semantic distinctions that can be made +in the languages involved, and then it is a good idea to +start with looking at what the most distinctive language +needs. + + + +\subsection{Statements} + +Statements in C may involve variables, expressions, and +other statements. +The following \texttt{cat} judgements of GF define the syntactic categories +that are needed to construct statements +\begin{verbatim} + cat + Stm ; + Typ ; + Exp Typ ; + Var Typ ; +\end{verbatim} +The type \texttt{Typ} is the type of C's datatypes. +The type of expressions is a dependent type, +since it has a nonempty context, indicating that \texttt{Exp} takes +a \texttt{Typ} as argument. The rules for \texttt{Exp} +will thus be rules to construct well-typed expressions of +a given type. \texttt{Var}\ is the type of variables, +of a given type, that get bound in C's variable +declarations. + +Let us start with the simplest kind of statements: +declarations and assignments. The following \texttt{fun} +rules define their abstract syntax: +\begin{verbatim} + fun + Decl : (A : Typ) -> (Var A -> Stm) -> Stm ; + Assign : (A : Typ) -> Var A -> Exp A -> Stm -> Stm ; +\end{verbatim} +The \texttt{Decl}\ function captures the rule that +a variable must be declared before it can be used or assigned to: +its second argument is a \empha{continuation}, which is +the sequence of statements that depend on (= may refer to) +the declared variable. +The \texttt{Assign} function uses dependent types to +control that a variable is always assigned a value of proper +type. + +We will treat all statements, except +\texttt{return}s, in terms of continuations. A sequence of +statements (which always has the type \texttt{Stm}) thus +always ends in a \texttt{return}, or, abruptly, in +an empty statement, \texttt{End}. 
Here are rules for some other +statement forms: +\begin{verbatim} + While : Exp TInt -> Stm -> Stm -> Stm ; + IfElse : Exp TInt -> Stm -> Stm -> Stm -> Stm ; + Block : Stm -> Stm -> Stm ; + Return : (A : Typ) -> Exp A -> Stm ; + End : Stm ; +\end{verbatim} +Here is an example of a piece of code and its abstract syntax. +\begin{verbatim} + int x ; Decl (TNum TInt) (\x -> + x = 5 ; Assign (TNum TInt) x (EInt 5) ( + return x ; Return (TNum TInt) (EVar (TNum TInt) x))) +\end{verbatim} +The details of expression and type +syntax will be explained in the next section. + +Our binding syntax is more liberal than C's in two ways. +First, +lambda calculus permits overshadowing previous bindings +by new ones, e.g. to write \verb6\x -> (\x -> f x)6. +The corresponding overshadowing of declarations is not +legal in C, within one and the same block. +Secondly, we allow declarations anywhere in a block, +not just in the beginning. +The second deviation would be easy to mend, whereas +the first one is inherent to the method of \HOAS. + + + +\subsection{Types and expressions} + +Our fragment of C has two types: integers and floats. +Many operators of C are overloaded so that they can +be used for both of these types, as well as for +some other numeric types---but not for e.g.\ arrays +and structures. We capture this distinction by a notion +reminiscent of \empha{type classes}: we introduce a special +category of numeric types, and a coercion of numeric types +into types in general. +\begin{verbatim} + cat + NumTyp ; + fun + TInt, TFloat : NumTyp ; + TNum : NumTyp -> Typ ; +\end{verbatim} +Well-typed expressions are built from constants, +from variables, and by means of binary operations. 
+\begin{verbatim} + EVar : (A : Typ) -> Var A -> Exp A ; + EInt : Int -> Exp (TNum TInt) ; + EFloat : Int -> Int -> Exp (TNum TFloat) ; + ELt : (n : NumTyp) -> let Ex = Exp (TNum n) in + Ex -> Ex -> Exp (TNum TInt) ; + EAdd, EMul, ESub : (n : NumTyp) -> let Ex = Exp (TNum n) in + Ex -> Ex -> Ex ; +\end{verbatim} +Notice that the category \texttt{Var} has no constructors, +but its expressions are only created by +variable bindings in \HOAS. +Notice also that GF has a built-in type \texttt{Int} of +integer literals, but floats are constructed by hand. + +Yet another expression form are function calls. To this +end, we need a notion of (user-defined) functions and +argument lists. The type of functions depends on an +argument type list and a value type. Expression lists +are formed to match type lists. +\begin{verbatim} + cat + ListTyp ; + Fun ListTyp Typ ; + ListExp ListTyp ; + + fun + EAppNil : (V : Typ) -> Fun NilTyp V -> Exp V ; + EApp : (AS : ListTyp) -> (V : Typ) -> + Fun AS V -> ListExp AS -> Exp V ; + + NilTyp : ListTyp ; + ConsTyp : Typ -> ListTyp -> ListTyp ; + + OneExp : (A : Typ) -> Exp A -> ListExp (ConsTyp A NilTyp) ; + ConsExp : (A : Typ) -> (AS : ListTyp) -> + Exp A -> ListExp AS -> ListExp (ConsExp A AS) ; +\end{verbatim} +The separation between zero-element applications and other +applications is a concession to the concrete syntax of C: +it in this way that we can control the use of commas so that +they appear between arguments (\texttt{(x,y,z)}) but not +after the last argument (\texttt{(x,y,z,)}). +The compositionality of linearization (Section~\ref{compositionality} below) +forbids case analysis on the length of the lists. + + +\subsection{Functions} + +On the top level, a program is a sequence of functions. +Each function may refer to functions defined earlier +in the program. The idea to express the binding of +function symbols with \HOAS\ is analogous to the binding +of variables in statements, using a continuation. 
+As with variables, the principal way to build function symbols is as
+bound variables (in addition, there can be some
+built-in functions, unlike in the case of variables).
+The continuation can be recursive, which we express by
+making the function body into a part of the continuation;
+the category \texttt{Rec} is the combination of a function
+body and the subsequent function definitions.
+\begin{verbatim}
+  cat
+    Program ;
+    Rec ListTyp ;
+  fun
+    Empty : Program ;
+    Funct : (AS : ListTyp) -> (V : Typ) ->
+      (Fun AS V -> Rec AS) -> Program ;
+    FunctNil : (V : Typ) ->
+      Stm -> (Fun NilTyp V -> Program) -> Program ;
+\end{verbatim}
+For syntactic reasons similar to function application
+expressions in the previous section, we have distinguished between
+empty and non-empty argument lists.
+
+The tricky problem with function definitions
+is that they involve two nested binding constructions:
+the outer binding of the function symbol and the inner
+binding of the function parameter lists.
+For the latter, we could use
+vectors of variables, in the same way as vectors of
+expressions are used to give arguments to functions.
+However, this would lead to the need of cumbersome
+projection functions when using the parameters
+in the function body. A more elegant solution is
+to use \HOAS\ to build function bodies:
+\begin{verbatim}
+  RecOne : (A : Typ) ->
+    (Var A -> Stm) -> Program -> Rec (ConsTyp A NilTyp) ;
+  RecCons : (A : Typ) -> (AS : ListTyp) ->
+    (Var A -> Rec AS) -> Program -> Rec (ConsTyp A AS) ;
+\end{verbatim}
+The end result is an abstract syntax whose relation
+to concrete syntax is somewhat remote.
Here is an example of
+the code of a function and its abstract syntax:
+\begin{verbatim}
+  let int = TNum TInt in
+  int fact                Funct (ConsTyp int NilTyp) int (\fact ->
+    (int n) {             RecOne int (\n ->
+    int f ;               Decl int (\f ->
+    f = 1 ;               Assign int f (EInt 1) (
+    while (1 < n) {       While (ELt int (EInt 1) (EVar int n)) (Block (
+      f = n * f ;         Assign int f (EMul int (EVar int n) (EVar int f)) (
+      n = n - 1 ;         Assign int n (ESub int (EVar int n) (EInt 1))
+    }                     End))
+    return f ;            (Return int (EVar int f))) End)))
+  } ;                     Empty)
+\end{verbatim}
+
+
+\subsection{The \texttt{printf} function}
+
+To give a valid type to the C function \texttt{printf}
+is one of the things that can be done with
+dependent types \cite{cayenne}. We have not defined \texttt{printf}
+in its full strength, partly because of the difficulties to compile
+it into JVM. But we use special cases of \texttt{printf} as
+statements, to be able to print values of different types.
+\begin{verbatim}
+  Printf : (A : Typ) -> Exp A -> Stm -> Stm ;
+\end{verbatim}
+
+
+
+
+
+\section{The concrete syntax of C}
+
+A concrete syntax, for a given abstract syntax,
+consists of \texttt{lincat} judgements
+\[
+\mbox{\texttt{lincat}} \; C \; \mbox{\texttt{=}} \; T
+\]
+defining the \empha{linearization types} $T$ of each category $C$,
+and \texttt{lin} judgements
+\[
+\mbox{\texttt{lin}} \; f \; \mbox{\texttt{=}} \; t
+\]
+defining the \empha{linearization functions} $t$ of each function $f$
+in the abstract syntax. The linearization functions are
+checked to be well-typed with respect to the \texttt{lincat}
+definitions, and the syntax of GF forces them to be \empha{compositional}
+in the sense that the linearization of a complex tree is always
+a function of the linearizations of the subtrees.
Schematically, if +\[ + f \colon A_{1} \rarrow \cdots \rarrow A_{n} \rarrow A +\] +then +\[ + \sugmap{f} \colon + \sugmap{A_{1}} \rarrow \cdots + \rarrow \sugmap{A_{n}} \rarrow \sugmap{A} +\] +and the linearization algorithm is simply +\[ + \sugmap{(f \; a_{1} \; \ldots \; a_{n})} \; = \; + \sugmap{f} \; \sugmap{a_{1}} \; \ldots \; \sugmap{a_{n}} +\] +using the \sugmap{} notation for both linearization types, +linearization functions, and linearizations of trees. + +\label{compositionality} +Because of compositionality, no case analysis on expressions +is possible in linearization rules. The values of linearization +therefore have to carry information on how they are used in +different situations. Therefore linearization +types are generally record types instead of just the string type. +The simplest record type that is used in GF is +\bece +\verb6{s : Str}6 +\ence +If the linearization type of a category is not explicitly +given by a \texttt{lincat} judgement, this type is +used by default. + +With \HOAS, a syntax tree can have variable-bindings in its +constituents. The linearization of such a constituent +is compositionally defined to be the record linearizing the body +extended with fields for each of the variable symbols: +\[ +\sugmap{(\lambda x_{0} \rarrow \cdots \rarrow \lambda x_{n} \rarrow b)} +\;=\; +\sugmap{b} *\!* \{\$_{0} = \sugmap{x_{0}} ; \ldots ; \$_{n} = \sugmap{x_{n}}\} +\] +Notice that the variable symbols can +always be found because linearizable trees are in $\eta$-long normal form. +Also notice that we are here using the +\sugmap{} notation in yet another way, to denote the magic operation +that converts variable symbols into strings. + + +\subsection{Resource modules} + +Resource modules define auxiliary notions that can be +used in concrete syntax. 
These notions include +\empha{parameter types} defined by \texttt{param} +judgements +\[ +\mbox{\texttt{param}} \; P \; \mbox{\texttt{=}} + \; C_{1} \; \Gamma_{1} \; \mid \; \cdots \; \mid \; + \; C_{n} \; \Gamma_{n} +\] +and \empha{operations} defined by +\texttt{oper} judgements +\[ +\mbox{\texttt{oper}} \; f \; \mbox{\texttt{:}} \; T \; \mbox{\texttt{=}} \; t +\] +These judgements are +similar to datatype and function definitions +in functional programming, with the restriction +that parameter types must be finite and operations +may not be recursive. It is due to these restrictions that +we can always derive a parsing algorithm from a set of +linearization rules. + +In addition to types defined in \texttt{param} judgements, +initial segments of natural numbers, \texttt{Ints n}, +can be used as parameter types. This is the most important parameter +type we use in the syntax of C, to represent precedence. + +The following string operations are useful in almost +all grammars. They are actually included in a GF \texttt{Prelude}, +but are here defined from scratch to make the code shown in +the Appendices complete. +\begin{verbatim} + oper + SS : Type = {s : Str} ; + ss : Str -> SS = \s -> {s = s} ; + cc2 : (_,_ : SS) -> SS = \x,y -> ss (x.s ++ y.s) ; + paren : Str -> Str = \str -> "(" ++ str ++ ")" ; +\end{verbatim} + + + +\subsection{Precedence} + +We want to be able to recognize and generate one and the same expression with +or without parentheses, depending on whether its precedence level +is lower or higher than expected. For instance, a sum used as +an operand of multiplication must be in parentheses. We +capture this by defining a parameter type of +precedence levels. 
Five levels are enough for the present +fragment of C, so we use the enumeration type of +integers from 0 to 4 to define the \empha{inherent precedence level} +of an expression +\begin{verbatim} + oper + Prec : PType = Predef.Ints 4 ; + PrecExp : Type = {s : Str ; p : Prec} ; +\end{verbatim} +in a resource module (see Appendix D), and +\begin{verbatim} + lincat Exp = PrecExp ; +\end{verbatim} +in the concrete syntax of C itself. + +To build an expression that has a certain inherent precedence level, +we use the operation +\begin{verbatim} + mkPrec : Prec -> Str -> PrecExp = \p,s -> {s = s ; p = p} ; +\end{verbatim} +To use an expression of a given inherent level at some expected level, +we define a function that says that, if the inherent level is lower +than the expected level, parentheses are required. +\begin{verbatim} + usePrec : PrecExp -> Prec -> Str = \x,p -> + ifThenStr + (less x.p p) + (paren x.s) + x.s ; +\end{verbatim} +(The code shown in Appendix D is at the moment more cumbersome, +due to holes in the support for integer arithmetic in GF.) + +With the help of \texttt{mkPrec} and \texttt{usePrec}, +we can now define the main high-level operations that are +used in the concrete syntax itself---constants (highest level), +non-associative infixes, and left associative infixes: +\begin{verbatim} + constant : Str -> PrecExp = mkPrec 4 ; + + infixN : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y -> + mkPrec p (usePrec x (nextPrec p) ++ f ++ usePrec y (nextPrec p)) ; + infixL : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y -> + mkPrec p (usePrec x p ++ f ++ usePrec y (nextPrec p)) ; +\end{verbatim} +(The code in Appendix D adds to this an associativity parameter, +which is redundant in GF, but which we use to instruct the Happy +parser generator.) 
+ + +\subsection{Expressions} + +With the machinery introduced, the linearization rules of expressions +are simple and concise: +\begin{verbatim} + EVar _ x = constant x.s ; + EInt n = constant n.s ; + EFloat a b = constant (a.s ++ "." ++ b.s) ; + EMul _ = infixL 3 "*" ; + EAdd _ = infixL 2 "+" ; + ESub _ = infixL 2 "-" ; + ELt _ = infixN 1 "<" ; + + EAppNil val f = constant (f.s ++ paren []) ; + EApp args val f exps = constant (f.s ++ paren exps.s) ; +\end{verbatim} + + +\subsection{Types} + +Types are expressed in two different ways: +in declarations, we have \texttt{int} and \texttt{float}, but +as formatting arguments to \texttt{printf}, we have +\verb6"%d"6 and \verb6"%f"6, with the quotes belonging to the +names. The simplest solution in GF is to linearize types +to records with two string fields. +\begin{verbatim} + lincat + Typ, NumTyp = {s,s2 : Str} ; + lin + TInt = {s = "int" ; s2 = "\"%d\""} ; + TFloat = {s = "float" ; s2 = "\"%f\""} ; +\end{verbatim} + + +\subsection{Statements} + +Statements in C have +the simplest linearization type, \verb6{s : Str}6. +We use a handful of auxiliary operations to regulate +the use of semicolons on a high level. +\begin{verbatim} + oper + continues : Str -> SS -> SS = \s,t -> ss (s ++ ";" ++ t.s) ; + continue : Str -> SS -> SS = \s,t -> ss (s ++ t.s) ; + statement : Str -> SS = \s -> ss (s ++ ";"); +\end{verbatim} +As for declarations, which bind variables, we notice the +projection \verb6.$06 to refer to the bound variable. +Also notice the use of the \texttt{s2} field of the type +in \texttt{printf}. 
+\begin{verbatim} + lin + Decl typ cont = continues (typ.s ++ cont.$0) cont ; + Assign _ x exp = continues (x.s ++ "=" ++ exp.s) ; + While exp loop = continue ("while" ++ paren exp.s ++ loop.s) ; + IfElse exp t f = continue ("if" ++ paren exp.s ++ t.s ++ "else" ++ f.s) ; + Block stm = continue ("{" ++ stm.s ++ "}") ; + Printf t e = continues ("printf" ++ paren (t.s2 ++ "," ++ e.s)) ; + Return _ exp = statement ("return" ++ exp.s) ; + Returnv = statement "return" ; + End = ss [] ; +\end{verbatim} + + +\subsection{Functions and programs} + +The category \texttt{Rec} of recursive function bodies with continuations +has three components: the function body itself, the parameter list, and +the program that follows. We express this by a linearization type that +contains three strings: +\begin{verbatim} + lincat Rec = {s,s2,s3 : Str} ; +\end{verbatim} +The body construction rules accumulate the parameter list +independently of the two other components: +\begin{verbatim} + lin + RecOne typ stm prg = stm ** { + s2 = typ.s ++ stm.$0 ; + s3 = prg.s + } ; + RecCons typ _ body prg = { + s = body.s ; + s2 = typ.s ++ body.$0 ++ "," ++ body.s2 ; + s3 = prg.s + } ; +\end{verbatim} +The top-level program construction rules rearrange the three +components into a linear structure: +\begin{verbatim} + FunctNil val stm cont = ss ( + val.s ++ cont.$0 ++ paren [] ++ "{" ++ + stm.s ++ "}" ++ ";" ++ cont.s) ; + Funct args val rec = ss ( + val.s ++ rec.$0 ++ paren rec.s2 ++ "{" ++ + rec.s ++ "}" ++ ";" ++ rec.s3) ; +\end{verbatim} + + +%%\subsection{Lexing and unlexing} + + + +\section{The concrete syntax of JVM} + +JVM syntax is, linguistically, more straightforward than +the syntax of C, and could even be defined by a regular +expression. However, the JVM syntax that our compiler +generates does not comprise full JVM, but only the fragment +that corresponds to well-formed C programs. + +The JVM syntax we use is a symbolic variant of the Jasmin assembler +\cite{jasmin}. 
+The main deviation from Jasmin are +variable addresses, as described in Section~\ref{postproc}. +The other deviations have to do with spacing: the normal +unlexer of GF puts spaces between constituents, whereas +in JVM, type names are integral parts of instruction names. +We indicate gluing uniformly by generating an underscore +on the side from which the adjacent element is glued. Thus +e.g.\ \verb6i _load6 becomes \verb6iload6. + + +\subsection{Symbolic JVM} +\label{postproc} + +What makes the translation from our abstract syntax to JVM +tricky is that variables must be replaced by +numeric addresses (relative to the frame pointer). +Code generation must therefore maintain a symbol table that permits +the lookup of variable addresses. As shown in the code +in Appendix C, we do not treat symbol tables +in linearization, but instead generated code in +\empha{Symbolic JVM}---that is, JVM with symbolic addresses. +Therefore we need a postprocessor that resolves the symbolic addresses, +shown in Appendix E. + +To make the postprocessor straightforward, +Symbolic JVM has special \texttt{alloc} instructions, +which are not present in real JVM. +Our compiler generates \texttt{alloc} instructions from +variable declarations. +The postprocessor comments out the \texttt{alloc} instructions, but we +found it a good idea not to erase them completely, since they make the +code more readable. + +The following example shows how the three representations (C, Symbolic JVM, JVM) +look like for a piece of code. +\begin{verbatim} + int x ; alloc i x ; x gets address 0 + int y ; alloc i y ; y gets address 1 + x = 5 ; ldc 5 ldc 5 + i _store x istore 0 + y = x ; i _load x iload 0 + i _store y istore 1 +\end{verbatim} + + +\subsection{Labels and jumps} + +A problem related to variable addresses +is the generation of fresh labels for +jumps. We solve this in linearization +by maintaining a growing label suffix +as a field of the linearization of statements into +instructions. 
The problem remains that statements on the
+same nesting level, e.g.\ the two branches
+of an \texttt{if-else} statement can use the same
+labels. Making them unique must be
+added to the post-processing pass. This is
+always possible, because labels are nested in a
+disciplined way, and jumps can never go to remote labels.
+
+As it turned out to be laborious to thread the label counter
+to expressions, we decided to compile comparison
+expressions (\verb6x < y6) into function calls, and provide the functions in
+a run-time library. This will no longer work for the
+conjunction (\verb6x && y6)
+and disjunction (\verb6x || y6), if we want to keep their semantics
+lazy, since function calls are strict in their arguments.
+
+
+
+
+
+\subsection{How to restore code generation by linearization}
+
+Since postprocessing is needed, we have not quite achieved
+the goal of code generation as linearization---if
+linearization is understood in the
+sense of GF. In GF, linearization can only depend
+on parameters from finite parameter sets. Since the size of
+a symbol table can grow indefinitely, it is not
+possible to encode linearization with updates to and
+lookups from a symbol table, as is usual in code generation.
+
+One attempt we made to achieve JVM linearization with
+numeric addresses was to alpha-convert abstract syntax trees
+so that variables get indexed with integers that indicate their
+depths in the tree. This hack works in the present fragment of C
+because all variables need the same amount of memory (one word),
+but would break down if we added double-precision floats. Therefore
+we have used the less pure (from the point of view of
+code generation as linearization) method of
+symbolic addresses.
+
+It would certainly be possible to generate variable addresses
+directly in the syntax trees by using dependent types; but this
+would clutter the abstract
+syntax in a way that is hard to motivate when we are in
+the business of describing the syntax of C.
The abstract syntax would
+have to, so to say, anticipate all demands of the compiler's
+target languages.
+
+
+\subsection{Problems with the JVM bytecode verifier}
+
+An inherent restriction for linearization in GF is compositionality.
+This prevents optimizations during linearization
+by clever instruction selection, elimination of superfluous
+labels and jumps, etc. One such optimization, the removal
+of unreachable code (i.e.\ code after a \texttt{return} instruction)
+is actually required by the JVM byte code verifier.
+The solution is, again, to perform this optimization in postprocessing.
+What we currently do, however, is to be careful and write
+C programs so that they always end with a return statement in the
+outermost block.
+
+Another problem related to \texttt{return} instructions is that
+both C and JVM programs have a designated \texttt{main} function.
+This function must have a certain type, which is different in C and
+JVM. In C, \texttt{main} returns an integer encoding what
+errors may have happened during execution. The JVM
+\texttt{main}, on the other hand, returns a \texttt{void}, i.e.\
+no value at all. A \texttt{main} program returning an
+integer therefore provokes a JVM bytecode verifier error.
+The postprocessor could take care of this; but currently
+we just write programs with void \texttt{return}s in the
+\texttt{main} functions.
+
+The parameter list of \texttt{main} is also different in C (empty list)
+and JVM (a string array \texttt{args}). We handle this problem
+with an \empha{ad hoc} postprocessor rule.
+
+Every function prelude in JVM must indicate the maximum space for
+local variables, and the maximum evaluation stack space (within
+the function's own stack frame). The locals limit is computed in
+linearization by maintaining a counter field. The stack limit
+is blindly set to 1000; it would be possible to set an
+accurate limit in the postprocessing phase.
+ + +\section{Translation as linearization vs.\ transfer} + +Many of the problems we have encountered in code generation by +linearization are familiar from +translation systems for natural languages. For instance, to translate +the English pronoun \eex{you} to German, you have to choose +between \eex{du, ihr, Sie}; for Italian, there are four +variants, and so on. To deal with this by linearization, +all semantic distinctions made in any of the involved languages +have to be present in the common abstract syntax. The usual solution to +this problem is not a universal abstract syntax, but +\empha{transfer}: translation does not just linearize +the same syntax trees to another language, but uses +a noncompositional function that translates +trees of one language into trees of another. + +Using transfer in the +back end is precisely what traditional compilers do. +The transfer function in our case would be a noncompositional +function from the abstract syntax of C to a different abstract +syntax of JVM. The abstract syntax notation of GF permits +definitions of functions, and the GF interpreter can be used +for evaluating terms into normal form. Thus one could write +the code generator just like in any functional language: +by sending in an environment and a syntax tree, and +returning a new environment with an instruction list: +\begin{verbatim} + fun + transStm : Env -> Stm -> EnvInstr ; + def + transStm env (Decl typ cont) = ... + transStm env (While (ELt a b) stm cont) = ... + transStm env (While exp stm cont) = ... +\end{verbatim} +This would be cumbersome in practice, because +GF does not have programming-language facilities +like built-in lists and tuples, or monads. Moreover, +the compiler could no longer be inverted into a decompiler, +in the way true linearization can be inverted into a parser. 
+
+
+
+\section{Parser generation}
+\label{bnfc}
+
+The whole GF part of the compiler (parser, type checker, Symbolic JVM
+generator) can be run in the GF interpreter.
+The weakest point of the resulting compiler, by current
+standards, is the parser. GF is a powerful grammar formalism, which
+needs a very general parser, taking care of ambiguities and other
+problems that are typical of natural languages but should be
+overcome in programming languages by design. The parser is moreover run
+in an interpreter that takes the grammar (in a suitably compiled form)
+as an argument.
+
+Fortunately, it is easy to replace the generic, interpreting GF parser
+by a compiled LR(1) parser. GF supports the translation of a concrete
+syntax into the \empha{Labelled BNF} (LBNF) format, %% \cite{lbnf},
+which in turn can be translated to parser generator code
+(Happy, Bison, or JavaCUP), by the BNF Converter \cite{bnfc}.
+The parser we are therefore using in the compiler is a Haskell
+program generated by Happy \cite{happy}.
+
+We regard parser generation
+as a first step towards developing GF into a
+production-quality compiler compiler. The efficiency of the parser
+is not the only relevant thing. Another advantage of an LR(1)
+parser generator is that it performs an analysis on the grammar
+finding conflicts, and provides a debugger. It may be
+difficult for a human to predict how a context-free grammar
+performs at parsing; it is much more difficult to do this for
+a grammar written in the abstract way that GF permits (cf.\ the
+example in Appendix B).
+
+The current version of the C grammar is ambiguous. GF's own
+parser returns all alternatives, whereas the parser generated by
+Happy rules out some of them by its normal conflict handling
+policy. This means, in practice, that extra brackets are
+sometimes needed to group statements together.
+
+
+\subsection{Another notation for \HOAS}
+
+Describing variable bindings with \HOAS\ is sometimes considered
+unintuitive.
Let us consider the declaration rule of C (without
+type dependencies for simplicity):
+\begin{verbatim}
+  fun Decl : Typ -> (Var -> Stm) -> Stm ;
+  lin Decl typ stm = {s = typ.s ++ stm.$0 ++ ";" ++stm.s} ;
+\end{verbatim}
+Compare this with a corresponding LBNF rule (also using a continuation):
+\begin{verbatim}
+  Decl. Stm ::= Typ Ident ";" Stm ;
+\end{verbatim}
+To explain bindings attached to this rule, one can say, in natural language,
+that the identifier gets bound in the statement that follows.
+This means that syntax trees formed by this rule do not have
+the form \verb6(Decl typ x stm)6, but the form \verb6(Decl typ (\x -> stm))6.
+
+One way to formalize the informal binding rules stated beside
+BNF rules is to use \empha{profiles}: data structures describing
+the way in which the logical arguments of the syntax tree are
+represented by the linearized form. The declaration rule can be
+written using a profile notation as follows:
+\begin{verbatim}
+  Decl [1,(2)3]. Stm ::= Typ Ident ";" Stm ;
+\end{verbatim}
+When compiling GF grammars into LBNF, we were forced to enrich
+LBNF by a (more general) profile notation
+(cf.\ \cite{gf-jfp}, Section 3.3). This suggested at the same
+time that profiles could provide a user-friendly notation for
+\HOAS\ avoiding the explicit use of lambda calculus.
+
+
+
+\section{Using the compiler}
+
+Our compiler is invoked, of course, by the command \texttt{gfcc}.
+It produces a JVM \texttt{.class} file, by running the
+Jasmin bytecode assembler \cite{jasmin} on a Jasmin (\texttt{.j})
+file:
+\begin{verbatim}
+  % gfcc factorial.c
+  > > wrote file factorial.j
+  Generated: factorial.class
+\end{verbatim}
+The Jasmin code is produced by a postprocessor, written in Haskell
+(Appendix E), from the Symbolic JVM format that is produced by
+linearization. The reasons why actual Jasmin is not generated
+by linearization are explained in Section~\ref{postproc} above.
+ +In addition to the batch compiler, GF provides an interactive +syntax editor, in which C programs can be constructed by +stepwise refinements, local changes, etc.\ \cite{khegai}. The user of the +editor can work simultaneously on all languages involved. +In our case, this means that changes can be done both to +the C code and to the JVM code, and they are automatically +carried over from one language to the other. +\commentOut{ +A screen dump of the editor is shown in Fig~\ref{demo}. + +\begin{figure} +\centerline{\psfig{figure=demo2.epsi}} \caption{ +GF editor session where an integer +expression is expected to be given. The left window shows the +abstract syntax tree, and the right window the evolving C and +JVM code. The editor focus is shadowed, and the refinement alternatives +are shown in a pop-up window. +} +\label{demo} +\end{figure} +} + + +\section{Related work} + +The theoretical ideas behind our compiler experiment +are familiar from various sources. +Single-source language and compiler definitions +can be built using attribute grammars \cite{knuth-attr}. +The use of +dependent types in combination with higher-order abstract syntax +has been studied in various logical frameworks +\cite{harper-honsell,magnusson-nordstr,twelf}. +The addition of linearization rules to type-theoretical +abstract syntax is studied in \cite{semBNF}, which also +compares the method with attribute grammars. + +The idea of using a common abstract syntax for different +languages was clearly exposed by Landin \cite{landin}. The view of +code generation as linearization is a central aspect of +the classic compiler textbook by Aho, Sethi, and Ullman +\cite{aho-ullman}. +The use of one and the same grammar both for parsing and linearization +is a guiding principle of unification-based linguistic grammar +formalisms \cite{pereira-shieber}. 
Interactive editors derived from +grammars have been developed in various programming and proof +assistants \cite{teitelbaum,metal,magnusson-nordstr}. + +Even though the different ideas are well-known, +we have not seen them used together to construct a complete +compiler. In our view, putting these ideas together is +an attractive approach to compiling, since a compiler written +in this way is completely declarative, hence concise, +and therefore easy to modify and extend. What is more, if +a new language construct is added, the GF type checker +verifies that the addition is propagated to all components +of the compiler. As the implementation is declarative, +it is also self-documenting, since a human-readable +grammar defines the syntax and static +semantics that is actually used in the implementation. + + +\section{Conclusion} + +The \texttt{gfcc} compiler translates a representative +fragment of C to JVM, and growing the fragment +does not necessarily pose any new kinds of problems. +Using \HOAS\ and dependent types to describe the abstract +syntax of C works fine, and defining the concrete syntax +of C on top of this using GF linearization machinery is +possible. To build a parser that is more efficient than +GF's generic one, GF offers code generation for standard +parser tools. + +One result of the experiment is the beginning of a +library for dealing with typical programming language structures +such as precedence. This library is exploited in the parser +generator, which maps certain parameters used into GF grammars +into precedence directives in labelled BNF grammars. + +The most serious difficulty with JVM code generation by linearization +is to maintain a symbol table mapping variables to addresses. +The solution we have chosen is to generate Symbolic JVM, that is, +JVM with symbolic addresses, and translate the symbolic addresses to +(relative) memory locations by a postprocessor. 
+ +Since the postprocessor works uniformly for the whole Symbolic JVM, +building a new compiler to generate JVM should now be +possible by just writing GF grammars. The most immediate +idea for developing GF as a compiler tool is to define +a similar symbolic format for an intermediate language, +which uses three-operand code and virtual registers. + + + +\bibliographystyle{plain} + +\bibliography{gf-bib} + + +\newpage +\subsection*{Appendix A: The abstract syntax} + +\small +\begin{verbatim} +abstract Imper = PredefAbs ** { + cat + Program ; + Rec ListTyp ; + Typ ; + NumTyp ; + ListTyp ; + Fun ListTyp Typ ; + Body ListTyp ; + Stm ; + Exp Typ ; + Var Typ ; + ListExp ListTyp ; + + fun + Empty : Program ; + Funct : (AS : ListTyp) -> (V : Typ) -> (Fun AS V -> Rec AS) -> Program ; + FunctNil : (V : Typ) -> Stm -> (Fun NilTyp V -> Program) -> Program ; + RecOne : (A : Typ) -> (Var A -> Stm) -> Program -> Rec (ConsTyp A NilTyp) ; + RecCons : (A : Typ) -> (AS : ListTyp) -> + (Var A -> Rec AS) -> Program -> Rec (ConsTyp A AS) ; + + Decl : (A : Typ) -> (Var A -> Stm) -> Stm ; + Assign : (A : Typ) -> Var A -> Exp A -> Stm -> Stm ; + While : Exp (TNum TInt) -> Stm -> Stm -> Stm ; + IfElse : Exp (TNum TInt) -> Stm -> Stm -> Stm -> Stm ; + Block : Stm -> Stm -> Stm ; + Printf : (A : Typ) -> Exp A -> Stm -> Stm ; + Return : (A : Typ) -> Exp A -> Stm ; + Returnv : Stm ; + End : Stm ; + + EVar : (A : Typ) -> Var A -> Exp A ; + EInt : Int -> Exp (TNum TInt) ; + EFloat : Int -> Int -> Exp (TNum TFloat) ; + ELt : (n : NumTyp) -> let Ex = Exp (TNum n) in Ex -> Ex -> Exp (TNum TInt) ; + EAdd, EMul, ESub : (n : NumTyp) -> let Ex = Exp (TNum n) in Ex -> Ex -> Ex ; + EAppNil : (V : Typ) -> Fun NilTyp V -> Exp V ; + EApp : (AS : ListTyp) -> (V : Typ) -> Fun AS V -> ListExp AS -> Exp V ; + + TNum : NumTyp -> Typ ; + TInt, TFloat : NumTyp ; + NilTyp : ListTyp ; + ConsTyp : Typ -> ListTyp -> ListTyp ; + OneExp : (A : Typ) -> Exp A -> ListExp (ConsTyp A NilTyp) ; + ConsExp : (A : Typ) -> (AS : 
ListTyp) -> + Exp A -> ListExp AS -> ListExp (ConsTyp A AS) ; +} +\end{verbatim} +\normalsize +\newpage + + +\subsection*{Appendix B: The concrete syntax of C} + +\small +\begin{verbatim} +concrete ImperC of Imper = open ResImper in { + flags lexer=codevars ; unlexer=code ; startcat=Program ; + + lincat + Exp = PrecExp ; + Typ, NumTyp = {s,s2 : Str} ; + Rec = {s,s2,s3 : Str} ; + lin + Empty = ss [] ; + FunctNil val stm cont = ss ( + val.s ++ cont.$0 ++ paren [] ++ "{" ++ stm.s ++ "}" ++ ";" ++ cont.s) ; + Funct args val rec = ss ( + val.s ++ rec.$0 ++ paren rec.s2 ++ "{" ++ rec.s ++ "}" ++ ";" ++ rec.s3) ; + RecOne typ stm prg = stm ** { + s2 = typ.s ++ stm.$0 ; + s3 = prg.s + } ; + RecCons typ _ body prg = { + s = body.s ; + s2 = typ.s ++ body.$0 ++ "," ++ body.s2 ; + s3 = prg.s + } ; + + Decl typ cont = continues (typ.s ++ cont.$0) cont ; + Assign _ x exp = continues (x.s ++ "=" ++ exp.s) ; + While exp loop = continue ("while" ++ paren exp.s ++ loop.s) ; + IfElse exp t f = continue ("if" ++ paren exp.s ++ t.s ++ "else" ++ f.s) ; + Block stm = continue ("{" ++ stm.s ++ "}") ; + Printf t e = continues ("printf" ++ paren (t.s2 ++ "," ++ e.s)) ; + Return _ exp = statement ("return" ++ exp.s) ; + Returnv = statement "return" ; + End = ss [] ; + + EVar _ x = constant x.s ; + EInt n = constant n.s ; + EFloat a b = constant (a.s ++ "." 
++ b.s) ; + EMul _ = infixL 3 "*" ; + EAdd _ = infixL 2 "+" ; + ESub _ = infixL 2 "-" ; + ELt _ = infixN 1 "<" ; + EAppNil val f = constant (f.s ++ paren []) ; + EApp args val f exps = constant (f.s ++ paren exps.s) ; + + TNum t = t ; + TInt = {s = "int" ; s2 = "\"%d\""} ; TFloat = {s = "float" ; s2 = "\"%f\""} ; + NilTyp = ss [] ; ConsTyp = cc2 ; + OneExp _ e = e ; ConsExp _ _ e es = ss (e.s ++ "," ++ es.s) ; +} +\end{verbatim} +\normalsize +\newpage + + +\subsection*{Appendix C: The concrete syntax of JVM} + +\small +\begin{verbatim} +concrete ImperJVM of Imper = open ResImper in { + flags lexer=codevars ; unlexer=code ; startcat=Program ; + + lincat + Rec = {s,s2,s3 : Str} ; -- code, storage for locals, continuation + Stm = Instr ; + + lin + Empty = ss [] ; + FunctNil val stm cont = ss ( + ".method" ++ "public" ++ "static" ++ cont.$0 ++ paren [] ++ val.s ++ ";" ++ + ".limit" ++ "locals" ++ stm.s2 ++ ";" ++ + ".limit" ++ "stack" ++ "1000" ++ ";" ++ + stm.s ++ + ".end" ++ "method" ++ ";" ++ ";" ++ + cont.s + ) ; + Funct args val rec = ss ( + ".method" ++ "public" ++ "static" ++ rec.$0 ++ paren args.s ++ val.s ++ ";" ++ + ".limit" ++ "locals" ++ rec.s2 ++ ";" ++ + ".limit" ++ "stack" ++ "1000" ++ ";" ++ + rec.s ++ + ".end" ++ "method" ++ ";" ++ ";" ++ + rec.s3 + ) ; + + RecOne typ stm prg = instrb typ.s ( + ["alloc"] ++ typ.s ++ stm.$0 ++ stm.s2) {s = stm.s ; s2 = stm.s2 ; s3 = prg.s}; + + RecCons typ _ body prg = instrb typ.s ( + ["alloc"] ++ typ.s ++ body.$0 ++ body.s2) + {s = body.s ; s2 = body.s2 ; s3 = prg.s}; + + Decl typ cont = instrb typ.s ( + ["alloc"] ++ typ.s ++ cont.$0 + ) cont ; + Assign t x exp = instrc (exp.s ++ t.s ++ "_store" ++ x.s) ; + While exp loop = + let + test = "TEST_" ++ loop.s2 ; + end = "END_" ++ loop.s2 + in instrl ( + "label" ++ test ++ ";" ++ + exp.s ++ + "ifeq" ++ end ++ ";" ++ + loop.s ++ + "goto" ++ test ++ ";" ++ + "label" ++ end + ) ; + IfElse exp t f = + let + false = "FALSE_" ++ t.s2 ++ f.s2 ; + true = "TRUE_" ++ t.s2 ++ f.s2 + 
in instrl ( + exp.s ++ + "ifeq" ++ false ++ ";" ++ + t.s ++ + "goto" ++ true ++ ";" ++ + "label" ++ false ++ ";" ++ + f.s ++ + "label" ++ true + ) ; + Block stm = instrc stm.s ; + Printf t e = instrc (e.s ++ "invokestatic" ++ t.s ++ "runtime/printf" ++ paren (t.s) ++ "v") ; + Return t exp = instr (exp.s ++ t.s ++ "_return") ; + Returnv = instr "return" ; + End = ss [] ** {s2,s3 = []} ; + + EVar t x = instr (t.s ++ "_load" ++ x.s) ; + EInt n = instr ("ldc" ++ n.s) ; + EFloat a b = instr ("ldc" ++ a.s ++ "." ++ b.s) ; + EAdd = binopt "_add" ; + ESub = binopt "_sub" ; + EMul = binopt "_mul" ; + ELt t = binop ("invokestatic" ++ t.s ++ "runtime/lt" ++ paren (t.s ++ t.s) ++ "i") ; + EAppNil val f = instr ( + "invokestatic" ++ f.s ++ paren [] ++ val.s + ) ; + EApp args val f exps = instr ( + exps.s ++ + "invokestatic" ++ f.s ++ paren args.s ++ val.s + ) ; + + TNum t = t ; + TInt = ss "i" ; + TFloat = ss "f" ; + NilTyp = ss [] ; + ConsTyp = cc2 ; + OneExp _ e = e ; + ConsExp _ _ = cc2 ; +} +\end{verbatim} +\normalsize +\newpage + +\subsection*{Appendix D: Auxiliary operations for concrete syntax} + +\small +\begin{verbatim} +resource ResImper = open Predef in { + + -- precedence + + param PAssoc = PN | PL | PR ; + + oper + Prec : PType = Predef.Ints 4 ; + PrecExp : Type = {s : Str ; p : Prec ; a : PAssoc} ; + + mkPrec : Prec -> PAssoc -> Str -> PrecExp = \p,a,f -> + {s = f ; p = p ; a = a} ; + + usePrec : PrecExp -> Prec -> Str = \x,p -> + case < : Prec * Prec> of { + <3,4> | <2,3> | <2,4> => paren x.s ; + <1,1> | <1,0> | <0,0> => x.s ; + <1,_> | <0,_> => paren x.s ; + _ => x.s + } ; + + constant : Str -> PrecExp = mkPrec 4 PN ; + + infixN : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y -> + mkPrec p PN (usePrec x (nextPrec p) ++ f ++ usePrec y (nextPrec p)) ; + infixL : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y -> + mkPrec p PL (usePrec x p ++ f ++ usePrec y (nextPrec p)) ; + infixR : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y -> + mkPrec p PR 
(usePrec x (nextPrec p) ++ f ++ usePrec y p) ; + + nextPrec : Prec -> Prec = \p -> case
<p : Prec>
of { + 4 => 4 ; + n => Predef.plus n 1 + } ; + + -- string operations + + SS : Type = {s : Str} ; + ss : Str -> SS = \s -> {s = s} ; + cc2 : (_,_ : SS) -> SS = \x,y -> ss (x.s ++ y.s) ; + paren : Str -> Str = \str -> "(" ++ str ++ ")" ; + + continues : Str -> SS -> SS = \s,t -> ss (s ++ ";" ++ t.s) ; + continue : Str -> SS -> SS = \s,t -> ss (s ++ t.s) ; + statement : Str -> SS = \s -> ss (s ++ ";"); + + -- operations for JVM + + Instr : Type = {s,s2,s3 : Str} ; -- code, variables, labels + instr : Str -> Instr = \s -> + statement s ** {s2,s3 = []} ; + instrc : Str -> Instr -> Instr = \s,i -> + ss (s ++ ";" ++ i.s) ** {s2 = i.s2 ; s3 = i.s3} ; + instrl : Str -> Instr -> Instr = \s,i -> + ss (s ++ ";" ++ i.s) ** {s2 = i.s2 ; s3 = "L" ++ i.s3} ; + instrb : Str -> Str -> Instr -> Instr = \v,s,i -> + ss (s ++ ";" ++ i.s) ** {s2 = v ++ i.s2 ; s3 = i.s3} ; + binop : Str -> SS -> SS -> SS = \op, x, y -> + ss (x.s ++ y.s ++ op ++ ";") ; + binopt : Str -> SS -> SS -> SS -> SS = \op, t -> + binop (t.s ++ op) ; +} +\end{verbatim} +\normalsize +\newpage + + +\subsection*{Appendix E: Translation of Symbolic JVM to Jasmin} + +\small +\begin{verbatim} +module Main where +import Char +import System + +main :: IO () +main = do + jvm:src:_ <- getArgs + s <- readFile jvm + let cls = takeWhile (/='.') src + let obj = cls ++ ".j" + writeFile obj $ boilerplate cls + appendFile obj $ mkJVM cls s + putStrLn $ "wrote file " ++ obj + +mkJVM :: String -> String -> String +mkJVM cls = unlines . reverse . fst . foldl trans ([],([],0)) . 
lines where + trans (code,(env,v)) s = case words s of + ".method":p:s:f:ns + | f == "main" -> (".method public static main([Ljava/lang/String;)V":code,([],1)) + | otherwise -> (unwords [".method",p,s, f ++ typesig ns] : code,([],0)) + "alloc":t:x:_ -> (("; " ++ s):code, ((x,v):env, v + size t)) + ".limit":"locals":ns -> chCode (".limit locals " ++ show (length ns)) + "invokestatic":t:f:ns | take 8 f == "runtime/" -> + chCode $ "invokestatic " ++ "runtime/" ++ t ++ drop 8 f ++ typesig ns + "invokestatic":f:ns -> chCode $ "invokestatic " ++ cls ++ "/" ++ f ++ typesig ns + "alloc":ns -> chCode $ "; " ++ s + t:('_':instr):[";"] -> chCode $ t ++ instr + t:('_':instr):x:_ -> chCode $ t ++ instr ++ " " ++ look x + "goto":ns -> chCode $ "goto " ++ label ns + "ifeq":ns -> chCode $ "ifeq " ++ label ns + "label":ns -> chCode $ label ns ++ ":" + ";":[] -> chCode "" + _ -> chCode s + where + chCode c = (c:code,(env,v)) + look x = maybe (error $ x ++ show env) show $ lookup x env + typesig = init . map toUpper . concat + label = init . 
concat + size t = case t of + "d" -> 2 + _ -> 1 + +boilerplate :: String -> String +boilerplate cls = unlines [ + ".class public " ++ cls, ".super java/lang/Object", + ".method public ()V","aload_0", + "invokenonvirtual java/lang/Object/()V","return", + ".end method"] +\end{verbatim} +\normalsize +\newpage + + + + + + +\end{document} + +\begin{verbatim} +\end{verbatim} + diff --git a/examples/gfcc/factorial.c b/examples/gfcc/factorial.c new file mode 100644 index 000000000..76fee32d0 --- /dev/null +++ b/examples/gfcc/factorial.c @@ -0,0 +1,38 @@ +int fact (int n) { + int f ; + f = 1 ; + { + while (1 < n) { + f = n * f ; + n = n - 1 ; + } + } + return f ; +} ; + +int factr (int n) { + int f ; + { + if (n < 2) { + f = 1 ; + } + else { + f = n * factr (n-1) ; + } + } + return f ; +} ; + +int main () { + int n ; + n = 1 ; + { + while (n < 11) { + printf("%d",fact(n)) ; + printf("%d",factr(n)) ; + n = n+1 ; + } + } + return ; +} ; + diff --git a/examples/gfcc/fibonacci.c b/examples/gfcc/fibonacci.c new file mode 100644 index 000000000..80e8a0d5c --- /dev/null +++ b/examples/gfcc/fibonacci.c @@ -0,0 +1,18 @@ +int mx () { + return 5000000 ; +} ; + +int main () { + int lo ; int hi ; + lo = 1 ; + hi = lo ; + printf("%d",lo) ; + { + while (hi < mx()) { + printf("%d",hi) ; + hi = lo + hi ; + lo = hi - lo ; + } + } + return ; +} ; diff --git a/examples/gfcc/runtime.j b/examples/gfcc/runtime.j new file mode 100644 index 000000000..88db0b9b8 --- /dev/null +++ b/examples/gfcc/runtime.j @@ -0,0 +1,55 @@ +.class public runtime +.super java/lang/Object +; +; standard initializer +.method public ()V + aload_0 + invokenonvirtual java/lang/Object/()V + return +.end method + +.method public static ilt(II)I +.limit locals 2 +.limit stack 2 + iload_0 + iload_1 + if_icmpge Label0 + iconst_1 + ireturn + Label0: + iconst_0 + ireturn +.end method + +.method public static flt(FF)I +.limit locals 2 +.limit stack 2 + fload_0 + fload_1 + fcmpl + ifge Label0 + iconst_1 + ireturn + Label0: + 
iconst_0 + ireturn +.end method + +.method public static iprintf(I)V +.limit locals 1 +.limit stack 1000 + getstatic java/lang/System/out Ljava/io/PrintStream; + iload_0 + invokevirtual java/io/PrintStream/println(I)V + return +.end method + +.method public static fprintf(F)V +.limit locals 1 +.limit stack 1000 + getstatic java/lang/System/out Ljava/io/PrintStream; + fload_0 + invokevirtual java/io/PrintStream/println(F)V + return +.end method +