Translating linearization functions to Haskell: significant code size reductions

+ Instead of including lists of parameter values generated by GF, generate
  code to enumerate parameter values (in the same order as GF). This seems
  to give a factor of 2-3 code size reduction in the Phrasebook (e.g.
  from 84MB to 25MB for Hin, from 338MB to 154MB for Fre).

+ Deduplicate table entries, i.e. convert "table [..,E,..,E,..,E,..]" into
  "let x = E in table [..,x,..,x,..,x,..]". This gives even more significant
  code size reduction in some cases, e.g. from 569MB to 15MB for
  PhrasebookFin.

All phrasebook languages can now be converted to compilable Haskell code,
except PhrasebookPes, which still has the name clash problem.
This commit is contained in:
hallgren
2015-01-06 19:57:24 +00:00
parent cbd873839b
commit 0694a915d2

View File

@@ -7,7 +7,7 @@ import GF.Data.ErrM
import GF.Data.Utilities(mapSnd)
import GF.Text.Pretty
import GF.Grammar.Grammar
import GF.Grammar.Lookup(lookupFunType,allParamValues,lookupOrigInfo,allOrigInfos)
import GF.Grammar.Lookup(lookupFunType,lookupOrigInfo,allOrigInfos)--,allParamValues
import GF.Grammar.Macros(typeForm,collectOp,mkAbs,mkApp)
import GF.Grammar.Lockfield(isLockLabel)
import GF.Grammar.Predef(cPredef,cInts)
@@ -71,21 +71,24 @@ concrete2haskell opts gr cenv absname cnc modinfo =
haskPreamble :: ModuleName -> ModuleName -> Doc
haskPreamble absname cncname =
"{-# LANGUAGE MultiParamTypeClasses, FunctionalDependencies, FlexibleInstances, LambdaCase #-}" $+$
"module" <+> cncname <+> "where" $+$
"{-# LANGUAGE MultiParamTypeClasses, FunctionalDependencies, FlexibleInstances, LambdaCase #-}" $$
"module" <+> cncname <+> "where" $$
"import Prelude hiding (Ordering(..))" $$
"import qualified Data.Map as M" $+$
"import Data.Map((!))" $+$
"import qualified" <+> absname <+> "as A" $+$
"import Control.Applicative((<$>),(<*>))" $$
"import qualified Data.Map as M" $$
"import Data.Map((!))" $$
"import qualified" <+> absname <+> "as A" $$
"----------------------------------------------------" $$
"-- automatic translation from GF to Haskell" $$
"----------------------------------------------------" $$
"class EnumAll a where enumAll :: [a]" $$
"type Str = [String]" $$
"linString (A.GString s) = R_s [s]" $$
"linInt (A.GInt i) = R_s [show i]" $$
"linFloat (A.GFloat x) = R_s [show x]" $$
"" $$
"table is vs = let m = M.fromList (zip is vs) in (m!)"
--"table is vs = let m = M.fromList (zip is vs) in (m!)"
"table vs = let m = M.fromList (zip enumAll vs) in (m!)"
toHaskell gId gr absname cenv (name,jment) =
case jment of
@@ -173,11 +176,10 @@ coerce env ty t =
R [(l,(Just ft,coerce env ft f))|(l,(_,f))<-r,Just ft<-[lookup l rt]]
(RecType rt,Vr x)->
case lookup x env of
Just ty' | ty'/=ty ->
Just ty' | ty'/=ty -> -- better to compare to normal form of ty'
--trace ("coerce "++render ty'++" to "++render ty) $
App (to_rcon (map fst rt)) t
_ -> trace ("no coerce to "++render ty) t
_ -> t
_ -> t
where
extend env (x,(Just ty,rhs)) = (x,ty):env
@@ -193,7 +195,8 @@ convert' atomic gId gr = if atomic then ppA else ppT
case t of
Let (x,(_,xt)) t -> sep ["let"<+>x<+>"="<+>ppT xt,"in"<+>ppT t]
Abs b x t -> "\\"<+>x<+>"->"<+>ppT t
V ty ts -> hang "table" 4 (sep [list (enumAll ty),list ts])
-- V ty ts -> hang "table" 4 (sep [list (enumAll ty),list ts])
V ty ts -> hang "table" 4 (dedup ts)
T (TTyped ty) cs -> hang "\\case" 2 (vcat (map ppCase cs))
S t p -> hang (ppB t) 4 (ppA p)
C t1 t2 -> hang (ppA t1<+>"++") 4 (ppA t2)
@@ -257,12 +260,28 @@ convert' atomic gId gr = if atomic then ppA else ppT
token = brackets . doubleQuotes
list = brackets . fsep . punctuate "," . map ppT
fields = map (ppA.snd.snd) . sort . filter (not.isLockLabel.fst)
enumAll ty = case allParamValues gr ty of
Ok ts -> ts
-- enumAll ty = case allParamValues gr ty of Ok ts -> ts
list = brackets . fsep . punctuate "," . map ppT
list' = brackets . fsep . punctuate ","
dedup ts =
if M.null dups
then list ts
else parens $
"let"<+>vcat [ev i<+>"="<+>ppT t|(i,t)<-defs] $$
"in"<+>list' (zipWith entry ts is)
where
entry t i = maybe (ppT t) ev (M.lookup i dups)
ev i = "e'"<>i
defs = [(i1,t)|(t,i1:_:_)<-ms]
dups = M.fromList [(i2,i1)|(_,i1:is@(_:_))<-ms,i2<-i1:is]
ms = M.toList m
m = fmap sort (M.fromListWith (++) (zip ts [[i]|i<-is]))
is = [0..]::[Int]
convType = convType' False
convTypeA = convType' True
@@ -302,9 +321,10 @@ rcon_name ls = "R"++concat (sort ['_':render l|l<-ls,not (isLockLabel l)])
to_rcon = con . ("to_"++) . rcon_name
recordType ls =
"data"<+>app<+>"="<+>app <+> "deriving (Eq,Ord,Show)" $+$
vcat (zipWith projection vs ls) $+$
to_rcon ls<+>"r"<+>"="<+>cn<+>fsep [parens (proj l<+>"r")|l<-ls] $+$ ""
"data"<+>app<+>"="<+>app <+> "deriving (Eq,Ord,Show)" $$
enumAllInstance $$
vcat (zipWith projection vs ls) $$
to_rcon ls<+>"r"<+>"="<+>cn<+>fsep [parens (proj l<+>"r")|l<-ls] $$ ""
where
cn = rcon ls
-- Not all record labels are syntactically correct as type variables in Haskell
@@ -317,6 +337,14 @@ recordType ls =
hang ("instance"<+>"Has_"<>l<+>parens app<+>v<+>"where") 4
(proj l<+>parens app<+>"="<+>v)
enumAllInstance =
hang ("instance"<+>ctx<+>"EnumAll"<+>parens app<+>"where") 4
("enumAll"<+>"="<+>enumCon cn n)
where
ctx = if n==0
then empty
else parens (fsep (punctuate "," ["EnumAll"<+>v|v<-vs]))<+>"=>"
labelClass l =
hang ("class"<+>"Has_"<>l<+>"r"<+>"a"<+>"| r -> a"<+>"where") 4
(proj l<+>"::"<+>"r -> a")
@@ -328,7 +356,10 @@ paramType gId gr q@(_,n) =
((S.singleton (m,n),argTypes ps),
"data"<+>gId n<+>"="<+>
sep [fsep (punctuate " |" (map param ps)),
pp "deriving (Eq,Ord,Show)"])
pp "deriving (Eq,Ord,Show)"] $$
hang ("instance EnumAll"<+>gId n<+>"where") 4
("enumAll"<+>"="<+>sep (punctuate "++" (map enumParam ps)))
)
Ok (m,ResOper _ (Just (L _ t)))
| m==cPredef && n==cInts ->
((S.singleton (m,n),S.empty),pp "type GInts n = Int")
@@ -340,3 +371,11 @@ paramType gId gr q@(_,n) =
param (n,ctx) = gId n<+>[convTypeA gId t|(_,_,t)<-ctx]
argTypes = S.unions . map argTypes1
argTypes1 (n,ctx) = S.unions [paramTypes gr t|(_,_,t)<-ctx]
enumParam (n,ctx) = enumCon (gId n) (length ctx)
enumCon name arity =
if arity==0
then brackets name
else parens $
fsep ((name<+>"<$>"):punctuate "<*>" (replicate arity (pp "enumAll")))