1
0
forked from GitHub/gf-core
Files
gf-core/src/GF/Canon/CanonToGFCC.hs
2006-10-02 19:16:38 +00:00

450 lines
15 KiB
Haskell

----------------------------------------------------------------------
-- |
-- Module : CanonToGFCC
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/06/17 14:15:17 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.15 $
--
-- GFC to GFCC compiler. AR Aug-Oct 2006
-----------------------------------------------------------------------------
module GF.Canon.CanonToGFCC (prCanon2gfcc, prCanon2f_gfcc) where
import GF.Canon.AbsGFC
import qualified GF.Canon.GFC as GFC
import qualified GF.Canon.Look as Look
import qualified GF.Canon.Subexpressions as Sub
import qualified GF.Canon.GFCC.AbsGFCC as C
import qualified GF.Canon.GFCC.PrintGFCC as Pr
import GF.Canon.GFC
import GF.Canon.Share
import qualified GF.Grammar.Abstract as A
import qualified GF.Grammar.Macros as GM
import GF.Canon.MkGFC
import GF.Canon.CMacros
import qualified GF.Infra.Modules as M
import qualified GF.Infra.Option as O
import GF.UseGrammar.Linear (expandLinTables, unoptimizeCanon)
-- these are needed for FCFG printing and might be moved
import GF.FCFG.ToFCFG (printFGrammar)
import GF.Conversion.GFC (gfc2fcfg)
import GF.Infra.Option (noOptions)
import GF.Infra.Ident
import GF.Data.Operations
import GF.Text.UTF8
import Data.List
import qualified Data.Map as Map
import Debug.Trace ----
-- the main function: generate GFCC from GFCM.
prCanon2gfcc :: CanonGrammar -> String
prCanon2gfcc =
Pr.printTree . canon2gfcc . reorder . utf8Conv . canon2canon . normalize
-- print FCFG corresponding to the GFCC
prCanon2f_gfcc :: CanonGrammar -> String
prCanon2f_gfcc =
unlines . map printFGrammar . toFCFG .
reorder . utf8Conv . canon2canon . normalizeNoOpt
where
toFCFG cgr@(M.MGrammar (am:cms)) =
[gfc2fcfg noOptions (M.MGrammar [am,cm],c) | cm@(c,_) <- cms]
-- gfc2fcfg :: Options -> (CanonGrammar, Ident) -> FGrammar
-- This is needed to reorganize the grammar. GFCC has its own back-end optimization.
-- But we need to have the canonical order in tables, created by valOpt
normalize :: CanonGrammar -> CanonGrammar
normalize = share . unoptimizeCanon . Sub.unSubelimCanon where
share = M.MGrammar . map (shareModule valOpt) . M.modules --- allOpt
-- for FCFG generation
normalizeNoOpt = unoptimizeCanon . Sub.unSubelimCanon
-- Generate GFCC from GFCM.
-- this assumes a grammar translated by canon2canon
canon2gfcc :: CanonGrammar -> C.Grammar
canon2gfcc cgr@(M.MGrammar ((a,M.ModMod abm):cms)) =
C.Grm (C.Hdr (i2i a) cs) (C.Abs adefs) cncs where
cs = map (i2i . fst) cms
adefs = [C.Fun f' (mkType ty) (C.Tr (C.AC f') []) |
(f,GFC.AbsFun ty _) <- tree2list (M.jments abm), let f' = i2i f]
cncs = [C.Cnc (i2i lang) (concr m) | (lang,M.ModMod m) <- cms]
concr mo = optConcrete
[C.Lin (i2i f) (mkTerm tr) |
(f,GFC.CncFun _ _ tr _) <- tree2list (M.jments mo)]
i2i :: Ident -> C.CId
i2i (IC c) = C.CId c
mkType :: A.Type -> C.Type
mkType t = case GM.catSkeleton t of
Ok (cs,c) -> C.Typ (map (i2i . snd) cs) (i2i $ snd c)
mkTerm :: Term -> C.Term
mkTerm tr = case tr of
Arg (A _ i) -> C.V i
EInt i -> C.C i
-- record parameter alias - created in gfc preprocessing
R [Ass (L (IC "_")) i, Ass (L (IC "__")) t] -> C.RP (mkTerm i) (mkTerm t)
-- ordinary record
R rs -> C.R [mkTerm t | Ass _ t <- rs]
P t l -> C.P (mkTerm t) (C.C (mkLab l))
T _ cs -> error $ "improper optimization for gfcc in" +++ A.prt tr
V _ cs -> C.R [mkTerm t | t <- cs]
S t p -> C.P (mkTerm t) (mkTerm p)
C s t -> C.S [mkTerm x | x <- [s,t]]
FV ts -> C.FV [mkTerm t | t <- ts]
K (KS s) -> C.K (C.KS s)
K (KP ss _) -> C.K (C.KP ss []) ---- TODO: prefix variants
E -> C.S []
Par _ _ -> prtTrace tr $ C.C 66661 ---- for debugging
_ -> C.S [C.K (C.KS (A.prt tr +++ "66662"))] ---- for debugging
where
mkLab (L (IC l)) = case l of
'_':ds -> (read ds) :: Integer
_ -> prtTrace tr $ 66663
-- return just one module per language
reorder :: CanonGrammar -> CanonGrammar
reorder cg = M.MGrammar $
(abs, M.ModMod $
M.Module M.MTAbstract M.MSComplete [] [] [] adefs):
[(c, M.ModMod $
M.Module (M.MTConcrete abs) M.MSComplete [] [] [] (sorted2tree js))
| (c,js) <- cncs]
where
abs = maybe (error "no abstract") id $ M.greatestAbstract cg
mos = M.allModMod cg
adefs =
sorted2tree $ sortBy (\ (f,_) (g,_) -> compare f g)
[finfo |
(i,mo) <- M.allModMod cg, M.isModAbs mo,
finfo <- tree2list (M.jments mo)]
cncs = sortBy (\ (x,_) (y,_) -> compare x y)
[(lang, concr lang) | lang <- M.allConcretes cg abs]
concr la = sortBy (\ (f,_) (g,_) -> compare f g)
[changeTyp finfo |
(i,mo) <- mos, M.isModCnc mo, elem i (M.allExtends cg la),
finfo <- tree2list (M.jments mo)]
-- convert to UTF8 if not yet converted
utf8Conv :: CanonGrammar -> CanonGrammar
utf8Conv = M.MGrammar . map toUTF8 . M.modules where
toUTF8 mo = case mo of
(i, M.ModMod m)
| hasFlagCanon (flagCanon "coding" "utf8") mo -> mo
| otherwise -> (i, M.ModMod $
m{ M.jments =
mapTree (onSnd (mapInfoTerms (onTokens encodeUTF8))) (M.jments m),
M.flags = setFlag "coding" "utf8" (M.flags m) }
)
_ -> mo
-- translate tables and records to arrays, parameters and labels to indices
canon2canon :: CanonGrammar -> CanonGrammar
canon2canon = recollect . map cl2cl . repartition where
recollect =
M.MGrammar . nubBy (\ (i,_) (j,_) -> i==j) . concatMap M.modules
cl2cl cg = tr $ M.MGrammar $ map c2c $ M.modules cg where
c2c (c,m) = case m of
M.ModMod mo@(M.Module _ _ _ _ _ js) ->
(c, M.ModMod $ M.replaceJudgements mo $ mapTree j2j js)
_ -> (c,m)
j2j (f,j) = case j of
GFC.CncFun x y tr z -> (f,GFC.CncFun x y (t2t tr) z)
_ -> (f,j)
t2t = term2term cg pv
pv@(labels,untyps,typs) = paramValues cg
tr = trace $
(unlines [A.prt c ++ "." ++ unwords (map A.prt l) +++ "=" +++ show i |
((c,l),i) <- Map.toList labels]) ++
(unlines [A.prt t +++ "=" +++ show i |
(t,i) <- Map.toList untyps]) ++
(unlines [A.prt t |
(t,_) <- Map.toList typs])
type ParamEnv =
(Map.Map (Ident,[Label]) (CType,Integer), -- numbered labels
Map.Map Term Integer, -- untyped terms to values
Map.Map CType (Map.Map Term Integer)) -- types to their terms to values
--- gathers those param types that are actually used in lincats and in lin terms
paramValues :: CanonGrammar -> ParamEnv
paramValues cgr = (labels,untyps,typs) where
params = [(ty, errVal [] $ Look.allParamValues cgr ty) | ty <- partyps]
partyps = nub $ [ty |
(_,(_,CncCat (RecType ls) _ _)) <- jments,
ty0 <- [ty | Lbg _ ty <- unlockTyp ls],
ty <- typsFrom ty0
] ++ [
Cn (CIQ m ty) |
(m,(ty,ResPar _)) <- jments
] ++ [ty |
(_,(_,CncFun _ _ tr _)) <- jments,
ty <- err (const []) snd $ appSTM (typsFromTrm tr) []
]
typsFrom ty = case ty of
Table p t -> typsFrom p ++ typsFrom t
RecType ls -> RecType (unlockTyp ls) : concat [typsFrom t | Lbg _ t <- ls]
_ -> [ty]
typsFromTrm :: Term -> STM [CType] Term
typsFromTrm tr = case tr of
V ty ts -> updateSTM (ty:) >> mapM_ typsFromTrm ts >> return tr
T ty cs -> updateSTM (ty:) >> mapM_ typsFromTrm [t | Cas _ t <- cs] >> return tr
_ -> composOp typsFromTrm tr
jments = [(m,j) | (m,mo) <- M.allModMod cgr, j <- tree2list $ M.jments mo]
typs = Map.fromList [(ci,Map.fromList (zip vs [0..])) | (ci,vs) <- params]
untyps = Map.fromList $ concatMap Map.toList [typ | (_,typ) <- Map.toList typs]
lincats =
[(IC cat,[Lbg (L (IC "s")) TStr]) | cat <- ["Int", "Float", "String"]] ++
[(cat,(unlockTyp ls)) | (_,(cat,CncCat (RecType ls) _ _)) <- jments]
labels = Map.fromList $ concat
[((cat,[lab]),(typ,i)):
[((cat,[lab,lab2]),(ty,j)) |
rs <- getRec typ, (Lbg lab2 ty,j) <- zip rs [0..]]
|
(cat,ls) <- lincats, (Lbg lab typ,i) <- zip ls [0..]]
-- go to tables recursively
---- TODO: even go to deeper records
where
getRec typ = case typ of
RecType rs -> [rs]
Table _ t -> getRec t
_ -> []
term2term :: CanonGrammar -> ParamEnv -> Term -> Term
term2term cgr env@(labels,untyps,typs) tr = case tr of
Par _ _ -> mkValCase tr
R rs ->
let
rs' = [Ass (mkLab i) (t2t t) |
(i,Ass l t) <- zip [0..] (unlock rs)]
in if (any (isStr . trmAss) rs)
then R rs'
else R [Ass (L (IC "_")) (mkValCase tr), Ass (L (IC "__")) (R rs')]
P t l -> r2r tr
T _ cs0 -> case expandLinTables cgr tr of -- normalize order of cases
Ok (T ty cs) -> checkCases cs $ V ty [t2t t | Cas _ t <- cs]
_ -> K (KS (A.prt tr +++ prtTrace tr "66668"))
V ty ts -> V ty [t2t t | t <- ts]
S t p -> S (t2t t) (t2t p)
_ -> composSafeOp t2t tr
where
t2t = term2term cgr env
checkCases cs a =
if null [() | Cas (_:_:_) _ <- cs] -- no share option active
then a
else error $ "Share optimization illegal for gfcc in" +++ A.prt tr ++++
"Recompile with -optimize=(values | none | subs | all_subs)."
r2r tr@(P (S (V ty ts) v) l) = t2t $ S (V ty [comp (P t l) | t <- ts]) v
r2r tr@(P p _) = case getLab tr of
Ok (cat,labs) -> P (t2t p) . mkLab $ maybe (prtTrace tr $ 66664) snd $
Map.lookup (cat,labs) labels
_ -> K (KS (A.prt tr +++ prtTrace tr "66665"))
-- this goes recursively into tables (ignored) and records (accumulated)
getLab tr = case tr of
Arg (A cat _) -> return (cat,[])
P p lab2 -> do
(cat,labs) <- getLab p
return (cat,labs++[lab2])
S p _ -> getLab p
_ -> Bad "getLab"
doVar :: Term -> STM [((CType,[Term]),(Term,Term))] Term
doVar tr = case getLab tr of
Ok (cat, lab) -> do
k <- readSTM >>= return . length
let tr' = LI $ identC $ show k
let tyvs = case Map.lookup (cat,lab) labels of
Just (ty,_) -> case Map.lookup ty typs of
Just vs -> (ty,[t |
(t,_) <- sortBy (\x y -> compare (snd x) (snd y))
(Map.assocs vs)])
_ -> error $ A.prt ty
_ -> error $ A.prt tr
updateSTM ((tyvs, (tr', tr)):)
return tr'
_ -> composOp doVar tr
mkValCase tr = case appSTM (doVar tr) [] of
Ok (tr', st@(_:_)) -> t2t $ comp $ foldr mkCase tr' st
_ -> valNum tr
mkCase ((ty,vs),(x,p)) tr =
S (V ty [mkBranch x v tr | v <- vs]) p
mkBranch x t tr = case tr of
_ | tr == x -> t
_ -> composSafeOp (mkBranch x t) tr
mkLab k = L (IC ("_" ++ show k))
valNum tr = maybe (tryPerm tr) EInt $ Map.lookup tr untyps
where
tryPerm tr = case tr of
R rs -> case Map.lookup (R rs) untyps of
Just v -> EInt v
_ -> valNumFV $ tryVar tr
_ -> valNumFV $ tryVar tr
tryVar tr = case tr of
Par c ts -> [Par c ts' | ts' <- combinations (map tryVar ts)]
FV ts -> ts
_ -> [tr]
valNumFV ts = case ts of
[tr] -> K (KS (A.prt tr +++ prtTrace tr "66667"))
_ -> FV $ map valNum ts
isStr tr = case tr of
Par _ _ -> False
EInt _ -> False
R rs -> any (isStr . trmAss) rs
FV ts -> any isStr ts
S t _ -> isStr t
E -> True
T _ cs -> any isStr [v | Cas _ v <- cs]
V _ ts -> any isStr ts
P t r -> case getLab tr of
Ok (cat,labs) -> case
Map.lookup (cat,labs) labels of
Just (ty,_) -> isStrType ty
_ -> True ---- TODO?
_ -> True
_ -> True ----
isStrType ty = case ty of
TStr -> True
RecType ts -> any isStrType [t | Lbg _ t <- ts]
Table _ t -> isStrType t
_ -> False
trmAss (Ass _ t) = t
--- this is mainly needed for parameter record projections
comp t = errVal t $ Look.ccompute cgr [] t
-- remove lock fields; in fact, any empty records and record types
unlock = filter notlock where
notlock (Ass l t) = case t of --- need not look at l
R [] -> False
_ -> True
unlockTyp = filter notlock where
notlock (Lbg l t) = case t of --- need not look at l
RecType [] -> False
_ -> True
prtTrace tr n = n ----trace ("-- ERROR" +++ A.prt tr +++ show n +++ show tr) n
prTrace tr n = trace ("-- OBSERVE" +++ A.prt tr +++ show n +++ show tr) n
-- back-end optimization:
-- suffix analysis followed by common subexpression elimination
optConcrete :: [C.CncDef] -> [C.CncDef]
optConcrete defs = subex
[C.Lin f (optTerm t) | C.Lin f t <- defs]
-- analyse word form lists into prefix + suffixes
-- suffix sets can later be shared by subex elim
optTerm :: C.Term -> C.Term
optTerm tr = case tr of
C.R ts@(_:_:_) | all isK ts -> mkSuff $ optToks [s | C.K (C.KS s) <- ts]
C.R ts -> C.R $ map optTerm ts
C.P t v -> C.P (optTerm t) v
-- C.A x t -> C.A x (optTerm t)
_ -> tr
where
optToks ss = prf : suffs where
prf = pref (head ss) (tail ss)
suffs = map (drop (length prf)) ss
pref cand ss = case ss of
s1:ss2 -> if isPrefixOf cand s1 then pref cand ss2 else pref (init cand) ss
_ -> cand
isK t = case t of
C.K (C.KS _) -> True
_ -> False
mkSuff ("":ws) = C.R (map (C.K . C.KS) ws)
mkSuff (p:ws) = C.W p (C.R (map (C.K . C.KS) ws))
-- common subexpression elimination; see ./Subexpression.hs for the idea
subex :: [C.CncDef] -> [C.CncDef]
subex js = errVal js $ do
(tree,_) <- appSTM (getSubtermsMod js) (Map.empty,0)
return $ addSubexpConsts tree js
type TermList = Map.Map C.Term (Int,Int) -- number of occs, id
type TermM a = STM (TermList,Int) a
addSubexpConsts :: TermList -> [C.CncDef] -> [C.CncDef]
addSubexpConsts tree lins =
let opers = sortBy (\ (C.Lin f _) (C.Lin g _) -> compare f g)
[C.Lin (fid id) trm | (trm,(_,id)) <- list]
in map mkOne $ opers ++ lins
where
mkOne (C.Lin f trm) = (C.Lin f (recomp f trm))
recomp f t = case Map.lookup t tree of
Just (_,id) | fid id /= f -> C.F $ fid id -- not to replace oper itself
_ -> case t of
C.R ts -> C.R $ map (recomp f) ts
C.S ts -> C.S $ map (recomp f) ts
C.W s t -> C.W s (recomp f t)
C.P t p -> C.P (recomp f t) (recomp f p)
C.RP t p -> C.RP (recomp f t) (recomp f p)
-- C.A x t -> C.A x (recomp f t)
_ -> t
fid n = C.CId $ "_" ++ show n
list = Map.toList tree
getSubtermsMod :: [C.CncDef] -> TermM TermList
getSubtermsMod js = do
mapM (getInfo collectSubterms) js
(tree0,_) <- readSTM
return $ Map.filter (\ (nu,_) -> nu > 1) tree0
where
getInfo get (C.Lin f trm) = do
get trm
return ()
collectSubterms :: C.Term -> TermM ()
collectSubterms t = case t of
C.R ts -> do
mapM collectSubterms ts
add t
C.RP u v -> do
collectSubterms v
add t
C.S ts -> do
mapM collectSubterms ts
add t
C.W s u -> do
collectSubterms u
add t
_ -> return ()
where
add t = do
(ts,i) <- readSTM
let
((count,id),next) = case Map.lookup t ts of
Just (nu,id) -> ((nu+1,id), i)
_ -> ((1, i ), i+1)
writeSTM (Map.insert t (count,id) ts, next)