Now PMCFG is compiled per module and at the end we only link it. The new compilation schema is few times faster.

This commit is contained in:
kr.angelov
2011-11-10 14:09:41 +00:00
parent 4baa44a933
commit 416d231c5e
22 changed files with 602 additions and 517 deletions

View File

@@ -1,10 +1,11 @@
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE BangPatterns #-}
module GF.Compile.GrammarToPGF (mkCanon2pgf) where
import GF.Compile.Export
import GF.Compile.GeneratePMCFG
import PGF.CId
import PGF.Data(fidInt,fidFloat,fidString)
import PGF.Optimize(updateProductionIndices)
import qualified PGF.Macros as CM
import qualified PGF.Data as C
@@ -15,8 +16,8 @@ import GF.Grammar.Grammar
import qualified GF.Grammar.Lookup as Look
import qualified GF.Grammar as A
import qualified GF.Grammar.Macros as GM
--import qualified GF.Compile.Compute.Concrete as Compute ----
import qualified GF.Infra.Option as O
import GF.Compile.GeneratePMCFG
import GF.Infra.Ident
import GF.Infra.Option
@@ -25,61 +26,72 @@ import GF.Data.Operations
import Data.List
import Data.Function
import Data.Char (isDigit,isSpace)
import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.ByteString.Char8 as BS
import Data.Array.IArray
import Text.PrettyPrint
--import Debug.Trace ----
-- when developing, swap commenting
--traceD s t = trace s t
traceD s t = t
import Control.Monad.Identity
-- the main function: generate PGF from GF.
mkCanon2pgf :: Options -> Ident -> SourceGrammar -> IO D.PGF
mkCanon2pgf opts cnc gr = (canon2pgf opts gr . reorder abs) gr
where
abs = err (const cnc) id $ abstractOfConcrete gr cnc
-- Generate PGF from grammar.
type AbsConcsGrammar = (IdModInfo,[IdModInfo]) -- (abstract,concretes)
type IdModInfo = (Ident,SourceModInfo)
canon2pgf :: Options -> SourceGrammar -> AbsConcsGrammar -> IO D.PGF
canon2pgf opts gr (am,cms) = do
if dump opts DumpCanon
then putStrLn (render (vcat (map (ppModule Qualified) (am:cms))))
else return ()
(an,abs) <- mkAbstr am
cncs <- mapM (mkConcr am) cms
mkCanon2pgf :: Options -> SourceGrammar -> Ident -> IO D.PGF
mkCanon2pgf opts gr am = do
(an,abs) <- mkAbstr gr am
cncs <- mapM (mkConcr gr) (allConcretes gr am)
return $ updateProductionIndices (D.PGF Map.empty an abs (Map.fromList cncs))
where
mkAbstr (a,abm) = return (i2i a, D.Abstr flags funs cats)
mkAbstr gr am = return (i2i am, D.Abstr flags funs cats)
where
flags = Map.fromList [(mkCId f,C.LStr x) | (f,x) <- optionsPGF (mflags abm)]
aflags =
concatOptions (reverse [mflags mo | (_,mo) <- modules gr, isModAbs mo])
adefs =
[((cPredefAbs,c), AbsCat (Just (L NoLoc []))) | c <- [cFloat,cInt,cString]] ++
Look.allOrigInfos gr am
flags = Map.fromList [(mkCId f,C.LStr x) | (f,x) <- optionsPGF aflags]
funs = Map.fromAscList [(i2i f, (mkType [] ty, mkArrity ma, mkDef pty, 0)) |
(f,AbsFun (Just (L _ ty)) ma pty _) <- Map.toAscList (jments abm)]
((m,f),AbsFun (Just (L _ ty)) ma pty _) <- adefs]
cats = Map.fromAscList [(i2i c, (snd (mkContext [] cont),catfuns c)) |
(c,AbsCat (Just (L _ cont))) <- Map.toAscList (jments abm)]
((m,c),AbsCat (Just (L _ cont))) <- adefs]
catfuns cat =
(map (\x -> (0,snd x)) . sortBy (compare `on` fst))
[(loc,i2i f) | (f,AbsFun (Just (L loc ty)) _ _ (Just True)) <- tree2list (jments abm), snd (GM.valCat ty) == cat]
[(loc,i2i f) | ((m,f),AbsFun (Just (L loc ty)) _ _ (Just True)) <- adefs, snd (GM.valCat ty) == cat]
mkConcr am cm@(lang,mo) = do
cnc <- convertConcrete opts gr am cm
return (i2i lang, cnc)
mkConcr gr cm = do
return (i2i cm, D.Concr flags
printnames
cncfuns
lindefs
sequences
productions
IntMap.empty
Map.empty
cnccats
IntMap.empty
fid_cnt2)
where
cflags = concatOptions [mflags mo | (i,mo) <- modules gr, isModCnc mo,
Just r <- [lookup i (allExtendSpecs gr cm)]]
cdefs = [((cPredefAbs,c), CncCat (Just (L NoLoc GM.defLinType)) Nothing Nothing Nothing) | c <- [cInt,cFloat,cString]] ++
Look.allOrigInfos gr cm
flags = Map.fromList [(mkCId f,C.LStr x) | (f,x) <- optionsPGF cflags]
!(!fid_cnt1,!cnccats) = genCncCats gr am cm cdefs
!(!fid_cnt2,!productions,!lindefs,!sequences,!cncfuns)
= genCncFuns gr am cm cdefs fid_cnt1 cnccats
printnames = genPrintNames cdefs
i2i :: Ident -> CId
i2i = CId . ident2bs
b2b :: A.BindType -> C.BindType
b2b A.Explicit = C.Explicit
b2b A.Implicit = C.Implicit
mkType :: [Ident] -> A.Type -> C.Type
mkType scope t =
case GM.typeForm t of
@@ -94,7 +106,7 @@ mkExp scope t =
Vr x -> case lookup x (zip scope [0..]) of
Just i -> C.EVar i
Nothing -> C.EMeta 0
Abs b x t-> C.EAbs (b2b b) (i2i x) (mkExp (x:scope) t)
Abs b x t-> C.EAbs b (i2i x) (mkExp (x:scope) t)
App t1 t2-> C.EApp (mkExp scope t1) (mkExp scope t2)
EInt i -> C.ELit (C.LInt (fromIntegral i))
EFloat f -> C.ELit (C.LFlt f)
@@ -120,8 +132,8 @@ mkPatt scope p =
mkContext :: [Ident] -> A.Context -> ([Ident],[C.Hypo])
mkContext scope hyps = mapAccumL (\scope (bt,x,ty) -> let ty' = mkType scope ty
in if x == identW
then ( scope,(b2b bt,i2i x,ty'))
else (x:scope,(b2b bt,i2i x,ty'))) scope hyps
then ( scope,(bt,i2i x,ty'))
else (x:scope,(bt,i2i x,ty'))) scope hyps
mkDef (Just eqs) = Just [C.Equ ps' (mkExp scope' e) | L _ (ps,e) <- eqs, let (scope',ps') = mapAccumL mkPatt [] ps]
mkDef Nothing = Nothing
@@ -148,28 +160,121 @@ compilePatt eqs = whilePP eqs Map.empty
mkCase cns vrs = Case (fmap compilePatt cns) (compilePatt vrs)
-- return just one module per language
reorder :: Ident -> SourceGrammar -> AbsConcsGrammar
reorder abs cg =
-- M.MGrammar $
((abs, ModInfo MTAbstract MSComplete aflags [] Nothing [] [] "" adefs),
[(cnc, ModInfo (MTConcrete abs) MSComplete cflags [] Nothing [] [] "" cdefs)
| cnc <- allConcretes cg abs, let (cflags,cdefs) = concr cnc])
genCncCats gr am cm cdefs =
let (index,cats) = mkCncCats 0 cdefs
in (index, Map.fromList cats)
where
aflags =
concatOptions (reverse [mflags mo | (_,mo) <- modules cg, isModAbs mo])
mkCncCats index [] = (index,[])
mkCncCats index (((m,id),CncCat (Just (L _ lincat)) _ _ _):cdefs)
| id == cInt =
let cc = pgfCncCat gr lincat fidInt
(index',cats) = mkCncCats index cdefs
in (index', (i2i id,cc) : cats)
| id == cFloat =
let cc = pgfCncCat gr lincat fidFloat
(index',cats) = mkCncCats index cdefs
in (index', (i2i id,cc) : cats)
| id == cString =
let cc = pgfCncCat gr lincat fidString
(index',cats) = mkCncCats index cdefs
in (index', (i2i id,cc) : cats)
| otherwise =
let cc@(C.CncCat s e _) = pgfCncCat gr lincat index
(index',cats) = mkCncCats (e+1) cdefs
in (index', (i2i id,cc) : cats)
mkCncCats index (_ :cdefs) = mkCncCats index cdefs
adefs =
Map.fromList (predefADefs ++ Look.allOrigInfos cg abs)
genCncFuns gr am cm cdefs fid_cnt cnccats =
let (fid_cnt1,funs_cnt1,seqs1,funs1,lindefs) = mkCncCats cdefs fid_cnt 0 Map.empty [] IntMap.empty
(fid_cnt2,funs_cnt2,seqs2,funs2,prods) = mkCncFuns cdefs fid_cnt1 funs_cnt1 seqs1 funs1 lindefs Map.empty IntMap.empty
in (fid_cnt2,prods,lindefs,mkSetArray seqs2,array (0,funs_cnt2-1) funs2)
where
mkCncCats [] fid_cnt funs_cnt seqs funs lindefs =
(fid_cnt,funs_cnt,seqs,funs,lindefs)
mkCncCats (((m,id),CncCat _ _ _ (Just (PMCFG prods0 funs0))):cdefs) fid_cnt funs_cnt seqs funs lindefs =
let !funs_cnt' = let (s_funid, e_funid) = bounds funs0
in funs_cnt+(e_funid-s_funid+1)
lindefs' = foldl' (toLinDef (am,id) funs_cnt) lindefs prods0
!(seqs',funs') = foldl' (toCncFun funs_cnt (m,id)) (seqs,funs) (assocs funs0)
in mkCncCats cdefs fid_cnt funs_cnt' seqs' funs' lindefs'
mkCncCats (_ :cdefs) fid_cnt funs_cnt seqs funs lindefs =
mkCncCats cdefs fid_cnt funs_cnt seqs funs lindefs
mkCncFuns [] fid_cnt funs_cnt seqs funs lindefs crc prods =
(fid_cnt,funs_cnt,seqs,funs,prods)
mkCncFuns (((m,id),CncFun _ _ _ (Just (PMCFG prods0 funs0))):cdefs) fid_cnt funs_cnt seqs funs lindefs crc prods =
let Ok ty_C = fmap GM.typeForm (Look.lookupFunType gr am id)
!funs_cnt' = let (s_funid, e_funid) = bounds funs0
in funs_cnt+(e_funid-s_funid+1)
!(fid_cnt',crc',prods')
= foldl' (toProd lindefs ty_C funs_cnt)
(fid_cnt,crc,prods) prods0
!(seqs',funs') = foldl' (toCncFun funs_cnt (m,id)) (seqs,funs) (assocs funs0)
in mkCncFuns cdefs fid_cnt' funs_cnt' seqs' funs' lindefs crc' prods'
mkCncFuns (_ :cdefs) fid_cnt funs_cnt seqs funs lindefs crc prods =
mkCncFuns cdefs fid_cnt funs_cnt seqs funs lindefs crc prods
toProd lindefs (ctxt_C,res_C,_) offs st (Production fid0 funid0 args0) =
let !((fid_cnt,crc,prods),args) = mapAccumL mkArg st (zip ctxt_C args0)
set0 = Set.fromList (map (C.PApply (offs+funid0)) (sequence args))
fid = mkFId res_C fid0
!prods' = case IntMap.lookup fid prods of
Just set -> IntMap.insert fid (Set.union set0 set) prods
Nothing -> IntMap.insert fid set0 prods
in (fid_cnt,crc,prods')
where
predefADefs =
[(c, AbsCat (Just (L NoLoc []))) | c <- [cFloat,cInt,cString]]
mkArg st@(fid_cnt,crc,prods) ((_,_,ty),fid0s ) =
case fid0s of
[fid0] -> (st,map (flip C.PArg (mkFId arg_C fid0)) ctxt)
fid0s -> case Map.lookup fids crc of
Just fid -> (st,map (flip C.PArg fid) ctxt)
Nothing -> let !crc' = Map.insert fids fid_cnt crc
!prods' = IntMap.insert fid_cnt (Set.fromList (map C.PCoerce fids)) prods
in ((fid_cnt+1,crc',prods'),map (flip C.PArg fid_cnt) ctxt)
where
(hargs_C,arg_C) = GM.catSkeleton ty
ctxt = mapM (mkCtxt lindefs) hargs_C
fids = map (mkFId arg_C) fid0s
concr la = (flags, Map.fromList (predefCDefs ++ jments))
where
flags = concatOptions [mflags mo | (i,mo) <- modules cg, isModCnc mo,
Just r <- [lookup i (allExtendSpecs cg la)]]
jments = Look.allOrigInfos cg la
predefCDefs =
[(c, CncCat (Just (L NoLoc GM.defLinType)) Nothing Nothing) | c <- [cInt,cFloat,cString]]
toLinDef res offs lindefs (Production fid0 funid0 _) =
IntMap.insertWith (++) fid [offs+funid0] lindefs
where
fid = mkFId res fid0
mkFId (_,cat) fid0 =
case Map.lookup (i2i cat) cnccats of
Just (C.CncCat s e _) -> s+fid0
Nothing -> error "GrammarToPGF.mkFId failed"
mkCtxt lindefs (_,cat) =
case Map.lookup (i2i cat) cnccats of
Just (C.CncCat s e _) -> [(C.fidVar,fid) | fid <- [s..e], Just _ <- [IntMap.lookup fid lindefs]]
Nothing -> error "GrammarToPGF.mkCtxt failed"
toCncFun offs (m,id) (seqs,funs) (funid0,lins0) =
let Ok (ModInfo{mseqs=Just mseqs}) = lookupModule gr m
!(!seqs',lins) = mapAccumL (mkLin mseqs) seqs (elems lins0)
in (seqs',(offs+funid0,C.CncFun (i2i id) (mkArray lins)):funs)
where
mkLin mseqs seqs seqid =
let seq = mseqs ! seqid
in case Map.lookup seq seqs of
Just seqid -> (seqs,seqid)
Nothing -> let !seqid = Map.size seqs
!seqs' = Map.insert seq seqid seqs
in (seqs',seqid)
genPrintNames cdefs =
Map.fromAscList [(i2i id, name) | ((m,id),info) <- cdefs, name <- prn info]
where
prn (CncFun _ _ (Just (L _ tr)) _) = [flatten tr]
prn (CncCat _ _ (Just (L _ tr)) _) = [flatten tr]
prn _ = []
flatten (K s) = s
flatten (Alts x _) = flatten x
flatten (C x y) = flatten x +++ flatten y
mkArray lst = listArray (0,length lst-1) lst
mkSetArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]