1
0
forked from GitHub/gf-core
Files
gf-core/src/compiler/GF/Compile/GrammarToPGF.hs

471 lines
19 KiB
Haskell

{-# LANGUAGE BangPatterns, FlexibleContexts, MagicHash #-}
module GF.Compile.GrammarToPGF (grammar2PGF) where
import GF.Compile.GeneratePMCFG
import GF.Compile.GenerateBC
import GF.Compile.OptimizePGF
import PGF2 hiding (mkType)
import PGF2.Transactions
import GF.Grammar.Predef
import GF.Grammar.Grammar hiding (Production)
import qualified GF.Grammar.Lookup as Look
import qualified GF.Grammar as A
import qualified GF.Grammar.Macros as GM
import GF.Infra.Ident
import GF.Infra.Option
import GF.Infra.UseIO (IOE)
import GF.Data.Operations
import Data.List
import Data.Char
import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import Data.Array.IArray
import Data.Maybe(fromMaybe)
import System.FilePath
import System.Directory
import GHC.Prim
import GHC.Base(getTag)
grammar2PGF :: Options -> SourceGrammar -> ModuleName -> Map.Map PGF2.Fun Double -> IO PGF
grammar2PGF opts gr am probs = do
gr <- mkAbstr am probs
return gr {-do
cnc_infos <- getConcreteInfos gr am
return $
build (let gflags = if flag optSplitPGF opts
then [("split", LStr "true")]
else []
(an,abs) = mkAbstr am probs
cncs = map (mkConcr opts abs) cnc_infos
in newPGF gflags an abs cncs)-}
where
aflags = err (const noOptions) mflags (lookupModule gr am)
mkAbstr :: ModuleName -> Map.Map PGF2.Fun Double -> IO PGF
mkAbstr am probs = do
let abs_name = mi2i am
mb_ngf_path <-
if snd (flag optLinkTargets opts)
then do let fname = maybe id (</>)
(flag optOutputDir opts)
(fromMaybe abs_name (flag optName opts)<.>"ngf")
exists <- doesFileExist fname
if exists
then removeFile fname
else return ()
putStr ("(Boot image "++fname++") ")
return (Just fname)
else do return Nothing
gr <- newNGF abs_name mb_ngf_path
modifyPGF gr $ do
sequence_ [setAbstractFlag name value | (name,value) <- flags]
sequence_ [createCategory c ctxt p | (c,ctxt,p) <- cats]
sequence_ [createFunction f ty arity p | (f,ty,arity,_,p) <- funs]
where
adefs =
[((cPredefAbs,c), AbsCat (Just (L NoLoc []))) | c <- [cFloat,cInt,cString]] ++
Look.allOrigInfos gr am
flags = optionsPGF aflags
toLogProb = realToFrac . negate . log
cats = [(c', snd (mkContext [] cont), toLogProb (fromMaybe 0 (Map.lookup c' probs))) |
((m,c),AbsCat (Just (L _ cont))) <- adefs, let c' = i2i c]
funs = [(f', mkType [] ty, arity, bcode, toLogProb (fromMaybe 0 (Map.lookup f' funs_probs))) |
((m,f),AbsFun (Just (L _ ty)) ma mdef _) <- adefs,
let arity = mkArity ma mdef ty,
let bcode = mkDef gr arity mdef,
let f' = i2i f]
funs_probs = (Map.fromList . concat . Map.elems . fmap pad . Map.fromListWith (++))
[(i2i cat,[(i2i f,Map.lookup f' probs)]) | ((m,f),AbsFun (Just (L _ ty)) _ _ _) <- adefs,
let (_,(_,cat),_) = GM.typeForm ty,
let f' = i2i f]
where
pad :: [(a,Maybe Double)] -> [(a,Double)]
pad pfs = [(f,fromMaybe deflt mb_p) | (f,mb_p) <- pfs]
where
deflt = case length [f | (f,Nothing) <- pfs] of
0 -> 0
n -> max 0 ((1 - sum [d | (f,Just d) <- pfs]) / fromIntegral n)
{-
mkConcr opts abs (cm,ex_seqs,cdefs) =
let cflags = err (const noOptions) mflags (lookupModule gr cm)
ciCmp | flag optCaseSensitive cflags = compare
| otherwise = compareCaseInsensitive
flags = optionsPGF aflags
seqs = (mkSetArray . Set.fromList . concat) $
(elems (ex_seqs :: Array SeqId [Symbol]) : [maybe [] elems (mseqs mi) | (m,mi) <- allExtends gr cm])
!(!fid_cnt1,!cnccats) = genCncCats gr am cm cdefs
cnccat_ranges = Map.fromList (map (\(cid,s,e,_) -> (cid,(s,e))) cnccats)
!(!fid_cnt2,!productions,!lindefs,!linrefs,!cncfuns)
= genCncFuns gr am cm ex_seqs ciCmp seqs cdefs fid_cnt1 cnccat_ranges
printnames = genPrintNames cdefs
startCat = (fromMaybe "S" (flag optStartCat aflags))
(lindefs',linrefs',productions',cncfuns',sequences',cnccats') =
(if flag optOptimizePGF opts then optimizePGF startCat else id)
(lindefs,linrefs,productions,cncfuns,elems seqs,cnccats)
in (mi2i cm, newConcr abs
flags
printnames
lindefs'
linrefs'
productions'
cncfuns'
sequences'
cnccats'
fid_cnt2)
getConcreteInfos gr am = mapM flatten (allConcretes gr am)
where
flatten cm = do
(seqs,infos) <- addMissingPMCFGs cm Map.empty
(lit_infos ++ Look.allOrigInfos gr cm)
return (cm,mkMapArray seqs :: Array SeqId [Symbol],infos)
lit_infos = [((cPredefAbs,c), CncCat (Just (L NoLoc GM.defLinType)) Nothing Nothing Nothing Nothing) | c <- [cInt,cFloat,cString]]
-- if some module was compiled with -no-pmcfg, then
-- we have to create the PMCFG code just before linking
addMissingPMCFGs cm seqs [] = return (seqs,[])
addMissingPMCFGs cm seqs (((m,id), info):is) = do
(seqs,info) <- addPMCFG opts gr cenv Nothing am cm seqs id info
(seqs,infos) <- addMissingPMCFGs cm seqs is
return (seqs, ((m,id), info) : infos)
-}
i2i :: Ident -> String
i2i = showIdent
mi2i :: ModuleName -> String
mi2i (MN i) = i2i i
mkType :: [Ident] -> A.Type -> PGF2.Type
mkType scope t =
case GM.typeForm t of
(hyps,(_,cat),args) -> let (scope',hyps') = mkContext scope hyps
in DTyp hyps' (i2i cat) (map (mkExp scope') args)
mkExp :: [Ident] -> A.Term -> Expr
mkExp scope t =
case t of
Q (_,c) -> EFun (i2i c)
QC (_,c) -> EFun (i2i c)
Vr x -> case lookup x (zip scope [0..]) of
Just i -> EVar i
Nothing -> EMeta 0
Abs b x t-> EAbs b (i2i x) (mkExp (x:scope) t)
App t1 t2-> EApp (mkExp scope t1) (mkExp scope t2)
EInt i -> ELit (LInt (fromIntegral i))
EFloat f -> ELit (LFlt f)
K s -> ELit (LStr s)
Meta i -> EMeta i
_ -> EMeta 0
{-
mkPatt scope p =
case p of
A.PP (_,c) ps->let (scope',ps') = mapAccumL mkPatt scope ps
in (scope',C.PApp (i2i c) ps')
A.PV x -> (x:scope,C.PVar (i2i x))
A.PAs x p -> let (scope',p') = mkPatt scope p
in (x:scope',C.PAs (i2i x) p')
A.PW -> ( scope,C.PWild)
A.PInt i -> ( scope,C.PLit (C.LInt (fromIntegral i)))
A.PFloat f -> ( scope,C.PLit (C.LFlt f))
A.PString s -> ( scope,C.PLit (C.LStr s))
A.PImplArg p-> let (scope',p') = mkPatt scope p
in (scope',C.PImplArg p')
A.PTilde t -> ( scope,C.PTilde (mkExp scope t))
-}
mkContext :: [Ident] -> A.Context -> ([Ident],[PGF2.Hypo])
mkContext scope hyps = mapAccumL (\scope (bt,x,ty) -> let ty' = mkType scope ty
in if x == identW
then ( scope,(bt,i2i x,ty'))
else (x:scope,(bt,i2i x,ty'))) scope hyps
mkDef gr arity (Just eqs) = generateByteCode gr arity eqs
mkDef gr arity Nothing = []
mkArity (Just a) _ ty = a -- known arity, i.e. defined function
mkArity Nothing (Just _) ty = 0 -- defined function with no arity - must be an axiom
mkArity Nothing _ ty = let (ctxt, _, _) = GM.typeForm ty -- constructor
in length ctxt
{-
genCncCats gr am cm cdefs = mkCncCats 0 cdefs
where
mkCncCats index [] = (index,[])
mkCncCats index (((m,id),CncCat (Just (L _ lincat)) _ _ _ _):cdefs)
| id == cInt =
let cc = pgfCncCat gr (i2i id) lincat fidInt
(index',cats) = mkCncCats index cdefs
in (index', cc : cats)
| id == cFloat =
let cc = pgfCncCat gr (i2i id) lincat fidFloat
(index',cats) = mkCncCats index cdefs
in (index', cc : cats)
| id == cString =
let cc = pgfCncCat gr (i2i id) lincat fidString
(index',cats) = mkCncCats index cdefs
in (index', cc : cats)
| otherwise =
let cc@(_, _s, e, _) = pgfCncCat gr (i2i id) lincat index
(index',cats) = mkCncCats (e+1) cdefs
in (index', cc : cats)
mkCncCats index (_ :cdefs) = mkCncCats index cdefs
genCncFuns :: Grammar
-> ModuleName
-> ModuleName
-> Array SeqId [Symbol]
-> ([Symbol] -> [Symbol] -> Ordering)
-> Array SeqId [Symbol]
-> [(QIdent, Info)]
-> FId
-> Map.Map PGF2.Cat (Int,Int)
-> (FId,
[(FId, [Production])],
[(FId, [FunId])],
[(FId, [FunId])],
[(PGF2.Fun,[SeqId])])
genCncFuns gr am cm ex_seqs ciCmp seqs cdefs fid_cnt cnccat_ranges =
let (fid_cnt1,funs_cnt1,funs1,lindefs,linrefs) = mkCncCats cdefs fid_cnt 0 [] IntMap.empty IntMap.empty
(fid_cnt2,funs_cnt2,funs2,prods0) = mkCncFuns cdefs fid_cnt1 funs_cnt1 funs1 lindefs Map.empty IntMap.empty
prods = [(fid,Set.toList prodSet) | (fid,prodSet) <- IntMap.toList prods0]
in (fid_cnt2,prods,IntMap.toList lindefs,IntMap.toList linrefs,reverse funs2)
where
mkCncCats [] fid_cnt funs_cnt funs lindefs linrefs =
(fid_cnt,funs_cnt,funs,lindefs,linrefs)
mkCncCats (((m,id),CncCat _ _ _ _ (Just (PMCFG prods0 funs0))):cdefs) fid_cnt funs_cnt funs lindefs linrefs =
let !funs_cnt' = let (s_funid, e_funid) = bounds funs0
in funs_cnt+(e_funid-s_funid+1)
lindefs' = foldl' (toLinDef (am,id) funs_cnt) lindefs prods0
linrefs' = foldl' (toLinRef (am,id) funs_cnt) linrefs prods0
funs' = foldl' (toCncFun funs_cnt (m,mkLinDefId id)) funs (assocs funs0)
in mkCncCats cdefs fid_cnt funs_cnt' funs' lindefs' linrefs'
mkCncCats (_ :cdefs) fid_cnt funs_cnt funs lindefs linrefs =
mkCncCats cdefs fid_cnt funs_cnt funs lindefs linrefs
mkCncFuns [] fid_cnt funs_cnt funs lindefs crc prods =
(fid_cnt,funs_cnt,funs,prods)
mkCncFuns (((m,id),CncFun _ _ _ (Just (PMCFG prods0 funs0))):cdefs) fid_cnt funs_cnt funs lindefs crc prods =
let ty_C = err error (\x -> x) $ fmap GM.typeForm (Look.lookupFunType gr am id)
!funs_cnt' = let (s_funid, e_funid) = bounds funs0
in funs_cnt+(e_funid-s_funid+1)
!(fid_cnt',crc',prods')
= foldl' (toProd lindefs ty_C funs_cnt)
(fid_cnt,crc,prods) prods0
funs' = foldl' (toCncFun funs_cnt (m,id)) funs (assocs funs0)
in mkCncFuns cdefs fid_cnt' funs_cnt' funs' lindefs crc' prods'
mkCncFuns (_ :cdefs) fid_cnt funs_cnt funs lindefs crc prods =
mkCncFuns cdefs fid_cnt funs_cnt funs lindefs crc prods
toProd lindefs (ctxt_C,res_C,_) offs st (A.Production fid0 funid0 args0) =
let !((fid_cnt,crc,prods),args) = mapAccumL mkArg st (zip ctxt_C args0)
set0 = Set.fromList (map (PApply (offs+funid0)) (sequence args))
fid = mkFId res_C fid0
!prods' = case IntMap.lookup fid prods of
Just set -> IntMap.insert fid (Set.union set0 set) prods
Nothing -> IntMap.insert fid set0 prods
in (fid_cnt,crc,prods')
where
mkArg st@(fid_cnt,crc,prods) ((_,_,ty),fid0s) =
case fid0s of
[fid0] -> (st,map (flip PArg (mkFId arg_C fid0)) ctxt)
fid0s -> case Map.lookup fids crc of
Just fid -> (st,map (flip PArg fid) ctxt)
Nothing -> let !crc' = Map.insert fids fid_cnt crc
!prods' = IntMap.insert fid_cnt (Set.fromList (map PCoerce fids)) prods
in ((fid_cnt+1,crc',prods'),map (flip PArg fid_cnt) ctxt)
where
(hargs_C,arg_C) = GM.catSkeleton ty
ctxt = mapM (mkCtxt lindefs) hargs_C
fids = map (mkFId arg_C) fid0s
mkLinDefId id = prefixIdent "lindef " id
toLinDef res offs lindefs (A.Production fid0 funid0 args) =
if args == [[fidVar]]
then IntMap.insertWith (++) fid [offs+funid0] lindefs
else lindefs
where
fid = mkFId res fid0
toLinRef res offs linrefs (A.Production fid0 funid0 [fargs]) =
if fid0 == fidVar
then foldr (\fid -> IntMap.insertWith (++) fid [offs+funid0]) linrefs fids
else linrefs
where
fids = map (mkFId res) fargs
mkFId (_,cat) fid0 =
case Map.lookup (i2i cat) cnccat_ranges of
Just (s,e) -> s+fid0
Nothing -> error ("GrammarToPGF.mkFId: missing category "++showIdent cat)
mkCtxt lindefs (_,cat) =
case Map.lookup (i2i cat) cnccat_ranges of
Just (s,e) -> [(fid,fid) | fid <- [s..e], Just _ <- [IntMap.lookup fid lindefs]]
Nothing -> error "GrammarToPGF.mkCtxt failed"
toCncFun offs (m,id) funs (funid0,lins0) =
let mseqs = case lookupModule gr m of
Ok (ModInfo{mseqs=Just mseqs}) -> mseqs
_ -> ex_seqs
in (i2i id, map (newIndex mseqs) (elems lins0)):funs
where
newIndex mseqs i = binSearch (mseqs ! i) seqs (bounds seqs)
binSearch v arr (i,j)
| i <= j = case ciCmp v (arr ! k) of
LT -> binSearch v arr (i,k-1)
EQ -> k
GT -> binSearch v arr (k+1,j)
| otherwise = error "binSearch"
where
k = (i+j) `div` 2
genPrintNames cdefs =
[(i2i id, name) | ((m,id),info) <- cdefs, name <- prn info]
where
prn (CncFun _ _ (Just (L _ tr)) _) = [flatten tr]
prn (CncCat _ _ _ (Just (L _ tr)) _) = [flatten tr]
prn _ = []
flatten (K s) = s
flatten (Alts x _) = flatten x
flatten (C x y) = flatten x +++ flatten y
mkArray lst = listArray (0,length lst-1) lst
mkMapArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
mkSetArray set = listArray (0,Set.size set-1) (Set.toList set)
-- The following is a version of Data.List.sortBy which together
-- with the sorting also eliminates duplicate values
sortNubBy cmp = mergeAll . sequences
where
sequences (a:b:xs) =
case cmp a b of
GT -> descending b [a] xs
EQ -> sequences (b:xs)
LT -> ascending b (a:) xs
sequences xs = [xs]
descending a as [] = [a:as]
descending a as (b:bs) =
case cmp a b of
GT -> descending b (a:as) bs
EQ -> descending a as bs
LT -> (a:as) : sequences (b:bs)
ascending a as [] = let !x = as [a]
in [x]
ascending a as (b:bs) =
case cmp a b of
GT -> let !x = as [a]
in x : sequences (b:bs)
EQ -> ascending a as bs
LT -> ascending b (\ys -> as (a:ys)) bs
mergeAll [x] = x
mergeAll xs = mergeAll (mergePairs xs)
mergePairs (a:b:xs) = let !x = merge a b
in x : mergePairs xs
mergePairs xs = xs
merge as@(a:as') bs@(b:bs') =
case cmp a b of
GT -> b:merge as bs'
EQ -> a:merge as' bs'
LT -> a:merge as' bs
merge [] bs = bs
merge as [] = as
-- The following function does case-insensitive comparison of sequences.
-- This is used to allow case-insensitive parsing, while
-- the linearizer still has access to the original cases.
compareCaseInsensitive [] [] = EQ
compareCaseInsensitive [] _ = LT
compareCaseInsensitive _ [] = GT
compareCaseInsensitive (x:xs) (y:ys) =
case compareSym x y of
EQ -> compareCaseInsensitive xs ys
x -> x
where
compareSym s1 s2 =
case s1 of
SymCat d1 r1
-> case s2 of
SymCat d2 r2
-> case compare d1 d2 of
EQ -> r1 `compare` r2
x -> x
_ -> LT
SymLit d1 r1
-> case s2 of
SymCat {} -> GT
SymLit d2 r2
-> case compare d1 d2 of
EQ -> r1 `compare` r2
x -> x
_ -> LT
SymVar d1 r1
-> if tagToEnum# (getTag s2 ># 2#)
then LT
else case s2 of
SymVar d2 r2
-> case compare d1 d2 of
EQ -> r1 `compare` r2
x -> x
_ -> GT
SymKS t1
-> if tagToEnum# (getTag s2 ># 3#)
then LT
else case s2 of
SymKS t2 -> t1 `compareToken` t2
_ -> GT
SymKP a1 b1
-> if tagToEnum# (getTag s2 ># 4#)
then LT
else case s2 of
SymKP a2 b2
-> case compare a1 a2 of
EQ -> b1 `compare` b2
x -> x
_ -> GT
_ -> let t1 = getTag s1
t2 = getTag s2
in if tagToEnum# (t1 <# t2)
then LT
else if tagToEnum# (t1 ==# t2)
then EQ
else GT
compareToken [] [] = EQ
compareToken [] _ = LT
compareToken _ [] = GT
compareToken (x:xs) (y:ys)
| x == y = compareToken xs ys
| otherwise = case compare (toLower x) (toLower y) of
EQ -> case compareToken xs ys of
EQ -> compare x y
x -> x
x -> x
-}