forked from GitHub/gf-core
471 lines
19 KiB
Haskell
471 lines
19 KiB
Haskell
{-# LANGUAGE BangPatterns, FlexibleContexts, MagicHash #-}
|
|
module GF.Compile.GrammarToPGF (grammar2PGF) where
|
|
|
|
import GF.Compile.GeneratePMCFG
|
|
import GF.Compile.GenerateBC
|
|
import GF.Compile.OptimizePGF
|
|
|
|
import PGF2 hiding (mkType)
|
|
import PGF2.Transactions
|
|
import GF.Grammar.Predef
|
|
import GF.Grammar.Grammar hiding (Production)
|
|
import qualified GF.Grammar.Lookup as Look
|
|
import qualified GF.Grammar as A
|
|
import qualified GF.Grammar.Macros as GM
|
|
|
|
import GF.Infra.Ident
|
|
import GF.Infra.Option
|
|
import GF.Infra.UseIO (IOE)
|
|
import GF.Data.Operations
|
|
|
|
import Data.List
|
|
import Data.Char
|
|
import qualified Data.Set as Set
|
|
import qualified Data.Map as Map
|
|
import qualified Data.IntMap as IntMap
|
|
import Data.Array.IArray
|
|
import Data.Maybe(fromMaybe)
|
|
import System.FilePath
|
|
import System.Directory
|
|
|
|
import GHC.Prim
|
|
import GHC.Base(getTag)
|
|
|
|
grammar2PGF :: Options -> SourceGrammar -> ModuleName -> Map.Map PGF2.Fun Double -> IO PGF
|
|
grammar2PGF opts gr am probs = do
|
|
gr <- mkAbstr am probs
|
|
return gr {-do
|
|
cnc_infos <- getConcreteInfos gr am
|
|
return $
|
|
build (let gflags = if flag optSplitPGF opts
|
|
then [("split", LStr "true")]
|
|
else []
|
|
(an,abs) = mkAbstr am probs
|
|
cncs = map (mkConcr opts abs) cnc_infos
|
|
in newPGF gflags an abs cncs)-}
|
|
where
|
|
aflags = err (const noOptions) mflags (lookupModule gr am)
|
|
|
|
mkAbstr :: ModuleName -> Map.Map PGF2.Fun Double -> IO PGF
|
|
mkAbstr am probs = do
|
|
let abs_name = mi2i am
|
|
mb_ngf_path <-
|
|
if snd (flag optLinkTargets opts)
|
|
then do let fname = maybe id (</>)
|
|
(flag optOutputDir opts)
|
|
(fromMaybe abs_name (flag optName opts)<.>"ngf")
|
|
exists <- doesFileExist fname
|
|
if exists
|
|
then removeFile fname
|
|
else return ()
|
|
putStr ("(Boot image "++fname++") ")
|
|
return (Just fname)
|
|
else do return Nothing
|
|
gr <- newNGF abs_name mb_ngf_path
|
|
modifyPGF gr $ do
|
|
sequence_ [setAbstractFlag name value | (name,value) <- flags]
|
|
sequence_ [createCategory c ctxt p | (c,ctxt,p) <- cats]
|
|
sequence_ [createFunction f ty arity p | (f,ty,arity,_,p) <- funs]
|
|
where
|
|
adefs =
|
|
[((cPredefAbs,c), AbsCat (Just (L NoLoc []))) | c <- [cFloat,cInt,cString]] ++
|
|
Look.allOrigInfos gr am
|
|
|
|
flags = optionsPGF aflags
|
|
|
|
toLogProb = realToFrac . negate . log
|
|
|
|
cats = [(c', snd (mkContext [] cont), toLogProb (fromMaybe 0 (Map.lookup c' probs))) |
|
|
((m,c),AbsCat (Just (L _ cont))) <- adefs, let c' = i2i c]
|
|
|
|
funs = [(f', mkType [] ty, arity, bcode, toLogProb (fromMaybe 0 (Map.lookup f' funs_probs))) |
|
|
((m,f),AbsFun (Just (L _ ty)) ma mdef _) <- adefs,
|
|
let arity = mkArity ma mdef ty,
|
|
let bcode = mkDef gr arity mdef,
|
|
let f' = i2i f]
|
|
|
|
funs_probs = (Map.fromList . concat . Map.elems . fmap pad . Map.fromListWith (++))
|
|
[(i2i cat,[(i2i f,Map.lookup f' probs)]) | ((m,f),AbsFun (Just (L _ ty)) _ _ _) <- adefs,
|
|
let (_,(_,cat),_) = GM.typeForm ty,
|
|
let f' = i2i f]
|
|
where
|
|
pad :: [(a,Maybe Double)] -> [(a,Double)]
|
|
pad pfs = [(f,fromMaybe deflt mb_p) | (f,mb_p) <- pfs]
|
|
where
|
|
deflt = case length [f | (f,Nothing) <- pfs] of
|
|
0 -> 0
|
|
n -> max 0 ((1 - sum [d | (f,Just d) <- pfs]) / fromIntegral n)
|
|
{-
|
|
mkConcr opts abs (cm,ex_seqs,cdefs) =
|
|
let cflags = err (const noOptions) mflags (lookupModule gr cm)
|
|
ciCmp | flag optCaseSensitive cflags = compare
|
|
| otherwise = compareCaseInsensitive
|
|
|
|
flags = optionsPGF aflags
|
|
|
|
seqs = (mkSetArray . Set.fromList . concat) $
|
|
(elems (ex_seqs :: Array SeqId [Symbol]) : [maybe [] elems (mseqs mi) | (m,mi) <- allExtends gr cm])
|
|
|
|
!(!fid_cnt1,!cnccats) = genCncCats gr am cm cdefs
|
|
cnccat_ranges = Map.fromList (map (\(cid,s,e,_) -> (cid,(s,e))) cnccats)
|
|
!(!fid_cnt2,!productions,!lindefs,!linrefs,!cncfuns)
|
|
= genCncFuns gr am cm ex_seqs ciCmp seqs cdefs fid_cnt1 cnccat_ranges
|
|
|
|
printnames = genPrintNames cdefs
|
|
|
|
startCat = (fromMaybe "S" (flag optStartCat aflags))
|
|
|
|
(lindefs',linrefs',productions',cncfuns',sequences',cnccats') =
|
|
(if flag optOptimizePGF opts then optimizePGF startCat else id)
|
|
(lindefs,linrefs,productions,cncfuns,elems seqs,cnccats)
|
|
|
|
in (mi2i cm, newConcr abs
|
|
flags
|
|
printnames
|
|
lindefs'
|
|
linrefs'
|
|
productions'
|
|
cncfuns'
|
|
sequences'
|
|
cnccats'
|
|
fid_cnt2)
|
|
|
|
getConcreteInfos gr am = mapM flatten (allConcretes gr am)
|
|
where
|
|
flatten cm = do
|
|
(seqs,infos) <- addMissingPMCFGs cm Map.empty
|
|
(lit_infos ++ Look.allOrigInfos gr cm)
|
|
return (cm,mkMapArray seqs :: Array SeqId [Symbol],infos)
|
|
|
|
lit_infos = [((cPredefAbs,c), CncCat (Just (L NoLoc GM.defLinType)) Nothing Nothing Nothing Nothing) | c <- [cInt,cFloat,cString]]
|
|
|
|
-- if some module was compiled with -no-pmcfg, then
|
|
-- we have to create the PMCFG code just before linking
|
|
addMissingPMCFGs cm seqs [] = return (seqs,[])
|
|
addMissingPMCFGs cm seqs (((m,id), info):is) = do
|
|
(seqs,info) <- addPMCFG opts gr cenv Nothing am cm seqs id info
|
|
(seqs,infos) <- addMissingPMCFGs cm seqs is
|
|
return (seqs, ((m,id), info) : infos)
|
|
-}
|
|
i2i :: Ident -> String
|
|
i2i = showIdent
|
|
|
|
mi2i :: ModuleName -> String
|
|
mi2i (MN i) = i2i i
|
|
|
|
mkType :: [Ident] -> A.Type -> PGF2.Type
|
|
mkType scope t =
|
|
case GM.typeForm t of
|
|
(hyps,(_,cat),args) -> let (scope',hyps') = mkContext scope hyps
|
|
in DTyp hyps' (i2i cat) (map (mkExp scope') args)
|
|
|
|
mkExp :: [Ident] -> A.Term -> Expr
|
|
mkExp scope t =
|
|
case t of
|
|
Q (_,c) -> EFun (i2i c)
|
|
QC (_,c) -> EFun (i2i c)
|
|
Vr x -> case lookup x (zip scope [0..]) of
|
|
Just i -> EVar i
|
|
Nothing -> EMeta 0
|
|
Abs b x t-> EAbs b (i2i x) (mkExp (x:scope) t)
|
|
App t1 t2-> EApp (mkExp scope t1) (mkExp scope t2)
|
|
EInt i -> ELit (LInt (fromIntegral i))
|
|
EFloat f -> ELit (LFlt f)
|
|
K s -> ELit (LStr s)
|
|
Meta i -> EMeta i
|
|
_ -> EMeta 0
|
|
{-
|
|
mkPatt scope p =
|
|
case p of
|
|
A.PP (_,c) ps->let (scope',ps') = mapAccumL mkPatt scope ps
|
|
in (scope',C.PApp (i2i c) ps')
|
|
A.PV x -> (x:scope,C.PVar (i2i x))
|
|
A.PAs x p -> let (scope',p') = mkPatt scope p
|
|
in (x:scope',C.PAs (i2i x) p')
|
|
A.PW -> ( scope,C.PWild)
|
|
A.PInt i -> ( scope,C.PLit (C.LInt (fromIntegral i)))
|
|
A.PFloat f -> ( scope,C.PLit (C.LFlt f))
|
|
A.PString s -> ( scope,C.PLit (C.LStr s))
|
|
A.PImplArg p-> let (scope',p') = mkPatt scope p
|
|
in (scope',C.PImplArg p')
|
|
A.PTilde t -> ( scope,C.PTilde (mkExp scope t))
|
|
-}
|
|
|
|
mkContext :: [Ident] -> A.Context -> ([Ident],[PGF2.Hypo])
|
|
mkContext scope hyps = mapAccumL (\scope (bt,x,ty) -> let ty' = mkType scope ty
|
|
in if x == identW
|
|
then ( scope,(bt,i2i x,ty'))
|
|
else (x:scope,(bt,i2i x,ty'))) scope hyps
|
|
|
|
mkDef gr arity (Just eqs) = generateByteCode gr arity eqs
|
|
mkDef gr arity Nothing = []
|
|
|
|
mkArity (Just a) _ ty = a -- known arity, i.e. defined function
|
|
mkArity Nothing (Just _) ty = 0 -- defined function with no arity - must be an axiom
|
|
mkArity Nothing _ ty = let (ctxt, _, _) = GM.typeForm ty -- constructor
|
|
in length ctxt
|
|
{-
|
|
genCncCats gr am cm cdefs = mkCncCats 0 cdefs
|
|
where
|
|
mkCncCats index [] = (index,[])
|
|
mkCncCats index (((m,id),CncCat (Just (L _ lincat)) _ _ _ _):cdefs)
|
|
| id == cInt =
|
|
let cc = pgfCncCat gr (i2i id) lincat fidInt
|
|
(index',cats) = mkCncCats index cdefs
|
|
in (index', cc : cats)
|
|
| id == cFloat =
|
|
let cc = pgfCncCat gr (i2i id) lincat fidFloat
|
|
(index',cats) = mkCncCats index cdefs
|
|
in (index', cc : cats)
|
|
| id == cString =
|
|
let cc = pgfCncCat gr (i2i id) lincat fidString
|
|
(index',cats) = mkCncCats index cdefs
|
|
in (index', cc : cats)
|
|
| otherwise =
|
|
let cc@(_, _s, e, _) = pgfCncCat gr (i2i id) lincat index
|
|
(index',cats) = mkCncCats (e+1) cdefs
|
|
in (index', cc : cats)
|
|
mkCncCats index (_ :cdefs) = mkCncCats index cdefs
|
|
|
|
genCncFuns :: Grammar
|
|
-> ModuleName
|
|
-> ModuleName
|
|
-> Array SeqId [Symbol]
|
|
-> ([Symbol] -> [Symbol] -> Ordering)
|
|
-> Array SeqId [Symbol]
|
|
-> [(QIdent, Info)]
|
|
-> FId
|
|
-> Map.Map PGF2.Cat (Int,Int)
|
|
-> (FId,
|
|
[(FId, [Production])],
|
|
[(FId, [FunId])],
|
|
[(FId, [FunId])],
|
|
[(PGF2.Fun,[SeqId])])
|
|
genCncFuns gr am cm ex_seqs ciCmp seqs cdefs fid_cnt cnccat_ranges =
|
|
let (fid_cnt1,funs_cnt1,funs1,lindefs,linrefs) = mkCncCats cdefs fid_cnt 0 [] IntMap.empty IntMap.empty
|
|
(fid_cnt2,funs_cnt2,funs2,prods0) = mkCncFuns cdefs fid_cnt1 funs_cnt1 funs1 lindefs Map.empty IntMap.empty
|
|
prods = [(fid,Set.toList prodSet) | (fid,prodSet) <- IntMap.toList prods0]
|
|
in (fid_cnt2,prods,IntMap.toList lindefs,IntMap.toList linrefs,reverse funs2)
|
|
where
|
|
mkCncCats [] fid_cnt funs_cnt funs lindefs linrefs =
|
|
(fid_cnt,funs_cnt,funs,lindefs,linrefs)
|
|
mkCncCats (((m,id),CncCat _ _ _ _ (Just (PMCFG prods0 funs0))):cdefs) fid_cnt funs_cnt funs lindefs linrefs =
|
|
let !funs_cnt' = let (s_funid, e_funid) = bounds funs0
|
|
in funs_cnt+(e_funid-s_funid+1)
|
|
lindefs' = foldl' (toLinDef (am,id) funs_cnt) lindefs prods0
|
|
linrefs' = foldl' (toLinRef (am,id) funs_cnt) linrefs prods0
|
|
funs' = foldl' (toCncFun funs_cnt (m,mkLinDefId id)) funs (assocs funs0)
|
|
in mkCncCats cdefs fid_cnt funs_cnt' funs' lindefs' linrefs'
|
|
mkCncCats (_ :cdefs) fid_cnt funs_cnt funs lindefs linrefs =
|
|
mkCncCats cdefs fid_cnt funs_cnt funs lindefs linrefs
|
|
|
|
mkCncFuns [] fid_cnt funs_cnt funs lindefs crc prods =
|
|
(fid_cnt,funs_cnt,funs,prods)
|
|
mkCncFuns (((m,id),CncFun _ _ _ (Just (PMCFG prods0 funs0))):cdefs) fid_cnt funs_cnt funs lindefs crc prods =
|
|
let ty_C = err error (\x -> x) $ fmap GM.typeForm (Look.lookupFunType gr am id)
|
|
!funs_cnt' = let (s_funid, e_funid) = bounds funs0
|
|
in funs_cnt+(e_funid-s_funid+1)
|
|
!(fid_cnt',crc',prods')
|
|
= foldl' (toProd lindefs ty_C funs_cnt)
|
|
(fid_cnt,crc,prods) prods0
|
|
funs' = foldl' (toCncFun funs_cnt (m,id)) funs (assocs funs0)
|
|
in mkCncFuns cdefs fid_cnt' funs_cnt' funs' lindefs crc' prods'
|
|
mkCncFuns (_ :cdefs) fid_cnt funs_cnt funs lindefs crc prods =
|
|
mkCncFuns cdefs fid_cnt funs_cnt funs lindefs crc prods
|
|
|
|
toProd lindefs (ctxt_C,res_C,_) offs st (A.Production fid0 funid0 args0) =
|
|
let !((fid_cnt,crc,prods),args) = mapAccumL mkArg st (zip ctxt_C args0)
|
|
set0 = Set.fromList (map (PApply (offs+funid0)) (sequence args))
|
|
fid = mkFId res_C fid0
|
|
!prods' = case IntMap.lookup fid prods of
|
|
Just set -> IntMap.insert fid (Set.union set0 set) prods
|
|
Nothing -> IntMap.insert fid set0 prods
|
|
in (fid_cnt,crc,prods')
|
|
where
|
|
mkArg st@(fid_cnt,crc,prods) ((_,_,ty),fid0s) =
|
|
case fid0s of
|
|
[fid0] -> (st,map (flip PArg (mkFId arg_C fid0)) ctxt)
|
|
fid0s -> case Map.lookup fids crc of
|
|
Just fid -> (st,map (flip PArg fid) ctxt)
|
|
Nothing -> let !crc' = Map.insert fids fid_cnt crc
|
|
!prods' = IntMap.insert fid_cnt (Set.fromList (map PCoerce fids)) prods
|
|
in ((fid_cnt+1,crc',prods'),map (flip PArg fid_cnt) ctxt)
|
|
where
|
|
(hargs_C,arg_C) = GM.catSkeleton ty
|
|
ctxt = mapM (mkCtxt lindefs) hargs_C
|
|
fids = map (mkFId arg_C) fid0s
|
|
|
|
mkLinDefId id = prefixIdent "lindef " id
|
|
|
|
toLinDef res offs lindefs (A.Production fid0 funid0 args) =
|
|
if args == [[fidVar]]
|
|
then IntMap.insertWith (++) fid [offs+funid0] lindefs
|
|
else lindefs
|
|
where
|
|
fid = mkFId res fid0
|
|
|
|
toLinRef res offs linrefs (A.Production fid0 funid0 [fargs]) =
|
|
if fid0 == fidVar
|
|
then foldr (\fid -> IntMap.insertWith (++) fid [offs+funid0]) linrefs fids
|
|
else linrefs
|
|
where
|
|
fids = map (mkFId res) fargs
|
|
|
|
mkFId (_,cat) fid0 =
|
|
case Map.lookup (i2i cat) cnccat_ranges of
|
|
Just (s,e) -> s+fid0
|
|
Nothing -> error ("GrammarToPGF.mkFId: missing category "++showIdent cat)
|
|
|
|
mkCtxt lindefs (_,cat) =
|
|
case Map.lookup (i2i cat) cnccat_ranges of
|
|
Just (s,e) -> [(fid,fid) | fid <- [s..e], Just _ <- [IntMap.lookup fid lindefs]]
|
|
Nothing -> error "GrammarToPGF.mkCtxt failed"
|
|
|
|
toCncFun offs (m,id) funs (funid0,lins0) =
|
|
let mseqs = case lookupModule gr m of
|
|
Ok (ModInfo{mseqs=Just mseqs}) -> mseqs
|
|
_ -> ex_seqs
|
|
in (i2i id, map (newIndex mseqs) (elems lins0)):funs
|
|
where
|
|
newIndex mseqs i = binSearch (mseqs ! i) seqs (bounds seqs)
|
|
|
|
binSearch v arr (i,j)
|
|
| i <= j = case ciCmp v (arr ! k) of
|
|
LT -> binSearch v arr (i,k-1)
|
|
EQ -> k
|
|
GT -> binSearch v arr (k+1,j)
|
|
| otherwise = error "binSearch"
|
|
where
|
|
k = (i+j) `div` 2
|
|
|
|
|
|
genPrintNames cdefs =
|
|
[(i2i id, name) | ((m,id),info) <- cdefs, name <- prn info]
|
|
where
|
|
prn (CncFun _ _ (Just (L _ tr)) _) = [flatten tr]
|
|
prn (CncCat _ _ _ (Just (L _ tr)) _) = [flatten tr]
|
|
prn _ = []
|
|
|
|
flatten (K s) = s
|
|
flatten (Alts x _) = flatten x
|
|
flatten (C x y) = flatten x +++ flatten y
|
|
|
|
mkArray lst = listArray (0,length lst-1) lst
|
|
mkMapArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
|
|
mkSetArray set = listArray (0,Set.size set-1) (Set.toList set)
|
|
|
|
-- The following is a version of Data.List.sortBy which together
|
|
-- with the sorting also eliminates duplicate values
|
|
sortNubBy cmp = mergeAll . sequences
|
|
where
|
|
sequences (a:b:xs) =
|
|
case cmp a b of
|
|
GT -> descending b [a] xs
|
|
EQ -> sequences (b:xs)
|
|
LT -> ascending b (a:) xs
|
|
sequences xs = [xs]
|
|
|
|
descending a as [] = [a:as]
|
|
descending a as (b:bs) =
|
|
case cmp a b of
|
|
GT -> descending b (a:as) bs
|
|
EQ -> descending a as bs
|
|
LT -> (a:as) : sequences (b:bs)
|
|
|
|
ascending a as [] = let !x = as [a]
|
|
in [x]
|
|
ascending a as (b:bs) =
|
|
case cmp a b of
|
|
GT -> let !x = as [a]
|
|
in x : sequences (b:bs)
|
|
EQ -> ascending a as bs
|
|
LT -> ascending b (\ys -> as (a:ys)) bs
|
|
|
|
mergeAll [x] = x
|
|
mergeAll xs = mergeAll (mergePairs xs)
|
|
|
|
mergePairs (a:b:xs) = let !x = merge a b
|
|
in x : mergePairs xs
|
|
mergePairs xs = xs
|
|
|
|
merge as@(a:as') bs@(b:bs') =
|
|
case cmp a b of
|
|
GT -> b:merge as bs'
|
|
EQ -> a:merge as' bs'
|
|
LT -> a:merge as' bs
|
|
merge [] bs = bs
|
|
merge as [] = as
|
|
|
|
-- The following function does case-insensitive comparison of sequences.
|
|
-- This is used to allow case-insensitive parsing, while
|
|
-- the linearizer still has access to the original cases.
|
|
|
|
compareCaseInsensitive [] [] = EQ
|
|
compareCaseInsensitive [] _ = LT
|
|
compareCaseInsensitive _ [] = GT
|
|
compareCaseInsensitive (x:xs) (y:ys) =
|
|
case compareSym x y of
|
|
EQ -> compareCaseInsensitive xs ys
|
|
x -> x
|
|
where
|
|
compareSym s1 s2 =
|
|
case s1 of
|
|
SymCat d1 r1
|
|
-> case s2 of
|
|
SymCat d2 r2
|
|
-> case compare d1 d2 of
|
|
EQ -> r1 `compare` r2
|
|
x -> x
|
|
_ -> LT
|
|
SymLit d1 r1
|
|
-> case s2 of
|
|
SymCat {} -> GT
|
|
SymLit d2 r2
|
|
-> case compare d1 d2 of
|
|
EQ -> r1 `compare` r2
|
|
x -> x
|
|
_ -> LT
|
|
SymVar d1 r1
|
|
-> if tagToEnum# (getTag s2 ># 2#)
|
|
then LT
|
|
else case s2 of
|
|
SymVar d2 r2
|
|
-> case compare d1 d2 of
|
|
EQ -> r1 `compare` r2
|
|
x -> x
|
|
_ -> GT
|
|
SymKS t1
|
|
-> if tagToEnum# (getTag s2 ># 3#)
|
|
then LT
|
|
else case s2 of
|
|
SymKS t2 -> t1 `compareToken` t2
|
|
_ -> GT
|
|
SymKP a1 b1
|
|
-> if tagToEnum# (getTag s2 ># 4#)
|
|
then LT
|
|
else case s2 of
|
|
SymKP a2 b2
|
|
-> case compare a1 a2 of
|
|
EQ -> b1 `compare` b2
|
|
x -> x
|
|
_ -> GT
|
|
_ -> let t1 = getTag s1
|
|
t2 = getTag s2
|
|
in if tagToEnum# (t1 <# t2)
|
|
then LT
|
|
else if tagToEnum# (t1 ==# t2)
|
|
then EQ
|
|
else GT
|
|
|
|
compareToken [] [] = EQ
|
|
compareToken [] _ = LT
|
|
compareToken _ [] = GT
|
|
compareToken (x:xs) (y:ys)
|
|
| x == y = compareToken xs ys
|
|
| otherwise = case compare (toLower x) (toLower y) of
|
|
EQ -> case compareToken xs ys of
|
|
EQ -> compare x y
|
|
x -> x
|
|
x -> x
|
|
-}
|