mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
Add the new optimized incremental parser and the common-subexpression-elimination optimization for PMCFG
This commit is contained in:
@@ -40,7 +40,6 @@ exportPGF opts fmt pgf =
|
||||
FmtProlog_Abs -> multi "pl" grammar2prolog_abs
|
||||
FmtBNF -> single "bnf" bnfPrinter
|
||||
FmtEBNF -> single "ebnf" (ebnfPrinter opts)
|
||||
FmtFCFG -> single "fcfg" fcfgPrinter
|
||||
FmtSRGS_XML -> single "grxml" (srgsXmlPrinter opts)
|
||||
FmtSRGS_XML_NonRec -> single "grxml" (srgsXmlNonRecursivePrinter opts)
|
||||
FmtSRGS_ABNF -> single "gram" (srgsAbnfPrinter opts)
|
||||
|
||||
@@ -11,11 +11,13 @@ import GF.Data.ErrM
|
||||
import GF.Infra.Option
|
||||
|
||||
import Control.Monad (mplus)
|
||||
import Data.Array (Array)
|
||||
import qualified Data.Array as Array
|
||||
import Data.Array.Unboxed (UArray)
|
||||
import qualified Data.Array.IArray as Array
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.IntMap as IntMap
|
||||
|
||||
pgf2js :: PGF -> String
|
||||
pgf2js pgf =
|
||||
@@ -89,31 +91,44 @@ children = JS.Ident "cs"
|
||||
-- Parser
|
||||
parser2js :: String -> ParserInfo -> [JS.Expr]
|
||||
parser2js start p = [new "Parser" [JS.EStr start,
|
||||
JS.EArray $ map frule2js (Array.elems (allRules p)),
|
||||
JS.EObj $ map cats (Map.assocs (startupCats p))]]
|
||||
JS.EArray $ [frule2js p cat prod | (cat,set) <- IntMap.toList (productions p), prod <- Set.toList set],
|
||||
JS.EObj $ map cats (Map.assocs (startCats p))]]
|
||||
where
|
||||
cats (c,is) = JS.Prop (JS.IdentPropName (JS.Ident (prCId c))) (JS.EArray (map JS.EInt is))
|
||||
|
||||
frule2js :: FRule -> JS.Expr
|
||||
frule2js (FRule f ps args res lins) = new "Rule" [JS.EInt res, name2js (f,ps), JS.EArray (map JS.EInt args), lins2js lins]
|
||||
-- | Translate one production of the result category @res@ into a JavaScript
-- @Rule@ object (constructed through 'new').
--
-- The 'ParserInfo' is needed because productions only carry function ids;
-- the function name, profile and linearization are looked up in
-- @functions p@.
frule2js :: ParserInfo -> FCat -> Production -> JS.Expr
|
||||
-- Function application: emit the result category, the function name with its
-- profile (via 'name2js'), the argument categories and the linearization.
frule2js p res (FApply funid args) = new "Rule" [JS.EInt res, name2js (f,ps), JS.EArray (map JS.EInt args), lins2js p lins]
|
||||
where
|
||||
FFun f ps lins = functions p Array.! funid
|
||||
-- Coercion: emit a rule whose name slot is @daughter 0@, whose only argument
-- is @arg@, and whose linearization has one single-symbol row
-- @FSymCat 0 i@ for every @i@ in @0 .. catLinArity arg - 1@.
frule2js p res (FCoerce arg) = new "Rule" [JS.EInt res, daughter 0, JS.EArray [JS.EInt arg], JS.EArray [JS.EArray [sym2js (FSymCat 0 i)] | i <- [0..catLinArity arg-1]]]
|
||||
where
|
||||
-- Largest linearization-array size among the functions found by
-- 'topdownRules' for the category; the leading 1 keeps 'maximum' defined
-- (and the result at least 1) when no function is found.
catLinArity :: FCat -> Int
|
||||
catLinArity c = maximum (1:[Array.rangeSize (Array.bounds rhs) | (FFun _ _ rhs, _) <- topdownRules c])
|
||||
|
||||
-- All (function, arguments) pairs from the FApply productions of @cat@,
-- following FCoerce productions into the coerced category.
-- NOTE(review): there is no visited set here, so a cycle of FCoerce
-- productions would recurse forever; presumably coercions are acyclic.
topdownRules cat = f cat []
|
||||
where
|
||||
-- Fold every production stored for @cat@ into the accumulator; a category
-- without an entry in @productions p@ contributes nothing.
f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions p))
|
||||
|
||||
||||
g (FApply funid args) rules = (functions p Array.! funid,args) : rules
|
||||
g (FCoerce cat) rules = f cat rules
|
||||
|
||||
|
||||
name2js :: (CId,[Profile]) -> JS.Expr
|
||||
name2js (f,ps) | f == wildCId = fromProfile (head ps)
|
||||
| otherwise = new "FunApp" $ [JS.EStr $ prCId f, JS.EArray (map fromProfile ps)]
|
||||
name2js (f,ps) = new "FunApp" $ [JS.EStr $ prCId f, JS.EArray (map fromProfile ps)]
|
||||
where
|
||||
fromProfile :: Profile -> JS.Expr
|
||||
fromProfile [] = new "MetaVar" []
|
||||
fromProfile [x] = daughter x
|
||||
fromProfile args = new "Unify" [JS.EArray (map daughter args)]
|
||||
|
||||
daughter i = new "Arg" [JS.EInt i]
|
||||
daughter i = new "Arg" [JS.EInt i]
|
||||
|
||||
lins2js :: Array FIndex (Array FPointPos FSymbol) -> JS.Expr
|
||||
lins2js ls = JS.EArray [ JS.EArray [ sym2js s | s <- Array.elems l] | l <- Array.elems ls]
|
||||
-- | Render a function's linearization as a JavaScript array of arrays.
-- Each sequence id in @ls@ is resolved through @sequences p@, and every
-- symbol of the resulting sequence is converted with 'sym2js'.
lins2js :: ParserInfo -> UArray FIndex SeqId -> JS.Expr
|
||||
lins2js p ls = JS.EArray [JS.EArray [sym2js s | s <- Array.elems (sequences p Array.! seqid)] | seqid <- Array.elems ls]
|
||||
|
||||
sym2js :: FSymbol -> JS.Expr
|
||||
sym2js (FSymCat l n) = new "ArgProj" [JS.EInt n, JS.EInt l]
|
||||
sym2js (FSymTok t) = new "Terminal" [JS.EStr t]
|
||||
sym2js (FSymCat n l) = new "ArgProj" [JS.EInt n, JS.EInt l]
|
||||
sym2js (FSymTok (KS t)) = new "Terminal" [JS.EStr t]
|
||||
|
||||
-- | Build a JavaScript constructor-call expression: the name @f@ is wrapped
-- in a 'JS.Ident' and passed to 'JS.ENew' together with the argument
-- expressions @xs@.
new :: String -> [JS.Expr] -> JS.Expr
|
||||
new f xs = JS.ENew (JS.Ident f) xs
|
||||
|
||||
@@ -25,17 +25,18 @@ import GF.Data.SortedList
|
||||
import GF.Data.Utilities (updateNthM, sortNub)
|
||||
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.IntMap as IntMap
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.List as List
|
||||
import qualified Data.ByteString.Char8 as BS
|
||||
import Data.Array
|
||||
import Data.Array.IArray
|
||||
import Data.Maybe
|
||||
import Control.Monad
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- main conversion function
|
||||
|
||||
convertConcrete :: Abstr -> Concr -> FGrammar
|
||||
convertConcrete :: Abstr -> Concr -> ParserInfo
|
||||
convertConcrete abs cnc = fixHoasFuns $ convert abs_defs' conc' cats'
|
||||
where abs_defs = Map.assocs (funs abs)
|
||||
conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient"
|
||||
@@ -91,14 +92,14 @@ expandHOAS funs lins lincats = (funs' ++ hoFuns ++ varFuns,
|
||||
|
||||
-- replaces __NCat with _B and _Var_Cat with _.
|
||||
-- the temporary names are just there to avoid name collisions.
|
||||
fixHoasFuns :: FGrammar -> FGrammar
|
||||
fixHoasFuns (rs, cs) = ([FRule (fixName n) ps args cat lins | FRule n ps args cat lins <- rs], cs)
|
||||
fixHoasFuns :: ParserInfo -> ParserInfo
|
||||
fixHoasFuns pinfo = pinfo{functions=mkArray [FFun (fixName n) prof lins | FFun n prof lins <- elems (functions pinfo)]}
|
||||
where fixName (CId n) | BS.pack "__" `BS.isPrefixOf` n = (mkCId "_B")
|
||||
| BS.pack "_Var_" `BS.isPrefixOf` n = wildCId
|
||||
fixName n = n
|
||||
|
||||
convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> FGrammar
|
||||
convert abs_defs cnc_defs cat_defs = getFGrammar (loop frulesEnv)
|
||||
convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> ParserInfo
|
||||
convert abs_defs cnc_defs cat_defs = getParserInfo (loop grammarEnv)
|
||||
where
|
||||
srules = [
|
||||
(XRule id args res (map findLinType args) (findLinType res) term) |
|
||||
@@ -107,26 +108,26 @@ convert abs_defs cnc_defs cat_defs = getFGrammar (loop frulesEnv)
|
||||
|
||||
findLinType id = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs)
|
||||
|
||||
(xrulesMap,frulesEnv) = List.foldl' helper (Map.empty,emptyFRulesEnv) srules
|
||||
(xrulesMap,grammarEnv) = List.foldl' helper (Map.empty,emptyFFunsEnv) srules
|
||||
where
|
||||
helper (xrulesMap,frulesEnv) rule@(XRule id abs_args abs_res cnc_args cnc_res term) =
|
||||
helper (xrulesMap,grammarEnv) rule@(XRule id abs_args abs_res cnc_args cnc_res term) =
|
||||
let xrulesMap' = Map.insertWith (++) abs_res [rule] xrulesMap
|
||||
frulesEnv' = List.foldl' (\env selector -> convertRule cnc_defs selector rule env)
|
||||
frulesEnv
|
||||
grammarEnv' = List.foldl' (\env selector -> convertRule cnc_defs selector rule env)
|
||||
grammarEnv
|
||||
(mkSingletonSelectors cnc_defs cnc_res)
|
||||
in xrulesMap' `seq` frulesEnv' `seq` (xrulesMap',frulesEnv')
|
||||
in xrulesMap' `seq` grammarEnv' `seq` (xrulesMap',grammarEnv')
|
||||
|
||||
loop frulesEnv =
|
||||
let (todo, frulesEnv') = takeToDoRules xrulesMap frulesEnv
|
||||
loop grammarEnv =
|
||||
let (todo, grammarEnv') = takeToDoRules xrulesMap grammarEnv
|
||||
in case todo of
|
||||
[] -> frulesEnv'
|
||||
[] -> grammarEnv'
|
||||
_ -> loop $! List.foldl' (\env (srules,selector) ->
|
||||
List.foldl' (\env srule -> convertRule cnc_defs selector srule env) env srules) frulesEnv' todo
|
||||
List.foldl' (\env srule -> convertRule cnc_defs selector srule env) env srules) grammarEnv' todo
|
||||
|
||||
convertRule :: TermMap -> TermSelector -> XRule -> FRulesEnv -> FRulesEnv
|
||||
convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) frulesEnv =
|
||||
convertRule :: TermMap -> TermSelector -> XRule -> GrammarEnv -> GrammarEnv
|
||||
convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) grammarEnv =
|
||||
foldBM addRule
|
||||
frulesEnv
|
||||
grammarEnv
|
||||
(convertTerm cnc_defs selector term [([],[])])
|
||||
(protoFCat cat, map (\scat -> (protoFCat scat,[])) args, ctype, ctypes)
|
||||
where
|
||||
@@ -137,9 +138,10 @@ convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) frulesEnv =
|
||||
(env1, xargs1) = List.mapAccumL (genFCatArg cnc_defs ctype) env xargs
|
||||
in case xcat of
|
||||
PFCat _ [] _ -> (env , args, all_args)
|
||||
_ -> (env1,xargs1++args,(idx,zip xargs1 xargs):all_args)) (env1,[],[]) (zip3 newArgs' ctypes [0..])
|
||||
_ -> (env1,xargs1++args,(idx,zip xargs1 xargs):all_args))
|
||||
(env1,[],[]) (zip3 newArgs' ctypes [0..])
|
||||
|
||||
newLinRec = listArray (0,length linRec-1) [translateLin idxArgs path linRec | path <- case newCat' of {PFCat _ rcs _ -> rcs}]
|
||||
(env3,newLinRec) = List.mapAccumL (translateLin idxArgs linRec) env2 (case newCat' of {PFCat _ rcs _ -> rcs})
|
||||
|
||||
(_,newProfile) = List.mapAccumL accumProf 0 newArgs'
|
||||
where
|
||||
@@ -147,18 +149,19 @@ convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) frulesEnv =
|
||||
accumProf nr (_ ,xpaths) = (nr+cnt+1, [nr..nr+cnt])
|
||||
where cnt = length xpaths
|
||||
|
||||
rule = FRule fun newProfile newArgs newCat newLinRec
|
||||
in addFRule env2 rule
|
||||
(env4,funid) = addFFun env3 (FFun fun newProfile (mkArray newLinRec))
|
||||
|
||||
translateLin idxArgs lbl' [] = array (0,-1) []
|
||||
translateLin idxArgs lbl' ((lbl,syms) : lins)
|
||||
| lbl' == lbl = listArray (0,length syms-1) (map instSym syms)
|
||||
| otherwise = translateLin idxArgs lbl' lins
|
||||
in addProduction env4 newCat (FApply funid newArgs)
|
||||
|
||||
translateLin idxArgs [] grammarEnv lbl' = error "translateLin"
|
||||
translateLin idxArgs ((lbl,syms) : lins) grammarEnv lbl'
|
||||
| lbl' == lbl = addFSeq grammarEnv (lbl,map instSym syms)
|
||||
| otherwise = translateLin idxArgs lins grammarEnv lbl'
|
||||
where
|
||||
instSym = either (\(lbl, nr, xnr) -> instCat lbl nr xnr 0 idxArgs) FSymTok
|
||||
instCat lbl nr xnr nr' ((idx,xargs):idxArgs)
|
||||
| nr == idx = let (fcat, PFCat _ rcs _) = xargs !! xnr
|
||||
in FSymCat (index lbl rcs 0) (nr'+xnr)
|
||||
in FSymCat (nr'+xnr) (index lbl rcs 0)
|
||||
| otherwise = instCat lbl nr xnr (nr'+length xargs) idxArgs
|
||||
|
||||
index lbl' (lbl:lbls) idx
|
||||
@@ -173,7 +176,7 @@ type CnvMonad a = BacktrackM Env a
|
||||
|
||||
type FPath = [FIndex]
|
||||
type Env = (ProtoFCat, [(ProtoFCat,[FPath])], Term, [Term])
|
||||
type LinRec = [(FPath, [Either (FPath, FIndex, Int) FToken])]
|
||||
type LinRec = [(FPath, [Either (FPath, FIndex, Int) Tokn])]
|
||||
|
||||
type TermMap = Map.Map CId Term
|
||||
|
||||
@@ -190,11 +193,11 @@ convertTerm cnc_defs selector (S ts) ((lbl_path,lin) : lins) = do projectH
|
||||
foldM (\lins t -> convertTerm cnc_defs selector t lins) ((lbl_path,lin) : lins) (reverse ts)
|
||||
convertTerm cnc_defs selector (K (KS str)) ((lbl_path,lin) : lins) =
|
||||
do projectHead lbl_path
|
||||
return ((lbl_path,Right str : lin) : lins)
|
||||
return ((lbl_path,Right (KS str) : lin) : lins)
|
||||
convertTerm cnc_defs selector (K (KP strs vars))((lbl_path,lin) : lins) =
|
||||
do projectHead lbl_path
|
||||
toks <- member (strs:[strs' | Alt strs' _ <- vars])
|
||||
return ((lbl_path, map Right toks ++ lin) : lins)
|
||||
return ((lbl_path, map (Right . KS) toks ++ lin) : lins)
|
||||
convertTerm cnc_defs selector (F id) lins = do term <- Map.lookup id cnc_defs
|
||||
convertTerm cnc_defs selector term lins
|
||||
convertTerm cnc_defs selector (W s t) ((lbl_path,lin) : lins) = do
|
||||
@@ -273,75 +276,105 @@ selectTerm (index:path) (R record) = selectTerm path (record !! index)
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- FRulesEnv
|
||||
-- GrammarEnv
|
||||
|
||||
data FRulesEnv = FRulesEnv {-# UNPACK #-} !Int FCatSet [FRule]
|
||||
|
||||
data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int FCatSet FSeqSet FFunSet (IntMap.IntMap (Set.Set Production))
|
||||
type FCatSet = Map.Map CId (Map.Map [FPath] (Map.Map [(FPath,FIndex)] (Either FCat FCat)))
|
||||
type FSeqSet = Map.Map FSeq SeqId
|
||||
type FFunSet = Map.Map FFun FunId
|
||||
|
||||
data ProtoFCat = PFCat CId [FPath] [(FPath,FIndex)]
|
||||
|
||||
-- | The initial proto-category for @cat@: a 'PFCat' whose path list and
-- (path, index) constraint list are both empty.
protoFCat :: CId -> ProtoFCat
|
||||
protoFCat cat = PFCat cat [] []
|
||||
|
||||
emptyFRulesEnv = FRulesEnv 0 (ins fcatString (mkCId "String") [[0]] [] $
|
||||
ins fcatInt (mkCId "Int") [[0]] [] $
|
||||
ins fcatFloat (mkCId "Float") [[0]] [] $
|
||||
ins fcatVar (mkCId "_Var") [[0]] [] $
|
||||
Map.empty) []
|
||||
emptyFFunsEnv = GrammarEnv 0 initFCatSet Map.empty Map.empty IntMap.empty
|
||||
where
|
||||
ins fcat cat rcs tcs fcatSet =
|
||||
Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s fcatSet
|
||||
initFCatSet = (ins fcatString (mkCId "String") [[0]] [] $
|
||||
ins fcatInt (mkCId "Int") [[0]] [] $
|
||||
ins fcatFloat (mkCId "Float") [[0]] [] $
|
||||
ins fcatVar (mkCId "_Var") [[0]] [] $
|
||||
Map.empty)
|
||||
|
||||
ins fcat cat rcs tcs catSet =
|
||||
Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s catSet
|
||||
where
|
||||
right_fcat = Right fcat
|
||||
tmap_s = Map.singleton tcs right_fcat
|
||||
rmap_s = Map.singleton rcs tmap_s
|
||||
|
||||
addFRule :: FRulesEnv -> FRule -> FRulesEnv
|
||||
addFRule (FRulesEnv last_id fcatSet rules) rule = FRulesEnv last_id fcatSet (rule:rules)
|
||||
-- | Record production @p@ for category @cat@.
--
-- The production is unioned into the set already stored under @cat@ in the
-- production map, or becomes a singleton set if @cat@ has no entry yet.
-- All other components of the environment are passed through unchanged.
addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv
|
||||
addProduction (GrammarEnv last_id catSet seqSet funSet prodSet) cat p =
|
||||
GrammarEnv last_id catSet seqSet funSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet)
|
||||
|
||||
getFGrammar :: FRulesEnv -> FGrammar
|
||||
getFGrammar (FRulesEnv last_id fcatSet rules) = (rules, Map.map getFCatList fcatSet)
|
||||
-- | Intern a symbol sequence and return its id.
--
-- The path component of the pair is ignored; only the symbol list is used.
-- If an equal sequence is already in the sequence map, the environment is
-- returned untouched together with the existing id. Otherwise the sequence
-- is inserted with the current map size as its id, so ids are handed out
-- consecutively starting from 0.
addFSeq :: GrammarEnv -> (FPath,[FSymbol]) -> (GrammarEnv,SeqId)
|
||||
addFSeq env@(GrammarEnv last_id catSet seqSet funSet prodSet) (_,lst) =
|
||||
case Map.lookup seq seqSet of
|
||||
Just id -> (env,id)
|
||||
-- The bang pattern forces the new id before it is stored in the map.
Nothing -> let !last_seq = Map.size seqSet
|
||||
in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet prodSet,last_seq)
|
||||
where
|
||||
-- The array form of the symbol list is what is used as the map key.
seq = mkArray lst
|
||||
|
||||
-- | Intern a function and return its id.
--
-- If an equal 'FFun' is already in the function map, the environment is
-- returned untouched together with the existing id. Otherwise the function
-- is inserted with the current map size as its id, so ids are handed out
-- consecutively starting from 0.
addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId)
|
||||
addFFun env@(GrammarEnv last_id catSet seqSet funSet prodSet) fun =
|
||||
case Map.lookup fun funSet of
|
||||
Just id -> (env,id)
|
||||
-- The bang pattern forces the new id before it is stored in the map.
Nothing -> let !last_funid = Map.size funSet
|
||||
in (GrammarEnv last_id catSet seqSet (Map.insert fun last_funid funSet) prodSet,last_funid)
|
||||
|
||||
-- | Freeze the accumulated environment into a 'ParserInfo'.
--
-- The function and sequence maps (value -> id) are inverted into arrays
-- indexed by id, the production map is used as is, and every category name
-- is mapped to the list of concrete categories collected by 'getFCatList'.
-- The @last_id@ counter is not used here.
getParserInfo :: GrammarEnv -> ParserInfo
|
||||
getParserInfo (GrammarEnv last_id catSet seqSet funSet prodSet) =
|
||||
ParserInfo { functions = mkArray funSet
|
||||
, sequences = mkArray seqSet
|
||||
, productions = prodSet
|
||||
, startCats = Map.map getFCatList catSet
|
||||
}
|
||||
where
|
||||
-- Local helper (takes a Map, unlike a list-based mkArray): turns each
-- (key, id) entry into array slot @id@ holding @key@, with bounds
-- @0 .. size - 1@.
mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
|
||||
|
||||
-- Flatten the two nested maps of one category into a list of FCats; the
-- Left/Right tag on each entry is discarded.
getFCatList rcs = Map.fold (\tcs lst -> Map.fold (\x lst -> either id id x : lst) lst tcs) [] rcs
|
||||
|
||||
genFCatHead :: FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat)
|
||||
genFCatHead env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs) =
|
||||
case Map.lookup cat fcatSet >>= Map.lookup rcs >>= Map.lookup tcs of
|
||||
Just (Left fcat) -> (FRulesEnv last_id (ins fcat) rules, fcat)
|
||||
|
||||
genFCatHead :: GrammarEnv -> ProtoFCat -> (GrammarEnv, FCat)
|
||||
genFCatHead env@(GrammarEnv last_id catSet seqSet funSet prodSet) (PFCat cat rcs tcs) =
|
||||
case Map.lookup cat catSet >>= Map.lookup rcs >>= Map.lookup tcs of
|
||||
Just (Left fcat) -> (GrammarEnv last_id (ins fcat) seqSet funSet prodSet, fcat)
|
||||
Just (Right fcat) -> (env, fcat)
|
||||
Nothing -> let fcat = last_id+1
|
||||
in (FRulesEnv fcat (ins fcat) rules, fcat)
|
||||
in (GrammarEnv fcat (ins fcat) seqSet funSet prodSet, fcat)
|
||||
where
|
||||
ins fcat = Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s fcatSet
|
||||
ins fcat = Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s catSet
|
||||
where
|
||||
right_fcat = Right fcat
|
||||
tmap_s = Map.singleton tcs right_fcat
|
||||
rmap_s = Map.singleton rcs tmap_s
|
||||
|
||||
genFCatArg :: TermMap -> Term -> FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat)
|
||||
genFCatArg cnc_defs ctype env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs) =
|
||||
case Map.lookup cat fcatSet >>= Map.lookup rcs of
|
||||
genFCatArg :: TermMap -> Term -> GrammarEnv -> ProtoFCat -> (GrammarEnv, FCat)
|
||||
genFCatArg cnc_defs ctype env@(GrammarEnv last_id catSet seqSet funSet prodSet) (PFCat cat rcs tcs) =
|
||||
case Map.lookup cat catSet >>= Map.lookup rcs of
|
||||
Just tmap -> case Map.lookup tcs tmap of
|
||||
Just (Left fcat) -> (env, fcat)
|
||||
Just (Right fcat) -> (env, fcat)
|
||||
Just (Left fcat) -> (env, fcat)
|
||||
Just (Right fcat) -> (env, fcat)
|
||||
Nothing -> ins tmap
|
||||
Nothing -> ins Map.empty
|
||||
where
|
||||
ins tmap =
|
||||
let fcat = last_id+1
|
||||
(either_fcat,last_id1,tmap1,rules1)
|
||||
= foldBM (\tcs st (either_fcat,last_id,tmap,rules) ->
|
||||
(either_fcat,last_id1,tmap1,prodSet1)
|
||||
= foldBM (\tcs st (either_fcat,last_id,tmap,prodSet) ->
|
||||
let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap
|
||||
rule = FRule wildCId [[0]] [fcat_arg] fcat
|
||||
(listArray (0,length rcs-1) [listArray (0,0) [FSymCat lbl 0] | lbl <- [0..length rcs-1]])
|
||||
p = FCoerce fcat_arg
|
||||
prodSet1 = IntMap.insertWith Set.union fcat (Set.singleton p) prodSet
|
||||
in if st
|
||||
then (Right fcat, last_id1,tmap1,rule:rules)
|
||||
else (either_fcat,last_id, tmap, rules))
|
||||
(Left fcat,fcat,Map.insert tcs either_fcat tmap,rules)
|
||||
then (Right fcat, last_id1,tmap1,prodSet1)
|
||||
else (either_fcat,last_id, tmap ,prodSet ))
|
||||
(Left fcat,fcat,Map.insert tcs either_fcat tmap,prodSet)
|
||||
(gen_tcs ctype [] [])
|
||||
False
|
||||
rmap1 = Map.singleton rcs tmap1
|
||||
in (FRulesEnv last_id1 (Map.insertWith (\_ -> Map.insert rcs tmap1) cat rmap1 fcatSet) rules1, fcat)
|
||||
in (GrammarEnv last_id1 (Map.insertWith (\_ -> Map.insert rcs tmap1) cat rmap1 catSet) seqSet funSet prodSet1, fcat)
|
||||
where
|
||||
addArg tcs last_id tmap =
|
||||
case Map.lookup tcs tmap of
|
||||
@@ -380,10 +413,11 @@ data XRule = XRule CId {- function -}
|
||||
Term {- result lin-type representation -}
|
||||
Term {- body -}
|
||||
|
||||
takeToDoRules :: XRulesMap -> FRulesEnv -> ([([XRule], TermSelector)], FRulesEnv)
|
||||
takeToDoRules xrulesMap (FRulesEnv last_id fcatSet rules) = (todo,FRulesEnv last_id fcatSet' rules)
|
||||
takeToDoRules :: XRulesMap -> GrammarEnv -> ([([XRule], TermSelector)], GrammarEnv)
|
||||
takeToDoRules xrulesMap (GrammarEnv last_id catSet seqSet funSet prodSet) =
|
||||
(todo,GrammarEnv last_id catSet' seqSet funSet prodSet)
|
||||
where
|
||||
(todo,fcatSet') =
|
||||
(todo,catSet') =
|
||||
Map.mapAccumWithKey (\todo cat rmap ->
|
||||
let (todo1,rmap1) = Map.mapAccumWithKey (\todo rcs tmap ->
|
||||
let (tcss,tmap') = Map.mapAccumWithKey (\tcss tcs either_xcat ->
|
||||
@@ -398,7 +432,7 @@ takeToDoRules xrulesMap (FRulesEnv last_id fcatSet rules) = (todo,FRulesEnv last
|
||||
|
||||
in case mb_srules of
|
||||
Just srules -> (todo1,rmap1)
|
||||
Nothing -> (todo ,rmap1)) [] fcatSet
|
||||
Nothing -> (todo ,rmap1)) [] catSet
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
@@ -524,3 +558,5 @@ projectProtoFCat path0 (PFCat cat rcs tcs) = do
|
||||
| path0 > path = path : addConstraint rcs
|
||||
| path0 == path = path : rcs
|
||||
addConstraint rcs = path0 : rcs
|
||||
|
||||
-- | Pack a list into an array with indices @0 .. length lst - 1@.
mkArray lst = listArray (0,length lst-1) lst
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
{-# OPTIONS -fbang-patterns #-}
|
||||
{-# OPTIONS -fbang-patterns -cpp #-}
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : Krasimir Angelov
|
||||
@@ -12,14 +12,12 @@
|
||||
-- the conversion is only equivalent if the GFC grammar has a context-free backbone.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
|
||||
module GF.Compile.GeneratePMCFG
|
||||
(convertConcrete) where
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Macros --hiding (prt)
|
||||
import PGF.Parsing.FCFG.Utilities
|
||||
|
||||
import GF.Data.BacktrackM
|
||||
import GF.Data.SortedList
|
||||
@@ -28,8 +26,9 @@ import GF.Data.Utilities (updateNthM, sortNub)
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.List as List
|
||||
import qualified Data.IntMap as IntMap
|
||||
import qualified Data.ByteString.Char8 as BS
|
||||
import Data.Array
|
||||
import Data.Array.IArray
|
||||
import Data.Maybe
|
||||
import Control.Monad
|
||||
import Debug.Trace
|
||||
@@ -37,7 +36,7 @@ import Debug.Trace
|
||||
----------------------------------------------------------------------
|
||||
-- main conversion function
|
||||
|
||||
convertConcrete :: Abstr -> Concr -> FGrammar
|
||||
convertConcrete :: Abstr -> Concr -> ParserInfo
|
||||
convertConcrete abs cnc = fixHoasFuns $ convert abs_defs' conc' cats'
|
||||
where abs_defs = Map.assocs (funs abs)
|
||||
conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient"
|
||||
@@ -93,14 +92,14 @@ expandHOAS funs lins lincats = (funs' ++ hoFuns ++ varFuns,
|
||||
|
||||
-- replaces __NCat with _B and _Var_Cat with _.
|
||||
-- the temporary names are just there to avoid name collisions.
|
||||
fixHoasFuns :: FGrammar -> FGrammar
|
||||
fixHoasFuns (!rs, !cs) = ([FRule (fixName n) ps args cat lins | FRule n ps args cat lins <- rs], cs)
|
||||
fixHoasFuns :: ParserInfo -> ParserInfo
|
||||
fixHoasFuns pinfo = pinfo{functions=mkArray [FFun (fixName n) prof lins | FFun n prof lins <- elems (functions pinfo)]}
|
||||
where fixName (CId n) | BS.pack "__" `BS.isPrefixOf` n = (mkCId "_B")
|
||||
| BS.pack "_Var_" `BS.isPrefixOf` n = wildCId
|
||||
fixName n = n
|
||||
|
||||
convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> FGrammar
|
||||
convert abs_defs cnc_defs cat_defs = getFGrammar (List.foldl' (convertRule cnc_defs) emptyFRulesEnv srules)
|
||||
convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> ParserInfo
|
||||
convert abs_defs cnc_defs cat_defs = getParserInfo (List.foldl' (convertRule cnc_defs) (emptyFRulesEnv cnc_defs cat_defs) srules)
|
||||
where
|
||||
srules = [
|
||||
(XRule id args res (map findLinType args) (findLinType res) term) |
|
||||
@@ -109,23 +108,40 @@ convert abs_defs cnc_defs cat_defs = getFGrammar (List.foldl' (convertRule cnc_d
|
||||
|
||||
findLinType id = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs)
|
||||
|
||||
-- | Run the grammar-building step @f@ and merge the productions it creates
-- into the environment in a compacted form.
--
-- @f@ is applied to a copy of the environment whose production map has been
-- emptied, so the production map of its result (@topdown1@) holds only what
-- this step produced. Those productions are then added to the original
-- production map, one category and one function at a time:
--
-- * The argument vectors of all 'FApply' productions with the same function
--   are grouped, and the set of categories seen at each argument position is
--   collected (@ys@).
--
-- * If the number of vectors equals the product of the per-position set
--   sizes, i.e. the vectors cover every combination, they are replaced by a
--   single production whose arguments are the categories returned by
--   'addFCoercion' for each position.
--
-- * Otherwise every vector is added as its own production.
--
-- All other environment components are taken from the result of @f@.
--
-- NOTE(review): only 'FApply' productions of @topdown1@ are carried over;
-- anything else in those sets is filtered out by the pattern in the list
-- comprehension. Confirm that this is intended.
brk :: (GrammarEnv -> GrammarEnv) -> (GrammarEnv -> GrammarEnv)
brk f (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
  case f (GrammarEnv last_id catSet seqSet funSet crcSet IntMap.empty) of
    GrammarEnv last_id1 catSet1 seqSet1 funSet1 crcSet1 topdown1 ->
      IntMap.foldWithKey optimize
                         (GrammarEnv last_id1 catSet1 seqSet1 funSet1 crcSet1 prodSet)
                         topdown1
  where
    -- Group the productions of one category by function id, then merge each
    -- group into the environment.
    optimize cat ps env =
      IntMap.foldWithKey ff env
        (IntMap.fromListWith (++) [(funid,[args]) | FApply funid args <- Set.toList ps])
      where
        ff :: FunId -> [[FCat]] -> GrammarEnv -> GrammarEnv
        ff funid xs env0
          | product (map Set.size ys) == count =
              case List.mapAccumL (\e c -> addFCoercion e (Set.toList c)) env0 ys of
                (env1,args) -> addProduction env1 cat (FApply funid args)
          | otherwise =
              -- Strict left fold, matching the other folds in this module, so
              -- no chain of pending addProduction thunks is built up.
              List.foldl' (\e args -> addProduction e cat (FApply funid args)) env0 xs
          where
            count = length xs
            -- Per-position sets of argument categories; 'zipWith' cuts the
            -- infinite seed list down to the length of the argument vectors.
            ys    = foldr (zipWith Set.insert) (repeat Set.empty) xs
|
||||
|
||||
convertRule :: TermMap -> FRulesEnv -> XRule -> FRulesEnv
|
||||
convertRule cnc_defs frulesEnv (XRule fun args cat ctypes ctype term) =
|
||||
foldBM addRule
|
||||
frulesEnv
|
||||
(convertTerm cnc_defs [] ctype term [([],[])])
|
||||
(protoFCat cnc_defs cat ctype, zipWith (protoFCat cnc_defs) args ctypes)
|
||||
convertRule :: TermMap -> GrammarEnv -> XRule -> GrammarEnv
|
||||
convertRule cnc_defs grammarEnv (XRule fun args cat ctypes ctype term) = trace (show fun) $
|
||||
brk (\grammarEnv -> foldBM addRule
|
||||
grammarEnv
|
||||
(convertTerm cnc_defs [] ctype term [([],[])])
|
||||
(protoFCat cnc_defs cat ctype, zipWith (protoFCat cnc_defs) args ctypes)) grammarEnv
|
||||
where
|
||||
addRule linRec (newCat', newArgs') env0 =
|
||||
let (env1, newCat) = genFCatHead env0 newCat'
|
||||
(env2, newArgs) = List.mapAccumL (genFCatArg cnc_defs) env1 newArgs'
|
||||
let [newCat] = getFCats env0 newCat'
|
||||
(env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs'
|
||||
|
||||
newLinRec = mkArray (map (mkArray . snd) linRec)
|
||||
mkArray lst = listArray (0,length lst-1) lst
|
||||
(env2,lins) = List.mapAccumL addFSeq env1 linRec
|
||||
newLinRec = mkArray lins
|
||||
|
||||
rule = FRule fun [] newArgs newCat newLinRec
|
||||
in addFRule env2 rule
|
||||
(env3,funid) = addFFun env2 (FFun fun [[n] | n <- [0..length newArgs-1]] newLinRec)
|
||||
|
||||
in addProduction env3 newCat (FApply funid newArgs)
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- term conversion
|
||||
@@ -133,7 +149,7 @@ convertRule cnc_defs frulesEnv (XRule fun args cat ctypes ctype term) =
|
||||
type CnvMonad a = BacktrackM Env a
|
||||
|
||||
type FPath = [FIndex]
|
||||
data ProtoFCat = PFCat CId [FPath] [(FPath,FIndex)] Term
|
||||
data ProtoFCat = PFCat CId [FPath] [(FPath,[FIndex])]
|
||||
type Env = (ProtoFCat, [ProtoFCat])
|
||||
type LinRec = [(FPath, [FSymbol])]
|
||||
data XRule = XRule CId {- function -}
|
||||
@@ -144,7 +160,16 @@ data XRule = XRule CId {- function -}
|
||||
Term {- body -}
|
||||
|
||||
protoFCat :: TermMap -> CId -> Term -> ProtoFCat
|
||||
protoFCat cnc_defs cat ctype = PFCat cat (getRCS cnc_defs ctype) [] ctype
|
||||
protoFCat cnc_defs cat ctype =
|
||||
let (rcs,tcs) = loop [] [] [] ctype
|
||||
in PFCat cat rcs tcs
|
||||
where
|
||||
loop path rcs tcs (R record) = List.foldl' (\(rcs,tcs) (index,term) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record)
|
||||
loop path rcs tcs (C i) = ( rcs,(path,[0..i]):tcs)
|
||||
loop path rcs tcs (S _) = (path:rcs, tcs)
|
||||
loop path rcs tcs (F id) = case Map.lookup id cnc_defs of
|
||||
Just term -> loop path rcs tcs term
|
||||
Nothing -> error ("unknown identifier: "++show id)
|
||||
|
||||
type TermMap = Map.Map CId Term
|
||||
|
||||
@@ -156,11 +181,12 @@ convertTerm cnc_defs sel ctype (P term p) lins = do nr <- e
|
||||
convertTerm cnc_defs (nr:sel) ctype term lins
|
||||
convertTerm cnc_defs sel ctype (FV vars) lins = do term <- member vars
|
||||
convertTerm cnc_defs sel ctype term lins
|
||||
convertTerm cnc_defs sel ctype (S ts) ((lbl_path,lin) : lins) = foldM (\lins t -> convertTerm cnc_defs sel ctype t lins) ((lbl_path,lin) : lins) (reverse ts)
|
||||
convertTerm cnc_defs sel ctype (K (KS str)) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok str : lin) : lins)
|
||||
convertTerm cnc_defs sel ctype (S ts) lins = foldM (\lins t -> convertTerm cnc_defs sel ctype t lins) lins (reverse ts)
|
||||
--convertTerm cnc_defs sel ctype (K t) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok t : lin) : lins)
|
||||
convertTerm cnc_defs sel ctype (K (KS t)) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok (KS t) : lin) : lins)
|
||||
convertTerm cnc_defs sel ctype (K (KP strs vars))((lbl_path,lin) : lins) =
|
||||
do toks <- member (strs:[strs' | Alt strs' _ <- vars])
|
||||
return ((lbl_path, map FSymTok toks ++ lin) : lins)
|
||||
return ((lbl_path, map (FSymTok . KS) toks ++ lin) : lins)
|
||||
convertTerm cnc_defs sel ctype (F id) lins = do term <- Map.lookup id cnc_defs
|
||||
convertTerm cnc_defs sel ctype term lins
|
||||
convertTerm cnc_defs sel ctype (W s t) ((lbl_path,lin) : lins) = do
|
||||
@@ -183,8 +209,8 @@ convertArg (C max) nr path lbl_path lin lins = do
|
||||
return lins
|
||||
convertArg (S _) nr path lbl_path lin lins = do
|
||||
(_, args) <- readState
|
||||
let PFCat cat rcs tcs _ = args !! nr
|
||||
return ((lbl_path, FSymCat (index path rcs 0) nr : lin) : lins)
|
||||
let PFCat cat rcs tcs = args !! nr
|
||||
return ((lbl_path, FSymCat nr (index path rcs 0) : lin) : lins)
|
||||
where
|
||||
index lbl' (lbl:lbls) idx
|
||||
| lbl' == lbl = idx
|
||||
@@ -210,8 +236,11 @@ convertRec cnc_defs (index:sub_sel) ctype record lbl_path lin lins = do
|
||||
|
||||
evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex
|
||||
evalTerm cnc_defs path (V nr) = do (_, args) <- readState
|
||||
let PFCat _ _ _ ctype = args !! nr
|
||||
unifyPType nr (reverse path) (selectTerm path ctype)
|
||||
let PFCat _ _ tcs = args !! nr
|
||||
rpath = reverse path
|
||||
index <- member (fromMaybe (error "evalTerm: wrong path") (lookup rpath tcs))
|
||||
restrictArg nr rpath index
|
||||
return index
|
||||
evalTerm cnc_defs path (C nr) = return nr
|
||||
evalTerm cnc_defs path (R record) = case path of
|
||||
(index:path) -> evalTerm cnc_defs path (record !! index)
|
||||
@@ -222,112 +251,80 @@ evalTerm cnc_defs path (F id) = do term <- Map.lookup id cnc_defs
|
||||
evalTerm cnc_defs path term
|
||||
evalTerm cnc_defs path x = error ("evalTerm ("++show x++")")
|
||||
|
||||
unifyPType :: FIndex -> FPath -> Term -> CnvMonad FIndex
|
||||
unifyPType nr path (C max_index) =
|
||||
do (_, args) <- readState
|
||||
let PFCat _ _ tcs _ = args !! nr
|
||||
case lookup path tcs of
|
||||
Just index -> return index
|
||||
Nothing -> do index <- member [0..max_index]
|
||||
restrictArg nr path index
|
||||
return index
|
||||
unifyPType nr path t = error $ "unifyPType " ++ show t ---- AR 2/10/2007
|
||||
|
||||
selectTerm :: FPath -> Term -> Term
|
||||
selectTerm [] term = term
|
||||
selectTerm (index:path) (R record) = selectTerm path (record !! index)
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- FRulesEnv
|
||||
-- GrammarEnv
|
||||
|
||||
data FRulesEnv = FRulesEnv {-# UNPACK #-} !Int FCatSet [FRule]
|
||||
type FCatSet = Map.Map CId (Map.Map [(FPath,FIndex)] FCat)
|
||||
data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int CatSet SeqSet FunSet CoerceSet (IntMap.IntMap (Set.Set Production))
|
||||
type CatSet = Map.Map CId (FCat,FCat,[Int])
|
||||
type SeqSet = Map.Map FSeq SeqId
|
||||
type FunSet = Map.Map FFun FunId
|
||||
type CoerceSet= Map.Map [FCat] FCat
|
||||
|
||||
emptyFRulesEnv = FRulesEnv 0 (ins fcatString (mkCId "String") [] $
|
||||
ins fcatInt (mkCId "Int") [] $
|
||||
ins fcatFloat (mkCId "Float") [] $
|
||||
ins fcatVar (mkCId "_Var") [] $
|
||||
Map.empty) []
|
||||
emptyFRulesEnv cnc_defs lincats =
|
||||
let (last_id,catSet) = Map.mapAccum computeCatRange 0 lincats
|
||||
in GrammarEnv last_id catSet Map.empty Map.empty Map.empty IntMap.empty
|
||||
where
|
||||
ins fcat cat tcs fcatSet =
|
||||
Map.insertWith (\_ -> Map.insert tcs fcat) cat tmap_s fcatSet
|
||||
computeCatRange index ctype = (index+size,(index,index+size-1,poly))
|
||||
where
|
||||
tmap_s = Map.singleton tcs fcat
|
||||
(size,poly) = getMultipliers 1 [] ctype
|
||||
|
||||
getMultipliers m ms (R record) = foldl (\(m,ms) t -> getMultipliers m ms t) (m,ms) record
|
||||
getMultipliers m ms (S _) = (m,ms)
|
||||
getMultipliers m ms (C max_index) = (m*(max_index+1),m : ms)
|
||||
getMultipliers m ms (F id) = case Map.lookup id cnc_defs of
|
||||
Just term -> getMultipliers m ms term
|
||||
Nothing -> error ("unknown identifier: "++prCId id)
|
||||
|
||||
-- | Prepend a rule to the rule list of the environment; the id counter
-- and the category set are left untouched.
addFRule :: FRulesEnv -> FRule -> FRulesEnv
addFRule (FRulesEnv counter cats collected) new =
  FRulesEnv counter cats (new : collected)
|
||||
-- | Record production @p@ for category @cat@. Productions of one category
-- are kept in a set, so adding the same production twice has no effect.
addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv
addProduction (GrammarEnv lastId cats seqs funs coercions prods) cat p =
  GrammarEnv lastId cats seqs funs coercions prods'
  where
    -- union the singleton with whatever is already stored under @cat@
    prods' = IntMap.insertWith Set.union cat (Set.singleton p) prods
|
||||
|
||||
-- | Extract the finished grammar: the collected rules paired with, for
-- every category name, the list of concrete categories stored for it.
getFGrammar :: FRulesEnv -> FGrammar
getFGrammar (FRulesEnv _ cats collected) = (collected, catLists)
  where
    catLists = Map.map Map.elems cats
|
||||
|
||||
genFCatHead :: FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat)
|
||||
genFCatHead env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs _) =
|
||||
case Map.lookup cat fcatSet >>= Map.lookup tcs of
|
||||
Just fcat -> (env, fcat)
|
||||
Nothing -> let fcat = last_id+1
|
||||
in (FRulesEnv fcat (ins fcat) rules, fcat)
|
||||
addFSeq :: GrammarEnv -> (FPath,[FSymbol]) -> (GrammarEnv,SeqId)
|
||||
addFSeq env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (_,lst) =
|
||||
case Map.lookup seq seqSet of
|
||||
Just id -> (env,id)
|
||||
Nothing -> let !last_seq = Map.size seqSet
|
||||
in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet crcSet prodSet,last_seq)
|
||||
where
|
||||
ins fcat = Map.insertWith (\_ -> Map.insert tcs fcat) cat tmap_s fcatSet
|
||||
where
|
||||
tmap_s = Map.singleton tcs fcat
|
||||
seq = mkArray lst
|
||||
|
||||
genFCatArg :: TermMap -> FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat)
|
||||
genFCatArg cnc_defs env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs ctype) =
|
||||
case Map.lookup cat fcatSet of
|
||||
Just tmap -> case Map.lookup tcs tmap of
|
||||
Just fcat -> (env, fcat)
|
||||
Nothing -> ins tmap
|
||||
Nothing -> ins Map.empty
|
||||
-- | Intern a function in the environment. A function that is already
-- known returns its existing id and the unchanged environment; a new one
-- receives the current size of the function set as its id.
addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId)
addFFun env@(GrammarEnv lastId cats seqs funs coercions prods) fun =
  maybe register (\known -> (env, known)) (Map.lookup fun funs)
  where
    register =
      let !fresh = Map.size funs
      in (GrammarEnv lastId cats seqs (Map.insert fun fresh funs) coercions prods, fresh)
|
||||
|
||||
-- | Obtain a single category standing for the list @sub_fcats@.
-- A one-element list needs no coercion and yields that element itself.
-- Otherwise the coercion set is consulted; an unknown list allocates the
-- next category id (@last_id+1@) and registers it.
addFCoercion :: GrammarEnv -> [FCat] -> (GrammarEnv,FCat)
addFCoercion env@(GrammarEnv lastId cats seqs funs coercions prods) sub_fcats =
  case (sub_fcats, Map.lookup sub_fcats coercions) of
    ([only], _)     -> (env, only)
    (_, Just known) -> (env, known)
    (_, Nothing)    ->
      let !fresh = lastId + 1
      in (GrammarEnv fresh cats seqs funs (Map.insert sub_fcats fresh coercions) prods, fresh)
|
||||
|
||||
getParserInfo :: GrammarEnv -> ParserInfo
|
||||
getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
|
||||
ParserInfo { functions = mkArray funSet
|
||||
, sequences = mkArray seqSet
|
||||
, productions = IntMap.union prodSet coercions
|
||||
, startCats = Map.map (\(start,end,_) -> range (start,end)) catSet
|
||||
}
|
||||
where
|
||||
ins tmap =
|
||||
let fcat = last_id+1
|
||||
(last_id1,tmap1,rules1)
|
||||
= foldBM (\tcs st (last_id,tmap,rules) ->
|
||||
let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap
|
||||
rule = FRule wildCId [[0]] [fcat_arg] fcat
|
||||
(listArray (0,length rcs-1) [listArray (0,0) [FSymCat lbl 0] | lbl <- [0..length rcs-1]])
|
||||
in if st
|
||||
then (last_id1,tmap1,rule:rules)
|
||||
else (last_id, tmap, rules))
|
||||
(fcat,Map.insert tcs fcat tmap,rules)
|
||||
(gen_tcs ctype [] [])
|
||||
False
|
||||
in (FRulesEnv last_id1 (Map.insert cat tmap1 fcatSet) rules1, fcat)
|
||||
where
|
||||
addArg tcs last_id tmap =
|
||||
case Map.lookup tcs tmap of
|
||||
Just fcat -> (last_id, tmap, fcat)
|
||||
Nothing -> let fcat = last_id+1
|
||||
in (fcat, Map.insert tcs fcat tmap, fcat)
|
||||
mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
|
||||
|
||||
coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet]
|
||||
|
||||
gen_tcs :: Term -> FPath -> [(FPath,FIndex)] -> BacktrackM Bool [(FPath,FIndex)]
|
||||
gen_tcs (R record) path acc = foldM (\acc (label,ctype) -> gen_tcs ctype (label:path) acc) acc (zip [0..] record)
|
||||
gen_tcs (S _) path acc = return acc
|
||||
gen_tcs (C max_index) path acc =
|
||||
case List.lookup path tcs of
|
||||
Just index -> return $! addConstraint path index acc
|
||||
Nothing -> do writeState True
|
||||
index <- member [0..max_index]
|
||||
return $! addConstraint path index acc
|
||||
where
|
||||
addConstraint path0 index0 (c@(path,index) : cs)
|
||||
| path0 > path = c:addConstraint path0 index0 cs
|
||||
addConstraint path0 index0 cs = (path0,index0) : cs
|
||||
gen_tcs (F id) path acc = case Map.lookup id cnc_defs of
|
||||
Just term -> gen_tcs term path acc
|
||||
Nothing -> error ("unknown identifier: "++prCId id)
|
||||
|
||||
|
||||
getRCS :: TermMap -> Term -> [FPath]
|
||||
getRCS cnc_defs = loop [] []
|
||||
getFCats :: GrammarEnv -> ProtoFCat -> [FCat]
|
||||
getFCats (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat cat rcs tcs) =
|
||||
case Map.lookup cat catSet of
|
||||
Just (start,end,ms) -> reverse (solutions (variants ms tcs start) ())
|
||||
where
|
||||
loop path rcs (R record) = List.foldl' (\rcs (index,term) -> loop (index:path) rcs term) rcs (zip [0..] record)
|
||||
loop path rcs (C i) = rcs
|
||||
loop path rcs (S _) = path:rcs
|
||||
loop path rcs (F id) = case Map.lookup id cnc_defs of
|
||||
Just term -> loop path rcs term
|
||||
Nothing -> error ("unknown identifier: "++show id)
|
||||
variants _ [] fcat = return fcat
|
||||
variants (m:ms) ((_,indices) : tcs) fcat = do index <- member indices
|
||||
variants ms tcs ((m*index) + fcat)
|
||||
|
||||
------------------------------------------------------------
|
||||
-- updating the MCF rule
|
||||
@@ -345,12 +342,14 @@ restrictHead path term
|
||||
writeState (head', args)
|
||||
|
||||
restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> CnvMonad ProtoFCat
|
||||
restrictProtoFCat path0 index0 (PFCat cat rcs tcs ctype) = do
|
||||
restrictProtoFCat path0 index0 (PFCat cat rcs tcs) = do
|
||||
tcs <- addConstraint tcs
|
||||
return (PFCat cat rcs tcs ctype)
|
||||
return (PFCat cat rcs tcs)
|
||||
where
|
||||
addConstraint (c@(path,index) : cs)
|
||||
| path0 > path = liftM (c:) (addConstraint cs)
|
||||
| path0 == path = guard (index0 == index) >>
|
||||
return (c : cs)
|
||||
addConstraint cs = return ((path0,index0) : cs)
|
||||
addConstraint [] = error "restrictProtoFCat: unknown path"
|
||||
addConstraint (c@(path,indices) : tcs)
|
||||
| path0 == path = guard (index0 `elem` indices) >>
|
||||
return ((path,[index0]) : tcs)
|
||||
| otherwise = liftM (c:) (addConstraint tcs)
|
||||
|
||||
-- Build a zero-based array holding the elements of the list in order.
mkArray xs = listArray (0, upper) xs
  where
    upper = length xs - 1
|
||||
|
||||
@@ -7,7 +7,6 @@ import qualified GF.Compile.GenerateFCFG as FCFG
|
||||
import qualified GF.Compile.GeneratePMCFG as PMCFG
|
||||
|
||||
import PGF.CId
|
||||
import PGF.BuildParser (buildParserInfo)
|
||||
import qualified PGF.Macros as CM
|
||||
import qualified PGF.Data as C
|
||||
import qualified PGF.Data as D
|
||||
@@ -54,9 +53,9 @@ mkCanon2gfcc opts cnc gr =
|
||||
addParsers :: D.PGF -> D.PGF
|
||||
addParsers pgf = pgf { D.concretes = Map.map conv (D.concretes pgf) }
|
||||
where
|
||||
conv cnc = cnc { D.parser = Just (buildParserInfo fcfg) }
|
||||
conv cnc = cnc { D.parser = Just pinfo }
|
||||
where
|
||||
fcfg
|
||||
pinfo
|
||||
| Map.lookup (mkCId "erasing") (D.cflags cnc) == Just "on" = PMCFG.convertConcrete (D.abstract pgf) cnc
|
||||
| otherwise = FCFG.convertConcrete (D.abstract pgf) cnc
|
||||
|
||||
|
||||
@@ -91,7 +91,6 @@ data OutputFormat = FmtPGF
|
||||
| FmtEBNF
|
||||
| FmtRegular
|
||||
| FmtNoLR
|
||||
| FmtFCFG
|
||||
| FmtSRGS_XML
|
||||
| FmtSRGS_XML_NonRec
|
||||
| FmtSRGS_ABNF
|
||||
@@ -497,7 +496,6 @@ outputFormats =
|
||||
("ebnf", FmtEBNF),
|
||||
("regular", FmtRegular),
|
||||
("nolr", FmtNoLR),
|
||||
("fcfg", FmtFCFG),
|
||||
("srgs_xml", FmtSRGS_XML),
|
||||
("srgs_xml_nonrec", FmtSRGS_XML_NonRec),
|
||||
("srgs_abnf", FmtSRGS_ABNF),
|
||||
|
||||
@@ -4,21 +4,19 @@
|
||||
--
|
||||
-- Approximates PGF grammars with context-free grammars.
|
||||
----------------------------------------------------------------------
|
||||
module GF.Speech.PGFToCFG (bnfPrinter,
|
||||
fcfgPrinter, pgfToCFG) where
|
||||
module GF.Speech.PGFToCFG (bnfPrinter, pgfToCFG) where
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Data as PGF
|
||||
import PGF.Macros
|
||||
import GF.Data.MultiMap (MultiMap)
|
||||
import qualified GF.Data.MultiMap as MultiMap
|
||||
import GF.Infra.Ident
|
||||
import GF.Speech.CFG
|
||||
|
||||
import Data.Array as Array
|
||||
import Data.Array.IArray as Array
|
||||
import Data.List
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.IntMap as IntMap
|
||||
import Data.Maybe
|
||||
import Data.Set (Set)
|
||||
import qualified Data.Set as Set
|
||||
@@ -29,21 +27,6 @@ bnfPrinter = toBNF id
|
||||
toBNF :: (CFG -> CFG) -> PGF -> CId -> String
|
||||
toBNF f pgf cnc = prCFG $ f $ pgfToCFG pgf cnc
|
||||
|
||||
-- FIXME: move this somewhere else
|
||||
-- | Print every rule of the parser stored for concrete syntax @cnc@, one
-- rule per line: function name, profiles, result category, argument
-- categories and the linearization rows. Categories are shown as @C@
-- followed by their number. Calls 'error' when no parser is found.
fcfgPrinter :: PGF -> CId -> String
fcfgPrinter pgf cnc = unlines [showRule r | r <- Array.elems (PGF.allRules pinfo)]
  where
    pinfo = fromMaybe (error "fcfgPrinter") (lookParser pgf cnc)

    commaSep parts = concat (intersperse ", " parts)

    showCat c = "C" ++ show c

    showRule (FRule cid ps cs fc arr) =
      concat [ prCId cid, " ", show ps, ". ", showCat fc
             , " ::= [", commaSep (map showCat cs), "] = ", showLin ]
      where
        showLin   = "[" ++ commaSep (map showRow (Array.elems arr)) ++ "]"
        showRow r = unwords (map showFSymbol (Array.elems r))
        -- an argument reference prints the argument's category, its
        -- position among the arguments, and the selected field
        showFSymbol (FSymCat i j) = showCat (cs!!j) ++ "_" ++ show j ++ "." ++ show i
        showFSymbol (FSymTok t)   = t
|
||||
|
||||
pgfToCFG :: PGF
|
||||
-> CId -- ^ Concrete syntax name
|
||||
@@ -52,12 +35,13 @@ pgfToCFG pgf lang = mkCFG (lookStartCat pgf) extCats (startRules ++ concatMap fr
|
||||
where
|
||||
pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang)
|
||||
|
||||
rules :: [FRule]
|
||||
rules = Array.elems (PGF.allRules pinfo)
|
||||
rules :: [(FCat,Production)]
|
||||
rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.productions pinfo)
|
||||
, prod <- Set.toList set]
|
||||
|
||||
fcatCats :: Map FCat Cat
|
||||
fcatCats = Map.fromList [(fc, prCId c ++ "_" ++ show i)
|
||||
| (c,fcs) <- Map.toList (startupCats pinfo),
|
||||
| (c,fcs) <- Map.toList (startCats pinfo),
|
||||
(fc,i) <- zip fcs [1..]]
|
||||
|
||||
fcatCat :: FCat -> Cat
|
||||
@@ -69,49 +53,61 @@ pgfToCFG pgf lang = mkCFG (lookStartCat pgf) extCats (startRules ++ concatMap fr
|
||||
|
||||
-- gets the number of fields in the lincat for the given category
|
||||
catLinArity :: FCat -> Int
|
||||
catLinArity c = maximum (1:[rangeSize (bounds rhs) | FRule _ _ _ _ rhs <- Map.findWithDefault [] c rulesByFCat])
|
||||
catLinArity c = maximum (1:[rangeSize (bounds rhs) | (FFun _ _ rhs, _) <- topdownRules c])
|
||||
|
||||
-- All (function, argument categories) pairs that can produce @cat@:
-- every FApply production of the category contributes one pair, and an
-- FCoerce production contributes the pairs of the category it points to.
-- Coercion chains are followed without any cycle detection.
topdownRules cat = collect cat []
  where
    collect c acc =
      case IntMap.lookup c (productions pinfo) of
        Nothing    -> acc
        Just prods -> Set.fold step acc prods

    step (FApply funid args) acc = (functions pinfo ! funid,args) : acc
    step (FCoerce sub)       acc = collect sub acc
|
||||
|
||||
rulesByFCat :: Map FCat [FRule]
|
||||
rulesByFCat = Map.fromListWith (++) [(c,[r]) | r@(FRule _ _ _ c _) <- rules]
|
||||
|
||||
extCats :: Set Cat
|
||||
extCats = Set.fromList $ map lhsCat startRules
|
||||
|
||||
startRules :: [CFRule]
|
||||
startRules = [CFRule (prCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0)
|
||||
| (c,fcs) <- Map.toList (startupCats pinfo),
|
||||
| (c,fcs) <- Map.toList (startCats pinfo),
|
||||
fc <- fcs, not (isLiteralFCat fc),
|
||||
r <- [0..catLinArity fc-1]]
|
||||
|
||||
fruleToCFRule :: FRule -> [CFRule]
|
||||
fruleToCFRule (FRule f ps args c rhs) =
|
||||
fruleToCFRule :: (FCat,Production) -> [CFRule]
|
||||
fruleToCFRule (c,FApply funid args) =
|
||||
[CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm (map (fixProfile row) ps))
|
||||
| (l,row) <- Array.assocs rhs, not (containsLiterals row)]
|
||||
| (l,seqid) <- Array.assocs rhs
|
||||
, let row = sequences pinfo ! seqid
|
||||
, not (containsLiterals row)]
|
||||
where
|
||||
FFun f ps rhs = functions pinfo ! funid
|
||||
|
||||
mkRhs :: Array FPointPos FSymbol -> [CFSymbol]
|
||||
mkRhs = map fsymbolToSymbol . Array.elems
|
||||
|
||||
containsLiterals :: Array FPointPos FSymbol -> Bool
|
||||
containsLiterals row = any isLiteralFCat [args!!n | FSymCat _ n <- Array.elems row]
|
||||
containsLiterals row = any isLiteralFCat [args!!n | FSymCat n _ <- Array.elems row]
|
||||
|
||||
fsymbolToSymbol :: FSymbol -> CFSymbol
|
||||
fsymbolToSymbol (FSymCat l n) = NonTerminal (fcatToCat (args!!n) l)
|
||||
fsymbolToSymbol (FSymTok t) = Terminal t
|
||||
fsymbolToSymbol (FSymCat n l) = NonTerminal (fcatToCat (args!!n) l)
|
||||
fsymbolToSymbol (FSymTok (KS t)) = Terminal t
|
||||
|
||||
fixProfile :: Array FPointPos FSymbol -> Profile -> Profile
|
||||
fixProfile row = concatMap positions
|
||||
where
|
||||
nts = zip [0..] [nt | nt@(FSymCat _ _) <- Array.elems row ]
|
||||
positions i = [k | (k,FSymCat _ j) <- nts, j == i]
|
||||
nts = zip [0..] [nt | nt@(FSymCat _ _) <- Array.elems row]
|
||||
positions i = [k | (k,FSymCat j _) <- nts, j == i]
|
||||
|
||||
-- Turn the (already fixed) profiles of the rule into a CFG term.
-- A single one-element profile on the wildcard function simply passes the
-- referenced child through; otherwise function @f@ is applied to one
-- sub-term per profile, paired with the argument types of @f@.
profilesToTerm :: [Profile] -> CFTerm
profilesToTerm ps =
  case ps of
    [[n]] | f == wildCId -> CFRes n
    _                    -> CFObj f (zipWith profileToTerm argTypes ps)
  where
    (argTypes,_) = catSkeleton (lookType pgf f)
|
||||
|
||||
-- Term for one argument: a metavariable of type @t@ when the profile is
-- empty, otherwise a reference to the last position listed in the profile.
profileToTerm :: CId -> Profile -> CFTerm
profileToTerm t xs
  | null xs   = CFMeta t
  | otherwise = CFRes (last xs) -- FIXME: unify
|
||||
fruleToCFRule (c,FCoerce c') =
|
||||
[CFRule (fcatToCat c l) [NonTerminal (fcatToCat c' l)] (CFRes 0)
|
||||
| l <- [0..catLinArity c-1]]
|
||||
|
||||
|
||||
-- | True exactly for the four built-in literal categories
-- (string, integer, float and variable).
isLiteralFCat :: FCat -> Bool
isLiteralFCat fcat = fcat `elem` literalCats
  where
    literalCats = [fcatString, fcatInt, fcatFloat, fcatVar]
|
||||
|
||||
@@ -25,6 +25,7 @@ import qualified Text.ParserCombinators.ReadP as RP
|
||||
import System.Cmd
|
||||
import System.CPUTime
|
||||
import Control.Exception
|
||||
import Control.Monad
|
||||
import Data.Version
|
||||
import GF.System.Signal
|
||||
--import System.IO.Error (try)
|
||||
@@ -203,9 +204,10 @@ wordCompletion gfenv line0 prefix0 p =
|
||||
-> do mb_state0 <- try (evaluate (initState pgf (optLang opts) (optCat opts)))
|
||||
case mb_state0 of
|
||||
Right state0 -> let ws = words (take (length s - length prefix) s)
|
||||
state = foldl nextState state0 ws
|
||||
compls = getCompletions state prefix
|
||||
in ret ' ' (map (encode gfenv) (Map.keys compls))
|
||||
in case foldM nextState state0 ws of
|
||||
Nothing -> ret ' ' []
|
||||
Just state -> let compls = getCompletions state prefix
|
||||
in ret ' ' (map (encode gfenv) (Map.keys compls))
|
||||
Left _ -> ret ' ' []
|
||||
CmplOpt (Just (Command n _ _)) pref
|
||||
-> case Map.lookup n (commands cmdEnv) of
|
||||
|
||||
10
src/PGF.hs
10
src/PGF.hs
@@ -77,6 +77,7 @@ import Data.Char
|
||||
import qualified Data.Map as Map
|
||||
import Data.Maybe
|
||||
import System.Random (newStdGen)
|
||||
import Control.Monad
|
||||
|
||||
---------------------------------------------------
|
||||
-- Interface
|
||||
@@ -211,7 +212,7 @@ parse pgf lang cat s =
|
||||
Just cnc -> case parser cnc of
|
||||
Just pinfo -> if Map.lookup (mkCId "erasing") (cflags cnc) == Just "on"
|
||||
then Incremental.parse pinfo (mkCId cat) (words s)
|
||||
else case parseFCFG "bottomup" pinfo (mkCId cat) (words s) of
|
||||
else case parseFCFG "topdown" pinfo (mkCId cat) (words s) of
|
||||
Ok x -> x
|
||||
Bad s -> error s
|
||||
Nothing -> error ("No parser built for language: " ++ lang)
|
||||
@@ -259,9 +260,10 @@ startCat pgf = lookStartCat pgf
|
||||
complete pgf from cat input =
|
||||
let (ws,prefix) = tokensAndPrefix input
|
||||
state0 = initState pgf from cat
|
||||
state = foldl Incremental.nextState state0 ws
|
||||
compls = Incremental.getCompletions state prefix
|
||||
in [unwords (ws++[c]) ++ " " | c <- Map.keys compls]
|
||||
in case foldM Incremental.nextState state0 ws of
|
||||
Nothing -> []
|
||||
Just state -> let compls = Incremental.getCompletions state prefix
|
||||
in [unwords (ws++[c]) ++ " " | c <- Map.keys compls]
|
||||
where
|
||||
tokensAndPrefix :: String -> ([String],String)
|
||||
tokensAndPrefix s | not (null s) && isSpace (last s) = (words s, "")
|
||||
|
||||
@@ -15,50 +15,62 @@ import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Parsing.FCFG.Utilities
|
||||
|
||||
import Data.Array
|
||||
import Data.Array.IArray
|
||||
import Data.Maybe
|
||||
import qualified Data.IntMap as IntMap
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Set as Set
|
||||
import Debug.Trace
|
||||
|
||||
|
||||
-- Lookup tables derived from a ParserInfo by buildParserInfo.
-- A rule is stored as a triple: function id, argument categories and the
-- category the production belongs to.
data ParserInfoEx
|
||||
-- rules whose first sequence has no symbol at position 0
= ParserInfoEx { epsilonRules :: [(FunId,[FCat],FCat)]
|
||||
-- rules indexed by the category referenced by the first symbol of their
-- first sequence (and by the categories that category coerces to)
, leftcornerCats :: Assoc FCat [(FunId,[FCat],FCat)]
|
||||
-- rules indexed by the token that starts their first sequence
, leftcornerTokens :: Assoc String [(FunId,[FCat],FCat)]
|
||||
-- all tokens occurring in the sequences of the grammar (built with nubsort)
, grammarToks :: [String]
|
||||
}
|
||||
|
||||
------------------------------------------------------------
|
||||
-- parser information
|
||||
|
||||
getLeftCornerTok (FRule _ _ _ _ lins)
|
||||
getLeftCornerTok pinfo (FFun _ _ lins)
|
||||
| inRange (bounds syms) 0 = case syms ! 0 of
|
||||
FSymTok tok -> [tok]
|
||||
FSymTok (KS tok) -> [tok]
|
||||
_ -> []
|
||||
| otherwise = []
|
||||
where
|
||||
syms = (sequences pinfo) ! (lins ! 0)
|
||||
|
||||
getLeftCornerCat pinfo args (FFun _ _ lins)
|
||||
| inRange (bounds syms) 0 = case syms ! 0 of
|
||||
FSymCat d _ -> let cat = args !! d
|
||||
in case IntMap.lookup cat (productions pinfo) of
|
||||
Just set -> cat : [cat' | FCoerce cat' <- Set.toList set]
|
||||
Nothing -> [cat]
|
||||
_ -> []
|
||||
| otherwise = []
|
||||
where
|
||||
syms = lins ! 0
|
||||
syms = (sequences pinfo) ! (lins ! 0)
|
||||
|
||||
getLeftCornerCat (FRule _ _ args _ lins)
|
||||
| inRange (bounds syms) 0 = case syms ! 0 of
|
||||
FSymCat _ d -> [args !! d]
|
||||
_ -> []
|
||||
| otherwise = []
|
||||
where
|
||||
syms = lins ! 0
|
||||
buildParserInfo :: ParserInfo -> ParserInfoEx
|
||||
buildParserInfo pinfo =
|
||||
ParserInfoEx { epsilonRules = epsilonrules
|
||||
, leftcornerCats = leftcorncats
|
||||
, leftcornerTokens = leftcorntoks
|
||||
, grammarToks = grammartoks
|
||||
}
|
||||
|
||||
buildParserInfo :: FGrammar -> ParserInfo
|
||||
buildParserInfo (grammar,startup) = -- trace (unlines [prt (x,Set.toList set) | (x,set) <- Map.toList leftcornFilter]) $
|
||||
ParserInfo { allRules = allrules
|
||||
, topdownRules = topdownrules
|
||||
-- , emptyRules = emptyrules
|
||||
, epsilonRules = epsilonrules
|
||||
, leftcornerCats = leftcorncats
|
||||
, leftcornerTokens = leftcorntoks
|
||||
, grammarCats = grammarcats
|
||||
, grammarToks = grammartoks
|
||||
, startupCats = startup
|
||||
}
|
||||
|
||||
where allrules = listArray (0,length grammar-1) grammar
|
||||
topdownrules = accumAssoc id [(cat, ruleid) | (ruleid, FRule _ _ _ cat _) <- assocs allrules]
|
||||
epsilonrules = [ ruleid | (ruleid, FRule _ _ _ _ lins) <- assocs allrules,
|
||||
not (inRange (bounds (lins ! 0)) 0) ]
|
||||
leftcorncats = accumAssoc id [ (cat, ruleid) | (ruleid, rule) <- assocs allrules, cat <- getLeftCornerCat rule ]
|
||||
leftcorntoks = accumAssoc id [ (tok, ruleid) | (ruleid, rule) <- assocs allrules, tok <- getLeftCornerTok rule ]
|
||||
grammarcats = aElems topdownrules
|
||||
grammartoks = nubsort [t | (FRule _ _ _ _ lins) <- grammar, lin <- elems lins, FSymTok t <- elems lin]
|
||||
where epsilonrules = [ (ruleid,args,cat)
|
||||
| (cat,set) <- IntMap.toList (productions pinfo)
|
||||
, (FApply ruleid args) <- Set.toList set
|
||||
, let (FFun _ _ lins) = (functions pinfo) ! ruleid
|
||||
, not (inRange (bounds ((sequences pinfo) ! (lins ! 0))) 0) ]
|
||||
leftcorncats = accumAssoc id [ (cat', (ruleid, args, cat))
|
||||
| (cat,set) <- IntMap.toList (productions pinfo)
|
||||
, (FApply ruleid args) <- Set.toList set
|
||||
, cat' <- getLeftCornerCat pinfo args ((functions pinfo) ! ruleid) ]
|
||||
leftcorntoks = accumAssoc id [ (tok, (ruleid, args, cat))
|
||||
| (cat,set) <- IntMap.toList (productions pinfo)
|
||||
, (FApply ruleid args) <- Set.toList set
|
||||
, tok <- getLeftCornerTok pinfo ((functions pinfo) ! ruleid) ]
|
||||
grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymTok (KS t) <- elems lin]
|
||||
|
||||
@@ -2,11 +2,13 @@ module PGF.Data where
|
||||
|
||||
import PGF.CId
|
||||
import GF.Text.UTF8
|
||||
import GF.Data.Assoc
|
||||
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.IntMap as IntMap
|
||||
import Data.List
|
||||
import Data.Array
|
||||
import Data.Array.Unboxed
|
||||
|
||||
-- internal datatypes for PGF
|
||||
|
||||
@@ -108,32 +110,28 @@ data Equation =
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
|
||||
type FToken = String
|
||||
type FCat = Int
|
||||
type FIndex = Int
|
||||
data FSymbol
|
||||
= FSymCat {-# UNPACK #-} !FIndex {-# UNPACK #-} !Int
|
||||
| FSymTok FToken
|
||||
type Profile = [Int]
|
||||
type FPointPos = Int
|
||||
type FGrammar = ([FRule], Map.Map CId [FCat])
|
||||
data FRule = FRule CId [Profile] [FCat] FCat (Array FIndex (Array FPointPos FSymbol))
|
||||
|
||||
type RuleId = Int
|
||||
data FSymbol
|
||||
= FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
|
||||
| FSymTok Tokn
|
||||
deriving (Eq,Ord,Show)
|
||||
type Profile = [Int]
|
||||
-- A way to derive a category: either apply a function (referenced by its
-- index in the parser's function table) to a list of argument categories,
-- or coerce from another category.
data Production
|
||||
= FApply {-# UNPACK #-} !FunId [FCat]
|
||||
| FCoerce {-# UNPACK #-} !FCat
|
||||
deriving (Eq,Ord,Show)
|
||||
-- A concrete function: its name, its profiles, and for each field index
-- the id of the sequence (in the parser's sequence table) that linearizes it.
data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
|
||||
-- One linearization row: symbols indexed by position.
type FSeq = Array FPointPos FSymbol
|
||||
-- Index into the function table of a ParserInfo.
type FunId = Int
|
||||
-- Index into the sequence table of a ParserInfo.
type SeqId = Int
|
||||
|
||||
data ParserInfo
|
||||
= ParserInfo { allRules :: Array RuleId FRule
|
||||
, topdownRules :: Assoc FCat [RuleId]
|
||||
-- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
|
||||
-- , emptyRules :: [RuleId]
|
||||
, epsilonRules :: [RuleId]
|
||||
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
|
||||
, leftcornerCats :: Assoc FCat [RuleId]
|
||||
, leftcornerTokens :: Assoc FToken [RuleId]
|
||||
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
|
||||
, grammarCats :: [FCat]
|
||||
, grammarToks :: [FToken]
|
||||
, startupCats :: Map.Map CId [FCat]
|
||||
= ParserInfo { functions :: Array FunId FFun
|
||||
, sequences :: Array SeqId FSeq
|
||||
, productions :: IntMap.IntMap (Set.Set Production)
|
||||
, startCats :: Map.Map CId [FCat]
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -49,12 +49,6 @@ lookValCat pgf = valCat . lookType pgf
|
||||
-- | The parser information stored for concrete syntax @lang@, if the
-- language exists in the grammar and a parser has been built for it.
lookParser :: PGF -> CId -> Maybe ParserInfo
lookParser pgf lang = do
  cnc <- Map.lookup lang (concretes pgf)
  parser cnc
|
||||
|
||||
-- | The rule list and startup categories of the parser stored for
-- concrete syntax @lang@, or 'Nothing' when 'lookParser' finds no parser.
lookFCFG :: PGF -> CId -> Maybe FGrammar
lookFCFG pgf lang =
  case lookParser pgf lang of
    Nothing    -> Nothing
    Just pinfo -> Just (Array.elems (allRules pinfo), startupCats pinfo)
|
||||
|
||||
-- | Name of the start category: the @startcat@ flag taken from the global
-- flags, else from the abstract syntax flags, else the default @"S"@.
lookStartCat :: PGF -> String
lookStartCat pgf = fromMaybe "S" (msum candidates)
  where
    key        = mkCId "startcat"
    -- global flags take precedence over the abstract syntax flags
    candidates = [Map.lookup key flags | flags <- [gflags pgf, aflags (abstract pgf)]]
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module PGF.Parsing.FCFG
|
||||
(buildParserInfo,ParserInfo,parseFCFG) where
|
||||
(ParserInfo,parseFCFG) where
|
||||
|
||||
import GF.Data.ErrM
|
||||
import GF.Data.Assoc
|
||||
@@ -17,7 +17,6 @@ import GF.Data.SortedList
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Macros
|
||||
import PGF.BuildParser
|
||||
import PGF.Parsing.FCFG.Utilities
|
||||
import qualified PGF.Parsing.FCFG.Active as Active
|
||||
import qualified PGF.Parsing.FCFG.Incremental as Incremental
|
||||
|
||||
@@ -17,17 +17,22 @@ import qualified GF.Data.MultiMap as MM
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Parsing.FCFG.Utilities
|
||||
import PGF.BuildParser
|
||||
|
||||
import Control.Monad (guard)
|
||||
|
||||
import qualified Data.List as List
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.IntMap as IntMap
|
||||
import qualified Data.Set as Set
|
||||
import Data.Array
|
||||
import Data.Array.IArray
|
||||
import Debug.Trace
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * parsing
|
||||
|
||||
type FToken = String
|
||||
|
||||
-- Edge expected for a complete parse of category @cat@ over the input
-- span @i@..@j@; the span 0..0 is represented by the empty range.
makeFinalEdge cat i j
  | i == 0 && j == 0 = (cat, [EmptyRange])
  | otherwise        = (cat, [makeRange i j])
|
||||
|
||||
@@ -36,77 +41,79 @@ parse :: String -> ParserInfo -> CId -> [FToken] -> [Tree]
|
||||
parse strategy pinfo start toks = nubsort $ filteredForests >>= forest2trees
|
||||
where
|
||||
inTokens = input toks
|
||||
starts = Map.findWithDefault [] start (startupCats pinfo)
|
||||
starts = Map.findWithDefault [] start (startCats pinfo)
|
||||
schart = xchart2syntaxchart chart pinfo
|
||||
(i,j) = inputBounds inTokens
|
||||
finalEdges = [makeFinalEdge cat i j | cat <- starts]
|
||||
forests = chart2forests schart (const False) finalEdges
|
||||
filteredForests = forests >>= applyProfileToForest
|
||||
|
||||
chart = process strategy pinfo inTokens axioms emptyXChart
|
||||
axioms | isBU strategy = literals pinfo inTokens ++ initialBU pinfo inTokens
|
||||
| isTD strategy = literals pinfo inTokens ++ initialTD pinfo starts inTokens
|
||||
pinfoex = buildParserInfo pinfo
|
||||
|
||||
chart = process strategy pinfo pinfoex inTokens axioms emptyXChart
|
||||
axioms | isBU strategy = literals pinfoex inTokens ++ initialBU pinfo pinfoex inTokens
|
||||
| isTD strategy = literals pinfoex inTokens ++ initialTD pinfo starts inTokens
|
||||
|
||||
-- Strategy selectors: the bottom-up strategy is named "b" and the
-- top-down strategy "t"; any other string selects neither.
isBU strategy = strategy == "b"
isTD strategy = strategy == "t"
|
||||
|
||||
-- used in prediction
|
||||
emptyChildren :: RuleId -> ParserInfo -> SyntaxNode RuleId RangeRec
|
||||
emptyChildren ruleid pinfo = SNode ruleid (replicate (length rhs) [])
|
||||
where
|
||||
FRule _ _ rhs _ _ = allRules pinfo ! ruleid
|
||||
-- | Syntax node for function @ruleid@ in which no argument has been
-- recognized yet: one empty range record per argument category.
emptyChildren :: FunId -> [FCat] -> SyntaxNode FunId RangeRec
emptyChildren ruleid args = SNode ruleid (map (const []) args)
|
||||
|
||||
process :: String -> ParserInfo -> Input FToken -> [(FCat,Item)] -> XChart FCat -> XChart FCat
|
||||
process strategy pinfo toks [] chart = chart
|
||||
process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks items $! univRule c item chart
|
||||
|
||||
process :: String -> ParserInfo -> ParserInfoEx -> Input FToken -> [Item] -> XChart FCat -> XChart FCat
|
||||
process strategy pinfo pinfoex toks [] chart = chart
|
||||
process strategy pinfo pinfoex toks (item:items) chart = process strategy pinfo pinfoex toks items $! univRule item chart
|
||||
where
|
||||
univRule cat item@(Active found rng lbl ppos node@(SNode ruleid recs)) chart
|
||||
univRule item@(Active found rng lbl ppos node@(SNode ruleid recs) args cat) chart
|
||||
| inRange (bounds lin) ppos =
|
||||
case lin ! ppos of
|
||||
FSymCat r d -> let c = args !! d
|
||||
FSymCat d r -> let c = args !! d
|
||||
in case recs !! d of
|
||||
[] -> case insertXChart chart item c of
|
||||
Nothing -> chart
|
||||
Just chart -> let items = do item@(Final found' _) <- lookupXChartFinal chart c
|
||||
Just chart -> let items = do item@(Final found' _ _ _) <- lookupXChartFinal chart c
|
||||
rng <- concatRange rng (found' !! r)
|
||||
return (c, Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)))
|
||||
return (Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)) args cat)
|
||||
++
|
||||
do guard (isTD strategy)
|
||||
ruleid <- topdownRules pinfo ? c
|
||||
return (c, Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo))
|
||||
in process strategy pinfo toks items chart
|
||||
(ruleid,args) <- topdownRules pinfo c
|
||||
return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args c)
|
||||
in process strategy pinfo pinfoex toks items chart
|
||||
found' -> let items = do rng <- concatRange rng (found' !! r)
|
||||
return (c, Active found rng lbl (ppos+1) node)
|
||||
in process strategy pinfo toks items chart
|
||||
FSymTok tok -> let items = do t_rng <- inputToken toks ? tok
|
||||
return (Active found rng lbl (ppos+1) node args cat)
|
||||
in process strategy pinfo pinfoex toks items chart
|
||||
FSymTok (KS tok)
|
||||
-> let items = do t_rng <- inputToken toks ? tok
|
||||
rng' <- concatRange rng t_rng
|
||||
return (cat, Active found rng' lbl (ppos+1) node)
|
||||
in process strategy pinfo toks items chart
|
||||
return (Active found rng' lbl (ppos+1) node args cat)
|
||||
in process strategy pinfo pinfoex toks items chart
|
||||
| otherwise =
|
||||
if inRange (bounds lins) (lbl+1)
|
||||
then univRule cat (Active (rng:found) EmptyRange (lbl+1) 0 node) chart
|
||||
else univRule cat (Final (reverse (rng:found)) node) chart
|
||||
then univRule (Active (rng:found) EmptyRange (lbl+1) 0 node args cat) chart
|
||||
else univRule (Final (reverse (rng:found)) node args cat) chart
|
||||
where
|
||||
(FRule _ _ args cat lins) = allRules pinfo ! ruleid
|
||||
lin = lins ! lbl
|
||||
univRule cat item@(Final found' node) chart =
|
||||
(FFun _ _ lins) = functions pinfo ! ruleid
|
||||
lin = sequences pinfo ! (lins ! lbl)
|
||||
univRule item@(Final found' node args cat) chart =
|
||||
case insertXChart chart item cat of
|
||||
Nothing -> chart
|
||||
Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _)) <- lookupXChartAct chart cat
|
||||
let FRule _ _ args _ lins = allRules pinfo ! ruleid
|
||||
FSymCat r d = lins ! l ! ppos
|
||||
Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _) args c) <- lookupXChartAct chart cat
|
||||
let FFun _ _ lins = functions pinfo ! ruleid
|
||||
FSymCat d r = (sequences pinfo ! (lins ! l)) ! ppos
|
||||
rng <- concatRange rng (found' !! r)
|
||||
return (args !! d, Active found rng l (ppos+1) (updateChildren node d found'))
|
||||
return (Active found rng l (ppos+1) (updateChildren node d found') args c)
|
||||
++
|
||||
do guard (isBU strategy)
|
||||
ruleid <- leftcornerCats pinfo ? cat
|
||||
let FRule _ _ args _ lins = allRules pinfo ! ruleid
|
||||
FSymCat r d = lins ! 0 ! 0
|
||||
return (args !! d, Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid pinfo) d found'))
|
||||
(ruleid,args,c) <- leftcornerCats pinfoex ? cat
|
||||
let FFun _ _ lins = functions pinfo ! ruleid
|
||||
FSymCat d r = (sequences pinfo ! (lins ! 0)) ! 0
|
||||
return (Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid args) d found') args c)
|
||||
|
||||
updateChildren :: SyntaxNode RuleId RangeRec -> Int -> RangeRec -> SyntaxNode RuleId RangeRec
|
||||
updateChildren :: SyntaxNode FunId RangeRec -> Int -> RangeRec -> SyntaxNode FunId RangeRec
|
||||
updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs
|
||||
in process strategy pinfo toks items chart
|
||||
in process strategy pinfo pinfoex toks items chart
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * XChart
|
||||
@@ -116,21 +123,23 @@ data Item
|
||||
Range
|
||||
{-# UNPACK #-} !FIndex
|
||||
{-# UNPACK #-} !FPointPos
|
||||
(SyntaxNode RuleId RangeRec)
|
||||
| Final RangeRec (SyntaxNode RuleId RangeRec)
|
||||
deriving (Eq, Ord)
|
||||
(SyntaxNode FunId RangeRec)
|
||||
[FCat]
|
||||
FCat
|
||||
| Final RangeRec (SyntaxNode FunId RangeRec) [FCat] FCat
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
data XChart c = XChart !(MM.MultiMap c Item) !(MM.MultiMap c Item)
|
||||
|
||||
emptyXChart :: Ord c => XChart c
|
||||
emptyXChart = XChart MM.empty MM.empty
|
||||
|
||||
insertXChart (XChart actives finals) item@(Active _ _ _ _ _) c =
|
||||
insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _ _) c =
|
||||
case MM.insert' c item actives of
|
||||
Nothing -> Nothing
|
||||
Just actives -> Just (XChart actives finals)
|
||||
|
||||
insertXChart (XChart actives finals) item@(Final _ _) c =
|
||||
insertXChart (XChart actives finals) item@(Final _ _ _ _) c =
|
||||
case MM.insert' c item finals of
|
||||
Nothing -> Nothing
|
||||
Just finals -> Just (XChart actives finals)
|
||||
@@ -142,17 +151,17 @@ xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) (
|
||||
xchart2syntaxchart (XChart actives finals) pinfo =
|
||||
accumAssoc groupSyntaxNodes $
|
||||
[ case node of
|
||||
SNode ruleid rrecs -> let FRule fun prof rhs cat _ = allRules pinfo ! ruleid
|
||||
SNode ruleid rrecs -> let FFun fun prof _ = functions pinfo ! ruleid
|
||||
in ((cat,found), SNode (fun,prof) (zip rhs rrecs))
|
||||
SString s -> ((cat,found), SString s)
|
||||
SInt n -> ((cat,found), SInt n)
|
||||
SFloat f -> ((cat,found), SFloat f)
|
||||
| (cat, Final found node) <- MM.toList finals
|
||||
| (Final found node rhs cat) <- MM.elems finals
|
||||
]
|
||||
|
||||
literals :: ParserInfo -> Input FToken -> [(FCat,Item)]
|
||||
literals pinfo toks =
|
||||
[let (c,node) = lexer t in (c,Final [rng] node) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfo)]
|
||||
literals :: ParserInfoEx -> Input FToken -> [Item]
|
||||
literals pinfoex toks =
|
||||
[let (c,node) = lexer t in (Final [rng] node [] c) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfoex)]
|
||||
where
|
||||
lexer t =
|
||||
case reads t of
|
||||
@@ -166,24 +175,30 @@ literals pinfo toks =
|
||||
-- Earley --
|
||||
|
||||
-- called with all starting categories
|
||||
initialTD :: ParserInfo -> [FCat] -> Input FToken -> [(FCat,Item)]
|
||||
initialTD :: ParserInfo -> [FCat] -> Input FToken -> [Item]
|
||||
initialTD pinfo starts toks =
|
||||
do cat <- starts
|
||||
ruleid <- topdownRules pinfo ? cat
|
||||
return (cat,Active [] (Range 0 0) 0 0 (emptyChildren ruleid pinfo))
|
||||
(ruleid,args) <- topdownRules pinfo cat
|
||||
return (Active [] (Range 0 0) 0 0 (emptyChildren ruleid args) args cat)
|
||||
|
||||
topdownRules pinfo cat = f cat []
|
||||
where
|
||||
f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo))
|
||||
|
||||
g (FApply ruleid args) rules = (ruleid,args) : rules
|
||||
g (FCoerce cat) rules = f cat rules
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- Kilbury --
|
||||
|
||||
initialBU :: ParserInfo -> Input FToken -> [(FCat,Item)]
|
||||
initialBU pinfo toks =
|
||||
initialBU :: ParserInfo -> ParserInfoEx -> Input FToken -> [Item]
|
||||
initialBU pinfo pinfoex toks =
|
||||
do (tok,rngs) <- aAssocs (inputToken toks)
|
||||
ruleid <- leftcornerTokens pinfo ? tok
|
||||
let FRule _ _ _ cat _ = allRules pinfo ! ruleid
|
||||
(ruleid,args,cat) <- leftcornerTokens pinfoex ? tok
|
||||
rng <- rngs
|
||||
return (cat,Active [] rng 0 1 (emptyChildren ruleid pinfo))
|
||||
return (Active [] rng 0 1 (emptyChildren ruleid args) args cat)
|
||||
++
|
||||
do ruleid <- epsilonRules pinfo
|
||||
let FRule _ _ _ cat _ = allRules pinfo ! ruleid
|
||||
return (cat,Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo))
|
||||
do (ruleid,args,cat) <- epsilonRules pinfoex
|
||||
let FFun _ _ _ = functions pinfo ! ruleid
|
||||
return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args cat)
|
||||
|
||||
@@ -8,55 +8,54 @@ module PGF.Parsing.FCFG.Incremental
|
||||
, parse
|
||||
) where
|
||||
|
||||
import Data.Array
|
||||
import Data.Array.IArray
|
||||
import Data.Array.Base (unsafeAt)
|
||||
import Data.List (isPrefixOf, foldl')
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Data.Maybe (fromMaybe, maybe)
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.IntMap as IntMap
|
||||
import qualified Data.Set as Set
|
||||
import Control.Monad
|
||||
|
||||
import GF.Data.Assoc
|
||||
import GF.Data.SortedList
|
||||
import qualified GF.Data.MultiMap as MM
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Parsing.FCFG.Utilities
|
||||
import Debug.Trace
|
||||
|
||||
parse :: ParserInfo -> CId -> [FToken] -> [Tree]
|
||||
parse pinfo start toks = extractExps (foldl' nextState (initState pinfo start) toks) start
|
||||
parse :: ParserInfo -> CId -> [String] -> [Tree]
|
||||
parse pinfo start toks = maybe [] (\ps -> extractExps ps start) (foldM nextState (initState pinfo start) toks)
|
||||
|
||||
initState :: ParserInfo -> CId -> ParseState
|
||||
initState pinfo start =
|
||||
let items = do
|
||||
c <- Map.findWithDefault [] start (startupCats pinfo)
|
||||
ruleid <- topdownRules pinfo ? c
|
||||
let (FRule fn _ args cat lins) = allRules pinfo ! ruleid
|
||||
lbl <- indices lins
|
||||
return (Active 0 lbl 0 ruleid args cat)
|
||||
cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
|
||||
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) [] cat (productions pinfo)
|
||||
let FFun fn _ lins = functions pinfo ! funid
|
||||
(lbl,seqid) <- assocs lins
|
||||
return (Active 0 0 funid seqid args (AK cat lbl))
|
||||
|
||||
forest = IntMap.fromListWith Set.union [(cat, Set.singleton (Passive ruleid args)) | (ruleid, FRule _ _ args cat _) <- assocs (allRules pinfo)]
|
||||
|
||||
max_fid = maximum (0:[maximum (cat:args) | (ruleid, FRule _ _ args cat _) <- assocs (allRules pinfo)])+1
|
||||
max_fid = maximum (0:[maximum (cat:args) | (cat, set) <- IntMap.toList (productions pinfo)
|
||||
, p <- Set.toList set
|
||||
, let args = case p of {FApply _ args -> args; FCoerce cat -> [cat]}])+1
|
||||
|
||||
in State pinfo
|
||||
(Chart MM.empty [] Map.empty forest max_fid 0)
|
||||
(Chart emptyAC [] emptyPC (productions pinfo) max_fid 0)
|
||||
(Set.fromList items)
|
||||
|
||||
-- | From the current state and the next token
|
||||
-- 'nextState' computes a new state where the token
|
||||
-- is consumed and the current position shifted by one.
|
||||
nextState :: ParseState -> String -> ParseState
|
||||
nextState :: ParseState -> String -> Maybe ParseState
|
||||
nextState (State pinfo chart items) t =
|
||||
let (items1,chart1) = process add (allRules pinfo) (Set.toList items) (Set.empty,chart)
|
||||
chart2 = chart1{ active =MM.empty
|
||||
let (items1,chart1) = process add (sequences pinfo) (functions pinfo) (Set.toList items) Set.empty chart
|
||||
chart2 = chart1{ active =emptyAC
|
||||
, actives=active chart1 : actives chart1
|
||||
, passive=Map.empty
|
||||
, passive=emptyPC
|
||||
, offset =offset chart1+1
|
||||
}
|
||||
in State pinfo chart2 items1
|
||||
in if Set.null items1
|
||||
then Nothing
|
||||
else Just (State pinfo chart2 items1)
|
||||
where
|
||||
add tok item set
|
||||
| tok == t = Set.insert item set
|
||||
@@ -68,107 +67,157 @@ nextState (State pinfo chart items) t =
|
||||
-- the GF interpreter.
|
||||
getCompletions :: ParseState -> String -> Map.Map String ParseState
|
||||
getCompletions (State pinfo chart items) w =
|
||||
let (map',chart1) = process add (allRules pinfo) (Set.toList items) (MM.empty,chart)
|
||||
chart2 = chart1{ active =MM.empty
|
||||
let (map',chart1) = process add (sequences pinfo) (functions pinfo) (Set.toList items) Map.empty chart
|
||||
chart2 = chart1{ active =emptyAC
|
||||
, actives=active chart1 : actives chart1
|
||||
, passive=Map.empty
|
||||
, passive=emptyPC
|
||||
, offset =offset chart1+1
|
||||
}
|
||||
in fmap (State pinfo chart2) map'
|
||||
where
|
||||
add tok item map
|
||||
| isPrefixOf w tok = fromMaybe map (MM.insert' tok item map)
|
||||
| isPrefixOf w tok = Map.insertWith Set.union tok (Set.singleton item) map
|
||||
| otherwise = map
|
||||
|
||||
extractExps :: ParseState -> CId -> [Tree]
|
||||
extractExps (State pinfo chart items) start = exps
|
||||
where
|
||||
(_,st) = process (\_ _ -> id) (allRules pinfo) (Set.toList items) ((),chart)
|
||||
(_,st) = process (\_ _ -> id) (sequences pinfo) (functions pinfo) (Set.toList items) () chart
|
||||
|
||||
exps = nubsort $ do
|
||||
c <- Map.findWithDefault [] start (startupCats pinfo)
|
||||
ruleid <- topdownRules pinfo ? c
|
||||
let (FRule fn _ args cat lins) = allRules pinfo ! ruleid
|
||||
cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
|
||||
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) [] cat (productions pinfo)
|
||||
let FFun fn _ lins = functions pinfo ! funid
|
||||
lbl <- indices lins
|
||||
fid <- Map.lookup (PK c lbl 0) (passive st)
|
||||
Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
|
||||
go Set.empty fid
|
||||
|
||||
go rec fid
|
||||
| Set.member fid rec = mzero
|
||||
| otherwise = do set <- IntMap.lookup fid (forest st)
|
||||
Passive ruleid args <- Set.toList set
|
||||
let (FRule fn _ _ cat lins) = allRules pinfo ! ruleid
|
||||
if fn == wildCId
|
||||
then go (Set.insert fid rec) (head args)
|
||||
else do args <- mapM (go (Set.insert fid rec)) args
|
||||
return (Fun fn args)
|
||||
go rec fcat
|
||||
| Set.member fcat rec = mzero
|
||||
| otherwise = do (funid,args) <- foldForest (\funid args -> (:) (funid,args)) [] fcat (forest st)
|
||||
let FFun fn _ lins = functions pinfo ! funid
|
||||
args <- mapM (go (Set.insert fcat rec)) args
|
||||
return (Fun fn args)
|
||||
|
||||
process fn !rules [] acc_chart = acc_chart
|
||||
process fn !rules (item:items) acc_chart = univRule item acc_chart
|
||||
process fn !seqs !funs [] acc chart = (acc,chart)
|
||||
process fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart
|
||||
| inRange (bounds lin) ppos =
|
||||
case unsafeAt lin ppos of
|
||||
FSymCat d r -> let !fid = args !! d
|
||||
key = AK fid r
|
||||
|
||||
items2 = case lookupPC (mkPK key k) (passive chart) of
|
||||
Nothing -> items
|
||||
Just id -> (Active j (ppos+1) funid seqid (updateAt d id args) key0) : items
|
||||
items3 = foldForest (\funid args -> (:) (Active k 0 funid (rhs funid r) args key)) items2 fid (forest chart)
|
||||
in case lookupAC key (active chart) of
|
||||
Nothing -> process fn seqs funs items3 acc chart{active=insertAC key (Set.singleton item) (active chart)}
|
||||
Just set | Set.member item set -> process fn seqs funs items acc chart
|
||||
| otherwise -> process fn seqs funs items2 acc chart{active=insertAC key (Set.insert item set) (active chart)}
|
||||
FSymTok (KS tok) -> let !acc' = fn tok (Active j (ppos+1) funid seqid args key0) acc
|
||||
in process fn seqs funs items acc' chart
|
||||
| otherwise =
|
||||
case lookupPC (mkPK key0 j) (passive chart) of
|
||||
Nothing -> let fid = nextId chart
|
||||
|
||||
items2 = case lookupAC key0 ((active chart:actives chart) !! (k-j)) of
|
||||
Nothing -> items
|
||||
Just set -> Set.fold (\(Active j' ppos funid seqid args keyc) ->
|
||||
let FSymCat d _ = unsafeAt (unsafeAt seqs seqid) ppos
|
||||
in (:) (Active j' (ppos+1) funid seqid (updateAt d fid args) keyc)) items set
|
||||
in process fn seqs funs items2 acc chart{passive=insertPC (mkPK key0 j) fid (passive chart)
|
||||
,forest =IntMap.insert fid (Set.singleton (FApply funid args)) (forest chart)
|
||||
,nextId =nextId chart+1
|
||||
}
|
||||
Just id -> let items2 = [Active k 0 funid (rhs funid r) args (AK id r) | r <- labelsAC id (active chart)] ++ items
|
||||
in process fn seqs funs items2 acc chart{forest = IntMap.insertWith Set.union id (Set.singleton (FApply funid args)) (forest chart)}
|
||||
where
|
||||
univRule (Active j lbl ppos ruleid args fid0) acc_chart@(acc,chart)
|
||||
| inRange (bounds lin) ppos =
|
||||
case unsafeAt lin ppos of
|
||||
FSymCat r d -> let !fid = args !! d
|
||||
in case MM.insert' (AK fid r) item (active chart) of
|
||||
Nothing -> process fn rules items $ acc_chart
|
||||
Just actCat -> (case Map.lookup (PK fid r k) (passive chart) of
|
||||
Nothing -> id
|
||||
Just id -> process fn rules [Active j lbl (ppos+1) ruleid (updateAt d id args) fid0]) $
|
||||
(case IntMap.lookup fid (forest chart) of
|
||||
Nothing -> id
|
||||
Just set -> process fn rules (Set.fold (\(Passive ruleid args) -> (:) (Active k r 0 ruleid args fid)) [] set)) $
|
||||
process fn rules items $
|
||||
(acc,chart{active=actCat})
|
||||
FSymTok tok -> process fn rules items $
|
||||
(fn tok (Active j lbl (ppos+1) ruleid args fid0) acc,chart)
|
||||
| otherwise = case Map.lookup (PK fid0 lbl j) (passive chart) of
|
||||
Nothing -> let fid = nextId chart
|
||||
in process fn rules [Active j' lbl (ppos+1) ruleid (updateAt d fid args) fidc
|
||||
| Active j' lbl ppos ruleid args fidc <- ((active chart:actives chart) !! (k-j)) MM.! (AK fid0 lbl),
|
||||
let FSymCat _ d = unsafeAt (rhs ruleid lbl) ppos] $
|
||||
process fn rules items $
|
||||
(acc,chart{passive=Map.insert (PK fid0 lbl j) fid (passive chart)
|
||||
,forest =IntMap.insert fid (Set.singleton (Passive ruleid args)) (forest chart)
|
||||
,nextId =nextId chart+1
|
||||
})
|
||||
Just id -> process fn rules items $
|
||||
(acc,chart{forest = IntMap.insertWith Set.union id (Set.singleton (Passive ruleid args)) (forest chart)})
|
||||
where
|
||||
!lin = rhs ruleid lbl
|
||||
!k = offset chart
|
||||
!lin = unsafeAt seqs seqid
|
||||
!k = offset chart
|
||||
|
||||
rhs ruleid lbl = unsafeAt lins lbl
|
||||
mkPK (AK fid lbl) j = PK fid lbl j
|
||||
|
||||
rhs funid lbl = unsafeAt lins lbl
|
||||
where
|
||||
(FRule _ _ _ cat lins) = unsafeAt rules ruleid
|
||||
FFun _ _ lins = unsafeAt funs funid
|
||||
|
||||
updateAt :: Int -> a -> [a] -> [a]
|
||||
updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
|
||||
|
||||
|
||||
----------------------------------------------------------------
|
||||
-- Active Chart
|
||||
----------------------------------------------------------------
|
||||
|
||||
data Active
|
||||
= Active {-# UNPACK #-} !Int
|
||||
{-# UNPACK #-} !FIndex
|
||||
{-# UNPACK #-} !FPointPos
|
||||
{-# UNPACK #-} !RuleId
|
||||
{-# UNPACK #-} !FunId
|
||||
{-# UNPACK #-} !SeqId
|
||||
[FCat]
|
||||
{-# UNPACK #-} !FCat
|
||||
{-# UNPACK #-} !ActiveKey
|
||||
deriving (Eq,Show,Ord)
|
||||
data Passive
|
||||
= Passive {-# UNPACK #-} !RuleId
|
||||
[FCat]
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
data ActiveKey
|
||||
= AK {-# UNPACK #-} !FCat
|
||||
{-# UNPACK #-} !FIndex
|
||||
deriving (Eq,Ord,Show)
|
||||
type ActiveChart = IntMap.IntMap (IntMap.IntMap (Set.Set Active))
|
||||
|
||||
emptyAC :: ActiveChart
|
||||
emptyAC = IntMap.empty
|
||||
|
||||
lookupAC :: ActiveKey -> ActiveChart -> Maybe (Set.Set Active)
|
||||
lookupAC (AK fcat l) chart = IntMap.lookup fcat chart >>= IntMap.lookup l
|
||||
|
||||
labelsAC :: FCat -> ActiveChart -> [FIndex]
|
||||
labelsAC fcat chart =
|
||||
case IntMap.lookup fcat chart of
|
||||
Nothing -> []
|
||||
Just map -> IntMap.keys map
|
||||
|
||||
insertAC :: ActiveKey -> Set.Set Active -> ActiveChart -> ActiveChart
|
||||
insertAC (AK fcat l) set chart = IntMap.insertWith IntMap.union fcat (IntMap.singleton l set) chart
|
||||
|
||||
|
||||
----------------------------------------------------------------
|
||||
-- Passive Chart
|
||||
----------------------------------------------------------------
|
||||
|
||||
data PassiveKey
|
||||
= PK {-# UNPACK #-} !FCat
|
||||
{-# UNPACK #-} !FIndex
|
||||
{-# UNPACK #-} !Int
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
type PassiveChart = Map.Map PassiveKey FCat
|
||||
|
||||
emptyPC :: PassiveChart
|
||||
emptyPC = Map.empty
|
||||
|
||||
lookupPC :: PassiveKey -> PassiveChart -> Maybe FCat
|
||||
lookupPC key chart = Map.lookup key chart
|
||||
|
||||
insertPC :: PassiveKey -> FCat -> PassiveChart -> PassiveChart
|
||||
insertPC key fcat chart = Map.insert key fcat chart
|
||||
|
||||
|
||||
----------------------------------------------------------------
|
||||
-- Forest
|
||||
----------------------------------------------------------------
|
||||
|
||||
foldForest :: (FunId -> [FCat] -> b -> b) -> b -> FCat -> IntMap.IntMap (Set.Set Production) -> b
|
||||
foldForest f b fcat forest =
|
||||
case IntMap.lookup fcat forest of
|
||||
Nothing -> b
|
||||
Just set -> Set.fold foldPassive b set
|
||||
where
|
||||
foldPassive (FCoerce fcat) b = foldForest f b fcat forest
|
||||
foldPassive (FApply funid args) b = f funid args b
|
||||
|
||||
|
||||
----------------------------------------------------------------
|
||||
-- Parse State
|
||||
----------------------------------------------------------------
|
||||
|
||||
-- | An abstract data type whose values represent
|
||||
-- the current state in an incremental parser.
|
||||
@@ -176,10 +225,11 @@ data ParseState = State ParserInfo Chart (Set.Set Active)
|
||||
|
||||
data Chart
|
||||
= Chart
|
||||
{ active :: MM.MultiMap ActiveKey Active
|
||||
, actives :: [MM.MultiMap ActiveKey Active]
|
||||
, passive :: Map.Map PassiveKey FCat
|
||||
, forest :: IntMap.IntMap (Set.Set Passive)
|
||||
{ active :: ActiveChart
|
||||
, actives :: [ActiveChart]
|
||||
, passive :: PassiveChart
|
||||
, forest :: IntMap.IntMap (Set.Set Production)
|
||||
, nextId :: {-# UNPACK #-} !FCat
|
||||
, offset :: {-# UNPACK #-} !Int
|
||||
}
|
||||
deriving Show
|
||||
|
||||
@@ -31,7 +31,7 @@ type RangeRec = [Range]
|
||||
|
||||
data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int
|
||||
| EmptyRange
|
||||
deriving (Eq, Ord)
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
makeRange :: Int -> Int -> Range
|
||||
makeRange = Range
|
||||
@@ -83,7 +83,7 @@ data SyntaxNode n e = SMeta
|
||||
| SString String
|
||||
| SInt Integer
|
||||
| SFloat Double
|
||||
deriving (Eq,Ord)
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]]
|
||||
groupSyntaxNodes [] = []
|
||||
|
||||
@@ -3,13 +3,12 @@ module PGF.Raw.Convert (toPGF,fromPGF) where
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Raw.Abstract
|
||||
import PGF.BuildParser (buildParserInfo)
|
||||
import PGF.Parsing.FCFG.Utilities
|
||||
import qualified GF.Compile.GenerateFCFG as FCFG
|
||||
import qualified GF.Compile.GeneratePMCFG as PMCFG
|
||||
|
||||
import qualified Data.Array as Array
|
||||
import qualified Data.Map as Map
|
||||
import Data.Array.IArray
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.IntMap as IntMap
|
||||
|
||||
pgfMajorVersion, pgfMinorVersion :: Integer
|
||||
(pgfMajorVersion, pgfMinorVersion) = (1,0)
|
||||
@@ -54,11 +53,11 @@ toConcr pgf rexp =
|
||||
lindefs = Map.empty,
|
||||
printnames = Map.empty,
|
||||
paramlincats = Map.empty,
|
||||
parser = Just (buildParserOnDemand cnc) -- This thunk will be overwritten if there is a parser
|
||||
parser = Just (PMCFG.convertConcrete (abstract pgf) cnc)
|
||||
-- This thunk will be overwritten if there is a parser
|
||||
-- compiled in the PGF file. We use lazy evaluation here
|
||||
-- to make sure that buildParserOnDemand is called only
|
||||
-- if it is needed.
|
||||
|
||||
}) rexp
|
||||
in cnc
|
||||
where
|
||||
@@ -72,41 +71,44 @@ toConcr pgf rexp =
|
||||
add cnc (App "param" ts) = cnc { paramlincats = mkTermMap ts }
|
||||
add cnc (App "parser" ts) = cnc { parser = Just (toPInfo ts) }
|
||||
|
||||
buildParserOnDemand cnc = buildParserInfo fcfg
|
||||
where
|
||||
fcfg
|
||||
| Map.lookup (mkCId "erasing") (cflags cnc) == Just "on" = PMCFG.convertConcrete (abstract pgf) cnc
|
||||
| otherwise = FCFG.convertConcrete (abstract pgf) cnc
|
||||
|
||||
toPInfo :: [RExp] -> ParserInfo
|
||||
toPInfo [App "rules" rs, App "startupcats" cs] = buildParserInfo (rules, cats)
|
||||
toPInfo [App "functions" fs, App "sequences" ss, App "productions" ps,App "startcats" cs] =
|
||||
ParserInfo { functions = functions
|
||||
, sequences = seqs
|
||||
, productions = productions
|
||||
, startCats = cats
|
||||
}
|
||||
where
|
||||
rules = map toFRule rs
|
||||
cats = Map.fromList [(mkCId c, map expToInt fs) | App c fs <- cs]
|
||||
functions = mkArray (map toFFun fs)
|
||||
seqs = mkArray (map toFSeq ss)
|
||||
productions = IntMap.fromList (map toProductionSet ps)
|
||||
cats = Map.fromList [(mkCId c, (map expToInt xs)) | App c xs <- cs]
|
||||
|
||||
toFRule :: RExp -> FRule
|
||||
toFRule (App "rule"
|
||||
[n,
|
||||
App "cats" (rt:at),
|
||||
App "R" ls]) = FRule fun prof args res lins
|
||||
toFFun :: RExp -> FFun
|
||||
toFFun (App f [App "P" ts,App "R" ls]) = FFun fun prof lins
|
||||
where
|
||||
fun = mkCId f
|
||||
prof = map toProfile ts
|
||||
lins = mkArray [fromIntegral seqid | AInt seqid <- ls]
|
||||
|
||||
toProfile :: RExp -> Profile
|
||||
toProfile AMet = []
|
||||
toProfile (App "_A" [t]) = [expToInt t]
|
||||
toProfile (App "_U" ts) = [expToInt t | App "_A" [t] <- ts]
|
||||
|
||||
toFSeq :: RExp -> FSeq
|
||||
toFSeq (App "seq" ss) = mkArray [toSymbol s | s <- ss]
|
||||
|
||||
toProductionSet :: RExp -> (FCat,Set.Set Production)
|
||||
toProductionSet (App "td" (rt : xs)) = (expToInt rt, Set.fromList (map toProduction xs))
|
||||
where
|
||||
(fun,prof) = toFName n
|
||||
args = map expToInt at
|
||||
res = expToInt rt
|
||||
lins = mkArray [mkArray [toSymbol s | s <- l] | App "S" l <- ls]
|
||||
|
||||
toFName :: RExp -> (CId,[Profile])
|
||||
toFName (App "_A" [x]) = (wildCId, [[expToInt x]])
|
||||
toFName (App f ts) = (mkCId f, map toProfile ts)
|
||||
where
|
||||
toProfile :: RExp -> Profile
|
||||
toProfile AMet = []
|
||||
toProfile (App "_A" [t]) = [expToInt t]
|
||||
toProfile (App "_U" ts) = [expToInt t | App "_A" [t] <- ts]
|
||||
toProduction (App "A" (ruleid : at)) = FApply (expToInt ruleid) (map expToInt at)
|
||||
toProduction (App "C" [fcat]) = FCoerce (expToInt fcat)
|
||||
|
||||
toSymbol :: RExp -> FSymbol
|
||||
toSymbol (App "P" [n,l]) = FSymCat (expToInt l) (expToInt n)
|
||||
toSymbol (AStr t) = FSymTok t
|
||||
toSymbol (App "P" [n,l]) = FSymCat (expToInt n) (expToInt l)
|
||||
toSymbol (App "KP" (d:alts)) = FSymTok (toKP d alts)
|
||||
toSymbol (AStr t) = FSymTok (KS t)
|
||||
|
||||
toType :: RExp -> Type
|
||||
toType e = case e of
|
||||
@@ -142,8 +144,15 @@ toTerm e = case e of
|
||||
App f [] -> F (mkCId f)
|
||||
AInt i -> C (fromInteger i)
|
||||
AMet -> TM "?"
|
||||
AStr s -> K (KS s) ----
|
||||
App "KP" (d:alts) -> K (toKP d alts)
|
||||
AStr s -> K (KS s)
|
||||
_ -> error $ "term " ++ show e
|
||||
|
||||
toKP d alts = KP (toStr d) (map toAlt alts)
|
||||
where
|
||||
toStr (App "S" vs) = [v | AStr v <- vs]
|
||||
toAlt (App "A" [x,y]) = Alt (toStr x) (toStr y)
|
||||
|
||||
|
||||
------------------------------
|
||||
--- from internal to parser --
|
||||
@@ -192,8 +201,7 @@ fromExp e = case e of
|
||||
ELit (LFlt d) -> AFlt d
|
||||
ELit (LInt i) -> AInt (toInteger i)
|
||||
EMeta _ -> AMet ----
|
||||
EEq eqs ->
|
||||
App "Eq" [App "E" (map fromExp (v:ps)) | Equ ps v <- eqs]
|
||||
EEq eqs -> App "Eq" [App "E" (map fromExp (v:ps)) | Equ ps v <- eqs]
|
||||
|
||||
fromTerm :: Term -> RExp
|
||||
fromTerm e = case e of
|
||||
@@ -206,8 +214,11 @@ fromTerm e = case e of
|
||||
TM _ -> AMet
|
||||
F f -> App (prCId f) []
|
||||
V i -> App "A" [AInt (toInteger i)]
|
||||
K (KS s) -> AStr s ----
|
||||
K (KP d vs) -> App "FV" (str d : [str v | Alt v _ <- vs]) ----
|
||||
K t -> fromTokn t
|
||||
|
||||
fromTokn :: Tokn -> RExp
|
||||
fromTokn (KS s) = AStr s
|
||||
fromTokn (KP d vs) = App "KP" (str d : [App "A" [str v, str x] | Alt v x <- vs])
|
||||
where
|
||||
str v = App "S" (map AStr v)
|
||||
|
||||
@@ -215,39 +226,42 @@ fromTerm e = case e of
|
||||
|
||||
fromPInfo :: ParserInfo -> RExp
|
||||
fromPInfo p = App "parser" [
|
||||
App "rules" [fromFRule rule | rule <- Array.elems (allRules p)],
|
||||
App "startupcats" [App (prCId f) (map intToExp cs) | (f,cs) <- Map.toList (startupCats p)]
|
||||
App "functions" [fromFFun fun | fun <- elems (functions p)],
|
||||
App "sequences" [fromFSeq seq | seq <- elems (sequences p)],
|
||||
App "productions" [fromProductionSet xs | xs <- IntMap.toList (productions p)],
|
||||
App "startcats" [App (prCId f) (map intToExp xs) | (f,xs) <- Map.toList (startCats p)]
|
||||
]
|
||||
|
||||
fromFRule :: FRule -> RExp
|
||||
fromFRule (FRule fun prof args res lins) =
|
||||
App "rule" [fromFName (fun,prof),
|
||||
App "cats" (intToExp res:map intToExp args),
|
||||
App "R" [App "S" [fromSymbol s | s <- Array.elems l] | l <- Array.elems lins]
|
||||
]
|
||||
|
||||
fromFName :: (CId,[Profile]) -> RExp
|
||||
fromFName (f,ps) | f == wildCId = fromProfile (head ps)
|
||||
| otherwise = App (prCId f) (map fromProfile ps)
|
||||
fromFFun :: FFun -> RExp
|
||||
fromFFun (FFun fun prof lins) = App (prCId fun) [App "P" (map fromProfile prof), App "R" [intToExp seqid | seqid <- elems lins]]
|
||||
where
|
||||
fromProfile :: Profile -> RExp
|
||||
fromProfile [] = AMet
|
||||
fromProfile [x] = daughter x
|
||||
fromProfile args = App "_U" (map daughter args)
|
||||
|
||||
|
||||
daughter n = App "_A" [intToExp n]
|
||||
|
||||
fromSymbol :: FSymbol -> RExp
|
||||
fromSymbol (FSymCat l n) = App "P" [intToExp n, intToExp l]
|
||||
fromSymbol (FSymTok t) = AStr t
|
||||
fromSymbol (FSymCat n l) = App "P" [intToExp n, intToExp l]
|
||||
fromSymbol (FSymTok t) = fromTokn t
|
||||
|
||||
fromFSeq :: FSeq -> RExp
|
||||
fromFSeq seq = App "seq" [fromSymbol s | s <- elems seq]
|
||||
|
||||
fromProductionSet :: (FCat,Set.Set Production) -> RExp
|
||||
fromProductionSet (cat,xs) = App "td" (intToExp cat : map fromPassive (Set.toList xs))
|
||||
where
|
||||
fromPassive (FApply ruleid args) = App "A" (intToExp ruleid : map intToExp args)
|
||||
fromPassive (FCoerce fcat) = App "C" [intToExp fcat]
|
||||
|
||||
-- ** Utilities
|
||||
|
||||
mkTermMap :: [RExp] -> Map.Map CId Term
|
||||
mkTermMap ts = Map.fromAscList [(mkCId f,toTerm v) | App f [v] <- ts]
|
||||
|
||||
mkArray :: [a] -> Array.Array Int a
|
||||
mkArray xs = Array.listArray (0, length xs - 1) xs
|
||||
mkArray :: IArray a e => [e] -> a Int e
|
||||
mkArray xs = listArray (0, length xs - 1) xs
|
||||
|
||||
expToInt :: Integral a => RExp -> a
|
||||
expToInt (App "neg" [AInt i]) = fromIntegral (negate i)
|
||||
|
||||
Reference in New Issue
Block a user