the new optimized incremental parser and the common subexpression elimination optimization in PMCFG

This commit is contained in:
krasimir
2008-10-14 08:00:50 +00:00
parent fa4003535f
commit 1fc909c101
17 changed files with 654 additions and 526 deletions

View File

@@ -40,7 +40,6 @@ exportPGF opts fmt pgf =
FmtProlog_Abs -> multi "pl" grammar2prolog_abs
FmtBNF -> single "bnf" bnfPrinter
FmtEBNF -> single "ebnf" (ebnfPrinter opts)
FmtFCFG -> single "fcfg" fcfgPrinter
FmtSRGS_XML -> single "grxml" (srgsXmlPrinter opts)
FmtSRGS_XML_NonRec -> single "grxml" (srgsXmlNonRecursivePrinter opts)
FmtSRGS_ABNF -> single "gram" (srgsAbnfPrinter opts)

View File

@@ -11,11 +11,13 @@ import GF.Data.ErrM
import GF.Infra.Option
import Control.Monad (mplus)
import Data.Array (Array)
import qualified Data.Array as Array
import Data.Array.Unboxed (UArray)
import qualified Data.Array.IArray as Array
import Data.Maybe (fromMaybe)
import Data.Map (Map)
import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
pgf2js :: PGF -> String
pgf2js pgf =
@@ -89,31 +91,44 @@ children = JS.Ident "cs"
-- Parser
parser2js :: String -> ParserInfo -> [JS.Expr]
parser2js start p = [new "Parser" [JS.EStr start,
JS.EArray $ map frule2js (Array.elems (allRules p)),
JS.EObj $ map cats (Map.assocs (startupCats p))]]
JS.EArray $ [frule2js p cat prod | (cat,set) <- IntMap.toList (productions p), prod <- Set.toList set],
JS.EObj $ map cats (Map.assocs (startCats p))]]
where
cats (c,is) = JS.Prop (JS.IdentPropName (JS.Ident (prCId c))) (JS.EArray (map JS.EInt is))
frule2js :: FRule -> JS.Expr
frule2js (FRule f ps args res lins) = new "Rule" [JS.EInt res, name2js (f,ps), JS.EArray (map JS.EInt args), lins2js lins]
-- | Render one production of the result category @res@ as a JavaScript
-- @Rule@ constructor call. The 'ParserInfo' is needed to resolve function
-- ids (and, via 'lins2js', sequence ids) to their definitions.
frule2js :: ParserInfo -> FCat -> Production -> JS.Expr
-- Function application: emit result category, function name with its
-- profiles, the argument categories and the linearization rows.
frule2js p res (FApply funid args) = new "Rule" [JS.EInt res, name2js (f,ps), JS.EArray (map JS.EInt args), lins2js p lins]
where
-- Look the function record up by id in the parser's function table.
FFun f ps lins = functions p Array.! funid
-- Coercion: emit a rule whose tree is its single argument (@Arg 0@) and
-- whose i-th linearization row is just field i of that argument.
frule2js p res (FCoerce arg) = new "Rule" [JS.EInt res, daughter 0, JS.EArray [JS.EInt arg], JS.EArray [JS.EArray [sym2js (FSymCat 0 i)] | i <- [0..catLinArity arg-1]]]
where
-- Number of rows to emit for category c: the largest row count among the
-- functions found by topdownRules, and never less than 1.
catLinArity :: FCat -> Int
catLinArity c = maximum (1:[Array.rangeSize (Array.bounds rhs) | (FFun _ _ rhs, _) <- topdownRules c])
-- Collect (function, arguments) for every FApply production of cat,
-- descending through FCoerce productions into the category they name.
-- NOTE(review): there is no visited set here, so this presumably relies on
-- coercions never forming a cycle -- confirm against the grammar generators.
topdownRules cat = f cat []
where
f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions p))
g (FApply funid args) rules = (functions p Array.! funid,args) : rules
g (FCoerce cat) rules = f cat rules
name2js :: (CId,[Profile]) -> JS.Expr
name2js (f,ps) | f == wildCId = fromProfile (head ps)
| otherwise = new "FunApp" $ [JS.EStr $ prCId f, JS.EArray (map fromProfile ps)]
name2js (f,ps) = new "FunApp" $ [JS.EStr $ prCId f, JS.EArray (map fromProfile ps)]
where
fromProfile :: Profile -> JS.Expr
fromProfile [] = new "MetaVar" []
fromProfile [x] = daughter x
fromProfile args = new "Unify" [JS.EArray (map daughter args)]
daughter i = new "Arg" [JS.EInt i]
daughter i = new "Arg" [JS.EInt i]
lins2js :: Array FIndex (Array FPointPos FSymbol) -> JS.Expr
lins2js ls = JS.EArray [ JS.EArray [ sym2js s | s <- Array.elems l] | l <- Array.elems ls]
-- | Render the linearization rows of a function as a JavaScript array of
-- arrays. Each entry of @ls@ is a sequence id; it is resolved through the
-- parser's sequence table and every symbol of that sequence is converted
-- with 'sym2js'.
lins2js :: ParserInfo -> UArray FIndex SeqId -> JS.Expr
lins2js p ls = JS.EArray (map row (Array.elems ls))
  where
    -- One row: the symbols of the sequence stored under this id.
    row seqid = JS.EArray (map sym2js (Array.elems (sequences p Array.! seqid)))
sym2js :: FSymbol -> JS.Expr
sym2js (FSymCat l n) = new "ArgProj" [JS.EInt n, JS.EInt l]
sym2js (FSymTok t) = new "Terminal" [JS.EStr t]
sym2js (FSymCat n l) = new "ArgProj" [JS.EInt n, JS.EInt l]
sym2js (FSymTok (KS t)) = new "Terminal" [JS.EStr t]
-- | Build a JavaScript @new Ctor(args...)@ expression from a constructor
-- name and its already-converted arguments.
new :: String -> [JS.Expr] -> JS.Expr
new ctorName ctorArgs = JS.ENew ctor ctorArgs
  where
    ctor = JS.Ident ctorName

View File

@@ -25,17 +25,18 @@ import GF.Data.SortedList
import GF.Data.Utilities (updateNthM, sortNub)
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import qualified Data.List as List
import qualified Data.ByteString.Char8 as BS
import Data.Array
import Data.Array.IArray
import Data.Maybe
import Control.Monad
----------------------------------------------------------------------
-- main conversion function
convertConcrete :: Abstr -> Concr -> FGrammar
convertConcrete :: Abstr -> Concr -> ParserInfo
convertConcrete abs cnc = fixHoasFuns $ convert abs_defs' conc' cats'
where abs_defs = Map.assocs (funs abs)
conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient"
@@ -91,14 +92,14 @@ expandHOAS funs lins lincats = (funs' ++ hoFuns ++ varFuns,
-- replaces __NCat with _B and _Var_Cat with _.
-- the temporary names are just there to avoid name collisions.
fixHoasFuns :: FGrammar -> FGrammar
fixHoasFuns (rs, cs) = ([FRule (fixName n) ps args cat lins | FRule n ps args cat lins <- rs], cs)
fixHoasFuns :: ParserInfo -> ParserInfo
fixHoasFuns pinfo = pinfo{functions=mkArray [FFun (fixName n) prof lins | FFun n prof lins <- elems (functions pinfo)]}
where fixName (CId n) | BS.pack "__" `BS.isPrefixOf` n = (mkCId "_B")
| BS.pack "_Var_" `BS.isPrefixOf` n = wildCId
fixName n = n
convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> FGrammar
convert abs_defs cnc_defs cat_defs = getFGrammar (loop frulesEnv)
convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> ParserInfo
convert abs_defs cnc_defs cat_defs = getParserInfo (loop grammarEnv)
where
srules = [
(XRule id args res (map findLinType args) (findLinType res) term) |
@@ -107,26 +108,26 @@ convert abs_defs cnc_defs cat_defs = getFGrammar (loop frulesEnv)
findLinType id = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs)
(xrulesMap,frulesEnv) = List.foldl' helper (Map.empty,emptyFRulesEnv) srules
(xrulesMap,grammarEnv) = List.foldl' helper (Map.empty,emptyFFunsEnv) srules
where
helper (xrulesMap,frulesEnv) rule@(XRule id abs_args abs_res cnc_args cnc_res term) =
helper (xrulesMap,grammarEnv) rule@(XRule id abs_args abs_res cnc_args cnc_res term) =
let xrulesMap' = Map.insertWith (++) abs_res [rule] xrulesMap
frulesEnv' = List.foldl' (\env selector -> convertRule cnc_defs selector rule env)
frulesEnv
grammarEnv' = List.foldl' (\env selector -> convertRule cnc_defs selector rule env)
grammarEnv
(mkSingletonSelectors cnc_defs cnc_res)
in xrulesMap' `seq` frulesEnv' `seq` (xrulesMap',frulesEnv')
in xrulesMap' `seq` grammarEnv' `seq` (xrulesMap',grammarEnv')
loop frulesEnv =
let (todo, frulesEnv') = takeToDoRules xrulesMap frulesEnv
loop grammarEnv =
let (todo, grammarEnv') = takeToDoRules xrulesMap grammarEnv
in case todo of
[] -> frulesEnv'
[] -> grammarEnv'
_ -> loop $! List.foldl' (\env (srules,selector) ->
List.foldl' (\env srule -> convertRule cnc_defs selector srule env) env srules) frulesEnv' todo
List.foldl' (\env srule -> convertRule cnc_defs selector srule env) env srules) grammarEnv' todo
convertRule :: TermMap -> TermSelector -> XRule -> FRulesEnv -> FRulesEnv
convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) frulesEnv =
convertRule :: TermMap -> TermSelector -> XRule -> GrammarEnv -> GrammarEnv
convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) grammarEnv =
foldBM addRule
frulesEnv
grammarEnv
(convertTerm cnc_defs selector term [([],[])])
(protoFCat cat, map (\scat -> (protoFCat scat,[])) args, ctype, ctypes)
where
@@ -137,9 +138,10 @@ convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) frulesEnv =
(env1, xargs1) = List.mapAccumL (genFCatArg cnc_defs ctype) env xargs
in case xcat of
PFCat _ [] _ -> (env , args, all_args)
_ -> (env1,xargs1++args,(idx,zip xargs1 xargs):all_args)) (env1,[],[]) (zip3 newArgs' ctypes [0..])
_ -> (env1,xargs1++args,(idx,zip xargs1 xargs):all_args))
(env1,[],[]) (zip3 newArgs' ctypes [0..])
newLinRec = listArray (0,length linRec-1) [translateLin idxArgs path linRec | path <- case newCat' of {PFCat _ rcs _ -> rcs}]
(env3,newLinRec) = List.mapAccumL (translateLin idxArgs linRec) env2 (case newCat' of {PFCat _ rcs _ -> rcs})
(_,newProfile) = List.mapAccumL accumProf 0 newArgs'
where
@@ -147,18 +149,19 @@ convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) frulesEnv =
accumProf nr (_ ,xpaths) = (nr+cnt+1, [nr..nr+cnt])
where cnt = length xpaths
rule = FRule fun newProfile newArgs newCat newLinRec
in addFRule env2 rule
(env4,funid) = addFFun env3 (FFun fun newProfile (mkArray newLinRec))
translateLin idxArgs lbl' [] = array (0,-1) []
translateLin idxArgs lbl' ((lbl,syms) : lins)
| lbl' == lbl = listArray (0,length syms-1) (map instSym syms)
| otherwise = translateLin idxArgs lbl' lins
in addProduction env4 newCat (FApply funid newArgs)
translateLin idxArgs [] grammarEnv lbl' = error "translateLin"
translateLin idxArgs ((lbl,syms) : lins) grammarEnv lbl'
| lbl' == lbl = addFSeq grammarEnv (lbl,map instSym syms)
| otherwise = translateLin idxArgs lins grammarEnv lbl'
where
instSym = either (\(lbl, nr, xnr) -> instCat lbl nr xnr 0 idxArgs) FSymTok
instCat lbl nr xnr nr' ((idx,xargs):idxArgs)
| nr == idx = let (fcat, PFCat _ rcs _) = xargs !! xnr
in FSymCat (index lbl rcs 0) (nr'+xnr)
in FSymCat (nr'+xnr) (index lbl rcs 0)
| otherwise = instCat lbl nr xnr (nr'+length xargs) idxArgs
index lbl' (lbl:lbls) idx
@@ -173,7 +176,7 @@ type CnvMonad a = BacktrackM Env a
type FPath = [FIndex]
type Env = (ProtoFCat, [(ProtoFCat,[FPath])], Term, [Term])
type LinRec = [(FPath, [Either (FPath, FIndex, Int) FToken])]
type LinRec = [(FPath, [Either (FPath, FIndex, Int) Tokn])]
type TermMap = Map.Map CId Term
@@ -190,11 +193,11 @@ convertTerm cnc_defs selector (S ts) ((lbl_path,lin) : lins) = do projectH
foldM (\lins t -> convertTerm cnc_defs selector t lins) ((lbl_path,lin) : lins) (reverse ts)
convertTerm cnc_defs selector (K (KS str)) ((lbl_path,lin) : lins) =
do projectHead lbl_path
return ((lbl_path,Right str : lin) : lins)
return ((lbl_path,Right (KS str) : lin) : lins)
convertTerm cnc_defs selector (K (KP strs vars))((lbl_path,lin) : lins) =
do projectHead lbl_path
toks <- member (strs:[strs' | Alt strs' _ <- vars])
return ((lbl_path, map Right toks ++ lin) : lins)
return ((lbl_path, map (Right . KS) toks ++ lin) : lins)
convertTerm cnc_defs selector (F id) lins = do term <- Map.lookup id cnc_defs
convertTerm cnc_defs selector term lins
convertTerm cnc_defs selector (W s t) ((lbl_path,lin) : lins) = do
@@ -273,75 +276,105 @@ selectTerm (index:path) (R record) = selectTerm path (record !! index)
----------------------------------------------------------------------
-- FRulesEnv
-- GrammarEnv
data FRulesEnv = FRulesEnv {-# UNPACK #-} !Int FCatSet [FRule]
data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int FCatSet FSeqSet FFunSet (IntMap.IntMap (Set.Set Production))
type FCatSet = Map.Map CId (Map.Map [FPath] (Map.Map [(FPath,FIndex)] (Either FCat FCat)))
type FSeqSet = Map.Map FSeq SeqId
type FFunSet = Map.Map FFun FunId
data ProtoFCat = PFCat CId [FPath] [(FPath,FIndex)]
protoFCat :: CId -> ProtoFCat
protoFCat cat = PFCat cat [] []
emptyFRulesEnv = FRulesEnv 0 (ins fcatString (mkCId "String") [[0]] [] $
ins fcatInt (mkCId "Int") [[0]] [] $
ins fcatFloat (mkCId "Float") [[0]] [] $
ins fcatVar (mkCId "_Var") [[0]] [] $
Map.empty) []
emptyFFunsEnv = GrammarEnv 0 initFCatSet Map.empty Map.empty IntMap.empty
where
ins fcat cat rcs tcs fcatSet =
Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s fcatSet
initFCatSet = (ins fcatString (mkCId "String") [[0]] [] $
ins fcatInt (mkCId "Int") [[0]] [] $
ins fcatFloat (mkCId "Float") [[0]] [] $
ins fcatVar (mkCId "_Var") [[0]] [] $
Map.empty)
ins fcat cat rcs tcs catSet =
Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s catSet
where
right_fcat = Right fcat
tmap_s = Map.singleton tcs right_fcat
rmap_s = Map.singleton rcs tmap_s
addFRule :: FRulesEnv -> FRule -> FRulesEnv
addFRule (FRulesEnv last_id fcatSet rules) rule = FRulesEnv last_id fcatSet (rule:rules)
-- | Record production @p@ for category @cat@. Productions are kept as a
-- set per category, so adding the same production twice has no effect.
addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv
addProduction (GrammarEnv lastId cats seqs funs prods) cat p =
  GrammarEnv lastId cats seqs funs prods'
  where
    prods' = IntMap.insertWith Set.union cat (Set.singleton p) prods
getFGrammar :: FRulesEnv -> FGrammar
getFGrammar (FRulesEnv last_id fcatSet rules) = (rules, Map.map getFCatList fcatSet)
-- | Intern a symbol sequence. The path component of the pair is ignored;
-- the symbols are packed into an array and looked up in the sequence table.
-- A sequence seen before yields its existing id and an unchanged
-- environment; otherwise it is stored under the next free id, which is the
-- current size of the table.
addFSeq :: GrammarEnv -> (FPath,[FSymbol]) -> (GrammarEnv,SeqId)
addFSeq env@(GrammarEnv lastId cats seqs funs prods) (_,syms) =
  maybe fresh (\known -> (env, known)) (Map.lookup symArr seqs)
  where
    symArr = mkArray syms
    fresh =
      let !nextId = Map.size seqs
      in (GrammarEnv lastId cats (Map.insert symArr nextId seqs) funs prods, nextId)
-- | Intern a function record. A function already present in the table
-- yields its existing id and an unchanged environment; a new one is stored
-- under the next free id, which is the current size of the table.
addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId)
addFFun env@(GrammarEnv lastId cats seqs funs prods) fun =
  case Map.lookup fun funs of
    Nothing    -> register
    Just known -> (env, known)
  where
    register =
      let !nextId = Map.size funs
      in (GrammarEnv lastId cats seqs (Map.insert fun nextId funs) prods, nextId)
-- | Freeze the accumulated conversion state into the 'ParserInfo' record.
-- The function and sequence tables become arrays indexed by id, the
-- production map is passed through unchanged, and the category set is
-- flattened into a list of categories per abstract category name.
getParserInfo :: GrammarEnv -> ParserInfo
getParserInfo (GrammarEnv last_id catSet seqSet funSet prodSet) =
ParserInfo { functions = mkArray funSet
, sequences = mkArray seqSet
, productions = prodSet
, startCats = Map.map getFCatList catSet
}
where
-- Local helper; note that it shadows the list-based top-level mkArray.
-- It inverts an interning map (value -> id) into an array (id -> value),
-- which is total because ids were handed out as 0 .. size-1.
mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
-- Walk both nested map levels and collect every stored category,
-- whether it is tagged Left or Right.
getFCatList rcs = Map.fold (\tcs lst -> Map.fold (\x lst -> either id id x : lst) lst tcs) [] rcs
genFCatHead :: FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat)
genFCatHead env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs) =
case Map.lookup cat fcatSet >>= Map.lookup rcs >>= Map.lookup tcs of
Just (Left fcat) -> (FRulesEnv last_id (ins fcat) rules, fcat)
genFCatHead :: GrammarEnv -> ProtoFCat -> (GrammarEnv, FCat)
genFCatHead env@(GrammarEnv last_id catSet seqSet funSet prodSet) (PFCat cat rcs tcs) =
case Map.lookup cat catSet >>= Map.lookup rcs >>= Map.lookup tcs of
Just (Left fcat) -> (GrammarEnv last_id (ins fcat) seqSet funSet prodSet, fcat)
Just (Right fcat) -> (env, fcat)
Nothing -> let fcat = last_id+1
in (FRulesEnv fcat (ins fcat) rules, fcat)
in (GrammarEnv fcat (ins fcat) seqSet funSet prodSet, fcat)
where
ins fcat = Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s fcatSet
ins fcat = Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s catSet
where
right_fcat = Right fcat
tmap_s = Map.singleton tcs right_fcat
rmap_s = Map.singleton rcs tmap_s
genFCatArg :: TermMap -> Term -> FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat)
genFCatArg cnc_defs ctype env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs) =
case Map.lookup cat fcatSet >>= Map.lookup rcs of
genFCatArg :: TermMap -> Term -> GrammarEnv -> ProtoFCat -> (GrammarEnv, FCat)
genFCatArg cnc_defs ctype env@(GrammarEnv last_id catSet seqSet funSet prodSet) (PFCat cat rcs tcs) =
case Map.lookup cat catSet >>= Map.lookup rcs of
Just tmap -> case Map.lookup tcs tmap of
Just (Left fcat) -> (env, fcat)
Just (Right fcat) -> (env, fcat)
Just (Left fcat) -> (env, fcat)
Just (Right fcat) -> (env, fcat)
Nothing -> ins tmap
Nothing -> ins Map.empty
where
ins tmap =
let fcat = last_id+1
(either_fcat,last_id1,tmap1,rules1)
= foldBM (\tcs st (either_fcat,last_id,tmap,rules) ->
(either_fcat,last_id1,tmap1,prodSet1)
= foldBM (\tcs st (either_fcat,last_id,tmap,prodSet) ->
let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap
rule = FRule wildCId [[0]] [fcat_arg] fcat
(listArray (0,length rcs-1) [listArray (0,0) [FSymCat lbl 0] | lbl <- [0..length rcs-1]])
p = FCoerce fcat_arg
prodSet1 = IntMap.insertWith Set.union fcat (Set.singleton p) prodSet
in if st
then (Right fcat, last_id1,tmap1,rule:rules)
else (either_fcat,last_id, tmap, rules))
(Left fcat,fcat,Map.insert tcs either_fcat tmap,rules)
then (Right fcat, last_id1,tmap1,prodSet1)
else (either_fcat,last_id, tmap ,prodSet ))
(Left fcat,fcat,Map.insert tcs either_fcat tmap,prodSet)
(gen_tcs ctype [] [])
False
rmap1 = Map.singleton rcs tmap1
in (FRulesEnv last_id1 (Map.insertWith (\_ -> Map.insert rcs tmap1) cat rmap1 fcatSet) rules1, fcat)
in (GrammarEnv last_id1 (Map.insertWith (\_ -> Map.insert rcs tmap1) cat rmap1 catSet) seqSet funSet prodSet1, fcat)
where
addArg tcs last_id tmap =
case Map.lookup tcs tmap of
@@ -380,10 +413,11 @@ data XRule = XRule CId {- function -}
Term {- result lin-type representation -}
Term {- body -}
takeToDoRules :: XRulesMap -> FRulesEnv -> ([([XRule], TermSelector)], FRulesEnv)
takeToDoRules xrulesMap (FRulesEnv last_id fcatSet rules) = (todo,FRulesEnv last_id fcatSet' rules)
takeToDoRules :: XRulesMap -> GrammarEnv -> ([([XRule], TermSelector)], GrammarEnv)
takeToDoRules xrulesMap (GrammarEnv last_id catSet seqSet funSet prodSet) =
(todo,GrammarEnv last_id catSet' seqSet funSet prodSet)
where
(todo,fcatSet') =
(todo,catSet') =
Map.mapAccumWithKey (\todo cat rmap ->
let (todo1,rmap1) = Map.mapAccumWithKey (\todo rcs tmap ->
let (tcss,tmap') = Map.mapAccumWithKey (\tcss tcs either_xcat ->
@@ -398,7 +432,7 @@ takeToDoRules xrulesMap (FRulesEnv last_id fcatSet rules) = (todo,FRulesEnv last
in case mb_srules of
Just srules -> (todo1,rmap1)
Nothing -> (todo ,rmap1)) [] fcatSet
Nothing -> (todo ,rmap1)) [] catSet
------------------------------------------------------------
@@ -524,3 +558,5 @@ projectProtoFCat path0 (PFCat cat rcs tcs) = do
| path0 > path = path : addConstraint rcs
| path0 == path = path : rcs
addConstraint rcs = path0 : rcs
-- | Pack a list into a zero-based array holding exactly its elements.
-- An empty list gives the empty array with bounds @(0,-1)@.
mkArray xs = listArray (0, lastIndex) xs
  where
    lastIndex = length xs - 1

View File

@@ -1,4 +1,4 @@
{-# OPTIONS -fbang-patterns #-}
{-# OPTIONS -fbang-patterns -cpp #-}
----------------------------------------------------------------------
-- |
-- Maintainer : Krasimir Angelov
@@ -12,14 +12,12 @@
-- the conversion is only equivalent if the GFC grammar has a context-free backbone.
-----------------------------------------------------------------------------
module GF.Compile.GeneratePMCFG
(convertConcrete) where
import PGF.CId
import PGF.Data
import PGF.Macros --hiding (prt)
import PGF.Parsing.FCFG.Utilities
import GF.Data.BacktrackM
import GF.Data.SortedList
@@ -28,8 +26,9 @@ import GF.Data.Utilities (updateNthM, sortNub)
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.List as List
import qualified Data.IntMap as IntMap
import qualified Data.ByteString.Char8 as BS
import Data.Array
import Data.Array.IArray
import Data.Maybe
import Control.Monad
import Debug.Trace
@@ -37,7 +36,7 @@ import Debug.Trace
----------------------------------------------------------------------
-- main conversion function
convertConcrete :: Abstr -> Concr -> FGrammar
convertConcrete :: Abstr -> Concr -> ParserInfo
convertConcrete abs cnc = fixHoasFuns $ convert abs_defs' conc' cats'
where abs_defs = Map.assocs (funs abs)
conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient"
@@ -93,14 +92,14 @@ expandHOAS funs lins lincats = (funs' ++ hoFuns ++ varFuns,
-- replaces __NCat with _B and _Var_Cat with _.
-- the temporary names are just there to avoid name collisions.
fixHoasFuns :: FGrammar -> FGrammar
fixHoasFuns (!rs, !cs) = ([FRule (fixName n) ps args cat lins | FRule n ps args cat lins <- rs], cs)
fixHoasFuns :: ParserInfo -> ParserInfo
fixHoasFuns pinfo = pinfo{functions=mkArray [FFun (fixName n) prof lins | FFun n prof lins <- elems (functions pinfo)]}
where fixName (CId n) | BS.pack "__" `BS.isPrefixOf` n = (mkCId "_B")
| BS.pack "_Var_" `BS.isPrefixOf` n = wildCId
fixName n = n
convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> FGrammar
convert abs_defs cnc_defs cat_defs = getFGrammar (List.foldl' (convertRule cnc_defs) emptyFRulesEnv srules)
convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> ParserInfo
convert abs_defs cnc_defs cat_defs = getParserInfo (List.foldl' (convertRule cnc_defs) (emptyFRulesEnv cnc_defs cat_defs) srules)
where
srules = [
(XRule id args res (map findLinType args) (findLinType res) term) |
@@ -109,23 +108,40 @@ convert abs_defs cnc_defs cat_defs = getFGrammar (List.foldl' (convertRule cnc_d
findLinType id = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs)
-- | Run a production-adding step @f@ in isolation and merge its productions
-- back in compressed form.
--
-- @f@ is applied to the environment with an /empty/ production map, so the
-- map it returns holds exactly the productions added by this step. For each
-- category, those 'FApply' productions are grouped by function id. If the
-- argument lists of one group cover the full Cartesian product of the
-- categories seen at each argument position, the whole group is replaced by
-- a single production whose arguments are coercion categories (see
-- 'addFCoercion'); otherwise every production is added individually.
--
-- Only 'FApply' productions are carried over from the isolated map; any
-- other production in it is filtered out by the comprehension pattern.
brk :: (GrammarEnv -> GrammarEnv) -> (GrammarEnv -> GrammarEnv)
brk f (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
  case f (GrammarEnv last_id catSet seqSet funSet crcSet IntMap.empty) of
    -- Keep every table produced by f, but restore the original production
    -- map as the starting point for the merge.
    GrammarEnv last_id' catSet' seqSet' funSet' crcSet' topdown1 ->
      IntMap.foldWithKey optimize (GrammarEnv last_id' catSet' seqSet' funSet' crcSet' prodSet) topdown1
  where
    optimize cat ps env0 =
      IntMap.foldWithKey ff env0 (IntMap.fromListWith (++) [(funid,[args]) | FApply funid args <- Set.toList ps])
      where
        ff :: FunId -> [[FCat]] -> GrammarEnv -> GrammarEnv
        ff funid xs env
          -- The product of the per-position set sizes equals the number of
          -- argument lists exactly when the lists form the full product.
          | product (map Set.size ys) == count =
              case List.mapAccumL (\e c -> addFCoercion e (Set.toList c)) env ys of
                (env',args) -> addProduction env' cat (FApply funid args)
          -- Strict left fold, matching the List.foldl' used elsewhere in
          -- this file, so no chain of suspended updates is built.
          | otherwise = List.foldl' (\e args -> addProduction e cat (FApply funid args)) env xs
          where
            count = length xs
            -- ys !! i is the set of categories occurring at argument i.
            ys = foldr (zipWith Set.insert) (repeat Set.empty) xs
convertRule :: TermMap -> FRulesEnv -> XRule -> FRulesEnv
convertRule cnc_defs frulesEnv (XRule fun args cat ctypes ctype term) =
foldBM addRule
frulesEnv
(convertTerm cnc_defs [] ctype term [([],[])])
(protoFCat cnc_defs cat ctype, zipWith (protoFCat cnc_defs) args ctypes)
convertRule :: TermMap -> GrammarEnv -> XRule -> GrammarEnv
convertRule cnc_defs grammarEnv (XRule fun args cat ctypes ctype term) = trace (show fun) $
brk (\grammarEnv -> foldBM addRule
grammarEnv
(convertTerm cnc_defs [] ctype term [([],[])])
(protoFCat cnc_defs cat ctype, zipWith (protoFCat cnc_defs) args ctypes)) grammarEnv
where
addRule linRec (newCat', newArgs') env0 =
let (env1, newCat) = genFCatHead env0 newCat'
(env2, newArgs) = List.mapAccumL (genFCatArg cnc_defs) env1 newArgs'
let [newCat] = getFCats env0 newCat'
(env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs'
newLinRec = mkArray (map (mkArray . snd) linRec)
mkArray lst = listArray (0,length lst-1) lst
(env2,lins) = List.mapAccumL addFSeq env1 linRec
newLinRec = mkArray lins
rule = FRule fun [] newArgs newCat newLinRec
in addFRule env2 rule
(env3,funid) = addFFun env2 (FFun fun [[n] | n <- [0..length newArgs-1]] newLinRec)
in addProduction env3 newCat (FApply funid newArgs)
----------------------------------------------------------------------
-- term conversion
@@ -133,7 +149,7 @@ convertRule cnc_defs frulesEnv (XRule fun args cat ctypes ctype term) =
type CnvMonad a = BacktrackM Env a
type FPath = [FIndex]
data ProtoFCat = PFCat CId [FPath] [(FPath,FIndex)] Term
data ProtoFCat = PFCat CId [FPath] [(FPath,[FIndex])]
type Env = (ProtoFCat, [ProtoFCat])
type LinRec = [(FPath, [FSymbol])]
data XRule = XRule CId {- function -}
@@ -144,7 +160,16 @@ data XRule = XRule CId {- function -}
Term {- body -}
protoFCat :: TermMap -> CId -> Term -> ProtoFCat
protoFCat cnc_defs cat ctype = PFCat cat (getRCS cnc_defs ctype) [] ctype
protoFCat cnc_defs cat ctype =
let (rcs,tcs) = loop [] [] [] ctype
in PFCat cat rcs tcs
where
loop path rcs tcs (R record) = List.foldl' (\(rcs,tcs) (index,term) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record)
loop path rcs tcs (C i) = ( rcs,(path,[0..i]):tcs)
loop path rcs tcs (S _) = (path:rcs, tcs)
loop path rcs tcs (F id) = case Map.lookup id cnc_defs of
Just term -> loop path rcs tcs term
Nothing -> error ("unknown identifier: "++show id)
type TermMap = Map.Map CId Term
@@ -156,11 +181,12 @@ convertTerm cnc_defs sel ctype (P term p) lins = do nr <- e
convertTerm cnc_defs (nr:sel) ctype term lins
convertTerm cnc_defs sel ctype (FV vars) lins = do term <- member vars
convertTerm cnc_defs sel ctype term lins
convertTerm cnc_defs sel ctype (S ts) ((lbl_path,lin) : lins) = foldM (\lins t -> convertTerm cnc_defs sel ctype t lins) ((lbl_path,lin) : lins) (reverse ts)
convertTerm cnc_defs sel ctype (K (KS str)) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok str : lin) : lins)
convertTerm cnc_defs sel ctype (S ts) lins = foldM (\lins t -> convertTerm cnc_defs sel ctype t lins) lins (reverse ts)
--convertTerm cnc_defs sel ctype (K t) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok t : lin) : lins)
convertTerm cnc_defs sel ctype (K (KS t)) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok (KS t) : lin) : lins)
convertTerm cnc_defs sel ctype (K (KP strs vars))((lbl_path,lin) : lins) =
do toks <- member (strs:[strs' | Alt strs' _ <- vars])
return ((lbl_path, map FSymTok toks ++ lin) : lins)
return ((lbl_path, map (FSymTok . KS) toks ++ lin) : lins)
convertTerm cnc_defs sel ctype (F id) lins = do term <- Map.lookup id cnc_defs
convertTerm cnc_defs sel ctype term lins
convertTerm cnc_defs sel ctype (W s t) ((lbl_path,lin) : lins) = do
@@ -183,8 +209,8 @@ convertArg (C max) nr path lbl_path lin lins = do
return lins
convertArg (S _) nr path lbl_path lin lins = do
(_, args) <- readState
let PFCat cat rcs tcs _ = args !! nr
return ((lbl_path, FSymCat (index path rcs 0) nr : lin) : lins)
let PFCat cat rcs tcs = args !! nr
return ((lbl_path, FSymCat nr (index path rcs 0) : lin) : lins)
where
index lbl' (lbl:lbls) idx
| lbl' == lbl = idx
@@ -210,8 +236,11 @@ convertRec cnc_defs (index:sub_sel) ctype record lbl_path lin lins = do
evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex
evalTerm cnc_defs path (V nr) = do (_, args) <- readState
let PFCat _ _ _ ctype = args !! nr
unifyPType nr (reverse path) (selectTerm path ctype)
let PFCat _ _ tcs = args !! nr
rpath = reverse path
index <- member (fromMaybe (error "evalTerm: wrong path") (lookup rpath tcs))
restrictArg nr rpath index
return index
evalTerm cnc_defs path (C nr) = return nr
evalTerm cnc_defs path (R record) = case path of
(index:path) -> evalTerm cnc_defs path (record !! index)
@@ -222,112 +251,80 @@ evalTerm cnc_defs path (F id) = do term <- Map.lookup id cnc_defs
evalTerm cnc_defs path term
evalTerm cnc_defs path x = error ("evalTerm ("++show x++")")
unifyPType :: FIndex -> FPath -> Term -> CnvMonad FIndex
unifyPType nr path (C max_index) =
do (_, args) <- readState
let PFCat _ _ tcs _ = args !! nr
case lookup path tcs of
Just index -> return index
Nothing -> do index <- member [0..max_index]
restrictArg nr path index
return index
unifyPType nr path t = error $ "unifyPType " ++ show t ---- AR 2/10/2007
selectTerm :: FPath -> Term -> Term
selectTerm [] term = term
selectTerm (index:path) (R record) = selectTerm path (record !! index)
----------------------------------------------------------------------
-- FRulesEnv
-- GrammarEnv
data FRulesEnv = FRulesEnv {-# UNPACK #-} !Int FCatSet [FRule]
type FCatSet = Map.Map CId (Map.Map [(FPath,FIndex)] FCat)
data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int CatSet SeqSet FunSet CoerceSet (IntMap.IntMap (Set.Set Production))
type CatSet = Map.Map CId (FCat,FCat,[Int])
type SeqSet = Map.Map FSeq SeqId
type FunSet = Map.Map FFun FunId
type CoerceSet= Map.Map [FCat] FCat
emptyFRulesEnv = FRulesEnv 0 (ins fcatString (mkCId "String") [] $
ins fcatInt (mkCId "Int") [] $
ins fcatFloat (mkCId "Float") [] $
ins fcatVar (mkCId "_Var") [] $
Map.empty) []
emptyFRulesEnv cnc_defs lincats =
let (last_id,catSet) = Map.mapAccum computeCatRange 0 lincats
in GrammarEnv last_id catSet Map.empty Map.empty Map.empty IntMap.empty
where
ins fcat cat tcs fcatSet =
Map.insertWith (\_ -> Map.insert tcs fcat) cat tmap_s fcatSet
computeCatRange index ctype = (index+size,(index,index+size-1,poly))
where
tmap_s = Map.singleton tcs fcat
(size,poly) = getMultipliers 1 [] ctype
getMultipliers m ms (R record) = foldl (\(m,ms) t -> getMultipliers m ms t) (m,ms) record
getMultipliers m ms (S _) = (m,ms)
getMultipliers m ms (C max_index) = (m*(max_index+1),m : ms)
getMultipliers m ms (F id) = case Map.lookup id cnc_defs of
Just term -> getMultipliers m ms term
Nothing -> error ("unknown identifier: "++prCId id)
addFRule :: FRulesEnv -> FRule -> FRulesEnv
addFRule (FRulesEnv last_id fcatSet rules) rule = FRulesEnv last_id fcatSet (rule:rules)
-- | Record production @p@ for category @cat@. Productions are kept as a
-- set per category, so adding the same production twice has no effect.
addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv
addProduction (GrammarEnv lastId cats seqs funs coercions prods) cat p =
  GrammarEnv lastId cats seqs funs coercions prods'
  where
    prods' = IntMap.insertWith Set.union cat (Set.singleton p) prods
getFGrammar :: FRulesEnv -> FGrammar
getFGrammar (FRulesEnv last_id fcatSet rules) = (rules, Map.map Map.elems fcatSet)
genFCatHead :: FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat)
genFCatHead env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs _) =
case Map.lookup cat fcatSet >>= Map.lookup tcs of
Just fcat -> (env, fcat)
Nothing -> let fcat = last_id+1
in (FRulesEnv fcat (ins fcat) rules, fcat)
addFSeq :: GrammarEnv -> (FPath,[FSymbol]) -> (GrammarEnv,SeqId)
addFSeq env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (_,lst) =
case Map.lookup seq seqSet of
Just id -> (env,id)
Nothing -> let !last_seq = Map.size seqSet
in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet crcSet prodSet,last_seq)
where
ins fcat = Map.insertWith (\_ -> Map.insert tcs fcat) cat tmap_s fcatSet
where
tmap_s = Map.singleton tcs fcat
seq = mkArray lst
genFCatArg :: TermMap -> FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat)
genFCatArg cnc_defs env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs ctype) =
case Map.lookup cat fcatSet of
Just tmap -> case Map.lookup tcs tmap of
Just fcat -> (env, fcat)
Nothing -> ins tmap
Nothing -> ins Map.empty
-- | Intern a function record. A function already present in the table
-- yields its existing id and an unchanged environment; a new one is stored
-- under the next free id, which is the current size of the table.
addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId)
addFFun env@(GrammarEnv lastId cats seqs funs coercions prods) fun =
  maybe register (\known -> (env, known)) (Map.lookup fun funs)
  where
    register =
      let !nextId = Map.size funs
      in (GrammarEnv lastId cats seqs (Map.insert fun nextId funs) coercions prods, nextId)
-- | Return a category standing for the given list of categories.
-- A singleton list needs no coercion and is returned as-is. Any other list
-- is looked up in the coercion table; if it is not there yet, a fresh
-- category id (one past the last id handed out) is allocated for it and
-- remembered.
addFCoercion :: GrammarEnv -> [FCat] -> (GrammarEnv,FCat)
addFCoercion env@GrammarEnv{} [single] = (env, single)
addFCoercion env@(GrammarEnv lastId cats seqs funs coercions prods) members =
  case Map.lookup members coercions of
    Just existing -> (env, existing)
    Nothing ->
      let !fresh = lastId + 1
      in (GrammarEnv fresh cats seqs funs (Map.insert members fresh coercions) prods, fresh)
getParserInfo :: GrammarEnv -> ParserInfo
getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
ParserInfo { functions = mkArray funSet
, sequences = mkArray seqSet
, productions = IntMap.union prodSet coercions
, startCats = Map.map (\(start,end,_) -> range (start,end)) catSet
}
where
ins tmap =
let fcat = last_id+1
(last_id1,tmap1,rules1)
= foldBM (\tcs st (last_id,tmap,rules) ->
let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap
rule = FRule wildCId [[0]] [fcat_arg] fcat
(listArray (0,length rcs-1) [listArray (0,0) [FSymCat lbl 0] | lbl <- [0..length rcs-1]])
in if st
then (last_id1,tmap1,rule:rules)
else (last_id, tmap, rules))
(fcat,Map.insert tcs fcat tmap,rules)
(gen_tcs ctype [] [])
False
in (FRulesEnv last_id1 (Map.insert cat tmap1 fcatSet) rules1, fcat)
where
addArg tcs last_id tmap =
case Map.lookup tcs tmap of
Just fcat -> (last_id, tmap, fcat)
Nothing -> let fcat = last_id+1
in (fcat, Map.insert tcs fcat tmap, fcat)
mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet]
gen_tcs :: Term -> FPath -> [(FPath,FIndex)] -> BacktrackM Bool [(FPath,FIndex)]
gen_tcs (R record) path acc = foldM (\acc (label,ctype) -> gen_tcs ctype (label:path) acc) acc (zip [0..] record)
gen_tcs (S _) path acc = return acc
gen_tcs (C max_index) path acc =
case List.lookup path tcs of
Just index -> return $! addConstraint path index acc
Nothing -> do writeState True
index <- member [0..max_index]
return $! addConstraint path index acc
where
addConstraint path0 index0 (c@(path,index) : cs)
| path0 > path = c:addConstraint path0 index0 cs
addConstraint path0 index0 cs = (path0,index0) : cs
gen_tcs (F id) path acc = case Map.lookup id cnc_defs of
Just term -> gen_tcs term path acc
Nothing -> error ("unknown identifier: "++prCId id)
getRCS :: TermMap -> Term -> [FPath]
getRCS cnc_defs = loop [] []
getFCats :: GrammarEnv -> ProtoFCat -> [FCat]
getFCats (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat cat rcs tcs) =
case Map.lookup cat catSet of
Just (start,end,ms) -> reverse (solutions (variants ms tcs start) ())
where
loop path rcs (R record) = List.foldl' (\rcs (index,term) -> loop (index:path) rcs term) rcs (zip [0..] record)
loop path rcs (C i) = rcs
loop path rcs (S _) = path:rcs
loop path rcs (F id) = case Map.lookup id cnc_defs of
Just term -> loop path rcs term
Nothing -> error ("unknown identifier: "++show id)
variants _ [] fcat = return fcat
variants (m:ms) ((_,indices) : tcs) fcat = do index <- member indices
variants ms tcs ((m*index) + fcat)
------------------------------------------------------------
-- updating the MCF rule
@@ -345,12 +342,14 @@ restrictHead path term
writeState (head', args)
restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> CnvMonad ProtoFCat
restrictProtoFCat path0 index0 (PFCat cat rcs tcs ctype) = do
restrictProtoFCat path0 index0 (PFCat cat rcs tcs) = do
tcs <- addConstraint tcs
return (PFCat cat rcs tcs ctype)
return (PFCat cat rcs tcs)
where
addConstraint (c@(path,index) : cs)
| path0 > path = liftM (c:) (addConstraint cs)
| path0 == path = guard (index0 == index) >>
return (c : cs)
addConstraint cs = return ((path0,index0) : cs)
addConstraint [] = error "restrictProtoFCat: unknown path"
addConstraint (c@(path,indices) : tcs)
| path0 == path = guard (index0 `elem` indices) >>
return ((path,[index0]) : tcs)
| otherwise = liftM (c:) (addConstraint tcs)
mkArray lst = listArray (0,length lst-1) lst

View File

@@ -7,7 +7,6 @@ import qualified GF.Compile.GenerateFCFG as FCFG
import qualified GF.Compile.GeneratePMCFG as PMCFG
import PGF.CId
import PGF.BuildParser (buildParserInfo)
import qualified PGF.Macros as CM
import qualified PGF.Data as C
import qualified PGF.Data as D
@@ -54,9 +53,9 @@ mkCanon2gfcc opts cnc gr =
addParsers :: D.PGF -> D.PGF
addParsers pgf = pgf { D.concretes = Map.map conv (D.concretes pgf) }
where
conv cnc = cnc { D.parser = Just (buildParserInfo fcfg) }
conv cnc = cnc { D.parser = Just pinfo }
where
fcfg
pinfo
| Map.lookup (mkCId "erasing") (D.cflags cnc) == Just "on" = PMCFG.convertConcrete (D.abstract pgf) cnc
| otherwise = FCFG.convertConcrete (D.abstract pgf) cnc

View File

@@ -91,7 +91,6 @@ data OutputFormat = FmtPGF
| FmtEBNF
| FmtRegular
| FmtNoLR
| FmtFCFG
| FmtSRGS_XML
| FmtSRGS_XML_NonRec
| FmtSRGS_ABNF
@@ -497,7 +496,6 @@ outputFormats =
("ebnf", FmtEBNF),
("regular", FmtRegular),
("nolr", FmtNoLR),
("fcfg", FmtFCFG),
("srgs_xml", FmtSRGS_XML),
("srgs_xml_nonrec", FmtSRGS_XML_NonRec),
("srgs_abnf", FmtSRGS_ABNF),

View File

@@ -4,21 +4,19 @@
--
-- Approximates PGF grammars with context-free grammars.
----------------------------------------------------------------------
module GF.Speech.PGFToCFG (bnfPrinter,
fcfgPrinter, pgfToCFG) where
module GF.Speech.PGFToCFG (bnfPrinter, pgfToCFG) where
import PGF.CId
import PGF.Data as PGF
import PGF.Macros
import GF.Data.MultiMap (MultiMap)
import qualified GF.Data.MultiMap as MultiMap
import GF.Infra.Ident
import GF.Speech.CFG
import Data.Array as Array
import Data.Array.IArray as Array
import Data.List
import Data.Map (Map)
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import Data.Maybe
import Data.Set (Set)
import qualified Data.Set as Set
@@ -29,21 +27,6 @@ bnfPrinter = toBNF id
-- | Print the concrete syntax @cnc@ of @pgf@ as a context-free grammar:
-- the grammar is approximated with 'pgfToCFG', passed through the
-- transformation @f@, and the result is rendered with 'prCFG'.
toBNF :: (CFG -> CFG) -> PGF -> CId -> String
toBNF f pgf cnc = prCFG (f (pgfToCFG pgf cnc))
-- FIXME: move this somewhere else
fcfgPrinter :: PGF -> CId -> String
fcfgPrinter pgf cnc = unlines (map showRule rules)
where
pinfo = fromMaybe (error "fcfgPrinter") (lookParser pgf cnc)
rules :: [FRule]
rules = Array.elems (PGF.allRules pinfo)
showRule (FRule cid ps cs fc arr) = prCId cid ++ " " ++ show ps ++ ". " ++ showCat fc ++ " ::= [" ++ concat (intersperse ", " (map showCat cs)) ++ "] = " ++ showLin arr
where
showLin arr = "[" ++ concat (intersperse ", " [ unwords (map showFSymbol (Array.elems r)) | r <- Array.elems arr]) ++ "]"
showFSymbol (FSymCat i j) = showCat (cs!!j) ++ "_" ++ show j ++ "." ++ show i
showFSymbol (FSymTok t) = t
showCat c = "C" ++ show c
pgfToCFG :: PGF
-> CId -- ^ Concrete syntax name
@@ -52,12 +35,13 @@ pgfToCFG pgf lang = mkCFG (lookStartCat pgf) extCats (startRules ++ concatMap fr
where
pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang)
rules :: [FRule]
rules = Array.elems (PGF.allRules pinfo)
rules :: [(FCat,Production)]
rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.productions pinfo)
, prod <- Set.toList set]
fcatCats :: Map FCat Cat
fcatCats = Map.fromList [(fc, prCId c ++ "_" ++ show i)
| (c,fcs) <- Map.toList (startupCats pinfo),
| (c,fcs) <- Map.toList (startCats pinfo),
(fc,i) <- zip fcs [1..]]
fcatCat :: FCat -> Cat
@@ -69,49 +53,61 @@ pgfToCFG pgf lang = mkCFG (lookStartCat pgf) extCats (startRules ++ concatMap fr
-- gets the number of fields in the lincat for the given category
catLinArity :: FCat -> Int
catLinArity c = maximum (1:[rangeSize (bounds rhs) | FRule _ _ _ _ rhs <- Map.findWithDefault [] c rulesByFCat])
catLinArity c = maximum (1:[rangeSize (bounds rhs) | (FFun _ _ rhs, _) <- topdownRules c])
-- Collect every function application that can produce the category @cat@,
-- as pairs of the function definition (looked up in 'functions') and the
-- argument categories. Coercion productions are not returned themselves;
-- the productions of the coerced category are collected in their place.
topdownRules cat = collect cat []
  where
    -- Prepend everything reachable from category @c@ to @acc@.
    collect c acc =
      case IntMap.lookup c (productions pinfo) of
        Nothing    -> acc
        Just prods -> Set.fold step acc prods

    step (FApply funid args) acc = (functions pinfo ! funid, args) : acc
    step (FCoerce c)         acc = collect c acc
rulesByFCat :: Map FCat [FRule]
rulesByFCat = Map.fromListWith (++) [(c,[r]) | r@(FRule _ _ _ c _) <- rules]
-- The set of left-hand-side categories of the start rules.
extCats :: Set Cat
extCats = Set.fromList [lhsCat rule | rule <- startRules]
startRules :: [CFRule]
startRules = [CFRule (prCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0)
| (c,fcs) <- Map.toList (startupCats pinfo),
| (c,fcs) <- Map.toList (startCats pinfo),
fc <- fcs, not (isLiteralFCat fc),
r <- [0..catLinArity fc-1]]
fruleToCFRule :: FRule -> [CFRule]
fruleToCFRule (FRule f ps args c rhs) =
fruleToCFRule :: (FCat,Production) -> [CFRule]
fruleToCFRule (c,FApply funid args) =
[CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm (map (fixProfile row) ps))
| (l,row) <- Array.assocs rhs, not (containsLiterals row)]
| (l,seqid) <- Array.assocs rhs
, let row = sequences pinfo ! seqid
, not (containsLiterals row)]
where
FFun f ps rhs = functions pinfo ! funid
mkRhs :: Array FPointPos FSymbol -> [CFSymbol]
mkRhs = map fsymbolToSymbol . Array.elems
containsLiterals :: Array FPointPos FSymbol -> Bool
containsLiterals row = any isLiteralFCat [args!!n | FSymCat _ n <- Array.elems row]
containsLiterals row = any isLiteralFCat [args!!n | FSymCat n _ <- Array.elems row]
fsymbolToSymbol :: FSymbol -> CFSymbol
fsymbolToSymbol (FSymCat l n) = NonTerminal (fcatToCat (args!!n) l)
fsymbolToSymbol (FSymTok t) = Terminal t
fsymbolToSymbol (FSymCat n l) = NonTerminal (fcatToCat (args!!n) l)
fsymbolToSymbol (FSymTok (KS t)) = Terminal t
fixProfile :: Array FPointPos FSymbol -> Profile -> Profile
fixProfile row = concatMap positions
where
nts = zip [0..] [nt | nt@(FSymCat _ _) <- Array.elems row ]
positions i = [k | (k,FSymCat _ j) <- nts, j == i]
nts = zip [0..] [nt | nt@(FSymCat _ _) <- Array.elems row]
positions i = [k | (k,FSymCat j _) <- nts, j == i]
profilesToTerm :: [Profile] -> CFTerm
profilesToTerm [[n]] | f == wildCId = CFRes n
profilesToTerm ps = CFObj f (zipWith profileToTerm argTypes ps)
where (argTypes,_) = catSkeleton $ lookType pgf f
profileToTerm :: CId -> Profile -> CFTerm
profileToTerm t [] = CFMeta t
profileToTerm _ xs = CFRes (last xs) -- FIXME: unify
fruleToCFRule (c,FCoerce c') =
[CFRule (fcatToCat c l) [NonTerminal (fcatToCat c' l)] (CFRes 0)
| l <- [0..catLinArity c-1]]
-- | True when the category is one of the literal categories
-- 'fcatString', 'fcatInt', 'fcatFloat' or 'fcatVar'.
isLiteralFCat :: FCat -> Bool
isLiteralFCat fcat = fcat `elem` literalCats
  where
    literalCats = [fcatString, fcatInt, fcatFloat, fcatVar]

View File

@@ -25,6 +25,7 @@ import qualified Text.ParserCombinators.ReadP as RP
import System.Cmd
import System.CPUTime
import Control.Exception
import Control.Monad
import Data.Version
import GF.System.Signal
--import System.IO.Error (try)
@@ -203,9 +204,10 @@ wordCompletion gfenv line0 prefix0 p =
-> do mb_state0 <- try (evaluate (initState pgf (optLang opts) (optCat opts)))
case mb_state0 of
Right state0 -> let ws = words (take (length s - length prefix) s)
state = foldl nextState state0 ws
compls = getCompletions state prefix
in ret ' ' (map (encode gfenv) (Map.keys compls))
in case foldM nextState state0 ws of
Nothing -> ret ' ' []
Just state -> let compls = getCompletions state prefix
in ret ' ' (map (encode gfenv) (Map.keys compls))
Left _ -> ret ' ' []
CmplOpt (Just (Command n _ _)) pref
-> case Map.lookup n (commands cmdEnv) of

View File

@@ -77,6 +77,7 @@ import Data.Char
import qualified Data.Map as Map
import Data.Maybe
import System.Random (newStdGen)
import Control.Monad
---------------------------------------------------
-- Interface
@@ -211,7 +212,7 @@ parse pgf lang cat s =
Just cnc -> case parser cnc of
Just pinfo -> if Map.lookup (mkCId "erasing") (cflags cnc) == Just "on"
then Incremental.parse pinfo (mkCId cat) (words s)
else case parseFCFG "bottomup" pinfo (mkCId cat) (words s) of
else case parseFCFG "topdown" pinfo (mkCId cat) (words s) of
Ok x -> x
Bad s -> error s
Nothing -> error ("No parser built for language: " ++ lang)
@@ -259,9 +260,10 @@ startCat pgf = lookStartCat pgf
complete pgf from cat input =
let (ws,prefix) = tokensAndPrefix input
state0 = initState pgf from cat
state = foldl Incremental.nextState state0 ws
compls = Incremental.getCompletions state prefix
in [unwords (ws++[c]) ++ " " | c <- Map.keys compls]
in case foldM Incremental.nextState state0 ws of
Nothing -> []
Just state -> let compls = Incremental.getCompletions state prefix
in [unwords (ws++[c]) ++ " " | c <- Map.keys compls]
where
tokensAndPrefix :: String -> ([String],String)
tokensAndPrefix s | not (null s) && isSpace (last s) = (words s, "")

View File

@@ -15,50 +15,62 @@ import PGF.CId
import PGF.Data
import PGF.Parsing.FCFG.Utilities
import Data.Array
import Data.Array.IArray
import Data.Maybe
import qualified Data.IntMap as IntMap
import qualified Data.Map as Map
import qualified Data.Set as Set
import Debug.Trace
-- | Extra lookup tables for the chart parser, derived from a 'ParserInfo'
-- by 'buildParserInfo'. Every entry is a triple of a function id, the
-- argument categories of one application of that function, and the
-- category which that application produces.
data ParserInfoEx
= ParserInfoEx { epsilonRules :: [(FunId,[FCat],FCat)]
-- ^ applications whose first sequence (label 0) has no symbol at position 0
, leftcornerCats :: Assoc FCat [(FunId,[FCat],FCat)]
-- ^ applications whose first sequence starts with a category symbol,
-- indexed by that category and by the categories directly coerced into it
, leftcornerTokens :: Assoc String [(FunId,[FCat],FCat)]
-- ^ applications whose first sequence starts with a token, indexed by that token
, grammarToks :: [String]
-- ^ all tokens that occur in the sequences of the grammar
}
------------------------------------------------------------
-- parser information
getLeftCornerTok (FRule _ _ _ _ lins)
getLeftCornerTok pinfo (FFun _ _ lins)
| inRange (bounds syms) 0 = case syms ! 0 of
FSymTok tok -> [tok]
FSymTok (KS tok) -> [tok]
_ -> []
| otherwise = []
where
syms = (sequences pinfo) ! (lins ! 0)
getLeftCornerCat pinfo args (FFun _ _ lins)
| inRange (bounds syms) 0 = case syms ! 0 of
FSymCat d _ -> let cat = args !! d
in case IntMap.lookup cat (productions pinfo) of
Just set -> cat : [cat' | FCoerce cat' <- Set.toList set]
Nothing -> [cat]
_ -> []
| otherwise = []
where
syms = lins ! 0
syms = (sequences pinfo) ! (lins ! 0)
getLeftCornerCat (FRule _ _ args _ lins)
| inRange (bounds syms) 0 = case syms ! 0 of
FSymCat _ d -> [args !! d]
_ -> []
| otherwise = []
where
syms = lins ! 0
buildParserInfo :: ParserInfo -> ParserInfoEx
buildParserInfo pinfo =
ParserInfoEx { epsilonRules = epsilonrules
, leftcornerCats = leftcorncats
, leftcornerTokens = leftcorntoks
, grammarToks = grammartoks
}
buildParserInfo :: FGrammar -> ParserInfo
buildParserInfo (grammar,startup) = -- trace (unlines [prt (x,Set.toList set) | (x,set) <- Map.toList leftcornFilter]) $
ParserInfo { allRules = allrules
, topdownRules = topdownrules
-- , emptyRules = emptyrules
, epsilonRules = epsilonrules
, leftcornerCats = leftcorncats
, leftcornerTokens = leftcorntoks
, grammarCats = grammarcats
, grammarToks = grammartoks
, startupCats = startup
}
where allrules = listArray (0,length grammar-1) grammar
topdownrules = accumAssoc id [(cat, ruleid) | (ruleid, FRule _ _ _ cat _) <- assocs allrules]
epsilonrules = [ ruleid | (ruleid, FRule _ _ _ _ lins) <- assocs allrules,
not (inRange (bounds (lins ! 0)) 0) ]
leftcorncats = accumAssoc id [ (cat, ruleid) | (ruleid, rule) <- assocs allrules, cat <- getLeftCornerCat rule ]
leftcorntoks = accumAssoc id [ (tok, ruleid) | (ruleid, rule) <- assocs allrules, tok <- getLeftCornerTok rule ]
grammarcats = aElems topdownrules
grammartoks = nubsort [t | (FRule _ _ _ _ lins) <- grammar, lin <- elems lins, FSymTok t <- elems lin]
where epsilonrules = [ (ruleid,args,cat)
| (cat,set) <- IntMap.toList (productions pinfo)
, (FApply ruleid args) <- Set.toList set
, let (FFun _ _ lins) = (functions pinfo) ! ruleid
, not (inRange (bounds ((sequences pinfo) ! (lins ! 0))) 0) ]
leftcorncats = accumAssoc id [ (cat', (ruleid, args, cat))
| (cat,set) <- IntMap.toList (productions pinfo)
, (FApply ruleid args) <- Set.toList set
, cat' <- getLeftCornerCat pinfo args ((functions pinfo) ! ruleid) ]
leftcorntoks = accumAssoc id [ (tok, (ruleid, args, cat))
| (cat,set) <- IntMap.toList (productions pinfo)
, (FApply ruleid args) <- Set.toList set
, tok <- getLeftCornerTok pinfo ((functions pinfo) ! ruleid) ]
grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymTok (KS t) <- elems lin]

View File

@@ -2,11 +2,13 @@ module PGF.Data where
import PGF.CId
import GF.Text.UTF8
import GF.Data.Assoc
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.List
import Data.Array
import Data.Array.Unboxed
-- internal datatypes for PGF
@@ -108,32 +110,28 @@ data Equation =
deriving (Eq,Ord,Show)
type FToken = String
type FCat = Int
type FIndex = Int
data FSymbol
= FSymCat {-# UNPACK #-} !FIndex {-# UNPACK #-} !Int
| FSymTok FToken
type Profile = [Int]
type FPointPos = Int
type FGrammar = ([FRule], Map.Map CId [FCat])
data FRule = FRule CId [Profile] [FCat] FCat (Array FIndex (Array FPointPos FSymbol))
type RuleId = Int
data FSymbol
= FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
| FSymTok Tokn
deriving (Eq,Ord,Show)
type Profile = [Int]
-- | One way of producing a category; the 'productions' table of
-- 'ParserInfo' maps every category to a set of these.
data Production
= FApply {-# UNPACK #-} !FunId [FCat]
-- ^ application of a function (an index into 'functions') to arguments
-- of the listed categories
| FCoerce {-# UNPACK #-} !FCat
-- ^ coercion: whatever produces the given category also produces this one
deriving (Eq,Ord,Show)
data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
type FSeq = Array FPointPos FSymbol
type FunId = Int
type SeqId = Int
data ParserInfo
= ParserInfo { allRules :: Array RuleId FRule
, topdownRules :: Assoc FCat [RuleId]
-- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
-- , emptyRules :: [RuleId]
, epsilonRules :: [RuleId]
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
, leftcornerCats :: Assoc FCat [RuleId]
, leftcornerTokens :: Assoc FToken [RuleId]
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
, grammarCats :: [FCat]
, grammarToks :: [FToken]
, startupCats :: Map.Map CId [FCat]
= ParserInfo { functions :: Array FunId FFun
, sequences :: Array SeqId FSeq
, productions :: IntMap.IntMap (Set.Set Production)
, startCats :: Map.Map CId [FCat]
}

View File

@@ -49,12 +49,6 @@ lookValCat pgf = valCat . lookType pgf
-- | Find the parser information stored with the concrete syntax @lang@.
-- The result is 'Nothing' when @pgf@ has no such concrete syntax or when
-- that concrete syntax carries no parser.
lookParser :: PGF -> CId -> Maybe ParserInfo
lookParser pgf lang = do
  cnc <- Map.lookup lang (concretes pgf)
  parser cnc
lookFCFG :: PGF -> CId -> Maybe FGrammar
lookFCFG pgf lang = fmap toFGrammar $ lookParser pgf lang
where
toFGrammar :: ParserInfo -> FGrammar
toFGrammar pinfo = (Array.elems (allRules pinfo), startupCats pinfo)
-- | Name of the start category: the value of the @startcat@ flag, looked
-- up first in the global flags and then in the flags of the abstract
-- syntax, with @"S"@ as the fallback when neither sets it.
lookStartCat :: PGF -> String
lookStartCat pgf = fromMaybe "S" (msum candidates)
  where
    flag       = mkCId "startcat"
    candidates = [Map.lookup flag flags | flags <- [gflags pgf, aflags (abstract pgf)]]

View File

@@ -8,7 +8,7 @@
-----------------------------------------------------------------------------
module PGF.Parsing.FCFG
(buildParserInfo,ParserInfo,parseFCFG) where
(ParserInfo,parseFCFG) where
import GF.Data.ErrM
import GF.Data.Assoc
@@ -17,7 +17,6 @@ import GF.Data.SortedList
import PGF.CId
import PGF.Data
import PGF.Macros
import PGF.BuildParser
import PGF.Parsing.FCFG.Utilities
import qualified PGF.Parsing.FCFG.Active as Active
import qualified PGF.Parsing.FCFG.Incremental as Incremental

View File

@@ -17,17 +17,22 @@ import qualified GF.Data.MultiMap as MM
import PGF.CId
import PGF.Data
import PGF.Parsing.FCFG.Utilities
import PGF.BuildParser
import Control.Monad (guard)
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import Data.Array
import Data.Array.IArray
import Debug.Trace
----------------------------------------------------------------------
-- * parsing
type FToken = String
-- Build the final edge for category @cat@ spanning input positions @i@ to
-- @j@. The span from 0 to 0 is represented by 'EmptyRange' rather than by
-- a range built with 'makeRange'.
makeFinalEdge cat i j
  | i == 0 && j == 0 = (cat, [EmptyRange])
  | otherwise        = (cat, [makeRange i j])
@@ -36,77 +41,79 @@ parse :: String -> ParserInfo -> CId -> [FToken] -> [Tree]
parse strategy pinfo start toks = nubsort $ filteredForests >>= forest2trees
where
inTokens = input toks
starts = Map.findWithDefault [] start (startupCats pinfo)
starts = Map.findWithDefault [] start (startCats pinfo)
schart = xchart2syntaxchart chart pinfo
(i,j) = inputBounds inTokens
finalEdges = [makeFinalEdge cat i j | cat <- starts]
forests = chart2forests schart (const False) finalEdges
filteredForests = forests >>= applyProfileToForest
chart = process strategy pinfo inTokens axioms emptyXChart
axioms | isBU strategy = literals pinfo inTokens ++ initialBU pinfo inTokens
| isTD strategy = literals pinfo inTokens ++ initialTD pinfo starts inTokens
pinfoex = buildParserInfo pinfo
chart = process strategy pinfo pinfoex inTokens axioms emptyXChart
axioms | isBU strategy = literals pinfoex inTokens ++ initialBU pinfo pinfoex inTokens
| isTD strategy = literals pinfoex inTokens ++ initialTD pinfo starts inTokens
isBU s = s=="b"
isTD s = s=="t"
-- used in prediction
emptyChildren :: RuleId -> ParserInfo -> SyntaxNode RuleId RangeRec
emptyChildren ruleid pinfo = SNode ruleid (replicate (length rhs) [])
where
FRule _ _ rhs _ _ = allRules pinfo ! ruleid
-- | A syntax node for the function @ruleid@ in which no argument has been
-- filled in yet: it carries one empty range record per argument category.
emptyChildren :: FunId -> [FCat] -> SyntaxNode FunId RangeRec
emptyChildren ruleid args = SNode ruleid unfilled
  where
    unfilled = replicate (length args) []
process :: String -> ParserInfo -> Input FToken -> [(FCat,Item)] -> XChart FCat -> XChart FCat
process strategy pinfo toks [] chart = chart
process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks items $! univRule c item chart
process :: String -> ParserInfo -> ParserInfoEx -> Input FToken -> [Item] -> XChart FCat -> XChart FCat
process strategy pinfo pinfoex toks [] chart = chart
process strategy pinfo pinfoex toks (item:items) chart = process strategy pinfo pinfoex toks items $! univRule item chart
where
univRule cat item@(Active found rng lbl ppos node@(SNode ruleid recs)) chart
univRule item@(Active found rng lbl ppos node@(SNode ruleid recs) args cat) chart
| inRange (bounds lin) ppos =
case lin ! ppos of
FSymCat r d -> let c = args !! d
FSymCat d r -> let c = args !! d
in case recs !! d of
[] -> case insertXChart chart item c of
Nothing -> chart
Just chart -> let items = do item@(Final found' _) <- lookupXChartFinal chart c
Just chart -> let items = do item@(Final found' _ _ _) <- lookupXChartFinal chart c
rng <- concatRange rng (found' !! r)
return (c, Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)))
return (Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)) args cat)
++
do guard (isTD strategy)
ruleid <- topdownRules pinfo ? c
return (c, Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo))
in process strategy pinfo toks items chart
(ruleid,args) <- topdownRules pinfo c
return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args c)
in process strategy pinfo pinfoex toks items chart
found' -> let items = do rng <- concatRange rng (found' !! r)
return (c, Active found rng lbl (ppos+1) node)
in process strategy pinfo toks items chart
FSymTok tok -> let items = do t_rng <- inputToken toks ? tok
return (Active found rng lbl (ppos+1) node args cat)
in process strategy pinfo pinfoex toks items chart
FSymTok (KS tok)
-> let items = do t_rng <- inputToken toks ? tok
rng' <- concatRange rng t_rng
return (cat, Active found rng' lbl (ppos+1) node)
in process strategy pinfo toks items chart
return (Active found rng' lbl (ppos+1) node args cat)
in process strategy pinfo pinfoex toks items chart
| otherwise =
if inRange (bounds lins) (lbl+1)
then univRule cat (Active (rng:found) EmptyRange (lbl+1) 0 node) chart
else univRule cat (Final (reverse (rng:found)) node) chart
then univRule (Active (rng:found) EmptyRange (lbl+1) 0 node args cat) chart
else univRule (Final (reverse (rng:found)) node args cat) chart
where
(FRule _ _ args cat lins) = allRules pinfo ! ruleid
lin = lins ! lbl
univRule cat item@(Final found' node) chart =
(FFun _ _ lins) = functions pinfo ! ruleid
lin = sequences pinfo ! (lins ! lbl)
univRule item@(Final found' node args cat) chart =
case insertXChart chart item cat of
Nothing -> chart
Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _)) <- lookupXChartAct chart cat
let FRule _ _ args _ lins = allRules pinfo ! ruleid
FSymCat r d = lins ! l ! ppos
Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _) args c) <- lookupXChartAct chart cat
let FFun _ _ lins = functions pinfo ! ruleid
FSymCat d r = (sequences pinfo ! (lins ! l)) ! ppos
rng <- concatRange rng (found' !! r)
return (args !! d, Active found rng l (ppos+1) (updateChildren node d found'))
return (Active found rng l (ppos+1) (updateChildren node d found') args c)
++
do guard (isBU strategy)
ruleid <- leftcornerCats pinfo ? cat
let FRule _ _ args _ lins = allRules pinfo ! ruleid
FSymCat r d = lins ! 0 ! 0
return (args !! d, Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid pinfo) d found'))
(ruleid,args,c) <- leftcornerCats pinfoex ? cat
let FFun _ _ lins = functions pinfo ! ruleid
FSymCat d r = (sequences pinfo ! (lins ! 0)) ! 0
return (Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid args) d found') args c)
updateChildren :: SyntaxNode RuleId RangeRec -> Int -> RangeRec -> SyntaxNode RuleId RangeRec
updateChildren :: SyntaxNode FunId RangeRec -> Int -> RangeRec -> SyntaxNode FunId RangeRec
updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs
in process strategy pinfo toks items chart
in process strategy pinfo pinfoex toks items chart
----------------------------------------------------------------------
-- * XChart
@@ -116,21 +123,23 @@ data Item
Range
{-# UNPACK #-} !FIndex
{-# UNPACK #-} !FPointPos
(SyntaxNode RuleId RangeRec)
| Final RangeRec (SyntaxNode RuleId RangeRec)
deriving (Eq, Ord)
(SyntaxNode FunId RangeRec)
[FCat]
FCat
| Final RangeRec (SyntaxNode FunId RangeRec) [FCat] FCat
deriving (Eq, Ord, Show)
data XChart c = XChart !(MM.MultiMap c Item) !(MM.MultiMap c Item)
emptyXChart :: Ord c => XChart c
emptyXChart = XChart MM.empty MM.empty
insertXChart (XChart actives finals) item@(Active _ _ _ _ _) c =
insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _ _) c =
case MM.insert' c item actives of
Nothing -> Nothing
Just actives -> Just (XChart actives finals)
insertXChart (XChart actives finals) item@(Final _ _) c =
insertXChart (XChart actives finals) item@(Final _ _ _ _) c =
case MM.insert' c item finals of
Nothing -> Nothing
Just finals -> Just (XChart actives finals)
@@ -142,17 +151,17 @@ xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) (
xchart2syntaxchart (XChart actives finals) pinfo =
accumAssoc groupSyntaxNodes $
[ case node of
SNode ruleid rrecs -> let FRule fun prof rhs cat _ = allRules pinfo ! ruleid
SNode ruleid rrecs -> let FFun fun prof _ = functions pinfo ! ruleid
in ((cat,found), SNode (fun,prof) (zip rhs rrecs))
SString s -> ((cat,found), SString s)
SInt n -> ((cat,found), SInt n)
SFloat f -> ((cat,found), SFloat f)
| (cat, Final found node) <- MM.toList finals
| (Final found node rhs cat) <- MM.elems finals
]
literals :: ParserInfo -> Input FToken -> [(FCat,Item)]
literals pinfo toks =
[let (c,node) = lexer t in (c,Final [rng] node) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfo)]
literals :: ParserInfoEx -> Input FToken -> [Item]
literals pinfoex toks =
[let (c,node) = lexer t in (Final [rng] node [] c) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfoex)]
where
lexer t =
case reads t of
@@ -166,24 +175,30 @@ literals pinfo toks =
-- Earley --
-- called with all starting categories
initialTD :: ParserInfo -> [FCat] -> Input FToken -> [(FCat,Item)]
initialTD :: ParserInfo -> [FCat] -> Input FToken -> [Item]
initialTD pinfo starts toks =
do cat <- starts
ruleid <- topdownRules pinfo ? cat
return (cat,Active [] (Range 0 0) 0 0 (emptyChildren ruleid pinfo))
(ruleid,args) <- topdownRules pinfo cat
return (Active [] (Range 0 0) 0 0 (emptyChildren ruleid args) args cat)
-- | All function applications that can produce the category @cat@, as
-- pairs of function id and argument categories. Coercion productions are
-- not returned themselves; the productions of the coerced category are
-- collected in their place.
topdownRules :: ParserInfo -> FCat -> [(FunId,[FCat])]
topdownRules pinfo cat = collect cat []
  where
    -- Prepend everything reachable from category @c@ to @acc@.
    collect c acc =
      case IntMap.lookup c (productions pinfo) of
        Nothing    -> acc
        Just prods -> Set.fold step acc prods

    step (FApply ruleid args) acc = (ruleid, args) : acc
    step (FCoerce c)          acc = collect c acc
----------------------------------------------------------------------
-- Kilbury --
initialBU :: ParserInfo -> Input FToken -> [(FCat,Item)]
initialBU pinfo toks =
initialBU :: ParserInfo -> ParserInfoEx -> Input FToken -> [Item]
initialBU pinfo pinfoex toks =
do (tok,rngs) <- aAssocs (inputToken toks)
ruleid <- leftcornerTokens pinfo ? tok
let FRule _ _ _ cat _ = allRules pinfo ! ruleid
(ruleid,args,cat) <- leftcornerTokens pinfoex ? tok
rng <- rngs
return (cat,Active [] rng 0 1 (emptyChildren ruleid pinfo))
return (Active [] rng 0 1 (emptyChildren ruleid args) args cat)
++
do ruleid <- epsilonRules pinfo
let FRule _ _ _ cat _ = allRules pinfo ! ruleid
return (cat,Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo))
do (ruleid,args,cat) <- epsilonRules pinfoex
let FFun _ _ _ = functions pinfo ! ruleid
return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args cat)

View File

@@ -8,55 +8,54 @@ module PGF.Parsing.FCFG.Incremental
, parse
) where
import Data.Array
import Data.Array.IArray
import Data.Array.Base (unsafeAt)
import Data.List (isPrefixOf, foldl')
import Data.Maybe (fromMaybe)
import Data.Maybe (fromMaybe, maybe)
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import Control.Monad
import GF.Data.Assoc
import GF.Data.SortedList
import qualified GF.Data.MultiMap as MM
import PGF.CId
import PGF.Data
import PGF.Parsing.FCFG.Utilities
import Debug.Trace
parse :: ParserInfo -> CId -> [FToken] -> [Tree]
parse pinfo start toks = extractExps (foldl' nextState (initState pinfo start) toks) start
-- | Parse the tokens @toks@ starting from the category @start@. The tokens
-- are fed to 'nextState' one at a time; as soon as one of them cannot be
-- consumed ('nextState' gives 'Nothing') the result is the empty list,
-- otherwise the trees are read off the final state with 'extractExps'.
parse :: ParserInfo -> CId -> [String] -> [Tree]
parse pinfo start toks =
  case foldM nextState (initState pinfo start) toks of
    Nothing    -> []
    Just state -> extractExps state start
initState :: ParserInfo -> CId -> ParseState
initState pinfo start =
let items = do
c <- Map.findWithDefault [] start (startupCats pinfo)
ruleid <- topdownRules pinfo ? c
let (FRule fn _ args cat lins) = allRules pinfo ! ruleid
lbl <- indices lins
return (Active 0 lbl 0 ruleid args cat)
cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) [] cat (productions pinfo)
let FFun fn _ lins = functions pinfo ! funid
(lbl,seqid) <- assocs lins
return (Active 0 0 funid seqid args (AK cat lbl))
forest = IntMap.fromListWith Set.union [(cat, Set.singleton (Passive ruleid args)) | (ruleid, FRule _ _ args cat _) <- assocs (allRules pinfo)]
max_fid = maximum (0:[maximum (cat:args) | (ruleid, FRule _ _ args cat _) <- assocs (allRules pinfo)])+1
max_fid = maximum (0:[maximum (cat:args) | (cat, set) <- IntMap.toList (productions pinfo)
, p <- Set.toList set
, let args = case p of {FApply _ args -> args; FCoerce cat -> [cat]}])+1
in State pinfo
(Chart MM.empty [] Map.empty forest max_fid 0)
(Chart emptyAC [] emptyPC (productions pinfo) max_fid 0)
(Set.fromList items)
-- | From the current state and the next token
-- 'nextState' computes a new state where the token
-- is consumed and the current position shifted by one.
nextState :: ParseState -> String -> ParseState
nextState :: ParseState -> String -> Maybe ParseState
nextState (State pinfo chart items) t =
let (items1,chart1) = process add (allRules pinfo) (Set.toList items) (Set.empty,chart)
chart2 = chart1{ active =MM.empty
let (items1,chart1) = process add (sequences pinfo) (functions pinfo) (Set.toList items) Set.empty chart
chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1
, passive=Map.empty
, passive=emptyPC
, offset =offset chart1+1
}
in State pinfo chart2 items1
in if Set.null items1
then Nothing
else Just (State pinfo chart2 items1)
where
add tok item set
| tok == t = Set.insert item set
@@ -68,107 +67,157 @@ nextState (State pinfo chart items) t =
-- the GF interpreter.
getCompletions :: ParseState -> String -> Map.Map String ParseState
getCompletions (State pinfo chart items) w =
let (map',chart1) = process add (allRules pinfo) (Set.toList items) (MM.empty,chart)
chart2 = chart1{ active =MM.empty
let (map',chart1) = process add (sequences pinfo) (functions pinfo) (Set.toList items) Map.empty chart
chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1
, passive=Map.empty
, passive=emptyPC
, offset =offset chart1+1
}
in fmap (State pinfo chart2) map'
where
add tok item map
| isPrefixOf w tok = fromMaybe map (MM.insert' tok item map)
| isPrefixOf w tok = Map.insertWith Set.union tok (Set.singleton item) map
| otherwise = map
extractExps :: ParseState -> CId -> [Tree]
extractExps (State pinfo chart items) start = exps
where
(_,st) = process (\_ _ -> id) (allRules pinfo) (Set.toList items) ((),chart)
(_,st) = process (\_ _ -> id) (sequences pinfo) (functions pinfo) (Set.toList items) () chart
exps = nubsort $ do
c <- Map.findWithDefault [] start (startupCats pinfo)
ruleid <- topdownRules pinfo ? c
let (FRule fn _ args cat lins) = allRules pinfo ! ruleid
cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) [] cat (productions pinfo)
let FFun fn _ lins = functions pinfo ! funid
lbl <- indices lins
fid <- Map.lookup (PK c lbl 0) (passive st)
Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
go Set.empty fid
go rec fid
| Set.member fid rec = mzero
| otherwise = do set <- IntMap.lookup fid (forest st)
Passive ruleid args <- Set.toList set
let (FRule fn _ _ cat lins) = allRules pinfo ! ruleid
if fn == wildCId
then go (Set.insert fid rec) (head args)
else do args <- mapM (go (Set.insert fid rec)) args
return (Fun fn args)
go rec fcat
| Set.member fcat rec = mzero
| otherwise = do (funid,args) <- foldForest (\funid args -> (:) (funid,args)) [] fcat (forest st)
let FFun fn _ lins = functions pinfo ! funid
args <- mapM (go (Set.insert fcat rec)) args
return (Fun fn args)
-- | Agenda loop of the parser: repeatedly takes the first pending 'Active'
-- item and processes it, threading an accumulator and the 'Chart', until the
-- list of pending items is empty.
-- NOTE(review): this span is a diff rendering whose +/- markers were lost, so
-- equations of two revisions are interleaved: the four-argument form that
-- delegates to 'univRule', and the six-argument form taking @seqs@ and @funs@.
process fn !rules [] acc_chart = acc_chart
process fn !rules (item:items) acc_chart = univRule item acc_chart
-- Nothing left to process: return the accumulator and the chart.
process fn !seqs !funs [] acc chart = (acc,chart)
process fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart
-- The dot @ppos@ is still inside the sequence @lin@: inspect the symbol under it.
| inRange (bounds lin) ppos =
case unsafeAt lin ppos of
-- Category symbol: @fid@ is the argument category selected by @d@.
FSymCat d r -> let !fid = args !! d
key = AK fid r
-- items2: if a passive entry exists for this key at offset @k@, also queue
-- the current item advanced over it.
items2 = case lookupPC (mkPK key k) (passive chart) of
Nothing -> items
Just id -> (Active j (ppos+1) funid seqid (updateAt d id args) key0) : items
-- items3: items2 plus one fresh item (dot 0, start @k@) for every
-- function application found for @fid@ in the forest.
items3 = foldForest (\funid args -> (:) (Active k 0 funid (rhs funid r) args key)) items2 fid (forest chart)
in case lookupAC key (active chart) of
-- Key not yet in the active chart: record the item and continue with items3.
Nothing -> process fn seqs funs items3 acc chart{active=insertAC key (Set.singleton item) (active chart)}
-- Item already recorded under this key: drop it.
Just set | Set.member item set -> process fn seqs funs items acc chart
-- Key known but item new: record it and continue with items2 (not items3).
| otherwise -> process fn seqs funs items2 acc chart{active=insertAC key (Set.insert item set) (active chart)}
-- Token symbol: the advanced item is handed to the callback @fn@ together
-- with the token, and the result becomes the new accumulator.
-- NOTE(review): only @FSymTok (KS tok)@ is matched; a 'FSymTok' carrying a
-- @KP@ value would not match any alternative of this case -- confirm.
FSymTok (KS tok) -> let !acc' = fn tok (Active j (ppos+1) funid seqid args key0) acc
in process fn seqs funs items acc' chart
-- The dot is past the end of the sequence.
| otherwise =
case lookupPC (mkPK key0 j) (passive chart) of
-- No passive entry yet for (key0, j): use 'nextId' as the new category id,
-- advance the items filed under @key0@ in the active chart selected by
-- @k-j@, and record both the passive entry and the forest production.
Nothing -> let fid = nextId chart
items2 = case lookupAC key0 ((active chart:actives chart) !! (k-j)) of
Nothing -> items
Just set -> Set.fold (\(Active j' ppos funid seqid args keyc) ->
let FSymCat d _ = unsafeAt (unsafeAt seqs seqid) ppos
in (:) (Active j' (ppos+1) funid seqid (updateAt d fid args) keyc)) items set
in process fn seqs funs items2 acc chart{passive=insertPC (mkPK key0 j) fid (passive chart)
,forest =IntMap.insert fid (Set.singleton (FApply funid args)) (forest chart)
,nextId =nextId chart+1
}
-- A passive entry already exists: add the production to its forest set and
-- queue one fresh item per label filed under @id@ in the active chart.
Just id -> let items2 = [Active k 0 funid (rhs funid r) args (AK id r) | r <- labelsAC id (active chart)] ++ items
in process fn seqs funs items2 acc chart{forest = IntMap.insertWith Set.union id (Set.singleton (FApply funid args)) (forest chart)}
where
-- 'univRule' handles a single item for the four-argument form of 'process'.
univRule (Active j lbl ppos ruleid args fid0) acc_chart@(acc,chart)
| inRange (bounds lin) ppos =
case unsafeAt lin ppos of
FSymCat r d -> let !fid = args !! d
in case MM.insert' (AK fid r) item (active chart) of
Nothing -> process fn rules items $ acc_chart
Just actCat -> (case Map.lookup (PK fid r k) (passive chart) of
Nothing -> id
Just id -> process fn rules [Active j lbl (ppos+1) ruleid (updateAt d id args) fid0]) $
(case IntMap.lookup fid (forest chart) of
Nothing -> id
Just set -> process fn rules (Set.fold (\(Passive ruleid args) -> (:) (Active k r 0 ruleid args fid)) [] set)) $
process fn rules items $
(acc,chart{active=actCat})
FSymTok tok -> process fn rules items $
(fn tok (Active j lbl (ppos+1) ruleid args fid0) acc,chart)
| otherwise = case Map.lookup (PK fid0 lbl j) (passive chart) of
Nothing -> let fid = nextId chart
in process fn rules [Active j' lbl (ppos+1) ruleid (updateAt d fid args) fidc
| Active j' lbl ppos ruleid args fidc <- ((active chart:actives chart) !! (k-j)) MM.! (AK fid0 lbl),
let FSymCat _ d = unsafeAt (rhs ruleid lbl) ppos] $
process fn rules items $
(acc,chart{passive=Map.insert (PK fid0 lbl j) fid (passive chart)
,forest =IntMap.insert fid (Set.singleton (Passive ruleid args)) (forest chart)
,nextId =nextId chart+1
})
Just id -> process fn rules items $
(acc,chart{forest = IntMap.insertWith Set.union id (Set.singleton (Passive ruleid args)) (forest chart)})
where
-- @lin@ is the symbol sequence the dot moves through; @k@ is the chart's offset.
!lin = rhs ruleid lbl
!k = offset chart
!lin = unsafeAt seqs seqid
!k = offset chart
rhs ruleid lbl = unsafeAt lins lbl
-- Extend an 'ActiveKey' with a position to obtain a 'PassiveKey'.
mkPK (AK fid lbl) j = PK fid lbl j
-- Look up entry @lbl@ in the @lins@ array of function @funid@.
rhs funid lbl = unsafeAt lins lbl
where
(FRule _ _ _ cat lins) = unsafeAt rules ruleid
FFun _ _ lins = unsafeAt funs funid
-- | Replace the element at zero-based index @nr@ with @x@ and keep every
-- other element.  An index that does not occur in the list leaves the list
-- unchanged.
updateAt :: Int -> a -> [a] -> [a]
updateAt nr x = zipWith pick [0 ..]
  where
    pick pos old
      | pos == nr = x
      | otherwise = old
----------------------------------------------------------------
-- Active Chart
----------------------------------------------------------------
-- | An item on the parser's agenda / in its active chart.
-- NOTE(review): this span is a diff rendering without +/- markers, so field
-- lines of two revisions appear together.  'process' matches the constructor
-- as @Active j ppos funid seqid args key0@ in one revision and as
-- @Active j lbl ppos ruleid args fid0@ in the other; @ppos@ is the position
-- that is checked against the bounds of a symbol sequence and advanced by one.
data Active
= Active {-# UNPACK #-} !Int
{-# UNPACK #-} !FIndex
{-# UNPACK #-} !FPointPos
{-# UNPACK #-} !RuleId
{-# UNPACK #-} !FunId
{-# UNPACK #-} !SeqId
[FCat]
{-# UNPACK #-} !FCat
{-# UNPACK #-} !ActiveKey
deriving (Eq,Show,Ord)
-- | A rule id paired with a list of categories.  Values of this type are the
-- elements of the forest sets in the revision of 'Chart' whose @forest@ field
-- has type @IntMap.IntMap (Set.Set Passive)@.
data Passive
= Passive {-# UNPACK #-} !RuleId
[FCat]
deriving (Eq,Ord,Show)
-- | Key under which active items are filed: an 'FCat' together with an
-- 'FIndex'.  'lookupAC' and 'insertAC' use the first field for the outer map
-- of an 'ActiveChart' and the second for the inner one.
data ActiveKey
= AK {-# UNPACK #-} !FCat
{-# UNPACK #-} !FIndex
deriving (Eq,Ord,Show)
-- | Two-level map from the two components of an 'ActiveKey' (outer level:
-- its 'FCat', inner level: its 'FIndex') to a set of 'Active' items.
type ActiveChart = IntMap.IntMap (IntMap.IntMap (Set.Set Active))
-- | The active chart with no entries.
emptyAC :: ActiveChart
emptyAC = IntMap.empty
-- | Find the set of active items stored under an 'ActiveKey': the category
-- id selects the inner map, then the label selects the set.  'Nothing' is
-- returned when either level has no entry.
lookupAC :: ActiveKey -> ActiveChart -> Maybe (Set.Set Active)
lookupAC (AK fcat l) chart =
  case IntMap.lookup fcat chart of
    Nothing      -> Nothing
    Just byLabel -> IntMap.lookup l byLabel
-- | All labels that have an entry under the given category id in the active
-- chart, in the order returned by 'IntMap.keys'; the empty list when the
-- category id has no entry.
labelsAC :: FCat -> ActiveChart -> [FIndex]
labelsAC fcat chart = maybe [] IntMap.keys (IntMap.lookup fcat chart)
-- | Store a set of active items under an 'ActiveKey'.  Entries of the same
-- category id under other labels are kept; an existing set under the same
-- label is replaced by the new one.
insertAC :: ActiveKey -> Set.Set Active -> ActiveChart -> ActiveChart
insertAC (AK fcat l) set chart = IntMap.insert fcat byLabel chart
  where
    byLabel =
      case IntMap.lookup fcat chart of
        Nothing  -> IntMap.singleton l set
        Just old -> IntMap.insert l set old
----------------------------------------------------------------
-- Passive Chart
----------------------------------------------------------------
-- | Key of the passive chart: an 'FCat', an 'FIndex' and an 'Int'.  The
-- local helper @mkPK@ in 'process' builds it from the two fields of an
-- 'ActiveKey' plus a position (either an item's @j@ or the chart offset @k@).
data PassiveKey
= PK {-# UNPACK #-} !FCat
{-# UNPACK #-} !FIndex
{-# UNPACK #-} !Int
deriving (Eq,Ord,Show)
-- | Map from a 'PassiveKey' to the 'FCat' recorded for it.
type PassiveChart = Map.Map PassiveKey FCat
-- | The passive chart with no entries.
emptyPC :: PassiveChart
emptyPC = Map.empty
-- | Look up the category id recorded for a 'PassiveKey', if any.
lookupPC :: PassiveKey -> PassiveChart -> Maybe FCat
lookupPC = Map.lookup
-- | Record a category id for a 'PassiveKey', replacing any previous entry
-- for the same key.
insertPC :: PassiveKey -> FCat -> PassiveChart -> PassiveChart
insertPC = Map.insert
----------------------------------------------------------------
-- Forest
----------------------------------------------------------------
-- | Fold over the productions stored for a category in the forest.
--
-- Every 'FApply' production is passed to @f@ (function id, arguments,
-- accumulator).  An 'FCoerce' production is not passed to @f@; instead the
-- fold continues with the productions of the category it points to.  A
-- category without an entry leaves the accumulator unchanged.
--
-- This function keeps no record of visited categories, so it relies on the
-- coercions in the forest not forming a cycle.
foldForest :: (FunId -> [FCat] -> b -> b) -> b -> FCat -> IntMap.IntMap (Set.Set Production) -> b
foldForest f b fcat prods =
  maybe b (Set.fold step b) (IntMap.lookup fcat prods)
  where
    step (FApply funid args) acc = f funid args acc
    step (FCoerce target)    acc = foldForest f acc target prods
----------------------------------------------------------------
-- Parse State
----------------------------------------------------------------
-- | An abstract data type whose values represent
-- the current state in an incremental parser.
@@ -176,10 +225,11 @@ data ParseState = State ParserInfo Chart (Set.Set Active)
-- | The parser chart.  As used by 'process':
--
--   * @active@  - active items, filed by 'ActiveKey';
--   * @actives@ - further active charts; 'process' indexes the list
--                 @active chart : actives chart@ with @k-j@ (presumably the
--                 charts of earlier positions -- confirm);
--   * @passive@ - maps a 'PassiveKey' to the category id recorded for it;
--   * @forest@  - maps a category id to its set of productions;
--   * @nextId@  - the id 'process' uses for a new passive entry, after which
--                 it stores @nextId chart+1@;
--   * @offset@  - bound as @k@ in 'process'.
--
-- NOTE(review): this span is a diff rendering without +/- markers, so the
-- first four fields are listed twice (once per revision).
data Chart
= Chart
{ active :: MM.MultiMap ActiveKey Active
, actives :: [MM.MultiMap ActiveKey Active]
, passive :: Map.Map PassiveKey FCat
, forest :: IntMap.IntMap (Set.Set Passive)
{ active :: ActiveChart
, actives :: [ActiveChart]
, passive :: PassiveChart
, forest :: IntMap.IntMap (Set.Set Production)
, nextId :: {-# UNPACK #-} !FCat
, offset :: {-# UNPACK #-} !Int
}
deriving Show

View File

@@ -31,7 +31,7 @@ type RangeRec = [Range]
-- | Either a range made of two strict, unpacked 'Int's or the distinguished
-- 'EmptyRange'.
-- NOTE(review): two @deriving@ lines appear because this is a diff rendering
-- without +/- markers; they differ only in the added 'Show'.
data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int
| EmptyRange
deriving (Eq, Ord)
deriving (Eq, Ord, Show)
-- | Build a non-empty 'Range' from its two 'Int' components; a plain
-- function wrapper around the 'Range' constructor.
makeRange :: Int -> Int -> Range
makeRange i j = Range i j
@@ -83,7 +83,7 @@ data SyntaxNode n e = SMeta
| SString String
| SInt Integer
| SFloat Double
deriving (Eq,Ord)
deriving (Eq,Ord,Show)
groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]]
groupSyntaxNodes [] = []

View File

@@ -3,13 +3,12 @@ module PGF.Raw.Convert (toPGF,fromPGF) where
import PGF.CId
import PGF.Data
import PGF.Raw.Abstract
import PGF.BuildParser (buildParserInfo)
import PGF.Parsing.FCFG.Utilities
import qualified GF.Compile.GenerateFCFG as FCFG
import qualified GF.Compile.GeneratePMCFG as PMCFG
import qualified Data.Array as Array
import qualified Data.Map as Map
import Data.Array.IArray
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
-- | Major and minor version numbers, currently 1 and 0.
pgfMajorVersion, pgfMinorVersion :: Integer
pgfMajorVersion = 1
pgfMinorVersion = 0
@@ -54,11 +53,11 @@ toConcr pgf rexp =
lindefs = Map.empty,
printnames = Map.empty,
paramlincats = Map.empty,
parser = Just (buildParserOnDemand cnc) -- This thunk will be overwritten if there is a parser
parser = Just (PMCFG.convertConcrete (abstract pgf) cnc)
-- This thunk will be overwritten if there is a parser
-- compiled in the PGF file. We use lazy evaluation here
-- to make sure that PMCFG.convertConcrete is called only
-- if it is needed.
}) rexp
in cnc
where
@@ -72,41 +71,44 @@ toConcr pgf rexp =
add cnc (App "param" ts) = cnc { paramlincats = mkTermMap ts }
add cnc (App "parser" ts) = cnc { parser = Just (toPInfo ts) }
buildParserOnDemand cnc = buildParserInfo fcfg
where
fcfg
| Map.lookup (mkCId "erasing") (cflags cnc) == Just "on" = PMCFG.convertConcrete (abstract pgf) cnc
| otherwise = FCFG.convertConcrete (abstract pgf) cnc
-- | Decode the arguments of a @"parser"@ node into a 'ParserInfo'.
--
-- The four-section equation expects exactly the sections @"functions"@,
-- @"sequences"@, @"productions"@ and @"startcats"@, in that order, and
-- decodes them with 'toFFun', 'toFSeq', 'toProductionSet' and 'expToInt'
-- respectively.  An argument list of any other shape is not matched by it.
-- NOTE(review): this span is a diff rendering without +/- markers; the
-- @"rules"@/@"startupcats"@ equation and the @rules@ and first @cats@
-- bindings belong to the other revision.
toPInfo :: [RExp] -> ParserInfo
toPInfo [App "rules" rs, App "startupcats" cs] = buildParserInfo (rules, cats)
toPInfo [App "functions" fs, App "sequences" ss, App "productions" ps,App "startcats" cs] =
ParserInfo { functions = functions
, sequences = seqs
, productions = productions
, startCats = cats
}
where
rules = map toFRule rs
cats = Map.fromList [(mkCId c, map expToInt fs) | App c fs <- cs]
functions = mkArray (map toFFun fs)
seqs = mkArray (map toFSeq ss)
productions = IntMap.fromList (map toProductionSet ps)
cats = Map.fromList [(mkCId c, (map expToInt xs)) | App c xs <- cs]
toFRule :: RExp -> FRule
toFRule (App "rule"
[n,
App "cats" (rt:at),
App "R" ls]) = FRule fun prof args res lins
-- | Decode one function entry.  The node's head is the function name; its
-- two arguments are a @"P"@ node holding the profiles and an @"R"@ node
-- holding the sequence ids.  Elements of the @"R"@ node that are not
-- integer literals are ignored.
toFFun :: RExp -> FFun
toFFun (App f [App "P" ts, App "R" ls]) =
  FFun (mkCId f) (map toProfile ts) (mkArray seqIds)
  where
    seqIds = [fromIntegral n | AInt n <- ls]
-- | Decode a profile: a metavariable is the empty profile, an @"_A"@ node
-- is a single argument index, and a @"_U"@ node is the list of indices of
-- its @"_A"@ children (other children are ignored).
toProfile :: RExp -> Profile
toProfile e = case e of
  AMet         -> []
  App "_A" [t] -> [expToInt t]
  App "_U" ts  -> [expToInt t | App "_A" [t] <- ts]
-- | Decode a @"seq"@ node into a sequence by decoding each child with
-- 'toSymbol' and packing the results into an array indexed from 0.
toFSeq :: RExp -> FSeq
toFSeq (App "seq" ss) = mkArray (map toSymbol ss)
-- | Decode a @"td"@ node: its first child is the category id, the remaining
-- children are productions, decoded by the local @toProduction@ as
-- @"A"@ (function id followed by argument categories, giving 'FApply') or
-- @"C"@ (a single category, giving 'FCoerce').
-- NOTE(review): this span is a diff rendering without +/- markers; the
-- @(fun,prof)@/@args@/@res@/@lins@ bindings and 'toFName' between the
-- @where@ and @toProduction@ belong to the other revision.
toProductionSet :: RExp -> (FCat,Set.Set Production)
toProductionSet (App "td" (rt : xs)) = (expToInt rt, Set.fromList (map toProduction xs))
where
(fun,prof) = toFName n
args = map expToInt at
res = expToInt rt
lins = mkArray [mkArray [toSymbol s | s <- l] | App "S" l <- ls]
toFName :: RExp -> (CId,[Profile])
toFName (App "_A" [x]) = (wildCId, [[expToInt x]])
toFName (App f ts) = (mkCId f, map toProfile ts)
where
toProfile :: RExp -> Profile
toProfile AMet = []
toProfile (App "_A" [t]) = [expToInt t]
toProfile (App "_U" ts) = [expToInt t | App "_A" [t] <- ts]
toProduction (App "A" (ruleid : at)) = FApply (expToInt ruleid) (map expToInt at)
toProduction (App "C" [fcat]) = FCoerce (expToInt fcat)
-- | Decode one symbol of a sequence: a @"P"@ node with two integer children
-- becomes an 'FSymCat', a @"KP"@ node becomes an 'FSymTok' built by 'toKP',
-- and a string literal becomes an 'FSymTok' holding a @KS@ token.
-- NOTE(review): this span is a diff rendering without +/- markers; the two
-- @"P"@ equations differ in argument order and the two 'AStr' equations in
-- the token representation, one pair per revision.
toSymbol :: RExp -> FSymbol
toSymbol (App "P" [n,l]) = FSymCat (expToInt l) (expToInt n)
toSymbol (AStr t) = FSymTok t
toSymbol (App "P" [n,l]) = FSymCat (expToInt n) (expToInt l)
toSymbol (App "KP" (d:alts)) = FSymTok (toKP d alts)
toSymbol (AStr t) = FSymTok (KS t)
toType :: RExp -> Type
toType e = case e of
@@ -142,8 +144,15 @@ toTerm e = case e of
App f [] -> F (mkCId f)
AInt i -> C (fromInteger i)
AMet -> TM "?"
AStr s -> K (KS s) ----
App "KP" (d:alts) -> K (toKP d alts)
AStr s -> K (KS s)
_ -> error $ "term " ++ show e
-- | Build a @KP@ token from the children of a @"KP"@ node: @d@ is an @"S"@
-- node with the default strings and every element of @alts@ is an @"A"@
-- node holding two @"S"@ nodes.  Non-string children of an @"S"@ node are
-- ignored.
toKP :: RExp -> [RExp] -> Tokn
toKP d alts = KP (tokens d) (map alternative alts)
  where
    tokens (App "S" vs) = [v | AStr v <- vs]
    alternative (App "A" [x, y]) = Alt (tokens x) (tokens y)
------------------------------
--- from internal to parser --
@@ -192,8 +201,7 @@ fromExp e = case e of
ELit (LFlt d) -> AFlt d
ELit (LInt i) -> AInt (toInteger i)
EMeta _ -> AMet ----
EEq eqs ->
App "Eq" [App "E" (map fromExp (v:ps)) | Equ ps v <- eqs]
EEq eqs -> App "Eq" [App "E" (map fromExp (v:ps)) | Equ ps v <- eqs]
fromTerm :: Term -> RExp
fromTerm e = case e of
@@ -206,8 +214,11 @@ fromTerm e = case e of
TM _ -> AMet
F f -> App (prCId f) []
V i -> App "A" [AInt (toInteger i)]
K (KS s) -> AStr s ----
K (KP d vs) -> App "FV" (str d : [str v | Alt v _ <- vs]) ----
K t -> fromTokn t
-- | Encode a token.  A @KS@ token becomes a string literal.  A @KP@ token
-- becomes a @"KP"@ node whose first child is an @"S"@ node with the default
-- strings, followed by one @"A"@ node (two @"S"@ nodes) per alternative.
fromTokn :: Tokn -> RExp
fromTokn tok = case tok of
  KS s    -> AStr s
  KP d vs -> App "KP" (strs d : [App "A" [strs v, strs x] | Alt v x <- vs])
  where
    strs ws = App "S" (map AStr ws)
@@ -215,39 +226,42 @@ fromTerm e = case e of
-- | Encode a 'ParserInfo' as a @"parser"@ node.  In the four-section form
-- the children are, in order, @"functions"@, @"sequences"@, @"productions"@
-- and @"startcats"@, encoded with 'fromFFun', 'fromFSeq',
-- 'fromProductionSet' and 'intToExp' respectively.
-- NOTE(review): this span is a diff rendering without +/- markers; the
-- @"rules"@ and @"startupcats"@ lines belong to the other revision.
fromPInfo :: ParserInfo -> RExp
fromPInfo p = App "parser" [
App "rules" [fromFRule rule | rule <- Array.elems (allRules p)],
App "startupcats" [App (prCId f) (map intToExp cs) | (f,cs) <- Map.toList (startupCats p)]
App "functions" [fromFFun fun | fun <- elems (functions p)],
App "sequences" [fromFSeq seq | seq <- elems (sequences p)],
App "productions" [fromProductionSet xs | xs <- IntMap.toList (productions p)],
App "startcats" [App (prCId f) (map intToExp xs) | (f,xs) <- Map.toList (startCats p)]
]
fromFRule :: FRule -> RExp
fromFRule (FRule fun prof args res lins) =
App "rule" [fromFName (fun,prof),
App "cats" (intToExp res:map intToExp args),
App "R" [App "S" [fromSymbol s | s <- Array.elems l] | l <- Array.elems lins]
]
fromFName :: (CId,[Profile]) -> RExp
fromFName (f,ps) | f == wildCId = fromProfile (head ps)
| otherwise = App (prCId f) (map fromProfile ps)
-- | Encode one function entry: the node's head is the printed function name
-- and its two children are a @"P"@ node with the encoded profiles and an
-- @"R"@ node with the sequence ids.
fromFFun :: FFun -> RExp
fromFFun (FFun fun prof lins) = App (prCId fun) [profiles, seqRefs]
  where
    profiles = App "P" (map fromProfile prof)
    seqRefs  = App "R" (map intToExp (elems lins))

    -- An empty profile is a metavariable, a singleton is a bare @"_A"@ node,
    -- anything longer is wrapped in @"_U"@.
    fromProfile :: Profile -> RExp
    fromProfile ps = case ps of
      []  -> AMet
      [x] -> daughter x
      _   -> App "_U" (map daughter ps)

    daughter n = App "_A" [intToExp n]
-- | Encode one symbol of a sequence: an 'FSymCat' becomes a @"P"@ node with
-- two integer children, an 'FSymTok' is encoded by 'fromTokn'.
-- NOTE(review): this span is a diff rendering without +/- markers; the
-- first 'FSymCat' equation (fields bound in the opposite order) and the
-- 'AStr' equation belong to the other revision.
fromSymbol :: FSymbol -> RExp
fromSymbol (FSymCat l n) = App "P" [intToExp n, intToExp l]
fromSymbol (FSymTok t) = AStr t
fromSymbol (FSymCat n l) = App "P" [intToExp n, intToExp l]
fromSymbol (FSymTok t) = fromTokn t
-- | Encode a sequence as a @"seq"@ node whose children are its symbols, in
-- array order, each encoded with 'fromSymbol'.
fromFSeq :: FSeq -> RExp
fromFSeq = App "seq" . map fromSymbol . elems
-- | Encode a category together with its production set as a @"td"@ node:
-- the first child is the category id, followed by one child per production
-- in 'Set.toList' order.  An 'FApply' becomes an @"A"@ node (function id,
-- then argument categories); an 'FCoerce' becomes a @"C"@ node.
fromProductionSet :: (FCat,Set.Set Production) -> RExp
fromProductionSet (cat, xs) =
  App "td" (intToExp cat : [encode p | p <- Set.toList xs])
  where
    encode p = case p of
      FApply ruleid args -> App "A" (intToExp ruleid : map intToExp args)
      FCoerce fcat       -> App "C" [intToExp fcat]
-- ** Utilities
-- | Build a map from identifiers to terms out of a list of one-argument
-- nodes @App f [v]@; elements of any other shape are ignored.  When an
-- identifier occurs more than once, the last occurrence wins.
--
-- 'Map.fromList' is used rather than 'Map.fromAscList': the latter requires
-- the keys to arrive in ascending order and does not check it, so an input
-- that is not sorted according to the 'Ord' instance of 'CId' would produce
-- a malformed map in which lookups can fail silently.  The entries come from
-- parsed input in whatever order it lists them, so no ordering is assumed.
mkTermMap :: [RExp] -> Map.Map CId Term
mkTermMap ts = Map.fromList [(mkCId f,toTerm v) | App f [v] <- ts]
mkArray :: [a] -> Array.Array Int a
mkArray xs = Array.listArray (0, length xs - 1) xs
-- | Pack a list into an immutable array indexed from 0.  The empty list
-- gives an empty array with bounds @(0,-1)@.
mkArray :: IArray a e => [e] -> a Int e
mkArray xs = listArray (0, lastIx) xs
  where
    lastIx = length xs - 1
expToInt :: Integral a => RExp -> a
expToInt (App "neg" [AInt i]) = fromIntegral (negate i)