the new optimized incremental parser and the common subexpression elimination optimization in PMCFG

This commit is contained in:
krasimir
2008-10-14 08:00:50 +00:00
parent 0c66ad597d
commit 4573d10442
17 changed files with 654 additions and 526 deletions

View File

@@ -40,7 +40,6 @@ exportPGF opts fmt pgf =
FmtProlog_Abs -> multi "pl" grammar2prolog_abs FmtProlog_Abs -> multi "pl" grammar2prolog_abs
FmtBNF -> single "bnf" bnfPrinter FmtBNF -> single "bnf" bnfPrinter
FmtEBNF -> single "ebnf" (ebnfPrinter opts) FmtEBNF -> single "ebnf" (ebnfPrinter opts)
FmtFCFG -> single "fcfg" fcfgPrinter
FmtSRGS_XML -> single "grxml" (srgsXmlPrinter opts) FmtSRGS_XML -> single "grxml" (srgsXmlPrinter opts)
FmtSRGS_XML_NonRec -> single "grxml" (srgsXmlNonRecursivePrinter opts) FmtSRGS_XML_NonRec -> single "grxml" (srgsXmlNonRecursivePrinter opts)
FmtSRGS_ABNF -> single "gram" (srgsAbnfPrinter opts) FmtSRGS_ABNF -> single "gram" (srgsAbnfPrinter opts)

View File

@@ -11,11 +11,13 @@ import GF.Data.ErrM
import GF.Infra.Option import GF.Infra.Option
import Control.Monad (mplus) import Control.Monad (mplus)
import Data.Array (Array) import Data.Array.Unboxed (UArray)
import qualified Data.Array as Array import qualified Data.Array.IArray as Array
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import Data.Map (Map) import Data.Map (Map)
import qualified Data.Set as Set
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
pgf2js :: PGF -> String pgf2js :: PGF -> String
pgf2js pgf = pgf2js pgf =
@@ -89,31 +91,44 @@ children = JS.Ident "cs"
-- Parser -- Parser
parser2js :: String -> ParserInfo -> [JS.Expr] parser2js :: String -> ParserInfo -> [JS.Expr]
parser2js start p = [new "Parser" [JS.EStr start, parser2js start p = [new "Parser" [JS.EStr start,
JS.EArray $ map frule2js (Array.elems (allRules p)), JS.EArray $ [frule2js p cat prod | (cat,set) <- IntMap.toList (productions p), prod <- Set.toList set],
JS.EObj $ map cats (Map.assocs (startupCats p))]] JS.EObj $ map cats (Map.assocs (startCats p))]]
where where
cats (c,is) = JS.Prop (JS.IdentPropName (JS.Ident (prCId c))) (JS.EArray (map JS.EInt is)) cats (c,is) = JS.Prop (JS.IdentPropName (JS.Ident (prCId c))) (JS.EArray (map JS.EInt is))
frule2js :: FRule -> JS.Expr frule2js :: ParserInfo -> FCat -> Production -> JS.Expr
frule2js (FRule f ps args res lins) = new "Rule" [JS.EInt res, name2js (f,ps), JS.EArray (map JS.EInt args), lins2js lins] frule2js p res (FApply funid args) = new "Rule" [JS.EInt res, name2js (f,ps), JS.EArray (map JS.EInt args), lins2js p lins]
where
FFun f ps lins = functions p Array.! funid
frule2js p res (FCoerce arg) = new "Rule" [JS.EInt res, daughter 0, JS.EArray [JS.EInt arg], JS.EArray [JS.EArray [sym2js (FSymCat 0 i)] | i <- [0..catLinArity arg-1]]]
where
catLinArity :: FCat -> Int
catLinArity c = maximum (1:[Array.rangeSize (Array.bounds rhs) | (FFun _ _ rhs, _) <- topdownRules c])
topdownRules cat = f cat []
where
f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions p))
g (FApply funid args) rules = (functions p Array.! funid,args) : rules
g (FCoerce cat) rules = f cat rules
name2js :: (CId,[Profile]) -> JS.Expr name2js :: (CId,[Profile]) -> JS.Expr
name2js (f,ps) | f == wildCId = fromProfile (head ps) name2js (f,ps) = new "FunApp" $ [JS.EStr $ prCId f, JS.EArray (map fromProfile ps)]
| otherwise = new "FunApp" $ [JS.EStr $ prCId f, JS.EArray (map fromProfile ps)]
where where
fromProfile :: Profile -> JS.Expr fromProfile :: Profile -> JS.Expr
fromProfile [] = new "MetaVar" [] fromProfile [] = new "MetaVar" []
fromProfile [x] = daughter x fromProfile [x] = daughter x
fromProfile args = new "Unify" [JS.EArray (map daughter args)] fromProfile args = new "Unify" [JS.EArray (map daughter args)]
daughter i = new "Arg" [JS.EInt i] daughter i = new "Arg" [JS.EInt i]
lins2js :: Array FIndex (Array FPointPos FSymbol) -> JS.Expr lins2js :: ParserInfo -> UArray FIndex SeqId -> JS.Expr
lins2js ls = JS.EArray [ JS.EArray [ sym2js s | s <- Array.elems l] | l <- Array.elems ls] lins2js p ls = JS.EArray [JS.EArray [sym2js s | s <- Array.elems (sequences p Array.! seqid)] | seqid <- Array.elems ls]
sym2js :: FSymbol -> JS.Expr sym2js :: FSymbol -> JS.Expr
sym2js (FSymCat l n) = new "ArgProj" [JS.EInt n, JS.EInt l] sym2js (FSymCat n l) = new "ArgProj" [JS.EInt n, JS.EInt l]
sym2js (FSymTok t) = new "Terminal" [JS.EStr t] sym2js (FSymTok (KS t)) = new "Terminal" [JS.EStr t]
new :: String -> [JS.Expr] -> JS.Expr new :: String -> [JS.Expr] -> JS.Expr
new f xs = JS.ENew (JS.Ident f) xs new f xs = JS.ENew (JS.Ident f) xs

View File

@@ -25,17 +25,18 @@ import GF.Data.SortedList
import GF.Data.Utilities (updateNthM, sortNub) import GF.Data.Utilities (updateNthM, sortNub)
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.List as List import qualified Data.List as List
import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Char8 as BS
import Data.Array import Data.Array.IArray
import Data.Maybe import Data.Maybe
import Control.Monad import Control.Monad
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- main conversion function -- main conversion function
convertConcrete :: Abstr -> Concr -> FGrammar convertConcrete :: Abstr -> Concr -> ParserInfo
convertConcrete abs cnc = fixHoasFuns $ convert abs_defs' conc' cats' convertConcrete abs cnc = fixHoasFuns $ convert abs_defs' conc' cats'
where abs_defs = Map.assocs (funs abs) where abs_defs = Map.assocs (funs abs)
conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient" conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient"
@@ -91,14 +92,14 @@ expandHOAS funs lins lincats = (funs' ++ hoFuns ++ varFuns,
-- replaces __NCat with _B and _Var_Cat with _. -- replaces __NCat with _B and _Var_Cat with _.
-- the temporary names are just there to avoid name collisions. -- the temporary names are just there to avoid name collisions.
fixHoasFuns :: FGrammar -> FGrammar fixHoasFuns :: ParserInfo -> ParserInfo
fixHoasFuns (rs, cs) = ([FRule (fixName n) ps args cat lins | FRule n ps args cat lins <- rs], cs) fixHoasFuns pinfo = pinfo{functions=mkArray [FFun (fixName n) prof lins | FFun n prof lins <- elems (functions pinfo)]}
where fixName (CId n) | BS.pack "__" `BS.isPrefixOf` n = (mkCId "_B") where fixName (CId n) | BS.pack "__" `BS.isPrefixOf` n = (mkCId "_B")
| BS.pack "_Var_" `BS.isPrefixOf` n = wildCId | BS.pack "_Var_" `BS.isPrefixOf` n = wildCId
fixName n = n fixName n = n
convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> FGrammar convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> ParserInfo
convert abs_defs cnc_defs cat_defs = getFGrammar (loop frulesEnv) convert abs_defs cnc_defs cat_defs = getParserInfo (loop grammarEnv)
where where
srules = [ srules = [
(XRule id args res (map findLinType args) (findLinType res) term) | (XRule id args res (map findLinType args) (findLinType res) term) |
@@ -107,26 +108,26 @@ convert abs_defs cnc_defs cat_defs = getFGrammar (loop frulesEnv)
findLinType id = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs) findLinType id = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs)
(xrulesMap,frulesEnv) = List.foldl' helper (Map.empty,emptyFRulesEnv) srules (xrulesMap,grammarEnv) = List.foldl' helper (Map.empty,emptyFFunsEnv) srules
where where
helper (xrulesMap,frulesEnv) rule@(XRule id abs_args abs_res cnc_args cnc_res term) = helper (xrulesMap,grammarEnv) rule@(XRule id abs_args abs_res cnc_args cnc_res term) =
let xrulesMap' = Map.insertWith (++) abs_res [rule] xrulesMap let xrulesMap' = Map.insertWith (++) abs_res [rule] xrulesMap
frulesEnv' = List.foldl' (\env selector -> convertRule cnc_defs selector rule env) grammarEnv' = List.foldl' (\env selector -> convertRule cnc_defs selector rule env)
frulesEnv grammarEnv
(mkSingletonSelectors cnc_defs cnc_res) (mkSingletonSelectors cnc_defs cnc_res)
in xrulesMap' `seq` frulesEnv' `seq` (xrulesMap',frulesEnv') in xrulesMap' `seq` grammarEnv' `seq` (xrulesMap',grammarEnv')
loop frulesEnv = loop grammarEnv =
let (todo, frulesEnv') = takeToDoRules xrulesMap frulesEnv let (todo, grammarEnv') = takeToDoRules xrulesMap grammarEnv
in case todo of in case todo of
[] -> frulesEnv' [] -> grammarEnv'
_ -> loop $! List.foldl' (\env (srules,selector) -> _ -> loop $! List.foldl' (\env (srules,selector) ->
List.foldl' (\env srule -> convertRule cnc_defs selector srule env) env srules) frulesEnv' todo List.foldl' (\env srule -> convertRule cnc_defs selector srule env) env srules) grammarEnv' todo
convertRule :: TermMap -> TermSelector -> XRule -> FRulesEnv -> FRulesEnv convertRule :: TermMap -> TermSelector -> XRule -> GrammarEnv -> GrammarEnv
convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) frulesEnv = convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) grammarEnv =
foldBM addRule foldBM addRule
frulesEnv grammarEnv
(convertTerm cnc_defs selector term [([],[])]) (convertTerm cnc_defs selector term [([],[])])
(protoFCat cat, map (\scat -> (protoFCat scat,[])) args, ctype, ctypes) (protoFCat cat, map (\scat -> (protoFCat scat,[])) args, ctype, ctypes)
where where
@@ -137,9 +138,10 @@ convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) frulesEnv =
(env1, xargs1) = List.mapAccumL (genFCatArg cnc_defs ctype) env xargs (env1, xargs1) = List.mapAccumL (genFCatArg cnc_defs ctype) env xargs
in case xcat of in case xcat of
PFCat _ [] _ -> (env , args, all_args) PFCat _ [] _ -> (env , args, all_args)
_ -> (env1,xargs1++args,(idx,zip xargs1 xargs):all_args)) (env1,[],[]) (zip3 newArgs' ctypes [0..]) _ -> (env1,xargs1++args,(idx,zip xargs1 xargs):all_args))
(env1,[],[]) (zip3 newArgs' ctypes [0..])
newLinRec = listArray (0,length linRec-1) [translateLin idxArgs path linRec | path <- case newCat' of {PFCat _ rcs _ -> rcs}] (env3,newLinRec) = List.mapAccumL (translateLin idxArgs linRec) env2 (case newCat' of {PFCat _ rcs _ -> rcs})
(_,newProfile) = List.mapAccumL accumProf 0 newArgs' (_,newProfile) = List.mapAccumL accumProf 0 newArgs'
where where
@@ -147,18 +149,19 @@ convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) frulesEnv =
accumProf nr (_ ,xpaths) = (nr+cnt+1, [nr..nr+cnt]) accumProf nr (_ ,xpaths) = (nr+cnt+1, [nr..nr+cnt])
where cnt = length xpaths where cnt = length xpaths
rule = FRule fun newProfile newArgs newCat newLinRec (env4,funid) = addFFun env3 (FFun fun newProfile (mkArray newLinRec))
in addFRule env2 rule
translateLin idxArgs lbl' [] = array (0,-1) [] in addProduction env4 newCat (FApply funid newArgs)
translateLin idxArgs lbl' ((lbl,syms) : lins)
| lbl' == lbl = listArray (0,length syms-1) (map instSym syms) translateLin idxArgs [] grammarEnv lbl' = error "translateLin"
| otherwise = translateLin idxArgs lbl' lins translateLin idxArgs ((lbl,syms) : lins) grammarEnv lbl'
| lbl' == lbl = addFSeq grammarEnv (lbl,map instSym syms)
| otherwise = translateLin idxArgs lins grammarEnv lbl'
where where
instSym = either (\(lbl, nr, xnr) -> instCat lbl nr xnr 0 idxArgs) FSymTok instSym = either (\(lbl, nr, xnr) -> instCat lbl nr xnr 0 idxArgs) FSymTok
instCat lbl nr xnr nr' ((idx,xargs):idxArgs) instCat lbl nr xnr nr' ((idx,xargs):idxArgs)
| nr == idx = let (fcat, PFCat _ rcs _) = xargs !! xnr | nr == idx = let (fcat, PFCat _ rcs _) = xargs !! xnr
in FSymCat (index lbl rcs 0) (nr'+xnr) in FSymCat (nr'+xnr) (index lbl rcs 0)
| otherwise = instCat lbl nr xnr (nr'+length xargs) idxArgs | otherwise = instCat lbl nr xnr (nr'+length xargs) idxArgs
index lbl' (lbl:lbls) idx index lbl' (lbl:lbls) idx
@@ -173,7 +176,7 @@ type CnvMonad a = BacktrackM Env a
type FPath = [FIndex] type FPath = [FIndex]
type Env = (ProtoFCat, [(ProtoFCat,[FPath])], Term, [Term]) type Env = (ProtoFCat, [(ProtoFCat,[FPath])], Term, [Term])
type LinRec = [(FPath, [Either (FPath, FIndex, Int) FToken])] type LinRec = [(FPath, [Either (FPath, FIndex, Int) Tokn])]
type TermMap = Map.Map CId Term type TermMap = Map.Map CId Term
@@ -190,11 +193,11 @@ convertTerm cnc_defs selector (S ts) ((lbl_path,lin) : lins) = do projectH
foldM (\lins t -> convertTerm cnc_defs selector t lins) ((lbl_path,lin) : lins) (reverse ts) foldM (\lins t -> convertTerm cnc_defs selector t lins) ((lbl_path,lin) : lins) (reverse ts)
convertTerm cnc_defs selector (K (KS str)) ((lbl_path,lin) : lins) = convertTerm cnc_defs selector (K (KS str)) ((lbl_path,lin) : lins) =
do projectHead lbl_path do projectHead lbl_path
return ((lbl_path,Right str : lin) : lins) return ((lbl_path,Right (KS str) : lin) : lins)
convertTerm cnc_defs selector (K (KP strs vars))((lbl_path,lin) : lins) = convertTerm cnc_defs selector (K (KP strs vars))((lbl_path,lin) : lins) =
do projectHead lbl_path do projectHead lbl_path
toks <- member (strs:[strs' | Alt strs' _ <- vars]) toks <- member (strs:[strs' | Alt strs' _ <- vars])
return ((lbl_path, map Right toks ++ lin) : lins) return ((lbl_path, map (Right . KS) toks ++ lin) : lins)
convertTerm cnc_defs selector (F id) lins = do term <- Map.lookup id cnc_defs convertTerm cnc_defs selector (F id) lins = do term <- Map.lookup id cnc_defs
convertTerm cnc_defs selector term lins convertTerm cnc_defs selector term lins
convertTerm cnc_defs selector (W s t) ((lbl_path,lin) : lins) = do convertTerm cnc_defs selector (W s t) ((lbl_path,lin) : lins) = do
@@ -273,75 +276,105 @@ selectTerm (index:path) (R record) = selectTerm path (record !! index)
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- FRulesEnv -- GrammarEnv
data FRulesEnv = FRulesEnv {-# UNPACK #-} !Int FCatSet [FRule]
data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int FCatSet FSeqSet FFunSet (IntMap.IntMap (Set.Set Production))
type FCatSet = Map.Map CId (Map.Map [FPath] (Map.Map [(FPath,FIndex)] (Either FCat FCat))) type FCatSet = Map.Map CId (Map.Map [FPath] (Map.Map [(FPath,FIndex)] (Either FCat FCat)))
type FSeqSet = Map.Map FSeq SeqId
type FFunSet = Map.Map FFun FunId
data ProtoFCat = PFCat CId [FPath] [(FPath,FIndex)] data ProtoFCat = PFCat CId [FPath] [(FPath,FIndex)]
protoFCat :: CId -> ProtoFCat protoFCat :: CId -> ProtoFCat
protoFCat cat = PFCat cat [] [] protoFCat cat = PFCat cat [] []
emptyFRulesEnv = FRulesEnv 0 (ins fcatString (mkCId "String") [[0]] [] $ emptyFFunsEnv = GrammarEnv 0 initFCatSet Map.empty Map.empty IntMap.empty
ins fcatInt (mkCId "Int") [[0]] [] $
ins fcatFloat (mkCId "Float") [[0]] [] $
ins fcatVar (mkCId "_Var") [[0]] [] $
Map.empty) []
where where
ins fcat cat rcs tcs fcatSet = initFCatSet = (ins fcatString (mkCId "String") [[0]] [] $
Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s fcatSet ins fcatInt (mkCId "Int") [[0]] [] $
ins fcatFloat (mkCId "Float") [[0]] [] $
ins fcatVar (mkCId "_Var") [[0]] [] $
Map.empty)
ins fcat cat rcs tcs catSet =
Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s catSet
where where
right_fcat = Right fcat right_fcat = Right fcat
tmap_s = Map.singleton tcs right_fcat tmap_s = Map.singleton tcs right_fcat
rmap_s = Map.singleton rcs tmap_s rmap_s = Map.singleton rcs tmap_s
addFRule :: FRulesEnv -> FRule -> FRulesEnv addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv
addFRule (FRulesEnv last_id fcatSet rules) rule = FRulesEnv last_id fcatSet (rule:rules) addProduction (GrammarEnv last_id catSet seqSet funSet prodSet) cat p =
GrammarEnv last_id catSet seqSet funSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet)
getFGrammar :: FRulesEnv -> FGrammar addFSeq :: GrammarEnv -> (FPath,[FSymbol]) -> (GrammarEnv,SeqId)
getFGrammar (FRulesEnv last_id fcatSet rules) = (rules, Map.map getFCatList fcatSet) addFSeq env@(GrammarEnv last_id catSet seqSet funSet prodSet) (_,lst) =
case Map.lookup seq seqSet of
Just id -> (env,id)
Nothing -> let !last_seq = Map.size seqSet
in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet prodSet,last_seq)
where where
seq = mkArray lst
addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId)
addFFun env@(GrammarEnv last_id catSet seqSet funSet prodSet) fun =
case Map.lookup fun funSet of
Just id -> (env,id)
Nothing -> let !last_funid = Map.size funSet
in (GrammarEnv last_id catSet seqSet (Map.insert fun last_funid funSet) prodSet,last_funid)
getParserInfo :: GrammarEnv -> ParserInfo
getParserInfo (GrammarEnv last_id catSet seqSet funSet prodSet) =
ParserInfo { functions = mkArray funSet
, sequences = mkArray seqSet
, productions = prodSet
, startCats = Map.map getFCatList catSet
}
where
mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
getFCatList rcs = Map.fold (\tcs lst -> Map.fold (\x lst -> either id id x : lst) lst tcs) [] rcs getFCatList rcs = Map.fold (\tcs lst -> Map.fold (\x lst -> either id id x : lst) lst tcs) [] rcs
genFCatHead :: FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat)
genFCatHead env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs) = genFCatHead :: GrammarEnv -> ProtoFCat -> (GrammarEnv, FCat)
case Map.lookup cat fcatSet >>= Map.lookup rcs >>= Map.lookup tcs of genFCatHead env@(GrammarEnv last_id catSet seqSet funSet prodSet) (PFCat cat rcs tcs) =
Just (Left fcat) -> (FRulesEnv last_id (ins fcat) rules, fcat) case Map.lookup cat catSet >>= Map.lookup rcs >>= Map.lookup tcs of
Just (Left fcat) -> (GrammarEnv last_id (ins fcat) seqSet funSet prodSet, fcat)
Just (Right fcat) -> (env, fcat) Just (Right fcat) -> (env, fcat)
Nothing -> let fcat = last_id+1 Nothing -> let fcat = last_id+1
in (FRulesEnv fcat (ins fcat) rules, fcat) in (GrammarEnv fcat (ins fcat) seqSet funSet prodSet, fcat)
where where
ins fcat = Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s fcatSet ins fcat = Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s catSet
where where
right_fcat = Right fcat right_fcat = Right fcat
tmap_s = Map.singleton tcs right_fcat tmap_s = Map.singleton tcs right_fcat
rmap_s = Map.singleton rcs tmap_s rmap_s = Map.singleton rcs tmap_s
genFCatArg :: TermMap -> Term -> FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat) genFCatArg :: TermMap -> Term -> GrammarEnv -> ProtoFCat -> (GrammarEnv, FCat)
genFCatArg cnc_defs ctype env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs) = genFCatArg cnc_defs ctype env@(GrammarEnv last_id catSet seqSet funSet prodSet) (PFCat cat rcs tcs) =
case Map.lookup cat fcatSet >>= Map.lookup rcs of case Map.lookup cat catSet >>= Map.lookup rcs of
Just tmap -> case Map.lookup tcs tmap of Just tmap -> case Map.lookup tcs tmap of
Just (Left fcat) -> (env, fcat) Just (Left fcat) -> (env, fcat)
Just (Right fcat) -> (env, fcat) Just (Right fcat) -> (env, fcat)
Nothing -> ins tmap Nothing -> ins tmap
Nothing -> ins Map.empty Nothing -> ins Map.empty
where where
ins tmap = ins tmap =
let fcat = last_id+1 let fcat = last_id+1
(either_fcat,last_id1,tmap1,rules1) (either_fcat,last_id1,tmap1,prodSet1)
= foldBM (\tcs st (either_fcat,last_id,tmap,rules) -> = foldBM (\tcs st (either_fcat,last_id,tmap,prodSet) ->
let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap
rule = FRule wildCId [[0]] [fcat_arg] fcat p = FCoerce fcat_arg
(listArray (0,length rcs-1) [listArray (0,0) [FSymCat lbl 0] | lbl <- [0..length rcs-1]]) prodSet1 = IntMap.insertWith Set.union fcat (Set.singleton p) prodSet
in if st in if st
then (Right fcat, last_id1,tmap1,rule:rules) then (Right fcat, last_id1,tmap1,prodSet1)
else (either_fcat,last_id, tmap, rules)) else (either_fcat,last_id, tmap ,prodSet ))
(Left fcat,fcat,Map.insert tcs either_fcat tmap,rules) (Left fcat,fcat,Map.insert tcs either_fcat tmap,prodSet)
(gen_tcs ctype [] []) (gen_tcs ctype [] [])
False False
rmap1 = Map.singleton rcs tmap1 rmap1 = Map.singleton rcs tmap1
in (FRulesEnv last_id1 (Map.insertWith (\_ -> Map.insert rcs tmap1) cat rmap1 fcatSet) rules1, fcat) in (GrammarEnv last_id1 (Map.insertWith (\_ -> Map.insert rcs tmap1) cat rmap1 catSet) seqSet funSet prodSet1, fcat)
where where
addArg tcs last_id tmap = addArg tcs last_id tmap =
case Map.lookup tcs tmap of case Map.lookup tcs tmap of
@@ -380,10 +413,11 @@ data XRule = XRule CId {- function -}
Term {- result lin-type representation -} Term {- result lin-type representation -}
Term {- body -} Term {- body -}
takeToDoRules :: XRulesMap -> FRulesEnv -> ([([XRule], TermSelector)], FRulesEnv) takeToDoRules :: XRulesMap -> GrammarEnv -> ([([XRule], TermSelector)], GrammarEnv)
takeToDoRules xrulesMap (FRulesEnv last_id fcatSet rules) = (todo,FRulesEnv last_id fcatSet' rules) takeToDoRules xrulesMap (GrammarEnv last_id catSet seqSet funSet prodSet) =
(todo,GrammarEnv last_id catSet' seqSet funSet prodSet)
where where
(todo,fcatSet') = (todo,catSet') =
Map.mapAccumWithKey (\todo cat rmap -> Map.mapAccumWithKey (\todo cat rmap ->
let (todo1,rmap1) = Map.mapAccumWithKey (\todo rcs tmap -> let (todo1,rmap1) = Map.mapAccumWithKey (\todo rcs tmap ->
let (tcss,tmap') = Map.mapAccumWithKey (\tcss tcs either_xcat -> let (tcss,tmap') = Map.mapAccumWithKey (\tcss tcs either_xcat ->
@@ -398,7 +432,7 @@ takeToDoRules xrulesMap (FRulesEnv last_id fcatSet rules) = (todo,FRulesEnv last
in case mb_srules of in case mb_srules of
Just srules -> (todo1,rmap1) Just srules -> (todo1,rmap1)
Nothing -> (todo ,rmap1)) [] fcatSet Nothing -> (todo ,rmap1)) [] catSet
------------------------------------------------------------ ------------------------------------------------------------
@@ -524,3 +558,5 @@ projectProtoFCat path0 (PFCat cat rcs tcs) = do
| path0 > path = path : addConstraint rcs | path0 > path = path : addConstraint rcs
| path0 == path = path : rcs | path0 == path = path : rcs
addConstraint rcs = path0 : rcs addConstraint rcs = path0 : rcs
mkArray lst = listArray (0,length lst-1) lst

View File

@@ -1,4 +1,4 @@
{-# OPTIONS -fbang-patterns #-} {-# OPTIONS -fbang-patterns -cpp #-}
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- | -- |
-- Maintainer : Krasimir Angelov -- Maintainer : Krasimir Angelov
@@ -12,14 +12,12 @@
-- the conversion is only equivalent if the GFC grammar has a context-free backbone. -- the conversion is only equivalent if the GFC grammar has a context-free backbone.
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
module GF.Compile.GeneratePMCFG module GF.Compile.GeneratePMCFG
(convertConcrete) where (convertConcrete) where
import PGF.CId import PGF.CId
import PGF.Data import PGF.Data
import PGF.Macros --hiding (prt) import PGF.Macros --hiding (prt)
import PGF.Parsing.FCFG.Utilities
import GF.Data.BacktrackM import GF.Data.BacktrackM
import GF.Data.SortedList import GF.Data.SortedList
@@ -28,8 +26,9 @@ import GF.Data.Utilities (updateNthM, sortNub)
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.List as List import qualified Data.List as List
import qualified Data.IntMap as IntMap
import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Char8 as BS
import Data.Array import Data.Array.IArray
import Data.Maybe import Data.Maybe
import Control.Monad import Control.Monad
import Debug.Trace import Debug.Trace
@@ -37,7 +36,7 @@ import Debug.Trace
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- main conversion function -- main conversion function
convertConcrete :: Abstr -> Concr -> FGrammar convertConcrete :: Abstr -> Concr -> ParserInfo
convertConcrete abs cnc = fixHoasFuns $ convert abs_defs' conc' cats' convertConcrete abs cnc = fixHoasFuns $ convert abs_defs' conc' cats'
where abs_defs = Map.assocs (funs abs) where abs_defs = Map.assocs (funs abs)
conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient" conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient"
@@ -93,14 +92,14 @@ expandHOAS funs lins lincats = (funs' ++ hoFuns ++ varFuns,
-- replaces __NCat with _B and _Var_Cat with _. -- replaces __NCat with _B and _Var_Cat with _.
-- the temporary names are just there to avoid name collisions. -- the temporary names are just there to avoid name collisions.
fixHoasFuns :: FGrammar -> FGrammar fixHoasFuns :: ParserInfo -> ParserInfo
fixHoasFuns (!rs, !cs) = ([FRule (fixName n) ps args cat lins | FRule n ps args cat lins <- rs], cs) fixHoasFuns pinfo = pinfo{functions=mkArray [FFun (fixName n) prof lins | FFun n prof lins <- elems (functions pinfo)]}
where fixName (CId n) | BS.pack "__" `BS.isPrefixOf` n = (mkCId "_B") where fixName (CId n) | BS.pack "__" `BS.isPrefixOf` n = (mkCId "_B")
| BS.pack "_Var_" `BS.isPrefixOf` n = wildCId | BS.pack "_Var_" `BS.isPrefixOf` n = wildCId
fixName n = n fixName n = n
convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> FGrammar convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> ParserInfo
convert abs_defs cnc_defs cat_defs = getFGrammar (List.foldl' (convertRule cnc_defs) emptyFRulesEnv srules) convert abs_defs cnc_defs cat_defs = getParserInfo (List.foldl' (convertRule cnc_defs) (emptyFRulesEnv cnc_defs cat_defs) srules)
where where
srules = [ srules = [
(XRule id args res (map findLinType args) (findLinType res) term) | (XRule id args res (map findLinType args) (findLinType res) term) |
@@ -109,23 +108,40 @@ convert abs_defs cnc_defs cat_defs = getFGrammar (List.foldl' (convertRule cnc_d
findLinType id = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs) findLinType id = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs)
brk :: (GrammarEnv -> GrammarEnv) -> (GrammarEnv -> GrammarEnv)
brk f (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
case f (GrammarEnv last_id catSet seqSet funSet crcSet IntMap.empty) of
(GrammarEnv last_id catSet seqSet funSet crcSet topdown1) -> IntMap.foldWithKey optimize (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) topdown1
where
optimize cat ps env = IntMap.foldWithKey ff env (IntMap.fromListWith (++) [(funid,[args]) | FApply funid args <- Set.toList ps])
where
ff :: FunId -> [[FCat]] -> GrammarEnv -> GrammarEnv
ff funid xs env
| product (map Set.size ys) == count =
case List.mapAccumL (\env c -> addFCoercion env (Set.toList c)) env ys of
(env,args) -> addProduction env cat (FApply funid args)
| otherwise = List.foldl (\env args -> addProduction env cat (FApply funid args)) env xs
where
count = length xs
ys = foldr (zipWith Set.insert) (repeat Set.empty) xs
convertRule :: TermMap -> FRulesEnv -> XRule -> FRulesEnv convertRule :: TermMap -> GrammarEnv -> XRule -> GrammarEnv
convertRule cnc_defs frulesEnv (XRule fun args cat ctypes ctype term) = convertRule cnc_defs grammarEnv (XRule fun args cat ctypes ctype term) = trace (show fun) $
foldBM addRule brk (\grammarEnv -> foldBM addRule
frulesEnv grammarEnv
(convertTerm cnc_defs [] ctype term [([],[])]) (convertTerm cnc_defs [] ctype term [([],[])])
(protoFCat cnc_defs cat ctype, zipWith (protoFCat cnc_defs) args ctypes) (protoFCat cnc_defs cat ctype, zipWith (protoFCat cnc_defs) args ctypes)) grammarEnv
where where
addRule linRec (newCat', newArgs') env0 = addRule linRec (newCat', newArgs') env0 =
let (env1, newCat) = genFCatHead env0 newCat' let [newCat] = getFCats env0 newCat'
(env2, newArgs) = List.mapAccumL (genFCatArg cnc_defs) env1 newArgs' (env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs'
newLinRec = mkArray (map (mkArray . snd) linRec) (env2,lins) = List.mapAccumL addFSeq env1 linRec
mkArray lst = listArray (0,length lst-1) lst newLinRec = mkArray lins
rule = FRule fun [] newArgs newCat newLinRec (env3,funid) = addFFun env2 (FFun fun [[n] | n <- [0..length newArgs-1]] newLinRec)
in addFRule env2 rule
in addProduction env3 newCat (FApply funid newArgs)
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- term conversion -- term conversion
@@ -133,7 +149,7 @@ convertRule cnc_defs frulesEnv (XRule fun args cat ctypes ctype term) =
type CnvMonad a = BacktrackM Env a type CnvMonad a = BacktrackM Env a
type FPath = [FIndex] type FPath = [FIndex]
data ProtoFCat = PFCat CId [FPath] [(FPath,FIndex)] Term data ProtoFCat = PFCat CId [FPath] [(FPath,[FIndex])]
type Env = (ProtoFCat, [ProtoFCat]) type Env = (ProtoFCat, [ProtoFCat])
type LinRec = [(FPath, [FSymbol])] type LinRec = [(FPath, [FSymbol])]
data XRule = XRule CId {- function -} data XRule = XRule CId {- function -}
@@ -144,7 +160,16 @@ data XRule = XRule CId {- function -}
Term {- body -} Term {- body -}
protoFCat :: TermMap -> CId -> Term -> ProtoFCat protoFCat :: TermMap -> CId -> Term -> ProtoFCat
protoFCat cnc_defs cat ctype = PFCat cat (getRCS cnc_defs ctype) [] ctype protoFCat cnc_defs cat ctype =
let (rcs,tcs) = loop [] [] [] ctype
in PFCat cat rcs tcs
where
loop path rcs tcs (R record) = List.foldl' (\(rcs,tcs) (index,term) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record)
loop path rcs tcs (C i) = ( rcs,(path,[0..i]):tcs)
loop path rcs tcs (S _) = (path:rcs, tcs)
loop path rcs tcs (F id) = case Map.lookup id cnc_defs of
Just term -> loop path rcs tcs term
Nothing -> error ("unknown identifier: "++show id)
type TermMap = Map.Map CId Term type TermMap = Map.Map CId Term
@@ -156,11 +181,12 @@ convertTerm cnc_defs sel ctype (P term p) lins = do nr <- e
convertTerm cnc_defs (nr:sel) ctype term lins convertTerm cnc_defs (nr:sel) ctype term lins
convertTerm cnc_defs sel ctype (FV vars) lins = do term <- member vars convertTerm cnc_defs sel ctype (FV vars) lins = do term <- member vars
convertTerm cnc_defs sel ctype term lins convertTerm cnc_defs sel ctype term lins
convertTerm cnc_defs sel ctype (S ts) ((lbl_path,lin) : lins) = foldM (\lins t -> convertTerm cnc_defs sel ctype t lins) ((lbl_path,lin) : lins) (reverse ts) convertTerm cnc_defs sel ctype (S ts) lins = foldM (\lins t -> convertTerm cnc_defs sel ctype t lins) lins (reverse ts)
convertTerm cnc_defs sel ctype (K (KS str)) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok str : lin) : lins) --convertTerm cnc_defs sel ctype (K t) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok t : lin) : lins)
convertTerm cnc_defs sel ctype (K (KS t)) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok (KS t) : lin) : lins)
convertTerm cnc_defs sel ctype (K (KP strs vars))((lbl_path,lin) : lins) = convertTerm cnc_defs sel ctype (K (KP strs vars))((lbl_path,lin) : lins) =
do toks <- member (strs:[strs' | Alt strs' _ <- vars]) do toks <- member (strs:[strs' | Alt strs' _ <- vars])
return ((lbl_path, map FSymTok toks ++ lin) : lins) return ((lbl_path, map (FSymTok . KS) toks ++ lin) : lins)
convertTerm cnc_defs sel ctype (F id) lins = do term <- Map.lookup id cnc_defs convertTerm cnc_defs sel ctype (F id) lins = do term <- Map.lookup id cnc_defs
convertTerm cnc_defs sel ctype term lins convertTerm cnc_defs sel ctype term lins
convertTerm cnc_defs sel ctype (W s t) ((lbl_path,lin) : lins) = do convertTerm cnc_defs sel ctype (W s t) ((lbl_path,lin) : lins) = do
@@ -183,8 +209,8 @@ convertArg (C max) nr path lbl_path lin lins = do
return lins return lins
convertArg (S _) nr path lbl_path lin lins = do convertArg (S _) nr path lbl_path lin lins = do
(_, args) <- readState (_, args) <- readState
let PFCat cat rcs tcs _ = args !! nr let PFCat cat rcs tcs = args !! nr
return ((lbl_path, FSymCat (index path rcs 0) nr : lin) : lins) return ((lbl_path, FSymCat nr (index path rcs 0) : lin) : lins)
where where
index lbl' (lbl:lbls) idx index lbl' (lbl:lbls) idx
| lbl' == lbl = idx | lbl' == lbl = idx
@@ -210,8 +236,11 @@ convertRec cnc_defs (index:sub_sel) ctype record lbl_path lin lins = do
evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex
evalTerm cnc_defs path (V nr) = do (_, args) <- readState evalTerm cnc_defs path (V nr) = do (_, args) <- readState
let PFCat _ _ _ ctype = args !! nr let PFCat _ _ tcs = args !! nr
unifyPType nr (reverse path) (selectTerm path ctype) rpath = reverse path
index <- member (fromMaybe (error "evalTerm: wrong path") (lookup rpath tcs))
restrictArg nr rpath index
return index
evalTerm cnc_defs path (C nr) = return nr evalTerm cnc_defs path (C nr) = return nr
evalTerm cnc_defs path (R record) = case path of evalTerm cnc_defs path (R record) = case path of
(index:path) -> evalTerm cnc_defs path (record !! index) (index:path) -> evalTerm cnc_defs path (record !! index)
@@ -222,112 +251,80 @@ evalTerm cnc_defs path (F id) = do term <- Map.lookup id cnc_defs
evalTerm cnc_defs path term evalTerm cnc_defs path term
evalTerm cnc_defs path x = error ("evalTerm ("++show x++")") evalTerm cnc_defs path x = error ("evalTerm ("++show x++")")
unifyPType :: FIndex -> FPath -> Term -> CnvMonad FIndex
unifyPType nr path (C max_index) =
do (_, args) <- readState
let PFCat _ _ tcs _ = args !! nr
case lookup path tcs of
Just index -> return index
Nothing -> do index <- member [0..max_index]
restrictArg nr path index
return index
unifyPType nr path t = error $ "unifyPType " ++ show t ---- AR 2/10/2007
selectTerm :: FPath -> Term -> Term
selectTerm [] term = term
selectTerm (index:path) (R record) = selectTerm path (record !! index)
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- FRulesEnv -- GrammarEnv
data FRulesEnv = FRulesEnv {-# UNPACK #-} !Int FCatSet [FRule] data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int CatSet SeqSet FunSet CoerceSet (IntMap.IntMap (Set.Set Production))
type FCatSet = Map.Map CId (Map.Map [(FPath,FIndex)] FCat) type CatSet = Map.Map CId (FCat,FCat,[Int])
type SeqSet = Map.Map FSeq SeqId
type FunSet = Map.Map FFun FunId
type CoerceSet= Map.Map [FCat] FCat
emptyFRulesEnv = FRulesEnv 0 (ins fcatString (mkCId "String") [] $ emptyFRulesEnv cnc_defs lincats =
ins fcatInt (mkCId "Int") [] $ let (last_id,catSet) = Map.mapAccum computeCatRange 0 lincats
ins fcatFloat (mkCId "Float") [] $ in GrammarEnv last_id catSet Map.empty Map.empty Map.empty IntMap.empty
ins fcatVar (mkCId "_Var") [] $
Map.empty) []
where where
ins fcat cat tcs fcatSet = computeCatRange index ctype = (index+size,(index,index+size-1,poly))
Map.insertWith (\_ -> Map.insert tcs fcat) cat tmap_s fcatSet
where where
tmap_s = Map.singleton tcs fcat (size,poly) = getMultipliers 1 [] ctype
getMultipliers m ms (R record) = foldl (\(m,ms) t -> getMultipliers m ms t) (m,ms) record
getMultipliers m ms (S _) = (m,ms)
getMultipliers m ms (C max_index) = (m*(max_index+1),m : ms)
getMultipliers m ms (F id) = case Map.lookup id cnc_defs of
Just term -> getMultipliers m ms term
Nothing -> error ("unknown identifier: "++prCId id)
addFRule :: FRulesEnv -> FRule -> FRulesEnv addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv
addFRule (FRulesEnv last_id fcatSet rules) rule = FRulesEnv last_id fcatSet (rule:rules) addProduction (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) cat p =
GrammarEnv last_id catSet seqSet funSet crcSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet)
getFGrammar :: FRulesEnv -> FGrammar addFSeq :: GrammarEnv -> (FPath,[FSymbol]) -> (GrammarEnv,SeqId)
getFGrammar (FRulesEnv last_id fcatSet rules) = (rules, Map.map Map.elems fcatSet) addFSeq env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (_,lst) =
case Map.lookup seq seqSet of
genFCatHead :: FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat) Just id -> (env,id)
genFCatHead env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs _) = Nothing -> let !last_seq = Map.size seqSet
case Map.lookup cat fcatSet >>= Map.lookup tcs of in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet crcSet prodSet,last_seq)
Just fcat -> (env, fcat)
Nothing -> let fcat = last_id+1
in (FRulesEnv fcat (ins fcat) rules, fcat)
where where
ins fcat = Map.insertWith (\_ -> Map.insert tcs fcat) cat tmap_s fcatSet seq = mkArray lst
where
tmap_s = Map.singleton tcs fcat
genFCatArg :: TermMap -> FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat) addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId)
genFCatArg cnc_defs env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs ctype) = addFFun env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) fun =
case Map.lookup cat fcatSet of case Map.lookup fun funSet of
Just tmap -> case Map.lookup tcs tmap of Just id -> (env,id)
Just fcat -> (env, fcat) Nothing -> let !last_funid = Map.size funSet
Nothing -> ins tmap in (GrammarEnv last_id catSet seqSet (Map.insert fun last_funid funSet) crcSet prodSet,last_funid)
Nothing -> ins Map.empty
addFCoercion :: GrammarEnv -> [FCat] -> (GrammarEnv,FCat)
addFCoercion env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) sub_fcats =
case sub_fcats of
[fcat] -> (env,fcat)
_ -> case Map.lookup sub_fcats crcSet of
Just fcat -> (env,fcat)
Nothing -> let !fcat = last_id+1
in (GrammarEnv fcat catSet seqSet funSet (Map.insert sub_fcats fcat crcSet) prodSet,fcat)
getParserInfo :: GrammarEnv -> ParserInfo
getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
ParserInfo { functions = mkArray funSet
, sequences = mkArray seqSet
, productions = IntMap.union prodSet coercions
, startCats = Map.map (\(start,end,_) -> range (start,end)) catSet
}
where where
ins tmap = mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
let fcat = last_id+1
(last_id1,tmap1,rules1) coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet]
= foldBM (\tcs st (last_id,tmap,rules) ->
let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap
rule = FRule wildCId [[0]] [fcat_arg] fcat
(listArray (0,length rcs-1) [listArray (0,0) [FSymCat lbl 0] | lbl <- [0..length rcs-1]])
in if st
then (last_id1,tmap1,rule:rules)
else (last_id, tmap, rules))
(fcat,Map.insert tcs fcat tmap,rules)
(gen_tcs ctype [] [])
False
in (FRulesEnv last_id1 (Map.insert cat tmap1 fcatSet) rules1, fcat)
where
addArg tcs last_id tmap =
case Map.lookup tcs tmap of
Just fcat -> (last_id, tmap, fcat)
Nothing -> let fcat = last_id+1
in (fcat, Map.insert tcs fcat tmap, fcat)
gen_tcs :: Term -> FPath -> [(FPath,FIndex)] -> BacktrackM Bool [(FPath,FIndex)] getFCats :: GrammarEnv -> ProtoFCat -> [FCat]
gen_tcs (R record) path acc = foldM (\acc (label,ctype) -> gen_tcs ctype (label:path) acc) acc (zip [0..] record) getFCats (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat cat rcs tcs) =
gen_tcs (S _) path acc = return acc case Map.lookup cat catSet of
gen_tcs (C max_index) path acc = Just (start,end,ms) -> reverse (solutions (variants ms tcs start) ())
case List.lookup path tcs of
Just index -> return $! addConstraint path index acc
Nothing -> do writeState True
index <- member [0..max_index]
return $! addConstraint path index acc
where
addConstraint path0 index0 (c@(path,index) : cs)
| path0 > path = c:addConstraint path0 index0 cs
addConstraint path0 index0 cs = (path0,index0) : cs
gen_tcs (F id) path acc = case Map.lookup id cnc_defs of
Just term -> gen_tcs term path acc
Nothing -> error ("unknown identifier: "++prCId id)
getRCS :: TermMap -> Term -> [FPath]
getRCS cnc_defs = loop [] []
where where
loop path rcs (R record) = List.foldl' (\rcs (index,term) -> loop (index:path) rcs term) rcs (zip [0..] record) variants _ [] fcat = return fcat
loop path rcs (C i) = rcs variants (m:ms) ((_,indices) : tcs) fcat = do index <- member indices
loop path rcs (S _) = path:rcs variants ms tcs ((m*index) + fcat)
loop path rcs (F id) = case Map.lookup id cnc_defs of
Just term -> loop path rcs term
Nothing -> error ("unknown identifier: "++show id)
------------------------------------------------------------ ------------------------------------------------------------
-- updating the MCF rule -- updating the MCF rule
@@ -345,12 +342,14 @@ restrictHead path term
writeState (head', args) writeState (head', args)
restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> CnvMonad ProtoFCat restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> CnvMonad ProtoFCat
restrictProtoFCat path0 index0 (PFCat cat rcs tcs ctype) = do restrictProtoFCat path0 index0 (PFCat cat rcs tcs) = do
tcs <- addConstraint tcs tcs <- addConstraint tcs
return (PFCat cat rcs tcs ctype) return (PFCat cat rcs tcs)
where where
addConstraint (c@(path,index) : cs) addConstraint [] = error "restrictProtoFCat: unknown path"
| path0 > path = liftM (c:) (addConstraint cs) addConstraint (c@(path,indices) : tcs)
| path0 == path = guard (index0 == index) >> | path0 == path = guard (index0 `elem` indices) >>
return (c : cs) return ((path,[index0]) : tcs)
addConstraint cs = return ((path0,index0) : cs) | otherwise = liftM (c:) (addConstraint tcs)
mkArray lst = listArray (0,length lst-1) lst

View File

@@ -7,7 +7,6 @@ import qualified GF.Compile.GenerateFCFG as FCFG
import qualified GF.Compile.GeneratePMCFG as PMCFG import qualified GF.Compile.GeneratePMCFG as PMCFG
import PGF.CId import PGF.CId
import PGF.BuildParser (buildParserInfo)
import qualified PGF.Macros as CM import qualified PGF.Macros as CM
import qualified PGF.Data as C import qualified PGF.Data as C
import qualified PGF.Data as D import qualified PGF.Data as D
@@ -54,9 +53,9 @@ mkCanon2gfcc opts cnc gr =
addParsers :: D.PGF -> D.PGF addParsers :: D.PGF -> D.PGF
addParsers pgf = pgf { D.concretes = Map.map conv (D.concretes pgf) } addParsers pgf = pgf { D.concretes = Map.map conv (D.concretes pgf) }
where where
conv cnc = cnc { D.parser = Just (buildParserInfo fcfg) } conv cnc = cnc { D.parser = Just pinfo }
where where
fcfg pinfo
| Map.lookup (mkCId "erasing") (D.cflags cnc) == Just "on" = PMCFG.convertConcrete (D.abstract pgf) cnc | Map.lookup (mkCId "erasing") (D.cflags cnc) == Just "on" = PMCFG.convertConcrete (D.abstract pgf) cnc
| otherwise = FCFG.convertConcrete (D.abstract pgf) cnc | otherwise = FCFG.convertConcrete (D.abstract pgf) cnc

View File

@@ -91,7 +91,6 @@ data OutputFormat = FmtPGF
| FmtEBNF | FmtEBNF
| FmtRegular | FmtRegular
| FmtNoLR | FmtNoLR
| FmtFCFG
| FmtSRGS_XML | FmtSRGS_XML
| FmtSRGS_XML_NonRec | FmtSRGS_XML_NonRec
| FmtSRGS_ABNF | FmtSRGS_ABNF
@@ -497,7 +496,6 @@ outputFormats =
("ebnf", FmtEBNF), ("ebnf", FmtEBNF),
("regular", FmtRegular), ("regular", FmtRegular),
("nolr", FmtNoLR), ("nolr", FmtNoLR),
("fcfg", FmtFCFG),
("srgs_xml", FmtSRGS_XML), ("srgs_xml", FmtSRGS_XML),
("srgs_xml_nonrec", FmtSRGS_XML_NonRec), ("srgs_xml_nonrec", FmtSRGS_XML_NonRec),
("srgs_abnf", FmtSRGS_ABNF), ("srgs_abnf", FmtSRGS_ABNF),

View File

@@ -4,21 +4,19 @@
-- --
-- Approximates PGF grammars with context-free grammars. -- Approximates PGF grammars with context-free grammars.
---------------------------------------------------------------------- ----------------------------------------------------------------------
module GF.Speech.PGFToCFG (bnfPrinter, module GF.Speech.PGFToCFG (bnfPrinter, pgfToCFG) where
fcfgPrinter, pgfToCFG) where
import PGF.CId import PGF.CId
import PGF.Data as PGF import PGF.Data as PGF
import PGF.Macros import PGF.Macros
import GF.Data.MultiMap (MultiMap)
import qualified GF.Data.MultiMap as MultiMap
import GF.Infra.Ident import GF.Infra.Ident
import GF.Speech.CFG import GF.Speech.CFG
import Data.Array as Array import Data.Array.IArray as Array
import Data.List import Data.List
import Data.Map (Map) import Data.Map (Map)
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import Data.Maybe import Data.Maybe
import Data.Set (Set) import Data.Set (Set)
import qualified Data.Set as Set import qualified Data.Set as Set
@@ -29,21 +27,6 @@ bnfPrinter = toBNF id
toBNF :: (CFG -> CFG) -> PGF -> CId -> String toBNF :: (CFG -> CFG) -> PGF -> CId -> String
toBNF f pgf cnc = prCFG $ f $ pgfToCFG pgf cnc toBNF f pgf cnc = prCFG $ f $ pgfToCFG pgf cnc
-- FIXME: move this somewhere else
fcfgPrinter :: PGF -> CId -> String
fcfgPrinter pgf cnc = unlines (map showRule rules)
where
pinfo = fromMaybe (error "fcfgPrinter") (lookParser pgf cnc)
rules :: [FRule]
rules = Array.elems (PGF.allRules pinfo)
showRule (FRule cid ps cs fc arr) = prCId cid ++ " " ++ show ps ++ ". " ++ showCat fc ++ " ::= [" ++ concat (intersperse ", " (map showCat cs)) ++ "] = " ++ showLin arr
where
showLin arr = "[" ++ concat (intersperse ", " [ unwords (map showFSymbol (Array.elems r)) | r <- Array.elems arr]) ++ "]"
showFSymbol (FSymCat i j) = showCat (cs!!j) ++ "_" ++ show j ++ "." ++ show i
showFSymbol (FSymTok t) = t
showCat c = "C" ++ show c
pgfToCFG :: PGF pgfToCFG :: PGF
-> CId -- ^ Concrete syntax name -> CId -- ^ Concrete syntax name
@@ -52,12 +35,13 @@ pgfToCFG pgf lang = mkCFG (lookStartCat pgf) extCats (startRules ++ concatMap fr
where where
pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang) pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang)
rules :: [FRule] rules :: [(FCat,Production)]
rules = Array.elems (PGF.allRules pinfo) rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.productions pinfo)
, prod <- Set.toList set]
fcatCats :: Map FCat Cat fcatCats :: Map FCat Cat
fcatCats = Map.fromList [(fc, prCId c ++ "_" ++ show i) fcatCats = Map.fromList [(fc, prCId c ++ "_" ++ show i)
| (c,fcs) <- Map.toList (startupCats pinfo), | (c,fcs) <- Map.toList (startCats pinfo),
(fc,i) <- zip fcs [1..]] (fc,i) <- zip fcs [1..]]
fcatCat :: FCat -> Cat fcatCat :: FCat -> Cat
@@ -69,49 +53,61 @@ pgfToCFG pgf lang = mkCFG (lookStartCat pgf) extCats (startRules ++ concatMap fr
-- gets the number of fields in the lincat for the given category -- gets the number of fields in the lincat for the given category
catLinArity :: FCat -> Int catLinArity :: FCat -> Int
catLinArity c = maximum (1:[rangeSize (bounds rhs) | FRule _ _ _ _ rhs <- Map.findWithDefault [] c rulesByFCat]) catLinArity c = maximum (1:[rangeSize (bounds rhs) | (FFun _ _ rhs, _) <- topdownRules c])
topdownRules cat = f cat []
where
f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo))
g (FApply funid args) rules = (functions pinfo ! funid,args) : rules
g (FCoerce cat) rules = f cat rules
rulesByFCat :: Map FCat [FRule]
rulesByFCat = Map.fromListWith (++) [(c,[r]) | r@(FRule _ _ _ c _) <- rules]
extCats :: Set Cat extCats :: Set Cat
extCats = Set.fromList $ map lhsCat startRules extCats = Set.fromList $ map lhsCat startRules
startRules :: [CFRule] startRules :: [CFRule]
startRules = [CFRule (prCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0) startRules = [CFRule (prCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0)
| (c,fcs) <- Map.toList (startupCats pinfo), | (c,fcs) <- Map.toList (startCats pinfo),
fc <- fcs, not (isLiteralFCat fc), fc <- fcs, not (isLiteralFCat fc),
r <- [0..catLinArity fc-1]] r <- [0..catLinArity fc-1]]
fruleToCFRule :: FRule -> [CFRule] fruleToCFRule :: (FCat,Production) -> [CFRule]
fruleToCFRule (FRule f ps args c rhs) = fruleToCFRule (c,FApply funid args) =
[CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm (map (fixProfile row) ps)) [CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm (map (fixProfile row) ps))
| (l,row) <- Array.assocs rhs, not (containsLiterals row)] | (l,seqid) <- Array.assocs rhs
, let row = sequences pinfo ! seqid
, not (containsLiterals row)]
where where
FFun f ps rhs = functions pinfo ! funid
mkRhs :: Array FPointPos FSymbol -> [CFSymbol] mkRhs :: Array FPointPos FSymbol -> [CFSymbol]
mkRhs = map fsymbolToSymbol . Array.elems mkRhs = map fsymbolToSymbol . Array.elems
containsLiterals :: Array FPointPos FSymbol -> Bool containsLiterals :: Array FPointPos FSymbol -> Bool
containsLiterals row = any isLiteralFCat [args!!n | FSymCat _ n <- Array.elems row] containsLiterals row = any isLiteralFCat [args!!n | FSymCat n _ <- Array.elems row]
fsymbolToSymbol :: FSymbol -> CFSymbol fsymbolToSymbol :: FSymbol -> CFSymbol
fsymbolToSymbol (FSymCat l n) = NonTerminal (fcatToCat (args!!n) l) fsymbolToSymbol (FSymCat n l) = NonTerminal (fcatToCat (args!!n) l)
fsymbolToSymbol (FSymTok t) = Terminal t fsymbolToSymbol (FSymTok (KS t)) = Terminal t
fixProfile :: Array FPointPos FSymbol -> Profile -> Profile fixProfile :: Array FPointPos FSymbol -> Profile -> Profile
fixProfile row = concatMap positions fixProfile row = concatMap positions
where where
nts = zip [0..] [nt | nt@(FSymCat _ _) <- Array.elems row ] nts = zip [0..] [nt | nt@(FSymCat _ _) <- Array.elems row]
positions i = [k | (k,FSymCat _ j) <- nts, j == i] positions i = [k | (k,FSymCat j _) <- nts, j == i]
profilesToTerm :: [Profile] -> CFTerm profilesToTerm :: [Profile] -> CFTerm
profilesToTerm [[n]] | f == wildCId = CFRes n
profilesToTerm ps = CFObj f (zipWith profileToTerm argTypes ps) profilesToTerm ps = CFObj f (zipWith profileToTerm argTypes ps)
where (argTypes,_) = catSkeleton $ lookType pgf f where (argTypes,_) = catSkeleton $ lookType pgf f
profileToTerm :: CId -> Profile -> CFTerm profileToTerm :: CId -> Profile -> CFTerm
profileToTerm t [] = CFMeta t profileToTerm t [] = CFMeta t
profileToTerm _ xs = CFRes (last xs) -- FIXME: unify profileToTerm _ xs = CFRes (last xs) -- FIXME: unify
fruleToCFRule (c,FCoerce c') =
[CFRule (fcatToCat c l) [NonTerminal (fcatToCat c' l)] (CFRes 0)
| l <- [0..catLinArity c-1]]
isLiteralFCat :: FCat -> Bool isLiteralFCat :: FCat -> Bool
isLiteralFCat = (`elem` [fcatString, fcatInt, fcatFloat, fcatVar]) isLiteralFCat = (`elem` [fcatString, fcatInt, fcatFloat, fcatVar])

View File

@@ -25,6 +25,7 @@ import qualified Text.ParserCombinators.ReadP as RP
import System.Cmd import System.Cmd
import System.CPUTime import System.CPUTime
import Control.Exception import Control.Exception
import Control.Monad
import Data.Version import Data.Version
import GF.System.Signal import GF.System.Signal
--import System.IO.Error (try) --import System.IO.Error (try)
@@ -203,9 +204,10 @@ wordCompletion gfenv line0 prefix0 p =
-> do mb_state0 <- try (evaluate (initState pgf (optLang opts) (optCat opts))) -> do mb_state0 <- try (evaluate (initState pgf (optLang opts) (optCat opts)))
case mb_state0 of case mb_state0 of
Right state0 -> let ws = words (take (length s - length prefix) s) Right state0 -> let ws = words (take (length s - length prefix) s)
state = foldl nextState state0 ws in case foldM nextState state0 ws of
compls = getCompletions state prefix Nothing -> ret ' ' []
in ret ' ' (map (encode gfenv) (Map.keys compls)) Just state -> let compls = getCompletions state prefix
in ret ' ' (map (encode gfenv) (Map.keys compls))
Left _ -> ret ' ' [] Left _ -> ret ' ' []
CmplOpt (Just (Command n _ _)) pref CmplOpt (Just (Command n _ _)) pref
-> case Map.lookup n (commands cmdEnv) of -> case Map.lookup n (commands cmdEnv) of

View File

@@ -77,6 +77,7 @@ import Data.Char
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.Maybe import Data.Maybe
import System.Random (newStdGen) import System.Random (newStdGen)
import Control.Monad
--------------------------------------------------- ---------------------------------------------------
-- Interface -- Interface
@@ -211,7 +212,7 @@ parse pgf lang cat s =
Just cnc -> case parser cnc of Just cnc -> case parser cnc of
Just pinfo -> if Map.lookup (mkCId "erasing") (cflags cnc) == Just "on" Just pinfo -> if Map.lookup (mkCId "erasing") (cflags cnc) == Just "on"
then Incremental.parse pinfo (mkCId cat) (words s) then Incremental.parse pinfo (mkCId cat) (words s)
else case parseFCFG "bottomup" pinfo (mkCId cat) (words s) of else case parseFCFG "topdown" pinfo (mkCId cat) (words s) of
Ok x -> x Ok x -> x
Bad s -> error s Bad s -> error s
Nothing -> error ("No parser built for language: " ++ lang) Nothing -> error ("No parser built for language: " ++ lang)
@@ -259,9 +260,10 @@ startCat pgf = lookStartCat pgf
complete pgf from cat input = complete pgf from cat input =
let (ws,prefix) = tokensAndPrefix input let (ws,prefix) = tokensAndPrefix input
state0 = initState pgf from cat state0 = initState pgf from cat
state = foldl Incremental.nextState state0 ws in case foldM Incremental.nextState state0 ws of
compls = Incremental.getCompletions state prefix Nothing -> []
in [unwords (ws++[c]) ++ " " | c <- Map.keys compls] Just state -> let compls = Incremental.getCompletions state prefix
in [unwords (ws++[c]) ++ " " | c <- Map.keys compls]
where where
tokensAndPrefix :: String -> ([String],String) tokensAndPrefix :: String -> ([String],String)
tokensAndPrefix s | not (null s) && isSpace (last s) = (words s, "") tokensAndPrefix s | not (null s) && isSpace (last s) = (words s, "")

View File

@@ -15,50 +15,62 @@ import PGF.CId
import PGF.Data import PGF.Data
import PGF.Parsing.FCFG.Utilities import PGF.Parsing.FCFG.Utilities
import Data.Array import Data.Array.IArray
import Data.Maybe import Data.Maybe
import qualified Data.IntMap as IntMap
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import Debug.Trace import Debug.Trace
data ParserInfoEx
= ParserInfoEx { epsilonRules :: [(FunId,[FCat],FCat)]
, leftcornerCats :: Assoc FCat [(FunId,[FCat],FCat)]
, leftcornerTokens :: Assoc String [(FunId,[FCat],FCat)]
, grammarToks :: [String]
}
------------------------------------------------------------ ------------------------------------------------------------
-- parser information -- parser information
getLeftCornerTok (FRule _ _ _ _ lins) getLeftCornerTok pinfo (FFun _ _ lins)
| inRange (bounds syms) 0 = case syms ! 0 of | inRange (bounds syms) 0 = case syms ! 0 of
FSymTok tok -> [tok] FSymTok (KS tok) -> [tok]
_ -> []
| otherwise = []
where
syms = (sequences pinfo) ! (lins ! 0)
getLeftCornerCat pinfo args (FFun _ _ lins)
| inRange (bounds syms) 0 = case syms ! 0 of
FSymCat d _ -> let cat = args !! d
in case IntMap.lookup cat (productions pinfo) of
Just set -> cat : [cat' | FCoerce cat' <- Set.toList set]
Nothing -> [cat]
_ -> [] _ -> []
| otherwise = [] | otherwise = []
where where
syms = lins ! 0 syms = (sequences pinfo) ! (lins ! 0)
getLeftCornerCat (FRule _ _ args _ lins) buildParserInfo :: ParserInfo -> ParserInfoEx
| inRange (bounds syms) 0 = case syms ! 0 of buildParserInfo pinfo =
FSymCat _ d -> [args !! d] ParserInfoEx { epsilonRules = epsilonrules
_ -> [] , leftcornerCats = leftcorncats
| otherwise = [] , leftcornerTokens = leftcorntoks
where , grammarToks = grammartoks
syms = lins ! 0 }
buildParserInfo :: FGrammar -> ParserInfo where epsilonrules = [ (ruleid,args,cat)
buildParserInfo (grammar,startup) = -- trace (unlines [prt (x,Set.toList set) | (x,set) <- Map.toList leftcornFilter]) $ | (cat,set) <- IntMap.toList (productions pinfo)
ParserInfo { allRules = allrules , (FApply ruleid args) <- Set.toList set
, topdownRules = topdownrules , let (FFun _ _ lins) = (functions pinfo) ! ruleid
-- , emptyRules = emptyrules , not (inRange (bounds ((sequences pinfo) ! (lins ! 0))) 0) ]
, epsilonRules = epsilonrules leftcorncats = accumAssoc id [ (cat', (ruleid, args, cat))
, leftcornerCats = leftcorncats | (cat,set) <- IntMap.toList (productions pinfo)
, leftcornerTokens = leftcorntoks , (FApply ruleid args) <- Set.toList set
, grammarCats = grammarcats , cat' <- getLeftCornerCat pinfo args ((functions pinfo) ! ruleid) ]
, grammarToks = grammartoks leftcorntoks = accumAssoc id [ (tok, (ruleid, args, cat))
, startupCats = startup | (cat,set) <- IntMap.toList (productions pinfo)
} , (FApply ruleid args) <- Set.toList set
, tok <- getLeftCornerTok pinfo ((functions pinfo) ! ruleid) ]
where allrules = listArray (0,length grammar-1) grammar grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymTok (KS t) <- elems lin]
topdownrules = accumAssoc id [(cat, ruleid) | (ruleid, FRule _ _ _ cat _) <- assocs allrules]
epsilonrules = [ ruleid | (ruleid, FRule _ _ _ _ lins) <- assocs allrules,
not (inRange (bounds (lins ! 0)) 0) ]
leftcorncats = accumAssoc id [ (cat, ruleid) | (ruleid, rule) <- assocs allrules, cat <- getLeftCornerCat rule ]
leftcorntoks = accumAssoc id [ (tok, ruleid) | (ruleid, rule) <- assocs allrules, tok <- getLeftCornerTok rule ]
grammarcats = aElems topdownrules
grammartoks = nubsort [t | (FRule _ _ _ _ lins) <- grammar, lin <- elems lins, FSymTok t <- elems lin]

View File

@@ -2,11 +2,13 @@ module PGF.Data where
import PGF.CId import PGF.CId
import GF.Text.UTF8 import GF.Text.UTF8
import GF.Data.Assoc
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.List import Data.List
import Data.Array import Data.Array
import Data.Array.Unboxed
-- internal datatypes for PGF -- internal datatypes for PGF
@@ -108,32 +110,28 @@ data Equation =
deriving (Eq,Ord,Show) deriving (Eq,Ord,Show)
type FToken = String
type FCat = Int type FCat = Int
type FIndex = Int type FIndex = Int
data FSymbol
= FSymCat {-# UNPACK #-} !FIndex {-# UNPACK #-} !Int
| FSymTok FToken
type Profile = [Int]
type FPointPos = Int type FPointPos = Int
type FGrammar = ([FRule], Map.Map CId [FCat]) data FSymbol
data FRule = FRule CId [Profile] [FCat] FCat (Array FIndex (Array FPointPos FSymbol)) = FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
| FSymTok Tokn
type RuleId = Int deriving (Eq,Ord,Show)
type Profile = [Int]
data Production
= FApply {-# UNPACK #-} !FunId [FCat]
| FCoerce {-# UNPACK #-} !FCat
deriving (Eq,Ord,Show)
data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
type FSeq = Array FPointPos FSymbol
type FunId = Int
type SeqId = Int
data ParserInfo data ParserInfo
= ParserInfo { allRules :: Array RuleId FRule = ParserInfo { functions :: Array FunId FFun
, topdownRules :: Assoc FCat [RuleId] , sequences :: Array SeqId FSeq
-- ^ used in 'GF.Parsing.MCFG.Active' (Earley): , productions :: IntMap.IntMap (Set.Set Production)
-- , emptyRules :: [RuleId] , startCats :: Map.Map CId [FCat]
, epsilonRules :: [RuleId]
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
, leftcornerCats :: Assoc FCat [RuleId]
, leftcornerTokens :: Assoc FToken [RuleId]
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
, grammarCats :: [FCat]
, grammarToks :: [FToken]
, startupCats :: Map.Map CId [FCat]
} }

View File

@@ -49,12 +49,6 @@ lookValCat pgf = valCat . lookType pgf
lookParser :: PGF -> CId -> Maybe ParserInfo lookParser :: PGF -> CId -> Maybe ParserInfo
lookParser pgf lang = Map.lookup lang (concretes pgf) >>= parser lookParser pgf lang = Map.lookup lang (concretes pgf) >>= parser
lookFCFG :: PGF -> CId -> Maybe FGrammar
lookFCFG pgf lang = fmap toFGrammar $ lookParser pgf lang
where
toFGrammar :: ParserInfo -> FGrammar
toFGrammar pinfo = (Array.elems (allRules pinfo), startupCats pinfo)
lookStartCat :: PGF -> String lookStartCat :: PGF -> String
lookStartCat pgf = fromMaybe "S" $ msum $ Data.List.map (Map.lookup (mkCId "startcat")) lookStartCat pgf = fromMaybe "S" $ msum $ Data.List.map (Map.lookup (mkCId "startcat"))
[gflags pgf, aflags (abstract pgf)] [gflags pgf, aflags (abstract pgf)]

View File

@@ -8,7 +8,7 @@
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
module PGF.Parsing.FCFG module PGF.Parsing.FCFG
(buildParserInfo,ParserInfo,parseFCFG) where (ParserInfo,parseFCFG) where
import GF.Data.ErrM import GF.Data.ErrM
import GF.Data.Assoc import GF.Data.Assoc
@@ -17,7 +17,6 @@ import GF.Data.SortedList
import PGF.CId import PGF.CId
import PGF.Data import PGF.Data
import PGF.Macros import PGF.Macros
import PGF.BuildParser
import PGF.Parsing.FCFG.Utilities import PGF.Parsing.FCFG.Utilities
import qualified PGF.Parsing.FCFG.Active as Active import qualified PGF.Parsing.FCFG.Active as Active
import qualified PGF.Parsing.FCFG.Incremental as Incremental import qualified PGF.Parsing.FCFG.Incremental as Incremental

View File

@@ -17,17 +17,22 @@ import qualified GF.Data.MultiMap as MM
import PGF.CId import PGF.CId
import PGF.Data import PGF.Data
import PGF.Parsing.FCFG.Utilities import PGF.Parsing.FCFG.Utilities
import PGF.BuildParser
import Control.Monad (guard) import Control.Monad (guard)
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set import qualified Data.Set as Set
import Data.Array import Data.Array.IArray
import Debug.Trace
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- * parsing -- * parsing
type FToken = String
makeFinalEdge cat 0 0 = (cat, [EmptyRange]) makeFinalEdge cat 0 0 = (cat, [EmptyRange])
makeFinalEdge cat i j = (cat, [makeRange i j]) makeFinalEdge cat i j = (cat, [makeRange i j])
@@ -36,77 +41,79 @@ parse :: String -> ParserInfo -> CId -> [FToken] -> [Tree]
parse strategy pinfo start toks = nubsort $ filteredForests >>= forest2trees parse strategy pinfo start toks = nubsort $ filteredForests >>= forest2trees
where where
inTokens = input toks inTokens = input toks
starts = Map.findWithDefault [] start (startupCats pinfo) starts = Map.findWithDefault [] start (startCats pinfo)
schart = xchart2syntaxchart chart pinfo schart = xchart2syntaxchart chart pinfo
(i,j) = inputBounds inTokens (i,j) = inputBounds inTokens
finalEdges = [makeFinalEdge cat i j | cat <- starts] finalEdges = [makeFinalEdge cat i j | cat <- starts]
forests = chart2forests schart (const False) finalEdges forests = chart2forests schart (const False) finalEdges
filteredForests = forests >>= applyProfileToForest filteredForests = forests >>= applyProfileToForest
chart = process strategy pinfo inTokens axioms emptyXChart pinfoex = buildParserInfo pinfo
axioms | isBU strategy = literals pinfo inTokens ++ initialBU pinfo inTokens
| isTD strategy = literals pinfo inTokens ++ initialTD pinfo starts inTokens chart = process strategy pinfo pinfoex inTokens axioms emptyXChart
axioms | isBU strategy = literals pinfoex inTokens ++ initialBU pinfo pinfoex inTokens
| isTD strategy = literals pinfoex inTokens ++ initialTD pinfo starts inTokens
isBU s = s=="b" isBU s = s=="b"
isTD s = s=="t" isTD s = s=="t"
-- used in prediction -- used in prediction
emptyChildren :: RuleId -> ParserInfo -> SyntaxNode RuleId RangeRec emptyChildren :: FunId -> [FCat] -> SyntaxNode FunId RangeRec
emptyChildren ruleid pinfo = SNode ruleid (replicate (length rhs) []) emptyChildren ruleid args = SNode ruleid (replicate (length args) [])
where
FRule _ _ rhs _ _ = allRules pinfo ! ruleid
process :: String -> ParserInfo -> Input FToken -> [(FCat,Item)] -> XChart FCat -> XChart FCat
process strategy pinfo toks [] chart = chart process :: String -> ParserInfo -> ParserInfoEx -> Input FToken -> [Item] -> XChart FCat -> XChart FCat
process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks items $! univRule c item chart process strategy pinfo pinfoex toks [] chart = chart
process strategy pinfo pinfoex toks (item:items) chart = process strategy pinfo pinfoex toks items $! univRule item chart
where where
univRule cat item@(Active found rng lbl ppos node@(SNode ruleid recs)) chart univRule item@(Active found rng lbl ppos node@(SNode ruleid recs) args cat) chart
| inRange (bounds lin) ppos = | inRange (bounds lin) ppos =
case lin ! ppos of case lin ! ppos of
FSymCat r d -> let c = args !! d FSymCat d r -> let c = args !! d
in case recs !! d of in case recs !! d of
[] -> case insertXChart chart item c of [] -> case insertXChart chart item c of
Nothing -> chart Nothing -> chart
Just chart -> let items = do item@(Final found' _) <- lookupXChartFinal chart c Just chart -> let items = do item@(Final found' _ _ _) <- lookupXChartFinal chart c
rng <- concatRange rng (found' !! r) rng <- concatRange rng (found' !! r)
return (c, Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs))) return (Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)) args cat)
++ ++
do guard (isTD strategy) do guard (isTD strategy)
ruleid <- topdownRules pinfo ? c (ruleid,args) <- topdownRules pinfo c
return (c, Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo)) return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args c)
in process strategy pinfo toks items chart in process strategy pinfo pinfoex toks items chart
found' -> let items = do rng <- concatRange rng (found' !! r) found' -> let items = do rng <- concatRange rng (found' !! r)
return (c, Active found rng lbl (ppos+1) node) return (Active found rng lbl (ppos+1) node args cat)
in process strategy pinfo toks items chart in process strategy pinfo pinfoex toks items chart
FSymTok tok -> let items = do t_rng <- inputToken toks ? tok FSymTok (KS tok)
-> let items = do t_rng <- inputToken toks ? tok
rng' <- concatRange rng t_rng rng' <- concatRange rng t_rng
return (cat, Active found rng' lbl (ppos+1) node) return (Active found rng' lbl (ppos+1) node args cat)
in process strategy pinfo toks items chart in process strategy pinfo pinfoex toks items chart
| otherwise = | otherwise =
if inRange (bounds lins) (lbl+1) if inRange (bounds lins) (lbl+1)
then univRule cat (Active (rng:found) EmptyRange (lbl+1) 0 node) chart then univRule (Active (rng:found) EmptyRange (lbl+1) 0 node args cat) chart
else univRule cat (Final (reverse (rng:found)) node) chart else univRule (Final (reverse (rng:found)) node args cat) chart
where where
(FRule _ _ args cat lins) = allRules pinfo ! ruleid (FFun _ _ lins) = functions pinfo ! ruleid
lin = lins ! lbl lin = sequences pinfo ! (lins ! lbl)
univRule cat item@(Final found' node) chart = univRule item@(Final found' node args cat) chart =
case insertXChart chart item cat of case insertXChart chart item cat of
Nothing -> chart Nothing -> chart
Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _)) <- lookupXChartAct chart cat Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _) args c) <- lookupXChartAct chart cat
let FRule _ _ args _ lins = allRules pinfo ! ruleid let FFun _ _ lins = functions pinfo ! ruleid
FSymCat r d = lins ! l ! ppos FSymCat d r = (sequences pinfo ! (lins ! l)) ! ppos
rng <- concatRange rng (found' !! r) rng <- concatRange rng (found' !! r)
return (args !! d, Active found rng l (ppos+1) (updateChildren node d found')) return (Active found rng l (ppos+1) (updateChildren node d found') args c)
++ ++
do guard (isBU strategy) do guard (isBU strategy)
ruleid <- leftcornerCats pinfo ? cat (ruleid,args,c) <- leftcornerCats pinfoex ? cat
let FRule _ _ args _ lins = allRules pinfo ! ruleid let FFun _ _ lins = functions pinfo ! ruleid
FSymCat r d = lins ! 0 ! 0 FSymCat d r = (sequences pinfo ! (lins ! 0)) ! 0
return (args !! d, Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid pinfo) d found')) return (Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid args) d found') args c)
updateChildren :: SyntaxNode RuleId RangeRec -> Int -> RangeRec -> SyntaxNode RuleId RangeRec updateChildren :: SyntaxNode FunId RangeRec -> Int -> RangeRec -> SyntaxNode FunId RangeRec
updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs
in process strategy pinfo toks items chart in process strategy pinfo pinfoex toks items chart
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- * XChart -- * XChart
@@ -116,21 +123,23 @@ data Item
Range Range
{-# UNPACK #-} !FIndex {-# UNPACK #-} !FIndex
{-# UNPACK #-} !FPointPos {-# UNPACK #-} !FPointPos
(SyntaxNode RuleId RangeRec) (SyntaxNode FunId RangeRec)
| Final RangeRec (SyntaxNode RuleId RangeRec) [FCat]
deriving (Eq, Ord) FCat
| Final RangeRec (SyntaxNode FunId RangeRec) [FCat] FCat
deriving (Eq, Ord, Show)
data XChart c = XChart !(MM.MultiMap c Item) !(MM.MultiMap c Item) data XChart c = XChart !(MM.MultiMap c Item) !(MM.MultiMap c Item)
emptyXChart :: Ord c => XChart c emptyXChart :: Ord c => XChart c
emptyXChart = XChart MM.empty MM.empty emptyXChart = XChart MM.empty MM.empty
insertXChart (XChart actives finals) item@(Active _ _ _ _ _) c = insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _ _) c =
case MM.insert' c item actives of case MM.insert' c item actives of
Nothing -> Nothing Nothing -> Nothing
Just actives -> Just (XChart actives finals) Just actives -> Just (XChart actives finals)
insertXChart (XChart actives finals) item@(Final _ _) c = insertXChart (XChart actives finals) item@(Final _ _ _ _) c =
case MM.insert' c item finals of case MM.insert' c item finals of
Nothing -> Nothing Nothing -> Nothing
Just finals -> Just (XChart actives finals) Just finals -> Just (XChart actives finals)
@@ -142,17 +151,17 @@ xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) (
xchart2syntaxchart (XChart actives finals) pinfo = xchart2syntaxchart (XChart actives finals) pinfo =
accumAssoc groupSyntaxNodes $ accumAssoc groupSyntaxNodes $
[ case node of [ case node of
SNode ruleid rrecs -> let FRule fun prof rhs cat _ = allRules pinfo ! ruleid SNode ruleid rrecs -> let FFun fun prof _ = functions pinfo ! ruleid
in ((cat,found), SNode (fun,prof) (zip rhs rrecs)) in ((cat,found), SNode (fun,prof) (zip rhs rrecs))
SString s -> ((cat,found), SString s) SString s -> ((cat,found), SString s)
SInt n -> ((cat,found), SInt n) SInt n -> ((cat,found), SInt n)
SFloat f -> ((cat,found), SFloat f) SFloat f -> ((cat,found), SFloat f)
| (cat, Final found node) <- MM.toList finals | (Final found node rhs cat) <- MM.elems finals
] ]
literals :: ParserInfo -> Input FToken -> [(FCat,Item)] literals :: ParserInfoEx -> Input FToken -> [Item]
literals pinfo toks = literals pinfoex toks =
[let (c,node) = lexer t in (c,Final [rng] node) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfo)] [let (c,node) = lexer t in (Final [rng] node [] c) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfoex)]
where where
lexer t = lexer t =
case reads t of case reads t of
@@ -166,24 +175,30 @@ literals pinfo toks =
-- Earley -- -- Earley --
-- called with all starting categories -- called with all starting categories
initialTD :: ParserInfo -> [FCat] -> Input FToken -> [(FCat,Item)] initialTD :: ParserInfo -> [FCat] -> Input FToken -> [Item]
initialTD pinfo starts toks = initialTD pinfo starts toks =
do cat <- starts do cat <- starts
ruleid <- topdownRules pinfo ? cat (ruleid,args) <- topdownRules pinfo cat
return (cat,Active [] (Range 0 0) 0 0 (emptyChildren ruleid pinfo)) return (Active [] (Range 0 0) 0 0 (emptyChildren ruleid args) args cat)
topdownRules pinfo cat = f cat []
where
f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo))
g (FApply ruleid args) rules = (ruleid,args) : rules
g (FCoerce cat) rules = f cat rules
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- Kilbury -- -- Kilbury --
initialBU :: ParserInfo -> Input FToken -> [(FCat,Item)] initialBU :: ParserInfo -> ParserInfoEx -> Input FToken -> [Item]
initialBU pinfo toks = initialBU pinfo pinfoex toks =
do (tok,rngs) <- aAssocs (inputToken toks) do (tok,rngs) <- aAssocs (inputToken toks)
ruleid <- leftcornerTokens pinfo ? tok (ruleid,args,cat) <- leftcornerTokens pinfoex ? tok
let FRule _ _ _ cat _ = allRules pinfo ! ruleid
rng <- rngs rng <- rngs
return (cat,Active [] rng 0 1 (emptyChildren ruleid pinfo)) return (Active [] rng 0 1 (emptyChildren ruleid args) args cat)
++ ++
do ruleid <- epsilonRules pinfo do (ruleid,args,cat) <- epsilonRules pinfoex
let FRule _ _ _ cat _ = allRules pinfo ! ruleid let FFun _ _ _ = functions pinfo ! ruleid
return (cat,Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo)) return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args cat)

View File

@@ -8,55 +8,54 @@ module PGF.Parsing.FCFG.Incremental
, parse , parse
) where ) where
import Data.Array import Data.Array.IArray
import Data.Array.Base (unsafeAt) import Data.Array.Base (unsafeAt)
import Data.List (isPrefixOf, foldl') import Data.List (isPrefixOf, foldl')
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe, maybe)
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.IntMap as IntMap import qualified Data.IntMap as IntMap
import qualified Data.Set as Set import qualified Data.Set as Set
import Control.Monad import Control.Monad
import GF.Data.Assoc
import GF.Data.SortedList import GF.Data.SortedList
import qualified GF.Data.MultiMap as MM
import PGF.CId import PGF.CId
import PGF.Data import PGF.Data
import PGF.Parsing.FCFG.Utilities
import Debug.Trace import Debug.Trace
parse :: ParserInfo -> CId -> [FToken] -> [Tree] parse :: ParserInfo -> CId -> [String] -> [Tree]
parse pinfo start toks = extractExps (foldl' nextState (initState pinfo start) toks) start parse pinfo start toks = maybe [] (\ps -> extractExps ps start) (foldM nextState (initState pinfo start) toks)
initState :: ParserInfo -> CId -> ParseState initState :: ParserInfo -> CId -> ParseState
initState pinfo start = initState pinfo start =
let items = do let items = do
c <- Map.findWithDefault [] start (startupCats pinfo) cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
ruleid <- topdownRules pinfo ? c (funid,args) <- foldForest (\funid args -> (:) (funid,args)) [] cat (productions pinfo)
let (FRule fn _ args cat lins) = allRules pinfo ! ruleid let FFun fn _ lins = functions pinfo ! funid
lbl <- indices lins (lbl,seqid) <- assocs lins
return (Active 0 lbl 0 ruleid args cat) return (Active 0 0 funid seqid args (AK cat lbl))
forest = IntMap.fromListWith Set.union [(cat, Set.singleton (Passive ruleid args)) | (ruleid, FRule _ _ args cat _) <- assocs (allRules pinfo)] max_fid = maximum (0:[maximum (cat:args) | (cat, set) <- IntMap.toList (productions pinfo)
, p <- Set.toList set
max_fid = maximum (0:[maximum (cat:args) | (ruleid, FRule _ _ args cat _) <- assocs (allRules pinfo)])+1 , let args = case p of {FApply _ args -> args; FCoerce cat -> [cat]}])+1
in State pinfo in State pinfo
(Chart MM.empty [] Map.empty forest max_fid 0) (Chart emptyAC [] emptyPC (productions pinfo) max_fid 0)
(Set.fromList items) (Set.fromList items)
-- | From the current state and the next token -- | From the current state and the next token
-- 'nextState' computes a new state where the token -- 'nextState' computes a new state where the token
-- is consumed and the current position shifted by one. -- is consumed and the current position shifted by one.
nextState :: ParseState -> String -> ParseState nextState :: ParseState -> String -> Maybe ParseState
nextState (State pinfo chart items) t = nextState (State pinfo chart items) t =
let (items1,chart1) = process add (allRules pinfo) (Set.toList items) (Set.empty,chart) let (items1,chart1) = process add (sequences pinfo) (functions pinfo) (Set.toList items) Set.empty chart
chart2 = chart1{ active =MM.empty chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1 , actives=active chart1 : actives chart1
, passive=Map.empty , passive=emptyPC
, offset =offset chart1+1 , offset =offset chart1+1
} }
in State pinfo chart2 items1 in if Set.null items1
then Nothing
else Just (State pinfo chart2 items1)
where where
add tok item set add tok item set
| tok == t = Set.insert item set | tok == t = Set.insert item set
@@ -68,107 +67,157 @@ nextState (State pinfo chart items) t =
-- the GF interpreter. -- the GF interpreter.
getCompletions :: ParseState -> String -> Map.Map String ParseState getCompletions :: ParseState -> String -> Map.Map String ParseState
getCompletions (State pinfo chart items) w = getCompletions (State pinfo chart items) w =
let (map',chart1) = process add (allRules pinfo) (Set.toList items) (MM.empty,chart) let (map',chart1) = process add (sequences pinfo) (functions pinfo) (Set.toList items) Map.empty chart
chart2 = chart1{ active =MM.empty chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1 , actives=active chart1 : actives chart1
, passive=Map.empty , passive=emptyPC
, offset =offset chart1+1 , offset =offset chart1+1
} }
in fmap (State pinfo chart2) map' in fmap (State pinfo chart2) map'
where where
add tok item map add tok item map
| isPrefixOf w tok = fromMaybe map (MM.insert' tok item map) | isPrefixOf w tok = Map.insertWith Set.union tok (Set.singleton item) map
| otherwise = map | otherwise = map
extractExps :: ParseState -> CId -> [Tree] extractExps :: ParseState -> CId -> [Tree]
extractExps (State pinfo chart items) start = exps extractExps (State pinfo chart items) start = exps
where where
(_,st) = process (\_ _ -> id) (allRules pinfo) (Set.toList items) ((),chart) (_,st) = process (\_ _ -> id) (sequences pinfo) (functions pinfo) (Set.toList items) () chart
exps = nubsort $ do exps = nubsort $ do
c <- Map.findWithDefault [] start (startupCats pinfo) cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
ruleid <- topdownRules pinfo ? c (funid,args) <- foldForest (\funid args -> (:) (funid,args)) [] cat (productions pinfo)
let (FRule fn _ args cat lins) = allRules pinfo ! ruleid let FFun fn _ lins = functions pinfo ! funid
lbl <- indices lins lbl <- indices lins
fid <- Map.lookup (PK c lbl 0) (passive st) Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
go Set.empty fid go Set.empty fid
go rec fid go rec fcat
| Set.member fid rec = mzero | Set.member fcat rec = mzero
| otherwise = do set <- IntMap.lookup fid (forest st) | otherwise = do (funid,args) <- foldForest (\funid args -> (:) (funid,args)) [] fcat (forest st)
Passive ruleid args <- Set.toList set let FFun fn _ lins = functions pinfo ! funid
let (FRule fn _ _ cat lins) = allRules pinfo ! ruleid args <- mapM (go (Set.insert fcat rec)) args
if fn == wildCId return (Fun fn args)
then go (Set.insert fid rec) (head args)
else do args <- mapM (go (Set.insert fid rec)) args
return (Fun fn args)
process fn !rules [] acc_chart = acc_chart process fn !seqs !funs [] acc chart = (acc,chart)
process fn !rules (item:items) acc_chart = univRule item acc_chart process fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart
| inRange (bounds lin) ppos =
case unsafeAt lin ppos of
FSymCat d r -> let !fid = args !! d
key = AK fid r
items2 = case lookupPC (mkPK key k) (passive chart) of
Nothing -> items
Just id -> (Active j (ppos+1) funid seqid (updateAt d id args) key0) : items
items3 = foldForest (\funid args -> (:) (Active k 0 funid (rhs funid r) args key)) items2 fid (forest chart)
in case lookupAC key (active chart) of
Nothing -> process fn seqs funs items3 acc chart{active=insertAC key (Set.singleton item) (active chart)}
Just set | Set.member item set -> process fn seqs funs items acc chart
| otherwise -> process fn seqs funs items2 acc chart{active=insertAC key (Set.insert item set) (active chart)}
FSymTok (KS tok) -> let !acc' = fn tok (Active j (ppos+1) funid seqid args key0) acc
in process fn seqs funs items acc' chart
| otherwise =
case lookupPC (mkPK key0 j) (passive chart) of
Nothing -> let fid = nextId chart
items2 = case lookupAC key0 ((active chart:actives chart) !! (k-j)) of
Nothing -> items
Just set -> Set.fold (\(Active j' ppos funid seqid args keyc) ->
let FSymCat d _ = unsafeAt (unsafeAt seqs seqid) ppos
in (:) (Active j' (ppos+1) funid seqid (updateAt d fid args) keyc)) items set
in process fn seqs funs items2 acc chart{passive=insertPC (mkPK key0 j) fid (passive chart)
,forest =IntMap.insert fid (Set.singleton (FApply funid args)) (forest chart)
,nextId =nextId chart+1
}
Just id -> let items2 = [Active k 0 funid (rhs funid r) args (AK id r) | r <- labelsAC id (active chart)] ++ items
in process fn seqs funs items2 acc chart{forest = IntMap.insertWith Set.union id (Set.singleton (FApply funid args)) (forest chart)}
where where
univRule (Active j lbl ppos ruleid args fid0) acc_chart@(acc,chart) !lin = unsafeAt seqs seqid
| inRange (bounds lin) ppos = !k = offset chart
case unsafeAt lin ppos of
FSymCat r d -> let !fid = args !! d
in case MM.insert' (AK fid r) item (active chart) of
Nothing -> process fn rules items $ acc_chart
Just actCat -> (case Map.lookup (PK fid r k) (passive chart) of
Nothing -> id
Just id -> process fn rules [Active j lbl (ppos+1) ruleid (updateAt d id args) fid0]) $
(case IntMap.lookup fid (forest chart) of
Nothing -> id
Just set -> process fn rules (Set.fold (\(Passive ruleid args) -> (:) (Active k r 0 ruleid args fid)) [] set)) $
process fn rules items $
(acc,chart{active=actCat})
FSymTok tok -> process fn rules items $
(fn tok (Active j lbl (ppos+1) ruleid args fid0) acc,chart)
| otherwise = case Map.lookup (PK fid0 lbl j) (passive chart) of
Nothing -> let fid = nextId chart
in process fn rules [Active j' lbl (ppos+1) ruleid (updateAt d fid args) fidc
| Active j' lbl ppos ruleid args fidc <- ((active chart:actives chart) !! (k-j)) MM.! (AK fid0 lbl),
let FSymCat _ d = unsafeAt (rhs ruleid lbl) ppos] $
process fn rules items $
(acc,chart{passive=Map.insert (PK fid0 lbl j) fid (passive chart)
,forest =IntMap.insert fid (Set.singleton (Passive ruleid args)) (forest chart)
,nextId =nextId chart+1
})
Just id -> process fn rules items $
(acc,chart{forest = IntMap.insertWith Set.union id (Set.singleton (Passive ruleid args)) (forest chart)})
where
!lin = rhs ruleid lbl
!k = offset chart
rhs ruleid lbl = unsafeAt lins lbl mkPK (AK fid lbl) j = PK fid lbl j
rhs funid lbl = unsafeAt lins lbl
where where
(FRule _ _ _ cat lins) = unsafeAt rules ruleid FFun _ _ lins = unsafeAt funs funid
updateAt :: Int -> a -> [a] -> [a] updateAt :: Int -> a -> [a] -> [a]
updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs] updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
----------------------------------------------------------------
-- Active Chart
----------------------------------------------------------------
data Active data Active
= Active {-# UNPACK #-} !Int = Active {-# UNPACK #-} !Int
{-# UNPACK #-} !FIndex
{-# UNPACK #-} !FPointPos {-# UNPACK #-} !FPointPos
{-# UNPACK #-} !RuleId {-# UNPACK #-} !FunId
{-# UNPACK #-} !SeqId
[FCat] [FCat]
{-# UNPACK #-} !FCat {-# UNPACK #-} !ActiveKey
deriving (Eq,Show,Ord) deriving (Eq,Show,Ord)
data Passive
= Passive {-# UNPACK #-} !RuleId
[FCat]
deriving (Eq,Ord,Show)
data ActiveKey data ActiveKey
= AK {-# UNPACK #-} !FCat = AK {-# UNPACK #-} !FCat
{-# UNPACK #-} !FIndex {-# UNPACK #-} !FIndex
deriving (Eq,Ord,Show) deriving (Eq,Ord,Show)
type ActiveChart = IntMap.IntMap (IntMap.IntMap (Set.Set Active))
emptyAC :: ActiveChart
emptyAC = IntMap.empty
lookupAC :: ActiveKey -> ActiveChart -> Maybe (Set.Set Active)
lookupAC (AK fcat l) chart = IntMap.lookup fcat chart >>= IntMap.lookup l
labelsAC :: FCat -> ActiveChart -> [FIndex]
labelsAC fcat chart =
case IntMap.lookup fcat chart of
Nothing -> []
Just map -> IntMap.keys map
insertAC :: ActiveKey -> Set.Set Active -> ActiveChart -> ActiveChart
insertAC (AK fcat l) set chart = IntMap.insertWith IntMap.union fcat (IntMap.singleton l set) chart
----------------------------------------------------------------
-- Passive Chart
----------------------------------------------------------------
data PassiveKey data PassiveKey
= PK {-# UNPACK #-} !FCat = PK {-# UNPACK #-} !FCat
{-# UNPACK #-} !FIndex {-# UNPACK #-} !FIndex
{-# UNPACK #-} !Int {-# UNPACK #-} !Int
deriving (Eq,Ord,Show) deriving (Eq,Ord,Show)
type PassiveChart = Map.Map PassiveKey FCat
emptyPC :: PassiveChart
emptyPC = Map.empty
lookupPC :: PassiveKey -> PassiveChart -> Maybe FCat
lookupPC key chart = Map.lookup key chart
insertPC :: PassiveKey -> FCat -> PassiveChart -> PassiveChart
insertPC key fcat chart = Map.insert key fcat chart
----------------------------------------------------------------
-- Forest
----------------------------------------------------------------
foldForest :: (FunId -> [FCat] -> b -> b) -> b -> FCat -> IntMap.IntMap (Set.Set Production) -> b
foldForest f b fcat forest =
case IntMap.lookup fcat forest of
Nothing -> b
Just set -> Set.fold foldPassive b set
where
foldPassive (FCoerce fcat) b = foldForest f b fcat forest
foldPassive (FApply funid args) b = f funid args b
----------------------------------------------------------------
-- Parse State
----------------------------------------------------------------
-- | An abstract data type whose values represent -- | An abstract data type whose values represent
-- the current state in an incremental parser. -- the current state in an incremental parser.
@@ -176,10 +225,11 @@ data ParseState = State ParserInfo Chart (Set.Set Active)
data Chart data Chart
= Chart = Chart
{ active :: MM.MultiMap ActiveKey Active { active :: ActiveChart
, actives :: [MM.MultiMap ActiveKey Active] , actives :: [ActiveChart]
, passive :: Map.Map PassiveKey FCat , passive :: PassiveChart
, forest :: IntMap.IntMap (Set.Set Passive) , forest :: IntMap.IntMap (Set.Set Production)
, nextId :: {-# UNPACK #-} !FCat , nextId :: {-# UNPACK #-} !FCat
, offset :: {-# UNPACK #-} !Int , offset :: {-# UNPACK #-} !Int
} }
deriving Show

View File

@@ -31,7 +31,7 @@ type RangeRec = [Range]
data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int
| EmptyRange | EmptyRange
deriving (Eq, Ord) deriving (Eq, Ord, Show)
makeRange :: Int -> Int -> Range makeRange :: Int -> Int -> Range
makeRange = Range makeRange = Range
@@ -83,7 +83,7 @@ data SyntaxNode n e = SMeta
| SString String | SString String
| SInt Integer | SInt Integer
| SFloat Double | SFloat Double
deriving (Eq,Ord) deriving (Eq,Ord,Show)
groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]] groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]]
groupSyntaxNodes [] = [] groupSyntaxNodes [] = []

View File

@@ -3,13 +3,12 @@ module PGF.Raw.Convert (toPGF,fromPGF) where
import PGF.CId import PGF.CId
import PGF.Data import PGF.Data
import PGF.Raw.Abstract import PGF.Raw.Abstract
import PGF.BuildParser (buildParserInfo)
import PGF.Parsing.FCFG.Utilities
import qualified GF.Compile.GenerateFCFG as FCFG
import qualified GF.Compile.GeneratePMCFG as PMCFG import qualified GF.Compile.GeneratePMCFG as PMCFG
import qualified Data.Array as Array import Data.Array.IArray
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
pgfMajorVersion, pgfMinorVersion :: Integer pgfMajorVersion, pgfMinorVersion :: Integer
(pgfMajorVersion, pgfMinorVersion) = (1,0) (pgfMajorVersion, pgfMinorVersion) = (1,0)
@@ -54,11 +53,11 @@ toConcr pgf rexp =
lindefs = Map.empty, lindefs = Map.empty,
printnames = Map.empty, printnames = Map.empty,
paramlincats = Map.empty, paramlincats = Map.empty,
parser = Just (buildParserOnDemand cnc) -- This thunk will be overwritten if there is a parser parser = Just (PMCFG.convertConcrete (abstract pgf) cnc)
-- This thunk will be overwritten if there is a parser
-- compiled in the PGF file. We use lazy evaluation here -- compiled in the PGF file. We use lazy evaluation here
-- to make sure that buildParserOnDemand is called only -- to make sure that buildParserOnDemand is called only
-- if it is needed. -- if it is needed.
}) rexp }) rexp
in cnc in cnc
where where
@@ -72,41 +71,44 @@ toConcr pgf rexp =
add cnc (App "param" ts) = cnc { paramlincats = mkTermMap ts } add cnc (App "param" ts) = cnc { paramlincats = mkTermMap ts }
add cnc (App "parser" ts) = cnc { parser = Just (toPInfo ts) } add cnc (App "parser" ts) = cnc { parser = Just (toPInfo ts) }
buildParserOnDemand cnc = buildParserInfo fcfg
where
fcfg
| Map.lookup (mkCId "erasing") (cflags cnc) == Just "on" = PMCFG.convertConcrete (abstract pgf) cnc
| otherwise = FCFG.convertConcrete (abstract pgf) cnc
toPInfo :: [RExp] -> ParserInfo toPInfo :: [RExp] -> ParserInfo
toPInfo [App "rules" rs, App "startupcats" cs] = buildParserInfo (rules, cats) toPInfo [App "functions" fs, App "sequences" ss, App "productions" ps,App "startcats" cs] =
ParserInfo { functions = functions
, sequences = seqs
, productions = productions
, startCats = cats
}
where where
rules = map toFRule rs functions = mkArray (map toFFun fs)
cats = Map.fromList [(mkCId c, map expToInt fs) | App c fs <- cs] seqs = mkArray (map toFSeq ss)
productions = IntMap.fromList (map toProductionSet ps)
cats = Map.fromList [(mkCId c, (map expToInt xs)) | App c xs <- cs]
toFRule :: RExp -> FRule toFFun :: RExp -> FFun
toFRule (App "rule" toFFun (App f [App "P" ts,App "R" ls]) = FFun fun prof lins
[n, where
App "cats" (rt:at), fun = mkCId f
App "R" ls]) = FRule fun prof args res lins prof = map toProfile ts
lins = mkArray [fromIntegral seqid | AInt seqid <- ls]
toProfile :: RExp -> Profile
toProfile AMet = []
toProfile (App "_A" [t]) = [expToInt t]
toProfile (App "_U" ts) = [expToInt t | App "_A" [t] <- ts]
toFSeq :: RExp -> FSeq
toFSeq (App "seq" ss) = mkArray [toSymbol s | s <- ss]
toProductionSet :: RExp -> (FCat,Set.Set Production)
toProductionSet (App "td" (rt : xs)) = (expToInt rt, Set.fromList (map toProduction xs))
where where
(fun,prof) = toFName n toProduction (App "A" (ruleid : at)) = FApply (expToInt ruleid) (map expToInt at)
args = map expToInt at toProduction (App "C" [fcat]) = FCoerce (expToInt fcat)
res = expToInt rt
lins = mkArray [mkArray [toSymbol s | s <- l] | App "S" l <- ls]
toFName :: RExp -> (CId,[Profile])
toFName (App "_A" [x]) = (wildCId, [[expToInt x]])
toFName (App f ts) = (mkCId f, map toProfile ts)
where
toProfile :: RExp -> Profile
toProfile AMet = []
toProfile (App "_A" [t]) = [expToInt t]
toProfile (App "_U" ts) = [expToInt t | App "_A" [t] <- ts]
toSymbol :: RExp -> FSymbol toSymbol :: RExp -> FSymbol
toSymbol (App "P" [n,l]) = FSymCat (expToInt l) (expToInt n) toSymbol (App "P" [n,l]) = FSymCat (expToInt n) (expToInt l)
toSymbol (AStr t) = FSymTok t toSymbol (App "KP" (d:alts)) = FSymTok (toKP d alts)
toSymbol (AStr t) = FSymTok (KS t)
toType :: RExp -> Type toType :: RExp -> Type
toType e = case e of toType e = case e of
@@ -142,8 +144,15 @@ toTerm e = case e of
App f [] -> F (mkCId f) App f [] -> F (mkCId f)
AInt i -> C (fromInteger i) AInt i -> C (fromInteger i)
AMet -> TM "?" AMet -> TM "?"
AStr s -> K (KS s) ---- App "KP" (d:alts) -> K (toKP d alts)
AStr s -> K (KS s)
_ -> error $ "term " ++ show e _ -> error $ "term " ++ show e
toKP d alts = KP (toStr d) (map toAlt alts)
where
toStr (App "S" vs) = [v | AStr v <- vs]
toAlt (App "A" [x,y]) = Alt (toStr x) (toStr y)
------------------------------ ------------------------------
--- from internal to parser -- --- from internal to parser --
@@ -192,8 +201,7 @@ fromExp e = case e of
ELit (LFlt d) -> AFlt d ELit (LFlt d) -> AFlt d
ELit (LInt i) -> AInt (toInteger i) ELit (LInt i) -> AInt (toInteger i)
EMeta _ -> AMet ---- EMeta _ -> AMet ----
EEq eqs -> EEq eqs -> App "Eq" [App "E" (map fromExp (v:ps)) | Equ ps v <- eqs]
App "Eq" [App "E" (map fromExp (v:ps)) | Equ ps v <- eqs]
fromTerm :: Term -> RExp fromTerm :: Term -> RExp
fromTerm e = case e of fromTerm e = case e of
@@ -206,8 +214,11 @@ fromTerm e = case e of
TM _ -> AMet TM _ -> AMet
F f -> App (prCId f) [] F f -> App (prCId f) []
V i -> App "A" [AInt (toInteger i)] V i -> App "A" [AInt (toInteger i)]
K (KS s) -> AStr s ---- K t -> fromTokn t
K (KP d vs) -> App "FV" (str d : [str v | Alt v _ <- vs]) ----
fromTokn :: Tokn -> RExp
fromTokn (KS s) = AStr s
fromTokn (KP d vs) = App "KP" (str d : [App "A" [str v, str x] | Alt v x <- vs])
where where
str v = App "S" (map AStr v) str v = App "S" (map AStr v)
@@ -215,39 +226,42 @@ fromTerm e = case e of
fromPInfo :: ParserInfo -> RExp fromPInfo :: ParserInfo -> RExp
fromPInfo p = App "parser" [ fromPInfo p = App "parser" [
App "rules" [fromFRule rule | rule <- Array.elems (allRules p)], App "functions" [fromFFun fun | fun <- elems (functions p)],
App "startupcats" [App (prCId f) (map intToExp cs) | (f,cs) <- Map.toList (startupCats p)] App "sequences" [fromFSeq seq | seq <- elems (sequences p)],
App "productions" [fromProductionSet xs | xs <- IntMap.toList (productions p)],
App "startcats" [App (prCId f) (map intToExp xs) | (f,xs) <- Map.toList (startCats p)]
] ]
fromFRule :: FRule -> RExp fromFFun :: FFun -> RExp
fromFRule (FRule fun prof args res lins) = fromFFun (FFun fun prof lins) = App (prCId fun) [App "P" (map fromProfile prof), App "R" [intToExp seqid | seqid <- elems lins]]
App "rule" [fromFName (fun,prof),
App "cats" (intToExp res:map intToExp args),
App "R" [App "S" [fromSymbol s | s <- Array.elems l] | l <- Array.elems lins]
]
fromFName :: (CId,[Profile]) -> RExp
fromFName (f,ps) | f == wildCId = fromProfile (head ps)
| otherwise = App (prCId f) (map fromProfile ps)
where where
fromProfile :: Profile -> RExp fromProfile :: Profile -> RExp
fromProfile [] = AMet fromProfile [] = AMet
fromProfile [x] = daughter x fromProfile [x] = daughter x
fromProfile args = App "_U" (map daughter args) fromProfile args = App "_U" (map daughter args)
daughter n = App "_A" [intToExp n] daughter n = App "_A" [intToExp n]
fromSymbol :: FSymbol -> RExp fromSymbol :: FSymbol -> RExp
fromSymbol (FSymCat l n) = App "P" [intToExp n, intToExp l] fromSymbol (FSymCat n l) = App "P" [intToExp n, intToExp l]
fromSymbol (FSymTok t) = AStr t fromSymbol (FSymTok t) = fromTokn t
fromFSeq :: FSeq -> RExp
fromFSeq seq = App "seq" [fromSymbol s | s <- elems seq]
fromProductionSet :: (FCat,Set.Set Production) -> RExp
fromProductionSet (cat,xs) = App "td" (intToExp cat : map fromPassive (Set.toList xs))
where
fromPassive (FApply ruleid args) = App "A" (intToExp ruleid : map intToExp args)
fromPassive (FCoerce fcat) = App "C" [intToExp fcat]
-- ** Utilities -- ** Utilities
mkTermMap :: [RExp] -> Map.Map CId Term mkTermMap :: [RExp] -> Map.Map CId Term
mkTermMap ts = Map.fromAscList [(mkCId f,toTerm v) | App f [v] <- ts] mkTermMap ts = Map.fromAscList [(mkCId f,toTerm v) | App f [v] <- ts]
mkArray :: [a] -> Array.Array Int a mkArray :: IArray a e => [e] -> a Int e
mkArray xs = Array.listArray (0, length xs - 1) xs mkArray xs = listArray (0, length xs - 1) xs
expToInt :: Integral a => RExp -> a expToInt :: Integral a => RExp -> a
expToInt (App "neg" [AInt i]) = fromIntegral (negate i) expToInt (App "neg" [AInt i]) = fromIntegral (negate i)