From 1fc909c1014484962969ded7763f4f50b6867829 Mon Sep 17 00:00:00 2001 From: krasimir Date: Tue, 14 Oct 2008 08:00:50 +0000 Subject: [PATCH] the new optimized incremental parser and the common subexpression elimination optimization in PMCFG --- src/GF/Compile/Export.hs | 1 - src/GF/Compile/GFCCtoJS.hs | 41 +++-- src/GF/Compile/GenerateFCFG.hs | 168 +++++++++++------- src/GF/Compile/GeneratePMCFG.hs | 259 ++++++++++++++-------------- src/GF/Compile/GrammarToGFCC.hs | 5 +- src/GF/Infra/Option.hs | 2 - src/GF/Speech/PGFToCFG.hs | 68 ++++---- src/GFI.hs | 8 +- src/PGF.hs | 10 +- src/PGF/BuildParser.hs | 76 ++++---- src/PGF/Data.hs | 42 +++-- src/PGF/Macros.hs | 6 - src/PGF/Parsing/FCFG.hs | 3 +- src/PGF/Parsing/FCFG/Active.hs | 135 ++++++++------- src/PGF/Parsing/FCFG/Incremental.hs | 222 +++++++++++++++--------- src/PGF/Parsing/FCFG/Utilities.hs | 4 +- src/PGF/Raw/Convert.hs | 130 +++++++------- 17 files changed, 654 insertions(+), 526 deletions(-) diff --git a/src/GF/Compile/Export.hs b/src/GF/Compile/Export.hs index f4e5b2884..8b924113d 100644 --- a/src/GF/Compile/Export.hs +++ b/src/GF/Compile/Export.hs @@ -40,7 +40,6 @@ exportPGF opts fmt pgf = FmtProlog_Abs -> multi "pl" grammar2prolog_abs FmtBNF -> single "bnf" bnfPrinter FmtEBNF -> single "ebnf" (ebnfPrinter opts) - FmtFCFG -> single "fcfg" fcfgPrinter FmtSRGS_XML -> single "grxml" (srgsXmlPrinter opts) FmtSRGS_XML_NonRec -> single "grxml" (srgsXmlNonRecursivePrinter opts) FmtSRGS_ABNF -> single "gram" (srgsAbnfPrinter opts) diff --git a/src/GF/Compile/GFCCtoJS.hs b/src/GF/Compile/GFCCtoJS.hs index 3fe8b1635..12c424844 100644 --- a/src/GF/Compile/GFCCtoJS.hs +++ b/src/GF/Compile/GFCCtoJS.hs @@ -11,11 +11,13 @@ import GF.Data.ErrM import GF.Infra.Option import Control.Monad (mplus) -import Data.Array (Array) -import qualified Data.Array as Array +import Data.Array.Unboxed (UArray) +import qualified Data.Array.IArray as Array import Data.Maybe (fromMaybe) import Data.Map (Map) +import qualified Data.Set as Set import qualified Data.Map as Map +import qualified Data.IntMap as IntMap pgf2js :: PGF -> String pgf2js pgf = @@ -89,31 +91,44 @@ children = JS.Ident "cs" -- Parser parser2js :: String -> ParserInfo -> [JS.Expr] parser2js start p = [new "Parser" [JS.EStr start, - JS.EArray $ map frule2js (Array.elems (allRules p)), - JS.EObj $ map cats (Map.assocs (startupCats p))]] + JS.EArray $ [frule2js p cat prod | (cat,set) <- IntMap.toList (productions p), prod <- Set.toList set], + JS.EObj $ map cats (Map.assocs (startCats p))]] where cats (c,is) = JS.Prop (JS.IdentPropName (JS.Ident (prCId c))) (JS.EArray (map JS.EInt is)) -frule2js :: FRule -> JS.Expr -frule2js (FRule f ps args res lins) = new "Rule" [JS.EInt res, name2js (f,ps), JS.EArray (map JS.EInt args), lins2js lins] +frule2js :: ParserInfo -> FCat -> Production -> JS.Expr +frule2js p res (FApply funid args) = new "Rule" [JS.EInt res, name2js (f,ps), JS.EArray (map JS.EInt args), lins2js p lins] + where + FFun f ps lins = functions p Array.! funid +frule2js p res (FCoerce arg) = new "Rule" [JS.EInt res, daughter 0, JS.EArray [JS.EInt arg], JS.EArray [JS.EArray [sym2js (FSymCat 0 i)] | i <- [0..catLinArity arg-1]]] + where + catLinArity :: FCat -> Int + catLinArity c = maximum (1:[Array.rangeSize (Array.bounds rhs) | (FFun _ _ rhs, _) <- topdownRules c]) + + topdownRules cat = f cat [] + where + f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions p)) + + g (FApply funid args) rules = (functions p Array.! funid,args) : rules + g (FCoerce cat) rules = f cat rules + name2js :: (CId,[Profile]) -> JS.Expr -name2js (f,ps) | f == wildCId = fromProfile (head ps) - | otherwise = new "FunApp" $ [JS.EStr $ prCId f, JS.EArray (map fromProfile ps)] +name2js (f,ps) = new "FunApp" $ [JS.EStr $ prCId f, JS.EArray (map fromProfile ps)] where fromProfile :: Profile -> JS.Expr fromProfile [] = new "MetaVar" [] fromProfile [x] = daughter x fromProfile args = new "Unify" [JS.EArray (map daughter args)] - daughter i = new "Arg" [JS.EInt i] +daughter i = new "Arg" [JS.EInt i] -lins2js :: Array FIndex (Array FPointPos FSymbol) -> JS.Expr -lins2js ls = JS.EArray [ JS.EArray [ sym2js s | s <- Array.elems l] | l <- Array.elems ls] +lins2js :: ParserInfo -> UArray FIndex SeqId -> JS.Expr +lins2js p ls = JS.EArray [JS.EArray [sym2js s | s <- Array.elems (sequences p Array.! seqid)] | seqid <- Array.elems ls] sym2js :: FSymbol -> JS.Expr -sym2js (FSymCat l n) = new "ArgProj" [JS.EInt n, JS.EInt l] -sym2js (FSymTok t) = new "Terminal" [JS.EStr t] +sym2js (FSymCat n l) = new "ArgProj" [JS.EInt n, JS.EInt l] +sym2js (FSymTok (KS t)) = new "Terminal" [JS.EStr t] new :: String -> [JS.Expr] -> JS.Expr new f xs = JS.ENew (JS.Ident f) xs diff --git a/src/GF/Compile/GenerateFCFG.hs b/src/GF/Compile/GenerateFCFG.hs index c2854ef3d..de6c05ef1 100644 --- a/src/GF/Compile/GenerateFCFG.hs +++ b/src/GF/Compile/GenerateFCFG.hs @@ -25,17 +25,18 @@ import GF.Data.SortedList import GF.Data.Utilities (updateNthM, sortNub) import qualified Data.Map as Map +import qualified Data.IntMap as IntMap import qualified Data.Set as Set import qualified Data.List as List import qualified Data.ByteString.Char8 as BS -import Data.Array +import Data.Array.IArray import Data.Maybe import Control.Monad ---------------------------------------------------------------------- -- main conversion function -convertConcrete :: Abstr -> Concr -> FGrammar +convertConcrete :: Abstr -> Concr -> ParserInfo convertConcrete abs cnc = fixHoasFuns $ convert abs_defs' conc' cats' where abs_defs = Map.assocs (funs abs) conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient" @@ -91,14 +92,14 @@ expandHOAS funs lins lincats = (funs' ++ hoFuns ++ varFuns, -- replaces __NCat with _B and _Var_Cat with _. -- the temporary names are just there to avoid name collisions. -fixHoasFuns :: FGrammar -> FGrammar -fixHoasFuns (rs, cs) = ([FRule (fixName n) ps args cat lins | FRule n ps args cat lins <- rs], cs) +fixHoasFuns :: ParserInfo -> ParserInfo +fixHoasFuns pinfo = pinfo{functions=mkArray [FFun (fixName n) prof lins | FFun n prof lins <- elems (functions pinfo)]} where fixName (CId n) | BS.pack "__" `BS.isPrefixOf` n = (mkCId "_B") | BS.pack "_Var_" `BS.isPrefixOf` n = wildCId fixName n = n -convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> FGrammar -convert abs_defs cnc_defs cat_defs = getFGrammar (loop frulesEnv) +convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> ParserInfo +convert abs_defs cnc_defs cat_defs = getParserInfo (loop grammarEnv) where srules = [ (XRule id args res (map findLinType args) (findLinType res) term) | @@ -107,26 +108,26 @@ convert abs_defs cnc_defs cat_defs = getFGrammar (loop frulesEnv) findLinType id = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs) - (xrulesMap,frulesEnv) = List.foldl' helper (Map.empty,emptyFRulesEnv) srules + (xrulesMap,grammarEnv) = List.foldl' helper (Map.empty,emptyFFunsEnv) srules where - helper (xrulesMap,frulesEnv) rule@(XRule id abs_args abs_res cnc_args cnc_res term) = + helper (xrulesMap,grammarEnv) rule@(XRule id abs_args abs_res cnc_args cnc_res term) = let xrulesMap' = Map.insertWith (++) abs_res [rule] xrulesMap - frulesEnv' = List.foldl' (\env selector -> convertRule cnc_defs selector rule env) - frulesEnv + grammarEnv' = List.foldl' (\env selector -> convertRule cnc_defs selector rule env) + grammarEnv (mkSingletonSelectors cnc_defs cnc_res) - in xrulesMap' `seq` frulesEnv' `seq` (xrulesMap',frulesEnv') + in xrulesMap' `seq` grammarEnv' `seq` (xrulesMap',grammarEnv') - loop frulesEnv = - let (todo, frulesEnv') = takeToDoRules xrulesMap frulesEnv + loop grammarEnv = + let (todo, grammarEnv') = takeToDoRules xrulesMap grammarEnv in case todo of - [] -> frulesEnv' + [] -> grammarEnv' _ -> loop $! List.foldl' (\env (srules,selector) -> - List.foldl' (\env srule -> convertRule cnc_defs selector srule env) env srules) frulesEnv' todo + List.foldl' (\env srule -> convertRule cnc_defs selector srule env) env srules) grammarEnv' todo -convertRule :: TermMap -> TermSelector -> XRule -> FRulesEnv -> FRulesEnv -convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) frulesEnv = +convertRule :: TermMap -> TermSelector -> XRule -> GrammarEnv -> GrammarEnv +convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) grammarEnv = foldBM addRule - frulesEnv + grammarEnv (convertTerm cnc_defs selector term [([],[])]) (protoFCat cat, map (\scat -> (protoFCat scat,[])) args, ctype, ctypes) where @@ -137,9 +138,10 @@ convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) frulesEnv = (env1, xargs1) = List.mapAccumL (genFCatArg cnc_defs ctype) env xargs in case xcat of PFCat _ [] _ -> (env , args, all_args) - _ -> (env1,xargs1++args,(idx,zip xargs1 xargs):all_args)) (env1,[],[]) (zip3 newArgs' ctypes [0..]) + _ -> (env1,xargs1++args,(idx,zip xargs1 xargs):all_args)) + (env1,[],[]) (zip3 newArgs' ctypes [0..]) - newLinRec = listArray (0,length linRec-1) [translateLin idxArgs path linRec | path <- case newCat' of {PFCat _ rcs _ -> rcs}] + (env3,newLinRec) = List.mapAccumL (translateLin idxArgs linRec) env2 (case newCat' of {PFCat _ rcs _ -> rcs}) (_,newProfile) = List.mapAccumL accumProf 0 newArgs' where @@ -147,18 +149,19 @@ convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) frulesEnv = accumProf nr (_ ,xpaths) = (nr+cnt+1, [nr..nr+cnt]) where cnt = length xpaths - rule = FRule fun newProfile newArgs newCat newLinRec - in addFRule env2 rule + (env4,funid) = addFFun env3 (FFun fun newProfile (mkArray newLinRec)) -translateLin idxArgs lbl' [] = array (0,-1) [] -translateLin idxArgs lbl' ((lbl,syms) : lins) - | lbl' == lbl = listArray (0,length syms-1) (map instSym syms) - | otherwise = translateLin idxArgs lbl' lins + in addProduction env4 newCat (FApply funid newArgs) + +translateLin idxArgs [] grammarEnv lbl' = error "translateLin" +translateLin idxArgs ((lbl,syms) : lins) grammarEnv lbl' + | lbl' == lbl = addFSeq grammarEnv (lbl,map instSym syms) + | otherwise = translateLin idxArgs lins grammarEnv lbl' where instSym = either (\(lbl, nr, xnr) -> instCat lbl nr xnr 0 idxArgs) FSymTok instCat lbl nr xnr nr' ((idx,xargs):idxArgs) | nr == idx = let (fcat, PFCat _ rcs _) = xargs !! xnr - in FSymCat (index lbl rcs 0) (nr'+xnr) + in FSymCat (nr'+xnr) (index lbl rcs 0) | otherwise = instCat lbl nr xnr (nr'+length xargs) idxArgs index lbl' (lbl:lbls) idx @@ -173,7 +176,7 @@ type CnvMonad a = BacktrackM Env a type FPath = [FIndex] type Env = (ProtoFCat, [(ProtoFCat,[FPath])], Term, [Term]) -type LinRec = [(FPath, [Either (FPath, FIndex, Int) FToken])] +type LinRec = [(FPath, [Either (FPath, FIndex, Int) Tokn])] type TermMap = Map.Map CId Term @@ -190,11 +193,11 @@ convertTerm cnc_defs selector (S ts) ((lbl_path,lin) : lins) = do projectH foldM (\lins t -> convertTerm cnc_defs selector t lins) ((lbl_path,lin) : lins) (reverse ts) convertTerm cnc_defs selector (K (KS str)) ((lbl_path,lin) : lins) = do projectHead lbl_path - return ((lbl_path,Right str : lin) : lins) + return ((lbl_path,Right (KS str) : lin) : lins) convertTerm cnc_defs selector (K (KP strs vars))((lbl_path,lin) : lins) = do projectHead lbl_path toks <- member (strs:[strs' | Alt strs' _ <- vars]) - return ((lbl_path, map Right toks ++ lin) : lins) + return ((lbl_path, map (Right . KS) toks ++ lin) : lins) convertTerm cnc_defs selector (F id) lins = do term <- Map.lookup id cnc_defs convertTerm cnc_defs selector term lins convertTerm cnc_defs selector (W s t) ((lbl_path,lin) : lins) = do @@ -273,75 +276,105 @@ selectTerm (index:path) (R record) = selectTerm path (record !! index) ---------------------------------------------------------------------- --- FRulesEnv +-- GrammarEnv -data FRulesEnv = FRulesEnv {-# UNPACK #-} !Int FCatSet [FRule] + +data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int FCatSet FSeqSet FFunSet (IntMap.IntMap (Set.Set Production)) type FCatSet = Map.Map CId (Map.Map [FPath] (Map.Map [(FPath,FIndex)] (Either FCat FCat))) +type FSeqSet = Map.Map FSeq SeqId +type FFunSet = Map.Map FFun FunId data ProtoFCat = PFCat CId [FPath] [(FPath,FIndex)] protoFCat :: CId -> ProtoFCat protoFCat cat = PFCat cat [] [] -emptyFRulesEnv = FRulesEnv 0 (ins fcatString (mkCId "String") [[0]] [] $ - ins fcatInt (mkCId "Int") [[0]] [] $ - ins fcatFloat (mkCId "Float") [[0]] [] $ - ins fcatVar (mkCId "_Var") [[0]] [] $ - Map.empty) [] +emptyFFunsEnv = GrammarEnv 0 initFCatSet Map.empty Map.empty IntMap.empty where - ins fcat cat rcs tcs fcatSet = - Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s fcatSet + initFCatSet = (ins fcatString (mkCId "String") [[0]] [] $ + ins fcatInt (mkCId "Int") [[0]] [] $ + ins fcatFloat (mkCId "Float") [[0]] [] $ + ins fcatVar (mkCId "_Var") [[0]] [] $ + Map.empty) + + ins fcat cat rcs tcs catSet = + Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s catSet where right_fcat = Right fcat tmap_s = Map.singleton tcs right_fcat rmap_s = Map.singleton rcs tmap_s -addFRule :: FRulesEnv -> FRule -> FRulesEnv -addFRule (FRulesEnv last_id fcatSet rules) rule = FRulesEnv last_id fcatSet (rule:rules) +addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv +addProduction (GrammarEnv last_id catSet seqSet funSet prodSet) cat p = + GrammarEnv last_id catSet seqSet funSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet) -getFGrammar :: FRulesEnv -> FGrammar -getFGrammar (FRulesEnv last_id fcatSet rules) = (rules, Map.map getFCatList fcatSet) +addFSeq :: GrammarEnv -> (FPath,[FSymbol]) -> (GrammarEnv,SeqId) +addFSeq env@(GrammarEnv last_id catSet seqSet funSet prodSet) (_,lst) = + case Map.lookup seq seqSet of + Just id -> (env,id) + Nothing -> let !last_seq = Map.size seqSet + in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet prodSet,last_seq) where + seq = mkArray lst + +addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId) +addFFun env@(GrammarEnv last_id catSet seqSet funSet prodSet) fun = + case Map.lookup fun funSet of + Just id -> (env,id) + Nothing -> let !last_funid = Map.size funSet + in (GrammarEnv last_id catSet seqSet (Map.insert fun last_funid funSet) prodSet,last_funid) + +getParserInfo :: GrammarEnv -> ParserInfo +getParserInfo (GrammarEnv last_id catSet seqSet funSet prodSet) = + ParserInfo { functions = mkArray funSet + , sequences = mkArray seqSet + , productions = prodSet + , startCats = Map.map getFCatList catSet + } + where + mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] + getFCatList rcs = Map.fold (\tcs lst -> Map.fold (\x lst -> either id id x : lst) lst tcs) [] rcs -genFCatHead :: FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat) -genFCatHead env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs) = - case Map.lookup cat fcatSet >>= Map.lookup rcs >>= Map.lookup tcs of - Just (Left fcat) -> (FRulesEnv last_id (ins fcat) rules, fcat) + +genFCatHead :: GrammarEnv -> ProtoFCat -> (GrammarEnv, FCat) +genFCatHead env@(GrammarEnv last_id catSet seqSet funSet prodSet) (PFCat cat rcs tcs) = + case Map.lookup cat catSet >>= Map.lookup rcs >>= Map.lookup tcs of + Just (Left fcat) -> (GrammarEnv last_id (ins fcat) seqSet funSet prodSet, fcat) Just (Right fcat) -> (env, fcat) Nothing -> let fcat = last_id+1 - in (FRulesEnv fcat (ins fcat) rules, fcat) + in (GrammarEnv fcat (ins fcat) seqSet funSet prodSet, fcat) where - ins fcat = Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s fcatSet + ins fcat = Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s catSet where right_fcat = Right fcat tmap_s = Map.singleton tcs right_fcat rmap_s = Map.singleton rcs tmap_s -genFCatArg :: TermMap -> Term -> FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat) -genFCatArg cnc_defs ctype env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs) = - case Map.lookup cat fcatSet >>= Map.lookup rcs of +genFCatArg :: TermMap -> Term -> GrammarEnv -> ProtoFCat -> (GrammarEnv, FCat) +genFCatArg cnc_defs ctype env@(GrammarEnv last_id catSet seqSet funSet prodSet) (PFCat cat rcs tcs) = + case Map.lookup cat catSet >>= Map.lookup rcs of Just tmap -> case Map.lookup tcs tmap of - Just (Left fcat) -> (env, fcat) - Just (Right fcat) -> (env, fcat) + Just (Left fcat) -> (env, fcat) + Just (Right fcat) -> (env, fcat) Nothing -> ins tmap Nothing -> ins Map.empty where ins tmap = let fcat = last_id+1 - (either_fcat,last_id1,tmap1,rules1) - = foldBM (\tcs st (either_fcat,last_id,tmap,rules) -> + (either_fcat,last_id1,tmap1,prodSet1) + = foldBM (\tcs st (either_fcat,last_id,tmap,prodSet) -> let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap - rule = FRule wildCId [[0]] [fcat_arg] fcat - (listArray (0,length rcs-1) [listArray (0,0) [FSymCat lbl 0] | lbl <- [0..length rcs-1]]) + p = FCoerce fcat_arg + prodSet1 = IntMap.insertWith Set.union fcat (Set.singleton p) prodSet in if st - then (Right fcat, last_id1,tmap1,rule:rules) - else (either_fcat,last_id, tmap, rules)) - (Left fcat,fcat,Map.insert tcs either_fcat tmap,rules) + then (Right fcat, last_id1,tmap1,prodSet1) + else (either_fcat,last_id, tmap ,prodSet )) + (Left fcat,fcat,Map.insert tcs either_fcat tmap,prodSet) (gen_tcs ctype [] []) False rmap1 = Map.singleton rcs tmap1 - in (FRulesEnv last_id1 (Map.insertWith (\_ -> Map.insert rcs tmap1) cat rmap1 fcatSet) rules1, fcat) + in (GrammarEnv last_id1 (Map.insertWith (\_ -> Map.insert rcs tmap1) cat rmap1 catSet) seqSet funSet prodSet1, fcat) where addArg tcs last_id tmap = case Map.lookup tcs tmap of @@ -380,10 +413,11 @@ data XRule = XRule CId {- function -} Term {- result lin-type representation -} Term {- body -} -takeToDoRules :: XRulesMap -> FRulesEnv -> ([([XRule], TermSelector)], FRulesEnv) -takeToDoRules xrulesMap (FRulesEnv last_id fcatSet rules) = (todo,FRulesEnv last_id fcatSet' rules) +takeToDoRules :: XRulesMap -> GrammarEnv -> ([([XRule], TermSelector)], GrammarEnv) +takeToDoRules xrulesMap (GrammarEnv last_id catSet seqSet funSet prodSet) = + (todo,GrammarEnv last_id catSet' seqSet funSet prodSet) where - (todo,fcatSet') = + (todo,catSet') = Map.mapAccumWithKey (\todo cat rmap -> let (todo1,rmap1) = Map.mapAccumWithKey (\todo rcs tmap -> let (tcss,tmap') = Map.mapAccumWithKey (\tcss tcs either_xcat -> @@ -398,7 +432,7 @@ takeToDoRules xrulesMap (FRulesEnv last_id fcatSet rules) = (todo,FRulesEnv last in case mb_srules of Just srules -> (todo1,rmap1) - Nothing -> (todo ,rmap1)) [] fcatSet + Nothing -> (todo ,rmap1)) [] catSet ------------------------------------------------------------ @@ -524,3 +558,5 @@ projectProtoFCat path0 (PFCat cat rcs tcs) = do | path0 > path = path : addConstraint rcs | path0 == path = path : rcs addConstraint rcs = path0 : rcs + +mkArray lst = listArray (0,length lst-1) lst diff --git a/src/GF/Compile/GeneratePMCFG.hs b/src/GF/Compile/GeneratePMCFG.hs index e0343e8d6..b24a629a7 100644 --- a/src/GF/Compile/GeneratePMCFG.hs +++ b/src/GF/Compile/GeneratePMCFG.hs @@ -1,4 +1,4 @@ -{-# OPTIONS -fbang-patterns #-} +{-# OPTIONS -fbang-patterns -cpp #-} ---------------------------------------------------------------------- -- | -- Maintainer : Krasimir Angelov @@ -12,14 +12,12 @@ -- the conversion is only equivalent if the GFC grammar has a context-free backbone. ----------------------------------------------------------------------------- - module GF.Compile.GeneratePMCFG (convertConcrete) where import PGF.CId import PGF.Data import PGF.Macros --hiding (prt) -import PGF.Parsing.FCFG.Utilities import GF.Data.BacktrackM import GF.Data.SortedList @@ -28,8 +26,9 @@ import GF.Data.Utilities (updateNthM, sortNub) import qualified Data.Map as Map import qualified Data.Set as Set import qualified Data.List as List +import qualified Data.IntMap as IntMap import qualified Data.ByteString.Char8 as BS -import Data.Array +import Data.Array.IArray import Data.Maybe import Control.Monad import Debug.Trace @@ -37,7 +36,7 @@ import Debug.Trace ---------------------------------------------------------------------- -- main conversion function -convertConcrete :: Abstr -> Concr -> FGrammar +convertConcrete :: Abstr -> Concr -> ParserInfo convertConcrete abs cnc = fixHoasFuns $ convert abs_defs' conc' cats' where abs_defs = Map.assocs (funs abs) conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient" @@ -93,14 +92,14 @@ expandHOAS funs lins lincats = (funs' ++ hoFuns ++ varFuns, -- replaces __NCat with _B and _Var_Cat with _. -- the temporary names are just there to avoid name collisions. -fixHoasFuns :: FGrammar -> FGrammar -fixHoasFuns (!rs, !cs) = ([FRule (fixName n) ps args cat lins | FRule n ps args cat lins <- rs], cs) +fixHoasFuns :: ParserInfo -> ParserInfo +fixHoasFuns pinfo = pinfo{functions=mkArray [FFun (fixName n) prof lins | FFun n prof lins <- elems (functions pinfo)]} where fixName (CId n) | BS.pack "__" `BS.isPrefixOf` n = (mkCId "_B") | BS.pack "_Var_" `BS.isPrefixOf` n = wildCId fixName n = n -convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> FGrammar -convert abs_defs cnc_defs cat_defs = getFGrammar (List.foldl' (convertRule cnc_defs) emptyFRulesEnv srules) +convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> ParserInfo +convert abs_defs cnc_defs cat_defs = getParserInfo (List.foldl' (convertRule cnc_defs) (emptyFRulesEnv cnc_defs cat_defs) srules) where srules = [ (XRule id args res (map findLinType args) (findLinType res) term) | @@ -109,23 +108,40 @@ convert abs_defs cnc_defs cat_defs = getFGrammar (List.foldl' (convertRule cnc_d findLinType id = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs) +brk :: (GrammarEnv -> GrammarEnv) -> (GrammarEnv -> GrammarEnv) +brk f (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = + case f (GrammarEnv last_id catSet seqSet funSet crcSet IntMap.empty) of + (GrammarEnv last_id catSet seqSet funSet crcSet topdown1) -> IntMap.foldWithKey optimize (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) topdown1 + where + optimize cat ps env = IntMap.foldWithKey ff env (IntMap.fromListWith (++) [(funid,[args]) | FApply funid args <- Set.toList ps]) + where + ff :: FunId -> [[FCat]] -> GrammarEnv -> GrammarEnv + ff funid xs env + | product (map Set.size ys) == count = + case List.mapAccumL (\env c -> addFCoercion env (Set.toList c)) env ys of + (env,args) -> addProduction env cat (FApply funid args) + | otherwise = List.foldl (\env args -> addProduction env cat (FApply funid args)) env xs + where + count = length xs + ys = foldr (zipWith Set.insert) (repeat Set.empty) xs -convertRule :: TermMap -> FRulesEnv -> XRule -> FRulesEnv -convertRule cnc_defs frulesEnv (XRule fun args cat ctypes ctype term) = - foldBM addRule - frulesEnv - (convertTerm cnc_defs [] ctype term [([],[])]) - (protoFCat cnc_defs cat ctype, zipWith (protoFCat cnc_defs) args ctypes) +convertRule :: TermMap -> GrammarEnv -> XRule -> GrammarEnv +convertRule cnc_defs grammarEnv (XRule fun args cat ctypes ctype term) = trace (show fun) $ + brk (\grammarEnv -> foldBM addRule + grammarEnv + (convertTerm cnc_defs [] ctype term [([],[])]) + (protoFCat cnc_defs cat ctype, zipWith (protoFCat cnc_defs) args ctypes)) grammarEnv where addRule linRec (newCat', newArgs') env0 = - let (env1, newCat) = genFCatHead env0 newCat' - (env2, newArgs) = List.mapAccumL (genFCatArg cnc_defs) env1 newArgs' + let [newCat] = getFCats env0 newCat' + (env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs' - newLinRec = mkArray (map (mkArray . snd) linRec) - mkArray lst = listArray (0,length lst-1) lst + (env2,lins) = List.mapAccumL addFSeq env1 linRec + newLinRec = mkArray lins - rule = FRule fun [] newArgs newCat newLinRec - in addFRule env2 rule + (env3,funid) = addFFun env2 (FFun fun [[n] | n <- [0..length newArgs-1]] newLinRec) + + in addProduction env3 newCat (FApply funid newArgs) ---------------------------------------------------------------------- -- term conversion @@ -133,7 +149,7 @@ convertRule cnc_defs frulesEnv (XRule fun args cat ctypes ctype term) = type CnvMonad a = BacktrackM Env a type FPath = [FIndex] -data ProtoFCat = PFCat CId [FPath] [(FPath,FIndex)] Term +data ProtoFCat = PFCat CId [FPath] [(FPath,[FIndex])] type Env = (ProtoFCat, [ProtoFCat]) type LinRec = [(FPath, [FSymbol])] data XRule = XRule CId {- function -} @@ -144,7 +160,16 @@ data XRule = XRule CId {- function -} Term {- body -} protoFCat :: TermMap -> CId -> Term -> ProtoFCat -protoFCat cnc_defs cat ctype = PFCat cat (getRCS cnc_defs ctype) [] ctype +protoFCat cnc_defs cat ctype = + let (rcs,tcs) = loop [] [] [] ctype + in PFCat cat rcs tcs + where + loop path rcs tcs (R record) = List.foldl' (\(rcs,tcs) (index,term) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record) + loop path rcs tcs (C i) = ( rcs,(path,[0..i]):tcs) + loop path rcs tcs (S _) = (path:rcs, tcs) + loop path rcs tcs (F id) = case Map.lookup id cnc_defs of + Just term -> loop path rcs tcs term + Nothing -> error ("unknown identifier: "++show id) type TermMap = Map.Map CId Term @@ -156,11 +181,12 @@ convertTerm cnc_defs sel ctype (P term p) lins = do nr <- e convertTerm cnc_defs (nr:sel) ctype term lins convertTerm cnc_defs sel ctype (FV vars) lins = do term <- member vars convertTerm cnc_defs sel ctype term lins -convertTerm cnc_defs sel ctype (S ts) ((lbl_path,lin) : lins) = foldM (\lins t -> convertTerm cnc_defs sel ctype t lins) ((lbl_path,lin) : lins) (reverse ts) -convertTerm cnc_defs sel ctype (K (KS str)) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok str : lin) : lins) +convertTerm cnc_defs sel ctype (S ts) lins = foldM (\lins t -> convertTerm cnc_defs sel ctype t lins) lins (reverse ts) +--convertTerm cnc_defs sel ctype (K t) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok t : lin) : lins) +convertTerm cnc_defs sel ctype (K (KS t)) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok (KS t) : lin) : lins) convertTerm cnc_defs sel ctype (K (KP strs vars))((lbl_path,lin) : lins) = do toks <- member (strs:[strs' | Alt strs' _ <- vars]) - return ((lbl_path, map FSymTok toks ++ lin) : lins) + return ((lbl_path, map (FSymTok . KS) toks ++ lin) : lins) convertTerm cnc_defs sel ctype (F id) lins = do term <- Map.lookup id cnc_defs convertTerm cnc_defs sel ctype term lins convertTerm cnc_defs sel ctype (W s t) ((lbl_path,lin) : lins) = do @@ -183,8 +209,8 @@ convertArg (C max) nr path lbl_path lin lins = do return lins convertArg (S _) nr path lbl_path lin lins = do (_, args) <- readState - let PFCat cat rcs tcs _ = args !! nr - return ((lbl_path, FSymCat (index path rcs 0) nr : lin) : lins) + let PFCat cat rcs tcs = args !! nr + return ((lbl_path, FSymCat nr (index path rcs 0) : lin) : lins) where index lbl' (lbl:lbls) idx | lbl' == lbl = idx @@ -210,8 +236,11 @@ convertRec cnc_defs (index:sub_sel) ctype record lbl_path lin lins = do evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex evalTerm cnc_defs path (V nr) = do (_, args) <- readState - let PFCat _ _ _ ctype = args !! nr - unifyPType nr (reverse path) (selectTerm path ctype) + let PFCat _ _ tcs = args !! nr + rpath = reverse path + index <- member (fromMaybe (error "evalTerm: wrong path") (lookup rpath tcs)) + restrictArg nr rpath index + return index evalTerm cnc_defs path (C nr) = return nr evalTerm cnc_defs path (R record) = case path of (index:path) -> evalTerm cnc_defs path (record !! index) @@ -222,112 +251,80 @@ evalTerm cnc_defs path (F id) = do term <- Map.lookup id cnc_defs evalTerm cnc_defs path term evalTerm cnc_defs path x = error ("evalTerm ("++show x++")") -unifyPType :: FIndex -> FPath -> Term -> CnvMonad FIndex -unifyPType nr path (C max_index) = - do (_, args) <- readState - let PFCat _ _ tcs _ = args !! nr - case lookup path tcs of - Just index -> return index - Nothing -> do index <- member [0..max_index] - restrictArg nr path index - return index -unifyPType nr path t = error $ "unifyPType " ++ show t ---- AR 2/10/2007 - -selectTerm :: FPath -> Term -> Term -selectTerm [] term = term -selectTerm (index:path) (R record) = selectTerm path (record !! index) - ---------------------------------------------------------------------- --- FRulesEnv +-- GrammarEnv -data FRulesEnv = FRulesEnv {-# UNPACK #-} !Int FCatSet [FRule] -type FCatSet = Map.Map CId (Map.Map [(FPath,FIndex)] FCat) +data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int CatSet SeqSet FunSet CoerceSet (IntMap.IntMap (Set.Set Production)) +type CatSet = Map.Map CId (FCat,FCat,[Int]) +type SeqSet = Map.Map FSeq SeqId +type FunSet = Map.Map FFun FunId +type CoerceSet= Map.Map [FCat] FCat -emptyFRulesEnv = FRulesEnv 0 (ins fcatString (mkCId "String") [] $ - ins fcatInt (mkCId "Int") [] $ - ins fcatFloat (mkCId "Float") [] $ - ins fcatVar (mkCId "_Var") [] $ - Map.empty) [] +emptyFRulesEnv cnc_defs lincats = + let (last_id,catSet) = Map.mapAccum computeCatRange 0 lincats + in GrammarEnv last_id catSet Map.empty Map.empty Map.empty IntMap.empty where - ins fcat cat tcs fcatSet = - Map.insertWith (\_ -> Map.insert tcs fcat) cat tmap_s fcatSet + computeCatRange index ctype = (index+size,(index,index+size-1,poly)) where - tmap_s = Map.singleton tcs fcat + (size,poly) = getMultipliers 1 [] ctype + + getMultipliers m ms (R record) = foldl (\(m,ms) t -> getMultipliers m ms t) (m,ms) record + getMultipliers m ms (S _) = (m,ms) + getMultipliers m ms (C max_index) = (m*(max_index+1),m : ms) + getMultipliers m ms (F id) = case Map.lookup id cnc_defs of + Just term -> getMultipliers m ms term + Nothing -> error ("unknown identifier: "++prCId id) -addFRule :: FRulesEnv -> FRule -> FRulesEnv -addFRule (FRulesEnv last_id fcatSet rules) rule = FRulesEnv last_id fcatSet (rule:rules) +addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv +addProduction (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) cat p = + GrammarEnv last_id catSet seqSet funSet crcSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet) -getFGrammar :: FRulesEnv -> FGrammar -getFGrammar (FRulesEnv last_id fcatSet rules) = (rules, Map.map Map.elems fcatSet) - -genFCatHead :: FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat) -genFCatHead env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs _) = - case Map.lookup cat fcatSet >>= Map.lookup tcs of - Just fcat -> (env, fcat) - Nothing -> let fcat = last_id+1 - in (FRulesEnv fcat (ins fcat) rules, fcat) +addFSeq :: GrammarEnv -> (FPath,[FSymbol]) -> (GrammarEnv,SeqId) +addFSeq env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (_,lst) = + case Map.lookup seq seqSet of + Just id -> (env,id) + Nothing -> let !last_seq = Map.size seqSet + in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet crcSet prodSet,last_seq) where - ins fcat = Map.insertWith (\_ -> Map.insert tcs fcat) cat tmap_s fcatSet - where - tmap_s = Map.singleton tcs fcat + seq = mkArray lst -genFCatArg :: TermMap -> FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat) -genFCatArg cnc_defs env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs ctype) = - case Map.lookup cat fcatSet of - Just tmap -> case Map.lookup tcs tmap of - Just fcat -> (env, fcat) - Nothing -> ins tmap - Nothing -> ins Map.empty +addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId) +addFFun env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) fun = + case Map.lookup fun funSet of + Just id -> (env,id) + Nothing -> let !last_funid = Map.size funSet + in (GrammarEnv last_id catSet seqSet (Map.insert fun last_funid funSet) crcSet prodSet,last_funid) + +addFCoercion :: GrammarEnv -> [FCat] -> (GrammarEnv,FCat) +addFCoercion env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) sub_fcats = + case sub_fcats of + [fcat] -> (env,fcat) + _ -> case Map.lookup sub_fcats crcSet of + Just fcat -> (env,fcat) + Nothing -> let !fcat = last_id+1 + in (GrammarEnv fcat catSet seqSet funSet (Map.insert sub_fcats fcat crcSet) prodSet,fcat) + +getParserInfo :: GrammarEnv -> ParserInfo +getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = + ParserInfo { functions = mkArray funSet + , sequences = mkArray seqSet + , productions = IntMap.union prodSet coercions + , startCats = Map.map (\(start,end,_) -> range (start,end)) catSet + } where - ins tmap = - let fcat = last_id+1 - (last_id1,tmap1,rules1) - = foldBM (\tcs st (last_id,tmap,rules) -> - let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap - rule = FRule wildCId [[0]] [fcat_arg] fcat - (listArray (0,length rcs-1) [listArray (0,0) [FSymCat lbl 0] | lbl <- [0..length rcs-1]]) - in if st - then (last_id1,tmap1,rule:rules) - else (last_id, tmap, rules)) - (fcat,Map.insert tcs fcat tmap,rules) - (gen_tcs ctype [] []) - False - in (FRulesEnv last_id1 (Map.insert cat tmap1 fcatSet) rules1, fcat) - where - addArg tcs last_id tmap = - case Map.lookup tcs tmap of - Just fcat -> (last_id, tmap, fcat) - Nothing -> let fcat = last_id+1 - in (fcat, Map.insert tcs fcat tmap, fcat) + mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] + + coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet] - gen_tcs :: Term -> FPath -> [(FPath,FIndex)] -> BacktrackM Bool [(FPath,FIndex)] - gen_tcs (R record) path acc = foldM (\acc (label,ctype) -> gen_tcs ctype (label:path) acc) acc (zip [0..] record) - gen_tcs (S _) path acc = return acc - gen_tcs (C max_index) path acc = - case List.lookup path tcs of - Just index -> return $! addConstraint path index acc - Nothing -> do writeState True - index <- member [0..max_index] - return $! addConstraint path index acc - where - addConstraint path0 index0 (c@(path,index) : cs) - | path0 > path = c:addConstraint path0 index0 cs - addConstraint path0 index0 cs = (path0,index0) : cs - gen_tcs (F id) path acc = case Map.lookup id cnc_defs of - Just term -> gen_tcs term path acc - Nothing -> error ("unknown identifier: "++prCId id) - - -getRCS :: TermMap -> Term -> [FPath] -getRCS cnc_defs = loop [] [] +getFCats :: GrammarEnv -> ProtoFCat -> [FCat] +getFCats (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat cat rcs tcs) = + case Map.lookup cat catSet of + Just (start,end,ms) -> reverse (solutions (variants ms tcs start) ()) where - loop path rcs (R record) = List.foldl' (\rcs (index,term) -> loop (index:path) rcs term) rcs (zip [0..] record) - loop path rcs (C i) = rcs - loop path rcs (S _) = path:rcs - loop path rcs (F id) = case Map.lookup id cnc_defs of - Just term -> loop path rcs term - Nothing -> error ("unknown identifier: "++show id) + variants _ [] fcat = return fcat + variants (m:ms) ((_,indices) : tcs) fcat = do index <- member indices + variants ms tcs ((m*index) + fcat) ------------------------------------------------------------ -- updating the MCF rule @@ -345,12 +342,14 @@ restrictHead path term writeState (head', args) restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> CnvMonad ProtoFCat -restrictProtoFCat path0 index0 (PFCat cat rcs tcs ctype) = do +restrictProtoFCat path0 index0 (PFCat cat rcs tcs) = do tcs <- addConstraint tcs - return (PFCat cat rcs tcs ctype) + return (PFCat cat rcs tcs) where - addConstraint (c@(path,index) : cs) - | path0 > path = liftM (c:) (addConstraint cs) - | path0 == path = guard (index0 == index) >> - return (c : cs) - addConstraint cs = return ((path0,index0) : cs) + addConstraint [] = error "restrictProtoFCat: unknown path" + addConstraint (c@(path,indices) : tcs) + | path0 == path = guard (index0 `elem` indices) >> + return ((path,[index0]) : tcs) + | otherwise = liftM (c:) (addConstraint tcs) + +mkArray lst = listArray (0,length lst-1) lst diff --git a/src/GF/Compile/GrammarToGFCC.hs b/src/GF/Compile/GrammarToGFCC.hs index f16497a0a..e57937f52 100644 --- a/src/GF/Compile/GrammarToGFCC.hs +++ b/src/GF/Compile/GrammarToGFCC.hs @@ -7,7 +7,6 @@ import qualified GF.Compile.GenerateFCFG as FCFG import qualified GF.Compile.GeneratePMCFG as PMCFG import PGF.CId -import PGF.BuildParser (buildParserInfo) import qualified PGF.Macros as CM import qualified PGF.Data as C import qualified PGF.Data as D @@ -54,9 +53,9 @@ mkCanon2gfcc opts cnc gr = addParsers :: D.PGF -> D.PGF addParsers pgf = pgf { D.concretes = Map.map conv (D.concretes pgf) } where - conv cnc = cnc { D.parser = Just (buildParserInfo fcfg) } + conv cnc = cnc { D.parser = Just pinfo } where - fcfg + pinfo | Map.lookup (mkCId "erasing") (D.cflags cnc) == Just "on" = PMCFG.convertConcrete (D.abstract pgf) cnc | otherwise = FCFG.convertConcrete (D.abstract pgf) cnc diff --git a/src/GF/Infra/Option.hs b/src/GF/Infra/Option.hs index 48352fc91..1a62c94ae 100644 --- a/src/GF/Infra/Option.hs +++ b/src/GF/Infra/Option.hs @@ -91,7 +91,6 @@ data OutputFormat = FmtPGF | FmtEBNF | FmtRegular | FmtNoLR - | FmtFCFG | FmtSRGS_XML | FmtSRGS_XML_NonRec | FmtSRGS_ABNF @@ -497,7 +496,6 @@ outputFormats = ("ebnf", FmtEBNF), ("regular", FmtRegular), ("nolr", FmtNoLR), - ("fcfg", FmtFCFG), ("srgs_xml", FmtSRGS_XML), ("srgs_xml_nonrec", FmtSRGS_XML_NonRec), ("srgs_abnf", FmtSRGS_ABNF), diff --git a/src/GF/Speech/PGFToCFG.hs b/src/GF/Speech/PGFToCFG.hs index 018cb682e..ee778a106 100644 --- a/src/GF/Speech/PGFToCFG.hs +++ b/src/GF/Speech/PGFToCFG.hs @@ -4,21 +4,19 @@ -- -- Approximates PGF grammars with context-free grammars. ---------------------------------------------------------------------- -module GF.Speech.PGFToCFG (bnfPrinter, - fcfgPrinter, pgfToCFG) where +module GF.Speech.PGFToCFG (bnfPrinter, pgfToCFG) where import PGF.CId import PGF.Data as PGF import PGF.Macros -import GF.Data.MultiMap (MultiMap) -import qualified GF.Data.MultiMap as MultiMap import GF.Infra.Ident import GF.Speech.CFG -import Data.Array as Array +import Data.Array.IArray as Array import Data.List import Data.Map (Map) import qualified Data.Map as Map +import qualified Data.IntMap as IntMap import Data.Maybe import Data.Set (Set) import qualified Data.Set as Set @@ -29,21 +27,6 @@ bnfPrinter = toBNF id toBNF :: (CFG -> CFG) -> PGF -> CId -> String toBNF f pgf cnc = prCFG $ f $ pgfToCFG pgf cnc --- FIXME: move this somewhere else -fcfgPrinter :: PGF -> CId -> String -fcfgPrinter pgf cnc = unlines (map showRule rules) - where - pinfo = fromMaybe (error "fcfgPrinter") (lookParser pgf cnc) - - rules :: [FRule] - rules = Array.elems (PGF.allRules pinfo) - - showRule (FRule cid ps cs fc arr) = prCId cid ++ " " ++ show ps ++ ". " ++ showCat fc ++ " ::= [" ++ concat (intersperse ", " (map showCat cs)) ++ "] = " ++ showLin arr - where - showLin arr = "[" ++ concat (intersperse ", " [ unwords (map showFSymbol (Array.elems r)) | r <- Array.elems arr]) ++ "]" - showFSymbol (FSymCat i j) = showCat (cs!!j) ++ "_" ++ show j ++ "." ++ show i - showFSymbol (FSymTok t) = t - showCat c = "C" ++ show c pgfToCFG :: PGF -> CId -- ^ Concrete syntax name @@ -52,12 +35,13 @@ pgfToCFG pgf lang = mkCFG (lookStartCat pgf) extCats (startRules ++ concatMap fr where pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang) - rules :: [FRule] - rules = Array.elems (PGF.allRules pinfo) + rules :: [(FCat,Production)] + rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.productions pinfo) + , prod <- Set.toList set] fcatCats :: Map FCat Cat fcatCats = Map.fromList [(fc, prCId c ++ "_" ++ show i) - | (c,fcs) <- Map.toList (startupCats pinfo), + | (c,fcs) <- Map.toList (startCats pinfo), (fc,i) <- zip fcs [1..]] fcatCat :: FCat -> Cat @@ -69,49 +53,61 @@ pgfToCFG pgf lang = mkCFG (lookStartCat pgf) extCats (startRules ++ concatMap fr -- gets the number of fields in the lincat for the given category catLinArity :: FCat -> Int - catLinArity c = maximum (1:[rangeSize (bounds rhs) | FRule _ _ _ _ rhs <- Map.findWithDefault [] c rulesByFCat]) + catLinArity c = maximum (1:[rangeSize (bounds rhs) | (FFun _ _ rhs, _) <- topdownRules c]) + + topdownRules cat = f cat [] + where + f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo)) + + g (FApply funid args) rules = (functions pinfo ! funid,args) : rules + g (FCoerce cat) rules = f cat rules - rulesByFCat :: Map FCat [FRule] - rulesByFCat = Map.fromListWith (++) [(c,[r]) | r@(FRule _ _ _ c _) <- rules] extCats :: Set Cat extCats = Set.fromList $ map lhsCat startRules startRules :: [CFRule] startRules = [CFRule (prCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0) - | (c,fcs) <- Map.toList (startupCats pinfo), + | (c,fcs) <- Map.toList (startCats pinfo), fc <- fcs, not (isLiteralFCat fc), r <- [0..catLinArity fc-1]] - fruleToCFRule :: FRule -> [CFRule] - fruleToCFRule (FRule f ps args c rhs) = + fruleToCFRule :: (FCat,Production) -> [CFRule] + fruleToCFRule (c,FApply funid args) = [CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm (map (fixProfile row) ps)) - | (l,row) <- Array.assocs rhs, not (containsLiterals row)] + | (l,seqid) <- Array.assocs rhs + , let row = sequences pinfo ! seqid + , not (containsLiterals row)] where + FFun f ps rhs = functions pinfo ! funid + mkRhs :: Array FPointPos FSymbol -> [CFSymbol] mkRhs = map fsymbolToSymbol . Array.elems containsLiterals :: Array FPointPos FSymbol -> Bool - containsLiterals row = any isLiteralFCat [args!!n | FSymCat _ n <- Array.elems row] + containsLiterals row = any isLiteralFCat [args!!n | FSymCat n _ <- Array.elems row] fsymbolToSymbol :: FSymbol -> CFSymbol - fsymbolToSymbol (FSymCat l n) = NonTerminal (fcatToCat (args!!n) l) - fsymbolToSymbol (FSymTok t) = Terminal t + fsymbolToSymbol (FSymCat n l) = NonTerminal (fcatToCat (args!!n) l) + fsymbolToSymbol (FSymTok (KS t)) = Terminal t fixProfile :: Array FPointPos FSymbol -> Profile -> Profile fixProfile row = concatMap positions where - nts = zip [0..] [nt | nt@(FSymCat _ _) <- Array.elems row ] - positions i = [k | (k,FSymCat _ j) <- nts, j == i] + nts = zip [0..] [nt | nt@(FSymCat _ _) <- Array.elems row] + positions i = [k | (k,FSymCat j _) <- nts, j == i] profilesToTerm :: [Profile] -> CFTerm - profilesToTerm [[n]] | f == wildCId = CFRes n profilesToTerm ps = CFObj f (zipWith profileToTerm argTypes ps) where (argTypes,_) = catSkeleton $ lookType pgf f profileToTerm :: CId -> Profile -> CFTerm profileToTerm t [] = CFMeta t profileToTerm _ xs = CFRes (last xs) -- FIXME: unify + fruleToCFRule (c,FCoerce c') = + [CFRule (fcatToCat c l) [NonTerminal (fcatToCat c' l)] (CFRes 0) + | l <- [0..catLinArity c-1]] + isLiteralFCat :: FCat -> Bool isLiteralFCat = (`elem` [fcatString, fcatInt, fcatFloat, fcatVar]) diff --git a/src/GFI.hs b/src/GFI.hs index 03fbb184f..1e9cfba2f 100644 --- a/src/GFI.hs +++ b/src/GFI.hs @@ -25,6 +25,7 @@ import qualified Text.ParserCombinators.ReadP as RP import System.Cmd import System.CPUTime import Control.Exception +import Control.Monad import Data.Version import GF.System.Signal --import System.IO.Error (try) @@ -203,9 +204,10 @@ wordCompletion gfenv line0 prefix0 p = -> do mb_state0 <- try (evaluate (initState pgf (optLang opts) (optCat opts))) case mb_state0 of Right state0 -> let ws = words (take (length s - length prefix) s) - state = foldl nextState state0 ws - compls = getCompletions state prefix - in ret ' ' (map (encode gfenv) (Map.keys compls)) + in case foldM nextState state0 ws of + Nothing -> ret ' ' [] + Just state -> let compls = getCompletions state prefix + in ret ' ' (map (encode gfenv) (Map.keys compls)) Left _ -> ret ' ' [] CmplOpt (Just (Command n _ _)) pref -> case Map.lookup n (commands cmdEnv) of diff --git a/src/PGF.hs b/src/PGF.hs index c078bf3bf..347b52ccc 100644 --- a/src/PGF.hs +++ b/src/PGF.hs @@ -77,6 +77,7 @@ import Data.Char import qualified Data.Map as Map import Data.Maybe import System.Random (newStdGen) +import Control.Monad --------------------------------------------------- -- Interface @@ -211,7 +212,7 @@ parse pgf lang cat s = Just cnc -> case parser cnc of Just pinfo -> if Map.lookup (mkCId "erasing") (cflags cnc) == Just "on" then Incremental.parse pinfo (mkCId cat) (words s) - else case parseFCFG "bottomup" pinfo (mkCId cat) (words s) of + else case parseFCFG "topdown" pinfo (mkCId cat) (words s) of Ok x -> x Bad s -> error s Nothing -> error ("No parser built for language: " ++ lang) @@ -259,9 +260,10 @@ startCat pgf = lookStartCat pgf complete pgf from cat input = let (ws,prefix) = tokensAndPrefix input state0 = initState pgf from cat - state = foldl Incremental.nextState state0 ws - compls = Incremental.getCompletions state prefix - in [unwords (ws++[c]) ++ " " | c <- Map.keys compls] + in case foldM Incremental.nextState state0 ws of + Nothing -> [] + Just state -> let compls = Incremental.getCompletions state prefix + in [unwords (ws++[c]) ++ " " | c <- Map.keys compls] where tokensAndPrefix :: String -> ([String],String) tokensAndPrefix s | not (null s) && isSpace (last s) = (words s, "") diff --git a/src/PGF/BuildParser.hs b/src/PGF/BuildParser.hs index 9dfab3130..1603a3dab 100644 --- a/src/PGF/BuildParser.hs +++ b/src/PGF/BuildParser.hs @@ -15,50 +15,62 @@ import PGF.CId import PGF.Data import PGF.Parsing.FCFG.Utilities -import Data.Array +import Data.Array.IArray import Data.Maybe +import qualified Data.IntMap as IntMap import qualified Data.Map as Map import qualified Data.Set as Set import Debug.Trace +data ParserInfoEx + = ParserInfoEx { epsilonRules :: [(FunId,[FCat],FCat)] + , leftcornerCats :: Assoc FCat [(FunId,[FCat],FCat)] + , leftcornerTokens :: Assoc String [(FunId,[FCat],FCat)] + , grammarToks :: [String] + } + ------------------------------------------------------------ -- parser information -getLeftCornerTok (FRule _ _ _ _ lins) +getLeftCornerTok pinfo (FFun _ _ lins) | inRange (bounds syms) 0 = case syms ! 0 of - FSymTok tok -> [tok] + FSymTok (KS tok) -> [tok] + _ -> [] + | otherwise = [] + where + syms = (sequences pinfo) ! (lins ! 0) + +getLeftCornerCat pinfo args (FFun _ _ lins) + | inRange (bounds syms) 0 = case syms ! 0 of + FSymCat d _ -> let cat = args !! d + in case IntMap.lookup cat (productions pinfo) of + Just set -> cat : [cat' | FCoerce cat' <- Set.toList set] + Nothing -> [cat] _ -> [] | otherwise = [] where - syms = lins ! 0 + syms = (sequences pinfo) ! (lins ! 0) -getLeftCornerCat (FRule _ _ args _ lins) - | inRange (bounds syms) 0 = case syms ! 0 of - FSymCat _ d -> [args !! d] - _ -> [] - | otherwise = [] - where - syms = lins ! 0 +buildParserInfo :: ParserInfo -> ParserInfoEx +buildParserInfo pinfo = + ParserInfoEx { epsilonRules = epsilonrules + , leftcornerCats = leftcorncats + , leftcornerTokens = leftcorntoks + , grammarToks = grammartoks + } -buildParserInfo :: FGrammar -> ParserInfo -buildParserInfo (grammar,startup) = -- trace (unlines [prt (x,Set.toList set) | (x,set) <- Map.toList leftcornFilter]) $ - ParserInfo { allRules = allrules - , topdownRules = topdownrules - -- , emptyRules = emptyrules - , epsilonRules = epsilonrules - , leftcornerCats = leftcorncats - , leftcornerTokens = leftcorntoks - , grammarCats = grammarcats - , grammarToks = grammartoks - , startupCats = startup - } - - where allrules = listArray (0,length grammar-1) grammar - topdownrules = accumAssoc id [(cat, ruleid) | (ruleid, FRule _ _ _ cat _) <- assocs allrules] - epsilonrules = [ ruleid | (ruleid, FRule _ _ _ _ lins) <- assocs allrules, - not (inRange (bounds (lins ! 0)) 0) ] - leftcorncats = accumAssoc id [ (cat, ruleid) | (ruleid, rule) <- assocs allrules, cat <- getLeftCornerCat rule ] - leftcorntoks = accumAssoc id [ (tok, ruleid) | (ruleid, rule) <- assocs allrules, tok <- getLeftCornerTok rule ] - grammarcats = aElems topdownrules - grammartoks = nubsort [t | (FRule _ _ _ _ lins) <- grammar, lin <- elems lins, FSymTok t <- elems lin] + where epsilonrules = [ (ruleid,args,cat) + | (cat,set) <- IntMap.toList (productions pinfo) + , (FApply ruleid args) <- Set.toList set + , let (FFun _ _ lins) = (functions pinfo) ! ruleid + , not (inRange (bounds ((sequences pinfo) ! (lins ! 0))) 0) ] + leftcorncats = accumAssoc id [ (cat', (ruleid, args, cat)) + | (cat,set) <- IntMap.toList (productions pinfo) + , (FApply ruleid args) <- Set.toList set + , cat' <- getLeftCornerCat pinfo args ((functions pinfo) ! ruleid) ] + leftcorntoks = accumAssoc id [ (tok, (ruleid, args, cat)) + | (cat,set) <- IntMap.toList (productions pinfo) + , (FApply ruleid args) <- Set.toList set + , tok <- getLeftCornerTok pinfo ((functions pinfo) ! ruleid) ] + grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymTok (KS t) <- elems lin] diff --git a/src/PGF/Data.hs b/src/PGF/Data.hs index 7f240dc4c..76659912c 100644 --- a/src/PGF/Data.hs +++ b/src/PGF/Data.hs @@ -2,11 +2,13 @@ module PGF.Data where import PGF.CId import GF.Text.UTF8 -import GF.Data.Assoc import qualified Data.Map as Map +import qualified Data.Set as Set +import qualified Data.IntMap as IntMap import Data.List import Data.Array +import Data.Array.Unboxed -- internal datatypes for PGF @@ -108,32 +110,28 @@ data Equation = deriving (Eq,Ord,Show) -type FToken = String type FCat = Int type FIndex = Int -data FSymbol - = FSymCat {-# UNPACK #-} !FIndex {-# UNPACK #-} !Int - | FSymTok FToken -type Profile = [Int] type FPointPos = Int -type FGrammar = ([FRule], Map.Map CId [FCat]) -data FRule = FRule CId [Profile] [FCat] FCat (Array FIndex (Array FPointPos FSymbol)) - -type RuleId = Int +data FSymbol + = FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex + | FSymTok Tokn + deriving (Eq,Ord,Show) +type Profile = [Int] +data Production + = FApply {-# UNPACK #-} !FunId [FCat] + | FCoerce {-# UNPACK #-} !FCat + deriving (Eq,Ord,Show) +data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show) +type FSeq = Array FPointPos FSymbol +type FunId = Int +type SeqId = Int data ParserInfo - = ParserInfo { allRules :: Array RuleId FRule - , topdownRules :: Assoc FCat [RuleId] - -- ^ used in 'GF.Parsing.MCFG.Active' (Earley): - -- , emptyRules :: [RuleId] - , epsilonRules :: [RuleId] - -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): - , leftcornerCats :: Assoc FCat [RuleId] - , leftcornerTokens :: Assoc FToken [RuleId] - -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): - , grammarCats :: [FCat] - , grammarToks :: [FToken] - , startupCats :: Map.Map CId [FCat] + = ParserInfo { functions :: Array FunId FFun + , sequences :: Array SeqId FSeq + , productions :: IntMap.IntMap (Set.Set Production) + , startCats :: Map.Map CId [FCat] } diff --git a/src/PGF/Macros.hs b/src/PGF/Macros.hs index 4c73817dc..9218fcb8f 100644 --- a/src/PGF/Macros.hs +++ b/src/PGF/Macros.hs @@ -49,12 +49,6 @@ lookValCat pgf = valCat . lookType pgf lookParser :: PGF -> CId -> Maybe ParserInfo lookParser pgf lang = Map.lookup lang (concretes pgf) >>= parser -lookFCFG :: PGF -> CId -> Maybe FGrammar -lookFCFG pgf lang = fmap toFGrammar $ lookParser pgf lang - where - toFGrammar :: ParserInfo -> FGrammar - toFGrammar pinfo = (Array.elems (allRules pinfo), startupCats pinfo) - lookStartCat :: PGF -> String lookStartCat pgf = fromMaybe "S" $ msum $ Data.List.map (Map.lookup (mkCId "startcat")) [gflags pgf, aflags (abstract pgf)] diff --git a/src/PGF/Parsing/FCFG.hs b/src/PGF/Parsing/FCFG.hs index 4ca6a956a..fe56f8712 100644 --- a/src/PGF/Parsing/FCFG.hs +++ b/src/PGF/Parsing/FCFG.hs @@ -8,7 +8,7 @@ ----------------------------------------------------------------------------- module PGF.Parsing.FCFG - (buildParserInfo,ParserInfo,parseFCFG) where + (ParserInfo,parseFCFG) where import GF.Data.ErrM import GF.Data.Assoc @@ -17,7 +17,6 @@ import GF.Data.SortedList import PGF.CId import PGF.Data import PGF.Macros -import PGF.BuildParser import PGF.Parsing.FCFG.Utilities import qualified PGF.Parsing.FCFG.Active as Active import qualified PGF.Parsing.FCFG.Incremental as Incremental diff --git a/src/PGF/Parsing/FCFG/Active.hs b/src/PGF/Parsing/FCFG/Active.hs index 4386bfdd1..0927a719b 100644 --- a/src/PGF/Parsing/FCFG/Active.hs +++ b/src/PGF/Parsing/FCFG/Active.hs @@ -17,17 +17,22 @@ import qualified GF.Data.MultiMap as MM import PGF.CId import PGF.Data import PGF.Parsing.FCFG.Utilities +import PGF.BuildParser import Control.Monad (guard) import qualified Data.List as List import qualified Data.Map as Map +import qualified Data.IntMap as IntMap import qualified Data.Set as Set -import Data.Array +import Data.Array.IArray +import Debug.Trace ---------------------------------------------------------------------- -- * parsing +type FToken = String + makeFinalEdge cat 0 0 = (cat, [EmptyRange]) makeFinalEdge cat i j = (cat, [makeRange i j]) @@ -36,77 +41,79 @@ parse :: String -> ParserInfo -> CId -> [FToken] -> [Tree] parse strategy pinfo start toks = nubsort $ filteredForests >>= forest2trees where inTokens = input toks - starts = Map.findWithDefault [] start (startupCats pinfo) + starts = Map.findWithDefault [] start (startCats pinfo) schart = xchart2syntaxchart chart pinfo (i,j) = inputBounds inTokens finalEdges = [makeFinalEdge cat i j | cat <- starts] forests = chart2forests schart (const False) finalEdges filteredForests = forests >>= applyProfileToForest - chart = process strategy pinfo inTokens axioms emptyXChart - axioms | isBU strategy = literals pinfo inTokens ++ initialBU pinfo inTokens - | isTD strategy = literals pinfo inTokens ++ initialTD pinfo starts inTokens + pinfoex = buildParserInfo pinfo + + chart = process strategy pinfo pinfoex inTokens axioms emptyXChart + axioms | isBU strategy = literals pinfoex inTokens ++ initialBU pinfo pinfoex inTokens + | isTD strategy = literals pinfoex inTokens ++ initialTD pinfo starts inTokens isBU s = s=="b" isTD s = s=="t" -- used in prediction -emptyChildren :: RuleId -> ParserInfo -> SyntaxNode RuleId RangeRec -emptyChildren ruleid pinfo = SNode ruleid (replicate (length rhs) []) - where - FRule _ _ rhs _ _ = allRules pinfo ! ruleid +emptyChildren :: FunId -> [FCat] -> SyntaxNode FunId RangeRec +emptyChildren ruleid args = SNode ruleid (replicate (length args) []) -process :: String -> ParserInfo -> Input FToken -> [(FCat,Item)] -> XChart FCat -> XChart FCat -process strategy pinfo toks [] chart = chart -process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks items $! univRule c item chart + +process :: String -> ParserInfo -> ParserInfoEx -> Input FToken -> [Item] -> XChart FCat -> XChart FCat +process strategy pinfo pinfoex toks [] chart = chart +process strategy pinfo pinfoex toks (item:items) chart = process strategy pinfo pinfoex toks items $! univRule item chart where - univRule cat item@(Active found rng lbl ppos node@(SNode ruleid recs)) chart + univRule item@(Active found rng lbl ppos node@(SNode ruleid recs) args cat) chart | inRange (bounds lin) ppos = case lin ! ppos of - FSymCat r d -> let c = args !! d + FSymCat d r -> let c = args !! d in case recs !! d of [] -> case insertXChart chart item c of Nothing -> chart - Just chart -> let items = do item@(Final found' _) <- lookupXChartFinal chart c + Just chart -> let items = do item@(Final found' _ _ _) <- lookupXChartFinal chart c rng <- concatRange rng (found' !! r) - return (c, Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs))) + return (Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)) args cat) ++ do guard (isTD strategy) - ruleid <- topdownRules pinfo ? c - return (c, Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo)) - in process strategy pinfo toks items chart + (ruleid,args) <- topdownRules pinfo c + return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args c) + in process strategy pinfo pinfoex toks items chart found' -> let items = do rng <- concatRange rng (found' !! r) - return (c, Active found rng lbl (ppos+1) node) - in process strategy pinfo toks items chart - FSymTok tok -> let items = do t_rng <- inputToken toks ? tok + return (Active found rng lbl (ppos+1) node args cat) + in process strategy pinfo pinfoex toks items chart + FSymTok (KS tok) + -> let items = do t_rng <- inputToken toks ? tok rng' <- concatRange rng t_rng - return (cat, Active found rng' lbl (ppos+1) node) - in process strategy pinfo toks items chart + return (Active found rng' lbl (ppos+1) node args cat) + in process strategy pinfo pinfoex toks items chart | otherwise = if inRange (bounds lins) (lbl+1) - then univRule cat (Active (rng:found) EmptyRange (lbl+1) 0 node) chart - else univRule cat (Final (reverse (rng:found)) node) chart + then univRule (Active (rng:found) EmptyRange (lbl+1) 0 node args cat) chart + else univRule (Final (reverse (rng:found)) node args cat) chart where - (FRule _ _ args cat lins) = allRules pinfo ! ruleid - lin = lins ! lbl - univRule cat item@(Final found' node) chart = + (FFun _ _ lins) = functions pinfo ! ruleid + lin = sequences pinfo ! (lins ! lbl) + univRule item@(Final found' node args cat) chart = case insertXChart chart item cat of Nothing -> chart - Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _)) <- lookupXChartAct chart cat - let FRule _ _ args _ lins = allRules pinfo ! ruleid - FSymCat r d = lins ! l ! ppos + Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _) args c) <- lookupXChartAct chart cat + let FFun _ _ lins = functions pinfo ! ruleid + FSymCat d r = (sequences pinfo ! (lins ! l)) ! ppos rng <- concatRange rng (found' !! r) - return (args !! d, Active found rng l (ppos+1) (updateChildren node d found')) + return (Active found rng l (ppos+1) (updateChildren node d found') args c) ++ do guard (isBU strategy) - ruleid <- leftcornerCats pinfo ? cat - let FRule _ _ args _ lins = allRules pinfo ! ruleid - FSymCat r d = lins ! 0 ! 0 - return (args !! d, Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid pinfo) d found')) + (ruleid,args,c) <- leftcornerCats pinfoex ? cat + let FFun _ _ lins = functions pinfo ! ruleid + FSymCat d r = (sequences pinfo ! (lins ! 0)) ! 0 + return (Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid args) d found') args c) - updateChildren :: SyntaxNode RuleId RangeRec -> Int -> RangeRec -> SyntaxNode RuleId RangeRec + updateChildren :: SyntaxNode FunId RangeRec -> Int -> RangeRec -> SyntaxNode FunId RangeRec updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs - in process strategy pinfo toks items chart + in process strategy pinfo pinfoex toks items chart ---------------------------------------------------------------------- -- * XChart @@ -116,21 +123,23 @@ data Item Range {-# UNPACK #-} !FIndex {-# UNPACK #-} !FPointPos - (SyntaxNode RuleId RangeRec) - | Final RangeRec (SyntaxNode RuleId RangeRec) - deriving (Eq, Ord) + (SyntaxNode FunId RangeRec) + [FCat] + FCat + | Final RangeRec (SyntaxNode FunId RangeRec) [FCat] FCat + deriving (Eq, Ord, Show) data XChart c = XChart !(MM.MultiMap c Item) !(MM.MultiMap c Item) emptyXChart :: Ord c => XChart c emptyXChart = XChart MM.empty MM.empty -insertXChart (XChart actives finals) item@(Active _ _ _ _ _) c = +insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _ _) c = case MM.insert' c item actives of Nothing -> Nothing Just actives -> Just (XChart actives finals) -insertXChart (XChart actives finals) item@(Final _ _) c = +insertXChart (XChart actives finals) item@(Final _ _ _ _) c = case MM.insert' c item finals of Nothing -> Nothing Just finals -> Just (XChart actives finals) @@ -142,17 +151,17 @@ xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) ( xchart2syntaxchart (XChart actives finals) pinfo = accumAssoc groupSyntaxNodes $ [ case node of - SNode ruleid rrecs -> let FRule fun prof rhs cat _ = allRules pinfo ! ruleid + SNode ruleid rrecs -> let FFun fun prof _ = functions pinfo ! ruleid in ((cat,found), SNode (fun,prof) (zip rhs rrecs)) SString s -> ((cat,found), SString s) SInt n -> ((cat,found), SInt n) SFloat f -> ((cat,found), SFloat f) - | (cat, Final found node) <- MM.toList finals + | (Final found node rhs cat) <- MM.elems finals ] -literals :: ParserInfo -> Input FToken -> [(FCat,Item)] -literals pinfo toks = - [let (c,node) = lexer t in (c,Final [rng] node) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfo)] +literals :: ParserInfoEx -> Input FToken -> [Item] +literals pinfoex toks = + [let (c,node) = lexer t in (Final [rng] node [] c) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfoex)] where lexer t = case reads t of @@ -166,24 +175,30 @@ literals pinfo toks = -- Earley -- -- called with all starting categories -initialTD :: ParserInfo -> [FCat] -> Input FToken -> [(FCat,Item)] +initialTD :: ParserInfo -> [FCat] -> Input FToken -> [Item] initialTD pinfo starts toks = do cat <- starts - ruleid <- topdownRules pinfo ? cat - return (cat,Active [] (Range 0 0) 0 0 (emptyChildren ruleid pinfo)) + (ruleid,args) <- topdownRules pinfo cat + return (Active [] (Range 0 0) 0 0 (emptyChildren ruleid args) args cat) + +topdownRules pinfo cat = f cat [] + where + f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo)) + + g (FApply ruleid args) rules = (ruleid,args) : rules + g (FCoerce cat) rules = f cat rules ---------------------------------------------------------------------- -- Kilbury -- -initialBU :: ParserInfo -> Input FToken -> [(FCat,Item)] -initialBU pinfo toks = +initialBU :: ParserInfo -> ParserInfoEx -> Input FToken -> [Item] +initialBU pinfo pinfoex toks = do (tok,rngs) <- aAssocs (inputToken toks) - ruleid <- leftcornerTokens pinfo ? tok - let FRule _ _ _ cat _ = allRules pinfo ! ruleid + (ruleid,args,cat) <- leftcornerTokens pinfoex ? tok rng <- rngs - return (cat,Active [] rng 0 1 (emptyChildren ruleid pinfo)) + return (Active [] rng 0 1 (emptyChildren ruleid args) args cat) ++ - do ruleid <- epsilonRules pinfo - let FRule _ _ _ cat _ = allRules pinfo ! ruleid - return (cat,Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo)) + do (ruleid,args,cat) <- epsilonRules pinfoex + let FFun _ _ _ = functions pinfo ! ruleid + return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args cat) diff --git a/src/PGF/Parsing/FCFG/Incremental.hs b/src/PGF/Parsing/FCFG/Incremental.hs index 23b0424cc..4f35ed169 100644 --- a/src/PGF/Parsing/FCFG/Incremental.hs +++ b/src/PGF/Parsing/FCFG/Incremental.hs @@ -8,55 +8,54 @@ module PGF.Parsing.FCFG.Incremental , parse ) where -import Data.Array +import Data.Array.IArray import Data.Array.Base (unsafeAt) import Data.List (isPrefixOf, foldl') -import Data.Maybe (fromMaybe) +import Data.Maybe (fromMaybe, maybe) import qualified Data.Map as Map import qualified Data.IntMap as IntMap import qualified Data.Set as Set import Control.Monad -import GF.Data.Assoc import GF.Data.SortedList -import qualified GF.Data.MultiMap as MM import PGF.CId import PGF.Data -import PGF.Parsing.FCFG.Utilities import Debug.Trace -parse :: ParserInfo -> CId -> [FToken] -> [Tree] -parse pinfo start toks = extractExps (foldl' nextState (initState pinfo start) toks) start +parse :: ParserInfo -> CId -> [String] -> [Tree] +parse pinfo start toks = maybe [] (\ps -> extractExps ps start) (foldM nextState (initState pinfo start) toks) initState :: ParserInfo -> CId -> ParseState initState pinfo start = let items = do - c <- Map.findWithDefault [] start (startupCats pinfo) - ruleid <- topdownRules pinfo ? c - let (FRule fn _ args cat lins) = allRules pinfo ! ruleid - lbl <- indices lins - return (Active 0 lbl 0 ruleid args cat) + cat <- fromMaybe [] (Map.lookup start (startCats pinfo)) + (funid,args) <- foldForest (\funid args -> (:) (funid,args)) [] cat (productions pinfo) + let FFun fn _ lins = functions pinfo ! funid + (lbl,seqid) <- assocs lins + return (Active 0 0 funid seqid args (AK cat lbl)) - forest = IntMap.fromListWith Set.union [(cat, Set.singleton (Passive ruleid args)) | (ruleid, FRule _ _ args cat _) <- assocs (allRules pinfo)] - - max_fid = maximum (0:[maximum (cat:args) | (ruleid, FRule _ _ args cat _) <- assocs (allRules pinfo)])+1 + max_fid = maximum (0:[maximum (cat:args) | (cat, set) <- IntMap.toList (productions pinfo) + , p <- Set.toList set + , let args = case p of {FApply _ args -> args; FCoerce cat -> [cat]}])+1 in State pinfo - (Chart MM.empty [] Map.empty forest max_fid 0) + (Chart emptyAC [] emptyPC (productions pinfo) max_fid 0) (Set.fromList items) -- | From the current state and the next token -- 'nextState' computes a new state where the token -- is consumed and the current position shifted by one. -nextState :: ParseState -> String -> ParseState +nextState :: ParseState -> String -> Maybe ParseState nextState (State pinfo chart items) t = - let (items1,chart1) = process add (allRules pinfo) (Set.toList items) (Set.empty,chart) - chart2 = chart1{ active =MM.empty + let (items1,chart1) = process add (sequences pinfo) (functions pinfo) (Set.toList items) Set.empty chart + chart2 = chart1{ active =emptyAC , actives=active chart1 : actives chart1 - , passive=Map.empty + , passive=emptyPC , offset =offset chart1+1 } - in State pinfo chart2 items1 + in if Set.null items1 + then Nothing + else Just (State pinfo chart2 items1) where add tok item set | tok == t = Set.insert item set @@ -68,107 +67,157 @@ nextState (State pinfo chart items) t = -- the GF interpreter. getCompletions :: ParseState -> String -> Map.Map String ParseState getCompletions (State pinfo chart items) w = - let (map',chart1) = process add (allRules pinfo) (Set.toList items) (MM.empty,chart) - chart2 = chart1{ active =MM.empty + let (map',chart1) = process add (sequences pinfo) (functions pinfo) (Set.toList items) Map.empty chart + chart2 = chart1{ active =emptyAC , actives=active chart1 : actives chart1 - , passive=Map.empty + , passive=emptyPC , offset =offset chart1+1 } in fmap (State pinfo chart2) map' where add tok item map - | isPrefixOf w tok = fromMaybe map (MM.insert' tok item map) + | isPrefixOf w tok = Map.insertWith Set.union tok (Set.singleton item) map | otherwise = map extractExps :: ParseState -> CId -> [Tree] extractExps (State pinfo chart items) start = exps where - (_,st) = process (\_ _ -> id) (allRules pinfo) (Set.toList items) ((),chart) + (_,st) = process (\_ _ -> id) (sequences pinfo) (functions pinfo) (Set.toList items) () chart exps = nubsort $ do - c <- Map.findWithDefault [] start (startupCats pinfo) - ruleid <- topdownRules pinfo ? c - let (FRule fn _ args cat lins) = allRules pinfo ! ruleid + cat <- fromMaybe [] (Map.lookup start (startCats pinfo)) + (funid,args) <- foldForest (\funid args -> (:) (funid,args)) [] cat (productions pinfo) + let FFun fn _ lins = functions pinfo ! funid lbl <- indices lins - fid <- Map.lookup (PK c lbl 0) (passive st) + Just fid <- [lookupPC (PK cat lbl 0) (passive st)] go Set.empty fid - go rec fid - | Set.member fid rec = mzero - | otherwise = do set <- IntMap.lookup fid (forest st) - Passive ruleid args <- Set.toList set - let (FRule fn _ _ cat lins) = allRules pinfo ! ruleid - if fn == wildCId - then go (Set.insert fid rec) (head args) - else do args <- mapM (go (Set.insert fid rec)) args - return (Fun fn args) + go rec fcat + | Set.member fcat rec = mzero + | otherwise = do (funid,args) <- foldForest (\funid args -> (:) (funid,args)) [] fcat (forest st) + let FFun fn _ lins = functions pinfo ! funid + args <- mapM (go (Set.insert fcat rec)) args + return (Fun fn args) -process fn !rules [] acc_chart = acc_chart -process fn !rules (item:items) acc_chart = univRule item acc_chart +process fn !seqs !funs [] acc chart = (acc,chart) +process fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart + | inRange (bounds lin) ppos = + case unsafeAt lin ppos of + FSymCat d r -> let !fid = args !! d + key = AK fid r + + items2 = case lookupPC (mkPK key k) (passive chart) of + Nothing -> items + Just id -> (Active j (ppos+1) funid seqid (updateAt d id args) key0) : items + items3 = foldForest (\funid args -> (:) (Active k 0 funid (rhs funid r) args key)) items2 fid (forest chart) + in case lookupAC key (active chart) of + Nothing -> process fn seqs funs items3 acc chart{active=insertAC key (Set.singleton item) (active chart)} + Just set | Set.member item set -> process fn seqs funs items acc chart + | otherwise -> process fn seqs funs items2 acc chart{active=insertAC key (Set.insert item set) (active chart)} + FSymTok (KS tok) -> let !acc' = fn tok (Active j (ppos+1) funid seqid args key0) acc + in process fn seqs funs items acc' chart + | otherwise = + case lookupPC (mkPK key0 j) (passive chart) of + Nothing -> let fid = nextId chart + + items2 = case lookupAC key0 ((active chart:actives chart) !! (k-j)) of + Nothing -> items + Just set -> Set.fold (\(Active j' ppos funid seqid args keyc) -> + let FSymCat d _ = unsafeAt (unsafeAt seqs seqid) ppos + in (:) (Active j' (ppos+1) funid seqid (updateAt d fid args) keyc)) items set + in process fn seqs funs items2 acc chart{passive=insertPC (mkPK key0 j) fid (passive chart) + ,forest =IntMap.insert fid (Set.singleton (FApply funid args)) (forest chart) + ,nextId =nextId chart+1 + } + Just id -> let items2 = [Active k 0 funid (rhs funid r) args (AK id r) | r <- labelsAC id (active chart)] ++ items + in process fn seqs funs items2 acc chart{forest = IntMap.insertWith Set.union id (Set.singleton (FApply funid args)) (forest chart)} where - univRule (Active j lbl ppos ruleid args fid0) acc_chart@(acc,chart) - | inRange (bounds lin) ppos = - case unsafeAt lin ppos of - FSymCat r d -> let !fid = args !! d - in case MM.insert' (AK fid r) item (active chart) of - Nothing -> process fn rules items $ acc_chart - Just actCat -> (case Map.lookup (PK fid r k) (passive chart) of - Nothing -> id - Just id -> process fn rules [Active j lbl (ppos+1) ruleid (updateAt d id args) fid0]) $ - (case IntMap.lookup fid (forest chart) of - Nothing -> id - Just set -> process fn rules (Set.fold (\(Passive ruleid args) -> (:) (Active k r 0 ruleid args fid)) [] set)) $ - process fn rules items $ - (acc,chart{active=actCat}) - FSymTok tok -> process fn rules items $ - (fn tok (Active j lbl (ppos+1) ruleid args fid0) acc,chart) - | otherwise = case Map.lookup (PK fid0 lbl j) (passive chart) of - Nothing -> let fid = nextId chart - in process fn rules [Active j' lbl (ppos+1) ruleid (updateAt d fid args) fidc - | Active j' lbl ppos ruleid args fidc <- ((active chart:actives chart) !! (k-j)) MM.! (AK fid0 lbl), - let FSymCat _ d = unsafeAt (rhs ruleid lbl) ppos] $ - process fn rules items $ - (acc,chart{passive=Map.insert (PK fid0 lbl j) fid (passive chart) - ,forest =IntMap.insert fid (Set.singleton (Passive ruleid args)) (forest chart) - ,nextId =nextId chart+1 - }) - Just id -> process fn rules items $ - (acc,chart{forest = IntMap.insertWith Set.union id (Set.singleton (Passive ruleid args)) (forest chart)}) - where - !lin = rhs ruleid lbl - !k = offset chart + !lin = unsafeAt seqs seqid + !k = offset chart - rhs ruleid lbl = unsafeAt lins lbl + mkPK (AK fid lbl) j = PK fid lbl j + + rhs funid lbl = unsafeAt lins lbl where - (FRule _ _ _ cat lins) = unsafeAt rules ruleid + FFun _ _ lins = unsafeAt funs funid updateAt :: Int -> a -> [a] -> [a] updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs] +---------------------------------------------------------------- +-- Active Chart +---------------------------------------------------------------- + data Active = Active {-# UNPACK #-} !Int - {-# UNPACK #-} !FIndex {-# UNPACK #-} !FPointPos - {-# UNPACK #-} !RuleId + {-# UNPACK #-} !FunId + {-# UNPACK #-} !SeqId [FCat] - {-# UNPACK #-} !FCat + {-# UNPACK #-} !ActiveKey deriving (Eq,Show,Ord) -data Passive - = Passive {-# UNPACK #-} !RuleId - [FCat] - deriving (Eq,Ord,Show) - data ActiveKey = AK {-# UNPACK #-} !FCat {-# UNPACK #-} !FIndex deriving (Eq,Ord,Show) +type ActiveChart = IntMap.IntMap (IntMap.IntMap (Set.Set Active)) + +emptyAC :: ActiveChart +emptyAC = IntMap.empty + +lookupAC :: ActiveKey -> ActiveChart -> Maybe (Set.Set Active) +lookupAC (AK fcat l) chart = IntMap.lookup fcat chart >>= IntMap.lookup l + +labelsAC :: FCat -> ActiveChart -> [FIndex] +labelsAC fcat chart = + case IntMap.lookup fcat chart of + Nothing -> [] + Just map -> IntMap.keys map + +insertAC :: ActiveKey -> Set.Set Active -> ActiveChart -> ActiveChart +insertAC (AK fcat l) set chart = IntMap.insertWith IntMap.union fcat (IntMap.singleton l set) chart + + +---------------------------------------------------------------- +-- Passive Chart +---------------------------------------------------------------- + data PassiveKey = PK {-# UNPACK #-} !FCat {-# UNPACK #-} !FIndex {-# UNPACK #-} !Int deriving (Eq,Ord,Show) +type PassiveChart = Map.Map PassiveKey FCat + +emptyPC :: PassiveChart +emptyPC = Map.empty + +lookupPC :: PassiveKey -> PassiveChart -> Maybe FCat +lookupPC key chart = Map.lookup key chart + +insertPC :: PassiveKey -> FCat -> PassiveChart -> PassiveChart +insertPC key fcat chart = Map.insert key fcat chart + + +---------------------------------------------------------------- +-- Forest +---------------------------------------------------------------- + +foldForest :: (FunId -> [FCat] -> b -> b) -> b -> FCat -> IntMap.IntMap (Set.Set Production) -> b +foldForest f b fcat forest = + case IntMap.lookup fcat forest of + Nothing -> b + Just set -> Set.fold foldPassive b set + where + foldPassive (FCoerce fcat) b = foldForest f b fcat forest + foldPassive (FApply funid args) b = f funid args b + + +---------------------------------------------------------------- +-- Parse State +---------------------------------------------------------------- -- | An abstract data type whose values represent -- the current state in an incremental parser. @@ -176,10 +225,11 @@ data ParseState = State ParserInfo Chart (Set.Set Active) data Chart = Chart - { active :: MM.MultiMap ActiveKey Active - , actives :: [MM.MultiMap ActiveKey Active] - , passive :: Map.Map PassiveKey FCat - , forest :: IntMap.IntMap (Set.Set Passive) + { active :: ActiveChart + , actives :: [ActiveChart] + , passive :: PassiveChart + , forest :: IntMap.IntMap (Set.Set Production) , nextId :: {-# UNPACK #-} !FCat , offset :: {-# UNPACK #-} !Int } + deriving Show diff --git a/src/PGF/Parsing/FCFG/Utilities.hs b/src/PGF/Parsing/FCFG/Utilities.hs index 4187d0f24..6a2c13c0a 100644 --- a/src/PGF/Parsing/FCFG/Utilities.hs +++ b/src/PGF/Parsing/FCFG/Utilities.hs @@ -31,7 +31,7 @@ type RangeRec = [Range] data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int | EmptyRange - deriving (Eq, Ord) + deriving (Eq, Ord, Show) makeRange :: Int -> Int -> Range makeRange = Range @@ -83,7 +83,7 @@ data SyntaxNode n e = SMeta | SString String | SInt Integer | SFloat Double - deriving (Eq,Ord) + deriving (Eq,Ord,Show) groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]] groupSyntaxNodes [] = [] diff --git a/src/PGF/Raw/Convert.hs b/src/PGF/Raw/Convert.hs index 0c9338012..2912bced1 100644 --- a/src/PGF/Raw/Convert.hs +++ b/src/PGF/Raw/Convert.hs @@ -3,13 +3,12 @@ module PGF.Raw.Convert (toPGF,fromPGF) where import PGF.CId import PGF.Data import PGF.Raw.Abstract -import PGF.BuildParser (buildParserInfo) -import PGF.Parsing.FCFG.Utilities -import qualified GF.Compile.GenerateFCFG as FCFG import qualified GF.Compile.GeneratePMCFG as PMCFG -import qualified Data.Array as Array -import qualified Data.Map as Map +import Data.Array.IArray +import qualified Data.Map as Map +import qualified Data.Set as Set +import qualified Data.IntMap as IntMap pgfMajorVersion, pgfMinorVersion :: Integer (pgfMajorVersion, pgfMinorVersion) = (1,0) @@ -54,11 +53,11 @@ toConcr pgf rexp = lindefs = Map.empty, printnames = Map.empty, paramlincats = Map.empty, - parser = Just (buildParserOnDemand cnc) -- This thunk will be overwritten if there is a parser + parser = Just (PMCFG.convertConcrete (abstract pgf) cnc) + -- This thunk will be overwritten if there is a parser -- compiled in the PGF file. We use lazy evaluation here -- to make sure that buildParserOnDemand is called only -- if it is needed. - }) rexp in cnc where @@ -72,41 +71,44 @@ toConcr pgf rexp = add cnc (App "param" ts) = cnc { paramlincats = mkTermMap ts } add cnc (App "parser" ts) = cnc { parser = Just (toPInfo ts) } - buildParserOnDemand cnc = buildParserInfo fcfg - where - fcfg - | Map.lookup (mkCId "erasing") (cflags cnc) == Just "on" = PMCFG.convertConcrete (abstract pgf) cnc - | otherwise = FCFG.convertConcrete (abstract pgf) cnc - toPInfo :: [RExp] -> ParserInfo -toPInfo [App "rules" rs, App "startupcats" cs] = buildParserInfo (rules, cats) +toPInfo [App "functions" fs, App "sequences" ss, App "productions" ps,App "startcats" cs] = + ParserInfo { functions = functions + , sequences = seqs + , productions = productions + , startCats = cats + } where - rules = map toFRule rs - cats = Map.fromList [(mkCId c, map expToInt fs) | App c fs <- cs] + functions = mkArray (map toFFun fs) + seqs = mkArray (map toFSeq ss) + productions = IntMap.fromList (map toProductionSet ps) + cats = Map.fromList [(mkCId c, (map expToInt xs)) | App c xs <- cs] - toFRule :: RExp -> FRule - toFRule (App "rule" - [n, - App "cats" (rt:at), - App "R" ls]) = FRule fun prof args res lins + toFFun :: RExp -> FFun + toFFun (App f [App "P" ts,App "R" ls]) = FFun fun prof lins + where + fun = mkCId f + prof = map toProfile ts + lins = mkArray [fromIntegral seqid | AInt seqid <- ls] + + toProfile :: RExp -> Profile + toProfile AMet = [] + toProfile (App "_A" [t]) = [expToInt t] + toProfile (App "_U" ts) = [expToInt t | App "_A" [t] <- ts] + + toFSeq :: RExp -> FSeq + toFSeq (App "seq" ss) = mkArray [toSymbol s | s <- ss] + + toProductionSet :: RExp -> (FCat,Set.Set Production) + toProductionSet (App "td" (rt : xs)) = (expToInt rt, Set.fromList (map toProduction xs)) where - (fun,prof) = toFName n - args = map expToInt at - res = expToInt rt - lins = mkArray [mkArray [toSymbol s | s <- l] | App "S" l <- ls] - -toFName :: RExp -> (CId,[Profile]) -toFName (App "_A" [x]) = (wildCId, [[expToInt x]]) -toFName (App f ts) = (mkCId f, map toProfile ts) - where - toProfile :: RExp -> Profile - toProfile AMet = [] - toProfile (App "_A" [t]) = [expToInt t] - toProfile (App "_U" ts) = [expToInt t | App "_A" [t] <- ts] + toProduction (App "A" (ruleid : at)) = FApply (expToInt ruleid) (map expToInt at) + toProduction (App "C" [fcat]) = FCoerce (expToInt fcat) toSymbol :: RExp -> FSymbol -toSymbol (App "P" [n,l]) = FSymCat (expToInt l) (expToInt n) -toSymbol (AStr t) = FSymTok t +toSymbol (App "P" [n,l]) = FSymCat (expToInt n) (expToInt l) +toSymbol (App "KP" (d:alts)) = FSymTok (toKP d alts) +toSymbol (AStr t) = FSymTok (KS t) toType :: RExp -> Type toType e = case e of @@ -142,8 +144,15 @@ toTerm e = case e of App f [] -> F (mkCId f) AInt i -> C (fromInteger i) AMet -> TM "?" - AStr s -> K (KS s) ---- + App "KP" (d:alts) -> K (toKP d alts) + AStr s -> K (KS s) _ -> error $ "term " ++ show e + +toKP d alts = KP (toStr d) (map toAlt alts) + where + toStr (App "S" vs) = [v | AStr v <- vs] + toAlt (App "A" [x,y]) = Alt (toStr x) (toStr y) + ------------------------------ --- from internal to parser -- @@ -192,8 +201,7 @@ fromExp e = case e of ELit (LFlt d) -> AFlt d ELit (LInt i) -> AInt (toInteger i) EMeta _ -> AMet ---- - EEq eqs -> - App "Eq" [App "E" (map fromExp (v:ps)) | Equ ps v <- eqs] + EEq eqs -> App "Eq" [App "E" (map fromExp (v:ps)) | Equ ps v <- eqs] fromTerm :: Term -> RExp fromTerm e = case e of @@ -206,8 +214,11 @@ fromTerm e = case e of TM _ -> AMet F f -> App (prCId f) [] V i -> App "A" [AInt (toInteger i)] - K (KS s) -> AStr s ---- - K (KP d vs) -> App "FV" (str d : [str v | Alt v _ <- vs]) ---- + K t -> fromTokn t + +fromTokn :: Tokn -> RExp +fromTokn (KS s) = AStr s +fromTokn (KP d vs) = App "KP" (str d : [App "A" [str v, str x] | Alt v x <- vs]) where str v = App "S" (map AStr v) @@ -215,39 +226,42 @@ fromTerm e = case e of fromPInfo :: ParserInfo -> RExp fromPInfo p = App "parser" [ - App "rules" [fromFRule rule | rule <- Array.elems (allRules p)], - App "startupcats" [App (prCId f) (map intToExp cs) | (f,cs) <- Map.toList (startupCats p)] + App "functions" [fromFFun fun | fun <- elems (functions p)], + App "sequences" [fromFSeq seq | seq <- elems (sequences p)], + App "productions" [fromProductionSet xs | xs <- IntMap.toList (productions p)], + App "startcats" [App (prCId f) (map intToExp xs) | (f,xs) <- Map.toList (startCats p)] ] -fromFRule :: FRule -> RExp -fromFRule (FRule fun prof args res lins) = - App "rule" [fromFName (fun,prof), - App "cats" (intToExp res:map intToExp args), - App "R" [App "S" [fromSymbol s | s <- Array.elems l] | l <- Array.elems lins] - ] - -fromFName :: (CId,[Profile]) -> RExp -fromFName (f,ps) | f == wildCId = fromProfile (head ps) - | otherwise = App (prCId f) (map fromProfile ps) +fromFFun :: FFun -> RExp +fromFFun (FFun fun prof lins) = App (prCId fun) [App "P" (map fromProfile prof), App "R" [intToExp seqid | seqid <- elems lins]] where fromProfile :: Profile -> RExp fromProfile [] = AMet fromProfile [x] = daughter x fromProfile args = App "_U" (map daughter args) - + daughter n = App "_A" [intToExp n] fromSymbol :: FSymbol -> RExp -fromSymbol (FSymCat l n) = App "P" [intToExp n, intToExp l] -fromSymbol (FSymTok t) = AStr t +fromSymbol (FSymCat n l) = App "P" [intToExp n, intToExp l] +fromSymbol (FSymTok t) = fromTokn t + +fromFSeq :: FSeq -> RExp +fromFSeq seq = App "seq" [fromSymbol s | s <- elems seq] + +fromProductionSet :: (FCat,Set.Set Production) -> RExp +fromProductionSet (cat,xs) = App "td" (intToExp cat : map fromPassive (Set.toList xs)) + where + fromPassive (FApply ruleid args) = App "A" (intToExp ruleid : map intToExp args) + fromPassive (FCoerce fcat) = App "C" [intToExp fcat] -- ** Utilities mkTermMap :: [RExp] -> Map.Map CId Term mkTermMap ts = Map.fromAscList [(mkCId f,toTerm v) | App f [v] <- ts] -mkArray :: [a] -> Array.Array Int a -mkArray xs = Array.listArray (0, length xs - 1) xs +mkArray :: IArray a e => [e] -> a Int e +mkArray xs = listArray (0, length xs - 1) xs expToInt :: Integral a => RExp -> a expToInt (App "neg" [AInt i]) = fromIntegral (negate i)