From c036459214852ca01868f5da81408f49b22a49e9 Mon Sep 17 00:00:00 2001 From: krasimir Date: Mon, 14 Dec 2009 10:54:22 +0000 Subject: [PATCH] remove the old parsing code and the -erasing=on flag --- GF.cabal | 10 +- src/compiler/GF/Compile/GenerateFCFG.hs | 568 ------------------ src/compiler/GF/Compile/GeneratePMCFG.hs | 6 +- src/compiler/GF/Compile/GeneratePMCFGOld.hs | 374 ------------ src/compiler/GF/Compile/GrammarToPGF.hs | 9 +- src/compiler/GF/Compile/PGFtoJS.hs | 5 +- src/compiler/GF/Infra/Option.hs | 5 - src/compiler/GF/Speech/PGFToCFG.hs | 16 +- src/runtime/haskell/PGF.hs | 21 +- src/runtime/haskell/PGF/Binary.hs | 4 +- src/runtime/haskell/PGF/BuildParser.hs | 76 --- src/runtime/haskell/PGF/PMCFG.hs | 11 +- .../{Parsing/FCFG/Incremental.hs => Parse.hs} | 16 +- .../haskell/PGF/Parsing/FCFG/Active.hs | 205 ------- .../haskell/PGF/Parsing/FCFG/Utilities.hs | 188 ------ 15 files changed, 41 insertions(+), 1473 deletions(-) delete mode 100644 src/compiler/GF/Compile/GenerateFCFG.hs delete mode 100644 src/compiler/GF/Compile/GeneratePMCFGOld.hs delete mode 100644 src/runtime/haskell/PGF/BuildParser.hs rename src/runtime/haskell/PGF/{Parsing/FCFG/Incremental.hs => Parse.hs} (95%) delete mode 100644 src/runtime/haskell/PGF/Parsing/FCFG/Active.hs delete mode 100644 src/runtime/haskell/PGF/Parsing/FCFG/Utilities.hs diff --git a/GF.cabal b/GF.cabal index 871f38b89..9c26e52d4 100644 --- a/GF.cabal +++ b/GF.cabal @@ -39,10 +39,7 @@ library PGF.Macros PGF.Generate PGF.Linearize - PGF.BuildParser - PGF.Parsing.FCFG.Utilities - PGF.Parsing.FCFG.Active - PGF.Parsing.FCFG.Incremental + PGF.Parse PGF.Expr PGF.Type PGF.Tree @@ -66,7 +63,6 @@ library GF.Infra.Option GF.Data.ErrM GF.Data.BacktrackM - GF.Compile.GenerateFCFG GF.Compile.GeneratePMCFG -- not really part of GF but I have changed the original binary library -- and we have to keep the copy for now. @@ -106,7 +102,6 @@ executable gf GF.Data.Utilities GF.Data.SortedList GF.Data.Assoc - GF.Compile.GenerateFCFG GF.Data.ErrM GF.Data.Operations GF.Infra.Ident @@ -169,9 +164,6 @@ executable gf PGF.Macros PGF.Generate PGF.Linearize - PGF.BuildParser - PGF.Parsing.FCFG.Utilities - PGF.Parsing.FCFG.Active PGF.Binary PGF.Paraphrase PGF.TypeCheck diff --git a/src/compiler/GF/Compile/GenerateFCFG.hs b/src/compiler/GF/Compile/GenerateFCFG.hs deleted file mode 100644 index 52e95f686..000000000 --- a/src/compiler/GF/Compile/GenerateFCFG.hs +++ /dev/null @@ -1,568 +0,0 @@ ----------------------------------------------------------------------- --- | --- Maintainer : Krasimir Angelov --- Stability : (stable) --- Portability : (portable) --- --- Converting SimpleGFC grammars to fast nonerasing MCFG grammar. --- --- the resulting grammars might be /very large/ --- --- the conversion is only equivalent if the GFC grammar has a context-free backbone. ------------------------------------------------------------------------------ - - -module GF.Compile.GenerateFCFG - (convertConcrete) where - -import PGF.CId -import PGF.Data -import PGF.Macros --hiding (prt) -import PGF.Parsing.FCFG.Utilities - -import GF.Data.BacktrackM -import GF.Data.SortedList -import GF.Data.Utilities (updateNthM, sortNub) - -import qualified Data.Map as Map -import qualified Data.IntMap as IntMap -import qualified Data.Set as Set -import qualified Data.List as List -import qualified Data.ByteString.Char8 as BS -import Data.Array.IArray -import Data.Maybe -import Control.Monad - ----------------------------------------------------------------------- --- main conversion function - -convertConcrete :: Abstr -> Concr -> ParserInfo -convertConcrete abs cnc = fixHoasFuns $ convert abs_defs' conc' cats' - where abs_defs = Map.assocs (funs abs) - conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient" - cats = lincats cnc - (abs_defs',conc',cats') = expandHOAS abs_defs conc cats - -expandHOAS :: [(CId,(Type,Int,[Equation]))] -> TermMap -> TermMap -> ([(CId,(Type,Int,[Equation]))],TermMap,TermMap) -expandHOAS funs lins lincats = (funs' ++ hoFuns ++ varFuns, - Map.unions [lins, hoLins, varLins], - Map.unions [lincats, hoLincats, varLincat]) - where - -- replace higher-order fun argument types with new categories - funs' = [(f,(fixType ty,a,e)) | (f,(ty,a,e)) <- funs] - where - fixType :: Type -> Type - fixType ty = let (ats,rt) = typeSkeleton ty in cftype (map catName ats) rt - - hoTypes :: [(Int,CId)] - hoTypes = sortNub [(n,c) | (_,(ty,_,_)) <- funs, (n,c) <- fst (typeSkeleton ty), n > 0] - hoCats = sortNub (map snd hoTypes) - -- for each Cat with N bindings, we add a new category _NCat - -- each new category contains a single function __NCat : Cat -> _Var -> ... -> _Var -> _NCat - hoFuns = [(funName ty,(cftype (c : replicate n varCat) (catName ty),0,[])) | ty@(n,c) <- hoTypes] - -- lincats for the new categories - hoLincats = Map.fromList [(catName ty, modifyRec (++ replicate n (S [])) (lincatOf c)) | ty@(n,c) <- hoTypes] - -- linearizations of the new functions, lin __NCat v_0 ... v_n-1 x = { s1 = x.s1; ...; sk = x.sk; $0 = v_0.s ... - hoLins = Map.fromList [ (funName ty, mkLin c n) | ty@(n,c) <- hoTypes] - where mkLin c n = modifyRec (\fs -> [P (V 0) (C j) | j <- [0..length fs-1]] ++ [P (V i) (C 0) | i <- [1..n]]) (lincatOf c) - -- for each Cat, we a add a fun _Var_Cat : _Var -> Cat - varFuns = [(varFunName cat, (cftype [varCat] cat,0,[])) | cat <- hoCats] - -- linearizations of the _Var_Cat functions - varLins = Map.fromList [(varFunName cat, R [P (V 0) (C 0)]) | cat <- hoCats] - -- lincat for the _Var category - varLincat = Map.singleton varCat (R [S []]) - - lincatOf c = fromMaybe (error $ "No lincat for " ++ showCId c) $ Map.lookup c lincats - - modifyRec :: ([Term] -> [Term]) -> Term -> Term - modifyRec f (R xs) = R (f xs) - modifyRec _ t = error $ "Not a record: " ++ show t - - varCat = mkCId "_Var" - - catName :: (Int,CId) -> CId - catName (0,c) = c - catName (n,c) = mkCId ("_" ++ show n ++ showCId c) - - funName :: (Int,CId) -> CId - funName (n,c) = mkCId ("__" ++ show n ++ showCId c) - - varFunName :: CId -> CId - varFunName c = mkCId ("_Var_" ++ showCId c) - --- replaces __NCat with _B and _Var_Cat with _. --- the temporary names are just there to avoid name collisions. -fixHoasFuns :: ParserInfo -> ParserInfo -fixHoasFuns pinfo = pinfo{functions=mkArray [FFun (fixName n) prof lins | FFun n prof lins <- elems (functions pinfo)]} - where fixName (CId n) | BS.pack "__" `BS.isPrefixOf` n = (mkCId "_B") - | BS.pack "_Var_" `BS.isPrefixOf` n = wildCId - fixName n = n - -convert :: [(CId,(Type,Int,[Equation]))] -> TermMap -> TermMap -> ParserInfo -convert abs_defs cnc_defs cat_defs = getParserInfo (loop grammarEnv) - where - srules = [ - (XRule id args res (map findLinType args) (findLinType res) term) | - (id, (ty,_,_)) <- abs_defs, let (args,res) = catSkeleton ty, - term <- maybeToList (Map.lookup id cnc_defs)] - - findLinType id = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs) - - (xrulesMap,grammarEnv) = List.foldl' helper (Map.empty,emptyFFunsEnv) srules - where - helper (xrulesMap,grammarEnv) rule@(XRule id abs_args abs_res cnc_args cnc_res term) = - let xrulesMap' = Map.insertWith (++) abs_res [rule] xrulesMap - grammarEnv' = List.foldl' (\env selector -> convertRule cnc_defs selector rule env) - grammarEnv - (mkSingletonSelectors cnc_defs cnc_res) - in xrulesMap' `seq` grammarEnv' `seq` (xrulesMap',grammarEnv') - - loop grammarEnv = - let (todo, grammarEnv') = takeToDoRules xrulesMap grammarEnv - in case todo of - [] -> grammarEnv' - _ -> loop $! List.foldl' (\env (srules,selector) -> - List.foldl' (\env srule -> convertRule cnc_defs selector srule env) env srules) grammarEnv' todo - -convertRule :: TermMap -> TermSelector -> XRule -> GrammarEnv -> GrammarEnv -convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) grammarEnv = - foldBM addRule - grammarEnv - (convertTerm cnc_defs selector term [([],[])]) - (protoFCat cat, map (\scat -> (protoFCat scat,[])) args, ctype, ctypes) - where - addRule linRec (newCat', newArgs', _, _) env0 = - let (env1, newCat) = genFCatHead env0 newCat' - (env2, newArgs,idxArgs) = foldr (\((xcat@(PFCat cat rcs tcs),xpaths),ctype,idx) (env,args,all_args) -> - let xargs = xcat:[PFCat cat [path] tcs | path <- reverse xpaths] - (env1, xargs1) = List.mapAccumL (genFCatArg cnc_defs ctype) env xargs - in case xcat of - PFCat _ [] _ -> (env , args, all_args) - _ -> (env1,xargs1++args,(idx,zip xargs1 xargs):all_args)) - (env1,[],[]) (zip3 newArgs' ctypes [0..]) - - (env3,newLinRec) = List.mapAccumL (translateLin idxArgs linRec) env2 (case newCat' of {PFCat _ rcs _ -> rcs}) - - (_,newProfile) = List.mapAccumL accumProf 0 newArgs' - where - accumProf nr (PFCat _ [] _,_ ) = (nr, [] ) - accumProf nr (_ ,xpaths) = (nr+cnt+1, [nr..nr+cnt]) - where cnt = length xpaths - - (env4,funid) = addFFun env3 (FFun fun newProfile (mkArray newLinRec)) - - in addProduction env4 newCat (FApply funid newArgs) - -translateLin idxArgs [] grammarEnv lbl' = error "translateLin" -translateLin idxArgs ((lbl,syms) : lins) grammarEnv lbl' - | lbl' == lbl = addFSeq grammarEnv (lbl,map instSym syms) - | otherwise = translateLin idxArgs lins grammarEnv lbl' - where - instSym = either (\(lbl, nr, xnr) -> instCat lbl nr xnr 0 idxArgs) - (\t -> case t of - KS s -> FSymKS [s] - KP strs vars -> FSymKP strs vars) - instCat lbl nr xnr nr' ((idx,xargs):idxArgs) - | nr == idx = let (fcat, PFCat _ rcs _) = xargs !! xnr - in FSymCat (nr'+xnr) (index lbl rcs 0) - | otherwise = instCat lbl nr xnr (nr'+length xargs) idxArgs - - index lbl' (lbl:lbls) idx - | lbl' == lbl = idx - | otherwise = index lbl' lbls $! (idx+1) - - ----------------------------------------------------------------------- --- term conversion - -type CnvMonad a = BacktrackM Env a - -type FPath = [FIndex] -type Env = (ProtoFCat, [(ProtoFCat,[FPath])], Term, [Term]) -type LinRec = [(FPath, [Either (FPath, FIndex, Int) Tokn])] - -type TermMap = Map.Map CId Term - -convertTerm :: TermMap -> TermSelector -> Term -> LinRec -> CnvMonad LinRec -convertTerm cnc_defs selector (V nr) ((lbl_path,lin) : lins) = convertArg selector nr [] lbl_path lin lins -convertTerm cnc_defs selector (C nr) ((lbl_path,lin) : lins) = convertCon selector nr lbl_path lin lins -convertTerm cnc_defs selector (R record) ((lbl_path,lin) : lins) = convertRec cnc_defs selector 0 record lbl_path lin lins - -convertTerm cnc_defs selector (P term sel) lins = do nr <- evalTerm cnc_defs [] sel - convertTerm cnc_defs (TuplePrj nr selector) term lins -convertTerm cnc_defs selector (FV vars) lins = do term <- member vars - convertTerm cnc_defs selector term lins -convertTerm cnc_defs selector (S ts) ((lbl_path,lin) : lins) = do projectHead lbl_path - foldM (\lins t -> convertTerm cnc_defs selector t lins) ((lbl_path,lin) : lins) (reverse ts) -convertTerm cnc_defs selector (K (KS str)) ((lbl_path,lin) : lins) = - do projectHead lbl_path - return ((lbl_path,Right (KS str) : lin) : lins) -convertTerm cnc_defs selector (K (KP strs vars))((lbl_path,lin) : lins) = - do projectHead lbl_path - toks <- member (strs:[strs' | Alt strs' _ <- vars]) - return ((lbl_path, map (Right . KS) toks ++ lin) : lins) -convertTerm cnc_defs selector (F id) lins = case Map.lookup id cnc_defs of - Just term -> convertTerm cnc_defs selector term lins - Nothing -> mzero -convertTerm cnc_defs selector (W s t) ((lbl_path,lin) : lins) = do - ss <- case t of - R ss -> return ss - F f -> case Map.lookup f cnc_defs of - Just (R ss) -> return ss - _ -> mzero - convertRec cnc_defs selector 0 [K (KS (s ++ s1)) | K (KS s1) <- ss] lbl_path lin lins -convertTerm cnc_defs selector x lins = error ("convertTerm ("++show x++")") - - -convertArg (TupleSel record) nr path lbl_path lin lins = - foldM (\lins (lbl, selector) -> convertArg selector nr (lbl:path) (lbl:lbl_path) lin lins) lins record -convertArg (TuplePrj lbl selector) nr path lbl_path lin lins = - convertArg selector nr (lbl:path) lbl_path lin lins -convertArg (ConSel indices) nr path lbl_path lin lins = do - index <- member indices - restrictHead lbl_path index - restrictArg nr path index - return lins -convertArg StrSel nr path lbl_path lin lins = do - projectHead lbl_path - xnr <- projectArg nr path - return ((lbl_path, Left (path, nr, xnr) : lin) : lins) - -convertCon (ConSel indices) index lbl_path lin lins = do - guard (index `elem` indices) - restrictHead lbl_path index - return lins -convertCon x _ _ _ _ = error $ "SimpleToFCFG,convertCon: " ++ show x - -convertRec cnc_defs selector index [] lbl_path lin lins = return lins -convertRec cnc_defs selector@(TupleSel fields) index (val:record) lbl_path lin lins = select fields - where - select [] = convertRec cnc_defs selector (index+1) record lbl_path lin lins - select ((index',sub_sel) : fields) - | index == index' = do lins <- convertTerm cnc_defs sub_sel val ((index:lbl_path,lin) : lins) - convertRec cnc_defs selector (index+1) record lbl_path lin lins - | otherwise = select fields -convertRec cnc_defs (TuplePrj index' sub_sel) index record lbl_path lin lins = do - convertTerm cnc_defs sub_sel (record !! (index'-index)) ((lbl_path,lin) : lins) - - ------------------------------------------------------------- --- eval a term to ground terms - -evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex -evalTerm cnc_defs path (V nr) = do term <- readArgCType nr - unifyPType nr (reverse path) (selectTerm path term) -evalTerm cnc_defs path (C nr) = return nr -evalTerm cnc_defs path (R record) = case path of - (index:path) -> evalTerm cnc_defs path (record !! index) -evalTerm cnc_defs path (P term sel) = do index <- evalTerm cnc_defs [] sel - evalTerm cnc_defs (index:path) term -evalTerm cnc_defs path (FV terms) = member terms >>= evalTerm cnc_defs path -evalTerm cnc_defs path (F id) = case Map.lookup id cnc_defs of - Just term -> evalTerm cnc_defs path term - Nothing -> mzero -evalTerm cnc_defs path x = error ("evalTerm ("++show x++")") - -unifyPType :: FIndex -> FPath -> Term -> CnvMonad FIndex -unifyPType nr path (C max_index) = - do (_, args, _, _) <- get - let (PFCat _ _ tcs,_) = args !! nr - case lookup path tcs of - Just index -> return index - Nothing -> do index <- member [0..max_index] - restrictArg nr path index - return index -unifyPType nr path t = error $ "unifyPType " ++ show t ---- AR 2/10/2007 - -selectTerm :: FPath -> Term -> Term -selectTerm [] term = term -selectTerm (index:path) (R record) = selectTerm path (record !! index) - - ----------------------------------------------------------------------- --- GrammarEnv - - -data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int FCatSet FSeqSet FFunSet (IntMap.IntMap (Set.Set Production)) -type FCatSet = Map.Map CId (Map.Map [FPath] (Map.Map [(FPath,FIndex)] (Either FCat FCat))) -type FSeqSet = Map.Map FSeq SeqId -type FFunSet = Map.Map FFun FunId - -data ProtoFCat = PFCat CId [FPath] [(FPath,FIndex)] - -protoFCat :: CId -> ProtoFCat -protoFCat cat = PFCat cat [] [] - -emptyFFunsEnv = GrammarEnv 0 initFCatSet Map.empty Map.empty IntMap.empty - where - initFCatSet = (ins fcatString (mkCId "String") [[0]] [] $ - ins fcatInt (mkCId "Int") [[0]] [] $ - ins fcatFloat (mkCId "Float") [[0]] [] $ - ins fcatVar (mkCId "_Var") [[0]] [] $ - Map.empty) - - ins fcat cat rcs tcs catSet = - Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s catSet - where - right_fcat = Right fcat - tmap_s = Map.singleton tcs right_fcat - rmap_s = Map.singleton rcs tmap_s - -addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv -addProduction (GrammarEnv last_id catSet seqSet funSet prodSet) cat p = - GrammarEnv last_id catSet seqSet funSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet) - -addFSeq :: GrammarEnv -> (FPath,[FSymbol]) -> (GrammarEnv,SeqId) -addFSeq env@(GrammarEnv last_id catSet seqSet funSet prodSet) (_,lst) = - case Map.lookup seq seqSet of - Just id -> (env,id) - Nothing -> let !last_seq = Map.size seqSet - in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet prodSet,last_seq) - where - seq = mkArray lst - -addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId) -addFFun env@(GrammarEnv last_id catSet seqSet funSet prodSet) fun = - case Map.lookup fun funSet of - Just id -> (env,id) - Nothing -> let !last_funid = Map.size funSet - in (GrammarEnv last_id catSet seqSet (Map.insert fun last_funid funSet) prodSet,last_funid) - -getParserInfo :: GrammarEnv -> ParserInfo -getParserInfo (GrammarEnv last_id catSet seqSet funSet prodSet) = - ParserInfo { functions = mkArray funSet - , sequences = mkArray seqSet - , productions0= prodSet - , productions = prodSet - , startCats = Map.map getFCatList catSet - , totalCats = last_id+1 - } - where - mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] - - getFCatList rcs = Map.fold (\tcs lst -> Map.fold (\x lst -> either id id x : lst) lst tcs) [] rcs - - -genFCatHead :: GrammarEnv -> ProtoFCat -> (GrammarEnv, FCat) -genFCatHead env@(GrammarEnv last_id catSet seqSet funSet prodSet) (PFCat cat rcs tcs) = - case Map.lookup cat catSet >>= Map.lookup rcs >>= Map.lookup tcs of - Just (Left fcat) -> (GrammarEnv last_id (ins fcat) seqSet funSet prodSet, fcat) - Just (Right fcat) -> (env, fcat) - Nothing -> let fcat = last_id+1 - in (GrammarEnv fcat (ins fcat) seqSet funSet prodSet, fcat) - where - ins fcat = Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s catSet - where - right_fcat = Right fcat - tmap_s = Map.singleton tcs right_fcat - rmap_s = Map.singleton rcs tmap_s - -genFCatArg :: TermMap -> Term -> GrammarEnv -> ProtoFCat -> (GrammarEnv, FCat) -genFCatArg cnc_defs ctype env@(GrammarEnv last_id catSet seqSet funSet prodSet) (PFCat cat rcs tcs) = - case Map.lookup cat catSet >>= Map.lookup rcs of - Just tmap -> case Map.lookup tcs tmap of - Just (Left fcat) -> (env, fcat) - Just (Right fcat) -> (env, fcat) - Nothing -> ins tmap - Nothing -> ins Map.empty - where - ins tmap = - let fcat = last_id+1 - (either_fcat,last_id1,tmap1,prodSet1) - = foldBM (\tcs st (either_fcat,last_id,tmap,prodSet) -> - let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap - p = FCoerce fcat_arg - prodSet1 = IntMap.insertWith Set.union fcat (Set.singleton p) prodSet - in if st - then (Right fcat, last_id1,tmap1,prodSet1) - else (either_fcat,last_id, tmap ,prodSet )) - (Left fcat,fcat,Map.insert tcs either_fcat tmap,prodSet) - (gen_tcs ctype [] []) - False - rmap1 = Map.singleton rcs tmap1 - in (GrammarEnv last_id1 (Map.insertWith (\_ -> Map.insert rcs tmap1) cat rmap1 catSet) seqSet funSet prodSet1, fcat) - where - addArg tcs last_id tmap = - case Map.lookup tcs tmap of - Just (Left fcat) -> (last_id, tmap, fcat) - Just (Right fcat) -> (last_id, tmap, fcat) - Nothing -> let fcat = last_id+1 - in (fcat, Map.insert tcs (Left fcat) tmap, fcat) - - gen_tcs :: Term -> FPath -> [(FPath,FIndex)] -> BacktrackM Bool [(FPath,FIndex)] - gen_tcs (R record) path acc = foldM (\acc (label,ctype) -> gen_tcs ctype (label:path) acc) acc (zip [0..] record) - gen_tcs (S _) path acc = return acc - gen_tcs (C max_index) path acc = - case List.lookup path tcs of - Just index -> return $! addConstraint path index acc - Nothing -> do put True - index <- member [0..max_index] - return $! addConstraint path index acc - where - addConstraint path0 index0 (c@(path,index) : cs) - | path0 > path = c:addConstraint path0 index0 cs - addConstraint path0 index0 cs = (path0,index0) : cs - gen_tcs (F id) path acc = case Map.lookup id cnc_defs of - Just term -> gen_tcs term path acc - Nothing -> error ("unknown identifier: "++showCId id) - - - ------------------------------------------------------------- --- TODO queue organization - -type XRulesMap = Map.Map CId [XRule] -data XRule = XRule CId {- function -} - [CId] {- argument types -} - CId {- result type -} - [Term] {- argument lin-types representation -} - Term {- result lin-type representation -} - Term {- body -} - -takeToDoRules :: XRulesMap -> GrammarEnv -> ([([XRule], TermSelector)], GrammarEnv) -takeToDoRules xrulesMap (GrammarEnv last_id catSet seqSet funSet prodSet) = - (todo,GrammarEnv last_id catSet' seqSet funSet prodSet) - where - (todo,catSet') = - Map.mapAccumWithKey (\todo cat rmap -> - let (todo1,rmap1) = Map.mapAccumWithKey (\todo rcs tmap -> - let (tcss,tmap') = Map.mapAccumWithKey (\tcss tcs either_xcat -> - case either_xcat of - Left xcat -> (tcs:tcss,Right xcat) - Right xcat -> ( tcss,either_xcat)) [] tmap - in case tcss of - [] -> ( todo,tmap ) - _ -> ((srules,mkSelector rcs tcss) : todo,tmap')) todo rmap - mb_srules = Map.lookup cat xrulesMap - Just srules = mb_srules - - in case mb_srules of - Just srules -> (todo1,rmap1) - Nothing -> (todo ,rmap1)) [] catSet - - ------------------------------------------------------------- --- The TermSelector - -data TermSelector - = TupleSel [(FIndex, TermSelector)] - | TuplePrj FIndex TermSelector - | ConSel [FIndex] - | StrSel - deriving Show - -mkSingletonSelectors :: TermMap - -> Term -- ^ Type representation term - -> [TermSelector] -- ^ list of selectors containing just one string field -mkSingletonSelectors cnc_defs term = sels0 - where - (sels0,tcss0) = loop [] ([],[]) term - - loop path st (R record) = List.foldl' (\st (index,term) -> loop (index:path) st term) st (zip [0..] record) - loop path (sels,tcss) (C i) = ( sels,map ((,) path) [0..i] : tcss) - loop path (sels,tcss) (S _) = (mkSelector [path] tcss0 : sels, tcss) - loop path (sels,tcss) (F id) = case Map.lookup id cnc_defs of - Just term -> loop path (sels,tcss) term - Nothing -> error ("unknown identifier: "++showCId id) - -mkSelector :: [FPath] -> [[(FPath,FIndex)]] -> TermSelector -mkSelector rcs tcss = - List.foldl' addRestriction (case xs of - (path:xs) -> List.foldl' addProjection (path2selector StrSel path) xs) ys - where - xs = [ reverse path | path <- rcs] - ys = [(reverse path,term) | tcs <- tcss, (path,term) <- tcs] - - addRestriction :: TermSelector -> (FPath,FIndex) -> TermSelector - addRestriction (ConSel indices) ([] ,n_index) = ConSel (add indices) - where - add [] = [n_index] - add (index':indices) - | n_index == index' = index': indices - | otherwise = index':add indices - addRestriction (TupleSel fields) (index : path,n_index) = TupleSel (add fields) - where - add [] = [(index,path2selector (ConSel [n_index]) path)] - add (field@(index',sub_sel):fields) - | index == index' = (index',addRestriction sub_sel (path,n_index)):fields - | otherwise = field : add fields - - addProjection :: TermSelector -> FPath -> TermSelector - addProjection StrSel [] = StrSel - addProjection (TupleSel fields) (index : path) = TupleSel (add fields) - where - add [] = [(index,path2selector StrSel path)] - add (field@(index',sub_sel):fields) - | index == index' = (index',addProjection sub_sel path):fields - | otherwise = field : add fields - - path2selector base [] = base - path2selector base (index : path) = TupleSel [(index,path2selector base path)] - ------------------------------------------------------------- --- updating the MCF rule - -readArgCType :: FIndex -> CnvMonad Term -readArgCType nr = do (_, _, _, ctypes) <- get - return (ctypes !! nr) - -restrictArg :: FIndex -> FPath -> FIndex -> CnvMonad () -restrictArg nr path index = do - (head, args, ctype, ctypes) <- get - args' <- updateNthM (\(xcat,xs) -> do xcat <- restrictProtoFCat path index xcat - return (xcat,xs) ) nr args - put (head, args', ctype, ctypes) - -projectArg :: FIndex -> FPath -> CnvMonad Int -projectArg nr path = do - (head, args, ctype, ctypes) <- get - (xnr,args') <- updateArgs nr args - put (head, args', ctype, ctypes) - return xnr - where - updateArgs :: FIndex -> [(ProtoFCat,[FPath])] -> CnvMonad (Int,[(ProtoFCat,[FPath])]) - updateArgs 0 ((a@(PFCat _ rcs _),xpaths) : as) - | path `elem` rcs = return (length xpaths+1,(a,path:xpaths):as) - | otherwise = do a <- projectProtoFCat path a - return (0,(a,xpaths):as) - updateArgs n (a : as) = do - (xnr,as) <- updateArgs (n-1) as - return (xnr,a:as) - -readHeadCType :: CnvMonad Term -readHeadCType = do (_, _, ctype, _) <- get - return ctype - -restrictHead :: FPath -> FIndex -> CnvMonad () -restrictHead path term - = do (head, args, ctype, ctypes) <- get - head' <- restrictProtoFCat path term head - put (head', args, ctype, ctypes) - -projectHead :: FPath -> CnvMonad () -projectHead path - = do (head, args, ctype, ctypes) <- get - head' <- projectProtoFCat path head - put (head', args, ctype, ctypes) - -restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> CnvMonad ProtoFCat -restrictProtoFCat path0 index0 (PFCat cat rcs tcs) = do - tcs <- addConstraint tcs - return (PFCat cat rcs tcs) - where - addConstraint (c@(path,index) : cs) - | path0 > path = liftM (c:) (addConstraint cs) - | path0 == path = guard (index0 == index) >> - return (c : cs) - addConstraint cs = return ((path0,index0) : cs) - -projectProtoFCat :: FPath -> ProtoFCat -> CnvMonad ProtoFCat -projectProtoFCat path0 (PFCat cat rcs tcs) = do - return (PFCat cat (addConstraint rcs) tcs) - where - addConstraint (path : rcs) - | path0 > path = path : addConstraint rcs - | path0 == path = path : rcs - addConstraint rcs = path0 : rcs - -mkArray lst = listArray (0,length lst-1) lst diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs index 458cf3f5c..fed2521e1 100644 --- a/src/compiler/GF/Compile/GeneratePMCFG.hs +++ b/src/compiler/GF/Compile/GeneratePMCFG.hs @@ -123,7 +123,7 @@ convertRule cnc_defs grammarEnv (PFRule fun args res ctypes ctype term) = let [newCat] = getFCats env0 newCat' (env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs' - (env2,funid) = addFFun env1 (FFun fun [[n] | n <- [0..length newArgs-1]] (mkArray lins)) + (env2,funid) = addFFun env1 (FFun fun (mkArray lins)) in addProduction env2 newCat (FApply funid newArgs) @@ -394,7 +394,7 @@ expandHOAS abs_defs cnc_defs lincats lindefs env = (env1,lins) = List.mapAccumL addFSeq env linRec newLinRec = mkArray lins - (env2,funid) = addFFun env1 (FFun _B [[i] | i <- [0..n]] newLinRec) + (env2,funid) = addFFun env1 (FFun _B newLinRec) env3 = foldl (\env (arg,res) -> addProduction env res (FApply funid (arg : replicate n fcatVar))) env2 @@ -462,7 +462,7 @@ getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = , sequences = mkArray seqSet , productions0= productions0 , productions = filterProductions productions0 - , startCats = maybe Map.empty (Map.map (\(start,end,_) -> range (start,end))) (IntMap.lookup 0 catSet) + , startCats = maybe Map.empty (Map.map (\(start,end,_) -> (start,end))) (IntMap.lookup 0 catSet) , totalCats = last_id+1 } where diff --git a/src/compiler/GF/Compile/GeneratePMCFGOld.hs b/src/compiler/GF/Compile/GeneratePMCFGOld.hs deleted file mode 100644 index 244ed68fe..000000000 --- a/src/compiler/GF/Compile/GeneratePMCFGOld.hs +++ /dev/null @@ -1,374 +0,0 @@ -{-# LANGUAGE BangPatterns, CPP #-} ----------------------------------------------------------------------- --- | --- Maintainer : Krasimir Angelov --- Stability : (stable) --- Portability : (portable) --- --- Converting SimpleGFC grammars to fast nonerasing MCFG grammar. --- --- the resulting grammars might be /very large/ --- --- the conversion is only equivalent if the GFC grammar has a context-free backbone. ------------------------------------------------------------------------------ - -module GF.Compile.GeneratePMCFG - (convertConcrete) where - -import PGF.CId -import PGF.Data -import PGF.Macros --hiding (prt) - -import GF.Data.BacktrackM -import GF.Data.SortedList -import GF.Data.Utilities (updateNthM, sortNub) - -import qualified Data.Map as Map -import qualified Data.Set as Set -import qualified Data.List as List -import qualified Data.IntMap as IntMap -import qualified Data.ByteString.Char8 as BS -import Data.Array.IArray -import Data.Maybe -import Control.Monad -import Debug.Trace - ----------------------------------------------------------------------- --- main conversion function - -convertConcrete :: Abstr -> Concr -> ParserInfo -convertConcrete abs cnc = convert abs_defs conc cats - where abs_defs = Map.assocs (funs abs) - conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient" - cats = lincats cnc - -convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> ParserInfo -convert abs_defs cnc_defs cat_defs = - let env = expandHOAS abs_defs cnc_defs cat_defs (emptyGrammarEnv cnc_defs cat_defs) - in getParserInfo (List.foldl' (convertRule cnc_defs) env xrules) - where - xrules = [ - (XRule id args (0,res) (map findLinType args) (findLinType (0,res)) term) | - (id, (ty,_)) <- abs_defs, let (args,res) = typeSkeleton ty, - term <- maybeToList (Map.lookup id cnc_defs)] - - findLinType (_,id) = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs) - -brk :: (GrammarEnv -> GrammarEnv) -> (GrammarEnv -> GrammarEnv) -brk f (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = - case f (GrammarEnv last_id catSet seqSet funSet crcSet IntMap.empty) of - (GrammarEnv last_id catSet seqSet funSet crcSet topdown1) -> IntMap.foldWithKey optimize (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) topdown1 - where - optimize cat ps env = IntMap.foldWithKey ff env (IntMap.fromListWith (++) [(funid,[args]) | FApply funid args <- Set.toList ps]) - where - ff :: FunId -> [[FCat]] -> GrammarEnv -> GrammarEnv - ff funid xs env - | product (map Set.size ys) == count = - case List.mapAccumL (\env c -> addFCoercion env (Set.toList c)) env ys of - (env,args) -> addProduction env cat (FApply funid args) - | otherwise = List.foldl (\env args -> addProduction env cat (FApply funid args)) env xs - where - count = length xs - ys = foldr (zipWith Set.insert) (repeat Set.empty) xs - -convertRule :: TermMap -> GrammarEnv -> XRule -> GrammarEnv -convertRule cnc_defs grammarEnv (XRule fun args res ctypes ctype term) = - brk (\grammarEnv -> foldBM addRule - grammarEnv - (convertTerm cnc_defs [] ctype term [([],[])]) - (protoFCat cnc_defs res ctype, zipWith (protoFCat cnc_defs) args ctypes)) grammarEnv - where - addRule linRec (newCat', newArgs') env0 = - let [newCat] = getFCats env0 newCat' - (env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs' - - (env2,lins) = List.mapAccumL addFSeq env1 linRec - newLinRec = mkArray lins - - (env3,funid) = addFFun env2 (FFun fun [[n] | n <- [0..length newArgs-1]] newLinRec) - - in addProduction env3 newCat (FApply funid newArgs) - ----------------------------------------------------------------------- --- term conversion - -type CnvMonad a = BacktrackM Env a - -type FPath = [FIndex] -data ProtoFCat = PFCat Int CId [FPath] [(FPath,[FIndex])] -type Env = (ProtoFCat, [ProtoFCat]) -type LinRec = [(FPath, [FSymbol])] -data XRule = XRule CId {- function -} - [(Int,CId)] {- argument types: context size and category -} - (Int,CId) {- result type : context size (always 0) and category -} - [Term] {- argument lin-types representation -} - Term {- result lin-type representation -} - Term {- body -} - -protoFCat :: TermMap -> (Int,CId) -> Term -> ProtoFCat -protoFCat cnc_defs (n,cat) ctype = - let (rcs,tcs) = loop [] [] [] ctype' - in PFCat n cat rcs tcs - where - ctype' -- extend the high-order linearization type - | n > 0 = case ctype of - R xs -> R (xs ++ replicate n (S [])) - _ -> error $ "Not a record: " ++ show ctype - | otherwise = ctype - - loop path rcs tcs (R record) = List.foldl' (\(rcs,tcs) (index,term) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record) - loop path rcs tcs (C i) = ( rcs,(path,[0..i]):tcs) - loop path rcs tcs (S _) = (path:rcs, tcs) - loop path rcs tcs (F id) = case Map.lookup id cnc_defs of - Just term -> loop path rcs tcs term - Nothing -> error ("unknown identifier: "++show id) - -type TermMap = Map.Map CId Term - -convertTerm :: TermMap -> FPath -> Term -> Term -> LinRec -> CnvMonad LinRec -convertTerm cnc_defs sel ctype (V nr) ((lbl_path,lin) : lins) = convertArg ctype nr (reverse sel) lbl_path lin lins -convertTerm cnc_defs sel ctype (C nr) ((lbl_path,lin) : lins) = convertCon ctype nr (reverse sel) lbl_path lin lins -convertTerm cnc_defs sel ctype (R record) ((lbl_path,lin) : lins) = convertRec cnc_defs sel ctype record lbl_path lin lins -convertTerm cnc_defs sel ctype (P term p) lins = do nr <- evalTerm cnc_defs [] p - convertTerm cnc_defs (nr:sel) ctype term lins -convertTerm cnc_defs sel ctype (FV vars) lins = do term <- member vars - convertTerm cnc_defs sel ctype term lins -convertTerm cnc_defs sel ctype (S ts) lins = foldM (\lins t -> convertTerm cnc_defs sel ctype t lins) lins (reverse ts) ---convertTerm cnc_defs sel ctype (K t) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok t : lin) : lins) -convertTerm cnc_defs sel ctype (K (KS t)) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok (KS t) : lin) : lins) -convertTerm cnc_defs sel ctype (K (KP strs vars))((lbl_path,lin) : lins) = - do toks <- member (strs:[strs' | Alt strs' _ <- vars]) - return ((lbl_path, map (FSymTok . KS) toks ++ lin) : lins) -convertTerm cnc_defs sel ctype (F id) lins = case Map.lookup id cnc_defs of - Just term -> convertTerm cnc_defs sel ctype term lins - Nothing -> mzero -convertTerm cnc_defs sel ctype (W s t) ((lbl_path,lin) : lins) = do - ss <- case t of - R ss -> return ss - F f -> case Map.lookup f cnc_defs of - Just (R ss) -> return ss - _ -> mzero - convertRec cnc_defs sel ctype [K (KS (s ++ s1)) | K (KS s1) <- ss] lbl_path lin lins -convertTerm cnc_defs sel ctype x lins = error ("convertTerm ("++show x++")") - - -convertArg (R record) nr path lbl_path lin lins = - foldM (\lins (lbl, ctype) -> convertArg ctype nr (lbl:path) (lbl:lbl_path) lin lins) lins (zip [0..] record) -convertArg (C max) nr path lbl_path lin lins = do - index <- member [0..max] - restrictHead lbl_path index - restrictArg nr path index - return lins -convertArg (S _) nr path lbl_path lin lins = do - (_, args) <- get - let PFCat _ cat rcs tcs = args !! nr - l = index path rcs 0 - sym | isLiteralCat cat = FSymLit nr l - | otherwise = FSymCat nr l - return ((lbl_path, sym : lin) : lins) - where - index lbl' (lbl:lbls) idx - | lbl' == lbl = idx - | otherwise = index lbl' lbls $! (idx+1) - - -convertCon (C max) index [] lbl_path lin lins = do - guard (index <= max) - restrictHead lbl_path index - return lins -convertCon x _ _ _ _ _ = error $ "SimpleToFCFG,convertCon: " ++ show x - -convertRec cnc_defs [] (R ctypes) record lbl_path lin lins = - foldM (\lins (index,ctype,val) -> convertTerm cnc_defs [] ctype val ((index:lbl_path,lin) : lins)) - lins - (zip3 [0..] ctypes record) -convertRec cnc_defs (index:sub_sel) ctype record lbl_path lin lins = do - convertTerm cnc_defs sub_sel ctype (record !! index) ((lbl_path,lin) : lins) - - ------------------------------------------------------------- --- eval a term to ground terms - -evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex -evalTerm cnc_defs path (V nr) = do (_, args) <- get - let PFCat _ _ _ tcs = args !! nr - rpath = reverse path - index <- member (fromMaybe (error "evalTerm: wrong path") (lookup rpath tcs)) - restrictArg nr rpath index - return index -evalTerm cnc_defs path (C nr) = return nr -evalTerm cnc_defs path (R record) = case path of - (index:path) -> evalTerm cnc_defs path (record !! index) -evalTerm cnc_defs path (P term sel) = do index <- evalTerm cnc_defs [] sel - evalTerm cnc_defs (index:path) term -evalTerm cnc_defs path (FV terms) = member terms >>= evalTerm cnc_defs path -evalTerm cnc_defs path (F id) = case Map.lookup id cnc_defs of - Just term -> evalTerm cnc_defs path term - Nothing -> mzero -evalTerm cnc_defs path x = error ("evalTerm ("++show x++")") - - ----------------------------------------------------------------------- --- GrammarEnv - -data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int CatSet SeqSet FunSet CoerceSet (IntMap.IntMap (Set.Set Production)) -type CatSet = IntMap.IntMap (Map.Map CId (FCat,FCat,[Int])) -type SeqSet = Map.Map FSeq SeqId -type FunSet = Map.Map FFun FunId -type CoerceSet= Map.Map [FCat] FCat - -emptyGrammarEnv cnc_defs lincats = - let (last_id,catSet) = Map.mapAccumWithKey computeCatRange 0 lincats - in GrammarEnv last_id (IntMap.singleton 0 catSet) Map.empty Map.empty Map.empty IntMap.empty - where - computeCatRange index cat ctype - | cat == cidString = (index, (fcatString,fcatString,[])) - | cat == cidInt = (index, (fcatInt, fcatInt, [])) - | cat == cidFloat = (index, (fcatFloat, fcatFloat, [])) - | otherwise = (index+size,(index,index+size-1,poly)) - where - (size,poly) = getMultipliers 1 [] ctype - - getMultipliers m ms (R record) = foldl (\(m,ms) t -> getMultipliers m ms t) (m,ms) record - getMultipliers m ms (S _) = (m,ms) - getMultipliers m ms (C max_index) = (m*(max_index+1),m : ms) - getMultipliers m ms (F id) = case Map.lookup id cnc_defs of - Just term -> getMultipliers m ms term - Nothing -> error ("unknown identifier: "++prCId id) - - -expandHOAS abs_defs cnc_defs lincats env = - foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) hoCats - where - hoTypes :: [(Int,CId)] - hoTypes = sortNub [(n,c) | (_,(ty,_)) <- abs_defs - , (n,c) <- fst (typeSkeleton ty), n > 0] - - hoCats :: [CId] - hoCats = sortNub [c | (_,(ty,_)) <- abs_defs - , Hyp _ ty <- case ty of {DTyp hyps val _ -> hyps} - , c <- fst (catSkeleton ty)] - - -- add a range of PMCFG categories for each GF high-order category - add_hoCat env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,cat) = - case IntMap.lookup 0 catSet >>= Map.lookup cat of - Just (start,end,ms) -> let !catSet' = IntMap.insertWith Map.union n (Map.singleton cat (last_id,last_id+(end-start),ms)) catSet - !last_id' = last_id+(end-start)+1 - in (GrammarEnv last_id' catSet' seqSet funSet crcSet prodSet) - Nothing -> env - - -- add one PMCFG function for each high-order type: _B : Cat -> Var -> ... -> Var -> HoCat - add_hoFun env (n,cat) = - let linRec = reverse $ - [(l ,[FSymCat 0 i]) | (l,i) <- case arg of {PFCat _ _ rcs _ -> zip rcs [0..]}] ++ - [([],[FSymLit i 0]) | i <- [1..n]] - (env1,lins) = List.mapAccumL addFSeq env linRec - newLinRec = mkArray lins - - (env2,funid) = addFFun env1 (FFun _B [[i] | i <- [0..n]] newLinRec) - - env3 = foldl (\env (arg,res) -> addProduction env res (FApply funid (arg : replicate n fcatVar))) - env2 - (zip (getFCats env2 arg) (getFCats env2 res)) - in env3 - where - (arg,res) = case Map.lookup cat lincats of - Nothing -> error $ "No lincat for " ++ prCId cat - Just ctype -> (protoFCat cnc_defs (0,cat) ctype, protoFCat cnc_defs (n,cat) ctype) - - -- add one PMCFG function for each high-order category: _V : Var -> Cat - add_varFun env cat = - let (env1,seqid) = addFSeq env ([],[FSymLit 0 0]) - lins = replicate (case res of {PFCat _ _ rcs _ -> length rcs}) seqid - (env2,funid) = addFFun env1 (FFun _V [[0]] (mkArray lins)) - env3 = foldl (\env res -> addProduction env2 res (FApply funid [fcatVar])) - env2 - (getFCats env2 res) - in env3 - where - res = case Map.lookup cat lincats of - Nothing -> error $ "No lincat for " ++ prCId cat - Just ctype -> protoFCat cnc_defs (0,cat) ctype - - _B = mkCId "_B" - _V = mkCId "_V" - - -addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv -addProduction (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) cat p = - GrammarEnv last_id catSet seqSet funSet crcSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet) - -addFSeq :: GrammarEnv -> (FPath,[FSymbol]) -> (GrammarEnv,SeqId) -addFSeq env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (_,lst) = - case Map.lookup seq seqSet of - Just id -> (env,id) - Nothing -> let !last_seq = Map.size seqSet - in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet crcSet prodSet,last_seq) - where - seq = mkArray lst - -addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId) -addFFun env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) fun = - case Map.lookup fun funSet of - Just id -> (env,id) - Nothing -> let !last_funid = Map.size funSet - in (GrammarEnv last_id catSet seqSet (Map.insert fun last_funid funSet) crcSet prodSet,last_funid) - -addFCoercion :: GrammarEnv -> [FCat] -> (GrammarEnv,FCat) -addFCoercion env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) sub_fcats = - case sub_fcats of - [fcat] -> (env,fcat) - _ -> case Map.lookup sub_fcats crcSet of - Just fcat -> (env,fcat) - Nothing -> let !fcat = last_id+1 - in (GrammarEnv fcat catSet seqSet funSet (Map.insert sub_fcats fcat crcSet) prodSet,fcat) - -getParserInfo :: GrammarEnv -> ParserInfo -getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = - ParserInfo { functions = mkArray funSet - , sequences = mkArray seqSet - , productions = IntMap.union prodSet coercions - , startCats = maybe Map.empty (Map.map (\(start,end,_) -> range (start,end))) (IntMap.lookup 0 catSet) - , totalCats = last_id+1 - } - where - mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] - - coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet] - -getFCats :: GrammarEnv -> ProtoFCat -> [FCat] -getFCats (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat n cat rcs tcs) = - case IntMap.lookup n catSet >>= Map.lookup cat of - Just (start,end,ms) -> reverse (solutions (variants ms tcs start) ()) - where - variants _ [] fcat = return fcat - variants (m:ms) ((_,indices) : tcs) fcat = do index <- member indices - variants ms tcs ((m*index) + fcat) - ------------------------------------------------------------- --- updating the MCF rule - -restrictArg :: FIndex -> FPath -> FIndex -> CnvMonad () -restrictArg nr path index = do - (head, args) <- get - args' <- updateNthM (restrictProtoFCat path index) nr args - put (head, args') - -restrictHead :: FPath -> FIndex -> CnvMonad () -restrictHead path term - = do (head, args) <- get - head' <- restrictProtoFCat path term head - put (head', args) - -restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> CnvMonad ProtoFCat -restrictProtoFCat path0 index0 (PFCat n cat rcs tcs) = do - tcs <- addConstraint tcs - return (PFCat n cat rcs tcs) - where - addConstraint [] = error "restrictProtoFCat: unknown path" - addConstraint (c@(path,indices) : tcs) - | path0 == path = guard (index0 `elem` indices) >> - return ((path,[index0]) : tcs) - | otherwise = liftM (c:) (addConstraint tcs) - -mkArray lst = listArray (0,length lst-1) lst diff --git a/src/compiler/GF/Compile/GrammarToPGF.hs b/src/compiler/GF/Compile/GrammarToPGF.hs index ce7e5be73..7cd00550a 100644 --- a/src/compiler/GF/Compile/GrammarToPGF.hs +++ b/src/compiler/GF/Compile/GrammarToPGF.hs @@ -2,8 +2,7 @@ module GF.Compile.GrammarToPGF (mkCanon2gfcc,addParsers) where import GF.Compile.Export -import qualified GF.Compile.GenerateFCFG as FCFG -import qualified GF.Compile.GeneratePMCFG as PMCFG +import GF.Compile.GeneratePMCFG import PGF.CId import qualified PGF.Macros as CM @@ -48,12 +47,8 @@ addParsers :: Options -> D.PGF -> IO D.PGF addParsers opts pgf = do cncs <- sequence [conv lang cnc | (lang,cnc) <- Map.toList (D.concretes pgf)] return pgf { D.concretes = Map.fromList cncs } where - conv lang cnc = do pinfo <- if flag optErasing (erasingFromCnc `addOptions` opts) - then PMCFG.convertConcrete opts (D.abstract pgf) lang cnc - else return $ FCFG.convertConcrete (D.abstract pgf) cnc + conv lang cnc = do pinfo <- convertConcrete opts (D.abstract pgf) lang cnc return (lang,cnc { D.parser = Just pinfo }) - where - erasingFromCnc = modifyFlags (\o -> o { optErasing = Map.lookup (mkCId "erasing") (D.cflags cnc) == Just "on"}) -- Generate PGF from GFCM. -- this assumes a grammar translated by canon2canon diff --git a/src/compiler/GF/Compile/PGFtoJS.hs b/src/compiler/GF/Compile/PGFtoJS.hs index ff75d8fe6..01be44e8c 100644 --- a/src/compiler/GF/Compile/PGFtoJS.hs +++ b/src/compiler/GF/Compile/PGFtoJS.hs @@ -96,13 +96,14 @@ parser2js p = [new "Parser" [JS.EObj $ [JS.Prop (JS.IntPropName cat) (JS.EArray JS.EObj $ map cats (Map.assocs (startCats p)), JS.EInt (totalCats p)]] where - cats (c,is) = JS.Prop (JS.IdentPropName (JS.Ident (showCId c))) (JS.EArray (map JS.EInt is)) + cats (c,(start,end)) = JS.Prop (JS.IdentPropName (JS.Ident (showCId c))) (JS.EObj [JS.Prop (JS.IdentPropName (JS.Ident "s")) (JS.EInt start) + ,JS.Prop (JS.IdentPropName (JS.Ident "e")) (JS.EInt end)]) frule2js :: Production -> JS.Expr frule2js (FApply funid args) = new "Rule" [JS.EInt funid, JS.EArray (map JS.EInt args)] frule2js (FCoerce arg) = new "Coerce" [JS.EInt arg] -ffun2js (FFun f _ lins) = new "FFun" [JS.EStr (showCId f), JS.EArray (map JS.EInt (Array.elems lins))] +ffun2js (FFun f lins) = new "FFun" [JS.EStr (showCId f), JS.EArray (map JS.EInt (Array.elems lins))] seq2js :: Array.Array FIndex FSymbol -> JS.Expr seq2js seq = JS.EArray [sym2js s | s <- Array.elems seq] diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs index dc15d1929..dba40cbf3 100644 --- a/src/compiler/GF/Infra/Option.hs +++ b/src/compiler/GF/Infra/Option.hs @@ -172,7 +172,6 @@ data Flags = Flags { optSpeechLanguage :: Maybe String, optLexer :: Maybe String, optUnlexer :: Maybe String, - optErasing :: Bool, optBuildParser :: BuildParser, optWarnings :: [Warning], optDump :: [Dump] @@ -219,7 +218,6 @@ optionsPGF :: Options -> [(String,String)] optionsPGF opts = maybe [] (\x -> [("language",x)]) (flag optSpeechLanguage opts) ++ maybe [] (\x -> [("startcat",x)]) (flag optStartCat opts) - ++ (if flag optErasing opts then [("erasing","on")] else []) ++ (if flag optBuildParser opts == BuildParserOnDemand then [("parser","ondemand")] else []) -- Option manipulation @@ -276,7 +274,6 @@ defaultFlags = Flags { optSpeechLanguage = Nothing, optLexer = Nothing, optUnlexer = Nothing, - optErasing = True, optBuildParser = BuildParser, optWarnings = [], optDump = [] @@ -354,7 +351,6 @@ optDescr = Option [] ["coding"] (ReqArg coding "ENCODING") ("Character encoding of the source grammar, ENCODING = " ++ concat (intersperse " | " (map fst encodings)) ++ "."), - Option [] ["erasing"] (onOff erasing False) "Generate erasing grammar (default off).", Option [] ["parser"] (ReqArg buildParser "VALUE") "Build parser (default on). VALUE = on | off | ondemand", Option [] ["startcat"] (ReqArg startcat "CAT") "Grammar start category.", Option [] ["language"] (ReqArg language "LANG") "Set the speech language flag to LANG in the generated grammar.", @@ -414,7 +410,6 @@ optDescr = coding x = case lookup x encodings of Just c -> set $ \o -> o { optEncoding = c } Nothing -> fail $ "Unknown character encoding: " ++ x - erasing x = set $ \o -> o { optErasing = x } buildParser x = do v <- case x of "on" -> return BuildParser "off" -> return DontBuildParser diff --git a/src/compiler/GF/Speech/PGFToCFG.hs b/src/compiler/GF/Speech/PGFToCFG.hs index d22a4ea8d..a9bb20ef6 100644 --- a/src/compiler/GF/Speech/PGFToCFG.hs +++ b/src/compiler/GF/Speech/PGFToCFG.hs @@ -27,6 +27,7 @@ bnfPrinter = toBNF id toBNF :: (CFG -> CFG) -> PGF -> CId -> String toBNF f pgf cnc = prCFG $ f $ pgfToCFG pgf cnc +type Profile = [Int] pgfToCFG :: PGF -> CId -- ^ Concrete syntax name @@ -42,7 +43,7 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co fcatCats :: Map FCat Cat fcatCats = Map.fromList [(fc, showCId c ++ "_" ++ show i) | (c,fcs) <- Map.toList (startCats pinfo), - (fc,i) <- zip fcs [1..]] + (fc,i) <- zip (range fcs) [1..]] fcatCat :: FCat -> Cat fcatCat c = Map.findWithDefault ("Unknown_" ++ show c) c fcatCats @@ -53,7 +54,7 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co -- gets the number of fields in the lincat for the given category catLinArity :: FCat -> Int - catLinArity c = maximum (1:[rangeSize (bounds rhs) | (FFun _ _ rhs, _) <- topdownRules c]) + catLinArity c = maximum (1:[rangeSize (bounds rhs) | (FFun _ rhs, _) <- topdownRules c]) topdownRules cat = f cat [] where @@ -69,17 +70,17 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co startRules :: [CFRule] startRules = [CFRule (showCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0) | (c,fcs) <- Map.toList (startCats pinfo), - fc <- fcs, not (isLiteralFCat fc), + fc <- range fcs, not (isLiteralFCat fc), r <- [0..catLinArity fc-1]] fruleToCFRule :: (FCat,Production) -> [CFRule] fruleToCFRule (c,FApply funid args) = - [CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm (map (fixProfile row) ps)) + [CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm [fixProfile row n | n <- [0..length args-1]]) | (l,seqid) <- Array.assocs rhs , let row = sequences pinfo ! seqid , not (containsLiterals row)] where - FFun f ps rhs = functions pinfo ! funid + FFun f rhs = functions pinfo ! funid mkRhs :: Array FPointPos FSymbol -> [CFSymbol] mkRhs = concatMap fsymbolToSymbol . Array.elems @@ -94,11 +95,10 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co fsymbolToSymbol (FSymLit n l) = [NonTerminal (fcatToCat (args!!n) l)] fsymbolToSymbol (FSymKS ts) = map Terminal ts - fixProfile :: Array FPointPos FSymbol -> Profile -> Profile - fixProfile row = concatMap positions + fixProfile :: Array FPointPos FSymbol -> Int -> Profile + fixProfile row i = [k | (k,j) <- nts, j == i] where nts = zip [0..] [j | nt <- Array.elems row, j <- getPos nt] - positions i = [k | (k,j) <- nts, j == i] getPos (FSymCat j _) = [j] getPos (FSymLit j _) = [j] diff --git a/src/runtime/haskell/PGF.hs b/src/runtime/haskell/PGF.hs index 6c192095d..2b521e8f7 100644 --- a/src/runtime/haskell/PGF.hs +++ b/src/runtime/haskell/PGF.hs @@ -74,8 +74,8 @@ module PGF( -- ** Word Completion (Incremental Parsing) complete, - Incremental.ParseState, - Incremental.initState, Incremental.nextState, Incremental.getCompletions, Incremental.recoveryStates, Incremental.extractTrees, + Parse.ParseState, + Parse.initState, Parse.nextState, Parse.getCompletions, Parse.recoveryStates, Parse.extractTrees, -- ** Generation generateRandom, generateAll, generateAllDepth, @@ -105,8 +105,7 @@ import PGF.Expr (Tree) import PGF.Morphology import PGF.Data hiding (functions) import PGF.Binary -import qualified PGF.Parsing.FCFG.Active as Active -import qualified PGF.Parsing.FCFG.Incremental as Incremental +import qualified PGF.Parse as Parse import qualified GF.Compile.GeneratePMCFG as PMCFG import GF.Infra.Option @@ -249,13 +248,11 @@ linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang parse pgf lang typ s = case Map.lookup lang (concretes pgf) of Just cnc -> case parser cnc of - Just pinfo -> if Map.lookup (mkCId "erasing") (cflags cnc) == Just "on" - then Incremental.parse pgf lang typ (words s) - else Active.parse "t" pinfo typ (words s) + Just pinfo -> Parse.parse pgf lang typ (words s) Nothing -> error ("No parser built for language: " ++ showCId lang) Nothing -> error ("Unknown language: " ++ showCId lang) -parseWithRecovery pgf lang typ open_typs s = Incremental.parseWithRecovery pgf lang typ open_typs (words s) +parseWithRecovery pgf lang typ open_typs s = Parse.parseWithRecovery pgf lang typ open_typs (words s) canParse pgf cnc = isJust (lookParser pgf cnc) @@ -297,12 +294,12 @@ functionType pgf fun = complete pgf from typ input = let (ws,prefix) = tokensAndPrefix input - state0 = Incremental.initState pgf from typ + state0 = Parse.initState pgf from typ in case loop state0 ws of Nothing -> [] Just state -> - (if null prefix && not (null (Incremental.extractTrees state typ)) then [unwords ws ++ " "] else []) - ++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Incremental.getCompletions state prefix)] + (if null prefix && not (null (Parse.extractTrees state typ)) then [unwords ws ++ " "] else []) + ++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Parse.getCompletions state prefix)] where tokensAndPrefix :: String -> ([String],String) tokensAndPrefix s | not (null s) && isSpace (last s) = (ws, "") @@ -311,7 +308,7 @@ complete pgf from typ input = where ws = words s loop ps [] = Just ps - loop ps (t:ts) = case Incremental.nextState ps t of + loop ps (t:ts) = case Parse.nextState ps t of Left es -> Nothing Right ps -> loop ps ts diff --git a/src/runtime/haskell/PGF/Binary.hs b/src/runtime/haskell/PGF/Binary.hs index e4ed98424..7d5db73af 100644 --- a/src/runtime/haskell/PGF/Binary.hs +++ b/src/runtime/haskell/PGF/Binary.hs @@ -159,8 +159,8 @@ instance Binary BindType where _ -> decodingError instance Binary FFun where - put (FFun fun prof lins) = put (fun,prof,lins) - get = liftM3 FFun get get get + put (FFun fun lins) = put (fun,lins) + get = liftM2 FFun get get instance Binary FSymbol where put (FSymCat n l) = putWord8 0 >> put (n,l) diff --git a/src/runtime/haskell/PGF/BuildParser.hs b/src/runtime/haskell/PGF/BuildParser.hs deleted file mode 100644 index 23e0725c6..000000000 --- a/src/runtime/haskell/PGF/BuildParser.hs +++ /dev/null @@ -1,76 +0,0 @@ ---------------------------------------------------------------------- --- | --- Maintainer : Krasimir Angelov --- Stability : (stable) --- Portability : (portable) --- --- FCFG parsing, parser information ------------------------------------------------------------------------------ - -module PGF.BuildParser where - -import GF.Data.SortedList -import GF.Data.Assoc -import PGF.CId -import PGF.Data -import PGF.Parsing.FCFG.Utilities - -import Data.Array.IArray -import Data.Maybe -import qualified Data.IntMap as IntMap -import qualified Data.Map as Map -import qualified Data.Set as Set -import Debug.Trace - - -data ParserInfoEx - = ParserInfoEx { epsilonRules :: [(FunId,[FCat],FCat)] - , leftcornerCats :: Assoc FCat [(FunId,[FCat],FCat)] - , leftcornerTokens :: Assoc String [(FunId,[FCat],FCat)] - , grammarToks :: [String] - } - ------------------------------------------------------------- --- parser information - -getLeftCornerTok pinfo (FFun _ _ lins) - | inRange (bounds syms) 0 = case syms ! 0 of - FSymKS [tok] -> [tok] - _ -> [] - | otherwise = [] - where - syms = (sequences pinfo) ! (lins ! 0) - -getLeftCornerCat pinfo args (FFun _ _ lins) - | inRange (bounds syms) 0 = case syms ! 0 of - FSymCat d _ -> let cat = args !! d - in case IntMap.lookup cat (productions pinfo) of - Just set -> cat : [cat' | FCoerce cat' <- Set.toList set] - Nothing -> [cat] - _ -> [] - | otherwise = [] - where - syms = (sequences pinfo) ! (lins ! 0) - -buildParserInfo :: ParserInfo -> ParserInfoEx -buildParserInfo pinfo = - ParserInfoEx { epsilonRules = epsilonrules - , leftcornerCats = leftcorncats - , leftcornerTokens = leftcorntoks - , grammarToks = grammartoks - } - - where epsilonrules = [ (ruleid,args,cat) - | (cat,set) <- IntMap.toList (productions pinfo) - , (FApply ruleid args) <- Set.toList set - , let (FFun _ _ lins) = (functions pinfo) ! ruleid - , not (inRange (bounds ((sequences pinfo) ! (lins ! 0))) 0) ] - leftcorncats = accumAssoc id [ (cat', (ruleid, args, cat)) - | (cat,set) <- IntMap.toList (productions pinfo) - , (FApply ruleid args) <- Set.toList set - , cat' <- getLeftCornerCat pinfo args ((functions pinfo) ! ruleid) ] - leftcorntoks = accumAssoc id [ (tok, (ruleid, args, cat)) - | (cat,set) <- IntMap.toList (productions pinfo) - , (FApply ruleid args) <- Set.toList set - , tok <- getLeftCornerTok pinfo ((functions pinfo) ! ruleid) ] - grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymKS [t] <- elems lin] diff --git a/src/runtime/haskell/PGF/PMCFG.hs b/src/runtime/haskell/PGF/PMCFG.hs index c657e3d17..b9303aeb8 100644 --- a/src/runtime/haskell/PGF/PMCFG.hs +++ b/src/runtime/haskell/PGF/PMCFG.hs @@ -19,13 +19,12 @@ data FSymbol | FSymKS [String] | FSymKP [String] [Alternative] deriving (Eq,Ord,Show) -type Profile = [Int] data Production = FApply {-# UNPACK #-} !FunId [FCat] | FCoerce {-# UNPACK #-} !FCat | FConst Expr [String] deriving (Eq,Ord,Show) -data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show) +data FFun = FFun CId {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show) type FSeq = Array FPointPos FSymbol type FunId = Int type SeqId = Int @@ -39,7 +38,7 @@ data ParserInfo , sequences :: Array SeqId FSeq , productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file , productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions - , startCats :: Map.Map CId [FCat] + , startCats :: Map.Map CId (FCat,FCat) , totalCats :: {-# UNPACK #-} !FCat } @@ -71,14 +70,14 @@ ppProduction (fcat,FCoerce arg) = ppProduction (fcat,FConst _ ss) = ppFCat fcat <+> text "->" <+> ppStrs ss -ppFun (funid,FFun fun _ arr) = +ppFun (funid,FFun fun arr) = ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun) ppSeq (seqid,seq) = ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq)) -ppStartCat (id,fcats) = - ppCId id <+> text ":=" <+> brackets (hcat (punctuate comma (map ppFCat fcats))) +ppStartCat (id,(start,end)) = + ppCId id <+> text ":=" <+> brackets (ppFCat start <+> text ".." <+> ppFCat end) ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>' ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>' diff --git a/src/runtime/haskell/PGF/Parsing/FCFG/Incremental.hs b/src/runtime/haskell/PGF/Parse.hs similarity index 95% rename from src/runtime/haskell/PGF/Parsing/FCFG/Incremental.hs rename to src/runtime/haskell/PGF/Parse.hs index 296a0d33b..44ff525b4 100644 --- a/src/runtime/haskell/PGF/Parsing/FCFG/Incremental.hs +++ b/src/runtime/haskell/PGF/Parse.hs @@ -1,5 +1,5 @@ {-# LANGUAGE BangPatterns #-} -module PGF.Parsing.FCFG.Incremental +module PGF.Parse ( ParseState , ErrorState , initState @@ -57,10 +57,10 @@ parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ) initState :: PGF -> Language -> Type -> ParseState initState pgf lang (DTyp _ start _) = let items = do - cat <- fromMaybe [] (Map.lookup start (startCats pinfo)) + cat <- maybe [] range (Map.lookup start (startCats pinfo)) (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args) [] cat (productions pinfo) - let FFun fn _ lins = functions pinfo ! funid + let FFun fn lins = functions pinfo ! funid (lbl,seqid) <- assocs lins return (Active 0 0 funid seqid args (AK cat lbl)) @@ -131,7 +131,7 @@ recoveryStates open_types (EState pgf pinfo chart) = } in (PState pgf pinfo chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf pinfo chart2) acc) where - type2fcats (DTyp _ cat _) = fromMaybe [] (Map.lookup cat (startCats pinfo)) + type2fcats (DTyp _ cat _) = maybe [] range (Map.lookup cat (startCats pinfo)) complete open_fcats items ac = foldl (Set.fold (\(Active j' ppos funid seqid args keyc) -> @@ -154,10 +154,10 @@ extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) = (_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart exps = do - cat <- fromMaybe [] (Map.lookup start (startCats pinfo)) + cat <- maybe [] range (Map.lookup start (startCats pinfo)) (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args) [] cat (productions pinfo) - let FFun fn _ lins = functions pinfo ! funid + let FFun fn lins = functions pinfo ! funid lbl <- indices lins Just fid <- [lookupPC (PK cat lbl 0) (passive st)] (fvs,tree) <- go Set.empty 0 (0,fid) @@ -168,7 +168,7 @@ extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) = | fcat < totalCats pinfo = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments | Set.member fcat rec = mzero | otherwise = foldForest (\funid args trees -> - do let FFun fn _ lins = functions pinfo ! funid + do let FFun fn lins = functions pinfo ! funid args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args) check_ho_fun fn args `mplus` @@ -250,7 +250,7 @@ process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) ac rhs funid lbl = unsafeAt lins lbl where - FFun _ _ lins = unsafeAt funs funid + FFun _ lins = unsafeAt funs funid updateAt :: Int -> a -> [a] -> [a] diff --git a/src/runtime/haskell/PGF/Parsing/FCFG/Active.hs b/src/runtime/haskell/PGF/Parsing/FCFG/Active.hs deleted file mode 100644 index e88926f6e..000000000 --- a/src/runtime/haskell/PGF/Parsing/FCFG/Active.hs +++ /dev/null @@ -1,205 +0,0 @@ ----------------------------------------------------------------------- --- | --- Maintainer : Krasimir Angelov --- Stability : (stable) --- Portability : (portable) --- --- MCFG parsing, the active algorithm ------------------------------------------------------------------------------ - -module PGF.Parsing.FCFG.Active (parse) where - -import GF.Data.Assoc -import GF.Data.SortedList -import GF.Data.Utilities -import qualified GF.Data.MultiMap as MM - -import PGF.CId -import PGF.Data -import PGF.Tree -import PGF.Parsing.FCFG.Utilities -import PGF.BuildParser - -import Control.Monad (guard) - -import qualified Data.List as List -import qualified Data.Map as Map -import qualified Data.IntMap as IntMap -import qualified Data.Set as Set -import Data.Array.IArray -import Debug.Trace - ----------------------------------------------------------------------- --- * parsing - -type FToken = String - -makeFinalEdge cat 0 0 = (cat, [EmptyRange]) -makeFinalEdge cat i j = (cat, [makeRange i j]) - --- | the list of categories = possible starting categories -parse :: String -> ParserInfo -> Type -> [FToken] -> [Expr] -parse strategy pinfo (DTyp _ start _) toks = map (tree2expr) . nubsort $ filteredForests >>= forest2trees - where - inTokens = input toks - starts = Map.findWithDefault [] start (startCats pinfo) - schart = xchart2syntaxchart chart pinfo - (i,j) = inputBounds inTokens - finalEdges = [makeFinalEdge cat i j | cat <- starts] - forests = chart2forests schart (const False) finalEdges - filteredForests = forests >>= applyProfileToForest - - pinfoex = buildParserInfo pinfo - - chart = process strategy pinfo pinfoex inTokens axioms emptyXChart - axioms | isBU strategy = literals pinfoex inTokens ++ initialBU pinfo pinfoex inTokens - | isTD strategy = literals pinfoex inTokens ++ initialTD pinfo starts inTokens - -isBU s = s=="b" -isTD s = s=="t" - --- used in prediction -emptyChildren :: FunId -> [FCat] -> SyntaxNode FunId RangeRec -emptyChildren ruleid args = SNode ruleid (replicate (length args) []) - - -process :: String -> ParserInfo -> ParserInfoEx -> Input FToken -> [Item] -> XChart FCat -> XChart FCat -process strategy pinfo pinfoex toks [] chart = chart -process strategy pinfo pinfoex toks (item:items) chart = process strategy pinfo pinfoex toks items $! univRule item chart - where - univRule item@(Active found rng lbl ppos node@(SNode ruleid recs) args cat) chart - | inRange (bounds lin) ppos = - case lin ! ppos of - FSymCat d r -> let c = args !! d - in case recs !! d of - [] -> case insertXChart chart item c of - Nothing -> chart - Just chart -> let items = do item@(Final found' _ _ _) <- lookupXChartFinal chart c - rng <- concatRange rng (found' !! r) - return (Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)) args cat) - ++ - do guard (isTD strategy) - (ruleid,args) <- topdownRules pinfo c - return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args c) - in process strategy pinfo pinfoex toks items chart - found' -> let items = do rng <- concatRange rng (found' !! r) - return (Active found rng lbl (ppos+1) node args cat) - in process strategy pinfo pinfoex toks items chart - FSymKS [tok] - -> let items = do t_rng <- inputToken toks ? tok - rng' <- concatRange rng t_rng - return (Active found rng' lbl (ppos+1) node args cat) - in process strategy pinfo pinfoex toks items chart - | otherwise = - if inRange (bounds lins) (lbl+1) - then univRule (Active (rng:found) EmptyRange (lbl+1) 0 node args cat) chart - else univRule (Final (reverse (rng:found)) node args cat) chart - where - (FFun _ _ lins) = functions pinfo ! ruleid - lin = sequences pinfo ! (lins ! lbl) - univRule item@(Final found' node args cat) chart = - case insertXChart chart item cat of - Nothing -> chart - Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _) args c) <- lookupXChartAct chart cat - let FFun _ _ lins = functions pinfo ! ruleid - FSymCat d r = (sequences pinfo ! (lins ! l)) ! ppos - rng <- concatRange rng (found' !! r) - return (Active found rng l (ppos+1) (updateChildren node d found') args c) - ++ - do guard (isBU strategy) - (ruleid,args,c) <- leftcornerCats pinfoex ? cat - let FFun _ _ lins = functions pinfo ! ruleid - FSymCat d r = (sequences pinfo ! (lins ! 0)) ! 0 - return (Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid args) d found') args c) - - updateChildren :: SyntaxNode FunId RangeRec -> Int -> RangeRec -> SyntaxNode FunId RangeRec - updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs - in process strategy pinfo pinfoex toks items chart - ----------------------------------------------------------------------- --- * XChart - -data Item - = Active RangeRec - Range - {-# UNPACK #-} !FIndex - {-# UNPACK #-} !FPointPos - (SyntaxNode FunId RangeRec) - [FCat] - FCat - | Final RangeRec (SyntaxNode FunId RangeRec) [FCat] FCat - deriving (Eq, Ord, Show) - -data XChart c = XChart !(MM.MultiMap c Item) !(MM.MultiMap c Item) - -emptyXChart :: Ord c => XChart c -emptyXChart = XChart MM.empty MM.empty - -insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _ _) c = - case MM.insert' c item actives of - Nothing -> Nothing - Just actives -> Just (XChart actives finals) - -insertXChart (XChart actives finals) item@(Final _ _ _ _) c = - case MM.insert' c item finals of - Nothing -> Nothing - Just finals -> Just (XChart actives finals) - -lookupXChartAct (XChart actives finals) c = actives MM.! c -lookupXChartFinal (XChart actives finals) c = finals MM.! c - -xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) (FCat,RangeRec) -xchart2syntaxchart (XChart actives finals) pinfo = - accumAssoc groupSyntaxNodes $ - [ case node of - SNode ruleid rrecs -> let FFun fun prof _ = functions pinfo ! ruleid - in ((cat,found), SNode (fun,prof) (zip rhs rrecs)) - SString s -> ((cat,found), SString s) - SInt n -> ((cat,found), SInt n) - SFloat f -> ((cat,found), SFloat f) - | (Final found node rhs cat) <- MM.elems finals - ] - -literals :: ParserInfoEx -> Input FToken -> [Item] -literals pinfoex toks = - [let (c,node) = lexer t in (Final [rng] node [] c) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfoex)] - where - lexer t = - case reads t of - [(n,"")] -> (fcatInt, SInt (n::Integer)) - _ -> case reads t of - [(f,"")] -> (fcatFloat, SFloat (f::Double)) - _ -> (fcatString,SString t) - - ----------------------------------------------------------------------- --- Earley -- - --- called with all starting categories -initialTD :: ParserInfo -> [FCat] -> Input FToken -> [Item] -initialTD pinfo starts toks = - do cat <- starts - (ruleid,args) <- topdownRules pinfo cat - return (Active [] (Range 0 0) 0 0 (emptyChildren ruleid args) args cat) - -topdownRules pinfo cat = f cat [] - where - f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo)) - - g (FApply ruleid args) rules = (ruleid,args) : rules - g (FCoerce cat) rules = f cat rules - - ----------------------------------------------------------------------- --- Kilbury -- - -initialBU :: ParserInfo -> ParserInfoEx -> Input FToken -> [Item] -initialBU pinfo pinfoex toks = - do (tok,rngs) <- aAssocs (inputToken toks) - (ruleid,args,cat) <- leftcornerTokens pinfoex ? tok - rng <- rngs - return (Active [] rng 0 1 (emptyChildren ruleid args) args cat) - ++ - do (ruleid,args,cat) <- epsilonRules pinfoex - let FFun _ _ _ = functions pinfo ! ruleid - return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args cat) diff --git a/src/runtime/haskell/PGF/Parsing/FCFG/Utilities.hs b/src/runtime/haskell/PGF/Parsing/FCFG/Utilities.hs deleted file mode 100644 index dc0b2dc4a..000000000 --- a/src/runtime/haskell/PGF/Parsing/FCFG/Utilities.hs +++ /dev/null @@ -1,188 +0,0 @@ ----------------------------------------------------------------------- --- | --- Maintainer : PL --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/05/13 12:40:19 $ --- > CVS $Author: peb $ --- > CVS $Revision: 1.6 $ --- --- Basic type declarations and functions for grammar formalisms ------------------------------------------------------------------------------ - - -module PGF.Parsing.FCFG.Utilities where - -import Control.Monad -import Data.Array -import Data.List (groupBy) - -import PGF.CId -import PGF.Data -import PGF.Tree -import GF.Data.Assoc -import GF.Data.Utilities (sameLength, foldMerge, splitBy) - - ------------------------------------------------------------- --- ranges as single pairs - -type RangeRec = [Range] - -data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int - | EmptyRange - deriving (Eq, Ord, Show) - -makeRange :: Int -> Int -> Range -makeRange = Range - -concatRange :: Range -> Range -> [Range] -concatRange EmptyRange rng = return rng -concatRange rng EmptyRange = return rng -concatRange (Range i j) (Range j' k) = [Range i k | j==j'] - -minRange :: Range -> Int -minRange (Range i j) = i - -maxRange :: Range -> Int -maxRange (Range i j) = j - - ------------------------------------------------------------- --- * representaions of input tokens - -data Input t = MkInput { inputBounds :: (Int, Int), - inputToken :: Assoc t [Range] - } - -input :: Ord t => [t] -> Input t -input toks = MkInput inBounds inToken - where - inBounds = (0, length toks) - inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,tok) <- zip3 [0..] [1..] toks ] - -inputMany :: Ord t => [[t]] -> Input t -inputMany toks = MkInput inBounds inToken - where - inBounds = (0, length toks) - inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,ts) <- zip3 [0..] [1..] toks, tok <- ts ] - - ------------------------------------------------------------- --- * representations of syntactical analyses - --- ** charts as finite maps over edges - --- | The values of the chart, a list of key-daughters pairs, --- has unique keys. In essence, it is a map from 'n' to daughters. --- The daughters should be a set (not necessarily sorted) of rhs's. -type SyntaxChart n e = Assoc e [SyntaxNode n [e]] - -data SyntaxNode n e = SMeta - | SNode n [e] - | SString String - | SInt Integer - | SFloat Double - deriving (Eq,Ord,Show) - -groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]] -groupSyntaxNodes [] = [] -groupSyntaxNodes (SNode n0 es0:xs) = (SNode n0 (es0:ess)) : groupSyntaxNodes xs' - where - (ess,xs') = span xs - - span [] = ([],[]) - span xs@(SNode n es:xs') - | n0 == n = let (ess,xs) = span xs' in (es:ess,xs) - | otherwise = ([],xs) -groupSyntaxNodes (SString s:xs) = (SString s) : groupSyntaxNodes xs -groupSyntaxNodes (SInt n:xs) = (SInt n) : groupSyntaxNodes xs -groupSyntaxNodes (SFloat f:xs) = (SFloat f) : groupSyntaxNodes xs - --- ** syntax forests - -data SyntaxForest n = FMeta - | FNode n [[SyntaxForest n]] - -- ^ The outer list should be a set (not necessarily sorted) - -- of possible alternatives. Ie. the outer list - -- is a disjunctive node, and the inner lists - -- are (conjunctive) concatenative nodes - | FString String - | FInt Integer - | FFloat Double - deriving (Eq, Ord, Show) - -instance Functor SyntaxForest where - fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests - fmap _ (FString s) = FString s - fmap _ (FInt n) = FInt n - fmap _ (FFloat f) = FFloat f - fmap _ (FMeta) = FMeta - -forestName :: SyntaxForest n -> Maybe n -forestName (FNode n _) = Just n -forestName _ = Nothing - -unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n) -unifyManyForests = foldM unifyForests FMeta - --- | two forests can be unified, if either is 'FMeta', or both have the same parent, --- and all children can be unified -unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n) -unifyForests FMeta forest = return forest -unifyForests forest FMeta = return forest -unifyForests (FNode name1 children1) (FNode name2 children2) - | name1 == name2 && not (null children) = return $ FNode name1 children - where children = [ forests | forests1 <- children1, forests2 <- children2, - sameLength forests1 forests2, - forests <- zipWithM unifyForests forests1 forests2 ] -unifyForests (FString s1) (FString s2) - | s1 == s2 = return $ FString s1 -unifyForests (FInt n1) (FInt n2) - | n1 == n2 = return $ FInt n1 -unifyForests (FFloat f1) (FFloat f2) - | f1 == f2 = return $ FFloat f1 -unifyForests _ _ = fail "forest unification failure" - - --- ** conversions between representations - -chart2forests :: (Ord n, Ord e) => - SyntaxChart n e -- ^ The complete chart - -> (e -> Bool) -- ^ When is an edge 'FMeta'? - -> [e] -- ^ The starting edges - -> [SyntaxForest n] -- ^ The result has unique keys, ie. all 'n' are joined together. - -- In essence, the result is a map from 'n' to forest daughters -chart2forests chart isMeta = concatMap (edge2forests []) - where edge2forests edges edge - | isMeta edge = [FMeta] - | edge `elem` edges = [] - | otherwise = map (item2forest (edge:edges)) $ chart ? edge - item2forest edges (SMeta) = FMeta - item2forest edges (SNode name children) = - FNode name $ children >>= mapM (edge2forests edges) - item2forest edges (SString s) = FString s - item2forest edges (SInt n) = FInt n - item2forest edges (SFloat f) = FFloat f - - -applyProfileToForest :: SyntaxForest (CId,[Profile]) -> [SyntaxForest CId] -applyProfileToForest (FNode (fun,profiles) children) - | fun == wildCId = concat chForests - | otherwise = [ FNode fun chForests | not (null chForests) ] - where chForests = concat [ mapM (unifyManyForests . map (forests !!)) profiles | - forests0 <- children, - forests <- mapM applyProfileToForest forests0 ] -applyProfileToForest (FString s) = [FString s] -applyProfileToForest (FInt n) = [FInt n] -applyProfileToForest (FFloat f) = [FFloat f] -applyProfileToForest (FMeta) = [FMeta] - - -forest2trees :: SyntaxForest CId -> [Tree] -forest2trees (FNode n forests) = map (Fun n) $ forests >>= mapM forest2trees -forest2trees (FString s) = [Lit (LStr s)] -forest2trees (FInt n) = [Lit (LInt n)] -forest2trees (FFloat f) = [Lit (LFlt f)] -forest2trees (FMeta) = [Meta 0]