remove the old parsing code and the -erasing=on flag

This commit is contained in:
krasimir
2009-12-14 10:54:22 +00:00
parent 15ddc283d4
commit 76debee2c1
16 changed files with 42 additions and 1474 deletions

View File

@@ -39,10 +39,7 @@ library
PGF.Macros
PGF.Generate
PGF.Linearize
PGF.BuildParser
PGF.Parsing.FCFG.Utilities
PGF.Parsing.FCFG.Active
PGF.Parsing.FCFG.Incremental
PGF.Parse
PGF.Expr
PGF.Type
PGF.Tree
@@ -66,7 +63,6 @@ library
GF.Infra.Option
GF.Data.ErrM
GF.Data.BacktrackM
GF.Compile.GenerateFCFG
GF.Compile.GeneratePMCFG
-- not really part of GF but I have changed the original binary library
-- and we have to keep the copy for now.
@@ -106,7 +102,6 @@ executable gf
GF.Data.Utilities
GF.Data.SortedList
GF.Data.Assoc
GF.Compile.GenerateFCFG
GF.Data.ErrM
GF.Data.Operations
GF.Infra.Ident
@@ -169,9 +164,6 @@ executable gf
PGF.Macros
PGF.Generate
PGF.Linearize
PGF.BuildParser
PGF.Parsing.FCFG.Utilities
PGF.Parsing.FCFG.Active
PGF.Binary
PGF.Paraphrase
PGF.TypeCheck

View File

@@ -7,6 +7,6 @@ concrete LangBul of Lang =
flags coding=cp1251 ;
flags startcat = Phr ; unlexer = text ; lexer = text ; erasing = on ; coding = cp1251 ;
flags startcat = Phr ; unlexer = text ; lexer = text ; coding = cp1251 ;
} ;

View File

@@ -1,568 +0,0 @@
----------------------------------------------------------------------
-- |
-- Maintainer : Krasimir Angelov
-- Stability : (stable)
-- Portability : (portable)
--
-- Converting SimpleGFC grammars to fast nonerasing MCFG grammar.
--
-- the resulting grammars might be /very large/
--
-- the conversion is only equivalent if the GFC grammar has a context-free backbone.
-----------------------------------------------------------------------------
module GF.Compile.GenerateFCFG
(convertConcrete) where
import PGF.CId
import PGF.Data
import PGF.Macros --hiding (prt)
import PGF.Parsing.FCFG.Utilities
import GF.Data.BacktrackM
import GF.Data.SortedList
import GF.Data.Utilities (updateNthM, sortNub)
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import qualified Data.List as List
import qualified Data.ByteString.Char8 as BS
import Data.Array.IArray
import Data.Maybe
import Control.Monad
----------------------------------------------------------------------
-- main conversion function
convertConcrete :: Abstr -> Concr -> ParserInfo
convertConcrete abs cnc = fixHoasFuns $ convert abs_defs' conc' cats'
where abs_defs = Map.assocs (funs abs)
conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient"
cats = lincats cnc
(abs_defs',conc',cats') = expandHOAS abs_defs conc cats
expandHOAS :: [(CId,(Type,Int,[Equation]))] -> TermMap -> TermMap -> ([(CId,(Type,Int,[Equation]))],TermMap,TermMap)
expandHOAS funs lins lincats = (funs' ++ hoFuns ++ varFuns,
Map.unions [lins, hoLins, varLins],
Map.unions [lincats, hoLincats, varLincat])
where
-- replace higher-order fun argument types with new categories
funs' = [(f,(fixType ty,a,e)) | (f,(ty,a,e)) <- funs]
where
fixType :: Type -> Type
fixType ty = let (ats,rt) = typeSkeleton ty in cftype (map catName ats) rt
hoTypes :: [(Int,CId)]
hoTypes = sortNub [(n,c) | (_,(ty,_,_)) <- funs, (n,c) <- fst (typeSkeleton ty), n > 0]
hoCats = sortNub (map snd hoTypes)
-- for each Cat with N bindings, we add a new category _NCat
-- each new category contains a single function __NCat : Cat -> _Var -> ... -> _Var -> _NCat
hoFuns = [(funName ty,(cftype (c : replicate n varCat) (catName ty),0,[])) | ty@(n,c) <- hoTypes]
-- lincats for the new categories
hoLincats = Map.fromList [(catName ty, modifyRec (++ replicate n (S [])) (lincatOf c)) | ty@(n,c) <- hoTypes]
-- linearizations of the new functions, lin __NCat v_0 ... v_n-1 x = { s1 = x.s1; ...; sk = x.sk; $0 = v_0.s ...
hoLins = Map.fromList [ (funName ty, mkLin c n) | ty@(n,c) <- hoTypes]
where mkLin c n = modifyRec (\fs -> [P (V 0) (C j) | j <- [0..length fs-1]] ++ [P (V i) (C 0) | i <- [1..n]]) (lincatOf c)
-- for each Cat, we a add a fun _Var_Cat : _Var -> Cat
varFuns = [(varFunName cat, (cftype [varCat] cat,0,[])) | cat <- hoCats]
-- linearizations of the _Var_Cat functions
varLins = Map.fromList [(varFunName cat, R [P (V 0) (C 0)]) | cat <- hoCats]
-- lincat for the _Var category
varLincat = Map.singleton varCat (R [S []])
lincatOf c = fromMaybe (error $ "No lincat for " ++ showCId c) $ Map.lookup c lincats
modifyRec :: ([Term] -> [Term]) -> Term -> Term
modifyRec f (R xs) = R (f xs)
modifyRec _ t = error $ "Not a record: " ++ show t
varCat = mkCId "_Var"
catName :: (Int,CId) -> CId
catName (0,c) = c
catName (n,c) = mkCId ("_" ++ show n ++ showCId c)
funName :: (Int,CId) -> CId
funName (n,c) = mkCId ("__" ++ show n ++ showCId c)
varFunName :: CId -> CId
varFunName c = mkCId ("_Var_" ++ showCId c)
-- replaces __NCat with _B and _Var_Cat with _.
-- the temporary names are just there to avoid name collisions.
fixHoasFuns :: ParserInfo -> ParserInfo
fixHoasFuns pinfo = pinfo{functions=mkArray [FFun (fixName n) prof lins | FFun n prof lins <- elems (functions pinfo)]}
where fixName (CId n) | BS.pack "__" `BS.isPrefixOf` n = (mkCId "_B")
| BS.pack "_Var_" `BS.isPrefixOf` n = wildCId
fixName n = n
convert :: [(CId,(Type,Int,[Equation]))] -> TermMap -> TermMap -> ParserInfo
convert abs_defs cnc_defs cat_defs = getParserInfo (loop grammarEnv)
where
srules = [
(XRule id args res (map findLinType args) (findLinType res) term) |
(id, (ty,_,_)) <- abs_defs, let (args,res) = catSkeleton ty,
term <- maybeToList (Map.lookup id cnc_defs)]
findLinType id = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs)
(xrulesMap,grammarEnv) = List.foldl' helper (Map.empty,emptyFFunsEnv) srules
where
helper (xrulesMap,grammarEnv) rule@(XRule id abs_args abs_res cnc_args cnc_res term) =
let xrulesMap' = Map.insertWith (++) abs_res [rule] xrulesMap
grammarEnv' = List.foldl' (\env selector -> convertRule cnc_defs selector rule env)
grammarEnv
(mkSingletonSelectors cnc_defs cnc_res)
in xrulesMap' `seq` grammarEnv' `seq` (xrulesMap',grammarEnv')
loop grammarEnv =
let (todo, grammarEnv') = takeToDoRules xrulesMap grammarEnv
in case todo of
[] -> grammarEnv'
_ -> loop $! List.foldl' (\env (srules,selector) ->
List.foldl' (\env srule -> convertRule cnc_defs selector srule env) env srules) grammarEnv' todo
convertRule :: TermMap -> TermSelector -> XRule -> GrammarEnv -> GrammarEnv
convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) grammarEnv =
foldBM addRule
grammarEnv
(convertTerm cnc_defs selector term [([],[])])
(protoFCat cat, map (\scat -> (protoFCat scat,[])) args, ctype, ctypes)
where
addRule linRec (newCat', newArgs', _, _) env0 =
let (env1, newCat) = genFCatHead env0 newCat'
(env2, newArgs,idxArgs) = foldr (\((xcat@(PFCat cat rcs tcs),xpaths),ctype,idx) (env,args,all_args) ->
let xargs = xcat:[PFCat cat [path] tcs | path <- reverse xpaths]
(env1, xargs1) = List.mapAccumL (genFCatArg cnc_defs ctype) env xargs
in case xcat of
PFCat _ [] _ -> (env , args, all_args)
_ -> (env1,xargs1++args,(idx,zip xargs1 xargs):all_args))
(env1,[],[]) (zip3 newArgs' ctypes [0..])
(env3,newLinRec) = List.mapAccumL (translateLin idxArgs linRec) env2 (case newCat' of {PFCat _ rcs _ -> rcs})
(_,newProfile) = List.mapAccumL accumProf 0 newArgs'
where
accumProf nr (PFCat _ [] _,_ ) = (nr, [] )
accumProf nr (_ ,xpaths) = (nr+cnt+1, [nr..nr+cnt])
where cnt = length xpaths
(env4,funid) = addFFun env3 (FFun fun newProfile (mkArray newLinRec))
in addProduction env4 newCat (FApply funid newArgs)
translateLin idxArgs [] grammarEnv lbl' = error "translateLin"
translateLin idxArgs ((lbl,syms) : lins) grammarEnv lbl'
| lbl' == lbl = addFSeq grammarEnv (lbl,map instSym syms)
| otherwise = translateLin idxArgs lins grammarEnv lbl'
where
instSym = either (\(lbl, nr, xnr) -> instCat lbl nr xnr 0 idxArgs)
(\t -> case t of
KS s -> FSymKS [s]
KP strs vars -> FSymKP strs vars)
instCat lbl nr xnr nr' ((idx,xargs):idxArgs)
| nr == idx = let (fcat, PFCat _ rcs _) = xargs !! xnr
in FSymCat (nr'+xnr) (index lbl rcs 0)
| otherwise = instCat lbl nr xnr (nr'+length xargs) idxArgs
index lbl' (lbl:lbls) idx
| lbl' == lbl = idx
| otherwise = index lbl' lbls $! (idx+1)
----------------------------------------------------------------------
-- term conversion
type CnvMonad a = BacktrackM Env a
type FPath = [FIndex]
type Env = (ProtoFCat, [(ProtoFCat,[FPath])], Term, [Term])
type LinRec = [(FPath, [Either (FPath, FIndex, Int) Tokn])]
type TermMap = Map.Map CId Term
convertTerm :: TermMap -> TermSelector -> Term -> LinRec -> CnvMonad LinRec
convertTerm cnc_defs selector (V nr) ((lbl_path,lin) : lins) = convertArg selector nr [] lbl_path lin lins
convertTerm cnc_defs selector (C nr) ((lbl_path,lin) : lins) = convertCon selector nr lbl_path lin lins
convertTerm cnc_defs selector (R record) ((lbl_path,lin) : lins) = convertRec cnc_defs selector 0 record lbl_path lin lins
convertTerm cnc_defs selector (P term sel) lins = do nr <- evalTerm cnc_defs [] sel
convertTerm cnc_defs (TuplePrj nr selector) term lins
convertTerm cnc_defs selector (FV vars) lins = do term <- member vars
convertTerm cnc_defs selector term lins
convertTerm cnc_defs selector (S ts) ((lbl_path,lin) : lins) = do projectHead lbl_path
foldM (\lins t -> convertTerm cnc_defs selector t lins) ((lbl_path,lin) : lins) (reverse ts)
convertTerm cnc_defs selector (K (KS str)) ((lbl_path,lin) : lins) =
do projectHead lbl_path
return ((lbl_path,Right (KS str) : lin) : lins)
convertTerm cnc_defs selector (K (KP strs vars))((lbl_path,lin) : lins) =
do projectHead lbl_path
toks <- member (strs:[strs' | Alt strs' _ <- vars])
return ((lbl_path, map (Right . KS) toks ++ lin) : lins)
convertTerm cnc_defs selector (F id) lins = case Map.lookup id cnc_defs of
Just term -> convertTerm cnc_defs selector term lins
Nothing -> mzero
convertTerm cnc_defs selector (W s t) ((lbl_path,lin) : lins) = do
ss <- case t of
R ss -> return ss
F f -> case Map.lookup f cnc_defs of
Just (R ss) -> return ss
_ -> mzero
convertRec cnc_defs selector 0 [K (KS (s ++ s1)) | K (KS s1) <- ss] lbl_path lin lins
convertTerm cnc_defs selector x lins = error ("convertTerm ("++show x++")")
convertArg (TupleSel record) nr path lbl_path lin lins =
foldM (\lins (lbl, selector) -> convertArg selector nr (lbl:path) (lbl:lbl_path) lin lins) lins record
convertArg (TuplePrj lbl selector) nr path lbl_path lin lins =
convertArg selector nr (lbl:path) lbl_path lin lins
convertArg (ConSel indices) nr path lbl_path lin lins = do
index <- member indices
restrictHead lbl_path index
restrictArg nr path index
return lins
convertArg StrSel nr path lbl_path lin lins = do
projectHead lbl_path
xnr <- projectArg nr path
return ((lbl_path, Left (path, nr, xnr) : lin) : lins)
convertCon (ConSel indices) index lbl_path lin lins = do
guard (index `elem` indices)
restrictHead lbl_path index
return lins
convertCon x _ _ _ _ = error $ "SimpleToFCFG,convertCon: " ++ show x
convertRec cnc_defs selector index [] lbl_path lin lins = return lins
convertRec cnc_defs selector@(TupleSel fields) index (val:record) lbl_path lin lins = select fields
where
select [] = convertRec cnc_defs selector (index+1) record lbl_path lin lins
select ((index',sub_sel) : fields)
| index == index' = do lins <- convertTerm cnc_defs sub_sel val ((index:lbl_path,lin) : lins)
convertRec cnc_defs selector (index+1) record lbl_path lin lins
| otherwise = select fields
convertRec cnc_defs (TuplePrj index' sub_sel) index record lbl_path lin lins = do
convertTerm cnc_defs sub_sel (record !! (index'-index)) ((lbl_path,lin) : lins)
------------------------------------------------------------
-- eval a term to ground terms
evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex
evalTerm cnc_defs path (V nr) = do term <- readArgCType nr
unifyPType nr (reverse path) (selectTerm path term)
evalTerm cnc_defs path (C nr) = return nr
evalTerm cnc_defs path (R record) = case path of
(index:path) -> evalTerm cnc_defs path (record !! index)
evalTerm cnc_defs path (P term sel) = do index <- evalTerm cnc_defs [] sel
evalTerm cnc_defs (index:path) term
evalTerm cnc_defs path (FV terms) = member terms >>= evalTerm cnc_defs path
evalTerm cnc_defs path (F id) = case Map.lookup id cnc_defs of
Just term -> evalTerm cnc_defs path term
Nothing -> mzero
evalTerm cnc_defs path x = error ("evalTerm ("++show x++")")
unifyPType :: FIndex -> FPath -> Term -> CnvMonad FIndex
unifyPType nr path (C max_index) =
do (_, args, _, _) <- get
let (PFCat _ _ tcs,_) = args !! nr
case lookup path tcs of
Just index -> return index
Nothing -> do index <- member [0..max_index]
restrictArg nr path index
return index
unifyPType nr path t = error $ "unifyPType " ++ show t ---- AR 2/10/2007
selectTerm :: FPath -> Term -> Term
selectTerm [] term = term
selectTerm (index:path) (R record) = selectTerm path (record !! index)
----------------------------------------------------------------------
-- GrammarEnv
data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int FCatSet FSeqSet FFunSet (IntMap.IntMap (Set.Set Production))
type FCatSet = Map.Map CId (Map.Map [FPath] (Map.Map [(FPath,FIndex)] (Either FCat FCat)))
type FSeqSet = Map.Map FSeq SeqId
type FFunSet = Map.Map FFun FunId
data ProtoFCat = PFCat CId [FPath] [(FPath,FIndex)]
protoFCat :: CId -> ProtoFCat
protoFCat cat = PFCat cat [] []
emptyFFunsEnv = GrammarEnv 0 initFCatSet Map.empty Map.empty IntMap.empty
where
initFCatSet = (ins fcatString (mkCId "String") [[0]] [] $
ins fcatInt (mkCId "Int") [[0]] [] $
ins fcatFloat (mkCId "Float") [[0]] [] $
ins fcatVar (mkCId "_Var") [[0]] [] $
Map.empty)
ins fcat cat rcs tcs catSet =
Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s catSet
where
right_fcat = Right fcat
tmap_s = Map.singleton tcs right_fcat
rmap_s = Map.singleton rcs tmap_s
addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv
addProduction (GrammarEnv last_id catSet seqSet funSet prodSet) cat p =
GrammarEnv last_id catSet seqSet funSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet)
addFSeq :: GrammarEnv -> (FPath,[FSymbol]) -> (GrammarEnv,SeqId)
addFSeq env@(GrammarEnv last_id catSet seqSet funSet prodSet) (_,lst) =
case Map.lookup seq seqSet of
Just id -> (env,id)
Nothing -> let !last_seq = Map.size seqSet
in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet prodSet,last_seq)
where
seq = mkArray lst
addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId)
addFFun env@(GrammarEnv last_id catSet seqSet funSet prodSet) fun =
case Map.lookup fun funSet of
Just id -> (env,id)
Nothing -> let !last_funid = Map.size funSet
in (GrammarEnv last_id catSet seqSet (Map.insert fun last_funid funSet) prodSet,last_funid)
getParserInfo :: GrammarEnv -> ParserInfo
getParserInfo (GrammarEnv last_id catSet seqSet funSet prodSet) =
ParserInfo { functions = mkArray funSet
, sequences = mkArray seqSet
, productions0= prodSet
, productions = prodSet
, startCats = Map.map getFCatList catSet
, totalCats = last_id+1
}
where
mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
getFCatList rcs = Map.fold (\tcs lst -> Map.fold (\x lst -> either id id x : lst) lst tcs) [] rcs
genFCatHead :: GrammarEnv -> ProtoFCat -> (GrammarEnv, FCat)
genFCatHead env@(GrammarEnv last_id catSet seqSet funSet prodSet) (PFCat cat rcs tcs) =
case Map.lookup cat catSet >>= Map.lookup rcs >>= Map.lookup tcs of
Just (Left fcat) -> (GrammarEnv last_id (ins fcat) seqSet funSet prodSet, fcat)
Just (Right fcat) -> (env, fcat)
Nothing -> let fcat = last_id+1
in (GrammarEnv fcat (ins fcat) seqSet funSet prodSet, fcat)
where
ins fcat = Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s catSet
where
right_fcat = Right fcat
tmap_s = Map.singleton tcs right_fcat
rmap_s = Map.singleton rcs tmap_s
genFCatArg :: TermMap -> Term -> GrammarEnv -> ProtoFCat -> (GrammarEnv, FCat)
genFCatArg cnc_defs ctype env@(GrammarEnv last_id catSet seqSet funSet prodSet) (PFCat cat rcs tcs) =
case Map.lookup cat catSet >>= Map.lookup rcs of
Just tmap -> case Map.lookup tcs tmap of
Just (Left fcat) -> (env, fcat)
Just (Right fcat) -> (env, fcat)
Nothing -> ins tmap
Nothing -> ins Map.empty
where
ins tmap =
let fcat = last_id+1
(either_fcat,last_id1,tmap1,prodSet1)
= foldBM (\tcs st (either_fcat,last_id,tmap,prodSet) ->
let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap
p = FCoerce fcat_arg
prodSet1 = IntMap.insertWith Set.union fcat (Set.singleton p) prodSet
in if st
then (Right fcat, last_id1,tmap1,prodSet1)
else (either_fcat,last_id, tmap ,prodSet ))
(Left fcat,fcat,Map.insert tcs either_fcat tmap,prodSet)
(gen_tcs ctype [] [])
False
rmap1 = Map.singleton rcs tmap1
in (GrammarEnv last_id1 (Map.insertWith (\_ -> Map.insert rcs tmap1) cat rmap1 catSet) seqSet funSet prodSet1, fcat)
where
addArg tcs last_id tmap =
case Map.lookup tcs tmap of
Just (Left fcat) -> (last_id, tmap, fcat)
Just (Right fcat) -> (last_id, tmap, fcat)
Nothing -> let fcat = last_id+1
in (fcat, Map.insert tcs (Left fcat) tmap, fcat)
gen_tcs :: Term -> FPath -> [(FPath,FIndex)] -> BacktrackM Bool [(FPath,FIndex)]
gen_tcs (R record) path acc = foldM (\acc (label,ctype) -> gen_tcs ctype (label:path) acc) acc (zip [0..] record)
gen_tcs (S _) path acc = return acc
gen_tcs (C max_index) path acc =
case List.lookup path tcs of
Just index -> return $! addConstraint path index acc
Nothing -> do put True
index <- member [0..max_index]
return $! addConstraint path index acc
where
addConstraint path0 index0 (c@(path,index) : cs)
| path0 > path = c:addConstraint path0 index0 cs
addConstraint path0 index0 cs = (path0,index0) : cs
gen_tcs (F id) path acc = case Map.lookup id cnc_defs of
Just term -> gen_tcs term path acc
Nothing -> error ("unknown identifier: "++showCId id)
------------------------------------------------------------
-- TODO queue organization
type XRulesMap = Map.Map CId [XRule]
data XRule = XRule CId {- function -}
[CId] {- argument types -}
CId {- result type -}
[Term] {- argument lin-types representation -}
Term {- result lin-type representation -}
Term {- body -}
takeToDoRules :: XRulesMap -> GrammarEnv -> ([([XRule], TermSelector)], GrammarEnv)
takeToDoRules xrulesMap (GrammarEnv last_id catSet seqSet funSet prodSet) =
(todo,GrammarEnv last_id catSet' seqSet funSet prodSet)
where
(todo,catSet') =
Map.mapAccumWithKey (\todo cat rmap ->
let (todo1,rmap1) = Map.mapAccumWithKey (\todo rcs tmap ->
let (tcss,tmap') = Map.mapAccumWithKey (\tcss tcs either_xcat ->
case either_xcat of
Left xcat -> (tcs:tcss,Right xcat)
Right xcat -> ( tcss,either_xcat)) [] tmap
in case tcss of
[] -> ( todo,tmap )
_ -> ((srules,mkSelector rcs tcss) : todo,tmap')) todo rmap
mb_srules = Map.lookup cat xrulesMap
Just srules = mb_srules
in case mb_srules of
Just srules -> (todo1,rmap1)
Nothing -> (todo ,rmap1)) [] catSet
------------------------------------------------------------
-- The TermSelector
data TermSelector
= TupleSel [(FIndex, TermSelector)]
| TuplePrj FIndex TermSelector
| ConSel [FIndex]
| StrSel
deriving Show
mkSingletonSelectors :: TermMap
-> Term -- ^ Type representation term
-> [TermSelector] -- ^ list of selectors containing just one string field
mkSingletonSelectors cnc_defs term = sels0
where
(sels0,tcss0) = loop [] ([],[]) term
loop path st (R record) = List.foldl' (\st (index,term) -> loop (index:path) st term) st (zip [0..] record)
loop path (sels,tcss) (C i) = ( sels,map ((,) path) [0..i] : tcss)
loop path (sels,tcss) (S _) = (mkSelector [path] tcss0 : sels, tcss)
loop path (sels,tcss) (F id) = case Map.lookup id cnc_defs of
Just term -> loop path (sels,tcss) term
Nothing -> error ("unknown identifier: "++showCId id)
mkSelector :: [FPath] -> [[(FPath,FIndex)]] -> TermSelector
mkSelector rcs tcss =
List.foldl' addRestriction (case xs of
(path:xs) -> List.foldl' addProjection (path2selector StrSel path) xs) ys
where
xs = [ reverse path | path <- rcs]
ys = [(reverse path,term) | tcs <- tcss, (path,term) <- tcs]
addRestriction :: TermSelector -> (FPath,FIndex) -> TermSelector
addRestriction (ConSel indices) ([] ,n_index) = ConSel (add indices)
where
add [] = [n_index]
add (index':indices)
| n_index == index' = index': indices
| otherwise = index':add indices
addRestriction (TupleSel fields) (index : path,n_index) = TupleSel (add fields)
where
add [] = [(index,path2selector (ConSel [n_index]) path)]
add (field@(index',sub_sel):fields)
| index == index' = (index',addRestriction sub_sel (path,n_index)):fields
| otherwise = field : add fields
addProjection :: TermSelector -> FPath -> TermSelector
addProjection StrSel [] = StrSel
addProjection (TupleSel fields) (index : path) = TupleSel (add fields)
where
add [] = [(index,path2selector StrSel path)]
add (field@(index',sub_sel):fields)
| index == index' = (index',addProjection sub_sel path):fields
| otherwise = field : add fields
path2selector base [] = base
path2selector base (index : path) = TupleSel [(index,path2selector base path)]
------------------------------------------------------------
-- updating the MCF rule
readArgCType :: FIndex -> CnvMonad Term
readArgCType nr = do (_, _, _, ctypes) <- get
return (ctypes !! nr)
restrictArg :: FIndex -> FPath -> FIndex -> CnvMonad ()
restrictArg nr path index = do
(head, args, ctype, ctypes) <- get
args' <- updateNthM (\(xcat,xs) -> do xcat <- restrictProtoFCat path index xcat
return (xcat,xs) ) nr args
put (head, args', ctype, ctypes)
projectArg :: FIndex -> FPath -> CnvMonad Int
projectArg nr path = do
(head, args, ctype, ctypes) <- get
(xnr,args') <- updateArgs nr args
put (head, args', ctype, ctypes)
return xnr
where
updateArgs :: FIndex -> [(ProtoFCat,[FPath])] -> CnvMonad (Int,[(ProtoFCat,[FPath])])
updateArgs 0 ((a@(PFCat _ rcs _),xpaths) : as)
| path `elem` rcs = return (length xpaths+1,(a,path:xpaths):as)
| otherwise = do a <- projectProtoFCat path a
return (0,(a,xpaths):as)
updateArgs n (a : as) = do
(xnr,as) <- updateArgs (n-1) as
return (xnr,a:as)
readHeadCType :: CnvMonad Term
readHeadCType = do (_, _, ctype, _) <- get
return ctype
restrictHead :: FPath -> FIndex -> CnvMonad ()
restrictHead path term
= do (head, args, ctype, ctypes) <- get
head' <- restrictProtoFCat path term head
put (head', args, ctype, ctypes)
projectHead :: FPath -> CnvMonad ()
projectHead path
= do (head, args, ctype, ctypes) <- get
head' <- projectProtoFCat path head
put (head', args, ctype, ctypes)
restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> CnvMonad ProtoFCat
restrictProtoFCat path0 index0 (PFCat cat rcs tcs) = do
tcs <- addConstraint tcs
return (PFCat cat rcs tcs)
where
addConstraint (c@(path,index) : cs)
| path0 > path = liftM (c:) (addConstraint cs)
| path0 == path = guard (index0 == index) >>
return (c : cs)
addConstraint cs = return ((path0,index0) : cs)
projectProtoFCat :: FPath -> ProtoFCat -> CnvMonad ProtoFCat
projectProtoFCat path0 (PFCat cat rcs tcs) = do
return (PFCat cat (addConstraint rcs) tcs)
where
addConstraint (path : rcs)
| path0 > path = path : addConstraint rcs
| path0 == path = path : rcs
addConstraint rcs = path0 : rcs
mkArray lst = listArray (0,length lst-1) lst

View File

@@ -123,7 +123,7 @@ convertRule cnc_defs grammarEnv (PFRule fun args res ctypes ctype term) =
let [newCat] = getFCats env0 newCat'
(env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs'
(env2,funid) = addFFun env1 (FFun fun [[n] | n <- [0..length newArgs-1]] (mkArray lins))
(env2,funid) = addFFun env1 (FFun fun (mkArray lins))
in addProduction env2 newCat (FApply funid newArgs)
@@ -394,7 +394,7 @@ expandHOAS abs_defs cnc_defs lincats lindefs env =
(env1,lins) = List.mapAccumL addFSeq env linRec
newLinRec = mkArray lins
(env2,funid) = addFFun env1 (FFun _B [[i] | i <- [0..n]] newLinRec)
(env2,funid) = addFFun env1 (FFun _B newLinRec)
env3 = foldl (\env (arg,res) -> addProduction env res (FApply funid (arg : replicate n fcatVar)))
env2
@@ -462,7 +462,7 @@ getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
, sequences = mkArray seqSet
, productions0= productions0
, productions = filterProductions productions0
, startCats = maybe Map.empty (Map.map (\(start,end,_) -> range (start,end))) (IntMap.lookup 0 catSet)
, startCats = maybe Map.empty (Map.map (\(start,end,_) -> (start,end))) (IntMap.lookup 0 catSet)
, totalCats = last_id+1
}
where

View File

@@ -1,374 +0,0 @@
{-# LANGUAGE BangPatterns, CPP #-}
----------------------------------------------------------------------
-- |
-- Maintainer : Krasimir Angelov
-- Stability : (stable)
-- Portability : (portable)
--
-- Converting SimpleGFC grammars to fast nonerasing MCFG grammar.
--
-- the resulting grammars might be /very large/
--
-- the conversion is only equivalent if the GFC grammar has a context-free backbone.
-----------------------------------------------------------------------------
module GF.Compile.GeneratePMCFG
(convertConcrete) where
import PGF.CId
import PGF.Data
import PGF.Macros --hiding (prt)
import GF.Data.BacktrackM
import GF.Data.SortedList
import GF.Data.Utilities (updateNthM, sortNub)
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.List as List
import qualified Data.IntMap as IntMap
import qualified Data.ByteString.Char8 as BS
import Data.Array.IArray
import Data.Maybe
import Control.Monad
import Debug.Trace
----------------------------------------------------------------------
-- main conversion function
convertConcrete :: Abstr -> Concr -> ParserInfo
convertConcrete abs cnc = convert abs_defs conc cats
where abs_defs = Map.assocs (funs abs)
conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient"
cats = lincats cnc
convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> ParserInfo
convert abs_defs cnc_defs cat_defs =
let env = expandHOAS abs_defs cnc_defs cat_defs (emptyGrammarEnv cnc_defs cat_defs)
in getParserInfo (List.foldl' (convertRule cnc_defs) env xrules)
where
xrules = [
(XRule id args (0,res) (map findLinType args) (findLinType (0,res)) term) |
(id, (ty,_)) <- abs_defs, let (args,res) = typeSkeleton ty,
term <- maybeToList (Map.lookup id cnc_defs)]
findLinType (_,id) = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs)
brk :: (GrammarEnv -> GrammarEnv) -> (GrammarEnv -> GrammarEnv)
brk f (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
case f (GrammarEnv last_id catSet seqSet funSet crcSet IntMap.empty) of
(GrammarEnv last_id catSet seqSet funSet crcSet topdown1) -> IntMap.foldWithKey optimize (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) topdown1
where
optimize cat ps env = IntMap.foldWithKey ff env (IntMap.fromListWith (++) [(funid,[args]) | FApply funid args <- Set.toList ps])
where
ff :: FunId -> [[FCat]] -> GrammarEnv -> GrammarEnv
ff funid xs env
| product (map Set.size ys) == count =
case List.mapAccumL (\env c -> addFCoercion env (Set.toList c)) env ys of
(env,args) -> addProduction env cat (FApply funid args)
| otherwise = List.foldl (\env args -> addProduction env cat (FApply funid args)) env xs
where
count = length xs
ys = foldr (zipWith Set.insert) (repeat Set.empty) xs
convertRule :: TermMap -> GrammarEnv -> XRule -> GrammarEnv
convertRule cnc_defs grammarEnv (XRule fun args res ctypes ctype term) =
brk (\grammarEnv -> foldBM addRule
grammarEnv
(convertTerm cnc_defs [] ctype term [([],[])])
(protoFCat cnc_defs res ctype, zipWith (protoFCat cnc_defs) args ctypes)) grammarEnv
where
addRule linRec (newCat', newArgs') env0 =
let [newCat] = getFCats env0 newCat'
(env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs'
(env2,lins) = List.mapAccumL addFSeq env1 linRec
newLinRec = mkArray lins
(env3,funid) = addFFun env2 (FFun fun [[n] | n <- [0..length newArgs-1]] newLinRec)
in addProduction env3 newCat (FApply funid newArgs)
----------------------------------------------------------------------
-- term conversion
type CnvMonad a = BacktrackM Env a
type FPath = [FIndex]
data ProtoFCat = PFCat Int CId [FPath] [(FPath,[FIndex])]
type Env = (ProtoFCat, [ProtoFCat])
type LinRec = [(FPath, [FSymbol])]
data XRule = XRule CId {- function -}
[(Int,CId)] {- argument types: context size and category -}
(Int,CId) {- result type : context size (always 0) and category -}
[Term] {- argument lin-types representation -}
Term {- result lin-type representation -}
Term {- body -}
protoFCat :: TermMap -> (Int,CId) -> Term -> ProtoFCat
protoFCat cnc_defs (n,cat) ctype =
let (rcs,tcs) = loop [] [] [] ctype'
in PFCat n cat rcs tcs
where
ctype' -- extend the high-order linearization type
| n > 0 = case ctype of
R xs -> R (xs ++ replicate n (S []))
_ -> error $ "Not a record: " ++ show ctype
| otherwise = ctype
loop path rcs tcs (R record) = List.foldl' (\(rcs,tcs) (index,term) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record)
loop path rcs tcs (C i) = ( rcs,(path,[0..i]):tcs)
loop path rcs tcs (S _) = (path:rcs, tcs)
loop path rcs tcs (F id) = case Map.lookup id cnc_defs of
Just term -> loop path rcs tcs term
Nothing -> error ("unknown identifier: "++show id)
type TermMap = Map.Map CId Term
convertTerm :: TermMap -> FPath -> Term -> Term -> LinRec -> CnvMonad LinRec
convertTerm cnc_defs sel ctype (V nr) ((lbl_path,lin) : lins) = convertArg ctype nr (reverse sel) lbl_path lin lins
convertTerm cnc_defs sel ctype (C nr) ((lbl_path,lin) : lins) = convertCon ctype nr (reverse sel) lbl_path lin lins
convertTerm cnc_defs sel ctype (R record) ((lbl_path,lin) : lins) = convertRec cnc_defs sel ctype record lbl_path lin lins
convertTerm cnc_defs sel ctype (P term p) lins = do nr <- evalTerm cnc_defs [] p
convertTerm cnc_defs (nr:sel) ctype term lins
convertTerm cnc_defs sel ctype (FV vars) lins = do term <- member vars
convertTerm cnc_defs sel ctype term lins
convertTerm cnc_defs sel ctype (S ts) lins = foldM (\lins t -> convertTerm cnc_defs sel ctype t lins) lins (reverse ts)
--convertTerm cnc_defs sel ctype (K t) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok t : lin) : lins)
convertTerm cnc_defs sel ctype (K (KS t)) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok (KS t) : lin) : lins)
convertTerm cnc_defs sel ctype (K (KP strs vars))((lbl_path,lin) : lins) =
do toks <- member (strs:[strs' | Alt strs' _ <- vars])
return ((lbl_path, map (FSymTok . KS) toks ++ lin) : lins)
convertTerm cnc_defs sel ctype (F id) lins = case Map.lookup id cnc_defs of
Just term -> convertTerm cnc_defs sel ctype term lins
Nothing -> mzero
convertTerm cnc_defs sel ctype (W s t) ((lbl_path,lin) : lins) = do
ss <- case t of
R ss -> return ss
F f -> case Map.lookup f cnc_defs of
Just (R ss) -> return ss
_ -> mzero
convertRec cnc_defs sel ctype [K (KS (s ++ s1)) | K (KS s1) <- ss] lbl_path lin lins
convertTerm cnc_defs sel ctype x lins = error ("convertTerm ("++show x++")")
convertArg (R record) nr path lbl_path lin lins =
foldM (\lins (lbl, ctype) -> convertArg ctype nr (lbl:path) (lbl:lbl_path) lin lins) lins (zip [0..] record)
convertArg (C max) nr path lbl_path lin lins = do
index <- member [0..max]
restrictHead lbl_path index
restrictArg nr path index
return lins
convertArg (S _) nr path lbl_path lin lins = do
(_, args) <- get
let PFCat _ cat rcs tcs = args !! nr
l = index path rcs 0
sym | isLiteralCat cat = FSymLit nr l
| otherwise = FSymCat nr l
return ((lbl_path, sym : lin) : lins)
where
index lbl' (lbl:lbls) idx
| lbl' == lbl = idx
| otherwise = index lbl' lbls $! (idx+1)
convertCon (C max) index [] lbl_path lin lins = do
guard (index <= max)
restrictHead lbl_path index
return lins
convertCon x _ _ _ _ _ = error $ "SimpleToFCFG,convertCon: " ++ show x
convertRec cnc_defs [] (R ctypes) record lbl_path lin lins =
foldM (\lins (index,ctype,val) -> convertTerm cnc_defs [] ctype val ((index:lbl_path,lin) : lins))
lins
(zip3 [0..] ctypes record)
convertRec cnc_defs (index:sub_sel) ctype record lbl_path lin lins = do
convertTerm cnc_defs sub_sel ctype (record !! index) ((lbl_path,lin) : lins)
------------------------------------------------------------
-- eval a term to ground terms
evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex
evalTerm cnc_defs path (V nr) = do (_, args) <- get
let PFCat _ _ _ tcs = args !! nr
rpath = reverse path
index <- member (fromMaybe (error "evalTerm: wrong path") (lookup rpath tcs))
restrictArg nr rpath index
return index
evalTerm cnc_defs path (C nr) = return nr
evalTerm cnc_defs path (R record) = case path of
(index:path) -> evalTerm cnc_defs path (record !! index)
evalTerm cnc_defs path (P term sel) = do index <- evalTerm cnc_defs [] sel
evalTerm cnc_defs (index:path) term
evalTerm cnc_defs path (FV terms) = member terms >>= evalTerm cnc_defs path
evalTerm cnc_defs path (F id) = case Map.lookup id cnc_defs of
Just term -> evalTerm cnc_defs path term
Nothing -> mzero
evalTerm cnc_defs path x = error ("evalTerm ("++show x++")")
----------------------------------------------------------------------
-- GrammarEnv
data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int CatSet SeqSet FunSet CoerceSet (IntMap.IntMap (Set.Set Production))
type CatSet = IntMap.IntMap (Map.Map CId (FCat,FCat,[Int]))
type SeqSet = Map.Map FSeq SeqId
type FunSet = Map.Map FFun FunId
type CoerceSet= Map.Map [FCat] FCat
emptyGrammarEnv cnc_defs lincats =
let (last_id,catSet) = Map.mapAccumWithKey computeCatRange 0 lincats
in GrammarEnv last_id (IntMap.singleton 0 catSet) Map.empty Map.empty Map.empty IntMap.empty
where
computeCatRange index cat ctype
| cat == cidString = (index, (fcatString,fcatString,[]))
| cat == cidInt = (index, (fcatInt, fcatInt, []))
| cat == cidFloat = (index, (fcatFloat, fcatFloat, []))
| otherwise = (index+size,(index,index+size-1,poly))
where
(size,poly) = getMultipliers 1 [] ctype
getMultipliers m ms (R record) = foldl (\(m,ms) t -> getMultipliers m ms t) (m,ms) record
getMultipliers m ms (S _) = (m,ms)
getMultipliers m ms (C max_index) = (m*(max_index+1),m : ms)
getMultipliers m ms (F id) = case Map.lookup id cnc_defs of
Just term -> getMultipliers m ms term
Nothing -> error ("unknown identifier: "++prCId id)
expandHOAS abs_defs cnc_defs lincats env =
foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) hoCats
where
hoTypes :: [(Int,CId)]
hoTypes = sortNub [(n,c) | (_,(ty,_)) <- abs_defs
, (n,c) <- fst (typeSkeleton ty), n > 0]
hoCats :: [CId]
hoCats = sortNub [c | (_,(ty,_)) <- abs_defs
, Hyp _ ty <- case ty of {DTyp hyps val _ -> hyps}
, c <- fst (catSkeleton ty)]
-- add a range of PMCFG categories for each GF high-order category
add_hoCat env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,cat) =
case IntMap.lookup 0 catSet >>= Map.lookup cat of
Just (start,end,ms) -> let !catSet' = IntMap.insertWith Map.union n (Map.singleton cat (last_id,last_id+(end-start),ms)) catSet
!last_id' = last_id+(end-start)+1
in (GrammarEnv last_id' catSet' seqSet funSet crcSet prodSet)
Nothing -> env
-- add one PMCFG function for each high-order type: _B : Cat -> Var -> ... -> Var -> HoCat
add_hoFun env (n,cat) =
let linRec = reverse $
[(l ,[FSymCat 0 i]) | (l,i) <- case arg of {PFCat _ _ rcs _ -> zip rcs [0..]}] ++
[([],[FSymLit i 0]) | i <- [1..n]]
(env1,lins) = List.mapAccumL addFSeq env linRec
newLinRec = mkArray lins
(env2,funid) = addFFun env1 (FFun _B [[i] | i <- [0..n]] newLinRec)
env3 = foldl (\env (arg,res) -> addProduction env res (FApply funid (arg : replicate n fcatVar)))
env2
(zip (getFCats env2 arg) (getFCats env2 res))
in env3
where
(arg,res) = case Map.lookup cat lincats of
Nothing -> error $ "No lincat for " ++ prCId cat
Just ctype -> (protoFCat cnc_defs (0,cat) ctype, protoFCat cnc_defs (n,cat) ctype)
-- add one PMCFG function for each high-order category: _V : Var -> Cat
add_varFun env cat =
let (env1,seqid) = addFSeq env ([],[FSymLit 0 0])
lins = replicate (case res of {PFCat _ _ rcs _ -> length rcs}) seqid
(env2,funid) = addFFun env1 (FFun _V [[0]] (mkArray lins))
env3 = foldl (\env res -> addProduction env2 res (FApply funid [fcatVar]))
env2
(getFCats env2 res)
in env3
where
res = case Map.lookup cat lincats of
Nothing -> error $ "No lincat for " ++ prCId cat
Just ctype -> protoFCat cnc_defs (0,cat) ctype
_B = mkCId "_B"
_V = mkCId "_V"
addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv
addProduction (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) cat p =
GrammarEnv last_id catSet seqSet funSet crcSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet)
addFSeq :: GrammarEnv -> (FPath,[FSymbol]) -> (GrammarEnv,SeqId)
addFSeq env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (_,lst) =
case Map.lookup seq seqSet of
Just id -> (env,id)
Nothing -> let !last_seq = Map.size seqSet
in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet crcSet prodSet,last_seq)
where
seq = mkArray lst
addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId)
addFFun env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) fun =
case Map.lookup fun funSet of
Just id -> (env,id)
Nothing -> let !last_funid = Map.size funSet
in (GrammarEnv last_id catSet seqSet (Map.insert fun last_funid funSet) crcSet prodSet,last_funid)
addFCoercion :: GrammarEnv -> [FCat] -> (GrammarEnv,FCat)
addFCoercion env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) sub_fcats =
case sub_fcats of
[fcat] -> (env,fcat)
_ -> case Map.lookup sub_fcats crcSet of
Just fcat -> (env,fcat)
Nothing -> let !fcat = last_id+1
in (GrammarEnv fcat catSet seqSet funSet (Map.insert sub_fcats fcat crcSet) prodSet,fcat)
getParserInfo :: GrammarEnv -> ParserInfo
getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
ParserInfo { functions = mkArray funSet
, sequences = mkArray seqSet
, productions = IntMap.union prodSet coercions
, startCats = maybe Map.empty (Map.map (\(start,end,_) -> range (start,end))) (IntMap.lookup 0 catSet)
, totalCats = last_id+1
}
where
mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet]
getFCats :: GrammarEnv -> ProtoFCat -> [FCat]
getFCats (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat n cat rcs tcs) =
case IntMap.lookup n catSet >>= Map.lookup cat of
Just (start,end,ms) -> reverse (solutions (variants ms tcs start) ())
where
variants _ [] fcat = return fcat
variants (m:ms) ((_,indices) : tcs) fcat = do index <- member indices
variants ms tcs ((m*index) + fcat)
------------------------------------------------------------
-- updating the MCF rule
restrictArg :: FIndex -> FPath -> FIndex -> CnvMonad ()
restrictArg nr path index = do
(head, args) <- get
args' <- updateNthM (restrictProtoFCat path index) nr args
put (head, args')
restrictHead :: FPath -> FIndex -> CnvMonad ()
restrictHead path term
= do (head, args) <- get
head' <- restrictProtoFCat path term head
put (head', args)
restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> CnvMonad ProtoFCat
restrictProtoFCat path0 index0 (PFCat n cat rcs tcs) = do
tcs <- addConstraint tcs
return (PFCat n cat rcs tcs)
where
addConstraint [] = error "restrictProtoFCat: unknown path"
addConstraint (c@(path,indices) : tcs)
| path0 == path = guard (index0 `elem` indices) >>
return ((path,[index0]) : tcs)
| otherwise = liftM (c:) (addConstraint tcs)
mkArray lst = listArray (0,length lst-1) lst

View File

@@ -2,8 +2,7 @@
module GF.Compile.GrammarToPGF (mkCanon2gfcc,addParsers) where
import GF.Compile.Export
import qualified GF.Compile.GenerateFCFG as FCFG
import qualified GF.Compile.GeneratePMCFG as PMCFG
import GF.Compile.GeneratePMCFG
import PGF.CId
import qualified PGF.Macros as CM
@@ -48,12 +47,8 @@ addParsers :: Options -> D.PGF -> IO D.PGF
addParsers opts pgf = do cncs <- sequence [conv lang cnc | (lang,cnc) <- Map.toList (D.concretes pgf)]
return pgf { D.concretes = Map.fromList cncs }
where
conv lang cnc = do pinfo <- if flag optErasing (erasingFromCnc `addOptions` opts)
then PMCFG.convertConcrete opts (D.abstract pgf) lang cnc
else return $ FCFG.convertConcrete (D.abstract pgf) cnc
conv lang cnc = do pinfo <- convertConcrete opts (D.abstract pgf) lang cnc
return (lang,cnc { D.parser = Just pinfo })
where
erasingFromCnc = modifyFlags (\o -> o { optErasing = Map.lookup (mkCId "erasing") (D.cflags cnc) == Just "on"})
-- Generate PGF from GFCM.
-- this assumes a grammar translated by canon2canon

View File

@@ -96,13 +96,14 @@ parser2js p = [new "Parser" [JS.EObj $ [JS.Prop (JS.IntPropName cat) (JS.EArray
JS.EObj $ map cats (Map.assocs (startCats p)),
JS.EInt (totalCats p)]]
where
cats (c,is) = JS.Prop (JS.IdentPropName (JS.Ident (showCId c))) (JS.EArray (map JS.EInt is))
cats (c,(start,end)) = JS.Prop (JS.IdentPropName (JS.Ident (showCId c))) (JS.EObj [JS.Prop (JS.IdentPropName (JS.Ident "s")) (JS.EInt start)
,JS.Prop (JS.IdentPropName (JS.Ident "e")) (JS.EInt end)])
frule2js :: Production -> JS.Expr
frule2js (FApply funid args) = new "Rule" [JS.EInt funid, JS.EArray (map JS.EInt args)]
frule2js (FCoerce arg) = new "Coerce" [JS.EInt arg]
ffun2js (FFun f _ lins) = new "FFun" [JS.EStr (showCId f), JS.EArray (map JS.EInt (Array.elems lins))]
ffun2js (FFun f lins) = new "FFun" [JS.EStr (showCId f), JS.EArray (map JS.EInt (Array.elems lins))]
seq2js :: Array.Array FIndex FSymbol -> JS.Expr
seq2js seq = JS.EArray [sym2js s | s <- Array.elems seq]

View File

@@ -172,7 +172,6 @@ data Flags = Flags {
optSpeechLanguage :: Maybe String,
optLexer :: Maybe String,
optUnlexer :: Maybe String,
optErasing :: Bool,
optBuildParser :: BuildParser,
optWarnings :: [Warning],
optDump :: [Dump]
@@ -219,7 +218,6 @@ optionsPGF :: Options -> [(String,String)]
optionsPGF opts =
maybe [] (\x -> [("language",x)]) (flag optSpeechLanguage opts)
++ maybe [] (\x -> [("startcat",x)]) (flag optStartCat opts)
++ (if flag optErasing opts then [("erasing","on")] else [])
++ (if flag optBuildParser opts == BuildParserOnDemand then [("parser","ondemand")] else [])
-- Option manipulation
@@ -276,7 +274,6 @@ defaultFlags = Flags {
optSpeechLanguage = Nothing,
optLexer = Nothing,
optUnlexer = Nothing,
optErasing = True,
optBuildParser = BuildParser,
optWarnings = [],
optDump = []
@@ -354,7 +351,6 @@ optDescr =
Option [] ["coding"] (ReqArg coding "ENCODING")
("Character encoding of the source grammar, ENCODING = "
++ concat (intersperse " | " (map fst encodings)) ++ "."),
Option [] ["erasing"] (onOff erasing False) "Generate erasing grammar (default off).",
Option [] ["parser"] (ReqArg buildParser "VALUE") "Build parser (default on). VALUE = on | off | ondemand",
Option [] ["startcat"] (ReqArg startcat "CAT") "Grammar start category.",
Option [] ["language"] (ReqArg language "LANG") "Set the speech language flag to LANG in the generated grammar.",
@@ -414,7 +410,6 @@ optDescr =
coding x = case lookup x encodings of
Just c -> set $ \o -> o { optEncoding = c }
Nothing -> fail $ "Unknown character encoding: " ++ x
erasing x = set $ \o -> o { optErasing = x }
buildParser x = do v <- case x of
"on" -> return BuildParser
"off" -> return DontBuildParser

View File

@@ -27,6 +27,7 @@ bnfPrinter = toBNF id
toBNF :: (CFG -> CFG) -> PGF -> CId -> String
toBNF f pgf cnc = prCFG $ f $ pgfToCFG pgf cnc
type Profile = [Int]
pgfToCFG :: PGF
-> CId -- ^ Concrete syntax name
@@ -42,7 +43,7 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
fcatCats :: Map FCat Cat
fcatCats = Map.fromList [(fc, showCId c ++ "_" ++ show i)
| (c,fcs) <- Map.toList (startCats pinfo),
(fc,i) <- zip fcs [1..]]
(fc,i) <- zip (range fcs) [1..]]
fcatCat :: FCat -> Cat
fcatCat c = Map.findWithDefault ("Unknown_" ++ show c) c fcatCats
@@ -53,7 +54,7 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
-- gets the number of fields in the lincat for the given category
catLinArity :: FCat -> Int
catLinArity c = maximum (1:[rangeSize (bounds rhs) | (FFun _ _ rhs, _) <- topdownRules c])
catLinArity c = maximum (1:[rangeSize (bounds rhs) | (FFun _ rhs, _) <- topdownRules c])
topdownRules cat = f cat []
where
@@ -69,17 +70,17 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
startRules :: [CFRule]
startRules = [CFRule (showCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0)
| (c,fcs) <- Map.toList (startCats pinfo),
fc <- fcs, not (isLiteralFCat fc),
fc <- range fcs, not (isLiteralFCat fc),
r <- [0..catLinArity fc-1]]
fruleToCFRule :: (FCat,Production) -> [CFRule]
fruleToCFRule (c,FApply funid args) =
[CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm (map (fixProfile row) ps))
[CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm [fixProfile row n | n <- [0..length args-1]])
| (l,seqid) <- Array.assocs rhs
, let row = sequences pinfo ! seqid
, not (containsLiterals row)]
where
FFun f ps rhs = functions pinfo ! funid
FFun f rhs = functions pinfo ! funid
mkRhs :: Array FPointPos FSymbol -> [CFSymbol]
mkRhs = concatMap fsymbolToSymbol . Array.elems
@@ -94,11 +95,10 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
fsymbolToSymbol (FSymLit n l) = [NonTerminal (fcatToCat (args!!n) l)]
fsymbolToSymbol (FSymKS ts) = map Terminal ts
fixProfile :: Array FPointPos FSymbol -> Profile -> Profile
fixProfile row = concatMap positions
fixProfile :: Array FPointPos FSymbol -> Int -> Profile
fixProfile row i = [k | (k,j) <- nts, j == i]
where
nts = zip [0..] [j | nt <- Array.elems row, j <- getPos nt]
positions i = [k | (k,j) <- nts, j == i]
getPos (FSymCat j _) = [j]
getPos (FSymLit j _) = [j]

View File

@@ -74,8 +74,8 @@ module PGF(
-- ** Word Completion (Incremental Parsing)
complete,
Incremental.ParseState,
Incremental.initState, Incremental.nextState, Incremental.getCompletions, Incremental.recoveryStates, Incremental.extractTrees,
Parse.ParseState,
Parse.initState, Parse.nextState, Parse.getCompletions, Parse.recoveryStates, Parse.extractTrees,
-- ** Generation
generateRandom, generateAll, generateAllDepth,
@@ -105,8 +105,7 @@ import PGF.Expr (Tree)
import PGF.Morphology
import PGF.Data hiding (functions)
import PGF.Binary
import qualified PGF.Parsing.FCFG.Active as Active
import qualified PGF.Parsing.FCFG.Incremental as Incremental
import qualified PGF.Parse as Parse
import qualified GF.Compile.GeneratePMCFG as PMCFG
import GF.Infra.Option
@@ -249,13 +248,11 @@ linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang
parse pgf lang typ s =
case Map.lookup lang (concretes pgf) of
Just cnc -> case parser cnc of
Just pinfo -> if Map.lookup (mkCId "erasing") (cflags cnc) == Just "on"
then Incremental.parse pgf lang typ (words s)
else Active.parse "t" pinfo typ (words s)
Just pinfo -> Parse.parse pgf lang typ (words s)
Nothing -> error ("No parser built for language: " ++ showCId lang)
Nothing -> error ("Unknown language: " ++ showCId lang)
parseWithRecovery pgf lang typ open_typs s = Incremental.parseWithRecovery pgf lang typ open_typs (words s)
parseWithRecovery pgf lang typ open_typs s = Parse.parseWithRecovery pgf lang typ open_typs (words s)
canParse pgf cnc = isJust (lookParser pgf cnc)
@@ -297,12 +294,12 @@ functionType pgf fun =
complete pgf from typ input =
let (ws,prefix) = tokensAndPrefix input
state0 = Incremental.initState pgf from typ
state0 = Parse.initState pgf from typ
in case loop state0 ws of
Nothing -> []
Just state ->
(if null prefix && not (null (Incremental.extractTrees state typ)) then [unwords ws ++ " "] else [])
++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Incremental.getCompletions state prefix)]
(if null prefix && not (null (Parse.extractTrees state typ)) then [unwords ws ++ " "] else [])
++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Parse.getCompletions state prefix)]
where
tokensAndPrefix :: String -> ([String],String)
tokensAndPrefix s | not (null s) && isSpace (last s) = (ws, "")
@@ -311,7 +308,7 @@ complete pgf from typ input =
where ws = words s
loop ps [] = Just ps
loop ps (t:ts) = case Incremental.nextState ps t of
loop ps (t:ts) = case Parse.nextState ps t of
Left es -> Nothing
Right ps -> loop ps ts

View File

@@ -159,8 +159,8 @@ instance Binary BindType where
_ -> decodingError
instance Binary FFun where
put (FFun fun prof lins) = put (fun,prof,lins)
get = liftM3 FFun get get get
put (FFun fun lins) = put (fun,lins)
get = liftM2 FFun get get
instance Binary FSymbol where
put (FSymCat n l) = putWord8 0 >> put (n,l)

View File

@@ -1,76 +0,0 @@
---------------------------------------------------------------------
-- |
-- Maintainer : Krasimir Angelov
-- Stability : (stable)
-- Portability : (portable)
--
-- FCFG parsing, parser information
-----------------------------------------------------------------------------
module PGF.BuildParser where
import GF.Data.SortedList
import GF.Data.Assoc
import PGF.CId
import PGF.Data
import PGF.Parsing.FCFG.Utilities
import Data.Array.IArray
import Data.Maybe
import qualified Data.IntMap as IntMap
import qualified Data.Map as Map
import qualified Data.Set as Set
import Debug.Trace
data ParserInfoEx
= ParserInfoEx { epsilonRules :: [(FunId,[FCat],FCat)]
, leftcornerCats :: Assoc FCat [(FunId,[FCat],FCat)]
, leftcornerTokens :: Assoc String [(FunId,[FCat],FCat)]
, grammarToks :: [String]
}
------------------------------------------------------------
-- parser information
getLeftCornerTok pinfo (FFun _ _ lins)
| inRange (bounds syms) 0 = case syms ! 0 of
FSymKS [tok] -> [tok]
_ -> []
| otherwise = []
where
syms = (sequences pinfo) ! (lins ! 0)
getLeftCornerCat pinfo args (FFun _ _ lins)
| inRange (bounds syms) 0 = case syms ! 0 of
FSymCat d _ -> let cat = args !! d
in case IntMap.lookup cat (productions pinfo) of
Just set -> cat : [cat' | FCoerce cat' <- Set.toList set]
Nothing -> [cat]
_ -> []
| otherwise = []
where
syms = (sequences pinfo) ! (lins ! 0)
buildParserInfo :: ParserInfo -> ParserInfoEx
buildParserInfo pinfo =
ParserInfoEx { epsilonRules = epsilonrules
, leftcornerCats = leftcorncats
, leftcornerTokens = leftcorntoks
, grammarToks = grammartoks
}
where epsilonrules = [ (ruleid,args,cat)
| (cat,set) <- IntMap.toList (productions pinfo)
, (FApply ruleid args) <- Set.toList set
, let (FFun _ _ lins) = (functions pinfo) ! ruleid
, not (inRange (bounds ((sequences pinfo) ! (lins ! 0))) 0) ]
leftcorncats = accumAssoc id [ (cat', (ruleid, args, cat))
| (cat,set) <- IntMap.toList (productions pinfo)
, (FApply ruleid args) <- Set.toList set
, cat' <- getLeftCornerCat pinfo args ((functions pinfo) ! ruleid) ]
leftcorntoks = accumAssoc id [ (tok, (ruleid, args, cat))
| (cat,set) <- IntMap.toList (productions pinfo)
, (FApply ruleid args) <- Set.toList set
, tok <- getLeftCornerTok pinfo ((functions pinfo) ! ruleid) ]
grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymKS [t] <- elems lin]

View File

@@ -19,13 +19,12 @@ data FSymbol
| FSymKS [String]
| FSymKP [String] [Alternative]
deriving (Eq,Ord,Show)
type Profile = [Int]
data Production
= FApply {-# UNPACK #-} !FunId [FCat]
| FCoerce {-# UNPACK #-} !FCat
| FConst Expr [String]
deriving (Eq,Ord,Show)
data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
data FFun = FFun CId {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
type FSeq = Array FPointPos FSymbol
type FunId = Int
type SeqId = Int
@@ -39,7 +38,7 @@ data ParserInfo
, sequences :: Array SeqId FSeq
, productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file
, productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions
, startCats :: Map.Map CId [FCat]
, startCats :: Map.Map CId (FCat,FCat)
, totalCats :: {-# UNPACK #-} !FCat
}
@@ -71,14 +70,14 @@ ppProduction (fcat,FCoerce arg) =
ppProduction (fcat,FConst _ ss) =
ppFCat fcat <+> text "->" <+> ppStrs ss
ppFun (funid,FFun fun _ arr) =
ppFun (funid,FFun fun arr) =
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)
ppSeq (seqid,seq) =
ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))
ppStartCat (id,fcats) =
ppCId id <+> text ":=" <+> brackets (hcat (punctuate comma (map ppFCat fcats)))
ppStartCat (id,(start,end)) =
ppCId id <+> text ":=" <+> brackets (ppFCat start <+> text ".." <+> ppFCat end)
ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'

View File

@@ -1,5 +1,5 @@
{-# LANGUAGE BangPatterns #-}
module PGF.Parsing.FCFG.Incremental
module PGF.Parse
( ParseState
, ErrorState
, initState
@@ -57,10 +57,10 @@ parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ)
initState :: PGF -> Language -> Type -> ParseState
initState pgf lang (DTyp _ start _) =
let items = do
cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
cat <- maybe [] range (Map.lookup start (startCats pinfo))
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
[] cat (productions pinfo)
let FFun fn _ lins = functions pinfo ! funid
let FFun fn lins = functions pinfo ! funid
(lbl,seqid) <- assocs lins
return (Active 0 0 funid seqid args (AK cat lbl))
@@ -131,7 +131,7 @@ recoveryStates open_types (EState pgf pinfo chart) =
}
in (PState pgf pinfo chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf pinfo chart2) acc)
where
type2fcats (DTyp _ cat _) = fromMaybe [] (Map.lookup cat (startCats pinfo))
type2fcats (DTyp _ cat _) = maybe [] range (Map.lookup cat (startCats pinfo))
complete open_fcats items ac =
foldl (Set.fold (\(Active j' ppos funid seqid args keyc) ->
@@ -154,10 +154,10 @@ extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) =
(_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart
exps = do
cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
cat <- maybe [] range (Map.lookup start (startCats pinfo))
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
[] cat (productions pinfo)
let FFun fn _ lins = functions pinfo ! funid
let FFun fn lins = functions pinfo ! funid
lbl <- indices lins
Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
(fvs,tree) <- go Set.empty 0 (0,fid)
@@ -168,7 +168,7 @@ extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) =
| fcat < totalCats pinfo = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
| Set.member fcat rec = mzero
| otherwise = foldForest (\funid args trees ->
do let FFun fn _ lins = functions pinfo ! funid
do let FFun fn lins = functions pinfo ! funid
args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args)
check_ho_fun fn args
`mplus`
@@ -250,7 +250,7 @@ process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) ac
rhs funid lbl = unsafeAt lins lbl
where
FFun _ _ lins = unsafeAt funs funid
FFun _ lins = unsafeAt funs funid
updateAt :: Int -> a -> [a] -> [a]

View File

@@ -1,205 +0,0 @@
----------------------------------------------------------------------
-- |
-- Maintainer : Krasimir Angelov
-- Stability : (stable)
-- Portability : (portable)
--
-- MCFG parsing, the active algorithm
-----------------------------------------------------------------------------
module PGF.Parsing.FCFG.Active (parse) where
import GF.Data.Assoc
import GF.Data.SortedList
import GF.Data.Utilities
import qualified GF.Data.MultiMap as MM
import PGF.CId
import PGF.Data
import PGF.Tree
import PGF.Parsing.FCFG.Utilities
import PGF.BuildParser
import Control.Monad (guard)
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import Data.Array.IArray
import Debug.Trace
----------------------------------------------------------------------
-- * parsing
type FToken = String
makeFinalEdge cat 0 0 = (cat, [EmptyRange])
makeFinalEdge cat i j = (cat, [makeRange i j])
-- | the list of categories = possible starting categories
parse :: String -> ParserInfo -> Type -> [FToken] -> [Expr]
parse strategy pinfo (DTyp _ start _) toks = map (tree2expr) . nubsort $ filteredForests >>= forest2trees
where
inTokens = input toks
starts = Map.findWithDefault [] start (startCats pinfo)
schart = xchart2syntaxchart chart pinfo
(i,j) = inputBounds inTokens
finalEdges = [makeFinalEdge cat i j | cat <- starts]
forests = chart2forests schart (const False) finalEdges
filteredForests = forests >>= applyProfileToForest
pinfoex = buildParserInfo pinfo
chart = process strategy pinfo pinfoex inTokens axioms emptyXChart
axioms | isBU strategy = literals pinfoex inTokens ++ initialBU pinfo pinfoex inTokens
| isTD strategy = literals pinfoex inTokens ++ initialTD pinfo starts inTokens
isBU s = s=="b"
isTD s = s=="t"
-- used in prediction
emptyChildren :: FunId -> [FCat] -> SyntaxNode FunId RangeRec
emptyChildren ruleid args = SNode ruleid (replicate (length args) [])
process :: String -> ParserInfo -> ParserInfoEx -> Input FToken -> [Item] -> XChart FCat -> XChart FCat
process strategy pinfo pinfoex toks [] chart = chart
process strategy pinfo pinfoex toks (item:items) chart = process strategy pinfo pinfoex toks items $! univRule item chart
where
univRule item@(Active found rng lbl ppos node@(SNode ruleid recs) args cat) chart
| inRange (bounds lin) ppos =
case lin ! ppos of
FSymCat d r -> let c = args !! d
in case recs !! d of
[] -> case insertXChart chart item c of
Nothing -> chart
Just chart -> let items = do item@(Final found' _ _ _) <- lookupXChartFinal chart c
rng <- concatRange rng (found' !! r)
return (Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)) args cat)
++
do guard (isTD strategy)
(ruleid,args) <- topdownRules pinfo c
return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args c)
in process strategy pinfo pinfoex toks items chart
found' -> let items = do rng <- concatRange rng (found' !! r)
return (Active found rng lbl (ppos+1) node args cat)
in process strategy pinfo pinfoex toks items chart
FSymKS [tok]
-> let items = do t_rng <- inputToken toks ? tok
rng' <- concatRange rng t_rng
return (Active found rng' lbl (ppos+1) node args cat)
in process strategy pinfo pinfoex toks items chart
| otherwise =
if inRange (bounds lins) (lbl+1)
then univRule (Active (rng:found) EmptyRange (lbl+1) 0 node args cat) chart
else univRule (Final (reverse (rng:found)) node args cat) chart
where
(FFun _ _ lins) = functions pinfo ! ruleid
lin = sequences pinfo ! (lins ! lbl)
univRule item@(Final found' node args cat) chart =
case insertXChart chart item cat of
Nothing -> chart
Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _) args c) <- lookupXChartAct chart cat
let FFun _ _ lins = functions pinfo ! ruleid
FSymCat d r = (sequences pinfo ! (lins ! l)) ! ppos
rng <- concatRange rng (found' !! r)
return (Active found rng l (ppos+1) (updateChildren node d found') args c)
++
do guard (isBU strategy)
(ruleid,args,c) <- leftcornerCats pinfoex ? cat
let FFun _ _ lins = functions pinfo ! ruleid
FSymCat d r = (sequences pinfo ! (lins ! 0)) ! 0
return (Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid args) d found') args c)
updateChildren :: SyntaxNode FunId RangeRec -> Int -> RangeRec -> SyntaxNode FunId RangeRec
updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs
in process strategy pinfo pinfoex toks items chart
----------------------------------------------------------------------
-- * XChart
data Item
= Active RangeRec
Range
{-# UNPACK #-} !FIndex
{-# UNPACK #-} !FPointPos
(SyntaxNode FunId RangeRec)
[FCat]
FCat
| Final RangeRec (SyntaxNode FunId RangeRec) [FCat] FCat
deriving (Eq, Ord, Show)
data XChart c = XChart !(MM.MultiMap c Item) !(MM.MultiMap c Item)
emptyXChart :: Ord c => XChart c
emptyXChart = XChart MM.empty MM.empty
insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _ _) c =
case MM.insert' c item actives of
Nothing -> Nothing
Just actives -> Just (XChart actives finals)
insertXChart (XChart actives finals) item@(Final _ _ _ _) c =
case MM.insert' c item finals of
Nothing -> Nothing
Just finals -> Just (XChart actives finals)
lookupXChartAct (XChart actives finals) c = actives MM.! c
lookupXChartFinal (XChart actives finals) c = finals MM.! c
xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) (FCat,RangeRec)
xchart2syntaxchart (XChart actives finals) pinfo =
accumAssoc groupSyntaxNodes $
[ case node of
SNode ruleid rrecs -> let FFun fun prof _ = functions pinfo ! ruleid
in ((cat,found), SNode (fun,prof) (zip rhs rrecs))
SString s -> ((cat,found), SString s)
SInt n -> ((cat,found), SInt n)
SFloat f -> ((cat,found), SFloat f)
| (Final found node rhs cat) <- MM.elems finals
]
literals :: ParserInfoEx -> Input FToken -> [Item]
literals pinfoex toks =
[let (c,node) = lexer t in (Final [rng] node [] c) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfoex)]
where
lexer t =
case reads t of
[(n,"")] -> (fcatInt, SInt (n::Integer))
_ -> case reads t of
[(f,"")] -> (fcatFloat, SFloat (f::Double))
_ -> (fcatString,SString t)
----------------------------------------------------------------------
-- Earley --
-- called with all starting categories
initialTD :: ParserInfo -> [FCat] -> Input FToken -> [Item]
initialTD pinfo starts toks =
do cat <- starts
(ruleid,args) <- topdownRules pinfo cat
return (Active [] (Range 0 0) 0 0 (emptyChildren ruleid args) args cat)
topdownRules pinfo cat = f cat []
where
f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo))
g (FApply ruleid args) rules = (ruleid,args) : rules
g (FCoerce cat) rules = f cat rules
----------------------------------------------------------------------
-- Kilbury --
initialBU :: ParserInfo -> ParserInfoEx -> Input FToken -> [Item]
initialBU pinfo pinfoex toks =
do (tok,rngs) <- aAssocs (inputToken toks)
(ruleid,args,cat) <- leftcornerTokens pinfoex ? tok
rng <- rngs
return (Active [] rng 0 1 (emptyChildren ruleid args) args cat)
++
do (ruleid,args,cat) <- epsilonRules pinfoex
let FFun _ _ _ = functions pinfo ! ruleid
return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args cat)

View File

@@ -1,188 +0,0 @@
----------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/05/13 12:40:19 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.6 $
--
-- Basic type declarations and functions for grammar formalisms
-----------------------------------------------------------------------------
module PGF.Parsing.FCFG.Utilities where
import Control.Monad
import Data.Array
import Data.List (groupBy)
import PGF.CId
import PGF.Data
import PGF.Tree
import GF.Data.Assoc
import GF.Data.Utilities (sameLength, foldMerge, splitBy)
------------------------------------------------------------
-- ranges as single pairs
type RangeRec = [Range]
data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int
| EmptyRange
deriving (Eq, Ord, Show)
makeRange :: Int -> Int -> Range
makeRange = Range
concatRange :: Range -> Range -> [Range]
concatRange EmptyRange rng = return rng
concatRange rng EmptyRange = return rng
concatRange (Range i j) (Range j' k) = [Range i k | j==j']
minRange :: Range -> Int
minRange (Range i j) = i
maxRange :: Range -> Int
maxRange (Range i j) = j
------------------------------------------------------------
-- * representaions of input tokens
data Input t = MkInput { inputBounds :: (Int, Int),
inputToken :: Assoc t [Range]
}
input :: Ord t => [t] -> Input t
input toks = MkInput inBounds inToken
where
inBounds = (0, length toks)
inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,tok) <- zip3 [0..] [1..] toks ]
inputMany :: Ord t => [[t]] -> Input t
inputMany toks = MkInput inBounds inToken
where
inBounds = (0, length toks)
inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,ts) <- zip3 [0..] [1..] toks, tok <- ts ]
------------------------------------------------------------
-- * representations of syntactical analyses
-- ** charts as finite maps over edges
-- | The values of the chart, a list of key-daughters pairs,
-- has unique keys. In essence, it is a map from 'n' to daughters.
-- The daughters should be a set (not necessarily sorted) of rhs's.
type SyntaxChart n e = Assoc e [SyntaxNode n [e]]
data SyntaxNode n e = SMeta
| SNode n [e]
| SString String
| SInt Integer
| SFloat Double
deriving (Eq,Ord,Show)
groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]]
groupSyntaxNodes [] = []
groupSyntaxNodes (SNode n0 es0:xs) = (SNode n0 (es0:ess)) : groupSyntaxNodes xs'
where
(ess,xs') = span xs
span [] = ([],[])
span xs@(SNode n es:xs')
| n0 == n = let (ess,xs) = span xs' in (es:ess,xs)
| otherwise = ([],xs)
groupSyntaxNodes (SString s:xs) = (SString s) : groupSyntaxNodes xs
groupSyntaxNodes (SInt n:xs) = (SInt n) : groupSyntaxNodes xs
groupSyntaxNodes (SFloat f:xs) = (SFloat f) : groupSyntaxNodes xs
-- ** syntax forests
data SyntaxForest n = FMeta
| FNode n [[SyntaxForest n]]
-- ^ The outer list should be a set (not necessarily sorted)
-- of possible alternatives. Ie. the outer list
-- is a disjunctive node, and the inner lists
-- are (conjunctive) concatenative nodes
| FString String
| FInt Integer
| FFloat Double
deriving (Eq, Ord, Show)
instance Functor SyntaxForest where
fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests
fmap _ (FString s) = FString s
fmap _ (FInt n) = FInt n
fmap _ (FFloat f) = FFloat f
fmap _ (FMeta) = FMeta
forestName :: SyntaxForest n -> Maybe n
forestName (FNode n _) = Just n
forestName _ = Nothing
unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n)
unifyManyForests = foldM unifyForests FMeta
-- | two forests can be unified, if either is 'FMeta', or both have the same parent,
-- and all children can be unified
unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n)
unifyForests FMeta forest = return forest
unifyForests forest FMeta = return forest
unifyForests (FNode name1 children1) (FNode name2 children2)
| name1 == name2 && not (null children) = return $ FNode name1 children
where children = [ forests | forests1 <- children1, forests2 <- children2,
sameLength forests1 forests2,
forests <- zipWithM unifyForests forests1 forests2 ]
unifyForests (FString s1) (FString s2)
| s1 == s2 = return $ FString s1
unifyForests (FInt n1) (FInt n2)
| n1 == n2 = return $ FInt n1
unifyForests (FFloat f1) (FFloat f2)
| f1 == f2 = return $ FFloat f1
unifyForests _ _ = fail "forest unification failure"
-- ** conversions between representations
chart2forests :: (Ord n, Ord e) =>
SyntaxChart n e -- ^ The complete chart
-> (e -> Bool) -- ^ When is an edge 'FMeta'?
-> [e] -- ^ The starting edges
-> [SyntaxForest n] -- ^ The result has unique keys, ie. all 'n' are joined together.
-- In essence, the result is a map from 'n' to forest daughters
chart2forests chart isMeta = concatMap (edge2forests [])
where edge2forests edges edge
| isMeta edge = [FMeta]
| edge `elem` edges = []
| otherwise = map (item2forest (edge:edges)) $ chart ? edge
item2forest edges (SMeta) = FMeta
item2forest edges (SNode name children) =
FNode name $ children >>= mapM (edge2forests edges)
item2forest edges (SString s) = FString s
item2forest edges (SInt n) = FInt n
item2forest edges (SFloat f) = FFloat f
applyProfileToForest :: SyntaxForest (CId,[Profile]) -> [SyntaxForest CId]
applyProfileToForest (FNode (fun,profiles) children)
| fun == wildCId = concat chForests
| otherwise = [ FNode fun chForests | not (null chForests) ]
where chForests = concat [ mapM (unifyManyForests . map (forests !!)) profiles |
forests0 <- children,
forests <- mapM applyProfileToForest forests0 ]
applyProfileToForest (FString s) = [FString s]
applyProfileToForest (FInt n) = [FInt n]
applyProfileToForest (FFloat f) = [FFloat f]
applyProfileToForest (FMeta) = [FMeta]
forest2trees :: SyntaxForest CId -> [Tree]
forest2trees (FNode n forests) = map (Fun n) $ forests >>= mapM forest2trees
forest2trees (FString s) = [Lit (LStr s)]
forest2trees (FInt n) = [Lit (LInt n)]
forest2trees (FFloat f) = [Lit (LFlt f)]
forest2trees (FMeta) = [Meta 0]