forked from GitHub/gf-core
remove the old parsing code and the -erasing=on flag
This commit is contained in:
10
GF.cabal
10
GF.cabal
@@ -39,10 +39,7 @@ library
|
|||||||
PGF.Macros
|
PGF.Macros
|
||||||
PGF.Generate
|
PGF.Generate
|
||||||
PGF.Linearize
|
PGF.Linearize
|
||||||
PGF.BuildParser
|
PGF.Parse
|
||||||
PGF.Parsing.FCFG.Utilities
|
|
||||||
PGF.Parsing.FCFG.Active
|
|
||||||
PGF.Parsing.FCFG.Incremental
|
|
||||||
PGF.Expr
|
PGF.Expr
|
||||||
PGF.Type
|
PGF.Type
|
||||||
PGF.Tree
|
PGF.Tree
|
||||||
@@ -66,7 +63,6 @@ library
|
|||||||
GF.Infra.Option
|
GF.Infra.Option
|
||||||
GF.Data.ErrM
|
GF.Data.ErrM
|
||||||
GF.Data.BacktrackM
|
GF.Data.BacktrackM
|
||||||
GF.Compile.GenerateFCFG
|
|
||||||
GF.Compile.GeneratePMCFG
|
GF.Compile.GeneratePMCFG
|
||||||
-- not really part of GF but I have changed the original binary library
|
-- not really part of GF but I have changed the original binary library
|
||||||
-- and we have to keep the copy for now.
|
-- and we have to keep the copy for now.
|
||||||
@@ -106,7 +102,6 @@ executable gf
|
|||||||
GF.Data.Utilities
|
GF.Data.Utilities
|
||||||
GF.Data.SortedList
|
GF.Data.SortedList
|
||||||
GF.Data.Assoc
|
GF.Data.Assoc
|
||||||
GF.Compile.GenerateFCFG
|
|
||||||
GF.Data.ErrM
|
GF.Data.ErrM
|
||||||
GF.Data.Operations
|
GF.Data.Operations
|
||||||
GF.Infra.Ident
|
GF.Infra.Ident
|
||||||
@@ -169,9 +164,6 @@ executable gf
|
|||||||
PGF.Macros
|
PGF.Macros
|
||||||
PGF.Generate
|
PGF.Generate
|
||||||
PGF.Linearize
|
PGF.Linearize
|
||||||
PGF.BuildParser
|
|
||||||
PGF.Parsing.FCFG.Utilities
|
|
||||||
PGF.Parsing.FCFG.Active
|
|
||||||
PGF.Binary
|
PGF.Binary
|
||||||
PGF.Paraphrase
|
PGF.Paraphrase
|
||||||
PGF.TypeCheck
|
PGF.TypeCheck
|
||||||
|
|||||||
@@ -7,6 +7,6 @@ concrete LangBul of Lang =
|
|||||||
flags coding=cp1251 ;
|
flags coding=cp1251 ;
|
||||||
|
|
||||||
|
|
||||||
flags startcat = Phr ; unlexer = text ; lexer = text ; erasing = on ; coding = cp1251 ;
|
flags startcat = Phr ; unlexer = text ; lexer = text ; coding = cp1251 ;
|
||||||
|
|
||||||
} ;
|
} ;
|
||||||
|
|||||||
@@ -1,568 +0,0 @@
|
|||||||
----------------------------------------------------------------------
|
|
||||||
-- |
|
|
||||||
-- Maintainer : Krasimir Angelov
|
|
||||||
-- Stability : (stable)
|
|
||||||
-- Portability : (portable)
|
|
||||||
--
|
|
||||||
-- Converting SimpleGFC grammars to fast nonerasing MCFG grammar.
|
|
||||||
--
|
|
||||||
-- the resulting grammars might be /very large/
|
|
||||||
--
|
|
||||||
-- the conversion is only equivalent if the GFC grammar has a context-free backbone.
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
module GF.Compile.GenerateFCFG
|
|
||||||
(convertConcrete) where
|
|
||||||
|
|
||||||
import PGF.CId
|
|
||||||
import PGF.Data
|
|
||||||
import PGF.Macros --hiding (prt)
|
|
||||||
import PGF.Parsing.FCFG.Utilities
|
|
||||||
|
|
||||||
import GF.Data.BacktrackM
|
|
||||||
import GF.Data.SortedList
|
|
||||||
import GF.Data.Utilities (updateNthM, sortNub)
|
|
||||||
|
|
||||||
import qualified Data.Map as Map
|
|
||||||
import qualified Data.IntMap as IntMap
|
|
||||||
import qualified Data.Set as Set
|
|
||||||
import qualified Data.List as List
|
|
||||||
import qualified Data.ByteString.Char8 as BS
|
|
||||||
import Data.Array.IArray
|
|
||||||
import Data.Maybe
|
|
||||||
import Control.Monad
|
|
||||||
|
|
||||||
----------------------------------------------------------------------
|
|
||||||
-- main conversion function
|
|
||||||
|
|
||||||
convertConcrete :: Abstr -> Concr -> ParserInfo
|
|
||||||
convertConcrete abs cnc = fixHoasFuns $ convert abs_defs' conc' cats'
|
|
||||||
where abs_defs = Map.assocs (funs abs)
|
|
||||||
conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient"
|
|
||||||
cats = lincats cnc
|
|
||||||
(abs_defs',conc',cats') = expandHOAS abs_defs conc cats
|
|
||||||
|
|
||||||
expandHOAS :: [(CId,(Type,Int,[Equation]))] -> TermMap -> TermMap -> ([(CId,(Type,Int,[Equation]))],TermMap,TermMap)
|
|
||||||
expandHOAS funs lins lincats = (funs' ++ hoFuns ++ varFuns,
|
|
||||||
Map.unions [lins, hoLins, varLins],
|
|
||||||
Map.unions [lincats, hoLincats, varLincat])
|
|
||||||
where
|
|
||||||
-- replace higher-order fun argument types with new categories
|
|
||||||
funs' = [(f,(fixType ty,a,e)) | (f,(ty,a,e)) <- funs]
|
|
||||||
where
|
|
||||||
fixType :: Type -> Type
|
|
||||||
fixType ty = let (ats,rt) = typeSkeleton ty in cftype (map catName ats) rt
|
|
||||||
|
|
||||||
hoTypes :: [(Int,CId)]
|
|
||||||
hoTypes = sortNub [(n,c) | (_,(ty,_,_)) <- funs, (n,c) <- fst (typeSkeleton ty), n > 0]
|
|
||||||
hoCats = sortNub (map snd hoTypes)
|
|
||||||
-- for each Cat with N bindings, we add a new category _NCat
|
|
||||||
-- each new category contains a single function __NCat : Cat -> _Var -> ... -> _Var -> _NCat
|
|
||||||
hoFuns = [(funName ty,(cftype (c : replicate n varCat) (catName ty),0,[])) | ty@(n,c) <- hoTypes]
|
|
||||||
-- lincats for the new categories
|
|
||||||
hoLincats = Map.fromList [(catName ty, modifyRec (++ replicate n (S [])) (lincatOf c)) | ty@(n,c) <- hoTypes]
|
|
||||||
-- linearizations of the new functions, lin __NCat v_0 ... v_n-1 x = { s1 = x.s1; ...; sk = x.sk; $0 = v_0.s ...
|
|
||||||
hoLins = Map.fromList [ (funName ty, mkLin c n) | ty@(n,c) <- hoTypes]
|
|
||||||
where mkLin c n = modifyRec (\fs -> [P (V 0) (C j) | j <- [0..length fs-1]] ++ [P (V i) (C 0) | i <- [1..n]]) (lincatOf c)
|
|
||||||
-- for each Cat, we a add a fun _Var_Cat : _Var -> Cat
|
|
||||||
varFuns = [(varFunName cat, (cftype [varCat] cat,0,[])) | cat <- hoCats]
|
|
||||||
-- linearizations of the _Var_Cat functions
|
|
||||||
varLins = Map.fromList [(varFunName cat, R [P (V 0) (C 0)]) | cat <- hoCats]
|
|
||||||
-- lincat for the _Var category
|
|
||||||
varLincat = Map.singleton varCat (R [S []])
|
|
||||||
|
|
||||||
lincatOf c = fromMaybe (error $ "No lincat for " ++ showCId c) $ Map.lookup c lincats
|
|
||||||
|
|
||||||
modifyRec :: ([Term] -> [Term]) -> Term -> Term
|
|
||||||
modifyRec f (R xs) = R (f xs)
|
|
||||||
modifyRec _ t = error $ "Not a record: " ++ show t
|
|
||||||
|
|
||||||
varCat = mkCId "_Var"
|
|
||||||
|
|
||||||
catName :: (Int,CId) -> CId
|
|
||||||
catName (0,c) = c
|
|
||||||
catName (n,c) = mkCId ("_" ++ show n ++ showCId c)
|
|
||||||
|
|
||||||
funName :: (Int,CId) -> CId
|
|
||||||
funName (n,c) = mkCId ("__" ++ show n ++ showCId c)
|
|
||||||
|
|
||||||
varFunName :: CId -> CId
|
|
||||||
varFunName c = mkCId ("_Var_" ++ showCId c)
|
|
||||||
|
|
||||||
-- replaces __NCat with _B and _Var_Cat with _.
|
|
||||||
-- the temporary names are just there to avoid name collisions.
|
|
||||||
fixHoasFuns :: ParserInfo -> ParserInfo
|
|
||||||
fixHoasFuns pinfo = pinfo{functions=mkArray [FFun (fixName n) prof lins | FFun n prof lins <- elems (functions pinfo)]}
|
|
||||||
where fixName (CId n) | BS.pack "__" `BS.isPrefixOf` n = (mkCId "_B")
|
|
||||||
| BS.pack "_Var_" `BS.isPrefixOf` n = wildCId
|
|
||||||
fixName n = n
|
|
||||||
|
|
||||||
convert :: [(CId,(Type,Int,[Equation]))] -> TermMap -> TermMap -> ParserInfo
|
|
||||||
convert abs_defs cnc_defs cat_defs = getParserInfo (loop grammarEnv)
|
|
||||||
where
|
|
||||||
srules = [
|
|
||||||
(XRule id args res (map findLinType args) (findLinType res) term) |
|
|
||||||
(id, (ty,_,_)) <- abs_defs, let (args,res) = catSkeleton ty,
|
|
||||||
term <- maybeToList (Map.lookup id cnc_defs)]
|
|
||||||
|
|
||||||
findLinType id = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs)
|
|
||||||
|
|
||||||
(xrulesMap,grammarEnv) = List.foldl' helper (Map.empty,emptyFFunsEnv) srules
|
|
||||||
where
|
|
||||||
helper (xrulesMap,grammarEnv) rule@(XRule id abs_args abs_res cnc_args cnc_res term) =
|
|
||||||
let xrulesMap' = Map.insertWith (++) abs_res [rule] xrulesMap
|
|
||||||
grammarEnv' = List.foldl' (\env selector -> convertRule cnc_defs selector rule env)
|
|
||||||
grammarEnv
|
|
||||||
(mkSingletonSelectors cnc_defs cnc_res)
|
|
||||||
in xrulesMap' `seq` grammarEnv' `seq` (xrulesMap',grammarEnv')
|
|
||||||
|
|
||||||
loop grammarEnv =
|
|
||||||
let (todo, grammarEnv') = takeToDoRules xrulesMap grammarEnv
|
|
||||||
in case todo of
|
|
||||||
[] -> grammarEnv'
|
|
||||||
_ -> loop $! List.foldl' (\env (srules,selector) ->
|
|
||||||
List.foldl' (\env srule -> convertRule cnc_defs selector srule env) env srules) grammarEnv' todo
|
|
||||||
|
|
||||||
convertRule :: TermMap -> TermSelector -> XRule -> GrammarEnv -> GrammarEnv
|
|
||||||
convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) grammarEnv =
|
|
||||||
foldBM addRule
|
|
||||||
grammarEnv
|
|
||||||
(convertTerm cnc_defs selector term [([],[])])
|
|
||||||
(protoFCat cat, map (\scat -> (protoFCat scat,[])) args, ctype, ctypes)
|
|
||||||
where
|
|
||||||
addRule linRec (newCat', newArgs', _, _) env0 =
|
|
||||||
let (env1, newCat) = genFCatHead env0 newCat'
|
|
||||||
(env2, newArgs,idxArgs) = foldr (\((xcat@(PFCat cat rcs tcs),xpaths),ctype,idx) (env,args,all_args) ->
|
|
||||||
let xargs = xcat:[PFCat cat [path] tcs | path <- reverse xpaths]
|
|
||||||
(env1, xargs1) = List.mapAccumL (genFCatArg cnc_defs ctype) env xargs
|
|
||||||
in case xcat of
|
|
||||||
PFCat _ [] _ -> (env , args, all_args)
|
|
||||||
_ -> (env1,xargs1++args,(idx,zip xargs1 xargs):all_args))
|
|
||||||
(env1,[],[]) (zip3 newArgs' ctypes [0..])
|
|
||||||
|
|
||||||
(env3,newLinRec) = List.mapAccumL (translateLin idxArgs linRec) env2 (case newCat' of {PFCat _ rcs _ -> rcs})
|
|
||||||
|
|
||||||
(_,newProfile) = List.mapAccumL accumProf 0 newArgs'
|
|
||||||
where
|
|
||||||
accumProf nr (PFCat _ [] _,_ ) = (nr, [] )
|
|
||||||
accumProf nr (_ ,xpaths) = (nr+cnt+1, [nr..nr+cnt])
|
|
||||||
where cnt = length xpaths
|
|
||||||
|
|
||||||
(env4,funid) = addFFun env3 (FFun fun newProfile (mkArray newLinRec))
|
|
||||||
|
|
||||||
in addProduction env4 newCat (FApply funid newArgs)
|
|
||||||
|
|
||||||
translateLin idxArgs [] grammarEnv lbl' = error "translateLin"
|
|
||||||
translateLin idxArgs ((lbl,syms) : lins) grammarEnv lbl'
|
|
||||||
| lbl' == lbl = addFSeq grammarEnv (lbl,map instSym syms)
|
|
||||||
| otherwise = translateLin idxArgs lins grammarEnv lbl'
|
|
||||||
where
|
|
||||||
instSym = either (\(lbl, nr, xnr) -> instCat lbl nr xnr 0 idxArgs)
|
|
||||||
(\t -> case t of
|
|
||||||
KS s -> FSymKS [s]
|
|
||||||
KP strs vars -> FSymKP strs vars)
|
|
||||||
instCat lbl nr xnr nr' ((idx,xargs):idxArgs)
|
|
||||||
| nr == idx = let (fcat, PFCat _ rcs _) = xargs !! xnr
|
|
||||||
in FSymCat (nr'+xnr) (index lbl rcs 0)
|
|
||||||
| otherwise = instCat lbl nr xnr (nr'+length xargs) idxArgs
|
|
||||||
|
|
||||||
index lbl' (lbl:lbls) idx
|
|
||||||
| lbl' == lbl = idx
|
|
||||||
| otherwise = index lbl' lbls $! (idx+1)
|
|
||||||
|
|
||||||
|
|
||||||
----------------------------------------------------------------------
|
|
||||||
-- term conversion
|
|
||||||
|
|
||||||
type CnvMonad a = BacktrackM Env a
|
|
||||||
|
|
||||||
type FPath = [FIndex]
|
|
||||||
type Env = (ProtoFCat, [(ProtoFCat,[FPath])], Term, [Term])
|
|
||||||
type LinRec = [(FPath, [Either (FPath, FIndex, Int) Tokn])]
|
|
||||||
|
|
||||||
type TermMap = Map.Map CId Term
|
|
||||||
|
|
||||||
convertTerm :: TermMap -> TermSelector -> Term -> LinRec -> CnvMonad LinRec
|
|
||||||
convertTerm cnc_defs selector (V nr) ((lbl_path,lin) : lins) = convertArg selector nr [] lbl_path lin lins
|
|
||||||
convertTerm cnc_defs selector (C nr) ((lbl_path,lin) : lins) = convertCon selector nr lbl_path lin lins
|
|
||||||
convertTerm cnc_defs selector (R record) ((lbl_path,lin) : lins) = convertRec cnc_defs selector 0 record lbl_path lin lins
|
|
||||||
|
|
||||||
convertTerm cnc_defs selector (P term sel) lins = do nr <- evalTerm cnc_defs [] sel
|
|
||||||
convertTerm cnc_defs (TuplePrj nr selector) term lins
|
|
||||||
convertTerm cnc_defs selector (FV vars) lins = do term <- member vars
|
|
||||||
convertTerm cnc_defs selector term lins
|
|
||||||
convertTerm cnc_defs selector (S ts) ((lbl_path,lin) : lins) = do projectHead lbl_path
|
|
||||||
foldM (\lins t -> convertTerm cnc_defs selector t lins) ((lbl_path,lin) : lins) (reverse ts)
|
|
||||||
convertTerm cnc_defs selector (K (KS str)) ((lbl_path,lin) : lins) =
|
|
||||||
do projectHead lbl_path
|
|
||||||
return ((lbl_path,Right (KS str) : lin) : lins)
|
|
||||||
convertTerm cnc_defs selector (K (KP strs vars))((lbl_path,lin) : lins) =
|
|
||||||
do projectHead lbl_path
|
|
||||||
toks <- member (strs:[strs' | Alt strs' _ <- vars])
|
|
||||||
return ((lbl_path, map (Right . KS) toks ++ lin) : lins)
|
|
||||||
convertTerm cnc_defs selector (F id) lins = case Map.lookup id cnc_defs of
|
|
||||||
Just term -> convertTerm cnc_defs selector term lins
|
|
||||||
Nothing -> mzero
|
|
||||||
convertTerm cnc_defs selector (W s t) ((lbl_path,lin) : lins) = do
|
|
||||||
ss <- case t of
|
|
||||||
R ss -> return ss
|
|
||||||
F f -> case Map.lookup f cnc_defs of
|
|
||||||
Just (R ss) -> return ss
|
|
||||||
_ -> mzero
|
|
||||||
convertRec cnc_defs selector 0 [K (KS (s ++ s1)) | K (KS s1) <- ss] lbl_path lin lins
|
|
||||||
convertTerm cnc_defs selector x lins = error ("convertTerm ("++show x++")")
|
|
||||||
|
|
||||||
|
|
||||||
convertArg (TupleSel record) nr path lbl_path lin lins =
|
|
||||||
foldM (\lins (lbl, selector) -> convertArg selector nr (lbl:path) (lbl:lbl_path) lin lins) lins record
|
|
||||||
convertArg (TuplePrj lbl selector) nr path lbl_path lin lins =
|
|
||||||
convertArg selector nr (lbl:path) lbl_path lin lins
|
|
||||||
convertArg (ConSel indices) nr path lbl_path lin lins = do
|
|
||||||
index <- member indices
|
|
||||||
restrictHead lbl_path index
|
|
||||||
restrictArg nr path index
|
|
||||||
return lins
|
|
||||||
convertArg StrSel nr path lbl_path lin lins = do
|
|
||||||
projectHead lbl_path
|
|
||||||
xnr <- projectArg nr path
|
|
||||||
return ((lbl_path, Left (path, nr, xnr) : lin) : lins)
|
|
||||||
|
|
||||||
convertCon (ConSel indices) index lbl_path lin lins = do
|
|
||||||
guard (index `elem` indices)
|
|
||||||
restrictHead lbl_path index
|
|
||||||
return lins
|
|
||||||
convertCon x _ _ _ _ = error $ "SimpleToFCFG,convertCon: " ++ show x
|
|
||||||
|
|
||||||
convertRec cnc_defs selector index [] lbl_path lin lins = return lins
|
|
||||||
convertRec cnc_defs selector@(TupleSel fields) index (val:record) lbl_path lin lins = select fields
|
|
||||||
where
|
|
||||||
select [] = convertRec cnc_defs selector (index+1) record lbl_path lin lins
|
|
||||||
select ((index',sub_sel) : fields)
|
|
||||||
| index == index' = do lins <- convertTerm cnc_defs sub_sel val ((index:lbl_path,lin) : lins)
|
|
||||||
convertRec cnc_defs selector (index+1) record lbl_path lin lins
|
|
||||||
| otherwise = select fields
|
|
||||||
convertRec cnc_defs (TuplePrj index' sub_sel) index record lbl_path lin lins = do
|
|
||||||
convertTerm cnc_defs sub_sel (record !! (index'-index)) ((lbl_path,lin) : lins)
|
|
||||||
|
|
||||||
|
|
||||||
------------------------------------------------------------
|
|
||||||
-- eval a term to ground terms
|
|
||||||
|
|
||||||
evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex
|
|
||||||
evalTerm cnc_defs path (V nr) = do term <- readArgCType nr
|
|
||||||
unifyPType nr (reverse path) (selectTerm path term)
|
|
||||||
evalTerm cnc_defs path (C nr) = return nr
|
|
||||||
evalTerm cnc_defs path (R record) = case path of
|
|
||||||
(index:path) -> evalTerm cnc_defs path (record !! index)
|
|
||||||
evalTerm cnc_defs path (P term sel) = do index <- evalTerm cnc_defs [] sel
|
|
||||||
evalTerm cnc_defs (index:path) term
|
|
||||||
evalTerm cnc_defs path (FV terms) = member terms >>= evalTerm cnc_defs path
|
|
||||||
evalTerm cnc_defs path (F id) = case Map.lookup id cnc_defs of
|
|
||||||
Just term -> evalTerm cnc_defs path term
|
|
||||||
Nothing -> mzero
|
|
||||||
evalTerm cnc_defs path x = error ("evalTerm ("++show x++")")
|
|
||||||
|
|
||||||
unifyPType :: FIndex -> FPath -> Term -> CnvMonad FIndex
|
|
||||||
unifyPType nr path (C max_index) =
|
|
||||||
do (_, args, _, _) <- get
|
|
||||||
let (PFCat _ _ tcs,_) = args !! nr
|
|
||||||
case lookup path tcs of
|
|
||||||
Just index -> return index
|
|
||||||
Nothing -> do index <- member [0..max_index]
|
|
||||||
restrictArg nr path index
|
|
||||||
return index
|
|
||||||
unifyPType nr path t = error $ "unifyPType " ++ show t ---- AR 2/10/2007
|
|
||||||
|
|
||||||
selectTerm :: FPath -> Term -> Term
|
|
||||||
selectTerm [] term = term
|
|
||||||
selectTerm (index:path) (R record) = selectTerm path (record !! index)
|
|
||||||
|
|
||||||
|
|
||||||
----------------------------------------------------------------------
|
|
||||||
-- GrammarEnv
|
|
||||||
|
|
||||||
|
|
||||||
data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int FCatSet FSeqSet FFunSet (IntMap.IntMap (Set.Set Production))
|
|
||||||
type FCatSet = Map.Map CId (Map.Map [FPath] (Map.Map [(FPath,FIndex)] (Either FCat FCat)))
|
|
||||||
type FSeqSet = Map.Map FSeq SeqId
|
|
||||||
type FFunSet = Map.Map FFun FunId
|
|
||||||
|
|
||||||
data ProtoFCat = PFCat CId [FPath] [(FPath,FIndex)]
|
|
||||||
|
|
||||||
protoFCat :: CId -> ProtoFCat
|
|
||||||
protoFCat cat = PFCat cat [] []
|
|
||||||
|
|
||||||
emptyFFunsEnv = GrammarEnv 0 initFCatSet Map.empty Map.empty IntMap.empty
|
|
||||||
where
|
|
||||||
initFCatSet = (ins fcatString (mkCId "String") [[0]] [] $
|
|
||||||
ins fcatInt (mkCId "Int") [[0]] [] $
|
|
||||||
ins fcatFloat (mkCId "Float") [[0]] [] $
|
|
||||||
ins fcatVar (mkCId "_Var") [[0]] [] $
|
|
||||||
Map.empty)
|
|
||||||
|
|
||||||
ins fcat cat rcs tcs catSet =
|
|
||||||
Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s catSet
|
|
||||||
where
|
|
||||||
right_fcat = Right fcat
|
|
||||||
tmap_s = Map.singleton tcs right_fcat
|
|
||||||
rmap_s = Map.singleton rcs tmap_s
|
|
||||||
|
|
||||||
addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv
|
|
||||||
addProduction (GrammarEnv last_id catSet seqSet funSet prodSet) cat p =
|
|
||||||
GrammarEnv last_id catSet seqSet funSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet)
|
|
||||||
|
|
||||||
addFSeq :: GrammarEnv -> (FPath,[FSymbol]) -> (GrammarEnv,SeqId)
|
|
||||||
addFSeq env@(GrammarEnv last_id catSet seqSet funSet prodSet) (_,lst) =
|
|
||||||
case Map.lookup seq seqSet of
|
|
||||||
Just id -> (env,id)
|
|
||||||
Nothing -> let !last_seq = Map.size seqSet
|
|
||||||
in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet prodSet,last_seq)
|
|
||||||
where
|
|
||||||
seq = mkArray lst
|
|
||||||
|
|
||||||
addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId)
|
|
||||||
addFFun env@(GrammarEnv last_id catSet seqSet funSet prodSet) fun =
|
|
||||||
case Map.lookup fun funSet of
|
|
||||||
Just id -> (env,id)
|
|
||||||
Nothing -> let !last_funid = Map.size funSet
|
|
||||||
in (GrammarEnv last_id catSet seqSet (Map.insert fun last_funid funSet) prodSet,last_funid)
|
|
||||||
|
|
||||||
getParserInfo :: GrammarEnv -> ParserInfo
|
|
||||||
getParserInfo (GrammarEnv last_id catSet seqSet funSet prodSet) =
|
|
||||||
ParserInfo { functions = mkArray funSet
|
|
||||||
, sequences = mkArray seqSet
|
|
||||||
, productions0= prodSet
|
|
||||||
, productions = prodSet
|
|
||||||
, startCats = Map.map getFCatList catSet
|
|
||||||
, totalCats = last_id+1
|
|
||||||
}
|
|
||||||
where
|
|
||||||
mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
|
|
||||||
|
|
||||||
getFCatList rcs = Map.fold (\tcs lst -> Map.fold (\x lst -> either id id x : lst) lst tcs) [] rcs
|
|
||||||
|
|
||||||
|
|
||||||
genFCatHead :: GrammarEnv -> ProtoFCat -> (GrammarEnv, FCat)
|
|
||||||
genFCatHead env@(GrammarEnv last_id catSet seqSet funSet prodSet) (PFCat cat rcs tcs) =
|
|
||||||
case Map.lookup cat catSet >>= Map.lookup rcs >>= Map.lookup tcs of
|
|
||||||
Just (Left fcat) -> (GrammarEnv last_id (ins fcat) seqSet funSet prodSet, fcat)
|
|
||||||
Just (Right fcat) -> (env, fcat)
|
|
||||||
Nothing -> let fcat = last_id+1
|
|
||||||
in (GrammarEnv fcat (ins fcat) seqSet funSet prodSet, fcat)
|
|
||||||
where
|
|
||||||
ins fcat = Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s catSet
|
|
||||||
where
|
|
||||||
right_fcat = Right fcat
|
|
||||||
tmap_s = Map.singleton tcs right_fcat
|
|
||||||
rmap_s = Map.singleton rcs tmap_s
|
|
||||||
|
|
||||||
genFCatArg :: TermMap -> Term -> GrammarEnv -> ProtoFCat -> (GrammarEnv, FCat)
|
|
||||||
genFCatArg cnc_defs ctype env@(GrammarEnv last_id catSet seqSet funSet prodSet) (PFCat cat rcs tcs) =
|
|
||||||
case Map.lookup cat catSet >>= Map.lookup rcs of
|
|
||||||
Just tmap -> case Map.lookup tcs tmap of
|
|
||||||
Just (Left fcat) -> (env, fcat)
|
|
||||||
Just (Right fcat) -> (env, fcat)
|
|
||||||
Nothing -> ins tmap
|
|
||||||
Nothing -> ins Map.empty
|
|
||||||
where
|
|
||||||
ins tmap =
|
|
||||||
let fcat = last_id+1
|
|
||||||
(either_fcat,last_id1,tmap1,prodSet1)
|
|
||||||
= foldBM (\tcs st (either_fcat,last_id,tmap,prodSet) ->
|
|
||||||
let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap
|
|
||||||
p = FCoerce fcat_arg
|
|
||||||
prodSet1 = IntMap.insertWith Set.union fcat (Set.singleton p) prodSet
|
|
||||||
in if st
|
|
||||||
then (Right fcat, last_id1,tmap1,prodSet1)
|
|
||||||
else (either_fcat,last_id, tmap ,prodSet ))
|
|
||||||
(Left fcat,fcat,Map.insert tcs either_fcat tmap,prodSet)
|
|
||||||
(gen_tcs ctype [] [])
|
|
||||||
False
|
|
||||||
rmap1 = Map.singleton rcs tmap1
|
|
||||||
in (GrammarEnv last_id1 (Map.insertWith (\_ -> Map.insert rcs tmap1) cat rmap1 catSet) seqSet funSet prodSet1, fcat)
|
|
||||||
where
|
|
||||||
addArg tcs last_id tmap =
|
|
||||||
case Map.lookup tcs tmap of
|
|
||||||
Just (Left fcat) -> (last_id, tmap, fcat)
|
|
||||||
Just (Right fcat) -> (last_id, tmap, fcat)
|
|
||||||
Nothing -> let fcat = last_id+1
|
|
||||||
in (fcat, Map.insert tcs (Left fcat) tmap, fcat)
|
|
||||||
|
|
||||||
gen_tcs :: Term -> FPath -> [(FPath,FIndex)] -> BacktrackM Bool [(FPath,FIndex)]
|
|
||||||
gen_tcs (R record) path acc = foldM (\acc (label,ctype) -> gen_tcs ctype (label:path) acc) acc (zip [0..] record)
|
|
||||||
gen_tcs (S _) path acc = return acc
|
|
||||||
gen_tcs (C max_index) path acc =
|
|
||||||
case List.lookup path tcs of
|
|
||||||
Just index -> return $! addConstraint path index acc
|
|
||||||
Nothing -> do put True
|
|
||||||
index <- member [0..max_index]
|
|
||||||
return $! addConstraint path index acc
|
|
||||||
where
|
|
||||||
addConstraint path0 index0 (c@(path,index) : cs)
|
|
||||||
| path0 > path = c:addConstraint path0 index0 cs
|
|
||||||
addConstraint path0 index0 cs = (path0,index0) : cs
|
|
||||||
gen_tcs (F id) path acc = case Map.lookup id cnc_defs of
|
|
||||||
Just term -> gen_tcs term path acc
|
|
||||||
Nothing -> error ("unknown identifier: "++showCId id)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
------------------------------------------------------------
|
|
||||||
-- TODO queue organization
|
|
||||||
|
|
||||||
type XRulesMap = Map.Map CId [XRule]
|
|
||||||
data XRule = XRule CId {- function -}
|
|
||||||
[CId] {- argument types -}
|
|
||||||
CId {- result type -}
|
|
||||||
[Term] {- argument lin-types representation -}
|
|
||||||
Term {- result lin-type representation -}
|
|
||||||
Term {- body -}
|
|
||||||
|
|
||||||
takeToDoRules :: XRulesMap -> GrammarEnv -> ([([XRule], TermSelector)], GrammarEnv)
|
|
||||||
takeToDoRules xrulesMap (GrammarEnv last_id catSet seqSet funSet prodSet) =
|
|
||||||
(todo,GrammarEnv last_id catSet' seqSet funSet prodSet)
|
|
||||||
where
|
|
||||||
(todo,catSet') =
|
|
||||||
Map.mapAccumWithKey (\todo cat rmap ->
|
|
||||||
let (todo1,rmap1) = Map.mapAccumWithKey (\todo rcs tmap ->
|
|
||||||
let (tcss,tmap') = Map.mapAccumWithKey (\tcss tcs either_xcat ->
|
|
||||||
case either_xcat of
|
|
||||||
Left xcat -> (tcs:tcss,Right xcat)
|
|
||||||
Right xcat -> ( tcss,either_xcat)) [] tmap
|
|
||||||
in case tcss of
|
|
||||||
[] -> ( todo,tmap )
|
|
||||||
_ -> ((srules,mkSelector rcs tcss) : todo,tmap')) todo rmap
|
|
||||||
mb_srules = Map.lookup cat xrulesMap
|
|
||||||
Just srules = mb_srules
|
|
||||||
|
|
||||||
in case mb_srules of
|
|
||||||
Just srules -> (todo1,rmap1)
|
|
||||||
Nothing -> (todo ,rmap1)) [] catSet
|
|
||||||
|
|
||||||
|
|
||||||
------------------------------------------------------------
|
|
||||||
-- The TermSelector
|
|
||||||
|
|
||||||
data TermSelector
|
|
||||||
= TupleSel [(FIndex, TermSelector)]
|
|
||||||
| TuplePrj FIndex TermSelector
|
|
||||||
| ConSel [FIndex]
|
|
||||||
| StrSel
|
|
||||||
deriving Show
|
|
||||||
|
|
||||||
mkSingletonSelectors :: TermMap
|
|
||||||
-> Term -- ^ Type representation term
|
|
||||||
-> [TermSelector] -- ^ list of selectors containing just one string field
|
|
||||||
mkSingletonSelectors cnc_defs term = sels0
|
|
||||||
where
|
|
||||||
(sels0,tcss0) = loop [] ([],[]) term
|
|
||||||
|
|
||||||
loop path st (R record) = List.foldl' (\st (index,term) -> loop (index:path) st term) st (zip [0..] record)
|
|
||||||
loop path (sels,tcss) (C i) = ( sels,map ((,) path) [0..i] : tcss)
|
|
||||||
loop path (sels,tcss) (S _) = (mkSelector [path] tcss0 : sels, tcss)
|
|
||||||
loop path (sels,tcss) (F id) = case Map.lookup id cnc_defs of
|
|
||||||
Just term -> loop path (sels,tcss) term
|
|
||||||
Nothing -> error ("unknown identifier: "++showCId id)
|
|
||||||
|
|
||||||
mkSelector :: [FPath] -> [[(FPath,FIndex)]] -> TermSelector
|
|
||||||
mkSelector rcs tcss =
|
|
||||||
List.foldl' addRestriction (case xs of
|
|
||||||
(path:xs) -> List.foldl' addProjection (path2selector StrSel path) xs) ys
|
|
||||||
where
|
|
||||||
xs = [ reverse path | path <- rcs]
|
|
||||||
ys = [(reverse path,term) | tcs <- tcss, (path,term) <- tcs]
|
|
||||||
|
|
||||||
addRestriction :: TermSelector -> (FPath,FIndex) -> TermSelector
|
|
||||||
addRestriction (ConSel indices) ([] ,n_index) = ConSel (add indices)
|
|
||||||
where
|
|
||||||
add [] = [n_index]
|
|
||||||
add (index':indices)
|
|
||||||
| n_index == index' = index': indices
|
|
||||||
| otherwise = index':add indices
|
|
||||||
addRestriction (TupleSel fields) (index : path,n_index) = TupleSel (add fields)
|
|
||||||
where
|
|
||||||
add [] = [(index,path2selector (ConSel [n_index]) path)]
|
|
||||||
add (field@(index',sub_sel):fields)
|
|
||||||
| index == index' = (index',addRestriction sub_sel (path,n_index)):fields
|
|
||||||
| otherwise = field : add fields
|
|
||||||
|
|
||||||
addProjection :: TermSelector -> FPath -> TermSelector
|
|
||||||
addProjection StrSel [] = StrSel
|
|
||||||
addProjection (TupleSel fields) (index : path) = TupleSel (add fields)
|
|
||||||
where
|
|
||||||
add [] = [(index,path2selector StrSel path)]
|
|
||||||
add (field@(index',sub_sel):fields)
|
|
||||||
| index == index' = (index',addProjection sub_sel path):fields
|
|
||||||
| otherwise = field : add fields
|
|
||||||
|
|
||||||
path2selector base [] = base
|
|
||||||
path2selector base (index : path) = TupleSel [(index,path2selector base path)]
|
|
||||||
|
|
||||||
------------------------------------------------------------
|
|
||||||
-- updating the MCF rule
|
|
||||||
|
|
||||||
readArgCType :: FIndex -> CnvMonad Term
|
|
||||||
readArgCType nr = do (_, _, _, ctypes) <- get
|
|
||||||
return (ctypes !! nr)
|
|
||||||
|
|
||||||
restrictArg :: FIndex -> FPath -> FIndex -> CnvMonad ()
|
|
||||||
restrictArg nr path index = do
|
|
||||||
(head, args, ctype, ctypes) <- get
|
|
||||||
args' <- updateNthM (\(xcat,xs) -> do xcat <- restrictProtoFCat path index xcat
|
|
||||||
return (xcat,xs) ) nr args
|
|
||||||
put (head, args', ctype, ctypes)
|
|
||||||
|
|
||||||
projectArg :: FIndex -> FPath -> CnvMonad Int
|
|
||||||
projectArg nr path = do
|
|
||||||
(head, args, ctype, ctypes) <- get
|
|
||||||
(xnr,args') <- updateArgs nr args
|
|
||||||
put (head, args', ctype, ctypes)
|
|
||||||
return xnr
|
|
||||||
where
|
|
||||||
updateArgs :: FIndex -> [(ProtoFCat,[FPath])] -> CnvMonad (Int,[(ProtoFCat,[FPath])])
|
|
||||||
updateArgs 0 ((a@(PFCat _ rcs _),xpaths) : as)
|
|
||||||
| path `elem` rcs = return (length xpaths+1,(a,path:xpaths):as)
|
|
||||||
| otherwise = do a <- projectProtoFCat path a
|
|
||||||
return (0,(a,xpaths):as)
|
|
||||||
updateArgs n (a : as) = do
|
|
||||||
(xnr,as) <- updateArgs (n-1) as
|
|
||||||
return (xnr,a:as)
|
|
||||||
|
|
||||||
readHeadCType :: CnvMonad Term
|
|
||||||
readHeadCType = do (_, _, ctype, _) <- get
|
|
||||||
return ctype
|
|
||||||
|
|
||||||
restrictHead :: FPath -> FIndex -> CnvMonad ()
|
|
||||||
restrictHead path term
|
|
||||||
= do (head, args, ctype, ctypes) <- get
|
|
||||||
head' <- restrictProtoFCat path term head
|
|
||||||
put (head', args, ctype, ctypes)
|
|
||||||
|
|
||||||
projectHead :: FPath -> CnvMonad ()
|
|
||||||
projectHead path
|
|
||||||
= do (head, args, ctype, ctypes) <- get
|
|
||||||
head' <- projectProtoFCat path head
|
|
||||||
put (head', args, ctype, ctypes)
|
|
||||||
|
|
||||||
restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> CnvMonad ProtoFCat
|
|
||||||
restrictProtoFCat path0 index0 (PFCat cat rcs tcs) = do
|
|
||||||
tcs <- addConstraint tcs
|
|
||||||
return (PFCat cat rcs tcs)
|
|
||||||
where
|
|
||||||
addConstraint (c@(path,index) : cs)
|
|
||||||
| path0 > path = liftM (c:) (addConstraint cs)
|
|
||||||
| path0 == path = guard (index0 == index) >>
|
|
||||||
return (c : cs)
|
|
||||||
addConstraint cs = return ((path0,index0) : cs)
|
|
||||||
|
|
||||||
projectProtoFCat :: FPath -> ProtoFCat -> CnvMonad ProtoFCat
|
|
||||||
projectProtoFCat path0 (PFCat cat rcs tcs) = do
|
|
||||||
return (PFCat cat (addConstraint rcs) tcs)
|
|
||||||
where
|
|
||||||
addConstraint (path : rcs)
|
|
||||||
| path0 > path = path : addConstraint rcs
|
|
||||||
| path0 == path = path : rcs
|
|
||||||
addConstraint rcs = path0 : rcs
|
|
||||||
|
|
||||||
mkArray lst = listArray (0,length lst-1) lst
|
|
||||||
@@ -123,7 +123,7 @@ convertRule cnc_defs grammarEnv (PFRule fun args res ctypes ctype term) =
|
|||||||
let [newCat] = getFCats env0 newCat'
|
let [newCat] = getFCats env0 newCat'
|
||||||
(env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs'
|
(env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs'
|
||||||
|
|
||||||
(env2,funid) = addFFun env1 (FFun fun [[n] | n <- [0..length newArgs-1]] (mkArray lins))
|
(env2,funid) = addFFun env1 (FFun fun (mkArray lins))
|
||||||
|
|
||||||
in addProduction env2 newCat (FApply funid newArgs)
|
in addProduction env2 newCat (FApply funid newArgs)
|
||||||
|
|
||||||
@@ -394,7 +394,7 @@ expandHOAS abs_defs cnc_defs lincats lindefs env =
|
|||||||
(env1,lins) = List.mapAccumL addFSeq env linRec
|
(env1,lins) = List.mapAccumL addFSeq env linRec
|
||||||
newLinRec = mkArray lins
|
newLinRec = mkArray lins
|
||||||
|
|
||||||
(env2,funid) = addFFun env1 (FFun _B [[i] | i <- [0..n]] newLinRec)
|
(env2,funid) = addFFun env1 (FFun _B newLinRec)
|
||||||
|
|
||||||
env3 = foldl (\env (arg,res) -> addProduction env res (FApply funid (arg : replicate n fcatVar)))
|
env3 = foldl (\env (arg,res) -> addProduction env res (FApply funid (arg : replicate n fcatVar)))
|
||||||
env2
|
env2
|
||||||
@@ -462,7 +462,7 @@ getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
|
|||||||
, sequences = mkArray seqSet
|
, sequences = mkArray seqSet
|
||||||
, productions0= productions0
|
, productions0= productions0
|
||||||
, productions = filterProductions productions0
|
, productions = filterProductions productions0
|
||||||
, startCats = maybe Map.empty (Map.map (\(start,end,_) -> range (start,end))) (IntMap.lookup 0 catSet)
|
, startCats = maybe Map.empty (Map.map (\(start,end,_) -> (start,end))) (IntMap.lookup 0 catSet)
|
||||||
, totalCats = last_id+1
|
, totalCats = last_id+1
|
||||||
}
|
}
|
||||||
where
|
where
|
||||||
|
|||||||
@@ -1,374 +0,0 @@
|
|||||||
{-# LANGUAGE BangPatterns, CPP #-}
|
|
||||||
----------------------------------------------------------------------
|
|
||||||
-- |
|
|
||||||
-- Maintainer : Krasimir Angelov
|
|
||||||
-- Stability : (stable)
|
|
||||||
-- Portability : (portable)
|
|
||||||
--
|
|
||||||
-- Converting SimpleGFC grammars to fast nonerasing MCFG grammar.
|
|
||||||
--
|
|
||||||
-- the resulting grammars might be /very large/
|
|
||||||
--
|
|
||||||
-- the conversion is only equivalent if the GFC grammar has a context-free backbone.
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
module GF.Compile.GeneratePMCFG
|
|
||||||
(convertConcrete) where
|
|
||||||
|
|
||||||
import PGF.CId
|
|
||||||
import PGF.Data
|
|
||||||
import PGF.Macros --hiding (prt)
|
|
||||||
|
|
||||||
import GF.Data.BacktrackM
|
|
||||||
import GF.Data.SortedList
|
|
||||||
import GF.Data.Utilities (updateNthM, sortNub)
|
|
||||||
|
|
||||||
import qualified Data.Map as Map
|
|
||||||
import qualified Data.Set as Set
|
|
||||||
import qualified Data.List as List
|
|
||||||
import qualified Data.IntMap as IntMap
|
|
||||||
import qualified Data.ByteString.Char8 as BS
|
|
||||||
import Data.Array.IArray
|
|
||||||
import Data.Maybe
|
|
||||||
import Control.Monad
|
|
||||||
import Debug.Trace
|
|
||||||
|
|
||||||
----------------------------------------------------------------------
|
|
||||||
-- main conversion function
|
|
||||||
|
|
||||||
convertConcrete :: Abstr -> Concr -> ParserInfo
|
|
||||||
convertConcrete abs cnc = convert abs_defs conc cats
|
|
||||||
where abs_defs = Map.assocs (funs abs)
|
|
||||||
conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient"
|
|
||||||
cats = lincats cnc
|
|
||||||
|
|
||||||
convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> ParserInfo
|
|
||||||
convert abs_defs cnc_defs cat_defs =
|
|
||||||
let env = expandHOAS abs_defs cnc_defs cat_defs (emptyGrammarEnv cnc_defs cat_defs)
|
|
||||||
in getParserInfo (List.foldl' (convertRule cnc_defs) env xrules)
|
|
||||||
where
|
|
||||||
xrules = [
|
|
||||||
(XRule id args (0,res) (map findLinType args) (findLinType (0,res)) term) |
|
|
||||||
(id, (ty,_)) <- abs_defs, let (args,res) = typeSkeleton ty,
|
|
||||||
term <- maybeToList (Map.lookup id cnc_defs)]
|
|
||||||
|
|
||||||
findLinType (_,id) = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs)
|
|
||||||
|
|
||||||
brk :: (GrammarEnv -> GrammarEnv) -> (GrammarEnv -> GrammarEnv)
|
|
||||||
brk f (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
|
|
||||||
case f (GrammarEnv last_id catSet seqSet funSet crcSet IntMap.empty) of
|
|
||||||
(GrammarEnv last_id catSet seqSet funSet crcSet topdown1) -> IntMap.foldWithKey optimize (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) topdown1
|
|
||||||
where
|
|
||||||
optimize cat ps env = IntMap.foldWithKey ff env (IntMap.fromListWith (++) [(funid,[args]) | FApply funid args <- Set.toList ps])
|
|
||||||
where
|
|
||||||
ff :: FunId -> [[FCat]] -> GrammarEnv -> GrammarEnv
|
|
||||||
ff funid xs env
|
|
||||||
| product (map Set.size ys) == count =
|
|
||||||
case List.mapAccumL (\env c -> addFCoercion env (Set.toList c)) env ys of
|
|
||||||
(env,args) -> addProduction env cat (FApply funid args)
|
|
||||||
| otherwise = List.foldl (\env args -> addProduction env cat (FApply funid args)) env xs
|
|
||||||
where
|
|
||||||
count = length xs
|
|
||||||
ys = foldr (zipWith Set.insert) (repeat Set.empty) xs
|
|
||||||
|
|
||||||
convertRule :: TermMap -> GrammarEnv -> XRule -> GrammarEnv
|
|
||||||
convertRule cnc_defs grammarEnv (XRule fun args res ctypes ctype term) =
|
|
||||||
brk (\grammarEnv -> foldBM addRule
|
|
||||||
grammarEnv
|
|
||||||
(convertTerm cnc_defs [] ctype term [([],[])])
|
|
||||||
(protoFCat cnc_defs res ctype, zipWith (protoFCat cnc_defs) args ctypes)) grammarEnv
|
|
||||||
where
|
|
||||||
addRule linRec (newCat', newArgs') env0 =
|
|
||||||
let [newCat] = getFCats env0 newCat'
|
|
||||||
(env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs'
|
|
||||||
|
|
||||||
(env2,lins) = List.mapAccumL addFSeq env1 linRec
|
|
||||||
newLinRec = mkArray lins
|
|
||||||
|
|
||||||
(env3,funid) = addFFun env2 (FFun fun [[n] | n <- [0..length newArgs-1]] newLinRec)
|
|
||||||
|
|
||||||
in addProduction env3 newCat (FApply funid newArgs)
|
|
||||||
|
|
||||||
----------------------------------------------------------------------
|
|
||||||
-- term conversion
|
|
||||||
|
|
||||||
type CnvMonad a = BacktrackM Env a
|
|
||||||
|
|
||||||
type FPath = [FIndex]
|
|
||||||
data ProtoFCat = PFCat Int CId [FPath] [(FPath,[FIndex])]
|
|
||||||
type Env = (ProtoFCat, [ProtoFCat])
|
|
||||||
type LinRec = [(FPath, [FSymbol])]
|
|
||||||
data XRule = XRule CId {- function -}
|
|
||||||
[(Int,CId)] {- argument types: context size and category -}
|
|
||||||
(Int,CId) {- result type : context size (always 0) and category -}
|
|
||||||
[Term] {- argument lin-types representation -}
|
|
||||||
Term {- result lin-type representation -}
|
|
||||||
Term {- body -}
|
|
||||||
|
|
||||||
protoFCat :: TermMap -> (Int,CId) -> Term -> ProtoFCat
|
|
||||||
protoFCat cnc_defs (n,cat) ctype =
|
|
||||||
let (rcs,tcs) = loop [] [] [] ctype'
|
|
||||||
in PFCat n cat rcs tcs
|
|
||||||
where
|
|
||||||
ctype' -- extend the high-order linearization type
|
|
||||||
| n > 0 = case ctype of
|
|
||||||
R xs -> R (xs ++ replicate n (S []))
|
|
||||||
_ -> error $ "Not a record: " ++ show ctype
|
|
||||||
| otherwise = ctype
|
|
||||||
|
|
||||||
loop path rcs tcs (R record) = List.foldl' (\(rcs,tcs) (index,term) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record)
|
|
||||||
loop path rcs tcs (C i) = ( rcs,(path,[0..i]):tcs)
|
|
||||||
loop path rcs tcs (S _) = (path:rcs, tcs)
|
|
||||||
loop path rcs tcs (F id) = case Map.lookup id cnc_defs of
|
|
||||||
Just term -> loop path rcs tcs term
|
|
||||||
Nothing -> error ("unknown identifier: "++show id)
|
|
||||||
|
|
||||||
type TermMap = Map.Map CId Term
|
|
||||||
|
|
||||||
convertTerm :: TermMap -> FPath -> Term -> Term -> LinRec -> CnvMonad LinRec
|
|
||||||
convertTerm cnc_defs sel ctype (V nr) ((lbl_path,lin) : lins) = convertArg ctype nr (reverse sel) lbl_path lin lins
|
|
||||||
convertTerm cnc_defs sel ctype (C nr) ((lbl_path,lin) : lins) = convertCon ctype nr (reverse sel) lbl_path lin lins
|
|
||||||
convertTerm cnc_defs sel ctype (R record) ((lbl_path,lin) : lins) = convertRec cnc_defs sel ctype record lbl_path lin lins
|
|
||||||
convertTerm cnc_defs sel ctype (P term p) lins = do nr <- evalTerm cnc_defs [] p
|
|
||||||
convertTerm cnc_defs (nr:sel) ctype term lins
|
|
||||||
convertTerm cnc_defs sel ctype (FV vars) lins = do term <- member vars
|
|
||||||
convertTerm cnc_defs sel ctype term lins
|
|
||||||
convertTerm cnc_defs sel ctype (S ts) lins = foldM (\lins t -> convertTerm cnc_defs sel ctype t lins) lins (reverse ts)
|
|
||||||
--convertTerm cnc_defs sel ctype (K t) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok t : lin) : lins)
|
|
||||||
convertTerm cnc_defs sel ctype (K (KS t)) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok (KS t) : lin) : lins)
|
|
||||||
convertTerm cnc_defs sel ctype (K (KP strs vars))((lbl_path,lin) : lins) =
|
|
||||||
do toks <- member (strs:[strs' | Alt strs' _ <- vars])
|
|
||||||
return ((lbl_path, map (FSymTok . KS) toks ++ lin) : lins)
|
|
||||||
convertTerm cnc_defs sel ctype (F id) lins = case Map.lookup id cnc_defs of
|
|
||||||
Just term -> convertTerm cnc_defs sel ctype term lins
|
|
||||||
Nothing -> mzero
|
|
||||||
convertTerm cnc_defs sel ctype (W s t) ((lbl_path,lin) : lins) = do
|
|
||||||
ss <- case t of
|
|
||||||
R ss -> return ss
|
|
||||||
F f -> case Map.lookup f cnc_defs of
|
|
||||||
Just (R ss) -> return ss
|
|
||||||
_ -> mzero
|
|
||||||
convertRec cnc_defs sel ctype [K (KS (s ++ s1)) | K (KS s1) <- ss] lbl_path lin lins
|
|
||||||
convertTerm cnc_defs sel ctype x lins = error ("convertTerm ("++show x++")")
|
|
||||||
|
|
||||||
|
|
||||||
convertArg (R record) nr path lbl_path lin lins =
|
|
||||||
foldM (\lins (lbl, ctype) -> convertArg ctype nr (lbl:path) (lbl:lbl_path) lin lins) lins (zip [0..] record)
|
|
||||||
convertArg (C max) nr path lbl_path lin lins = do
|
|
||||||
index <- member [0..max]
|
|
||||||
restrictHead lbl_path index
|
|
||||||
restrictArg nr path index
|
|
||||||
return lins
|
|
||||||
convertArg (S _) nr path lbl_path lin lins = do
|
|
||||||
(_, args) <- get
|
|
||||||
let PFCat _ cat rcs tcs = args !! nr
|
|
||||||
l = index path rcs 0
|
|
||||||
sym | isLiteralCat cat = FSymLit nr l
|
|
||||||
| otherwise = FSymCat nr l
|
|
||||||
return ((lbl_path, sym : lin) : lins)
|
|
||||||
where
|
|
||||||
index lbl' (lbl:lbls) idx
|
|
||||||
| lbl' == lbl = idx
|
|
||||||
| otherwise = index lbl' lbls $! (idx+1)
|
|
||||||
|
|
||||||
|
|
||||||
convertCon (C max) index [] lbl_path lin lins = do
|
|
||||||
guard (index <= max)
|
|
||||||
restrictHead lbl_path index
|
|
||||||
return lins
|
|
||||||
convertCon x _ _ _ _ _ = error $ "SimpleToFCFG,convertCon: " ++ show x
|
|
||||||
|
|
||||||
convertRec cnc_defs [] (R ctypes) record lbl_path lin lins =
|
|
||||||
foldM (\lins (index,ctype,val) -> convertTerm cnc_defs [] ctype val ((index:lbl_path,lin) : lins))
|
|
||||||
lins
|
|
||||||
(zip3 [0..] ctypes record)
|
|
||||||
convertRec cnc_defs (index:sub_sel) ctype record lbl_path lin lins = do
|
|
||||||
convertTerm cnc_defs sub_sel ctype (record !! index) ((lbl_path,lin) : lins)
|
|
||||||
|
|
||||||
|
|
||||||
------------------------------------------------------------
|
|
||||||
-- eval a term to ground terms
|
|
||||||
|
|
||||||
evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex
|
|
||||||
evalTerm cnc_defs path (V nr) = do (_, args) <- get
|
|
||||||
let PFCat _ _ _ tcs = args !! nr
|
|
||||||
rpath = reverse path
|
|
||||||
index <- member (fromMaybe (error "evalTerm: wrong path") (lookup rpath tcs))
|
|
||||||
restrictArg nr rpath index
|
|
||||||
return index
|
|
||||||
evalTerm cnc_defs path (C nr) = return nr
|
|
||||||
evalTerm cnc_defs path (R record) = case path of
|
|
||||||
(index:path) -> evalTerm cnc_defs path (record !! index)
|
|
||||||
evalTerm cnc_defs path (P term sel) = do index <- evalTerm cnc_defs [] sel
|
|
||||||
evalTerm cnc_defs (index:path) term
|
|
||||||
evalTerm cnc_defs path (FV terms) = member terms >>= evalTerm cnc_defs path
|
|
||||||
evalTerm cnc_defs path (F id) = case Map.lookup id cnc_defs of
|
|
||||||
Just term -> evalTerm cnc_defs path term
|
|
||||||
Nothing -> mzero
|
|
||||||
evalTerm cnc_defs path x = error ("evalTerm ("++show x++")")
|
|
||||||
|
|
||||||
|
|
||||||
----------------------------------------------------------------------
|
|
||||||
-- GrammarEnv
|
|
||||||
|
|
||||||
data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int CatSet SeqSet FunSet CoerceSet (IntMap.IntMap (Set.Set Production))
|
|
||||||
type CatSet = IntMap.IntMap (Map.Map CId (FCat,FCat,[Int]))
|
|
||||||
type SeqSet = Map.Map FSeq SeqId
|
|
||||||
type FunSet = Map.Map FFun FunId
|
|
||||||
type CoerceSet= Map.Map [FCat] FCat
|
|
||||||
|
|
||||||
emptyGrammarEnv cnc_defs lincats =
|
|
||||||
let (last_id,catSet) = Map.mapAccumWithKey computeCatRange 0 lincats
|
|
||||||
in GrammarEnv last_id (IntMap.singleton 0 catSet) Map.empty Map.empty Map.empty IntMap.empty
|
|
||||||
where
|
|
||||||
computeCatRange index cat ctype
|
|
||||||
| cat == cidString = (index, (fcatString,fcatString,[]))
|
|
||||||
| cat == cidInt = (index, (fcatInt, fcatInt, []))
|
|
||||||
| cat == cidFloat = (index, (fcatFloat, fcatFloat, []))
|
|
||||||
| otherwise = (index+size,(index,index+size-1,poly))
|
|
||||||
where
|
|
||||||
(size,poly) = getMultipliers 1 [] ctype
|
|
||||||
|
|
||||||
getMultipliers m ms (R record) = foldl (\(m,ms) t -> getMultipliers m ms t) (m,ms) record
|
|
||||||
getMultipliers m ms (S _) = (m,ms)
|
|
||||||
getMultipliers m ms (C max_index) = (m*(max_index+1),m : ms)
|
|
||||||
getMultipliers m ms (F id) = case Map.lookup id cnc_defs of
|
|
||||||
Just term -> getMultipliers m ms term
|
|
||||||
Nothing -> error ("unknown identifier: "++prCId id)
|
|
||||||
|
|
||||||
|
|
||||||
expandHOAS abs_defs cnc_defs lincats env =
|
|
||||||
foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) hoCats
|
|
||||||
where
|
|
||||||
hoTypes :: [(Int,CId)]
|
|
||||||
hoTypes = sortNub [(n,c) | (_,(ty,_)) <- abs_defs
|
|
||||||
, (n,c) <- fst (typeSkeleton ty), n > 0]
|
|
||||||
|
|
||||||
hoCats :: [CId]
|
|
||||||
hoCats = sortNub [c | (_,(ty,_)) <- abs_defs
|
|
||||||
, Hyp _ ty <- case ty of {DTyp hyps val _ -> hyps}
|
|
||||||
, c <- fst (catSkeleton ty)]
|
|
||||||
|
|
||||||
-- add a range of PMCFG categories for each GF high-order category
|
|
||||||
add_hoCat env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,cat) =
|
|
||||||
case IntMap.lookup 0 catSet >>= Map.lookup cat of
|
|
||||||
Just (start,end,ms) -> let !catSet' = IntMap.insertWith Map.union n (Map.singleton cat (last_id,last_id+(end-start),ms)) catSet
|
|
||||||
!last_id' = last_id+(end-start)+1
|
|
||||||
in (GrammarEnv last_id' catSet' seqSet funSet crcSet prodSet)
|
|
||||||
Nothing -> env
|
|
||||||
|
|
||||||
-- add one PMCFG function for each high-order type: _B : Cat -> Var -> ... -> Var -> HoCat
|
|
||||||
add_hoFun env (n,cat) =
|
|
||||||
let linRec = reverse $
|
|
||||||
[(l ,[FSymCat 0 i]) | (l,i) <- case arg of {PFCat _ _ rcs _ -> zip rcs [0..]}] ++
|
|
||||||
[([],[FSymLit i 0]) | i <- [1..n]]
|
|
||||||
(env1,lins) = List.mapAccumL addFSeq env linRec
|
|
||||||
newLinRec = mkArray lins
|
|
||||||
|
|
||||||
(env2,funid) = addFFun env1 (FFun _B [[i] | i <- [0..n]] newLinRec)
|
|
||||||
|
|
||||||
env3 = foldl (\env (arg,res) -> addProduction env res (FApply funid (arg : replicate n fcatVar)))
|
|
||||||
env2
|
|
||||||
(zip (getFCats env2 arg) (getFCats env2 res))
|
|
||||||
in env3
|
|
||||||
where
|
|
||||||
(arg,res) = case Map.lookup cat lincats of
|
|
||||||
Nothing -> error $ "No lincat for " ++ prCId cat
|
|
||||||
Just ctype -> (protoFCat cnc_defs (0,cat) ctype, protoFCat cnc_defs (n,cat) ctype)
|
|
||||||
|
|
||||||
-- add one PMCFG function for each high-order category: _V : Var -> Cat
|
|
||||||
add_varFun env cat =
|
|
||||||
let (env1,seqid) = addFSeq env ([],[FSymLit 0 0])
|
|
||||||
lins = replicate (case res of {PFCat _ _ rcs _ -> length rcs}) seqid
|
|
||||||
(env2,funid) = addFFun env1 (FFun _V [[0]] (mkArray lins))
|
|
||||||
env3 = foldl (\env res -> addProduction env2 res (FApply funid [fcatVar]))
|
|
||||||
env2
|
|
||||||
(getFCats env2 res)
|
|
||||||
in env3
|
|
||||||
where
|
|
||||||
res = case Map.lookup cat lincats of
|
|
||||||
Nothing -> error $ "No lincat for " ++ prCId cat
|
|
||||||
Just ctype -> protoFCat cnc_defs (0,cat) ctype
|
|
||||||
|
|
||||||
_B = mkCId "_B"
|
|
||||||
_V = mkCId "_V"
|
|
||||||
|
|
||||||
|
|
||||||
addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv
|
|
||||||
addProduction (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) cat p =
|
|
||||||
GrammarEnv last_id catSet seqSet funSet crcSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet)
|
|
||||||
|
|
||||||
addFSeq :: GrammarEnv -> (FPath,[FSymbol]) -> (GrammarEnv,SeqId)
|
|
||||||
addFSeq env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (_,lst) =
|
|
||||||
case Map.lookup seq seqSet of
|
|
||||||
Just id -> (env,id)
|
|
||||||
Nothing -> let !last_seq = Map.size seqSet
|
|
||||||
in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet crcSet prodSet,last_seq)
|
|
||||||
where
|
|
||||||
seq = mkArray lst
|
|
||||||
|
|
||||||
addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId)
|
|
||||||
addFFun env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) fun =
|
|
||||||
case Map.lookup fun funSet of
|
|
||||||
Just id -> (env,id)
|
|
||||||
Nothing -> let !last_funid = Map.size funSet
|
|
||||||
in (GrammarEnv last_id catSet seqSet (Map.insert fun last_funid funSet) crcSet prodSet,last_funid)
|
|
||||||
|
|
||||||
addFCoercion :: GrammarEnv -> [FCat] -> (GrammarEnv,FCat)
|
|
||||||
addFCoercion env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) sub_fcats =
|
|
||||||
case sub_fcats of
|
|
||||||
[fcat] -> (env,fcat)
|
|
||||||
_ -> case Map.lookup sub_fcats crcSet of
|
|
||||||
Just fcat -> (env,fcat)
|
|
||||||
Nothing -> let !fcat = last_id+1
|
|
||||||
in (GrammarEnv fcat catSet seqSet funSet (Map.insert sub_fcats fcat crcSet) prodSet,fcat)
|
|
||||||
|
|
||||||
getParserInfo :: GrammarEnv -> ParserInfo
|
|
||||||
getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
|
|
||||||
ParserInfo { functions = mkArray funSet
|
|
||||||
, sequences = mkArray seqSet
|
|
||||||
, productions = IntMap.union prodSet coercions
|
|
||||||
, startCats = maybe Map.empty (Map.map (\(start,end,_) -> range (start,end))) (IntMap.lookup 0 catSet)
|
|
||||||
, totalCats = last_id+1
|
|
||||||
}
|
|
||||||
where
|
|
||||||
mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
|
|
||||||
|
|
||||||
coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet]
|
|
||||||
|
|
||||||
getFCats :: GrammarEnv -> ProtoFCat -> [FCat]
|
|
||||||
getFCats (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat n cat rcs tcs) =
|
|
||||||
case IntMap.lookup n catSet >>= Map.lookup cat of
|
|
||||||
Just (start,end,ms) -> reverse (solutions (variants ms tcs start) ())
|
|
||||||
where
|
|
||||||
variants _ [] fcat = return fcat
|
|
||||||
variants (m:ms) ((_,indices) : tcs) fcat = do index <- member indices
|
|
||||||
variants ms tcs ((m*index) + fcat)
|
|
||||||
|
|
||||||
------------------------------------------------------------
|
|
||||||
-- updating the MCF rule
|
|
||||||
|
|
||||||
restrictArg :: FIndex -> FPath -> FIndex -> CnvMonad ()
|
|
||||||
restrictArg nr path index = do
|
|
||||||
(head, args) <- get
|
|
||||||
args' <- updateNthM (restrictProtoFCat path index) nr args
|
|
||||||
put (head, args')
|
|
||||||
|
|
||||||
restrictHead :: FPath -> FIndex -> CnvMonad ()
|
|
||||||
restrictHead path term
|
|
||||||
= do (head, args) <- get
|
|
||||||
head' <- restrictProtoFCat path term head
|
|
||||||
put (head', args)
|
|
||||||
|
|
||||||
restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> CnvMonad ProtoFCat
|
|
||||||
restrictProtoFCat path0 index0 (PFCat n cat rcs tcs) = do
|
|
||||||
tcs <- addConstraint tcs
|
|
||||||
return (PFCat n cat rcs tcs)
|
|
||||||
where
|
|
||||||
addConstraint [] = error "restrictProtoFCat: unknown path"
|
|
||||||
addConstraint (c@(path,indices) : tcs)
|
|
||||||
| path0 == path = guard (index0 `elem` indices) >>
|
|
||||||
return ((path,[index0]) : tcs)
|
|
||||||
| otherwise = liftM (c:) (addConstraint tcs)
|
|
||||||
|
|
||||||
mkArray lst = listArray (0,length lst-1) lst
|
|
||||||
@@ -2,8 +2,7 @@
|
|||||||
module GF.Compile.GrammarToPGF (mkCanon2gfcc,addParsers) where
|
module GF.Compile.GrammarToPGF (mkCanon2gfcc,addParsers) where
|
||||||
|
|
||||||
import GF.Compile.Export
|
import GF.Compile.Export
|
||||||
import qualified GF.Compile.GenerateFCFG as FCFG
|
import GF.Compile.GeneratePMCFG
|
||||||
import qualified GF.Compile.GeneratePMCFG as PMCFG
|
|
||||||
|
|
||||||
import PGF.CId
|
import PGF.CId
|
||||||
import qualified PGF.Macros as CM
|
import qualified PGF.Macros as CM
|
||||||
@@ -48,12 +47,8 @@ addParsers :: Options -> D.PGF -> IO D.PGF
|
|||||||
addParsers opts pgf = do cncs <- sequence [conv lang cnc | (lang,cnc) <- Map.toList (D.concretes pgf)]
|
addParsers opts pgf = do cncs <- sequence [conv lang cnc | (lang,cnc) <- Map.toList (D.concretes pgf)]
|
||||||
return pgf { D.concretes = Map.fromList cncs }
|
return pgf { D.concretes = Map.fromList cncs }
|
||||||
where
|
where
|
||||||
conv lang cnc = do pinfo <- if flag optErasing (erasingFromCnc `addOptions` opts)
|
conv lang cnc = do pinfo <- convertConcrete opts (D.abstract pgf) lang cnc
|
||||||
then PMCFG.convertConcrete opts (D.abstract pgf) lang cnc
|
|
||||||
else return $ FCFG.convertConcrete (D.abstract pgf) cnc
|
|
||||||
return (lang,cnc { D.parser = Just pinfo })
|
return (lang,cnc { D.parser = Just pinfo })
|
||||||
where
|
|
||||||
erasingFromCnc = modifyFlags (\o -> o { optErasing = Map.lookup (mkCId "erasing") (D.cflags cnc) == Just "on"})
|
|
||||||
|
|
||||||
-- Generate PGF from GFCM.
|
-- Generate PGF from GFCM.
|
||||||
-- this assumes a grammar translated by canon2canon
|
-- this assumes a grammar translated by canon2canon
|
||||||
|
|||||||
@@ -96,13 +96,14 @@ parser2js p = [new "Parser" [JS.EObj $ [JS.Prop (JS.IntPropName cat) (JS.EArray
|
|||||||
JS.EObj $ map cats (Map.assocs (startCats p)),
|
JS.EObj $ map cats (Map.assocs (startCats p)),
|
||||||
JS.EInt (totalCats p)]]
|
JS.EInt (totalCats p)]]
|
||||||
where
|
where
|
||||||
cats (c,is) = JS.Prop (JS.IdentPropName (JS.Ident (showCId c))) (JS.EArray (map JS.EInt is))
|
cats (c,(start,end)) = JS.Prop (JS.IdentPropName (JS.Ident (showCId c))) (JS.EObj [JS.Prop (JS.IdentPropName (JS.Ident "s")) (JS.EInt start)
|
||||||
|
,JS.Prop (JS.IdentPropName (JS.Ident "e")) (JS.EInt end)])
|
||||||
|
|
||||||
frule2js :: Production -> JS.Expr
|
frule2js :: Production -> JS.Expr
|
||||||
frule2js (FApply funid args) = new "Rule" [JS.EInt funid, JS.EArray (map JS.EInt args)]
|
frule2js (FApply funid args) = new "Rule" [JS.EInt funid, JS.EArray (map JS.EInt args)]
|
||||||
frule2js (FCoerce arg) = new "Coerce" [JS.EInt arg]
|
frule2js (FCoerce arg) = new "Coerce" [JS.EInt arg]
|
||||||
|
|
||||||
ffun2js (FFun f _ lins) = new "FFun" [JS.EStr (showCId f), JS.EArray (map JS.EInt (Array.elems lins))]
|
ffun2js (FFun f lins) = new "FFun" [JS.EStr (showCId f), JS.EArray (map JS.EInt (Array.elems lins))]
|
||||||
|
|
||||||
seq2js :: Array.Array FIndex FSymbol -> JS.Expr
|
seq2js :: Array.Array FIndex FSymbol -> JS.Expr
|
||||||
seq2js seq = JS.EArray [sym2js s | s <- Array.elems seq]
|
seq2js seq = JS.EArray [sym2js s | s <- Array.elems seq]
|
||||||
|
|||||||
@@ -172,7 +172,6 @@ data Flags = Flags {
|
|||||||
optSpeechLanguage :: Maybe String,
|
optSpeechLanguage :: Maybe String,
|
||||||
optLexer :: Maybe String,
|
optLexer :: Maybe String,
|
||||||
optUnlexer :: Maybe String,
|
optUnlexer :: Maybe String,
|
||||||
optErasing :: Bool,
|
|
||||||
optBuildParser :: BuildParser,
|
optBuildParser :: BuildParser,
|
||||||
optWarnings :: [Warning],
|
optWarnings :: [Warning],
|
||||||
optDump :: [Dump]
|
optDump :: [Dump]
|
||||||
@@ -219,7 +218,6 @@ optionsPGF :: Options -> [(String,String)]
|
|||||||
optionsPGF opts =
|
optionsPGF opts =
|
||||||
maybe [] (\x -> [("language",x)]) (flag optSpeechLanguage opts)
|
maybe [] (\x -> [("language",x)]) (flag optSpeechLanguage opts)
|
||||||
++ maybe [] (\x -> [("startcat",x)]) (flag optStartCat opts)
|
++ maybe [] (\x -> [("startcat",x)]) (flag optStartCat opts)
|
||||||
++ (if flag optErasing opts then [("erasing","on")] else [])
|
|
||||||
++ (if flag optBuildParser opts == BuildParserOnDemand then [("parser","ondemand")] else [])
|
++ (if flag optBuildParser opts == BuildParserOnDemand then [("parser","ondemand")] else [])
|
||||||
|
|
||||||
-- Option manipulation
|
-- Option manipulation
|
||||||
@@ -276,7 +274,6 @@ defaultFlags = Flags {
|
|||||||
optSpeechLanguage = Nothing,
|
optSpeechLanguage = Nothing,
|
||||||
optLexer = Nothing,
|
optLexer = Nothing,
|
||||||
optUnlexer = Nothing,
|
optUnlexer = Nothing,
|
||||||
optErasing = True,
|
|
||||||
optBuildParser = BuildParser,
|
optBuildParser = BuildParser,
|
||||||
optWarnings = [],
|
optWarnings = [],
|
||||||
optDump = []
|
optDump = []
|
||||||
@@ -354,7 +351,6 @@ optDescr =
|
|||||||
Option [] ["coding"] (ReqArg coding "ENCODING")
|
Option [] ["coding"] (ReqArg coding "ENCODING")
|
||||||
("Character encoding of the source grammar, ENCODING = "
|
("Character encoding of the source grammar, ENCODING = "
|
||||||
++ concat (intersperse " | " (map fst encodings)) ++ "."),
|
++ concat (intersperse " | " (map fst encodings)) ++ "."),
|
||||||
Option [] ["erasing"] (onOff erasing False) "Generate erasing grammar (default off).",
|
|
||||||
Option [] ["parser"] (ReqArg buildParser "VALUE") "Build parser (default on). VALUE = on | off | ondemand",
|
Option [] ["parser"] (ReqArg buildParser "VALUE") "Build parser (default on). VALUE = on | off | ondemand",
|
||||||
Option [] ["startcat"] (ReqArg startcat "CAT") "Grammar start category.",
|
Option [] ["startcat"] (ReqArg startcat "CAT") "Grammar start category.",
|
||||||
Option [] ["language"] (ReqArg language "LANG") "Set the speech language flag to LANG in the generated grammar.",
|
Option [] ["language"] (ReqArg language "LANG") "Set the speech language flag to LANG in the generated grammar.",
|
||||||
@@ -414,7 +410,6 @@ optDescr =
|
|||||||
coding x = case lookup x encodings of
|
coding x = case lookup x encodings of
|
||||||
Just c -> set $ \o -> o { optEncoding = c }
|
Just c -> set $ \o -> o { optEncoding = c }
|
||||||
Nothing -> fail $ "Unknown character encoding: " ++ x
|
Nothing -> fail $ "Unknown character encoding: " ++ x
|
||||||
erasing x = set $ \o -> o { optErasing = x }
|
|
||||||
buildParser x = do v <- case x of
|
buildParser x = do v <- case x of
|
||||||
"on" -> return BuildParser
|
"on" -> return BuildParser
|
||||||
"off" -> return DontBuildParser
|
"off" -> return DontBuildParser
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ bnfPrinter = toBNF id
|
|||||||
toBNF :: (CFG -> CFG) -> PGF -> CId -> String
|
toBNF :: (CFG -> CFG) -> PGF -> CId -> String
|
||||||
toBNF f pgf cnc = prCFG $ f $ pgfToCFG pgf cnc
|
toBNF f pgf cnc = prCFG $ f $ pgfToCFG pgf cnc
|
||||||
|
|
||||||
|
type Profile = [Int]
|
||||||
|
|
||||||
pgfToCFG :: PGF
|
pgfToCFG :: PGF
|
||||||
-> CId -- ^ Concrete syntax name
|
-> CId -- ^ Concrete syntax name
|
||||||
@@ -42,7 +43,7 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
|
|||||||
fcatCats :: Map FCat Cat
|
fcatCats :: Map FCat Cat
|
||||||
fcatCats = Map.fromList [(fc, showCId c ++ "_" ++ show i)
|
fcatCats = Map.fromList [(fc, showCId c ++ "_" ++ show i)
|
||||||
| (c,fcs) <- Map.toList (startCats pinfo),
|
| (c,fcs) <- Map.toList (startCats pinfo),
|
||||||
(fc,i) <- zip fcs [1..]]
|
(fc,i) <- zip (range fcs) [1..]]
|
||||||
|
|
||||||
fcatCat :: FCat -> Cat
|
fcatCat :: FCat -> Cat
|
||||||
fcatCat c = Map.findWithDefault ("Unknown_" ++ show c) c fcatCats
|
fcatCat c = Map.findWithDefault ("Unknown_" ++ show c) c fcatCats
|
||||||
@@ -53,7 +54,7 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
|
|||||||
|
|
||||||
-- gets the number of fields in the lincat for the given category
|
-- gets the number of fields in the lincat for the given category
|
||||||
catLinArity :: FCat -> Int
|
catLinArity :: FCat -> Int
|
||||||
catLinArity c = maximum (1:[rangeSize (bounds rhs) | (FFun _ _ rhs, _) <- topdownRules c])
|
catLinArity c = maximum (1:[rangeSize (bounds rhs) | (FFun _ rhs, _) <- topdownRules c])
|
||||||
|
|
||||||
topdownRules cat = f cat []
|
topdownRules cat = f cat []
|
||||||
where
|
where
|
||||||
@@ -69,17 +70,17 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
|
|||||||
startRules :: [CFRule]
|
startRules :: [CFRule]
|
||||||
startRules = [CFRule (showCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0)
|
startRules = [CFRule (showCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0)
|
||||||
| (c,fcs) <- Map.toList (startCats pinfo),
|
| (c,fcs) <- Map.toList (startCats pinfo),
|
||||||
fc <- fcs, not (isLiteralFCat fc),
|
fc <- range fcs, not (isLiteralFCat fc),
|
||||||
r <- [0..catLinArity fc-1]]
|
r <- [0..catLinArity fc-1]]
|
||||||
|
|
||||||
fruleToCFRule :: (FCat,Production) -> [CFRule]
|
fruleToCFRule :: (FCat,Production) -> [CFRule]
|
||||||
fruleToCFRule (c,FApply funid args) =
|
fruleToCFRule (c,FApply funid args) =
|
||||||
[CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm (map (fixProfile row) ps))
|
[CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm [fixProfile row n | n <- [0..length args-1]])
|
||||||
| (l,seqid) <- Array.assocs rhs
|
| (l,seqid) <- Array.assocs rhs
|
||||||
, let row = sequences pinfo ! seqid
|
, let row = sequences pinfo ! seqid
|
||||||
, not (containsLiterals row)]
|
, not (containsLiterals row)]
|
||||||
where
|
where
|
||||||
FFun f ps rhs = functions pinfo ! funid
|
FFun f rhs = functions pinfo ! funid
|
||||||
|
|
||||||
mkRhs :: Array FPointPos FSymbol -> [CFSymbol]
|
mkRhs :: Array FPointPos FSymbol -> [CFSymbol]
|
||||||
mkRhs = concatMap fsymbolToSymbol . Array.elems
|
mkRhs = concatMap fsymbolToSymbol . Array.elems
|
||||||
@@ -94,11 +95,10 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
|
|||||||
fsymbolToSymbol (FSymLit n l) = [NonTerminal (fcatToCat (args!!n) l)]
|
fsymbolToSymbol (FSymLit n l) = [NonTerminal (fcatToCat (args!!n) l)]
|
||||||
fsymbolToSymbol (FSymKS ts) = map Terminal ts
|
fsymbolToSymbol (FSymKS ts) = map Terminal ts
|
||||||
|
|
||||||
fixProfile :: Array FPointPos FSymbol -> Profile -> Profile
|
fixProfile :: Array FPointPos FSymbol -> Int -> Profile
|
||||||
fixProfile row = concatMap positions
|
fixProfile row i = [k | (k,j) <- nts, j == i]
|
||||||
where
|
where
|
||||||
nts = zip [0..] [j | nt <- Array.elems row, j <- getPos nt]
|
nts = zip [0..] [j | nt <- Array.elems row, j <- getPos nt]
|
||||||
positions i = [k | (k,j) <- nts, j == i]
|
|
||||||
|
|
||||||
getPos (FSymCat j _) = [j]
|
getPos (FSymCat j _) = [j]
|
||||||
getPos (FSymLit j _) = [j]
|
getPos (FSymLit j _) = [j]
|
||||||
|
|||||||
@@ -74,8 +74,8 @@ module PGF(
|
|||||||
|
|
||||||
-- ** Word Completion (Incremental Parsing)
|
-- ** Word Completion (Incremental Parsing)
|
||||||
complete,
|
complete,
|
||||||
Incremental.ParseState,
|
Parse.ParseState,
|
||||||
Incremental.initState, Incremental.nextState, Incremental.getCompletions, Incremental.recoveryStates, Incremental.extractTrees,
|
Parse.initState, Parse.nextState, Parse.getCompletions, Parse.recoveryStates, Parse.extractTrees,
|
||||||
|
|
||||||
-- ** Generation
|
-- ** Generation
|
||||||
generateRandom, generateAll, generateAllDepth,
|
generateRandom, generateAll, generateAllDepth,
|
||||||
@@ -105,8 +105,7 @@ import PGF.Expr (Tree)
|
|||||||
import PGF.Morphology
|
import PGF.Morphology
|
||||||
import PGF.Data hiding (functions)
|
import PGF.Data hiding (functions)
|
||||||
import PGF.Binary
|
import PGF.Binary
|
||||||
import qualified PGF.Parsing.FCFG.Active as Active
|
import qualified PGF.Parse as Parse
|
||||||
import qualified PGF.Parsing.FCFG.Incremental as Incremental
|
|
||||||
import qualified GF.Compile.GeneratePMCFG as PMCFG
|
import qualified GF.Compile.GeneratePMCFG as PMCFG
|
||||||
|
|
||||||
import GF.Infra.Option
|
import GF.Infra.Option
|
||||||
@@ -249,13 +248,11 @@ linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang
|
|||||||
parse pgf lang typ s =
|
parse pgf lang typ s =
|
||||||
case Map.lookup lang (concretes pgf) of
|
case Map.lookup lang (concretes pgf) of
|
||||||
Just cnc -> case parser cnc of
|
Just cnc -> case parser cnc of
|
||||||
Just pinfo -> if Map.lookup (mkCId "erasing") (cflags cnc) == Just "on"
|
Just pinfo -> Parse.parse pgf lang typ (words s)
|
||||||
then Incremental.parse pgf lang typ (words s)
|
|
||||||
else Active.parse "t" pinfo typ (words s)
|
|
||||||
Nothing -> error ("No parser built for language: " ++ showCId lang)
|
Nothing -> error ("No parser built for language: " ++ showCId lang)
|
||||||
Nothing -> error ("Unknown language: " ++ showCId lang)
|
Nothing -> error ("Unknown language: " ++ showCId lang)
|
||||||
|
|
||||||
parseWithRecovery pgf lang typ open_typs s = Incremental.parseWithRecovery pgf lang typ open_typs (words s)
|
parseWithRecovery pgf lang typ open_typs s = Parse.parseWithRecovery pgf lang typ open_typs (words s)
|
||||||
|
|
||||||
canParse pgf cnc = isJust (lookParser pgf cnc)
|
canParse pgf cnc = isJust (lookParser pgf cnc)
|
||||||
|
|
||||||
@@ -297,12 +294,12 @@ functionType pgf fun =
|
|||||||
|
|
||||||
complete pgf from typ input =
|
complete pgf from typ input =
|
||||||
let (ws,prefix) = tokensAndPrefix input
|
let (ws,prefix) = tokensAndPrefix input
|
||||||
state0 = Incremental.initState pgf from typ
|
state0 = Parse.initState pgf from typ
|
||||||
in case loop state0 ws of
|
in case loop state0 ws of
|
||||||
Nothing -> []
|
Nothing -> []
|
||||||
Just state ->
|
Just state ->
|
||||||
(if null prefix && not (null (Incremental.extractTrees state typ)) then [unwords ws ++ " "] else [])
|
(if null prefix && not (null (Parse.extractTrees state typ)) then [unwords ws ++ " "] else [])
|
||||||
++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Incremental.getCompletions state prefix)]
|
++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Parse.getCompletions state prefix)]
|
||||||
where
|
where
|
||||||
tokensAndPrefix :: String -> ([String],String)
|
tokensAndPrefix :: String -> ([String],String)
|
||||||
tokensAndPrefix s | not (null s) && isSpace (last s) = (ws, "")
|
tokensAndPrefix s | not (null s) && isSpace (last s) = (ws, "")
|
||||||
@@ -311,7 +308,7 @@ complete pgf from typ input =
|
|||||||
where ws = words s
|
where ws = words s
|
||||||
|
|
||||||
loop ps [] = Just ps
|
loop ps [] = Just ps
|
||||||
loop ps (t:ts) = case Incremental.nextState ps t of
|
loop ps (t:ts) = case Parse.nextState ps t of
|
||||||
Left es -> Nothing
|
Left es -> Nothing
|
||||||
Right ps -> loop ps ts
|
Right ps -> loop ps ts
|
||||||
|
|
||||||
|
|||||||
@@ -159,8 +159,8 @@ instance Binary BindType where
|
|||||||
_ -> decodingError
|
_ -> decodingError
|
||||||
|
|
||||||
instance Binary FFun where
|
instance Binary FFun where
|
||||||
put (FFun fun prof lins) = put (fun,prof,lins)
|
put (FFun fun lins) = put (fun,lins)
|
||||||
get = liftM3 FFun get get get
|
get = liftM2 FFun get get
|
||||||
|
|
||||||
instance Binary FSymbol where
|
instance Binary FSymbol where
|
||||||
put (FSymCat n l) = putWord8 0 >> put (n,l)
|
put (FSymCat n l) = putWord8 0 >> put (n,l)
|
||||||
|
|||||||
@@ -1,76 +0,0 @@
|
|||||||
---------------------------------------------------------------------
|
|
||||||
-- |
|
|
||||||
-- Maintainer : Krasimir Angelov
|
|
||||||
-- Stability : (stable)
|
|
||||||
-- Portability : (portable)
|
|
||||||
--
|
|
||||||
-- FCFG parsing, parser information
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
module PGF.BuildParser where
|
|
||||||
|
|
||||||
import GF.Data.SortedList
|
|
||||||
import GF.Data.Assoc
|
|
||||||
import PGF.CId
|
|
||||||
import PGF.Data
|
|
||||||
import PGF.Parsing.FCFG.Utilities
|
|
||||||
|
|
||||||
import Data.Array.IArray
|
|
||||||
import Data.Maybe
|
|
||||||
import qualified Data.IntMap as IntMap
|
|
||||||
import qualified Data.Map as Map
|
|
||||||
import qualified Data.Set as Set
|
|
||||||
import Debug.Trace
|
|
||||||
|
|
||||||
|
|
||||||
data ParserInfoEx
|
|
||||||
= ParserInfoEx { epsilonRules :: [(FunId,[FCat],FCat)]
|
|
||||||
, leftcornerCats :: Assoc FCat [(FunId,[FCat],FCat)]
|
|
||||||
, leftcornerTokens :: Assoc String [(FunId,[FCat],FCat)]
|
|
||||||
, grammarToks :: [String]
|
|
||||||
}
|
|
||||||
|
|
||||||
------------------------------------------------------------
|
|
||||||
-- parser information
|
|
||||||
|
|
||||||
getLeftCornerTok pinfo (FFun _ _ lins)
|
|
||||||
| inRange (bounds syms) 0 = case syms ! 0 of
|
|
||||||
FSymKS [tok] -> [tok]
|
|
||||||
_ -> []
|
|
||||||
| otherwise = []
|
|
||||||
where
|
|
||||||
syms = (sequences pinfo) ! (lins ! 0)
|
|
||||||
|
|
||||||
getLeftCornerCat pinfo args (FFun _ _ lins)
|
|
||||||
| inRange (bounds syms) 0 = case syms ! 0 of
|
|
||||||
FSymCat d _ -> let cat = args !! d
|
|
||||||
in case IntMap.lookup cat (productions pinfo) of
|
|
||||||
Just set -> cat : [cat' | FCoerce cat' <- Set.toList set]
|
|
||||||
Nothing -> [cat]
|
|
||||||
_ -> []
|
|
||||||
| otherwise = []
|
|
||||||
where
|
|
||||||
syms = (sequences pinfo) ! (lins ! 0)
|
|
||||||
|
|
||||||
buildParserInfo :: ParserInfo -> ParserInfoEx
|
|
||||||
buildParserInfo pinfo =
|
|
||||||
ParserInfoEx { epsilonRules = epsilonrules
|
|
||||||
, leftcornerCats = leftcorncats
|
|
||||||
, leftcornerTokens = leftcorntoks
|
|
||||||
, grammarToks = grammartoks
|
|
||||||
}
|
|
||||||
|
|
||||||
where epsilonrules = [ (ruleid,args,cat)
|
|
||||||
| (cat,set) <- IntMap.toList (productions pinfo)
|
|
||||||
, (FApply ruleid args) <- Set.toList set
|
|
||||||
, let (FFun _ _ lins) = (functions pinfo) ! ruleid
|
|
||||||
, not (inRange (bounds ((sequences pinfo) ! (lins ! 0))) 0) ]
|
|
||||||
leftcorncats = accumAssoc id [ (cat', (ruleid, args, cat))
|
|
||||||
| (cat,set) <- IntMap.toList (productions pinfo)
|
|
||||||
, (FApply ruleid args) <- Set.toList set
|
|
||||||
, cat' <- getLeftCornerCat pinfo args ((functions pinfo) ! ruleid) ]
|
|
||||||
leftcorntoks = accumAssoc id [ (tok, (ruleid, args, cat))
|
|
||||||
| (cat,set) <- IntMap.toList (productions pinfo)
|
|
||||||
, (FApply ruleid args) <- Set.toList set
|
|
||||||
, tok <- getLeftCornerTok pinfo ((functions pinfo) ! ruleid) ]
|
|
||||||
grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymKS [t] <- elems lin]
|
|
||||||
@@ -19,13 +19,12 @@ data FSymbol
|
|||||||
| FSymKS [String]
|
| FSymKS [String]
|
||||||
| FSymKP [String] [Alternative]
|
| FSymKP [String] [Alternative]
|
||||||
deriving (Eq,Ord,Show)
|
deriving (Eq,Ord,Show)
|
||||||
type Profile = [Int]
|
|
||||||
data Production
|
data Production
|
||||||
= FApply {-# UNPACK #-} !FunId [FCat]
|
= FApply {-# UNPACK #-} !FunId [FCat]
|
||||||
| FCoerce {-# UNPACK #-} !FCat
|
| FCoerce {-# UNPACK #-} !FCat
|
||||||
| FConst Expr [String]
|
| FConst Expr [String]
|
||||||
deriving (Eq,Ord,Show)
|
deriving (Eq,Ord,Show)
|
||||||
data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
|
data FFun = FFun CId {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
|
||||||
type FSeq = Array FPointPos FSymbol
|
type FSeq = Array FPointPos FSymbol
|
||||||
type FunId = Int
|
type FunId = Int
|
||||||
type SeqId = Int
|
type SeqId = Int
|
||||||
@@ -39,7 +38,7 @@ data ParserInfo
|
|||||||
, sequences :: Array SeqId FSeq
|
, sequences :: Array SeqId FSeq
|
||||||
, productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file
|
, productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file
|
||||||
, productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions
|
, productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions
|
||||||
, startCats :: Map.Map CId [FCat]
|
, startCats :: Map.Map CId (FCat,FCat)
|
||||||
, totalCats :: {-# UNPACK #-} !FCat
|
, totalCats :: {-# UNPACK #-} !FCat
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -71,14 +70,14 @@ ppProduction (fcat,FCoerce arg) =
|
|||||||
ppProduction (fcat,FConst _ ss) =
|
ppProduction (fcat,FConst _ ss) =
|
||||||
ppFCat fcat <+> text "->" <+> ppStrs ss
|
ppFCat fcat <+> text "->" <+> ppStrs ss
|
||||||
|
|
||||||
ppFun (funid,FFun fun _ arr) =
|
ppFun (funid,FFun fun arr) =
|
||||||
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)
|
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)
|
||||||
|
|
||||||
ppSeq (seqid,seq) =
|
ppSeq (seqid,seq) =
|
||||||
ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))
|
ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))
|
||||||
|
|
||||||
ppStartCat (id,fcats) =
|
ppStartCat (id,(start,end)) =
|
||||||
ppCId id <+> text ":=" <+> brackets (hcat (punctuate comma (map ppFCat fcats)))
|
ppCId id <+> text ":=" <+> brackets (ppFCat start <+> text ".." <+> ppFCat end)
|
||||||
|
|
||||||
ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
|
ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
|
||||||
ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'
|
ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
{-# LANGUAGE BangPatterns #-}
|
{-# LANGUAGE BangPatterns #-}
|
||||||
module PGF.Parsing.FCFG.Incremental
|
module PGF.Parse
|
||||||
( ParseState
|
( ParseState
|
||||||
, ErrorState
|
, ErrorState
|
||||||
, initState
|
, initState
|
||||||
@@ -57,10 +57,10 @@ parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ)
|
|||||||
initState :: PGF -> Language -> Type -> ParseState
|
initState :: PGF -> Language -> Type -> ParseState
|
||||||
initState pgf lang (DTyp _ start _) =
|
initState pgf lang (DTyp _ start _) =
|
||||||
let items = do
|
let items = do
|
||||||
cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
|
cat <- maybe [] range (Map.lookup start (startCats pinfo))
|
||||||
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
|
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
|
||||||
[] cat (productions pinfo)
|
[] cat (productions pinfo)
|
||||||
let FFun fn _ lins = functions pinfo ! funid
|
let FFun fn lins = functions pinfo ! funid
|
||||||
(lbl,seqid) <- assocs lins
|
(lbl,seqid) <- assocs lins
|
||||||
return (Active 0 0 funid seqid args (AK cat lbl))
|
return (Active 0 0 funid seqid args (AK cat lbl))
|
||||||
|
|
||||||
@@ -131,7 +131,7 @@ recoveryStates open_types (EState pgf pinfo chart) =
|
|||||||
}
|
}
|
||||||
in (PState pgf pinfo chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf pinfo chart2) acc)
|
in (PState pgf pinfo chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf pinfo chart2) acc)
|
||||||
where
|
where
|
||||||
type2fcats (DTyp _ cat _) = fromMaybe [] (Map.lookup cat (startCats pinfo))
|
type2fcats (DTyp _ cat _) = maybe [] range (Map.lookup cat (startCats pinfo))
|
||||||
|
|
||||||
complete open_fcats items ac =
|
complete open_fcats items ac =
|
||||||
foldl (Set.fold (\(Active j' ppos funid seqid args keyc) ->
|
foldl (Set.fold (\(Active j' ppos funid seqid args keyc) ->
|
||||||
@@ -154,10 +154,10 @@ extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) =
|
|||||||
(_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart
|
(_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart
|
||||||
|
|
||||||
exps = do
|
exps = do
|
||||||
cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
|
cat <- maybe [] range (Map.lookup start (startCats pinfo))
|
||||||
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
|
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
|
||||||
[] cat (productions pinfo)
|
[] cat (productions pinfo)
|
||||||
let FFun fn _ lins = functions pinfo ! funid
|
let FFun fn lins = functions pinfo ! funid
|
||||||
lbl <- indices lins
|
lbl <- indices lins
|
||||||
Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
|
Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
|
||||||
(fvs,tree) <- go Set.empty 0 (0,fid)
|
(fvs,tree) <- go Set.empty 0 (0,fid)
|
||||||
@@ -168,7 +168,7 @@ extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) =
|
|||||||
| fcat < totalCats pinfo = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
|
| fcat < totalCats pinfo = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
|
||||||
| Set.member fcat rec = mzero
|
| Set.member fcat rec = mzero
|
||||||
| otherwise = foldForest (\funid args trees ->
|
| otherwise = foldForest (\funid args trees ->
|
||||||
do let FFun fn _ lins = functions pinfo ! funid
|
do let FFun fn lins = functions pinfo ! funid
|
||||||
args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args)
|
args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args)
|
||||||
check_ho_fun fn args
|
check_ho_fun fn args
|
||||||
`mplus`
|
`mplus`
|
||||||
@@ -250,7 +250,7 @@ process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) ac
|
|||||||
|
|
||||||
rhs funid lbl = unsafeAt lins lbl
|
rhs funid lbl = unsafeAt lins lbl
|
||||||
where
|
where
|
||||||
FFun _ _ lins = unsafeAt funs funid
|
FFun _ lins = unsafeAt funs funid
|
||||||
|
|
||||||
|
|
||||||
updateAt :: Int -> a -> [a] -> [a]
|
updateAt :: Int -> a -> [a] -> [a]
|
||||||
@@ -1,205 +0,0 @@
|
|||||||
----------------------------------------------------------------------
|
|
||||||
-- |
|
|
||||||
-- Maintainer : Krasimir Angelov
|
|
||||||
-- Stability : (stable)
|
|
||||||
-- Portability : (portable)
|
|
||||||
--
|
|
||||||
-- MCFG parsing, the active algorithm
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
module PGF.Parsing.FCFG.Active (parse) where
|
|
||||||
|
|
||||||
import GF.Data.Assoc
|
|
||||||
import GF.Data.SortedList
|
|
||||||
import GF.Data.Utilities
|
|
||||||
import qualified GF.Data.MultiMap as MM
|
|
||||||
|
|
||||||
import PGF.CId
|
|
||||||
import PGF.Data
|
|
||||||
import PGF.Tree
|
|
||||||
import PGF.Parsing.FCFG.Utilities
|
|
||||||
import PGF.BuildParser
|
|
||||||
|
|
||||||
import Control.Monad (guard)
|
|
||||||
|
|
||||||
import qualified Data.List as List
|
|
||||||
import qualified Data.Map as Map
|
|
||||||
import qualified Data.IntMap as IntMap
|
|
||||||
import qualified Data.Set as Set
|
|
||||||
import Data.Array.IArray
|
|
||||||
import Debug.Trace
|
|
||||||
|
|
||||||
----------------------------------------------------------------------
|
|
||||||
-- * parsing
|
|
||||||
|
|
||||||
type FToken = String
|
|
||||||
|
|
||||||
makeFinalEdge cat 0 0 = (cat, [EmptyRange])
|
|
||||||
makeFinalEdge cat i j = (cat, [makeRange i j])
|
|
||||||
|
|
||||||
-- | the list of categories = possible starting categories
|
|
||||||
parse :: String -> ParserInfo -> Type -> [FToken] -> [Expr]
|
|
||||||
parse strategy pinfo (DTyp _ start _) toks = map (tree2expr) . nubsort $ filteredForests >>= forest2trees
|
|
||||||
where
|
|
||||||
inTokens = input toks
|
|
||||||
starts = Map.findWithDefault [] start (startCats pinfo)
|
|
||||||
schart = xchart2syntaxchart chart pinfo
|
|
||||||
(i,j) = inputBounds inTokens
|
|
||||||
finalEdges = [makeFinalEdge cat i j | cat <- starts]
|
|
||||||
forests = chart2forests schart (const False) finalEdges
|
|
||||||
filteredForests = forests >>= applyProfileToForest
|
|
||||||
|
|
||||||
pinfoex = buildParserInfo pinfo
|
|
||||||
|
|
||||||
chart = process strategy pinfo pinfoex inTokens axioms emptyXChart
|
|
||||||
axioms | isBU strategy = literals pinfoex inTokens ++ initialBU pinfo pinfoex inTokens
|
|
||||||
| isTD strategy = literals pinfoex inTokens ++ initialTD pinfo starts inTokens
|
|
||||||
|
|
||||||
isBU s = s=="b"
|
|
||||||
isTD s = s=="t"
|
|
||||||
|
|
||||||
-- used in prediction
|
|
||||||
emptyChildren :: FunId -> [FCat] -> SyntaxNode FunId RangeRec
|
|
||||||
emptyChildren ruleid args = SNode ruleid (replicate (length args) [])
|
|
||||||
|
|
||||||
|
|
||||||
process :: String -> ParserInfo -> ParserInfoEx -> Input FToken -> [Item] -> XChart FCat -> XChart FCat
|
|
||||||
process strategy pinfo pinfoex toks [] chart = chart
|
|
||||||
process strategy pinfo pinfoex toks (item:items) chart = process strategy pinfo pinfoex toks items $! univRule item chart
|
|
||||||
where
|
|
||||||
univRule item@(Active found rng lbl ppos node@(SNode ruleid recs) args cat) chart
|
|
||||||
| inRange (bounds lin) ppos =
|
|
||||||
case lin ! ppos of
|
|
||||||
FSymCat d r -> let c = args !! d
|
|
||||||
in case recs !! d of
|
|
||||||
[] -> case insertXChart chart item c of
|
|
||||||
Nothing -> chart
|
|
||||||
Just chart -> let items = do item@(Final found' _ _ _) <- lookupXChartFinal chart c
|
|
||||||
rng <- concatRange rng (found' !! r)
|
|
||||||
return (Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)) args cat)
|
|
||||||
++
|
|
||||||
do guard (isTD strategy)
|
|
||||||
(ruleid,args) <- topdownRules pinfo c
|
|
||||||
return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args c)
|
|
||||||
in process strategy pinfo pinfoex toks items chart
|
|
||||||
found' -> let items = do rng <- concatRange rng (found' !! r)
|
|
||||||
return (Active found rng lbl (ppos+1) node args cat)
|
|
||||||
in process strategy pinfo pinfoex toks items chart
|
|
||||||
FSymKS [tok]
|
|
||||||
-> let items = do t_rng <- inputToken toks ? tok
|
|
||||||
rng' <- concatRange rng t_rng
|
|
||||||
return (Active found rng' lbl (ppos+1) node args cat)
|
|
||||||
in process strategy pinfo pinfoex toks items chart
|
|
||||||
| otherwise =
|
|
||||||
if inRange (bounds lins) (lbl+1)
|
|
||||||
then univRule (Active (rng:found) EmptyRange (lbl+1) 0 node args cat) chart
|
|
||||||
else univRule (Final (reverse (rng:found)) node args cat) chart
|
|
||||||
where
|
|
||||||
(FFun _ _ lins) = functions pinfo ! ruleid
|
|
||||||
lin = sequences pinfo ! (lins ! lbl)
|
|
||||||
univRule item@(Final found' node args cat) chart =
|
|
||||||
case insertXChart chart item cat of
|
|
||||||
Nothing -> chart
|
|
||||||
Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _) args c) <- lookupXChartAct chart cat
|
|
||||||
let FFun _ _ lins = functions pinfo ! ruleid
|
|
||||||
FSymCat d r = (sequences pinfo ! (lins ! l)) ! ppos
|
|
||||||
rng <- concatRange rng (found' !! r)
|
|
||||||
return (Active found rng l (ppos+1) (updateChildren node d found') args c)
|
|
||||||
++
|
|
||||||
do guard (isBU strategy)
|
|
||||||
(ruleid,args,c) <- leftcornerCats pinfoex ? cat
|
|
||||||
let FFun _ _ lins = functions pinfo ! ruleid
|
|
||||||
FSymCat d r = (sequences pinfo ! (lins ! 0)) ! 0
|
|
||||||
return (Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid args) d found') args c)
|
|
||||||
|
|
||||||
updateChildren :: SyntaxNode FunId RangeRec -> Int -> RangeRec -> SyntaxNode FunId RangeRec
|
|
||||||
updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs
|
|
||||||
in process strategy pinfo pinfoex toks items chart
|
|
||||||
|
|
||||||
----------------------------------------------------------------------
|
|
||||||
-- * XChart
|
|
||||||
|
|
||||||
data Item
|
|
||||||
= Active RangeRec
|
|
||||||
Range
|
|
||||||
{-# UNPACK #-} !FIndex
|
|
||||||
{-# UNPACK #-} !FPointPos
|
|
||||||
(SyntaxNode FunId RangeRec)
|
|
||||||
[FCat]
|
|
||||||
FCat
|
|
||||||
| Final RangeRec (SyntaxNode FunId RangeRec) [FCat] FCat
|
|
||||||
deriving (Eq, Ord, Show)
|
|
||||||
|
|
||||||
data XChart c = XChart !(MM.MultiMap c Item) !(MM.MultiMap c Item)
|
|
||||||
|
|
||||||
emptyXChart :: Ord c => XChart c
|
|
||||||
emptyXChart = XChart MM.empty MM.empty
|
|
||||||
|
|
||||||
insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _ _) c =
|
|
||||||
case MM.insert' c item actives of
|
|
||||||
Nothing -> Nothing
|
|
||||||
Just actives -> Just (XChart actives finals)
|
|
||||||
|
|
||||||
insertXChart (XChart actives finals) item@(Final _ _ _ _) c =
|
|
||||||
case MM.insert' c item finals of
|
|
||||||
Nothing -> Nothing
|
|
||||||
Just finals -> Just (XChart actives finals)
|
|
||||||
|
|
||||||
lookupXChartAct (XChart actives finals) c = actives MM.! c
|
|
||||||
lookupXChartFinal (XChart actives finals) c = finals MM.! c
|
|
||||||
|
|
||||||
xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) (FCat,RangeRec)
|
|
||||||
xchart2syntaxchart (XChart actives finals) pinfo =
|
|
||||||
accumAssoc groupSyntaxNodes $
|
|
||||||
[ case node of
|
|
||||||
SNode ruleid rrecs -> let FFun fun prof _ = functions pinfo ! ruleid
|
|
||||||
in ((cat,found), SNode (fun,prof) (zip rhs rrecs))
|
|
||||||
SString s -> ((cat,found), SString s)
|
|
||||||
SInt n -> ((cat,found), SInt n)
|
|
||||||
SFloat f -> ((cat,found), SFloat f)
|
|
||||||
| (Final found node rhs cat) <- MM.elems finals
|
|
||||||
]
|
|
||||||
|
|
||||||
literals :: ParserInfoEx -> Input FToken -> [Item]
|
|
||||||
literals pinfoex toks =
|
|
||||||
[let (c,node) = lexer t in (Final [rng] node [] c) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfoex)]
|
|
||||||
where
|
|
||||||
lexer t =
|
|
||||||
case reads t of
|
|
||||||
[(n,"")] -> (fcatInt, SInt (n::Integer))
|
|
||||||
_ -> case reads t of
|
|
||||||
[(f,"")] -> (fcatFloat, SFloat (f::Double))
|
|
||||||
_ -> (fcatString,SString t)
|
|
||||||
|
|
||||||
|
|
||||||
----------------------------------------------------------------------
|
|
||||||
-- Earley --
|
|
||||||
|
|
||||||
-- called with all starting categories
|
|
||||||
initialTD :: ParserInfo -> [FCat] -> Input FToken -> [Item]
|
|
||||||
initialTD pinfo starts toks =
|
|
||||||
do cat <- starts
|
|
||||||
(ruleid,args) <- topdownRules pinfo cat
|
|
||||||
return (Active [] (Range 0 0) 0 0 (emptyChildren ruleid args) args cat)
|
|
||||||
|
|
||||||
topdownRules pinfo cat = f cat []
|
|
||||||
where
|
|
||||||
f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo))
|
|
||||||
|
|
||||||
g (FApply ruleid args) rules = (ruleid,args) : rules
|
|
||||||
g (FCoerce cat) rules = f cat rules
|
|
||||||
|
|
||||||
|
|
||||||
----------------------------------------------------------------------
|
|
||||||
-- Kilbury --
|
|
||||||
|
|
||||||
initialBU :: ParserInfo -> ParserInfoEx -> Input FToken -> [Item]
|
|
||||||
initialBU pinfo pinfoex toks =
|
|
||||||
do (tok,rngs) <- aAssocs (inputToken toks)
|
|
||||||
(ruleid,args,cat) <- leftcornerTokens pinfoex ? tok
|
|
||||||
rng <- rngs
|
|
||||||
return (Active [] rng 0 1 (emptyChildren ruleid args) args cat)
|
|
||||||
++
|
|
||||||
do (ruleid,args,cat) <- epsilonRules pinfoex
|
|
||||||
let FFun _ _ _ = functions pinfo ! ruleid
|
|
||||||
return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args cat)
|
|
||||||
@@ -1,188 +0,0 @@
|
|||||||
----------------------------------------------------------------------
|
|
||||||
-- |
|
|
||||||
-- Maintainer : PL
|
|
||||||
-- Stability : (stable)
|
|
||||||
-- Portability : (portable)
|
|
||||||
--
|
|
||||||
-- > CVS $Date: 2005/05/13 12:40:19 $
|
|
||||||
-- > CVS $Author: peb $
|
|
||||||
-- > CVS $Revision: 1.6 $
|
|
||||||
--
|
|
||||||
-- Basic type declarations and functions for grammar formalisms
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
module PGF.Parsing.FCFG.Utilities where
|
|
||||||
|
|
||||||
import Control.Monad
|
|
||||||
import Data.Array
|
|
||||||
import Data.List (groupBy)
|
|
||||||
|
|
||||||
import PGF.CId
|
|
||||||
import PGF.Data
|
|
||||||
import PGF.Tree
|
|
||||||
import GF.Data.Assoc
|
|
||||||
import GF.Data.Utilities (sameLength, foldMerge, splitBy)
|
|
||||||
|
|
||||||
|
|
||||||
------------------------------------------------------------
|
|
||||||
-- ranges as single pairs
|
|
||||||
|
|
||||||
type RangeRec = [Range]
|
|
||||||
|
|
||||||
data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int
|
|
||||||
| EmptyRange
|
|
||||||
deriving (Eq, Ord, Show)
|
|
||||||
|
|
||||||
makeRange :: Int -> Int -> Range
|
|
||||||
makeRange = Range
|
|
||||||
|
|
||||||
concatRange :: Range -> Range -> [Range]
|
|
||||||
concatRange EmptyRange rng = return rng
|
|
||||||
concatRange rng EmptyRange = return rng
|
|
||||||
concatRange (Range i j) (Range j' k) = [Range i k | j==j']
|
|
||||||
|
|
||||||
minRange :: Range -> Int
|
|
||||||
minRange (Range i j) = i
|
|
||||||
|
|
||||||
maxRange :: Range -> Int
|
|
||||||
maxRange (Range i j) = j
|
|
||||||
|
|
||||||
|
|
||||||
------------------------------------------------------------
|
|
||||||
-- * representaions of input tokens
|
|
||||||
|
|
||||||
data Input t = MkInput { inputBounds :: (Int, Int),
|
|
||||||
inputToken :: Assoc t [Range]
|
|
||||||
}
|
|
||||||
|
|
||||||
input :: Ord t => [t] -> Input t
|
|
||||||
input toks = MkInput inBounds inToken
|
|
||||||
where
|
|
||||||
inBounds = (0, length toks)
|
|
||||||
inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,tok) <- zip3 [0..] [1..] toks ]
|
|
||||||
|
|
||||||
inputMany :: Ord t => [[t]] -> Input t
|
|
||||||
inputMany toks = MkInput inBounds inToken
|
|
||||||
where
|
|
||||||
inBounds = (0, length toks)
|
|
||||||
inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,ts) <- zip3 [0..] [1..] toks, tok <- ts ]
|
|
||||||
|
|
||||||
|
|
||||||
------------------------------------------------------------
|
|
||||||
-- * representations of syntactical analyses
|
|
||||||
|
|
||||||
-- ** charts as finite maps over edges
|
|
||||||
|
|
||||||
-- | The values of the chart, a list of key-daughters pairs,
|
|
||||||
-- has unique keys. In essence, it is a map from 'n' to daughters.
|
|
||||||
-- The daughters should be a set (not necessarily sorted) of rhs's.
|
|
||||||
type SyntaxChart n e = Assoc e [SyntaxNode n [e]]
|
|
||||||
|
|
||||||
data SyntaxNode n e = SMeta
|
|
||||||
| SNode n [e]
|
|
||||||
| SString String
|
|
||||||
| SInt Integer
|
|
||||||
| SFloat Double
|
|
||||||
deriving (Eq,Ord,Show)
|
|
||||||
|
|
||||||
groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]]
|
|
||||||
groupSyntaxNodes [] = []
|
|
||||||
groupSyntaxNodes (SNode n0 es0:xs) = (SNode n0 (es0:ess)) : groupSyntaxNodes xs'
|
|
||||||
where
|
|
||||||
(ess,xs') = span xs
|
|
||||||
|
|
||||||
span [] = ([],[])
|
|
||||||
span xs@(SNode n es:xs')
|
|
||||||
| n0 == n = let (ess,xs) = span xs' in (es:ess,xs)
|
|
||||||
| otherwise = ([],xs)
|
|
||||||
groupSyntaxNodes (SString s:xs) = (SString s) : groupSyntaxNodes xs
|
|
||||||
groupSyntaxNodes (SInt n:xs) = (SInt n) : groupSyntaxNodes xs
|
|
||||||
groupSyntaxNodes (SFloat f:xs) = (SFloat f) : groupSyntaxNodes xs
|
|
||||||
|
|
||||||
-- ** syntax forests
|
|
||||||
|
|
||||||
data SyntaxForest n = FMeta
|
|
||||||
| FNode n [[SyntaxForest n]]
|
|
||||||
-- ^ The outer list should be a set (not necessarily sorted)
|
|
||||||
-- of possible alternatives. Ie. the outer list
|
|
||||||
-- is a disjunctive node, and the inner lists
|
|
||||||
-- are (conjunctive) concatenative nodes
|
|
||||||
| FString String
|
|
||||||
| FInt Integer
|
|
||||||
| FFloat Double
|
|
||||||
deriving (Eq, Ord, Show)
|
|
||||||
|
|
||||||
instance Functor SyntaxForest where
|
|
||||||
fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests
|
|
||||||
fmap _ (FString s) = FString s
|
|
||||||
fmap _ (FInt n) = FInt n
|
|
||||||
fmap _ (FFloat f) = FFloat f
|
|
||||||
fmap _ (FMeta) = FMeta
|
|
||||||
|
|
||||||
forestName :: SyntaxForest n -> Maybe n
|
|
||||||
forestName (FNode n _) = Just n
|
|
||||||
forestName _ = Nothing
|
|
||||||
|
|
||||||
unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n)
|
|
||||||
unifyManyForests = foldM unifyForests FMeta
|
|
||||||
|
|
||||||
-- | two forests can be unified, if either is 'FMeta', or both have the same parent,
|
|
||||||
-- and all children can be unified
|
|
||||||
unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n)
|
|
||||||
unifyForests FMeta forest = return forest
|
|
||||||
unifyForests forest FMeta = return forest
|
|
||||||
unifyForests (FNode name1 children1) (FNode name2 children2)
|
|
||||||
| name1 == name2 && not (null children) = return $ FNode name1 children
|
|
||||||
where children = [ forests | forests1 <- children1, forests2 <- children2,
|
|
||||||
sameLength forests1 forests2,
|
|
||||||
forests <- zipWithM unifyForests forests1 forests2 ]
|
|
||||||
unifyForests (FString s1) (FString s2)
|
|
||||||
| s1 == s2 = return $ FString s1
|
|
||||||
unifyForests (FInt n1) (FInt n2)
|
|
||||||
| n1 == n2 = return $ FInt n1
|
|
||||||
unifyForests (FFloat f1) (FFloat f2)
|
|
||||||
| f1 == f2 = return $ FFloat f1
|
|
||||||
unifyForests _ _ = fail "forest unification failure"
|
|
||||||
|
|
||||||
|
|
||||||
-- ** conversions between representations
|
|
||||||
|
|
||||||
chart2forests :: (Ord n, Ord e) =>
|
|
||||||
SyntaxChart n e -- ^ The complete chart
|
|
||||||
-> (e -> Bool) -- ^ When is an edge 'FMeta'?
|
|
||||||
-> [e] -- ^ The starting edges
|
|
||||||
-> [SyntaxForest n] -- ^ The result has unique keys, ie. all 'n' are joined together.
|
|
||||||
-- In essence, the result is a map from 'n' to forest daughters
|
|
||||||
chart2forests chart isMeta = concatMap (edge2forests [])
|
|
||||||
where edge2forests edges edge
|
|
||||||
| isMeta edge = [FMeta]
|
|
||||||
| edge `elem` edges = []
|
|
||||||
| otherwise = map (item2forest (edge:edges)) $ chart ? edge
|
|
||||||
item2forest edges (SMeta) = FMeta
|
|
||||||
item2forest edges (SNode name children) =
|
|
||||||
FNode name $ children >>= mapM (edge2forests edges)
|
|
||||||
item2forest edges (SString s) = FString s
|
|
||||||
item2forest edges (SInt n) = FInt n
|
|
||||||
item2forest edges (SFloat f) = FFloat f
|
|
||||||
|
|
||||||
|
|
||||||
applyProfileToForest :: SyntaxForest (CId,[Profile]) -> [SyntaxForest CId]
|
|
||||||
applyProfileToForest (FNode (fun,profiles) children)
|
|
||||||
| fun == wildCId = concat chForests
|
|
||||||
| otherwise = [ FNode fun chForests | not (null chForests) ]
|
|
||||||
where chForests = concat [ mapM (unifyManyForests . map (forests !!)) profiles |
|
|
||||||
forests0 <- children,
|
|
||||||
forests <- mapM applyProfileToForest forests0 ]
|
|
||||||
applyProfileToForest (FString s) = [FString s]
|
|
||||||
applyProfileToForest (FInt n) = [FInt n]
|
|
||||||
applyProfileToForest (FFloat f) = [FFloat f]
|
|
||||||
applyProfileToForest (FMeta) = [FMeta]
|
|
||||||
|
|
||||||
|
|
||||||
forest2trees :: SyntaxForest CId -> [Tree]
|
|
||||||
forest2trees (FNode n forests) = map (Fun n) $ forests >>= mapM forest2trees
|
|
||||||
forest2trees (FString s) = [Lit (LStr s)]
|
|
||||||
forest2trees (FInt n) = [Lit (LInt n)]
|
|
||||||
forest2trees (FFloat f) = [Lit (LFlt f)]
|
|
||||||
forest2trees (FMeta) = [Meta 0]
|
|
||||||
Reference in New Issue
Block a user