add the FCFG parser

This commit is contained in:
kr.angelov
2006-06-01 11:19:47 +00:00
parent 0c0c82603c
commit a5758468ed
13 changed files with 985 additions and 18 deletions

View File

@@ -151,7 +151,7 @@ emptyStateGrammar = StGr {
cf = emptyCF,
mcfg = [],
cfg = [],
pInfo = Prs.buildPInfo [] [],
pInfo = Prs.buildPInfo [] [] [],
morpho = emptyMorpho,
probs = emptyProbs,
loptions = noOptions
@@ -231,9 +231,9 @@ updateShellState opts ign mcnc sh ((_,sgr,gr),rts) = do
let probss = [] -----
let fromGFC = snd . snd . Cnv.convertGFC opts
(mcfgs, cfgs) = unzip $ map (curry fromGFC cgr) concrs
pInfos = zipWith Prs.buildPInfo mcfgs cfgs
let fromGFC = snd . snd . Cnv.convertGFC opts
(mcfgs, fcfgs, cfgs) = unzip3 $ map (curry fromGFC cgr) concrs
pInfos = zipWith3 Prs.buildPInfo mcfgs fcfgs cfgs
let funs = funRulesOf cgr
let cats = allCatsOf cgr
@@ -362,7 +362,7 @@ stateGrammarOfLangOpt purg st0 l = StGr {
cf = maybe emptyCF id (lookup l (cfs st)),
mcfg = maybe [] id $ lookup l $ mcfgs st,
cfg = maybe [] id $ lookup l $ cfgs st,
pInfo = maybe (Prs.buildPInfo [] []) id $ lookup l $ pInfos st,
pInfo = maybe (Prs.buildPInfo [] [] []) id $ lookup l $ pInfos st,
morpho = maybe emptyMorpho id (lookup l (morphos st)),
probs = maybe emptyProbs id (lookup l (probss st)),
loptions = errVal noOptions $ lookupOptionsCan allCan
@@ -404,7 +404,7 @@ stateAbstractGrammar st = StGr {
cf = emptyCF,
mcfg = [],
cfg = [],
pInfo = Prs.buildPInfo [] [],
pInfo = Prs.buildPInfo [] [] [],
morpho = emptyMorpho,
probs = emptyProbs,
loptions = gloptions st ----

View File

@@ -31,6 +31,7 @@ import qualified GF.Conversion.RemoveSingletons as RemSing
import qualified GF.Conversion.RemoveErasing as RemEra
import qualified GF.Conversion.RemoveEpsilon as RemEps
import qualified GF.Conversion.SimpleToMCFG as S2M
import qualified GF.Conversion.SimpleToFCFG as S2FM
import qualified GF.Conversion.MCFGtoCFG as M2C
import GF.Infra.Print
@@ -40,10 +41,10 @@ import GF.System.Tracing
----------------------------------------------------------------------
-- * GFC -> MCFG & CFG, using options to decide which conversion is used
convertGFC :: Options -> (CanonGrammar, Ident) -> (SGrammar, (EGrammar, (MGrammar, CGrammar)))
convertGFC :: Options -> (CanonGrammar, Ident) -> (SGrammar, (EGrammar, (MGrammar, FGrammar, CGrammar)))
convertGFC opts = \g -> let s = g2s g
e = s2e s
in trace2 "Options" (show opts) (s, (e, (e2m e, e2c e)))
in trace2 "Options" (show opts) (s, (e, (e2m e, s2fm s, e2c e)))
where e2c = M2C.convertGrammar
e2m = case getOptVal opts firstCat of
Just cat -> flip erasing [identC cat]
@@ -53,6 +54,7 @@ convertGFC opts = \g -> let s = g2s g
Just "finite-strict" -> strict
Just "epsilon" -> epsilon . nondet
_ -> nondet
s2fm= S2FM.convertGrammar
g2s = case getOptVal opts gfcConversion of
Just "finite" -> finite . simple
Just "finite2" -> finite . finite . simple
@@ -74,10 +76,19 @@ gfc2simple :: Options -> (CanonGrammar, Ident) -> SGrammar
gfc2simple opts = fst . convertGFC opts
gfc2mcfg :: Options -> (CanonGrammar, Ident) -> MGrammar
gfc2mcfg opts = fst . snd . snd . convertGFC opts
gfc2mcfg opts g = mcfg
where
(mcfg, _, _) = snd (snd (convertGFC opts g))
gfc2cfg :: Options -> (CanonGrammar, Ident) -> CGrammar
gfc2cfg opts = snd . snd . snd . convertGFC opts
gfc2cfg opts g = cfg
where
(_, _, cfg) = snd (snd (convertGFC opts g))
gfc2fcfg :: Options -> (CanonGrammar, Ident) -> FGrammar
gfc2fcfg opts g = fcfg
where
(_, fcfg, _) = snd (snd (convertGFC opts g))
----------------------------------------------------------------------
-- * single step conversions

View File

@@ -0,0 +1,459 @@
----------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/08/17 08:27:29 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.7 $
--
-- Converting SimpleGFC grammars to fast nonerasing MCFG grammar.
--
-- the resulting grammars might be /very large/
--
-- the conversion is only equivalent if the GFC grammar has a context-free backbone.
-----------------------------------------------------------------------------
module GF.Conversion.SimpleToFCFG
(convertGrammar) where
import GF.System.Tracing
import GF.Infra.Print
import GF.Infra.Ident
import Control.Monad
import GF.Formalism.Utilities
import GF.Formalism.GCFG
import GF.Formalism.FCFG
import GF.Formalism.MCFG(Lin(..))
import GF.Formalism.SimpleGFC
import GF.Conversion.Types
import GF.Canon.AbsGFC(CIdent(..))
import GF.Data.BacktrackM
import GF.Data.SortedList
import GF.Data.Utilities (updateNthM)
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.List as List
import Data.Array
----------------------------------------------------------------------
-- main conversion function
convertGrammar :: SGrammar -> FGrammar
convertGrammar srules = getFRules (loop frulesEnv)
where
(srulesMap,frulesEnv) = foldl helper (Map.empty,emptyFRulesEnv) srules
where
helper (srulesMap,frulesEnv) rule@(Rule (Abs decl _ _) (Cnc ctype _ _)) =
( Map.insertWith (++) (decl2cat decl) [rule] srulesMap
, foldBM (\selector _ env -> convertRule selector rule env)
frulesEnv
(mkSingletonSelector ctype)
()
)
loop frulesEnv =
let (todo, frulesEnv') = takeToDoRules srulesMap frulesEnv
in case todo of
[] -> frulesEnv'
_ -> loop $! foldl (\env (srules,selector) ->
foldl (\env srule -> convertRule selector srule env) env srules) frulesEnv' todo
----------------------------------------------------------------------
-- rule conversion
convertRule :: STermSelector -> SRule -> FRulesEnv -> FRulesEnv
convertRule selector (Rule (Abs decl decls (Name fun profile)) (Cnc ctype ctypes (Just term))) frulesEnv =
foldBM addRule
frulesEnv
(convertTerm selector term [Lin emptyPath []])
(let cat : args = map decl2cat (decl : decls)
in (initialFCat cat, map initialFCat args, ctype, ctypes))
where
addRule linRec (newCat', newArgs', _, _) env0 =
let (env1, newCat) = genFCatHead env0 newCat'
(env2, newArgs,idxArgs) = foldr (\(fcat,ctype,idx) (env,args,all_args) ->
let (env1, fcat1) = genFCatArg env fcat ctype
in case fcat of
FCat _ _ [] _ -> (env , args, all_args)
_ -> (env1,fcat1:args,(idx,fcat1):all_args)) (env1,[],[]) (zip3 newArgs' ctypes [0..])
(catPaths : argsPaths) = [rcs | (FCat _ _ rcs _) <- (newCat : newArgs)]
newLinRec = listArray (0,length linRec-1) [translateLin idxArgs path linRec | path <- catPaths]
(_,newProfile) = List.mapAccumL accumProf 0 newArgs'
where
accumProf nr (FCat _ _ [] _) = (nr, Unify [] )
accumProf nr _ = (nr+1, Unify [nr])
newName = Name fun (profile `composeProfiles` newProfile)
rule = FRule (Abs newCat newArgs (Name fun newProfile)) newLinRec
in addFCatRule env2 rule
convertRule selector _ frulesEnv = frulesEnv
translateLin idxArgs lbl' [] = array (0,-1) []
translateLin idxArgs lbl' (Lin lbl syms : lins)
| lbl' == lbl = listArray (0,length syms-1) (map instSym syms)
| otherwise = translateLin idxArgs lbl' lins
where instSym = symbol (\(_, lbl, nr) -> instCat lbl nr 0 idxArgs) FSymTok
instCat lbl nr nr' ((idx,arg@(FCat _ _ rcs _)):idxArgs)
| nr == idx = FSymCat arg (index lbl rcs 0) nr'
| otherwise = instCat lbl nr (nr'+1) idxArgs
index lbl' (lbl:lbls) idx
| lbl' == lbl = idx
| otherwise = index lbl' lbls $! (idx+1)
----------------------------------------------------------------------
-- term conversion
type CnvMonad a = BacktrackM Env a
type Env = (FCat, [FCat], SLinType, [SLinType])
type LinRec = [Lin SCat SPath Token]
convertTerm :: STermSelector -> STerm -> LinRec -> CnvMonad LinRec
convertTerm selector (Arg nr cat path) (Lin lbl_path lin : lins) = convertArg selector nr cat path lbl_path lin lins
convertTerm selector (con :^ args) (Lin lbl_path lin : lins) = convertCon selector con args lbl_path lin lins
convertTerm selector (Rec record) (Lin lbl_path lin : lins) = convertRec selector record lbl_path lin lins
convertTerm selector (term :. lbl) lins = convertTerm (RecPrj lbl selector) term lins
convertTerm selector (Tbl table) (Lin lbl_path lin : lins) = convertTbl selector table lbl_path lin lins
convertTerm selector (term :! sel) lins = do sel <- evalTerm sel
convertTerm (TblPrj sel selector) term lins
convertTerm selector (Variants vars) lins = do term <- member vars
convertTerm selector term lins
convertTerm selector (t1 :++ t2) lins = do lins <- convertTerm selector t2 lins
lins <- convertTerm selector t1 lins
return lins
convertTerm selector (Token str) (Lin lbl_path lin : lins) = do projectHead lbl_path
return (Lin lbl_path (Tok str : lin) : lins)
convertTerm selector (Empty ) (Lin lbl_path lin : lins) = do projectHead lbl_path
return (Lin lbl_path lin : lins)
convertArg (RecSel record) nr cat path lbl_path lin lins =
foldM (\lins (lbl, selector) -> convertArg selector nr cat (path ++. lbl) (lbl_path ++. lbl) lin lins) lins record
convertArg (TblSel cases) nr cat path lbl_path lin lins =
foldM (\lins (term, selector) -> convertArg selector nr cat (path ++! term) (lbl_path ++! term) lin lins) lins cases
convertArg (RecPrj lbl selector) nr cat path lbl_path lin lins =
convertArg selector nr cat (path ++. lbl ) lbl_path lin lins
convertArg (TblPrj term selector) nr cat path lbl_path lin lins =
convertArg selector nr cat (path ++! term) lbl_path lin lins
convertArg (ConSel terms) nr cat path lbl_path lin lins = do
sel <- member terms
restrictHead lbl_path sel
restrictArg nr lbl_path sel
return lins
convertArg StrSel nr cat path lbl_path lin lins = do
projectHead lbl_path
projectArg nr path
return (Lin lbl_path (Cat (cat, path, nr) : lin) : lins)
convertCon (ConSel terms) con args lbl_path lin lins = do
args <- mapM evalTerm args
let term = con :^ args
guard (term `elem` terms)
restrictHead lbl_path term
return lins
convertRec selector [] lbl_path lin lins = return lins
convertRec selector@(RecSel fields) ((label, val):record) lbl_path lin lins = select fields
where
select [] = convertRec selector record lbl_path lin lins
select ((label',sub_sel) : fields)
| label == label' = do lins <- convertTerm sub_sel val (Lin (lbl_path ++. label) lin : lins)
convertRec selector record lbl_path lin lins
| otherwise = select fields
convertRec (RecPrj label sub_sel) record lbl_path lin lins = do
(label',val) <- member record
guard (label==label')
convertTerm sub_sel val (Lin lbl_path lin : lins)
convertTbl selector [] lbl_path lin lins = return lins
convertTbl selector@(TblSel cases) ((term, val):table) lbl_path lin lins = case selector of { TblSel cases -> select cases }
where
select [] = convertTbl selector table lbl_path lin lins
select ((term',sub_sel) : cases)
| term == term' = do lins <- convertTerm sub_sel val (Lin (lbl_path ++! term) lin : lins)
convertTbl selector table lbl_path lin lins
| otherwise = select cases
convertTbl (TblPrj term sub_sel) table lbl_path lin lins = do
(term',val) <- member table
guard (term==term')
convertTerm sub_sel val (Lin lbl_path lin : lins)
------------------------------------------------------------
-- eval a term to ground terms
evalTerm :: STerm -> CnvMonad STerm
evalTerm arg@(Arg nr _ path) = do ctype <- readArgCType nr
unifyPType arg $ lintypeFollowPath path ctype
evalTerm (con :^ terms) = do terms <- mapM evalTerm terms
return (con :^ terms)
evalTerm (Rec record) = do record <- mapM evalAssign record
return (Rec record)
evalTerm (term :. lbl) = do term <- evalTerm term
evalTerm (term +. lbl)
evalTerm (Tbl table) = do table <- mapM evalCase table
return (Tbl table)
evalTerm (term :! sel) = do sel <- evalTerm sel
evalTerm (term +! sel)
evalTerm (Variants terms) = member terms >>= evalTerm
evalTerm (t1 :++ t2) = do t1 <- evalTerm t1
t2 <- evalTerm t2
return (t1 :++ t2)
evalTerm (Token str) = do return (Token str)
evalTerm Empty = do return Empty
evalAssign :: (Label, STerm) -> CnvMonad (Label, STerm)
evalAssign (lbl, term) = liftM ((,) lbl) $ evalTerm term
evalCase :: (STerm, STerm) -> CnvMonad (STerm, STerm)
evalCase (pat, term) = liftM2 (,) (evalTerm pat) (evalTerm term)
unifyPType :: STerm -> SLinType -> CnvMonad STerm
unifyPType arg (RecT prec) =
liftM Rec $
sequence [ liftM ((,) lbl) $
unifyPType (arg +. lbl) ptype |
(lbl, ptype) <- prec ]
unifyPType (Arg nr _ path) (ConT terms) =
do (_, args, _, _) <- readState
let (FCat _ _ _ tcs) = args !! nr
case lookup path tcs of
Just term -> return term
Nothing -> do term <- member terms
restrictArg nr path term
return term
----------------------------------------------------------------------
-- FRulesEnv
data FRulesEnv = FRulesEnv {-# UNPACK #-} !Int FCatSet [FRule]
type SRulesMap = Map.Map SCat [SRule]
type FCatSet = Map.Map SCat (Map.Map [SPath] (Map.Map [(SPath,STerm)] (Either FCat FCat)))
emptyFRulesEnv = FRulesEnv 0 Map.empty []
genFCatHead :: FRulesEnv -> FCat -> (FRulesEnv, FCat)
genFCatHead env@(FRulesEnv last_id fcatSet rules) m1@(FCat _ cat rcs tcs) =
case Map.lookup cat fcatSet >>= Map.lookup rcs >>= Map.lookup tcs of
Just (Left fcat) -> (FRulesEnv last_id (ins fcat) rules, fcat)
Just (Right fcat) -> (env, fcat)
Nothing -> let next_id = last_id+1
fcat = FCat next_id cat rcs tcs
in (FRulesEnv next_id (ins fcat) rules, fcat)
where
ins fcat = Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs x_fcat) rcs tmap_s) cat rmap_s fcatSet
where
x_fcat = Right fcat
tmap_s = Map.singleton tcs x_fcat
rmap_s = Map.singleton rcs tmap_s
genFCatArg :: FRulesEnv -> FCat -> SLinType -> (FRulesEnv, FCat)
genFCatArg env@(FRulesEnv last_id fcatSet rules) m1@(FCat _ cat rcs tcs) ctype =
case Map.lookup cat fcatSet >>= Map.lookup rcs of
Just tmap -> case Map.lookup tcs tmap of
Just (Left fcat) -> (env, fcat)
Just (Right fcat) -> (env, fcat)
Nothing -> ins tmap
Nothing -> ins Map.empty
where
ins tmap =
let next_id = last_id+1
fcat = FCat next_id cat rcs tcs
(x_fcat,last_id1,tmap1,rules1)
= foldBM (\tcs st (x_fcat,last_id,tmap,rules) ->
let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap
rule = FRule (Abs fcat [fcat_arg] coercionName)
(listArray (0,length rcs-1) [listArray (0,0) [FSymCat fcat_arg lbl 0] | lbl <- [0..length rcs-1]])
in if st
then (Right fcat,last_id1,tmap1,rule:rules)
else (x_fcat, last_id, tmap, rules))
(Left fcat,next_id,Map.insert tcs x_fcat tmap,rules)
(gen_tcs ctype emptyPath [])
False
rmap1 = Map.singleton rcs tmap1
in (FRulesEnv last_id1 (Map.insertWith (\_ -> Map.insert rcs tmap1) cat rmap1 fcatSet) rules1, fcat)
where
addArg tcs last_id tmap =
case Map.lookup tcs tmap of
Just (Left fcat) -> (last_id, tmap, fcat)
Just (Right fcat) -> (last_id, tmap, fcat)
Nothing -> let next_id = last_id+1
fcat = FCat next_id cat rcs tcs
in (next_id, Map.insert tcs (Left fcat) tmap, fcat)
gen_tcs :: SLinType -> SPath -> [(SPath,STerm)] -> BacktrackM Bool [(SPath,STerm)]
gen_tcs (RecT record) path acc = foldM (\acc (label,ctype) -> gen_tcs ctype (path ++. label) acc) acc record
gen_tcs (TblT terms ctype) path acc = foldM (\acc term -> gen_tcs ctype (path ++! term ) acc) acc terms
gen_tcs (StrT) path acc = return acc
gen_tcs (ConT terms) path acc =
case List.lookup path tcs of
Just term -> return ((path,term) : acc)
Nothing -> do writeState True
term <- member terms
return ((path,term) : acc)
takeToDoRules :: SRulesMap -> FRulesEnv -> ([([SRule], STermSelector)], FRulesEnv)
takeToDoRules srulesMap (FRulesEnv last_id fcatSet rules) = (todo,FRulesEnv last_id fcatSet' rules)
where
(todo,fcatSet') =
Map.mapAccumWithKey (\todo cat rmap ->
let (todo1,rmap1) = Map.mapAccumWithKey (\todo rcs tmap ->
let (tcss,tmap') = Map.mapAccumWithKey (\tcss tcs x_fcat ->
case x_fcat of
Left fcat -> (tcs:tcss,Right fcat)
Right fcat -> ( tcss, x_fcat)) [] tmap
in case tcss of
[] -> ( todo,tmap )
_ -> ((srules,mkSelector rcs tcss) : todo,tmap')) todo rmap
mb_srules = Map.lookup cat srulesMap
Just srules = mb_srules
in case mb_srules of
Just srules -> (todo1,rmap1)
Nothing -> (todo ,rmap1)) [] fcatSet
addFCatRule :: FRulesEnv -> FRule -> FRulesEnv
addFCatRule (FRulesEnv last_id fcatSet rules) rule = FRulesEnv last_id fcatSet (rule:rules)
getFRules :: FRulesEnv -> [FRule]
getFRules (FRulesEnv last_id fcatSet rules) = rules
------------------------------------------------------------
-- The STermSelector
data STermSelector
= RecSel [(Label, STermSelector)]
| TblSel [(STerm, STermSelector)]
| RecPrj Label STermSelector
| TblPrj STerm STermSelector
| ConSel [STerm]
| StrSel
deriving Show
mkSingletonSelector :: SLinType -> BacktrackM () STermSelector
mkSingletonSelector ctype = do
let (rcss,tcss) = loop emptyPath ([],[]) ctype
rcs <- member rcss
return (mkSelector [rcs] tcss)
where
loop path st (RecT record) = foldl (\st (lbl,ctype) -> loop (path ++. lbl ) st ctype) st record
loop path st (TblT terms ctype) = foldl (\st term -> loop (path ++! term) st ctype) st terms
loop path (rcss,tcss) (ConT terms) = (rcss, map ((,) path) terms : tcss)
loop path (rcss,tcss) (StrT) = (path : rcss, tcss)
mkSelector :: [SPath] -> [[(SPath,STerm)]] -> STermSelector
mkSelector rcs tcss =
foldl addRestriction (case xs of
(path:xs) -> foldl addProjection (path2selector StrSel path) xs) ys
where
xs = [ reverse path | Path path <- rcs]
ys = [(reverse path,term) | tcs <- tcss, (Path path,term) <- tcs]
addProjection :: STermSelector -> [Either Label STerm] -> STermSelector
addProjection StrSel [] = StrSel
addProjection (RecSel fields) (Left lbl : path) = RecSel (add fields)
where
add [] = [(lbl,path2selector StrSel path)]
add (field@(lbl',sub_sel):fields)
| lbl == lbl' = (lbl',addProjection sub_sel path):fields
| otherwise = field : add fields
addProjection (TblSel cases) (Right pat : path) = TblSel (add cases)
where
add [] = [(pat,path2selector StrSel path)]
add (cas@(pat',sub_sel):cases)
| pat == pat' = (pat',addProjection sub_sel path):cases
| otherwise = cas : add cases
addProjection x y = error ("addProjection "++show x ++ " " ++ prt (Path y))
addRestriction :: STermSelector -> ([Either Label STerm],STerm) -> STermSelector
addRestriction (ConSel terms) ([] ,term) = ConSel (term:terms)
addRestriction (RecSel fields) (Left lbl : path,term) = RecSel (add fields)
where
add [] = [(lbl,path2selector (ConSel [term]) path)]
add (field@(lbl',sub_sel):fields)
| lbl == lbl' = (lbl',addRestriction sub_sel (path,term)):fields
| otherwise = field : add fields
addRestriction (TblSel cases) (Right pat : path,term) = TblSel (add cases)
where
add [] = [(pat,path2selector (ConSel [term]) path)]
add (field@(pat',sub_sel):cases)
| pat == pat' = (pat',addRestriction sub_sel (path,term)):cases
| otherwise = field : add cases
path2selector base [] = base
path2selector base (Left lbl : path) = RecSel [(lbl,path2selector base path)]
path2selector base (Right sel : path) = TblSel [(sel,path2selector base path)]
------------------------------------------------------------
-- updating the MCF rule
readArgCType :: Int -> CnvMonad SLinType
readArgCType arg = do (_, _, _, ctypes) <- readState
return (ctypes !! arg)
restrictArg :: Int -> SPath -> STerm -> CnvMonad ()
restrictArg arg path term
= do (head, args, ctype, ctypes) <- readState
args' <- updateNthM (restrictFCat path term) arg args
writeState (head, args', ctype, ctypes)
projectArg :: Int -> SPath -> CnvMonad ()
projectArg arg path
= do (head, args, ctype, ctypes) <- readState
args' <- updateNthM (projectFCat path) arg args
writeState (head, args', ctype, ctypes)
readHeadCType :: CnvMonad SLinType
readHeadCType = do (_, _, ctype, _) <- readState
return ctype
restrictHead :: SPath -> STerm -> CnvMonad ()
restrictHead path term
= do (head, args, ctype, ctypes) <- readState
head' <- restrictFCat path term head
writeState (head', args, ctype, ctypes)
projectHead :: SPath -> CnvMonad ()
projectHead path
= do (head, args, ctype, ctypes) <- readState
head' <- projectFCat path head
writeState (head', args, ctype, ctypes)
restrictFCat :: SPath -> STerm -> FCat -> CnvMonad FCat
restrictFCat path0 term0 (FCat id cat rcs tcs) = do
tcs <- addConstraint tcs
return (FCat id cat rcs tcs)
where
addConstraint (c@(path,term) : cs)
| path0 > path = liftM (c:) (addConstraint cs)
| path0 == path = guard (term0 == term) >>
return (c : cs)
addConstraint cs = return ((path0,term0) : cs)
projectFCat :: SPath -> FCat -> CnvMonad FCat
projectFCat path0 (FCat id cat rcs tcs) = do
return (FCat id cat (addConstraint rcs) tcs)
where
addConstraint (path : rcs)
| path0 > path = path : addConstraint rcs
| path0 == path = path : rcs
addConstraint rcs = path0 : rcs

View File

@@ -21,12 +21,14 @@ import qualified GF.Grammar.Grammar as Grammar (Term)
import GF.Formalism.GCFG
import GF.Formalism.SimpleGFC
import GF.Formalism.MCFG
import GF.Formalism.FCFG
import GF.Formalism.CFG
import GF.Formalism.Utilities
import GF.Infra.Print
import GF.Data.Assoc
import Control.Monad (foldM)
import Data.Array
----------------------------------------------------------------------
-- * basic (leaf) types
@@ -104,6 +106,25 @@ mcat2ecat (MCat cat _) = cat
mcat2scat :: MCat -> SCat
mcat2scat = ecat2scat . mcat2ecat
----------------------------------------------------------------------
-- * fast nonerasing MCFG
type FGrammar = FCFGrammar FCat Name Token
type FRule = FCFRule FCat Name Token
data FCat = FCat {-# UNPACK #-} !Int SCat [SPath] [(SPath,STerm)]
initialFCat :: SCat -> FCat
initialFCat cat = FCat 0 cat [] []
fcat2scat :: FCat -> SCat
fcat2scat (FCat _ c _ _) = c
instance Eq FCat where
(FCat id1 _ _ _) == (FCat id2 _ _ _) = id1 == id2
instance Ord FCat where
compare (FCat id1 _ _ _) (FCat id2 _ _ _) = compare id1 id2
----------------------------------------------------------------------
-- * CFG
@@ -131,4 +152,9 @@ instance Print MCat where
instance Print CCat where
prt (CCat cat label) = prt cat ++ prt label
instance Print FCat where
prt (FCat _ cat rcs tcs) = prt cat ++ "{" ++
prtSep ";" ([prt path | path <- rcs] ++
[prt path ++ "=" ++ prt term | (path,term) <- tcs])
++ "}"

55
src/GF/Formalism/FCFG.hs Normal file
View File

@@ -0,0 +1,55 @@
----------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/05/09 09:28:45 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.2 $
--
-- Definitions of multiple context-free grammars
-----------------------------------------------------------------------------
module GF.Formalism.FCFG where
import Control.Monad (liftM)
import Data.List (groupBy)
import Data.Array
import GF.Formalism.Utilities
import GF.Formalism.GCFG
import GF.Infra.Print
------------------------------------------------------------
-- grammar types
type FLabel = Int
type FPointPos = Int
data FSymbol cat tok
= FSymCat cat {-# UNPACK #-} !FLabel {-# UNPACK #-} !Int
| FSymTok tok
type FCFGrammar cat name tok = [FCFRule cat name tok]
data FCFRule cat name tok = FRule (Abstract cat name) (Array FLabel (Array FPointPos (FSymbol cat tok)))
------------------------------------------------------------
-- pretty-printing
instance (Print c, Print t) => Print (FSymbol c t) where
prt (FSymCat c l n) = prt c ++ "[" ++ prt n ++ "," ++ prt l ++ "]"
prt (FSymTok t) = simpleShow (prt t)
where simpleShow str = "\"" ++ concatMap mkEsc str ++ "\""
mkEsc '\\' = "\\\\"
mkEsc '\"' = "\\\""
mkEsc '\n' = "\\n"
mkEsc '\t' = "\\t"
mkEsc chr = [chr]
prtList = prtSep " "
instance (Print c, Print n, Print t) => Print (FCFRule n c t) where
prt (FRule abs lins) = prt abs ++ " := \n" ++ prtSep "\n" [" | "++prtSep " " [prt sym | (_,sym) <- assocs syms] | (_,syms) <- assocs lins]
prtList = prtSep "\n"

View File

@@ -157,6 +157,7 @@ newParser = iOpt "new"
newerParser = iOpt "newer"
newCParser = iOpt "cfg"
newMParser = iOpt "mcfg"
newFParser = iOpt "fcfg"
{-
useParserMCFG, useParserMCFGviaCFG, useParserCFG, useParserCF :: Option

38
src/GF/Parsing/FCFG.hs Normal file
View File

@@ -0,0 +1,38 @@
----------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/05/11 10:28:16 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.5 $
--
-- MCFG parsing
-----------------------------------------------------------------------------
module GF.Parsing.FCFG
(parseFCF, module GF.Parsing.FCFG.PInfo) where
import GF.Data.Operations (Err(..))
import GF.Formalism.Utilities
import GF.Formalism.GCFG
import GF.Formalism.MCFG
import GF.Parsing.FCFG.PInfo
import qualified GF.Parsing.FCFG.Active as Active
import GF.Infra.Print
----------------------------------------------------------------------
-- parsing
parseFCF :: (Print c, Ord c, Print n, Ord n, Print t, Ord t) => String -> Err (FCFParser c n t)
parseFCF prs | prs `elem` strategies = Ok $ parseFCF' prs
| otherwise = Bad $ "FCFG parsing strategy not defined: " ++ prs
strategies = words "bottomup topdown"
parseFCF' :: (Print c, Ord c, Print n, Ord n, Print t, Ord t) => String -> FCFParser c n t
parseFCF' "bottomup" pinfo starts toks = Active.parse "b" pinfo starts toks
parseFCF' "topdown" pinfo starts toks = Active.parse "t" pinfo starts toks

View File

@@ -0,0 +1,188 @@
----------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/08/08 09:01:25 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.5 $
--
-- MCFG parsing, the active algorithm
-----------------------------------------------------------------------------
module GF.Parsing.FCFG.Active (parse) where
import GF.Data.GeneralDeduction
import GF.Data.Assoc
import GF.Data.Utilities
import GF.Formalism.GCFG
import GF.Formalism.FCFG
import GF.Formalism.MCFG(Lin(..))
import GF.Formalism.Utilities
import GF.Infra.Ident
import GF.Parsing.FCFG.Range
import GF.Parsing.FCFG.PInfo
import GF.System.Tracing
import Control.Monad (guard)
import GF.Infra.Print
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
import Data.Array
----------------------------------------------------------------------
-- * parsing
parse :: (Ord c, Print n, Ord n, Ord t) => String -> FCFParser c n t
parse strategy pinfo starts toks =
[ Abs (cat, found) (zip rhs rrecs) fun |
Final ruleid found rrecs <- listXChartFinal chart,
let FRule (Abs cat rhs fun) _ = allRules pinfo ! ruleid ]
where chart = process strategy pinfo toks axioms emptyXChart
axioms | isBU strategy = terminal pinfo toks ++ initialScan pinfo toks
| isTD strategy = initial pinfo starts toks
isBU s = s=="b"
isTD s = s=="t"
-- used in prediction
emptyChildren :: Abstract c n -> [RangeRec]
emptyChildren (Abs _ rhs _) = replicate (length rhs) []
updateChildren :: [RangeRec] -> Int -> RangeRec -> [[RangeRec]]
updateChildren recs i rec = updateNthM update i recs
where update rec' = do guard (null rec' || rec' == rec)
return rec
makeMaxRange (Range _ j) = Range j j
makeMaxRange EmptyRange = EmptyRange
process :: (Ord c, Ord n, Ord t) => String -> FCFPInfo c n t -> Input t -> [Item] -> XChart c -> XChart c
process strategy pinfo toks [] chart = chart
process strategy pinfo toks (item:items) chart = process strategy pinfo toks items $! univRule item chart
where
univRule item@(Active ruleid found rng lbl ppos recs) chart
| inRange (bounds lin) ppos =
case lin ! ppos of
FSymCat c r d -> case insertXChart chart item c of
Nothing -> chart
Just chart -> let items = do Final _ found' _ <- lookupXChartFinal chart c
rng' <- concatRange rng (found' !! r)
recs' <- updateChildren recs d found'
return (Active ruleid found rng' lbl (ppos+1) recs')
++
do guard (isTD strategy)
ruleid <- topdownRules pinfo ? c
let FRule abs lins = allRules pinfo ! ruleid
return (Active ruleid [] EmptyRange 0 0 (emptyChildren abs))
in process strategy pinfo toks items chart
FSymTok tok -> let items = do (i,j) <- inputToken toks ? tok
rng' <- concatRange rng (makeRange i j)
return (Active ruleid found rng' lbl (ppos+1) recs)
in process strategy pinfo toks items chart
| otherwise =
if inRange (bounds lins) (lbl+1)
then univRule (Active ruleid (rng:found) EmptyRange (lbl+1) 0 recs) chart
else univRule (Final ruleid (reverse (rng:found)) recs) chart
where
(FRule (Abs cat _ fn) lins) = allRules pinfo ! ruleid
lin = lins ! lbl
univRule item@(Final ruleid found' recs) chart =
case insertXChart chart item cat of
Nothing -> chart
Just chart -> let items = do (Active ruleid found rng l ppos recs) <- lookupXChartAct chart cat
let FRule _ lins = allRules pinfo ! ruleid
FSymCat cat r d = lins ! l ! ppos
rng' <- concatRange rng (found' !! r)
recs' <- updateChildren recs d found'
return (Active ruleid found rng' l (ppos+1) recs')
++
do guard (isBU strategy)
ruleid <- leftcornerCats pinfo ? cat
let FRule abs lins = allRules pinfo ! ruleid
FSymCat cat r d = lins ! 0 ! 0
return (Active ruleid [] (found' !! r) 0 1 (updateNth (const found') d (emptyChildren abs)))
in process strategy pinfo toks items chart
where
(FRule (Abs cat _ fn) _) = allRules pinfo ! ruleid
----------------------------------------------------------------------
-- * XChart
data Item
= Active {-# UNPACK #-} !RuleId
RangeRec
Range
{-# UNPACK #-} !FLabel
{-# UNPACK #-} !FPointPos
[RangeRec]
| Final {-# UNPACK #-} !RuleId RangeRec [RangeRec]
deriving (Eq, Ord)
data XChart c = XChart !(ParseChart Item c) !(ParseChart Item c)
emptyXChart :: Ord c => XChart c
emptyXChart = XChart emptyChart emptyChart
insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _) c =
case chartInsert actives item c of
Nothing -> Nothing
Just actives -> Just (XChart actives finals)
insertXChart (XChart actives finals) item@(Final _ _ _) c =
case chartInsert finals item c of
Nothing -> Nothing
Just finals -> Just (XChart actives finals)
lookupXChartAct (XChart actives finals) c = chartLookup actives c
lookupXChartFinal (XChart actives finals) c = chartLookup finals c
listXChartAct (XChart actives finals) = chartList actives
listXChartFinal (XChart actives finals) = chartList finals
----------------------------------------------------------------------
-- Earley --
-- anropas med alla startkategorier
initial :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> [c] -> Input t -> [Item]
initial pinfo starts toks =
tracePrt "MCFG.Active (Earley) - initial rules" (prt . length) $
do cat <- starts
ruleid <- topdownRules pinfo ? cat
let FRule abs lins = allRules pinfo ! ruleid
return $ Active ruleid [] (Range 0 0) 0 0 (emptyChildren abs)
----------------------------------------------------------------------
-- Kilbury --
terminal :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> Input t -> [Item]
terminal pinfo toks =
tracePrt "MCFG.Active (Kilbury) - initial terminal rules" (prt . length) $
do ruleid <- emptyRules pinfo
let FRule abs lins = allRules pinfo ! ruleid
rrec <- mapM (rangeRestSyms toks EmptyRange . elems) (elems lins)
return $ Final ruleid rrec []
where
rangeRestSyms toks rng [] = return rng
rangeRestSyms toks rng (FSymTok tok:syms) = do (i,j) <- inputToken toks ? tok
rng' <- concatRange rng (makeRange i j)
rangeRestSyms toks rng' syms
initialScan :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> Input t -> [Item]
initialScan pinfo toks =
tracePrt "MCFG.Active (Kilbury) - initial scanned rules" (prt . length) $
do tok <- aElems (inputToken toks)
ruleid <- leftcornerTokens pinfo ? tok
let FRule abs lins = allRules pinfo ! ruleid
return $ Active ruleid [] EmptyRange 0 0 (emptyChildren abs)

View File

@@ -0,0 +1,115 @@
---------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/05/13 12:40:19 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.5 $
--
-- MCFG parsing, parser information
-----------------------------------------------------------------------------
module GF.Parsing.FCFG.PInfo where
import GF.System.Tracing
import GF.Infra.Print
import GF.Formalism.Utilities
import GF.Formalism.GCFG
import GF.Formalism.FCFG
import GF.Data.SortedList
import GF.Data.Assoc
import GF.Parsing.FCFG.Range
import Data.Array
import Data.Maybe
----------------------------------------------------------------------
-- type declarations
-- | the list of categories = possible starting categories
type FCFParser c n t = FCFPInfo c n t
-> [c]
-> Input t
-> FCFChart c n
type FCFChart c n = [Abstract (c, RangeRec) n]
makeFinalEdge :: c -> Int -> Int -> (c, RangeRec)
makeFinalEdge cat i j = (cat, [makeRange i j])
------------------------------------------------------------
-- parser information
type RuleId = Int
data FCFPInfo c n t
= FCFPInfo { allRules :: Array RuleId (FCFRule c n t)
, topdownRules :: Assoc c (SList RuleId)
-- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
, emptyRules :: [RuleId]
, leftcornerCats :: Assoc c (SList RuleId)
, leftcornerTokens :: Assoc t (SList RuleId)
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
, grammarCats :: SList c
}
getLeftCornerTok lins
| inRange (bounds syms) 0 = case syms ! 0 of
FSymTok tok -> Just tok
_ -> Nothing
| otherwise = Nothing
where
syms = lins ! 0
getLeftCornerCat lins
| inRange (bounds syms) 0 = case syms ! 0 of
FSymCat c _ _ -> Just c
_ -> Nothing
| otherwise = Nothing
where
syms = lins ! 0
buildFCFPInfo :: (Ord c, Ord n, Ord t) => FCFGrammar c n t -> FCFPInfo c n t
buildFCFPInfo grammar =
traceCalcFirst grammar $
tracePrt "MCFG.PInfo - parser info" (prt) $
FCFPInfo { allRules = allrules
, topdownRules = topdownrules
, emptyRules = emptyrules
, leftcornerCats = leftcorncats
, leftcornerTokens = leftcorntoks
, grammarCats = grammarcats
}
where allrules = listArray (0,length grammar-1) grammar
topdownrules = accumAssoc id [(cat, ruleid) | (ruleid, FRule (Abs cat _ _) _) <- assocs allrules]
emptyrules = [ruleid | (ruleid, FRule (Abs _ [] _) _) <- assocs allrules]
leftcorncats = accumAssoc id
[ (fromJust (getLeftCornerCat lins), ruleid) |
(ruleid, FRule _ lins) <- assocs allrules, isJust (getLeftCornerCat lins) ]
leftcorntoks = accumAssoc id
[ (fromJust (getLeftCornerTok lins), ruleid) |
(ruleid, FRule _ lins) <- assocs allrules, isJust (getLeftCornerTok lins) ]
grammarcats = aElems topdownrules
----------------------------------------------------------------------
-- pretty-printing of statistics
instance (Ord c, Ord n, Ord t) => Print (FCFPInfo c n t) where
prt pI = "[ allRules=" ++ sl (elems . allRules) ++
"; tdRules=" ++ sla topdownRules ++
"; emptyRules=" ++ sl emptyRules ++
"; lcCats=" ++ sla leftcornerCats ++
"; lcTokens=" ++ sla leftcornerTokens ++
"; categories=" ++ sl grammarCats ++
" ]"
where sl f = show $ length $ f pI
sla f = let (as, bs) = unzip $ aAssocs $ f pI
in show (length as) ++ "/" ++ show (length (concat bs))

View File

@@ -0,0 +1,54 @@
---------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/08/08 09:01:25 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.5 $
--
-- Definitions of ranges, and operations on ranges
-----------------------------------------------------------------------------
module GF.Parsing.FCFG.Range
( RangeRec, Range(..), makeRange, concatRange, rangeEdge, edgeRange, minRange, maxRange,
) where
-- GF modules
import GF.Formalism.Utilities
import GF.Infra.Print
------------------------------------------------------------
-- ranges as single pairs
type RangeRec = [Range]
data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int
| EmptyRange
deriving (Eq, Ord)
makeRange :: Int -> Int -> Range
makeRange = Range
concatRange :: Range -> Range -> [Range]
concatRange EmptyRange rng = return rng
concatRange rng EmptyRange = return rng
concatRange (Range i j) (Range j' k) = [Range i k | j==j']
rangeEdge :: a -> Range -> Edge a
rangeEdge a (Range i j) = Edge i j a
edgeRange :: Edge a -> Range
edgeRange (Edge i j _) = Range i j
minRange :: Range -> Int
minRange (Range i j) = i
maxRange :: Range -> Int
maxRange (Range i j) = j
instance Print Range where
prt (Range i j) = "(" ++ show i ++ "-" ++ show j ++ ")"
prt (EmptyRange) = "(?)"

View File

@@ -37,23 +37,29 @@ import qualified GF.Formalism.SimpleGFC as S
import qualified GF.Formalism.MCFG as M
import qualified GF.Formalism.CFG as C
import qualified GF.Parsing.MCFG as PM
import qualified GF.Parsing.FCFG as PF
import qualified GF.Parsing.CFG as PC
----------------------------------------------------------------------
-- parsing information
data PInfo = PInfo { mcfPInfo :: MCFPInfo,
cfPInfo :: CFPInfo }
data PInfo = PInfo { mcfPInfo :: MCFPInfo
, fcfPInfo :: FCFPInfo
, cfPInfo :: CFPInfo
}
type MCFPInfo = PM.MCFPInfo MCat Name MLabel Token
type FCFPInfo = PF.FCFPInfo FCat Name Token
type CFPInfo = PC.CFPInfo CCat Name Token
buildPInfo :: MGrammar -> CGrammar -> PInfo
buildPInfo mcfg cfg = PInfo { mcfPInfo = PM.buildMCFPInfo mcfg,
cfPInfo = PC.buildCFPInfo cfg }
buildPInfo :: MGrammar -> FGrammar -> CGrammar -> PInfo
buildPInfo mcfg fcfg cfg = PInfo { mcfPInfo = PM.buildMCFPInfo mcfg
, fcfPInfo = PF.buildFCFPInfo fcfg
, cfPInfo = PC.buildCFPInfo cfg
}
instance Print PInfo where
prt (PInfo m c) = prt m ++ "\n" ++ prt c
prt (PInfo m f c) = prt m ++ "\n" ++ prt c
----------------------------------------------------------------------
-- main parsing function
@@ -114,6 +120,19 @@ selectParser "m" strategy pinfo startCat inTokens
cat@(MCat _ [lbl]) <- startCats ]
return $ chart2forests chart (const False) finalEdges
-- parsing via FCFG
selectParser "f" strategy pinfo startCat inTokens
= do let startCats = filter isStart $ PF.grammarCats fcfpi
isStart cat = fcat2scat cat == cfCat2Ident startCat
fcfpi = fcfPInfo pinfo
fcfParser <- PF.parseFCF strategy
let fcfChart = fcfParser fcfpi startCats inTokens
chart = G.abstract2chart fcfChart
(begin,end) = inputBounds inTokens
finalEdges = [ PF.makeFinalEdge cat begin end |
cat@(FCat _ _ [lbl] _) <- startCats ]
return $ chart2forests chart (const False) finalEdges
-- error parser:
selectParser prs strategy _ _ _ = Bad $ "Parser '" ++ prs ++ "' not defined with strategy: " ++ strategy

View File

@@ -184,7 +184,7 @@ optionsOfCommand co = case co of
CTransformGrammar _ -> flags "printer"
CConvertLatex _ -> none
CLinearize _ -> both "utf8 table struct record all multi" "lang number unlexer mark"
CParse -> both "ambiguous fail cut new newer cfg mcfg n ign raw v lines all prob"
CParse -> both "ambiguous fail cut new newer cfg mcfg fcfg n ign raw v lines all prob"
"cat lang lexer parser number rawtrees"
CTranslate _ _ -> opts "cat lexer parser"
CGenerateRandom -> both "cf prob" "cat lang number depth atoms noexpand doexpand"

View File

@@ -66,10 +66,11 @@ parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree]
-- to use peb's newer parser 7/4-05
parseStringC opts0 sg cat s
| oElem newCParser opts0 || oElem newMParser opts0 || oElem newParser opts0 || oElem newerParser opts0 = do
| oElem newCParser opts0 || oElem newMParser opts0 || oElem newFParser opts0 || oElem newParser opts0 || oElem newerParser opts0 = do
let opts = unionOptions opts0 $ stateOptions sg
algorithm | oElem newCParser opts0 = "c"
| oElem newMParser opts0 = "m"
| oElem newFParser opts0 = "f"
| otherwise = "c" -- default algorithm
strategy = maybe "bottomup" id $ getOptVal opts useParser -- -parser=bottomup/topdown
tokenizer = customOrDefault opts useTokenizer customTokenizer sg