mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-12 06:19:33 -06:00
add the FCFG parser
This commit is contained in:
@@ -151,7 +151,7 @@ emptyStateGrammar = StGr {
|
||||
cf = emptyCF,
|
||||
mcfg = [],
|
||||
cfg = [],
|
||||
pInfo = Prs.buildPInfo [] [],
|
||||
pInfo = Prs.buildPInfo [] [] [],
|
||||
morpho = emptyMorpho,
|
||||
probs = emptyProbs,
|
||||
loptions = noOptions
|
||||
@@ -231,9 +231,9 @@ updateShellState opts ign mcnc sh ((_,sgr,gr),rts) = do
|
||||
let probss = [] -----
|
||||
|
||||
|
||||
let fromGFC = snd . snd . Cnv.convertGFC opts
|
||||
(mcfgs, cfgs) = unzip $ map (curry fromGFC cgr) concrs
|
||||
pInfos = zipWith Prs.buildPInfo mcfgs cfgs
|
||||
let fromGFC = snd . snd . Cnv.convertGFC opts
|
||||
(mcfgs, fcfgs, cfgs) = unzip3 $ map (curry fromGFC cgr) concrs
|
||||
pInfos = zipWith3 Prs.buildPInfo mcfgs fcfgs cfgs
|
||||
|
||||
let funs = funRulesOf cgr
|
||||
let cats = allCatsOf cgr
|
||||
@@ -362,7 +362,7 @@ stateGrammarOfLangOpt purg st0 l = StGr {
|
||||
cf = maybe emptyCF id (lookup l (cfs st)),
|
||||
mcfg = maybe [] id $ lookup l $ mcfgs st,
|
||||
cfg = maybe [] id $ lookup l $ cfgs st,
|
||||
pInfo = maybe (Prs.buildPInfo [] []) id $ lookup l $ pInfos st,
|
||||
pInfo = maybe (Prs.buildPInfo [] [] []) id $ lookup l $ pInfos st,
|
||||
morpho = maybe emptyMorpho id (lookup l (morphos st)),
|
||||
probs = maybe emptyProbs id (lookup l (probss st)),
|
||||
loptions = errVal noOptions $ lookupOptionsCan allCan
|
||||
@@ -404,7 +404,7 @@ stateAbstractGrammar st = StGr {
|
||||
cf = emptyCF,
|
||||
mcfg = [],
|
||||
cfg = [],
|
||||
pInfo = Prs.buildPInfo [] [],
|
||||
pInfo = Prs.buildPInfo [] [] [],
|
||||
morpho = emptyMorpho,
|
||||
probs = emptyProbs,
|
||||
loptions = gloptions st ----
|
||||
|
||||
@@ -31,6 +31,7 @@ import qualified GF.Conversion.RemoveSingletons as RemSing
|
||||
import qualified GF.Conversion.RemoveErasing as RemEra
|
||||
import qualified GF.Conversion.RemoveEpsilon as RemEps
|
||||
import qualified GF.Conversion.SimpleToMCFG as S2M
|
||||
import qualified GF.Conversion.SimpleToFCFG as S2FM
|
||||
import qualified GF.Conversion.MCFGtoCFG as M2C
|
||||
|
||||
import GF.Infra.Print
|
||||
@@ -40,10 +41,10 @@ import GF.System.Tracing
|
||||
----------------------------------------------------------------------
|
||||
-- * GFC -> MCFG & CFG, using options to decide which conversion is used
|
||||
|
||||
convertGFC :: Options -> (CanonGrammar, Ident) -> (SGrammar, (EGrammar, (MGrammar, CGrammar)))
|
||||
convertGFC :: Options -> (CanonGrammar, Ident) -> (SGrammar, (EGrammar, (MGrammar, FGrammar, CGrammar)))
|
||||
convertGFC opts = \g -> let s = g2s g
|
||||
e = s2e s
|
||||
in trace2 "Options" (show opts) (s, (e, (e2m e, e2c e)))
|
||||
in trace2 "Options" (show opts) (s, (e, (e2m e, s2fm s, e2c e)))
|
||||
where e2c = M2C.convertGrammar
|
||||
e2m = case getOptVal opts firstCat of
|
||||
Just cat -> flip erasing [identC cat]
|
||||
@@ -53,6 +54,7 @@ convertGFC opts = \g -> let s = g2s g
|
||||
Just "finite-strict" -> strict
|
||||
Just "epsilon" -> epsilon . nondet
|
||||
_ -> nondet
|
||||
s2fm= S2FM.convertGrammar
|
||||
g2s = case getOptVal opts gfcConversion of
|
||||
Just "finite" -> finite . simple
|
||||
Just "finite2" -> finite . finite . simple
|
||||
@@ -74,10 +76,19 @@ gfc2simple :: Options -> (CanonGrammar, Ident) -> SGrammar
|
||||
gfc2simple opts = fst . convertGFC opts
|
||||
|
||||
gfc2mcfg :: Options -> (CanonGrammar, Ident) -> MGrammar
|
||||
gfc2mcfg opts = fst . snd . snd . convertGFC opts
|
||||
gfc2mcfg opts g = mcfg
|
||||
where
|
||||
(mcfg, _, _) = snd (snd (convertGFC opts g))
|
||||
|
||||
gfc2cfg :: Options -> (CanonGrammar, Ident) -> CGrammar
|
||||
gfc2cfg opts = snd . snd . snd . convertGFC opts
|
||||
gfc2cfg opts g = cfg
|
||||
where
|
||||
(_, _, cfg) = snd (snd (convertGFC opts g))
|
||||
|
||||
gfc2fcfg :: Options -> (CanonGrammar, Ident) -> FGrammar
|
||||
gfc2fcfg opts g = fcfg
|
||||
where
|
||||
(_, fcfg, _) = snd (snd (convertGFC opts g))
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * single step conversions
|
||||
|
||||
459
src/GF/Conversion/SimpleToFCFG.hs
Normal file
459
src/GF/Conversion/SimpleToFCFG.hs
Normal file
@@ -0,0 +1,459 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : PL
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/08/17 08:27:29 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.7 $
|
||||
--
|
||||
-- Converting SimpleGFC grammars to fast nonerasing MCFG grammar.
|
||||
--
|
||||
-- the resulting grammars might be /very large/
|
||||
--
|
||||
-- the conversion is only equivalent if the GFC grammar has a context-free backbone.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
|
||||
module GF.Conversion.SimpleToFCFG
|
||||
(convertGrammar) where
|
||||
|
||||
import GF.System.Tracing
|
||||
import GF.Infra.Print
|
||||
import GF.Infra.Ident
|
||||
|
||||
import Control.Monad
|
||||
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Formalism.GCFG
|
||||
import GF.Formalism.FCFG
|
||||
import GF.Formalism.MCFG(Lin(..))
|
||||
import GF.Formalism.SimpleGFC
|
||||
import GF.Conversion.Types
|
||||
import GF.Canon.AbsGFC(CIdent(..))
|
||||
|
||||
import GF.Data.BacktrackM
|
||||
import GF.Data.SortedList
|
||||
import GF.Data.Utilities (updateNthM)
|
||||
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.List as List
|
||||
import Data.Array
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- main conversion function
|
||||
|
||||
convertGrammar :: SGrammar -> FGrammar
|
||||
convertGrammar srules = getFRules (loop frulesEnv)
|
||||
where
|
||||
(srulesMap,frulesEnv) = foldl helper (Map.empty,emptyFRulesEnv) srules
|
||||
where
|
||||
helper (srulesMap,frulesEnv) rule@(Rule (Abs decl _ _) (Cnc ctype _ _)) =
|
||||
( Map.insertWith (++) (decl2cat decl) [rule] srulesMap
|
||||
, foldBM (\selector _ env -> convertRule selector rule env)
|
||||
frulesEnv
|
||||
(mkSingletonSelector ctype)
|
||||
()
|
||||
)
|
||||
|
||||
loop frulesEnv =
|
||||
let (todo, frulesEnv') = takeToDoRules srulesMap frulesEnv
|
||||
in case todo of
|
||||
[] -> frulesEnv'
|
||||
_ -> loop $! foldl (\env (srules,selector) ->
|
||||
foldl (\env srule -> convertRule selector srule env) env srules) frulesEnv' todo
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- rule conversion
|
||||
|
||||
convertRule :: STermSelector -> SRule -> FRulesEnv -> FRulesEnv
|
||||
convertRule selector (Rule (Abs decl decls (Name fun profile)) (Cnc ctype ctypes (Just term))) frulesEnv =
|
||||
foldBM addRule
|
||||
frulesEnv
|
||||
(convertTerm selector term [Lin emptyPath []])
|
||||
(let cat : args = map decl2cat (decl : decls)
|
||||
in (initialFCat cat, map initialFCat args, ctype, ctypes))
|
||||
where
|
||||
addRule linRec (newCat', newArgs', _, _) env0 =
|
||||
let (env1, newCat) = genFCatHead env0 newCat'
|
||||
(env2, newArgs,idxArgs) = foldr (\(fcat,ctype,idx) (env,args,all_args) ->
|
||||
let (env1, fcat1) = genFCatArg env fcat ctype
|
||||
in case fcat of
|
||||
FCat _ _ [] _ -> (env , args, all_args)
|
||||
_ -> (env1,fcat1:args,(idx,fcat1):all_args)) (env1,[],[]) (zip3 newArgs' ctypes [0..])
|
||||
|
||||
(catPaths : argsPaths) = [rcs | (FCat _ _ rcs _) <- (newCat : newArgs)]
|
||||
|
||||
newLinRec = listArray (0,length linRec-1) [translateLin idxArgs path linRec | path <- catPaths]
|
||||
|
||||
(_,newProfile) = List.mapAccumL accumProf 0 newArgs'
|
||||
where
|
||||
accumProf nr (FCat _ _ [] _) = (nr, Unify [] )
|
||||
accumProf nr _ = (nr+1, Unify [nr])
|
||||
|
||||
newName = Name fun (profile `composeProfiles` newProfile)
|
||||
rule = FRule (Abs newCat newArgs (Name fun newProfile)) newLinRec
|
||||
in addFCatRule env2 rule
|
||||
convertRule selector _ frulesEnv = frulesEnv
|
||||
|
||||
translateLin idxArgs lbl' [] = array (0,-1) []
|
||||
translateLin idxArgs lbl' (Lin lbl syms : lins)
|
||||
| lbl' == lbl = listArray (0,length syms-1) (map instSym syms)
|
||||
| otherwise = translateLin idxArgs lbl' lins
|
||||
where instSym = symbol (\(_, lbl, nr) -> instCat lbl nr 0 idxArgs) FSymTok
|
||||
instCat lbl nr nr' ((idx,arg@(FCat _ _ rcs _)):idxArgs)
|
||||
| nr == idx = FSymCat arg (index lbl rcs 0) nr'
|
||||
| otherwise = instCat lbl nr (nr'+1) idxArgs
|
||||
|
||||
index lbl' (lbl:lbls) idx
|
||||
| lbl' == lbl = idx
|
||||
| otherwise = index lbl' lbls $! (idx+1)
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- term conversion
|
||||
|
||||
type CnvMonad a = BacktrackM Env a
|
||||
|
||||
type Env = (FCat, [FCat], SLinType, [SLinType])
|
||||
type LinRec = [Lin SCat SPath Token]
|
||||
|
||||
|
||||
convertTerm :: STermSelector -> STerm -> LinRec -> CnvMonad LinRec
|
||||
convertTerm selector (Arg nr cat path) (Lin lbl_path lin : lins) = convertArg selector nr cat path lbl_path lin lins
|
||||
convertTerm selector (con :^ args) (Lin lbl_path lin : lins) = convertCon selector con args lbl_path lin lins
|
||||
convertTerm selector (Rec record) (Lin lbl_path lin : lins) = convertRec selector record lbl_path lin lins
|
||||
convertTerm selector (term :. lbl) lins = convertTerm (RecPrj lbl selector) term lins
|
||||
convertTerm selector (Tbl table) (Lin lbl_path lin : lins) = convertTbl selector table lbl_path lin lins
|
||||
convertTerm selector (term :! sel) lins = do sel <- evalTerm sel
|
||||
convertTerm (TblPrj sel selector) term lins
|
||||
convertTerm selector (Variants vars) lins = do term <- member vars
|
||||
convertTerm selector term lins
|
||||
convertTerm selector (t1 :++ t2) lins = do lins <- convertTerm selector t2 lins
|
||||
lins <- convertTerm selector t1 lins
|
||||
return lins
|
||||
convertTerm selector (Token str) (Lin lbl_path lin : lins) = do projectHead lbl_path
|
||||
return (Lin lbl_path (Tok str : lin) : lins)
|
||||
convertTerm selector (Empty ) (Lin lbl_path lin : lins) = do projectHead lbl_path
|
||||
return (Lin lbl_path lin : lins)
|
||||
|
||||
convertArg (RecSel record) nr cat path lbl_path lin lins =
|
||||
foldM (\lins (lbl, selector) -> convertArg selector nr cat (path ++. lbl) (lbl_path ++. lbl) lin lins) lins record
|
||||
convertArg (TblSel cases) nr cat path lbl_path lin lins =
|
||||
foldM (\lins (term, selector) -> convertArg selector nr cat (path ++! term) (lbl_path ++! term) lin lins) lins cases
|
||||
convertArg (RecPrj lbl selector) nr cat path lbl_path lin lins =
|
||||
convertArg selector nr cat (path ++. lbl ) lbl_path lin lins
|
||||
convertArg (TblPrj term selector) nr cat path lbl_path lin lins =
|
||||
convertArg selector nr cat (path ++! term) lbl_path lin lins
|
||||
convertArg (ConSel terms) nr cat path lbl_path lin lins = do
|
||||
sel <- member terms
|
||||
restrictHead lbl_path sel
|
||||
restrictArg nr lbl_path sel
|
||||
return lins
|
||||
convertArg StrSel nr cat path lbl_path lin lins = do
|
||||
projectHead lbl_path
|
||||
projectArg nr path
|
||||
return (Lin lbl_path (Cat (cat, path, nr) : lin) : lins)
|
||||
|
||||
convertCon (ConSel terms) con args lbl_path lin lins = do
|
||||
args <- mapM evalTerm args
|
||||
let term = con :^ args
|
||||
guard (term `elem` terms)
|
||||
restrictHead lbl_path term
|
||||
return lins
|
||||
|
||||
convertRec selector [] lbl_path lin lins = return lins
|
||||
convertRec selector@(RecSel fields) ((label, val):record) lbl_path lin lins = select fields
|
||||
where
|
||||
select [] = convertRec selector record lbl_path lin lins
|
||||
select ((label',sub_sel) : fields)
|
||||
| label == label' = do lins <- convertTerm sub_sel val (Lin (lbl_path ++. label) lin : lins)
|
||||
convertRec selector record lbl_path lin lins
|
||||
| otherwise = select fields
|
||||
convertRec (RecPrj label sub_sel) record lbl_path lin lins = do
|
||||
(label',val) <- member record
|
||||
guard (label==label')
|
||||
convertTerm sub_sel val (Lin lbl_path lin : lins)
|
||||
|
||||
convertTbl selector [] lbl_path lin lins = return lins
|
||||
convertTbl selector@(TblSel cases) ((term, val):table) lbl_path lin lins = case selector of { TblSel cases -> select cases }
|
||||
where
|
||||
select [] = convertTbl selector table lbl_path lin lins
|
||||
select ((term',sub_sel) : cases)
|
||||
| term == term' = do lins <- convertTerm sub_sel val (Lin (lbl_path ++! term) lin : lins)
|
||||
convertTbl selector table lbl_path lin lins
|
||||
| otherwise = select cases
|
||||
convertTbl (TblPrj term sub_sel) table lbl_path lin lins = do
|
||||
(term',val) <- member table
|
||||
guard (term==term')
|
||||
convertTerm sub_sel val (Lin lbl_path lin : lins)
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- eval a term to ground terms
|
||||
|
||||
evalTerm :: STerm -> CnvMonad STerm
|
||||
evalTerm arg@(Arg nr _ path) = do ctype <- readArgCType nr
|
||||
unifyPType arg $ lintypeFollowPath path ctype
|
||||
evalTerm (con :^ terms) = do terms <- mapM evalTerm terms
|
||||
return (con :^ terms)
|
||||
evalTerm (Rec record) = do record <- mapM evalAssign record
|
||||
return (Rec record)
|
||||
evalTerm (term :. lbl) = do term <- evalTerm term
|
||||
evalTerm (term +. lbl)
|
||||
evalTerm (Tbl table) = do table <- mapM evalCase table
|
||||
return (Tbl table)
|
||||
evalTerm (term :! sel) = do sel <- evalTerm sel
|
||||
evalTerm (term +! sel)
|
||||
evalTerm (Variants terms) = member terms >>= evalTerm
|
||||
evalTerm (t1 :++ t2) = do t1 <- evalTerm t1
|
||||
t2 <- evalTerm t2
|
||||
return (t1 :++ t2)
|
||||
evalTerm (Token str) = do return (Token str)
|
||||
evalTerm Empty = do return Empty
|
||||
|
||||
evalAssign :: (Label, STerm) -> CnvMonad (Label, STerm)
|
||||
evalAssign (lbl, term) = liftM ((,) lbl) $ evalTerm term
|
||||
|
||||
evalCase :: (STerm, STerm) -> CnvMonad (STerm, STerm)
|
||||
evalCase (pat, term) = liftM2 (,) (evalTerm pat) (evalTerm term)
|
||||
|
||||
unifyPType :: STerm -> SLinType -> CnvMonad STerm
|
||||
unifyPType arg (RecT prec) =
|
||||
liftM Rec $
|
||||
sequence [ liftM ((,) lbl) $
|
||||
unifyPType (arg +. lbl) ptype |
|
||||
(lbl, ptype) <- prec ]
|
||||
unifyPType (Arg nr _ path) (ConT terms) =
|
||||
do (_, args, _, _) <- readState
|
||||
let (FCat _ _ _ tcs) = args !! nr
|
||||
case lookup path tcs of
|
||||
Just term -> return term
|
||||
Nothing -> do term <- member terms
|
||||
restrictArg nr path term
|
||||
return term
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- FRulesEnv
|
||||
|
||||
data FRulesEnv = FRulesEnv {-# UNPACK #-} !Int FCatSet [FRule]
|
||||
|
||||
type SRulesMap = Map.Map SCat [SRule]
|
||||
type FCatSet = Map.Map SCat (Map.Map [SPath] (Map.Map [(SPath,STerm)] (Either FCat FCat)))
|
||||
|
||||
|
||||
emptyFRulesEnv = FRulesEnv 0 Map.empty []
|
||||
|
||||
genFCatHead :: FRulesEnv -> FCat -> (FRulesEnv, FCat)
|
||||
genFCatHead env@(FRulesEnv last_id fcatSet rules) m1@(FCat _ cat rcs tcs) =
|
||||
case Map.lookup cat fcatSet >>= Map.lookup rcs >>= Map.lookup tcs of
|
||||
Just (Left fcat) -> (FRulesEnv last_id (ins fcat) rules, fcat)
|
||||
Just (Right fcat) -> (env, fcat)
|
||||
Nothing -> let next_id = last_id+1
|
||||
fcat = FCat next_id cat rcs tcs
|
||||
in (FRulesEnv next_id (ins fcat) rules, fcat)
|
||||
where
|
||||
ins fcat = Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs x_fcat) rcs tmap_s) cat rmap_s fcatSet
|
||||
where
|
||||
x_fcat = Right fcat
|
||||
tmap_s = Map.singleton tcs x_fcat
|
||||
rmap_s = Map.singleton rcs tmap_s
|
||||
|
||||
genFCatArg :: FRulesEnv -> FCat -> SLinType -> (FRulesEnv, FCat)
|
||||
genFCatArg env@(FRulesEnv last_id fcatSet rules) m1@(FCat _ cat rcs tcs) ctype =
|
||||
case Map.lookup cat fcatSet >>= Map.lookup rcs of
|
||||
Just tmap -> case Map.lookup tcs tmap of
|
||||
Just (Left fcat) -> (env, fcat)
|
||||
Just (Right fcat) -> (env, fcat)
|
||||
Nothing -> ins tmap
|
||||
Nothing -> ins Map.empty
|
||||
where
|
||||
ins tmap =
|
||||
let next_id = last_id+1
|
||||
fcat = FCat next_id cat rcs tcs
|
||||
(x_fcat,last_id1,tmap1,rules1)
|
||||
= foldBM (\tcs st (x_fcat,last_id,tmap,rules) ->
|
||||
let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap
|
||||
rule = FRule (Abs fcat [fcat_arg] coercionName)
|
||||
(listArray (0,length rcs-1) [listArray (0,0) [FSymCat fcat_arg lbl 0] | lbl <- [0..length rcs-1]])
|
||||
in if st
|
||||
then (Right fcat,last_id1,tmap1,rule:rules)
|
||||
else (x_fcat, last_id, tmap, rules))
|
||||
(Left fcat,next_id,Map.insert tcs x_fcat tmap,rules)
|
||||
(gen_tcs ctype emptyPath [])
|
||||
False
|
||||
rmap1 = Map.singleton rcs tmap1
|
||||
in (FRulesEnv last_id1 (Map.insertWith (\_ -> Map.insert rcs tmap1) cat rmap1 fcatSet) rules1, fcat)
|
||||
where
|
||||
addArg tcs last_id tmap =
|
||||
case Map.lookup tcs tmap of
|
||||
Just (Left fcat) -> (last_id, tmap, fcat)
|
||||
Just (Right fcat) -> (last_id, tmap, fcat)
|
||||
Nothing -> let next_id = last_id+1
|
||||
fcat = FCat next_id cat rcs tcs
|
||||
in (next_id, Map.insert tcs (Left fcat) tmap, fcat)
|
||||
|
||||
gen_tcs :: SLinType -> SPath -> [(SPath,STerm)] -> BacktrackM Bool [(SPath,STerm)]
|
||||
gen_tcs (RecT record) path acc = foldM (\acc (label,ctype) -> gen_tcs ctype (path ++. label) acc) acc record
|
||||
gen_tcs (TblT terms ctype) path acc = foldM (\acc term -> gen_tcs ctype (path ++! term ) acc) acc terms
|
||||
gen_tcs (StrT) path acc = return acc
|
||||
gen_tcs (ConT terms) path acc =
|
||||
case List.lookup path tcs of
|
||||
Just term -> return ((path,term) : acc)
|
||||
Nothing -> do writeState True
|
||||
term <- member terms
|
||||
return ((path,term) : acc)
|
||||
|
||||
takeToDoRules :: SRulesMap -> FRulesEnv -> ([([SRule], STermSelector)], FRulesEnv)
|
||||
takeToDoRules srulesMap (FRulesEnv last_id fcatSet rules) = (todo,FRulesEnv last_id fcatSet' rules)
|
||||
where
|
||||
(todo,fcatSet') =
|
||||
Map.mapAccumWithKey (\todo cat rmap ->
|
||||
let (todo1,rmap1) = Map.mapAccumWithKey (\todo rcs tmap ->
|
||||
let (tcss,tmap') = Map.mapAccumWithKey (\tcss tcs x_fcat ->
|
||||
case x_fcat of
|
||||
Left fcat -> (tcs:tcss,Right fcat)
|
||||
Right fcat -> ( tcss, x_fcat)) [] tmap
|
||||
in case tcss of
|
||||
[] -> ( todo,tmap )
|
||||
_ -> ((srules,mkSelector rcs tcss) : todo,tmap')) todo rmap
|
||||
mb_srules = Map.lookup cat srulesMap
|
||||
Just srules = mb_srules
|
||||
|
||||
in case mb_srules of
|
||||
Just srules -> (todo1,rmap1)
|
||||
Nothing -> (todo ,rmap1)) [] fcatSet
|
||||
|
||||
addFCatRule :: FRulesEnv -> FRule -> FRulesEnv
|
||||
addFCatRule (FRulesEnv last_id fcatSet rules) rule = FRulesEnv last_id fcatSet (rule:rules)
|
||||
|
||||
getFRules :: FRulesEnv -> [FRule]
|
||||
getFRules (FRulesEnv last_id fcatSet rules) = rules
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- The STermSelector
|
||||
|
||||
data STermSelector
|
||||
= RecSel [(Label, STermSelector)]
|
||||
| TblSel [(STerm, STermSelector)]
|
||||
| RecPrj Label STermSelector
|
||||
| TblPrj STerm STermSelector
|
||||
| ConSel [STerm]
|
||||
| StrSel
|
||||
deriving Show
|
||||
|
||||
|
||||
mkSingletonSelector :: SLinType -> BacktrackM () STermSelector
|
||||
mkSingletonSelector ctype = do
|
||||
let (rcss,tcss) = loop emptyPath ([],[]) ctype
|
||||
rcs <- member rcss
|
||||
return (mkSelector [rcs] tcss)
|
||||
where
|
||||
loop path st (RecT record) = foldl (\st (lbl,ctype) -> loop (path ++. lbl ) st ctype) st record
|
||||
loop path st (TblT terms ctype) = foldl (\st term -> loop (path ++! term) st ctype) st terms
|
||||
loop path (rcss,tcss) (ConT terms) = (rcss, map ((,) path) terms : tcss)
|
||||
loop path (rcss,tcss) (StrT) = (path : rcss, tcss)
|
||||
|
||||
|
||||
mkSelector :: [SPath] -> [[(SPath,STerm)]] -> STermSelector
|
||||
mkSelector rcs tcss =
|
||||
foldl addRestriction (case xs of
|
||||
(path:xs) -> foldl addProjection (path2selector StrSel path) xs) ys
|
||||
where
|
||||
xs = [ reverse path | Path path <- rcs]
|
||||
ys = [(reverse path,term) | tcs <- tcss, (Path path,term) <- tcs]
|
||||
|
||||
addProjection :: STermSelector -> [Either Label STerm] -> STermSelector
|
||||
addProjection StrSel [] = StrSel
|
||||
addProjection (RecSel fields) (Left lbl : path) = RecSel (add fields)
|
||||
where
|
||||
add [] = [(lbl,path2selector StrSel path)]
|
||||
add (field@(lbl',sub_sel):fields)
|
||||
| lbl == lbl' = (lbl',addProjection sub_sel path):fields
|
||||
| otherwise = field : add fields
|
||||
addProjection (TblSel cases) (Right pat : path) = TblSel (add cases)
|
||||
where
|
||||
add [] = [(pat,path2selector StrSel path)]
|
||||
add (cas@(pat',sub_sel):cases)
|
||||
| pat == pat' = (pat',addProjection sub_sel path):cases
|
||||
| otherwise = cas : add cases
|
||||
addProjection x y = error ("addProjection "++show x ++ " " ++ prt (Path y))
|
||||
|
||||
addRestriction :: STermSelector -> ([Either Label STerm],STerm) -> STermSelector
|
||||
addRestriction (ConSel terms) ([] ,term) = ConSel (term:terms)
|
||||
addRestriction (RecSel fields) (Left lbl : path,term) = RecSel (add fields)
|
||||
where
|
||||
add [] = [(lbl,path2selector (ConSel [term]) path)]
|
||||
add (field@(lbl',sub_sel):fields)
|
||||
| lbl == lbl' = (lbl',addRestriction sub_sel (path,term)):fields
|
||||
| otherwise = field : add fields
|
||||
addRestriction (TblSel cases) (Right pat : path,term) = TblSel (add cases)
|
||||
where
|
||||
add [] = [(pat,path2selector (ConSel [term]) path)]
|
||||
add (field@(pat',sub_sel):cases)
|
||||
| pat == pat' = (pat',addRestriction sub_sel (path,term)):cases
|
||||
| otherwise = field : add cases
|
||||
|
||||
path2selector base [] = base
|
||||
path2selector base (Left lbl : path) = RecSel [(lbl,path2selector base path)]
|
||||
path2selector base (Right sel : path) = TblSel [(sel,path2selector base path)]
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- updating the MCF rule
|
||||
|
||||
readArgCType :: Int -> CnvMonad SLinType
|
||||
readArgCType arg = do (_, _, _, ctypes) <- readState
|
||||
return (ctypes !! arg)
|
||||
|
||||
restrictArg :: Int -> SPath -> STerm -> CnvMonad ()
|
||||
restrictArg arg path term
|
||||
= do (head, args, ctype, ctypes) <- readState
|
||||
args' <- updateNthM (restrictFCat path term) arg args
|
||||
writeState (head, args', ctype, ctypes)
|
||||
|
||||
projectArg :: Int -> SPath -> CnvMonad ()
|
||||
projectArg arg path
|
||||
= do (head, args, ctype, ctypes) <- readState
|
||||
args' <- updateNthM (projectFCat path) arg args
|
||||
writeState (head, args', ctype, ctypes)
|
||||
|
||||
readHeadCType :: CnvMonad SLinType
|
||||
readHeadCType = do (_, _, ctype, _) <- readState
|
||||
return ctype
|
||||
|
||||
restrictHead :: SPath -> STerm -> CnvMonad ()
|
||||
restrictHead path term
|
||||
= do (head, args, ctype, ctypes) <- readState
|
||||
head' <- restrictFCat path term head
|
||||
writeState (head', args, ctype, ctypes)
|
||||
|
||||
projectHead :: SPath -> CnvMonad ()
|
||||
projectHead path
|
||||
= do (head, args, ctype, ctypes) <- readState
|
||||
head' <- projectFCat path head
|
||||
writeState (head', args, ctype, ctypes)
|
||||
|
||||
restrictFCat :: SPath -> STerm -> FCat -> CnvMonad FCat
|
||||
restrictFCat path0 term0 (FCat id cat rcs tcs) = do
|
||||
tcs <- addConstraint tcs
|
||||
return (FCat id cat rcs tcs)
|
||||
where
|
||||
addConstraint (c@(path,term) : cs)
|
||||
| path0 > path = liftM (c:) (addConstraint cs)
|
||||
| path0 == path = guard (term0 == term) >>
|
||||
return (c : cs)
|
||||
addConstraint cs = return ((path0,term0) : cs)
|
||||
|
||||
projectFCat :: SPath -> FCat -> CnvMonad FCat
|
||||
projectFCat path0 (FCat id cat rcs tcs) = do
|
||||
return (FCat id cat (addConstraint rcs) tcs)
|
||||
where
|
||||
addConstraint (path : rcs)
|
||||
| path0 > path = path : addConstraint rcs
|
||||
| path0 == path = path : rcs
|
||||
addConstraint rcs = path0 : rcs
|
||||
@@ -21,12 +21,14 @@ import qualified GF.Grammar.Grammar as Grammar (Term)
|
||||
import GF.Formalism.GCFG
|
||||
import GF.Formalism.SimpleGFC
|
||||
import GF.Formalism.MCFG
|
||||
import GF.Formalism.FCFG
|
||||
import GF.Formalism.CFG
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Infra.Print
|
||||
import GF.Data.Assoc
|
||||
|
||||
import Control.Monad (foldM)
|
||||
import Data.Array
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * basic (leaf) types
|
||||
@@ -104,6 +106,25 @@ mcat2ecat (MCat cat _) = cat
|
||||
mcat2scat :: MCat -> SCat
|
||||
mcat2scat = ecat2scat . mcat2ecat
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * fast nonerasing MCFG
|
||||
|
||||
type FGrammar = FCFGrammar FCat Name Token
|
||||
type FRule = FCFRule FCat Name Token
|
||||
data FCat = FCat {-# UNPACK #-} !Int SCat [SPath] [(SPath,STerm)]
|
||||
|
||||
initialFCat :: SCat -> FCat
|
||||
initialFCat cat = FCat 0 cat [] []
|
||||
|
||||
fcat2scat :: FCat -> SCat
|
||||
fcat2scat (FCat _ c _ _) = c
|
||||
|
||||
instance Eq FCat where
|
||||
(FCat id1 _ _ _) == (FCat id2 _ _ _) = id1 == id2
|
||||
|
||||
instance Ord FCat where
|
||||
compare (FCat id1 _ _ _) (FCat id2 _ _ _) = compare id1 id2
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * CFG
|
||||
|
||||
@@ -131,4 +152,9 @@ instance Print MCat where
|
||||
instance Print CCat where
|
||||
prt (CCat cat label) = prt cat ++ prt label
|
||||
|
||||
instance Print FCat where
|
||||
prt (FCat _ cat rcs tcs) = prt cat ++ "{" ++
|
||||
prtSep ";" ([prt path | path <- rcs] ++
|
||||
[prt path ++ "=" ++ prt term | (path,term) <- tcs])
|
||||
++ "}"
|
||||
|
||||
|
||||
55
src/GF/Formalism/FCFG.hs
Normal file
55
src/GF/Formalism/FCFG.hs
Normal file
@@ -0,0 +1,55 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : PL
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/05/09 09:28:45 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.2 $
|
||||
--
|
||||
-- Definitions of multiple context-free grammars
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Formalism.FCFG where
|
||||
|
||||
import Control.Monad (liftM)
|
||||
import Data.List (groupBy)
|
||||
import Data.Array
|
||||
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Formalism.GCFG
|
||||
|
||||
import GF.Infra.Print
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- grammar types
|
||||
|
||||
type FLabel = Int
|
||||
type FPointPos = Int
|
||||
|
||||
data FSymbol cat tok
|
||||
= FSymCat cat {-# UNPACK #-} !FLabel {-# UNPACK #-} !Int
|
||||
| FSymTok tok
|
||||
|
||||
type FCFGrammar cat name tok = [FCFRule cat name tok]
|
||||
data FCFRule cat name tok = FRule (Abstract cat name) (Array FLabel (Array FPointPos (FSymbol cat tok)))
|
||||
|
||||
------------------------------------------------------------
|
||||
-- pretty-printing
|
||||
|
||||
instance (Print c, Print t) => Print (FSymbol c t) where
|
||||
prt (FSymCat c l n) = prt c ++ "[" ++ prt n ++ "," ++ prt l ++ "]"
|
||||
prt (FSymTok t) = simpleShow (prt t)
|
||||
where simpleShow str = "\"" ++ concatMap mkEsc str ++ "\""
|
||||
mkEsc '\\' = "\\\\"
|
||||
mkEsc '\"' = "\\\""
|
||||
mkEsc '\n' = "\\n"
|
||||
mkEsc '\t' = "\\t"
|
||||
mkEsc chr = [chr]
|
||||
prtList = prtSep " "
|
||||
|
||||
instance (Print c, Print n, Print t) => Print (FCFRule n c t) where
|
||||
prt (FRule abs lins) = prt abs ++ " := \n" ++ prtSep "\n" [" | "++prtSep " " [prt sym | (_,sym) <- assocs syms] | (_,syms) <- assocs lins]
|
||||
prtList = prtSep "\n"
|
||||
@@ -157,6 +157,7 @@ newParser = iOpt "new"
|
||||
newerParser = iOpt "newer"
|
||||
newCParser = iOpt "cfg"
|
||||
newMParser = iOpt "mcfg"
|
||||
newFParser = iOpt "fcfg"
|
||||
|
||||
{-
|
||||
useParserMCFG, useParserMCFGviaCFG, useParserCFG, useParserCF :: Option
|
||||
|
||||
38
src/GF/Parsing/FCFG.hs
Normal file
38
src/GF/Parsing/FCFG.hs
Normal file
@@ -0,0 +1,38 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : PL
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/05/11 10:28:16 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.5 $
|
||||
--
|
||||
-- MCFG parsing
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Parsing.FCFG
|
||||
(parseFCF, module GF.Parsing.FCFG.PInfo) where
|
||||
|
||||
import GF.Data.Operations (Err(..))
|
||||
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Formalism.GCFG
|
||||
import GF.Formalism.MCFG
|
||||
import GF.Parsing.FCFG.PInfo
|
||||
|
||||
import qualified GF.Parsing.FCFG.Active as Active
|
||||
import GF.Infra.Print
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- parsing
|
||||
|
||||
parseFCF :: (Print c, Ord c, Print n, Ord n, Print t, Ord t) => String -> Err (FCFParser c n t)
|
||||
parseFCF prs | prs `elem` strategies = Ok $ parseFCF' prs
|
||||
| otherwise = Bad $ "FCFG parsing strategy not defined: " ++ prs
|
||||
|
||||
strategies = words "bottomup topdown"
|
||||
|
||||
parseFCF' :: (Print c, Ord c, Print n, Ord n, Print t, Ord t) => String -> FCFParser c n t
|
||||
parseFCF' "bottomup" pinfo starts toks = Active.parse "b" pinfo starts toks
|
||||
parseFCF' "topdown" pinfo starts toks = Active.parse "t" pinfo starts toks
|
||||
188
src/GF/Parsing/FCFG/Active.hs
Normal file
188
src/GF/Parsing/FCFG/Active.hs
Normal file
@@ -0,0 +1,188 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : PL
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/08/08 09:01:25 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.5 $
|
||||
--
|
||||
-- MCFG parsing, the active algorithm
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Parsing.FCFG.Active (parse) where
|
||||
|
||||
import GF.Data.GeneralDeduction
|
||||
import GF.Data.Assoc
|
||||
import GF.Data.Utilities
|
||||
|
||||
import GF.Formalism.GCFG
|
||||
import GF.Formalism.FCFG
|
||||
import GF.Formalism.MCFG(Lin(..))
|
||||
import GF.Formalism.Utilities
|
||||
|
||||
import GF.Infra.Ident
|
||||
|
||||
import GF.Parsing.FCFG.Range
|
||||
import GF.Parsing.FCFG.PInfo
|
||||
|
||||
import GF.System.Tracing
|
||||
|
||||
import Control.Monad (guard)
|
||||
|
||||
import GF.Infra.Print
|
||||
|
||||
import qualified Data.List as List
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Set as Set
|
||||
import Data.Array
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * parsing
|
||||
|
||||
parse :: (Ord c, Print n, Ord n, Ord t) => String -> FCFParser c n t
|
||||
parse strategy pinfo starts toks =
|
||||
[ Abs (cat, found) (zip rhs rrecs) fun |
|
||||
Final ruleid found rrecs <- listXChartFinal chart,
|
||||
let FRule (Abs cat rhs fun) _ = allRules pinfo ! ruleid ]
|
||||
where chart = process strategy pinfo toks axioms emptyXChart
|
||||
|
||||
axioms | isBU strategy = terminal pinfo toks ++ initialScan pinfo toks
|
||||
| isTD strategy = initial pinfo starts toks
|
||||
|
||||
isBU s = s=="b"
|
||||
isTD s = s=="t"
|
||||
|
||||
-- used in prediction
|
||||
emptyChildren :: Abstract c n -> [RangeRec]
|
||||
emptyChildren (Abs _ rhs _) = replicate (length rhs) []
|
||||
|
||||
updateChildren :: [RangeRec] -> Int -> RangeRec -> [[RangeRec]]
|
||||
updateChildren recs i rec = updateNthM update i recs
|
||||
where update rec' = do guard (null rec' || rec' == rec)
|
||||
return rec
|
||||
|
||||
makeMaxRange (Range _ j) = Range j j
|
||||
makeMaxRange EmptyRange = EmptyRange
|
||||
|
||||
process :: (Ord c, Ord n, Ord t) => String -> FCFPInfo c n t -> Input t -> [Item] -> XChart c -> XChart c
|
||||
process strategy pinfo toks [] chart = chart
|
||||
process strategy pinfo toks (item:items) chart = process strategy pinfo toks items $! univRule item chart
|
||||
where
|
||||
univRule item@(Active ruleid found rng lbl ppos recs) chart
|
||||
| inRange (bounds lin) ppos =
|
||||
case lin ! ppos of
|
||||
FSymCat c r d -> case insertXChart chart item c of
|
||||
Nothing -> chart
|
||||
Just chart -> let items = do Final _ found' _ <- lookupXChartFinal chart c
|
||||
rng' <- concatRange rng (found' !! r)
|
||||
recs' <- updateChildren recs d found'
|
||||
return (Active ruleid found rng' lbl (ppos+1) recs')
|
||||
++
|
||||
do guard (isTD strategy)
|
||||
ruleid <- topdownRules pinfo ? c
|
||||
let FRule abs lins = allRules pinfo ! ruleid
|
||||
return (Active ruleid [] EmptyRange 0 0 (emptyChildren abs))
|
||||
in process strategy pinfo toks items chart
|
||||
FSymTok tok -> let items = do (i,j) <- inputToken toks ? tok
|
||||
rng' <- concatRange rng (makeRange i j)
|
||||
return (Active ruleid found rng' lbl (ppos+1) recs)
|
||||
in process strategy pinfo toks items chart
|
||||
| otherwise =
|
||||
if inRange (bounds lins) (lbl+1)
|
||||
then univRule (Active ruleid (rng:found) EmptyRange (lbl+1) 0 recs) chart
|
||||
else univRule (Final ruleid (reverse (rng:found)) recs) chart
|
||||
where
|
||||
(FRule (Abs cat _ fn) lins) = allRules pinfo ! ruleid
|
||||
lin = lins ! lbl
|
||||
univRule item@(Final ruleid found' recs) chart =
|
||||
case insertXChart chart item cat of
|
||||
Nothing -> chart
|
||||
Just chart -> let items = do (Active ruleid found rng l ppos recs) <- lookupXChartAct chart cat
|
||||
let FRule _ lins = allRules pinfo ! ruleid
|
||||
FSymCat cat r d = lins ! l ! ppos
|
||||
rng' <- concatRange rng (found' !! r)
|
||||
recs' <- updateChildren recs d found'
|
||||
return (Active ruleid found rng' l (ppos+1) recs')
|
||||
++
|
||||
do guard (isBU strategy)
|
||||
ruleid <- leftcornerCats pinfo ? cat
|
||||
let FRule abs lins = allRules pinfo ! ruleid
|
||||
FSymCat cat r d = lins ! 0 ! 0
|
||||
return (Active ruleid [] (found' !! r) 0 1 (updateNth (const found') d (emptyChildren abs)))
|
||||
in process strategy pinfo toks items chart
|
||||
where
|
||||
(FRule (Abs cat _ fn) _) = allRules pinfo ! ruleid
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * XChart
|
||||
|
||||
data Item
|
||||
= Active {-# UNPACK #-} !RuleId
|
||||
RangeRec
|
||||
Range
|
||||
{-# UNPACK #-} !FLabel
|
||||
{-# UNPACK #-} !FPointPos
|
||||
[RangeRec]
|
||||
| Final {-# UNPACK #-} !RuleId RangeRec [RangeRec]
|
||||
deriving (Eq, Ord)
|
||||
|
||||
data XChart c = XChart !(ParseChart Item c) !(ParseChart Item c)
|
||||
|
||||
emptyXChart :: Ord c => XChart c
|
||||
emptyXChart = XChart emptyChart emptyChart
|
||||
|
||||
insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _) c =
|
||||
case chartInsert actives item c of
|
||||
Nothing -> Nothing
|
||||
Just actives -> Just (XChart actives finals)
|
||||
|
||||
insertXChart (XChart actives finals) item@(Final _ _ _) c =
|
||||
case chartInsert finals item c of
|
||||
Nothing -> Nothing
|
||||
Just finals -> Just (XChart actives finals)
|
||||
|
||||
lookupXChartAct (XChart actives finals) c = chartLookup actives c
|
||||
lookupXChartFinal (XChart actives finals) c = chartLookup finals c
|
||||
|
||||
listXChartAct (XChart actives finals) = chartList actives
|
||||
listXChartFinal (XChart actives finals) = chartList finals
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- Earley --
|
||||
|
||||
-- anropas med alla startkategorier
|
||||
initial :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> [c] -> Input t -> [Item]
|
||||
initial pinfo starts toks =
|
||||
tracePrt "MCFG.Active (Earley) - initial rules" (prt . length) $
|
||||
do cat <- starts
|
||||
ruleid <- topdownRules pinfo ? cat
|
||||
let FRule abs lins = allRules pinfo ! ruleid
|
||||
return $ Active ruleid [] (Range 0 0) 0 0 (emptyChildren abs)
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- Kilbury --
|
||||
|
||||
terminal :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> Input t -> [Item]
|
||||
terminal pinfo toks =
|
||||
tracePrt "MCFG.Active (Kilbury) - initial terminal rules" (prt . length) $
|
||||
do ruleid <- emptyRules pinfo
|
||||
let FRule abs lins = allRules pinfo ! ruleid
|
||||
rrec <- mapM (rangeRestSyms toks EmptyRange . elems) (elems lins)
|
||||
return $ Final ruleid rrec []
|
||||
where
|
||||
rangeRestSyms toks rng [] = return rng
|
||||
rangeRestSyms toks rng (FSymTok tok:syms) = do (i,j) <- inputToken toks ? tok
|
||||
rng' <- concatRange rng (makeRange i j)
|
||||
rangeRestSyms toks rng' syms
|
||||
|
||||
initialScan :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> Input t -> [Item]
|
||||
initialScan pinfo toks =
|
||||
tracePrt "MCFG.Active (Kilbury) - initial scanned rules" (prt . length) $
|
||||
do tok <- aElems (inputToken toks)
|
||||
ruleid <- leftcornerTokens pinfo ? tok
|
||||
let FRule abs lins = allRules pinfo ! ruleid
|
||||
return $ Active ruleid [] EmptyRange 0 0 (emptyChildren abs)
|
||||
115
src/GF/Parsing/FCFG/PInfo.hs
Normal file
115
src/GF/Parsing/FCFG/PInfo.hs
Normal file
@@ -0,0 +1,115 @@
|
||||
---------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : PL
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/05/13 12:40:19 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.5 $
|
||||
--
|
||||
-- MCFG parsing, parser information
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Parsing.FCFG.PInfo where
|
||||
|
||||
import GF.System.Tracing
|
||||
import GF.Infra.Print
|
||||
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Formalism.GCFG
|
||||
import GF.Formalism.FCFG
|
||||
import GF.Data.SortedList
|
||||
import GF.Data.Assoc
|
||||
import GF.Parsing.FCFG.Range
|
||||
|
||||
import Data.Array
|
||||
import Data.Maybe
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- type declarations
|
||||
|
||||
-- | the list of categories = possible starting categories
|
||||
type FCFParser c n t = FCFPInfo c n t
|
||||
-> [c]
|
||||
-> Input t
|
||||
-> FCFChart c n
|
||||
|
||||
type FCFChart c n = [Abstract (c, RangeRec) n]
|
||||
|
||||
makeFinalEdge :: c -> Int -> Int -> (c, RangeRec)
|
||||
makeFinalEdge cat i j = (cat, [makeRange i j])
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- parser information
|
||||
|
||||
type RuleId = Int
|
||||
|
||||
data FCFPInfo c n t
|
||||
= FCFPInfo { allRules :: Array RuleId (FCFRule c n t)
|
||||
, topdownRules :: Assoc c (SList RuleId)
|
||||
-- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
|
||||
, emptyRules :: [RuleId]
|
||||
, leftcornerCats :: Assoc c (SList RuleId)
|
||||
, leftcornerTokens :: Assoc t (SList RuleId)
|
||||
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
|
||||
, grammarCats :: SList c
|
||||
}
|
||||
|
||||
|
||||
getLeftCornerTok lins
|
||||
| inRange (bounds syms) 0 = case syms ! 0 of
|
||||
FSymTok tok -> Just tok
|
||||
_ -> Nothing
|
||||
| otherwise = Nothing
|
||||
where
|
||||
syms = lins ! 0
|
||||
|
||||
getLeftCornerCat lins
|
||||
| inRange (bounds syms) 0 = case syms ! 0 of
|
||||
FSymCat c _ _ -> Just c
|
||||
_ -> Nothing
|
||||
| otherwise = Nothing
|
||||
where
|
||||
syms = lins ! 0
|
||||
|
||||
buildFCFPInfo :: (Ord c, Ord n, Ord t) => FCFGrammar c n t -> FCFPInfo c n t
|
||||
buildFCFPInfo grammar =
|
||||
traceCalcFirst grammar $
|
||||
tracePrt "MCFG.PInfo - parser info" (prt) $
|
||||
FCFPInfo { allRules = allrules
|
||||
, topdownRules = topdownrules
|
||||
, emptyRules = emptyrules
|
||||
, leftcornerCats = leftcorncats
|
||||
, leftcornerTokens = leftcorntoks
|
||||
, grammarCats = grammarcats
|
||||
}
|
||||
|
||||
where allrules = listArray (0,length grammar-1) grammar
|
||||
topdownrules = accumAssoc id [(cat, ruleid) | (ruleid, FRule (Abs cat _ _) _) <- assocs allrules]
|
||||
emptyrules = [ruleid | (ruleid, FRule (Abs _ [] _) _) <- assocs allrules]
|
||||
leftcorncats = accumAssoc id
|
||||
[ (fromJust (getLeftCornerCat lins), ruleid) |
|
||||
(ruleid, FRule _ lins) <- assocs allrules, isJust (getLeftCornerCat lins) ]
|
||||
leftcorntoks = accumAssoc id
|
||||
[ (fromJust (getLeftCornerTok lins), ruleid) |
|
||||
(ruleid, FRule _ lins) <- assocs allrules, isJust (getLeftCornerTok lins) ]
|
||||
grammarcats = aElems topdownrules
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- pretty-printing of statistics
|
||||
|
||||
instance (Ord c, Ord n, Ord t) => Print (FCFPInfo c n t) where
|
||||
prt pI = "[ allRules=" ++ sl (elems . allRules) ++
|
||||
"; tdRules=" ++ sla topdownRules ++
|
||||
"; emptyRules=" ++ sl emptyRules ++
|
||||
"; lcCats=" ++ sla leftcornerCats ++
|
||||
"; lcTokens=" ++ sla leftcornerTokens ++
|
||||
"; categories=" ++ sl grammarCats ++
|
||||
" ]"
|
||||
|
||||
where sl f = show $ length $ f pI
|
||||
sla f = let (as, bs) = unzip $ aAssocs $ f pI
|
||||
in show (length as) ++ "/" ++ show (length (concat bs))
|
||||
|
||||
54
src/GF/Parsing/FCFG/Range.hs
Normal file
54
src/GF/Parsing/FCFG/Range.hs
Normal file
@@ -0,0 +1,54 @@
|
||||
---------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : PL
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/08/08 09:01:25 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.5 $
|
||||
--
|
||||
-- Definitions of ranges, and operations on ranges
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Parsing.FCFG.Range
|
||||
( RangeRec, Range(..), makeRange, concatRange, rangeEdge, edgeRange, minRange, maxRange,
|
||||
) where
|
||||
|
||||
|
||||
-- GF modules
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Infra.Print
|
||||
|
||||
------------------------------------------------------------
|
||||
-- ranges as single pairs
|
||||
|
||||
type RangeRec = [Range]
|
||||
|
||||
data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int
|
||||
| EmptyRange
|
||||
deriving (Eq, Ord)
|
||||
|
||||
makeRange :: Int -> Int -> Range
|
||||
makeRange = Range
|
||||
|
||||
concatRange :: Range -> Range -> [Range]
|
||||
concatRange EmptyRange rng = return rng
|
||||
concatRange rng EmptyRange = return rng
|
||||
concatRange (Range i j) (Range j' k) = [Range i k | j==j']
|
||||
|
||||
rangeEdge :: a -> Range -> Edge a
|
||||
rangeEdge a (Range i j) = Edge i j a
|
||||
|
||||
edgeRange :: Edge a -> Range
|
||||
edgeRange (Edge i j _) = Range i j
|
||||
|
||||
minRange :: Range -> Int
|
||||
minRange (Range i j) = i
|
||||
|
||||
maxRange :: Range -> Int
|
||||
maxRange (Range i j) = j
|
||||
|
||||
instance Print Range where
|
||||
prt (Range i j) = "(" ++ show i ++ "-" ++ show j ++ ")"
|
||||
prt (EmptyRange) = "(?)"
|
||||
@@ -37,23 +37,29 @@ import qualified GF.Formalism.SimpleGFC as S
|
||||
import qualified GF.Formalism.MCFG as M
|
||||
import qualified GF.Formalism.CFG as C
|
||||
import qualified GF.Parsing.MCFG as PM
|
||||
import qualified GF.Parsing.FCFG as PF
|
||||
import qualified GF.Parsing.CFG as PC
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- parsing information
|
||||
|
||||
data PInfo = PInfo { mcfPInfo :: MCFPInfo,
|
||||
cfPInfo :: CFPInfo }
|
||||
data PInfo = PInfo { mcfPInfo :: MCFPInfo
|
||||
, fcfPInfo :: FCFPInfo
|
||||
, cfPInfo :: CFPInfo
|
||||
}
|
||||
|
||||
type MCFPInfo = PM.MCFPInfo MCat Name MLabel Token
|
||||
type FCFPInfo = PF.FCFPInfo FCat Name Token
|
||||
type CFPInfo = PC.CFPInfo CCat Name Token
|
||||
|
||||
buildPInfo :: MGrammar -> CGrammar -> PInfo
|
||||
buildPInfo mcfg cfg = PInfo { mcfPInfo = PM.buildMCFPInfo mcfg,
|
||||
cfPInfo = PC.buildCFPInfo cfg }
|
||||
buildPInfo :: MGrammar -> FGrammar -> CGrammar -> PInfo
|
||||
buildPInfo mcfg fcfg cfg = PInfo { mcfPInfo = PM.buildMCFPInfo mcfg
|
||||
, fcfPInfo = PF.buildFCFPInfo fcfg
|
||||
, cfPInfo = PC.buildCFPInfo cfg
|
||||
}
|
||||
|
||||
instance Print PInfo where
|
||||
prt (PInfo m c) = prt m ++ "\n" ++ prt c
|
||||
prt (PInfo m f c) = prt m ++ "\n" ++ prt c
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- main parsing function
|
||||
@@ -114,6 +120,19 @@ selectParser "m" strategy pinfo startCat inTokens
|
||||
cat@(MCat _ [lbl]) <- startCats ]
|
||||
return $ chart2forests chart (const False) finalEdges
|
||||
|
||||
-- parsing via FCFG
|
||||
selectParser "f" strategy pinfo startCat inTokens
|
||||
= do let startCats = filter isStart $ PF.grammarCats fcfpi
|
||||
isStart cat = fcat2scat cat == cfCat2Ident startCat
|
||||
fcfpi = fcfPInfo pinfo
|
||||
fcfParser <- PF.parseFCF strategy
|
||||
let fcfChart = fcfParser fcfpi startCats inTokens
|
||||
chart = G.abstract2chart fcfChart
|
||||
(begin,end) = inputBounds inTokens
|
||||
finalEdges = [ PF.makeFinalEdge cat begin end |
|
||||
cat@(FCat _ _ [lbl] _) <- startCats ]
|
||||
return $ chart2forests chart (const False) finalEdges
|
||||
|
||||
-- error parser:
|
||||
selectParser prs strategy _ _ _ = Bad $ "Parser '" ++ prs ++ "' not defined with strategy: " ++ strategy
|
||||
|
||||
|
||||
@@ -184,7 +184,7 @@ optionsOfCommand co = case co of
|
||||
CTransformGrammar _ -> flags "printer"
|
||||
CConvertLatex _ -> none
|
||||
CLinearize _ -> both "utf8 table struct record all multi" "lang number unlexer mark"
|
||||
CParse -> both "ambiguous fail cut new newer cfg mcfg n ign raw v lines all prob"
|
||||
CParse -> both "ambiguous fail cut new newer cfg mcfg fcfg n ign raw v lines all prob"
|
||||
"cat lang lexer parser number rawtrees"
|
||||
CTranslate _ _ -> opts "cat lexer parser"
|
||||
CGenerateRandom -> both "cf prob" "cat lang number depth atoms noexpand doexpand"
|
||||
|
||||
@@ -66,10 +66,11 @@ parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree]
|
||||
|
||||
-- to use peb's newer parser 7/4-05
|
||||
parseStringC opts0 sg cat s
|
||||
| oElem newCParser opts0 || oElem newMParser opts0 || oElem newParser opts0 || oElem newerParser opts0 = do
|
||||
| oElem newCParser opts0 || oElem newMParser opts0 || oElem newFParser opts0 || oElem newParser opts0 || oElem newerParser opts0 = do
|
||||
let opts = unionOptions opts0 $ stateOptions sg
|
||||
algorithm | oElem newCParser opts0 = "c"
|
||||
| oElem newMParser opts0 = "m"
|
||||
| oElem newFParser opts0 = "f"
|
||||
| otherwise = "c" -- default algorithm
|
||||
strategy = maybe "bottomup" id $ getOptVal opts useParser -- -parser=bottomup/topdown
|
||||
tokenizer = customOrDefault opts useTokenizer customTokenizer sg
|
||||
|
||||
Reference in New Issue
Block a user