add the FCFG parser

This commit is contained in:
kr.angelov
2006-06-01 11:19:47 +00:00
parent 0c0c82603c
commit a5758468ed
13 changed files with 985 additions and 18 deletions

38
src/GF/Parsing/FCFG.hs Normal file
View File

@@ -0,0 +1,38 @@
----------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/05/11 10:28:16 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.5 $
--
-- MCFG parsing
-----------------------------------------------------------------------------
module GF.Parsing.FCFG
(parseFCF, module GF.Parsing.FCFG.PInfo) where
import GF.Data.Operations (Err(..))
import GF.Formalism.Utilities
import GF.Formalism.GCFG
import GF.Formalism.MCFG
import GF.Parsing.FCFG.PInfo
import qualified GF.Parsing.FCFG.Active as Active
import GF.Infra.Print
----------------------------------------------------------------------
-- parsing
parseFCF :: (Print c, Ord c, Print n, Ord n, Print t, Ord t) => String -> Err (FCFParser c n t)
parseFCF prs | prs `elem` strategies = Ok $ parseFCF' prs
| otherwise = Bad $ "FCFG parsing strategy not defined: " ++ prs
strategies = words "bottomup topdown"
parseFCF' :: (Print c, Ord c, Print n, Ord n, Print t, Ord t) => String -> FCFParser c n t
parseFCF' "bottomup" pinfo starts toks = Active.parse "b" pinfo starts toks
parseFCF' "topdown" pinfo starts toks = Active.parse "t" pinfo starts toks

View File

@@ -0,0 +1,188 @@
----------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/08/08 09:01:25 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.5 $
--
-- MCFG parsing, the active algorithm
-----------------------------------------------------------------------------
module GF.Parsing.FCFG.Active (parse) where
import GF.Data.GeneralDeduction
import GF.Data.Assoc
import GF.Data.Utilities
import GF.Formalism.GCFG
import GF.Formalism.FCFG
import GF.Formalism.MCFG(Lin(..))
import GF.Formalism.Utilities
import GF.Infra.Ident
import GF.Parsing.FCFG.Range
import GF.Parsing.FCFG.PInfo
import GF.System.Tracing
import Control.Monad (guard)
import GF.Infra.Print
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
import Data.Array
----------------------------------------------------------------------
-- * parsing
parse :: (Ord c, Print n, Ord n, Ord t) => String -> FCFParser c n t
parse strategy pinfo starts toks =
[ Abs (cat, found) (zip rhs rrecs) fun |
Final ruleid found rrecs <- listXChartFinal chart,
let FRule (Abs cat rhs fun) _ = allRules pinfo ! ruleid ]
where chart = process strategy pinfo toks axioms emptyXChart
axioms | isBU strategy = terminal pinfo toks ++ initialScan pinfo toks
| isTD strategy = initial pinfo starts toks
isBU s = s=="b"
isTD s = s=="t"
-- used in prediction
emptyChildren :: Abstract c n -> [RangeRec]
emptyChildren (Abs _ rhs _) = replicate (length rhs) []
updateChildren :: [RangeRec] -> Int -> RangeRec -> [[RangeRec]]
updateChildren recs i rec = updateNthM update i recs
where update rec' = do guard (null rec' || rec' == rec)
return rec
makeMaxRange (Range _ j) = Range j j
makeMaxRange EmptyRange = EmptyRange
process :: (Ord c, Ord n, Ord t) => String -> FCFPInfo c n t -> Input t -> [Item] -> XChart c -> XChart c
process strategy pinfo toks [] chart = chart
process strategy pinfo toks (item:items) chart = process strategy pinfo toks items $! univRule item chart
where
univRule item@(Active ruleid found rng lbl ppos recs) chart
| inRange (bounds lin) ppos =
case lin ! ppos of
FSymCat c r d -> case insertXChart chart item c of
Nothing -> chart
Just chart -> let items = do Final _ found' _ <- lookupXChartFinal chart c
rng' <- concatRange rng (found' !! r)
recs' <- updateChildren recs d found'
return (Active ruleid found rng' lbl (ppos+1) recs')
++
do guard (isTD strategy)
ruleid <- topdownRules pinfo ? c
let FRule abs lins = allRules pinfo ! ruleid
return (Active ruleid [] EmptyRange 0 0 (emptyChildren abs))
in process strategy pinfo toks items chart
FSymTok tok -> let items = do (i,j) <- inputToken toks ? tok
rng' <- concatRange rng (makeRange i j)
return (Active ruleid found rng' lbl (ppos+1) recs)
in process strategy pinfo toks items chart
| otherwise =
if inRange (bounds lins) (lbl+1)
then univRule (Active ruleid (rng:found) EmptyRange (lbl+1) 0 recs) chart
else univRule (Final ruleid (reverse (rng:found)) recs) chart
where
(FRule (Abs cat _ fn) lins) = allRules pinfo ! ruleid
lin = lins ! lbl
univRule item@(Final ruleid found' recs) chart =
case insertXChart chart item cat of
Nothing -> chart
Just chart -> let items = do (Active ruleid found rng l ppos recs) <- lookupXChartAct chart cat
let FRule _ lins = allRules pinfo ! ruleid
FSymCat cat r d = lins ! l ! ppos
rng' <- concatRange rng (found' !! r)
recs' <- updateChildren recs d found'
return (Active ruleid found rng' l (ppos+1) recs')
++
do guard (isBU strategy)
ruleid <- leftcornerCats pinfo ? cat
let FRule abs lins = allRules pinfo ! ruleid
FSymCat cat r d = lins ! 0 ! 0
return (Active ruleid [] (found' !! r) 0 1 (updateNth (const found') d (emptyChildren abs)))
in process strategy pinfo toks items chart
where
(FRule (Abs cat _ fn) _) = allRules pinfo ! ruleid
----------------------------------------------------------------------
-- * XChart
data Item
= Active {-# UNPACK #-} !RuleId
RangeRec
Range
{-# UNPACK #-} !FLabel
{-# UNPACK #-} !FPointPos
[RangeRec]
| Final {-# UNPACK #-} !RuleId RangeRec [RangeRec]
deriving (Eq, Ord)
data XChart c = XChart !(ParseChart Item c) !(ParseChart Item c)
emptyXChart :: Ord c => XChart c
emptyXChart = XChart emptyChart emptyChart
insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _) c =
case chartInsert actives item c of
Nothing -> Nothing
Just actives -> Just (XChart actives finals)
insertXChart (XChart actives finals) item@(Final _ _ _) c =
case chartInsert finals item c of
Nothing -> Nothing
Just finals -> Just (XChart actives finals)
lookupXChartAct (XChart actives finals) c = chartLookup actives c
lookupXChartFinal (XChart actives finals) c = chartLookup finals c
listXChartAct (XChart actives finals) = chartList actives
listXChartFinal (XChart actives finals) = chartList finals
----------------------------------------------------------------------
-- Earley --
-- anropas med alla startkategorier
initial :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> [c] -> Input t -> [Item]
initial pinfo starts toks =
tracePrt "MCFG.Active (Earley) - initial rules" (prt . length) $
do cat <- starts
ruleid <- topdownRules pinfo ? cat
let FRule abs lins = allRules pinfo ! ruleid
return $ Active ruleid [] (Range 0 0) 0 0 (emptyChildren abs)
----------------------------------------------------------------------
-- Kilbury --
terminal :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> Input t -> [Item]
terminal pinfo toks =
tracePrt "MCFG.Active (Kilbury) - initial terminal rules" (prt . length) $
do ruleid <- emptyRules pinfo
let FRule abs lins = allRules pinfo ! ruleid
rrec <- mapM (rangeRestSyms toks EmptyRange . elems) (elems lins)
return $ Final ruleid rrec []
where
rangeRestSyms toks rng [] = return rng
rangeRestSyms toks rng (FSymTok tok:syms) = do (i,j) <- inputToken toks ? tok
rng' <- concatRange rng (makeRange i j)
rangeRestSyms toks rng' syms
initialScan :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> Input t -> [Item]
initialScan pinfo toks =
tracePrt "MCFG.Active (Kilbury) - initial scanned rules" (prt . length) $
do tok <- aElems (inputToken toks)
ruleid <- leftcornerTokens pinfo ? tok
let FRule abs lins = allRules pinfo ! ruleid
return $ Active ruleid [] EmptyRange 0 0 (emptyChildren abs)

View File

@@ -0,0 +1,115 @@
---------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/05/13 12:40:19 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.5 $
--
-- MCFG parsing, parser information
-----------------------------------------------------------------------------
module GF.Parsing.FCFG.PInfo where
import GF.System.Tracing
import GF.Infra.Print
import GF.Formalism.Utilities
import GF.Formalism.GCFG
import GF.Formalism.FCFG
import GF.Data.SortedList
import GF.Data.Assoc
import GF.Parsing.FCFG.Range
import Data.Array
import Data.Maybe
----------------------------------------------------------------------
-- type declarations
-- | the list of categories = possible starting categories
type FCFParser c n t = FCFPInfo c n t
-> [c]
-> Input t
-> FCFChart c n
type FCFChart c n = [Abstract (c, RangeRec) n]
makeFinalEdge :: c -> Int -> Int -> (c, RangeRec)
makeFinalEdge cat i j = (cat, [makeRange i j])
------------------------------------------------------------
-- parser information
type RuleId = Int
data FCFPInfo c n t
= FCFPInfo { allRules :: Array RuleId (FCFRule c n t)
, topdownRules :: Assoc c (SList RuleId)
-- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
, emptyRules :: [RuleId]
, leftcornerCats :: Assoc c (SList RuleId)
, leftcornerTokens :: Assoc t (SList RuleId)
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
, grammarCats :: SList c
}
getLeftCornerTok lins
| inRange (bounds syms) 0 = case syms ! 0 of
FSymTok tok -> Just tok
_ -> Nothing
| otherwise = Nothing
where
syms = lins ! 0
getLeftCornerCat lins
| inRange (bounds syms) 0 = case syms ! 0 of
FSymCat c _ _ -> Just c
_ -> Nothing
| otherwise = Nothing
where
syms = lins ! 0
buildFCFPInfo :: (Ord c, Ord n, Ord t) => FCFGrammar c n t -> FCFPInfo c n t
buildFCFPInfo grammar =
traceCalcFirst grammar $
tracePrt "MCFG.PInfo - parser info" (prt) $
FCFPInfo { allRules = allrules
, topdownRules = topdownrules
, emptyRules = emptyrules
, leftcornerCats = leftcorncats
, leftcornerTokens = leftcorntoks
, grammarCats = grammarcats
}
where allrules = listArray (0,length grammar-1) grammar
topdownrules = accumAssoc id [(cat, ruleid) | (ruleid, FRule (Abs cat _ _) _) <- assocs allrules]
emptyrules = [ruleid | (ruleid, FRule (Abs _ [] _) _) <- assocs allrules]
leftcorncats = accumAssoc id
[ (fromJust (getLeftCornerCat lins), ruleid) |
(ruleid, FRule _ lins) <- assocs allrules, isJust (getLeftCornerCat lins) ]
leftcorntoks = accumAssoc id
[ (fromJust (getLeftCornerTok lins), ruleid) |
(ruleid, FRule _ lins) <- assocs allrules, isJust (getLeftCornerTok lins) ]
grammarcats = aElems topdownrules
----------------------------------------------------------------------
-- pretty-printing of statistics
instance (Ord c, Ord n, Ord t) => Print (FCFPInfo c n t) where
prt pI = "[ allRules=" ++ sl (elems . allRules) ++
"; tdRules=" ++ sla topdownRules ++
"; emptyRules=" ++ sl emptyRules ++
"; lcCats=" ++ sla leftcornerCats ++
"; lcTokens=" ++ sla leftcornerTokens ++
"; categories=" ++ sl grammarCats ++
" ]"
where sl f = show $ length $ f pI
sla f = let (as, bs) = unzip $ aAssocs $ f pI
in show (length as) ++ "/" ++ show (length (concat bs))

View File

@@ -0,0 +1,54 @@
---------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/08/08 09:01:25 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.5 $
--
-- Definitions of ranges, and operations on ranges
-----------------------------------------------------------------------------
module GF.Parsing.FCFG.Range
( RangeRec, Range(..), makeRange, concatRange, rangeEdge, edgeRange, minRange, maxRange,
) where
-- GF modules
import GF.Formalism.Utilities
import GF.Infra.Print
------------------------------------------------------------
-- ranges as single pairs
type RangeRec = [Range]
data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int
| EmptyRange
deriving (Eq, Ord)
makeRange :: Int -> Int -> Range
makeRange = Range
concatRange :: Range -> Range -> [Range]
concatRange EmptyRange rng = return rng
concatRange rng EmptyRange = return rng
concatRange (Range i j) (Range j' k) = [Range i k | j==j']
rangeEdge :: a -> Range -> Edge a
rangeEdge a (Range i j) = Edge i j a
edgeRange :: Edge a -> Range
edgeRange (Edge i j _) = Range i j
minRange :: Range -> Int
minRange (Range i j) = i
maxRange :: Range -> Int
maxRange (Range i j) = j
instance Print Range where
prt (Range i j) = "(" ++ show i ++ "-" ++ show j ++ ")"
prt (EmptyRange) = "(?)"

View File

@@ -37,23 +37,29 @@ import qualified GF.Formalism.SimpleGFC as S
import qualified GF.Formalism.MCFG as M
import qualified GF.Formalism.CFG as C
import qualified GF.Parsing.MCFG as PM
import qualified GF.Parsing.FCFG as PF
import qualified GF.Parsing.CFG as PC
----------------------------------------------------------------------
-- parsing information
data PInfo = PInfo { mcfPInfo :: MCFPInfo,
cfPInfo :: CFPInfo }
data PInfo = PInfo { mcfPInfo :: MCFPInfo
, fcfPInfo :: FCFPInfo
, cfPInfo :: CFPInfo
}
type MCFPInfo = PM.MCFPInfo MCat Name MLabel Token
type FCFPInfo = PF.FCFPInfo FCat Name Token
type CFPInfo = PC.CFPInfo CCat Name Token
buildPInfo :: MGrammar -> CGrammar -> PInfo
buildPInfo mcfg cfg = PInfo { mcfPInfo = PM.buildMCFPInfo mcfg,
cfPInfo = PC.buildCFPInfo cfg }
buildPInfo :: MGrammar -> FGrammar -> CGrammar -> PInfo
buildPInfo mcfg fcfg cfg = PInfo { mcfPInfo = PM.buildMCFPInfo mcfg
, fcfPInfo = PF.buildFCFPInfo fcfg
, cfPInfo = PC.buildCFPInfo cfg
}
instance Print PInfo where
prt (PInfo m c) = prt m ++ "\n" ++ prt c
prt (PInfo m f c) = prt m ++ "\n" ++ prt c
----------------------------------------------------------------------
-- main parsing function
@@ -114,6 +120,19 @@ selectParser "m" strategy pinfo startCat inTokens
cat@(MCat _ [lbl]) <- startCats ]
return $ chart2forests chart (const False) finalEdges
-- parsing via FCFG
selectParser "f" strategy pinfo startCat inTokens
= do let startCats = filter isStart $ PF.grammarCats fcfpi
isStart cat = fcat2scat cat == cfCat2Ident startCat
fcfpi = fcfPInfo pinfo
fcfParser <- PF.parseFCF strategy
let fcfChart = fcfParser fcfpi startCats inTokens
chart = G.abstract2chart fcfChart
(begin,end) = inputBounds inTokens
finalEdges = [ PF.makeFinalEdge cat begin end |
cat@(FCat _ _ [lbl] _) <- startCats ]
return $ chart2forests chart (const False) finalEdges
-- error parser:
selectParser prs strategy _ _ _ = Bad $ "Parser '" ++ prs ++ "' not defined with strategy: " ++ strategy