mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-09 19:22:50 -06:00
add the FCFG parser
This commit is contained in:
38
src/GF/Parsing/FCFG.hs
Normal file
38
src/GF/Parsing/FCFG.hs
Normal file
@@ -0,0 +1,38 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : PL
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/05/11 10:28:16 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.5 $
|
||||
--
|
||||
-- MCFG parsing
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Parsing.FCFG
|
||||
(parseFCF, module GF.Parsing.FCFG.PInfo) where
|
||||
|
||||
import GF.Data.Operations (Err(..))
|
||||
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Formalism.GCFG
|
||||
import GF.Formalism.MCFG
|
||||
import GF.Parsing.FCFG.PInfo
|
||||
|
||||
import qualified GF.Parsing.FCFG.Active as Active
|
||||
import GF.Infra.Print
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- parsing
|
||||
|
||||
parseFCF :: (Print c, Ord c, Print n, Ord n, Print t, Ord t) => String -> Err (FCFParser c n t)
|
||||
parseFCF prs | prs `elem` strategies = Ok $ parseFCF' prs
|
||||
| otherwise = Bad $ "FCFG parsing strategy not defined: " ++ prs
|
||||
|
||||
strategies = words "bottomup topdown"
|
||||
|
||||
parseFCF' :: (Print c, Ord c, Print n, Ord n, Print t, Ord t) => String -> FCFParser c n t
|
||||
parseFCF' "bottomup" pinfo starts toks = Active.parse "b" pinfo starts toks
|
||||
parseFCF' "topdown" pinfo starts toks = Active.parse "t" pinfo starts toks
|
||||
188
src/GF/Parsing/FCFG/Active.hs
Normal file
188
src/GF/Parsing/FCFG/Active.hs
Normal file
@@ -0,0 +1,188 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : PL
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/08/08 09:01:25 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.5 $
|
||||
--
|
||||
-- MCFG parsing, the active algorithm
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Parsing.FCFG.Active (parse) where
|
||||
|
||||
import GF.Data.GeneralDeduction
|
||||
import GF.Data.Assoc
|
||||
import GF.Data.Utilities
|
||||
|
||||
import GF.Formalism.GCFG
|
||||
import GF.Formalism.FCFG
|
||||
import GF.Formalism.MCFG(Lin(..))
|
||||
import GF.Formalism.Utilities
|
||||
|
||||
import GF.Infra.Ident
|
||||
|
||||
import GF.Parsing.FCFG.Range
|
||||
import GF.Parsing.FCFG.PInfo
|
||||
|
||||
import GF.System.Tracing
|
||||
|
||||
import Control.Monad (guard)
|
||||
|
||||
import GF.Infra.Print
|
||||
|
||||
import qualified Data.List as List
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Set as Set
|
||||
import Data.Array
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * parsing
|
||||
|
||||
parse :: (Ord c, Print n, Ord n, Ord t) => String -> FCFParser c n t
|
||||
parse strategy pinfo starts toks =
|
||||
[ Abs (cat, found) (zip rhs rrecs) fun |
|
||||
Final ruleid found rrecs <- listXChartFinal chart,
|
||||
let FRule (Abs cat rhs fun) _ = allRules pinfo ! ruleid ]
|
||||
where chart = process strategy pinfo toks axioms emptyXChart
|
||||
|
||||
axioms | isBU strategy = terminal pinfo toks ++ initialScan pinfo toks
|
||||
| isTD strategy = initial pinfo starts toks
|
||||
|
||||
isBU s = s=="b"
|
||||
isTD s = s=="t"
|
||||
|
||||
-- used in prediction
|
||||
emptyChildren :: Abstract c n -> [RangeRec]
|
||||
emptyChildren (Abs _ rhs _) = replicate (length rhs) []
|
||||
|
||||
updateChildren :: [RangeRec] -> Int -> RangeRec -> [[RangeRec]]
|
||||
updateChildren recs i rec = updateNthM update i recs
|
||||
where update rec' = do guard (null rec' || rec' == rec)
|
||||
return rec
|
||||
|
||||
makeMaxRange (Range _ j) = Range j j
|
||||
makeMaxRange EmptyRange = EmptyRange
|
||||
|
||||
process :: (Ord c, Ord n, Ord t) => String -> FCFPInfo c n t -> Input t -> [Item] -> XChart c -> XChart c
|
||||
process strategy pinfo toks [] chart = chart
|
||||
process strategy pinfo toks (item:items) chart = process strategy pinfo toks items $! univRule item chart
|
||||
where
|
||||
univRule item@(Active ruleid found rng lbl ppos recs) chart
|
||||
| inRange (bounds lin) ppos =
|
||||
case lin ! ppos of
|
||||
FSymCat c r d -> case insertXChart chart item c of
|
||||
Nothing -> chart
|
||||
Just chart -> let items = do Final _ found' _ <- lookupXChartFinal chart c
|
||||
rng' <- concatRange rng (found' !! r)
|
||||
recs' <- updateChildren recs d found'
|
||||
return (Active ruleid found rng' lbl (ppos+1) recs')
|
||||
++
|
||||
do guard (isTD strategy)
|
||||
ruleid <- topdownRules pinfo ? c
|
||||
let FRule abs lins = allRules pinfo ! ruleid
|
||||
return (Active ruleid [] EmptyRange 0 0 (emptyChildren abs))
|
||||
in process strategy pinfo toks items chart
|
||||
FSymTok tok -> let items = do (i,j) <- inputToken toks ? tok
|
||||
rng' <- concatRange rng (makeRange i j)
|
||||
return (Active ruleid found rng' lbl (ppos+1) recs)
|
||||
in process strategy pinfo toks items chart
|
||||
| otherwise =
|
||||
if inRange (bounds lins) (lbl+1)
|
||||
then univRule (Active ruleid (rng:found) EmptyRange (lbl+1) 0 recs) chart
|
||||
else univRule (Final ruleid (reverse (rng:found)) recs) chart
|
||||
where
|
||||
(FRule (Abs cat _ fn) lins) = allRules pinfo ! ruleid
|
||||
lin = lins ! lbl
|
||||
univRule item@(Final ruleid found' recs) chart =
|
||||
case insertXChart chart item cat of
|
||||
Nothing -> chart
|
||||
Just chart -> let items = do (Active ruleid found rng l ppos recs) <- lookupXChartAct chart cat
|
||||
let FRule _ lins = allRules pinfo ! ruleid
|
||||
FSymCat cat r d = lins ! l ! ppos
|
||||
rng' <- concatRange rng (found' !! r)
|
||||
recs' <- updateChildren recs d found'
|
||||
return (Active ruleid found rng' l (ppos+1) recs')
|
||||
++
|
||||
do guard (isBU strategy)
|
||||
ruleid <- leftcornerCats pinfo ? cat
|
||||
let FRule abs lins = allRules pinfo ! ruleid
|
||||
FSymCat cat r d = lins ! 0 ! 0
|
||||
return (Active ruleid [] (found' !! r) 0 1 (updateNth (const found') d (emptyChildren abs)))
|
||||
in process strategy pinfo toks items chart
|
||||
where
|
||||
(FRule (Abs cat _ fn) _) = allRules pinfo ! ruleid
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * XChart
|
||||
|
||||
data Item
|
||||
= Active {-# UNPACK #-} !RuleId
|
||||
RangeRec
|
||||
Range
|
||||
{-# UNPACK #-} !FLabel
|
||||
{-# UNPACK #-} !FPointPos
|
||||
[RangeRec]
|
||||
| Final {-# UNPACK #-} !RuleId RangeRec [RangeRec]
|
||||
deriving (Eq, Ord)
|
||||
|
||||
data XChart c = XChart !(ParseChart Item c) !(ParseChart Item c)
|
||||
|
||||
emptyXChart :: Ord c => XChart c
|
||||
emptyXChart = XChart emptyChart emptyChart
|
||||
|
||||
insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _) c =
|
||||
case chartInsert actives item c of
|
||||
Nothing -> Nothing
|
||||
Just actives -> Just (XChart actives finals)
|
||||
|
||||
insertXChart (XChart actives finals) item@(Final _ _ _) c =
|
||||
case chartInsert finals item c of
|
||||
Nothing -> Nothing
|
||||
Just finals -> Just (XChart actives finals)
|
||||
|
||||
lookupXChartAct (XChart actives finals) c = chartLookup actives c
|
||||
lookupXChartFinal (XChart actives finals) c = chartLookup finals c
|
||||
|
||||
listXChartAct (XChart actives finals) = chartList actives
|
||||
listXChartFinal (XChart actives finals) = chartList finals
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- Earley --
|
||||
|
||||
-- anropas med alla startkategorier
|
||||
initial :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> [c] -> Input t -> [Item]
|
||||
initial pinfo starts toks =
|
||||
tracePrt "MCFG.Active (Earley) - initial rules" (prt . length) $
|
||||
do cat <- starts
|
||||
ruleid <- topdownRules pinfo ? cat
|
||||
let FRule abs lins = allRules pinfo ! ruleid
|
||||
return $ Active ruleid [] (Range 0 0) 0 0 (emptyChildren abs)
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- Kilbury --
|
||||
|
||||
terminal :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> Input t -> [Item]
|
||||
terminal pinfo toks =
|
||||
tracePrt "MCFG.Active (Kilbury) - initial terminal rules" (prt . length) $
|
||||
do ruleid <- emptyRules pinfo
|
||||
let FRule abs lins = allRules pinfo ! ruleid
|
||||
rrec <- mapM (rangeRestSyms toks EmptyRange . elems) (elems lins)
|
||||
return $ Final ruleid rrec []
|
||||
where
|
||||
rangeRestSyms toks rng [] = return rng
|
||||
rangeRestSyms toks rng (FSymTok tok:syms) = do (i,j) <- inputToken toks ? tok
|
||||
rng' <- concatRange rng (makeRange i j)
|
||||
rangeRestSyms toks rng' syms
|
||||
|
||||
initialScan :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> Input t -> [Item]
|
||||
initialScan pinfo toks =
|
||||
tracePrt "MCFG.Active (Kilbury) - initial scanned rules" (prt . length) $
|
||||
do tok <- aElems (inputToken toks)
|
||||
ruleid <- leftcornerTokens pinfo ? tok
|
||||
let FRule abs lins = allRules pinfo ! ruleid
|
||||
return $ Active ruleid [] EmptyRange 0 0 (emptyChildren abs)
|
||||
115
src/GF/Parsing/FCFG/PInfo.hs
Normal file
115
src/GF/Parsing/FCFG/PInfo.hs
Normal file
@@ -0,0 +1,115 @@
|
||||
---------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : PL
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/05/13 12:40:19 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.5 $
|
||||
--
|
||||
-- MCFG parsing, parser information
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Parsing.FCFG.PInfo where
|
||||
|
||||
import GF.System.Tracing
|
||||
import GF.Infra.Print
|
||||
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Formalism.GCFG
|
||||
import GF.Formalism.FCFG
|
||||
import GF.Data.SortedList
|
||||
import GF.Data.Assoc
|
||||
import GF.Parsing.FCFG.Range
|
||||
|
||||
import Data.Array
|
||||
import Data.Maybe
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- type declarations
|
||||
|
||||
-- | the list of categories = possible starting categories
|
||||
type FCFParser c n t = FCFPInfo c n t
|
||||
-> [c]
|
||||
-> Input t
|
||||
-> FCFChart c n
|
||||
|
||||
type FCFChart c n = [Abstract (c, RangeRec) n]
|
||||
|
||||
makeFinalEdge :: c -> Int -> Int -> (c, RangeRec)
|
||||
makeFinalEdge cat i j = (cat, [makeRange i j])
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- parser information
|
||||
|
||||
type RuleId = Int
|
||||
|
||||
data FCFPInfo c n t
|
||||
= FCFPInfo { allRules :: Array RuleId (FCFRule c n t)
|
||||
, topdownRules :: Assoc c (SList RuleId)
|
||||
-- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
|
||||
, emptyRules :: [RuleId]
|
||||
, leftcornerCats :: Assoc c (SList RuleId)
|
||||
, leftcornerTokens :: Assoc t (SList RuleId)
|
||||
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
|
||||
, grammarCats :: SList c
|
||||
}
|
||||
|
||||
|
||||
getLeftCornerTok lins
|
||||
| inRange (bounds syms) 0 = case syms ! 0 of
|
||||
FSymTok tok -> Just tok
|
||||
_ -> Nothing
|
||||
| otherwise = Nothing
|
||||
where
|
||||
syms = lins ! 0
|
||||
|
||||
getLeftCornerCat lins
|
||||
| inRange (bounds syms) 0 = case syms ! 0 of
|
||||
FSymCat c _ _ -> Just c
|
||||
_ -> Nothing
|
||||
| otherwise = Nothing
|
||||
where
|
||||
syms = lins ! 0
|
||||
|
||||
buildFCFPInfo :: (Ord c, Ord n, Ord t) => FCFGrammar c n t -> FCFPInfo c n t
|
||||
buildFCFPInfo grammar =
|
||||
traceCalcFirst grammar $
|
||||
tracePrt "MCFG.PInfo - parser info" (prt) $
|
||||
FCFPInfo { allRules = allrules
|
||||
, topdownRules = topdownrules
|
||||
, emptyRules = emptyrules
|
||||
, leftcornerCats = leftcorncats
|
||||
, leftcornerTokens = leftcorntoks
|
||||
, grammarCats = grammarcats
|
||||
}
|
||||
|
||||
where allrules = listArray (0,length grammar-1) grammar
|
||||
topdownrules = accumAssoc id [(cat, ruleid) | (ruleid, FRule (Abs cat _ _) _) <- assocs allrules]
|
||||
emptyrules = [ruleid | (ruleid, FRule (Abs _ [] _) _) <- assocs allrules]
|
||||
leftcorncats = accumAssoc id
|
||||
[ (fromJust (getLeftCornerCat lins), ruleid) |
|
||||
(ruleid, FRule _ lins) <- assocs allrules, isJust (getLeftCornerCat lins) ]
|
||||
leftcorntoks = accumAssoc id
|
||||
[ (fromJust (getLeftCornerTok lins), ruleid) |
|
||||
(ruleid, FRule _ lins) <- assocs allrules, isJust (getLeftCornerTok lins) ]
|
||||
grammarcats = aElems topdownrules
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- pretty-printing of statistics
|
||||
|
||||
instance (Ord c, Ord n, Ord t) => Print (FCFPInfo c n t) where
|
||||
prt pI = "[ allRules=" ++ sl (elems . allRules) ++
|
||||
"; tdRules=" ++ sla topdownRules ++
|
||||
"; emptyRules=" ++ sl emptyRules ++
|
||||
"; lcCats=" ++ sla leftcornerCats ++
|
||||
"; lcTokens=" ++ sla leftcornerTokens ++
|
||||
"; categories=" ++ sl grammarCats ++
|
||||
" ]"
|
||||
|
||||
where sl f = show $ length $ f pI
|
||||
sla f = let (as, bs) = unzip $ aAssocs $ f pI
|
||||
in show (length as) ++ "/" ++ show (length (concat bs))
|
||||
|
||||
54
src/GF/Parsing/FCFG/Range.hs
Normal file
54
src/GF/Parsing/FCFG/Range.hs
Normal file
@@ -0,0 +1,54 @@
|
||||
---------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : PL
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/08/08 09:01:25 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.5 $
|
||||
--
|
||||
-- Definitions of ranges, and operations on ranges
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Parsing.FCFG.Range
|
||||
( RangeRec, Range(..), makeRange, concatRange, rangeEdge, edgeRange, minRange, maxRange,
|
||||
) where
|
||||
|
||||
|
||||
-- GF modules
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Infra.Print
|
||||
|
||||
------------------------------------------------------------
|
||||
-- ranges as single pairs
|
||||
|
||||
type RangeRec = [Range]
|
||||
|
||||
data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int
|
||||
| EmptyRange
|
||||
deriving (Eq, Ord)
|
||||
|
||||
makeRange :: Int -> Int -> Range
|
||||
makeRange = Range
|
||||
|
||||
concatRange :: Range -> Range -> [Range]
|
||||
concatRange EmptyRange rng = return rng
|
||||
concatRange rng EmptyRange = return rng
|
||||
concatRange (Range i j) (Range j' k) = [Range i k | j==j']
|
||||
|
||||
rangeEdge :: a -> Range -> Edge a
|
||||
rangeEdge a (Range i j) = Edge i j a
|
||||
|
||||
edgeRange :: Edge a -> Range
|
||||
edgeRange (Edge i j _) = Range i j
|
||||
|
||||
minRange :: Range -> Int
|
||||
minRange (Range i j) = i
|
||||
|
||||
maxRange :: Range -> Int
|
||||
maxRange (Range i j) = j
|
||||
|
||||
instance Print Range where
|
||||
prt (Range i j) = "(" ++ show i ++ "-" ++ show j ++ ")"
|
||||
prt (EmptyRange) = "(?)"
|
||||
@@ -37,23 +37,29 @@ import qualified GF.Formalism.SimpleGFC as S
|
||||
import qualified GF.Formalism.MCFG as M
|
||||
import qualified GF.Formalism.CFG as C
|
||||
import qualified GF.Parsing.MCFG as PM
|
||||
import qualified GF.Parsing.FCFG as PF
|
||||
import qualified GF.Parsing.CFG as PC
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- parsing information
|
||||
|
||||
data PInfo = PInfo { mcfPInfo :: MCFPInfo,
|
||||
cfPInfo :: CFPInfo }
|
||||
data PInfo = PInfo { mcfPInfo :: MCFPInfo
|
||||
, fcfPInfo :: FCFPInfo
|
||||
, cfPInfo :: CFPInfo
|
||||
}
|
||||
|
||||
type MCFPInfo = PM.MCFPInfo MCat Name MLabel Token
|
||||
type FCFPInfo = PF.FCFPInfo FCat Name Token
|
||||
type CFPInfo = PC.CFPInfo CCat Name Token
|
||||
|
||||
buildPInfo :: MGrammar -> CGrammar -> PInfo
|
||||
buildPInfo mcfg cfg = PInfo { mcfPInfo = PM.buildMCFPInfo mcfg,
|
||||
cfPInfo = PC.buildCFPInfo cfg }
|
||||
buildPInfo :: MGrammar -> FGrammar -> CGrammar -> PInfo
|
||||
buildPInfo mcfg fcfg cfg = PInfo { mcfPInfo = PM.buildMCFPInfo mcfg
|
||||
, fcfPInfo = PF.buildFCFPInfo fcfg
|
||||
, cfPInfo = PC.buildCFPInfo cfg
|
||||
}
|
||||
|
||||
instance Print PInfo where
|
||||
prt (PInfo m c) = prt m ++ "\n" ++ prt c
|
||||
prt (PInfo m f c) = prt m ++ "\n" ++ prt c
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- main parsing function
|
||||
@@ -114,6 +120,19 @@ selectParser "m" strategy pinfo startCat inTokens
|
||||
cat@(MCat _ [lbl]) <- startCats ]
|
||||
return $ chart2forests chart (const False) finalEdges
|
||||
|
||||
-- parsing via FCFG
|
||||
selectParser "f" strategy pinfo startCat inTokens
|
||||
= do let startCats = filter isStart $ PF.grammarCats fcfpi
|
||||
isStart cat = fcat2scat cat == cfCat2Ident startCat
|
||||
fcfpi = fcfPInfo pinfo
|
||||
fcfParser <- PF.parseFCF strategy
|
||||
let fcfChart = fcfParser fcfpi startCats inTokens
|
||||
chart = G.abstract2chart fcfChart
|
||||
(begin,end) = inputBounds inTokens
|
||||
finalEdges = [ PF.makeFinalEdge cat begin end |
|
||||
cat@(FCat _ _ [lbl] _) <- startCats ]
|
||||
return $ chart2forests chart (const False) finalEdges
|
||||
|
||||
-- error parser:
|
||||
selectParser prs strategy _ _ _ = Bad $ "Parser '" ++ prs ++ "' not defined with strategy: " ++ strategy
|
||||
|
||||
|
||||
Reference in New Issue
Block a user