forked from GitHub/gf-core
"Committed_by_peb"
This commit is contained in:
@@ -4,15 +4,17 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/18 14:55:33 $
|
||||
-- > CVS $Date: 2005/04/20 12:49:44 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.2 $
|
||||
-- > CVS $Revision: 1.3 $
|
||||
--
|
||||
-- Chart parsing of grammars in CF format
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.NewParsing.CF (parse) where
|
||||
|
||||
import Operations (errVal)
|
||||
|
||||
import GF.System.Tracing
|
||||
import GF.Infra.Print
|
||||
|
||||
@@ -29,7 +31,7 @@ type Name = CFI.CFFun
|
||||
type Category = CFI.CFCat
|
||||
|
||||
parse :: String -> CF.CF -> Category -> CF.CFParser
|
||||
parse = buildParser . P.parseCF
|
||||
parse = buildParser . errVal (errVal undefined (P.parseCF "")) . P.parseCF
|
||||
|
||||
buildParser :: P.CFParser Category Name Token -> CF.CF -> Category -> CF.CFParser
|
||||
buildParser parser cf start tokens = (parseResults, parseInformation)
|
||||
@@ -38,7 +40,7 @@ buildParser parser cf start tokens = (parseResults, parseInformation)
|
||||
theInput = input tokens
|
||||
edges = tracePrt "Parsing.CF - nr. edges" (prt.length) $
|
||||
parser pInf [start] theInput
|
||||
chart = tracePrt "Parsing.CF - size of chart" (prt . map (length.snd) . aAssocs) $
|
||||
chart = tracePrt "Parsing.CF - sz. chart" (prt . map (length.snd) . aAssocs) $
|
||||
grammar2chart $ map addCategory edges
|
||||
forests = tracePrt "Parsing.CF - nr. forests" (prt.length) $
|
||||
chart2forests chart (const False)
|
||||
|
||||
@@ -4,9 +4,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/14 18:38:36 $
|
||||
-- > CVS $Date: 2005/04/20 12:49:44 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.2 $
|
||||
-- > CVS $Revision: 1.3 $
|
||||
--
|
||||
-- CFG parsing
|
||||
-----------------------------------------------------------------------------
|
||||
@@ -14,6 +14,8 @@
|
||||
module GF.NewParsing.CFG
|
||||
(parseCF, module GF.NewParsing.CFG.PInfo) where
|
||||
|
||||
import Operations (Err(..))
|
||||
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Formalism.CFG
|
||||
import GF.NewParsing.CFG.PInfo
|
||||
@@ -24,17 +26,19 @@ import qualified GF.NewParsing.CFG.General as Gen
|
||||
----------------------------------------------------------------------
|
||||
-- parsing
|
||||
|
||||
parseCF :: (Ord n, Ord c, Ord t) => String -> CFParser c n t
|
||||
parseCF "gb" = Gen.parse bottomup
|
||||
parseCF "gt" = Gen.parse topdown
|
||||
parseCF "ib" = Inc.parse (bottomup, noFilter)
|
||||
parseCF "it" = Inc.parse (topdown, noFilter)
|
||||
parseCF "ibFT" = Inc.parse (bottomup, topdown)
|
||||
parseCF "ibFB" = Inc.parse (bottomup, bottomup)
|
||||
parseCF "ibFTB" = Inc.parse (bottomup, bothFilters)
|
||||
parseCF "itF" = Inc.parse (topdown, bottomup)
|
||||
parseCF :: (Ord n, Ord c, Ord t) => String -> Err (CFParser c n t)
|
||||
parseCF "gb" = Ok $ Gen.parse bottomup
|
||||
parseCF "gt" = Ok $ Gen.parse topdown
|
||||
parseCF "ib" = Ok $ Inc.parse (bottomup, noFilter)
|
||||
parseCF "it" = Ok $ Inc.parse (topdown, noFilter)
|
||||
parseCF "ibFT" = Ok $ Inc.parse (bottomup, topdown)
|
||||
parseCF "ibFB" = Ok $ Inc.parse (bottomup, bottomup)
|
||||
parseCF "ibFTB" = Ok $ Inc.parse (bottomup, bothFilters)
|
||||
parseCF "itF" = Ok $ Inc.parse (topdown, bottomup)
|
||||
-- default parser:
|
||||
parseCF _ = parseCF "gb"
|
||||
parseCF "" = parseCF "gb"
|
||||
-- error parser:
|
||||
parseCF prs = Bad $ "Parser not defined: " ++ prs
|
||||
|
||||
bottomup = (True, False)
|
||||
topdown = (False, True)
|
||||
|
||||
@@ -4,9 +4,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/19 10:46:07 $
|
||||
-- > CVS $Date: 2005/04/20 12:49:44 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.4 $
|
||||
-- > CVS $Revision: 1.5 $
|
||||
--
|
||||
-- The main parsing module, parsing GFC grammars
|
||||
-- by translating to simpler formats, such as PMCFG and CFG
|
||||
@@ -19,28 +19,25 @@ import GF.System.Tracing
|
||||
import GF.Infra.Print
|
||||
import qualified PrGrammar
|
||||
|
||||
import Monad
|
||||
import Operations (Err(..))
|
||||
|
||||
import qualified Grammar
|
||||
-- import Values
|
||||
import qualified Macros
|
||||
-- import qualified Modules
|
||||
import qualified AbsGFC
|
||||
import qualified Ident
|
||||
import Operations
|
||||
import CFIdent (CFCat, cfCat2Ident, CFTok, prCFTok)
|
||||
import CFIdent (CFCat, cfCat2Ident, CFTok, wordsCFTok)
|
||||
|
||||
import GF.Data.SortedList
|
||||
import GF.Data.Assoc
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Conversion.Types
|
||||
import GF.Formalism.GCFG
|
||||
import GF.Formalism.SimpleGFC
|
||||
|
||||
import qualified GF.Formalism.GCFG as G
|
||||
import qualified GF.Formalism.SimpleGFC as S
|
||||
import qualified GF.Formalism.MCFG as M
|
||||
import qualified GF.Formalism.CFG as C
|
||||
import qualified GF.NewParsing.MCFG as PM
|
||||
import qualified GF.NewParsing.CFG as PC
|
||||
--import qualified GF.Conversion.FromGFC as From
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- parsing information
|
||||
@@ -64,82 +61,60 @@ parse :: String -- ^ parsing strategy
|
||||
-> Ident.Ident -- ^ abstract module name
|
||||
-> CFCat -- ^ starting category
|
||||
-> [CFTok] -- ^ input tokens
|
||||
-> [Grammar.Term] -- ^ resulting GF terms
|
||||
-> Err [Grammar.Term] -- ^ resulting GF terms
|
||||
|
||||
parse (prs:strategy) pinfo abs startCat inString =
|
||||
do let inTokens = tracePrt "Parsing.GFC - input tokens" prt $
|
||||
inputMany (map wordsCFTok inString)
|
||||
forests <- selectParser prs strategy pinfo startCat inTokens
|
||||
traceM "Parsing.GFC - nr. forests" (prt (length forests))
|
||||
let filteredForests = tracePrt "Parsing.GFC - nr. filtered forests" (prt . length) $
|
||||
forests >>= applyProfileToForest
|
||||
-- compactFs = tracePrt "#compactForests" (prt . length) $
|
||||
-- tracePrt "compactForests" (prtBefore "\n") $
|
||||
-- compactForests forests
|
||||
trees = tracePrt "Parsing.GFC - nr. trees" (prt . length) $
|
||||
nubsort $ filteredForests >>= forest2trees
|
||||
-- compactFs >>= forest2trees
|
||||
return $ map (tree2term abs) trees
|
||||
|
||||
-- default parser = CFG (for now)
|
||||
parse "" pinfo abs startCat inString = parse "c" pinfo abs startCat inString
|
||||
|
||||
|
||||
-- parsing via CFG
|
||||
parse (c:strategy) pinfo abs startCat
|
||||
| c=='c' || c=='C' = map (tree2term abs) .
|
||||
parseCFG strategy cfpi startCats .
|
||||
map prCFTok
|
||||
where startCats = tracePrt "Parsing.GFC - starting categories" prt $
|
||||
filter isStartCat $ map fst $ aAssocs $ PC.topdownRules cfpi
|
||||
isStartCat (CCat (ECat cat _) _) = cat == cfCat2Ident startCat
|
||||
cfpi = cfPInfo pinfo
|
||||
selectParser prs strategy pinfo startCat inTokens | prs=='c'
|
||||
= do let startCats = tracePrt "Parsing.GFC - starting CF categories" prt $
|
||||
filter isStart $ map fst $ aAssocs $ PC.topdownRules cfpi
|
||||
isStart cat = ccat2scat cat == cfCat2Ident startCat
|
||||
cfpi = cfPInfo pinfo
|
||||
cfParser <- PC.parseCF strategy
|
||||
let cfChart = tracePrt "Parsing.GFC - sz. CF chart" (prt . length) $
|
||||
cfParser cfpi startCats inTokens
|
||||
chart = tracePrt "Parsing.GFC - sz. chart" (prt . map (length.snd) . aAssocs) $
|
||||
C.grammar2chart cfChart
|
||||
finalEdges = tracePrt "Parsing.GFC - final chart edges" prt $
|
||||
map (uncurry Edge (inputBounds inTokens)) startCats
|
||||
return $ chart2forests chart (const False) finalEdges
|
||||
|
||||
-- parsing via MCFG
|
||||
parse (c:strategy) pinfo abs startCat
|
||||
| c=='m' || c=='M' = map (tree2term abs) .
|
||||
parseMCFG strategy mcfpi startCats .
|
||||
map prCFTok
|
||||
where startCats = tracePrt "Parsing.GFC - starting categories" prt $
|
||||
filter isStartCat $ nubsort [ c | Rule (Abs c _ _) _ <- mcfpi ]
|
||||
isStartCat (MCat (ECat cat _) _) = cat == cfCat2Ident startCat
|
||||
mcfpi = mcfPInfo pinfo
|
||||
selectParser prs strategy pinfo startCat inTokens | prs=='m'
|
||||
= do let startCats = tracePrt "Parsing.GFC - starting MCF categories" prt $
|
||||
filter isStart $ nubsort [ c | G.Rule (G.Abs c _ _) _ <- mcfpi ]
|
||||
isStart cat = mcat2scat cat == cfCat2Ident startCat
|
||||
mcfpi = mcfPInfo pinfo
|
||||
mcfParser <- PM.parseMCF strategy
|
||||
let mcfChart = tracePrt "Parsing.GFC - sz. MCF chart" (prt . length) $
|
||||
mcfParser mcfpi startCats inTokens
|
||||
chart = tracePrt "Parsing.GFC - sz. chart" (prt . map (length.snd) . aAssocs) $
|
||||
G.abstract2chart mcfChart
|
||||
finalEdges = tracePrt "Parsing.GFC - final chart edges" prt $
|
||||
[ PM.makeFinalEdge cat lbl (inputBounds inTokens) |
|
||||
cat@(MCat _ [lbl]) <- startCats ]
|
||||
return $ chart2forests chart (const False) finalEdges
|
||||
|
||||
-- default parser
|
||||
parse strategy pinfo abs start = parse ('c':strategy) pinfo abs start
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
||||
parseCFG :: String -> CFPInfo -> [CCat] -> [Token] -> [SyntaxTree Fun]
|
||||
parseCFG strategy pinfo startCats inString = trace2 "Parsing.GFC - selected algorithm" "CFG" $
|
||||
trees
|
||||
where trees = tracePrt "Parsing.GFC - nr. trees" (prt . length) $
|
||||
nubsort $ forests >>= forest2trees
|
||||
-- compactFs >>= forest2trees
|
||||
|
||||
-- compactFs = tracePrt "#compactForests" (prt . length) $
|
||||
-- tracePrt "compactForests" (prtBefore "\n") $
|
||||
-- compactForests forests
|
||||
|
||||
forests = tracePrt "Parsing.GFC - nr. forests" (prt . length) $
|
||||
cfForests >>= convertFromCFForest
|
||||
cfForests= tracePrt "Parsing.GFC - nr. context-free forests" (prt . length) $
|
||||
chart2forests chart (const False) finalEdges
|
||||
|
||||
finalEdges = tracePrt "Parsing.GFC - final chart edges" prt $
|
||||
map (uncurry Edge (inputBounds inTokens)) startCats
|
||||
chart = --tracePrt "finalChartEdges" (prt . (? finalEdge)) $
|
||||
tracePrt "Parsing.GFC - size of chart" (prt . map (length.snd) . aAssocs) $
|
||||
C.grammar2chart cfChart
|
||||
cfChart = --tracePrt "finalEdges"
|
||||
--(prt . filter (\(Edge i j _) -> (i,j)==inputBounds inTokens)) $
|
||||
tracePrt "Parsing.GFC - size of context-free chart" (prt . length) $
|
||||
PC.parseCF strategy pinfo startCats inTokens
|
||||
|
||||
inTokens = input inString
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
||||
parseMCFG :: String -> MCFPInfo -> [MCat] -> [Token] -> [SyntaxTree Fun]
|
||||
parseMCFG strategy pinfo startCats inString = trace2 "Parsing.GFC - selected algorithm" "MCFG" $
|
||||
trees
|
||||
where trees = tracePrt "Parsing.GFC - nr. trees" (prt . length) $
|
||||
forests >>= forest2trees
|
||||
|
||||
forests = tracePrt "Parsing.GFC - nr. forests" (prt . length) $
|
||||
cfForests >>= convertFromCFForest
|
||||
cfForests= tracePrt "Parsing.GFC - nr. context-free forests" (prt . length) $
|
||||
chart2forests chart (const False) finalEdges
|
||||
|
||||
chart = tracePrt "Parsing.GFC - size of chart" (prt . map (length.snd) . aAssocs) $
|
||||
PM.parseMCF strategy pinfo inString -- inTokens
|
||||
|
||||
finalEdges = tracePrt "Parsing.GFC - final chart edges" prt $
|
||||
[ PM.makeFinalEdge cat lbl (inputBounds inTokens) |
|
||||
cat@(MCat _ [lbl]) <- startCats ]
|
||||
|
||||
inTokens = input inString
|
||||
-- error parser:
|
||||
selectParser prs strategy _ _ _ = Bad $ "Parser not defined: " ++ (prs:strategy)
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
@@ -153,36 +128,23 @@ tree2term abs (TMeta) = Macros.mkMeta 0
|
||||
----------------------------------------------------------------------
|
||||
-- conversion and unification of forests
|
||||
|
||||
convertFromCFForest :: SyntaxForest Name -> [SyntaxForest Fun]
|
||||
|
||||
-- simplest implementation
|
||||
convertFromCFForest (FNode name@(Name fun profile) children)
|
||||
applyProfileToForest :: SyntaxForest Name -> [SyntaxForest Fun]
|
||||
applyProfileToForest (FNode name@(Name fun profile) children)
|
||||
| isCoercion name = concat chForests
|
||||
| otherwise = [ FNode fun chForests | not (null chForests) ]
|
||||
where chForests = concat [ applyProfileM unifyManyForests profile forests |
|
||||
forests0 <- children,
|
||||
forests <- mapM convertFromCFForest forests0 ]
|
||||
forests <- mapM applyProfileToForest forests0 ]
|
||||
|
||||
{-
|
||||
-- more intelligent(?) implementation
|
||||
convertFromCFForest (FNode (Name name profile) children)
|
||||
applyProfileToForest (FNode (Name name profile) children)
|
||||
| isCoercion name = concat chForests
|
||||
| otherwise = [ FNode name chForests | not (null chForests) ]
|
||||
where chForests = concat [ mapM (checkProfile forests) profile |
|
||||
forests0 <- children,
|
||||
forests <- mapM convertFromCFForest forests0 ]
|
||||
forests <- mapM applyProfileToForest forests0 ]
|
||||
-}
|
||||
|
||||
{-
|
||||
----------------------------------------------------------------------
|
||||
-- conversion and unification for parse trees instead of forests
|
||||
-- OBSOLETE!
|
||||
|
||||
convertFromCFTree :: SyntaxTree Name -> [SyntaxTree Fun]
|
||||
convertFromCFTree (TNode name@(Name fun profile) children0)
|
||||
| isCoercion name = concat chTrees
|
||||
| otherwise = map (TNode fun) chTrees
|
||||
where chTrees = [ children |
|
||||
children1 <- mapM convertFromCFTree children0,
|
||||
children <- applyProfileM unifyManyTrees profile children1 ]
|
||||
-}
|
||||
|
||||
@@ -4,32 +4,39 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/19 10:46:07 $
|
||||
-- > CVS $Date: 2005/04/20 12:49:45 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.1 $
|
||||
-- > CVS $Revision: 1.2 $
|
||||
--
|
||||
-- MCFG parsing
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.NewParsing.MCFG where
|
||||
module GF.NewParsing.MCFG
|
||||
(parseMCF, module GF.NewParsing.MCFG.PInfo) where
|
||||
|
||||
import Operations (Err(..))
|
||||
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Formalism.GCFG
|
||||
import GF.Formalism.MCFG
|
||||
import GF.NewParsing.MCFG.PInfo
|
||||
|
||||
import qualified GF.NewParsing.MCFG.Naive as Naive
|
||||
import qualified GF.NewParsing.MCFG.Active as Active
|
||||
import qualified GF.NewParsing.MCFG.Range as Range (makeRange)
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- parsing
|
||||
|
||||
--parseMCF :: (Ord n, Ord c, Ord t) => String -> CFParser c n t
|
||||
parseMCF "n" = Naive.parse
|
||||
-- default parser:
|
||||
parseMCF _ = parseMCF "n"
|
||||
|
||||
|
||||
makeFinalEdge cat lbl bnds = (cat, [(lbl, Range.makeRange bnds)])
|
||||
parseMCF :: (Ord c, Ord n, Ord l, Ord t) => String -> Err (MCFParser c n l t)
|
||||
parseMCF "n" = Ok $ Naive.parse
|
||||
parseMCF "an" = Ok $ Active.parse "n"
|
||||
parseMCF "ab" = Ok $ Active.parse "b"
|
||||
parseMCF "at" = Ok $ Active.parse "t"
|
||||
-- default parsers:
|
||||
parseMCF "a" = parseMCF "an"
|
||||
-- error parser:
|
||||
parseMCF prs = Bad $ "Parser not defined: " ++ prs
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,174 +1,186 @@
|
||||
{-- Module --------------------------------------------------------------------
|
||||
Filename: ActiveParse.hs
|
||||
Author: Håkan Burden
|
||||
Time-stamp: <2005-04-18, 14:25>
|
||||
|
||||
Description: An agenda-driven implementation of algorithm 4.6, Active parsing
|
||||
of PMCFG, as described in Ljunglöf (2004)
|
||||
------------------------------------------------------------------------------}
|
||||
module GF.NewParsing.MCFG.Active (parse) where
|
||||
|
||||
module ActiveParse where
|
||||
import GF.NewParsing.GeneralChart
|
||||
import GF.Formalism.GCFG
|
||||
import GF.Formalism.MCFG
|
||||
import GF.Formalism.Utilities
|
||||
import GF.NewParsing.MCFG.Range
|
||||
import GF.NewParsing.MCFG.PInfo
|
||||
import GF.System.Tracing
|
||||
import Monad (guard)
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * parsing
|
||||
|
||||
-- GF modules
|
||||
import Examples
|
||||
import GeneralChart
|
||||
import MCFGrammar
|
||||
import MCFParser
|
||||
import Nondet
|
||||
import Parser
|
||||
import Range
|
||||
parse :: (Ord n, Ord c, Ord l, Ord t) => String -> MCFParser c n l t
|
||||
parse strategy mcfg starts toks
|
||||
= [ Abs (cat, found) (zip rhs rrecs) fun |
|
||||
Final (Abs cat rhs fun) found rrecs <- chartLookup chart Fin ]
|
||||
where chart = process strategy mcfg starts toks
|
||||
|
||||
process :: (Ord n, Ord c, Ord l, Ord t) =>
|
||||
String -> MCFGrammar c n l t -> [c] -> Input t -> AChart c n l
|
||||
process strategy mcfg starts toks
|
||||
= trace2 "MCFG.Active - strategy" (if isBU strategy then "BU"
|
||||
else if isTD strategy then "TD" else "None") $
|
||||
tracePrt "MCFG.Active - chart size" prtSizes $
|
||||
buildChart keyof (complete : combine : convert : rules) axioms
|
||||
where rules | isNil strategy = [scan]
|
||||
| isBU strategy = [predictKilbury mcfg toks]
|
||||
| isTD strategy = [predictEarley mcfg toks]
|
||||
axioms | isNil strategy = predict mcfg toks
|
||||
| isBU strategy = terminal mcfg toks
|
||||
| isTD strategy = initial mcfg starts toks
|
||||
|
||||
{-- Datatypes -----------------------------------------------------------------
|
||||
AChart: A RedBlackMap with Items and Keys
|
||||
Item :
|
||||
AKey :
|
||||
------------------------------------------------------------------------------}
|
||||
data Item n c l = Active (AbstractRule n c)
|
||||
(RangeRec l)
|
||||
Range
|
||||
(Lin c l Range)
|
||||
(LinRec c l Range)
|
||||
[RangeRec l]
|
||||
| Passive (AbstractRule n c) (RangeRec l) [RangeRec l]
|
||||
isNil s = s=="n"
|
||||
isBU s = s=="b"
|
||||
isTD s = s=="t"
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * type definitions
|
||||
|
||||
type AChart c n l = ParseChart (Item c n l) (AKey c)
|
||||
|
||||
data Item c n l = Active (Abstract c n)
|
||||
(RangeRec l)
|
||||
Range
|
||||
(Lin c l Range)
|
||||
(LinRec c l Range)
|
||||
[RangeRec l]
|
||||
| Final (Abstract c n) (RangeRec l) [RangeRec l]
|
||||
| Passive c (RangeRec l)
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
type AChart n c l = ParseChart (Item n c l) (AKey c)
|
||||
|
||||
data AKey c = Act c
|
||||
| Pass c
|
||||
| Useless
|
||||
| Fin
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
|
||||
keyof :: Item n c l -> AKey c
|
||||
keyof :: Item c n l -> AKey c
|
||||
keyof (Active _ _ _ (Lin _ (Cat (next, _, _):_)) _ _) = Act next
|
||||
keyof (Passive (_, cat, _) _ _) = Pass cat
|
||||
keyof _ = Useless
|
||||
keyof (Final _ _ _) = Fin
|
||||
keyof (Passive cat _) = Pass cat
|
||||
keyof _ = Useless
|
||||
|
||||
-- to be used in prediction
|
||||
emptyChildren :: Abstract c n -> [RangeRec l]
|
||||
emptyChildren (Abs _ rhs _) = replicate (length rhs) []
|
||||
|
||||
-- for tracing purposes
|
||||
prtSizes chart = "final=" ++ show (length (chartLookup chart Fin)) ++
|
||||
", passive=" ++ show (sum [length (chartLookup chart k) |
|
||||
k@(Pass _) <- chartKeys chart ]) ++
|
||||
", active=" ++ show (sum [length (chartLookup chart k) |
|
||||
k@(Act _) <- chartKeys chart ]) ++
|
||||
", useless=" ++ show (length (chartLookup chart Useless))
|
||||
|
||||
|
||||
{-- Parsing -------------------------------------------------------------------
|
||||
recognize:
|
||||
parse : Builds a chart from the initial agenda, given by prediction, and
|
||||
the inference rules
|
||||
keyof : Given an Item returns an appropriate Key for the Chart
|
||||
------------------------------------------------------------------------------}
|
||||
----------------------------------------------------------------------
|
||||
-- * inference rules
|
||||
|
||||
recognize strategy mcfg toks = chartMember
|
||||
(parse strategy mcfg toks) item (keyof item)
|
||||
where n = length toks
|
||||
n2 = n `div` 2
|
||||
item = (Passive ("f", S, [A])
|
||||
[("s",Range (0,n))]
|
||||
[[("p",Range (0,n2)),("q",Range (n2,n))]])
|
||||
|
||||
|
||||
parse :: (Ord n, Ord c, Ord l, Eq t) => Strategy -> Grammar n c l t -> [t]
|
||||
-> ParseChart (Item n c l) (AKey c)
|
||||
parse (False,False) mcfg toks = buildChart keyof
|
||||
[complete, scan, combine, convert]
|
||||
(predict mcfg toks)
|
||||
parse (True, False) mcfg toks = buildChart keyof
|
||||
[predictKilbury mcfg toks, complete, combine, convert]
|
||||
(terminal mcfg toks)
|
||||
parse (False, True) mcfg toks = buildChart keyof
|
||||
[predictEarley mcfg toks, complete, scan, combine, convert]
|
||||
(initial (take 1 mcfg) toks)
|
||||
|
||||
predictKilbury mcfg toks _ (Passive (_, cat, _) found _) =
|
||||
[ Active (f, a, rhs) [] rng lin' lins' daughters |
|
||||
Rule a rhs ((Lin l ((Cat (cat', r, i)):syms)):lins) f <- mcfg,
|
||||
cat == cat',
|
||||
lin' : lins' <- solutions $ rangeRestRec toks (Lin l syms : lins),
|
||||
-- lins' <- solutions $ rangeRestRec toks lins,
|
||||
rng <- solutions $ projection r found,
|
||||
let daughters = (replaceRec (replicate (length rhs) []) i found) ]
|
||||
predictKilbury _ _ _ _ = []
|
||||
|
||||
predictEarley mcfg toks _ item@(Active _ _ _ (Lin _ ((Cat (cat, _, _)):_)) _ _) =
|
||||
concat [ predEar toks item rule |
|
||||
rule@(Rule cat' _ _ _) <- mcfg, cat == cat' ]
|
||||
predictEarley _ _ _ _ = []
|
||||
|
||||
predEar toks _ (Rule cat [] lins f) =
|
||||
[ Passive (f, cat, []) (makeRangeRec lins') [] |
|
||||
lins' <- solutions $ rangeRestRec toks lins ]
|
||||
predEar toks (Active _ _ (Range (_,j)) _ _ _) (Rule cat rhs lins f) =
|
||||
[ Active (f, cat, rhs) [] (Range (j, j)) lin' lins' (replicate (length rhs) []) |
|
||||
(lin':lins') <- solutions $ rangeRestRec toks lins ]
|
||||
predEar toks (Active _ _ EmptyRange _ _ _) (Rule cat rhs lins f) =
|
||||
[ Active (f, cat, rhs) [] EmptyRange lin' lins' (replicate (length rhs) []) |
|
||||
(lin':lins') <- solutions $ rangeRestRec toks lins ]
|
||||
|
||||
|
||||
{--Inference rules ------------------------------------------------------------
|
||||
predict : Creates an Active Item of every Rule in the Grammar to give the
|
||||
initial Agenda
|
||||
complete:
|
||||
scan :
|
||||
combine : Creates an Active Item every time it is possible to combine
|
||||
an Active Item from the agenda with a Passive Item from the Chart
|
||||
convert : Active Items with nothing to find are converted to Passive Items
|
||||
------------------------------------------------------------------------------}
|
||||
|
||||
predict :: Eq t => Grammar n c l t -> [t] -> [Item n c l]
|
||||
predict grammar toks = [ Active (f, cat, rhs) [] EmptyRange lin' lins'
|
||||
(replicate (length rhs) []) |
|
||||
Rule cat rhs lins f <- grammar,
|
||||
(lin':lins') <- solutions $ rangeRestRec toks lins ]
|
||||
|
||||
|
||||
complete :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey c) -> Item n c l
|
||||
-> [Item n c l]
|
||||
complete _ (Active rule found (Range (i, j)) (Lin l []) (lin:lins) recs) =
|
||||
[ Active rule (found ++ [(l, Range (i,j))]) EmptyRange lin lins recs ]
|
||||
-- completion
|
||||
complete :: (Ord c, Ord n, Ord l) => AChart c n l -> Item c n l -> [Item c n l]
|
||||
complete _ (Active rule found rng (Lin l []) (lin:lins) recs) =
|
||||
return $ Active rule (found ++ [(l, rng)]) EmptyRange lin lins recs
|
||||
complete _ _ = []
|
||||
|
||||
|
||||
scan :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey c) -> Item n c l
|
||||
-> [Item n c l]
|
||||
scan _ (Active rule found rng (Lin l ((Tok rng'):syms)) lins recs) =
|
||||
[ Active rule found rng'' (Lin l syms) lins recs |
|
||||
rng'' <- solutions $ concRanges rng rng' ]
|
||||
-- scanning
|
||||
scan :: (Ord c, Ord n, Ord l) => AChart c n l -> Item c n l -> [Item c n l]
|
||||
scan _ (Active rule found rng (Lin l (Tok rng':syms)) lins recs) =
|
||||
do rng'' <- concatRange rng rng'
|
||||
return $ Active rule found rng'' (Lin l syms) lins recs
|
||||
scan _ _ = []
|
||||
|
||||
|
||||
combine :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey c) -> Item n c l
|
||||
-> [Item n c l]
|
||||
combine chart (Active rule found rng (Lin l ((Cat (c, r, d)):syms)) lins recs) =
|
||||
[ Active rule found rng'' (Lin l syms) lins (replaceRec recs d found') |
|
||||
Passive _ found' _ <- chartLookup chart (Pass c),
|
||||
rng' <- solutions $ projection r found',
|
||||
rng'' <- solutions $ concRanges rng rng',
|
||||
subsumes (recs !! d) found' ]
|
||||
combine chart (Passive (_, c, _) found _) =
|
||||
[ Active rule found' rng (Lin l syms) lins (replaceRec recs' d found) |
|
||||
Active rule found' rng' (Lin l ((Cat (c, r, d)):syms)) lins recs'
|
||||
<- chartLookup chart (Act c),
|
||||
rng'' <- solutions $ projection r found,
|
||||
rng <- solutions $ concRanges rng' rng'',
|
||||
subsumes (recs' !! d) found ]
|
||||
-- | Creates an Active Item every time it is possible to combine
|
||||
-- an Active Item from the agenda with a Passive Item from the Chart
|
||||
combine :: (Ord c, Ord n, Ord l) => AChart c n l -> Item c n l -> [Item c n l]
|
||||
combine chart (Active rule found rng (Lin l (Cat (c, r, d):syms)) lins recs) =
|
||||
do Passive _c found' <- chartLookup chart (Pass c)
|
||||
rng' <- projection r found'
|
||||
rng'' <- concatRange rng rng'
|
||||
guard $ subsumes (recs !! d) found'
|
||||
return $ Active rule found rng'' (Lin l syms) lins (replaceRec recs d found')
|
||||
combine chart (Passive c found) =
|
||||
do Active rule found' rng' (Lin l ((Cat (_c, r, d)):syms)) lins recs'
|
||||
<- chartLookup chart (Act c)
|
||||
rng'' <- projection r found
|
||||
rng <- concatRange rng' rng''
|
||||
guard $ subsumes (recs' !! d) found
|
||||
return $ Active rule found' rng (Lin l syms) lins (replaceRec recs' d found)
|
||||
combine _ _ = []
|
||||
|
||||
convert :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey c) -> Item n c l
|
||||
-> [Item n c l]
|
||||
convert _ (Active rule found rng (Lin l []) [] recs) =
|
||||
[ Passive rule (found ++ [(l, rng)]) recs ]
|
||||
-- | Active Items with nothing to find are converted to Final items,
|
||||
-- which in turn are converted to Passive Items
|
||||
convert :: (Ord c, Ord n, Ord l) => AChart c n l -> Item c n l -> [Item c n l]
|
||||
convert _ (Active rule found rng (Lin lbl []) [] recs) =
|
||||
return $ Final rule (found ++ [(lbl,rng)]) recs
|
||||
convert _ (Final (Abs cat _ _) found _) =
|
||||
return $ Passive cat found
|
||||
convert _ _ = []
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- Naive --
|
||||
|
||||
-- | Creates an Active Item of every Rule in the Grammar to give the initial Agenda
|
||||
predict :: (Ord c, Ord n, Ord l, Ord t) => MCFGrammar c n l t -> Input t -> [Item c n l]
|
||||
predict grammar toks =
|
||||
do Rule abs (Cnc _ _ lins) <- grammar
|
||||
(lin':lins') <- rangeRestRec toks lins
|
||||
return $ Active abs [] EmptyRange lin' lins' (emptyChildren abs)
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- Earley --
|
||||
-- anropas med alla startregler
|
||||
initial :: Eq t => [Rule n c l t] -> [t] -> [Item n c l]
|
||||
initial starts toks =
|
||||
[ Active (f, s, rhs) [] (Range (0, 0)) lin' lins' (replicate (length rhs) []) |
|
||||
Rule s rhs lins f <- starts,
|
||||
(lin':lins') <- solutions $ rangeRestRec toks lins ]
|
||||
|
||||
-- anropas med alla startkategorier
|
||||
initial :: (Ord c, Ord n, Ord l, Ord t) => MCFGrammar c n l t -> [c] -> Input t -> [Item c n l]
|
||||
initial mcfg starts toks =
|
||||
do Rule abs@(Abs cat _ _) (Cnc _ _ lins) <- mcfg
|
||||
guard $ cat `elem` starts
|
||||
lin' : lins' <- rangeRestRec toks lins
|
||||
return $ Active abs [] (Range (0, 0)) lin' lins' (emptyChildren abs)
|
||||
|
||||
-- earley prediction
|
||||
predictEarley :: (Ord c, Ord n, Ord l, Ord t) => MCFGrammar c n l t -> Input t
|
||||
-> AChart c n l -> Item c n l -> [Item c n l]
|
||||
predictEarley mcfg toks _ (Active _ _ rng (Lin _ (Cat (cat,_,_):_)) _ _) =
|
||||
do rule@(Rule (Abs cat' _ _) _) <- mcfg
|
||||
guard $ cat == cat'
|
||||
predEar toks rng rule
|
||||
predictEarley _ _ _ _ = []
|
||||
|
||||
predEar :: (Ord c, Ord n, Ord l, Ord t) =>
|
||||
Input t -> Range -> MCFRule c n l t -> [Item c n l]
|
||||
predEar toks _ (Rule abs@(Abs _ [] _) (Cnc _ _ lins)) =
|
||||
do lins' <- rangeRestRec toks lins
|
||||
return $ Final abs (makeRangeRec lins') []
|
||||
predEar toks rng (Rule abs (Cnc _ _ lins)) =
|
||||
do lin' : lins' <- rangeRestRec toks lins
|
||||
return $ Active abs [] (makeMaxRange rng) lin' lins' (emptyChildren abs)
|
||||
|
||||
makeMaxRange (Range (_, j)) = Range (j, j)
|
||||
makeMaxRange EmptyRange = EmptyRange
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- Kilbury --
|
||||
|
||||
terminal :: (Ord c, Ord n, Ord l, Ord t) => MCFGrammar c n l t -> Input t -> [Item c n l]
|
||||
terminal mcfg toks =
|
||||
[ Passive (f, cat, []) (makeRangeRec lins') [] |
|
||||
Rule cat [] lins f <- mcfg,
|
||||
lins' <- solutions $ rangeRestRec toks lins ]
|
||||
do Rule abs@(Abs _ [] _) (Cnc _ _ lins) <- mcfg
|
||||
lins' <- rangeRestRec toks lins
|
||||
return $ Final abs (makeRangeRec lins') []
|
||||
|
||||
-- kilbury prediction
|
||||
predictKilbury :: (Ord c, Ord n, Ord l, Ord t) =>
|
||||
MCFGrammar c n l t -> Input t
|
||||
-> AChart c n l -> Item c n l -> [Item c n l]
|
||||
predictKilbury mcfg toks _ (Passive cat found) =
|
||||
do Rule abs@(Abs _ rhs _) (Cnc _ _ (Lin l (Cat (cat', r, i):syms) : lins)) <- mcfg
|
||||
guard $ cat == cat'
|
||||
lin' : lins' <- rangeRestRec toks (Lin l syms : lins)
|
||||
rng <- projection r found
|
||||
let children = replaceRec (emptyChildren abs) i found
|
||||
return $ Active abs [] rng lin' lins' children
|
||||
predictKilbury _ _ _ _ = []
|
||||
|
||||
123
src/GF/Parsing/MCFG/Incremental.hs
Normal file
123
src/GF/Parsing/MCFG/Incremental.hs
Normal file
@@ -0,0 +1,123 @@
|
||||
{-- Module --------------------------------------------------------------------
|
||||
Filename: IncrementalParse.hs
|
||||
Author: Håkan Burden
|
||||
Time-stamp: <2005-04-18, 15:07>
|
||||
|
||||
Description: An agenda-driven implementation of the incremental algorithm 4.6
|
||||
that handles erasing and suppressing MCFG.
|
||||
As described in Ljunglöf (2004)
|
||||
------------------------------------------------------------------------------}
|
||||
|
||||
module IncrementalParse where
|
||||
|
||||
|
||||
-- Haskell
|
||||
import List
|
||||
|
||||
-- GF modules
|
||||
import Examples
|
||||
import GeneralChart
|
||||
import MCFGrammar
|
||||
import MCFParser
|
||||
import Parser
|
||||
import Range
|
||||
import Nondet
|
||||
|
||||
|
||||
{-- Datatypes -----------------------------------------------------------------
|
||||
IChart: A RedBlackMap with Items and Keys
|
||||
Item : One kind of Item since the Passive Items not necessarily need to be
|
||||
saturated iow, they can still have rows to recognize.
|
||||
IKey :
|
||||
------------------------------------------------------------------------------}
|
||||
|
||||
type IChart n c l = ParseChart (Item n c l) (IKey c l)
|
||||
|
||||
data Item n c l = Active (AbstractRule n c)
|
||||
(RangeRec l)
|
||||
Range
|
||||
(Lin c l Range)
|
||||
(LinRec c l Range)
|
||||
[RangeRec l]
|
||||
-- | Passive (AbstractRule n c)
|
||||
-- (RangeRec l)
|
||||
-- [RangeRec l]
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
data IKey c l = Act c l Int
|
||||
-- | ActE l
|
||||
| Pass c l Int
|
||||
-- | Pred l
|
||||
| Useless
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
keyof :: Item n c l -> IKey c l
|
||||
keyof (Active _ _ (Range (_,j)) (Lin _ ((Cat (next,lbl,_)):_)) _ _)
|
||||
= Act next lbl j
|
||||
keyof (Active (_, cat, _) found (Range (i,_)) (Lin lbl []) _ _)
|
||||
= Pass cat lbl i
|
||||
keyof _
|
||||
= Useless
|
||||
|
||||
|
||||
{-- Parsing -------------------------------------------------------------------
|
||||
recognize:
|
||||
parse : Builds a chart from the initial agenda, given by prediction, and
|
||||
the inference rules
|
||||
keyof : Given an Item returns an appropriate Key for the Chart
|
||||
------------------------------------------------------------------------------}
|
||||
|
||||
recognize mcfg toks = chartMember (parse mcfg toks) item (keyof item)
|
||||
where n = length toks
|
||||
n2 = n `div` 2
|
||||
item = Active ("f",S,[A])
|
||||
[] (Range (0, n)) (Lin "s" []) []
|
||||
[[("p", Range (0, n2)), ("q", Range (n2, n))]]
|
||||
|
||||
|
||||
parse :: (Ord n, Ord c, Ord l, Eq t) => Grammar n c l t -> [t] -> IChart n c l
|
||||
parse mcfg toks = buildChart keyof [complete ntoks, scan, combine] (predict mcfg toks ntoks)
|
||||
where ntoks = length toks
|
||||
|
||||
complete :: (Ord n, Ord c, Ord l) => Int -> IChart n c l
|
||||
-> Item n c l -> [Item n c l]
|
||||
complete ntoks _ (Active rule found rng@(Range (_,j)) (Lin l []) lins recs) =
|
||||
[ Active rule (found ++ [(l, rng)]) (Range (k,k)) lin lins' recs |
|
||||
(lin, lins') <- select lins,
|
||||
k <- [j .. ntoks] ]
|
||||
complete _ _ _ = []
|
||||
|
||||
|
||||
predict :: (Eq n, Eq c, Eq l, Eq t) => Grammar n c l t -> [t] -> Int -> [Item n c l]
|
||||
predict mcfg toks n = [ Active (f, c, rhs) [] (Range (k,k)) lin' lins'' daughters |
|
||||
Rule c rhs lins f <- mcfg,
|
||||
let daughters = replicate (length rhs) [],
|
||||
lins' <- solutions $ rangeRestRec toks lins,
|
||||
(lin', lins'') <- select lins',
|
||||
k <- [0..n] ]
|
||||
|
||||
|
||||
scan :: (Ord n, Ord c, Ord l) => IChart n c l -> Item n c l -> [Item n c l]
|
||||
scan _ (Active rule found rng (Lin l (Tok rng':syms)) lins recs) =
|
||||
[ Active rule found rng'' (Lin l syms) lins recs |
|
||||
rng'' <- solutions $ concRanges rng rng' ]
|
||||
scan _ _ = []
|
||||
|
||||
|
||||
combine :: (Ord n, Ord c, Ord l) => IChart n c l -> Item n c l -> [Item n c l]
|
||||
combine chart (Active rule found rng@(Range (_,j)) (Lin l ((Cat (c,r,d)):syms)) lins recs) =
|
||||
[ Active rule found rng'' (Lin l syms) lins (replaceRec recs d (found' ++ [(l',rng')])) |
|
||||
Active _ found' rng' (Lin l' []) _ _ <- chartLookup chart (Pass c r j),
|
||||
subsumes (recs !! d) (found' ++ [(l',rng')]),
|
||||
rng'' <- solutions $ concRanges rng rng' ]
|
||||
combine chart (Active (_,c,_) found rng'@(Range (i,_)) (Lin l []) _ _) =
|
||||
[ Active rule found' rng'' (Lin l' syms) lins (replaceRec recs d (found ++ [(l,rng')])) |
|
||||
Active rule found' rng (Lin l' ((Cat (c,r,d)):syms)) lins recs
|
||||
<- chartLookup chart (Act c l i),
|
||||
subsumes (recs !! d) (found ++ [(l,rng')]),
|
||||
rng'' <- solutions $ concRanges rng rng' ]
|
||||
combine _ _ = []
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
module GF.NewParsing.MCFG.Naive where
|
||||
module GF.NewParsing.MCFG.Naive (parse) where
|
||||
|
||||
|
||||
-- GF modules
|
||||
@@ -8,21 +8,34 @@ import GF.Formalism.GCFG
|
||||
import GF.Formalism.MCFG
|
||||
import GF.Formalism.Utilities
|
||||
import GF.NewParsing.MCFG.Range
|
||||
import GF.NewParsing.MCFG.PInfo
|
||||
import GF.Data.SortedList
|
||||
import GF.Data.Assoc
|
||||
import GF.System.Tracing
|
||||
|
||||
{-- Datatypes and types -------------------------------------------------------
|
||||
NChart : A RedBlackMap with Items and Keys
|
||||
Item : The parse Items are either Active or Passive
|
||||
NKey : One for Active Items, one for Passive and one for Active Items
|
||||
to convert to Passive
|
||||
DottedRule: (function-name, LHS, [Found in RHS], [To find in RHS])
|
||||
------------------------------------------------------------------------------}
|
||||
----------------------------------------------------------------------
|
||||
-- * parsing
|
||||
|
||||
-- | Builds a chart from the initial agenda, given by prediction, and
|
||||
-- the inference rules
|
||||
parse :: (Ord t, Ord n, Ord c, Ord l) => MCFParser c n l t
|
||||
parse mcfg starts toks
|
||||
= [ Abs (cat, makeRangeRec lins) (zip rhs rrecs) fun |
|
||||
Active (Abs cat _Nil fun, rhs) lins rrecs <- chartLookup chart Final ]
|
||||
where chart = process mcfg toks
|
||||
|
||||
process :: (Ord t, Ord n, Ord c, Ord l) => MCFGrammar c n l t -> Input t -> NChart c n l
|
||||
process mcfg toks
|
||||
= tracePrt "MCFG.Naive - chart size" prtSizes $
|
||||
buildChart keyof [convert, combine] (predict toks mcfg)
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * type definitions
|
||||
|
||||
type NChart c n l = ParseChart (Item c n l) (NKey c)
|
||||
|
||||
data Item c n l = Active (DottedRule c n) (LinRec c l Range) [RangeRec l]
|
||||
| Passive (Abstract c n) (RangeRec l)
|
||||
| Passive c (RangeRec l)
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
type DottedRule c n = (Abstract c n, [c])
|
||||
@@ -32,63 +45,43 @@ data NKey c = Act c
|
||||
| Final
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
|
||||
{-- Parsing -------------------------------------------------------------------
|
||||
recognize:
|
||||
parse : Builds a chart from the initial agenda, given by prediction, and
|
||||
the inference rules
|
||||
keyof : Given an Item returns an appropriate Key for the Chart
|
||||
------------------------------------------------------------------------------}
|
||||
|
||||
|
||||
parse :: (Ord t, Ord n, Ord c, Ord l) => MCFGrammar c n l t -> [t]
|
||||
-> SyntaxChart n (c, RangeRec l)
|
||||
parse mcfg toks = chart3
|
||||
where chart3 = assocMap (const groupPairs) chart2
|
||||
chart2 = accumAssoc id $ nubsort chart1
|
||||
chart1 = [ ((cat, rrec), (fun, zip rhs rrecs)) |
|
||||
Active (Abs cat _Nil fun, rhs) lins rrecs <- chartLookup chart0 Final,
|
||||
let rrec = makeRangeRec lins ]
|
||||
chart0 = process mcfg toks
|
||||
|
||||
process :: (Ord t, Ord n, Ord c, Ord l) => MCFGrammar c n l t -> [t] -> NChart c n l
|
||||
process mcfg toks = buildChart keyof [convert, combine] (predict toks mcfg)
|
||||
|
||||
|
||||
keyof :: Item c n l -> NKey c
|
||||
keyof (Active (Abs _ (next:_) _, _) _ _) = Act next
|
||||
keyof (Passive (Abs cat _ _) _) = Pass cat
|
||||
keyof (Passive cat _) = Pass cat
|
||||
keyof _ = Final
|
||||
|
||||
-- for tracing purposes
|
||||
prtSizes chart = "final=" ++ show (length (chartLookup chart Final)) ++
|
||||
", passive=" ++ show (sum [length (chartLookup chart k) |
|
||||
k@(Pass _) <- chartKeys chart ]) ++
|
||||
", active=" ++ show (sum [length (chartLookup chart k) |
|
||||
k@(Act _) <- chartKeys chart ])
|
||||
|
||||
{--Inference rules ------------------------------------------------------------
|
||||
predict: Creates an Active Item of every Rule in the Grammar to give the
|
||||
initial Agenda
|
||||
combine: Creates an Active Item every time it is possible to combine
|
||||
an Active Item from the agenda with a Passive Item from the Chart
|
||||
convert: Active Items with nothing to find are converted to Passive Items
|
||||
------------------------------------------------------------------------------}
|
||||
----------------------------------------------------------------------
|
||||
-- * inference rules
|
||||
|
||||
predict :: (Eq t, Eq c) => [t] -> MCFGrammar c n l t -> [Item c n l]
|
||||
-- Creates an Active Item of every Rule in the Grammar to give the initial Agenda
|
||||
predict :: Ord t => Input t -> MCFGrammar c n l t -> [Item c n l]
|
||||
predict toks mcfg = [ Active (abs, []) lins' [] |
|
||||
Rule abs (Cnc _ _ lins) <- mcfg,
|
||||
lins' <- rangeRestRec toks lins ]
|
||||
|
||||
|
||||
-- | Creates an Active Item every time it is possible to combine
|
||||
-- an Active Item from the agenda with a Passive Item from the Chart
|
||||
combine :: (Ord n, Ord c, Ord l) => NChart c n l -> Item c n l -> [Item c n l]
|
||||
combine chart (Active (Abs nt (c:find) f, found) lins rrecs) =
|
||||
do Passive _ rrec <- chartLookup chart (Pass c)
|
||||
lins' <- concLinRec $ substArgRec (length found) rrec lins
|
||||
return $ Active (Abs nt find f, found ++ [c]) lins' (rrecs ++ [rrec])
|
||||
combine chart (Passive (Abs c _ _) rrec) =
|
||||
combine chart (Passive c rrec) =
|
||||
do Active (Abs nt (c:find) f, found) lins rrecs <- chartLookup chart (Act c)
|
||||
lins' <- concLinRec $ substArgRec (length found) rrec lins
|
||||
return $ Active (Abs nt find f, found ++ [c]) lins' (rrecs ++ [rrec])
|
||||
combine _ _ = []
|
||||
|
||||
|
||||
-- | Active Items with nothing to find are converted to Passive Items
|
||||
convert :: (Ord n, Ord c, Ord l) => NChart c n l -> Item c n l -> [Item c n l]
|
||||
convert _ (Active (Abs nt [] f, rhs) lins _) = [Passive (Abs nt rhs f) rrec]
|
||||
convert _ (Active (Abs cat [] _, _) lins _) = [Passive cat rrec]
|
||||
where rrec = makeRangeRec lins
|
||||
convert _ _ = []
|
||||
|
||||
|
||||
@@ -4,15 +4,14 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/19 10:46:08 $
|
||||
-- > CVS $Date: 2005/04/20 12:49:45 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.1 $
|
||||
-- > CVS $Revision: 1.2 $
|
||||
--
|
||||
-- MCFG parsing, parser information
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.NewParsing.MCFG.PInfo
|
||||
(MCFParser, MCFPInfo(..), buildMCFPInfo) where
|
||||
module GF.NewParsing.MCFG.PInfo where
|
||||
|
||||
import GF.System.Tracing
|
||||
import GF.Infra.Print
|
||||
@@ -22,6 +21,7 @@ import GF.Formalism.GCFG
|
||||
import GF.Formalism.MCFG
|
||||
import GF.Data.SortedList
|
||||
import GF.Data.Assoc
|
||||
import GF.NewParsing.MCFG.Range
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- type declarations
|
||||
@@ -32,10 +32,13 @@ type MCFParser c n l t = MCFPInfo c n l t
|
||||
-> Input t
|
||||
-> MCFChart c n l
|
||||
|
||||
type MCFChart c n l = [(n, (c, RangeRec l), [(c, RangeRec l)])]
|
||||
type MCFChart c n l = [Abstract (c, RangeRec l) n]
|
||||
|
||||
type MCFPInfo c n l t = MCFGrammar c n l t
|
||||
|
||||
buildCFPInfo :: (Ord n, Ord c, Ord l, Ord t) => MCFGrammar c n l t -> MCFPInfo c n l t
|
||||
buildCFPInfo = id
|
||||
buildMCFPInfo :: (Ord n, Ord c, Ord l, Ord t) => MCFGrammar c n l t -> MCFPInfo c n l t
|
||||
buildMCFPInfo = id
|
||||
|
||||
makeFinalEdge :: c -> l -> (Int, Int) -> (c, RangeRec l)
|
||||
makeFinalEdge cat lbl bnds = (cat, [(lbl, makeRange bnds)])
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ import GF.Formalism.GCFG
|
||||
import GF.Formalism.MCFG
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Infra.Print
|
||||
|
||||
import GF.Data.Assoc ((?))
|
||||
|
||||
------------------------------------------------------------
|
||||
-- ranges as single pairs
|
||||
@@ -95,29 +95,29 @@ makeRangeRec lins = map convLin lins
|
||||
|
||||
--- Record projection --------------------------------------------------------
|
||||
|
||||
projection :: Eq l => l -> RangeRec l -> [Range]
|
||||
projection :: Ord l => l -> RangeRec l -> [Range]
|
||||
projection l rec = maybe (fail "projection") return $ lookup l rec
|
||||
|
||||
|
||||
--- Range restriction --------------------------------------------------------
|
||||
|
||||
rangeRestTok :: Eq t => [t] -> t -> [Range]
|
||||
rangeRestTok toks tok = do i <- elemIndices tok toks
|
||||
return (makeRange (i, i+1))
|
||||
rangeRestTok :: Ord t => Input t -> t -> [Range]
|
||||
rangeRestTok toks tok = do rng <- inputToken toks ? tok
|
||||
return (makeRange rng)
|
||||
|
||||
|
||||
rangeRestSym :: Eq t => [t] -> Symbol a t -> [Symbol a Range]
|
||||
rangeRestSym :: Ord t => Input t -> Symbol a t -> [Symbol a Range]
|
||||
rangeRestSym toks (Tok tok) = do rng <- rangeRestTok toks tok
|
||||
return (Tok rng)
|
||||
rangeRestSym _ (Cat c) = return (Cat c)
|
||||
|
||||
|
||||
rangeRestLin :: Eq t => [t] -> Lin c l t -> [Lin c l Range]
|
||||
rangeRestLin :: Ord t => Input t -> Lin c l t -> [Lin c l Range]
|
||||
rangeRestLin toks (Lin lbl syms) = do syms' <- mapM (rangeRestSym toks) syms
|
||||
return (Lin lbl syms')
|
||||
|
||||
|
||||
rangeRestRec :: Eq t => [t] -> LinRec c l t -> [LinRec c l Range]
|
||||
rangeRestRec :: Ord t => Input t -> LinRec c l t -> [LinRec c l Range]
|
||||
rangeRestRec toks = mapM (rangeRestLin toks)
|
||||
|
||||
|
||||
@@ -131,7 +131,7 @@ replaceRec recs i rec = (fst tup) ++ [rec] ++ (tail $ snd tup)
|
||||
|
||||
--- Argument substitution ----------------------------------------------------
|
||||
|
||||
substArgSymbol :: Eq l => Int -> RangeRec l -> Symbol (c, l, Int) Range
|
||||
substArgSymbol :: Ord l => Int -> RangeRec l -> Symbol (c, l, Int) Range
|
||||
-> Symbol (c, l, Int) Range
|
||||
substArgSymbol i rec (Tok rng) = (Tok rng)
|
||||
substArgSymbol i rec (Cat (c, l, j))
|
||||
@@ -139,13 +139,13 @@ substArgSymbol i rec (Cat (c, l, j))
|
||||
| otherwise = (Cat (c, l, j))
|
||||
|
||||
|
||||
substArgLin :: Eq l => Int -> RangeRec l -> Lin c l Range
|
||||
substArgLin :: Ord l => Int -> RangeRec l -> Lin c l Range
|
||||
-> Lin c l Range
|
||||
substArgLin i rec (Lin lbl syms) =
|
||||
(Lin lbl (map (substArgSymbol i rec) syms))
|
||||
|
||||
|
||||
substArgRec :: Eq l => Int -> RangeRec l -> LinRec c l Range
|
||||
substArgRec :: Ord l => Int -> RangeRec l -> LinRec c l Range
|
||||
-> LinRec c l Range
|
||||
substArgRec i rec lins = map (substArgLin i rec) lins
|
||||
|
||||
@@ -153,7 +153,7 @@ substArgRec i rec lins = map (substArgLin i rec) lins
|
||||
--- Subsumation -------------------------------------------------------------
|
||||
|
||||
-- "rec' subsumes rec?"
|
||||
subsumes :: Eq l => RangeRec l -> RangeRec l -> Bool
|
||||
subsumes :: Ord l => RangeRec l -> RangeRec l -> Bool
|
||||
subsumes rec rec' = and [elem r rec' | r <- rec]
|
||||
|
||||
|
||||
|
||||
183
src/GF/Parsing/MCFG/ViaCFG.hs
Normal file
183
src/GF/Parsing/MCFG/ViaCFG.hs
Normal file
@@ -0,0 +1,183 @@
|
||||
{-- Module --------------------------------------------------------------------
|
||||
Filename: ApproxParse.hs
|
||||
Author: Håkan Burden
|
||||
Time-stamp: <2005-04-18, 14:56>
|
||||
|
||||
Description: An agenda-driven implementation of the active algorithm 4.3.4,
|
||||
parsing through context-free approximation as described in
|
||||
Ljunglöf (2004)
|
||||
------------------------------------------------------------------------------}
|
||||
|
||||
module ApproxParse where
|
||||
|
||||
|
||||
-- Haskell modules
|
||||
import List
|
||||
import Monad
|
||||
|
||||
-- GF modules
|
||||
import ConvertMCFGtoDecoratedCFG
|
||||
import qualified DecoratedCFParser as CFP
|
||||
import qualified DecoratedGrammar as CFG
|
||||
import Examples
|
||||
import GeneralChart
|
||||
import qualified MCFGrammar as MCFG
|
||||
import MCFParser
|
||||
import Nondet
|
||||
import Parser
|
||||
import Range
|
||||
|
||||
|
||||
{-- Datatypes -----------------------------------------------------------------
|
||||
Chart
|
||||
Item
|
||||
Key
|
||||
|
||||
|
||||
Item : Four different Items are used. PreMCFG for MCFG Pre Items, Pre are
|
||||
the Items returned by the pre-Functions and Mark are the
|
||||
corresponding Items for the mark-Functions. For convenience correctly
|
||||
marked Mark Items are converted to Passive Items.
|
||||
I use dottedrule for convenience to keep track of wich daughter's RangeRec to look for.
|
||||
AChart: A RedBlackMap with Items and Keys
|
||||
AKey :
|
||||
------------------------------------------------------------------------------}
|
||||
|
||||
--Ev ta bort några typer av Item och bara nyckla på det som är unikt för den typen...
|
||||
data Item n c l = PreMCFG (n, c) (RangeRec l) [RangeRec l]
|
||||
| Pre (n, c) (RangeRec l) [l] [RangeRec l]
|
||||
| Mark (n, c) (RangeRec l) (RangeRec l) [RangeRec l]
|
||||
| Passive (n, c) (RangeRec l) (RangeRec l)
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
type AChart n c l = ParseChart (Item n c l) (AKey n c l)
|
||||
|
||||
data AKey n c l = Pr (n, c) l
|
||||
| Pm (n, c) l
|
||||
| Mk (RangeRec l)
|
||||
| Ps (RangeRec l)
|
||||
| Useless
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
|
||||
{-- Parsing -------------------------------------------------------------------
|
||||
recognize:
|
||||
parse : The Agenda consists of the Passive Items from context-free
|
||||
approximation (as PreMCFG Items) and the Pre Items inferred by
|
||||
pre-prediction.
|
||||
keyof : Given an Item returns an appropriate Key for the Chart
|
||||
------------------------------------------------------------------------------}
|
||||
|
||||
recognize strategy mcfg toks = chartMember (parse strategy mcfg toks)
|
||||
(Passive ("f", S)
|
||||
[("s" , MCFG.Range (0, n))]
|
||||
[("p" , MCFG.Range (0, n2)), ("q", MCFG.Range (n2, n))])
|
||||
(Ps [("s" , MCFG.Range (0, n))])
|
||||
where n = length toks
|
||||
n2 = n `div` 2
|
||||
|
||||
|
||||
--parse :: (Ord n, Ord NT, Ord String, Eq t) => CFP.Strategy -> MCFG.Grammar n NT String t -> [t]
|
||||
-- -> AChart n NT String
|
||||
parse strategy mcfg toks
|
||||
= buildChart keyof
|
||||
[preCombine, markPredict, markCombine, convert]
|
||||
(makePreItems (CFP.parse strategy (CFG.pInfo (convertGrammar mcfg)) [(S, "s")] toks) ++
|
||||
(prePredict mcfg))
|
||||
|
||||
|
||||
keyof :: Item n c l -> AKey n c l
|
||||
keyof (PreMCFG head [(lbl, rng)] _) = Pm head lbl
|
||||
keyof (Pre head _ (lbl:lbls) _) = Pr head lbl
|
||||
keyof (Mark _ _ _ (rec:recs)) = Mk rec
|
||||
keyof (Passive _ rec _) = Ps rec
|
||||
keyof _ = Useless
|
||||
|
||||
|
||||
{-- Initializing agenda -------------------------------------------------------
|
||||
makePreItems:
|
||||
------------------------------------------------------------------------------}
|
||||
|
||||
makePreItems :: (Eq c, Ord i) => CFG.Grammar n (Edge (c, l)) i t -> [Item n c l]
|
||||
makePreItems cfchart
|
||||
= [ PreMCFG (fun, cat) [(lbl, MCFG.makeRange (i, j))] (symToRec beta) |
|
||||
CFG.Rule (Edge i j (cat,lbl)) beta fun <- cfchart ]
|
||||
|
||||
|
||||
prePredict :: (Ord n, Ord c, Ord l) => MCFG.Grammar n c l t -> [Item n c l]
|
||||
prePredict mcfg =
|
||||
[ Pre (f, nt) [] (getLables lins) (replicate (nrOfCats (head lins)) []) |
|
||||
MCFG.Rule nt nts lins f <- mcfg ]
|
||||
|
||||
|
||||
{-- Inference rules ---------------------------------------------------------
|
||||
prePredict :
|
||||
preCombine :
|
||||
markPredict:
|
||||
markCombine:
|
||||
convert :
|
||||
----------------------------------------------------------------------------}
|
||||
|
||||
preCombine :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey n c l)
|
||||
-> Item n c l -> [Item n c l]
|
||||
preCombine chart (Pre head rec (l:ls) recs) =
|
||||
[ Pre head (rec ++ [(l, r)]) ls recs'' |
|
||||
PreMCFG head [(l, r)] recs' <- chartLookup chart (Pm head l),
|
||||
recs'' <- solutions (unifyRangeRecs recs recs') ]
|
||||
preCombine chart (PreMCFG head [(l, r)] recs) =
|
||||
[ Pre head (rec ++ [(l, r)]) ls recs'' |
|
||||
Pre head rec (l:ls) recs' <- chartLookup chart (Pr head l),
|
||||
recs'' <- solutions (unifyRangeRecs recs recs') ]
|
||||
preCombine _ _ = []
|
||||
|
||||
|
||||
markPredict :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey n c l)
|
||||
-> Item n c l -> [Item n c l]
|
||||
markPredict _ (Pre (n, c) rec [] recs) = [Mark (n, c) rec [] recs]
|
||||
markPredict _ _ = []
|
||||
|
||||
|
||||
markCombine :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey n c l)
|
||||
-> Item n c l -> [Item n c l]
|
||||
markCombine chart (Mark (f, c) rec mRec (r:recs)) =
|
||||
[ Mark (f, c) rec (mRec ++ r) recs |
|
||||
Passive _ r _ <- chartLookup chart (Ps r)]
|
||||
markCombine chart (Passive _ r _) =
|
||||
[ Mark (f, c) rec (mRec++r) recs |
|
||||
Mark (f, c) rec mRec (r:recs) <- chartLookup chart (Mk r) ]
|
||||
markCombine _ _ = []
|
||||
|
||||
|
||||
convert :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey n c l)
|
||||
-> Item n c l -> [Item n c l]
|
||||
convert _ (Mark (f, c) r rec []) = [Passive (f, c) r rec]
|
||||
convert _ _ = []
|
||||
|
||||
|
||||
{-- Help functions ----------------------------------------------------------------
|
||||
getRHS :
|
||||
getLables:
|
||||
symToRec :
|
||||
----------------------------------------------------------------------------------}
|
||||
|
||||
-- FULKOD !
|
||||
nrOfCats :: Eq c => MCFG.Lin c l t -> Int
|
||||
nrOfCats (MCFG.Lin l syms) = length $ nub [(c, i) | Cat (c, l, i) <- syms]
|
||||
|
||||
|
||||
--
|
||||
getLables :: LinRec c l t -> [l]
|
||||
getLables lins = [l | MCFG.Lin l syms <- lins]
|
||||
|
||||
|
||||
--
|
||||
symToRec :: Ord i => [Symbol (Edge (c, l), i) d] -> [[(l, MCFG.Range)]]
|
||||
symToRec beta = map makeLblRng $ groupBy (\(_, d) (_, d') -> (d == d'))
|
||||
$ sortBy sBd [(Edge i j (c, l) , d) | Cat (Edge i j (c, l), d)
|
||||
<- beta]
|
||||
where makeLblRng edges = [(l, (MCFG.makeRange (i, j))) | (Edge i j (_, l), _)
|
||||
<- edges]
|
||||
sBd (_, d) (_, d')
|
||||
| d < d' = LT
|
||||
| d > d' = GT
|
||||
| otherwise = EQ
|
||||
Reference in New Issue
Block a user