mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-23 11:42:49 -06:00
"Committed_by_peb"
This commit is contained in:
@@ -4,9 +4,9 @@
|
|||||||
-- Stability : (stable)
|
-- Stability : (stable)
|
||||||
-- Portability : (portable)
|
-- Portability : (portable)
|
||||||
--
|
--
|
||||||
-- > CVS $Date: 2005/04/18 14:55:33 $
|
-- > CVS $Date: 2005/04/19 10:46:07 $
|
||||||
-- > CVS $Author: peb $
|
-- > CVS $Author: peb $
|
||||||
-- > CVS $Revision: 1.3 $
|
-- > CVS $Revision: 1.4 $
|
||||||
--
|
--
|
||||||
-- The main parsing module, parsing GFC grammars
|
-- The main parsing module, parsing GFC grammars
|
||||||
-- by translating to simpler formats, such as PMCFG and CFG
|
-- by translating to simpler formats, such as PMCFG and CFG
|
||||||
@@ -34,21 +34,25 @@ import GF.Data.SortedList
|
|||||||
import GF.Data.Assoc
|
import GF.Data.Assoc
|
||||||
import GF.Formalism.Utilities
|
import GF.Formalism.Utilities
|
||||||
import GF.Conversion.Types
|
import GF.Conversion.Types
|
||||||
|
import GF.Formalism.GCFG
|
||||||
import GF.Formalism.SimpleGFC
|
import GF.Formalism.SimpleGFC
|
||||||
import qualified GF.Formalism.MCFG as M
|
import qualified GF.Formalism.MCFG as M
|
||||||
import qualified GF.Formalism.CFG as C
|
import qualified GF.Formalism.CFG as C
|
||||||
-- import qualified GF.NewParsing.MCFG as PM
|
import qualified GF.NewParsing.MCFG as PM
|
||||||
import qualified GF.NewParsing.CFG as PC
|
import qualified GF.NewParsing.CFG as PC
|
||||||
--import qualified GF.Conversion.FromGFC as From
|
--import qualified GF.Conversion.FromGFC as From
|
||||||
|
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
-- parsing information
|
-- parsing information
|
||||||
|
|
||||||
data PInfo = PInfo { mcfPInfo :: (), -- ^ not implemented yet
|
data PInfo = PInfo { mcfPInfo :: MCFPInfo,
|
||||||
cfPInfo :: PC.CFPInfo CCat Name Token }
|
cfPInfo :: CFPInfo }
|
||||||
|
|
||||||
|
type MCFPInfo = MGrammar
|
||||||
|
type CFPInfo = PC.CFPInfo CCat Name Token
|
||||||
|
|
||||||
buildPInfo :: MGrammar -> CGrammar -> PInfo
|
buildPInfo :: MGrammar -> CGrammar -> PInfo
|
||||||
buildPInfo mcfg cfg = PInfo { mcfPInfo = (),
|
buildPInfo mcfg cfg = PInfo { mcfPInfo = mcfg,
|
||||||
cfPInfo = PC.buildCFPInfo cfg }
|
cfPInfo = PC.buildCFPInfo cfg }
|
||||||
|
|
||||||
|
|
||||||
@@ -65,20 +69,30 @@ parse :: String -- ^ parsing strategy
|
|||||||
-- parsing via CFG
|
-- parsing via CFG
|
||||||
parse (c:strategy) pinfo abs startCat
|
parse (c:strategy) pinfo abs startCat
|
||||||
| c=='c' || c=='C' = map (tree2term abs) .
|
| c=='c' || c=='C' = map (tree2term abs) .
|
||||||
parseCFG strategy pinfo startCats .
|
parseCFG strategy cfpi startCats .
|
||||||
map prCFTok
|
map prCFTok
|
||||||
where startCats = tracePrt "Parsing.GFC - starting categories" prt $
|
where startCats = tracePrt "Parsing.GFC - starting categories" prt $
|
||||||
filter isStartCat $ map fst $ aAssocs $ PC.topdownRules $ cfPInfo pinfo
|
filter isStartCat $ map fst $ aAssocs $ PC.topdownRules cfpi
|
||||||
isStartCat (CCat (ECat cat _) _) = cat == cfCat2Ident startCat
|
isStartCat (CCat (ECat cat _) _) = cat == cfCat2Ident startCat
|
||||||
|
cfpi = cfPInfo pinfo
|
||||||
|
|
||||||
|
-- parsing via MCFG
|
||||||
|
parse (c:strategy) pinfo abs startCat
|
||||||
|
| c=='m' || c=='M' = map (tree2term abs) .
|
||||||
|
parseMCFG strategy mcfpi startCats .
|
||||||
|
map prCFTok
|
||||||
|
where startCats = tracePrt "Parsing.GFC - starting categories" prt $
|
||||||
|
filter isStartCat $ nubsort [ c | Rule (Abs c _ _) _ <- mcfpi ]
|
||||||
|
isStartCat (MCat (ECat cat _) _) = cat == cfCat2Ident startCat
|
||||||
|
mcfpi = mcfPInfo pinfo
|
||||||
|
|
||||||
-- default parser
|
-- default parser
|
||||||
parse strategy pinfo abs start = parse ('c':strategy) pinfo abs start
|
parse strategy pinfo abs start = parse ('c':strategy) pinfo abs start
|
||||||
|
|
||||||
|
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
|
|
||||||
parseCFG :: String -> PInfo -> [CCat] -> [Token] -> [SyntaxTree Fun]
|
parseCFG :: String -> CFPInfo -> [CCat] -> [Token] -> [SyntaxTree Fun]
|
||||||
parseCFG strategy pInfo startCats inString = trace2 "Parsing.GFC - selected algorithm" "CFG" $
|
parseCFG strategy pinfo startCats inString = trace2 "Parsing.GFC - selected algorithm" "CFG" $
|
||||||
trees
|
trees
|
||||||
where trees = tracePrt "Parsing.GFC - nr. trees" (prt . length) $
|
where trees = tracePrt "Parsing.GFC - nr. trees" (prt . length) $
|
||||||
nubsort $ forests >>= forest2trees
|
nubsort $ forests >>= forest2trees
|
||||||
@@ -101,44 +115,31 @@ parseCFG strategy pInfo startCats inString = trace2 "Parsing.GFC - selected algo
|
|||||||
cfChart = --tracePrt "finalEdges"
|
cfChart = --tracePrt "finalEdges"
|
||||||
--(prt . filter (\(Edge i j _) -> (i,j)==inputBounds inTokens)) $
|
--(prt . filter (\(Edge i j _) -> (i,j)==inputBounds inTokens)) $
|
||||||
tracePrt "Parsing.GFC - size of context-free chart" (prt . length) $
|
tracePrt "Parsing.GFC - size of context-free chart" (prt . length) $
|
||||||
PC.parseCF strategy (cfPInfo pInfo) startCats inTokens
|
PC.parseCF strategy pinfo startCats inTokens
|
||||||
|
|
||||||
inTokens = input inString
|
inTokens = input inString
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
|
||||||
{-
|
parseMCFG :: String -> MCFPInfo -> [MCat] -> [Token] -> [SyntaxTree Fun]
|
||||||
-- parsing via MCFG
|
parseMCFG strategy pinfo startCats inString = trace2 "Parsing.GFC - selected algorithm" "MCFG" $
|
||||||
newParser (m:strategy) gr (_, startCat) inString
|
trees
|
||||||
| m=='m' || m=='M' = trace2 "Parser" "MCFG" $ Ok terms
|
where trees = tracePrt "Parsing.GFC - nr. trees" (prt . length) $
|
||||||
where terms = map (tree2term abstract) trees
|
forests >>= forest2trees
|
||||||
trees = --tracePrt "trees" (prtBefore "\n") $
|
|
||||||
tracePrt "#trees" (prt . length) $
|
forests = tracePrt "Parsing.GFC - nr. forests" (prt . length) $
|
||||||
concatMap forest2trees forests
|
cfForests >>= convertFromCFForest
|
||||||
forests = --tracePrt "forests" (prtBefore "\n") $
|
cfForests= tracePrt "Parsing.GFC - nr. context-free forests" (prt . length) $
|
||||||
tracePrt "#forests" (prt . length) $
|
chart2forests chart (const False) finalEdges
|
||||||
concatMap (chart2forests chart isMeta) finalEdges
|
|
||||||
isMeta = null . snd
|
chart = tracePrt "Parsing.GFC - size of chart" (prt . map (length.snd) . aAssocs) $
|
||||||
finalEdges = tracePrt "finalEdges" (prtBefore "\n") $
|
PM.parseMCF strategy pinfo inString -- inTokens
|
||||||
filter isFinalEdge $ aElems chart
|
|
||||||
-- nubsort [ (cat, [(lbl, E.makeRange [(i,j)])]) |
|
finalEdges = tracePrt "Parsing.GFC - final chart edges" prt $
|
||||||
-- let (i, j) = inputBounds inTokens,
|
[ PM.makeFinalEdge cat lbl (inputBounds inTokens) |
|
||||||
-- E.Rule cat _ [E.Lin lbl _] _ <- pInf,
|
cat@(MCat _ [lbl]) <- startCats ]
|
||||||
-- isStartCat cat ]
|
|
||||||
isFinalEdge (cat, rows)
|
inTokens = input inString
|
||||||
= isStartCat cat &&
|
|
||||||
inputBounds inTokens `elem` concat [ rho | (_, M.Rng rho) <- rows ]
|
|
||||||
chart = --tracePrt "chart" (prtBefore "\n" . aAssocs) $
|
|
||||||
tracePrt "#chart" (prt . map (length.snd) . aAssocs) $
|
|
||||||
PM.parse strategy pInf starters inTokens
|
|
||||||
inTokens = input $ map AbsGFC.KS $ words inString
|
|
||||||
pInf = -- tracePrt "avg rec" (\gr -> show (sum [ length rec | E.Rule _ _ rec _ <- gr ] % length gr)) $
|
|
||||||
mcfPInfo $ SS.statePInfo gr
|
|
||||||
starters = tracePrt "startCats" prt $
|
|
||||||
filter isStartCat $ nubsort [ cat | M.Rule cat _ _ _ <- pInf ]
|
|
||||||
isStartCat (MCFCat cat _) = cat == startCat
|
|
||||||
abstract = tracePrt "abstract module" PrGrammar.prt $
|
|
||||||
SS.absId gr
|
|
||||||
-}
|
|
||||||
|
|
||||||
|
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
|
|||||||
35
src/GF/Parsing/MCFG.hs
Normal file
35
src/GF/Parsing/MCFG.hs
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
----------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Maintainer : PL
|
||||||
|
-- Stability : (stable)
|
||||||
|
-- Portability : (portable)
|
||||||
|
--
|
||||||
|
-- > CVS $Date: 2005/04/19 10:46:07 $
|
||||||
|
-- > CVS $Author: peb $
|
||||||
|
-- > CVS $Revision: 1.1 $
|
||||||
|
--
|
||||||
|
-- MCFG parsing
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
module GF.NewParsing.MCFG where
|
||||||
|
|
||||||
|
import GF.Formalism.Utilities
|
||||||
|
import GF.Formalism.GCFG
|
||||||
|
import GF.Formalism.MCFG
|
||||||
|
|
||||||
|
import qualified GF.NewParsing.MCFG.Naive as Naive
|
||||||
|
import qualified GF.NewParsing.MCFG.Range as Range (makeRange)
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
-- parsing
|
||||||
|
|
||||||
|
--parseMCF :: (Ord n, Ord c, Ord t) => String -> CFParser c n t
|
||||||
|
parseMCF "n" = Naive.parse
|
||||||
|
-- default parser:
|
||||||
|
parseMCF _ = parseMCF "n"
|
||||||
|
|
||||||
|
|
||||||
|
makeFinalEdge cat lbl bnds = (cat, [(lbl, Range.makeRange bnds)])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
174
src/GF/Parsing/MCFG/Active.hs
Normal file
174
src/GF/Parsing/MCFG/Active.hs
Normal file
@@ -0,0 +1,174 @@
|
|||||||
|
{-- Module --------------------------------------------------------------------
|
||||||
|
Filename: ActiveParse.hs
|
||||||
|
Author: Håkan Burden
|
||||||
|
Time-stamp: <2005-04-18, 14:25>
|
||||||
|
|
||||||
|
Description: An agenda-driven implementation of algorithm 4.6, Active parsing
|
||||||
|
of PMCFG, as described in Ljunglöf (2004)
|
||||||
|
------------------------------------------------------------------------------}
|
||||||
|
|
||||||
|
module ActiveParse where
|
||||||
|
|
||||||
|
|
||||||
|
-- GF modules
|
||||||
|
import Examples
|
||||||
|
import GeneralChart
|
||||||
|
import MCFGrammar
|
||||||
|
import MCFParser
|
||||||
|
import Nondet
|
||||||
|
import Parser
|
||||||
|
import Range
|
||||||
|
|
||||||
|
|
||||||
|
{-- Datatypes -----------------------------------------------------------------
|
||||||
|
AChart: A RedBlackMap with Items and Keys
|
||||||
|
Item :
|
||||||
|
AKey :
|
||||||
|
------------------------------------------------------------------------------}
|
||||||
|
data Item n c l = Active (AbstractRule n c)
|
||||||
|
(RangeRec l)
|
||||||
|
Range
|
||||||
|
(Lin c l Range)
|
||||||
|
(LinRec c l Range)
|
||||||
|
[RangeRec l]
|
||||||
|
| Passive (AbstractRule n c) (RangeRec l) [RangeRec l]
|
||||||
|
deriving (Eq, Ord, Show)
|
||||||
|
|
||||||
|
type AChart n c l = ParseChart (Item n c l) (AKey c)
|
||||||
|
|
||||||
|
data AKey c = Act c
|
||||||
|
| Pass c
|
||||||
|
| Useless
|
||||||
|
deriving (Eq, Ord, Show)
|
||||||
|
|
||||||
|
|
||||||
|
keyof :: Item n c l -> AKey c
|
||||||
|
keyof (Active _ _ _ (Lin _ (Cat (next, _, _):_)) _ _) = Act next
|
||||||
|
keyof (Passive (_, cat, _) _ _) = Pass cat
|
||||||
|
keyof _ = Useless
|
||||||
|
|
||||||
|
|
||||||
|
{-- Parsing -------------------------------------------------------------------
|
||||||
|
recognize:
|
||||||
|
parse : Builds a chart from the initial agenda, given by prediction, and
|
||||||
|
the inference rules
|
||||||
|
keyof : Given an Item returns an appropriate Key for the Chart
|
||||||
|
------------------------------------------------------------------------------}
|
||||||
|
|
||||||
|
recognize strategy mcfg toks = chartMember
|
||||||
|
(parse strategy mcfg toks) item (keyof item)
|
||||||
|
where n = length toks
|
||||||
|
n2 = n `div` 2
|
||||||
|
item = (Passive ("f", S, [A])
|
||||||
|
[("s",Range (0,n))]
|
||||||
|
[[("p",Range (0,n2)),("q",Range (n2,n))]])
|
||||||
|
|
||||||
|
|
||||||
|
parse :: (Ord n, Ord c, Ord l, Eq t) => Strategy -> Grammar n c l t -> [t]
|
||||||
|
-> ParseChart (Item n c l) (AKey c)
|
||||||
|
parse (False,False) mcfg toks = buildChart keyof
|
||||||
|
[complete, scan, combine, convert]
|
||||||
|
(predict mcfg toks)
|
||||||
|
parse (True, False) mcfg toks = buildChart keyof
|
||||||
|
[predictKilbury mcfg toks, complete, combine, convert]
|
||||||
|
(terminal mcfg toks)
|
||||||
|
parse (False, True) mcfg toks = buildChart keyof
|
||||||
|
[predictEarley mcfg toks, complete, scan, combine, convert]
|
||||||
|
(initial (take 1 mcfg) toks)
|
||||||
|
|
||||||
|
predictKilbury mcfg toks _ (Passive (_, cat, _) found _) =
|
||||||
|
[ Active (f, a, rhs) [] rng lin' lins' daughters |
|
||||||
|
Rule a rhs ((Lin l ((Cat (cat', r, i)):syms)):lins) f <- mcfg,
|
||||||
|
cat == cat',
|
||||||
|
lin' : lins' <- solutions $ rangeRestRec toks (Lin l syms : lins),
|
||||||
|
-- lins' <- solutions $ rangeRestRec toks lins,
|
||||||
|
rng <- solutions $ projection r found,
|
||||||
|
let daughters = (replaceRec (replicate (length rhs) []) i found) ]
|
||||||
|
predictKilbury _ _ _ _ = []
|
||||||
|
|
||||||
|
predictEarley mcfg toks _ item@(Active _ _ _ (Lin _ ((Cat (cat, _, _)):_)) _ _) =
|
||||||
|
concat [ predEar toks item rule |
|
||||||
|
rule@(Rule cat' _ _ _) <- mcfg, cat == cat' ]
|
||||||
|
predictEarley _ _ _ _ = []
|
||||||
|
|
||||||
|
predEar toks _ (Rule cat [] lins f) =
|
||||||
|
[ Passive (f, cat, []) (makeRangeRec lins') [] |
|
||||||
|
lins' <- solutions $ rangeRestRec toks lins ]
|
||||||
|
predEar toks (Active _ _ (Range (_,j)) _ _ _) (Rule cat rhs lins f) =
|
||||||
|
[ Active (f, cat, rhs) [] (Range (j, j)) lin' lins' (replicate (length rhs) []) |
|
||||||
|
(lin':lins') <- solutions $ rangeRestRec toks lins ]
|
||||||
|
predEar toks (Active _ _ EmptyRange _ _ _) (Rule cat rhs lins f) =
|
||||||
|
[ Active (f, cat, rhs) [] EmptyRange lin' lins' (replicate (length rhs) []) |
|
||||||
|
(lin':lins') <- solutions $ rangeRestRec toks lins ]
|
||||||
|
|
||||||
|
|
||||||
|
{--Inference rules ------------------------------------------------------------
|
||||||
|
predict : Creates an Active Item of every Rule in the Grammar to give the
|
||||||
|
initial Agenda
|
||||||
|
complete:
|
||||||
|
scan :
|
||||||
|
combine : Creates an Active Item every time it is possible to combine
|
||||||
|
an Active Item from the agenda with a Passive Item from the Chart
|
||||||
|
convert : Active Items with nothing to find are converted to Passive Items
|
||||||
|
------------------------------------------------------------------------------}
|
||||||
|
|
||||||
|
predict :: Eq t => Grammar n c l t -> [t] -> [Item n c l]
|
||||||
|
predict grammar toks = [ Active (f, cat, rhs) [] EmptyRange lin' lins'
|
||||||
|
(replicate (length rhs) []) |
|
||||||
|
Rule cat rhs lins f <- grammar,
|
||||||
|
(lin':lins') <- solutions $ rangeRestRec toks lins ]
|
||||||
|
|
||||||
|
|
||||||
|
complete :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey c) -> Item n c l
|
||||||
|
-> [Item n c l]
|
||||||
|
complete _ (Active rule found (Range (i, j)) (Lin l []) (lin:lins) recs) =
|
||||||
|
[ Active rule (found ++ [(l, Range (i,j))]) EmptyRange lin lins recs ]
|
||||||
|
complete _ _ = []
|
||||||
|
|
||||||
|
|
||||||
|
scan :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey c) -> Item n c l
|
||||||
|
-> [Item n c l]
|
||||||
|
scan _ (Active rule found rng (Lin l ((Tok rng'):syms)) lins recs) =
|
||||||
|
[ Active rule found rng'' (Lin l syms) lins recs |
|
||||||
|
rng'' <- solutions $ concRanges rng rng' ]
|
||||||
|
scan _ _ = []
|
||||||
|
|
||||||
|
|
||||||
|
combine :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey c) -> Item n c l
|
||||||
|
-> [Item n c l]
|
||||||
|
combine chart (Active rule found rng (Lin l ((Cat (c, r, d)):syms)) lins recs) =
|
||||||
|
[ Active rule found rng'' (Lin l syms) lins (replaceRec recs d found') |
|
||||||
|
Passive _ found' _ <- chartLookup chart (Pass c),
|
||||||
|
rng' <- solutions $ projection r found',
|
||||||
|
rng'' <- solutions $ concRanges rng rng',
|
||||||
|
subsumes (recs !! d) found' ]
|
||||||
|
combine chart (Passive (_, c, _) found _) =
|
||||||
|
[ Active rule found' rng (Lin l syms) lins (replaceRec recs' d found) |
|
||||||
|
Active rule found' rng' (Lin l ((Cat (c, r, d)):syms)) lins recs'
|
||||||
|
<- chartLookup chart (Act c),
|
||||||
|
rng'' <- solutions $ projection r found,
|
||||||
|
rng <- solutions $ concRanges rng' rng'',
|
||||||
|
subsumes (recs' !! d) found ]
|
||||||
|
combine _ _ = []
|
||||||
|
|
||||||
|
convert :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey c) -> Item n c l
|
||||||
|
-> [Item n c l]
|
||||||
|
convert _ (Active rule found rng (Lin l []) [] recs) =
|
||||||
|
[ Passive rule (found ++ [(l, rng)]) recs ]
|
||||||
|
convert _ _ = []
|
||||||
|
|
||||||
|
|
||||||
|
-- Earley --
|
||||||
|
-- anropas med alla startregler
|
||||||
|
initial :: Eq t => [Rule n c l t] -> [t] -> [Item n c l]
|
||||||
|
initial starts toks =
|
||||||
|
[ Active (f, s, rhs) [] (Range (0, 0)) lin' lins' (replicate (length rhs) []) |
|
||||||
|
Rule s rhs lins f <- starts,
|
||||||
|
(lin':lins') <- solutions $ rangeRestRec toks lins ]
|
||||||
|
|
||||||
|
|
||||||
|
-- Kilbury --
|
||||||
|
terminal mcfg toks =
|
||||||
|
[ Passive (f, cat, []) (makeRangeRec lins') [] |
|
||||||
|
Rule cat [] lins f <- mcfg,
|
||||||
|
lins' <- solutions $ rangeRestRec toks lins ]
|
||||||
95
src/GF/Parsing/MCFG/Naive.hs
Normal file
95
src/GF/Parsing/MCFG/Naive.hs
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
|
||||||
|
module GF.NewParsing.MCFG.Naive where
|
||||||
|
|
||||||
|
|
||||||
|
-- GF modules
|
||||||
|
import GF.NewParsing.GeneralChart
|
||||||
|
import GF.Formalism.GCFG
|
||||||
|
import GF.Formalism.MCFG
|
||||||
|
import GF.Formalism.Utilities
|
||||||
|
import GF.NewParsing.MCFG.Range
|
||||||
|
import GF.Data.SortedList
|
||||||
|
import GF.Data.Assoc
|
||||||
|
|
||||||
|
{-- Datatypes and types -------------------------------------------------------
|
||||||
|
NChart : A RedBlackMap with Items and Keys
|
||||||
|
Item : The parse Items are either Active or Passive
|
||||||
|
NKey : One for Active Items, one for Passive and one for Active Items
|
||||||
|
to convert to Passive
|
||||||
|
DottedRule: (function-name, LHS, [Found in RHS], [To find in RHS])
|
||||||
|
------------------------------------------------------------------------------}
|
||||||
|
|
||||||
|
type NChart c n l = ParseChart (Item c n l) (NKey c)
|
||||||
|
|
||||||
|
data Item c n l = Active (DottedRule c n) (LinRec c l Range) [RangeRec l]
|
||||||
|
| Passive (Abstract c n) (RangeRec l)
|
||||||
|
deriving (Eq, Ord, Show)
|
||||||
|
|
||||||
|
type DottedRule c n = (Abstract c n, [c])
|
||||||
|
|
||||||
|
data NKey c = Act c
|
||||||
|
| Pass c
|
||||||
|
| Final
|
||||||
|
deriving (Eq, Ord, Show)
|
||||||
|
|
||||||
|
|
||||||
|
{-- Parsing -------------------------------------------------------------------
|
||||||
|
recognize:
|
||||||
|
parse : Builds a chart from the initial agenda, given by prediction, and
|
||||||
|
the inference rules
|
||||||
|
keyof : Given an Item returns an appropriate Key for the Chart
|
||||||
|
------------------------------------------------------------------------------}
|
||||||
|
|
||||||
|
|
||||||
|
parse :: (Ord t, Ord n, Ord c, Ord l) => MCFGrammar c n l t -> [t]
|
||||||
|
-> SyntaxChart n (c, RangeRec l)
|
||||||
|
parse mcfg toks = chart3
|
||||||
|
where chart3 = assocMap (const groupPairs) chart2
|
||||||
|
chart2 = accumAssoc id $ nubsort chart1
|
||||||
|
chart1 = [ ((cat, rrec), (fun, zip rhs rrecs)) |
|
||||||
|
Active (Abs cat _Nil fun, rhs) lins rrecs <- chartLookup chart0 Final,
|
||||||
|
let rrec = makeRangeRec lins ]
|
||||||
|
chart0 = process mcfg toks
|
||||||
|
|
||||||
|
process :: (Ord t, Ord n, Ord c, Ord l) => MCFGrammar c n l t -> [t] -> NChart c n l
|
||||||
|
process mcfg toks = buildChart keyof [convert, combine] (predict toks mcfg)
|
||||||
|
|
||||||
|
|
||||||
|
keyof :: Item c n l -> NKey c
|
||||||
|
keyof (Active (Abs _ (next:_) _, _) _ _) = Act next
|
||||||
|
keyof (Passive (Abs cat _ _) _) = Pass cat
|
||||||
|
keyof _ = Final
|
||||||
|
|
||||||
|
|
||||||
|
{--Inference rules ------------------------------------------------------------
|
||||||
|
predict: Creates an Active Item of every Rule in the Grammar to give the
|
||||||
|
initial Agenda
|
||||||
|
combine: Creates an Active Item every time it is possible to combine
|
||||||
|
an Active Item from the agenda with a Passive Item from the Chart
|
||||||
|
convert: Active Items with nothing to find are converted to Passive Items
|
||||||
|
------------------------------------------------------------------------------}
|
||||||
|
|
||||||
|
predict :: (Eq t, Eq c) => [t] -> MCFGrammar c n l t -> [Item c n l]
|
||||||
|
predict toks mcfg = [ Active (abs, []) lins' [] |
|
||||||
|
Rule abs (Cnc _ _ lins) <- mcfg,
|
||||||
|
lins' <- rangeRestRec toks lins ]
|
||||||
|
|
||||||
|
|
||||||
|
combine :: (Ord n, Ord c, Ord l) => NChart c n l -> Item c n l -> [Item c n l]
|
||||||
|
combine chart (Active (Abs nt (c:find) f, found) lins rrecs) =
|
||||||
|
do Passive _ rrec <- chartLookup chart (Pass c)
|
||||||
|
lins' <- concLinRec $ substArgRec (length found) rrec lins
|
||||||
|
return $ Active (Abs nt find f, found ++ [c]) lins' (rrecs ++ [rrec])
|
||||||
|
combine chart (Passive (Abs c _ _) rrec) =
|
||||||
|
do Active (Abs nt (c:find) f, found) lins rrecs <- chartLookup chart (Act c)
|
||||||
|
lins' <- concLinRec $ substArgRec (length found) rrec lins
|
||||||
|
return $ Active (Abs nt find f, found ++ [c]) lins' (rrecs ++ [rrec])
|
||||||
|
combine _ _ = []
|
||||||
|
|
||||||
|
|
||||||
|
convert :: (Ord n, Ord c, Ord l) => NChart c n l -> Item c n l -> [Item c n l]
|
||||||
|
convert _ (Active (Abs nt [] f, rhs) lins _) = [Passive (Abs nt rhs f) rrec]
|
||||||
|
where rrec = makeRangeRec lins
|
||||||
|
convert _ _ = []
|
||||||
|
|
||||||
|
|
||||||
41
src/GF/Parsing/MCFG/PInfo.hs
Normal file
41
src/GF/Parsing/MCFG/PInfo.hs
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
---------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Maintainer : PL
|
||||||
|
-- Stability : (stable)
|
||||||
|
-- Portability : (portable)
|
||||||
|
--
|
||||||
|
-- > CVS $Date: 2005/04/19 10:46:08 $
|
||||||
|
-- > CVS $Author: peb $
|
||||||
|
-- > CVS $Revision: 1.1 $
|
||||||
|
--
|
||||||
|
-- MCFG parsing, parser information
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
module GF.NewParsing.MCFG.PInfo
|
||||||
|
(MCFParser, MCFPInfo(..), buildMCFPInfo) where
|
||||||
|
|
||||||
|
import GF.System.Tracing
|
||||||
|
import GF.Infra.Print
|
||||||
|
|
||||||
|
import GF.Formalism.Utilities
|
||||||
|
import GF.Formalism.GCFG
|
||||||
|
import GF.Formalism.MCFG
|
||||||
|
import GF.Data.SortedList
|
||||||
|
import GF.Data.Assoc
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
-- type declarations
|
||||||
|
|
||||||
|
-- | the list of categories = possible starting categories
|
||||||
|
type MCFParser c n l t = MCFPInfo c n l t
|
||||||
|
-> [c]
|
||||||
|
-> Input t
|
||||||
|
-> MCFChart c n l
|
||||||
|
|
||||||
|
type MCFChart c n l = [(n, (c, RangeRec l), [(c, RangeRec l)])]
|
||||||
|
|
||||||
|
type MCFPInfo c n l t = MCFGrammar c n l t
|
||||||
|
|
||||||
|
buildCFPInfo :: (Ord n, Ord c, Ord l, Ord t) => MCFGrammar c n l t -> MCFPInfo c n l t
|
||||||
|
buildCFPInfo = id
|
||||||
|
|
||||||
175
src/GF/Parsing/MCFG/Range.hs
Normal file
175
src/GF/Parsing/MCFG/Range.hs
Normal file
@@ -0,0 +1,175 @@
|
|||||||
|
|
||||||
|
module GF.NewParsing.MCFG.Range where
|
||||||
|
|
||||||
|
|
||||||
|
-- Haskell
|
||||||
|
import List
|
||||||
|
import Monad
|
||||||
|
|
||||||
|
-- GF modules
|
||||||
|
import GF.Formalism.GCFG
|
||||||
|
import GF.Formalism.MCFG
|
||||||
|
import GF.Formalism.Utilities
|
||||||
|
import GF.Infra.Print
|
||||||
|
|
||||||
|
|
||||||
|
------------------------------------------------------------
|
||||||
|
-- ranges as single pairs
|
||||||
|
|
||||||
|
data Range = Range (Int, Int)
|
||||||
|
| EmptyRange
|
||||||
|
deriving (Eq, Ord, Show)
|
||||||
|
|
||||||
|
makeRange :: (Int, Int) -> Range
|
||||||
|
concatRange :: Range -> Range -> [Range]
|
||||||
|
rangeEdge :: a -> Range -> Edge a
|
||||||
|
minRange :: Range -> Int
|
||||||
|
maxRange :: Range -> Int
|
||||||
|
|
||||||
|
makeRange = Range
|
||||||
|
concatRange EmptyRange rng = return rng
|
||||||
|
concatRange rng EmptyRange = return rng
|
||||||
|
concatRange (Range(i,j)) (Range(j',k)) = [ Range(i,k) | j==j']
|
||||||
|
rangeEdge a (Range(i,j)) = Edge i j a
|
||||||
|
minRange (Range rho) = fst rho
|
||||||
|
maxRange (Range rho) = snd rho
|
||||||
|
|
||||||
|
instance Print Range where
|
||||||
|
prt (Range (i,j)) = "(" ++ show i ++ "-" ++ show j ++ ")"
|
||||||
|
prt (EmptyRange) = "(?)"
|
||||||
|
|
||||||
|
{-- Types --------------------------------------------------------------------
|
||||||
|
Linearization- and Range records implemented as lists
|
||||||
|
-----------------------------------------------------------------------------}
|
||||||
|
|
||||||
|
type LinRec c l t = [Lin c l t]
|
||||||
|
|
||||||
|
type RangeRec l = [(l, Range)]
|
||||||
|
|
||||||
|
|
||||||
|
{-- Functions ----------------------------------------------------------------
|
||||||
|
Concatenation : Concatenation of Ranges, Symbols and Linearizations
|
||||||
|
and records of Linearizations
|
||||||
|
Record transformation : Makes a Range record from a fully instantiated
|
||||||
|
Linearization record
|
||||||
|
Record projection : Given a label, returns the corresponding Range
|
||||||
|
Range restriction : Range restriction of Tokens, Symbols,
|
||||||
|
Linearizations and Records given a list of Tokens
|
||||||
|
Record replacment : Substitute a record for another in a list of Range
|
||||||
|
records
|
||||||
|
Argument substitution : Substitution of a Cat c to a Tok Range, where
|
||||||
|
Range is the cover of c
|
||||||
|
Note: The argument is still a Symbol c Range
|
||||||
|
Subsumation : Checks if a Range record subsumes another Range
|
||||||
|
record
|
||||||
|
Record unification : Unification of two Range records
|
||||||
|
-----------------------------------------------------------------------------}
|
||||||
|
|
||||||
|
|
||||||
|
--- Concatenation ------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
concSymbols :: [Symbol c Range] -> [[Symbol c Range]]
|
||||||
|
concSymbols (Tok rng:Tok rng':toks) = do rng'' <- concatRange rng rng'
|
||||||
|
concSymbols (Tok rng'':toks)
|
||||||
|
concSymbols (sym:syms) = do syms' <- concSymbols syms
|
||||||
|
return (sym:syms')
|
||||||
|
concSymbols [] = return []
|
||||||
|
|
||||||
|
|
||||||
|
concLin :: Lin c l Range -> [Lin c l Range]
|
||||||
|
concLin (Lin lbl syms) = do syms' <- concSymbols syms
|
||||||
|
return (Lin lbl syms')
|
||||||
|
|
||||||
|
|
||||||
|
concLinRec :: LinRec c l Range -> [LinRec c l Range]
|
||||||
|
concLinRec = mapM concLin
|
||||||
|
|
||||||
|
|
||||||
|
--- Record transformation ----------------------------------------------------
|
||||||
|
|
||||||
|
makeRangeRec :: LinRec c l Range -> RangeRec l
|
||||||
|
makeRangeRec lins = map convLin lins
|
||||||
|
where convLin (Lin lbl [Tok rng]) = (lbl, rng)
|
||||||
|
|
||||||
|
|
||||||
|
--- Record projection --------------------------------------------------------
|
||||||
|
|
||||||
|
projection :: Eq l => l -> RangeRec l -> [Range]
|
||||||
|
projection l rec = maybe (fail "projection") return $ lookup l rec
|
||||||
|
|
||||||
|
|
||||||
|
--- Range restriction --------------------------------------------------------
|
||||||
|
|
||||||
|
rangeRestTok :: Eq t => [t] -> t -> [Range]
|
||||||
|
rangeRestTok toks tok = do i <- elemIndices tok toks
|
||||||
|
return (makeRange (i, i+1))
|
||||||
|
|
||||||
|
|
||||||
|
rangeRestSym :: Eq t => [t] -> Symbol a t -> [Symbol a Range]
|
||||||
|
rangeRestSym toks (Tok tok) = do rng <- rangeRestTok toks tok
|
||||||
|
return (Tok rng)
|
||||||
|
rangeRestSym _ (Cat c) = return (Cat c)
|
||||||
|
|
||||||
|
|
||||||
|
rangeRestLin :: Eq t => [t] -> Lin c l t -> [Lin c l Range]
|
||||||
|
rangeRestLin toks (Lin lbl syms) = do syms' <- mapM (rangeRestSym toks) syms
|
||||||
|
return (Lin lbl syms')
|
||||||
|
|
||||||
|
|
||||||
|
rangeRestRec :: Eq t => [t] -> LinRec c l t -> [LinRec c l Range]
|
||||||
|
rangeRestRec toks = mapM (rangeRestLin toks)
|
||||||
|
|
||||||
|
|
||||||
|
-- Record replacment ---------------------------------------------------------
|
||||||
|
-- ineffektiv!!
|
||||||
|
|
||||||
|
replaceRec :: [RangeRec l] -> Int -> RangeRec l -> [RangeRec l]
|
||||||
|
replaceRec recs i rec = (fst tup) ++ [rec] ++ (tail $ snd tup)
|
||||||
|
where tup = splitAt i recs
|
||||||
|
|
||||||
|
|
||||||
|
--- Argument substitution ----------------------------------------------------
|
||||||
|
|
||||||
|
substArgSymbol :: Eq l => Int -> RangeRec l -> Symbol (c, l, Int) Range
|
||||||
|
-> Symbol (c, l, Int) Range
|
||||||
|
substArgSymbol i rec (Tok rng) = (Tok rng)
|
||||||
|
substArgSymbol i rec (Cat (c, l, j))
|
||||||
|
| i==j = maybe (Cat (c, l, j)) Tok $ lookup l rec
|
||||||
|
| otherwise = (Cat (c, l, j))
|
||||||
|
|
||||||
|
|
||||||
|
substArgLin :: Eq l => Int -> RangeRec l -> Lin c l Range
|
||||||
|
-> Lin c l Range
|
||||||
|
substArgLin i rec (Lin lbl syms) =
|
||||||
|
(Lin lbl (map (substArgSymbol i rec) syms))
|
||||||
|
|
||||||
|
|
||||||
|
substArgRec :: Eq l => Int -> RangeRec l -> LinRec c l Range
|
||||||
|
-> LinRec c l Range
|
||||||
|
substArgRec i rec lins = map (substArgLin i rec) lins
|
||||||
|
|
||||||
|
|
||||||
|
--- Subsumation -------------------------------------------------------------
|
||||||
|
|
||||||
|
-- "rec' subsumes rec?"
|
||||||
|
subsumes :: Eq l => RangeRec l -> RangeRec l -> Bool
|
||||||
|
subsumes rec rec' = and [elem r rec' | r <- rec]
|
||||||
|
|
||||||
|
|
||||||
|
--- Record unification -------------------------------------------------------
|
||||||
|
|
||||||
|
unifyRangeRecs :: Ord l => [RangeRec l] -> [RangeRec l] -> [[RangeRec l]]
|
||||||
|
unifyRangeRecs recs recs' = zipWithM unify recs recs'
|
||||||
|
where unify :: Ord l => RangeRec l -> RangeRec l -> [RangeRec l]
|
||||||
|
unify rec [] = return rec
|
||||||
|
unify [] rec = return rec
|
||||||
|
unify rec1'@(p1@(l1, r1):rec1) rec2'@(p2@(l2, r2):rec2)
|
||||||
|
= case compare l1 l2 of
|
||||||
|
LT -> do rec3 <- unify rec1 rec2'
|
||||||
|
return (p1:rec3)
|
||||||
|
GT -> do rec3 <- unify rec1' rec2
|
||||||
|
return (p2:rec3)
|
||||||
|
EQ -> do guard (r1 == r2)
|
||||||
|
rec3 <- unify rec1 rec2
|
||||||
|
return (p1:rec3)
|
||||||
Reference in New Issue
Block a user