forked from GitHub/gf-core
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
This commit is contained in:
103
src-3.0/GF/Parsing/CFG/General.hs
Normal file
103
src-3.0/GF/Parsing/CFG/General.hs
Normal file
@@ -0,0 +1,103 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : PL
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/21 16:23:08 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.4 $
|
||||
--
|
||||
-- CFG parsing with a general chart
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Parsing.CFG.General
|
||||
(parse, Strategy) where
|
||||
|
||||
import GF.System.Tracing
|
||||
import GF.Infra.Print
|
||||
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Formalism.CFG
|
||||
import GF.Parsing.CFG.PInfo
|
||||
import GF.Data.GeneralDeduction
|
||||
import GF.Data.Assoc
|
||||
import Control.Monad
|
||||
|
||||
-- | CFG parsing with a general chart.
--
-- The internal chart is built by 'process', its size is reported through
-- 'tracePrt', and the result is converted to a 'CFChart' by 'extract'.
parse :: (Ord n, Ord c, Ord t) => Strategy -> CFParser c n t
parse strategy grammar start =
    extract
    -- trace label aligned with the one used in GF.Parsing.CFG.Incremental
    . tracePrt "Parsing.CFG.General - size of internal chart"
               (prt . length . chartList)
    . process strategy grammar start
|
||||
|
||||
-- | Parsing strategy as a pair of flags: @(isBottomup, isTopdown)@.
-- 'process' destructures the pair in exactly this order.
|
||||
type Strategy = (Bool, Bool)
|
||||
|
||||
-- | Turn the internal chart into a 'CFChart'.
--
-- Every passive edge that carries a rule name yields one chart rule per way
-- of covering its span with the symbols it has found.
extract :: (Ord n, Ord c, Ord t) =>
           IChart n (Symbol c t) -> CFChart c n t
extract chart = do
    -- only completed edges (nothing left to find) that stem from a named rule
    Edge from to (Cat lhs, seen, [], Just fun) <- chartList chart
    -- 'seen' is accumulated in reverse order, so flip it back
    children <- spans from to (reverse seen)
    return (CFRule (Edge from to lhs) children fun)
  where
    -- all ways to cover the positions pos..end with the given symbols
    spans pos end [] = [ [] | pos == end ]
    -- a token advances the position by exactly one
    spans pos end (Tok tok : rest) = do
        others <- spans (pos + 1) end rest
        return (Tok tok : others)
    -- a category may end anywhere a passive edge for it ends
    spans pos end (Cat sub : rest) = do
        Edge _ mid _ <- chartLookup chart (Passive (Cat sub) pos)
        others <- spans mid end rest
        return (Cat (Edge pos mid sub) : others)
|
||||
|
||||
|
||||
-- | Build the internal chart for an input.
--
-- The chart is seeded with the initial predictions for the starting
-- categories plus one passive edge per input token, and is then closed
-- under the @predict@ and @combine@ rules by 'buildChart'.
process :: (Ord n, Ord c, Ord t) =>
           Strategy      -- ^ (isBottomup, isTopdown) :: (Bool, Bool)
        -> CFPInfo c n t -- ^ parser information (= grammar)
        -> [c]           -- ^ list of starting categories
        -> Input t       -- ^ input string
        -> IChart n (Symbol c t)
process (isBottomup, isTopdown) grammar start
    = trace2 "Parsing.CFG.General - strategy" ((if isBottomup then " BU" else "") ++
                                               (if isTopdown  then " TD" else "")) $
      buildChart keyof [predict, combine] . axioms
    where axioms input = initial ++ scan input

          -- one passive edge per input token; token edges carry no rule name
          scan input = map (fmap mkEdge) (inputEdges input)
          mkEdge tok = (Tok tok, [], [], Nothing)

          -- the combine rule
          -- a passive edge advances every active edge that waits for it ...
          combine chart (Edge j k (next, _, [], _))
              = [ edge `forwardTo` k | edge <- chartLookup chart (Active next j) ]
          -- ... and an active edge is advanced over every matching passive edge
          combine chart edge@(Edge _ j (_, _, next:_, _))
              = [ edge `forwardTo` k | Edge _ k _ <- chartLookup chart (Passive next j) ]

          -- initial predictions
          initial = [ loopingEdge 0 rule | cat <- start, rule <- tdRuleLookup ? cat ]

          -- predictions
          predict chart (Edge j k (next, _, [], _)) | isBottomup
              = [ loopingEdge j rule `forwardTo` k | rule <- bottomupRules grammar ? next ]
              -- - - - - - - - - - ^^^^^^^^^^^^^ Kilbury prediction: move dot forward
          predict chart (Edge _ k (_, _, Cat cat:_, _))
              = [ loopingEdge k rule | rule <- tdRuleLookup ? cat ]
          predict _ _ = []

          -- rules used for top-down style prediction; with bottom-up only,
          -- just the empty-leftcorner rules are predicted
          tdRuleLookup | isTopdown  = topdownRules grammar
                       | isBottomup = emptyLeftcornerRules grammar
                       -- neither flag set: fail with a readable message instead
                       -- of an anonymous "non-exhaustive guards" error
                       | otherwise  = error ("GF.Parsing.CFG.General.process: " ++
                                             "strategy enables neither bottom-up nor top-down parsing")
|
||||
|
||||
-- internal representation of parse items
|
||||
|
||||
-- A parse item: an edge labelled with (left-hand side symbol, symbols found
-- so far in reverse order, symbols still to find, rule name if any).
type Item n s = Edge (s, [s], [s], Maybe n)
|
||||
-- The internal chart: parse items looked up by 'IKey'.
type IChart n s = ParseChart (Item n s) (IKey s)
|
||||
-- Chart keys as computed by 'keyof': an active edge is keyed by the next
-- symbol it needs together with its end position, a passive edge by its own
-- symbol together with its start position.
data IKey s = Active s Int
|
||||
| Passive s Int
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
-- Chart key of an item: active items are filed under the symbol they wait
-- for and their end position, passive items under their own symbol and
-- their start position.
keyof (Edge from to (lhs, _, pending, _)) =
    case pending of
      wanted : _ -> Active wanted to
      []         -> Passive lhs from
|
||||
|
||||
-- Move the dot of an active edge over its next symbol and extend the edge
-- to the given end position. Only defined for edges that still have a
-- symbol to find.
forwardTo (Edge from _ (lhs, seen, wanted : remaining, fun)) to
    = Edge from to (lhs, wanted : seen, remaining, fun)
|
||||
|
||||
-- An empty edge at the given position that predicts the whole right-hand
-- side of a rule: nothing found yet, everything still to find.
loopingEdge pos (CFRule lhs rhs fun) = Edge pos pos (Cat lhs, [], rhs, Just fun)
|
||||
|
||||
|
||||
|
||||
150
src-3.0/GF/Parsing/CFG/Incremental.hs
Normal file
150
src-3.0/GF/Parsing/CFG/Incremental.hs
Normal file
@@ -0,0 +1,150 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : PL
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/21 16:23:09 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.4 $
|
||||
--
|
||||
-- Incremental chart parsing for CFG
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
|
||||
module GF.Parsing.CFG.Incremental
|
||||
(parse, Strategy) where
|
||||
|
||||
import GF.System.Tracing
|
||||
import GF.Infra.Print
|
||||
|
||||
import Data.Array
|
||||
|
||||
import GF.Data.Operations
|
||||
import GF.Data.SortedList
|
||||
import GF.Data.Assoc
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Formalism.CFG
|
||||
import GF.Parsing.CFG.PInfo
|
||||
import GF.Data.IncrementalDeduction
|
||||
|
||||
|
||||
-- | Parsing strategy: @((predictBU, predictTD), (filterBU, filterTD))@,
-- i.e. which prediction steps and which filters are switched on.
|
||||
type Strategy = ((Bool, Bool), (Bool, Bool))
|
||||
|
||||
-- | Incremental chart parsing for CFG: build the chart with 'process',
-- report its size through 'tracePrt', and convert it with 'extract'.
parse :: (Ord n, Ord c, Ord t) => Strategy -> CFParser c n t
parse strategy grammar start input =
    extract (tracePrt "Parsing.CFG.Incremental - size of internal chart"
                      chartSize
                      (process strategy grammar start input))
  where
    -- number of entries in the chart, rendered for the trace output
    chartSize chart = prt (length (chartList chart const))
|
||||
|
||||
-- | Turn the incremental chart into a 'CFChart'.
--
-- Every item whose rule has nothing left to find yields one chart rule per
-- way of covering its span with the symbols it has found.
--
-- NOTE(review): the 'Cat' case below looks up passive items at the *start*
-- position of the sub-span, while 'process' appears to file items under
-- their end position -- confirm against GF.Data.IncrementalDeduction.
extract :: (Ord n, Ord c, Ord t) =>
           IChart c n t -> CFChart c n t
extract finalChart = do
    (to, Item from (CFRule lhs [] fun) seen) <- chartList finalChart (,)
    -- 'seen' is accumulated in reverse order, so flip it back
    children <- spans from to (reverse seen)
    return (CFRule (Edge from to lhs) children fun)
  where
    -- all ways to cover the positions pos..end with the given symbols
    spans pos end [] = [ [] | pos == end ]
    -- a token advances the position by exactly one
    spans pos end (Tok tok : rest) = do
        others <- spans (pos + 1) end rest
        return (Tok tok : others)
    spans pos end (Cat sub : rest) = do
        Item mid _ _ <- chartLookup finalChart pos (Passive sub)
        others <- spans mid end rest
        return (Cat (Edge pos mid sub) : others)
|
||||
|
||||
-- | Build the incremental chart for an input.
--
-- The strategy flags select which prediction steps (bottom-up, top-down)
-- and which filters (bottom-up, top-down) are applied; the active flags are
-- reported through 'trace2'.
process :: (Ord n, Ord c, Ord t) =>
           Strategy -> CFPInfo c n t -> [c] -> Input t -> IChart c n t
process ((isPredictBU, isPredictTD), (isFilterBU, isFilterTD)) grammar start input
    = trace2 "Parsing.CFG.Incremental - strategy" strategyLabel finalChart
  where
    -- names of the enabled strategy components, in a fixed order
    strategyLabel = concat [ label | (True, label) <- [ (isPredictBU, "BU-predict ")
                                                     , (isPredictTD, "TD-predict ")
                                                     , (isFilterBU,  "BU-filter ")
                                                     , (isFilterTD,  "TD-filter ") ] ]

    finalChart = buildChart keyof rules axioms (inputBounds input)

    -- position 0 is seeded from the start categories, every later position
    -- from the input tokens that end there
    axioms pos
      | pos == 0  = union (map (tdInfer 0) start)
      | otherwise = union [ buInfer from pos (Tok tok) |
                            (tok, froms) <- aAssocs (inputTo input ! pos), from <- froms ]

    -- a finished item is passed on bottom-up
    rules pos (Item from (CFRule lhs [] _) _)
        = buInfer from pos (Cat lhs)
    -- an item waiting for a category triggers top-down inference
    rules pos (Item from rule@(CFRule _ (sym@(Cat wanted):_) _) seen)
        = tdInfer pos wanted <++>
          -- hack for empty rules:
          [ Item from (forward rule) (sym : seen) |
            emptyCategories grammar ?= wanted ]
    rules _ _ = []

    buInfer from pos sym = buPredict from pos sym <++> buCombine from pos sym
    tdInfer pos wanted   = tdPredict pos wanted

    -- the combine rule
    buCombine from pos sym
      | from /= pos = [ Item origin (forward rule) (sym : seen) |
                        Item origin rule seen <- (finalChart ! from) ? Active sym ]
      | otherwise   = [] -- hack for empty rules, see rules above and tdPredict below

    -- kilbury bottom-up prediction
    buPredict from pos sym
        = [ Item from rule [sym] | isPredictBU,
            rule <- map forward (bottomupRules grammar ? sym),
            buFilter rule pos,
            tdFilter rule from pos ]

    -- top-down prediction
    tdPredict pos wanted
        = [ Item pos rule [] | isPredictTD || isFilterTD,
            rule <- topdownRules grammar ? wanted,
            buFilter rule pos ] <++>
          -- hack for empty rules:
          [ Item pos rule [] | isPredictBU,
            rule <- emptyLeftcornerRules grammar ? wanted ]

    -- bottom up filtering: input symbol k can begin the given symbol list (first set)
    -- leftcornerTokens DOESN'T WORK WITH EMPTY RULES!!!
    buFilter (CFRule _ (Cat first:_) _) pos | isFilterBU
        = pos < snd (inputBounds input) &&
          hasCommonElements (leftcornerTokens grammar ? first)
                            (aElems (inputFrom input ! pos))
    buFilter _ _ = True

    -- top down filtering: 'cat' is reachable by an active edge ending in node j < k
    tdFilter (CFRule lhs _ _) from pos | isFilterTD && from < pos
        = (tdFilters ! from) ?= lhs
    tdFilter _ _ _ = True

    -- per position: the categories wanted by active items there, extended
    -- via 'limit' with the leading categories of their rules
    tdFilters = listArray (inputBounds input) $
                map (listSet . limit leftCats . activeCats) [0..]
    activeCats pos = [ wanted | Active (Cat wanted) <- aElems (finalChart ! pos) ]
    leftCats cat   = [ left | CFRule _ (Cat left:_) _ <- topdownRules grammar ? cat ]
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- type declarations, items & keys
|
||||
|
||||
-- A parse item: start position, the rule with the symbols it still has to
-- find (see 'forward'), and the symbols found so far in reverse order.
data Item c n t = Item Int (CFRule c n t) [Symbol c t]
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
-- Chart keys as computed by 'keyof': an active item is keyed by the next
-- symbol it needs, a passive item by the category of its rule.
data IKey c t = Active (Symbol c t) | Passive c
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
-- The incremental chart: parse items looked up by position and 'IKey'.
type IChart c n t = IncrementalChart (Item c n t) (IKey c t)
|
||||
|
||||
-- | Chart key of an item: the next symbol to find if there is one,
-- otherwise the category of the finished rule.
keyof :: Item c n t -> IKey c t
keyof (Item _ (CFRule lhs rhs _) _) =
    case rhs of
      wanted : _ -> Active wanted
      []         -> Passive lhs
|
||||
|
||||
-- | Drop the first right-hand-side symbol of a rule, i.e. move the dot one
-- step forward. Only defined for rules with a non-empty right-hand side.
forward :: CFRule c n t -> CFRule c n t
forward (CFRule lhs (_ : remaining) fun) = CFRule lhs remaining fun
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
||||
-- Items print as "<start: rule / found-symbols>".
instance (Print n, Print c, Print t) => Print (Item c n t) where
    prt (Item start rule found) =
        concat [ "<", show start, ": ", prt rule, " / ", prt found, ">" ]
|
||||
|
||||
-- Keys print with a one-character marker: '?' for active, '!' for passive.
instance (Print c, Print t) => Print (IKey c t) where
    prt key = case key of
        Active sym  -> '?' : prt sym
        Passive cat -> '!' : prt cat
|
||||
|
||||
|
||||
98
src-3.0/GF/Parsing/CFG/PInfo.hs
Normal file
98
src-3.0/GF/Parsing/CFG/PInfo.hs
Normal file
@@ -0,0 +1,98 @@
|
||||
---------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : PL
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/05/09 09:28:45 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.5 $
|
||||
--
|
||||
-- CFG parsing, parser information
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Parsing.CFG.PInfo
|
||||
(CFParser, CFPInfo(..), buildCFPInfo) where
|
||||
|
||||
import GF.System.Tracing
|
||||
import GF.Infra.Print
|
||||
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Formalism.CFG
|
||||
import GF.Data.SortedList
|
||||
import GF.Data.Assoc
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- type declarations
|
||||
|
||||
-- | A CFG parser: given the parser information, the list of possible
-- starting categories and the input, it produces a chart.
|
||||
type CFParser c n t = CFPInfo c n t
|
||||
-> [c]
|
||||
-> Input t
|
||||
-> CFChart c n t
|
||||
|
||||
------------------------------------------------------------
|
||||
-- parser information
|
||||
|
||||
-- | Lookup tables over a CFG, as computed by 'buildCFPInfo'.
data CFPInfo c n t
|
||||
= CFPInfo { grammarTokens :: SList t,
-- ^ the tokens occurring in right-hand sides of the rules
|
||||
nameRules :: Assoc n (SList (CFRule c n t)),
-- ^ rules grouped by rule name
|
||||
topdownRules :: Assoc c (SList (CFRule c n t)),
-- ^ rules grouped by left-hand-side category
|
||||
bottomupRules :: Assoc (Symbol c t) (SList (CFRule c n t)),
-- ^ rules with a non-empty right-hand side, grouped by its first symbol
|
||||
emptyLeftcornerRules :: Assoc c (SList (CFRule c n t)),
-- ^ derived from the rules with an empty right-hand side; presumably each
-- such rule is also listed under the categories that have its category as
-- a left corner -- TODO confirm against 'limit'
|
||||
emptyCategories :: Set c,
-- ^ categories having a rule whose right-hand side consists only of
-- categories in this set (starting from the empty rules)
|
||||
cyclicCategories :: SList c,
|
||||
-- ^ categories with a rule of the form @cat -> cat@;
-- ONLY FOR DIRECT CYCLIC RULES!!!
|
||||
leftcornerTokens :: Assoc c (SList t)
|
||||
-- ^ tokens that begin a rule of the category, extended along left corners;
-- DOES NOT WORK WITH EMPTY RULES!!!
|
||||
}
|
||||
|
||||
-- | Compute the parser information for a grammar and report a summary of
-- it through 'tracePrt'. Directly cyclic rules (see 'isCyclic') are dropped
-- before the tables are built.
--
-- NOTE(review): because the cyclic rules are removed before 'pInfo'' runs,
-- the 'cyclicCategories' field looks like it is always empty on this path
-- -- confirm whether that is intended.
buildCFPInfo :: (Ord c, Ord n, Ord t) => CFGrammar c n t -> CFPInfo c n t
buildCFPInfo grammar =
    traceCalcFirst grammar (tracePrt "CFG.PInfo - parser info" prt info)
  where
    -- this is not permanent...
    info = pInfo' [ rule | rule <- grammar, not (isCyclic rule) ]
|
||||
|
||||
-- Build all lookup tables of a 'CFPInfo' from a list of rules.
pInfo' grammar =
    CFPInfo { grammarTokens        = union [ nubsort [ tok | Tok tok <- rhs ] |
                                             CFRule _ rhs _ <- grammar ]
            , nameRules            = accumAssoc id [ (fun, rule) |
                                                     rule@(CFRule _ _ fun) <- grammar ]
            , topdownRules         = accumAssoc id [ (lhs, rule) |
                                                     rule@(CFRule lhs _ _) <- grammar ]
            , bottomupRules        = byFirstSymbol
            , emptyLeftcornerRules = accumAssoc id (limit leftCornerOf epsilonRules)
            , emptyCategories      = listSet (closeEmpties (map fst epsilonRules))
            , cyclicCategories     = nubsort [ lhs | CFRule lhs [Cat sub] _ <- grammar, lhs == sub ]
            , leftcornerTokens     = accumAssoc id (limit leftCornerOf firstTokens)
            }
  where
    -- rules with a non-empty right-hand side, keyed by its first symbol
    byFirstSymbol = accumAssoc id [ (first, rule) |
                                    rule@(CFRule _ (first:_) _) <- grammar ]

    -- (category, token) for every rule that starts with a token
    firstTokens = nubsort [ (lhs, tok) |
                            CFRule lhs (Tok tok:_) _ <- grammar ]

    -- (category, rule) for every rule with an empty right-hand side
    epsilonRules = nubsort [ (lhs, rule) |
                             rule@(CFRule lhs [] _) <- grammar ]

    -- carry a payload from a category to the left-hand sides of all rules
    -- that start with that category
    leftCornerOf (left, payload) = nubsort [ (lhs, payload) |
                                             CFRule lhs _ _ <- byFirstSymbol ? Cat left ]

    -- iterate until the list of empty categories no longer changes
    closeEmpties known
      | known == known' = known
      | otherwise       = closeEmpties known'
      where known' = nubsort [ lhs | CFRule lhs rhs _ <- grammar,
                               all (symbol (`elem` known) (const False)) rhs ]
|
||||
|
||||
-- A rule is directly cyclic when its right-hand side is exactly its own
-- left-hand-side category.
isCyclic rule = case rule of
    CFRule cat [Cat cat'] _ -> cat == cat'
    _                       -> False
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- pretty-printing of statistics
|
||||
|
||||
-- Parser information prints as a one-line summary giving the size of each
-- table rather than its contents.
instance (Ord c, Ord n, Ord t) => Print (CFPInfo c n t) where
    prt pI = concat
        [ "[ tokens=",  sl  grammarTokens
        , "; names=",   sla nameRules
        , "; tdCats=",  sla topdownRules
        , "; buCats=",  sla bottomupRules
        , "; elcCats=", sla emptyLeftcornerRules
        , "; eCats=",   sla emptyCategories
        -- , "; cCats=",     sl  cyclicCategories
        -- , "; lctokCats=", sla leftcornerTokens
        , " ]"
        ]
      where
        -- size of an association field
        sla field = show (length (aElems (field pI)))
        -- size of a sorted-list field
        sl field = show (length (field pI))
|
||||
Reference in New Issue
Block a user