1
0
forked from GitHub/gf-core

GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3

This commit is contained in:
aarne
2008-05-21 09:26:44 +00:00
parent 915a1de717
commit 055c0d0d5a
536 changed files with 0 additions and 0 deletions

View File

@@ -0,0 +1,103 @@
----------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:23:08 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.4 $
--
-- CFG parsing with a general chart
-----------------------------------------------------------------------------
module GF.Parsing.CFG.General
(parse, Strategy) where
import GF.System.Tracing
import GF.Infra.Print
import GF.Formalism.Utilities
import GF.Formalism.CFG
import GF.Parsing.CFG.PInfo
import GF.Data.GeneralDeduction
import GF.Data.Assoc
import Control.Monad
-- | Parse an input with the general chart parser.
--
-- Builds the internal chart with 'process', reports the number of items in
-- it through 'tracePrt', and then converts it to the result chart with
-- 'extract'.
parse :: (Ord n, Ord c, Ord t) => Strategy -> CFParser c n t
parse strategy grammar start input = extract tracedChart
  where
    -- the chart is passed through 'tracePrt' unchanged; only its size is reported
    tracedChart = tracePrt "Parsing.CFG.General - size internal of chart"
                           (prt . length . chartList)
                           internalChart
    internalChart = process strategy grammar start input
-- | Parsing strategy, as a pair of flags @(isBottomup, isTopdown)@.
--
-- 'process' reads the first flag to enable bottom-up (Kilbury) prediction
-- from passive edges, and uses both flags to choose the rule table that
-- top-down prediction looks categories up in.
type Strategy = (Bool, Bool)
-- | Turn the internal chart into the result chart.
--
-- Every passive item that carries a rule name (an edge whose to-find list is
-- empty and whose name is @Just name@) yields one result rule per way of
-- assigning positions to its daughters. The item's found list is kept most
-- recent first, so it is reversed before being walked from left to right.
extract :: (Ord n, Ord c, Ord t) =>
           IChart n (Symbol c t) -> CFChart c n t
extract chart = do
    Edge from to (Cat lhs, seen, [], Just fun) <- chartList chart
    rhs <- spans from to (reverse seen)
    return (CFRule (Edge from to lhs) rhs fun)
  where
    -- all daughter sequences that cover exactly the positions pos..end
    spans pos end symbols = case symbols of
        []
          | pos == end -> [[]]
          | otherwise  -> []
        -- a token is taken to cover exactly one position
        Tok tok : rest -> map (Tok tok :) (spans (pos + 1) end rest)
        -- a category may end wherever a passive edge for it, looked up at pos, ends
        Cat cat : rest -> do
            Edge _ mid _ <- chartLookup chart (Passive (Cat cat) pos)
            map (Cat (Edge pos mid cat) :) (spans mid end rest)
-- | Build the internal chart for an input.
--
-- The chart is seeded with the initial predictions for the starting
-- categories plus one edge per input edge, and is then closed under the
-- @predict@ and @combine@ inference rules by 'buildChart'.
--
-- The strategy flags select the prediction rules:
--
-- * @isBottomup@ enables Kilbury prediction from passive edges;
--
-- * top-down prediction looks categories up in 'topdownRules' when
--   @isTopdown@ is set, otherwise in 'emptyLeftcornerRules' when
--   @isBottomup@ is set.
--
-- When neither flag is set no rules are predicted at all, so the chart
-- holds only the scanned input edges. (Previously that strategy ended in a
-- non-exhaustive-guards failure as soon as a prediction was demanded.)
process :: (Ord n, Ord c, Ord t) =>
           Strategy        -- ^ (isBottomup, isTopdown) :: (Bool, Bool)
        -> CFPInfo c n t   -- ^ parser information (= grammar)
        -> [c]             -- ^ list of starting categories
        -> Input t         -- ^ input string
        -> IChart n (Symbol c t)
process (isBottomup, isTopdown) grammar start
    = trace2 "Parsing.CFG.General - strategy" ((if isBottomup then " BU" else "") ++
                                               (if isTopdown then " TD" else "")) $
      buildChart keyof [predict, combine] . axioms
    where axioms input = initial ++ scan input

          -- one passive edge per input edge; token edges carry no rule name
          scan input = map (fmap mkEdge) (inputEdges input)
          mkEdge tok = (Tok tok, [], [], Nothing)

          -- the combine rule
          -- a passive edge advances every active edge that waits for its
          -- symbol at the position where the passive edge starts
          combine chart (Edge j k (next, _, [], _))
              = [ edge `forwardTo` k | edge <- chartLookup chart (Active next j) ]
          -- an active edge advances over every passive edge for its next
          -- symbol that starts where the active edge ends
          combine chart edge@(Edge _ j (_, _, next:_, _))
              = [ edge `forwardTo` k | Edge _ k _ <- chartLookup chart (Passive next j) ]

          -- initial predictions
          initial = [ loopingEdge 0 rule | cat <- start, rule <- tdRules cat ]

          -- predictions
          predict chart (Edge j k (next, _, [], _)) | isBottomup
              = [ loopingEdge j rule `forwardTo` k | rule <- bottomupRules grammar ? next ]
              -- - - - - - - - - - ^^^^^^^^^^^^^ Kilbury prediction: move dot forward
          predict chart (Edge _ k (_, _, Cat cat:_, _))
              = [ loopingEdge k rule | rule <- tdRules cat ]
          predict _ _ = []

          -- rules to predict top-down for a category; total in the strategy
          -- flags, so the (False, False) strategy predicts nothing
          tdRules cat
              | isTopdown  = topdownRules grammar ? cat
              | isBottomup = emptyLeftcornerRules grammar ? cat
              | otherwise  = []
-- internal representation of parse items
-- | A chart item: an 'Edge' whose payload is
-- @(left-hand side, symbols found so far, symbols still to find, rule name)@.
-- The found list is kept most recent first (see 'forwardTo'); edges made
-- from input tokens have no rule name.
type Item n s = Edge (s, [s], [s], Maybe n)

-- | The internal chart: items indexed by 'IKey'.
type IChart n s = ParseChart (Item n s) (IKey s)

-- | Chart keys, as assigned by 'keyof'.
data IKey s
    = Active s Int
      -- ^ an item still looking for the given symbol, filed under the
      --   position where the item ends
    | Passive s Int
      -- ^ a complete item for the given symbol, filed under the position
      --   where the item starts
    deriving (Eq, Ord, Show)
-- | The chart key of an item: an item with symbols left to find is filed as
-- 'Active' under its next symbol and its end position; a complete item is
-- filed as 'Passive' under its left-hand side and its start position.
keyof (Edge from to (lhs, _, pending, _)) = case pending of
    wanted : _ -> Active wanted to
    []         -> Passive lhs from
-- | Move the dot of an active item over its next symbol and extend the item
-- to the given end position. The consumed symbol is pushed onto the front
-- of the found list. Only defined for items that still have a symbol to
-- find.
forwardTo (Edge from _ (lhs, seen, wanted : rest, fun)) newEnd
    = Edge from newEnd (lhs, wanted : seen, rest, fun)
-- | The item for a rule of which nothing has been found yet: it starts and
-- ends at the given position and still has the whole right-hand side to find.
loopingEdge pos (CFRule lhs rhs fun) = Edge pos pos (Cat lhs, [], rhs, Just fun)

View File

@@ -0,0 +1,150 @@
----------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:23:09 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.4 $
--
-- Incremental chart parsing for CFG
-----------------------------------------------------------------------------
module GF.Parsing.CFG.Incremental
(parse, Strategy) where
import GF.System.Tracing
import GF.Infra.Print
import Data.Array
import GF.Data.Operations
import GF.Data.SortedList
import GF.Data.Assoc
import GF.Formalism.Utilities
import GF.Formalism.CFG
import GF.Parsing.CFG.PInfo
import GF.Data.IncrementalDeduction
-- | Parsing strategy: @((predictBU, predictTD), (filterBU, filterTD))@.
--
-- The first pair switches bottom-up and top-down prediction on or off, the
-- second pair switches bottom-up and top-down filtering; 'process' shows
-- how each flag is used.
type Strategy = ((Bool, Bool), (Bool, Bool))
-- | Parse an input with the incremental chart parser.
--
-- Builds the internal chart with 'process', reports the number of items in
-- it through 'tracePrt', and then converts it to the result chart with
-- 'extract'.
parse :: (Ord n, Ord c, Ord t) => Strategy -> CFParser c n t
parse strategy grammar start input = extract tracedChart
  where
    -- the chart is passed through 'tracePrt' unchanged; only its size is reported
    tracedChart = tracePrt "Parsing.CFG.Incremental - size of internal chart"
                           (\chart -> prt (length (chartList chart const)))
                           internalChart
    internalChart = process strategy grammar start input
-- | Turn the finished incremental chart into the result chart.
--
-- Every item whose rule has nothing left to find yields one result rule per
-- way of assigning positions to its daughters. The item's found list is
-- kept most recent first, so it is reversed before being walked from left
-- to right.
--
-- NOTE(review): 'process' appears to file each item under its end position
-- (@rules k (Item j ..)@ passes @j@ as the start and @k@ as the state),
-- whereas the category case below looks up state @pos@ -- the daughter's
-- start -- and then uses the stored 'Int' as the daughter's end. Confirm
-- against 'chartLookup' in "GF.Data.IncrementalDeduction" that this forward
-- walk can actually find non-empty daughters.
extract :: (Ord n, Ord c, Ord t) =>
           IChart c n t -> CFChart c n t
extract finalChart = do
    (end, Item begin (CFRule lhs [] fun) seen) <- chartList finalChart (,)
    rhs <- walk begin end (reverse seen)
    return (CFRule (Edge begin end lhs) rhs fun)
  where
    -- all daughter sequences that cover exactly the positions pos..end
    walk pos end symbols = case symbols of
        []
          | pos == end -> [[]]
          | otherwise  -> []
        -- a token is taken to cover exactly one position
        Tok tok : rest -> map (Tok tok :) (walk (pos + 1) end rest)
        Cat cat : rest -> do
            Item next _ _ <- chartLookup finalChart pos (Passive cat)
            map (Cat (Edge pos next cat) :) (walk next end rest)
-- | Build the incremental chart for an input.
--
-- The chart has one state per input position. State 0 is seeded with the
-- top-down predictions for the starting categories; every later state @k@
-- is seeded from the tokens that 'inputTo' lists for @k@. 'buildChart'
-- closes each state under @rules@. The chart is defined in terms of itself:
-- combination and top-down filtering read other states of @finalChart@.
process :: (Ord n, Ord c, Ord t) =>
           Strategy -> CFPInfo c n t -> [c] -> Input t -> IChart c n t
process ((isPredictBU, isPredictTD), (isFilterBU, isFilterTD)) grammar start input
    = trace2 "Parsing.CFG.Incremental - strategy" strategyDescr finalChart
  where
    -- the names of the enabled strategy flags, in a fixed order
    strategyDescr = concat [ label | (True, label) <- [ (isPredictBU, "BU-predict ")
                                                      , (isPredictTD, "TD-predict ")
                                                      , (isFilterBU,  "BU-filter ")
                                                      , (isFilterTD,  "TD-filter ") ] ]

    inputSpan  = inputBounds input
    finalChart = buildChart keyof rules axioms inputSpan

    -- the items each state starts out with
    axioms k
      | k == 0    = union (map (tdPredict 0) start)
      | otherwise = union [ buInfer j k (Tok token)
                          | (token, js) <- aAssocs (inputTo input ! k), j <- js ]

    -- the consequences of an item in state k
    rules k (Item j rule found) = case rule of
        -- a finished rule acts bottom-up as its category
        CFRule cat [] _ -> buInfer j k (Cat cat)
        -- a rule waiting for a category predicts that category top-down
        CFRule _ (sym@(Cat next) : _) _ ->
            tdPredict k next <++>
            -- hack for empty rules:
            [ Item j (forward rule) (sym : found)
            | emptyCategories grammar ?= next ]
        _ -> []

    buInfer j k next = buPredict j k next <++> buCombine j k next

    -- the combine rule
    buCombine j k next =
        if j == k
          then []  -- hack for empty rules, see rules above and tdPredict below
          else [ Item i (forward rule) (next : found)
               | Item i rule found <- (finalChart ! j) ? Active next ]

    -- kilbury bottom-up prediction
    buPredict j k next
      | isPredictBU = [ Item j rule [next]
                      | rule <- map forward (bottomupRules grammar ? next)
                      , buFilter rule k
                      , tdFilter rule j k ]
      | otherwise   = []

    -- top-down prediction
    tdPredict k cat = predicted <++> emptyLeftcorners
      where
        predicted
          | isPredictTD || isFilterTD = [ Item k rule []
                                        | rule <- topdownRules grammar ? cat
                                        , buFilter rule k ]
          | otherwise                 = []
        -- hack for empty rules:
        emptyLeftcorners
          | isPredictBU = [ Item k rule [] | rule <- emptyLeftcornerRules grammar ? cat ]
          | otherwise   = []

    -- bottom up filtering: input symbol k can begin the given symbol list (first set)
    -- leftcornerTokens DOESN'T WORK WITH EMPTY RULES!!!
    buFilter rule k = case rule of
        CFRule _ (Cat cat : _) _
          | isFilterBU -> k < snd inputSpan &&
                          hasCommonElements (leftcornerTokens grammar ? cat)
                                            (aElems (inputFrom input ! k))
        _ -> True

    -- top down filtering: 'cat' is reachable by an active edge ending in node j < k
    tdFilter (CFRule cat _ _) j k
      | isFilterTD && j < k = (tdFilters ! j) ?= cat
      | otherwise           = True

    -- per state: the categories wanted by active items there, extended with
    -- 'limit' over the left-corner relation
    tdFilters = listArray inputSpan
                  [ listSet (limit leftCats (activeCats j)) | j <- [0..] ]
    activeCats j = [ wanted | Active (Cat wanted) <- aElems (finalChart ! j) ]
    leftCats cat = [ left | CFRule _ (Cat left : _) _ <- topdownRules grammar ? cat ]
----------------------------------------------------------------------
-- type declarations, items & keys
-- | A chart item: the position where the item starts, the rule with the
-- symbols already found removed from its right-hand side (see 'forward'),
-- and the symbols found so far, most recent first.
data Item c n t = Item Int (CFRule c n t) [Symbol c t]
    deriving (Eq, Ord, Show)

-- | Chart keys, as assigned by 'keyof'.
data IKey c t
    = Active (Symbol c t)  -- ^ an item still looking for the given symbol
    | Passive c            -- ^ a finished item for the given category
    deriving (Eq, Ord, Show)

-- | The incremental chart: items indexed by 'IKey'.
type IChart c n t = IncrementalChart (Item c n t) (IKey c t)
-- | The chart key of an item: 'Active' on the next symbol while the rule
-- still has something to find, 'Passive' on the rule's category once the
-- right-hand side is used up.
keyof :: Item c n t -> IKey c t
keyof (Item _ (CFRule cat rhs _) _) = case rhs of
    wanted : _ -> Active wanted
    []         -> Passive cat
-- | Drop the first symbol of a rule's right-hand side, i.e. move the dot
-- one step forward. Only defined for rules with a non-empty right-hand side.
forward :: CFRule c n t -> CFRule c n t
forward (CFRule lhs (_ : remaining) fun) = CFRule lhs remaining fun
----------------------------------------------------------------------
-- | Items print as @\<start: rule / found symbols\>@.
instance (Print n, Print c, Print t) => Print (Item c n t) where
    prt (Item start rule seen)
        = concat [ "<", show start, ": ", prt rule, " / ", prt seen, ">" ]
-- | Active keys print with a leading @?@, passive keys with a leading @!@.
instance (Print c, Print t) => Print (IKey c t) where
    prt key = case key of
        Active sym  -> "?" ++ prt sym
        Passive cat -> "!" ++ prt cat

View File

@@ -0,0 +1,98 @@
---------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/05/09 09:28:45 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.5 $
--
-- CFG parsing, parser information
-----------------------------------------------------------------------------
module GF.Parsing.CFG.PInfo
(CFParser, CFPInfo(..), buildCFPInfo) where
import GF.System.Tracing
import GF.Infra.Print
import GF.Formalism.Utilities
import GF.Formalism.CFG
import GF.Data.SortedList
import GF.Data.Assoc
----------------------------------------------------------------------
-- type declarations
-- | A CFG parser: from parser information, the possible starting
-- categories and an input, it computes a chart.
type CFParser c n t
    =  CFPInfo c n t   -- parser information
    -> [c]             -- the list of categories = possible starting categories
    -> Input t         -- the input
    -> CFChart c n t
------------------------------------------------------------
-- parser information
-- | Lookup tables derived from a grammar; see 'buildCFPInfo'.
data CFPInfo c n t
    = CFPInfo
      { grammarTokens :: SList t
        -- ^ the tokens that occur in some right-hand side
      , nameRules :: Assoc n (SList (CFRule c n t))
        -- ^ the rules, grouped by rule name
      , topdownRules :: Assoc c (SList (CFRule c n t))
        -- ^ the rules, grouped by left-hand side category
      , bottomupRules :: Assoc (Symbol c t) (SList (CFRule c n t))
        -- ^ the rules with a non-empty right-hand side, grouped by its
        --   first symbol
      , emptyLeftcornerRules :: Assoc c (SList (CFRule c n t))
        -- ^ the rules with an empty right-hand side, grouped by category
        --   and extended along left corners
      , emptyCategories :: Set c
        -- ^ the categories with a rule whose right-hand side consists only
        --   of empty categories (or is empty)
      , cyclicCategories :: SList c
        -- ^ ONLY FOR DIRECT CYCLIC RULES!!!
      , leftcornerTokens :: Assoc c (SList t)
        -- ^ DOES NOT WORK WITH EMPTY RULES!!!
      }
-- | Compute the parser information for a grammar.
--
-- Rules of the form @cat -> cat@ are dropped before the tables are built,
-- and the resulting information is reported through 'tracePrt'.
--
-- NOTE(review): because directly cyclic rules are removed here, the
-- 'cyclicCategories' table is computed from a grammar without them and so
-- comes out empty. Confirm that this is what its consumers expect.
buildCFPInfo :: (Ord c, Ord n, Ord t) => CFGrammar c n t -> CFPInfo c n t
-- this is not permanent...
buildCFPInfo grammar
    = traceCalcFirst grammar (tracePrt "CFG.PInfo - parser info" prt info)
  where
    info = pInfo' [ rule | rule <- grammar, not (isCyclic rule) ]
-- | Build every table of the parser information from a list of rules.
--
-- NOTE(review): 'buildCFPInfo' removes the rules of the form @cat -> cat@
-- before calling this function, so for that caller the 'cyclicCategories'
-- table is empty.
pInfo' grammar = CFPInfo
    { grammarTokens        = tokens
    , nameRules            = byName
    , topdownRules         = byCategory
    , bottomupRules        = byLeftcorner
    , emptyLeftcornerRules = emptyLeftcorners
    , emptyCategories      = nullableCats
    , cyclicCategories     = directCycles
    , leftcornerTokens     = firstTokens
    }
  where
    tokens = union [ nubsort [ tok | Tok tok <- rhs ]
                   | CFRule _ rhs _ <- grammar ]

    byName       = accumAssoc id [ (fun, rule)    | rule@(CFRule _ _ fun) <- grammar ]
    byCategory   = accumAssoc id [ (lhs, rule)    | rule@(CFRule lhs _ _) <- grammar ]
    byLeftcorner = accumAssoc id [ (corner, rule) | rule@(CFRule _ (corner : _) _) <- grammar ]

    emptyLeftcorners = accumAssoc id (limit upward epsilonRules)
    firstTokens      = accumAssoc id (limit upward startTokens)
    startTokens      = nubsort [ (lhs, tok) | CFRule lhs (Tok tok : _) _ <- grammar ]

    -- pass a payload from a category on to every category that has a rule
    -- beginning with it; used at two payload types, so this has to stay a
    -- function binding
    upward (corner, payload) = nubsort [ (lhs, payload)
                                       | CFRule lhs _ _ <- byLeftcorner ? Cat corner ]

    epsilonRules = nubsort [ (lhs, rule) | rule@(CFRule lhs [] _) <- grammar ]

    -- start from the categories of the empty rules and repeat until stable
    nullableCats = listSet (closeEmpties (map fst epsilonRules))
    closeEmpties cats
      | cats == cats' = cats
      | otherwise     = closeEmpties cats'
      where
        cats' = nubsort [ lhs | CFRule lhs rhs _ <- grammar, all isNullable rhs ]
        -- a category is nullable if already collected; a token never is
        isNullable sym = symbol (\cat -> cat `elem` cats) (const False) sym

    directCycles = nubsort [ lhs | CFRule lhs [Cat only] _ <- grammar, lhs == only ]
-- | True for rules of the form @cat -> cat@: the right-hand side is exactly
-- one category, equal to the left-hand side.
isCyclic rule = case rule of
    CFRule lhs [Cat only] _ -> lhs == only
    _                       -> False
----------------------------------------------------------------------
-- pretty-printing of statistics
-- | Prints the sizes of the tables rather than their contents.
instance (Ord c, Ord n, Ord t) => Print (CFPInfo c n t) where
    prt pI = concat
        [ "[ tokens=",  sl grammarTokens
        , "; names=",   sla nameRules
        , "; tdCats=",  sla topdownRules
        , "; buCats=",  sla bottomupRules
        , "; elcCats=", sla emptyLeftcornerRules
        , "; eCats=",   sla emptyCategories
        -- , "; cCats=",     sl cyclicCategories
        -- , "; lctokCats=", sla leftcornerTokens
        , " ]"
        ]
      where
        -- size of an association or set field, counted through 'aElems'
        sla field = show (length (aElems (field pI)))
        -- size of a plain list field
        sl field = show (length (field pI))