"Committed_by_peb"

This commit is contained in:
peb
2005-04-11 12:57:45 +00:00
parent f6273f7033
commit ac00f77dad
81 changed files with 7080 additions and 181 deletions

44
src/GF/Parsing/CFG.hs Normal file
View File

@@ -0,0 +1,44 @@
----------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/11 13:52:51 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.1 $
--
-- CFG parsing
-----------------------------------------------------------------------------
module GF.NewParsing.CFG
(parseCF, module GF.NewParsing.CFG.PInfo) where
import GF.Formalism.Utilities
import GF.Formalism.CFG
import GF.NewParsing.CFG.PInfo
import qualified GF.NewParsing.CFG.Incremental as Inc
import qualified GF.NewParsing.CFG.General as Gen
----------------------------------------------------------------------
-- parsing
--parseCF :: (Ord n, Ord c, Ord t) => String -> CFParser c n t
parseCF "gb" = Gen.parse bottomup
parseCF "gt" = Gen.parse topdown
parseCF "ib" = Inc.parse (bottomup, noFilter)
parseCF "it" = Inc.parse (topdown, noFilter)
parseCF "ibFT" = Inc.parse (bottomup, topdown)
parseCF "ibFB" = Inc.parse (bottomup, bottomup)
parseCF "ibFTB" = Inc.parse (bottomup, bothFilters)
parseCF "itF" = Inc.parse (topdown, bottomup)
-- default parser:
parseCF _ = parseCF "gb"
bottomup = (True, False)
topdown = (False, True)
noFilter = (False, False)
bothFilters = (True, True)

View File

@@ -0,0 +1,101 @@
----------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/11 13:52:51 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.1 $
--
-- CFG parsing with a general chart
-----------------------------------------------------------------------------
module GF.NewParsing.CFG.General
(parse, Strategy) where
import GF.System.Tracing
import GF.Infra.Print
import GF.Formalism.Utilities
import GF.Formalism.CFG
import GF.NewParsing.CFG.PInfo
import GF.NewParsing.GeneralChart
import GF.Data.Assoc
import Monad
--parse :: (Ord n, Ord c, Ord t) => Strategy -> CFParser c n t
parse strategy grammar start = extract .
tracePrt "#internal chart" (prt . length . chartList) .
process strategy grammar start
type Strategy = (Bool, Bool) -- ^ (isBottomup, isTopdown)
extract :: (Ord n, Ord c, Ord t) =>
IChart n (Symbol c t) -> CFChart c n t
extract chart = [ CFRule (Edge j k cat) daughters name |
Edge j k (Cat cat, found, [], Just name) <- chartList chart,
daughters <- path j k (reverse found) ]
where path i k [] = [ [] | i==k ]
path i k (Tok tok : found)
= [ Tok tok : daughters |
daughters <- path (i+1) k found ]
path i k (Cat cat : found)
= [ Cat (Edge i j cat) : daughters |
Edge _i j _cat <- chartLookup chart (Passive (Cat cat) i),
daughters <- path j k found ]
process :: (Ord n, Ord c, Ord t) =>
Strategy -- ^ (isBottomup, isTopdown) :: (Bool, Bool)
-> CFPInfo c n t -- ^ parser information (= grammar)
-> [c] -- ^ list of starting categories
-> Input t -- ^ input string
-> IChart n (Symbol c t)
process (isBottomup, isTopdown) grammar start
= trace2 "CFParserGeneral" ((if isBottomup then " BU" else "") ++
(if isTopdown then " TD" else "")) $
buildChart keyof [predict, combine] . axioms
where axioms input = initial ++ scan input
scan input = map (fmap mkEdge) (inputEdges input)
mkEdge tok = (Tok tok, [], [], Nothing)
-- the combine rule
combine chart (Edge j k (next, _, [], _))
= [ edge `forwardTo` k | edge <- chartLookup chart (Active next j) ]
combine chart edge@(Edge _ j (_, _, next:_, _))
= [ edge `forwardTo` k | Edge _ k _ <- chartLookup chart (Passive next j) ]
-- initial predictions
initial = [ loopingEdge 0 rule | cat <- start, rule <- tdRuleLookup ? cat ]
-- predictions
predict chart (Edge j k (next, _, [], _)) | isBottomup
= [ loopingEdge j rule `forwardTo` k | rule <- bottomupRules grammar ? next ]
-- - - - - - - - - - ^^^^^^^^^^^^^ Kilbury prediction: move dot forward
predict chart (Edge _ k (_, _, Cat cat:_, _))
= [ loopingEdge k rule | rule <- tdRuleLookup ? cat ]
predict _ _ = []
tdRuleLookup | isTopdown = topdownRules grammar
| isBottomup = emptyLeftcornerRules grammar
-- internal representation of parse items
type Item n s = Edge (s, [s], [s], Maybe n)
type IChart n s = ParseChart (Item n s) (IKey s)
data IKey s = Active s Int
| Passive s Int
deriving (Eq, Ord, Show)
keyof (Edge _ j (_, _, next:_, _)) = Active next j
keyof (Edge j _ (cat, _, [], _)) = Passive cat j
forwardTo (Edge i j (cat, found, next:tofind, name)) k
= Edge i k (cat, next:found, tofind, name)
loopingEdge k (CFRule cat tofind name) = Edge k k (Cat cat, [], tofind, Just name)

View File

@@ -0,0 +1,148 @@
----------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/11 13:52:51 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.1 $
--
-- Incremental chart parsing for CFG
-----------------------------------------------------------------------------
module GF.NewParsing.CFG.Incremental
(parse, Strategy) where
import GF.System.Tracing
import GF.Infra.Print
import Array
import Operations
import GF.Data.SortedList
import GF.Data.Assoc
import GF.Formalism.Utilities
import GF.Formalism.CFG
import GF.NewParsing.CFG.PInfo
import GF.NewParsing.IncrementalChart
type Strategy = ((Bool, Bool), (Bool, Bool)) -- ^ (predict:(BU, TD), filter:(BU, TD))
parse :: (Ord n, Ord c, Ord t) => Strategy -> CFParser c n t
parse strategy grammar start = extract .
tracePrt "#internal chart" (prt . length . flip chartList const) .
process strategy grammar start
extract :: (Ord n, Ord c, Ord t) =>
IChart c n t -> CFChart c n t
extract finalChart = [ CFRule (Edge j k cat) daughters name |
(k, Item j (CFRule cat [] name) found) <- chartList finalChart (,),
daughters <- path j k (reverse found) ]
where path i k [] = [ [] | i==k ]
path i k (Tok tok : found)
= [ Tok tok : daughters |
daughters <- path (i+1) k found ]
path i k (Cat cat : found)
= [ Cat (Edge i j cat) : daughters |
Item j _ _ <- chartLookup finalChart i (Passive cat),
daughters <- path j k found ]
process :: (Ord n, Ord c, Ord t) =>
Strategy -> CFPInfo c n t -> [c] -> Input t -> IChart c n t
process ((isPredictBU, isPredictTD), (isFilterBU, isFilterTD)) grammar start input
= trace2 "CFParserIncremental" ((if isPredictBU then "BU-predict " else "") ++
(if isPredictTD then "TD-predict " else "") ++
(if isFilterBU then "BU-filter " else "") ++
(if isFilterTD then "TD-filter " else "")) $
finalChart
where finalChart = buildChart keyof rules axioms $ inputBounds input
axioms 0 = union $ map (tdInfer 0) start
axioms k = union [ buInfer j k (Tok token) |
(token, js) <- aAssocs (inputTo input ! k), j <- js ]
rules k (Item j (CFRule cat [] _) _)
= buInfer j k (Cat cat)
rules k (Item j rule@(CFRule _ (sym@(Cat next):_) _) found)
= tdInfer k next <++>
-- hack for empty rules:
[ Item j (forward rule) (sym:found) |
emptyCategories grammar ?= next ]
rules _ _ = []
buInfer j k next = buPredict j k next <++> buCombine j k next
tdInfer k next = tdPredict k next
-- the combine rule
buCombine j k next
| j == k = [] -- hack for empty rules, see rules above and tdPredict below
| otherwise = [ Item i (forward rule) (next:found) |
Item i rule found <- (finalChart ! j) ? Active next ]
-- kilbury bottom-up prediction
buPredict j k next
= [ Item j rule [next] | isPredictBU,
rule <- map forward $ bottomupRules grammar ? next,
buFilter rule k,
tdFilter rule j k ]
-- top-down prediction
tdPredict k cat
= [ Item k rule [] | isPredictTD || isFilterTD,
rule <- topdownRules grammar ? cat,
buFilter rule k ] <++>
-- hack for empty rules:
[ Item k rule [] | isPredictBU,
rule <- emptyLeftcornerRules grammar ? cat ]
-- bottom up filtering: input symbol k can begin the given symbol list (first set)
-- leftcornerTokens DOESN'T WORK WITH EMPTY RULES!!!
buFilter (CFRule _ (Cat cat:_) _) k | isFilterBU
= k < snd (inputBounds input) &&
hasCommonElements (leftcornerTokens grammar ? cat)
(aElems (inputFrom input ! k))
buFilter _ _ = True
-- top down filtering: 'cat' is reachable by an active edge ending in node j < k
tdFilter (CFRule cat _ _) j k | isFilterTD && j < k
= (tdFilters ! j) ?= cat
tdFilter _ _ _ = True
tdFilters = listArray (inputBounds input) $
map (listSet . limit leftCats . activeCats) [0..]
activeCats j = [ next | Active (Cat next) <- aElems (finalChart ! j) ]
leftCats cat = [ left | CFRule _cat (Cat left:_) _ <- topdownRules grammar ? cat ]
----------------------------------------------------------------------
-- type declarations, items & keys
data Item c n t = Item Int (CFRule c n t) [Symbol c t]
deriving (Eq, Ord, Show)
data IKey c t = Active (Symbol c t) | Passive c
deriving (Eq, Ord, Show)
type IChart c n t = IncrementalChart (Item c n t) (IKey c t)
keyof :: Item c n t -> IKey c t
keyof (Item _ (CFRule _ (next:_) _) _) = Active next
keyof (Item _ (CFRule cat [] _) _) = Passive cat
forward :: CFRule c n t -> CFRule c n t
forward (CFRule cat (_:rest) name) = CFRule cat rest name
----------------------------------------------------------------------
instance (Print n, Print c, Print t) => Print (Item c n t) where
prt (Item k rule syms)
= "<"++show k++ ": "++ prt rule++" / "++prt syms++">"
instance (Print c, Print t) => Print (IKey c t) where
prt (Active sym) = "?" ++ prt sym
prt (Passive cat) = "!" ++ prt cat

View File

@@ -0,0 +1,95 @@
---------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/11 13:52:52 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.1 $
--
-- CFG parsing, parser information
-----------------------------------------------------------------------------
module GF.NewParsing.CFG.PInfo where
import GF.System.Tracing
import GF.Infra.Print
import GF.Formalism.Utilities
import GF.Formalism.CFG
import GF.Data.SortedList
import GF.Data.Assoc
----------------------------------------------------------------------
-- type declarations
type CFParser c n t = CFPInfo c n t
-> [c] -- ^ possible starting categories
-> Input t -- ^ the input tokens
-> CFChart c n t
------------------------------------------------------------
-- parser information
data CFPInfo c n t
= CFPInfo { grammarTokens :: SList t,
nameRules :: Assoc n (SList (CFRule c n t)),
topdownRules :: Assoc c (SList (CFRule c n t)),
bottomupRules :: Assoc (Symbol c t) (SList (CFRule c n t)),
emptyLeftcornerRules :: Assoc c (SList (CFRule c n t)),
emptyCategories :: Set c,
cyclicCategories :: SList c,
-- ^ ONLY FOR DIRECT CYCLIC RULES!!!
leftcornerTokens :: Assoc c (SList t)
-- ^ DOES NOT WORK WITH EMPTY RULES!!!
}
--buildCFPInfo :: (Ord n, Ord c, Ord t) => CFGrammar c n t -> CFPInfo c n t
-- this is not permanent...
buildCFPInfo grammar = traceCalcFirst grammar $
tracePrt "cf parser info" (prt) $
pInfo' (filter (not . isCyclic) grammar)
pInfo' grammar = CFPInfo grToks nmRules tdRules buRules elcRules emptyCats cyclicCats leftToks
where grToks = union [ nubsort [ tok | Tok tok <- rhs ] |
CFRule _ rhs _ <- grammar ]
nmRules = accumAssoc id [ (name, rule) |
rule@(CFRule _ _ name) <- grammar ]
tdRules = accumAssoc id [ (cat, rule) |
rule@(CFRule cat _ _) <- grammar ]
buRules = accumAssoc id [ (next, rule) |
rule@(CFRule _ (next:_) _) <- grammar ]
elcRules = accumAssoc id $ limit lc emptyRules
leftToks = accumAssoc id $ limit lc $
nubsort [ (cat, token) |
CFRule cat (Tok token:_) _ <- grammar ]
lc (left, res) = nubsort [ (cat, res) |
CFRule cat _ _ <- buRules ? Cat left ]
emptyRules = nubsort [ (cat, rule) |
rule@(CFRule cat [] _) <- grammar ]
emptyCats = listSet $ limitEmpties $ map fst emptyRules
limitEmpties es = if es==es' then es else limitEmpties es'
where es' = nubsort [ cat | CFRule cat rhs _ <- grammar,
all (symbol (\e -> e `elem` es) (const False)) rhs ]
cyclicCats = nubsort [ cat | CFRule cat [Cat cat'] _ <- grammar, cat == cat' ]
isCyclic (CFRule cat [Cat cat'] _) = cat==cat'
isCyclic _ = False
----------------------------------------------------------------------
instance (Ord n, Ord c, Ord t) => Print (CFPInfo n c t) where
prt pI = "[ tokens=" ++ sl grammarTokens ++
"; names=" ++ sla nameRules ++
"; tdCats=" ++ sla topdownRules ++
"; buCats=" ++ sla bottomupRules ++
"; elcCats=" ++ sla emptyLeftcornerRules ++
"; eCats=" ++ sla emptyCategories ++
"; cCats=" ++ sl cyclicCategories ++
"; lctokCats=" ++ sla leftcornerTokens ++
" ]"
where sla f = show $ length $ aElems $ f pI
sl f = show $ length $ f pI

187
src/GF/Parsing/GFC.hs Normal file
View File

@@ -0,0 +1,187 @@
----------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/11 13:52:51 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.1 $
--
-- The main parsing module, parsing GFC grammars
-- by translating to simpler formats, such as PMCFG and CFG
----------------------------------------------------------------------
module GF.NewParsing.GFC
(parse, PInfo(..), buildPInfo) where
import GF.System.Tracing
import GF.Infra.Print
import qualified PrGrammar
import Monad
import qualified Grammar
-- import Values
import qualified Macros
-- import qualified Modules
import qualified AbsGFC
import qualified Ident
import Operations
import CFIdent (CFCat, cfCat2Ident, CFTok, prCFTok)
import GF.Data.SortedList
import GF.Data.Assoc
import GF.Formalism.Utilities
import GF.Conversion.Types
import GF.Formalism.SimpleGFC
import qualified GF.Formalism.MCFG as M
import qualified GF.Formalism.CFG as C
-- import qualified GF.NewParsing.MCFG as PM
import qualified GF.NewParsing.CFG as PC
--import qualified GF.Conversion.FromGFC as From
----------------------------------------------------------------------
-- parsing information
data PInfo = PInfo { mcfPInfo :: (), -- ^ not implemented yet
cfPInfo :: PC.CFPInfo CCat CName Token }
buildPInfo :: MGrammar -> CGrammar -> PInfo
buildPInfo mcfg cfg = PInfo { mcfPInfo = (),
cfPInfo = PC.buildCFPInfo cfg }
----------------------------------------------------------------------
-- main parsing function
parse :: String -- ^ parsing strategy
-> PInfo -- ^ compiled grammars (mcfg and cfg)
-> Ident.Ident -- ^ abstract module name
-> CFCat -- ^ starting category
-> [CFTok] -- ^ input tokens
-> [Grammar.Term] -- ^ resulting GF terms
-- parsing via CFG
parse (c:strategy) pinfo abs startCat
| c=='c' || c=='C' = map (tree2term abs) .
parseCFG strategy pinfo startCats .
map prCFTok
where startCats = tracePrt "startCats" prt $
filter isStartCat $ map fst $ aAssocs $ PC.topdownRules $ cfPInfo pinfo
isStartCat (CCat (MCat cat _) _) = cat == cfCat2Ident startCat
-- default parser
parse strategy pinfo abs start = parse ('c':strategy) pinfo abs start
----------------------------------------------------------------------
parseCFG :: String -> PInfo -> [CCat] -> [Token] -> [SyntaxTree Name]
parseCFG strategy pInfo startCats inString = trace2 "Parser" "CFG" $
trees
where trees = tracePrt "#trees" (prt . length) $
nubsort $ forests >>= forest2trees
-- compactFs >>= forest2trees
-- compactFs = tracePrt "#compactForests" (prt . length) $
-- tracePrt "compactForests" (prtBefore "\n") $
-- compactForests forests
forests = tracePrt "#forests" (prt . length) $
cfForests >>= convertFromCFForest
cfForests= tracePrt "#cfForests" (prt . length) $
chart2forests chart (const False) finalEdges
finalEdges = tracePrt "finalChartEdges" prt $
map (uncurry Edge (inputBounds inTokens)) startCats
chart = --tracePrt "finalChartEdges" (prt . (? finalEdge)) $
tracePrt "#chart" (prt . map (length.snd) . aAssocs) $
C.grammar2chart cfChart
cfChart = --tracePrt "finalEdges"
--(prt . filter (\(Edge i j _) -> (i,j)==inputBounds inTokens)) $
tracePrt "#cfChart" (prt . length) $
PC.parseCF strategy (cfPInfo pInfo) startCats inTokens
inTokens = input inString
{-
-- parsing via MCFG
newParser (m:strategy) gr (_, startCat) inString
| m=='m' || m=='M' = trace2 "Parser" "MCFG" $ Ok terms
where terms = map (tree2term abstract) trees
trees = --tracePrt "trees" (prtBefore "\n") $
tracePrt "#trees" (prt . length) $
concatMap forest2trees forests
forests = --tracePrt "forests" (prtBefore "\n") $
tracePrt "#forests" (prt . length) $
concatMap (chart2forests chart isMeta) finalEdges
isMeta = null . snd
finalEdges = tracePrt "finalEdges" (prtBefore "\n") $
filter isFinalEdge $ aElems chart
-- nubsort [ (cat, [(lbl, E.makeRange [(i,j)])]) |
-- let (i, j) = inputBounds inTokens,
-- E.Rule cat _ [E.Lin lbl _] _ <- pInf,
-- isStartCat cat ]
isFinalEdge (cat, rows)
= isStartCat cat &&
inputBounds inTokens `elem` concat [ rho | (_, M.Rng rho) <- rows ]
chart = --tracePrt "chart" (prtBefore "\n" . aAssocs) $
tracePrt "#chart" (prt . map (length.snd) . aAssocs) $
PM.parse strategy pInf starters inTokens
inTokens = input $ map AbsGFC.KS $ words inString
pInf = -- tracePrt "avg rec" (\gr -> show (sum [ length rec | E.Rule _ _ rec _ <- gr ] % length gr)) $
mcfPInfo $ SS.statePInfo gr
starters = tracePrt "startCats" prt $
filter isStartCat $ nubsort [ cat | M.Rule cat _ _ _ <- pInf ]
isStartCat (MCFCat cat _) = cat == startCat
abstract = tracePrt "abstract module" PrGrammar.prt $
SS.absId gr
-}
----------------------------------------------------------------------
-- parse trees to GF terms
tree2term :: Ident.Ident -> SyntaxTree Name -> Grammar.Term
tree2term abs (TNode f ts) = Macros.mkApp (Macros.qq (abs,f)) (map (tree2term abs) ts)
tree2term abs (TMeta) = Macros.mkMeta 0
----------------------------------------------------------------------
-- conversion and unification of forests
convertFromCFForest :: SyntaxForest CName -> [SyntaxForest Name]
-- simplest implementation
convertFromCFForest (FNode (CName name profile) children)
| isCoercion name = concat chForests
| otherwise = [ FNode name chForests | not (null chForests) ]
where chForests = concat [ mapM (checkProfile forests) profile |
forests0 <- children,
forests <- mapM convertFromCFForest forests0 ]
{-
-- more intelligent(?) implementation
convertFromCFForest (FNode (CName name profile) children)
| isCoercion name = concat chForests
| otherwise = [ FNode name chForests | not (null chForests) ]
where chForests = concat [ mapM (checkProfile forests) profile |
forests0 <- children,
forests <- mapM convertFromCFForest forests0 ]
-}
checkProfile forests = unifyManyForests . map (forests !!)
----------------------------------------------------------------------
-- conversion and unification for parse trees instead of forests
convertFromCFTree :: SyntaxTree CName -> [SyntaxTree Name]
convertFromCFTree (TNode (CName name profile) children0)
= [ TNode name children |
children1 <- mapM convertFromCFTree children0,
children <- mapM (checkProfile children1) profile ]
where checkProfile trees = unifyManyTrees . map (trees !!)