move the parsing related stuff to GF.GFCC.Parsing

This commit is contained in:
krasimir
2008-05-29 12:38:09 +00:00
parent 7fc0aec243
commit 9d52772d86
8 changed files with 17 additions and 18 deletions

View File

@@ -26,7 +26,7 @@ import GF.Command.PPrTree
import GF.Data.ErrM
import GF.Parsing.FCFG
import GF.GFCC.Parsing.FCFG
import qualified Data.Map as Map
import System.Random (newStdGen)

View File

@@ -10,7 +10,7 @@
module GF.GFCC.BuildParser where
import GF.Infra.PrintClass
import GF.Formalism.Utilities
import GF.GFCC.Parsing.FCFG.Utilities
import GF.Data.SortedList
import GF.Data.Assoc
import GF.GFCC.CId

View File

@@ -0,0 +1,82 @@
----------------------------------------------------------------------
-- |
-- Maintainer : Krasimir Angelov
-- Stability : (stable)
-- Portability : (portable)
--
-- FCFG parsing
-----------------------------------------------------------------------------
module GF.GFCC.Parsing.FCFG
(parseFCF,buildParserInfo,ParserInfo(..),makeFinalEdge) where
import GF.Data.SortedList
import GF.Data.Assoc
import GF.Infra.PrintClass
import GF.GFCC.Parsing.FCFG.Utilities
import GF.GFCC.Parsing.FCFG.Active
import GF.GFCC.CId
import GF.GFCC.DataGFCC
import GF.GFCC.BuildParser
import GF.GFCC.Macros
import GF.Data.ErrM
import qualified Data.Map as Map
----------------------------------------------------------------------
-- parsing
-- main parsing function
parseFCF ::
String -> -- ^ parsing strategy
ParserInfo -> -- ^ compiled grammar (fcfg)
CId -> -- ^ starting category
[String] -> -- ^ input tokens
Err [Exp] -- ^ resulting GF terms
parseFCF strategy pinfo startCat inString =
do let inTokens = input inString
startCats <- Map.lookup startCat (startupCats pinfo)
fcfParser <- {- trace lctree $ -} parseFCF strategy
let chart = fcfParser pinfo startCats inTokens
(i,j) = inputBounds inTokens
finalEdges = [makeFinalEdge cat i j | cat <- startCats]
forests = chart2forests chart (const False) finalEdges
filteredForests = forests >>= applyProfileToForest
trees = nubsort $ filteredForests >>= forest2trees
return $ map tree2term trees
where
parseFCF :: String -> Err (FCFParser)
parseFCF "bottomup" = Ok $ parse "b"
parseFCF "topdown" = Ok $ parse "t"
parseFCF strat = Bad $ "FCFG parsing strategy not defined: " ++ strat
----------------------------------------------------------------------
-- parse trees to GFCC terms
tree2term :: SyntaxTree CId -> Exp
tree2term (TNode f ts) = tree (AC f) (map tree2term ts)
tree2term (TString s) = tree (AS s) []
tree2term (TInt n) = tree (AI n) []
tree2term (TFloat f) = tree (AF f) []
tree2term (TMeta) = exp0
----------------------------------------------------------------------
-- conversion and unification of forests
-- simplest implementation
applyProfileToForest :: SyntaxForest (CId,[Profile]) -> [SyntaxForest CId]
applyProfileToForest (FNode (fun,profiles) children)
| fun == wildCId = concat chForests
| otherwise = [ FNode fun chForests | not (null chForests) ]
where chForests = concat [ mapM (unifyManyForests . map (forests !!)) profiles |
forests0 <- children,
forests <- mapM applyProfileToForest forests0 ]
applyProfileToForest (FString s) = [FString s]
applyProfileToForest (FInt n) = [FInt n]
applyProfileToForest (FFloat f) = [FFloat f]
applyProfileToForest (FMeta) = [FMeta]

View File

@@ -0,0 +1,188 @@
----------------------------------------------------------------------
-- |
-- Maintainer : Krasimir Angelov
-- Stability : (stable)
-- Portability : (portable)
--
-- MCFG parsing, the active algorithm
-----------------------------------------------------------------------------
module GF.GFCC.Parsing.FCFG.Active (FCFParser, parse, makeFinalEdge) where
import GF.Data.GeneralDeduction
import GF.Data.Assoc
import GF.Data.SortedList
import GF.Data.Utilities
import GF.GFCC.CId
import GF.GFCC.DataGFCC
import GF.GFCC.Parsing.FCFG.Utilities
import GF.Infra.PrintClass
import Control.Monad (guard)
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
import Data.Array
----------------------------------------------------------------------
-- * parsing
makeFinalEdge cat 0 0 = (cat, [EmptyRange])
makeFinalEdge cat i j = (cat, [makeRange i j])
-- | the list of categories = possible starting categories
type FCFParser = ParserInfo
-> [FCat]
-> Input FToken
-> SyntaxChart (CId,[Profile]) (FCat,RangeRec)
parse :: String -> FCFParser
parse strategy pinfo starts toks = xchart2syntaxchart chart pinfo
where chart = process strategy pinfo toks axioms emptyXChart
axioms | isBU strategy = literals pinfo toks ++ initialBU pinfo toks
| isTD strategy = literals pinfo toks ++ initialTD pinfo starts toks
isBU s = s=="b"
isTD s = s=="t"
-- used in prediction
emptyChildren :: RuleId -> ParserInfo -> SyntaxNode RuleId RangeRec
emptyChildren ruleid pinfo = SNode ruleid (replicate (length rhs) [])
where
FRule _ _ rhs _ _ = allRules pinfo ! ruleid
process :: String -> ParserInfo -> Input FToken -> [(FCat,Item)] -> XChart FCat -> XChart FCat
process strategy pinfo toks [] chart = chart
process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks items $! univRule c item chart
where
univRule cat item@(Active found rng lbl ppos node@(SNode ruleid recs)) chart
| inRange (bounds lin) ppos =
case lin ! ppos of
FSymCat r d -> let c = args !! d
in case recs !! d of
[] -> case insertXChart chart item c of
Nothing -> chart
Just chart -> let items = do item@(Final found' _) <- lookupXChartFinal chart c
rng <- concatRange rng (found' !! r)
return (c, Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)))
++
do guard (isTD strategy)
ruleid <- topdownRules pinfo ? c
return (c, Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo))
in process strategy pinfo toks items chart
found' -> let items = do rng <- concatRange rng (found' !! r)
return (c, Active found rng lbl (ppos+1) node)
in process strategy pinfo toks items chart
FSymTok tok -> let items = do t_rng <- inputToken toks ? tok
rng' <- concatRange rng t_rng
return (cat, Active found rng' lbl (ppos+1) node)
in process strategy pinfo toks items chart
| otherwise =
if inRange (bounds lins) (lbl+1)
then univRule cat (Active (rng:found) EmptyRange (lbl+1) 0 node) chart
else univRule cat (Final (reverse (rng:found)) node) chart
where
(FRule _ _ args cat lins) = allRules pinfo ! ruleid
lin = lins ! lbl
univRule cat item@(Final found' node) chart =
case insertXChart chart item cat of
Nothing -> chart
Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _)) <- lookupXChartAct chart cat
let FRule _ _ args _ lins = allRules pinfo ! ruleid
FSymCat r d = lins ! l ! ppos
rng <- concatRange rng (found' !! r)
return (args !! d, Active found rng l (ppos+1) (updateChildren node d found'))
++
do guard (isBU strategy)
ruleid <- leftcornerCats pinfo ? cat
let FRule _ _ args _ lins = allRules pinfo ! ruleid
FSymCat r d = lins ! 0 ! 0
return (args !! d, Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid pinfo) d found'))
updateChildren :: SyntaxNode RuleId RangeRec -> Int -> RangeRec -> SyntaxNode RuleId RangeRec
updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs
in process strategy pinfo toks items chart
----------------------------------------------------------------------
-- * XChart
data Item
= Active RangeRec
Range
{-# UNPACK #-} !FIndex
{-# UNPACK #-} !FPointPos
(SyntaxNode RuleId RangeRec)
| Final RangeRec (SyntaxNode RuleId RangeRec)
deriving (Eq, Ord)
data XChart c = XChart !(ParseChart Item c) !(ParseChart Item c)
emptyXChart :: Ord c => XChart c
emptyXChart = XChart emptyChart emptyChart
insertXChart (XChart actives finals) item@(Active _ _ _ _ _) c =
case chartInsert actives item c of
Nothing -> Nothing
Just actives -> Just (XChart actives finals)
insertXChart (XChart actives finals) item@(Final _ _) c =
case chartInsert finals item c of
Nothing -> Nothing
Just finals -> Just (XChart actives finals)
lookupXChartAct (XChart actives finals) c = chartLookup actives c
lookupXChartFinal (XChart actives finals) c = chartLookup finals c
xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) (FCat,RangeRec)
xchart2syntaxchart (XChart actives finals) pinfo =
accumAssoc groupSyntaxNodes $
[ case node of
SNode ruleid rrecs -> let FRule fun prof rhs cat _ = allRules pinfo ! ruleid
in ((cat,found), SNode (fun,prof) (zip rhs rrecs))
SString s -> ((cat,found), SString s)
SInt n -> ((cat,found), SInt n)
SFloat f -> ((cat,found), SFloat f)
| (cat, Final found node) <- chartAssocs finals
]
literals :: ParserInfo -> Input FToken -> [(FCat,Item)]
literals pinfo toks =
[let (c,node) = lexer t in (c,Final [rng] node) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfo)]
where
lexer t =
case reads t of
[(n,"")] -> (fcatInt, SInt (n::Integer))
_ -> case reads t of
[(f,"")] -> (fcatFloat, SFloat (f::Double))
_ -> (fcatString,SString t)
----------------------------------------------------------------------
-- Earley --
-- called with all starting categories
initialTD :: ParserInfo -> [FCat] -> Input FToken -> [(FCat,Item)]
initialTD pinfo starts toks =
do cat <- starts
ruleid <- topdownRules pinfo ? cat
return (cat,Active [] (Range 0 0) 0 0 (emptyChildren ruleid pinfo))
----------------------------------------------------------------------
-- Kilbury --
initialBU :: ParserInfo -> Input FToken -> [(FCat,Item)]
initialBU pinfo toks =
do (tok,rngs) <- aAssocs (inputToken toks)
ruleid <- leftcornerTokens pinfo ? tok
let FRule _ _ _ cat _ = allRules pinfo ! ruleid
rng <- rngs
return (cat,Active [] rng 0 1 (emptyChildren ruleid pinfo))
++
do ruleid <- epsilonRules pinfo
let FRule _ _ _ cat _ = allRules pinfo ! ruleid
return (cat,Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo))

View File

@@ -0,0 +1,303 @@
----------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/05/13 12:40:19 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.6 $
--
-- Basic type declarations and functions for grammar formalisms
-----------------------------------------------------------------------------
module GF.GFCC.Parsing.FCFG.Utilities where
import Control.Monad
import Data.Array
import Data.List (groupBy)
import GF.Data.SortedList
import GF.Data.Assoc
import GF.Data.Utilities (sameLength, foldMerge, splitBy)
import GF.Infra.PrintClass
------------------------------------------------------------
-- ranges as single pairs
type RangeRec = [Range]
data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int
| EmptyRange
deriving (Eq, Ord)
makeRange :: Int -> Int -> Range
makeRange = Range
concatRange :: Range -> Range -> [Range]
concatRange EmptyRange rng = return rng
concatRange rng EmptyRange = return rng
concatRange (Range i j) (Range j' k) = [Range i k | j==j']
minRange :: Range -> Int
minRange (Range i j) = i
maxRange :: Range -> Int
maxRange (Range i j) = j
------------------------------------------------------------
-- * representaions of input tokens
data Input t = MkInput { inputBounds :: (Int, Int),
inputToken :: Assoc t [Range]
}
input :: Ord t => [t] -> Input t
input toks = MkInput inBounds inToken
where
inBounds = (0, length toks)
inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,tok) <- zip3 [0..] [1..] toks ]
inputMany :: Ord t => [[t]] -> Input t
inputMany toks = MkInput inBounds inToken
where
inBounds = (0, length toks)
inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,ts) <- zip3 [0..] [1..] toks, tok <- ts ]
------------------------------------------------------------
-- * representations of syntactical analyses
-- ** charts as finite maps over edges
-- | The values of the chart, a list of key-daughters pairs,
-- has unique keys. In essence, it is a map from 'n' to daughters.
-- The daughters should be a set (not necessarily sorted) of rhs's.
type SyntaxChart n e = Assoc e [SyntaxNode n [e]]
data SyntaxNode n e = SMeta
| SNode n [e]
| SString String
| SInt Integer
| SFloat Double
deriving (Eq,Ord)
groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]]
groupSyntaxNodes [] = []
groupSyntaxNodes (SNode n0 es0:xs) = (SNode n0 (es0:ess)) : groupSyntaxNodes xs'
where
(ess,xs') = span xs
span [] = ([],[])
span xs@(SNode n es:xs')
| n0 == n = let (ess,xs) = span xs' in (es:ess,xs)
| otherwise = ([],xs)
groupSyntaxNodes (SString s:xs) = (SString s) : groupSyntaxNodes xs
groupSyntaxNodes (SInt n:xs) = (SInt n) : groupSyntaxNodes xs
groupSyntaxNodes (SFloat f:xs) = (SFloat f) : groupSyntaxNodes xs
-- better(?) representation of forests:
-- data Forest n = F (SMap n (SList [Forest n])) Bool
-- ==
-- type Forest n = GeneralTrie n (SList [Forest n]) Bool
-- (the Bool == isMeta)
-- ** syntax forests
data SyntaxForest n = FMeta
| FNode n [[SyntaxForest n]]
-- ^ The outer list should be a set (not necessarily sorted)
-- of possible alternatives. Ie. the outer list
-- is a disjunctive node, and the inner lists
-- are (conjunctive) concatenative nodes
| FString String
| FInt Integer
| FFloat Double
deriving (Eq, Ord, Show)
instance Functor SyntaxForest where
fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests
fmap _ (FString s) = FString s
fmap _ (FInt n) = FInt n
fmap _ (FFloat f) = FFloat f
fmap _ (FMeta) = FMeta
forestName :: SyntaxForest n -> Maybe n
forestName (FNode n _) = Just n
forestName _ = Nothing
unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n)
unifyManyForests = foldM unifyForests FMeta
-- | two forests can be unified, if either is 'FMeta', or both have the same parent,
-- and all children can be unified
unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n)
unifyForests FMeta forest = return forest
unifyForests forest FMeta = return forest
unifyForests (FNode name1 children1) (FNode name2 children2)
| name1 == name2 && not (null children) = return $ FNode name1 children
where children = [ forests | forests1 <- children1, forests2 <- children2,
sameLength forests1 forests2,
forests <- zipWithM unifyForests forests1 forests2 ]
unifyForests (FString s1) (FString s2)
| s1 == s2 = return $ FString s1
unifyForests (FInt n1) (FInt n2)
| n1 == n2 = return $ FInt n1
unifyForests (FFloat f1) (FFloat f2)
| f1 == f2 = return $ FFloat f1
unifyForests _ _ = fail "forest unification failure"
{- måste tänka mer på detta:
compactForests :: Ord n => [SyntaxForest n] -> SList (SyntaxForest n)
compactForests = map joinForests . groupBy eqNames . sortForests
where eqNames f g = forestName f == forestName g
sortForests = foldMerge mergeForests [] . map return
mergeForests [] gs = gs
mergeForests fs [] = fs
mergeForests fs@(f:fs') gs@(g:gs')
= case forestName f `compare` forestName g of
LT -> f : mergeForests fs' gs
GT -> g : mergeForests fs gs'
EQ -> f : g : mergeForests fs' gs'
joinForests fs = case forestName (head fs) of
Nothing -> FMeta
Just name -> FNode name $
compactDaughters $
concat [ fss | FNode _ fss <- fs ]
compactDaughters fss = case head fss of
[] -> [[]]
[_] -> map return $ compactForests $ concat fss
_ -> nubsort fss
-}
-- ** syntax trees
data SyntaxTree n = TMeta
| TNode n [SyntaxTree n]
| TString String
| TInt Integer
| TFloat Double
deriving (Eq, Ord, Show)
instance Functor SyntaxTree where
fmap f (TNode n trees) = TNode (f n) $ map (fmap f) trees
fmap _ (TString s) = TString s
fmap _ (TInt n) = TInt n
fmap _ (TFloat f) = TFloat f
fmap _ (TMeta) = TMeta
treeName :: SyntaxTree n -> Maybe n
treeName (TNode n _) = Just n
treeName (TMeta) = Nothing
unifyManyTrees :: (Monad m, Eq n) => [SyntaxTree n] -> m (SyntaxTree n)
unifyManyTrees = foldM unifyTrees TMeta
-- | two trees can be unified, if either is 'TMeta',
-- or both have the same parent, and their children can be unified
unifyTrees :: (Monad m, Eq n) => SyntaxTree n -> SyntaxTree n -> m (SyntaxTree n)
unifyTrees TMeta tree = return tree
unifyTrees tree TMeta = return tree
unifyTrees (TNode name1 children1) (TNode name2 children2)
| name1 == name2 && sameLength children1 children2
= liftM (TNode name1) $ zipWithM unifyTrees children1 children2
unifyTrees (TString s1) (TString s2)
| s1 == s2 = return (TString s1)
unifyTrees (TInt n1) (TInt n2)
| n1 == n2 = return (TInt n1)
unifyTrees (TFloat f1) (TFloat f2)
| f1 == f2 = return (TFloat f1)
unifyTrees _ _ = fail "tree unification failure"
-- ** conversions between representations
chart2forests :: (Ord n, Ord e) =>
SyntaxChart n e -- ^ The complete chart
-> (e -> Bool) -- ^ When is an edge 'FMeta'?
-> [e] -- ^ The starting edges
-> SList (SyntaxForest n) -- ^ The result has unique keys, ie. all 'n' are joined together.
-- In essence, the result is a map from 'n' to forest daughters
-- simplest implementation
chart2forests chart isMeta = concatMap (edge2forests [])
where edge2forests edges edge
| isMeta edge = [FMeta]
| edge `elem` edges = []
| otherwise = map (item2forest (edge:edges)) $ chart ? edge
item2forest edges (SMeta) = FMeta
item2forest edges (SNode name children) =
FNode name $ children >>= mapM (edge2forests edges)
item2forest edges (SString s) = FString s
item2forest edges (SInt n) = FInt n
item2forest edges (SFloat f) = FFloat f
{- -before AR inserted peb's patch 8/7/2007, this was:
chart2forests chart isMeta = concatMap edge2forests
where edge2forests edge = if isMeta edge then [FMeta]
else map item2forest $ chart ? edge
item2forest (SMeta) = FMeta
item2forest (SNode name children) = FNode name $ children >>= mapM edge2forests
item2forest (SString s) = FString s
item2forest (SInt n) = FInt n
item2forest (SFloat f) = FFloat f
-}
{-
-- more intelligent(?) implementation,
-- requiring that charts and forests are sorted maps and sorted sets
chart2forests chart isMeta = es2fs
where e2fs e = if isMeta e then [FMeta] else map i2f $ chart ? e
es2fs es = if null metas then fs else FMeta : fs
where (metas, nonMetas) = splitBy isMeta es
fs = map i2f $ unionMap (<++>) $ map (chart ?) nonMetas
i2f (name, children) = FNode name $
case head children of
[] -> [[]]
[_] -> map return $ es2fs $ concat children
_ -> children >>= mapM e2fs
-}
forest2trees :: SyntaxForest n -> SList (SyntaxTree n)
forest2trees (FNode n forests) = map (TNode n) $ forests >>= mapM forest2trees
forest2trees (FString s) = [TString s]
forest2trees (FInt n) = [TInt n]
forest2trees (FFloat f) = [TFloat f]
forest2trees (FMeta) = [TMeta]
------------------------------------------------------------
-- pretty-printing
instance Print Range where
prt (Range i j) = "(" ++ show i ++ "-" ++ show j ++ ")"
prt (EmptyRange) = "(?)"
instance (Print s) => Print (SyntaxTree s) where
prt (TNode s trees)
| null trees = prt s
| otherwise = "(" ++ prt s ++ prtBefore " " trees ++ ")"
prt (TString s) = show s
prt (TInt n) = show n
prt (TFloat f) = show f
prt (TMeta) = "?"
prtList = prtAfter "\n"
instance (Print s) => Print (SyntaxForest s) where
prt (FNode s []) = "(" ++ prt s ++ " - ERROR: null forests)"
prt (FNode s [[]]) = prt s
prt (FNode s [forests]) = "(" ++ prt s ++ prtBefore " " forests ++ ")"
prt (FNode s children) = "{" ++ prtSep " | " [ prt s ++ prtBefore " " forests |
forests <- children ] ++ "}"
prt (FString s) = show s
prt (FInt n) = show n
prt (FFloat f) = show f
prt (FMeta) = "?"
prtList = prtAfter "\n"

View File

@@ -4,9 +4,9 @@ import GF.GFCC.CId
import GF.GFCC.DataGFCC
import GF.GFCC.Raw.AbsGFCCRaw
import GF.GFCC.BuildParser (buildParserInfo)
import GF.GFCC.Parsing.FCFG.Utilities
import GF.Infra.PrintClass
import GF.Formalism.Utilities
import qualified Data.Array as Array
import qualified Data.Map as Map