merge FCFGParsing with GF.Parsing.FCFG

This commit is contained in:
kr.angelov
2007-09-24 14:36:19 +00:00
parent ee58d52af0
commit 133963751b
4 changed files with 135 additions and 161 deletions

View File

@@ -1,114 +0,0 @@
module GF.Canon.GFCC.FCFGParsing (parserLang,PF.buildFCFPInfo,PF.FCFPInfo) where
import GF.Canon.GFCC.DataGFCC
import GF.Canon.GFCC.AbsGFCC
import GF.Conversion.SimpleToFCFG (convertGrammar,FCat(..))
import GF.Data.SortedList
import GF.Data.Assoc
import GF.Formalism.Utilities --(forest2trees)
import qualified GF.Data.Operations as Op
import GF.Formalism.FCFG
import qualified GF.Parsing.FCFG as PF
import GF.Canon.GFCC.ErrM
import GF.Infra.PrintClass
parserLang :: GFCC -> CId -> CFCat -> [CFTok] -> Err [Exp]
parserLang mgr lang = parse info where
fcfgs = convertGrammar mgr
info = PF.buildFCFPInfo $ maybe (error "no parser") id $ lookup lang fcfgs
type CFTok = String ----
type CFCat = CId ----
type Fun = CId ----
cfCat2Ident = id ----
wordsCFTok :: CFTok -> [String]
wordsCFTok = return ----
-- main parsing function
parse ::
-- String -> -- ^ parsing algorithm (mcfg or cfg)
-- String -> -- ^ parsing strategy
PF.FCFPInfo -> -- ^ compiled grammar (fcfg)
-- Ident.Ident -> -- ^ abstract module name
CFCat -> -- ^ starting category
[CFTok] -> -- ^ input tokens
Err [Exp] -- ^ resulting GF terms
parse pinfo startCat inString = e2e $
do let inTokens = inputMany (map wordsCFTok inString)
forests <- selectParser pinfo startCat inTokens
let filteredForests = forests >>= applyProfileToForest
trees = nubsort $ filteredForests >>= forest2trees
return $ map tree2term trees
-- parsing via FCFG
selectParser pinfo startCat inTokens
= do let startCats = filter isStart $ PF.grammarCats fcfpi
isStart cat = cat' == cfCat2Ident startCat
where CId x = fcat2cid cat
cat' = CId x
fcfpi = pinfo
fcfParser <- PF.parseFCF "bottomup"
let chart = fcfParser fcfpi startCats inTokens
(i,j) = inputBounds inTokens
finalEdges = [PF.makeFinalEdge cat i j | cat <- startCats]
return $ map cnv_forests $ chart2forests chart (const False) finalEdges
cnv_forests FMeta = FMeta
cnv_forests (FNode (Name (CId n) p) fss) = FNode (Name (CId n) (map cnv_profile p)) (map (map cnv_forests) fss)
cnv_forests (FString x) = FString x
cnv_forests (FInt x) = FInt x
cnv_forests (FFloat x) = FFloat x
cnv_profile (Unify x) = Unify x
cnv_profile (Constant x) = Constant (cnv_forests2 x)
cnv_forests2 FMeta = FMeta
cnv_forests2 (FNode (CId n) fss) = FNode (CId n) (map (map cnv_forests2) fss)
cnv_forests2 (FString x) = FString x
cnv_forests2 (FInt x) = FInt x
cnv_forests2 (FFloat x) = FFloat x
----------------------------------------------------------------------
-- parse trees to GFCC terms
tree2term :: SyntaxTree Fun -> Exp
tree2term (TNode f ts) = Tr (AC f) (map tree2term ts)
tree2term (TString s) = Tr (AS s) []
tree2term (TInt n) = Tr (AI n) []
tree2term (TFloat f) = Tr (AF f) []
tree2term (TMeta) = Tr AM []
----------------------------------------------------------------------
-- conversion and unification of forests
-- simplest implementation
applyProfileToForest :: SyntaxForest FName -> [SyntaxForest Fun]
applyProfileToForest (FNode name@(Name fun profile) children)
| isCoercionF name = concat chForests
| otherwise = [ FNode fun chForests | not (null chForests) ]
where chForests = concat [ applyProfileM unifyManyForests profile forests |
forests0 <- children,
forests <- mapM applyProfileToForest forests0 ]
applyProfileToForest (FString s) = [FString s]
applyProfileToForest (FInt n) = [FInt n]
applyProfileToForest (FFloat f) = [FFloat f]
applyProfileToForest (FMeta) = [FMeta]
---
e2e :: Op.Err a -> Err a
e2e e = case e of
Op.Ok v -> Ok v
Op.Bad s -> Bad s

View File

@@ -21,7 +21,7 @@ import GF.Canon.GFCC.AbsGFCC
import GF.Canon.GFCC.ParGFCC
import GF.Canon.GFCC.PrintGFCC
import GF.Canon.GFCC.ErrM
import GF.Canon.GFCC.FCFGParsing
import GF.Parsing.FCFG
import qualified GF.Canon.GFCC.GenGFCC as G
import GF.Conversion.SimpleToFCFG (convertGrammar,FCat(..))
@@ -82,7 +82,11 @@ file2gfcc f =
linearize mgr lang = GF.Canon.GFCC.DataGFCC.linearize (gfcc mgr) (CId lang)
parse mgr lang cat s =
err error id $ parserLang (gfcc mgr) (CId lang) (CId cat) (words s)
case lookup lang (parsers mgr) of
Nothing -> error "no parser"
Just pinfo -> case parseFCF "bottomup" pinfo (CId cat) (words s) of
Ok x -> x
Bad s -> error s
linearizeAll mgr = map snd . linearizeAllLang mgr
linearizeAllLang mgr t =

View File

@@ -8,25 +8,91 @@
-----------------------------------------------------------------------------
module GF.Parsing.FCFG
(parseFCF, module GF.Parsing.FCFG.PInfo) where
(parseFCF,buildFCFPInfo,FCFPInfo(..),makeFinalEdge) where
import GF.Data.Operations (Err(..))
import GF.Data.SortedList
import GF.Data.Assoc
import GF.Infra.PrintClass
import GF.Formalism.FCFG
import GF.Formalism.Utilities
import GF.Parsing.FCFG.PInfo
import qualified GF.Parsing.FCFG.Active as Active
import GF.Infra.PrintClass
import GF.Parsing.FCFG.PInfo
import GF.Canon.GFCC.AbsGFCC
import GF.Canon.GFCC.ErrM
----------------------------------------------------------------------
-- parsing
parseFCF :: String -> Err (FCFParser)
parseFCF prs | prs `elem` strategies = Ok $ parseFCF' prs
| otherwise = Bad $ "FCFG parsing strategy not defined: " ++ prs
-- main parsing function
strategies = words "bottomup topdown"
parseFCF ::
String -> -- ^ parsing strategy
FCFPInfo -> -- ^ compiled grammar (fcfg)
CId -> -- ^ starting category
[String] -> -- ^ input tokens
Err [Exp] -- ^ resulting GF terms
parseFCF strategy pinfo startCat inString =
do let inTokens = input inString
startCats = filter isStart $ grammarCats pinfo
isStart cat = fcat2cid cat == startCat
fcfParser <- parseFCF strategy
let chart = fcfParser pinfo startCats inTokens
(i,j) = inputBounds inTokens
finalEdges = [makeFinalEdge cat i j | cat <- startCats]
forests = map cnv_forests $ chart2forests chart (const False) finalEdges
filteredForests = forests >>= applyProfileToForest
trees = nubsort $ filteredForests >>= forest2trees
return $ map tree2term trees
where
parseFCF :: String -> Err (FCFParser)
parseFCF "bottomup" = Ok $ Active.parse "b"
parseFCF "topdown" = Ok $ Active.parse "t"
parseFCF strat = Bad $ "FCFG parsing strategy not defined: " ++ strat
parseFCF' :: String -> FCFParser
parseFCF' "bottomup" pinfo starts toks = Active.parse "b" pinfo starts toks
parseFCF' "topdown" pinfo starts toks = Active.parse "t" pinfo starts toks
cnv_forests FMeta = FMeta
cnv_forests (FNode (Name (CId n) p) fss) = FNode (Name (CId n) (map cnv_profile p)) (map (map cnv_forests) fss)
cnv_forests (FString x) = FString x
cnv_forests (FInt x) = FInt x
cnv_forests (FFloat x) = FFloat x
cnv_profile (Unify x) = Unify x
cnv_profile (Constant x) = Constant (cnv_forests2 x)
cnv_forests2 FMeta = FMeta
cnv_forests2 (FNode (CId n) fss) = FNode (CId n) (map (map cnv_forests2) fss)
cnv_forests2 (FString x) = FString x
cnv_forests2 (FInt x) = FInt x
cnv_forests2 (FFloat x) = FFloat x
----------------------------------------------------------------------
-- parse trees to GFCC terms
tree2term :: SyntaxTree CId -> Exp
tree2term (TNode f ts) = Tr (AC f) (map tree2term ts)
tree2term (TString s) = Tr (AS s) []
tree2term (TInt n) = Tr (AI n) []
tree2term (TFloat f) = Tr (AF f) []
tree2term (TMeta) = Tr AM []
----------------------------------------------------------------------
-- conversion and unification of forests
-- simplest implementation
applyProfileToForest :: SyntaxForest FName -> [SyntaxForest CId]
applyProfileToForest (FNode name@(Name fun profile) children)
| isCoercionF name = concat chForests
| otherwise = [ FNode fun chForests | not (null chForests) ]
where chForests = concat [ applyProfileM unifyManyForests profile forests |
forests0 <- children,
forests <- mapM applyProfileToForest forests0 ]
applyProfileToForest (FString s) = [FString s]
applyProfileToForest (FInt n) = [FInt n]
applyProfileToForest (FFloat f) = [FFloat f]
applyProfileToForest (FMeta) = [FMeta]

View File

@@ -25,8 +25,9 @@ import qualified GF.Grammar.Grammar as Grammar
import qualified GF.Grammar.Macros as Macros
import qualified GF.Canon.AbsGFC as AbsGFC
import qualified GF.Canon.GFCC.AbsGFCC as AbsGFCC
import qualified GF.Canon.GFCC.ErrM as ErrM
import qualified GF.Infra.Ident as Ident
import GF.CF.CFIdent (CFCat, cfCat2Ident, CFTok, wordsCFTok)
import GF.CF.CFIdent (CFCat, cfCat2Ident, CFTok, wordsCFTok, prCFTok)
import GF.Data.SortedList
import GF.Data.Assoc
@@ -73,26 +74,12 @@ parse :: String -- ^ parsing algorithm (mcfg or cfg)
-> [CFTok] -- ^ input tokens
-> Err [Grammar.Term] -- ^ resulting GF terms
parse prs strategy pinfo abs startCat inString =
do let inTokens = tracePrt "Parsing.GFC - input tokens" prt $
inputMany (map wordsCFTok inString)
forests <- selectParser prs strategy pinfo startCat inTokens
traceM "Parsing.GFC - nr. unfiltered forests" (prt (length forests))
traceM "Parsing.GFC - nr. unfiltered trees" (prt (length (forests >>= forest2trees)))
let filteredForests = tracePrt "Parsing.GFC - nr. forests" (prt . length) $
forests >>= applyProfileToForest
-- compactFs = tracePrt "#compactForests" (prt . length) $
-- tracePrt "compactForests" (prtBefore "\n") $
-- compactForests forests
trees = tracePrt "Parsing.GFC - nr. trees" (prt . length) $
nubsort $ filteredForests >>= forest2trees
-- compactFs >>= forest2trees
return $ map (tree2term abs) trees
-- parsing via CFG
selectParser "c" strategy pinfo startCat inTokens
= do let startCats = tracePrt "Parsing.GFC - starting CF categories" prt $
parse "c" strategy pinfo abs startCat inString
= do let inTokens = tracePrt "Parsing.GFC - input tokens" prt $
inputMany (map wordsCFTok inString)
let startCats = tracePrt "Parsing.GFC - starting CF categories" prt $
filter isStart $ map fst $ aAssocs $ PC.topdownRules cfpi
isStart cat = ccat2scat cat == cfCat2Ident startCat
cfpi = cfPInfo pinfo
@@ -103,11 +90,25 @@ selectParser "c" strategy pinfo startCat inTokens
C.grammar2chart cfChart
finalEdges = tracePrt "Parsing.GFC - final chart edges" prt $
map (uncurry Edge (inputBounds inTokens)) startCats
return $ chart2forests chart (const False) finalEdges
forests = chart2forests chart (const False) finalEdges
traceM "Parsing.GFC - nr. unfiltered forests" (prt (length forests))
traceM "Parsing.GFC - nr. unfiltered trees" (prt (length (forests >>= forest2trees)))
let filteredForests = tracePrt "Parsing.GFC - nr. forests" (prt . length) $
forests >>= applyProfileToForest
-- compactFs = tracePrt "#compactForests" (prt . length) $
-- tracePrt "compactForests" (prtBefore "\n") $
-- compactForests forests
trees = tracePrt "Parsing.GFC - nr. trees" (prt . length) $
nubsort $ filteredForests >>= forest2trees
-- compactFs >>= forest2trees
return $ map (tree2term abs) trees
-- parsing via MCFG
selectParser "m" strategy pinfo startCat inTokens
= do let startCats = tracePrt "Parsing.GFC - starting MCF categories" prt $
parse "m" strategy pinfo abs startCat inString
= do let inTokens = tracePrt "Parsing.GFC - input tokens" prt $
inputMany (map wordsCFTok inString)
let startCats = tracePrt "Parsing.GFC - starting MCF categories" prt $
filter isStart $ PM.grammarCats mcfpi
isStart cat = mcat2scat cat == cfCat2Ident startCat
mcfpi = mcfPInfo pinfo
@@ -116,20 +117,28 @@ selectParser "m" strategy pinfo startCat inTokens
finalEdges = tracePrt "Parsing.GFC - final chart edges" prt $
[ PM.makeFinalEdge cat lbl (inputBounds inTokens) |
cat@(MCat _ [lbl]) <- startCats ]
return $ chart2forests chart (const False) finalEdges
forests = chart2forests chart (const False) finalEdges
traceM "Parsing.GFC - nr. unfiltered forests" (prt (length forests))
traceM "Parsing.GFC - nr. unfiltered trees" (prt (length (forests >>= forest2trees)))
let filteredForests = tracePrt "Parsing.GFC - nr. forests" (prt . length) $
forests >>= applyProfileToForest
-- compactFs = tracePrt "#compactForests" (prt . length) $
-- tracePrt "compactForests" (prtBefore "\n") $
-- compactForests forests
trees = tracePrt "Parsing.GFC - nr. trees" (prt . length) $
nubsort $ filteredForests >>= forest2trees
-- compactFs >>= forest2trees
return $ map (tree2term abs) trees
-- parsing via FCFG
selectParser "f" strategy pinfo startCat inTokens
= do let startCats = filter isStart $ PF.grammarCats fcfpi
isStart cat = cat' == cfCat2Ident startCat
where AbsGFCC.CId x = fcat2cid cat
cat' = Ident.IC x
fcfpi = fcfPInfo pinfo
fcfParser <- PF.parseFCF strategy
let chart = fcfParser fcfpi startCats inTokens
(i,j) = inputBounds inTokens
finalEdges = [PF.makeFinalEdge cat i j | cat <- startCats]
return $ map cnv_forests $ chart2forests chart (const False) finalEdges
parse "f" strategy pinfo abs startCat inString =
let Ident.IC x = cfCat2Ident startCat
cat' = AbsGFCC.CId x
in case PF.parseFCF strategy (fcfPInfo pinfo) cat' (map prCFTok inString) of
ErrM.Ok es -> Ok (map (exp2term abs) es)
ErrM.Bad msg -> Bad msg
-- error parser:
selectParser prs strategy _ _ _ = Bad $ "Parser '" ++ prs ++ "' not defined with strategy: " ++ strategy
@@ -159,6 +168,15 @@ tree2term abs (TInt n) = Macros.int2term n
tree2term abs (TFloat f) = Macros.float2term f
tree2term abs (TMeta) = Macros.mkMeta 0
exp2term :: Ident.Ident -> AbsGFCC.Exp -> Grammar.Term
exp2term abs (AbsGFCC.Tr a es) = Macros.mkApp (atom2term abs a) (map (exp2term abs) es)
atom2term :: Ident.Ident -> AbsGFCC.Atom -> Grammar.Term
atom2term abs (AbsGFCC.AC (AbsGFCC.CId f)) = Macros.qq (abs,Ident.IC f)
atom2term abs (AbsGFCC.AS s) = Macros.string2term s
atom2term abs (AbsGFCC.AI n) = Macros.int2term n
atom2term abs (AbsGFCC.AF f) = Macros.float2term f
atom2term abs AbsGFCC.AM = Macros.mkMeta 0
----------------------------------------------------------------------
-- conversion and unification of forests