1
0
forked from GitHub/gf-core

the new optimized incremental parser and the common subexpression elimination optimization in PMCFG

This commit is contained in:
krasimir
2008-10-14 08:00:50 +00:00
parent 0c66ad597d
commit 4573d10442
17 changed files with 654 additions and 526 deletions

View File

@@ -4,21 +4,19 @@
--
-- Approximates PGF grammars with context-free grammars.
----------------------------------------------------------------------
module GF.Speech.PGFToCFG (bnfPrinter,
fcfgPrinter, pgfToCFG) where
module GF.Speech.PGFToCFG (bnfPrinter, pgfToCFG) where
import PGF.CId
import PGF.Data as PGF
import PGF.Macros
import GF.Data.MultiMap (MultiMap)
import qualified GF.Data.MultiMap as MultiMap
import GF.Infra.Ident
import GF.Speech.CFG
import Data.Array as Array
import Data.Array.IArray as Array
import Data.List
import Data.Map (Map)
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import Data.Maybe
import Data.Set (Set)
import qualified Data.Set as Set
@@ -29,21 +27,6 @@ bnfPrinter = toBNF id
toBNF :: (CFG -> CFG) -> PGF -> CId -> String
toBNF f pgf cnc = prCFG $ f $ pgfToCFG pgf cnc
-- FIXME: move this somewhere else
fcfgPrinter :: PGF -> CId -> String
fcfgPrinter pgf cnc = unlines (map showRule rules)
where
pinfo = fromMaybe (error "fcfgPrinter") (lookParser pgf cnc)
rules :: [FRule]
rules = Array.elems (PGF.allRules pinfo)
showRule (FRule cid ps cs fc arr) = prCId cid ++ " " ++ show ps ++ ". " ++ showCat fc ++ " ::= [" ++ concat (intersperse ", " (map showCat cs)) ++ "] = " ++ showLin arr
where
showLin arr = "[" ++ concat (intersperse ", " [ unwords (map showFSymbol (Array.elems r)) | r <- Array.elems arr]) ++ "]"
showFSymbol (FSymCat i j) = showCat (cs!!j) ++ "_" ++ show j ++ "." ++ show i
showFSymbol (FSymTok t) = t
showCat c = "C" ++ show c
pgfToCFG :: PGF
-> CId -- ^ Concrete syntax name
@@ -52,12 +35,13 @@ pgfToCFG pgf lang = mkCFG (lookStartCat pgf) extCats (startRules ++ concatMap fr
where
pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang)
rules :: [FRule]
rules = Array.elems (PGF.allRules pinfo)
rules :: [(FCat,Production)]
rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.productions pinfo)
, prod <- Set.toList set]
fcatCats :: Map FCat Cat
fcatCats = Map.fromList [(fc, prCId c ++ "_" ++ show i)
| (c,fcs) <- Map.toList (startupCats pinfo),
| (c,fcs) <- Map.toList (startCats pinfo),
(fc,i) <- zip fcs [1..]]
fcatCat :: FCat -> Cat
@@ -69,49 +53,61 @@ pgfToCFG pgf lang = mkCFG (lookStartCat pgf) extCats (startRules ++ concatMap fr
-- gets the number of fields in the lincat for the given category
catLinArity :: FCat -> Int
catLinArity c = maximum (1:[rangeSize (bounds rhs) | FRule _ _ _ _ rhs <- Map.findWithDefault [] c rulesByFCat])
catLinArity c = maximum (1:[rangeSize (bounds rhs) | (FFun _ _ rhs, _) <- topdownRules c])
topdownRules cat = f cat []
where
f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo))
g (FApply funid args) rules = (functions pinfo ! funid,args) : rules
g (FCoerce cat) rules = f cat rules
rulesByFCat :: Map FCat [FRule]
rulesByFCat = Map.fromListWith (++) [(c,[r]) | r@(FRule _ _ _ c _) <- rules]
extCats :: Set Cat
extCats = Set.fromList $ map lhsCat startRules
startRules :: [CFRule]
startRules = [CFRule (prCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0)
| (c,fcs) <- Map.toList (startupCats pinfo),
| (c,fcs) <- Map.toList (startCats pinfo),
fc <- fcs, not (isLiteralFCat fc),
r <- [0..catLinArity fc-1]]
fruleToCFRule :: FRule -> [CFRule]
fruleToCFRule (FRule f ps args c rhs) =
fruleToCFRule :: (FCat,Production) -> [CFRule]
fruleToCFRule (c,FApply funid args) =
[CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm (map (fixProfile row) ps))
| (l,row) <- Array.assocs rhs, not (containsLiterals row)]
| (l,seqid) <- Array.assocs rhs
, let row = sequences pinfo ! seqid
, not (containsLiterals row)]
where
FFun f ps rhs = functions pinfo ! funid
mkRhs :: Array FPointPos FSymbol -> [CFSymbol]
mkRhs = map fsymbolToSymbol . Array.elems
containsLiterals :: Array FPointPos FSymbol -> Bool
containsLiterals row = any isLiteralFCat [args!!n | FSymCat _ n <- Array.elems row]
containsLiterals row = any isLiteralFCat [args!!n | FSymCat n _ <- Array.elems row]
fsymbolToSymbol :: FSymbol -> CFSymbol
fsymbolToSymbol (FSymCat l n) = NonTerminal (fcatToCat (args!!n) l)
fsymbolToSymbol (FSymTok t) = Terminal t
fsymbolToSymbol (FSymCat n l) = NonTerminal (fcatToCat (args!!n) l)
fsymbolToSymbol (FSymTok (KS t)) = Terminal t
fixProfile :: Array FPointPos FSymbol -> Profile -> Profile
fixProfile row = concatMap positions
where
nts = zip [0..] [nt | nt@(FSymCat _ _) <- Array.elems row ]
positions i = [k | (k,FSymCat _ j) <- nts, j == i]
nts = zip [0..] [nt | nt@(FSymCat _ _) <- Array.elems row]
positions i = [k | (k,FSymCat j _) <- nts, j == i]
profilesToTerm :: [Profile] -> CFTerm
profilesToTerm [[n]] | f == wildCId = CFRes n
profilesToTerm ps = CFObj f (zipWith profileToTerm argTypes ps)
where (argTypes,_) = catSkeleton $ lookType pgf f
profileToTerm :: CId -> Profile -> CFTerm
profileToTerm t [] = CFMeta t
profileToTerm _ xs = CFRes (last xs) -- FIXME: unify
fruleToCFRule (c,FCoerce c') =
[CFRule (fcatToCat c l) [NonTerminal (fcatToCat c' l)] (CFRes 0)
| l <- [0..catLinArity c-1]]
isLiteralFCat :: FCat -> Bool
isLiteralFCat = (`elem` [fcatString, fcatInt, fcatFloat, fcatVar])