1
0
forked from GitHub/gf-core

Added still unused implementation of Moore's LCLR algorithm for left recursion elimination. Fixed top category generation for SRG (included LR-elimination-added categories before).

This commit is contained in:
bringert
2006-12-17 19:18:28 +00:00
parent 3797fc35c5
commit 5b8680b8fd
6 changed files with 114 additions and 43 deletions

View File

@@ -75,7 +75,7 @@ makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
where trSet cs | allXLinear cs rs = rs where trSet cs | allXLinear cs rs = rs
| otherwise = concatMap handleCat csl | otherwise = concatMap handleCat csl
where csl = Set.toList cs where csl = Set.toList cs
rs = catSetRules g csl rs = catSetRules g cs
handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e
++ concatMap (makeRightLinearRules c) (catRules g c) ++ concatMap (makeRightLinearRules c) (catRules g c)
where c' = newCat c where c' = newCat c
@@ -90,15 +90,6 @@ makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
| otherwise = [CFRule c rhs n] | otherwise = [CFRule c rhs n]
newCat c = c ++ "$" newCat c = c ++ "$"
-- | Get the sets of mutually recursive non-terminals for a grammar.
mutRecCats :: Bool -- ^ If true, all categories will be in some set.
-- If false, only recursive categories will be included.
-> CFRules -> [Set Cat_]
mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
where r = mkRel [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss]
allCats = map fst g
refl = if incAll then reflexiveClosure_ allCats else reflexiveSubrelation
-- --
-- * Compile strongly regular grammars to NFAs -- * Compile strongly regular grammars to NFAs
-- --
@@ -271,7 +262,7 @@ mutRecSets g = Map.fromList . concatMap mkMutRecSet
where where
mkMutRecSet cs = [ (c,ms) | c <- csl ] mkMutRecSet cs = [ (c,ms) | c <- csl ]
where csl = Set.toList cs where csl = Set.toList cs
rs = catSetRules g csl rs = catSetRules g cs
(nrs,rrs) = partition (ruleIsNonRecursive cs) rs (nrs,rrs) = partition (ruleIsNonRecursive cs) rs
ms = MutRecSet { ms = MutRecSet {
mrCats = cs, mrCats = cs,

View File

@@ -44,7 +44,7 @@ jsgfPrinter name start opts sisr probs cfg = prJSGF srg sisr ""
where srg = makeSimpleSRG name start opts probs cfg where srg = makeSimpleSRG name start opts probs cfg
prJSGF :: SRG -> Maybe SISRFormat -> ShowS prJSGF :: SRG -> Maybe SISRFormat -> ShowS
prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) sisr prJSGF srg@(SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) sisr
= header . nl = header . nl
. mainCat . nl . mainCat . nl
. unlinesS topCatRules . nl . unlinesS topCatRules . nl
@@ -62,9 +62,8 @@ prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) si
-- FIXME: use the probability -- FIXME: use the probability
prAlt (EBnfSRGAlt mp n rhs) = tag sisr (profileInitSISR n) . showChar ' '. prItem sisr rhs prAlt (EBnfSRGAlt mp n rhs) = tag sisr (profileInitSISR n) . showChar ' '. prItem sisr rhs
topCatRules = [rule True (catFormId tc) (map (it tc) cs) | (tc,cs) <- topCats] topCatRules = [rule True (catFormId tc) (map (it tc) cs) | (tc,cs) <- srgTopCats srg]
where topCats = buildMultiMap [(cfgCatToGFCat origCat, cat) | SRGRule cat origCat _ <- rs] where it i c = prCat c . tag sisr [(EThis :. catFieldId i) := (ERef c)]
it i c = prCat c . tag sisr [(EThis :. catFieldId i) := (ERef c)]
catFormId :: String -> String catFormId :: String -> String
catFormId = (++ "_cat") catFormId = (++ "_cat")

View File

@@ -45,11 +45,11 @@ srgsXmlPrinter name start opts sisr probs cfg = prSrgsXml sisr srg ""
where srg = makeSRG name start opts probs cfg where srg = makeSRG name start opts probs cfg
prSrgsXml :: Maybe SISRFormat -> SRG -> ShowS prSrgsXml :: Maybe SISRFormat -> SRG -> ShowS
prSrgsXml sisr (SRG{grammarName=name,startCat=start, prSrgsXml sisr srg@(SRG{grammarName=name,startCat=start,
origStartCat=origStart,grammarLanguage=l,rules=rs}) origStartCat=origStart,grammarLanguage=l,rules=rs})
= showsXMLDoc $ optimizeSRGS xmlGr = showsXMLDoc $ optimizeSRGS xmlGr
where where
root = cfgCatToGFCat origStart Just root = cfgCatToGFCat origStart
xmlGr = grammar sisr (catFormId root) l $ xmlGr = grammar sisr (catFormId root) l $
[meta "description" [meta "description"
("SRGS XML speech recognition grammar for " ++ name ("SRGS XML speech recognition grammar for " ++ name
@@ -62,9 +62,8 @@ prSrgsXml sisr (SRG{grammarName=name,startCat=start,
comments ["Category " ++ origCat] ++ [rule cat (prRhs $ ebnfSRGAlts alts)] comments ["Category " ++ origCat] ++ [rule cat (prRhs $ ebnfSRGAlts alts)]
prRhs rhss = [oneOf (map (mkProd sisr) rhss)] prRhs rhss = [oneOf (map (mkProd sisr) rhss)]
-- externally visible rules for each of the GF categories -- externally visible rules for each of the GF categories
topCatRules = [topRule tc [oneOf (map (it tc) cs)] | (tc,cs) <- topCats] topCatRules = [topRule tc [oneOf (map (it tc) cs)] | (tc,cs) <- srgTopCats srg]
where topCats = buildMultiMap [(cfgCatToGFCat origCat, cat) | SRGRule cat origCat _ <- rs] where it i c = Tag "item" [] [Tag "ruleref" [("uri","#" ++ c)] [],
it i c = Tag "item" [] [Tag "ruleref" [("uri","#" ++ c)] [],
tag sisr [(EThis :. catFieldId i) := (ERef c)]] tag sisr [(EThis :. catFieldId i) := (ERef c)]]
topRule i is = Tag "rule" [("id",catFormId i),("scope","public")] is topRule i is = Tag "rule" [("id",catFormId i),("scope","public")] is

View File

@@ -12,12 +12,13 @@
-- A simple module for relations. -- A simple module for relations.
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
module GF.Speech.Relation (Rel, mkRel module GF.Speech.Relation (Rel, mkRel, mkRel'
, allRelated , isRelatedTo , allRelated , isRelatedTo
, transitiveClosure , transitiveClosure
, reflexiveClosure, reflexiveClosure_ , reflexiveClosure, reflexiveClosure_
, symmetricClosure , symmetricClosure
, symmetricSubrelation, reflexiveSubrelation , symmetricSubrelation, reflexiveSubrelation
, reflexiveElements
, equivalenceClasses , equivalenceClasses
, isTransitive, isReflexive, isSymmetric , isTransitive, isReflexive, isSymmetric
, isEquivalence , isEquivalence
@@ -38,6 +39,11 @@ type Rel a = Map a (Set a)
mkRel :: Ord a => [(a,a)] -> Rel a mkRel :: Ord a => [(a,a)] -> Rel a
mkRel ps = relates ps Map.empty mkRel ps = relates ps Map.empty
-- | Creates a relation from a list pairs of elements and the elements
-- related to them.
mkRel' :: Ord a => [(a,[a])] -> Rel a
mkRel' xs = Map.fromListWith Set.union [(x,Set.fromList ys) | (x,ys) <- xs]
relToList :: Rel a -> [(a,a)] relToList :: Rel a -> [(a,a)]
relToList r = [ (x,y) | (x,ys) <- Map.toList r, y <- Set.toList ys ] relToList r = [ (x,y) | (x,ys) <- Map.toList r, y <- Set.toList ys ]

View File

@@ -22,7 +22,7 @@ module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..),
SRGCat, SRGNT, SRGCat, SRGNT,
makeSimpleSRG, makeSRG makeSimpleSRG, makeSRG
, lookupFM_, prtS , lookupFM_, prtS
, topDownFilter, cfgCatToGFCat , topDownFilter, cfgCatToGFCat, srgTopCats
, EBnfSRGAlt(..), EBnfSRGItem , EBnfSRGAlt(..), EBnfSRGItem
, ebnfSRGAlts , ebnfSRGAlts
) where ) where
@@ -44,7 +44,7 @@ import GF.Infra.Option
import GF.Probabilistic.Probabilistic (Probs) import GF.Probabilistic.Probabilistic (Probs)
import Data.List import Data.List
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe, maybeToList)
import Data.Map (Map) import Data.Map (Map)
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.Set (Set) import Data.Set (Set)
@@ -86,8 +86,11 @@ makeSimpleSRG :: Ident -- ^ Grammar name
-> Maybe Probs -- ^ Probabilities -> Maybe Probs -- ^ Probabilities
-> CGrammar -- ^ A context-free grammar -> CGrammar -- ^ A context-free grammar
-> SRG -> SRG
makeSimpleSRG makeSimpleSRG i origStart opts probs =
= makeSRG_ (removeLeftRecursion . removeIdenticalRules . removeEmptyCats . removeCycles) makeSRG_ i origStart opts probs
. removeLeftRecursion origStart . removeIdenticalRules
. removeEmptyCats . removeCycles
. cfgToCFRules
-- | Create a SRG preserving the names, profiles and probabilities of the -- | Create a SRG preserving the names, profiles and probabilities of the
-- input grammar. The returned grammar may be left-recursive. -- input grammar. The returned grammar may be left-recursive.
@@ -97,18 +100,17 @@ makeSRG :: Ident -- ^ Grammar name
-> Maybe Probs -- ^ Probabilities -> Maybe Probs -- ^ Probabilities
-> CGrammar -- ^ A context-free grammar -> CGrammar -- ^ A context-free grammar
-> SRG -> SRG
makeSRG = makeSRG_ removeEmptyCats makeSRG i origStart opts probs =
makeSRG_ i origStart opts probs . removeEmptyCats . cfgToCFRules
makeSRG_ :: (CFRules -> CFRules) -- ^ Transformations to apply to the makeSRG_ :: Ident -- ^ Grammar name
-- CFG before converting to SRG
-> Ident -- ^ Grammar name
-> String -- ^ Start category -> String -- ^ Start category
-> Options -- ^ Grammar options -> Options -- ^ Grammar options
-> Maybe Probs -- ^ Probabilities -> Maybe Probs -- ^ Probabilities
-> CGrammar -- ^ A context-free grammar -> CFRules -- ^ A context-free grammar
-> SRG -> SRG
makeSRG_ f i origStart opts probs gr makeSRG_ i origStart opts probs gr =
= SRG { grammarName = name, SRG { grammarName = name,
startCat = lookupFM_ names origStart, startCat = lookupFM_ names origStart,
origStartCat = origStart, origStartCat = origStart,
grammarLanguage = l, grammarLanguage = l,
@@ -116,8 +118,7 @@ makeSRG_ f i origStart opts probs gr
where where
name = prIdent i name = prIdent i
l = fromMaybe "en_UK" (getOptVal opts speechLanguage) l = fromMaybe "en_UK" (getOptVal opts speechLanguage)
gr' = f (cfgToCFRules gr) (cats,cfgRules) = unzip gr
(cats,cfgRules) = unzip gr'
names = mkCatNames name cats names = mkCatNames name cats
rs = map (cfgRulesToSRGRule names probs) cfgRules rs = map (cfgRulesToSRGRule names probs) cfgRules
@@ -168,8 +169,14 @@ topDownFilter srg@(SRG { startCat = start, rules = rs }) = srg { rules = rs' }
allSRGCats :: SRG -> [String] allSRGCats :: SRG -> [String]
allSRGCats SRG { rules = rs } = [c | SRGRule c _ _ <- rs] allSRGCats SRG { rules = rs } = [c | SRGRule c _ _ <- rs]
cfgCatToGFCat :: SRGCat -> String cfgCatToGFCat :: SRGCat -> Maybe String
cfgCatToGFCat = takeWhile (/='{') cfgCatToGFCat c
| '-' `elem` c = Nothing -- categories introduced by removeLeftRecursion contain dashes
| otherwise = Just $ takeWhile (/='{') c
srgTopCats :: SRG -> [(String,[SRGCat])]
srgTopCats srg = buildMultiMap [(oc, cat) | SRGRule cat origCat _ <- rules srg,
oc <- maybeToList $ cfgCatToGFCat origCat]
-- --
-- * Size-optimized EBNF SRGs -- * Size-optimized EBNF SRGs
@@ -189,7 +196,7 @@ ebnfSRGItem :: [[Symbol SRGNT Token]] -> EBnfSRGItem
ebnfSRGItem = dfa2re . mkSRGFA ebnfSRGItem = dfa2re . mkSRGFA
mkSRGFA :: [[Symbol SRGNT Token]] -> DFA (Symbol SRGNT Token) mkSRGFA :: [[Symbol SRGNT Token]] -> DFA (Symbol SRGNT Token)
mkSRGFA = minimize . dfa2nfa . foldr addString (newFA ()) mkSRGFA = {- minimize . dfa2nfa . -} foldr addString (newFA ())
addString :: [a] -> DFA a -> DFA a addString :: [a] -> DFA a -> DFA a
addString xs fa = addFinalState (last sts0) $ newTransitions ts fa' addString xs fa = addFinalState (last sts0) $ newTransitions ts fa'

View File

@@ -31,6 +31,7 @@ import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol,
import GF.Infra.Ident import GF.Infra.Ident
import GF.Infra.Option import GF.Infra.Option
import GF.Infra.Print import GF.Infra.Print
import GF.Speech.Relation
import Control.Monad import Control.Monad
import Control.Monad.State (State, get, put, evalState) import Control.Monad.State (State, get, put, evalState)
@@ -46,6 +47,7 @@ import qualified Data.Set as Set
-- | not very nice to replace the structured CFCat type with a simple string -- | not very nice to replace the structured CFCat type with a simple string
type CFRule_ = CFRule Cat_ Name Token type CFRule_ = CFRule Cat_ Name Token
type Cat_ = String type Cat_ = String
type CFSymbol_ = Symbol Cat_ Token
type CFRules = [(Cat_,[CFRule_])] type CFRules = [(Cat_,[CFRule_])]
@@ -78,10 +80,65 @@ removeIdenticalRules g = [(c,sortNubBy cmpRules rs) | (c,rs) <- g]
-- * Removing left recursion -- * Removing left recursion
{-
-- The LC_LR algorithm from
-- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf
-- Not used since I haven't figured out how to make proper profiles. /Bjorn
removeLeftRecursion :: Cat_ -> CFRules -> CFRules
removeLeftRecursion start gr
= groupProds $ concat [scheme1, scheme2, scheme3, scheme4]
where
scheme1 = [CFRule a [x,Cat a_x] (Name (IC "phony1") []) |
a <- retainedLeftRecursive,
x <- properLeftCornersOf a,
not (isLeftRecursive x),
let a_x = mkCat (Cat a) x]
scheme2 = [CFRule a_x (beta++[Cat a_b]) (Name (IC "phony2") []) |
a <- retainedLeftRecursive,
b@(Cat b') <- properLeftCornersOf a,
isLeftRecursive b,
CFRule _ (x:beta) n <- catRules gr b',
let a_x = mkCat (Cat a) x,
let a_b = mkCat (Cat a) b]
scheme3 = [CFRule a_x beta n | -- FIXME: remove 0 from all profile elements
a <- retainedLeftRecursive,
x <- properLeftCornersOf a,
CFRule _ (x':beta) n <- catRules gr a,
x == x',
let a_x = mkCat (Cat a) x]
scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . Cat) cats
cats = allCats gr
rules = ungroupProds gr
directLeftCorner = mkRel' [(Cat s,[t | CFRule _ (t:_) _ <- rs]) | (s,rs) <- gr]
leftCorner = reflexiveClosure_ (map Cat cats) $ transitiveClosure directLeftCorner
properLeftCorner = transitiveClosure directLeftCorner
properLeftCornersOf = Set.toList . allRelated properLeftCorner . Cat
isProperLeftCornerOf = flip (isRelatedTo properLeftCorner)
leftRecursive = reflexiveElements properLeftCorner
isLeftRecursive = (`Set.member` leftRecursive)
-- FIXME: include start cat
retained = start `Set.insert`
Set.fromList [a | (c,rs) <- gr, not (isLeftRecursive (Cat c)),
r <- rs, Cat a <- ruleRhs r]
isRetained = (`Set.member` retained)
retainedLeftRecursive = filter (isLeftRecursive . Cat) $ Set.toList retained
mkCat :: CFSymbol_ -> CFSymbol_ -> Cat_
mkCat x y = showSymbol x ++ "-" ++ showSymbol y
where showSymbol = symbol id ("$"++) -- FIXME !!!!!
-}
-- Paull's algorithm, see -- Paull's algorithm, see
-- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf -- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf
removeLeftRecursion :: CFRules -> CFRules removeLeftRecursion :: Cat_ -> CFRules -> CFRules
removeLeftRecursion rs = removeDirectLeftRecursions $ map handleProds rs removeLeftRecursion start rs = removeDirectLeftRecursions $ map handleProds rs
where where
handleProds (c, r) = (c, concatMap handleProd r) handleProds (c, r) = (c, concatMap handleProd r)
handleProd (CFRule ai (Cat aj:alpha) n) | aj < ai = handleProd (CFRule ai (Cat aj:alpha) n) | aj < ai =
@@ -113,18 +170,30 @@ removeDirectLeftRecursion (a,rs)
return [(a, as), (a', a's)] return [(a, as), (a', a's)]
where where
(dr,nr) = partition isDirectLeftRecursive rs (dr,nr) = partition isDirectLeftRecursive rs
fresh x = do { n <- get; put (n+1); return $ x ++ "'" ++ show n } fresh x = do { n <- get; put (n+1); return $ x ++ "-" ++ show n }
isDirectLeftRecursive :: CFRule_ -> Bool isDirectLeftRecursive :: CFRule_ -> Bool
isDirectLeftRecursive (CFRule c (Cat c':_) _) = c == c' isDirectLeftRecursive (CFRule c (Cat c':_) _) = c == c'
isDirectLeftRecursive _ = False isDirectLeftRecursive _ = False
-- * Removing cycles -- * Removing cycles
removeCycles :: CFRules -> CFRules removeCycles :: CFRules -> CFRules
removeCycles = groupProds . removeCycles_ . ungroupProds removeCycles = groupProds . removeCycles_ . ungroupProds
where removeCycles_ rs = [r | r@(CFRule c rhs n) <- rs, rhs /= [Cat c]] where removeCycles_ rs = [r | r@(CFRule c rhs n) <- rs, rhs /= [Cat c]]
-- | Get the sets of mutually recursive non-terminals for a grammar.
mutRecCats :: Bool -- ^ If true, all categories will be in some set.
-- If false, only recursive categories will be included.
-> CFRules -> [Set Cat_]
mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
where r = mkRel [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss]
allCats = map fst g
refl = if incAll then reflexiveClosure_ allCats else reflexiveSubrelation
-- --
-- * CFG rule utilities -- * CFG rule utilities
-- --
@@ -142,8 +211,8 @@ allCats = map fst
catRules :: CFRules -> Cat_ -> [CFRule_] catRules :: CFRules -> Cat_ -> [CFRule_]
catRules rs c = fromMaybe [] (lookup c rs) catRules rs c = fromMaybe [] (lookup c rs)
catSetRules :: CFRules -> [Cat_] -> [CFRule_] catSetRules :: CFRules -> Set Cat_ -> [CFRule_]
catSetRules g s = concatMap (catRules g) s catSetRules g cs = concat [rs | (c,rs) <- g, c `Set.member` cs]
lhsCat :: CFRule c n t -> c lhsCat :: CFRule c n t -> c
lhsCat (CFRule c _ _) = c lhsCat (CFRule c _ _) = c