forked from GitHub/gf-core
Added still unused implementation of Moore's LCLR algorithm for left recursion elimination. Fixed top category generation for SRG (included LR-elimination-added categories before).
This commit is contained in:
@@ -75,7 +75,7 @@ makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
|
||||
where trSet cs | allXLinear cs rs = rs
|
||||
| otherwise = concatMap handleCat csl
|
||||
where csl = Set.toList cs
|
||||
rs = catSetRules g csl
|
||||
rs = catSetRules g cs
|
||||
handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e
|
||||
++ concatMap (makeRightLinearRules c) (catRules g c)
|
||||
where c' = newCat c
|
||||
@@ -90,15 +90,6 @@ makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
|
||||
| otherwise = [CFRule c rhs n]
|
||||
newCat c = c ++ "$"
|
||||
|
||||
-- | Get the sets of mutually recursive non-terminals for a grammar.
|
||||
mutRecCats :: Bool -- ^ If true, all categories will be in some set.
|
||||
-- If false, only recursive categories will be included.
|
||||
-> CFRules -> [Set Cat_]
|
||||
mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
|
||||
where r = mkRel [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss]
|
||||
allCats = map fst g
|
||||
refl = if incAll then reflexiveClosure_ allCats else reflexiveSubrelation
|
||||
|
||||
--
|
||||
-- * Compile strongly regular grammars to NFAs
|
||||
--
|
||||
@@ -271,7 +262,7 @@ mutRecSets g = Map.fromList . concatMap mkMutRecSet
|
||||
where
|
||||
mkMutRecSet cs = [ (c,ms) | c <- csl ]
|
||||
where csl = Set.toList cs
|
||||
rs = catSetRules g csl
|
||||
rs = catSetRules g cs
|
||||
(nrs,rrs) = partition (ruleIsNonRecursive cs) rs
|
||||
ms = MutRecSet {
|
||||
mrCats = cs,
|
||||
|
||||
@@ -44,7 +44,7 @@ jsgfPrinter name start opts sisr probs cfg = prJSGF srg sisr ""
|
||||
where srg = makeSimpleSRG name start opts probs cfg
|
||||
|
||||
prJSGF :: SRG -> Maybe SISRFormat -> ShowS
|
||||
prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) sisr
|
||||
prJSGF srg@(SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) sisr
|
||||
= header . nl
|
||||
. mainCat . nl
|
||||
. unlinesS topCatRules . nl
|
||||
@@ -62,9 +62,8 @@ prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) si
|
||||
-- FIXME: use the probability
|
||||
prAlt (EBnfSRGAlt mp n rhs) = tag sisr (profileInitSISR n) . showChar ' '. prItem sisr rhs
|
||||
|
||||
topCatRules = [rule True (catFormId tc) (map (it tc) cs) | (tc,cs) <- topCats]
|
||||
where topCats = buildMultiMap [(cfgCatToGFCat origCat, cat) | SRGRule cat origCat _ <- rs]
|
||||
it i c = prCat c . tag sisr [(EThis :. catFieldId i) := (ERef c)]
|
||||
topCatRules = [rule True (catFormId tc) (map (it tc) cs) | (tc,cs) <- srgTopCats srg]
|
||||
where it i c = prCat c . tag sisr [(EThis :. catFieldId i) := (ERef c)]
|
||||
|
||||
catFormId :: String -> String
|
||||
catFormId = (++ "_cat")
|
||||
|
||||
@@ -45,11 +45,11 @@ srgsXmlPrinter name start opts sisr probs cfg = prSrgsXml sisr srg ""
|
||||
where srg = makeSRG name start opts probs cfg
|
||||
|
||||
prSrgsXml :: Maybe SISRFormat -> SRG -> ShowS
|
||||
prSrgsXml sisr (SRG{grammarName=name,startCat=start,
|
||||
origStartCat=origStart,grammarLanguage=l,rules=rs})
|
||||
prSrgsXml sisr srg@(SRG{grammarName=name,startCat=start,
|
||||
origStartCat=origStart,grammarLanguage=l,rules=rs})
|
||||
= showsXMLDoc $ optimizeSRGS xmlGr
|
||||
where
|
||||
root = cfgCatToGFCat origStart
|
||||
Just root = cfgCatToGFCat origStart
|
||||
xmlGr = grammar sisr (catFormId root) l $
|
||||
[meta "description"
|
||||
("SRGS XML speech recognition grammar for " ++ name
|
||||
@@ -62,9 +62,8 @@ prSrgsXml sisr (SRG{grammarName=name,startCat=start,
|
||||
comments ["Category " ++ origCat] ++ [rule cat (prRhs $ ebnfSRGAlts alts)]
|
||||
prRhs rhss = [oneOf (map (mkProd sisr) rhss)]
|
||||
-- externally visible rules for each of the GF categories
|
||||
topCatRules = [topRule tc [oneOf (map (it tc) cs)] | (tc,cs) <- topCats]
|
||||
where topCats = buildMultiMap [(cfgCatToGFCat origCat, cat) | SRGRule cat origCat _ <- rs]
|
||||
it i c = Tag "item" [] [Tag "ruleref" [("uri","#" ++ c)] [],
|
||||
topCatRules = [topRule tc [oneOf (map (it tc) cs)] | (tc,cs) <- srgTopCats srg]
|
||||
where it i c = Tag "item" [] [Tag "ruleref" [("uri","#" ++ c)] [],
|
||||
tag sisr [(EThis :. catFieldId i) := (ERef c)]]
|
||||
topRule i is = Tag "rule" [("id",catFormId i),("scope","public")] is
|
||||
|
||||
|
||||
@@ -12,12 +12,13 @@
|
||||
-- A simple module for relations.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Speech.Relation (Rel, mkRel
|
||||
module GF.Speech.Relation (Rel, mkRel, mkRel'
|
||||
, allRelated , isRelatedTo
|
||||
, transitiveClosure
|
||||
, reflexiveClosure, reflexiveClosure_
|
||||
, symmetricClosure
|
||||
, symmetricSubrelation, reflexiveSubrelation
|
||||
, reflexiveElements
|
||||
, equivalenceClasses
|
||||
, isTransitive, isReflexive, isSymmetric
|
||||
, isEquivalence
|
||||
@@ -38,6 +39,11 @@ type Rel a = Map a (Set a)
|
||||
mkRel :: Ord a => [(a,a)] -> Rel a
|
||||
mkRel ps = relates ps Map.empty
|
||||
|
||||
-- | Creates a relation from a list pairs of elements and the elements
|
||||
-- related to them.
|
||||
mkRel' :: Ord a => [(a,[a])] -> Rel a
|
||||
mkRel' xs = Map.fromListWith Set.union [(x,Set.fromList ys) | (x,ys) <- xs]
|
||||
|
||||
relToList :: Rel a -> [(a,a)]
|
||||
relToList r = [ (x,y) | (x,ys) <- Map.toList r, y <- Set.toList ys ]
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..),
|
||||
SRGCat, SRGNT,
|
||||
makeSimpleSRG, makeSRG
|
||||
, lookupFM_, prtS
|
||||
, topDownFilter, cfgCatToGFCat
|
||||
, topDownFilter, cfgCatToGFCat, srgTopCats
|
||||
, EBnfSRGAlt(..), EBnfSRGItem
|
||||
, ebnfSRGAlts
|
||||
) where
|
||||
@@ -44,7 +44,7 @@ import GF.Infra.Option
|
||||
import GF.Probabilistic.Probabilistic (Probs)
|
||||
|
||||
import Data.List
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Data.Maybe (fromMaybe, maybeToList)
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map
|
||||
import Data.Set (Set)
|
||||
@@ -86,8 +86,11 @@ makeSimpleSRG :: Ident -- ^ Grammar name
|
||||
-> Maybe Probs -- ^ Probabilities
|
||||
-> CGrammar -- ^ A context-free grammar
|
||||
-> SRG
|
||||
makeSimpleSRG
|
||||
= makeSRG_ (removeLeftRecursion . removeIdenticalRules . removeEmptyCats . removeCycles)
|
||||
makeSimpleSRG i origStart opts probs =
|
||||
makeSRG_ i origStart opts probs
|
||||
. removeLeftRecursion origStart . removeIdenticalRules
|
||||
. removeEmptyCats . removeCycles
|
||||
. cfgToCFRules
|
||||
|
||||
-- | Create a SRG preserving the names, profiles and probabilities of the
|
||||
-- input grammar. The returned grammar may be left-recursive.
|
||||
@@ -97,18 +100,17 @@ makeSRG :: Ident -- ^ Grammar name
|
||||
-> Maybe Probs -- ^ Probabilities
|
||||
-> CGrammar -- ^ A context-free grammar
|
||||
-> SRG
|
||||
makeSRG = makeSRG_ removeEmptyCats
|
||||
makeSRG i origStart opts probs =
|
||||
makeSRG_ i origStart opts probs . removeEmptyCats . cfgToCFRules
|
||||
|
||||
makeSRG_ :: (CFRules -> CFRules) -- ^ Transformations to apply to the
|
||||
-- CFG before converting to SRG
|
||||
-> Ident -- ^ Grammar name
|
||||
makeSRG_ :: Ident -- ^ Grammar name
|
||||
-> String -- ^ Start category
|
||||
-> Options -- ^ Grammar options
|
||||
-> Maybe Probs -- ^ Probabilities
|
||||
-> CGrammar -- ^ A context-free grammar
|
||||
-> CFRules -- ^ A context-free grammar
|
||||
-> SRG
|
||||
makeSRG_ f i origStart opts probs gr
|
||||
= SRG { grammarName = name,
|
||||
makeSRG_ i origStart opts probs gr =
|
||||
SRG { grammarName = name,
|
||||
startCat = lookupFM_ names origStart,
|
||||
origStartCat = origStart,
|
||||
grammarLanguage = l,
|
||||
@@ -116,8 +118,7 @@ makeSRG_ f i origStart opts probs gr
|
||||
where
|
||||
name = prIdent i
|
||||
l = fromMaybe "en_UK" (getOptVal opts speechLanguage)
|
||||
gr' = f (cfgToCFRules gr)
|
||||
(cats,cfgRules) = unzip gr'
|
||||
(cats,cfgRules) = unzip gr
|
||||
names = mkCatNames name cats
|
||||
rs = map (cfgRulesToSRGRule names probs) cfgRules
|
||||
|
||||
@@ -168,8 +169,14 @@ topDownFilter srg@(SRG { startCat = start, rules = rs }) = srg { rules = rs' }
|
||||
allSRGCats :: SRG -> [String]
|
||||
allSRGCats SRG { rules = rs } = [c | SRGRule c _ _ <- rs]
|
||||
|
||||
cfgCatToGFCat :: SRGCat -> String
|
||||
cfgCatToGFCat = takeWhile (/='{')
|
||||
cfgCatToGFCat :: SRGCat -> Maybe String
|
||||
cfgCatToGFCat c
|
||||
| '-' `elem` c = Nothing -- categories introduced by removeLeftRecursion contain dashes
|
||||
| otherwise = Just $ takeWhile (/='{') c
|
||||
|
||||
srgTopCats :: SRG -> [(String,[SRGCat])]
|
||||
srgTopCats srg = buildMultiMap [(oc, cat) | SRGRule cat origCat _ <- rules srg,
|
||||
oc <- maybeToList $ cfgCatToGFCat origCat]
|
||||
|
||||
--
|
||||
-- * Size-optimized EBNF SRGs
|
||||
@@ -189,7 +196,7 @@ ebnfSRGItem :: [[Symbol SRGNT Token]] -> EBnfSRGItem
|
||||
ebnfSRGItem = dfa2re . mkSRGFA
|
||||
|
||||
mkSRGFA :: [[Symbol SRGNT Token]] -> DFA (Symbol SRGNT Token)
|
||||
mkSRGFA = minimize . dfa2nfa . foldr addString (newFA ())
|
||||
mkSRGFA = {- minimize . dfa2nfa . -} foldr addString (newFA ())
|
||||
|
||||
addString :: [a] -> DFA a -> DFA a
|
||||
addString xs fa = addFinalState (last sts0) $ newTransitions ts fa'
|
||||
|
||||
@@ -31,6 +31,7 @@ import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol,
|
||||
import GF.Infra.Ident
|
||||
import GF.Infra.Option
|
||||
import GF.Infra.Print
|
||||
import GF.Speech.Relation
|
||||
|
||||
import Control.Monad
|
||||
import Control.Monad.State (State, get, put, evalState)
|
||||
@@ -46,6 +47,7 @@ import qualified Data.Set as Set
|
||||
-- | not very nice to replace the structured CFCat type with a simple string
|
||||
type CFRule_ = CFRule Cat_ Name Token
|
||||
type Cat_ = String
|
||||
type CFSymbol_ = Symbol Cat_ Token
|
||||
|
||||
type CFRules = [(Cat_,[CFRule_])]
|
||||
|
||||
@@ -78,10 +80,65 @@ removeIdenticalRules g = [(c,sortNubBy cmpRules rs) | (c,rs) <- g]
|
||||
|
||||
-- * Removing left recursion
|
||||
|
||||
{-
|
||||
|
||||
-- The LC_LR algorithm from
|
||||
-- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf
|
||||
-- Not used since I haven't figured out how to make proper profiles. /Bjorn
|
||||
removeLeftRecursion :: Cat_ -> CFRules -> CFRules
|
||||
removeLeftRecursion start gr
|
||||
= groupProds $ concat [scheme1, scheme2, scheme3, scheme4]
|
||||
where
|
||||
scheme1 = [CFRule a [x,Cat a_x] (Name (IC "phony1") []) |
|
||||
a <- retainedLeftRecursive,
|
||||
x <- properLeftCornersOf a,
|
||||
not (isLeftRecursive x),
|
||||
let a_x = mkCat (Cat a) x]
|
||||
scheme2 = [CFRule a_x (beta++[Cat a_b]) (Name (IC "phony2") []) |
|
||||
a <- retainedLeftRecursive,
|
||||
b@(Cat b') <- properLeftCornersOf a,
|
||||
isLeftRecursive b,
|
||||
CFRule _ (x:beta) n <- catRules gr b',
|
||||
let a_x = mkCat (Cat a) x,
|
||||
let a_b = mkCat (Cat a) b]
|
||||
scheme3 = [CFRule a_x beta n | -- FIXME: remove 0 from all profile elements
|
||||
a <- retainedLeftRecursive,
|
||||
x <- properLeftCornersOf a,
|
||||
CFRule _ (x':beta) n <- catRules gr a,
|
||||
x == x',
|
||||
let a_x = mkCat (Cat a) x]
|
||||
scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . Cat) cats
|
||||
|
||||
cats = allCats gr
|
||||
rules = ungroupProds gr
|
||||
|
||||
directLeftCorner = mkRel' [(Cat s,[t | CFRule _ (t:_) _ <- rs]) | (s,rs) <- gr]
|
||||
leftCorner = reflexiveClosure_ (map Cat cats) $ transitiveClosure directLeftCorner
|
||||
properLeftCorner = transitiveClosure directLeftCorner
|
||||
properLeftCornersOf = Set.toList . allRelated properLeftCorner . Cat
|
||||
isProperLeftCornerOf = flip (isRelatedTo properLeftCorner)
|
||||
|
||||
leftRecursive = reflexiveElements properLeftCorner
|
||||
isLeftRecursive = (`Set.member` leftRecursive)
|
||||
|
||||
-- FIXME: include start cat
|
||||
retained = start `Set.insert`
|
||||
Set.fromList [a | (c,rs) <- gr, not (isLeftRecursive (Cat c)),
|
||||
r <- rs, Cat a <- ruleRhs r]
|
||||
isRetained = (`Set.member` retained)
|
||||
|
||||
retainedLeftRecursive = filter (isLeftRecursive . Cat) $ Set.toList retained
|
||||
|
||||
mkCat :: CFSymbol_ -> CFSymbol_ -> Cat_
|
||||
mkCat x y = showSymbol x ++ "-" ++ showSymbol y
|
||||
where showSymbol = symbol id ("$"++) -- FIXME !!!!!
|
||||
|
||||
-}
|
||||
|
||||
-- Paull's algorithm, see
|
||||
-- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf
|
||||
removeLeftRecursion :: CFRules -> CFRules
|
||||
removeLeftRecursion rs = removeDirectLeftRecursions $ map handleProds rs
|
||||
removeLeftRecursion :: Cat_ -> CFRules -> CFRules
|
||||
removeLeftRecursion start rs = removeDirectLeftRecursions $ map handleProds rs
|
||||
where
|
||||
handleProds (c, r) = (c, concatMap handleProd r)
|
||||
handleProd (CFRule ai (Cat aj:alpha) n) | aj < ai =
|
||||
@@ -113,18 +170,30 @@ removeDirectLeftRecursion (a,rs)
|
||||
return [(a, as), (a', a's)]
|
||||
where
|
||||
(dr,nr) = partition isDirectLeftRecursive rs
|
||||
fresh x = do { n <- get; put (n+1); return $ x ++ "'" ++ show n }
|
||||
fresh x = do { n <- get; put (n+1); return $ x ++ "-" ++ show n }
|
||||
|
||||
isDirectLeftRecursive :: CFRule_ -> Bool
|
||||
isDirectLeftRecursive (CFRule c (Cat c':_) _) = c == c'
|
||||
isDirectLeftRecursive _ = False
|
||||
|
||||
|
||||
-- * Removing cycles
|
||||
|
||||
removeCycles :: CFRules -> CFRules
|
||||
removeCycles = groupProds . removeCycles_ . ungroupProds
|
||||
where removeCycles_ rs = [r | r@(CFRule c rhs n) <- rs, rhs /= [Cat c]]
|
||||
|
||||
|
||||
-- | Get the sets of mutually recursive non-terminals for a grammar.
|
||||
mutRecCats :: Bool -- ^ If true, all categories will be in some set.
|
||||
-- If false, only recursive categories will be included.
|
||||
-> CFRules -> [Set Cat_]
|
||||
mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
|
||||
where r = mkRel [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss]
|
||||
allCats = map fst g
|
||||
refl = if incAll then reflexiveClosure_ allCats else reflexiveSubrelation
|
||||
|
||||
|
||||
--
|
||||
-- * CFG rule utilities
|
||||
--
|
||||
@@ -142,8 +211,8 @@ allCats = map fst
|
||||
catRules :: CFRules -> Cat_ -> [CFRule_]
|
||||
catRules rs c = fromMaybe [] (lookup c rs)
|
||||
|
||||
catSetRules :: CFRules -> [Cat_] -> [CFRule_]
|
||||
catSetRules g s = concatMap (catRules g) s
|
||||
catSetRules :: CFRules -> Set Cat_ -> [CFRule_]
|
||||
catSetRules g cs = concat [rs | (c,rs) <- g, c `Set.member` cs]
|
||||
|
||||
lhsCat :: CFRule c n t -> c
|
||||
lhsCat (CFRule c _ _) = c
|
||||
|
||||
Reference in New Issue
Block a user