diff --git a/src/GF/Speech/CFGToFiniteState.hs b/src/GF/Speech/CFGToFiniteState.hs index 7876f637d..cb32ff73e 100644 --- a/src/GF/Speech/CFGToFiniteState.hs +++ b/src/GF/Speech/CFGToFiniteState.hs @@ -75,7 +75,7 @@ makeRegular g = groupProds $ concatMap trSet (mutRecCats True g) where trSet cs | allXLinear cs rs = rs | otherwise = concatMap handleCat csl where csl = Set.toList cs - rs = catSetRules g csl + rs = catSetRules g cs handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e ++ concatMap (makeRightLinearRules c) (catRules g c) where c' = newCat c @@ -90,15 +90,6 @@ makeRegular g = groupProds $ concatMap trSet (mutRecCats True g) | otherwise = [CFRule c rhs n] newCat c = c ++ "$" --- | Get the sets of mutually recursive non-terminals for a grammar. -mutRecCats :: Bool -- ^ If true, all categories will be in some set. - -- If false, only recursive categories will be included. - -> CFRules -> [Set Cat_] -mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r - where r = mkRel [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss] - allCats = map fst g - refl = if incAll then reflexiveClosure_ allCats else reflexiveSubrelation - -- -- * Compile strongly regular grammars to NFAs -- @@ -271,7 +262,7 @@ mutRecSets g = Map.fromList . concatMap mkMutRecSet where mkMutRecSet cs = [ (c,ms) | c <- csl ] where csl = Set.toList cs - rs = catSetRules g csl + rs = catSetRules g cs (nrs,rrs) = partition (ruleIsNonRecursive cs) rs ms = MutRecSet { mrCats = cs, diff --git a/src/GF/Speech/PrJSGF.hs b/src/GF/Speech/PrJSGF.hs index 90e8678f1..05aa6562c 100644 --- a/src/GF/Speech/PrJSGF.hs +++ b/src/GF/Speech/PrJSGF.hs @@ -44,7 +44,7 @@ jsgfPrinter name start opts sisr probs cfg = prJSGF srg sisr "" where srg = makeSimpleSRG name start opts probs cfg prJSGF :: SRG -> Maybe SISRFormat -> ShowS -prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) sisr +prJSGF srg@(SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) sisr = header . nl . mainCat . nl . unlinesS topCatRules . nl @@ -62,9 +62,8 @@ prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) si -- FIXME: use the probability prAlt (EBnfSRGAlt mp n rhs) = tag sisr (profileInitSISR n) . showChar ' '. prItem sisr rhs - topCatRules = [rule True (catFormId tc) (map (it tc) cs) | (tc,cs) <- topCats] - where topCats = buildMultiMap [(cfgCatToGFCat origCat, cat) | SRGRule cat origCat _ <- rs] - it i c = prCat c . tag sisr [(EThis :. catFieldId i) := (ERef c)] + topCatRules = [rule True (catFormId tc) (map (it tc) cs) | (tc,cs) <- srgTopCats srg] + where it i c = prCat c . tag sisr [(EThis :. catFieldId i) := (ERef c)] catFormId :: String -> String catFormId = (++ "_cat") diff --git a/src/GF/Speech/PrSRGS.hs b/src/GF/Speech/PrSRGS.hs index d4ab5c4c0..977e257e8 100644 --- a/src/GF/Speech/PrSRGS.hs +++ b/src/GF/Speech/PrSRGS.hs @@ -45,11 +45,11 @@ srgsXmlPrinter name start opts sisr probs cfg = prSrgsXml sisr srg "" where srg = makeSRG name start opts probs cfg prSrgsXml :: Maybe SISRFormat -> SRG -> ShowS -prSrgsXml sisr (SRG{grammarName=name,startCat=start, - origStartCat=origStart,grammarLanguage=l,rules=rs}) +prSrgsXml sisr srg@(SRG{grammarName=name,startCat=start, + origStartCat=origStart,grammarLanguage=l,rules=rs}) = showsXMLDoc $ optimizeSRGS xmlGr where - root = cfgCatToGFCat origStart + Just root = cfgCatToGFCat origStart xmlGr = grammar sisr (catFormId root) l $ [meta "description" ("SRGS XML speech recognition grammar for " ++ name @@ -62,9 +62,8 @@ prSrgsXml sisr (SRG{grammarName=name,startCat=start, comments ["Category " ++ origCat] ++ [rule cat (prRhs $ ebnfSRGAlts alts)] prRhs rhss = [oneOf (map (mkProd sisr) rhss)] -- externally visible rules for each of the GF categories - topCatRules = [topRule tc [oneOf (map (it tc) cs)] | (tc,cs) <- topCats] - where topCats = buildMultiMap [(cfgCatToGFCat origCat, cat) | SRGRule cat origCat _ <- rs] - it i c = Tag "item" [] [Tag "ruleref" [("uri","#" ++ c)] [], + topCatRules = [topRule tc [oneOf (map (it tc) cs)] | (tc,cs) <- srgTopCats srg] + where it i c = Tag "item" [] [Tag "ruleref" [("uri","#" ++ c)] [], tag sisr [(EThis :. catFieldId i) := (ERef c)]] topRule i is = Tag "rule" [("id",catFormId i),("scope","public")] is diff --git a/src/GF/Speech/Relation.hs b/src/GF/Speech/Relation.hs index a62b4b1a7..fe91716c6 100644 --- a/src/GF/Speech/Relation.hs +++ b/src/GF/Speech/Relation.hs @@ -12,12 +12,13 @@ -- A simple module for relations. ----------------------------------------------------------------------------- -module GF.Speech.Relation (Rel, mkRel +module GF.Speech.Relation (Rel, mkRel, mkRel' , allRelated , isRelatedTo , transitiveClosure , reflexiveClosure, reflexiveClosure_ , symmetricClosure , symmetricSubrelation, reflexiveSubrelation + , reflexiveElements , equivalenceClasses , isTransitive, isReflexive, isSymmetric , isEquivalence @@ -38,6 +39,11 @@ type Rel a = Map a (Set a) mkRel :: Ord a => [(a,a)] -> Rel a mkRel ps = relates ps Map.empty +-- | Creates a relation from a list pairs of elements and the elements +-- related to them. +mkRel' :: Ord a => [(a,[a])] -> Rel a +mkRel' xs = Map.fromListWith Set.union [(x,Set.fromList ys) | (x,ys) <- xs] + relToList :: Rel a -> [(a,a)] relToList r = [ (x,y) | (x,ys) <- Map.toList r, y <- Set.toList ys ] diff --git a/src/GF/Speech/SRG.hs b/src/GF/Speech/SRG.hs index e89e42662..84820be9f 100644 --- a/src/GF/Speech/SRG.hs +++ b/src/GF/Speech/SRG.hs @@ -22,7 +22,7 @@ module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..), SRGCat, SRGNT, makeSimpleSRG, makeSRG , lookupFM_, prtS - , topDownFilter, cfgCatToGFCat + , topDownFilter, cfgCatToGFCat, srgTopCats , EBnfSRGAlt(..), EBnfSRGItem , ebnfSRGAlts ) where @@ -44,7 +44,7 @@ import GF.Infra.Option import GF.Probabilistic.Probabilistic (Probs) import Data.List -import Data.Maybe (fromMaybe) +import Data.Maybe (fromMaybe, maybeToList) import Data.Map (Map) import qualified Data.Map as Map import Data.Set (Set) @@ -86,8 +86,11 @@ makeSimpleSRG :: Ident -- ^ Grammar name -> Maybe Probs -- ^ Probabilities -> CGrammar -- ^ A context-free grammar -> SRG -makeSimpleSRG - = makeSRG_ (removeLeftRecursion . removeIdenticalRules . removeEmptyCats . removeCycles) +makeSimpleSRG i origStart opts probs = + makeSRG_ i origStart opts probs + . removeLeftRecursion origStart . removeIdenticalRules + . removeEmptyCats . removeCycles + . cfgToCFRules -- | Create a SRG preserving the names, profiles and probabilities of the -- input grammar. The returned grammar may be left-recursive. @@ -97,18 +100,17 @@ makeSRG :: Ident -- ^ Grammar name -> Maybe Probs -- ^ Probabilities -> CGrammar -- ^ A context-free grammar -> SRG -makeSRG = makeSRG_ removeEmptyCats +makeSRG i origStart opts probs = + makeSRG_ i origStart opts probs . removeEmptyCats . cfgToCFRules -makeSRG_ :: (CFRules -> CFRules) -- ^ Transformations to apply to the - -- CFG before converting to SRG - -> Ident -- ^ Grammar name +makeSRG_ :: Ident -- ^ Grammar name -> String -- ^ Start category -> Options -- ^ Grammar options -> Maybe Probs -- ^ Probabilities - -> CGrammar -- ^ A context-free grammar + -> CFRules -- ^ A context-free grammar -> SRG -makeSRG_ f i origStart opts probs gr - = SRG { grammarName = name, +makeSRG_ i origStart opts probs gr = + SRG { grammarName = name, startCat = lookupFM_ names origStart, origStartCat = origStart, grammarLanguage = l, @@ -116,8 +118,7 @@ makeSRG_ f i origStart opts probs gr where name = prIdent i l = fromMaybe "en_UK" (getOptVal opts speechLanguage) - gr' = f (cfgToCFRules gr) - (cats,cfgRules) = unzip gr' + (cats,cfgRules) = unzip gr names = mkCatNames name cats rs = map (cfgRulesToSRGRule names probs) cfgRules @@ -168,8 +169,14 @@ topDownFilter srg@(SRG { startCat = start, rules = rs }) = srg { rules = rs' } allSRGCats :: SRG -> [String] allSRGCats SRG { rules = rs } = [c | SRGRule c _ _ <- rs] -cfgCatToGFCat :: SRGCat -> String -cfgCatToGFCat = takeWhile (/='{') +cfgCatToGFCat :: SRGCat -> Maybe String +cfgCatToGFCat c + | '-' `elem` c = Nothing -- categories introduced by removeLeftRecursion contain dashes + | otherwise = Just $ takeWhile (/='{') c + +srgTopCats :: SRG -> [(String,[SRGCat])] +srgTopCats srg = buildMultiMap [(oc, cat) | SRGRule cat origCat _ <- rules srg, + oc <- maybeToList $ cfgCatToGFCat origCat] -- -- * Size-optimized EBNF SRGs @@ -189,7 +196,7 @@ ebnfSRGItem :: [[Symbol SRGNT Token]] -> EBnfSRGItem ebnfSRGItem = dfa2re . mkSRGFA mkSRGFA :: [[Symbol SRGNT Token]] -> DFA (Symbol SRGNT Token) -mkSRGFA = minimize . dfa2nfa . foldr addString (newFA ()) +mkSRGFA = {- minimize . dfa2nfa . -} foldr addString (newFA ()) addString :: [a] -> DFA a -> DFA a addString xs fa = addFinalState (last sts0) $ newTransitions ts fa' diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs index 38fb82b68..9d087609b 100644 --- a/src/GF/Speech/TransformCFG.hs +++ b/src/GF/Speech/TransformCFG.hs @@ -31,6 +31,7 @@ import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, import GF.Infra.Ident import GF.Infra.Option import GF.Infra.Print +import GF.Speech.Relation import Control.Monad import Control.Monad.State (State, get, put, evalState) @@ -46,6 +47,7 @@ import qualified Data.Set as Set -- | not very nice to replace the structured CFCat type with a simple string type CFRule_ = CFRule Cat_ Name Token type Cat_ = String +type CFSymbol_ = Symbol Cat_ Token type CFRules = [(Cat_,[CFRule_])] @@ -78,10 +80,65 @@ removeIdenticalRules g = [(c,sortNubBy cmpRules rs) | (c,rs) <- g] -- * Removing left recursion +{- + +-- The LC_LR algorithm from +-- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf +-- Not used since I haven't figured out how to make proper profiles. /Bjorn +removeLeftRecursion :: Cat_ -> CFRules -> CFRules +removeLeftRecursion start gr + = groupProds $ concat [scheme1, scheme2, scheme3, scheme4] + where + scheme1 = [CFRule a [x,Cat a_x] (Name (IC "phony1") []) | + a <- retainedLeftRecursive, + x <- properLeftCornersOf a, + not (isLeftRecursive x), + let a_x = mkCat (Cat a) x] + scheme2 = [CFRule a_x (beta++[Cat a_b]) (Name (IC "phony2") []) | + a <- retainedLeftRecursive, + b@(Cat b') <- properLeftCornersOf a, + isLeftRecursive b, + CFRule _ (x:beta) n <- catRules gr b', + let a_x = mkCat (Cat a) x, + let a_b = mkCat (Cat a) b] + scheme3 = [CFRule a_x beta n | -- FIXME: remove 0 from all profile elements + a <- retainedLeftRecursive, + x <- properLeftCornersOf a, + CFRule _ (x':beta) n <- catRules gr a, + x == x', + let a_x = mkCat (Cat a) x] + scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . Cat) cats + + cats = allCats gr + rules = ungroupProds gr + + directLeftCorner = mkRel' [(Cat s,[t | CFRule _ (t:_) _ <- rs]) | (s,rs) <- gr] + leftCorner = reflexiveClosure_ (map Cat cats) $ transitiveClosure directLeftCorner + properLeftCorner = transitiveClosure directLeftCorner + properLeftCornersOf = Set.toList . allRelated properLeftCorner . Cat + isProperLeftCornerOf = flip (isRelatedTo properLeftCorner) + + leftRecursive = reflexiveElements properLeftCorner + isLeftRecursive = (`Set.member` leftRecursive) + + -- FIXME: include start cat + retained = start `Set.insert` + Set.fromList [a | (c,rs) <- gr, not (isLeftRecursive (Cat c)), + r <- rs, Cat a <- ruleRhs r] + isRetained = (`Set.member` retained) + + retainedLeftRecursive = filter (isLeftRecursive . Cat) $ Set.toList retained + +mkCat :: CFSymbol_ -> CFSymbol_ -> Cat_ +mkCat x y = showSymbol x ++ "-" ++ showSymbol y + where showSymbol = symbol id ("$"++) -- FIXME !!!!! + +-} + -- Paull's algorithm, see -- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf -removeLeftRecursion :: CFRules -> CFRules -removeLeftRecursion rs = removeDirectLeftRecursions $ map handleProds rs +removeLeftRecursion :: Cat_ -> CFRules -> CFRules +removeLeftRecursion start rs = removeDirectLeftRecursions $ map handleProds rs where handleProds (c, r) = (c, concatMap handleProd r) handleProd (CFRule ai (Cat aj:alpha) n) | aj < ai = @@ -113,18 +170,30 @@ removeDirectLeftRecursion (a,rs) return [(a, as), (a', a's)] where (dr,nr) = partition isDirectLeftRecursive rs - fresh x = do { n <- get; put (n+1); return $ x ++ "'" ++ show n } + fresh x = do { n <- get; put (n+1); return $ x ++ "-" ++ show n } isDirectLeftRecursive :: CFRule_ -> Bool isDirectLeftRecursive (CFRule c (Cat c':_) _) = c == c' isDirectLeftRecursive _ = False + -- * Removing cycles removeCycles :: CFRules -> CFRules removeCycles = groupProds . removeCycles_ . ungroupProds where removeCycles_ rs = [r | r@(CFRule c rhs n) <- rs, rhs /= [Cat c]] + +-- | Get the sets of mutually recursive non-terminals for a grammar. +mutRecCats :: Bool -- ^ If true, all categories will be in some set. + -- If false, only recursive categories will be included. + -> CFRules -> [Set Cat_] +mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r + where r = mkRel [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss] + allCats = map fst g + refl = if incAll then reflexiveClosure_ allCats else reflexiveSubrelation + + -- -- * CFG rule utilities -- @@ -142,8 +211,8 @@ allCats = map fst catRules :: CFRules -> Cat_ -> [CFRule_] catRules rs c = fromMaybe [] (lookup c rs) -catSetRules :: CFRules -> [Cat_] -> [CFRule_] -catSetRules g s = concatMap (catRules g) s +catSetRules :: CFRules -> Set Cat_ -> [CFRule_] +catSetRules g cs = concat [rs | (c,rs) <- g, c `Set.member` cs] lhsCat :: CFRule c n t -> c lhsCat (CFRule c _ _) = c