From 982a5222726831d60f046fdeff91461ff610c6c5 Mon Sep 17 00:00:00 2001 From: bringert Date: Wed, 7 Sep 2005 13:21:30 +0000 Subject: [PATCH] Added the prerequisits for automaton building. --- src/GF/Speech/FiniteState.hs | 105 ++++++++++++++++++++++++++++++++++ src/GF/Speech/PrGSL.hs | 7 +-- src/GF/Speech/PrJSGF.hs | 7 +-- src/GF/Speech/PrSLF.hs | 32 ++++++++--- src/GF/Speech/SRG.hs | 31 +++------- src/GF/Speech/TransformCFG.hs | 82 ++++++++++++++------------ src/Makefile | 2 +- 7 files changed, 191 insertions(+), 75 deletions(-) create mode 100644 src/GF/Speech/FiniteState.hs diff --git a/src/GF/Speech/FiniteState.hs b/src/GF/Speech/FiniteState.hs new file mode 100644 index 000000000..bdbd21b67 --- /dev/null +++ b/src/GF/Speech/FiniteState.hs @@ -0,0 +1,105 @@ +module GF.Speech.FiniteState (FA, State, + startState, finalStates, + states, transitions, + moveLabelsToNodes) where + +import Data.Graph.Inductive +import Data.List (nub,partition) +import Data.Maybe (fromJust) + +import Debug.Trace + +data FA a b = FA (Gr a b) Node [Node] + +type State = Node + +startState :: FA a b -> State +startState (FA _ s _) = s + +finalStates :: FA a b -> [State] +finalStates (FA _ _ ss) = ss + +states :: FA a b -> [(State,a)] +states (FA g _ _) = labNodes g + +transitions :: FA a b -> [(State,State,b)] +transitions (FA g _ _) = labEdges g + +onGraph :: (Gr a b -> Gr c d) -> FA a b -> FA c d +onGraph f (FA g s ss) = FA (f g) s ss + +newState :: a -> FA a b -> (FA a b, State) +newState x (FA g s ss) = (FA g' s ss, n) + where (g',n) = addNode x g + +newEdge :: Node -> Node -> b -> FA a b -> FA a b +newEdge f t l = onGraph (insEdge (f,t,l)) + +addNode :: DynGraph gr => a -> gr a b -> (gr a b, Node) +addNode x g = let s = freshNode g in (insNode (s,x) g, s) + +freshNode :: Graph gr => gr a b -> Node +freshNode = succ . snd . nodeRange + +-- | Get an infinte supply of new nodes. +freshNodes :: Graph gr => gr a b -> [Node] +freshNodes g = [snd (nodeRange g)+1..] + +-- | Transform a standard finite automaton with labelled edges +-- to one where the labels are on the nodes instead. This can add +-- up to one extra node per edge. +moveLabelsToNodes :: Eq a => FA () (Maybe a) -> FA (Maybe a) () +moveLabelsToNodes = onGraph moveLabelsToNodes_ + +moveLabelsToNodes_ :: (DynGraph gr, Eq a) => gr () (Maybe a) -> gr (Maybe a) () +moveLabelsToNodes_ g = gmap f g' + where g' = sameLabelIncoming g + f (to,n,(),fr) = (removeAdjLabels to, n, l, removeAdjLabels fr) + where l | not (allEqual ls) + = error $ "moveLabelsToNodes: not all incoming labels are equal" + | null ls = Nothing + | otherwise = head ls + ls = map snd $ lpre g' n + removeAdjLabels = map (\ (_,n) -> ((),n)) + +-- | Add the extra nodes needed to make sure that all edges to a node +-- have the same label. +sameLabelIncoming :: (DynGraph gr, Eq b) => gr () (Maybe b) -> gr () (Maybe b) +sameLabelIncoming gr = foldr fixIncoming gr (nodes gr) + +fixIncoming :: (DynGraph gr, Eq b) => Node -> gr () (Maybe b) -> gr () (Maybe b) +fixIncoming n gr | allLabelsEqual to' = gr + | otherwise = addContexts newContexts $ delNode n gr + where (to,_,_,fr) = context gr n + -- move cyclic edges to the list of incoming edges + (cyc,fr') = partition (\ (_,t) -> t == n) fr + to' = to ++ cyc + -- make new nodes for each unique label + newNodes = zip (nub $ map fst to') (freshNodes gr) + -- for each cyclic edge, add an edge to the node for + -- that label (could be the current node). + fr'' = fr' ++ [ (l',fromJust (lookup l' newNodes)) | (l',f) <- to', f == n ] + -- keep all incoming non-cyclic edges with the right label. + to'' l = [ e | e@(l',f) <- to', l'==l, f /= n ] + newContexts = [ (to'' l,n',(),fr'') | (l,n') <- newNodes] + +allLabelsEqual :: Eq b => Adj b -> Bool +allLabelsEqual = allEqual . map fst + +edgeLabel :: LEdge b -> b +edgeLabel (_,_,l) = l + +ledgeToEdge :: LEdge b -> Edge +ledgeToEdge (f,t,_) = (f,t) + +addContexts :: DynGraph gr => [Context a b] -> gr a b -> gr a b +addContexts cs gr = foldr (&) gr cs + +-- +-- * Utilities +-- + +allEqual :: Eq a => [a] -> Bool +allEqual [] = True +allEqual (x:xs) = all (==x) xs + diff --git a/src/GF/Speech/PrGSL.hs b/src/GF/Speech/PrGSL.hs index 8ddf0a521..b98339914 100644 --- a/src/GF/Speech/PrGSL.hs +++ b/src/GF/Speech/PrGSL.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/06/17 12:46:04 $ +-- > CVS $Date: 2005/09/07 14:21:30 $ -- > CVS $Author: bringert $ --- > CVS $Revision: 1.19 $ +-- > CVS $Revision: 1.20 $ -- -- This module prints a CFG as a Nuance GSL 2.0 grammar. -- @@ -18,7 +18,6 @@ module GF.Speech.PrGSL (gslPrinter) where import GF.Speech.SRG -import GF.Speech.TransformCFG import GF.Infra.Ident import GF.Formalism.CFG @@ -32,7 +31,7 @@ import Data.Char (toUpper,toLower) gslPrinter :: Ident -- ^ Grammar name -> Options -> CGrammar -> String gslPrinter name opts cfg = prGSL srg "" - where srg = makeSRG name opts (makeNice cfg) + where srg = makeSRG name opts cfg prGSL :: SRG -> ShowS prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) diff --git a/src/GF/Speech/PrJSGF.hs b/src/GF/Speech/PrJSGF.hs index 8b73a080a..052299329 100644 --- a/src/GF/Speech/PrJSGF.hs +++ b/src/GF/Speech/PrJSGF.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/06/17 12:46:05 $ +-- > CVS $Date: 2005/09/07 14:21:30 $ -- > CVS $Author: bringert $ --- > CVS $Revision: 1.13 $ +-- > CVS $Revision: 1.14 $ -- -- This module prints a CFG as a JSGF grammar. -- @@ -20,7 +20,6 @@ module GF.Speech.PrJSGF (jsgfPrinter) where import GF.Speech.SRG -import GF.Speech.TransformCFG import GF.Infra.Ident import GF.Formalism.CFG import GF.Formalism.Utilities (Symbol(..)) @@ -31,7 +30,7 @@ import GF.Infra.Option jsgfPrinter :: Ident -- ^ Grammar name -> Options -> CGrammar -> String jsgfPrinter name opts cfg = prJSGF srg "" - where srg = makeSRG name opts (makeNice cfg) + where srg = makeSRG name opts cfg prJSGF :: SRG -> ShowS prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) diff --git a/src/GF/Speech/PrSLF.hs b/src/GF/Speech/PrSLF.hs index cec2b89fd..9fe7d20ee 100644 --- a/src/GF/Speech/PrSLF.hs +++ b/src/GF/Speech/PrSLF.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/09/02 15:47:46 $ +-- > CVS $Date: 2005/09/07 14:21:30 $ -- > CVS $Author: bringert $ --- > CVS $Revision: 1.2 $ +-- > CVS $Revision: 1.3 $ -- -- This module converts a CFG to an SLF finite-state network -- for use with the ATK recognizer. The SLF format is described @@ -22,6 +22,7 @@ module GF.Speech.PrSLF (slfPrinter) where import GF.Speech.SRG import GF.Speech.TransformCFG +import GF.Speech.FiniteState import GF.Infra.Ident import GF.Formalism.CFG @@ -31,24 +32,35 @@ import GF.Infra.Print import GF.Infra.Option import Data.Char (toUpper,toLower) +import Data.Maybe (fromMaybe) data SLF = SLF { slfNodes :: [SLFNode], slfEdges :: [SLFEdge] } data SLFNode = SLFNode { nId :: Int, nWord :: SLFWord } -- | An SLF word is a word, or the empty string. -type SLFWord = String +type SLFWord = Maybe String data SLFEdge = SLFEdge { eId :: Int, eStart :: Int, eEnd :: Int } slfPrinter :: Ident -- ^ Grammar name -> Options -> CGrammar -> String -slfPrinter name opts cfg = prSLF slf "" - where slf = srg2slf $ makeSRG name opts $ makeRegular $ makeNice cfg +slfPrinter name opts cfg = prSLF (regularToSLF start rgr) "" + where start = getStartCat opts + rgr = makeRegular $ removeEmptyCats $ cfgToCFRules cfg + +regularToSLF :: String -> CFRules -> SLF +regularToSLF s rs = automatonToSLF $ compileAutomaton s rs + +automatonToSLF :: FA () (Maybe String) -> SLF +automatonToSLF fa = + SLF { slfNodes = map mkSLFNode (states fa'), + slfEdges = zipWith mkSLFEdge [0..] (transitions fa') } + where fa' = moveLabelsToNodes fa + mkSLFNode (i,w) = SLFNode { nId = i, nWord = w } + mkSLFEdge i (f,t,()) = SLFEdge { eId = i, eStart = f, eEnd = t } -srg2slf :: SRG -> SLF -srg2slf = undefined -- should use TransformCFG.compileAutomaton prSLF :: SLF -> ShowS prSLF (SLF { slfNodes = ns, slfEdges = es}) = header . unlinesS (map prNode ns) . unlinesS (map prEdge es) @@ -60,8 +72,10 @@ prSLF (SLF { slfNodes = ns, slfEdges = es}) = header . unlinesS (map prNode ns) showWord :: SLFWord -> String -showWord "" = "!NULL" -showWord w = w -- FIXME: convert words to upper case +showWord Nothing = "!NULL" +showWord (Just w) = w -- FIXME: convert words to upper case + -- FIXME: could this be the empty string? if so, print as !NULL prFields :: [(String,String)] -> ShowS prFields fs = unwordsS [ showString l . showChar '=' . showString v | (l,v) <- fs ] + diff --git a/src/GF/Speech/SRG.hs b/src/GF/Speech/SRG.hs index 2594e0a3e..9f4919ed2 100644 --- a/src/GF/Speech/SRG.hs +++ b/src/GF/Speech/SRG.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/06/17 12:46:05 $ +-- > CVS $Date: 2005/09/07 14:21:30 $ -- > CVS $Author: bringert $ --- > CVS $Revision: 1.14 $ +-- > CVS $Revision: 1.15 $ -- -- Representation of, conversion to, and utilities for -- printing of a general Speech Recognition Grammar. @@ -21,10 +21,6 @@ module GF.Speech.SRG where import GF.Infra.Ident --- import GF.OldParsing.CFGrammar --- import GF.OldParsing.Utilities (Symbol(..)) --- import GF.OldParsing.GrammarTypes --- import GF.Printing.PrintParser import GF.Formalism.CFG import GF.Formalism.Utilities (Symbol(..)) import GF.Conversion.Types @@ -53,18 +49,18 @@ type CatNames = FiniteMap String String makeSRG :: Ident -- ^ Grammar name -> Options -- ^ Grammar options - -> [CFRule_] -- ^ A context-free grammar + -> CGrammar -- ^ A context-free grammar -> SRG makeSRG i opts gr = SRG { grammarName = name, - startCat = start, + startCat = lookupFM_ names origStart, origStartCat = origStart, - rules = rs } + rules = map (cfgRulesToSRGRule names) cfgRules } where name = prIdent i - origStart = fromMaybe "S" (getOptVal opts gStartCat) ++ "{}.s" - start = lookupFM_ names origStart - names = mkCatNames name (nub $ map ruleCat gr) - rs = map (cfgRulesToSRGRule names) (sortAndGroupBy ruleCat gr) + origStart = getStartCat opts + gr' = removeLeftRecursion $ removeEmptyCats $ cfgToCFRules gr + (cats,cfgRules) = unzip gr' + names = mkCatNames name cats cfgRulesToSRGRule :: FiniteMap String String -> [CFRule_] -> SRGRule cfgRulesToSRGRule names rs@(r:_) = SRGRule cat origCat rhs @@ -111,15 +107,6 @@ unlinesS = join "\n" join :: String -> [ShowS] -> ShowS join glue = concatS . intersperse (showString glue) -sortAndGroupBy :: Ord b => - (a -> b) -- ^ Gets the value to sort and group by - -> [a] - -> [[a]] -sortAndGroupBy f = groupBy (both (==) f) . sortBy (both compare f) - -both :: (b -> b -> c) -> (a -> b) -> a -> a -> c -both f g x y = f (g x) (g y) - prtS :: Print a => a -> ShowS prtS = showString . prt diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs index 10f84bd79..9c3ed2c06 100644 --- a/src/GF/Speech/TransformCFG.hs +++ b/src/GF/Speech/TransformCFG.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/09/06 08:06:42 $ +-- > CVS $Date: 2005/09/07 14:21:31 $ -- > CVS $Author: bringert $ --- > CVS $Revision: 1.15 $ +-- > CVS $Revision: 1.16 $ -- -- This module does some useful transformations on CFGs. -- @@ -16,18 +16,25 @@ -- peb thinks: most of this module should be moved to GF.Conversion... ----------------------------------------------------------------------------- -module GF.Speech.TransformCFG (makeNice, CFRule_, makeRegular) where +module GF.Speech.TransformCFG (CFRule_, CFRules, + cfgToCFRules, getStartCat, + removeLeftRecursion, + removeEmptyCats, + makeRegular, + compileAutomaton) where import GF.Infra.Ident import GF.Formalism.CFG import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, NameProfile(..)) import GF.Conversion.Types import GF.Infra.Print +import GF.Infra.Option +import GF.Speech.FiniteState import Control.Monad import Data.FiniteMap import Data.List -import Data.Maybe (fromJust) +import Data.Maybe (fromJust, fromMaybe) import Debug.Trace @@ -36,33 +43,33 @@ import Debug.Trace type CFRule_ = CFRule Cat_ Name Token type Cat_ = String -type CFRules = FiniteMap Cat_ [CFRule_] +type CFRules = [(Cat_,[CFRule_])] --- | Remove left-recursion and categories with no productions --- from a context-free grammar. -makeNice :: CGrammar -> [CFRule_] -makeNice = ungroupProds . makeNice' . groupProds . cfgToCFRules - where makeNice' = removeLeftRecursion . removeEmptyCats - -cfgToCFRules :: CGrammar -> [CFRule_] -cfgToCFRules cfg = [CFRule (catToString c) (map symb r) n | CFRule c r n <- cfg] +cfgToCFRules :: CGrammar -> CFRules +cfgToCFRules cfg = groupProds [CFRule (catToString c) (map symb r) n | CFRule c r n <- cfg] where symb = mapSymbol catToString id -- symb (Cat c) = Cat (catToString c) -- symb (Tok t) = Tok t catToString = prt +getStartCat :: Options -> String +getStartCat opts = fromMaybe "S" (getOptVal opts gStartCat) ++ "{}.s" + -- | Group productions by their lhs categories groupProds :: [CFRule_] -> CFRules -groupProds = addListToFM_C (++) emptyFM . map (\r -> (lhsCat r,[r])) +groupProds = fmToList . addListToFM_C (++) emptyFM . map (\r -> (lhsCat r,[r])) ungroupProds :: CFRules -> [CFRule_] -ungroupProds = concat . eltsFM +ungroupProds = concat . map snd + +catRules :: CFRules -> Cat_ -> [CFRule_] +catRules rs c = fromMaybe [] (lookup c rs) -- | Remove productions which use categories which have no productions removeEmptyCats :: CFRules -> CFRules -removeEmptyCats rss = listToFM $ fix removeEmptyCats' $ fmToList rss +removeEmptyCats = fix removeEmptyCats' where - removeEmptyCats' :: [(Cat_,[CFRule_])] -> [(Cat_,[CFRule_])] + removeEmptyCats' :: CFRules -> CFRules removeEmptyCats' rs = k' where keep = filter (not . null . snd) rs @@ -71,16 +78,16 @@ removeEmptyCats rss = listToFM $ fix removeEmptyCats' $ fmToList rss k' = map (\ (c,xs) -> (c, filter (not . anyUsedBy emptyCats) xs)) keep removeLeftRecursion :: CFRules -> CFRules -removeLeftRecursion rs = listToFM $ concatMap removeDirectLeftRecursion $ map handleProds $ fmToList rs +removeLeftRecursion rs = concatMap removeDirectLeftRecursion $ map handleProds rs where handleProds (c, r) = (c, concatMap handleProd r) handleProd (CFRule ai (Cat aj:alpha) n) | aj < ai = -- FIXME: this will give multiple rules with the same name - [CFRule ai (beta ++ alpha) n | CFRule _ beta _ <- fromJust (lookupFM rs aj)] + [CFRule ai (beta ++ alpha) n | CFRule _ beta _ <- fromJust (lookup aj rs)] handleProd r = [r] removeDirectLeftRecursion :: (Cat_,[CFRule_]) -- ^ All productions for a category - -> [(Cat_,[CFRule_])] + -> CFRules removeDirectLeftRecursion (a,rs) | null dr = [(a,rs)] | otherwise = [(a, as), (a', a's)] where @@ -100,16 +107,14 @@ isDirectLeftRecursive _ = False -- Grammars through Approximation\", Mohri and Nederhof, 2000 -- to create an over-generating regular frammar for a context-free -- grammar -makeRegular :: [CFRule_] -> [CFRule_] -makeRegular g = concatMap trSet (mutRecCats g) +makeRegular :: CFRules -> CFRules +makeRegular g = groupProds $ concatMap trSet (mutRecCats g) where trSet cs | allXLinear cs rs = rs | otherwise = concatMap handleCat cs where rs = concatMap (catRules g) cs handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e - ++ concatMap (makeRightLinearRules c) crs - -- FIXME: add more rules here, see pg 255, item 2 - where crs = catRules rs c - c' = newCat c + ++ concatMap (makeRightLinearRules c) (catRules g c) + where c' = newCat c makeRightLinearRules b' (CFRule c ss n) = case ys of [] -> [CFRule b' (xs ++ [Cat (newCat c)]) n] -- no non-terminals left @@ -119,27 +124,29 @@ makeRegular g = concatMap trSet (mutRecCats g) newCat c = c ++ "$" --- | Check if all the rules are right-linear, or all the rules are --- left-linear, with respect to given categories. -allXLinear :: Eq c => [c] -> [CFRule c n t] -> Bool -allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs - -- | Get the sets of mutually recursive non-terminals for a grammar. -mutRecCats :: Eq c => [CFRule c n t] -> [[c]] +mutRecCats :: CFRules -> [[Cat_]] mutRecCats g = equivalenceClasses $ symmetricSubrelation $ transitiveClosure $ reflexiveClosure allCats r - where r = nub [(c,c') | CFRule c ss _ <- g, Cat c' <- ss] - allCats = nub [c | CFRule c _ _ <- g] + where r = nub [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss] + allCats = map fst g + + -- Convert a strongly regular grammar to a finite automaton. --- compileAutomaton :: +compileAutomaton :: Cat_ -- ^ Start category + -> CFRules + -> FA () (Maybe Token) +compileAutomaton s g = undefined -- -- * CFG rule utilities -- +{- -- | Get all the rules for a given category. catRules :: Eq c => [CFRule c n t] -> c -> [CFRule c n t] catRules rs c = [r | r@(CFRule c' _ _) <- rs, c' == c] +-} -- | Gets the set of LHS categories of a set of rules. lhsCats :: Eq c => [CFRule c n t] -> [c] @@ -148,6 +155,11 @@ lhsCats = nub . map lhsCat lhsCat :: CFRule c n t -> c lhsCat (CFRule c _ _) = c +-- | Check if all the rules are right-linear, or all the rules are +-- left-linear, with respect to given categories. +allXLinear :: Eq c => [c] -> [CFRule c n t] -> Bool +allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs + -- | Checks if a context-free rule is right-linear. isRightLinear :: Eq c => [c] -- ^ The categories to consider -> CFRule c n t -- ^ The rule to check for right-linearity diff --git a/src/Makefile b/src/Makefile index 678d6cd6b..dea6000a7 100644 --- a/src/Makefile +++ b/src/Makefile @@ -3,7 +3,7 @@ include config.mk GHMAKE=$(GHC) --make GHCXMAKE=ghcxmake -GHCFLAGS+= -fglasgow-exts +GHCFLAGS+= -fglasgow-exts -package fgl GHCOPTFLAGS=-O2 GHCFUDFLAG= JAVAFLAGS=-target 1.4 -source 1.4