1
0
forked from GitHub/gf-core

Added the prerequisits for automaton building.

This commit is contained in:
bringert
2005-09-07 13:21:30 +00:00
parent 6b00f22c12
commit a134b2b6f8
7 changed files with 191 additions and 75 deletions

View File

@@ -0,0 +1,105 @@
module GF.Speech.FiniteState (FA, State,
startState, finalStates,
states, transitions,
moveLabelsToNodes) where
import Data.Graph.Inductive
import Data.List (nub,partition)
import Data.Maybe (fromJust)
import Debug.Trace
data FA a b = FA (Gr a b) Node [Node]
type State = Node
startState :: FA a b -> State
startState (FA _ s _) = s
finalStates :: FA a b -> [State]
finalStates (FA _ _ ss) = ss
states :: FA a b -> [(State,a)]
states (FA g _ _) = labNodes g
transitions :: FA a b -> [(State,State,b)]
transitions (FA g _ _) = labEdges g
onGraph :: (Gr a b -> Gr c d) -> FA a b -> FA c d
onGraph f (FA g s ss) = FA (f g) s ss
newState :: a -> FA a b -> (FA a b, State)
newState x (FA g s ss) = (FA g' s ss, n)
where (g',n) = addNode x g
newEdge :: Node -> Node -> b -> FA a b -> FA a b
newEdge f t l = onGraph (insEdge (f,t,l))
addNode :: DynGraph gr => a -> gr a b -> (gr a b, Node)
addNode x g = let s = freshNode g in (insNode (s,x) g, s)
freshNode :: Graph gr => gr a b -> Node
freshNode = succ . snd . nodeRange
-- | Get an infinte supply of new nodes.
freshNodes :: Graph gr => gr a b -> [Node]
freshNodes g = [snd (nodeRange g)+1..]
-- | Transform a standard finite automaton with labelled edges
-- to one where the labels are on the nodes instead. This can add
-- up to one extra node per edge.
moveLabelsToNodes :: Eq a => FA () (Maybe a) -> FA (Maybe a) ()
moveLabelsToNodes = onGraph moveLabelsToNodes_
moveLabelsToNodes_ :: (DynGraph gr, Eq a) => gr () (Maybe a) -> gr (Maybe a) ()
moveLabelsToNodes_ g = gmap f g'
where g' = sameLabelIncoming g
f (to,n,(),fr) = (removeAdjLabels to, n, l, removeAdjLabels fr)
where l | not (allEqual ls)
= error $ "moveLabelsToNodes: not all incoming labels are equal"
| null ls = Nothing
| otherwise = head ls
ls = map snd $ lpre g' n
removeAdjLabels = map (\ (_,n) -> ((),n))
-- | Add the extra nodes needed to make sure that all edges to a node
-- have the same label.
sameLabelIncoming :: (DynGraph gr, Eq b) => gr () (Maybe b) -> gr () (Maybe b)
sameLabelIncoming gr = foldr fixIncoming gr (nodes gr)
fixIncoming :: (DynGraph gr, Eq b) => Node -> gr () (Maybe b) -> gr () (Maybe b)
fixIncoming n gr | allLabelsEqual to' = gr
| otherwise = addContexts newContexts $ delNode n gr
where (to,_,_,fr) = context gr n
-- move cyclic edges to the list of incoming edges
(cyc,fr') = partition (\ (_,t) -> t == n) fr
to' = to ++ cyc
-- make new nodes for each unique label
newNodes = zip (nub $ map fst to') (freshNodes gr)
-- for each cyclic edge, add an edge to the node for
-- that label (could be the current node).
fr'' = fr' ++ [ (l',fromJust (lookup l' newNodes)) | (l',f) <- to', f == n ]
-- keep all incoming non-cyclic edges with the right label.
to'' l = [ e | e@(l',f) <- to', l'==l, f /= n ]
newContexts = [ (to'' l,n',(),fr'') | (l,n') <- newNodes]
allLabelsEqual :: Eq b => Adj b -> Bool
allLabelsEqual = allEqual . map fst
edgeLabel :: LEdge b -> b
edgeLabel (_,_,l) = l
ledgeToEdge :: LEdge b -> Edge
ledgeToEdge (f,t,_) = (f,t)
addContexts :: DynGraph gr => [Context a b] -> gr a b -> gr a b
addContexts cs gr = foldr (&) gr cs
--
-- * Utilities
--
allEqual :: Eq a => [a] -> Bool
allEqual [] = True
allEqual (x:xs) = all (==x) xs

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/06/17 12:46:04 $
-- > CVS $Date: 2005/09/07 14:21:30 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.19 $
-- > CVS $Revision: 1.20 $
--
-- This module prints a CFG as a Nuance GSL 2.0 grammar.
--
@@ -18,7 +18,6 @@
module GF.Speech.PrGSL (gslPrinter) where
import GF.Speech.SRG
import GF.Speech.TransformCFG
import GF.Infra.Ident
import GF.Formalism.CFG
@@ -32,7 +31,7 @@ import Data.Char (toUpper,toLower)
gslPrinter :: Ident -- ^ Grammar name
-> Options -> CGrammar -> String
gslPrinter name opts cfg = prGSL srg ""
where srg = makeSRG name opts (makeNice cfg)
where srg = makeSRG name opts cfg
prGSL :: SRG -> ShowS
prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/06/17 12:46:05 $
-- > CVS $Date: 2005/09/07 14:21:30 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.13 $
-- > CVS $Revision: 1.14 $
--
-- This module prints a CFG as a JSGF grammar.
--
@@ -20,7 +20,6 @@
module GF.Speech.PrJSGF (jsgfPrinter) where
import GF.Speech.SRG
import GF.Speech.TransformCFG
import GF.Infra.Ident
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..))
@@ -31,7 +30,7 @@ import GF.Infra.Option
jsgfPrinter :: Ident -- ^ Grammar name
-> Options -> CGrammar -> String
jsgfPrinter name opts cfg = prJSGF srg ""
where srg = makeSRG name opts (makeNice cfg)
where srg = makeSRG name opts cfg
prJSGF :: SRG -> ShowS
prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/09/02 15:47:46 $
-- > CVS $Date: 2005/09/07 14:21:30 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.2 $
-- > CVS $Revision: 1.3 $
--
-- This module converts a CFG to an SLF finite-state network
-- for use with the ATK recognizer. The SLF format is described
@@ -22,6 +22,7 @@ module GF.Speech.PrSLF (slfPrinter) where
import GF.Speech.SRG
import GF.Speech.TransformCFG
import GF.Speech.FiniteState
import GF.Infra.Ident
import GF.Formalism.CFG
@@ -31,24 +32,35 @@ import GF.Infra.Print
import GF.Infra.Option
import Data.Char (toUpper,toLower)
import Data.Maybe (fromMaybe)
data SLF = SLF { slfNodes :: [SLFNode], slfEdges :: [SLFEdge] }
data SLFNode = SLFNode { nId :: Int, nWord :: SLFWord }
-- | An SLF word is a word, or the empty string.
type SLFWord = String
type SLFWord = Maybe String
data SLFEdge = SLFEdge { eId :: Int, eStart :: Int, eEnd :: Int }
slfPrinter :: Ident -- ^ Grammar name
-> Options -> CGrammar -> String
slfPrinter name opts cfg = prSLF slf ""
where slf = srg2slf $ makeSRG name opts $ makeRegular $ makeNice cfg
slfPrinter name opts cfg = prSLF (regularToSLF start rgr) ""
where start = getStartCat opts
rgr = makeRegular $ removeEmptyCats $ cfgToCFRules cfg
regularToSLF :: String -> CFRules -> SLF
regularToSLF s rs = automatonToSLF $ compileAutomaton s rs
automatonToSLF :: FA () (Maybe String) -> SLF
automatonToSLF fa =
SLF { slfNodes = map mkSLFNode (states fa'),
slfEdges = zipWith mkSLFEdge [0..] (transitions fa') }
where fa' = moveLabelsToNodes fa
mkSLFNode (i,w) = SLFNode { nId = i, nWord = w }
mkSLFEdge i (f,t,()) = SLFEdge { eId = i, eStart = f, eEnd = t }
srg2slf :: SRG -> SLF
srg2slf = undefined -- should use TransformCFG.compileAutomaton
prSLF :: SLF -> ShowS
prSLF (SLF { slfNodes = ns, slfEdges = es}) = header . unlinesS (map prNode ns) . unlinesS (map prEdge es)
@@ -60,8 +72,10 @@ prSLF (SLF { slfNodes = ns, slfEdges = es}) = header . unlinesS (map prNode ns)
showWord :: SLFWord -> String
showWord "" = "!NULL"
showWord w = w -- FIXME: convert words to upper case
showWord Nothing = "!NULL"
showWord (Just w) = w -- FIXME: convert words to upper case
-- FIXME: could this be the empty string? if so, print as !NULL
prFields :: [(String,String)] -> ShowS
prFields fs = unwordsS [ showString l . showChar '=' . showString v | (l,v) <- fs ]

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/06/17 12:46:05 $
-- > CVS $Date: 2005/09/07 14:21:30 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.14 $
-- > CVS $Revision: 1.15 $
--
-- Representation of, conversion to, and utilities for
-- printing of a general Speech Recognition Grammar.
@@ -21,10 +21,6 @@
module GF.Speech.SRG where
import GF.Infra.Ident
-- import GF.OldParsing.CFGrammar
-- import GF.OldParsing.Utilities (Symbol(..))
-- import GF.OldParsing.GrammarTypes
-- import GF.Printing.PrintParser
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..))
import GF.Conversion.Types
@@ -53,18 +49,18 @@ type CatNames = FiniteMap String String
makeSRG :: Ident -- ^ Grammar name
-> Options -- ^ Grammar options
-> [CFRule_] -- ^ A context-free grammar
-> CGrammar -- ^ A context-free grammar
-> SRG
makeSRG i opts gr = SRG { grammarName = name,
startCat = start,
startCat = lookupFM_ names origStart,
origStartCat = origStart,
rules = rs }
rules = map (cfgRulesToSRGRule names) cfgRules }
where
name = prIdent i
origStart = fromMaybe "S" (getOptVal opts gStartCat) ++ "{}.s"
start = lookupFM_ names origStart
names = mkCatNames name (nub $ map ruleCat gr)
rs = map (cfgRulesToSRGRule names) (sortAndGroupBy ruleCat gr)
origStart = getStartCat opts
gr' = removeLeftRecursion $ removeEmptyCats $ cfgToCFRules gr
(cats,cfgRules) = unzip gr'
names = mkCatNames name cats
cfgRulesToSRGRule :: FiniteMap String String -> [CFRule_] -> SRGRule
cfgRulesToSRGRule names rs@(r:_) = SRGRule cat origCat rhs
@@ -111,15 +107,6 @@ unlinesS = join "\n"
join :: String -> [ShowS] -> ShowS
join glue = concatS . intersperse (showString glue)
sortAndGroupBy :: Ord b =>
(a -> b) -- ^ Gets the value to sort and group by
-> [a]
-> [[a]]
sortAndGroupBy f = groupBy (both (==) f) . sortBy (both compare f)
both :: (b -> b -> c) -> (a -> b) -> a -> a -> c
both f g x y = f (g x) (g y)
prtS :: Print a => a -> ShowS
prtS = showString . prt

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/09/06 08:06:42 $
-- > CVS $Date: 2005/09/07 14:21:31 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.15 $
-- > CVS $Revision: 1.16 $
--
-- This module does some useful transformations on CFGs.
--
@@ -16,18 +16,25 @@
-- peb thinks: most of this module should be moved to GF.Conversion...
-----------------------------------------------------------------------------
module GF.Speech.TransformCFG (makeNice, CFRule_, makeRegular) where
module GF.Speech.TransformCFG (CFRule_, CFRules,
cfgToCFRules, getStartCat,
removeLeftRecursion,
removeEmptyCats,
makeRegular,
compileAutomaton) where
import GF.Infra.Ident
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, NameProfile(..))
import GF.Conversion.Types
import GF.Infra.Print
import GF.Infra.Option
import GF.Speech.FiniteState
import Control.Monad
import Data.FiniteMap
import Data.List
import Data.Maybe (fromJust)
import Data.Maybe (fromJust, fromMaybe)
import Debug.Trace
@@ -36,33 +43,33 @@ import Debug.Trace
type CFRule_ = CFRule Cat_ Name Token
type Cat_ = String
type CFRules = FiniteMap Cat_ [CFRule_]
type CFRules = [(Cat_,[CFRule_])]
-- | Remove left-recursion and categories with no productions
-- from a context-free grammar.
makeNice :: CGrammar -> [CFRule_]
makeNice = ungroupProds . makeNice' . groupProds . cfgToCFRules
where makeNice' = removeLeftRecursion . removeEmptyCats
cfgToCFRules :: CGrammar -> [CFRule_]
cfgToCFRules cfg = [CFRule (catToString c) (map symb r) n | CFRule c r n <- cfg]
cfgToCFRules :: CGrammar -> CFRules
cfgToCFRules cfg = groupProds [CFRule (catToString c) (map symb r) n | CFRule c r n <- cfg]
where symb = mapSymbol catToString id
-- symb (Cat c) = Cat (catToString c)
-- symb (Tok t) = Tok t
catToString = prt
getStartCat :: Options -> String
getStartCat opts = fromMaybe "S" (getOptVal opts gStartCat) ++ "{}.s"
-- | Group productions by their lhs categories
groupProds :: [CFRule_] -> CFRules
groupProds = addListToFM_C (++) emptyFM . map (\r -> (lhsCat r,[r]))
groupProds = fmToList . addListToFM_C (++) emptyFM . map (\r -> (lhsCat r,[r]))
ungroupProds :: CFRules -> [CFRule_]
ungroupProds = concat . eltsFM
ungroupProds = concat . map snd
catRules :: CFRules -> Cat_ -> [CFRule_]
catRules rs c = fromMaybe [] (lookup c rs)
-- | Remove productions which use categories which have no productions
removeEmptyCats :: CFRules -> CFRules
removeEmptyCats rss = listToFM $ fix removeEmptyCats' $ fmToList rss
removeEmptyCats = fix removeEmptyCats'
where
removeEmptyCats' :: [(Cat_,[CFRule_])] -> [(Cat_,[CFRule_])]
removeEmptyCats' :: CFRules -> CFRules
removeEmptyCats' rs = k'
where
keep = filter (not . null . snd) rs
@@ -71,16 +78,16 @@ removeEmptyCats rss = listToFM $ fix removeEmptyCats' $ fmToList rss
k' = map (\ (c,xs) -> (c, filter (not . anyUsedBy emptyCats) xs)) keep
removeLeftRecursion :: CFRules -> CFRules
removeLeftRecursion rs = listToFM $ concatMap removeDirectLeftRecursion $ map handleProds $ fmToList rs
removeLeftRecursion rs = concatMap removeDirectLeftRecursion $ map handleProds rs
where
handleProds (c, r) = (c, concatMap handleProd r)
handleProd (CFRule ai (Cat aj:alpha) n) | aj < ai =
-- FIXME: this will give multiple rules with the same name
[CFRule ai (beta ++ alpha) n | CFRule _ beta _ <- fromJust (lookupFM rs aj)]
[CFRule ai (beta ++ alpha) n | CFRule _ beta _ <- fromJust (lookup aj rs)]
handleProd r = [r]
removeDirectLeftRecursion :: (Cat_,[CFRule_]) -- ^ All productions for a category
-> [(Cat_,[CFRule_])]
-> CFRules
removeDirectLeftRecursion (a,rs) | null dr = [(a,rs)]
| otherwise = [(a, as), (a', a's)]
where
@@ -100,16 +107,14 @@ isDirectLeftRecursive _ = False
-- Grammars through Approximation\", Mohri and Nederhof, 2000
-- to create an over-generating regular frammar for a context-free
-- grammar
makeRegular :: [CFRule_] -> [CFRule_]
makeRegular g = concatMap trSet (mutRecCats g)
makeRegular :: CFRules -> CFRules
makeRegular g = groupProds $ concatMap trSet (mutRecCats g)
where trSet cs | allXLinear cs rs = rs
| otherwise = concatMap handleCat cs
where rs = concatMap (catRules g) cs
handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e
++ concatMap (makeRightLinearRules c) crs
-- FIXME: add more rules here, see pg 255, item 2
where crs = catRules rs c
c' = newCat c
++ concatMap (makeRightLinearRules c) (catRules g c)
where c' = newCat c
makeRightLinearRules b' (CFRule c ss n) =
case ys of
[] -> [CFRule b' (xs ++ [Cat (newCat c)]) n] -- no non-terminals left
@@ -119,27 +124,29 @@ makeRegular g = concatMap trSet (mutRecCats g)
newCat c = c ++ "$"
-- | Check if all the rules are right-linear, or all the rules are
-- left-linear, with respect to given categories.
allXLinear :: Eq c => [c] -> [CFRule c n t] -> Bool
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
-- | Get the sets of mutually recursive non-terminals for a grammar.
mutRecCats :: Eq c => [CFRule c n t] -> [[c]]
mutRecCats :: CFRules -> [[Cat_]]
mutRecCats g = equivalenceClasses $ symmetricSubrelation $ transitiveClosure $ reflexiveClosure allCats r
where r = nub [(c,c') | CFRule c ss _ <- g, Cat c' <- ss]
allCats = nub [c | CFRule c _ _ <- g]
where r = nub [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss]
allCats = map fst g
-- Convert a strongly regular grammar to a finite automaton.
-- compileAutomaton ::
compileAutomaton :: Cat_ -- ^ Start category
-> CFRules
-> FA () (Maybe Token)
compileAutomaton s g = undefined
--
-- * CFG rule utilities
--
{-
-- | Get all the rules for a given category.
catRules :: Eq c => [CFRule c n t] -> c -> [CFRule c n t]
catRules rs c = [r | r@(CFRule c' _ _) <- rs, c' == c]
-}
-- | Gets the set of LHS categories of a set of rules.
lhsCats :: Eq c => [CFRule c n t] -> [c]
@@ -148,6 +155,11 @@ lhsCats = nub . map lhsCat
lhsCat :: CFRule c n t -> c
lhsCat (CFRule c _ _) = c
-- | Check if all the rules are right-linear, or all the rules are
-- left-linear, with respect to given categories.
allXLinear :: Eq c => [c] -> [CFRule c n t] -> Bool
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
-- | Checks if a context-free rule is right-linear.
isRightLinear :: Eq c => [c] -- ^ The categories to consider
-> CFRule c n t -- ^ The rule to check for right-linearity

View File

@@ -3,7 +3,7 @@ include config.mk
GHMAKE=$(GHC) --make
GHCXMAKE=ghcxmake
GHCFLAGS+= -fglasgow-exts
GHCFLAGS+= -fglasgow-exts -package fgl
GHCOPTFLAGS=-O2
GHCFUDFLAG=
JAVAFLAGS=-target 1.4 -source 1.4