mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-09 03:02:50 -06:00
+ References to modules under src/compiler have been eliminated from the PGF library (under src/runtime/haskell). Only two functions had to be moved (from GF.Data.Utilities to PGF.Utilities) to make this possible, other apparent dependencies turned out to be vacuous. + In gf.cabal, the GF executable no longer directly depends on the PGF library source directory, but only on the exposed library modules. This means that there is less duplication in gf.cabal and that the 30 modules in the PGF library will no longer be compiled twice while building GF. To make this possible, additional PGF library modules have been exposed, even though they should probably be considered for internal use only. They could be collected in a PGF.Internal module, or marked as "unstable", to make this explicit. + Also, by using the -fwarn-unused-imports flag, ~220 redundant imports were found and removed, reducing the total number of imports by ~15%.
245 lines
9.7 KiB
Haskell
245 lines
9.7 KiB
Haskell
----------------------------------------------------------------------
|
|
-- |
|
|
-- Module : GF.Speech.CFGToFA
|
|
--
|
|
-- Approximates CFGs with finite state networks.
|
|
----------------------------------------------------------------------
|
|
module GF.Speech.CFGToFA (cfgToFA, makeSimpleRegular,
|
|
MFA(..), cfgToMFA, cfgToFA') where
|
|
|
|
import Data.List
|
|
import Data.Maybe
|
|
import Data.Map (Map)
|
|
import qualified Data.Map as Map
|
|
import Data.Set (Set)
|
|
import qualified Data.Set as Set
|
|
|
|
--import PGF.CId
|
|
import PGF.Data
|
|
import GF.Data.Utilities
|
|
import GF.Speech.CFG
|
|
--import GF.Speech.PGFToCFG
|
|
--import GF.Infra.Ident (Ident)
|
|
|
|
import GF.Data.Graph
|
|
--import GF.Data.Relation
|
|
import GF.Speech.FiniteState
|
|
--import GF.Speech.CFG
|
|
|
|
data Recursivity = RightR | LeftR | NotR
|
|
|
|
data MutRecSet = MutRecSet {
|
|
mrCats :: Set Cat,
|
|
mrNonRecRules :: [CFRule],
|
|
mrRecRules :: [CFRule],
|
|
mrRec :: Recursivity
|
|
}
|
|
|
|
|
|
type MutRecSets = Map Cat MutRecSet
|
|
|
|
--
|
|
-- * Multiple DFA type
|
|
--
|
|
|
|
data MFA = MFA Cat [(Cat,DFA CFSymbol)]
|
|
|
|
|
|
|
|
cfgToFA :: CFG -> DFA Token
|
|
cfgToFA = minimize . compileAutomaton . makeSimpleRegular
|
|
|
|
|
|
--
|
|
-- * Compile strongly regular grammars to NFAs
|
|
--
|
|
|
|
-- Convert a strongly regular grammar to a finite automaton.
|
|
compileAutomaton :: CFG -> NFA Token
|
|
compileAutomaton g = make_fa (g,ns) s [NonTerminal (cfgStartCat g)] f fa
|
|
where
|
|
(fa,s,f) = newFA_
|
|
ns = mutRecSets g $ mutRecCats False g
|
|
|
|
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
|
|
-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000.
|
|
make_fa :: (CFG,MutRecSets) -> State -> [CFSymbol] -> State
|
|
-> NFA Token -> NFA Token
|
|
make_fa c@(g,ns) q0 alpha q1 fa =
|
|
case alpha of
|
|
[] -> newTransition q0 q1 Nothing fa
|
|
[Terminal t] -> newTransition q0 q1 (Just t) fa
|
|
[NonTerminal a] ->
|
|
case Map.lookup a ns of
|
|
-- a is recursive
|
|
Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) ->
|
|
case mrRec n of
|
|
-- the set Ni is right-recursive or cyclic
|
|
RightR ->
|
|
let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs]
|
|
++ [(getState c, xs, getState d) | CFRule c ss _ <- rs,
|
|
let (xs,NonTerminal d) = (init ss,last ss)]
|
|
in make_fas new $ newTransition q0 (getState a) Nothing fa'
|
|
-- the set Ni is left-recursive
|
|
LeftR ->
|
|
let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs]
|
|
++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- rs]
|
|
in make_fas new $ newTransition (getState a) q1 Nothing fa'
|
|
where
|
|
(fa',stateMap) = addStatesForCats ni fa
|
|
getState x = Map.findWithDefault
|
|
(error $ "CFGToFiniteState: No state for " ++ x)
|
|
x stateMap
|
|
-- a is not recursive
|
|
Nothing -> let rs = catRules g a
|
|
in foldl' (\f (CFRule _ b _) -> make_fa_ q0 b q1 f) fa rs
|
|
(x:beta) -> let (fa',q) = newState () fa
|
|
in make_fa_ q beta q1 $ make_fa_ q0 [x] q fa'
|
|
where
|
|
make_fa_ = make_fa c
|
|
make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs
|
|
|
|
--
|
|
-- * Compile a strongly regular grammar to a DFA with sub-automata
|
|
--
|
|
|
|
cfgToMFA :: CFG -> MFA
|
|
cfgToMFA = buildMFA . makeSimpleRegular
|
|
|
|
-- | Build a DFA by building and expanding an MFA
|
|
cfgToFA' :: CFG -> DFA Token
|
|
cfgToFA' = mfaToDFA . cfgToMFA
|
|
|
|
buildMFA :: CFG -> MFA
|
|
buildMFA g = sortSubLats $ removeUnusedSubLats mfa
|
|
where fas = compileAutomata g
|
|
mfa = MFA (cfgStartCat g) [(c, minimize fa) | (c,fa) <- fas]
|
|
|
|
mfaStartDFA :: MFA -> DFA CFSymbol
|
|
mfaStartDFA (MFA start subs) =
|
|
fromMaybe (error $ "Bad start MFA: " ++ start) $ lookup start subs
|
|
|
|
mfaToDFA :: MFA -> DFA Token
|
|
mfaToDFA mfa@(MFA _ subs) = minimize $ expand $ dfa2nfa $ mfaStartDFA mfa
|
|
where
|
|
subs' = Map.fromList [(c, dfa2nfa n) | (c,n) <- subs]
|
|
getSub l = fromJust $ Map.lookup l subs'
|
|
expand (FA (Graph c ns es) s f)
|
|
= foldl' expandEdge (FA (Graph c ns []) s f) es
|
|
expandEdge fa (f,t,x) =
|
|
case x of
|
|
Nothing -> newTransition f t Nothing fa
|
|
Just (Terminal s) -> newTransition f t (Just s) fa
|
|
Just (NonTerminal l) -> insertNFA fa (f,t) (expand $ getSub l)
|
|
|
|
removeUnusedSubLats :: MFA -> MFA
|
|
removeUnusedSubLats mfa@(MFA start subs) = MFA start [(c,s) | (c,s) <- subs, isUsed c]
|
|
where
|
|
usedMap = subLatUseMap mfa
|
|
used = growUsedSet (Set.singleton start)
|
|
isUsed c = c `Set.member` used
|
|
growUsedSet = fix (\s -> foldl Set.union s $ mapMaybe (flip Map.lookup usedMap) $ Set.toList s)
|
|
|
|
subLatUseMap :: MFA -> Map Cat (Set Cat)
|
|
subLatUseMap (MFA _ subs) = Map.fromList [(c,usedSubLats n) | (c,n) <- subs]
|
|
|
|
usedSubLats :: DFA CFSymbol -> Set Cat
|
|
usedSubLats fa = Set.fromList [s | (_,_,NonTerminal s) <- transitions fa]
|
|
|
|
-- | Sort sub-networks topologically.
|
|
sortSubLats :: MFA -> MFA
|
|
sortSubLats mfa@(MFA main subs) = MFA main (reverse $ sortLats usedByMap subs)
|
|
where
|
|
usedByMap = revMultiMap (subLatUseMap mfa)
|
|
sortLats _ [] = []
|
|
sortLats ub ls = xs ++ sortLats ub' ys
|
|
where (xs,ys) = partition ((==0) . indeg) ls
|
|
ub' = Map.map (Set.\\ Set.fromList (map fst xs)) ub
|
|
indeg (c,_) = maybe 0 Set.size $ Map.lookup c ub
|
|
|
|
-- | Convert a strongly regular grammar to a number of finite automata,
|
|
-- one for each non-terminal.
|
|
-- The edges in the automata accept tokens, or name another automaton to use.
|
|
compileAutomata :: CFG
|
|
-> [(Cat,NFA CFSymbol)]
|
|
-- ^ A map of non-terminals and their automata.
|
|
compileAutomata g = [(c, makeOneFA c) | c <- allCats g]
|
|
where
|
|
mrs = mutRecSets g $ mutRecCats True g
|
|
makeOneFA c = make_fa1 mr s [NonTerminal c] f fa
|
|
where (fa,s,f) = newFA_
|
|
mr = fromJust (Map.lookup c mrs)
|
|
|
|
|
|
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
|
|
-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000,
|
|
-- adapted to build a finite automaton for a single (mutually recursive) set only.
|
|
-- Categories not in the set will result in category-labelled edges.
|
|
make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which
|
|
-- we are building the automaton.
|
|
-> State -- ^ State to come from
|
|
-> [CFSymbol] -- ^ Symbols to accept
|
|
-> State -- ^ State to end up in
|
|
-> NFA CFSymbol -- ^ FA to add to.
|
|
-> NFA CFSymbol
|
|
make_fa1 mr q0 alpha q1 fa =
|
|
case alpha of
|
|
[] -> newTransition q0 q1 Nothing fa
|
|
[t@(Terminal _)] -> newTransition q0 q1 (Just t) fa
|
|
[c@(NonTerminal a)] | not (a `Set.member` mrCats mr) -> newTransition q0 q1 (Just c) fa
|
|
[NonTerminal a] ->
|
|
case mrRec mr of
|
|
NotR -> -- the set is a non-recursive (always singleton) set of categories
|
|
-- so the set of category rules is the set of rules for the whole set
|
|
make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa
|
|
RightR -> -- the set is right-recursive or cyclic
|
|
let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr]
|
|
++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr,
|
|
let (xs,NonTerminal d) = (init ss,last ss)]
|
|
in make_fas new $ newTransition q0 (getState a) Nothing fa'
|
|
LeftR -> -- the set is left-recursive
|
|
let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr]
|
|
++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- mrRecRules mr]
|
|
in make_fas new $ newTransition (getState a) q1 Nothing fa'
|
|
where
|
|
(fa',stateMap) = addStatesForCats (mrCats mr) fa
|
|
getState x = Map.findWithDefault
|
|
(error $ "CFGToFiniteState: No state for " ++ x)
|
|
x stateMap
|
|
(x:beta) -> let (fa',q) = newState () fa
|
|
in make_fas [(q0,[x],q),(q,beta,q1)] fa'
|
|
where
|
|
make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa1 mr s1 xs s2 f') fa xs
|
|
|
|
mutRecSets :: CFG -> [Set Cat] -> MutRecSets
|
|
mutRecSets g = Map.fromList . concatMap mkMutRecSet
|
|
where
|
|
mkMutRecSet cs = [ (c,ms) | c <- csl ]
|
|
where csl = Set.toList cs
|
|
rs = catSetRules g cs
|
|
(nrs,rrs) = partition (ruleIsNonRecursive cs) rs
|
|
ms = MutRecSet {
|
|
mrCats = cs,
|
|
mrNonRecRules = nrs,
|
|
mrRecRules = rrs,
|
|
mrRec = rec
|
|
}
|
|
rec | null rrs = NotR
|
|
| all (isRightLinear cs) rrs = RightR
|
|
| otherwise = LeftR
|
|
|
|
--
|
|
-- * Utilities
|
|
--
|
|
|
|
-- | Add a state for the given NFA for each of the categories
|
|
-- in the given set. Returns a map of categories to their
|
|
-- corresponding states.
|
|
addStatesForCats :: Set Cat -> NFA t -> (NFA t, Map Cat State)
|
|
addStatesForCats cs fa = (fa', m)
|
|
where (fa', ns) = newStates (replicate (Set.size cs) ()) fa
|
|
m = Map.fromList (zip (Set.toList cs) (map fst ns))
|
|
|
|
revMultiMap :: (Ord a, Ord b) => Map a (Set b) -> Map b (Set a)
|
|
revMultiMap m = Map.fromListWith Set.union [ (y,Set.singleton x) | (x,s) <- Map.toList m, y <- Set.toList s]
|