Finished untested function for making context-free grammars regular.

This commit is contained in:
bringert
2005-09-06 07:06:42 +00:00
parent 7340b804da
commit fb419f71ad

View File

@@ -5,9 +5,9 @@
-- Stability : (stable) -- Stability : (stable)
-- Portability : (portable) -- Portability : (portable)
-- --
-- > CVS $Date: 2005/09/02 15:47:47 $ -- > CVS $Date: 2005/09/06 08:06:42 $
-- > CVS $Author: bringert $ -- > CVS $Author: bringert $
-- > CVS $Revision: 1.14 $ -- > CVS $Revision: 1.15 $
-- --
-- This module does some useful transformations on CFGs. -- This module does some useful transformations on CFGs.
-- --
@@ -20,10 +20,11 @@ module GF.Speech.TransformCFG (makeNice, CFRule_, makeRegular) where
import GF.Infra.Ident import GF.Infra.Ident
import GF.Formalism.CFG import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..), mapSymbol) import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, NameProfile(..))
import GF.Conversion.Types import GF.Conversion.Types
import GF.Infra.Print import GF.Infra.Print
import Control.Monad
import Data.FiniteMap import Data.FiniteMap
import Data.List import Data.List
import Data.Maybe (fromJust) import Data.Maybe (fromJust)
@@ -52,8 +53,7 @@ cfgToCFRules cfg = [CFRule (catToString c) (map symb r) n | CFRule c r n <- cfg]
-- | Group productions by their lhs categories -- | Group productions by their lhs categories
groupProds :: [CFRule_] -> CFRules groupProds :: [CFRule_] -> CFRules
groupProds = addListToFM_C (++) emptyFM . map (\rs -> (ruleCat rs,[rs])) groupProds = addListToFM_C (++) emptyFM . map (\r -> (lhsCat r,[r]))
where ruleCat (CFRule c _ _) = c
ungroupProds :: CFRules -> [CFRule_] ungroupProds :: CFRules -> [CFRule_]
ungroupProds = concat . eltsFM ungroupProds = concat . eltsFM
@@ -101,49 +101,103 @@ isDirectLeftRecursive _ = False
-- to create an over-generating regular frammar for a context-free -- to create an over-generating regular frammar for a context-free
-- grammar -- grammar
makeRegular :: [CFRule_] -> [CFRule_] makeRegular :: [CFRule_] -> [CFRule_]
makeRegular = undefined makeRegular g = concatMap trSet (mutRecCats g)
where trSet cs | allXLinear cs rs = rs
| otherwise = concatMap handleCat cs
where rs = concatMap (catRules g) cs
handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e
++ concatMap (makeRightLinearRules c) crs
-- FIXME: add more rules here, see pg 255, item 2
where crs = catRules rs c
c' = newCat c
makeRightLinearRules b' (CFRule c ss n) =
case ys of
[] -> [CFRule b' (xs ++ [Cat (newCat c)]) n] -- no non-terminals left
(Cat b:zs) -> CFRule b' (xs ++ [Cat b]) n
: makeRightLinearRules (newCat b) (CFRule c zs n)
where (xs,ys) = break (`catElem` cs) ss
newCat c = c ++ "$"
-- | Check if all the rules are right-linear, or all the rules are
-- left-linear, with respect to given categories.
allXLinear :: Eq c => [c] -> [CFRule c n t] -> Bool
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
{-
-- | Get the sets of mutually recursive non-terminals for a grammar. -- | Get the sets of mutually recursive non-terminals for a grammar.
mutRecCats :: Eq c => [CFRule c n t] -> [[c]] mutRecCats :: Eq c => [CFRule c n t] -> [[c]]
mutRecCats = mutRecCats g = equivalenceClasses $ symmetricSubrelation $ transitiveClosure $ reflexiveClosure allCats r
-} where r = nub [(c,c') | CFRule c ss _ <- g, Cat c' <- ss]
allCats = nub [c | CFRule c _ _ <- g]
{-
-- | Get a map of categories to all categories which can occur in
-- the result of rewriting each category.
allCatsTrans :: CFRules -> FinitMap
allCatsTrans g c =
-}
-- Convert a strongly regular grammar to a finite automaton. -- Convert a strongly regular grammar to a finite automaton.
-- compileAutomaton :: -- compileAutomaton ::
-- --
-- CFG rule utilities -- * CFG rule utilities
-- --
-- | Get all the rules for a given category.
catRules :: Eq c => [CFRule c n t] -> c -> [CFRule c n t]
catRules rs c = [r | r@(CFRule c' _ _) <- rs, c' == c]
-- | Gets the set of LHS categories of a set of rules.
lhsCats :: Eq c => [CFRule c n t] -> [c]
lhsCats = nub . map lhsCat
lhsCat :: CFRule c n t -> c
lhsCat (CFRule c _ _) = c
-- | Checks if a context-free rule is right-linear. -- | Checks if a context-free rule is right-linear.
isRightLinear :: Eq c => [c] -- ^ The categories to consider isRightLinear :: Eq c => [c] -- ^ The categories to consider
-> CFRule c n t -- ^ The rule to check for right-linearity -> CFRule c n t -- ^ The rule to check for right-linearity
-> Bool -> Bool
isRightLinear cs (CFRule _ ss _) = all (not . catElem cs) (safeInit ss) isRightLinear cs (CFRule _ ss _) = all (not . (`catElem` cs)) (safeInit ss)
-- | Checks if a context-free rule is left-linear. -- | Checks if a context-free rule is left-linear.
isLeftLinear :: Eq c => [c] -- ^ The categories to consider isLeftLinear :: Eq c => [c] -- ^ The categories to consider
-> CFRule c n t -- ^ The rule to check for right-linearity -> CFRule c n t -- ^ The rule to check for right-linearity
-> Bool -> Bool
isLeftLinear cs (CFRule _ ss _) = all (not . catElem cs) (drop 1 ss) isLeftLinear cs (CFRule _ ss _) = all (not . (`catElem` cs)) (drop 1 ss)
-- | Checks if a symbol is a non-terminal of one of the given categories. -- | Checks if a symbol is a non-terminal of one of the given categories.
catElem :: Eq c => [c] -> Symbol c t -> Bool catElem :: Eq c => Symbol c t -> [c] -> Bool
catElem cs (Tok _) = False catElem s cs = symbol (`elem` cs) (const False) s
catElem cs (Cat c) = c `elem` cs
-- | Check if any of the categories used on the right-hand side -- | Check if any of the categories used on the right-hand side
-- are in the given list of categories. -- are in the given list of categories.
anyUsedBy :: Eq c => [c] -> CFRule c n t -> Bool anyUsedBy :: Eq c => [c] -> CFRule c n t -> Bool
anyUsedBy cs (CFRule _ ss _) = any (catElem cs) ss anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss)
mkName :: String -> Name
mkName n = Name (IC n) []
--
-- * Relations
--
-- FIXME: these could use a more efficent data structures and algorithms.
isRelatedTo :: Eq a => [(a,a)] -> a -> a -> Bool
isRelatedTo r x y = (x,y) `elem` r
transitiveClosure :: Eq a => [(a,a)] -> [(a,a)]
transitiveClosure r = fix (\r -> r `union` [ (x,w) | (x,y) <- r, (z,w) <- r, y == z ]) r
reflexiveClosure :: Eq a => [a] -- ^ The set over which the relation is defined.
-> [(a,a)] -> [(a,a)]
reflexiveClosure u r = [(x,x) | x <- u] `union` r
symmetricSubrelation :: Eq a => [(a,a)] -> [(a,a)]
symmetricSubrelation r = [p | p@(x,y) <- r, (y,x) `elem` r]
-- | Get the equivalence classes from an equivalence relation. Since
-- the relation is relexive, the set can be recoved from the relation.
equivalenceClasses :: Eq a => [(a,a)] -> [[a]]
equivalenceClasses r = equivalenceClasses_ (nub (map fst r)) r
where equivalenceClasses_ [] _ = []
equivalenceClasses_ (x:xs) r = (x:ys):equivalenceClasses_ zs r
where (ys,zs) = partition (isRelatedTo r x) xs
-- --
-- * Utilities -- * Utilities
@@ -159,3 +213,23 @@ nothingOrNull (Just xs) = null xs
safeInit :: [a] -> [a] safeInit :: [a] -> [a]
safeInit [] = [] safeInit [] = []
safeInit xs = init xs safeInit xs = init xs
unionAll :: Eq a => [[a]] -> [a]
unionAll = nub . concat
whenMP :: MonadPlus m => Bool -> a -> m a
whenMP b x = if b then return x else mzero
--
-- * Testing stuff, can be removed
--
c --> ss = CFRule c ss (mkName "")
prGr g = putStrLn $ showGr g
showGr g = unlines $ map showRule g
showRule (CFRule c ss _) = c ++ " --> " ++ unwords (map showSym ss)
showSym s = symbol id show s