mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-23 03:32:51 -06:00
Some refactorings needed for recursion removal.
This commit is contained in:
@@ -68,33 +68,6 @@ makeSimpleRegular opts s = makeRegular $ cfgToCFRules s
|
|||||||
preprocess = fix (topDownFilter start . bottomUpFilter)
|
preprocess = fix (topDownFilter start . bottomUpFilter)
|
||||||
. removeCycles
|
. removeCycles
|
||||||
|
|
||||||
--
|
|
||||||
-- * Approximate context-free grammars with regular grammars.
|
|
||||||
--
|
|
||||||
|
|
||||||
-- Use the transformation algorithm from \"Regular Approximation of Context-free
|
|
||||||
-- Grammars through Approximation\", Mohri and Nederhof, 2000
|
|
||||||
-- to create an over-generating regular frammar for a context-free
|
|
||||||
-- grammar
|
|
||||||
makeRegular :: CFRules -> CFRules
|
|
||||||
makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
|
|
||||||
where trSet cs | allXLinear cs rs = rs
|
|
||||||
| otherwise = concatMap handleCat csl
|
|
||||||
where csl = Set.toList cs
|
|
||||||
rs = catSetRules g cs
|
|
||||||
handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e
|
|
||||||
++ concatMap (makeRightLinearRules c) (catRules g c)
|
|
||||||
where c' = newCat c
|
|
||||||
makeRightLinearRules b' (CFRule c ss n) =
|
|
||||||
case ys of
|
|
||||||
[] -> newRule b' (xs ++ [Cat (newCat c)]) n -- no non-terminals left
|
|
||||||
(Cat b:zs) -> newRule b' (xs ++ [Cat b]) n
|
|
||||||
++ makeRightLinearRules (newCat b) (CFRule c zs n)
|
|
||||||
where (xs,ys) = break (`catElem` cs) ss
|
|
||||||
-- don't add rules on the form A -> A
|
|
||||||
newRule c rhs n | rhs == [Cat c] = []
|
|
||||||
| otherwise = [CFRule c rhs n]
|
|
||||||
newCat c = c ++ "$"
|
|
||||||
|
|
||||||
--
|
--
|
||||||
-- * Compile strongly regular grammars to NFAs
|
-- * Compile strongly regular grammars to NFAs
|
||||||
@@ -300,26 +273,3 @@ addStatesForCats :: Set Cat_ -> NFA t -> (NFA t, Map Cat_ State)
|
|||||||
addStatesForCats cs fa = (fa', m)
|
addStatesForCats cs fa = (fa', m)
|
||||||
where (fa', ns) = newStates (replicate (Set.size cs) ()) fa
|
where (fa', ns) = newStates (replicate (Set.size cs) ()) fa
|
||||||
m = Map.fromList (zip (Set.toList cs) (map fst ns))
|
m = Map.fromList (zip (Set.toList cs) (map fst ns))
|
||||||
|
|
||||||
ruleIsNonRecursive :: Set Cat_ -> CFRule_ -> Bool
|
|
||||||
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
|
|
||||||
|
|
||||||
noCatsInSet :: Set Cat_ -> [Symbol Cat_ t] -> Bool
|
|
||||||
noCatsInSet cs = not . any (`catElem` cs)
|
|
||||||
|
|
||||||
-- | Check if all the rules are right-linear, or all the rules are
|
|
||||||
-- left-linear, with respect to given categories.
|
|
||||||
allXLinear :: Set Cat_ -> [CFRule_] -> Bool
|
|
||||||
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
|
|
||||||
|
|
||||||
-- | Checks if a context-free rule is right-linear.
|
|
||||||
isRightLinear :: Set Cat_ -- ^ The categories to consider
|
|
||||||
-> CFRule_ -- ^ The rule to check for right-linearity
|
|
||||||
-> Bool
|
|
||||||
isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
|
|
||||||
|
|
||||||
-- | Checks if a context-free rule is left-linear.
|
|
||||||
isLeftLinear :: Set Cat_ -- ^ The categories to consider
|
|
||||||
-> CFRule_ -- ^ The rule to check for right-linearity
|
|
||||||
-> Bool
|
|
||||||
isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs
|
|
||||||
|
|||||||
@@ -3,7 +3,8 @@ module GF.Speech.RegExp (RE(..),
|
|||||||
isEpsilon, isNull,
|
isEpsilon, isNull,
|
||||||
unionRE, concatRE, seqRE,
|
unionRE, concatRE, seqRE,
|
||||||
repeatRE, minimizeRE,
|
repeatRE, minimizeRE,
|
||||||
mapRE, joinRE,
|
mapRE, mapRE', joinRE,
|
||||||
|
symbolsRE,
|
||||||
dfa2re, prRE) where
|
dfa2re, prRE) where
|
||||||
|
|
||||||
import Data.List
|
import Data.List
|
||||||
@@ -107,10 +108,13 @@ firstRE (REConcat (x:xs)) = (x, REConcat xs)
|
|||||||
firstRE r = (r,epsilonRE)
|
firstRE r = (r,epsilonRE)
|
||||||
|
|
||||||
mapRE :: (a -> b) -> RE a -> RE b
|
mapRE :: (a -> b) -> RE a -> RE b
|
||||||
mapRE f (REConcat xs) = REConcat (map (mapRE f) xs)
|
mapRE f = mapRE' (RESymbol . f)
|
||||||
mapRE f (REUnion xs) = REUnion (map (mapRE f) xs)
|
|
||||||
mapRE f (RERepeat xs) = RERepeat (mapRE f xs)
|
mapRE' :: (a -> RE b) -> RE a -> RE b
|
||||||
mapRE f (RESymbol s) = RESymbol (f s)
|
mapRE' f (REConcat xs) = REConcat (map (mapRE' f) xs)
|
||||||
|
mapRE' f (REUnion xs) = REUnion (map (mapRE' f) xs)
|
||||||
|
mapRE' f (RERepeat x) = RERepeat (mapRE' f x)
|
||||||
|
mapRE' f (RESymbol s) = f s
|
||||||
|
|
||||||
joinRE :: RE (RE a) -> RE a
|
joinRE :: RE (RE a) -> RE a
|
||||||
joinRE (REConcat xs) = REConcat (map joinRE xs)
|
joinRE (REConcat xs) = REConcat (map joinRE xs)
|
||||||
@@ -118,6 +122,12 @@ joinRE (REUnion xs) = REUnion (map joinRE xs)
|
|||||||
joinRE (RERepeat xs) = RERepeat (joinRE xs)
|
joinRE (RERepeat xs) = RERepeat (joinRE xs)
|
||||||
joinRE (RESymbol ss) = ss
|
joinRE (RESymbol ss) = ss
|
||||||
|
|
||||||
|
symbolsRE :: RE a -> [a]
|
||||||
|
symbolsRE (REConcat xs) = concatMap symbolsRE xs
|
||||||
|
symbolsRE (REUnion xs) = concatMap symbolsRE xs
|
||||||
|
symbolsRE (RERepeat x) = symbolsRE x
|
||||||
|
symbolsRE (RESymbol x) = [x]
|
||||||
|
|
||||||
-- Debugging
|
-- Debugging
|
||||||
|
|
||||||
prRE :: RE String -> String
|
prRE :: RE String -> String
|
||||||
|
|||||||
@@ -49,7 +49,7 @@ import Data.Set (Set)
|
|||||||
import qualified Data.Set as Set
|
import qualified Data.Set as Set
|
||||||
|
|
||||||
data SRG = SRG { grammarName :: String -- ^ grammar name
|
data SRG = SRG { grammarName :: String -- ^ grammar name
|
||||||
, startCat :: String -- ^ start category name
|
, startCat :: SRGCat -- ^ start category name
|
||||||
, origStartCat :: String -- ^ original start category name
|
, origStartCat :: String -- ^ original start category name
|
||||||
, grammarLanguage :: Maybe String -- ^ The language for which the grammar
|
, grammarLanguage :: Maybe String -- ^ The language for which the grammar
|
||||||
-- is intended, e.g. en-UK
|
-- is intended, e.g. en-UK
|
||||||
@@ -61,7 +61,7 @@ data SRGRule = SRGRule SRGCat String [SRGAlt] -- ^ SRG category name, original c
|
|||||||
-- and productions
|
-- and productions
|
||||||
deriving (Eq,Show)
|
deriving (Eq,Show)
|
||||||
|
|
||||||
-- | maybe a probability, a rule name and a list of symbols
|
-- | maybe a probability, a rule name and an EBNF right-hand side
|
||||||
data SRGAlt = SRGAlt (Maybe Double) CFTerm SRGItem
|
data SRGAlt = SRGAlt (Maybe Double) CFTerm SRGItem
|
||||||
deriving (Eq,Show)
|
deriving (Eq,Show)
|
||||||
|
|
||||||
@@ -163,6 +163,8 @@ srgTopCats srg = buildMultiMap [(oc, cat) | SRGRule cat origCat _ <- rules srg,
|
|||||||
|
|
||||||
srgItem :: [[Symbol SRGNT Token]] -> SRGItem
|
srgItem :: [[Symbol SRGNT Token]] -> SRGItem
|
||||||
srgItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
|
srgItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
|
||||||
|
-- non-optimizing version:
|
||||||
|
--srgItem = unionRE . map seqRE
|
||||||
|
|
||||||
-- | Merges a list of right-hand sides which all have the same
|
-- | Merges a list of right-hand sides which all have the same
|
||||||
-- sequence of non-terminals.
|
-- sequence of non-terminals.
|
||||||
|
|||||||
@@ -257,6 +257,33 @@ mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transit
|
|||||||
allCats = map fst g
|
allCats = map fst g
|
||||||
refl = if incAll then reflexiveClosure_ allCats else reflexiveSubrelation
|
refl = if incAll then reflexiveClosure_ allCats else reflexiveSubrelation
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Approximate context-free grammars with regular grammars.
|
||||||
|
--
|
||||||
|
|
||||||
|
-- Use the transformation algorithm from \"Regular Approximation of Context-free
|
||||||
|
-- Grammars through Approximation\", Mohri and Nederhof, 2000
|
||||||
|
-- to create an over-generating regular frammar for a context-free
|
||||||
|
-- grammar
|
||||||
|
makeRegular :: CFRules -> CFRules
|
||||||
|
makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
|
||||||
|
where trSet cs | allXLinear cs rs = rs
|
||||||
|
| otherwise = concatMap handleCat csl
|
||||||
|
where csl = Set.toList cs
|
||||||
|
rs = catSetRules g cs
|
||||||
|
handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e
|
||||||
|
++ concatMap (makeRightLinearRules c) (catRules g c)
|
||||||
|
where c' = newCat c
|
||||||
|
makeRightLinearRules b' (CFRule c ss n) =
|
||||||
|
case ys of
|
||||||
|
[] -> newRule b' (xs ++ [Cat (newCat c)]) n -- no non-terminals left
|
||||||
|
(Cat b:zs) -> newRule b' (xs ++ [Cat b]) n
|
||||||
|
++ makeRightLinearRules (newCat b) (CFRule c zs n)
|
||||||
|
where (xs,ys) = break (`catElem` cs) ss
|
||||||
|
-- don't add rules on the form A -> A
|
||||||
|
newRule c rhs n | rhs == [Cat c] = []
|
||||||
|
| otherwise = [CFRule c rhs n]
|
||||||
|
newCat c = c ++ "$"
|
||||||
|
|
||||||
--
|
--
|
||||||
-- * CFG rule utilities
|
-- * CFG rule utilities
|
||||||
@@ -292,7 +319,7 @@ ruleFun (CFRule _ _ t) = f t
|
|||||||
f _ = IC ""
|
f _ = IC ""
|
||||||
|
|
||||||
-- | Checks if a symbol is a non-terminal of one of the given categories.
|
-- | Checks if a symbol is a non-terminal of one of the given categories.
|
||||||
catElem :: Symbol Cat_ t -> Set Cat_ -> Bool
|
catElem :: Ord c => Symbol c t -> Set c -> Bool
|
||||||
catElem s cs = symbol (`Set.member` cs) (const False) s
|
catElem s cs = symbol (`Set.member` cs) (const False) s
|
||||||
|
|
||||||
-- | Check if any of the categories used on the right-hand side
|
-- | Check if any of the categories used on the right-hand side
|
||||||
@@ -301,4 +328,29 @@ anyUsedBy :: Eq c => [c] -> CFRule c n t -> Bool
|
|||||||
anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss)
|
anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss)
|
||||||
|
|
||||||
mkCFTerm :: String -> CFTerm
|
mkCFTerm :: String -> CFTerm
|
||||||
mkCFTerm n = CFObj (IC n) []
|
mkCFTerm n = CFObj (IC n) []
|
||||||
|
|
||||||
|
ruleIsNonRecursive :: Ord c => Set c -> CFRule c n t -> Bool
|
||||||
|
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
|
||||||
|
|
||||||
|
noCatsInSet :: Ord c => Set c -> [Symbol c t] -> Bool
|
||||||
|
noCatsInSet cs = not . any (`catElem` cs)
|
||||||
|
|
||||||
|
-- | Check if all the rules are right-linear, or all the rules are
|
||||||
|
-- left-linear, with respect to given categories.
|
||||||
|
allXLinear :: Ord c => Set c -> [CFRule c n t] -> Bool
|
||||||
|
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
|
||||||
|
|
||||||
|
-- | Checks if a context-free rule is right-linear.
|
||||||
|
isRightLinear :: Ord c =>
|
||||||
|
Set c -- ^ The categories to consider
|
||||||
|
-> CFRule c n t -- ^ The rule to check for right-linearity
|
||||||
|
-> Bool
|
||||||
|
isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
|
||||||
|
|
||||||
|
-- | Checks if a context-free rule is left-linear.
|
||||||
|
isLeftLinear :: Ord c =>
|
||||||
|
Set c -- ^ The categories to consider
|
||||||
|
-> CFRule c n t -- ^ The rule to check for left-linearity
|
||||||
|
-> Bool
|
||||||
|
isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs
|
||||||
|
|||||||
Reference in New Issue
Block a user