forked from GitHub/gf-core
SRG always has RE rhs.
This commit is contained in:
@@ -48,12 +48,12 @@ prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
|
|||||||
text ".MAIN" <+> prCat start
|
text ".MAIN" <+> prCat start
|
||||||
prRule (SRGRule cat origCat rhs) =
|
prRule (SRGRule cat origCat rhs) =
|
||||||
comment (prt origCat) $$
|
comment (prt origCat) $$
|
||||||
prCat cat <+> union (map prAlt (ebnfSRGAlts rhs))
|
prCat cat <+> union (map prAlt rhs)
|
||||||
-- FIXME: use the probability
|
-- FIXME: use the probability
|
||||||
prAlt (EBnfSRGAlt mp _ rhs) = prItem rhs
|
prAlt (SRGAlt mp _ rhs) = prItem rhs
|
||||||
|
|
||||||
|
|
||||||
prItem :: EBnfSRGItem -> Doc
|
prItem :: SRGItem -> Doc
|
||||||
prItem = f
|
prItem = f
|
||||||
where
|
where
|
||||||
f (REUnion xs) = (if null es then empty else text "?") <> union (map f nes)
|
f (REUnion xs) = (if null es then empty else text "?") <> union (map f nes)
|
||||||
|
|||||||
@@ -60,10 +60,10 @@ prJSGF sisr srg@(SRG{grammarName=name,grammarLanguage=ml,
|
|||||||
Nothing -> empty
|
Nothing -> empty
|
||||||
prRule (SRGRule cat origCat rhs) =
|
prRule (SRGRule cat origCat rhs) =
|
||||||
comment origCat $$
|
comment origCat $$
|
||||||
rule False cat (map prAlt (ebnfSRGAlts rhs))
|
rule False cat (map prAlt rhs)
|
||||||
-- rule False cat (map prAlt rhs)
|
-- rule False cat (map prAlt rhs)
|
||||||
-- FIXME: use the probability
|
-- FIXME: use the probability
|
||||||
prAlt (EBnfSRGAlt mp n rhs) = sep [initTag, parens (prItem sisr n rhs), finalTag]
|
prAlt (SRGAlt mp n rhs) = sep [initTag, parens (prItem sisr n rhs), finalTag]
|
||||||
-- prAlt (SRGAlt mp n rhs) = initTag <+> prItem sisr n rhs <+> finalTag
|
-- prAlt (SRGAlt mp n rhs) = initTag <+> prItem sisr n rhs <+> finalTag
|
||||||
where initTag | isEmpty t = empty
|
where initTag | isEmpty t = empty
|
||||||
| otherwise = text "<NULL>" <+> t
|
| otherwise = text "<NULL>" <+> t
|
||||||
@@ -79,7 +79,7 @@ catFormId = (++ "_cat")
|
|||||||
prCat :: SRGCat -> Doc
|
prCat :: SRGCat -> Doc
|
||||||
prCat c = char '<' <> text c <> char '>'
|
prCat c = char '<' <> text c <> char '>'
|
||||||
|
|
||||||
prItem :: Maybe SISRFormat -> CFTerm -> EBnfSRGItem -> Doc
|
prItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> Doc
|
||||||
prItem sisr t = f 1
|
prItem sisr t = f 1
|
||||||
where
|
where
|
||||||
f _ (REUnion []) = text "<VOID>"
|
f _ (REUnion []) = text "<VOID>"
|
||||||
|
|||||||
@@ -56,7 +56,7 @@ prSrgsXml sisr probs srg@(SRG{grammarName=name,startCat=start,
|
|||||||
++ topCatRules
|
++ topCatRules
|
||||||
++ concatMap ruleToXML rs
|
++ concatMap ruleToXML rs
|
||||||
ruleToXML (SRGRule cat origCat alts) =
|
ruleToXML (SRGRule cat origCat alts) =
|
||||||
comments ["Category " ++ origCat] ++ [rule cat (prRhs (ebnfSRGAlts alts))]
|
comments ["Category " ++ origCat] ++ [rule cat (prRhs alts)]
|
||||||
prRhs rhss = [oneOf (map (mkProd sisr probs) rhss)]
|
prRhs rhss = [oneOf (map (mkProd sisr probs) rhss)]
|
||||||
-- externally visible rules for each of the GF categories
|
-- externally visible rules for each of the GF categories
|
||||||
topCatRules = [topRule tc [oneOf (map (it tc) cs)] | (tc,cs) <- srgTopCats srg]
|
topCatRules = [topRule tc [oneOf (map (it tc) cs)] | (tc,cs) <- srgTopCats srg]
|
||||||
@@ -67,15 +67,15 @@ prSrgsXml sisr probs srg@(SRG{grammarName=name,startCat=start,
|
|||||||
rule :: String -> [XML] -> XML
|
rule :: String -> [XML] -> XML
|
||||||
rule i = Tag "rule" [("id",i)]
|
rule i = Tag "rule" [("id",i)]
|
||||||
|
|
||||||
mkProd :: Maybe SISRFormat -> Bool -> EBnfSRGAlt -> XML
|
mkProd :: Maybe SISRFormat -> Bool -> SRGAlt -> XML
|
||||||
mkProd sisr probs (EBnfSRGAlt mp n rhs) = Tag "item" w (ti ++ [x] ++ tf)
|
mkProd sisr probs (SRGAlt mp n rhs) = Tag "item" w (ti ++ [x] ++ tf)
|
||||||
where x = mkItem sisr n rhs
|
where x = mkItem sisr n rhs
|
||||||
w | probs = maybe [] (\p -> [("weight", show p)]) mp
|
w | probs = maybe [] (\p -> [("weight", show p)]) mp
|
||||||
| otherwise = []
|
| otherwise = []
|
||||||
ti = [tag sisr (profileInitSISR n)]
|
ti = [tag sisr (profileInitSISR n)]
|
||||||
tf = [tag sisr (profileFinalSISR n)]
|
tf = [tag sisr (profileFinalSISR n)]
|
||||||
|
|
||||||
mkItem :: Maybe SISRFormat -> CFTerm -> EBnfSRGItem -> XML
|
mkItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> XML
|
||||||
mkItem sisr cn = f
|
mkItem sisr cn = f
|
||||||
where
|
where
|
||||||
f (REUnion []) = ETag "ruleref" [("special","VOID")]
|
f (REUnion []) = ETag "ruleref" [("special","VOID")]
|
||||||
|
|||||||
@@ -65,9 +65,9 @@ prABNF sisr probs srg@(SRG{grammarName=name,grammarLanguage=ml,
|
|||||||
Nothing -> empty
|
Nothing -> empty
|
||||||
prRule (SRGRule cat origCat rhs) =
|
prRule (SRGRule cat origCat rhs) =
|
||||||
comment origCat $$
|
comment origCat $$
|
||||||
rule False cat (map prAlt (ebnfSRGAlts rhs))
|
rule False cat (map prAlt rhs)
|
||||||
-- FIXME: use the probability
|
-- FIXME: use the probability
|
||||||
prAlt (EBnfSRGAlt mp n rhs) = sep [initTag, parens (prItem sisr n rhs), finalTag]
|
prAlt (SRGAlt mp n rhs) = sep [initTag, parens (prItem sisr n rhs), finalTag]
|
||||||
where initTag = tag sisr (profileInitSISR n)
|
where initTag = tag sisr (profileInitSISR n)
|
||||||
finalTag = tag sisr (profileFinalSISR n)
|
finalTag = tag sisr (profileFinalSISR n)
|
||||||
|
|
||||||
@@ -80,7 +80,7 @@ catFormId = (++ "_cat")
|
|||||||
prCat :: SRGCat -> Doc
|
prCat :: SRGCat -> Doc
|
||||||
prCat c = char '$' <> text c
|
prCat c = char '$' <> text c
|
||||||
|
|
||||||
prItem :: Maybe SISRFormat -> CFTerm -> EBnfSRGItem -> Doc
|
prItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> Doc
|
||||||
prItem sisr t = f 1
|
prItem sisr t = f 1
|
||||||
where
|
where
|
||||||
f _ (REUnion []) = text "$VOID"
|
f _ (REUnion []) = text "$VOID"
|
||||||
|
|||||||
@@ -16,13 +16,11 @@
|
|||||||
-- categories in the grammar
|
-- categories in the grammar
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..),
|
module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..), SRGItem,
|
||||||
SRGCat, SRGNT, CFTerm,
|
SRGCat, SRGNT, CFTerm,
|
||||||
makeSimpleSRG
|
makeSimpleSRG
|
||||||
, lookupFM_, prtS
|
, lookupFM_, prtS
|
||||||
, cfgCatToGFCat, srgTopCats
|
, cfgCatToGFCat, srgTopCats
|
||||||
, EBnfSRGAlt(..), EBnfSRGItem
|
|
||||||
, ebnfSRGAlts
|
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import GF.Data.Operations
|
import GF.Data.Operations
|
||||||
@@ -63,9 +61,11 @@ data SRGRule = SRGRule SRGCat String [SRGAlt] -- ^ SRG category name, original c
|
|||||||
deriving (Eq,Show)
|
deriving (Eq,Show)
|
||||||
|
|
||||||
-- | maybe a probability, a rule name and a list of symbols
|
-- | maybe a probability, a rule name and a list of symbols
|
||||||
data SRGAlt = SRGAlt (Maybe Double) CFTerm [Symbol SRGNT Token]
|
data SRGAlt = SRGAlt (Maybe Double) CFTerm SRGItem
|
||||||
deriving (Eq,Show)
|
deriving (Eq,Show)
|
||||||
|
|
||||||
|
type SRGItem = RE (Symbol SRGNT Token)
|
||||||
|
|
||||||
type SRGCat = String
|
type SRGCat = String
|
||||||
|
|
||||||
-- | An SRG non-terminal. Category name and its number in the profile.
|
-- | An SRG non-terminal. Category name and its number in the profile.
|
||||||
@@ -108,13 +108,13 @@ cfgRulesToSRGRule names probs rs@(r:_) = SRGRule cat origCat rhs
|
|||||||
where
|
where
|
||||||
origCat = lhsCat r
|
origCat = lhsCat r
|
||||||
cat = lookupFM_ names origCat
|
cat = lookupFM_ names origCat
|
||||||
rhs = nub $ map ruleToAlt rs
|
alts = [((n,ruleProb probs r),mkSRGSymbols 0 ss) | CFRule c ss n <- rs]
|
||||||
ruleToAlt r@(CFRule c ss n)
|
rhs = [SRGAlt p n (srgItem sss) | ((n,p),sss) <- buildMultiMap alts ]
|
||||||
= SRGAlt (ruleProb probs r) n (mkSRGSymbols 0 ss)
|
|
||||||
where
|
mkSRGSymbols _ [] = []
|
||||||
mkSRGSymbols _ [] = []
|
mkSRGSymbols i (Cat c:ss) = Cat (renameCat c,i) : mkSRGSymbols (i+1) ss
|
||||||
mkSRGSymbols i (Cat c:ss) = Cat (renameCat c,i) : mkSRGSymbols (i+1) ss
|
mkSRGSymbols i (Tok t:ss) = Tok t : mkSRGSymbols i ss
|
||||||
mkSRGSymbols i (Tok t:ss) = Tok t : mkSRGSymbols i ss
|
|
||||||
renameCat = lookupFM_ names
|
renameCat = lookupFM_ names
|
||||||
|
|
||||||
ruleProb :: Probs -> CFRule_ -> Maybe Double
|
ruleProb :: Probs -> CFRule_ -> Maybe Double
|
||||||
@@ -153,22 +153,12 @@ srgTopCats srg = buildMultiMap [(oc, cat) | SRGRule cat origCat _ <- rules srg,
|
|||||||
-- * Size-optimized EBNF SRGs
|
-- * Size-optimized EBNF SRGs
|
||||||
--
|
--
|
||||||
|
|
||||||
data EBnfSRGAlt = EBnfSRGAlt (Maybe Double) CFTerm EBnfSRGItem
|
srgItem :: [[Symbol SRGNT Token]] -> SRGItem
|
||||||
deriving (Eq,Show)
|
srgItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
|
||||||
|
|
||||||
type EBnfSRGItem = RE (Symbol SRGNT Token)
|
|
||||||
|
|
||||||
|
|
||||||
ebnfSRGAlts :: [SRGAlt] -> [EBnfSRGAlt]
|
|
||||||
ebnfSRGAlts alts = [EBnfSRGAlt p n (ebnfSRGItem sss)
|
|
||||||
| ((n,p),sss) <- buildMultiMap [((n,p),ss) | SRGAlt p n ss <- alts]]
|
|
||||||
|
|
||||||
ebnfSRGItem :: [[Symbol SRGNT Token]] -> EBnfSRGItem
|
|
||||||
ebnfSRGItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
|
|
||||||
|
|
||||||
-- | Merges a list of right-hand sides which all have the same
|
-- | Merges a list of right-hand sides which all have the same
|
||||||
-- sequence of non-terminals.
|
-- sequence of non-terminals.
|
||||||
mergeItems :: [[Symbol SRGNT Token]] -> EBnfSRGItem
|
mergeItems :: [[Symbol SRGNT Token]] -> SRGItem
|
||||||
mergeItems = minimizeRE . ungroupTokens . minimizeRE . unionRE . map seqRE . map groupTokens
|
mergeItems = minimizeRE . ungroupTokens . minimizeRE . unionRE . map seqRE . map groupTokens
|
||||||
|
|
||||||
groupTokens :: [Symbol SRGNT Token] -> [Symbol SRGNT [Token]]
|
groupTokens :: [Symbol SRGNT Token] -> [Symbol SRGNT [Token]]
|
||||||
|
|||||||
Reference in New Issue
Block a user