forked from GitHub/gf-core
Generate more compact JSGF by converting to ENBF.
This commit is contained in:
@@ -55,11 +55,11 @@ prJSGF sisr srg@(SRG{grammarName=name,startCat=start,origStartCat=origStart,rule
|
|||||||
rule True "MAIN" [prCat start]
|
rule True "MAIN" [prCat start]
|
||||||
prRule (SRGRule cat origCat rhs) =
|
prRule (SRGRule cat origCat rhs) =
|
||||||
comment origCat $$
|
comment origCat $$
|
||||||
-- rule False cat (map prAlt (ebnfSRGAlts rhs))
|
rule False cat (map prAlt (ebnfSRGAlts rhs))
|
||||||
rule False cat (map prAlt rhs)
|
-- rule False cat (map prAlt rhs)
|
||||||
-- FIXME: use the probability
|
-- FIXME: use the probability
|
||||||
-- prAlt (EBnfSRGAlt mp n rhs) = tag sisr (profileInitSISR n) . showChar ' '. prItem sisr rhs
|
prAlt (EBnfSRGAlt mp n rhs) = sep [initTag, prItem sisr n rhs, finalTag]
|
||||||
prAlt (SRGAlt mp n rhs) = initTag <+> prItem sisr n rhs <+> finalTag
|
-- prAlt (SRGAlt mp n rhs) = initTag <+> prItem sisr n rhs <+> finalTag
|
||||||
where initTag | isEmpty t = empty
|
where initTag | isEmpty t = empty
|
||||||
| otherwise = text "<NULL>" <+> t
|
| otherwise = text "<NULL>" <+> t
|
||||||
where t = tag sisr (profileInitSISR n)
|
where t = tag sisr (profileInitSISR n)
|
||||||
@@ -74,25 +74,25 @@ catFormId = (++ "_cat")
|
|||||||
prCat :: SRGCat -> Doc
|
prCat :: SRGCat -> Doc
|
||||||
prCat c = char '<' <> text c <> char '>'
|
prCat c = char '<' <> text c <> char '>'
|
||||||
|
|
||||||
{-
|
prItem :: Maybe SISRFormat -> CFTerm -> EBnfSRGItem -> Doc
|
||||||
prItem :: Maybe SISRFormat -> EBnfSRGItem -> ShowS
|
prItem sisr t = f 1
|
||||||
prItem sisr = f 1
|
|
||||||
where
|
where
|
||||||
f _ (REUnion []) = showString "<VOID>"
|
f _ (REUnion []) = text "<VOID>"
|
||||||
f p (REUnion xs)
|
f p (REUnion xs)
|
||||||
| not (null es) = wrap "[" (f 0 (REUnion nes)) "]"
|
| not (null es) = brackets (f 0 (REUnion nes))
|
||||||
| otherwise = (if p >= 1 then paren else id) (joinS " | " (map (f 1) xs))
|
| otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs))
|
||||||
where (es,nes) = partition (== REConcat []) xs
|
where (es,nes) = partition isEpsilon xs
|
||||||
f _ (REConcat []) = showString "<NULL>"
|
f _ (REConcat []) = text "<NULL>"
|
||||||
f p (REConcat xs) = (if p >= 3 then paren else id) (unwordsS (map (f 2) xs))
|
f p (REConcat xs) = (if p >= 3 then parens else id) (hsep (map (f 2) xs))
|
||||||
f p (RERepeat x) = f 3 x . showString "*"
|
f p (RERepeat x) = f 3 x <> char '*'
|
||||||
f _ (RESymbol s) = prSymbol sisr s
|
f _ (RESymbol s) = prSymbol sisr t s
|
||||||
-}
|
|
||||||
|
|
||||||
|
{-
|
||||||
prItem :: Maybe SISRFormat -> CFTerm -> [Symbol SRGNT Token] -> Doc
|
prItem :: Maybe SISRFormat -> CFTerm -> [Symbol SRGNT Token] -> Doc
|
||||||
prItem _ _ [] = text "<NULL>"
|
prItem _ _ [] = text "<NULL>"
|
||||||
prItem sisr cn ss = paren $ hsep $ map (prSymbol sisr cn) ss
|
prItem sisr cn ss = paren $ hsep $ map (prSymbol sisr cn) ss
|
||||||
where paren = if length ss == 1 then id else parens
|
where paren = if length ss == 1 then id else parens
|
||||||
|
-}
|
||||||
|
|
||||||
prSymbol :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> Doc
|
prSymbol :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> Doc
|
||||||
prSymbol sisr cn (Cat n@(c,_)) = prCat c <+> tag sisr (catSISR cn n)
|
prSymbol sisr cn (Cat n@(c,_)) = prCat c <+> tag sisr (catSISR cn n)
|
||||||
@@ -103,7 +103,7 @@ tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc
|
|||||||
tag Nothing _ = empty
|
tag Nothing _ = empty
|
||||||
tag (Just fmt) t = case t fmt of
|
tag (Just fmt) t = case t fmt of
|
||||||
[] -> empty
|
[] -> empty
|
||||||
ts -> char '{' <+> text (e $ prSISR ts) <+> char '}'
|
ts -> char '{' <+> (text (e $ prSISR ts)) <+> char '}'
|
||||||
where e [] = []
|
where e [] = []
|
||||||
e ('}':xs) = '\\':'}':e xs
|
e ('}':xs) = '\\':'}':e xs
|
||||||
e ('\n':xs) = ' ' : e (dropWhile isSpace xs)
|
e ('\n':xs) = ' ' : e (dropWhile isSpace xs)
|
||||||
@@ -115,11 +115,11 @@ isPunct c = c `elem` "-_.;.,?!"
|
|||||||
comment :: String -> Doc
|
comment :: String -> Doc
|
||||||
comment s = text "//" <+> text s
|
comment s = text "//" <+> text s
|
||||||
|
|
||||||
|
alts :: [Doc] -> Doc
|
||||||
|
alts = sep . prepunctuate (text "| ")
|
||||||
|
|
||||||
rule :: Bool -> SRGCat -> [Doc] -> Doc
|
rule :: Bool -> SRGCat -> [Doc] -> Doc
|
||||||
rule pub c xs = p <+> prCat c <+> char '='
|
rule pub c xs = sep [p <+> prCat c <+> char '=', nest 2 (alts xs) <+> char ';']
|
||||||
$$ nest 2 (sep (prepunctuate (text "| ") xs) <+> char ';')
|
|
||||||
where p = if pub then text "public" else empty
|
where p = if pub then text "public" else empty
|
||||||
|
|
||||||
-- Pretty-printing utilities
|
-- Pretty-printing utilities
|
||||||
@@ -133,3 +133,4 @@ prepunctuate p (x:xs) = x : map (p <>) xs
|
|||||||
|
|
||||||
($++$) :: Doc -> Doc -> Doc
|
($++$) :: Doc -> Doc -> Doc
|
||||||
x $++$ y = x $$ emptyLine $$ y
|
x $++$ y = x $$ emptyLine $$ y
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,9 @@
|
|||||||
module GF.Speech.RegExp (RE(..), dfa2re, prRE) where
|
module GF.Speech.RegExp (RE(..),
|
||||||
|
epsilonRE, nullRE,
|
||||||
|
isEpsilon, isNull,
|
||||||
|
unionRE, concatRE, seqRE,
|
||||||
|
repeatRE,
|
||||||
|
dfa2re, prRE) where
|
||||||
|
|
||||||
import Data.List
|
import Data.List
|
||||||
|
|
||||||
@@ -10,17 +15,17 @@ data RE a =
|
|||||||
| REConcat [RE a] -- ^ REConcat [] is epsilon
|
| REConcat [RE a] -- ^ REConcat [] is epsilon
|
||||||
| RERepeat (RE a)
|
| RERepeat (RE a)
|
||||||
| RESymbol a
|
| RESymbol a
|
||||||
deriving (Eq,Show)
|
deriving (Eq,Ord,Show)
|
||||||
|
|
||||||
|
|
||||||
dfa2re :: Show a => DFA a -> RE a
|
dfa2re :: (Show a,Ord a) => DFA a -> RE a
|
||||||
dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops
|
dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops
|
||||||
. oneFinalState () epsilonRE . mapTransitions RESymbol
|
. oneFinalState () epsilonRE . mapTransitions RESymbol
|
||||||
where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa
|
where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa
|
||||||
merge es = [(f,t,unionRE ls)
|
merge es = [(f,t,unionRE ls)
|
||||||
| ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]]
|
| ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]]
|
||||||
|
|
||||||
elimStates :: Show a => DFA (RE a) -> DFA (RE a)
|
elimStates :: (Show a, Ord a) => DFA (RE a) -> DFA (RE a)
|
||||||
elimStates fa =
|
elimStates fa =
|
||||||
case [s | (s,_) <- states fa, isInternal fa s] of
|
case [s | (s,_) <- states fa, isInternal fa s] of
|
||||||
[] -> fa
|
[] -> fa
|
||||||
@@ -31,18 +36,22 @@ elimStates fa =
|
|||||||
ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs]
|
ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs]
|
||||||
r r1 r3 = concatRE [r1, repeatRE r2, r3]
|
r r1 r3 = concatRE [r1, repeatRE r2, r3]
|
||||||
|
|
||||||
|
epsilonRE :: RE a
|
||||||
epsilonRE = REConcat []
|
epsilonRE = REConcat []
|
||||||
|
|
||||||
|
nullRE :: RE a
|
||||||
nullRE = REUnion []
|
nullRE = REUnion []
|
||||||
|
|
||||||
|
isNull :: RE a -> Bool
|
||||||
isNull (REUnion []) = True
|
isNull (REUnion []) = True
|
||||||
isNull _ = False
|
isNull _ = False
|
||||||
|
|
||||||
|
isEpsilon :: RE a -> Bool
|
||||||
isEpsilon (REConcat []) = True
|
isEpsilon (REConcat []) = True
|
||||||
isEpsilon _ = False
|
isEpsilon _ = False
|
||||||
|
|
||||||
unionRE :: [RE a] -> RE a
|
unionRE :: Ord a => [RE a] -> RE a
|
||||||
unionRE = unionOrId . concatMap toList
|
unionRE = unionOrId . sortNub . concatMap toList
|
||||||
where
|
where
|
||||||
toList (REUnion xs) = xs
|
toList (REUnion xs) = xs
|
||||||
toList x = [x]
|
toList x = [x]
|
||||||
@@ -58,11 +67,14 @@ concatRE xs | any isNull xs = nullRE
|
|||||||
toList (REConcat xs) = xs
|
toList (REConcat xs) = xs
|
||||||
toList x = [x]
|
toList x = [x]
|
||||||
|
|
||||||
|
seqRE :: [a] -> RE a
|
||||||
|
seqRE = concatRE . map RESymbol
|
||||||
|
|
||||||
repeatRE :: RE a -> RE a
|
repeatRE :: RE a -> RE a
|
||||||
repeatRE x | isNull x || isEpsilon x = epsilonRE
|
repeatRE x | isNull x || isEpsilon x = epsilonRE
|
||||||
| otherwise = RERepeat x
|
| otherwise = RERepeat x
|
||||||
|
|
||||||
finalRE :: DFA (RE a) -> RE a
|
finalRE :: Ord a => DFA (RE a) -> RE a
|
||||||
finalRE fa = concatRE [repeatRE r1, r2,
|
finalRE fa = concatRE [repeatRE r1, r2,
|
||||||
repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])]
|
repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])]
|
||||||
where
|
where
|
||||||
|
|||||||
@@ -23,8 +23,8 @@ module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..),
|
|||||||
makeSimpleSRG, makeSRG
|
makeSimpleSRG, makeSRG
|
||||||
, lookupFM_, prtS
|
, lookupFM_, prtS
|
||||||
, topDownFilter, cfgCatToGFCat, srgTopCats
|
, topDownFilter, cfgCatToGFCat, srgTopCats
|
||||||
--, EBnfSRGAlt(..), EBnfSRGItem
|
, EBnfSRGAlt(..), EBnfSRGItem
|
||||||
--, ebnfSRGAlts
|
, ebnfSRGAlts
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import GF.Data.Operations
|
import GF.Data.Operations
|
||||||
@@ -33,7 +33,7 @@ import GF.Infra.Ident
|
|||||||
import GF.Formalism.CFG
|
import GF.Formalism.CFG
|
||||||
import GF.Formalism.Utilities (Symbol(..), NameProfile(..)
|
import GF.Formalism.Utilities (Symbol(..), NameProfile(..)
|
||||||
, Profile(..), SyntaxForest
|
, Profile(..), SyntaxForest
|
||||||
, filterCats, mapSymbol)
|
, filterCats, mapSymbol, symbol)
|
||||||
import GF.Conversion.Types
|
import GF.Conversion.Types
|
||||||
import GF.Infra.Print
|
import GF.Infra.Print
|
||||||
import GF.Speech.TransformCFG
|
import GF.Speech.TransformCFG
|
||||||
@@ -177,8 +177,7 @@ srgTopCats srg = buildMultiMap [(oc, cat) | SRGRule cat origCat _ <- rules srg,
|
|||||||
-- * Size-optimized EBNF SRGs
|
-- * Size-optimized EBNF SRGs
|
||||||
--
|
--
|
||||||
|
|
||||||
{-
|
data EBnfSRGAlt = EBnfSRGAlt (Maybe Double) CFTerm EBnfSRGItem
|
||||||
data EBnfSRGAlt = EBnfSRGAlt (Maybe Double) Name EBnfSRGItem
|
|
||||||
deriving (Eq,Show)
|
deriving (Eq,Show)
|
||||||
|
|
||||||
type EBnfSRGItem = RE (Symbol SRGNT Token)
|
type EBnfSRGItem = RE (Symbol SRGNT Token)
|
||||||
@@ -186,21 +185,22 @@ type EBnfSRGItem = RE (Symbol SRGNT Token)
|
|||||||
|
|
||||||
ebnfSRGAlts :: [SRGAlt] -> [EBnfSRGAlt]
|
ebnfSRGAlts :: [SRGAlt] -> [EBnfSRGAlt]
|
||||||
ebnfSRGAlts alts = [EBnfSRGAlt p n (ebnfSRGItem sss)
|
ebnfSRGAlts alts = [EBnfSRGAlt p n (ebnfSRGItem sss)
|
||||||
| ((p,n),sss) <- buildMultiMap [((p,n),ss) | SRGAlt p n ss <- alts]]
|
| ((n,p),sss) <- buildMultiMap [((n,p),ss) | SRGAlt p n ss <- alts]]
|
||||||
|
|
||||||
ebnfSRGItem :: [[Symbol SRGNT Token]] -> EBnfSRGItem
|
ebnfSRGItem :: [[Symbol SRGNT Token]] -> EBnfSRGItem
|
||||||
ebnfSRGItem = dfa2re . mkSRGFA
|
ebnfSRGItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
|
||||||
|
|
||||||
mkSRGFA :: [[Symbol SRGNT Token]] -> DFA (Symbol SRGNT Token)
|
-- ^ Merges a list of right-hand sides which all have the same
|
||||||
mkSRGFA = {- minimize . dfa2nfa . -} foldr addString (newFA ())
|
-- sequence of non-terminals.
|
||||||
|
mergeItems :: [[Symbol SRGNT Token]] -> EBnfSRGItem
|
||||||
addString :: [a] -> DFA a -> DFA a
|
--mergeItems = unionRE . map seqRE
|
||||||
addString xs fa = addFinalState (last sts0) $ newTransitions ts fa'
|
mergeItems [] = nullRE
|
||||||
where (fa',ss) = newStates (replicate (length xs) ()) fa
|
mergeItems sss | any null rss = t
|
||||||
sts0 = startState fa : sts1
|
| otherwise = concatRE [t,seqRE (head cs), mergeItems nss]
|
||||||
sts1 = map fst ss
|
where (tss,rss) = unzip $ map (span isToken) sss
|
||||||
ts = zip3 sts0 sts1 xs
|
t = unionRE (map seqRE tss)
|
||||||
-}
|
(cs,nss) = unzip $ map (splitAt 1) rss
|
||||||
|
isToken = symbol (const False) (const True)
|
||||||
|
|
||||||
--
|
--
|
||||||
-- * Utilities for building and printing SRGs
|
-- * Utilities for building and printing SRGs
|
||||||
|
|||||||
@@ -60,7 +60,7 @@ data CFTerm
|
|||||||
| CFVar Int
|
| CFVar Int
|
||||||
| CFConst String
|
| CFConst String
|
||||||
| CFMeta String
|
| CFMeta String
|
||||||
deriving (Eq,Show)
|
deriving (Eq,Ord,Show)
|
||||||
|
|
||||||
type Cat_ = String
|
type Cat_ = String
|
||||||
type CFSymbol_ = Symbol Cat_ Token
|
type CFSymbol_ = Symbol Cat_ Token
|
||||||
|
|||||||
Reference in New Issue
Block a user