forked from GitHub/gf-core
Generate more compact JSGF by converting to ENBF.
This commit is contained in:
@@ -55,11 +55,11 @@ prJSGF sisr srg@(SRG{grammarName=name,startCat=start,origStartCat=origStart,rule
|
||||
rule True "MAIN" [prCat start]
|
||||
prRule (SRGRule cat origCat rhs) =
|
||||
comment origCat $$
|
||||
-- rule False cat (map prAlt (ebnfSRGAlts rhs))
|
||||
rule False cat (map prAlt rhs)
|
||||
rule False cat (map prAlt (ebnfSRGAlts rhs))
|
||||
-- rule False cat (map prAlt rhs)
|
||||
-- FIXME: use the probability
|
||||
-- prAlt (EBnfSRGAlt mp n rhs) = tag sisr (profileInitSISR n) . showChar ' '. prItem sisr rhs
|
||||
prAlt (SRGAlt mp n rhs) = initTag <+> prItem sisr n rhs <+> finalTag
|
||||
prAlt (EBnfSRGAlt mp n rhs) = sep [initTag, prItem sisr n rhs, finalTag]
|
||||
-- prAlt (SRGAlt mp n rhs) = initTag <+> prItem sisr n rhs <+> finalTag
|
||||
where initTag | isEmpty t = empty
|
||||
| otherwise = text "<NULL>" <+> t
|
||||
where t = tag sisr (profileInitSISR n)
|
||||
@@ -74,25 +74,25 @@ catFormId = (++ "_cat")
|
||||
prCat :: SRGCat -> Doc
|
||||
prCat c = char '<' <> text c <> char '>'
|
||||
|
||||
{-
|
||||
prItem :: Maybe SISRFormat -> EBnfSRGItem -> ShowS
|
||||
prItem sisr = f 1
|
||||
prItem :: Maybe SISRFormat -> CFTerm -> EBnfSRGItem -> Doc
|
||||
prItem sisr t = f 1
|
||||
where
|
||||
f _ (REUnion []) = showString "<VOID>"
|
||||
f _ (REUnion []) = text "<VOID>"
|
||||
f p (REUnion xs)
|
||||
| not (null es) = wrap "[" (f 0 (REUnion nes)) "]"
|
||||
| otherwise = (if p >= 1 then paren else id) (joinS " | " (map (f 1) xs))
|
||||
where (es,nes) = partition (== REConcat []) xs
|
||||
f _ (REConcat []) = showString "<NULL>"
|
||||
f p (REConcat xs) = (if p >= 3 then paren else id) (unwordsS (map (f 2) xs))
|
||||
f p (RERepeat x) = f 3 x . showString "*"
|
||||
f _ (RESymbol s) = prSymbol sisr s
|
||||
-}
|
||||
| not (null es) = brackets (f 0 (REUnion nes))
|
||||
| otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs))
|
||||
where (es,nes) = partition isEpsilon xs
|
||||
f _ (REConcat []) = text "<NULL>"
|
||||
f p (REConcat xs) = (if p >= 3 then parens else id) (hsep (map (f 2) xs))
|
||||
f p (RERepeat x) = f 3 x <> char '*'
|
||||
f _ (RESymbol s) = prSymbol sisr t s
|
||||
|
||||
{-
|
||||
prItem :: Maybe SISRFormat -> CFTerm -> [Symbol SRGNT Token] -> Doc
|
||||
prItem _ _ [] = text "<NULL>"
|
||||
prItem sisr cn ss = paren $ hsep $ map (prSymbol sisr cn) ss
|
||||
where paren = if length ss == 1 then id else parens
|
||||
-}
|
||||
|
||||
prSymbol :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> Doc
|
||||
prSymbol sisr cn (Cat n@(c,_)) = prCat c <+> tag sisr (catSISR cn n)
|
||||
@@ -103,7 +103,7 @@ tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc
|
||||
tag Nothing _ = empty
|
||||
tag (Just fmt) t = case t fmt of
|
||||
[] -> empty
|
||||
ts -> char '{' <+> text (e $ prSISR ts) <+> char '}'
|
||||
ts -> char '{' <+> (text (e $ prSISR ts)) <+> char '}'
|
||||
where e [] = []
|
||||
e ('}':xs) = '\\':'}':e xs
|
||||
e ('\n':xs) = ' ' : e (dropWhile isSpace xs)
|
||||
@@ -115,11 +115,11 @@ isPunct c = c `elem` "-_.;.,?!"
|
||||
comment :: String -> Doc
|
||||
comment s = text "//" <+> text s
|
||||
|
||||
|
||||
alts :: [Doc] -> Doc
|
||||
alts = sep . prepunctuate (text "| ")
|
||||
|
||||
rule :: Bool -> SRGCat -> [Doc] -> Doc
|
||||
rule pub c xs = p <+> prCat c <+> char '='
|
||||
$$ nest 2 (sep (prepunctuate (text "| ") xs) <+> char ';')
|
||||
rule pub c xs = sep [p <+> prCat c <+> char '=', nest 2 (alts xs) <+> char ';']
|
||||
where p = if pub then text "public" else empty
|
||||
|
||||
-- Pretty-printing utilities
|
||||
@@ -133,3 +133,4 @@ prepunctuate p (x:xs) = x : map (p <>) xs
|
||||
|
||||
($++$) :: Doc -> Doc -> Doc
|
||||
x $++$ y = x $$ emptyLine $$ y
|
||||
|
||||
|
||||
@@ -1,4 +1,9 @@
|
||||
module GF.Speech.RegExp (RE(..), dfa2re, prRE) where
|
||||
module GF.Speech.RegExp (RE(..),
|
||||
epsilonRE, nullRE,
|
||||
isEpsilon, isNull,
|
||||
unionRE, concatRE, seqRE,
|
||||
repeatRE,
|
||||
dfa2re, prRE) where
|
||||
|
||||
import Data.List
|
||||
|
||||
@@ -10,17 +15,17 @@ data RE a =
|
||||
| REConcat [RE a] -- ^ REConcat [] is epsilon
|
||||
| RERepeat (RE a)
|
||||
| RESymbol a
|
||||
deriving (Eq,Show)
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
|
||||
dfa2re :: Show a => DFA a -> RE a
|
||||
dfa2re :: (Show a,Ord a) => DFA a -> RE a
|
||||
dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops
|
||||
. oneFinalState () epsilonRE . mapTransitions RESymbol
|
||||
where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa
|
||||
merge es = [(f,t,unionRE ls)
|
||||
| ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]]
|
||||
|
||||
elimStates :: Show a => DFA (RE a) -> DFA (RE a)
|
||||
elimStates :: (Show a, Ord a) => DFA (RE a) -> DFA (RE a)
|
||||
elimStates fa =
|
||||
case [s | (s,_) <- states fa, isInternal fa s] of
|
||||
[] -> fa
|
||||
@@ -31,18 +36,22 @@ elimStates fa =
|
||||
ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs]
|
||||
r r1 r3 = concatRE [r1, repeatRE r2, r3]
|
||||
|
||||
epsilonRE :: RE a
|
||||
epsilonRE = REConcat []
|
||||
|
||||
nullRE :: RE a
|
||||
nullRE = REUnion []
|
||||
|
||||
isNull :: RE a -> Bool
|
||||
isNull (REUnion []) = True
|
||||
isNull _ = False
|
||||
|
||||
isEpsilon :: RE a -> Bool
|
||||
isEpsilon (REConcat []) = True
|
||||
isEpsilon _ = False
|
||||
|
||||
unionRE :: [RE a] -> RE a
|
||||
unionRE = unionOrId . concatMap toList
|
||||
unionRE :: Ord a => [RE a] -> RE a
|
||||
unionRE = unionOrId . sortNub . concatMap toList
|
||||
where
|
||||
toList (REUnion xs) = xs
|
||||
toList x = [x]
|
||||
@@ -58,11 +67,14 @@ concatRE xs | any isNull xs = nullRE
|
||||
toList (REConcat xs) = xs
|
||||
toList x = [x]
|
||||
|
||||
seqRE :: [a] -> RE a
|
||||
seqRE = concatRE . map RESymbol
|
||||
|
||||
repeatRE :: RE a -> RE a
|
||||
repeatRE x | isNull x || isEpsilon x = epsilonRE
|
||||
| otherwise = RERepeat x
|
||||
|
||||
finalRE :: DFA (RE a) -> RE a
|
||||
finalRE :: Ord a => DFA (RE a) -> RE a
|
||||
finalRE fa = concatRE [repeatRE r1, r2,
|
||||
repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])]
|
||||
where
|
||||
|
||||
@@ -23,8 +23,8 @@ module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..),
|
||||
makeSimpleSRG, makeSRG
|
||||
, lookupFM_, prtS
|
||||
, topDownFilter, cfgCatToGFCat, srgTopCats
|
||||
--, EBnfSRGAlt(..), EBnfSRGItem
|
||||
--, ebnfSRGAlts
|
||||
, EBnfSRGAlt(..), EBnfSRGItem
|
||||
, ebnfSRGAlts
|
||||
) where
|
||||
|
||||
import GF.Data.Operations
|
||||
@@ -33,7 +33,7 @@ import GF.Infra.Ident
|
||||
import GF.Formalism.CFG
|
||||
import GF.Formalism.Utilities (Symbol(..), NameProfile(..)
|
||||
, Profile(..), SyntaxForest
|
||||
, filterCats, mapSymbol)
|
||||
, filterCats, mapSymbol, symbol)
|
||||
import GF.Conversion.Types
|
||||
import GF.Infra.Print
|
||||
import GF.Speech.TransformCFG
|
||||
@@ -177,8 +177,7 @@ srgTopCats srg = buildMultiMap [(oc, cat) | SRGRule cat origCat _ <- rules srg,
|
||||
-- * Size-optimized EBNF SRGs
|
||||
--
|
||||
|
||||
{-
|
||||
data EBnfSRGAlt = EBnfSRGAlt (Maybe Double) Name EBnfSRGItem
|
||||
data EBnfSRGAlt = EBnfSRGAlt (Maybe Double) CFTerm EBnfSRGItem
|
||||
deriving (Eq,Show)
|
||||
|
||||
type EBnfSRGItem = RE (Symbol SRGNT Token)
|
||||
@@ -186,21 +185,22 @@ type EBnfSRGItem = RE (Symbol SRGNT Token)
|
||||
|
||||
ebnfSRGAlts :: [SRGAlt] -> [EBnfSRGAlt]
|
||||
ebnfSRGAlts alts = [EBnfSRGAlt p n (ebnfSRGItem sss)
|
||||
| ((p,n),sss) <- buildMultiMap [((p,n),ss) | SRGAlt p n ss <- alts]]
|
||||
| ((n,p),sss) <- buildMultiMap [((n,p),ss) | SRGAlt p n ss <- alts]]
|
||||
|
||||
ebnfSRGItem :: [[Symbol SRGNT Token]] -> EBnfSRGItem
|
||||
ebnfSRGItem = dfa2re . mkSRGFA
|
||||
ebnfSRGItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
|
||||
|
||||
mkSRGFA :: [[Symbol SRGNT Token]] -> DFA (Symbol SRGNT Token)
|
||||
mkSRGFA = {- minimize . dfa2nfa . -} foldr addString (newFA ())
|
||||
|
||||
addString :: [a] -> DFA a -> DFA a
|
||||
addString xs fa = addFinalState (last sts0) $ newTransitions ts fa'
|
||||
where (fa',ss) = newStates (replicate (length xs) ()) fa
|
||||
sts0 = startState fa : sts1
|
||||
sts1 = map fst ss
|
||||
ts = zip3 sts0 sts1 xs
|
||||
-}
|
||||
-- ^ Merges a list of right-hand sides which all have the same
|
||||
-- sequence of non-terminals.
|
||||
mergeItems :: [[Symbol SRGNT Token]] -> EBnfSRGItem
|
||||
--mergeItems = unionRE . map seqRE
|
||||
mergeItems [] = nullRE
|
||||
mergeItems sss | any null rss = t
|
||||
| otherwise = concatRE [t,seqRE (head cs), mergeItems nss]
|
||||
where (tss,rss) = unzip $ map (span isToken) sss
|
||||
t = unionRE (map seqRE tss)
|
||||
(cs,nss) = unzip $ map (splitAt 1) rss
|
||||
isToken = symbol (const False) (const True)
|
||||
|
||||
--
|
||||
-- * Utilities for building and printing SRGs
|
||||
|
||||
@@ -60,7 +60,7 @@ data CFTerm
|
||||
| CFVar Int
|
||||
| CFConst String
|
||||
| CFMeta String
|
||||
deriving (Eq,Show)
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
type Cat_ = String
|
||||
type CFSymbol_ = Symbol Cat_ Token
|
||||
|
||||
Reference in New Issue
Block a user