1
0
forked from GitHub/gf-core

Generate more compact JSGF by converting to ENBF.

This commit is contained in:
bringert
2007-01-07 20:04:39 +00:00
parent bb7eb9f78d
commit 476080e6db
4 changed files with 58 additions and 45 deletions

View File

@@ -55,11 +55,11 @@ prJSGF sisr srg@(SRG{grammarName=name,startCat=start,origStartCat=origStart,rule
rule True "MAIN" [prCat start]
prRule (SRGRule cat origCat rhs) =
comment origCat $$
-- rule False cat (map prAlt (ebnfSRGAlts rhs))
rule False cat (map prAlt rhs)
rule False cat (map prAlt (ebnfSRGAlts rhs))
-- rule False cat (map prAlt rhs)
-- FIXME: use the probability
-- prAlt (EBnfSRGAlt mp n rhs) = tag sisr (profileInitSISR n) . showChar ' '. prItem sisr rhs
prAlt (SRGAlt mp n rhs) = initTag <+> prItem sisr n rhs <+> finalTag
prAlt (EBnfSRGAlt mp n rhs) = sep [initTag, prItem sisr n rhs, finalTag]
-- prAlt (SRGAlt mp n rhs) = initTag <+> prItem sisr n rhs <+> finalTag
where initTag | isEmpty t = empty
| otherwise = text "<NULL>" <+> t
where t = tag sisr (profileInitSISR n)
@@ -74,25 +74,25 @@ catFormId = (++ "_cat")
prCat :: SRGCat -> Doc
prCat c = char '<' <> text c <> char '>'
{-
prItem :: Maybe SISRFormat -> EBnfSRGItem -> ShowS
prItem sisr = f 1
prItem :: Maybe SISRFormat -> CFTerm -> EBnfSRGItem -> Doc
prItem sisr t = f 1
where
f _ (REUnion []) = showString "<VOID>"
f _ (REUnion []) = text "<VOID>"
f p (REUnion xs)
| not (null es) = wrap "[" (f 0 (REUnion nes)) "]"
| otherwise = (if p >= 1 then paren else id) (joinS " | " (map (f 1) xs))
where (es,nes) = partition (== REConcat []) xs
f _ (REConcat []) = showString "<NULL>"
f p (REConcat xs) = (if p >= 3 then paren else id) (unwordsS (map (f 2) xs))
f p (RERepeat x) = f 3 x . showString "*"
f _ (RESymbol s) = prSymbol sisr s
-}
| not (null es) = brackets (f 0 (REUnion nes))
| otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs))
where (es,nes) = partition isEpsilon xs
f _ (REConcat []) = text "<NULL>"
f p (REConcat xs) = (if p >= 3 then parens else id) (hsep (map (f 2) xs))
f p (RERepeat x) = f 3 x <> char '*'
f _ (RESymbol s) = prSymbol sisr t s
{-
prItem :: Maybe SISRFormat -> CFTerm -> [Symbol SRGNT Token] -> Doc
prItem _ _ [] = text "<NULL>"
prItem sisr cn ss = paren $ hsep $ map (prSymbol sisr cn) ss
where paren = if length ss == 1 then id else parens
-}
prSymbol :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> Doc
prSymbol sisr cn (Cat n@(c,_)) = prCat c <+> tag sisr (catSISR cn n)
@@ -103,7 +103,7 @@ tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc
tag Nothing _ = empty
tag (Just fmt) t = case t fmt of
[] -> empty
ts -> char '{' <+> text (e $ prSISR ts) <+> char '}'
ts -> char '{' <+> (text (e $ prSISR ts)) <+> char '}'
where e [] = []
e ('}':xs) = '\\':'}':e xs
e ('\n':xs) = ' ' : e (dropWhile isSpace xs)
@@ -115,11 +115,11 @@ isPunct c = c `elem` "-_.;.,?!"
comment :: String -> Doc
comment s = text "//" <+> text s
alts :: [Doc] -> Doc
alts = sep . prepunctuate (text "| ")
rule :: Bool -> SRGCat -> [Doc] -> Doc
rule pub c xs = p <+> prCat c <+> char '='
$$ nest 2 (sep (prepunctuate (text "| ") xs) <+> char ';')
rule pub c xs = sep [p <+> prCat c <+> char '=', nest 2 (alts xs) <+> char ';']
where p = if pub then text "public" else empty
-- Pretty-printing utilities
@@ -133,3 +133,4 @@ prepunctuate p (x:xs) = x : map (p <>) xs
($++$) :: Doc -> Doc -> Doc
x $++$ y = x $$ emptyLine $$ y

View File

@@ -1,4 +1,9 @@
module GF.Speech.RegExp (RE(..), dfa2re, prRE) where
module GF.Speech.RegExp (RE(..),
epsilonRE, nullRE,
isEpsilon, isNull,
unionRE, concatRE, seqRE,
repeatRE,
dfa2re, prRE) where
import Data.List
@@ -10,17 +15,17 @@ data RE a =
| REConcat [RE a] -- ^ REConcat [] is epsilon
| RERepeat (RE a)
| RESymbol a
deriving (Eq,Show)
deriving (Eq,Ord,Show)
dfa2re :: Show a => DFA a -> RE a
dfa2re :: (Show a,Ord a) => DFA a -> RE a
dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops
. oneFinalState () epsilonRE . mapTransitions RESymbol
where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa
merge es = [(f,t,unionRE ls)
| ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]]
elimStates :: Show a => DFA (RE a) -> DFA (RE a)
elimStates :: (Show a, Ord a) => DFA (RE a) -> DFA (RE a)
elimStates fa =
case [s | (s,_) <- states fa, isInternal fa s] of
[] -> fa
@@ -31,18 +36,22 @@ elimStates fa =
ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs]
r r1 r3 = concatRE [r1, repeatRE r2, r3]
epsilonRE :: RE a
epsilonRE = REConcat []
nullRE :: RE a
nullRE = REUnion []
isNull :: RE a -> Bool
isNull (REUnion []) = True
isNull _ = False
isEpsilon :: RE a -> Bool
isEpsilon (REConcat []) = True
isEpsilon _ = False
unionRE :: [RE a] -> RE a
unionRE = unionOrId . concatMap toList
unionRE :: Ord a => [RE a] -> RE a
unionRE = unionOrId . sortNub . concatMap toList
where
toList (REUnion xs) = xs
toList x = [x]
@@ -58,11 +67,14 @@ concatRE xs | any isNull xs = nullRE
toList (REConcat xs) = xs
toList x = [x]
seqRE :: [a] -> RE a
seqRE = concatRE . map RESymbol
repeatRE :: RE a -> RE a
repeatRE x | isNull x || isEpsilon x = epsilonRE
| otherwise = RERepeat x
finalRE :: DFA (RE a) -> RE a
finalRE :: Ord a => DFA (RE a) -> RE a
finalRE fa = concatRE [repeatRE r1, r2,
repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])]
where

View File

@@ -23,8 +23,8 @@ module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..),
makeSimpleSRG, makeSRG
, lookupFM_, prtS
, topDownFilter, cfgCatToGFCat, srgTopCats
--, EBnfSRGAlt(..), EBnfSRGItem
--, ebnfSRGAlts
, EBnfSRGAlt(..), EBnfSRGItem
, ebnfSRGAlts
) where
import GF.Data.Operations
@@ -33,7 +33,7 @@ import GF.Infra.Ident
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..), NameProfile(..)
, Profile(..), SyntaxForest
, filterCats, mapSymbol)
, filterCats, mapSymbol, symbol)
import GF.Conversion.Types
import GF.Infra.Print
import GF.Speech.TransformCFG
@@ -177,8 +177,7 @@ srgTopCats srg = buildMultiMap [(oc, cat) | SRGRule cat origCat _ <- rules srg,
-- * Size-optimized EBNF SRGs
--
{-
data EBnfSRGAlt = EBnfSRGAlt (Maybe Double) Name EBnfSRGItem
data EBnfSRGAlt = EBnfSRGAlt (Maybe Double) CFTerm EBnfSRGItem
deriving (Eq,Show)
type EBnfSRGItem = RE (Symbol SRGNT Token)
@@ -186,21 +185,22 @@ type EBnfSRGItem = RE (Symbol SRGNT Token)
ebnfSRGAlts :: [SRGAlt] -> [EBnfSRGAlt]
ebnfSRGAlts alts = [EBnfSRGAlt p n (ebnfSRGItem sss)
| ((p,n),sss) <- buildMultiMap [((p,n),ss) | SRGAlt p n ss <- alts]]
| ((n,p),sss) <- buildMultiMap [((n,p),ss) | SRGAlt p n ss <- alts]]
ebnfSRGItem :: [[Symbol SRGNT Token]] -> EBnfSRGItem
ebnfSRGItem = dfa2re . mkSRGFA
ebnfSRGItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
mkSRGFA :: [[Symbol SRGNT Token]] -> DFA (Symbol SRGNT Token)
mkSRGFA = {- minimize . dfa2nfa . -} foldr addString (newFA ())
addString :: [a] -> DFA a -> DFA a
addString xs fa = addFinalState (last sts0) $ newTransitions ts fa'
where (fa',ss) = newStates (replicate (length xs) ()) fa
sts0 = startState fa : sts1
sts1 = map fst ss
ts = zip3 sts0 sts1 xs
-}
-- ^ Merges a list of right-hand sides which all have the same
-- sequence of non-terminals.
mergeItems :: [[Symbol SRGNT Token]] -> EBnfSRGItem
--mergeItems = unionRE . map seqRE
mergeItems [] = nullRE
mergeItems sss | any null rss = t
| otherwise = concatRE [t,seqRE (head cs), mergeItems nss]
where (tss,rss) = unzip $ map (span isToken) sss
t = unionRE (map seqRE tss)
(cs,nss) = unzip $ map (splitAt 1) rss
isToken = symbol (const False) (const True)
--
-- * Utilities for building and printing SRGs

View File

@@ -60,7 +60,7 @@ data CFTerm
| CFVar Int
| CFConst String
| CFMeta String
deriving (Eq,Show)
deriving (Eq,Ord,Show)
type Cat_ = String
type CFSymbol_ = Symbol Cat_ Token