Added option to treat some categories as lexical when generating Haskell data types.

This commit is contained in:
bjorn
2008-10-10 11:55:12 +00:00
parent 18aa48941c
commit 88798b2a23
3 changed files with 90 additions and 51 deletions

View File

@@ -35,8 +35,7 @@ exportPGF opts fmt pgf =
FmtPGF -> multi "pgf" printPGF
FmtPGFPretty -> multi "txt" prPGFPretty
FmtJavaScript -> multi "js" pgf2js
FmtHaskell -> multi "hs" (grammar2haskell hsPrefix name)
FmtHaskell_GADT -> multi "hs" (grammar2haskellGADT hsPrefix name)
FmtHaskell -> multi "hs" (grammar2haskell opts name)
FmtProlog -> multi "pl" grammar2prolog
FmtProlog_Abs -> multi "pl" grammar2prolog_abs
FmtBNF -> single "bnf" bnfPrinter
@@ -54,7 +53,6 @@ exportPGF opts fmt pgf =
FmtFA -> single "dot" slfGraphvizPrinter
where
name = fromMaybe (prCId (absname pgf)) (moduleFlag optName opts)
hsPrefix = flag optHaskellPrefix opts
multi :: String -> (PGF -> String) -> [(FilePath,String)]
multi ext pr = [(name <.> ext, pr pgf)]

View File

@@ -14,13 +14,14 @@
-- AR 11/11/1999 -- 7/12/2000 -- 18/5/2004
-----------------------------------------------------------------------------
module GF.Compile.GFCCtoHaskell (grammar2haskell, grammar2haskellGADT) where
module GF.Compile.GFCCtoHaskell (grammar2haskell) where
import PGF.CId
import PGF.Data
import PGF.Macros
import GF.Data.Operations
import GF.Infra.Option
import GF.Text.UTF8
import Data.List --(isPrefixOf, find, intersperse)
@@ -29,24 +30,21 @@ import qualified Data.Map as Map
type Prefix = String -> String
-- | the main function
grammar2haskell :: String -- ^ Constructor prefix
grammar2haskell :: Options
-> String -- ^ Module name.
-> PGF
-> String
grammar2haskell prefix name gr = encodeUTF8 $ foldr (++++) [] $
haskPreamble name ++ [datatypes gId gr', gfinstances gId gr']
grammar2haskell opts name gr = encodeUTF8 $ foldr (++++) [] $
pragmas ++ haskPreamble name ++ [types, gfinstances gId lexical gr']
where gr' = hSkeleton gr
gId = (prefix++)
grammar2haskellGADT :: String -- ^ Constructor prefix
-> String -- ^ Module name.
-> PGF
-> String
grammar2haskellGADT prefix name gr = encodeUTF8 $ foldr (++++) [] $
["{-# OPTIONS_GHC -fglasgow-exts #-}"] ++
haskPreamble name ++ [datatypesGADT gId gr', gfinstances gId gr']
where gr' = hSkeleton gr
gId = (prefix++)
gadt = haskellOption opts HaskellGADT
lexical cat = haskellOption opts HaskellLexical && isLexicalCat opts cat
gId | haskellOption opts HaskellNoPrefix = id
| otherwise = ("G"++)
pragmas | gadt = ["{-# OPTIONS_GHC -fglasgow-exts #-}"]
| otherwise = []
types | gadt = datatypesGADT gId lexical gr'
| otherwise = datatypes gId lexical gr'
haskPreamble name =
[
@@ -86,49 +84,62 @@ type OIdent = String
type HSkeleton = [(OIdent, [(OIdent, [OIdent])])]
datatypes, gfinstances :: Prefix -> (String,HSkeleton) -> String
datatypes gId = (foldr (+++++) "") . (filter (/="")) . (map (hDatatype gId)) . snd
gfinstances gId (m,g) = (foldr (+++++) "") $ (filter (/="")) $ (map (gfInstance gId m)) g
datatypes :: Prefix -> (OIdent -> Bool) -> (String,HSkeleton) -> String
datatypes gId lexical = (foldr (+++++) "") . (filter (/="")) . (map (hDatatype gId lexical)) . snd
hDatatype :: Prefix -> (OIdent, [(OIdent, [OIdent])]) -> String
gfInstance :: Prefix -> String -> (OIdent, [(OIdent, [OIdent])]) -> String
gfinstances :: Prefix -> (OIdent -> Bool) -> (String,HSkeleton) -> String
gfinstances gId lexical (m,g) = (foldr (+++++) "") $ (filter (/="")) $ (map (gfInstance gId lexical m)) g
hDatatype _ ("Cn",_) = "" ---
hDatatype _ (cat,[]) = ""
hDatatype gId (cat,rules) | isListCat (cat,rules) =
hDatatype :: Prefix -> (OIdent -> Bool) -> (OIdent, [(OIdent, [OIdent])]) -> String
hDatatype _ _ ("Cn",_) = "" ---
hDatatype _ _ (cat,[]) = ""
hDatatype gId _ (cat,rules) | isListCat (cat,rules) =
"newtype" +++ gId cat +++ "=" +++ gId cat +++ "[" ++ gId (elemCat cat) ++ "]"
+++ "deriving Show"
hDatatype gId (cat,rules) =
hDatatype gId lexical (cat,rules) =
"data" +++ gId cat +++ "=" ++
(if length rules == 1 then "" else "\n ") +++
foldr1 (\x y -> x ++ "\n |" +++ y)
[gId f +++ foldr (+++) "" (map (gId) xx) | (f,xx) <- rules] ++++
foldr1 (\x y -> x ++ "\n |" +++ y) constructors ++++
" deriving Show"
where
constructors = [gId f +++ foldr (+++) "" (map (gId) xx) | (f,xx) <- nonLexicalRules (lexical cat) rules]
++ if lexical cat then [lexicalConstructor cat +++ "String"] else []
nonLexicalRules :: Bool -> [(OIdent, [OIdent])] -> [(OIdent, [OIdent])]
nonLexicalRules False rules = rules
nonLexicalRules True rules = [r | r@(f,t) <- rules, not (null t)]
lexicalConstructor :: OIdent -> String
lexicalConstructor cat = "Lex" ++ cat
-- GADT version of data types
datatypesGADT :: Prefix -> (String,HSkeleton) -> String
datatypesGADT gId (_,skel) =
datatypesGADT :: Prefix -> (OIdent -> Bool) -> (String,HSkeleton) -> String
datatypesGADT gId lexical (_,skel) =
unlines (concatMap (hCatTypeGADT gId) skel)
+++++
"data Tree :: * -> * where" ++++ unlines (concatMap (map (" "++) . hDatatypeGADT gId) skel)
"data Tree :: * -> * where" ++++ unlines (concatMap (map (" "++) . hDatatypeGADT gId lexical) skel)
hCatTypeGADT :: Prefix -> (OIdent, [(OIdent, [OIdent])]) -> [String]
hCatTypeGADT gId (cat,rules)
= ["type"+++gId cat+++"="+++"Tree"+++gId cat++"_",
"data"+++gId cat++"_"]
hDatatypeGADT :: Prefix -> (OIdent, [(OIdent, [OIdent])]) -> [String]
hDatatypeGADT gId (cat, rules)
hDatatypeGADT :: Prefix -> (OIdent -> Bool) -> (OIdent, [(OIdent, [OIdent])]) -> [String]
hDatatypeGADT gId lexical (cat, rules)
| isListCat (cat,rules) = [gId cat+++"::"+++"["++gId (elemCat cat)++"]" +++ "->" +++ t]
| otherwise =
[ gId f +++ "::" +++ concatMap (\a -> gId a +++ "-> ") args ++ t | (f,args) <- rules ]
[ gId f +++ "::" +++ concatMap (\a -> gId a +++ "-> ") args ++ t
| (f,args) <- nonLexicalRules (lexical cat) rules ]
++ if lexical cat then [lexicalConstructor cat +++ ":: String ->"+++ t] else []
where t = "Tree" +++ gId cat ++ "_"
gfInstance gId m crs = hInstance gId m crs ++++ fInstance gId m crs
gfInstance :: Prefix -> (OIdent -> Bool) -> String -> (OIdent, [(OIdent, [OIdent])]) -> String
gfInstance gId lexical m crs = hInstance gId lexical m crs ++++ fInstance gId lexical m crs
----hInstance m ("Cn",_) = "" --- seems to belong to an old applic. AR 18/5/2004
hInstance _ m (cat,[]) = ""
hInstance gId m (cat,rules)
hInstance _ _ m (cat,[]) = ""
hInstance gId lexical m (cat,rules)
| isListCat (cat,rules) =
"instance Gf" +++ gId cat +++ "where" ++++
" gf (" ++ gId cat +++ "[" ++ concat (intersperse "," baseVars) ++ "])"
@@ -139,7 +150,8 @@ hInstance gId m (cat,rules)
-- ++++ " gf (" ++ gId cat +++ "xs) = error (\"Bad " ++ cat ++ " value: \" ++ show xs)"
| otherwise =
"instance Gf" +++ gId cat +++ "where\n" ++
unlines [mkInst f xx | (f,xx) <- rules]
unlines ([mkInst f xx | (f,xx) <- nonLexicalRules (lexical cat) rules]
++ if lexical cat then [" gf (" ++ lexicalConstructor cat +++ "x) = Fun (mkCId x) []"] else [])
where
ec = elemCat cat
baseVars = mkVars (baseSize (cat,rules))
@@ -152,11 +164,12 @@ hInstance gId m (cat,rules)
----fInstance m ("Cn",_) = "" ---
fInstance _ m (cat,[]) = ""
fInstance gId m (cat,rules) =
fInstance _ _ m (cat,[]) = ""
fInstance gId lexical m (cat,rules) =
" fg t =" ++++
" case t of" ++++
unlines [mkInst f xx | (f,xx) <- rules] ++++
unlines [mkInst f xx | (f,xx) <- nonLexicalRules (lexical cat) rules] ++++
(if lexical cat then " Fun i [] -> " ++ lexicalConstructor cat +++ "(prCId i)" else "") ++++
" _ -> error (\"no" +++ cat ++ " \" ++ show t)"
where
mkInst f xx =

View File

@@ -4,7 +4,7 @@ module GF.Infra.Option
Options, ModuleOptions,
Flags(..), ModuleFlags(..),
Mode(..), Phase(..), Verbosity(..), Encoding(..), OutputFormat(..),
SISRFormat(..), Optimization(..), CFGTransform(..),
SISRFormat(..), Optimization(..), CFGTransform(..), HaskellOption(..),
Dump(..), Printer(..), Recomp(..),
-- * Option parsing
parseOptions, parseModuleOptions,
@@ -17,7 +17,8 @@ module GF.Infra.Option
modifyFlags, modifyModuleFlags,
helpMessage,
-- * Checking specific options
flag, moduleFlag, cfgTransform,
flag, moduleFlag, cfgTransform, haskellOption,
isLexicalCat,
-- * Setting specific options
setOptimization, setCFGTransform,
-- * Convenience methods for checking options
@@ -84,7 +85,6 @@ data OutputFormat = FmtPGF
| FmtPGFPretty
| FmtJavaScript
| FmtHaskell
| FmtHaskell_GADT
| FmtProlog
| FmtProlog_Abs
| FmtBNF
@@ -123,6 +123,9 @@ data CFGTransform = CFGNoLR
| CFGRemoveCycles
deriving (Show,Eq,Ord)
data HaskellOption = HaskellNoPrefix | HaskellGADT | HaskellLexical
deriving (Show,Eq,Ord)
data Warning = WarnMissingLincat
deriving (Show,Eq,Ord)
@@ -166,7 +169,8 @@ data Flags = Flags {
optGFODir :: FilePath,
optOutputFormats :: [OutputFormat],
optSISR :: Maybe SISRFormat,
optHaskellPrefix :: String,
optHaskellOptions :: Set HaskellOption,
optLexicalCats :: Set String,
optOutputFile :: Maybe FilePath,
optOutputDir :: Maybe FilePath,
optRecomp :: Recomp,
@@ -313,7 +317,8 @@ defaultFlags = Flags {
optGFODir = ".",
optOutputFormats = [FmtPGF],
optSISR = Nothing,
optHaskellPrefix = "G",
optHaskellOptions = Set.empty,
optLexicalCats = Set.empty,
optOutputFile = Nothing,
optOutputDir = Nothing,
optRecomp = RecompIfNewer,
@@ -431,8 +436,11 @@ optDescr =
Option [] ["sisr"] (ReqArg sisrFmt "FMT")
(unlines ["Include SISR tags in generated speech recognition grammars.",
"FMT can be one of: old, 1.0"]),
Option [] ["haskell-prefix"] (ReqArg hsPrefix "PREFIX")
"Constructor prefix for generated Haskell code. Default: G",
Option [] ["haskell"] (ReqArg hsOption "OPTION")
("Turn on an optional feature when generating Haskell data types. OPTION = "
++ concat (intersperse " | " (map fst haskellOptionNames))),
Option [] ["lexical"] (ReqArg lexicalCat "CAT[,CAT[...]]")
"Treat CAT as a lexical category.",
Option ['o'] ["output-file"] (ReqArg outFile "FILE")
"Save output in FILE (default is out.X, where X depends on output format.",
Option ['D'] ["output-dir"] (ReqArg outDir "DIR")
@@ -464,7 +472,11 @@ optDescr =
"old" -> set $ \o -> o { optSISR = Just SISR_WD20030401 }
"1.0" -> set $ \o -> o { optSISR = Just SISR_1_0 }
_ -> fail $ "Unknown SISR format: " ++ show x
hsPrefix x = set $ \o -> o { optHaskellPrefix = x }
hsOption x = case lookup x haskellOptionNames of
Just p -> set $ \o -> o { optHaskellOptions = Set.insert p (optHaskellOptions o) }
Nothing -> fail $ "Unknown Haskell option: " ++ x
++ " Known: " ++ show (map fst haskellOptionNames)
lexicalCat x = set $ \o -> o { optLexicalCats = foldr Set.insert (optLexicalCats o) (splitBy (==',') x) }
outFile x = set $ \o -> o { optOutputFile = Just x }
outDir x = set $ \o -> o { optOutputDir = Just x }
recomp x = set $ \o -> o { optRecomp = x }
@@ -479,7 +491,6 @@ outputFormats =
("pgf-pretty", FmtPGFPretty),
("js", FmtJavaScript),
("haskell", FmtHaskell),
("haskell_gadt", FmtHaskell_GADT),
("prolog", FmtProlog),
("prolog_abs", FmtProlog_Abs),
("bnf", FmtBNF),
@@ -523,6 +534,12 @@ cfgTransformNames =
("merge", CFGMergeIdentical),
("removecycles", CFGRemoveCycles)]
haskellOptionNames :: [(String, HaskellOption)]
haskellOptionNames =
[("noprefix", HaskellNoPrefix),
("gadt", HaskellGADT),
("lexical", HaskellLexical)]
encodings :: [(String,Encoding)]
encodings =
[("utf8", UTF_8),
@@ -573,6 +590,12 @@ dump opts d = moduleFlag ((d `elem`) . optDump) opts
cfgTransform :: Options -> CFGTransform -> Bool
cfgTransform opts t = Set.member t (moduleFlag optCFGTransforms opts)
haskellOption :: Options -> HaskellOption -> Bool
haskellOption opts o = Set.member o (flag optHaskellOptions opts)
isLexicalCat :: Options -> String -> Bool
isLexicalCat opts c = Set.member c (flag optLexicalCats opts)
--
-- * Convenience functions for setting options
--
@@ -609,6 +632,11 @@ toEnumBounded i = let mi = minBound
then Just (toEnum i `asTypeOf` mi)
else Nothing
splitBy :: (a -> Bool) -> [a] -> [[a]]
splitBy _ [] = []
splitBy p s = case break p s of
(l, _ : t@(_ : _)) -> l : splitBy p t
(l, _) -> [l]
instance Functor OptDescr where
fmap f (Option cs ss d s) = Option cs ss (fmap f d) s