Added option to treat some categories as lexical when generating Haskell data types.

This commit is contained in:
bjorn
2008-10-10 11:55:12 +00:00
parent 18aa48941c
commit 88798b2a23
3 changed files with 90 additions and 51 deletions

View File

@@ -35,8 +35,7 @@ exportPGF opts fmt pgf =
FmtPGF -> multi "pgf" printPGF FmtPGF -> multi "pgf" printPGF
FmtPGFPretty -> multi "txt" prPGFPretty FmtPGFPretty -> multi "txt" prPGFPretty
FmtJavaScript -> multi "js" pgf2js FmtJavaScript -> multi "js" pgf2js
FmtHaskell -> multi "hs" (grammar2haskell hsPrefix name) FmtHaskell -> multi "hs" (grammar2haskell opts name)
FmtHaskell_GADT -> multi "hs" (grammar2haskellGADT hsPrefix name)
FmtProlog -> multi "pl" grammar2prolog FmtProlog -> multi "pl" grammar2prolog
FmtProlog_Abs -> multi "pl" grammar2prolog_abs FmtProlog_Abs -> multi "pl" grammar2prolog_abs
FmtBNF -> single "bnf" bnfPrinter FmtBNF -> single "bnf" bnfPrinter
@@ -54,7 +53,6 @@ exportPGF opts fmt pgf =
FmtFA -> single "dot" slfGraphvizPrinter FmtFA -> single "dot" slfGraphvizPrinter
where where
name = fromMaybe (prCId (absname pgf)) (moduleFlag optName opts) name = fromMaybe (prCId (absname pgf)) (moduleFlag optName opts)
hsPrefix = flag optHaskellPrefix opts
multi :: String -> (PGF -> String) -> [(FilePath,String)] multi :: String -> (PGF -> String) -> [(FilePath,String)]
multi ext pr = [(name <.> ext, pr pgf)] multi ext pr = [(name <.> ext, pr pgf)]

View File

@@ -14,13 +14,14 @@
-- AR 11/11/1999 -- 7/12/2000 -- 18/5/2004 -- AR 11/11/1999 -- 7/12/2000 -- 18/5/2004
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
module GF.Compile.GFCCtoHaskell (grammar2haskell, grammar2haskellGADT) where module GF.Compile.GFCCtoHaskell (grammar2haskell) where
import PGF.CId import PGF.CId
import PGF.Data import PGF.Data
import PGF.Macros import PGF.Macros
import GF.Data.Operations import GF.Data.Operations
import GF.Infra.Option
import GF.Text.UTF8 import GF.Text.UTF8
import Data.List --(isPrefixOf, find, intersperse) import Data.List --(isPrefixOf, find, intersperse)
@@ -29,24 +30,21 @@ import qualified Data.Map as Map
type Prefix = String -> String type Prefix = String -> String
-- | the main function -- | the main function
grammar2haskell :: String -- ^ Constructor prefix grammar2haskell :: Options
-> String -- ^ Module name. -> String -- ^ Module name.
-> PGF -> PGF
-> String -> String
grammar2haskell prefix name gr = encodeUTF8 $ foldr (++++) [] $ grammar2haskell opts name gr = encodeUTF8 $ foldr (++++) [] $
haskPreamble name ++ [datatypes gId gr', gfinstances gId gr'] pragmas ++ haskPreamble name ++ [types, gfinstances gId lexical gr']
where gr' = hSkeleton gr where gr' = hSkeleton gr
gId = (prefix++) gadt = haskellOption opts HaskellGADT
lexical cat = haskellOption opts HaskellLexical && isLexicalCat opts cat
grammar2haskellGADT :: String -- ^ Constructor prefix gId | haskellOption opts HaskellNoPrefix = id
-> String -- ^ Module name. | otherwise = ("G"++)
-> PGF pragmas | gadt = ["{-# OPTIONS_GHC -fglasgow-exts #-}"]
-> String | otherwise = []
grammar2haskellGADT prefix name gr = encodeUTF8 $ foldr (++++) [] $ types | gadt = datatypesGADT gId lexical gr'
["{-# OPTIONS_GHC -fglasgow-exts #-}"] ++ | otherwise = datatypes gId lexical gr'
haskPreamble name ++ [datatypesGADT gId gr', gfinstances gId gr']
where gr' = hSkeleton gr
gId = (prefix++)
haskPreamble name = haskPreamble name =
[ [
@@ -86,49 +84,62 @@ type OIdent = String
type HSkeleton = [(OIdent, [(OIdent, [OIdent])])] type HSkeleton = [(OIdent, [(OIdent, [OIdent])])]
datatypes, gfinstances :: Prefix -> (String,HSkeleton) -> String datatypes :: Prefix -> (OIdent -> Bool) -> (String,HSkeleton) -> String
datatypes gId = (foldr (+++++) "") . (filter (/="")) . (map (hDatatype gId)) . snd datatypes gId lexical = (foldr (+++++) "") . (filter (/="")) . (map (hDatatype gId lexical)) . snd
gfinstances gId (m,g) = (foldr (+++++) "") $ (filter (/="")) $ (map (gfInstance gId m)) g
hDatatype :: Prefix -> (OIdent, [(OIdent, [OIdent])]) -> String gfinstances :: Prefix -> (OIdent -> Bool) -> (String,HSkeleton) -> String
gfInstance :: Prefix -> String -> (OIdent, [(OIdent, [OIdent])]) -> String gfinstances gId lexical (m,g) = (foldr (+++++) "") $ (filter (/="")) $ (map (gfInstance gId lexical m)) g
hDatatype _ ("Cn",_) = "" ---
hDatatype _ (cat,[]) = "" hDatatype :: Prefix -> (OIdent -> Bool) -> (OIdent, [(OIdent, [OIdent])]) -> String
hDatatype gId (cat,rules) | isListCat (cat,rules) = hDatatype _ _ ("Cn",_) = "" ---
hDatatype _ _ (cat,[]) = ""
hDatatype gId _ (cat,rules) | isListCat (cat,rules) =
"newtype" +++ gId cat +++ "=" +++ gId cat +++ "[" ++ gId (elemCat cat) ++ "]" "newtype" +++ gId cat +++ "=" +++ gId cat +++ "[" ++ gId (elemCat cat) ++ "]"
+++ "deriving Show" +++ "deriving Show"
hDatatype gId (cat,rules) = hDatatype gId lexical (cat,rules) =
"data" +++ gId cat +++ "=" ++ "data" +++ gId cat +++ "=" ++
(if length rules == 1 then "" else "\n ") +++ (if length rules == 1 then "" else "\n ") +++
foldr1 (\x y -> x ++ "\n |" +++ y) foldr1 (\x y -> x ++ "\n |" +++ y) constructors ++++
[gId f +++ foldr (+++) "" (map (gId) xx) | (f,xx) <- rules] ++++
" deriving Show" " deriving Show"
where
constructors = [gId f +++ foldr (+++) "" (map (gId) xx) | (f,xx) <- nonLexicalRules (lexical cat) rules]
++ if lexical cat then [lexicalConstructor cat +++ "String"] else []
nonLexicalRules :: Bool -> [(OIdent, [OIdent])] -> [(OIdent, [OIdent])]
nonLexicalRules False rules = rules
nonLexicalRules True rules = [r | r@(f,t) <- rules, not (null t)]
lexicalConstructor :: OIdent -> String
lexicalConstructor cat = "Lex" ++ cat
-- GADT version of data types -- GADT version of data types
datatypesGADT :: Prefix -> (String,HSkeleton) -> String datatypesGADT :: Prefix -> (OIdent -> Bool) -> (String,HSkeleton) -> String
datatypesGADT gId (_,skel) = datatypesGADT gId lexical (_,skel) =
unlines (concatMap (hCatTypeGADT gId) skel) unlines (concatMap (hCatTypeGADT gId) skel)
+++++ +++++
"data Tree :: * -> * where" ++++ unlines (concatMap (map (" "++) . hDatatypeGADT gId) skel) "data Tree :: * -> * where" ++++ unlines (concatMap (map (" "++) . hDatatypeGADT gId lexical) skel)
hCatTypeGADT :: Prefix -> (OIdent, [(OIdent, [OIdent])]) -> [String] hCatTypeGADT :: Prefix -> (OIdent, [(OIdent, [OIdent])]) -> [String]
hCatTypeGADT gId (cat,rules) hCatTypeGADT gId (cat,rules)
= ["type"+++gId cat+++"="+++"Tree"+++gId cat++"_", = ["type"+++gId cat+++"="+++"Tree"+++gId cat++"_",
"data"+++gId cat++"_"] "data"+++gId cat++"_"]
hDatatypeGADT :: Prefix -> (OIdent, [(OIdent, [OIdent])]) -> [String] hDatatypeGADT :: Prefix -> (OIdent -> Bool) -> (OIdent, [(OIdent, [OIdent])]) -> [String]
hDatatypeGADT gId (cat, rules) hDatatypeGADT gId lexical (cat, rules)
| isListCat (cat,rules) = [gId cat+++"::"+++"["++gId (elemCat cat)++"]" +++ "->" +++ t] | isListCat (cat,rules) = [gId cat+++"::"+++"["++gId (elemCat cat)++"]" +++ "->" +++ t]
| otherwise = | otherwise =
[ gId f +++ "::" +++ concatMap (\a -> gId a +++ "-> ") args ++ t | (f,args) <- rules ] [ gId f +++ "::" +++ concatMap (\a -> gId a +++ "-> ") args ++ t
| (f,args) <- nonLexicalRules (lexical cat) rules ]
++ if lexical cat then [lexicalConstructor cat +++ ":: String ->"+++ t] else []
where t = "Tree" +++ gId cat ++ "_" where t = "Tree" +++ gId cat ++ "_"
gfInstance gId m crs = hInstance gId m crs ++++ fInstance gId m crs gfInstance :: Prefix -> (OIdent -> Bool) -> String -> (OIdent, [(OIdent, [OIdent])]) -> String
gfInstance gId lexical m crs = hInstance gId lexical m crs ++++ fInstance gId lexical m crs
----hInstance m ("Cn",_) = "" --- seems to belong to an old applic. AR 18/5/2004 ----hInstance m ("Cn",_) = "" --- seems to belong to an old applic. AR 18/5/2004
hInstance _ m (cat,[]) = "" hInstance _ _ m (cat,[]) = ""
hInstance gId m (cat,rules) hInstance gId lexical m (cat,rules)
| isListCat (cat,rules) = | isListCat (cat,rules) =
"instance Gf" +++ gId cat +++ "where" ++++ "instance Gf" +++ gId cat +++ "where" ++++
" gf (" ++ gId cat +++ "[" ++ concat (intersperse "," baseVars) ++ "])" " gf (" ++ gId cat +++ "[" ++ concat (intersperse "," baseVars) ++ "])"
@@ -139,7 +150,8 @@ hInstance gId m (cat,rules)
-- ++++ " gf (" ++ gId cat +++ "xs) = error (\"Bad " ++ cat ++ " value: \" ++ show xs)" -- ++++ " gf (" ++ gId cat +++ "xs) = error (\"Bad " ++ cat ++ " value: \" ++ show xs)"
| otherwise = | otherwise =
"instance Gf" +++ gId cat +++ "where\n" ++ "instance Gf" +++ gId cat +++ "where\n" ++
unlines [mkInst f xx | (f,xx) <- rules] unlines ([mkInst f xx | (f,xx) <- nonLexicalRules (lexical cat) rules]
++ if lexical cat then [" gf (" ++ lexicalConstructor cat +++ "x) = Fun (mkCId x) []"] else [])
where where
ec = elemCat cat ec = elemCat cat
baseVars = mkVars (baseSize (cat,rules)) baseVars = mkVars (baseSize (cat,rules))
@@ -152,11 +164,12 @@ hInstance gId m (cat,rules)
----fInstance m ("Cn",_) = "" --- ----fInstance m ("Cn",_) = "" ---
fInstance _ m (cat,[]) = "" fInstance _ _ m (cat,[]) = ""
fInstance gId m (cat,rules) = fInstance gId lexical m (cat,rules) =
" fg t =" ++++ " fg t =" ++++
" case t of" ++++ " case t of" ++++
unlines [mkInst f xx | (f,xx) <- rules] ++++ unlines [mkInst f xx | (f,xx) <- nonLexicalRules (lexical cat) rules] ++++
(if lexical cat then " Fun i [] -> " ++ lexicalConstructor cat +++ "(prCId i)" else "") ++++
" _ -> error (\"no" +++ cat ++ " \" ++ show t)" " _ -> error (\"no" +++ cat ++ " \" ++ show t)"
where where
mkInst f xx = mkInst f xx =

View File

@@ -4,7 +4,7 @@ module GF.Infra.Option
Options, ModuleOptions, Options, ModuleOptions,
Flags(..), ModuleFlags(..), Flags(..), ModuleFlags(..),
Mode(..), Phase(..), Verbosity(..), Encoding(..), OutputFormat(..), Mode(..), Phase(..), Verbosity(..), Encoding(..), OutputFormat(..),
SISRFormat(..), Optimization(..), CFGTransform(..), SISRFormat(..), Optimization(..), CFGTransform(..), HaskellOption(..),
Dump(..), Printer(..), Recomp(..), Dump(..), Printer(..), Recomp(..),
-- * Option parsing -- * Option parsing
parseOptions, parseModuleOptions, parseOptions, parseModuleOptions,
@@ -17,7 +17,8 @@ module GF.Infra.Option
modifyFlags, modifyModuleFlags, modifyFlags, modifyModuleFlags,
helpMessage, helpMessage,
-- * Checking specific options -- * Checking specific options
flag, moduleFlag, cfgTransform, flag, moduleFlag, cfgTransform, haskellOption,
isLexicalCat,
-- * Setting specific options -- * Setting specific options
setOptimization, setCFGTransform, setOptimization, setCFGTransform,
-- * Convenience methods for checking options -- * Convenience methods for checking options
@@ -84,7 +85,6 @@ data OutputFormat = FmtPGF
| FmtPGFPretty | FmtPGFPretty
| FmtJavaScript | FmtJavaScript
| FmtHaskell | FmtHaskell
| FmtHaskell_GADT
| FmtProlog | FmtProlog
| FmtProlog_Abs | FmtProlog_Abs
| FmtBNF | FmtBNF
@@ -123,6 +123,9 @@ data CFGTransform = CFGNoLR
| CFGRemoveCycles | CFGRemoveCycles
deriving (Show,Eq,Ord) deriving (Show,Eq,Ord)
data HaskellOption = HaskellNoPrefix | HaskellGADT | HaskellLexical
deriving (Show,Eq,Ord)
data Warning = WarnMissingLincat data Warning = WarnMissingLincat
deriving (Show,Eq,Ord) deriving (Show,Eq,Ord)
@@ -166,7 +169,8 @@ data Flags = Flags {
optGFODir :: FilePath, optGFODir :: FilePath,
optOutputFormats :: [OutputFormat], optOutputFormats :: [OutputFormat],
optSISR :: Maybe SISRFormat, optSISR :: Maybe SISRFormat,
optHaskellPrefix :: String, optHaskellOptions :: Set HaskellOption,
optLexicalCats :: Set String,
optOutputFile :: Maybe FilePath, optOutputFile :: Maybe FilePath,
optOutputDir :: Maybe FilePath, optOutputDir :: Maybe FilePath,
optRecomp :: Recomp, optRecomp :: Recomp,
@@ -313,7 +317,8 @@ defaultFlags = Flags {
optGFODir = ".", optGFODir = ".",
optOutputFormats = [FmtPGF], optOutputFormats = [FmtPGF],
optSISR = Nothing, optSISR = Nothing,
optHaskellPrefix = "G", optHaskellOptions = Set.empty,
optLexicalCats = Set.empty,
optOutputFile = Nothing, optOutputFile = Nothing,
optOutputDir = Nothing, optOutputDir = Nothing,
optRecomp = RecompIfNewer, optRecomp = RecompIfNewer,
@@ -431,8 +436,11 @@ optDescr =
Option [] ["sisr"] (ReqArg sisrFmt "FMT") Option [] ["sisr"] (ReqArg sisrFmt "FMT")
(unlines ["Include SISR tags in generated speech recognition grammars.", (unlines ["Include SISR tags in generated speech recognition grammars.",
"FMT can be one of: old, 1.0"]), "FMT can be one of: old, 1.0"]),
Option [] ["haskell-prefix"] (ReqArg hsPrefix "PREFIX") Option [] ["haskell"] (ReqArg hsOption "OPTION")
"Constructor prefix for generated Haskell code. Default: G", ("Turn on an optional feature when generating Haskell data types. OPTION = "
++ concat (intersperse " | " (map fst haskellOptionNames))),
Option [] ["lexical"] (ReqArg lexicalCat "CAT[,CAT[...]]")
"Treat CAT as a lexical category.",
Option ['o'] ["output-file"] (ReqArg outFile "FILE") Option ['o'] ["output-file"] (ReqArg outFile "FILE")
"Save output in FILE (default is out.X, where X depends on output format.", "Save output in FILE (default is out.X, where X depends on output format.",
Option ['D'] ["output-dir"] (ReqArg outDir "DIR") Option ['D'] ["output-dir"] (ReqArg outDir "DIR")
@@ -464,7 +472,11 @@ optDescr =
"old" -> set $ \o -> o { optSISR = Just SISR_WD20030401 } "old" -> set $ \o -> o { optSISR = Just SISR_WD20030401 }
"1.0" -> set $ \o -> o { optSISR = Just SISR_1_0 } "1.0" -> set $ \o -> o { optSISR = Just SISR_1_0 }
_ -> fail $ "Unknown SISR format: " ++ show x _ -> fail $ "Unknown SISR format: " ++ show x
hsPrefix x = set $ \o -> o { optHaskellPrefix = x } hsOption x = case lookup x haskellOptionNames of
Just p -> set $ \o -> o { optHaskellOptions = Set.insert p (optHaskellOptions o) }
Nothing -> fail $ "Unknown Haskell option: " ++ x
++ " Known: " ++ show (map fst haskellOptionNames)
lexicalCat x = set $ \o -> o { optLexicalCats = foldr Set.insert (optLexicalCats o) (splitBy (==',') x) }
outFile x = set $ \o -> o { optOutputFile = Just x } outFile x = set $ \o -> o { optOutputFile = Just x }
outDir x = set $ \o -> o { optOutputDir = Just x } outDir x = set $ \o -> o { optOutputDir = Just x }
recomp x = set $ \o -> o { optRecomp = x } recomp x = set $ \o -> o { optRecomp = x }
@@ -479,7 +491,6 @@ outputFormats =
("pgf-pretty", FmtPGFPretty), ("pgf-pretty", FmtPGFPretty),
("js", FmtJavaScript), ("js", FmtJavaScript),
("haskell", FmtHaskell), ("haskell", FmtHaskell),
("haskell_gadt", FmtHaskell_GADT),
("prolog", FmtProlog), ("prolog", FmtProlog),
("prolog_abs", FmtProlog_Abs), ("prolog_abs", FmtProlog_Abs),
("bnf", FmtBNF), ("bnf", FmtBNF),
@@ -523,6 +534,12 @@ cfgTransformNames =
("merge", CFGMergeIdentical), ("merge", CFGMergeIdentical),
("removecycles", CFGRemoveCycles)] ("removecycles", CFGRemoveCycles)]
haskellOptionNames :: [(String, HaskellOption)]
haskellOptionNames =
[("noprefix", HaskellNoPrefix),
("gadt", HaskellGADT),
("lexical", HaskellLexical)]
encodings :: [(String,Encoding)] encodings :: [(String,Encoding)]
encodings = encodings =
[("utf8", UTF_8), [("utf8", UTF_8),
@@ -573,6 +590,12 @@ dump opts d = moduleFlag ((d `elem`) . optDump) opts
cfgTransform :: Options -> CFGTransform -> Bool cfgTransform :: Options -> CFGTransform -> Bool
cfgTransform opts t = Set.member t (moduleFlag optCFGTransforms opts) cfgTransform opts t = Set.member t (moduleFlag optCFGTransforms opts)
haskellOption :: Options -> HaskellOption -> Bool
haskellOption opts o = Set.member o (flag optHaskellOptions opts)
isLexicalCat :: Options -> String -> Bool
isLexicalCat opts c = Set.member c (flag optLexicalCats opts)
-- --
-- * Convenience functions for setting options -- * Convenience functions for setting options
-- --
@@ -609,6 +632,11 @@ toEnumBounded i = let mi = minBound
then Just (toEnum i `asTypeOf` mi) then Just (toEnum i `asTypeOf` mi)
else Nothing else Nothing
splitBy :: (a -> Bool) -> [a] -> [[a]]
splitBy _ [] = []
splitBy p s = case break p s of
(l, _ : t@(_ : _)) -> l : splitBy p t
(l, _) -> [l]
instance Functor OptDescr where instance Functor OptDescr where
fmap f (Option cs ss d s) = Option cs ss (fmap f d) s fmap f (Option cs ss d s) = Option cs ss (fmap f d) s