From 88798b2a23c90ce92ec6062ec1ca43f45ee8fe18 Mon Sep 17 00:00:00 2001 From: bjorn Date: Fri, 10 Oct 2008 11:55:12 +0000 Subject: [PATCH] Added option to treat some categories as lexical when generating Haskell data types. --- src/GF/Compile/Export.hs | 4 +- src/GF/Compile/GFCCtoHaskell.hs | 91 +++++++++++++++++++-------------- src/GF/Infra/Option.hs | 46 +++++++++++++---- 3 files changed, 90 insertions(+), 51 deletions(-) diff --git a/src/GF/Compile/Export.hs b/src/GF/Compile/Export.hs index 8fb4cbed8..f4e5b2884 100644 --- a/src/GF/Compile/Export.hs +++ b/src/GF/Compile/Export.hs @@ -35,8 +35,7 @@ exportPGF opts fmt pgf = FmtPGF -> multi "pgf" printPGF FmtPGFPretty -> multi "txt" prPGFPretty FmtJavaScript -> multi "js" pgf2js - FmtHaskell -> multi "hs" (grammar2haskell hsPrefix name) - FmtHaskell_GADT -> multi "hs" (grammar2haskellGADT hsPrefix name) + FmtHaskell -> multi "hs" (grammar2haskell opts name) FmtProlog -> multi "pl" grammar2prolog FmtProlog_Abs -> multi "pl" grammar2prolog_abs FmtBNF -> single "bnf" bnfPrinter @@ -54,7 +53,6 @@ exportPGF opts fmt pgf = FmtFA -> single "dot" slfGraphvizPrinter where name = fromMaybe (prCId (absname pgf)) (moduleFlag optName opts) - hsPrefix = flag optHaskellPrefix opts multi :: String -> (PGF -> String) -> [(FilePath,String)] multi ext pr = [(name <.> ext, pr pgf)] diff --git a/src/GF/Compile/GFCCtoHaskell.hs b/src/GF/Compile/GFCCtoHaskell.hs index 3fc75df74..a8fd321b0 100644 --- a/src/GF/Compile/GFCCtoHaskell.hs +++ b/src/GF/Compile/GFCCtoHaskell.hs @@ -14,13 +14,14 @@ -- AR 11/11/1999 -- 7/12/2000 -- 18/5/2004 ----------------------------------------------------------------------------- -module GF.Compile.GFCCtoHaskell (grammar2haskell, grammar2haskellGADT) where +module GF.Compile.GFCCtoHaskell (grammar2haskell) where import PGF.CId import PGF.Data import PGF.Macros import GF.Data.Operations +import GF.Infra.Option import GF.Text.UTF8 import Data.List --(isPrefixOf, find, intersperse) @@ -29,24 +30,21 @@ import qualified Data.Map as Map type Prefix = String -> String -- | the main function -grammar2haskell :: String -- ^ Constructor prefix +grammar2haskell :: Options -> String -- ^ Module name. -> PGF -> String -grammar2haskell prefix name gr = encodeUTF8 $ foldr (++++) [] $ - haskPreamble name ++ [datatypes gId gr', gfinstances gId gr'] +grammar2haskell opts name gr = encodeUTF8 $ foldr (++++) [] $ + pragmas ++ haskPreamble name ++ [types, gfinstances gId lexical gr'] where gr' = hSkeleton gr - gId = (prefix++) - -grammar2haskellGADT :: String -- ^ Constructor prefix - -> String -- ^ Module name. - -> PGF - -> String -grammar2haskellGADT prefix name gr = encodeUTF8 $ foldr (++++) [] $ - ["{-# OPTIONS_GHC -fglasgow-exts #-}"] ++ - haskPreamble name ++ [datatypesGADT gId gr', gfinstances gId gr'] - where gr' = hSkeleton gr - gId = (prefix++) + gadt = haskellOption opts HaskellGADT + lexical cat = haskellOption opts HaskellLexical && isLexicalCat opts cat + gId | haskellOption opts HaskellNoPrefix = id + | otherwise = ("G"++) + pragmas | gadt = ["{-# OPTIONS_GHC -fglasgow-exts #-}"] + | otherwise = [] + types | gadt = datatypesGADT gId lexical gr' + | otherwise = datatypes gId lexical gr' haskPreamble name = [ @@ -86,49 +84,62 @@ type OIdent = String type HSkeleton = [(OIdent, [(OIdent, [OIdent])])] -datatypes, gfinstances :: Prefix -> (String,HSkeleton) -> String -datatypes gId = (foldr (+++++) "") . (filter (/="")) . (map (hDatatype gId)) . snd -gfinstances gId (m,g) = (foldr (+++++) "") $ (filter (/="")) $ (map (gfInstance gId m)) g +datatypes :: Prefix -> (OIdent -> Bool) -> (String,HSkeleton) -> String +datatypes gId lexical = (foldr (+++++) "") . (filter (/="")) . (map (hDatatype gId lexical)) . snd -hDatatype :: Prefix -> (OIdent, [(OIdent, [OIdent])]) -> String -gfInstance :: Prefix -> String -> (OIdent, [(OIdent, [OIdent])]) -> String +gfinstances :: Prefix -> (OIdent -> Bool) -> (String,HSkeleton) -> String +gfinstances gId lexical (m,g) = (foldr (+++++) "") $ (filter (/="")) $ (map (gfInstance gId lexical m)) g -hDatatype _ ("Cn",_) = "" --- -hDatatype _ (cat,[]) = "" -hDatatype gId (cat,rules) | isListCat (cat,rules) = + +hDatatype :: Prefix -> (OIdent -> Bool) -> (OIdent, [(OIdent, [OIdent])]) -> String +hDatatype _ _ ("Cn",_) = "" --- +hDatatype _ _ (cat,[]) = "" +hDatatype gId _ (cat,rules) | isListCat (cat,rules) = "newtype" +++ gId cat +++ "=" +++ gId cat +++ "[" ++ gId (elemCat cat) ++ "]" +++ "deriving Show" -hDatatype gId (cat,rules) = +hDatatype gId lexical (cat,rules) = "data" +++ gId cat +++ "=" ++ (if length rules == 1 then "" else "\n ") +++ - foldr1 (\x y -> x ++ "\n |" +++ y) - [gId f +++ foldr (+++) "" (map (gId) xx) | (f,xx) <- rules] ++++ + foldr1 (\x y -> x ++ "\n |" +++ y) constructors ++++ " deriving Show" + where + constructors = [gId f +++ foldr (+++) "" (map (gId) xx) | (f,xx) <- nonLexicalRules (lexical cat) rules] + ++ if lexical cat then [lexicalConstructor cat +++ "String"] else [] + +nonLexicalRules :: Bool -> [(OIdent, [OIdent])] -> [(OIdent, [OIdent])] +nonLexicalRules False rules = rules +nonLexicalRules True rules = [r | r@(f,t) <- rules, not (null t)] + +lexicalConstructor :: OIdent -> String +lexicalConstructor cat = "Lex" ++ cat -- GADT version of data types -datatypesGADT :: Prefix -> (String,HSkeleton) -> String -datatypesGADT gId (_,skel) = +datatypesGADT :: Prefix -> (OIdent -> Bool) -> (String,HSkeleton) -> String +datatypesGADT gId lexical (_,skel) = unlines (concatMap (hCatTypeGADT gId) skel) +++++ - "data Tree :: * -> * where" ++++ unlines (concatMap (map (" "++) . hDatatypeGADT gId) skel) + "data Tree :: * -> * where" ++++ unlines (concatMap (map (" "++) . hDatatypeGADT gId lexical) skel) hCatTypeGADT :: Prefix -> (OIdent, [(OIdent, [OIdent])]) -> [String] hCatTypeGADT gId (cat,rules) = ["type"+++gId cat+++"="+++"Tree"+++gId cat++"_", "data"+++gId cat++"_"] -hDatatypeGADT :: Prefix -> (OIdent, [(OIdent, [OIdent])]) -> [String] -hDatatypeGADT gId (cat, rules) +hDatatypeGADT :: Prefix -> (OIdent -> Bool) -> (OIdent, [(OIdent, [OIdent])]) -> [String] +hDatatypeGADT gId lexical (cat, rules) | isListCat (cat,rules) = [gId cat+++"::"+++"["++gId (elemCat cat)++"]" +++ "->" +++ t] | otherwise = - [ gId f +++ "::" +++ concatMap (\a -> gId a +++ "-> ") args ++ t | (f,args) <- rules ] + [ gId f +++ "::" +++ concatMap (\a -> gId a +++ "-> ") args ++ t + | (f,args) <- nonLexicalRules (lexical cat) rules ] + ++ if lexical cat then [lexicalConstructor cat +++ ":: String ->"+++ t] else [] where t = "Tree" +++ gId cat ++ "_" -gfInstance gId m crs = hInstance gId m crs ++++ fInstance gId m crs +gfInstance :: Prefix -> (OIdent -> Bool) -> String -> (OIdent, [(OIdent, [OIdent])]) -> String +gfInstance gId lexical m crs = hInstance gId lexical m crs ++++ fInstance gId lexical m crs ----hInstance m ("Cn",_) = "" --- seems to belong to an old applic. AR 18/5/2004 -hInstance _ m (cat,[]) = "" -hInstance gId m (cat,rules) +hInstance _ _ m (cat,[]) = "" +hInstance gId lexical m (cat,rules) | isListCat (cat,rules) = "instance Gf" +++ gId cat +++ "where" ++++ " gf (" ++ gId cat +++ "[" ++ concat (intersperse "," baseVars) ++ "])" @@ -139,7 +150,8 @@ hInstance gId m (cat,rules) -- ++++ " gf (" ++ gId cat +++ "xs) = error (\"Bad " ++ cat ++ " value: \" ++ show xs)" | otherwise = "instance Gf" +++ gId cat +++ "where\n" ++ - unlines [mkInst f xx | (f,xx) <- rules] + unlines ([mkInst f xx | (f,xx) <- nonLexicalRules (lexical cat) rules] + ++ if lexical cat then [" gf (" ++ lexicalConstructor cat +++ "x) = Fun (mkCId x) []"] else []) where ec = elemCat cat baseVars = mkVars (baseSize (cat,rules)) @@ -152,11 +164,12 @@ hInstance gId m (cat,rules) ----fInstance m ("Cn",_) = "" --- -fInstance _ m (cat,[]) = "" -fInstance gId m (cat,rules) = +fInstance _ _ m (cat,[]) = "" +fInstance gId lexical m (cat,rules) = " fg t =" ++++ " case t of" ++++ - unlines [mkInst f xx | (f,xx) <- rules] ++++ + unlines [mkInst f xx | (f,xx) <- nonLexicalRules (lexical cat) rules] ++++ + (if lexical cat then " Fun i [] -> " ++ lexicalConstructor cat +++ "(prCId i)" else "") ++++ " _ -> error (\"no" +++ cat ++ " \" ++ show t)" where mkInst f xx = diff --git a/src/GF/Infra/Option.hs b/src/GF/Infra/Option.hs index 10b5dcd21..48352fc91 100644 --- a/src/GF/Infra/Option.hs +++ b/src/GF/Infra/Option.hs @@ -4,7 +4,7 @@ module GF.Infra.Option Options, ModuleOptions, Flags(..), ModuleFlags(..), Mode(..), Phase(..), Verbosity(..), Encoding(..), OutputFormat(..), - SISRFormat(..), Optimization(..), CFGTransform(..), + SISRFormat(..), Optimization(..), CFGTransform(..), HaskellOption(..), Dump(..), Printer(..), Recomp(..), -- * Option parsing parseOptions, parseModuleOptions, @@ -17,7 +17,8 @@ module GF.Infra.Option modifyFlags, modifyModuleFlags, helpMessage, -- * Checking specific options - flag, moduleFlag, cfgTransform, + flag, moduleFlag, cfgTransform, haskellOption, + isLexicalCat, -- * Setting specific options setOptimization, setCFGTransform, -- * Convenience methods for checking options @@ -84,7 +85,6 @@ data OutputFormat = FmtPGF | FmtPGFPretty | FmtJavaScript | FmtHaskell - | FmtHaskell_GADT | FmtProlog | FmtProlog_Abs | FmtBNF @@ -123,6 +123,9 @@ data CFGTransform = CFGNoLR | CFGRemoveCycles deriving (Show,Eq,Ord) +data HaskellOption = HaskellNoPrefix | HaskellGADT | HaskellLexical + deriving (Show,Eq,Ord) + data Warning = WarnMissingLincat deriving (Show,Eq,Ord) @@ -166,7 +169,8 @@ data Flags = Flags { optGFODir :: FilePath, optOutputFormats :: [OutputFormat], optSISR :: Maybe SISRFormat, - optHaskellPrefix :: String, + optHaskellOptions :: Set HaskellOption, + optLexicalCats :: Set String, optOutputFile :: Maybe FilePath, optOutputDir :: Maybe FilePath, optRecomp :: Recomp, @@ -313,7 +317,8 @@ defaultFlags = Flags { optGFODir = ".", optOutputFormats = [FmtPGF], optSISR = Nothing, - optHaskellPrefix = "G", + optHaskellOptions = Set.empty, + optLexicalCats = Set.empty, optOutputFile = Nothing, optOutputDir = Nothing, optRecomp = RecompIfNewer, @@ -431,8 +436,11 @@ optDescr = Option [] ["sisr"] (ReqArg sisrFmt "FMT") (unlines ["Include SISR tags in generated speech recognition grammars.", "FMT can be one of: old, 1.0"]), - Option [] ["haskell-prefix"] (ReqArg hsPrefix "PREFIX") - "Constructor prefix for generated Haskell code. Default: G", + Option [] ["haskell"] (ReqArg hsOption "OPTION") + ("Turn on an optional feature when generating Haskell data types. OPTION = " + ++ concat (intersperse " | " (map fst haskellOptionNames))), + Option [] ["lexical"] (ReqArg lexicalCat "CAT[,CAT[...]]") + "Treat CAT as a lexical category.", Option ['o'] ["output-file"] (ReqArg outFile "FILE") "Save output in FILE (default is out.X, where X depends on output format.", Option ['D'] ["output-dir"] (ReqArg outDir "DIR") @@ -464,7 +472,11 @@ optDescr = "old" -> set $ \o -> o { optSISR = Just SISR_WD20030401 } "1.0" -> set $ \o -> o { optSISR = Just SISR_1_0 } _ -> fail $ "Unknown SISR format: " ++ show x - hsPrefix x = set $ \o -> o { optHaskellPrefix = x } + hsOption x = case lookup x haskellOptionNames of + Just p -> set $ \o -> o { optHaskellOptions = Set.insert p (optHaskellOptions o) } + Nothing -> fail $ "Unknown Haskell option: " ++ x + ++ " Known: " ++ show (map fst haskellOptionNames) + lexicalCat x = set $ \o -> o { optLexicalCats = foldr Set.insert (optLexicalCats o) (splitBy (==',') x) } outFile x = set $ \o -> o { optOutputFile = Just x } outDir x = set $ \o -> o { optOutputDir = Just x } recomp x = set $ \o -> o { optRecomp = x } @@ -479,7 +491,6 @@ outputFormats = ("pgf-pretty", FmtPGFPretty), ("js", FmtJavaScript), ("haskell", FmtHaskell), - ("haskell_gadt", FmtHaskell_GADT), ("prolog", FmtProlog), ("prolog_abs", FmtProlog_Abs), ("bnf", FmtBNF), @@ -523,6 +534,12 @@ cfgTransformNames = ("merge", CFGMergeIdentical), ("removecycles", CFGRemoveCycles)] +haskellOptionNames :: [(String, HaskellOption)] +haskellOptionNames = + [("noprefix", HaskellNoPrefix), + ("gadt", HaskellGADT), + ("lexical", HaskellLexical)] + encodings :: [(String,Encoding)] encodings = [("utf8", UTF_8), @@ -573,6 +590,12 @@ dump opts d = moduleFlag ((d `elem`) . optDump) opts cfgTransform :: Options -> CFGTransform -> Bool cfgTransform opts t = Set.member t (moduleFlag optCFGTransforms opts) +haskellOption :: Options -> HaskellOption -> Bool +haskellOption opts o = Set.member o (flag optHaskellOptions opts) + +isLexicalCat :: Options -> String -> Bool +isLexicalCat opts c = Set.member c (flag optLexicalCats opts) + -- -- * Convenience functions for setting options -- @@ -609,6 +632,11 @@ toEnumBounded i = let mi = minBound then Just (toEnum i `asTypeOf` mi) else Nothing +splitBy :: (a -> Bool) -> [a] -> [[a]] +splitBy _ [] = [] +splitBy p s = case break p s of + (l, _ : t@(_ : _)) -> l : splitBy p t + (l, _) -> [l] instance Functor OptDescr where fmap f (Option cs ss d s) = Option cs ss (fmap f d) s