1
0
forked from GitHub/gf-core

Change how GF deals with character encodings in grammar files

1. The default encoding is changed from Latin-1 to UTF-8.

2. Alternate encodings should be specified as "--# -coding=enc", the old
   "flags coding=enc" declarations have no effect but are still checked for
   consistency.

3. A transitional warning is generated for files that contain non-ASCII
   characters without specifying a character encoding:

	"Warning: default encoding has changed from Latin-1 to UTF-8"

4. Conversion to Unicode is now done *before* lexing. This makes it possible
   to allow arbitrary Unicode characters in identifiers. But identifiers are
   still stored as ByteStrings, so they are limited to Latin-1 characters
   for now.

5. Lexer.hs is no longer part of the repository. We now generate the lexer
   from Lexer.x with alex>=3. Some workarounds for bugs in alex-3.0 were
   needed. These bugs might already be fixed in newer versions of alex, but
   we should be compatible with what is shipped in the Haskell Platform.
This commit is contained in:
hallgren
2013-11-25 21:12:11 +00:00
parent 3210a50648
commit 9d7fdf7c9a
9 changed files with 157 additions and 559 deletions

View File

@@ -20,7 +20,7 @@ module GF.Infra.Option
helpMessage,
-- * Checking specific options
flag, cfgTransform, haskellOption, readOutputFormat,
isLexicalCat, isLiteralCat, renameEncoding,
isLexicalCat, isLiteralCat, renameEncoding, getEncoding, defaultEncoding,
-- * Setting specific options
setOptimization, setCFGTransform,
-- * Convenience methods for checking options
@@ -157,7 +157,7 @@ data Flags = Flags {
optRetainResource :: Bool,
optName :: Maybe String,
optPreprocessors :: [String],
optEncoding :: String,
optEncoding :: Maybe String,
optPMCFG :: Bool,
optOptimizations :: Set Optimization,
optOptimizePGF :: Bool,
@@ -213,7 +213,7 @@ fixRelativeLibPaths curr_dir lib_dir (Options o) = Options (fixPathFlags . o)
-- | Pretty-print the options that are preserved in .gfo files.
optionsGFO :: Options -> [(String,Literal)]
optionsGFO opts = optionsPGF opts
++ [("coding", LStr (flag optEncoding opts))]
++ [("coding", LStr (getEncoding opts))]
-- | Pretty-print the options that are preserved in .pgf files.
optionsPGF :: Options -> [(String,Literal)]
@@ -241,6 +241,10 @@ concatOptions = foldr addOptions noOptions
modifyFlags :: (Flags -> Flags) -> Options
modifyFlags = Options
getEncoding :: Options -> String
getEncoding = renameEncoding . maybe defaultEncoding id . flag optEncoding
defaultEncoding = "UTF-8"
-- Default options
defaultFlags :: Flags
@@ -264,7 +268,7 @@ defaultFlags = Flags {
optName = Nothing,
optPreprocessors = [],
optEncoding = "latin1",
optEncoding = Nothing,
optPMCFG = True,
optOptimizations = Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize],
optOptimizePGF = False,
@@ -419,7 +423,7 @@ optDescr =
addLibDir x = set $ \o -> o { optLibraryPath = x:optLibraryPath o }
setLibPath x = set $ \o -> o { optLibraryPath = splitInModuleSearchPath x }
preproc x = set $ \o -> o { optPreprocessors = optPreprocessors o ++ [x] }
coding x = set $ \o -> o { optEncoding = x }
coding x = set $ \o -> o { optEncoding = Just x }
startcat x = set $ \o -> o { optStartCat = Just x }
language x = set $ \o -> o { optSpeechLanguage = Just x }
lexer x = set $ \o -> o { optLexer = Just x }