forked from GitHub/gf-core
Change how GF deals with character encodings in grammar files
1. The default encoding is changed from Latin-1 to UTF-8.
2. Alternate encodings should be specified as "--# -coding=enc"; the old "flags coding=enc" declarations have no effect but are still checked for consistency.
3. A transitional warning is generated for files that contain non-ASCII characters without specifying a character encoding: "Warning: default encoding has changed from Latin-1 to UTF-8"
4. Conversion to Unicode is now done *before* lexing. This makes it possible to allow arbitrary Unicode characters in identifiers. But identifiers are still stored as ByteStrings, so they are limited to Latin-1 characters for now.
5. Lexer.hs is no longer part of the repository. We now generate the lexer from Lexer.x with alex>=3. Some workarounds for bugs in alex-3.0 were needed. These bugs might already be fixed in newer versions of alex, but we should be compatible with what is shipped in the Haskell Platform.
This commit is contained in:
@@ -20,7 +20,7 @@ module GF.Infra.Option
|
||||
helpMessage,
|
||||
-- * Checking specific options
|
||||
flag, cfgTransform, haskellOption, readOutputFormat,
|
||||
isLexicalCat, isLiteralCat, renameEncoding,
|
||||
isLexicalCat, isLiteralCat, renameEncoding, getEncoding, defaultEncoding,
|
||||
-- * Setting specific options
|
||||
setOptimization, setCFGTransform,
|
||||
-- * Convenience methods for checking options
|
||||
@@ -157,7 +157,7 @@ data Flags = Flags {
|
||||
optRetainResource :: Bool,
|
||||
optName :: Maybe String,
|
||||
optPreprocessors :: [String],
|
||||
optEncoding :: String,
|
||||
optEncoding :: Maybe String,
|
||||
optPMCFG :: Bool,
|
||||
optOptimizations :: Set Optimization,
|
||||
optOptimizePGF :: Bool,
|
||||
@@ -213,7 +213,7 @@ fixRelativeLibPaths curr_dir lib_dir (Options o) = Options (fixPathFlags . o)
|
||||
-- | Pretty-print the options that are preserved in .gfo files.
|
||||
optionsGFO :: Options -> [(String,Literal)]
|
||||
optionsGFO opts = optionsPGF opts
|
||||
++ [("coding", LStr (flag optEncoding opts))]
|
||||
++ [("coding", LStr (getEncoding opts))]
|
||||
|
||||
-- | Pretty-print the options that are preserved in .pgf files.
|
||||
optionsPGF :: Options -> [(String,Literal)]
|
||||
@@ -241,6 +241,10 @@ concatOptions = foldr addOptions noOptions
|
||||
-- | Wrap a flag-transforming function as an 'Options' value.
modifyFlags :: (Flags -> Flags) -> Options
modifyFlags transform = Options transform
|
||||
|
||||
-- | The character encoding selected by the given options: the value of the
-- 'optEncoding' flag when one is set, otherwise 'defaultEncoding'. The
-- chosen name is passed through 'renameEncoding' before being returned.
getEncoding :: Options -> String
getEncoding opts =
  renameEncoding $ case flag optEncoding opts of
    Just enc -> enc
    Nothing  -> defaultEncoding
|
||||
-- | Name of the character encoding used when no encoding has been
-- specified explicitly.
defaultEncoding :: String
defaultEncoding = "UTF-8"
|
||||
|
||||
-- Default options
|
||||
|
||||
defaultFlags :: Flags
|
||||
@@ -264,7 +268,7 @@ defaultFlags = Flags {
|
||||
|
||||
optName = Nothing,
|
||||
optPreprocessors = [],
|
||||
optEncoding = "latin1",
|
||||
optEncoding = Nothing,
|
||||
optPMCFG = True,
|
||||
optOptimizations = Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize],
|
||||
optOptimizePGF = False,
|
||||
@@ -419,7 +423,7 @@ optDescr =
|
||||
addLibDir x = set $ \o -> o { optLibraryPath = x:optLibraryPath o }
|
||||
setLibPath x = set $ \o -> o { optLibraryPath = splitInModuleSearchPath x }
|
||||
preproc x = set $ \o -> o { optPreprocessors = optPreprocessors o ++ [x] }
|
||||
coding x = set $ \o -> o { optEncoding = x }
|
||||
coding x = set $ \o -> o { optEncoding = Just x }
|
||||
startcat x = set $ \o -> o { optStartCat = Just x }
|
||||
language x = set $ \o -> o { optSpeechLanguage = Just x }
|
||||
lexer x = set $ \o -> o { optLexer = Just x }
|
||||
|
||||
Reference in New Issue
Block a user