diff --git a/GF.cabal b/GF.cabal index 2b3fd725d..7a4c990fc 100644 --- a/GF.cabal +++ b/GF.cabal @@ -89,7 +89,6 @@ executable gf GF.JavaScript.AbsJS GF.JavaScript.PrintJS GF.Infra.CompactPrint - GF.Text.UTF8 GF.Data.TrieMap GF.Data.Utilities GF.Data.SortedList diff --git a/src/compiler/GF.hs b/src/compiler/GF.hs index 32a95ca1f..503253589 100644 --- a/src/compiler/GF.hs +++ b/src/compiler/GF.hs @@ -24,6 +24,10 @@ main = do codepage <- getACP setConsoleCP codepage setConsoleOutputCP codepage + enc <- mkTextEncoding ("CP"++show codepage) + hSetEncoding stdin enc + hSetEncoding stdout enc + hSetEncoding stderr enc #endif args <- getArgs case parseOptions args of diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs index 0ca54839c..00fc8305b 100644 --- a/src/compiler/GF/Command/Commands.hs +++ b/src/compiler/GF/Command/Commands.hs @@ -24,7 +24,7 @@ import PGF.Probabilistic -- (getProbsFromFile,prProbabilities,defaultProbabiliti import PGF.Generate (generateRandomFrom) ---- import GF.Compile.Export import GF.Compile.ExampleBased -import GF.Infra.Option (noOptions, readOutputFormat, Encoding(..)) +import GF.Infra.Option (noOptions, readOutputFormat) import GF.Infra.UseIO import GF.Data.ErrM ---- import GF.Command.Abstract @@ -36,7 +36,6 @@ import GF.Quiz import GF.Command.TreeOperations ---- temporary place for typecheck and compute import GF.Data.Operations -import GF.Text.Coding import Data.List import Data.Maybe @@ -77,10 +76,10 @@ emptyCommandInfo = CommandInfo { lookCommand :: String -> Map.Map String CommandInfo -> Maybe CommandInfo lookCommand = Map.lookup -commandHelpAll :: Encoding -> PGFEnv -> [Option] -> String -commandHelpAll cod pgf opts = unlines +commandHelpAll :: PGFEnv -> [Option] -> String +commandHelpAll pgf opts = unlines [commandHelp (isOpt "full" opts) (co,info) - | (co,info) <- Map.assocs (allCommands cod pgf)] + | (co,info) <- Map.assocs (allCommands pgf)] commandHelp :: Bool -> (String,CommandInfo) -> String commandHelp full (co,info) = unlines $ [ @@ -120,8 +119,8 @@ commandHelpTags full (co,info) = unlines $ [ type PGFEnv = (PGF, Map.Map Language Morpho) -- this list must no more be kept sorted by the command name -allCommands :: Encoding -> PGFEnv -> Map.Map String CommandInfo -allCommands cod env@(pgf, mos) = Map.fromList [ +allCommands :: PGFEnv -> Map.Map String CommandInfo +allCommands env@(pgf, mos) = Map.fromList [ ("!", emptyCommandInfo { synopsis = "system command: escape to system shell", syntax = "! SYSTEMCOMMAND", @@ -156,7 +155,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [ let file s = "_grph." ++ s let view = optViewGraph opts ++ " " let format = optViewFormat opts - writeFile (file "dot") (enc grph) + writeFile (file "dot") grph system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++ " ; " ++ view ++ file format return void @@ -365,10 +364,10 @@ allCommands cod env@(pgf, mos) = Map.fromList [ _ | isOpt "coding" opts -> codingMsg _ | isOpt "license" opts -> licenseMsg [t] -> let co = getCommandOp (showExpr [] t) in - case lookCommand co (allCommands cod env) of ---- new map ??!! + case lookCommand co (allCommands env) of ---- new map ??!! Just info -> commandHelp True (co,info) _ -> "command not found" - _ -> commandHelpAll cod env opts + _ -> commandHelpAll env opts in return (fromString msg), needsTypeCheck = False }), @@ -458,7 +457,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [ let typ = optType opts mprobs <- optProbs opts pgf let mt = mexp xs - morphologyQuiz mt mprobs cod pgf lang typ + morphologyQuiz mt mprobs pgf lang typ return void, flags = [ ("lang","language of the quiz"), @@ -656,7 +655,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [ let typ = optType opts let mt = mexp xs mprobs <- optProbs opts pgf - translationQuiz mt mprobs cod pgf from to typ + translationQuiz mt mprobs pgf from to typ return void, flags = [ ("from","translate from this language"), @@ -687,7 +686,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [ exec = \opts arg -> do let tmpi = "_tmpi" --- let tmpo = "_tmpo" - writeFile tmpi $ enc $ toString arg + writeFile tmpi $ toString arg let syst = optComm opts ++ " " ++ tmpi system $ syst ++ " <" ++ tmpi ++ " >" ++ tmpo s <- readFile tmpo @@ -738,7 +737,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [ let file s = "_grphd." ++ s let view = optViewGraph opts ++ " " let format = optViewFormat opts - writeFile (file "dot") (enc grphs) + writeFile (file "dot") grphs system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++ " ; " ++ view ++ file format return void @@ -779,7 +778,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [ let file s = "_grph." ++ s let view = optViewGraph opts ++ " " let format = optViewFormat opts - writeFile (file "dot") (enc grph) + writeFile (file "dot") grph system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++ " ; " ++ view ++ file format return void @@ -819,7 +818,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [ let file s = "_grph." ++ s let view = optViewGraph opts ++ " " let format = optViewFormat opts - writeFile (file "dot") (enc grph) + writeFile (file "dot") grph system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++ " ; " ++ view ++ file format return void @@ -844,8 +843,8 @@ allCommands cod env@(pgf, mos) = Map.fromList [ exec = \opts arg -> do let file = valStrOpts "file" "_gftmp" opts if isOpt "append" opts - then appendFile file (enc (toString arg)) - else writeFile file (enc (toString arg)) + then appendFile file (toString arg) + else writeFile file (toString arg) return void, options = [ ("append","append to file, instead of overwriting it") @@ -889,8 +888,6 @@ allCommands cod env@(pgf, mos) = Map.fromList [ }) ] where - enc = encodeUnicode cod - par opts s = case optOpenTypes opts of [] -> concat [parse pgf lang (optType opts) s | lang <- optLangs opts] open_typs -> concat [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts] @@ -1063,17 +1060,17 @@ stringOpOptions = sort $ [ treeOpOptions pgf = [(op,expl) | (op,(expl,Left _)) <- allTreeOps pgf] treeOpFlags pgf = [(op,expl) | (op,(expl,Right _)) <- allTreeOps pgf] -translationQuiz :: Maybe Expr -> Maybe Probabilities -> Encoding -> +translationQuiz :: Maybe Expr -> Maybe Probabilities -> PGF -> Language -> Language -> Type -> IO () -translationQuiz mex mprobs cod pgf ig og typ = do +translationQuiz mex mprobs pgf ig og typ = do tts <- translationList mex mprobs pgf ig og typ infinity - mkQuiz cod "Welcome to GF Translation Quiz." tts + mkQuiz "Welcome to GF Translation Quiz." tts -morphologyQuiz :: Maybe Expr -> Maybe Probabilities -> Encoding -> +morphologyQuiz :: Maybe Expr -> Maybe Probabilities -> PGF -> Language -> Type -> IO () -morphologyQuiz mex mprobs cod pgf ig typ = do +morphologyQuiz mex mprobs pgf ig typ = do tts <- morphologyList mex mprobs pgf ig typ infinity - mkQuiz cod "Welcome to GF Morphology Quiz." tts + mkQuiz "Welcome to GF Morphology Quiz." tts -- | the maximal number of precompiled quiz problems infinity :: Int diff --git a/src/compiler/GF/Command/Interpreter.hs b/src/compiler/GF/Command/Interpreter.hs index ff84da8a3..4f146bb93 100644 --- a/src/compiler/GF/Command/Interpreter.hs +++ b/src/compiler/GF/Command/Interpreter.hs @@ -29,24 +29,24 @@ data CommandEnv = CommandEnv { expmacros :: Map.Map String Expr } -mkCommandEnv :: Encoding -> PGF -> CommandEnv -mkCommandEnv enc pgf = +mkCommandEnv :: PGF -> CommandEnv +mkCommandEnv pgf = let mos = Map.fromList [(la,buildMorpho pgf la) | la <- languages pgf] in - CommandEnv pgf mos (allCommands enc (pgf, mos)) Map.empty Map.empty + CommandEnv pgf mos (allCommands (pgf, mos)) Map.empty Map.empty emptyCommandEnv :: CommandEnv -emptyCommandEnv = mkCommandEnv UTF_8 emptyPGF +emptyCommandEnv = mkCommandEnv emptyPGF -interpretCommandLine :: (String -> String) -> CommandEnv -> String -> IO () -interpretCommandLine enc env line = +interpretCommandLine :: CommandEnv -> String -> IO () +interpretCommandLine env line = case readCommandLine line of Just [] -> return () - Just pipes -> mapM_ (interpretPipe enc env) pipes + Just pipes -> mapM_ (interpretPipe env) pipes Nothing -> putStrLnFlush "command not parsed" -interpretPipe enc env cs = do +interpretPipe env cs = do v@(_,s) <- intercs ([],"") cs - putStrLnFlush $ enc s + putStrLnFlush s return v where intercs treess [] = return treess @@ -57,14 +57,14 @@ interpretPipe enc env cs = do '%':f -> case Map.lookup f (commandmacros env) of Just css -> case getCommandTrees env False arg es of - Right es -> do mapM_ (interpretPipe enc env) (appLine es css) + Right es -> do mapM_ (interpretPipe env) (appLine es css) return ([],[]) Left msg -> do putStrLn ('\n':msg) return ([],[]) Nothing -> do putStrLn $ "command macro " ++ co ++ " not interpreted" return ([],[]) - _ -> interpret enc env es comm + _ -> interpret env es comm appLine es = map (map (appCommand es)) -- macro definition applications: replace ?i by (exps !! i) @@ -81,14 +81,14 @@ appCommand xs c@(Command i os arg) = case arg of EFun x -> EFun x -- return the trees to be sent in pipe, and the output possibly printed -interpret :: (String -> String) -> CommandEnv -> [Expr] -> Command -> IO CommandOutput -interpret enc env trees comm = +interpret :: CommandEnv -> [Expr] -> Command -> IO CommandOutput +interpret env trees comm = case getCommand env trees comm of Left msg -> do putStrLn ('\n':msg) return ([],[]) Right (info,opts,trees) -> do tss@(_,s) <- exec info opts trees if isOpt "tr" opts - then putStrLn (enc s) + then putStrLn s else return () return tss diff --git a/src/compiler/GF/Compile.hs b/src/compiler/GF/Compile.hs index a862f85e2..1aebeaf31 100644 --- a/src/compiler/GF/Compile.hs +++ b/src/compiler/GF/Compile.hs @@ -12,7 +12,6 @@ import GF.Compile.Update import GF.Compile.Refresh import GF.Compile.Coding -import GF.Text.UTF8 ---- import GF.Grammar.Grammar import GF.Grammar.Lookup @@ -82,7 +81,7 @@ compileSourceGrammar opts gr@(MGrammar ms) = do -- to output an intermediate stage intermOut :: Options -> Dump -> Doc -> IOE () intermOut opts d doc - | dump opts d = ioeIO (hPutStrLn stderr (encodeUTF8 (render (text "\n\n--#" <+> text (show d) $$ doc)))) + | dump opts d = ioeIO (hPutStrLn stderr (render (text "\n\n--#" <+> text (show d) $$ doc))) | otherwise = return () -- | the environment @@ -162,7 +161,8 @@ compileOne opts env@(_,srcgr,_) file = do sm00 <- putpOpt ("- parsing" +++ file) ("- compiling" +++ file ++ "... ") $ getSourceModule opts file - let sm0 = decodeStringsInModule sm00 + enc <- ioeIO $ mkTextEncoding (renameEncoding (flag optEncoding (flagsModule sm00))) + let sm0 = decodeStringsInModule enc sm00 intermOut opts DumpSource (ppModule Qualified sm0) diff --git a/src/compiler/GF/Compile/Coding.hs b/src/compiler/GF/Compile/Coding.hs index b909aac7d..01285eef1 100644 --- a/src/compiler/GF/Compile/Coding.hs +++ b/src/compiler/GF/Compile/Coding.hs @@ -8,12 +8,14 @@ import GF.Infra.Option import GF.Data.Operations import Data.Char +import System.IO +import qualified Data.ByteString.Char8 as BS -encodeStringsInModule :: SourceModule -> SourceModule -encodeStringsInModule = codeSourceModule (encodeUnicode UTF_8) +encodeStringsInModule :: TextEncoding -> SourceModule -> SourceModule +encodeStringsInModule enc = codeSourceModule (BS.unpack . encodeUnicode enc) -decodeStringsInModule :: SourceModule -> SourceModule -decodeStringsInModule mo = codeSourceModule (decodeUnicode (flag optEncoding (flagsModule mo))) mo +decodeStringsInModule :: TextEncoding -> SourceModule -> SourceModule +decodeStringsInModule enc mo = codeSourceModule (decodeUnicode enc . BS.pack) mo codeSourceModule :: (String -> String) -> SourceModule -> SourceModule codeSourceModule co (id,mo) = (id,replaceJudgements mo (mapTree codj (jments mo))) diff --git a/src/compiler/GF/Compile/PGFtoHaskell.hs b/src/compiler/GF/Compile/PGFtoHaskell.hs index 252fc95ee..ecc70cb5e 100644 --- a/src/compiler/GF/Compile/PGFtoHaskell.hs +++ b/src/compiler/GF/Compile/PGFtoHaskell.hs @@ -22,7 +22,6 @@ import PGF.Macros import GF.Data.Operations import GF.Infra.Option -import GF.Text.UTF8 import Data.List --(isPrefixOf, find, intersperse) import qualified Data.Map as Map @@ -34,7 +33,7 @@ grammar2haskell :: Options -> String -- ^ Module name. -> PGF -> String -grammar2haskell opts name gr = encodeUTF8 $ foldr (++++) [] $ +grammar2haskell opts name gr = foldr (++++) [] $ pragmas ++ haskPreamble name ++ [types, gfinstances gId lexical gr'] where gr' = hSkeleton gr gadt = haskellOption opts HaskellGADT diff --git a/src/compiler/GF/Compile/PGFtoJS.hs b/src/compiler/GF/Compile/PGFtoJS.hs index bb29ff7c5..f6725bf4f 100644 --- a/src/compiler/GF/Compile/PGFtoJS.hs +++ b/src/compiler/GF/Compile/PGFtoJS.hs @@ -6,7 +6,6 @@ import qualified PGF.Macros as M import qualified GF.JavaScript.AbsJS as JS import qualified GF.JavaScript.PrintJS as JS -import GF.Text.UTF8 import GF.Data.ErrM import GF.Infra.Option @@ -21,7 +20,7 @@ import qualified Data.IntMap as IntMap pgf2js :: PGF -> String pgf2js pgf = - encodeUTF8 $ JS.printTree $ JS.Program [JS.ElStmt $ JS.SDeclOrExpr $ JS.Decl [JS.DInit (JS.Ident n) grammar]] + JS.printTree $ JS.Program [JS.ElStmt $ JS.SDeclOrExpr $ JS.Decl [JS.DInit (JS.Ident n) grammar]] where n = showCId $ absname pgf as = abstract pgf diff --git a/src/compiler/GF/Compile/PGFtoProlog.hs b/src/compiler/GF/Compile/PGFtoProlog.hs index 3a5df0256..9e390e87b 100644 --- a/src/compiler/GF/Compile/PGFtoProlog.hs +++ b/src/compiler/GF/Compile/PGFtoProlog.hs @@ -15,7 +15,6 @@ import PGF.Data import PGF.Macros import GF.Data.Operations -import GF.Text.UTF8 import qualified Data.Map as Map import Data.Char (isAlphaNum, isAsciiLower, isAsciiUpper, ord) diff --git a/src/compiler/GF/Data/XML.hs b/src/compiler/GF/Data/XML.hs index bdc6f98a1..4d4a3e8ca 100644 --- a/src/compiler/GF/Data/XML.hs +++ b/src/compiler/GF/Data/XML.hs @@ -7,7 +7,6 @@ module GF.Data.XML (XML(..), Attr, comments, showXMLDoc, showsXMLDoc, showsXML, bottomUpXML) where import GF.Data.Utilities -import GF.Text.UTF8 data XML = Data String | CData String | Tag String [Attr] [XML] | ETag String [Attr] | Comment String | Empty deriving (Ord,Eq,Show) @@ -21,7 +20,7 @@ showXMLDoc :: XML -> String showXMLDoc xml = showsXMLDoc xml "" showsXMLDoc :: XML -> ShowS -showsXMLDoc xml = encodeUTF8 . showString header . showsXML xml +showsXMLDoc xml = showString header . showsXML xml where header = "" showsXML :: XML -> ShowS diff --git a/src/compiler/GF/Grammar/Parser.y b/src/compiler/GF/Grammar/Parser.y index 3be1b3519..867776607 100644 --- a/src/compiler/GF/Grammar/Parser.y +++ b/src/compiler/GF/Grammar/Parser.y @@ -441,9 +441,7 @@ Exp6 | '?' { Meta 0 } | '[' ']' { Empty } | '[' Ident Exps ']' { foldl App (Vr (mkListId $2)) $3 } - | '[' String ']' { case $2 of - [] -> Empty - str -> foldr1 C (map K (words str)) } + | '[' String ']' { K $2 } | '{' ListLocDef '}' {% mkR $2 } | '<' ListTupleComp '>' { R (tuple2record $2) } | '<' Exp ':' Exp '>' { Typed $2 $4 } diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs index 24b967aff..ee8d76b45 100644 --- a/src/compiler/GF/Infra/Option.hs +++ b/src/compiler/GF/Infra/Option.hs @@ -3,7 +3,7 @@ module GF.Infra.Option -- * Option types Options, Flags(..), - Mode(..), Phase(..), Verbosity(..), Encoding(..), OutputFormat(..), + Mode(..), Phase(..), Verbosity(..), OutputFormat(..), SISRFormat(..), Optimization(..), CFGTransform(..), HaskellOption(..), Dump(..), Printer(..), Recomp(..), -- * Option parsing @@ -17,7 +17,7 @@ module GF.Infra.Option helpMessage, -- * Checking specific options flag, cfgTransform, haskellOption, readOutputFormat, - isLexicalCat, encodings, + isLexicalCat, renameEncoding, -- * Setting specific options setOptimization, setCFGTransform, -- * Convenience methods for checking options @@ -25,12 +25,13 @@ module GF.Infra.Option ) where import Control.Monad -import Data.Char (toLower) +import Data.Char (toLower, isDigit) import Data.List import Data.Maybe import GF.Infra.GetOpt --import System.Console.GetOpt import System.FilePath +import System.IO import GF.Data.ErrM @@ -77,9 +78,6 @@ data Verbosity = Quiet | Normal | Verbose | Debug data Phase = Preproc | Convert | Compile | Link deriving (Show,Eq,Ord) -data Encoding = UTF_8 | ISO_8859_1 | CP_1250 | CP_1251 | CP_1252 | CP_1254 - deriving (Eq,Ord) - data OutputFormat = FmtPGFPretty | FmtJavaScript | FmtHaskell @@ -161,7 +159,7 @@ data Flags = Flags { optCncName :: Maybe String, optResName :: Maybe String, optPreprocessors :: [String], - optEncoding :: Encoding, + optEncoding :: String, optOptimizations :: Set Optimization, optCFGTransforms :: Set CFGTransform, optLibraryPath :: [FilePath], @@ -207,7 +205,7 @@ fixRelativeLibPaths curr_dir lib_dir (Options o) = Options (fixPathFlags . o) -- | Pretty-print the options that are preserved in .gfo files. optionsGFO :: Options -> [(String,String)] optionsGFO opts = optionsPGF opts - ++ [("coding", show (flag optEncoding opts))] + ++ [("coding", flag optEncoding opts)] -- | Pretty-print the options that are preserved in .pgf files. optionsPGF :: Options -> [(String,String)] @@ -260,7 +258,7 @@ defaultFlags = Flags { optCncName = Nothing, optResName = Nothing, optPreprocessors = [], - optEncoding = ISO_8859_1, + optEncoding = "latin1", optOptimizations = Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize], optCFGTransforms = Set.fromList [CFGRemoveCycles, CFGBottomUpFilter, CFGTopDownFilter, CFGMergeIdentical], @@ -343,8 +341,7 @@ optDescr = (unlines ["Use CMD to preprocess input files.", "Multiple preprocessors can be used by giving this option multiple times."]), Option [] ["coding"] (ReqArg coding "ENCODING") - ("Character encoding of the source grammar, ENCODING = " - ++ concat (intersperse " | " (map fst encodings)) ++ "."), + ("Character encoding of the source grammar, ENCODING = utf8, latin1, cp1251, ..."), Option [] ["startcat"] (ReqArg startcat "CAT") "Grammar start category.", Option [] ["language"] (ReqArg language "LANG") "Set the speech language flag to LANG in the generated grammar.", Option [] ["lexer"] (ReqArg lexer "LEXER") "Use lexer LEXER.", @@ -400,9 +397,7 @@ optDescr = addLibDir x = set $ \o -> o { optLibraryPath = x:optLibraryPath o } setLibPath x = set $ \o -> o { optLibraryPath = splitInModuleSearchPath x } preproc x = set $ \o -> o { optPreprocessors = optPreprocessors o ++ [x] } - coding x = case lookup x encodings of - Just c -> set $ \o -> o { optEncoding = c } - Nothing -> fail $ "Unknown character encoding: " ++ x + coding x = set $ \o -> o { optEncoding = x } startcat x = set $ \o -> o { optStartCat = Just x } language x = set $ \o -> o { optSpeechLanguage = Just x } lexer x = set $ \o -> o { optLexer = Just x } @@ -483,18 +478,14 @@ haskellOptionNames = ("gadt", HaskellGADT), ("lexical", HaskellLexical)] -encodings :: [(String,Encoding)] -encodings = - [("utf8", UTF_8), - ("cp1250", CP_1250), - ("cp1251", CP_1251), - ("cp1252", CP_1252), - ("cp1254", CP_1254), - ("latin1", ISO_8859_1) - ] - -instance Show Encoding where - show = lookupShow encodings +-- | This is for bacward compatibility. Since GHC 6.12 we +-- started using the native Unicode support in GHC but it +-- uses different names for the code pages. +renameEncoding :: String -> String +renameEncoding "utf8" = "UTF-8" +renameEncoding "latin1" = "CP1252" +renameEncoding ('c':'p':s) | all isDigit s = 'C':'P':s +renameEncoding s = s lookupShow :: Eq a => [(String,a)] -> a -> String lookupShow xs z = fromMaybe "lookupShow" $ lookup z [(y,x) | (x,y) <- xs] diff --git a/src/compiler/GF/Quiz.hs b/src/compiler/GF/Quiz.hs index 9a3540645..4a4caafc8 100644 --- a/src/compiler/GF/Quiz.hs +++ b/src/compiler/GF/Quiz.hs @@ -23,7 +23,6 @@ import PGF.Linearize import GF.Data.Operations import GF.Infra.UseIO import GF.Infra.Option -import GF.Text.Coding import PGF.Probabilistic import System.Random @@ -33,9 +32,9 @@ import Data.List (nub) -- generic quiz function -mkQuiz :: Encoding -> String -> [(String,[String])] -> IO () -mkQuiz cod msg tts = do - let qas = [ (encodeUnicode cod q, mkAnswer cod as) | (q,as) <- tts] +mkQuiz :: String -> [(String,[String])] -> IO () +mkQuiz msg tts = do + let qas = [(q, mkAnswer as) | (q,as) <- tts] teachDialogue qas msg translationList :: @@ -62,14 +61,13 @@ morphologyList mex mprobs pgf ig typ number = do (pwss@(pws0:_),i) <- zip ss forms, let ws = map (\pws -> snd (pws !! i)) pwss] -- | compare answer to the list of right answers, increase score and give feedback -mkAnswer :: Encoding -> [String] -> String -> (Integer, String) -mkAnswer cod as s = +mkAnswer :: [String] -> String -> (Integer, String) +mkAnswer as s = if (elem (norm s) as) then (1,"Yes.") - else (0,"No, not" +++ s ++ ", but" ++++ enc (unlines as)) + else (0,"No, not" +++ s ++ ", but" ++++ unlines as) where - norm = unwords . words . decodeUnicode cod - enc = encodeUnicode cod + norm = unwords . words norml = unwords . words diff --git a/src/compiler/GF/Text/CP1250.hs b/src/compiler/GF/Text/CP1250.hs deleted file mode 100644 index 2ed263877..000000000 --- a/src/compiler/GF/Text/CP1250.hs +++ /dev/null @@ -1,91 +0,0 @@ ------------------------------------------------------------------------------ --- | --- Module : GF.Text.CP1250 --- Maintainer : Krasimir Angelov --- --- cp1250 is a code page used under Microsoft Windows to represent texts --- in Central European and Eastern European languages that use Latin script, --- such as Polish, Czech, Slovak, Hungarian, Slovene, Bosnian, Croatian, --- Serbian (Latin script), Romanian and Albanian. It may also be used with --- the German language; German-language texts encoded with cp1250 and cp1252 --- are identical. --- ------------------------------------------------------------------------------ - -module GF.Text.CP1250 where - -import Data.Char - -decodeCP1250 = map convert where - convert c - | c == '\x80' = chr 0x20AC - | c == '\x82' = chr 0x201A - | c == '\x84' = chr 0x201E - | c == '\x85' = chr 0x2026 - | c == '\x86' = chr 0x2020 - | c == '\x87' = chr 0x2021 - | c == '\x89' = chr 0x2030 - | c == '\x8A' = chr 0x0160 - | c == '\x8B' = chr 0x2039 - | c == '\x8C' = chr 0x015A - | c == '\x8D' = chr 0x0164 - | c == '\x8E' = chr 0x017D - | c == '\x8F' = chr 0x0179 - | c == '\x91' = chr 0x2018 - | c == '\x92' = chr 0x2019 - | c == '\x93' = chr 0x201C - | c == '\x94' = chr 0x201D - | c == '\x95' = chr 0x2022 - | c == '\x96' = chr 0x2013 - | c == '\x97' = chr 0x2014 - | c == '\x99' = chr 0x2122 - | c == '\x9A' = chr 0x0161 - | c == '\x9B' = chr 0x203A - | c == '\x9C' = chr 0x015B - | c == '\x9D' = chr 0x0165 - | c == '\x9E' = chr 0x017E - | c == '\x9F' = chr 0x017A - | c == '\xA1' = chr 0x02C7 - | c == '\xA5' = chr 0x0104 - | c == '\xB9' = chr 0x0105 - | c == '\xBC' = chr 0x013D - | c == '\xBE' = chr 0x013E - | otherwise = c - - -encodeCP1250 = map convert where - convert c - | oc == 0x20AC = '\x80' - | oc == 0x201A = '\x82' - | oc == 0x201E = '\x84' - | oc == 0x2026 = '\x85' - | oc == 0x2020 = '\x86' - | oc == 0x2021 = '\x87' - | oc == 0x2030 = '\x89' - | oc == 0x0160 = '\x8A' - | oc == 0x2039 = '\x8B' - | oc == 0x015A = '\x8C' - | oc == 0x0164 = '\x8D' - | oc == 0x017D = '\x8E' - | oc == 0x0179 = '\x8F' - | oc == 0x2018 = '\x91' - | oc == 0x2019 = '\x92' - | oc == 0x201C = '\x93' - | oc == 0x201D = '\x94' - | oc == 0x2022 = '\x95' - | oc == 0x2013 = '\x96' - | oc == 0x2014 = '\x97' - | oc == 0x2122 = '\x99' - | oc == 0x0161 = '\x9A' - | oc == 0x203A = '\x9B' - | oc == 0x015B = '\x9C' - | oc == 0x0165 = '\x9D' - | oc == 0x017E = '\x9E' - | oc == 0x017A = '\x9F' - | oc == 0x02C7 = '\xA1' - | oc == 0x0104 = '\xA5' - | oc == 0x0105 = '\xB9' - | oc == 0x013D = '\xBC' - | oc == 0x013E = '\xBE' - | otherwise = c - where oc = ord c diff --git a/src/compiler/GF/Text/CP1251.hs b/src/compiler/GF/Text/CP1251.hs deleted file mode 100644 index 8d8ceebf6..000000000 --- a/src/compiler/GF/Text/CP1251.hs +++ /dev/null @@ -1,86 +0,0 @@ ------------------------------------------------------------------------------ --- | --- Module : GF.Text.CP1251 --- Maintainer : Krasimir Angelov --- --- cp1251 is a popular 8-bit character encoding, designed to cover languages --- that use the Cyrillic alphabet such as Russian, Bulgarian, Serbian Cyrillic --- and other languages. It is the most widely used for encoding the Bulgarian, --- Serbian and Macedonian languages. --- ------------------------------------------------------------------------------ - -module GF.Text.CP1251 where - -import Data.Char - -decodeCP1251 = map convert where - convert c - | c >= '\xC0' && c <= '\xFF' = chr (ord c + (0x410-0xC0)) - | c == '\xA8' = chr 0x401 -- cyrillic capital letter lo - | c == '\x80' = chr 0x402 - | c == '\x81' = chr 0x403 - | c == '\xAA' = chr 0x404 - | c == '\xBD' = chr 0x405 - | c == '\xB2' = chr 0x406 - | c == '\xAF' = chr 0x407 - | c == '\xA3' = chr 0x408 - | c == '\x8A' = chr 0x409 - | c == '\x8C' = chr 0x40A - | c == '\x8E' = chr 0x40B - | c == '\x8D' = chr 0x40C - | c == '\xA1' = chr 0x40E - | c == '\x8F' = chr 0x40F - | c == '\xB8' = chr 0x451 -- cyrillic small letter lo - | c == '\x90' = chr 0x452 - | c == '\x83' = chr 0x453 - | c == '\xBA' = chr 0x454 - | c == '\xBE' = chr 0x455 - | c == '\xB3' = chr 0x456 - | c == '\xBF' = chr 0x457 - | c == '\xBC' = chr 0x458 - | c == '\x9A' = chr 0x459 - | c == '\x9C' = chr 0x45A - | c == '\x9E' = chr 0x45B - | c == '\x9D' = chr 0x45C - | c == '\xA2' = chr 0x45E - | c == '\x9F' = chr 0x45F - | c == '\xA5' = chr 0x490 - | c == '\xB4' = chr 0x491 - | otherwise = c - -encodeCP1251 = map convert where - convert c - | oc >= 0x410 && oc <= 0x44F = chr (oc - (0x410-0xC0)) - | oc == 0x401 = '\xA8' -- cyrillic capital letter lo - | oc == 0x402 = '\x80' - | oc == 0x403 = '\x81' - | oc == 0x404 = '\xAA' - | oc == 0x405 = '\xBD' - | oc == 0x406 = '\xB2' - | oc == 0x407 = '\xAF' - | oc == 0x408 = '\xA3' - | oc == 0x409 = '\x8A' - | oc == 0x40A = '\x8C' - | oc == 0x40B = '\x8E' - | oc == 0x40C = '\x8D' - | oc == 0x40E = '\xA1' - | oc == 0x40F = '\x8F' - | oc == 0x451 = '\xB8' -- cyrillic small letter lo - | oc == 0x452 = '\x90' - | oc == 0x453 = '\x83' - | oc == 0x454 = '\xBA' - | oc == 0x455 = '\xBE' - | oc == 0x456 = '\xB3' - | oc == 0x457 = '\xBF' - | oc == 0x458 = '\xBC' - | oc == 0x459 = '\x9A' - | oc == 0x45A = '\x9C' - | oc == 0x45B = '\x9E' - | oc == 0x45C = '\x9D' - | oc == 0x45E = '\xA2' - | oc == 0x45F = '\x9F' - | oc == 0x490 = '\xA5' - | oc == 0x491 = '\xB4' - | otherwise = c - where oc = ord c diff --git a/src/compiler/GF/Text/CP1252.hs b/src/compiler/GF/Text/CP1252.hs deleted file mode 100644 index a1d8ab8f3..000000000 --- a/src/compiler/GF/Text/CP1252.hs +++ /dev/null @@ -1,17 +0,0 @@ ------------------------------------------------------------------------------ --- | --- Module : GF.Text.CP1252 --- Maintainer : Krasimir Angelov --- --- cp1252 is a character encoding of the Latin alphabet, used by default in --- the legacy components of Microsoft Windows in English and some other --- Western languages. --- ------------------------------------------------------------------------------ - -module GF.Text.CP1252 where - -import Data.Char - -decodeCP1252 = map id -encodeCP1252 = map (\x -> if x <= '\255' then x else '?') diff --git a/src/compiler/GF/Text/CP1254.hs b/src/compiler/GF/Text/CP1254.hs deleted file mode 100644 index 488359d70..000000000 --- a/src/compiler/GF/Text/CP1254.hs +++ /dev/null @@ -1,84 +0,0 @@ ------------------------------------------------------------------------------ --- | --- Module : GF.Text.CP1254 --- Maintainer : Krasimir Angelov --- --- cp1254 is a code page used under Microsoft Windows to write Turkish. --- Characters with codepoints A0 through FF are compatible with ISO 8859-9. --- ------------------------------------------------------------------------------ - -module GF.Text.CP1254 where - -import Data.Char - -decodeCP1254 = map convert where - convert c - | c == '\x80' = chr 0x20AC - | c == '\x82' = chr 0x201A - | c == '\x83' = chr 0x192 - | c == '\x84' = chr 0x201E - | c == '\x85' = chr 0x2026 - | c == '\x86' = chr 0x2020 - | c == '\x87' = chr 0x2021 - | c == '\x88' = chr 0x2C6 - | c == '\x89' = chr 0x2030 - | c == '\x8A' = chr 0x160 - | c == '\x8B' = chr 0x2039 - | c == '\x8C' = chr 0x152 - | c == '\x91' = chr 0x2018 - | c == '\x92' = chr 0x2019 - | c == '\x93' = chr 0x201C - | c == '\x94' = chr 0x201D - | c == '\x95' = chr 0x2022 - | c == '\x96' = chr 0x2013 - | c == '\x97' = chr 0x2014 - | c == '\x98' = chr 0x2DC - | c == '\x99' = chr 0x2122 - | c == '\x9A' = chr 0x161 - | c == '\x9B' = chr 0x203A - | c == '\x9C' = chr 0x153 - | c == '\x9F' = chr 0x178 - | c == '\xD0' = chr 0x11E - | c == '\xDD' = chr 0x130 - | c == '\xDE' = chr 0x15E - | c == '\xF0' = chr 0x11F - | c == '\xFD' = chr 0x131 - | c == '\xFE' = chr 0x15F - | otherwise = c - -encodeCP1254 = map convert where - convert c - | oc == 0x20AC = '\x80' - | oc == 0x201A = '\x82' - | oc == 0x192 = '\x83' - | oc == 0x201E = '\x84' - | oc == 0x2026 = '\x85' - | oc == 0x2020 = '\x86' - | oc == 0x2021 = '\x87' - | oc == 0x2C6 = '\x88' - | oc == 0x2030 = '\x89' - | oc == 0x160 = '\x8A' - | oc == 0x2039 = '\x8B' - | oc == 0x152 = '\x8C' - | oc == 0x2018 = '\x91' - | oc == 0x2019 = '\x92' - | oc == 0x201C = '\x93' - | oc == 0x201D = '\x94' - | oc == 0x2022 = '\x95' - | oc == 0x2013 = '\x96' - | oc == 0x2014 = '\x97' - | oc == 0x2DC = '\x98' - | oc == 0x2122 = '\x99' - | oc == 0x161 = '\x9A' - | oc == 0x203A = '\x9B' - | oc == 0x153 = '\x9C' - | oc == 0x178 = '\x9F' - | oc == 0x11E = '\xD0' - | oc == 0x130 = '\xDD' - | oc == 0x15E = '\xDE' - | oc == 0x11F = '\xF0' - | oc == 0x131 = '\xFD' - | oc == 0x15F = '\xFE' - | otherwise = c - where oc = ord c diff --git a/src/compiler/GF/Text/Coding.hs b/src/compiler/GF/Text/Coding.hs index 3481b278d..a206bb4d2 100644 --- a/src/compiler/GF/Text/Coding.hs +++ b/src/compiler/GF/Text/Coding.hs @@ -1,24 +1,69 @@ module GF.Text.Coding where -import GF.Infra.Option -import GF.Text.UTF8 -import GF.Text.CP1250 -import GF.Text.CP1251 -import GF.Text.CP1252 -import GF.Text.CP1254 +import qualified Data.ByteString as BS +import Data.ByteString.Internal +import GHC.IO +import GHC.IO.Buffer +import GHC.IO.Encoding +import GHC.IO.Exception +import Control.Monad -encodeUnicode e = case e of - UTF_8 -> encodeUTF8 - CP_1250 -> encodeCP1250 - CP_1251 -> encodeCP1251 - CP_1252 -> encodeCP1252 - CP_1254 -> encodeCP1254 - _ -> id +encodeUnicode :: TextEncoding -> String -> ByteString +encodeUnicode enc s = + unsafePerformIO $ do + let len = length s + cbuf0 <- newCharBuffer (len*4) ReadBuffer + foldM (\i c -> writeCharBuf (bufRaw cbuf0) i c) 0 s + let cbuf = cbuf0{bufR=len} + case enc of + TextEncoding {mkTextEncoder=mk} -> do encoder <- mk + bss <- translate (encode encoder) cbuf + close encoder + return (BS.concat bss) + where + translate cod cbuf + | i < w = do bbuf <- newByteBuffer 128 WriteBuffer + (cbuf,bbuf) <- cod cbuf bbuf + if isEmptyBuffer bbuf + then ioe_invalidCharacter + else do let bs = PS (bufRaw bbuf) (bufL bbuf) (bufR bbuf-bufL bbuf) + bss <- translate cod cbuf + return (bs:bss) + | otherwise = return [] + where + i = bufL cbuf + w = bufR cbuf -decodeUnicode e = case e of - UTF_8 -> decodeUTF8 - CP_1250 -> decodeCP1250 - CP_1251 -> decodeCP1251 - CP_1252 -> decodeCP1252 - CP_1254 -> decodeCP1254 - _ -> id +decodeUnicode :: TextEncoding -> ByteString -> String +decodeUnicode enc (PS fptr l len) = + unsafePerformIO $ do + let bbuf = Buffer{bufRaw=fptr, bufState=ReadBuffer, bufSize=len, bufL=l, bufR=l+len} + cbuf <- newCharBuffer 128 WriteBuffer + case enc of + TextEncoding {mkTextDecoder=mk} -> do decoder <- mk + s <- translate (encode decoder) bbuf cbuf + close decoder + return s + where + translate cod bbuf cbuf + | i < w = do (bbuf,cbuf) <- cod bbuf cbuf + if isEmptyBuffer cbuf + then ioe_invalidCharacter + else unpack cod bbuf cbuf + | otherwise = return [] + where + i = bufL bbuf + w = bufR bbuf + + unpack cod bbuf cbuf + | i < w = do (c,i') <- readCharBuf (bufRaw cbuf) i + cs <- unpack cod bbuf cbuf{bufL=i'} + return (c:cs) + | otherwise = translate cod bbuf cbuf{bufL=0,bufR=0} + where + i = bufL cbuf + w = bufR cbuf + +ioe_invalidCharacter = ioException + (IOError Nothing InvalidArgument "" + ("invalid byte sequence for this encoding") Nothing Nothing) diff --git a/src/compiler/GF/Text/Lexing.hs b/src/compiler/GF/Text/Lexing.hs index a5a2c71eb..ec030e158 100644 --- a/src/compiler/GF/Text/Lexing.hs +++ b/src/compiler/GF/Text/Lexing.hs @@ -1,8 +1,6 @@ module GF.Text.Lexing (stringOp,opInEnv) where import GF.Text.Transliterations -import GF.Text.UTF8 -import GF.Text.CP1251 import Data.Char import Data.List (intersperse) @@ -23,10 +21,6 @@ stringOp name = case name of "unlexmixed" -> Just $ capitInit . appUnlexer (unlexMixed . unquote) "unwords" -> Just $ appUnlexer unwords "to_html" -> Just wrapHTML - "to_utf8" -> Just encodeUTF8 - "from_utf8" -> Just decodeUTF8 - "to_cp1251" -> Just encodeCP1251 - "from_cp1251" -> Just decodeCP1251 _ -> transliterate name -- perform op in environments beg--end, t.ex. between "--" diff --git a/src/compiler/GF/Text/Transliterations.hs b/src/compiler/GF/Text/Transliterations.hs index bd56f5f89..cbe8baf15 100644 --- a/src/compiler/GF/Text/Transliterations.hs +++ b/src/compiler/GF/Text/Transliterations.hs @@ -5,8 +5,6 @@ module GF.Text.Transliterations ( transliterationPrintNames ) where -import GF.Text.UTF8 - import Data.Char import Numeric import qualified Data.Map as Map diff --git a/src/compiler/GF/Text/UTF8.hs b/src/compiler/GF/Text/UTF8.hs deleted file mode 100644 index 5e9687684..000000000 --- a/src/compiler/GF/Text/UTF8.hs +++ /dev/null @@ -1,48 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : UTF8 --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/04/21 16:23:42 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.5 $ --- --- From the Char module supplied with HBC. --- code by Thomas Hallgren (Jul 10 1999) ------------------------------------------------------------------------------ - -module GF.Text.UTF8 (decodeUTF8, encodeUTF8) where - --- | Take a Unicode string and encode it as a string --- with the UTF8 method. -decodeUTF8 :: String -> String -decodeUTF8 "" = "" -decodeUTF8 (c:cs) | c < '\x80' = c : decodeUTF8 cs -decodeUTF8 (c:c':cs) | '\xc0' <= c && c <= '\xdf' && - '\x80' <= c' && c' <= '\xbf' = - toEnum ((fromEnum c `mod` 0x20) * 0x40 + fromEnum c' `mod` 0x40) : decodeUTF8 cs -decodeUTF8 (c:c':c'':cs) | '\xe0' <= c && c <= '\xef' && - '\x80' <= c' && c' <= '\xbf' && - '\x80' <= c'' && c'' <= '\xbf' = - toEnum ((fromEnum c `mod` 0x10 * 0x1000) + (fromEnum c' `mod` 0x40) * 0x40 + fromEnum c'' `mod` 0x40) : decodeUTF8 cs -decodeUTF8 s = s ---- AR workaround 22/6/2006 -----decodeUTF8 _ = error "UniChar.decodeUTF8: bad data" - -encodeUTF8 :: String -> String -encodeUTF8 "" = "" -encodeUTF8 (c:cs) = - if c > '\x0000' && c < '\x0080' then - c : encodeUTF8 cs - else if c < toEnum 0x0800 then - let i = fromEnum c - in toEnum (0xc0 + i `div` 0x40) : - toEnum (0x80 + i `mod` 0x40) : - encodeUTF8 cs - else - let i = fromEnum c - in toEnum (0xe0 + i `div` 0x1000) : - toEnum (0x80 + (i `mod` 0x1000) `div` 0x40) : - toEnum (0x80 + i `mod` 0x40) : - encodeUTF8 cs diff --git a/src/compiler/GFC.hs b/src/compiler/GFC.hs index 8037d4f1a..1f0ac870b 100644 --- a/src/compiler/GFC.hs +++ b/src/compiler/GFC.hs @@ -17,6 +17,7 @@ import Data.Maybe import Data.Binary import System.FilePath import System.IO +import Control.Exception mainGFC :: Options -> [FilePath] -> IOE () @@ -81,8 +82,8 @@ writeOutput opts file str = do let path = case flag optOutputDir opts of Nothing -> file Just dir -> dir file - writeOutputFile opts path str - -writeOutputFile :: Options -> FilePath -> String -> IOE () -writeOutputFile opts outfile output = - do putPointE Normal opts ("Writing " ++ outfile ++ "...") $ ioeIO $ writeFile outfile output + putPointE Normal opts ("Writing " ++ path ++ "...") $ ioeIO $ + bracket + (openFile path WriteMode) + (hClose) + (\h -> hSetEncoding h utf8 >> hPutStr h str) diff --git a/src/compiler/GFI.hs b/src/compiler/GFI.hs index 9561c407f..a0806ce94 100644 --- a/src/compiler/GFI.hs +++ b/src/compiler/GFI.hs @@ -21,7 +21,6 @@ import GF.Infra.Option import GF.Infra.Modules (greatestResource, modules, emptyModInfo) import GF.System.Readline -import GF.Text.Coding import GF.Compile.Coding import PGF @@ -34,6 +33,7 @@ import Data.List(isPrefixOf) import qualified Data.Map as Map import qualified Data.ByteString.Char8 as BS import qualified Text.ParserCombinators.ReadP as RP +import System.IO import System.Cmd import System.CPUTime import System.Directory @@ -86,9 +86,7 @@ loop opts gfenv0 = do s0 <- fetch let gfenv = gfenv0 {history = s0 : history gfenv0} let - enc = encode gfenv - s = decode gfenv s0 - pwords = case words s of + pwords = case words s0 of w:ws -> getCommandOp w :ws ws -> ws @@ -130,8 +128,8 @@ loop opts gfenv0 = do case runP pExp (BS.pack s) of Left (_,msg) -> putStrLn msg Right t -> case checkComputeTerm sgr (codeTerm (decode gfenv) (L (0,0) t)) of - Ok x -> putStrLn $ enc (showTerm sgr style q x) - Bad s -> putStrLn $ enc s + Ok x -> putStrLn $ showTerm sgr style q x + Bad s -> putStrLn $ s loopNewCPU gfenv "dg":ws -> do let stop = case ws of @@ -141,7 +139,7 @@ loop opts gfenv0 = do putStrLn "wrote graph in file _gfdepgraph.dot" loopNewCPU gfenv "eh":w:_ -> do - cs <- readFile w >>= return . map (interpretCommandLine enc env) . lines + cs <- readFile w >>= return . map (interpretCommandLine env) . lines loopNewCPU gfenv "i":args -> do @@ -179,25 +177,28 @@ loop opts gfenv0 = do _ -> putStrLn "value definition not parsed" >> loopNewCPU gfenv "ph":_ -> - mapM_ (putStrLn . enc) (reverse (history gfenv0)) >> loopNewCPU gfenv - "se":c:_ -> - case lookup c encodings of - Just cod -> do + mapM_ putStrLn (reverse (history gfenv0)) >> loopNewCPU gfenv + "se":c:_ -> do + let cod = renameEncoding c #ifdef mingw32_HOST_OS - case c of - 'c':'p':c -> case reads c of - [(cp,"")] -> setConsoleCP cp >> setConsoleOutputCP cp - _ -> return () - "utf8" -> setConsoleCP 65001 >> setConsoleOutputCP 65001 - _ -> return () + case cod of + 'C':'P':c -> case reads c of + [(cp,"")] -> do setConsoleCP cp + setConsoleOutputCP cp + _ -> return () + "UTF-8" -> do setConsoleCP 65001 + setConsoleOutputCP 65001 + _ -> return () #endif - loopNewCPU $ gfenv {coding = cod} - Nothing -> do putStrLn "unknown encoding" - loopNewCPU gfenv + enc <- mkTextEncoding cod + hSetEncoding stdin enc + hSetEncoding stdout enc + hSetEncoding stderr enc + loopNewCPU gfenv -- ordinary commands, working on CommandEnv _ -> do - interpretCommandLine enc env s + interpretCommandLine env s0 loopNewCPU gfenv -- gfenv' <- return $ either (const gfenv) id r gfenv' <- either (\e -> (print e >> return gfenv)) return r @@ -215,7 +216,7 @@ importInEnv gfenv opts files if (verbAtLeast opts Normal) then putStrLnFlush $ unwords $ "\nLanguages:" : map showCId (languages pgf1) else return () - return $ gfenv { commandenv = mkCommandEnv (coding gfenv) pgf1 } + return $ gfenv { commandenv = mkCommandEnv pgf1 } tryGetLine = do res <- try getLine @@ -252,24 +253,16 @@ data GFEnv = GFEnv { sourcegrammar :: SourceGrammar, -- gfo grammar -retain commandenv :: CommandEnv, history :: [String], - cputime :: Integer, - coding :: Encoding + cputime :: Integer } emptyGFEnv :: IO GFEnv emptyGFEnv = do -#ifdef mingw32_HOST_OS - codepage <- getACP - let coding = fromMaybe UTF_8 (lookup ("cp"++show codepage) encodings) -#else - let coding = UTF_8 -#endif - return $ GFEnv emptySourceGrammar{modules=[(identW,emptyModInfo)]} (mkCommandEnv coding emptyPGF) [] 0 coding + return $ GFEnv emptySourceGrammar{modules=[(identW,emptyModInfo)]} (mkCommandEnv emptyPGF) [] 0 -encode = encodeUnicode . coding -decode = decodeUnicode . coding +decode _ = id -- decodeUnicode . coding -wordCompletion gfenv line0 prefix0 p = +wordCompletion gfenv line prefix p = case wc_type (take p line) of CmplCmd pref -> ret ' ' [name | name <- Map.keys (commands cmdEnv), isPrefixOf pref name] @@ -280,7 +273,7 @@ wordCompletion gfenv line0 prefix0 p = in case loop state0 ws of Nothing -> ret ' ' [] Just state -> let compls = getCompletions state prefix - in ret ' ' (map (encode gfenv) (Map.keys compls)) + in ret ' ' (Map.keys compls) Left (_ :: SomeException) -> ret ' ' [] CmplOpt (Just (Command n _ _)) pref -> case Map.lookup n (commands cmdEnv) of @@ -298,9 +291,6 @@ wordCompletion gfenv line0 prefix0 p = Left (_ :: SomeException) -> ret ' ' [] _ -> ret ' ' [] where - line = decode gfenv line0 - prefix = decode gfenv prefix0 - pgf = multigrammar cmdEnv cmdEnv = commandenv gfenv optLang opts = valCIdOpts "lang" (head (languages pgf)) opts