use the native unicode support from GHC 6.12

2026-04-09 04:59:31 -06:00 · 2010-04-19 09:38:36 +00:00
parent 4c757f4683
commit 0b6b30d4a8
23 changed files with 177 additions and 490 deletions
--- a/GF.cabal
+++ b/GF.cabal
@@ -89,7 +89,6 @@ executable gf
    GF.JavaScript.AbsJS
    GF.JavaScript.PrintJS
    GF.Infra.CompactPrint
-    GF.Text.UTF8
    GF.Data.TrieMap
    GF.Data.Utilities
    GF.Data.SortedList
--- a/src/compiler/GF.hs
+++ b/src/compiler/GF.hs
@@ -24,6 +24,10 @@ main = do
  codepage <- getACP
  setConsoleCP codepage
  setConsoleOutputCP codepage
+  enc <- mkTextEncoding ("CP"++show codepage)
+  hSetEncoding stdin  enc
+  hSetEncoding stdout enc
+  hSetEncoding stderr enc
 #endif
  args <- getArgs
  case parseOptions args of
--- a/src/compiler/GF/Command/Commands.hs
+++ b/src/compiler/GF/Command/Commands.hs
@@ -24,7 +24,7 @@ import PGF.Probabilistic -- (getProbsFromFile,prProbabilities,defaultProbabiliti
 import PGF.Generate (generateRandomFrom) ----
 import GF.Compile.Export
 import GF.Compile.ExampleBased
-import GF.Infra.Option (noOptions, readOutputFormat, Encoding(..))
+import GF.Infra.Option (noOptions, readOutputFormat)
 import GF.Infra.UseIO
 import GF.Data.ErrM ----
 import GF.Command.Abstract
@@ -36,7 +36,6 @@ import GF.Quiz
 import GF.Command.TreeOperations ---- temporary place for typecheck and compute

 import GF.Data.Operations
-import GF.Text.Coding

 import Data.List
 import Data.Maybe
@@ -77,10 +76,10 @@ emptyCommandInfo = CommandInfo {
 lookCommand :: String -> Map.Map String CommandInfo -> Maybe CommandInfo
 lookCommand = Map.lookup

-commandHelpAll :: Encoding -> PGFEnv -> [Option] -> String
-commandHelpAll cod pgf opts = unlines
+commandHelpAll :: PGFEnv -> [Option] -> String
+commandHelpAll pgf opts = unlines
  [commandHelp (isOpt "full" opts) (co,info)
-    | (co,info) <- Map.assocs (allCommands cod pgf)]
+    | (co,info) <- Map.assocs (allCommands pgf)]

 commandHelp :: Bool -> (String,CommandInfo) -> String
 commandHelp full (co,info) = unlines $ [
@@ -120,8 +119,8 @@ commandHelpTags full (co,info) = unlines $ [
 type PGFEnv = (PGF, Map.Map Language Morpho)

 -- this list must no more be kept sorted by the command name
-allCommands :: Encoding -> PGFEnv -> Map.Map String CommandInfo
-allCommands cod env@(pgf, mos) = Map.fromList [
+allCommands :: PGFEnv -> Map.Map String CommandInfo
+allCommands env@(pgf, mos) = Map.fromList [
  ("!", emptyCommandInfo {
     synopsis = "system command: escape to system shell",
     syntax   = "! SYSTEMCOMMAND",
@@ -156,7 +155,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
           let file s = "_grph." ++ s
           let view = optViewGraph opts ++ " "
           let format = optViewFormat opts 
-           writeFile (file "dot") (enc grph)
+           writeFile (file "dot") grph
           system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++ 
                    " ; " ++ view ++ file format
           return void
@@ -365,10 +364,10 @@ allCommands cod env@(pgf, mos) = Map.fromList [
          _ | isOpt "coding" opts -> codingMsg
          _ | isOpt "license" opts -> licenseMsg
          [t] -> let co = getCommandOp (showExpr [] t) in 
-                 case lookCommand co (allCommands cod env) of   ---- new map ??!!
+                 case lookCommand co (allCommands env) of   ---- new map ??!!
                   Just info -> commandHelp True (co,info)
                   _ -> "command not found"
-          _ -> commandHelpAll cod env opts
+          _ -> commandHelpAll env opts
       in return (fromString msg),
     needsTypeCheck = False
     }),
@@ -458,7 +457,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
         let typ  = optType opts
         mprobs <- optProbs opts pgf
         let mt = mexp xs 
-         morphologyQuiz mt mprobs cod pgf lang typ
+         morphologyQuiz mt mprobs pgf lang typ
         return void,
     flags = [
       ("lang","language of the quiz"),
@@ -656,7 +655,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
         let typ  = optType opts
         let mt   = mexp xs
         mprobs <- optProbs opts pgf
-         translationQuiz mt mprobs cod pgf from to typ
+         translationQuiz mt mprobs pgf from to typ
         return void,
     flags = [
       ("from","translate from this language"),
@@ -687,7 +686,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
     exec = \opts arg -> do
       let tmpi = "_tmpi" ---
       let tmpo = "_tmpo"
-       writeFile tmpi $ enc $ toString arg
+       writeFile tmpi $ toString arg
       let syst = optComm opts ++ " " ++ tmpi
       system $ syst ++ " <" ++ tmpi ++ " >" ++ tmpo 
       s <- readFile tmpo
@@ -738,7 +737,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
           let file s = "_grphd." ++ s
           let view = optViewGraph opts ++ " "
           let format = optViewFormat opts 
-           writeFile (file "dot") (enc grphs)
+           writeFile (file "dot") grphs
           system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++ 
                    " ; " ++ view ++ file format
           return void
@@ -779,7 +778,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
           let file s = "_grph." ++ s
           let view = optViewGraph opts ++ " "
           let format = optViewFormat opts 
-           writeFile (file "dot") (enc grph)
+           writeFile (file "dot") grph
           system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++ 
                    " ; " ++ view ++ file format
           return void
@@ -819,7 +818,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
           let file s = "_grph." ++ s
           let view = optViewGraph opts ++ " "
           let format = optViewFormat opts 
-           writeFile (file "dot") (enc grph)
+           writeFile (file "dot") grph
           system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++ 
                    " ; " ++ view ++ file format
           return void
@@ -844,8 +843,8 @@ allCommands cod env@(pgf, mos) = Map.fromList [
     exec = \opts arg -> do
         let file = valStrOpts "file" "_gftmp" opts
         if isOpt "append" opts 
-           then appendFile file (enc (toString arg))
-           else writeFile file (enc (toString arg))
+           then appendFile file (toString arg)
+           else writeFile file (toString arg)
         return void,
     options = [
       ("append","append to file, instead of overwriting it")
@@ -889,8 +888,6 @@ allCommands cod env@(pgf, mos) = Map.fromList [
     })
  ]
 where
-   enc = encodeUnicode cod
-
   par opts s = case optOpenTypes opts of
                  []        -> concat  [parse pgf lang (optType opts) s | lang <- optLangs opts]
                  open_typs -> concat  [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts]
@@ -1063,17 +1060,17 @@ stringOpOptions = sort $ [
 treeOpOptions pgf = [(op,expl) | (op,(expl,Left  _)) <- allTreeOps pgf]
 treeOpFlags   pgf = [(op,expl) | (op,(expl,Right _)) <- allTreeOps pgf]

-translationQuiz :: Maybe Expr -> Maybe Probabilities -> Encoding -> 
+translationQuiz :: Maybe Expr -> Maybe Probabilities ->
                   PGF -> Language -> Language -> Type -> IO ()
-translationQuiz mex mprobs cod pgf ig og typ = do
+translationQuiz mex mprobs pgf ig og typ = do
  tts <- translationList mex mprobs pgf ig og typ infinity
-  mkQuiz cod "Welcome to GF Translation Quiz." tts
+  mkQuiz "Welcome to GF Translation Quiz." tts

-morphologyQuiz :: Maybe Expr -> Maybe Probabilities -> Encoding -> 
+morphologyQuiz :: Maybe Expr -> Maybe Probabilities -> 
                  PGF -> Language -> Type -> IO ()
-morphologyQuiz mex mprobs cod pgf ig typ = do
+morphologyQuiz mex mprobs pgf ig typ = do
  tts <- morphologyList mex mprobs pgf ig typ infinity
-  mkQuiz cod "Welcome to GF Morphology Quiz." tts
+  mkQuiz "Welcome to GF Morphology Quiz." tts

 -- | the maximal number of precompiled quiz problems
 infinity :: Int
--- a/src/compiler/GF/Command/Interpreter.hs
+++ b/src/compiler/GF/Command/Interpreter.hs
@@ -29,24 +29,24 @@ data CommandEnv = CommandEnv {
  expmacros     :: Map.Map String Expr
  }

-mkCommandEnv :: Encoding -> PGF -> CommandEnv
-mkCommandEnv enc pgf = 
+mkCommandEnv :: PGF -> CommandEnv
+mkCommandEnv pgf = 
  let mos = Map.fromList [(la,buildMorpho pgf la) | la <- languages pgf] in
-    CommandEnv pgf mos (allCommands enc (pgf, mos)) Map.empty Map.empty
+    CommandEnv pgf mos (allCommands (pgf, mos)) Map.empty Map.empty

 emptyCommandEnv :: CommandEnv
-emptyCommandEnv = mkCommandEnv UTF_8 emptyPGF
+emptyCommandEnv = mkCommandEnv emptyPGF

-interpretCommandLine :: (String -> String) -> CommandEnv -> String -> IO ()
-interpretCommandLine enc env line =
+interpretCommandLine :: CommandEnv -> String -> IO ()
+interpretCommandLine env line =
  case readCommandLine line of
    Just []    -> return ()
-    Just pipes -> mapM_ (interpretPipe enc env) pipes
+    Just pipes -> mapM_ (interpretPipe env) pipes
    Nothing    -> putStrLnFlush "command not parsed"

-interpretPipe enc env cs = do
+interpretPipe env cs = do
     v@(_,s) <- intercs ([],"") cs
-     putStrLnFlush $ enc s
+     putStrLnFlush s
     return v
  where
   intercs treess [] = return treess
@@ -57,14 +57,14 @@ interpretPipe enc env cs = do
     '%':f -> case Map.lookup f (commandmacros env) of
       Just css ->
         case getCommandTrees env False arg es of
-           Right es -> do mapM_ (interpretPipe enc env) (appLine es css) 
+           Right es -> do mapM_ (interpretPipe env) (appLine es css) 
                          return ([],[])
           Left msg -> do putStrLn ('\n':msg)
                          return ([],[])
       Nothing  -> do
         putStrLn $ "command macro " ++ co ++ " not interpreted"
         return ([],[])
-     _ -> interpret enc env es comm
+     _ -> interpret env es comm
   appLine es = map (map (appCommand es))

 -- macro definition applications: replace ?i by (exps !! i)
@@ -81,14 +81,14 @@ appCommand xs c@(Command i os arg) = case arg of
    EFun x     -> EFun x

 -- return the trees to be sent in pipe, and the output possibly printed
-interpret :: (String -> String) -> CommandEnv -> [Expr] -> Command -> IO CommandOutput
-interpret enc env trees comm = 
+interpret :: CommandEnv -> [Expr] -> Command -> IO CommandOutput
+interpret env trees comm = 
  case getCommand env trees comm of
    Left  msg               -> do putStrLn ('\n':msg)
                                  return ([],[])
    Right (info,opts,trees) -> do tss@(_,s) <- exec info opts trees
                                  if isOpt "tr" opts
-                                    then putStrLn (enc s)
+                                    then putStrLn s
                                    else return ()
                                  return tss

--- a/src/compiler/GF/Compile.hs
+++ b/src/compiler/GF/Compile.hs
@@ -12,7 +12,6 @@ import GF.Compile.Update
 import GF.Compile.Refresh

 import GF.Compile.Coding
-import GF.Text.UTF8 ----

 import GF.Grammar.Grammar
 import GF.Grammar.Lookup
@@ -82,7 +81,7 @@ compileSourceGrammar opts gr@(MGrammar ms) = do
 -- to output an intermediate stage
 intermOut :: Options -> Dump -> Doc -> IOE ()
 intermOut opts d doc
-  | dump opts d = ioeIO (hPutStrLn stderr (encodeUTF8 (render (text "\n\n--#" <+> text (show d) $$ doc))))
+  | dump opts d = ioeIO (hPutStrLn stderr (render (text "\n\n--#" <+> text (show d) $$ doc)))
  | otherwise   = return ()

 -- | the environment
@@ -162,7 +161,8 @@ compileOne opts env@(_,srcgr,_) file = do

       sm00 <- putpOpt ("- parsing" +++ file) ("- compiling" +++ file ++ "... ") $ 
                                           getSourceModule opts file
-       let sm0 = decodeStringsInModule sm00
+       enc <- ioeIO $ mkTextEncoding (renameEncoding (flag optEncoding (flagsModule sm00)))
+       let sm0 = decodeStringsInModule enc sm00

       intermOut opts DumpSource (ppModule Qualified sm0)

--- a/src/compiler/GF/Compile/Coding.hs
+++ b/src/compiler/GF/Compile/Coding.hs
@@ -8,12 +8,14 @@ import GF.Infra.Option
 import GF.Data.Operations

 import Data.Char
+import System.IO
+import qualified Data.ByteString.Char8 as BS

-encodeStringsInModule :: SourceModule -> SourceModule
-encodeStringsInModule = codeSourceModule (encodeUnicode UTF_8)
+encodeStringsInModule :: TextEncoding -> SourceModule -> SourceModule
+encodeStringsInModule enc = codeSourceModule (BS.unpack . encodeUnicode enc)

-decodeStringsInModule :: SourceModule -> SourceModule
-decodeStringsInModule mo = codeSourceModule (decodeUnicode (flag optEncoding (flagsModule mo))) mo
+decodeStringsInModule :: TextEncoding -> SourceModule -> SourceModule
+decodeStringsInModule enc mo = codeSourceModule (decodeUnicode enc . BS.pack) mo

 codeSourceModule :: (String -> String) -> SourceModule -> SourceModule
 codeSourceModule co (id,mo) = (id,replaceJudgements mo (mapTree codj (jments mo)))
--- a/src/compiler/GF/Compile/PGFtoHaskell.hs
+++ b/src/compiler/GF/Compile/PGFtoHaskell.hs
@@ -22,7 +22,6 @@ import PGF.Macros

 import GF.Data.Operations
 import GF.Infra.Option
-import GF.Text.UTF8

 import Data.List --(isPrefixOf, find, intersperse)
 import qualified Data.Map as Map
@@ -34,7 +33,7 @@ grammar2haskell :: Options
                -> String  -- ^ Module name.
                -> PGF
                -> String
-grammar2haskell opts name gr = encodeUTF8 $ foldr (++++) [] $  
+grammar2haskell opts name gr = foldr (++++) [] $  
  pragmas ++ haskPreamble name ++ [types, gfinstances gId lexical gr']
    where gr' = hSkeleton gr
          gadt = haskellOption opts HaskellGADT
--- a/src/compiler/GF/Compile/PGFtoJS.hs
+++ b/src/compiler/GF/Compile/PGFtoJS.hs
@@ -6,7 +6,6 @@ import qualified PGF.Macros as M
 import qualified GF.JavaScript.AbsJS as JS
 import qualified GF.JavaScript.PrintJS as JS

-import GF.Text.UTF8
 import GF.Data.ErrM
 import GF.Infra.Option

@@ -21,7 +20,7 @@ import qualified Data.IntMap as IntMap

 pgf2js :: PGF -> String
 pgf2js pgf =
-  encodeUTF8 $ JS.printTree $ JS.Program [JS.ElStmt $ JS.SDeclOrExpr $ JS.Decl [JS.DInit (JS.Ident n) grammar]]
+  JS.printTree $ JS.Program [JS.ElStmt $ JS.SDeclOrExpr $ JS.Decl [JS.DInit (JS.Ident n) grammar]]
 where
   n  = showCId $ absname pgf
   as = abstract pgf
--- a/src/compiler/GF/Compile/PGFtoProlog.hs
+++ b/src/compiler/GF/Compile/PGFtoProlog.hs
@@ -15,7 +15,6 @@ import PGF.Data
 import PGF.Macros

 import GF.Data.Operations
-import GF.Text.UTF8

 import qualified Data.Map as Map
 import Data.Char (isAlphaNum, isAsciiLower, isAsciiUpper, ord)
--- a/src/compiler/GF/Data/XML.hs
+++ b/src/compiler/GF/Data/XML.hs
@@ -7,7 +7,6 @@
 module GF.Data.XML (XML(..), Attr, comments, showXMLDoc, showsXMLDoc, showsXML, bottomUpXML) where

 import GF.Data.Utilities
-import GF.Text.UTF8

 data XML = Data String | CData String | Tag String [Attr] [XML] | ETag String [Attr] | Comment String | Empty
 deriving (Ord,Eq,Show)
@@ -21,7 +20,7 @@ showXMLDoc :: XML -> String
 showXMLDoc xml = showsXMLDoc xml ""

 showsXMLDoc :: XML -> ShowS
-showsXMLDoc xml = encodeUTF8 . showString header . showsXML xml
+showsXMLDoc xml = showString header . showsXML xml
  where header = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"

 showsXML :: XML -> ShowS
--- a/src/compiler/GF/Grammar/Parser.y
+++ b/src/compiler/GF/Grammar/Parser.y
@@ -441,9 +441,7 @@ Exp6
  | '?'                   { Meta 0 }
  | '[' ']'               { Empty }
  | '[' Ident Exps ']'    { foldl App (Vr (mkListId $2)) $3 }
-  | '[' String ']'        { case $2 of
-                              []   -> Empty
-                              str  -> foldr1 C (map K (words str)) }
+  | '[' String ']'        { K $2 }
  | '{' ListLocDef '}'    {% mkR $2 }
  | '<' ListTupleComp '>' { R (tuple2record $2) }
  | '<' Exp ':' Exp '>'   { Typed $2 $4      }
--- a/src/compiler/GF/Infra/Option.hs
+++ b/src/compiler/GF/Infra/Option.hs
@@ -3,7 +3,7 @@ module GF.Infra.Option
     -- * Option types
     Options, 
     Flags(..), 
-     Mode(..), Phase(..), Verbosity(..), Encoding(..), OutputFormat(..), 
+     Mode(..), Phase(..), Verbosity(..), OutputFormat(..), 
     SISRFormat(..), Optimization(..), CFGTransform(..), HaskellOption(..),
     Dump(..), Printer(..), Recomp(..),
     -- * Option parsing 
@@ -17,7 +17,7 @@ module GF.Infra.Option
     helpMessage,
     -- * Checking specific options
     flag, cfgTransform, haskellOption, readOutputFormat,
-     isLexicalCat, encodings,
+     isLexicalCat, renameEncoding,
     -- * Setting specific options
     setOptimization, setCFGTransform,
     -- * Convenience methods for checking options    
@@ -25,12 +25,13 @@ module GF.Infra.Option
    ) where

 import Control.Monad
-import Data.Char (toLower)
+import Data.Char (toLower, isDigit)
 import Data.List
 import Data.Maybe
 import GF.Infra.GetOpt
 --import System.Console.GetOpt
 import System.FilePath
+import System.IO

 import GF.Data.ErrM

@@ -77,9 +78,6 @@ data Verbosity = Quiet | Normal | Verbose | Debug
 data Phase = Preproc | Convert | Compile | Link
  deriving (Show,Eq,Ord)

-data Encoding = UTF_8 | ISO_8859_1 | CP_1250 | CP_1251 | CP_1252 | CP_1254
-  deriving (Eq,Ord)
-
 data OutputFormat = FmtPGFPretty
                  | FmtJavaScript 
                  | FmtHaskell 
@@ -161,7 +159,7 @@ data Flags = Flags {
      optCncName         :: Maybe String,
      optResName         :: Maybe String,
      optPreprocessors   :: [String],
-      optEncoding        :: Encoding,
+      optEncoding        :: String,
      optOptimizations   :: Set Optimization,
      optCFGTransforms   :: Set CFGTransform,
      optLibraryPath     :: [FilePath],
@@ -207,7 +205,7 @@ fixRelativeLibPaths curr_dir lib_dir (Options o) = Options (fixPathFlags . o)
 -- | Pretty-print the options that are preserved in .gfo files.
 optionsGFO :: Options -> [(String,String)]
 optionsGFO opts = optionsPGF opts
-      ++ [("coding", show (flag optEncoding opts))]
+      ++ [("coding", flag optEncoding opts)]

 -- | Pretty-print the options that are preserved in .pgf files.
 optionsPGF :: Options -> [(String,String)]
@@ -260,7 +258,7 @@ defaultFlags = Flags {
      optCncName         = Nothing,
      optResName         = Nothing,
      optPreprocessors   = [],
-      optEncoding        = ISO_8859_1,
+      optEncoding        = "latin1",
      optOptimizations   = Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize],
      optCFGTransforms   = Set.fromList [CFGRemoveCycles, CFGBottomUpFilter, 
                                         CFGTopDownFilter, CFGMergeIdentical],
@@ -343,8 +341,7 @@ optDescr =
                 (unlines ["Use CMD to preprocess input files.",
                           "Multiple preprocessors can be used by giving this option multiple times."]),
     Option [] ["coding"] (ReqArg coding "ENCODING") 
-                ("Character encoding of the source grammar, ENCODING = "
-                 ++ concat (intersperse " | " (map fst encodings)) ++ "."),
+                ("Character encoding of the source grammar, ENCODING = utf8, latin1, cp1251, ..."),
     Option [] ["startcat"] (ReqArg startcat "CAT") "Grammar start category.",
     Option [] ["language"] (ReqArg language "LANG") "Set the speech language flag to LANG in the generated grammar.",
     Option [] ["lexer"] (ReqArg lexer "LEXER") "Use lexer LEXER.",
@@ -400,9 +397,7 @@ optDescr =
       addLibDir   x = set $ \o -> o { optLibraryPath = x:optLibraryPath o }
       setLibPath  x = set $ \o -> o { optLibraryPath = splitInModuleSearchPath x }
       preproc     x = set $ \o -> o { optPreprocessors = optPreprocessors o ++ [x] }
-       coding      x = case lookup x encodings of
-                         Just c  -> set $ \o -> o { optEncoding = c }
-                         Nothing -> fail $ "Unknown character encoding: " ++ x
+       coding      x = set $ \o -> o { optEncoding = x }
       startcat    x = set $ \o -> o { optStartCat = Just x }
       language    x = set $ \o -> o { optSpeechLanguage = Just x }
       lexer       x = set $ \o -> o { optLexer = Just x }
@@ -483,18 +478,14 @@ haskellOptionNames =
     ("gadt",     HaskellGADT),
     ("lexical",  HaskellLexical)]

-encodings :: [(String,Encoding)]
-encodings = 
-    [("utf8",   UTF_8),
-     ("cp1250", CP_1250),
-     ("cp1251", CP_1251),
-     ("cp1252", CP_1252),
-     ("cp1254", CP_1254),
-     ("latin1", ISO_8859_1)
-    ]
-
-instance Show Encoding where
-    show = lookupShow encodings
+-- | This is for bacward compatibility. Since GHC 6.12 we
+-- started using the native Unicode support in GHC but it
+-- uses different names for the code pages.
+renameEncoding :: String -> String
+renameEncoding "utf8"                      = "UTF-8"
+renameEncoding "latin1"                    = "CP1252"
+renameEncoding ('c':'p':s) | all isDigit s = 'C':'P':s
+renameEncoding s                           = s

 lookupShow :: Eq a => [(String,a)] -> a -> String
 lookupShow xs z = fromMaybe "lookupShow" $ lookup z [(y,x) | (x,y) <- xs]
--- a/src/compiler/GF/Quiz.hs
+++ b/src/compiler/GF/Quiz.hs
@@ -23,7 +23,6 @@ import PGF.Linearize
 import GF.Data.Operations
 import GF.Infra.UseIO
 import GF.Infra.Option
-import GF.Text.Coding
 import PGF.Probabilistic

 import System.Random
@@ -33,9 +32,9 @@ import Data.List (nub)

 -- generic quiz function

-mkQuiz :: Encoding -> String -> [(String,[String])] -> IO ()
-mkQuiz cod msg tts = do
-  let qas = [ (encodeUnicode cod q, mkAnswer cod as) | (q,as) <- tts]
+mkQuiz :: String -> [(String,[String])] -> IO ()
+mkQuiz msg tts = do
+  let qas = [(q, mkAnswer as) | (q,as) <- tts]
  teachDialogue qas msg

 translationList :: 
@@ -62,14 +61,13 @@ morphologyList mex mprobs pgf ig typ number = do
           (pwss@(pws0:_),i) <- zip ss forms, let ws = map (\pws -> snd (pws !! i)) pwss]

 -- | compare answer to the list of right answers, increase score and give feedback 
-mkAnswer :: Encoding -> [String] -> String -> (Integer, String) 
-mkAnswer cod as s = 
+mkAnswer :: [String] -> String -> (Integer, String) 
+mkAnswer as s = 
  if (elem (norm s) as) 
     then (1,"Yes.") 
-     else (0,"No, not" +++ s ++ ", but" ++++ enc (unlines as))
+     else (0,"No, not" +++ s ++ ", but" ++++ unlines as)
 where
-   norm = unwords . words . decodeUnicode cod
-   enc = encodeUnicode cod
+   norm = unwords . words

 norml = unwords . words

--- a/src/compiler/GF/Text/CP1250.hs
+++ b/src/compiler/GF/Text/CP1250.hs
@@ -1,91 +0,0 @@
-----------------------------------------------------------------------------
-- |
-- Module      : GF.Text.CP1250
-- Maintainer  : Krasimir Angelov
--
-- cp1250 is a code page used under Microsoft Windows to represent texts 
-- in Central European and Eastern European languages that use Latin script, 
-- such as Polish, Czech, Slovak, Hungarian, Slovene, Bosnian, Croatian, 
-- Serbian (Latin script), Romanian and Albanian. It may also be used with 
-- the German language; German-language texts encoded with cp1250 and cp1252 
-- are identical.
--
-----------------------------------------------------------------------------
-
-module GF.Text.CP1250 where
-
-import Data.Char
-
-decodeCP1250 = map convert where
-  convert c
-   | c == '\x80'                = chr 0x20AC
-   | c == '\x82'                = chr 0x201A
-   | c == '\x84'                = chr 0x201E
-   | c == '\x85'                = chr 0x2026
-   | c == '\x86'                = chr 0x2020
-   | c == '\x87'                = chr 0x2021
-   | c == '\x89'                = chr 0x2030
-   | c == '\x8A'                = chr 0x0160
-   | c == '\x8B'                = chr 0x2039
-   | c == '\x8C'                = chr 0x015A
-   | c == '\x8D'                = chr 0x0164
-   | c == '\x8E'                = chr 0x017D
-   | c == '\x8F'                = chr 0x0179
-   | c == '\x91'                = chr 0x2018
-   | c == '\x92'                = chr 0x2019
-   | c == '\x93'                = chr 0x201C
-   | c == '\x94'                = chr 0x201D
-   | c == '\x95'                = chr 0x2022
-   | c == '\x96'                = chr 0x2013
-   | c == '\x97'                = chr 0x2014
-   | c == '\x99'                = chr 0x2122
-   | c == '\x9A'                = chr 0x0161
-   | c == '\x9B'                = chr 0x203A
-   | c == '\x9C'                = chr 0x015B
-   | c == '\x9D'                = chr 0x0165
-   | c == '\x9E'                = chr 0x017E
-   | c == '\x9F'                = chr 0x017A
-   | c == '\xA1'                = chr 0x02C7
-   | c == '\xA5'                = chr 0x0104
-   | c == '\xB9'                = chr 0x0105
-   | c == '\xBC'                = chr 0x013D
-   | c == '\xBE'                = chr 0x013E
-   | otherwise                  = c
-
-
-encodeCP1250 = map convert where
-  convert c
-   | oc == 0x20AC               = '\x80'
-   | oc == 0x201A               = '\x82'
-   | oc == 0x201E               = '\x84'
-   | oc == 0x2026               = '\x85'
-   | oc == 0x2020               = '\x86'
-   | oc == 0x2021               = '\x87'
-   | oc == 0x2030               = '\x89'
-   | oc == 0x0160               = '\x8A'
-   | oc == 0x2039               = '\x8B'
-   | oc == 0x015A               = '\x8C'
-   | oc == 0x0164               = '\x8D'
-   | oc == 0x017D               = '\x8E'
-   | oc == 0x0179               = '\x8F'
-   | oc == 0x2018               = '\x91'
-   | oc == 0x2019               = '\x92'
-   | oc == 0x201C               = '\x93'
-   | oc == 0x201D               = '\x94'
-   | oc == 0x2022               = '\x95'
-   | oc == 0x2013               = '\x96'
-   | oc == 0x2014               = '\x97'
-   | oc == 0x2122               = '\x99'
-   | oc == 0x0161               = '\x9A'
-   | oc == 0x203A               = '\x9B'
-   | oc == 0x015B               = '\x9C'
-   | oc == 0x0165               = '\x9D'
-   | oc == 0x017E               = '\x9E'
-   | oc == 0x017A               = '\x9F'
-   | oc == 0x02C7               = '\xA1'
-   | oc == 0x0104               = '\xA5'
-   | oc == 0x0105               = '\xB9'
-   | oc == 0x013D               = '\xBC'
-   | oc == 0x013E               = '\xBE'
-   | otherwise                  = c
-   where oc = ord c
--- a/src/compiler/GF/Text/CP1251.hs
+++ b/src/compiler/GF/Text/CP1251.hs
@@ -1,86 +0,0 @@
-----------------------------------------------------------------------------
-- |
-- Module      : GF.Text.CP1251
-- Maintainer  : Krasimir Angelov
--
-- cp1251 is a popular 8-bit character encoding, designed to cover languages
-- that use the Cyrillic alphabet such as Russian, Bulgarian, Serbian Cyrillic
-- and other languages. It is the most widely used for encoding the Bulgarian,
-- Serbian and Macedonian languages.
--
-----------------------------------------------------------------------------
-
-module GF.Text.CP1251 where
-
-import Data.Char
-
-decodeCP1251 = map convert where
-  convert c
-   | c >= '\xC0' && c <= '\xFF' = chr (ord c + (0x410-0xC0))
-   | c == '\xA8'                = chr 0x401      -- cyrillic capital letter lo
-   | c == '\x80'                = chr 0x402
-   | c == '\x81'                = chr 0x403
-   | c == '\xAA'                = chr 0x404
-   | c == '\xBD'                = chr 0x405
-   | c == '\xB2'                = chr 0x406
-   | c == '\xAF'                = chr 0x407
-   | c == '\xA3'                = chr 0x408
-   | c == '\x8A'                = chr 0x409
-   | c == '\x8C'                = chr 0x40A
-   | c == '\x8E'                = chr 0x40B
-   | c == '\x8D'                = chr 0x40C
-   | c == '\xA1'                = chr 0x40E
-   | c == '\x8F'                = chr 0x40F
-   | c == '\xB8'                = chr 0x451      -- cyrillic small   letter lo
-   | c == '\x90'                = chr 0x452
-   | c == '\x83'                = chr 0x453
-   | c == '\xBA'                = chr 0x454
-   | c == '\xBE'                = chr 0x455
-   | c == '\xB3'                = chr 0x456
-   | c == '\xBF'                = chr 0x457
-   | c == '\xBC'                = chr 0x458
-   | c == '\x9A'                = chr 0x459
-   | c == '\x9C'                = chr 0x45A
-   | c == '\x9E'                = chr 0x45B
-   | c == '\x9D'                = chr 0x45C
-   | c == '\xA2'                = chr 0x45E
-   | c == '\x9F'                = chr 0x45F
-   | c == '\xA5'                = chr 0x490
-   | c == '\xB4'                = chr 0x491
-   | otherwise                  = c
-
-encodeCP1251 = map convert where
-  convert c
-   | oc >= 0x410 && oc <= 0x44F = chr (oc - (0x410-0xC0))
-   | oc == 0x401                = '\xA8'         -- cyrillic capital letter lo
-   | oc == 0x402                = '\x80'
-   | oc == 0x403                = '\x81'
-   | oc == 0x404                = '\xAA'
-   | oc == 0x405                = '\xBD'
-   | oc == 0x406                = '\xB2'
-   | oc == 0x407                = '\xAF'
-   | oc == 0x408                = '\xA3'
-   | oc == 0x409                = '\x8A'
-   | oc == 0x40A                = '\x8C'
-   | oc == 0x40B                = '\x8E'
-   | oc == 0x40C                = '\x8D'
-   | oc == 0x40E                = '\xA1'
-   | oc == 0x40F                = '\x8F'
-   | oc == 0x451                = '\xB8'         -- cyrillic small   letter lo
-   | oc == 0x452                = '\x90'
-   | oc == 0x453                = '\x83'
-   | oc == 0x454                = '\xBA'
-   | oc == 0x455                = '\xBE'
-   | oc == 0x456                = '\xB3'
-   | oc == 0x457                = '\xBF'
-   | oc == 0x458                = '\xBC'
-   | oc == 0x459                = '\x9A'
-   | oc == 0x45A                = '\x9C'
-   | oc == 0x45B                = '\x9E'
-   | oc == 0x45C                = '\x9D'
-   | oc == 0x45E                = '\xA2'
-   | oc == 0x45F                = '\x9F'
-   | oc == 0x490                = '\xA5'
-   | oc == 0x491                = '\xB4'
-   | otherwise                  = c
-   where oc = ord c
--- a/src/compiler/GF/Text/CP1252.hs
+++ b/src/compiler/GF/Text/CP1252.hs
@@ -1,17 +0,0 @@
-----------------------------------------------------------------------------
-- |
-- Module      : GF.Text.CP1252
-- Maintainer  : Krasimir Angelov
--
-- cp1252 is a character encoding of the Latin alphabet, used by default in 
-- the legacy components of Microsoft Windows in English and some other 
-- Western languages.
--
-----------------------------------------------------------------------------
-
-module GF.Text.CP1252 where
-
-import Data.Char
-
-decodeCP1252 = map id
-encodeCP1252 = map (\x -> if x <= '\255' then x else '?')
--- a/src/compiler/GF/Text/CP1254.hs
+++ b/src/compiler/GF/Text/CP1254.hs
@@ -1,84 +0,0 @@
-----------------------------------------------------------------------------
-- |
-- Module      : GF.Text.CP1254
-- Maintainer  : Krasimir Angelov
--
-- cp1254 is a code page used under Microsoft Windows to write Turkish.
-- Characters with codepoints A0 through FF are compatible with ISO 8859-9.
--
-----------------------------------------------------------------------------
-
-module GF.Text.CP1254 where
-
-import Data.Char
-
-decodeCP1254 = map convert where
-  convert c
-   | c == '\x80'                = chr 0x20AC
-   | c == '\x82'                = chr 0x201A
-   | c == '\x83'                = chr 0x192
-   | c == '\x84'                = chr 0x201E
-   | c == '\x85'                = chr 0x2026
-   | c == '\x86'                = chr 0x2020
-   | c == '\x87'                = chr 0x2021
-   | c == '\x88'                = chr 0x2C6
-   | c == '\x89'                = chr 0x2030
-   | c == '\x8A'                = chr 0x160
-   | c == '\x8B'                = chr 0x2039
-   | c == '\x8C'                = chr 0x152
-   | c == '\x91'                = chr 0x2018
-   | c == '\x92'                = chr 0x2019
-   | c == '\x93'                = chr 0x201C
-   | c == '\x94'                = chr 0x201D
-   | c == '\x95'                = chr 0x2022
-   | c == '\x96'                = chr 0x2013
-   | c == '\x97'                = chr 0x2014
-   | c == '\x98'                = chr 0x2DC
-   | c == '\x99'                = chr 0x2122
-   | c == '\x9A'                = chr 0x161
-   | c == '\x9B'                = chr 0x203A
-   | c == '\x9C'                = chr 0x153
-   | c == '\x9F'                = chr 0x178
-   | c == '\xD0'                = chr 0x11E
-   | c == '\xDD'                = chr 0x130
-   | c == '\xDE'                = chr 0x15E
-   | c == '\xF0'                = chr 0x11F
-   | c == '\xFD'                = chr 0x131
-   | c == '\xFE'                = chr 0x15F
-   | otherwise                  = c
-
-encodeCP1254 = map convert where
-  convert c
-   | oc == 0x20AC               = '\x80'
-   | oc == 0x201A               = '\x82'
-   | oc == 0x192                = '\x83'
-   | oc == 0x201E               = '\x84'
-   | oc == 0x2026               = '\x85'
-   | oc == 0x2020               = '\x86'
-   | oc == 0x2021               = '\x87'
-   | oc == 0x2C6                = '\x88'
-   | oc == 0x2030               = '\x89'
-   | oc == 0x160                = '\x8A'
-   | oc == 0x2039               = '\x8B'
-   | oc == 0x152                = '\x8C'
-   | oc == 0x2018               = '\x91'
-   | oc == 0x2019               = '\x92'
-   | oc == 0x201C               = '\x93'
-   | oc == 0x201D               = '\x94'
-   | oc == 0x2022               = '\x95'
-   | oc == 0x2013               = '\x96'
-   | oc == 0x2014               = '\x97'
-   | oc == 0x2DC                = '\x98'
-   | oc == 0x2122               = '\x99'
-   | oc == 0x161                = '\x9A'
-   | oc == 0x203A               = '\x9B'
-   | oc == 0x153                = '\x9C'
-   | oc == 0x178                = '\x9F'
-   | oc == 0x11E                = '\xD0'
-   | oc == 0x130                = '\xDD'
-   | oc == 0x15E                = '\xDE'
-   | oc == 0x11F                = '\xF0'
-   | oc == 0x131                = '\xFD'
-   | oc == 0x15F                = '\xFE'
-   | otherwise                  = c
-   where oc = ord c
--- a/src/compiler/GF/Text/Coding.hs
+++ b/src/compiler/GF/Text/Coding.hs
@@ -1,24 +1,69 @@
 module GF.Text.Coding where

-import GF.Infra.Option
-import GF.Text.UTF8
-import GF.Text.CP1250
-import GF.Text.CP1251
-import GF.Text.CP1252
-import GF.Text.CP1254
+import qualified Data.ByteString as BS
+import Data.ByteString.Internal
+import GHC.IO
+import GHC.IO.Buffer
+import GHC.IO.Encoding
+import GHC.IO.Exception
+import Control.Monad

-encodeUnicode e = case e of
-  UTF_8   -> encodeUTF8
-  CP_1250 -> encodeCP1250
-  CP_1251 -> encodeCP1251
-  CP_1252 -> encodeCP1252
-  CP_1254 -> encodeCP1254
-  _       -> id
+encodeUnicode :: TextEncoding -> String -> ByteString
+encodeUnicode enc s =
+  unsafePerformIO $ do
+    let len = length s
+    cbuf0 <- newCharBuffer (len*4) ReadBuffer
+    foldM (\i c -> writeCharBuf (bufRaw cbuf0) i c) 0 s
+    let cbuf = cbuf0{bufR=len}
+    case enc of
+      TextEncoding {mkTextEncoder=mk} -> do encoder <- mk
+                                            bss <- translate (encode encoder) cbuf
+                                            close encoder
+                                            return (BS.concat bss)
+  where
+    translate cod cbuf
+      | i < w     = do bbuf <- newByteBuffer 128 WriteBuffer
+                       (cbuf,bbuf) <- cod cbuf bbuf
+                       if isEmptyBuffer bbuf
+                         then ioe_invalidCharacter
+                         else do let bs = PS (bufRaw bbuf) (bufL bbuf) (bufR bbuf-bufL bbuf)
+                                 bss <- translate cod cbuf
+                                 return (bs:bss)
+      | otherwise = return []
+      where
+        i = bufL cbuf
+        w = bufR cbuf

-decodeUnicode e = case e of
-  UTF_8   -> decodeUTF8
-  CP_1250 -> decodeCP1250
-  CP_1251 -> decodeCP1251
-  CP_1252 -> decodeCP1252
-  CP_1254 -> decodeCP1254
-  _       -> id
+decodeUnicode :: TextEncoding -> ByteString -> String
+decodeUnicode enc (PS fptr l len) =
+  unsafePerformIO $ do
+    let bbuf = Buffer{bufRaw=fptr, bufState=ReadBuffer, bufSize=len, bufL=l, bufR=l+len}
+    cbuf <- newCharBuffer 128 WriteBuffer
+    case enc of
+      TextEncoding {mkTextDecoder=mk} -> do decoder <- mk
+                                            s <- translate (encode decoder) bbuf cbuf
+                                            close decoder
+                                            return s
+  where
+    translate cod bbuf cbuf
+      | i < w     = do (bbuf,cbuf) <- cod bbuf cbuf
+                       if isEmptyBuffer cbuf
+                         then ioe_invalidCharacter
+                         else unpack cod bbuf cbuf
+      | otherwise = return []
+      where
+        i = bufL bbuf
+        w = bufR bbuf
+                                            
+    unpack cod bbuf cbuf
+      | i < w     = do (c,i') <- readCharBuf (bufRaw cbuf) i
+                       cs <- unpack cod bbuf cbuf{bufL=i'}
+                       return (c:cs)
+      | otherwise = translate cod bbuf cbuf{bufL=0,bufR=0}
+      where
+        i = bufL cbuf
+        w = bufR cbuf
+
+ioe_invalidCharacter = ioException
+   (IOError Nothing InvalidArgument ""
+        ("invalid byte sequence for this encoding") Nothing Nothing)
--- a/src/compiler/GF/Text/Lexing.hs
+++ b/src/compiler/GF/Text/Lexing.hs
@@ -1,8 +1,6 @@
 module GF.Text.Lexing (stringOp,opInEnv) where

 import GF.Text.Transliterations
-import GF.Text.UTF8
-import GF.Text.CP1251

 import Data.Char
 import Data.List (intersperse)
@@ -23,10 +21,6 @@ stringOp name = case name of
  "unlexmixed" -> Just $ capitInit . appUnlexer (unlexMixed . unquote)
  "unwords"    -> Just $ appUnlexer unwords
  "to_html"    -> Just wrapHTML
-  "to_utf8"    -> Just encodeUTF8
-  "from_utf8"  -> Just decodeUTF8
-  "to_cp1251"  -> Just encodeCP1251
-  "from_cp1251" -> Just decodeCP1251
  _ -> transliterate name

 -- perform op in environments beg--end, t.ex. between "--"
--- a/src/compiler/GF/Text/Transliterations.hs
+++ b/src/compiler/GF/Text/Transliterations.hs
@@ -5,8 +5,6 @@ module GF.Text.Transliterations (
  transliterationPrintNames
  ) where

-import GF.Text.UTF8
-
 import Data.Char
 import Numeric
 import qualified Data.Map as Map
--- a/src/compiler/GF/Text/UTF8.hs
+++ b/src/compiler/GF/Text/UTF8.hs
@@ -1,48 +0,0 @@
----------------------------------------------------------------------
-- |
-- Module      : UTF8
-- Maintainer  : AR
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:23:42 $ 
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.5 $
--
-- From the Char module supplied with HBC.
-- code by Thomas Hallgren (Jul 10 1999)
-----------------------------------------------------------------------------
-
-module GF.Text.UTF8 (decodeUTF8, encodeUTF8) where
-
-- | Take a Unicode string and encode it as a string
-- with the UTF8 method.
-decodeUTF8 :: String -> String
-decodeUTF8 "" = ""
-decodeUTF8 (c:cs) | c < '\x80' = c : decodeUTF8 cs
-decodeUTF8 (c:c':cs) | '\xc0' <= c  && c  <= '\xdf' && 
-		      '\x80' <= c' && c' <= '\xbf' =
-	toEnum ((fromEnum c `mod` 0x20) * 0x40 + fromEnum c' `mod` 0x40) : decodeUTF8 cs
-decodeUTF8 (c:c':c'':cs) | '\xe0' <= c   && c   <= '\xef' && 
-		          '\x80' <= c'  && c'  <= '\xbf' &&
-		          '\x80' <= c'' && c'' <= '\xbf' =
-	toEnum ((fromEnum c `mod` 0x10 * 0x1000) + (fromEnum c' `mod` 0x40) * 0x40 + fromEnum c'' `mod` 0x40) : decodeUTF8 cs
-decodeUTF8 s = s ---- AR workaround 22/6/2006
----decodeUTF8 _ = error "UniChar.decodeUTF8: bad data"
-
-encodeUTF8 :: String -> String
-encodeUTF8 "" = ""
-encodeUTF8 (c:cs) =
-	if c > '\x0000' && c < '\x0080' then
-	    c : encodeUTF8 cs
-	else if c < toEnum 0x0800 then
-	    let i = fromEnum c
-	    in  toEnum (0xc0 + i `div` 0x40) : 
-	        toEnum (0x80 + i `mod` 0x40) : 
-		encodeUTF8 cs
-	else
-	    let i = fromEnum c
-	    in  toEnum (0xe0 + i `div` 0x1000) : 
-	        toEnum (0x80 + (i `mod` 0x1000) `div` 0x40) : 
-		toEnum (0x80 + i `mod` 0x40) : 
-		encodeUTF8 cs
--- a/src/compiler/GFC.hs
+++ b/src/compiler/GFC.hs
@@ -17,6 +17,7 @@ import Data.Maybe
 import Data.Binary
 import System.FilePath
 import System.IO
+import Control.Exception


 mainGFC :: Options -> [FilePath] -> IOE ()
@@ -81,8 +82,8 @@ writeOutput opts file str =
    do let path = case flag optOutputDir opts of
                    Nothing  -> file
                    Just dir -> dir </> file
-       writeOutputFile opts path str
-
-writeOutputFile :: Options -> FilePath -> String -> IOE ()
-writeOutputFile opts outfile output = 
-      do putPointE Normal opts ("Writing " ++ outfile ++ "...") $ ioeIO $ writeFile outfile output
+       putPointE Normal opts ("Writing " ++ path ++ "...") $ ioeIO $
+         bracket
+           (openFile path WriteMode)
+           (hClose)
+           (\h -> hSetEncoding h utf8 >> hPutStr h str)
--- a/src/compiler/GFI.hs
+++ b/src/compiler/GFI.hs
@@ -21,7 +21,6 @@ import GF.Infra.Option
 import GF.Infra.Modules (greatestResource, modules, emptyModInfo)
 import GF.System.Readline

-import GF.Text.Coding
 import GF.Compile.Coding

 import PGF
@@ -34,6 +33,7 @@ import Data.List(isPrefixOf)
 import qualified Data.Map as Map
 import qualified Data.ByteString.Char8 as BS
 import qualified Text.ParserCombinators.ReadP as RP
+import System.IO
 import System.Cmd
 import System.CPUTime
 import System.Directory
@@ -86,9 +86,7 @@ loop opts gfenv0 = do
  s0 <- fetch 
  let gfenv = gfenv0 {history = s0 : history gfenv0}
  let 
-    enc = encode gfenv
-    s = decode gfenv s0
-    pwords = case words s of
+    pwords = case words s0 of
      w:ws -> getCommandOp w :ws
      ws -> ws
  
@@ -130,8 +128,8 @@ loop opts gfenv0 = do
             case runP pExp (BS.pack s) of
               Left (_,msg) -> putStrLn msg
               Right t      -> case checkComputeTerm sgr (codeTerm (decode gfenv) (L (0,0) t)) of
-                                 Ok  x -> putStrLn $ enc (showTerm sgr style q x)
-                                 Bad s -> putStrLn $ enc s
+                                 Ok  x -> putStrLn $ showTerm sgr style q x
+                                 Bad s -> putStrLn $ s
             loopNewCPU gfenv
          "dg":ws -> do
             let stop = case ws of
@@ -141,7 +139,7 @@ loop opts gfenv0 = do
             putStrLn "wrote graph in file _gfdepgraph.dot"
             loopNewCPU gfenv
          "eh":w:_ -> do
-             cs <- readFile w >>= return . map (interpretCommandLine enc env) . lines
+             cs <- readFile w >>= return . map (interpretCommandLine env) . lines
             loopNewCPU gfenv

          "i":args -> do
@@ -179,25 +177,28 @@ loop opts gfenv0 = do
               _ -> putStrLn "value definition not parsed" >> loopNewCPU gfenv

          "ph":_ -> 
-            mapM_ (putStrLn . enc) (reverse (history gfenv0)) >> loopNewCPU gfenv
-          "se":c:_ -> 
-             case lookup c encodings of
-               Just cod -> do
+            mapM_ putStrLn (reverse (history gfenv0)) >> loopNewCPU gfenv
+          "se":c:_ -> do
+             let cod = renameEncoding c
 #ifdef mingw32_HOST_OS
-                              case c of
-                                'c':'p':c -> case reads c of
-                                               [(cp,"")] -> setConsoleCP cp >> setConsoleOutputCP cp
-                                               _         -> return ()
-                                "utf8"    -> setConsoleCP 65001 >> setConsoleOutputCP 65001
-                                _         -> return ()
+             case cod of
+               'C':'P':c -> case reads c of
+                              [(cp,"")] -> do setConsoleCP cp
+                                              setConsoleOutputCP cp
+                              _         -> return ()
+               "UTF-8"   -> do setConsoleCP 65001
+                               setConsoleOutputCP 65001
+               _         -> return ()
 #endif
-                              loopNewCPU $ gfenv {coding = cod}
-               Nothing  -> do putStrLn "unknown encoding"
-                              loopNewCPU gfenv
+             enc <- mkTextEncoding cod
+             hSetEncoding stdin  enc
+             hSetEncoding stdout enc
+             hSetEncoding stderr enc
+             loopNewCPU gfenv

  -- ordinary commands, working on CommandEnv
          _ -> do
-            interpretCommandLine enc env s
+            interpretCommandLine env s0
            loopNewCPU gfenv
 --        gfenv' <- return $ either (const gfenv) id r
        gfenv' <- either (\e -> (print e >> return gfenv)) return r
@@ -215,7 +216,7 @@ importInEnv gfenv opts files
           if (verbAtLeast opts Normal)
             then putStrLnFlush $ unwords $ "\nLanguages:" : map showCId (languages pgf1)
             else return ()
-           return $ gfenv { commandenv = mkCommandEnv (coding gfenv) pgf1 }
+           return $ gfenv { commandenv = mkCommandEnv pgf1 }

 tryGetLine = do
  res <- try getLine
@@ -252,24 +253,16 @@ data GFEnv = GFEnv {
  sourcegrammar :: SourceGrammar, -- gfo grammar -retain
  commandenv :: CommandEnv,
  history    :: [String],
-  cputime    :: Integer,
-  coding     :: Encoding
+  cputime    :: Integer
  }

 emptyGFEnv :: IO GFEnv
 emptyGFEnv = do
-#ifdef mingw32_HOST_OS
-  codepage <- getACP
-  let coding = fromMaybe UTF_8 (lookup ("cp"++show codepage) encodings)
-#else
-  let coding = UTF_8
-#endif
-  return $ GFEnv emptySourceGrammar{modules=[(identW,emptyModInfo)]} (mkCommandEnv coding emptyPGF) [] 0 coding
+  return $ GFEnv emptySourceGrammar{modules=[(identW,emptyModInfo)]} (mkCommandEnv emptyPGF) [] 0

-encode = encodeUnicode . coding 
-decode = decodeUnicode . coding
+decode _ = id -- decodeUnicode . coding

-wordCompletion gfenv line0 prefix0 p =
+wordCompletion gfenv line prefix p =
  case wc_type (take p line) of
    CmplCmd pref
      -> ret ' ' [name | name <- Map.keys (commands cmdEnv), isPrefixOf pref name]
@@ -280,7 +273,7 @@ wordCompletion gfenv line0 prefix0 p =
                              in case loop state0 ws of
                                   Nothing    -> ret ' ' []
                                   Just state -> let compls = getCompletions state prefix
-                                                 in ret ' ' (map (encode gfenv) (Map.keys compls))
+                                                 in ret ' ' (Map.keys compls)
              Left (_ :: SomeException) -> ret ' ' []
    CmplOpt (Just (Command n _ _)) pref
      -> case Map.lookup n (commands cmdEnv) of
@@ -298,9 +291,6 @@ wordCompletion gfenv line0 prefix0 p =
              Left (_ :: SomeException) -> ret ' ' []
    _ -> ret ' ' []
  where
-    line   = decode gfenv line0
-    prefix = decode gfenv prefix0
-
    pgf    = multigrammar cmdEnv
    cmdEnv = commandenv gfenv
    optLang opts = valCIdOpts "lang" (head (languages pgf)) opts