use the native unicode support from GHC 6.12

This commit is contained in:
krasimir
2010-04-19 09:38:36 +00:00
parent 8b5827fc89
commit 6313244eac
23 changed files with 177 additions and 490 deletions

View File

@@ -89,7 +89,6 @@ executable gf
GF.JavaScript.AbsJS GF.JavaScript.AbsJS
GF.JavaScript.PrintJS GF.JavaScript.PrintJS
GF.Infra.CompactPrint GF.Infra.CompactPrint
GF.Text.UTF8
GF.Data.TrieMap GF.Data.TrieMap
GF.Data.Utilities GF.Data.Utilities
GF.Data.SortedList GF.Data.SortedList

View File

@@ -24,6 +24,10 @@ main = do
codepage <- getACP codepage <- getACP
setConsoleCP codepage setConsoleCP codepage
setConsoleOutputCP codepage setConsoleOutputCP codepage
enc <- mkTextEncoding ("CP"++show codepage)
hSetEncoding stdin enc
hSetEncoding stdout enc
hSetEncoding stderr enc
#endif #endif
args <- getArgs args <- getArgs
case parseOptions args of case parseOptions args of

View File

@@ -24,7 +24,7 @@ import PGF.Probabilistic -- (getProbsFromFile,prProbabilities,defaultProbabiliti
import PGF.Generate (generateRandomFrom) ---- import PGF.Generate (generateRandomFrom) ----
import GF.Compile.Export import GF.Compile.Export
import GF.Compile.ExampleBased import GF.Compile.ExampleBased
import GF.Infra.Option (noOptions, readOutputFormat, Encoding(..)) import GF.Infra.Option (noOptions, readOutputFormat)
import GF.Infra.UseIO import GF.Infra.UseIO
import GF.Data.ErrM ---- import GF.Data.ErrM ----
import GF.Command.Abstract import GF.Command.Abstract
@@ -36,7 +36,6 @@ import GF.Quiz
import GF.Command.TreeOperations ---- temporary place for typecheck and compute import GF.Command.TreeOperations ---- temporary place for typecheck and compute
import GF.Data.Operations import GF.Data.Operations
import GF.Text.Coding
import Data.List import Data.List
import Data.Maybe import Data.Maybe
@@ -77,10 +76,10 @@ emptyCommandInfo = CommandInfo {
lookCommand :: String -> Map.Map String CommandInfo -> Maybe CommandInfo lookCommand :: String -> Map.Map String CommandInfo -> Maybe CommandInfo
lookCommand = Map.lookup lookCommand = Map.lookup
commandHelpAll :: Encoding -> PGFEnv -> [Option] -> String commandHelpAll :: PGFEnv -> [Option] -> String
commandHelpAll cod pgf opts = unlines commandHelpAll pgf opts = unlines
[commandHelp (isOpt "full" opts) (co,info) [commandHelp (isOpt "full" opts) (co,info)
| (co,info) <- Map.assocs (allCommands cod pgf)] | (co,info) <- Map.assocs (allCommands pgf)]
commandHelp :: Bool -> (String,CommandInfo) -> String commandHelp :: Bool -> (String,CommandInfo) -> String
commandHelp full (co,info) = unlines $ [ commandHelp full (co,info) = unlines $ [
@@ -120,8 +119,8 @@ commandHelpTags full (co,info) = unlines $ [
type PGFEnv = (PGF, Map.Map Language Morpho) type PGFEnv = (PGF, Map.Map Language Morpho)
-- this list must no more be kept sorted by the command name -- this list must no more be kept sorted by the command name
allCommands :: Encoding -> PGFEnv -> Map.Map String CommandInfo allCommands :: PGFEnv -> Map.Map String CommandInfo
allCommands cod env@(pgf, mos) = Map.fromList [ allCommands env@(pgf, mos) = Map.fromList [
("!", emptyCommandInfo { ("!", emptyCommandInfo {
synopsis = "system command: escape to system shell", synopsis = "system command: escape to system shell",
syntax = "! SYSTEMCOMMAND", syntax = "! SYSTEMCOMMAND",
@@ -156,7 +155,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
let file s = "_grph." ++ s let file s = "_grph." ++ s
let view = optViewGraph opts ++ " " let view = optViewGraph opts ++ " "
let format = optViewFormat opts let format = optViewFormat opts
writeFile (file "dot") (enc grph) writeFile (file "dot") grph
system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++ system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++
" ; " ++ view ++ file format " ; " ++ view ++ file format
return void return void
@@ -365,10 +364,10 @@ allCommands cod env@(pgf, mos) = Map.fromList [
_ | isOpt "coding" opts -> codingMsg _ | isOpt "coding" opts -> codingMsg
_ | isOpt "license" opts -> licenseMsg _ | isOpt "license" opts -> licenseMsg
[t] -> let co = getCommandOp (showExpr [] t) in [t] -> let co = getCommandOp (showExpr [] t) in
case lookCommand co (allCommands cod env) of ---- new map ??!! case lookCommand co (allCommands env) of ---- new map ??!!
Just info -> commandHelp True (co,info) Just info -> commandHelp True (co,info)
_ -> "command not found" _ -> "command not found"
_ -> commandHelpAll cod env opts _ -> commandHelpAll env opts
in return (fromString msg), in return (fromString msg),
needsTypeCheck = False needsTypeCheck = False
}), }),
@@ -458,7 +457,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
let typ = optType opts let typ = optType opts
mprobs <- optProbs opts pgf mprobs <- optProbs opts pgf
let mt = mexp xs let mt = mexp xs
morphologyQuiz mt mprobs cod pgf lang typ morphologyQuiz mt mprobs pgf lang typ
return void, return void,
flags = [ flags = [
("lang","language of the quiz"), ("lang","language of the quiz"),
@@ -656,7 +655,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
let typ = optType opts let typ = optType opts
let mt = mexp xs let mt = mexp xs
mprobs <- optProbs opts pgf mprobs <- optProbs opts pgf
translationQuiz mt mprobs cod pgf from to typ translationQuiz mt mprobs pgf from to typ
return void, return void,
flags = [ flags = [
("from","translate from this language"), ("from","translate from this language"),
@@ -687,7 +686,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
exec = \opts arg -> do exec = \opts arg -> do
let tmpi = "_tmpi" --- let tmpi = "_tmpi" ---
let tmpo = "_tmpo" let tmpo = "_tmpo"
writeFile tmpi $ enc $ toString arg writeFile tmpi $ toString arg
let syst = optComm opts ++ " " ++ tmpi let syst = optComm opts ++ " " ++ tmpi
system $ syst ++ " <" ++ tmpi ++ " >" ++ tmpo system $ syst ++ " <" ++ tmpi ++ " >" ++ tmpo
s <- readFile tmpo s <- readFile tmpo
@@ -738,7 +737,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
let file s = "_grphd." ++ s let file s = "_grphd." ++ s
let view = optViewGraph opts ++ " " let view = optViewGraph opts ++ " "
let format = optViewFormat opts let format = optViewFormat opts
writeFile (file "dot") (enc grphs) writeFile (file "dot") grphs
system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++ system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++
" ; " ++ view ++ file format " ; " ++ view ++ file format
return void return void
@@ -779,7 +778,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
let file s = "_grph." ++ s let file s = "_grph." ++ s
let view = optViewGraph opts ++ " " let view = optViewGraph opts ++ " "
let format = optViewFormat opts let format = optViewFormat opts
writeFile (file "dot") (enc grph) writeFile (file "dot") grph
system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++ system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++
" ; " ++ view ++ file format " ; " ++ view ++ file format
return void return void
@@ -819,7 +818,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
let file s = "_grph." ++ s let file s = "_grph." ++ s
let view = optViewGraph opts ++ " " let view = optViewGraph opts ++ " "
let format = optViewFormat opts let format = optViewFormat opts
writeFile (file "dot") (enc grph) writeFile (file "dot") grph
system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++ system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++
" ; " ++ view ++ file format " ; " ++ view ++ file format
return void return void
@@ -844,8 +843,8 @@ allCommands cod env@(pgf, mos) = Map.fromList [
exec = \opts arg -> do exec = \opts arg -> do
let file = valStrOpts "file" "_gftmp" opts let file = valStrOpts "file" "_gftmp" opts
if isOpt "append" opts if isOpt "append" opts
then appendFile file (enc (toString arg)) then appendFile file (toString arg)
else writeFile file (enc (toString arg)) else writeFile file (toString arg)
return void, return void,
options = [ options = [
("append","append to file, instead of overwriting it") ("append","append to file, instead of overwriting it")
@@ -889,8 +888,6 @@ allCommands cod env@(pgf, mos) = Map.fromList [
}) })
] ]
where where
enc = encodeUnicode cod
par opts s = case optOpenTypes opts of par opts s = case optOpenTypes opts of
[] -> concat [parse pgf lang (optType opts) s | lang <- optLangs opts] [] -> concat [parse pgf lang (optType opts) s | lang <- optLangs opts]
open_typs -> concat [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts] open_typs -> concat [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts]
@@ -1063,17 +1060,17 @@ stringOpOptions = sort $ [
treeOpOptions pgf = [(op,expl) | (op,(expl,Left _)) <- allTreeOps pgf] treeOpOptions pgf = [(op,expl) | (op,(expl,Left _)) <- allTreeOps pgf]
treeOpFlags pgf = [(op,expl) | (op,(expl,Right _)) <- allTreeOps pgf] treeOpFlags pgf = [(op,expl) | (op,(expl,Right _)) <- allTreeOps pgf]
translationQuiz :: Maybe Expr -> Maybe Probabilities -> Encoding -> translationQuiz :: Maybe Expr -> Maybe Probabilities ->
PGF -> Language -> Language -> Type -> IO () PGF -> Language -> Language -> Type -> IO ()
translationQuiz mex mprobs cod pgf ig og typ = do translationQuiz mex mprobs pgf ig og typ = do
tts <- translationList mex mprobs pgf ig og typ infinity tts <- translationList mex mprobs pgf ig og typ infinity
mkQuiz cod "Welcome to GF Translation Quiz." tts mkQuiz "Welcome to GF Translation Quiz." tts
morphologyQuiz :: Maybe Expr -> Maybe Probabilities -> Encoding -> morphologyQuiz :: Maybe Expr -> Maybe Probabilities ->
PGF -> Language -> Type -> IO () PGF -> Language -> Type -> IO ()
morphologyQuiz mex mprobs cod pgf ig typ = do morphologyQuiz mex mprobs pgf ig typ = do
tts <- morphologyList mex mprobs pgf ig typ infinity tts <- morphologyList mex mprobs pgf ig typ infinity
mkQuiz cod "Welcome to GF Morphology Quiz." tts mkQuiz "Welcome to GF Morphology Quiz." tts
-- | the maximal number of precompiled quiz problems -- | the maximal number of precompiled quiz problems
infinity :: Int infinity :: Int

View File

@@ -29,24 +29,24 @@ data CommandEnv = CommandEnv {
expmacros :: Map.Map String Expr expmacros :: Map.Map String Expr
} }
mkCommandEnv :: Encoding -> PGF -> CommandEnv mkCommandEnv :: PGF -> CommandEnv
mkCommandEnv enc pgf = mkCommandEnv pgf =
let mos = Map.fromList [(la,buildMorpho pgf la) | la <- languages pgf] in let mos = Map.fromList [(la,buildMorpho pgf la) | la <- languages pgf] in
CommandEnv pgf mos (allCommands enc (pgf, mos)) Map.empty Map.empty CommandEnv pgf mos (allCommands (pgf, mos)) Map.empty Map.empty
emptyCommandEnv :: CommandEnv emptyCommandEnv :: CommandEnv
emptyCommandEnv = mkCommandEnv UTF_8 emptyPGF emptyCommandEnv = mkCommandEnv emptyPGF
interpretCommandLine :: (String -> String) -> CommandEnv -> String -> IO () interpretCommandLine :: CommandEnv -> String -> IO ()
interpretCommandLine enc env line = interpretCommandLine env line =
case readCommandLine line of case readCommandLine line of
Just [] -> return () Just [] -> return ()
Just pipes -> mapM_ (interpretPipe enc env) pipes Just pipes -> mapM_ (interpretPipe env) pipes
Nothing -> putStrLnFlush "command not parsed" Nothing -> putStrLnFlush "command not parsed"
interpretPipe enc env cs = do interpretPipe env cs = do
v@(_,s) <- intercs ([],"") cs v@(_,s) <- intercs ([],"") cs
putStrLnFlush $ enc s putStrLnFlush s
return v return v
where where
intercs treess [] = return treess intercs treess [] = return treess
@@ -57,14 +57,14 @@ interpretPipe enc env cs = do
'%':f -> case Map.lookup f (commandmacros env) of '%':f -> case Map.lookup f (commandmacros env) of
Just css -> Just css ->
case getCommandTrees env False arg es of case getCommandTrees env False arg es of
Right es -> do mapM_ (interpretPipe enc env) (appLine es css) Right es -> do mapM_ (interpretPipe env) (appLine es css)
return ([],[]) return ([],[])
Left msg -> do putStrLn ('\n':msg) Left msg -> do putStrLn ('\n':msg)
return ([],[]) return ([],[])
Nothing -> do Nothing -> do
putStrLn $ "command macro " ++ co ++ " not interpreted" putStrLn $ "command macro " ++ co ++ " not interpreted"
return ([],[]) return ([],[])
_ -> interpret enc env es comm _ -> interpret env es comm
appLine es = map (map (appCommand es)) appLine es = map (map (appCommand es))
-- macro definition applications: replace ?i by (exps !! i) -- macro definition applications: replace ?i by (exps !! i)
@@ -81,14 +81,14 @@ appCommand xs c@(Command i os arg) = case arg of
EFun x -> EFun x EFun x -> EFun x
-- return the trees to be sent in pipe, and the output possibly printed -- return the trees to be sent in pipe, and the output possibly printed
interpret :: (String -> String) -> CommandEnv -> [Expr] -> Command -> IO CommandOutput interpret :: CommandEnv -> [Expr] -> Command -> IO CommandOutput
interpret enc env trees comm = interpret env trees comm =
case getCommand env trees comm of case getCommand env trees comm of
Left msg -> do putStrLn ('\n':msg) Left msg -> do putStrLn ('\n':msg)
return ([],[]) return ([],[])
Right (info,opts,trees) -> do tss@(_,s) <- exec info opts trees Right (info,opts,trees) -> do tss@(_,s) <- exec info opts trees
if isOpt "tr" opts if isOpt "tr" opts
then putStrLn (enc s) then putStrLn s
else return () else return ()
return tss return tss

View File

@@ -12,7 +12,6 @@ import GF.Compile.Update
import GF.Compile.Refresh import GF.Compile.Refresh
import GF.Compile.Coding import GF.Compile.Coding
import GF.Text.UTF8 ----
import GF.Grammar.Grammar import GF.Grammar.Grammar
import GF.Grammar.Lookup import GF.Grammar.Lookup
@@ -82,7 +81,7 @@ compileSourceGrammar opts gr@(MGrammar ms) = do
-- to output an intermediate stage -- to output an intermediate stage
intermOut :: Options -> Dump -> Doc -> IOE () intermOut :: Options -> Dump -> Doc -> IOE ()
intermOut opts d doc intermOut opts d doc
| dump opts d = ioeIO (hPutStrLn stderr (encodeUTF8 (render (text "\n\n--#" <+> text (show d) $$ doc)))) | dump opts d = ioeIO (hPutStrLn stderr (render (text "\n\n--#" <+> text (show d) $$ doc)))
| otherwise = return () | otherwise = return ()
-- | the environment -- | the environment
@@ -162,7 +161,8 @@ compileOne opts env@(_,srcgr,_) file = do
sm00 <- putpOpt ("- parsing" +++ file) ("- compiling" +++ file ++ "... ") $ sm00 <- putpOpt ("- parsing" +++ file) ("- compiling" +++ file ++ "... ") $
getSourceModule opts file getSourceModule opts file
let sm0 = decodeStringsInModule sm00 enc <- ioeIO $ mkTextEncoding (renameEncoding (flag optEncoding (flagsModule sm00)))
let sm0 = decodeStringsInModule enc sm00
intermOut opts DumpSource (ppModule Qualified sm0) intermOut opts DumpSource (ppModule Qualified sm0)

View File

@@ -8,12 +8,14 @@ import GF.Infra.Option
import GF.Data.Operations import GF.Data.Operations
import Data.Char import Data.Char
import System.IO
import qualified Data.ByteString.Char8 as BS
encodeStringsInModule :: SourceModule -> SourceModule encodeStringsInModule :: TextEncoding -> SourceModule -> SourceModule
encodeStringsInModule = codeSourceModule (encodeUnicode UTF_8) encodeStringsInModule enc = codeSourceModule (BS.unpack . encodeUnicode enc)
decodeStringsInModule :: SourceModule -> SourceModule decodeStringsInModule :: TextEncoding -> SourceModule -> SourceModule
decodeStringsInModule mo = codeSourceModule (decodeUnicode (flag optEncoding (flagsModule mo))) mo decodeStringsInModule enc mo = codeSourceModule (decodeUnicode enc . BS.pack) mo
codeSourceModule :: (String -> String) -> SourceModule -> SourceModule codeSourceModule :: (String -> String) -> SourceModule -> SourceModule
codeSourceModule co (id,mo) = (id,replaceJudgements mo (mapTree codj (jments mo))) codeSourceModule co (id,mo) = (id,replaceJudgements mo (mapTree codj (jments mo)))

View File

@@ -22,7 +22,6 @@ import PGF.Macros
import GF.Data.Operations import GF.Data.Operations
import GF.Infra.Option import GF.Infra.Option
import GF.Text.UTF8
import Data.List --(isPrefixOf, find, intersperse) import Data.List --(isPrefixOf, find, intersperse)
import qualified Data.Map as Map import qualified Data.Map as Map
@@ -34,7 +33,7 @@ grammar2haskell :: Options
-> String -- ^ Module name. -> String -- ^ Module name.
-> PGF -> PGF
-> String -> String
grammar2haskell opts name gr = encodeUTF8 $ foldr (++++) [] $ grammar2haskell opts name gr = foldr (++++) [] $
pragmas ++ haskPreamble name ++ [types, gfinstances gId lexical gr'] pragmas ++ haskPreamble name ++ [types, gfinstances gId lexical gr']
where gr' = hSkeleton gr where gr' = hSkeleton gr
gadt = haskellOption opts HaskellGADT gadt = haskellOption opts HaskellGADT

View File

@@ -6,7 +6,6 @@ import qualified PGF.Macros as M
import qualified GF.JavaScript.AbsJS as JS import qualified GF.JavaScript.AbsJS as JS
import qualified GF.JavaScript.PrintJS as JS import qualified GF.JavaScript.PrintJS as JS
import GF.Text.UTF8
import GF.Data.ErrM import GF.Data.ErrM
import GF.Infra.Option import GF.Infra.Option
@@ -21,7 +20,7 @@ import qualified Data.IntMap as IntMap
pgf2js :: PGF -> String pgf2js :: PGF -> String
pgf2js pgf = pgf2js pgf =
encodeUTF8 $ JS.printTree $ JS.Program [JS.ElStmt $ JS.SDeclOrExpr $ JS.Decl [JS.DInit (JS.Ident n) grammar]] JS.printTree $ JS.Program [JS.ElStmt $ JS.SDeclOrExpr $ JS.Decl [JS.DInit (JS.Ident n) grammar]]
where where
n = showCId $ absname pgf n = showCId $ absname pgf
as = abstract pgf as = abstract pgf

View File

@@ -15,7 +15,6 @@ import PGF.Data
import PGF.Macros import PGF.Macros
import GF.Data.Operations import GF.Data.Operations
import GF.Text.UTF8
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.Char (isAlphaNum, isAsciiLower, isAsciiUpper, ord) import Data.Char (isAlphaNum, isAsciiLower, isAsciiUpper, ord)

View File

@@ -7,7 +7,6 @@
module GF.Data.XML (XML(..), Attr, comments, showXMLDoc, showsXMLDoc, showsXML, bottomUpXML) where module GF.Data.XML (XML(..), Attr, comments, showXMLDoc, showsXMLDoc, showsXML, bottomUpXML) where
import GF.Data.Utilities import GF.Data.Utilities
import GF.Text.UTF8
data XML = Data String | CData String | Tag String [Attr] [XML] | ETag String [Attr] | Comment String | Empty data XML = Data String | CData String | Tag String [Attr] [XML] | ETag String [Attr] | Comment String | Empty
deriving (Ord,Eq,Show) deriving (Ord,Eq,Show)
@@ -21,7 +20,7 @@ showXMLDoc :: XML -> String
showXMLDoc xml = showsXMLDoc xml "" showXMLDoc xml = showsXMLDoc xml ""
showsXMLDoc :: XML -> ShowS showsXMLDoc :: XML -> ShowS
showsXMLDoc xml = encodeUTF8 . showString header . showsXML xml showsXMLDoc xml = showString header . showsXML xml
where header = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>" where header = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
showsXML :: XML -> ShowS showsXML :: XML -> ShowS

View File

@@ -441,9 +441,7 @@ Exp6
| '?' { Meta 0 } | '?' { Meta 0 }
| '[' ']' { Empty } | '[' ']' { Empty }
| '[' Ident Exps ']' { foldl App (Vr (mkListId $2)) $3 } | '[' Ident Exps ']' { foldl App (Vr (mkListId $2)) $3 }
| '[' String ']' { case $2 of | '[' String ']' { K $2 }
[] -> Empty
str -> foldr1 C (map K (words str)) }
| '{' ListLocDef '}' {% mkR $2 } | '{' ListLocDef '}' {% mkR $2 }
| '<' ListTupleComp '>' { R (tuple2record $2) } | '<' ListTupleComp '>' { R (tuple2record $2) }
| '<' Exp ':' Exp '>' { Typed $2 $4 } | '<' Exp ':' Exp '>' { Typed $2 $4 }

View File

@@ -3,7 +3,7 @@ module GF.Infra.Option
-- * Option types -- * Option types
Options, Options,
Flags(..), Flags(..),
Mode(..), Phase(..), Verbosity(..), Encoding(..), OutputFormat(..), Mode(..), Phase(..), Verbosity(..), OutputFormat(..),
SISRFormat(..), Optimization(..), CFGTransform(..), HaskellOption(..), SISRFormat(..), Optimization(..), CFGTransform(..), HaskellOption(..),
Dump(..), Printer(..), Recomp(..), Dump(..), Printer(..), Recomp(..),
-- * Option parsing -- * Option parsing
@@ -17,7 +17,7 @@ module GF.Infra.Option
helpMessage, helpMessage,
-- * Checking specific options -- * Checking specific options
flag, cfgTransform, haskellOption, readOutputFormat, flag, cfgTransform, haskellOption, readOutputFormat,
isLexicalCat, encodings, isLexicalCat, renameEncoding,
-- * Setting specific options -- * Setting specific options
setOptimization, setCFGTransform, setOptimization, setCFGTransform,
-- * Convenience methods for checking options -- * Convenience methods for checking options
@@ -25,12 +25,13 @@ module GF.Infra.Option
) where ) where
import Control.Monad import Control.Monad
import Data.Char (toLower) import Data.Char (toLower, isDigit)
import Data.List import Data.List
import Data.Maybe import Data.Maybe
import GF.Infra.GetOpt import GF.Infra.GetOpt
--import System.Console.GetOpt --import System.Console.GetOpt
import System.FilePath import System.FilePath
import System.IO
import GF.Data.ErrM import GF.Data.ErrM
@@ -77,9 +78,6 @@ data Verbosity = Quiet | Normal | Verbose | Debug
data Phase = Preproc | Convert | Compile | Link data Phase = Preproc | Convert | Compile | Link
deriving (Show,Eq,Ord) deriving (Show,Eq,Ord)
data Encoding = UTF_8 | ISO_8859_1 | CP_1250 | CP_1251 | CP_1252 | CP_1254
deriving (Eq,Ord)
data OutputFormat = FmtPGFPretty data OutputFormat = FmtPGFPretty
| FmtJavaScript | FmtJavaScript
| FmtHaskell | FmtHaskell
@@ -161,7 +159,7 @@ data Flags = Flags {
optCncName :: Maybe String, optCncName :: Maybe String,
optResName :: Maybe String, optResName :: Maybe String,
optPreprocessors :: [String], optPreprocessors :: [String],
optEncoding :: Encoding, optEncoding :: String,
optOptimizations :: Set Optimization, optOptimizations :: Set Optimization,
optCFGTransforms :: Set CFGTransform, optCFGTransforms :: Set CFGTransform,
optLibraryPath :: [FilePath], optLibraryPath :: [FilePath],
@@ -207,7 +205,7 @@ fixRelativeLibPaths curr_dir lib_dir (Options o) = Options (fixPathFlags . o)
-- | Pretty-print the options that are preserved in .gfo files. -- | Pretty-print the options that are preserved in .gfo files.
optionsGFO :: Options -> [(String,String)] optionsGFO :: Options -> [(String,String)]
optionsGFO opts = optionsPGF opts optionsGFO opts = optionsPGF opts
++ [("coding", show (flag optEncoding opts))] ++ [("coding", flag optEncoding opts)]
-- | Pretty-print the options that are preserved in .pgf files. -- | Pretty-print the options that are preserved in .pgf files.
optionsPGF :: Options -> [(String,String)] optionsPGF :: Options -> [(String,String)]
@@ -260,7 +258,7 @@ defaultFlags = Flags {
optCncName = Nothing, optCncName = Nothing,
optResName = Nothing, optResName = Nothing,
optPreprocessors = [], optPreprocessors = [],
optEncoding = ISO_8859_1, optEncoding = "latin1",
optOptimizations = Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize], optOptimizations = Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize],
optCFGTransforms = Set.fromList [CFGRemoveCycles, CFGBottomUpFilter, optCFGTransforms = Set.fromList [CFGRemoveCycles, CFGBottomUpFilter,
CFGTopDownFilter, CFGMergeIdentical], CFGTopDownFilter, CFGMergeIdentical],
@@ -343,8 +341,7 @@ optDescr =
(unlines ["Use CMD to preprocess input files.", (unlines ["Use CMD to preprocess input files.",
"Multiple preprocessors can be used by giving this option multiple times."]), "Multiple preprocessors can be used by giving this option multiple times."]),
Option [] ["coding"] (ReqArg coding "ENCODING") Option [] ["coding"] (ReqArg coding "ENCODING")
("Character encoding of the source grammar, ENCODING = " ("Character encoding of the source grammar, ENCODING = utf8, latin1, cp1251, ..."),
++ concat (intersperse " | " (map fst encodings)) ++ "."),
Option [] ["startcat"] (ReqArg startcat "CAT") "Grammar start category.", Option [] ["startcat"] (ReqArg startcat "CAT") "Grammar start category.",
Option [] ["language"] (ReqArg language "LANG") "Set the speech language flag to LANG in the generated grammar.", Option [] ["language"] (ReqArg language "LANG") "Set the speech language flag to LANG in the generated grammar.",
Option [] ["lexer"] (ReqArg lexer "LEXER") "Use lexer LEXER.", Option [] ["lexer"] (ReqArg lexer "LEXER") "Use lexer LEXER.",
@@ -400,9 +397,7 @@ optDescr =
addLibDir x = set $ \o -> o { optLibraryPath = x:optLibraryPath o } addLibDir x = set $ \o -> o { optLibraryPath = x:optLibraryPath o }
setLibPath x = set $ \o -> o { optLibraryPath = splitInModuleSearchPath x } setLibPath x = set $ \o -> o { optLibraryPath = splitInModuleSearchPath x }
preproc x = set $ \o -> o { optPreprocessors = optPreprocessors o ++ [x] } preproc x = set $ \o -> o { optPreprocessors = optPreprocessors o ++ [x] }
coding x = case lookup x encodings of coding x = set $ \o -> o { optEncoding = x }
Just c -> set $ \o -> o { optEncoding = c }
Nothing -> fail $ "Unknown character encoding: " ++ x
startcat x = set $ \o -> o { optStartCat = Just x } startcat x = set $ \o -> o { optStartCat = Just x }
language x = set $ \o -> o { optSpeechLanguage = Just x } language x = set $ \o -> o { optSpeechLanguage = Just x }
lexer x = set $ \o -> o { optLexer = Just x } lexer x = set $ \o -> o { optLexer = Just x }
@@ -483,18 +478,14 @@ haskellOptionNames =
("gadt", HaskellGADT), ("gadt", HaskellGADT),
("lexical", HaskellLexical)] ("lexical", HaskellLexical)]
encodings :: [(String,Encoding)] -- | This is for bacward compatibility. Since GHC 6.12 we
encodings = -- started using the native Unicode support in GHC but it
[("utf8", UTF_8), -- uses different names for the code pages.
("cp1250", CP_1250), renameEncoding :: String -> String
("cp1251", CP_1251), renameEncoding "utf8" = "UTF-8"
("cp1252", CP_1252), renameEncoding "latin1" = "CP1252"
("cp1254", CP_1254), renameEncoding ('c':'p':s) | all isDigit s = 'C':'P':s
("latin1", ISO_8859_1) renameEncoding s = s
]
instance Show Encoding where
show = lookupShow encodings
lookupShow :: Eq a => [(String,a)] -> a -> String lookupShow :: Eq a => [(String,a)] -> a -> String
lookupShow xs z = fromMaybe "lookupShow" $ lookup z [(y,x) | (x,y) <- xs] lookupShow xs z = fromMaybe "lookupShow" $ lookup z [(y,x) | (x,y) <- xs]

View File

@@ -23,7 +23,6 @@ import PGF.Linearize
import GF.Data.Operations import GF.Data.Operations
import GF.Infra.UseIO import GF.Infra.UseIO
import GF.Infra.Option import GF.Infra.Option
import GF.Text.Coding
import PGF.Probabilistic import PGF.Probabilistic
import System.Random import System.Random
@@ -33,9 +32,9 @@ import Data.List (nub)
-- generic quiz function -- generic quiz function
mkQuiz :: Encoding -> String -> [(String,[String])] -> IO () mkQuiz :: String -> [(String,[String])] -> IO ()
mkQuiz cod msg tts = do mkQuiz msg tts = do
let qas = [ (encodeUnicode cod q, mkAnswer cod as) | (q,as) <- tts] let qas = [(q, mkAnswer as) | (q,as) <- tts]
teachDialogue qas msg teachDialogue qas msg
translationList :: translationList ::
@@ -62,14 +61,13 @@ morphologyList mex mprobs pgf ig typ number = do
(pwss@(pws0:_),i) <- zip ss forms, let ws = map (\pws -> snd (pws !! i)) pwss] (pwss@(pws0:_),i) <- zip ss forms, let ws = map (\pws -> snd (pws !! i)) pwss]
-- | compare answer to the list of right answers, increase score and give feedback -- | compare answer to the list of right answers, increase score and give feedback
mkAnswer :: Encoding -> [String] -> String -> (Integer, String) mkAnswer :: [String] -> String -> (Integer, String)
mkAnswer cod as s = mkAnswer as s =
if (elem (norm s) as) if (elem (norm s) as)
then (1,"Yes.") then (1,"Yes.")
else (0,"No, not" +++ s ++ ", but" ++++ enc (unlines as)) else (0,"No, not" +++ s ++ ", but" ++++ unlines as)
where where
norm = unwords . words . decodeUnicode cod norm = unwords . words
enc = encodeUnicode cod
norml = unwords . words norml = unwords . words

View File

@@ -1,91 +0,0 @@
-----------------------------------------------------------------------------
-- |
-- Module : GF.Text.CP1250
-- Maintainer : Krasimir Angelov
--
-- cp1250 is a code page used under Microsoft Windows to represent texts
-- in Central European and Eastern European languages that use Latin script,
-- such as Polish, Czech, Slovak, Hungarian, Slovene, Bosnian, Croatian,
-- Serbian (Latin script), Romanian and Albanian. It may also be used with
-- the German language; German-language texts encoded with cp1250 and cp1252
-- are identical.
--
-----------------------------------------------------------------------------
module GF.Text.CP1250 where
import Data.Char
decodeCP1250 = map convert where
convert c
| c == '\x80' = chr 0x20AC
| c == '\x82' = chr 0x201A
| c == '\x84' = chr 0x201E
| c == '\x85' = chr 0x2026
| c == '\x86' = chr 0x2020
| c == '\x87' = chr 0x2021
| c == '\x89' = chr 0x2030
| c == '\x8A' = chr 0x0160
| c == '\x8B' = chr 0x2039
| c == '\x8C' = chr 0x015A
| c == '\x8D' = chr 0x0164
| c == '\x8E' = chr 0x017D
| c == '\x8F' = chr 0x0179
| c == '\x91' = chr 0x2018
| c == '\x92' = chr 0x2019
| c == '\x93' = chr 0x201C
| c == '\x94' = chr 0x201D
| c == '\x95' = chr 0x2022
| c == '\x96' = chr 0x2013
| c == '\x97' = chr 0x2014
| c == '\x99' = chr 0x2122
| c == '\x9A' = chr 0x0161
| c == '\x9B' = chr 0x203A
| c == '\x9C' = chr 0x015B
| c == '\x9D' = chr 0x0165
| c == '\x9E' = chr 0x017E
| c == '\x9F' = chr 0x017A
| c == '\xA1' = chr 0x02C7
| c == '\xA5' = chr 0x0104
| c == '\xB9' = chr 0x0105
| c == '\xBC' = chr 0x013D
| c == '\xBE' = chr 0x013E
| otherwise = c
encodeCP1250 = map convert where
convert c
| oc == 0x20AC = '\x80'
| oc == 0x201A = '\x82'
| oc == 0x201E = '\x84'
| oc == 0x2026 = '\x85'
| oc == 0x2020 = '\x86'
| oc == 0x2021 = '\x87'
| oc == 0x2030 = '\x89'
| oc == 0x0160 = '\x8A'
| oc == 0x2039 = '\x8B'
| oc == 0x015A = '\x8C'
| oc == 0x0164 = '\x8D'
| oc == 0x017D = '\x8E'
| oc == 0x0179 = '\x8F'
| oc == 0x2018 = '\x91'
| oc == 0x2019 = '\x92'
| oc == 0x201C = '\x93'
| oc == 0x201D = '\x94'
| oc == 0x2022 = '\x95'
| oc == 0x2013 = '\x96'
| oc == 0x2014 = '\x97'
| oc == 0x2122 = '\x99'
| oc == 0x0161 = '\x9A'
| oc == 0x203A = '\x9B'
| oc == 0x015B = '\x9C'
| oc == 0x0165 = '\x9D'
| oc == 0x017E = '\x9E'
| oc == 0x017A = '\x9F'
| oc == 0x02C7 = '\xA1'
| oc == 0x0104 = '\xA5'
| oc == 0x0105 = '\xB9'
| oc == 0x013D = '\xBC'
| oc == 0x013E = '\xBE'
| otherwise = c
where oc = ord c

View File

@@ -1,86 +0,0 @@
-----------------------------------------------------------------------------
-- |
-- Module : GF.Text.CP1251
-- Maintainer : Krasimir Angelov
--
-- cp1251 is a popular 8-bit character encoding, designed to cover languages
-- that use the Cyrillic alphabet such as Russian, Bulgarian, Serbian Cyrillic
-- and other languages. It is the most widely used for encoding the Bulgarian,
-- Serbian and Macedonian languages.
--
-----------------------------------------------------------------------------
module GF.Text.CP1251 where
import Data.Char
decodeCP1251 = map convert where
convert c
| c >= '\xC0' && c <= '\xFF' = chr (ord c + (0x410-0xC0))
| c == '\xA8' = chr 0x401 -- cyrillic capital letter lo
| c == '\x80' = chr 0x402
| c == '\x81' = chr 0x403
| c == '\xAA' = chr 0x404
| c == '\xBD' = chr 0x405
| c == '\xB2' = chr 0x406
| c == '\xAF' = chr 0x407
| c == '\xA3' = chr 0x408
| c == '\x8A' = chr 0x409
| c == '\x8C' = chr 0x40A
| c == '\x8E' = chr 0x40B
| c == '\x8D' = chr 0x40C
| c == '\xA1' = chr 0x40E
| c == '\x8F' = chr 0x40F
| c == '\xB8' = chr 0x451 -- cyrillic small letter lo
| c == '\x90' = chr 0x452
| c == '\x83' = chr 0x453
| c == '\xBA' = chr 0x454
| c == '\xBE' = chr 0x455
| c == '\xB3' = chr 0x456
| c == '\xBF' = chr 0x457
| c == '\xBC' = chr 0x458
| c == '\x9A' = chr 0x459
| c == '\x9C' = chr 0x45A
| c == '\x9E' = chr 0x45B
| c == '\x9D' = chr 0x45C
| c == '\xA2' = chr 0x45E
| c == '\x9F' = chr 0x45F
| c == '\xA5' = chr 0x490
| c == '\xB4' = chr 0x491
| otherwise = c
encodeCP1251 = map convert where
convert c
| oc >= 0x410 && oc <= 0x44F = chr (oc - (0x410-0xC0))
| oc == 0x401 = '\xA8' -- cyrillic capital letter lo
| oc == 0x402 = '\x80'
| oc == 0x403 = '\x81'
| oc == 0x404 = '\xAA'
| oc == 0x405 = '\xBD'
| oc == 0x406 = '\xB2'
| oc == 0x407 = '\xAF'
| oc == 0x408 = '\xA3'
| oc == 0x409 = '\x8A'
| oc == 0x40A = '\x8C'
| oc == 0x40B = '\x8E'
| oc == 0x40C = '\x8D'
| oc == 0x40E = '\xA1'
| oc == 0x40F = '\x8F'
| oc == 0x451 = '\xB8' -- cyrillic small letter lo
| oc == 0x452 = '\x90'
| oc == 0x453 = '\x83'
| oc == 0x454 = '\xBA'
| oc == 0x455 = '\xBE'
| oc == 0x456 = '\xB3'
| oc == 0x457 = '\xBF'
| oc == 0x458 = '\xBC'
| oc == 0x459 = '\x9A'
| oc == 0x45A = '\x9C'
| oc == 0x45B = '\x9E'
| oc == 0x45C = '\x9D'
| oc == 0x45E = '\xA2'
| oc == 0x45F = '\x9F'
| oc == 0x490 = '\xA5'
| oc == 0x491 = '\xB4'
| otherwise = c
where oc = ord c

View File

@@ -1,17 +0,0 @@
-----------------------------------------------------------------------------
-- |
-- Module : GF.Text.CP1252
-- Maintainer : Krasimir Angelov
--
-- cp1252 is a character encoding of the Latin alphabet, used by default in
-- the legacy components of Microsoft Windows in English and some other
-- Western languages.
--
-----------------------------------------------------------------------------
module GF.Text.CP1252 where
import Data.Char
decodeCP1252 = map id
encodeCP1252 = map (\x -> if x <= '\255' then x else '?')

View File

@@ -1,84 +0,0 @@
-----------------------------------------------------------------------------
-- |
-- Module : GF.Text.CP1254
-- Maintainer : Krasimir Angelov
--
-- cp1254 is a code page used under Microsoft Windows to write Turkish.
-- Characters with codepoints A0 through FF are compatible with ISO 8859-9.
--
-----------------------------------------------------------------------------
module GF.Text.CP1254 where
import Data.Char
decodeCP1254 = map convert where
convert c
| c == '\x80' = chr 0x20AC
| c == '\x82' = chr 0x201A
| c == '\x83' = chr 0x192
| c == '\x84' = chr 0x201E
| c == '\x85' = chr 0x2026
| c == '\x86' = chr 0x2020
| c == '\x87' = chr 0x2021
| c == '\x88' = chr 0x2C6
| c == '\x89' = chr 0x2030
| c == '\x8A' = chr 0x160
| c == '\x8B' = chr 0x2039
| c == '\x8C' = chr 0x152
| c == '\x91' = chr 0x2018
| c == '\x92' = chr 0x2019
| c == '\x93' = chr 0x201C
| c == '\x94' = chr 0x201D
| c == '\x95' = chr 0x2022
| c == '\x96' = chr 0x2013
| c == '\x97' = chr 0x2014
| c == '\x98' = chr 0x2DC
| c == '\x99' = chr 0x2122
| c == '\x9A' = chr 0x161
| c == '\x9B' = chr 0x203A
| c == '\x9C' = chr 0x153
| c == '\x9F' = chr 0x178
| c == '\xD0' = chr 0x11E
| c == '\xDD' = chr 0x130
| c == '\xDE' = chr 0x15E
| c == '\xF0' = chr 0x11F
| c == '\xFD' = chr 0x131
| c == '\xFE' = chr 0x15F
| otherwise = c
encodeCP1254 = map convert where
convert c
| oc == 0x20AC = '\x80'
| oc == 0x201A = '\x82'
| oc == 0x192 = '\x83'
| oc == 0x201E = '\x84'
| oc == 0x2026 = '\x85'
| oc == 0x2020 = '\x86'
| oc == 0x2021 = '\x87'
| oc == 0x2C6 = '\x88'
| oc == 0x2030 = '\x89'
| oc == 0x160 = '\x8A'
| oc == 0x2039 = '\x8B'
| oc == 0x152 = '\x8C'
| oc == 0x2018 = '\x91'
| oc == 0x2019 = '\x92'
| oc == 0x201C = '\x93'
| oc == 0x201D = '\x94'
| oc == 0x2022 = '\x95'
| oc == 0x2013 = '\x96'
| oc == 0x2014 = '\x97'
| oc == 0x2DC = '\x98'
| oc == 0x2122 = '\x99'
| oc == 0x161 = '\x9A'
| oc == 0x203A = '\x9B'
| oc == 0x153 = '\x9C'
| oc == 0x178 = '\x9F'
| oc == 0x11E = '\xD0'
| oc == 0x130 = '\xDD'
| oc == 0x15E = '\xDE'
| oc == 0x11F = '\xF0'
| oc == 0x131 = '\xFD'
| oc == 0x15F = '\xFE'
| otherwise = c
where oc = ord c

View File

@@ -1,24 +1,69 @@
module GF.Text.Coding where module GF.Text.Coding where
import GF.Infra.Option import qualified Data.ByteString as BS
import GF.Text.UTF8 import Data.ByteString.Internal
import GF.Text.CP1250 import GHC.IO
import GF.Text.CP1251 import GHC.IO.Buffer
import GF.Text.CP1252 import GHC.IO.Encoding
import GF.Text.CP1254 import GHC.IO.Exception
import Control.Monad
encodeUnicode e = case e of encodeUnicode :: TextEncoding -> String -> ByteString
UTF_8 -> encodeUTF8 encodeUnicode enc s =
CP_1250 -> encodeCP1250 unsafePerformIO $ do
CP_1251 -> encodeCP1251 let len = length s
CP_1252 -> encodeCP1252 cbuf0 <- newCharBuffer (len*4) ReadBuffer
CP_1254 -> encodeCP1254 foldM (\i c -> writeCharBuf (bufRaw cbuf0) i c) 0 s
_ -> id let cbuf = cbuf0{bufR=len}
case enc of
TextEncoding {mkTextEncoder=mk} -> do encoder <- mk
bss <- translate (encode encoder) cbuf
close encoder
return (BS.concat bss)
where
translate cod cbuf
| i < w = do bbuf <- newByteBuffer 128 WriteBuffer
(cbuf,bbuf) <- cod cbuf bbuf
if isEmptyBuffer bbuf
then ioe_invalidCharacter
else do let bs = PS (bufRaw bbuf) (bufL bbuf) (bufR bbuf-bufL bbuf)
bss <- translate cod cbuf
return (bs:bss)
| otherwise = return []
where
i = bufL cbuf
w = bufR cbuf
decodeUnicode e = case e of decodeUnicode :: TextEncoding -> ByteString -> String
UTF_8 -> decodeUTF8 decodeUnicode enc (PS fptr l len) =
CP_1250 -> decodeCP1250 unsafePerformIO $ do
CP_1251 -> decodeCP1251 let bbuf = Buffer{bufRaw=fptr, bufState=ReadBuffer, bufSize=len, bufL=l, bufR=l+len}
CP_1252 -> decodeCP1252 cbuf <- newCharBuffer 128 WriteBuffer
CP_1254 -> decodeCP1254 case enc of
_ -> id TextEncoding {mkTextDecoder=mk} -> do decoder <- mk
s <- translate (encode decoder) bbuf cbuf
close decoder
return s
where
translate cod bbuf cbuf
| i < w = do (bbuf,cbuf) <- cod bbuf cbuf
if isEmptyBuffer cbuf
then ioe_invalidCharacter
else unpack cod bbuf cbuf
| otherwise = return []
where
i = bufL bbuf
w = bufR bbuf
unpack cod bbuf cbuf
| i < w = do (c,i') <- readCharBuf (bufRaw cbuf) i
cs <- unpack cod bbuf cbuf{bufL=i'}
return (c:cs)
| otherwise = translate cod bbuf cbuf{bufL=0,bufR=0}
where
i = bufL cbuf
w = bufR cbuf
ioe_invalidCharacter = ioException
(IOError Nothing InvalidArgument ""
("invalid byte sequence for this encoding") Nothing Nothing)

View File

@@ -1,8 +1,6 @@
module GF.Text.Lexing (stringOp,opInEnv) where module GF.Text.Lexing (stringOp,opInEnv) where
import GF.Text.Transliterations import GF.Text.Transliterations
import GF.Text.UTF8
import GF.Text.CP1251
import Data.Char import Data.Char
import Data.List (intersperse) import Data.List (intersperse)
@@ -23,10 +21,6 @@ stringOp name = case name of
"unlexmixed" -> Just $ capitInit . appUnlexer (unlexMixed . unquote) "unlexmixed" -> Just $ capitInit . appUnlexer (unlexMixed . unquote)
"unwords" -> Just $ appUnlexer unwords "unwords" -> Just $ appUnlexer unwords
"to_html" -> Just wrapHTML "to_html" -> Just wrapHTML
"to_utf8" -> Just encodeUTF8
"from_utf8" -> Just decodeUTF8
"to_cp1251" -> Just encodeCP1251
"from_cp1251" -> Just decodeCP1251
_ -> transliterate name _ -> transliterate name
-- perform op in environments beg--end, t.ex. between "--" -- perform op in environments beg--end, t.ex. between "--"

View File

@@ -5,8 +5,6 @@ module GF.Text.Transliterations (
transliterationPrintNames transliterationPrintNames
) where ) where
import GF.Text.UTF8
import Data.Char import Data.Char
import Numeric import Numeric
import qualified Data.Map as Map import qualified Data.Map as Map

View File

@@ -1,48 +0,0 @@
----------------------------------------------------------------------
-- |
-- Module : UTF8
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:23:42 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.5 $
--
-- From the Char module supplied with HBC.
-- code by Thomas Hallgren (Jul 10 1999)
-----------------------------------------------------------------------------
module GF.Text.UTF8 (decodeUTF8, encodeUTF8) where
-- | Take a Unicode string and encode it as a string
-- with the UTF8 method.
decodeUTF8 :: String -> String
decodeUTF8 "" = ""
decodeUTF8 (c:cs) | c < '\x80' = c : decodeUTF8 cs
decodeUTF8 (c:c':cs) | '\xc0' <= c && c <= '\xdf' &&
'\x80' <= c' && c' <= '\xbf' =
toEnum ((fromEnum c `mod` 0x20) * 0x40 + fromEnum c' `mod` 0x40) : decodeUTF8 cs
decodeUTF8 (c:c':c'':cs) | '\xe0' <= c && c <= '\xef' &&
'\x80' <= c' && c' <= '\xbf' &&
'\x80' <= c'' && c'' <= '\xbf' =
toEnum ((fromEnum c `mod` 0x10 * 0x1000) + (fromEnum c' `mod` 0x40) * 0x40 + fromEnum c'' `mod` 0x40) : decodeUTF8 cs
decodeUTF8 s = s ---- AR workaround 22/6/2006
----decodeUTF8 _ = error "UniChar.decodeUTF8: bad data"
encodeUTF8 :: String -> String
encodeUTF8 "" = ""
encodeUTF8 (c:cs) =
if c > '\x0000' && c < '\x0080' then
c : encodeUTF8 cs
else if c < toEnum 0x0800 then
let i = fromEnum c
in toEnum (0xc0 + i `div` 0x40) :
toEnum (0x80 + i `mod` 0x40) :
encodeUTF8 cs
else
let i = fromEnum c
in toEnum (0xe0 + i `div` 0x1000) :
toEnum (0x80 + (i `mod` 0x1000) `div` 0x40) :
toEnum (0x80 + i `mod` 0x40) :
encodeUTF8 cs

View File

@@ -17,6 +17,7 @@ import Data.Maybe
import Data.Binary import Data.Binary
import System.FilePath import System.FilePath
import System.IO import System.IO
import Control.Exception
mainGFC :: Options -> [FilePath] -> IOE () mainGFC :: Options -> [FilePath] -> IOE ()
@@ -81,8 +82,8 @@ writeOutput opts file str =
do let path = case flag optOutputDir opts of do let path = case flag optOutputDir opts of
Nothing -> file Nothing -> file
Just dir -> dir </> file Just dir -> dir </> file
writeOutputFile opts path str putPointE Normal opts ("Writing " ++ path ++ "...") $ ioeIO $
bracket
writeOutputFile :: Options -> FilePath -> String -> IOE () (openFile path WriteMode)
writeOutputFile opts outfile output = (hClose)
do putPointE Normal opts ("Writing " ++ outfile ++ "...") $ ioeIO $ writeFile outfile output (\h -> hSetEncoding h utf8 >> hPutStr h str)

View File

@@ -21,7 +21,6 @@ import GF.Infra.Option
import GF.Infra.Modules (greatestResource, modules, emptyModInfo) import GF.Infra.Modules (greatestResource, modules, emptyModInfo)
import GF.System.Readline import GF.System.Readline
import GF.Text.Coding
import GF.Compile.Coding import GF.Compile.Coding
import PGF import PGF
@@ -34,6 +33,7 @@ import Data.List(isPrefixOf)
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Char8 as BS
import qualified Text.ParserCombinators.ReadP as RP import qualified Text.ParserCombinators.ReadP as RP
import System.IO
import System.Cmd import System.Cmd
import System.CPUTime import System.CPUTime
import System.Directory import System.Directory
@@ -86,9 +86,7 @@ loop opts gfenv0 = do
s0 <- fetch s0 <- fetch
let gfenv = gfenv0 {history = s0 : history gfenv0} let gfenv = gfenv0 {history = s0 : history gfenv0}
let let
enc = encode gfenv pwords = case words s0 of
s = decode gfenv s0
pwords = case words s of
w:ws -> getCommandOp w :ws w:ws -> getCommandOp w :ws
ws -> ws ws -> ws
@@ -130,8 +128,8 @@ loop opts gfenv0 = do
case runP pExp (BS.pack s) of case runP pExp (BS.pack s) of
Left (_,msg) -> putStrLn msg Left (_,msg) -> putStrLn msg
Right t -> case checkComputeTerm sgr (codeTerm (decode gfenv) (L (0,0) t)) of Right t -> case checkComputeTerm sgr (codeTerm (decode gfenv) (L (0,0) t)) of
Ok x -> putStrLn $ enc (showTerm sgr style q x) Ok x -> putStrLn $ showTerm sgr style q x
Bad s -> putStrLn $ enc s Bad s -> putStrLn $ s
loopNewCPU gfenv loopNewCPU gfenv
"dg":ws -> do "dg":ws -> do
let stop = case ws of let stop = case ws of
@@ -141,7 +139,7 @@ loop opts gfenv0 = do
putStrLn "wrote graph in file _gfdepgraph.dot" putStrLn "wrote graph in file _gfdepgraph.dot"
loopNewCPU gfenv loopNewCPU gfenv
"eh":w:_ -> do "eh":w:_ -> do
cs <- readFile w >>= return . map (interpretCommandLine enc env) . lines cs <- readFile w >>= return . map (interpretCommandLine env) . lines
loopNewCPU gfenv loopNewCPU gfenv
"i":args -> do "i":args -> do
@@ -179,25 +177,28 @@ loop opts gfenv0 = do
_ -> putStrLn "value definition not parsed" >> loopNewCPU gfenv _ -> putStrLn "value definition not parsed" >> loopNewCPU gfenv
"ph":_ -> "ph":_ ->
mapM_ (putStrLn . enc) (reverse (history gfenv0)) >> loopNewCPU gfenv mapM_ putStrLn (reverse (history gfenv0)) >> loopNewCPU gfenv
"se":c:_ -> "se":c:_ -> do
case lookup c encodings of let cod = renameEncoding c
Just cod -> do
#ifdef mingw32_HOST_OS #ifdef mingw32_HOST_OS
case c of case cod of
'c':'p':c -> case reads c of 'C':'P':c -> case reads c of
[(cp,"")] -> setConsoleCP cp >> setConsoleOutputCP cp [(cp,"")] -> do setConsoleCP cp
_ -> return () setConsoleOutputCP cp
"utf8" -> setConsoleCP 65001 >> setConsoleOutputCP 65001 _ -> return ()
_ -> return () "UTF-8" -> do setConsoleCP 65001
setConsoleOutputCP 65001
_ -> return ()
#endif #endif
loopNewCPU $ gfenv {coding = cod} enc <- mkTextEncoding cod
Nothing -> do putStrLn "unknown encoding" hSetEncoding stdin enc
loopNewCPU gfenv hSetEncoding stdout enc
hSetEncoding stderr enc
loopNewCPU gfenv
-- ordinary commands, working on CommandEnv -- ordinary commands, working on CommandEnv
_ -> do _ -> do
interpretCommandLine enc env s interpretCommandLine env s0
loopNewCPU gfenv loopNewCPU gfenv
-- gfenv' <- return $ either (const gfenv) id r -- gfenv' <- return $ either (const gfenv) id r
gfenv' <- either (\e -> (print e >> return gfenv)) return r gfenv' <- either (\e -> (print e >> return gfenv)) return r
@@ -215,7 +216,7 @@ importInEnv gfenv opts files
if (verbAtLeast opts Normal) if (verbAtLeast opts Normal)
then putStrLnFlush $ unwords $ "\nLanguages:" : map showCId (languages pgf1) then putStrLnFlush $ unwords $ "\nLanguages:" : map showCId (languages pgf1)
else return () else return ()
return $ gfenv { commandenv = mkCommandEnv (coding gfenv) pgf1 } return $ gfenv { commandenv = mkCommandEnv pgf1 }
tryGetLine = do tryGetLine = do
res <- try getLine res <- try getLine
@@ -252,24 +253,16 @@ data GFEnv = GFEnv {
sourcegrammar :: SourceGrammar, -- gfo grammar -retain sourcegrammar :: SourceGrammar, -- gfo grammar -retain
commandenv :: CommandEnv, commandenv :: CommandEnv,
history :: [String], history :: [String],
cputime :: Integer, cputime :: Integer
coding :: Encoding
} }
emptyGFEnv :: IO GFEnv emptyGFEnv :: IO GFEnv
emptyGFEnv = do emptyGFEnv = do
#ifdef mingw32_HOST_OS return $ GFEnv emptySourceGrammar{modules=[(identW,emptyModInfo)]} (mkCommandEnv emptyPGF) [] 0
codepage <- getACP
let coding = fromMaybe UTF_8 (lookup ("cp"++show codepage) encodings)
#else
let coding = UTF_8
#endif
return $ GFEnv emptySourceGrammar{modules=[(identW,emptyModInfo)]} (mkCommandEnv coding emptyPGF) [] 0 coding
encode = encodeUnicode . coding decode _ = id -- decodeUnicode . coding
decode = decodeUnicode . coding
wordCompletion gfenv line0 prefix0 p = wordCompletion gfenv line prefix p =
case wc_type (take p line) of case wc_type (take p line) of
CmplCmd pref CmplCmd pref
-> ret ' ' [name | name <- Map.keys (commands cmdEnv), isPrefixOf pref name] -> ret ' ' [name | name <- Map.keys (commands cmdEnv), isPrefixOf pref name]
@@ -280,7 +273,7 @@ wordCompletion gfenv line0 prefix0 p =
in case loop state0 ws of in case loop state0 ws of
Nothing -> ret ' ' [] Nothing -> ret ' ' []
Just state -> let compls = getCompletions state prefix Just state -> let compls = getCompletions state prefix
in ret ' ' (map (encode gfenv) (Map.keys compls)) in ret ' ' (Map.keys compls)
Left (_ :: SomeException) -> ret ' ' [] Left (_ :: SomeException) -> ret ' ' []
CmplOpt (Just (Command n _ _)) pref CmplOpt (Just (Command n _ _)) pref
-> case Map.lookup n (commands cmdEnv) of -> case Map.lookup n (commands cmdEnv) of
@@ -298,9 +291,6 @@ wordCompletion gfenv line0 prefix0 p =
Left (_ :: SomeException) -> ret ' ' [] Left (_ :: SomeException) -> ret ' ' []
_ -> ret ' ' [] _ -> ret ' ' []
where where
line = decode gfenv line0
prefix = decode gfenv prefix0
pgf = multigrammar cmdEnv pgf = multigrammar cmdEnv
cmdEnv = commandenv gfenv cmdEnv = commandenv gfenv
optLang opts = valCIdOpts "lang" (head (languages pgf)) opts optLang opts = valCIdOpts "lang" (head (languages pgf)) opts