uniform encoding: gfo and pgf in UTF8, internal in unicode

This commit is contained in:
aarne
2008-06-26 16:35:45 +00:00
parent f7622321de
commit ed708ffda6
6 changed files with 79 additions and 44 deletions

View File

@@ -62,10 +62,10 @@ emptyCommandInfo = CommandInfo {
lookCommand :: String -> Map.Map String CommandInfo -> Maybe CommandInfo
lookCommand = Map.lookup
commandHelpAll :: PGF -> [Option] -> String
commandHelpAll pgf opts = unlines
commandHelpAll :: (String -> String) -> PGF -> [Option] -> String
commandHelpAll enc pgf opts = unlines
[commandHelp (isOpt "full" opts) (co,info)
| (co,info) <- Map.assocs (allCommands pgf)]
| (co,info) <- Map.assocs (allCommands enc pgf)]
commandHelp :: Bool -> (String,CommandInfo) -> String
commandHelp full (co,info) = unlines $ [
@@ -81,8 +81,8 @@ commandHelp full (co,info) = unlines $ [
] else []
-- this list must no more be kept sorted by the command name
allCommands :: PGF -> Map.Map String CommandInfo
allCommands pgf = Map.fromList [
allCommands :: (String -> String) -> PGF -> Map.Map String CommandInfo
allCommands enc pgf = Map.fromList [
("cc", emptyCommandInfo {
longname = "compute_concrete",
syntax = "cc (-all | -table | -unqual)? TERM",
@@ -145,7 +145,8 @@ allCommands pgf = Map.fromList [
syntax = "gr [-cat=CAT] [-number=INT]",
examples = [
"gr -- one tree in the startcat of the current grammar",
"gr -cat=NP -number=16 -- 16 trees in the category NP"
"gr -cat=NP -number=16 -- 16 trees in the category NP",
"gr -lang=LangHin,LangTha -cat=Cl -- Cl, both in LangHin and LangTha"
],
explanation = unlines [
"Generates a list of random trees, by default one tree."
@@ -154,7 +155,7 @@ allCommands pgf = Map.fromList [
],
flags = [
("cat","generation category"),
("lang","excludes functions that have no linearization in this language"),
("lang","uses only functions that have linearizations in all these languages"),
("number","number of trees generated")
],
exec = \opts _ -> do
@@ -196,10 +197,10 @@ allCommands pgf = Map.fromList [
],
exec = \opts ts -> return ([], case ts of
[t] -> let co = showTree t in
case lookCommand co (allCommands pgf) of ---- new map ??!!
case lookCommand co (allCommands enc pgf) of ---- new map ??!!
Just info -> commandHelp True (co,info)
_ -> "command not found"
_ -> commandHelpAll pgf opts)
_ -> commandHelpAll enc pgf opts)
}),
("i", emptyCommandInfo {
longname = "import",
@@ -400,6 +401,15 @@ allCommands pgf = Map.fromList [
("number","the maximum number of questions")
]
}),
("se", emptyCommandInfo {
longname = "set_encoding",
synopsis = "set the encoding used in current terminal",
syntax = "se ID",
examples = [
"se cp1251 -- set encoding to cp1521",
"se utf8 -- set encoding to utf8 (default)"
]
}),
("sp", emptyCommandInfo {
longname = "system_pipe",
synopsis = "send argument to a system command",
@@ -407,7 +417,7 @@ allCommands pgf = Map.fromList [
exec = \opts arg -> do
let tmpi = "_tmpi" ---
let tmpo = "_tmpo"
writeFile tmpi $ toString arg
writeFile tmpi $ enc $ toString arg
let syst = optComm opts ++ " " ++ tmpi
system $ syst ++ " <" ++ tmpi ++ " >" ++ tmpo
s <- readFile tmpo
@@ -451,7 +461,7 @@ allCommands pgf = Map.fromList [
let file s = "_grph." ++ s
let view = optViewGraph opts ++ " "
let format = optViewFormat opts
writeFile (file "dot") grph
writeFile (file "dot") (enc grph)
system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++
" ; " ++ view ++ file format
return void
@@ -475,8 +485,8 @@ allCommands pgf = Map.fromList [
exec = \opts arg -> do
let file = valIdOpts "file" "_gftmp" opts
if isOpt "append" opts
then appendFile file (toString arg)
else writeFile file (toString arg)
then appendFile file (enc (toString arg))
else writeFile file (enc (toString arg))
return void,
options = [
("append","append to file, instead of overwriting it")
@@ -526,7 +536,8 @@ allCommands pgf = Map.fromList [
in cod : filter (/=cod) (map prOpt opts)
_ -> map prOpt opts
optRestricted opts = restrictPGF (hasLin pgf (mkCId (optLang opts))) pgf
optRestricted opts =
restrictPGF (\f -> and [hasLin pgf (mkCId la) f | la <- optLangs opts]) pgf
optLangs opts = case valIdOpts "lang" "" opts of
"" -> languages pgf

View File

@@ -17,6 +17,7 @@ import GF.System.Signal
import GF.Infra.UseIO
import GF.Data.ErrM ----
import GF.Text.UTF8
import qualified Data.Map as Map
@@ -27,25 +28,25 @@ data CommandEnv = CommandEnv {
expmacros :: Map.Map String Tree
}
mkCommandEnv :: PGF -> CommandEnv
mkCommandEnv pgf = CommandEnv pgf (allCommands pgf) Map.empty Map.empty
mkCommandEnv :: (String -> String) -> PGF -> CommandEnv
mkCommandEnv enc pgf = CommandEnv pgf (allCommands enc pgf) Map.empty Map.empty
emptyCommandEnv :: CommandEnv
emptyCommandEnv = mkCommandEnv emptyPGF
emptyCommandEnv = mkCommandEnv encodeUTF8 emptyPGF
interpretCommandLine :: CommandEnv -> String -> IO ()
interpretCommandLine env line =
interpretCommandLine :: (String -> String) -> CommandEnv -> String -> IO ()
interpretCommandLine enc env line =
case readCommandLine line of
Just [] -> return ()
Just pipes -> do res <- runInterruptibly (mapM_ (interpretPipe env) pipes)
Just pipes -> do res <- runInterruptibly (mapM_ (interpretPipe enc env) pipes)
case res of
Left ex -> putStrLnFlush (show ex)
Left ex -> putStrLnFlush $ enc (show ex)
Right x -> return x
Nothing -> putStrLnFlush "command not parsed"
interpretPipe env cs = do
interpretPipe enc env cs = do
v@(_,s) <- intercs ([],"") cs
putStrLnFlush s
putStrLnFlush $ enc s
return v
where
intercs treess [] = return treess
@@ -55,12 +56,12 @@ interpretPipe env cs = do
interc es comm@(Command co _ arg) = case co of
'%':f -> case Map.lookup f (commandmacros env) of
Just css -> do
mapM_ (interpretPipe env) (appLine (getCommandArg env arg es) css)
mapM_ (interpretPipe enc env) (appLine (getCommandArg env arg es) css)
return ([],[]) ---- return ?
_ -> do
putStrLn $ "command macro " ++ co ++ " not interpreted"
return ([],[])
_ -> interpret env es comm
_ -> interpret enc env es comm
appLine es = map (map (appCommand es))
-- macro definition applications: replace ?i by (exps !! i)
@@ -75,12 +76,12 @@ appCommand xs c@(Command i os arg) = case arg of
Abs x b -> Abs x (app b)
-- return the trees to be sent in pipe, and the output possibly printed
interpret :: CommandEnv -> [Tree] -> Command -> IO CommandOutput
interpret env trees0 comm = case lookCommand co comms of
interpret :: (String -> String) -> CommandEnv -> [Tree] -> Command -> IO CommandOutput
interpret enc env trees0 comm = case lookCommand co comms of
Just info -> do
checkOpts info
tss@(_,s) <- exec info opts trees
optTrace s
optTrace $ enc s
return tss
_ -> do
putStrLn $ "command " ++ co ++ " not interpreted"

View File

@@ -14,6 +14,9 @@ import GF.Compile.ReadFiles
import GF.Compile.Update
import GF.Compile.Refresh
import GF.Compile.Coding
import GF.Text.UTF8 ----
import GF.Grammar.Grammar
import GF.Grammar.Lookup
import GF.Grammar.PrGrammar
@@ -133,7 +136,8 @@ compileOne opts env@(_,srcgr,_) file = do
-- for compiled gf, read the file and update environment
-- also undo common subexp optimization, to enable normal computations
".gfo" -> do
sm0 <- putPointE Normal opts ("+ reading" +++ file) $ getSourceModule opts file
sm00 <- putPointE Normal opts ("+ reading" +++ file) $ getSourceModule opts file
let sm0 = codeSourceModule decodeUTF8 sm00 -- always UTF8 in gfo
let sm1 = unsubexpModule sm0
sm <- {- putPointE Normal opts "creating indirections" $ -} ioeErr $ extendModule mos sm1
@@ -148,8 +152,9 @@ compileOne opts env@(_,srcgr,_) file = do
then compileOne opts env $ gfo
else do
sm0 <- putpOpt ("- parsing" +++ file) ("- compiling" +++ file ++ "... ") $
sm00 <- putpOpt ("- parsing" +++ file) ("- compiling" +++ file ++ "... ") $
getSourceModule opts file
let sm0 = decodeStringsInModule sm00
(k',sm) <- compileSourceModule opts env sm0
let sm1 = if isConcr sm then shareModule sm else sm -- cannot expand Str
cm <- putPointE Verbose opts " generating code... " $ generateModuleCode opts gfo sm1
@@ -201,7 +206,7 @@ compileSourceModule opts env@(k,gr,_) mo@(i,mi) = do
generateModuleCode :: Options -> FilePath -> SourceModule -> IOE SourceModule
generateModuleCode opts file minfo = do
let minfo1 = subexpModule minfo
out = prGrammar (MGrammar [minfo1])
out = prGrammar (MGrammar [codeSourceModule encodeUTF8 minfo1])
putPointE Normal opts (" wrote file" +++ file) $ ioeIO $ writeFile file $ out
return minfo1

View File

@@ -58,5 +58,5 @@ outputConcr pgf = case cncnames pgf of
cnc:_ -> cnc
printPGF :: PGF -> String
printPGF = ---- encodeUTF8 . -- out by AR26/6/2008: the PGF may already be UTF8
printPGF = -- encodeUTF8 . -- fromPGF does UTF8 encoding
printTree . fromPGF

View File

@@ -97,8 +97,10 @@ canon2gfcc opts pars cgr@(M.MGrammar ((a,M.ModMod abm):cms)) =
js = tree2list (M.jments mo)
flags = Map.fromList [(mkCId f,x) | (f,x) <- moduleOptionsGFO (M.flags mo)]
opers = Map.fromAscList [] -- opers will be created as optimization
utf = if moduleFlag optEncoding (moduleOptions (M.flags mo)) == UTF_8
then D.convertStringsInTerm decodeUTF8 else id
utf = id -- trace (show lang0 +++ show flags) $
-- if moduleFlag optEncoding (moduleOptions (M.flags mo)) == UTF_8
-- then id else id
---- then (trace "decode" D.convertStringsInTerm decodeUTF8) else id
umkTerm = utf . mkTerm
lins = Map.fromAscList
[(i2i f, umkTerm tr) | (f,CncFun _ (Yes tr) _) <- js]

View File

@@ -11,6 +11,8 @@ import GF.Infra.UseIO
import GF.Infra.Option
import GF.System.Readline
import GF.Text.UTF8 ----
import PGF
import PGF.Data
import PGF.Macros
@@ -23,8 +25,8 @@ import qualified Text.ParserCombinators.ReadP as RP
import System.Cmd
import System.CPUTime
import Control.Exception
import Data.Version
import Paths_gf
mainGFI :: Options -> [FilePath] -> IO ()
@@ -39,13 +41,15 @@ loop opts gfenv0 = do
let env = commandenv gfenv0
let sgr = sourcegrammar gfenv0
setCompletionFunction (Just (wordCompletion (commandenv gfenv0)))
s <- fetchCommand (prompt env)
let gfenv = gfenv0 {history = s : history gfenv0}
s0 <- fetchCommand (prompt env)
let gfenv = gfenv0 {history = s0 : history gfenv0}
let loopNewCPU gfenv' = do
cpu' <- getCPUTime
putStrLnFlush (show ((cpu' - cputime gfenv') `div` 1000000000) ++ " msec")
loop opts $ gfenv' {cputime = cpu'}
let
enc = encode gfenv
s = decode gfenv s0
pwords = case words s of
w:ws -> getCommandOp w :ws
ws -> ws
@@ -60,8 +64,8 @@ loop opts gfenv0 = do
('-':w):ws2 -> (pTermPrintStyle w, ws2)
_ -> (TermPrintDefault, ws)
case pTerm (unwords term) >>= checkTerm sgr >>= computeTerm sgr of ---- pipe!
Ok x -> putStrLn (showTerm style x)
Bad s -> putStrLn s
Ok x -> putStrLn $ enc (showTerm style x)
Bad s -> putStrLn $ enc s
loopNewCPU gfenv
"i":args -> do
gfenv' <- case parseOptions args of
@@ -93,12 +97,14 @@ loop opts gfenv0 = do
}
_ -> putStrLn "value definition not parsed" >> loopNewCPU gfenv
"ph":_ -> mapM_ putStrLn (reverse (history gfenv0)) >> loopNewCPU gfenv
"ph":_ -> mapM_ (putStrLn . enc) (reverse (history gfenv0)) >> loopNewCPU gfenv
"se":c -> loopNewCPU $ gfenv {coding = s}
"q":_ -> putStrLn "See you." >> return gfenv
-- ordinary commands, working on CommandEnv
_ -> do
interpretCommandLine env s
interpretCommandLine enc env s
loopNewCPU gfenv
importInEnv :: GFEnv -> Options -> [FilePath] -> IO GFEnv
@@ -111,7 +117,7 @@ importInEnv gfenv opts files
pgf0 = multigrammar (commandenv gfenv)
pgf1 <- importGrammar pgf0 opts' files
putStrLnFlush $ unwords $ "\nLanguages:" : languages pgf1
return $ gfenv { commandenv = mkCommandEnv pgf1 }
return $ gfenv { commandenv = mkCommandEnv (encode gfenv) pgf1 }
welcome = unlines [
" ",
@@ -139,11 +145,21 @@ data GFEnv = GFEnv {
sourcegrammar :: Grammar, -- gfo grammar -retain
commandenv :: CommandEnv,
history :: [String],
cputime :: Integer
cputime :: Integer,
coding :: String
}
emptyGFEnv :: GFEnv
emptyGFEnv = GFEnv emptyGrammar (mkCommandEnv emptyPGF) [] 0
emptyGFEnv = GFEnv emptyGrammar (mkCommandEnv encodeUTF8 emptyPGF) [] 0 "utf8"
encode env = case coding env of
"utf8" -> encodeUTF8
_ -> id
decode env = case coding env of
"utf8" -> decodeUTF8
_ -> id
wordCompletion cmdEnv line prefix p =