uniform encoding: gfo and pgf in UTF8, internal in unicode

This commit is contained in:
aarne
2008-06-26 16:35:45 +00:00
parent f7622321de
commit ed708ffda6
6 changed files with 79 additions and 44 deletions

View File

@@ -62,10 +62,10 @@ emptyCommandInfo = CommandInfo {
lookCommand :: String -> Map.Map String CommandInfo -> Maybe CommandInfo lookCommand :: String -> Map.Map String CommandInfo -> Maybe CommandInfo
lookCommand = Map.lookup lookCommand = Map.lookup
commandHelpAll :: PGF -> [Option] -> String commandHelpAll :: (String -> String) -> PGF -> [Option] -> String
commandHelpAll pgf opts = unlines commandHelpAll enc pgf opts = unlines
[commandHelp (isOpt "full" opts) (co,info) [commandHelp (isOpt "full" opts) (co,info)
| (co,info) <- Map.assocs (allCommands pgf)] | (co,info) <- Map.assocs (allCommands enc pgf)]
commandHelp :: Bool -> (String,CommandInfo) -> String commandHelp :: Bool -> (String,CommandInfo) -> String
commandHelp full (co,info) = unlines $ [ commandHelp full (co,info) = unlines $ [
@@ -81,8 +81,8 @@ commandHelp full (co,info) = unlines $ [
] else [] ] else []
-- this list must no more be kept sorted by the command name -- this list must no more be kept sorted by the command name
allCommands :: PGF -> Map.Map String CommandInfo allCommands :: (String -> String) -> PGF -> Map.Map String CommandInfo
allCommands pgf = Map.fromList [ allCommands enc pgf = Map.fromList [
("cc", emptyCommandInfo { ("cc", emptyCommandInfo {
longname = "compute_concrete", longname = "compute_concrete",
syntax = "cc (-all | -table | -unqual)? TERM", syntax = "cc (-all | -table | -unqual)? TERM",
@@ -145,7 +145,8 @@ allCommands pgf = Map.fromList [
syntax = "gr [-cat=CAT] [-number=INT]", syntax = "gr [-cat=CAT] [-number=INT]",
examples = [ examples = [
"gr -- one tree in the startcat of the current grammar", "gr -- one tree in the startcat of the current grammar",
"gr -cat=NP -number=16 -- 16 trees in the category NP" "gr -cat=NP -number=16 -- 16 trees in the category NP",
"gr -lang=LangHin,LangTha -cat=Cl -- Cl, both in LangHin and LangTha"
], ],
explanation = unlines [ explanation = unlines [
"Generates a list of random trees, by default one tree." "Generates a list of random trees, by default one tree."
@@ -154,7 +155,7 @@ allCommands pgf = Map.fromList [
], ],
flags = [ flags = [
("cat","generation category"), ("cat","generation category"),
("lang","excludes functions that have no linearization in this language"), ("lang","uses only functions that have linearizations in all these languages"),
("number","number of trees generated") ("number","number of trees generated")
], ],
exec = \opts _ -> do exec = \opts _ -> do
@@ -196,10 +197,10 @@ allCommands pgf = Map.fromList [
], ],
exec = \opts ts -> return ([], case ts of exec = \opts ts -> return ([], case ts of
[t] -> let co = showTree t in [t] -> let co = showTree t in
case lookCommand co (allCommands pgf) of ---- new map ??!! case lookCommand co (allCommands enc pgf) of ---- new map ??!!
Just info -> commandHelp True (co,info) Just info -> commandHelp True (co,info)
_ -> "command not found" _ -> "command not found"
_ -> commandHelpAll pgf opts) _ -> commandHelpAll enc pgf opts)
}), }),
("i", emptyCommandInfo { ("i", emptyCommandInfo {
longname = "import", longname = "import",
@@ -400,6 +401,15 @@ allCommands pgf = Map.fromList [
("number","the maximum number of questions") ("number","the maximum number of questions")
] ]
}), }),
("se", emptyCommandInfo {
longname = "set_encoding",
synopsis = "set the encoding used in current terminal",
syntax = "se ID",
examples = [
"se cp1251 -- set encoding to cp1521",
"se utf8 -- set encoding to utf8 (default)"
]
}),
("sp", emptyCommandInfo { ("sp", emptyCommandInfo {
longname = "system_pipe", longname = "system_pipe",
synopsis = "send argument to a system command", synopsis = "send argument to a system command",
@@ -407,7 +417,7 @@ allCommands pgf = Map.fromList [
exec = \opts arg -> do exec = \opts arg -> do
let tmpi = "_tmpi" --- let tmpi = "_tmpi" ---
let tmpo = "_tmpo" let tmpo = "_tmpo"
writeFile tmpi $ toString arg writeFile tmpi $ enc $ toString arg
let syst = optComm opts ++ " " ++ tmpi let syst = optComm opts ++ " " ++ tmpi
system $ syst ++ " <" ++ tmpi ++ " >" ++ tmpo system $ syst ++ " <" ++ tmpi ++ " >" ++ tmpo
s <- readFile tmpo s <- readFile tmpo
@@ -451,7 +461,7 @@ allCommands pgf = Map.fromList [
let file s = "_grph." ++ s let file s = "_grph." ++ s
let view = optViewGraph opts ++ " " let view = optViewGraph opts ++ " "
let format = optViewFormat opts let format = optViewFormat opts
writeFile (file "dot") grph writeFile (file "dot") (enc grph)
system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++ system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++
" ; " ++ view ++ file format " ; " ++ view ++ file format
return void return void
@@ -475,8 +485,8 @@ allCommands pgf = Map.fromList [
exec = \opts arg -> do exec = \opts arg -> do
let file = valIdOpts "file" "_gftmp" opts let file = valIdOpts "file" "_gftmp" opts
if isOpt "append" opts if isOpt "append" opts
then appendFile file (toString arg) then appendFile file (enc (toString arg))
else writeFile file (toString arg) else writeFile file (enc (toString arg))
return void, return void,
options = [ options = [
("append","append to file, instead of overwriting it") ("append","append to file, instead of overwriting it")
@@ -526,7 +536,8 @@ allCommands pgf = Map.fromList [
in cod : filter (/=cod) (map prOpt opts) in cod : filter (/=cod) (map prOpt opts)
_ -> map prOpt opts _ -> map prOpt opts
optRestricted opts = restrictPGF (hasLin pgf (mkCId (optLang opts))) pgf optRestricted opts =
restrictPGF (\f -> and [hasLin pgf (mkCId la) f | la <- optLangs opts]) pgf
optLangs opts = case valIdOpts "lang" "" opts of optLangs opts = case valIdOpts "lang" "" opts of
"" -> languages pgf "" -> languages pgf

View File

@@ -17,6 +17,7 @@ import GF.System.Signal
import GF.Infra.UseIO import GF.Infra.UseIO
import GF.Data.ErrM ---- import GF.Data.ErrM ----
import GF.Text.UTF8
import qualified Data.Map as Map import qualified Data.Map as Map
@@ -27,25 +28,25 @@ data CommandEnv = CommandEnv {
expmacros :: Map.Map String Tree expmacros :: Map.Map String Tree
} }
mkCommandEnv :: PGF -> CommandEnv mkCommandEnv :: (String -> String) -> PGF -> CommandEnv
mkCommandEnv pgf = CommandEnv pgf (allCommands pgf) Map.empty Map.empty mkCommandEnv enc pgf = CommandEnv pgf (allCommands enc pgf) Map.empty Map.empty
emptyCommandEnv :: CommandEnv emptyCommandEnv :: CommandEnv
emptyCommandEnv = mkCommandEnv emptyPGF emptyCommandEnv = mkCommandEnv encodeUTF8 emptyPGF
interpretCommandLine :: CommandEnv -> String -> IO () interpretCommandLine :: (String -> String) -> CommandEnv -> String -> IO ()
interpretCommandLine env line = interpretCommandLine enc env line =
case readCommandLine line of case readCommandLine line of
Just [] -> return () Just [] -> return ()
Just pipes -> do res <- runInterruptibly (mapM_ (interpretPipe env) pipes) Just pipes -> do res <- runInterruptibly (mapM_ (interpretPipe enc env) pipes)
case res of case res of
Left ex -> putStrLnFlush (show ex) Left ex -> putStrLnFlush $ enc (show ex)
Right x -> return x Right x -> return x
Nothing -> putStrLnFlush "command not parsed" Nothing -> putStrLnFlush "command not parsed"
interpretPipe env cs = do interpretPipe enc env cs = do
v@(_,s) <- intercs ([],"") cs v@(_,s) <- intercs ([],"") cs
putStrLnFlush s putStrLnFlush $ enc s
return v return v
where where
intercs treess [] = return treess intercs treess [] = return treess
@@ -55,12 +56,12 @@ interpretPipe env cs = do
interc es comm@(Command co _ arg) = case co of interc es comm@(Command co _ arg) = case co of
'%':f -> case Map.lookup f (commandmacros env) of '%':f -> case Map.lookup f (commandmacros env) of
Just css -> do Just css -> do
mapM_ (interpretPipe env) (appLine (getCommandArg env arg es) css) mapM_ (interpretPipe enc env) (appLine (getCommandArg env arg es) css)
return ([],[]) ---- return ? return ([],[]) ---- return ?
_ -> do _ -> do
putStrLn $ "command macro " ++ co ++ " not interpreted" putStrLn $ "command macro " ++ co ++ " not interpreted"
return ([],[]) return ([],[])
_ -> interpret env es comm _ -> interpret enc env es comm
appLine es = map (map (appCommand es)) appLine es = map (map (appCommand es))
-- macro definition applications: replace ?i by (exps !! i) -- macro definition applications: replace ?i by (exps !! i)
@@ -75,12 +76,12 @@ appCommand xs c@(Command i os arg) = case arg of
Abs x b -> Abs x (app b) Abs x b -> Abs x (app b)
-- return the trees to be sent in pipe, and the output possibly printed -- return the trees to be sent in pipe, and the output possibly printed
interpret :: CommandEnv -> [Tree] -> Command -> IO CommandOutput interpret :: (String -> String) -> CommandEnv -> [Tree] -> Command -> IO CommandOutput
interpret env trees0 comm = case lookCommand co comms of interpret enc env trees0 comm = case lookCommand co comms of
Just info -> do Just info -> do
checkOpts info checkOpts info
tss@(_,s) <- exec info opts trees tss@(_,s) <- exec info opts trees
optTrace s optTrace $ enc s
return tss return tss
_ -> do _ -> do
putStrLn $ "command " ++ co ++ " not interpreted" putStrLn $ "command " ++ co ++ " not interpreted"

View File

@@ -14,6 +14,9 @@ import GF.Compile.ReadFiles
import GF.Compile.Update import GF.Compile.Update
import GF.Compile.Refresh import GF.Compile.Refresh
import GF.Compile.Coding
import GF.Text.UTF8 ----
import GF.Grammar.Grammar import GF.Grammar.Grammar
import GF.Grammar.Lookup import GF.Grammar.Lookup
import GF.Grammar.PrGrammar import GF.Grammar.PrGrammar
@@ -133,7 +136,8 @@ compileOne opts env@(_,srcgr,_) file = do
-- for compiled gf, read the file and update environment -- for compiled gf, read the file and update environment
-- also undo common subexp optimization, to enable normal computations -- also undo common subexp optimization, to enable normal computations
".gfo" -> do ".gfo" -> do
sm0 <- putPointE Normal opts ("+ reading" +++ file) $ getSourceModule opts file sm00 <- putPointE Normal opts ("+ reading" +++ file) $ getSourceModule opts file
let sm0 = codeSourceModule decodeUTF8 sm00 -- always UTF8 in gfo
let sm1 = unsubexpModule sm0 let sm1 = unsubexpModule sm0
sm <- {- putPointE Normal opts "creating indirections" $ -} ioeErr $ extendModule mos sm1 sm <- {- putPointE Normal opts "creating indirections" $ -} ioeErr $ extendModule mos sm1
@@ -148,8 +152,9 @@ compileOne opts env@(_,srcgr,_) file = do
then compileOne opts env $ gfo then compileOne opts env $ gfo
else do else do
sm0 <- putpOpt ("- parsing" +++ file) ("- compiling" +++ file ++ "... ") $ sm00 <- putpOpt ("- parsing" +++ file) ("- compiling" +++ file ++ "... ") $
getSourceModule opts file getSourceModule opts file
let sm0 = decodeStringsInModule sm00
(k',sm) <- compileSourceModule opts env sm0 (k',sm) <- compileSourceModule opts env sm0
let sm1 = if isConcr sm then shareModule sm else sm -- cannot expand Str let sm1 = if isConcr sm then shareModule sm else sm -- cannot expand Str
cm <- putPointE Verbose opts " generating code... " $ generateModuleCode opts gfo sm1 cm <- putPointE Verbose opts " generating code... " $ generateModuleCode opts gfo sm1
@@ -201,7 +206,7 @@ compileSourceModule opts env@(k,gr,_) mo@(i,mi) = do
generateModuleCode :: Options -> FilePath -> SourceModule -> IOE SourceModule generateModuleCode :: Options -> FilePath -> SourceModule -> IOE SourceModule
generateModuleCode opts file minfo = do generateModuleCode opts file minfo = do
let minfo1 = subexpModule minfo let minfo1 = subexpModule minfo
out = prGrammar (MGrammar [minfo1]) out = prGrammar (MGrammar [codeSourceModule encodeUTF8 minfo1])
putPointE Normal opts (" wrote file" +++ file) $ ioeIO $ writeFile file $ out putPointE Normal opts (" wrote file" +++ file) $ ioeIO $ writeFile file $ out
return minfo1 return minfo1

View File

@@ -58,5 +58,5 @@ outputConcr pgf = case cncnames pgf of
cnc:_ -> cnc cnc:_ -> cnc
printPGF :: PGF -> String printPGF :: PGF -> String
printPGF = ---- encodeUTF8 . -- out by AR26/6/2008: the PGF may already be UTF8 printPGF = -- encodeUTF8 . -- fromPGF does UTF8 encoding
printTree . fromPGF printTree . fromPGF

View File

@@ -97,8 +97,10 @@ canon2gfcc opts pars cgr@(M.MGrammar ((a,M.ModMod abm):cms)) =
js = tree2list (M.jments mo) js = tree2list (M.jments mo)
flags = Map.fromList [(mkCId f,x) | (f,x) <- moduleOptionsGFO (M.flags mo)] flags = Map.fromList [(mkCId f,x) | (f,x) <- moduleOptionsGFO (M.flags mo)]
opers = Map.fromAscList [] -- opers will be created as optimization opers = Map.fromAscList [] -- opers will be created as optimization
utf = if moduleFlag optEncoding (moduleOptions (M.flags mo)) == UTF_8 utf = id -- trace (show lang0 +++ show flags) $
then D.convertStringsInTerm decodeUTF8 else id -- if moduleFlag optEncoding (moduleOptions (M.flags mo)) == UTF_8
-- then id else id
---- then (trace "decode" D.convertStringsInTerm decodeUTF8) else id
umkTerm = utf . mkTerm umkTerm = utf . mkTerm
lins = Map.fromAscList lins = Map.fromAscList
[(i2i f, umkTerm tr) | (f,CncFun _ (Yes tr) _) <- js] [(i2i f, umkTerm tr) | (f,CncFun _ (Yes tr) _) <- js]

View File

@@ -11,6 +11,8 @@ import GF.Infra.UseIO
import GF.Infra.Option import GF.Infra.Option
import GF.System.Readline import GF.System.Readline
import GF.Text.UTF8 ----
import PGF import PGF
import PGF.Data import PGF.Data
import PGF.Macros import PGF.Macros
@@ -23,8 +25,8 @@ import qualified Text.ParserCombinators.ReadP as RP
import System.Cmd import System.Cmd
import System.CPUTime import System.CPUTime
import Control.Exception import Control.Exception
import Data.Version import Data.Version
import Paths_gf import Paths_gf
mainGFI :: Options -> [FilePath] -> IO () mainGFI :: Options -> [FilePath] -> IO ()
@@ -39,13 +41,15 @@ loop opts gfenv0 = do
let env = commandenv gfenv0 let env = commandenv gfenv0
let sgr = sourcegrammar gfenv0 let sgr = sourcegrammar gfenv0
setCompletionFunction (Just (wordCompletion (commandenv gfenv0))) setCompletionFunction (Just (wordCompletion (commandenv gfenv0)))
s <- fetchCommand (prompt env) s0 <- fetchCommand (prompt env)
let gfenv = gfenv0 {history = s : history gfenv0} let gfenv = gfenv0 {history = s0 : history gfenv0}
let loopNewCPU gfenv' = do let loopNewCPU gfenv' = do
cpu' <- getCPUTime cpu' <- getCPUTime
putStrLnFlush (show ((cpu' - cputime gfenv') `div` 1000000000) ++ " msec") putStrLnFlush (show ((cpu' - cputime gfenv') `div` 1000000000) ++ " msec")
loop opts $ gfenv' {cputime = cpu'} loop opts $ gfenv' {cputime = cpu'}
let let
enc = encode gfenv
s = decode gfenv s0
pwords = case words s of pwords = case words s of
w:ws -> getCommandOp w :ws w:ws -> getCommandOp w :ws
ws -> ws ws -> ws
@@ -60,8 +64,8 @@ loop opts gfenv0 = do
('-':w):ws2 -> (pTermPrintStyle w, ws2) ('-':w):ws2 -> (pTermPrintStyle w, ws2)
_ -> (TermPrintDefault, ws) _ -> (TermPrintDefault, ws)
case pTerm (unwords term) >>= checkTerm sgr >>= computeTerm sgr of ---- pipe! case pTerm (unwords term) >>= checkTerm sgr >>= computeTerm sgr of ---- pipe!
Ok x -> putStrLn (showTerm style x) Ok x -> putStrLn $ enc (showTerm style x)
Bad s -> putStrLn s Bad s -> putStrLn $ enc s
loopNewCPU gfenv loopNewCPU gfenv
"i":args -> do "i":args -> do
gfenv' <- case parseOptions args of gfenv' <- case parseOptions args of
@@ -93,12 +97,14 @@ loop opts gfenv0 = do
} }
_ -> putStrLn "value definition not parsed" >> loopNewCPU gfenv _ -> putStrLn "value definition not parsed" >> loopNewCPU gfenv
"ph":_ -> mapM_ putStrLn (reverse (history gfenv0)) >> loopNewCPU gfenv "ph":_ -> mapM_ (putStrLn . enc) (reverse (history gfenv0)) >> loopNewCPU gfenv
"se":c -> loopNewCPU $ gfenv {coding = s}
"q":_ -> putStrLn "See you." >> return gfenv "q":_ -> putStrLn "See you." >> return gfenv
-- ordinary commands, working on CommandEnv -- ordinary commands, working on CommandEnv
_ -> do _ -> do
interpretCommandLine env s interpretCommandLine enc env s
loopNewCPU gfenv loopNewCPU gfenv
importInEnv :: GFEnv -> Options -> [FilePath] -> IO GFEnv importInEnv :: GFEnv -> Options -> [FilePath] -> IO GFEnv
@@ -111,7 +117,7 @@ importInEnv gfenv opts files
pgf0 = multigrammar (commandenv gfenv) pgf0 = multigrammar (commandenv gfenv)
pgf1 <- importGrammar pgf0 opts' files pgf1 <- importGrammar pgf0 opts' files
putStrLnFlush $ unwords $ "\nLanguages:" : languages pgf1 putStrLnFlush $ unwords $ "\nLanguages:" : languages pgf1
return $ gfenv { commandenv = mkCommandEnv pgf1 } return $ gfenv { commandenv = mkCommandEnv (encode gfenv) pgf1 }
welcome = unlines [ welcome = unlines [
" ", " ",
@@ -139,11 +145,21 @@ data GFEnv = GFEnv {
sourcegrammar :: Grammar, -- gfo grammar -retain sourcegrammar :: Grammar, -- gfo grammar -retain
commandenv :: CommandEnv, commandenv :: CommandEnv,
history :: [String], history :: [String],
cputime :: Integer cputime :: Integer,
coding :: String
} }
emptyGFEnv :: GFEnv emptyGFEnv :: GFEnv
emptyGFEnv = GFEnv emptyGrammar (mkCommandEnv emptyPGF) [] 0 emptyGFEnv = GFEnv emptyGrammar (mkCommandEnv encodeUTF8 emptyPGF) [] 0 "utf8"
encode env = case coding env of
"utf8" -> encodeUTF8
_ -> id
decode env = case coding env of
"utf8" -> decodeUTF8
_ -> id
wordCompletion cmdEnv line prefix p = wordCompletion cmdEnv line prefix p =