1
0
forked from GitHub/gf-core

Change how GF deals with character encodings in grammar files

1. The default encoding is changed from Latin-1 to UTF-8.

2. Alternate encodings should be specified as "--# -coding=enc", the old
   "flags coding=enc" declarations have no effect but are still checked for
   consistency.

3. A transitional warning is generated for files that contain non-ASCII
   characters without specifying a character encoding:

	"Warning: default encoding has changed from Latin-1 to UTF-8"

4. Conversion to Unicode is now done *before* lexing. This makes it possible
   to allow arbitrary Unicode characters in identifiers. But identifiers are
   still stored as ByteStrings, so they are limited to Latin-1 characters
   for now.

5. Lexer.hs is no longer part of the repository. We now generate the lexer
   from Lexer.x with alex>=3. Some workarounds for bugs in alex-3.0 were
   needed. These bugs might already be fixed in newer versions of alex, but
   we should be compatible with what is shipped in the Haskell Platform.
This commit is contained in:
hallgren
2013-11-25 21:12:11 +00:00
parent 1ae58146cd
commit 30fc46e934
9 changed files with 157 additions and 559 deletions

View File

@@ -1,5 +1,5 @@
name: gf name: gf
version: 3.5-darcs version: 3.5.11-darcs
cabal-version: >= 1.8 cabal-version: >= 1.8
build-type: Custom build-type: Custom
@@ -140,8 +140,8 @@ Executable gf
if flag(new-comp) if flag(new-comp)
cpp-options: -DNEW_COMP cpp-options: -DNEW_COMP
build-tools: happy build-tools: happy, alex>=3
--, alex>=2 && <3 -- tricky to install in Ubuntu 12.04
if os(windows) if os(windows)
build-depends: Win32 build-depends: Win32
else else

View File

@@ -18,40 +18,61 @@ import Prelude hiding (catch)
import GF.Data.Operations import GF.Data.Operations
import GF.System.Catch --import GF.System.Catch
import GF.Infra.UseIO import GF.Infra.UseIO
import GF.Infra.Option(Options,optPreprocessors,addOptions,optEncoding,flag,renameEncoding) import GF.Infra.Option(Options,optPreprocessors,addOptions,renameEncoding,optEncoding,flag,defaultEncoding)
import GF.Grammar.Lexer import GF.Grammar.Lexer
import GF.Grammar.Parser import GF.Grammar.Parser
import GF.Grammar.Grammar import GF.Grammar.Grammar
import GF.Compile.Coding --import GF.Compile.Coding
import GF.Compile.ReadFiles(parseSource,lift)
--import GF.Text.Coding(decodeUnicodeIO)
import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Char8 as BS
import Control.Monad (foldM) import Data.Char(isAscii)
import Control.Monad (foldM,when,unless)
import System.Cmd (system) import System.Cmd (system)
import System.IO(mkTextEncoding) --import System.IO(mkTextEncoding) --,utf8
import System.Directory(removeFile) import System.Directory(removeFile)
getSourceModule :: Options -> FilePath -> IOE SourceModule getSourceModule :: Options -> FilePath -> IOE SourceModule
getSourceModule opts file0 = ioe $ getSourceModule opts file0 =
do tmp <- foldM runPreprocessor (Source file0) (flag optPreprocessors opts) errIn file0 $
content <- keepTemp tmp do tmp <- lift $ foldM runPreprocessor (Source file0) (flag optPreprocessors opts)
case runP pModDef content of raw <- lift $ keepTemp tmp
Left (Pn l c,msg) -> do file <- writeTemp tmp --ePutStrLn $ "1 "++file0
(optCoding,parsed) <- parseSource opts pModDef raw
case parsed of
Left (Pn l c,msg) -> do file <- lift $ writeTemp tmp
let location = file++":"++show l++":"++show c let location = file++":"++show l++":"++show c
return (Bad (location++":\n "++msg)) raise (location++":\n "++msg)
Right (i,mi00) -> Right (i,mi0) ->
do removeTemp tmp do lift $ removeTemp tmp
let mi0 =mi00 {mflags=mflags mi00 `addOptions` opts, msrc=file0} let mi =mi0 {mflags=mflags mi0 `addOptions` opts, msrc=file0}
mi <- transcodeModule (i,mi0) optCoding' = renameEncoding `fmap` flag optEncoding (mflags mi0)
return (Ok mi) case (optCoding,optCoding') of
`catch` (return . Bad . show) (Nothing,Nothing) ->
unless (BS.all isAscii raw) $
ePutStrLn $ file0++":\n Warning: default encoding has changed from Latin-1 to UTF-8"
(_,Just coding') ->
when (coding/=coding') $
raise $ "Encoding mismatch: "++coding++" /= "++coding'
where coding = maybe defaultEncoding renameEncoding optCoding
_ -> return ()
--lift $ transcodeModule' (i,mi) -- old lexer
return (i,mi) -- new lexer
{-
transcodeModule sm00 = transcodeModule sm00 =
do enc <- mkTextEncoding (renameEncoding (flag optEncoding (mflags (snd sm00)))) do enc <- mkTextEncoding (getEncoding (mflags (snd sm00)))
let sm = decodeStringsInModule enc sm00 let sm = decodeStringsInModule enc sm00
return sm return sm
transcodeModule' sm00 =
do let enc = utf8
let sm = decodeStringsInModule enc sm00
return sm
-}
runPreprocessor :: Temporary -> String -> IO Temporary runPreprocessor :: Temporary -> String -> IO Temporary
runPreprocessor tmp0 p = runPreprocessor tmp0 p =

View File

@@ -21,7 +21,8 @@
module GF.Compile.ReadFiles module GF.Compile.ReadFiles
( getAllFiles,ModName,ModEnv,importsOfModule, ( getAllFiles,ModName,ModEnv,importsOfModule,
gfoFile,gfFile,isGFO,gf2gfo, gfoFile,gfFile,isGFO,gf2gfo,
getOptionsFromFile) where parseSource,lift,
getOptionsFromFile,getPragmas) where
import Prelude hiding (catch) import Prelude hiding (catch)
import GF.System.Catch import GF.System.Catch
@@ -34,6 +35,10 @@ import GF.Grammar.Parser
import GF.Grammar.Grammar import GF.Grammar.Grammar
import GF.Grammar.Binary import GF.Grammar.Binary
import System.IO(mkTextEncoding)
import qualified Data.ByteString.UTF8 as UTF8
import GF.Text.Coding(decodeUnicodeIO)
import Control.Monad import Control.Monad
import Data.Maybe(isJust) import Data.Maybe(isJust)
import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Char8 as BS
@@ -50,7 +55,7 @@ type ModEnv = Map.Map ModName (UTCTime,[ModName])
-- | Returns a list of all files to be compiled in topological order i.e. -- | Returns a list of all files to be compiled in topological order i.e.
-- the low level (leaf) modules are first. -- the low level (leaf) modules are first.
getAllFiles :: (MonadIO m,ErrorMonad m) => Options -> [InitPath] -> ModEnv -> FileName -> m [FullPath] --getAllFiles :: (MonadIO m,ErrorMonad m) => Options -> [InitPath] -> ModEnv -> FileName -> m [FullPath]
getAllFiles opts ps env file = do getAllFiles opts ps env file = do
-- read module headers from all files recursively -- read module headers from all files recursively
ds <- liftM reverse $ get [] [] (justModuleName file) ds <- liftM reverse $ get [] [] (justModuleName file)
@@ -117,14 +122,10 @@ getAllFiles opts ps env file = do
Just mo -> return (st,importsOfModule mo) Just mo -> return (st,importsOfModule mo)
Nothing Nothing
| isGFO file -> raise (file ++ " is compiled with different GF version and I can't find the source file") | isGFO file -> raise (file ++ " is compiled with different GF version and I can't find the source file")
| otherwise -> do s <- liftIO $ BS.readFile file | otherwise -> do mo <- parseModHeader opts file
case runP pModHeader s of return (CSComp,importsOfModule mo)
Left (Pn l c,msg) -> raise (file ++ ":" ++ show l ++ ":" ++ show c ++ ": " ++ msg) CSComp -> do mo <- parseModHeader opts file
Right mo -> return (CSComp,importsOfModule mo) return (st,importsOfModule mo)
CSComp -> do s <- liftIO $ BS.readFile file
case runP pModHeader s of
Left (Pn l c,msg) -> raise (file ++ ":" ++ show l ++ ":" ++ show c ++ ": " ++ msg)
Right mo -> return (st,importsOfModule mo)
testErr (mname == name) testErr (mname == name)
("module name" +++ mname +++ "differs from file name" +++ name) ("module name" +++ mname +++ "differs from file name" +++ name)
return (name,st,t,isJust gfTime,imps,dropFileName file) return (name,st,t,isJust gfTime,imps,dropFileName file)
@@ -209,17 +210,55 @@ importsOfModule (m,mi) = (modName m,depModInfo mi [])
modName = showIdent modName = showIdent
parseModHeader opts file =
do --ePutStrLn file
(_,parsed) <- parseSource opts pModHeader =<< lift (BS.readFile file)
case parsed of
Right mo -> return mo
Left (Pn l c,msg) ->
raise (file ++ ":" ++ show l ++ ":" ++ show c ++ ": " ++ msg)
parseSource opts p raw =
do (coding,utf8) <- toUTF8 opts raw
return (coding,runP p utf8)
toUTF8 opts0 raw =
do opts <- getPragmas raw
let given = flag optEncoding opts -- explicitly given encoding
coding = getEncoding $ opts0 `addOptions` opts
utf8 <- if coding=="UTF-8"
then return raw
else lift $ do --ePutStrLn $ "toUTF8 from "++coding
enc <- mkTextEncoding coding
-- decodeUnicodeIO uses a lot of stack space,
-- so we need to split the file into smaller pieces
ls <- mapM (decodeUnicodeIO enc) (BS.lines raw)
return $ UTF8.fromString (unlines ls)
return (given,utf8)
--lift io = ioe (fmap Ok io `catch` (return . Bad . show))
lift io = liftIO io
-- | options can be passed to the compiler by comments in @--#@, in the main file -- | options can be passed to the compiler by comments in @--#@, in the main file
getOptionsFromFile :: (MonadIO m,ErrorMonad m) => FilePath -> m Options getOptionsFromFile :: (MonadIO m,ErrorMonad m) => FilePath -> m Options
getOptionsFromFile file = do getOptionsFromFile file = do
s <- either (\_ -> raise $ "File " ++ file ++ " does not exist") return =<< s <- either (\_ -> raise $ "File " ++ file ++ " does not exist") return =<<
liftIO (try $ BS.readFile file) liftIO (try $ BS.readFile file)
let ls = filter (BS.isPrefixOf (BS.pack "--#")) $ BS.lines s opts <- getPragmas s
fs = map (BS.unpack . BS.unwords . BS.words . BS.drop 3) ls -- The coding flag should not be inherited by other files
parseModuleOptions fs return (addOptions opts (modifyFlags $ \ f -> f{optEncoding=Nothing}))
getFilePath :: MonadIO m => [FilePath] -> String -> m (Maybe FilePath)
getFilePath paths file = liftIO $ get paths getPragmas :: (ErrorMonad m) => BS.ByteString -> m Options
getPragmas = parseModuleOptions .
map (BS.unpack . BS.unwords . BS.words . BS.drop 3) .
filter (BS.isPrefixOf (BS.pack "--#")) . BS.lines
--getFilePath :: MonadIO m => [FilePath] -> String -> m (Maybe FilePath)
getFilePath paths file =
liftIO $ do --ePutStrLn $ "getFilePath "++show paths++" "++show file
get paths
where where
get [] = return Nothing get [] = return Nothing
get (p:ps) = do get (p:ps) = do

File diff suppressed because one or more lines are too long

View File

@@ -1,3 +1,4 @@
-- -*- haskell -*-
{ {
module GF.Grammar.Lexer module GF.Grammar.Lexer
( Token(..), Posn(..) ( Token(..), Posn(..)
@@ -6,9 +7,14 @@ module GF.Grammar.Lexer
) where ) where
import GF.Infra.Ident import GF.Infra.Ident
--import GF.Data.Operations
import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString as WBS
import qualified Data.ByteString.Internal as BS(w2c)
import qualified Data.ByteString.UTF8 as UTF8
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.Word(Word8)
--import Debug.Trace(trace)
} }
@@ -17,7 +23,7 @@ $c = [A-Z\192-\221] # [\215]
$s = [a-z\222-\255] # [\247] $s = [a-z\222-\255] # [\247]
$d = [0-9] -- digit $d = [0-9] -- digit
$i = [$l $d _ '] -- identifier character $i = [$l $d _ '] -- identifier character
$u = [\0-\255] -- universal: any character $u = [.\n] -- universal: any character
@rsyms = -- symbols and non-identifier-like reserved words @rsyms = -- symbols and non-identifier-like reserved words
\; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/ \; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/
@@ -27,17 +33,20 @@ $u = [\0-\255] -- universal: any character
"{-" ([$u # \-] | \- [$u # \}])* ("-")+ "}" ; "{-" ([$u # \-] | \- [$u # \}])* ("-")+ "}" ;
$white+ ; $white+ ;
@rsyms { tok (eitherResIdent (T_Ident . identC . rawIdentC)) } @rsyms { tok (res (T_Ident . identS)) }
\' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (eitherResIdent (T_Ident . identC . rawIdentS . unescapeInitTail . BS.unpack)) } \' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (eitherResIdent (T_Ident . identC . rawIdentS . unescapeInitTail . unpack)) }
(\_ | $l)($l | $d | \_ | \')* { tok (eitherResIdent (T_Ident . identC . rawIdentC)) } (\_ | $l)($l | $d | \_ | \')* { tok (res (T_Ident . identS)) }
\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \" { tok (T_String . unescapeInitTail . BS.unpack) } \" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \" { tok (T_String . unescapeInitTail . unpack) }
(\-)? $d+ { tok (T_Integer . read . BS.unpack) } (\-)? $d+ { tok (T_Integer . read . unpack) }
(\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . BS.unpack) } (\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . unpack) }
{ {
--unpack = BS.unpack
unpack = id
tok :: (String->Token) -> Posn -> String -> Token
tok f p s = f s tok f p s = f s
data Token data Token
@@ -114,15 +123,17 @@ data Token
| T_Double Double -- double precision float literals | T_Double Double -- double precision float literals
| T_Ident Ident | T_Ident Ident
| T_EOF | T_EOF
-- deriving Show -- debug
eitherResIdent :: (BS.ByteString -> Token) -> BS.ByteString -> Token res = eitherResIdent
eitherResIdent :: (String -> Token) -> String -> Token
eitherResIdent tv s = eitherResIdent tv s =
case Map.lookup s resWords of case Map.lookup s resWords of
Just t -> t Just t -> t
Nothing -> tv s Nothing -> tv s
isReservedWord :: BS.ByteString -> Bool isReservedWord :: BS.ByteString -> Bool
isReservedWord s = Map.member s resWords isReservedWord s = Map.member (BS.unpack s) resWords
resWords = Map.fromList resWords = Map.fromList
[ b "!" T_exclmark [ b "!" T_exclmark
@@ -194,7 +205,7 @@ resWords = Map.fromList
, b "where" T_where , b "where" T_where
, b "with" T_with , b "with" T_with
] ]
where b s t = (BS.pack s, t) where b s t = (s, t)
unescapeInitTail :: String -> String unescapeInitTail :: String -> String
unescapeInitTail = unesc . tail where unescapeInitTail = unesc . tail where
@@ -202,7 +213,7 @@ unescapeInitTail = unesc . tail where
'\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs
'\\':'n':cs -> '\n' : unesc cs '\\':'n':cs -> '\n' : unesc cs
'\\':'t':cs -> '\t' : unesc cs '\\':'t':cs -> '\t' : unesc cs
'"':[] -> [] '\"':[] -> []
'\'':[] -> [] '\'':[] -> []
c:cs -> c : unesc cs c:cs -> c : unesc cs
_ -> [] _ -> []
@@ -219,13 +230,14 @@ alexMove :: Posn -> Char -> Posn
alexMove (Pn l c) '\n' = Pn (l+1) 1 alexMove (Pn l c) '\n' = Pn (l+1) 1
alexMove (Pn l c) _ = Pn l (c+1) alexMove (Pn l c) _ = Pn l (c+1)
alexGetChar :: AlexInput -> Maybe (Char,AlexInput) alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
alexGetChar (AI p _ s) = alexGetByte (AI p _ s) =
case BS.uncons s of case WBS.uncons s of
Nothing -> Nothing Nothing -> Nothing
Just (c,s) -> Just (w,s) ->
let p' = alexMove p c let p' = alexMove p c
in p' `seq` Just (c, (AI p' c s)) c = BS.w2c w
in p' `seq` Just (w, (AI p' c s))
alexInputPrevChar :: AlexInput -> Char alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (AI p c s) = c alexInputPrevChar (AI p c s) = c
@@ -260,12 +272,13 @@ failLoc pos msg = P $ \_ -> PFailed pos msg
lexer :: (Token -> P a) -> P a lexer :: (Token -> P a) -> P a
lexer cont = P go lexer cont = P go
where where
--cont' t = trace (show t) (cont t)
go inp@(AI pos _ str) = go inp@(AI pos _ str) =
case alexScan inp 0 of case alexScan inp 0 of
AlexEOF -> unP (cont T_EOF) inp AlexEOF -> unP (cont T_EOF) inp
AlexError (AI pos _ _) -> PFailed pos "lexical error" AlexError (AI pos _ _) -> PFailed pos "lexical error"
AlexSkip inp' len -> go inp' AlexSkip inp' len -> {-trace (show len) $-} go inp'
AlexToken inp' len act -> unP (cont (act pos (BS.take len str))) inp' AlexToken inp' len act -> unP (cont (act pos (UTF8.toString (UTF8.take len str)))) inp'
getPosn :: P Posn getPosn :: P Posn
getPosn = P $ \inp@(AI pos _ _) -> POk pos getPosn = P $ \inp@(AI pos _ _) -> POk pos

View File

@@ -17,8 +17,8 @@ import GF.Grammar.Grammar
import GF.Grammar.Macros import GF.Grammar.Macros
import GF.Grammar.Lexer import GF.Grammar.Lexer
import GF.Compile.Update (buildAnyTree) import GF.Compile.Update (buildAnyTree)
import Codec.Binary.UTF8.String(decodeString) --import Codec.Binary.UTF8.String(decodeString)
import Data.Char(toLower) --import Data.Char(toLower)
} }
%name pModDef ModDef %name pModDef ModDef
@@ -616,9 +616,9 @@ happyError = fail "syntax error"
-- Quick fix to render error messages from UTF-8-encoded source files correctly. -- Quick fix to render error messages from UTF-8-encoded source files correctly.
optDecode opts = optDecode opts =
if map toLower (flag optEncoding opts) `elem` ["utf8","utf-8"] {-if map toLower (getEncoding opts) `elem` ["utf8","utf-8"]
then decodeString then decodeString
else id else-} id
mkListId,mkConsId,mkBaseId :: Ident -> Ident mkListId,mkConsId,mkBaseId :: Ident -> Ident
mkListId = prefixIdent "List" mkListId = prefixIdent "List"

View File

@@ -20,7 +20,7 @@ module GF.Infra.Option
helpMessage, helpMessage,
-- * Checking specific options -- * Checking specific options
flag, cfgTransform, haskellOption, readOutputFormat, flag, cfgTransform, haskellOption, readOutputFormat,
isLexicalCat, isLiteralCat, renameEncoding, isLexicalCat, isLiteralCat, renameEncoding, getEncoding, defaultEncoding,
-- * Setting specific options -- * Setting specific options
setOptimization, setCFGTransform, setOptimization, setCFGTransform,
-- * Convenience methods for checking options -- * Convenience methods for checking options
@@ -157,7 +157,7 @@ data Flags = Flags {
optRetainResource :: Bool, optRetainResource :: Bool,
optName :: Maybe String, optName :: Maybe String,
optPreprocessors :: [String], optPreprocessors :: [String],
optEncoding :: String, optEncoding :: Maybe String,
optPMCFG :: Bool, optPMCFG :: Bool,
optOptimizations :: Set Optimization, optOptimizations :: Set Optimization,
optOptimizePGF :: Bool, optOptimizePGF :: Bool,
@@ -213,7 +213,7 @@ fixRelativeLibPaths curr_dir lib_dir (Options o) = Options (fixPathFlags . o)
-- | Pretty-print the options that are preserved in .gfo files. -- | Pretty-print the options that are preserved in .gfo files.
optionsGFO :: Options -> [(String,Literal)] optionsGFO :: Options -> [(String,Literal)]
optionsGFO opts = optionsPGF opts optionsGFO opts = optionsPGF opts
++ [("coding", LStr (flag optEncoding opts))] ++ [("coding", LStr (getEncoding opts))]
-- | Pretty-print the options that are preserved in .pgf files. -- | Pretty-print the options that are preserved in .pgf files.
optionsPGF :: Options -> [(String,Literal)] optionsPGF :: Options -> [(String,Literal)]
@@ -241,6 +241,10 @@ concatOptions = foldr addOptions noOptions
modifyFlags :: (Flags -> Flags) -> Options modifyFlags :: (Flags -> Flags) -> Options
modifyFlags = Options modifyFlags = Options
getEncoding :: Options -> String
getEncoding = renameEncoding . maybe defaultEncoding id . flag optEncoding
defaultEncoding = "UTF-8"
-- Default options -- Default options
defaultFlags :: Flags defaultFlags :: Flags
@@ -264,7 +268,7 @@ defaultFlags = Flags {
optName = Nothing, optName = Nothing,
optPreprocessors = [], optPreprocessors = [],
optEncoding = "latin1", optEncoding = Nothing,
optPMCFG = True, optPMCFG = True,
optOptimizations = Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize], optOptimizations = Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize],
optOptimizePGF = False, optOptimizePGF = False,
@@ -419,7 +423,7 @@ optDescr =
addLibDir x = set $ \o -> o { optLibraryPath = x:optLibraryPath o } addLibDir x = set $ \o -> o { optLibraryPath = x:optLibraryPath o }
setLibPath x = set $ \o -> o { optLibraryPath = splitInModuleSearchPath x } setLibPath x = set $ \o -> o { optLibraryPath = splitInModuleSearchPath x }
preproc x = set $ \o -> o { optPreprocessors = optPreprocessors o ++ [x] } preproc x = set $ \o -> o { optPreprocessors = optPreprocessors o ++ [x] }
coding x = set $ \o -> o { optEncoding = x } coding x = set $ \o -> o { optEncoding = Just x }
startcat x = set $ \o -> o { optStartCat = Just x } startcat x = set $ \o -> o { optStartCat = Just x }
language x = set $ \o -> o { optSpeechLanguage = Just x } language x = set $ \o -> o { optSpeechLanguage = Just x }
lexer x = set $ \o -> o { optLexer = Just x } lexer x = set $ \o -> o { optLexer = Just x }

View File

@@ -31,7 +31,7 @@ encodeUnicode enc s =
(cbuf,bbuf) <- cod cbuf bbuf (cbuf,bbuf) <- cod cbuf bbuf
#endif #endif
if isEmptyBuffer bbuf if isEmptyBuffer bbuf
then ioe_invalidCharacter then ioe_invalidCharacter1
else do let bs = PS (bufRaw bbuf) (bufL bbuf) (bufR bbuf-bufL bbuf) else do let bs = PS (bufRaw bbuf) (bufL bbuf) (bufR bbuf-bufL bbuf)
bss <- translate cod cbuf bss <- translate cod cbuf
return (bs:bss) return (bs:bss)
@@ -41,8 +41,9 @@ encodeUnicode enc s =
w = bufR cbuf w = bufR cbuf
decodeUnicode :: TextEncoding -> ByteString -> String decodeUnicode :: TextEncoding -> ByteString -> String
decodeUnicode enc (PS fptr l len) = decodeUnicode enc bs = unsafePerformIO $ decodeUnicodeIO enc bs
unsafePerformIO $ do
decodeUnicodeIO enc (PS fptr l len) = do
let bbuf = Buffer{bufRaw=fptr, bufState=ReadBuffer, bufSize=len, bufL=l, bufR=l+len} let bbuf = Buffer{bufRaw=fptr, bufState=ReadBuffer, bufSize=len, bufL=l, bufR=l+len}
cbuf <- newCharBuffer 128 WriteBuffer cbuf <- newCharBuffer 128 WriteBuffer
case enc of case enc of
@@ -59,7 +60,7 @@ decodeUnicode enc (PS fptr l len) =
(bbuf,cbuf) <- cod bbuf cbuf (bbuf,cbuf) <- cod bbuf cbuf
#endif #endif
if isEmptyBuffer cbuf if isEmptyBuffer cbuf
then ioe_invalidCharacter then ioe_invalidCharacter2
else unpack cod bbuf cbuf else unpack cod bbuf cbuf
| otherwise = return [] | otherwise = return []
where where
@@ -75,6 +76,10 @@ decodeUnicode enc (PS fptr l len) =
i = bufL cbuf i = bufL cbuf
w = bufR cbuf w = bufR cbuf
ioe_invalidCharacter = ioException ioe_invalidCharacter1 = ioException
(IOError Nothing InvalidArgument "" (IOError Nothing InvalidArgument ""
("invalid byte sequence for this encoding") Nothing Nothing) ("invalid byte sequence for this encoding") Nothing Nothing)
ioe_invalidCharacter2 = ioException
(IOError Nothing InvalidArgument ""
("invalid byte sequence for this decoding") Nothing Nothing)

View File

@@ -24,7 +24,7 @@ import qualified Data.ByteString.Lazy as BSL
import System.FilePath import System.FilePath
import System.IO import System.IO
import Control.Exception import Control.Exception
import Control.Monad(unless)
mainGFC :: Options -> [FilePath] -> IO () mainGFC :: Options -> [FilePath] -> IO ()
mainGFC opts fs = do mainGFC opts fs = do
@@ -46,9 +46,8 @@ compileSourceFiles :: Options -> [FilePath] -> IOE ()
compileSourceFiles opts fs = compileSourceFiles opts fs =
do gr <- batchCompile opts fs do gr <- batchCompile opts fs
let cnc = justModuleName (last fs) let cnc = justModuleName (last fs)
if flag optStopAfterPhase opts == Compile unless (flag optStopAfterPhase opts == Compile) $
then return () do pgf <- link opts (identS cnc) gr
else do pgf <- link opts (identS cnc) gr
writePGF opts pgf writePGF opts pgf
writeByteCode opts pgf writeByteCode opts pgf
writeOutputs opts pgf writeOutputs opts pgf
@@ -57,11 +56,9 @@ compileCFFiles :: Options -> [FilePath] -> IOE ()
compileCFFiles opts fs = compileCFFiles opts fs =
do s <- liftIO $ fmap unlines $ mapM readFile fs do s <- liftIO $ fmap unlines $ mapM readFile fs
let cnc = justModuleName (last fs) let cnc = justModuleName (last fs)
gf <- getCF cnc s gr <- compileSourceGrammar opts =<< getCF cnc s
gr <- compileSourceGrammar opts gf unless (flag optStopAfterPhase opts == Compile) $
if flag optStopAfterPhase opts == Compile do pgf <- link opts (identS cnc) gr
then return ()
else do pgf <- link opts (identS cnc) gr
writePGF opts pgf writePGF opts pgf
writeOutputs opts pgf writeOutputs opts pgf