forked from GitHub/gf-core
Change how GF deals with character encodings in grammar files
1. The default encoding is changed from Latin-1 to UTF-8. 2. Alternate encodings should be specified as "--# -coding=enc", the old "flags coding=enc" declarations have no effect but are still checked for consistency. 3. A transitional warning is generated for files that contain non-ASCII characters without specifying a character encoding: "Warning: default encoding has changed from Latin-1 to UTF-8" 4. Conversion to Unicode is now done *before* lexing. This makes it possible to allow arbitrary Unicode characters in identifiers. But identifiers are still stored as ByteStrings, so they are limited to Latin-1 characters for now. 5. Lexer.hs is no longer part of the repository. We now generate the lexer from Lexer.x with alex>=3. Some workarounds for bugs in alex-3.0 were needed. These bugs might already be fixed in newer versions of alex, but we should be compatible with what is shipped in the Haskell Platform.
This commit is contained in:
6
gf.cabal
6
gf.cabal
@@ -1,5 +1,5 @@
|
||||
name: gf
|
||||
version: 3.5-darcs
|
||||
version: 3.5.11-darcs
|
||||
|
||||
cabal-version: >= 1.8
|
||||
build-type: Custom
|
||||
@@ -140,8 +140,8 @@ Executable gf
|
||||
if flag(new-comp)
|
||||
cpp-options: -DNEW_COMP
|
||||
|
||||
build-tools: happy
|
||||
--, alex>=2 && <3 -- tricky to install in Ubuntu 12.04
|
||||
build-tools: happy, alex>=3
|
||||
|
||||
if os(windows)
|
||||
build-depends: Win32
|
||||
else
|
||||
|
||||
@@ -18,40 +18,61 @@ import Prelude hiding (catch)
|
||||
|
||||
import GF.Data.Operations
|
||||
|
||||
import GF.System.Catch
|
||||
--import GF.System.Catch
|
||||
import GF.Infra.UseIO
|
||||
import GF.Infra.Option(Options,optPreprocessors,addOptions,optEncoding,flag,renameEncoding)
|
||||
import GF.Infra.Option(Options,optPreprocessors,addOptions,renameEncoding,optEncoding,flag,defaultEncoding)
|
||||
import GF.Grammar.Lexer
|
||||
import GF.Grammar.Parser
|
||||
import GF.Grammar.Grammar
|
||||
import GF.Compile.Coding
|
||||
--import GF.Compile.Coding
|
||||
import GF.Compile.ReadFiles(parseSource,lift)
|
||||
--import GF.Text.Coding(decodeUnicodeIO)
|
||||
|
||||
import qualified Data.ByteString.Char8 as BS
|
||||
import Control.Monad (foldM)
|
||||
import Data.Char(isAscii)
|
||||
import Control.Monad (foldM,when,unless)
|
||||
import System.Cmd (system)
|
||||
import System.IO(mkTextEncoding)
|
||||
--import System.IO(mkTextEncoding) --,utf8
|
||||
import System.Directory(removeFile)
|
||||
|
||||
getSourceModule :: Options -> FilePath -> IOE SourceModule
|
||||
getSourceModule opts file0 = ioe $
|
||||
do tmp <- foldM runPreprocessor (Source file0) (flag optPreprocessors opts)
|
||||
content <- keepTemp tmp
|
||||
case runP pModDef content of
|
||||
Left (Pn l c,msg) -> do file <- writeTemp tmp
|
||||
getSourceModule opts file0 =
|
||||
errIn file0 $
|
||||
do tmp <- lift $ foldM runPreprocessor (Source file0) (flag optPreprocessors opts)
|
||||
raw <- lift $ keepTemp tmp
|
||||
--ePutStrLn $ "1 "++file0
|
||||
(optCoding,parsed) <- parseSource opts pModDef raw
|
||||
case parsed of
|
||||
Left (Pn l c,msg) -> do file <- lift $ writeTemp tmp
|
||||
let location = file++":"++show l++":"++show c
|
||||
return (Bad (location++":\n "++msg))
|
||||
Right (i,mi00) ->
|
||||
do removeTemp tmp
|
||||
let mi0 =mi00 {mflags=mflags mi00 `addOptions` opts, msrc=file0}
|
||||
mi <- transcodeModule (i,mi0)
|
||||
return (Ok mi)
|
||||
`catch` (return . Bad . show)
|
||||
raise (location++":\n "++msg)
|
||||
Right (i,mi0) ->
|
||||
do lift $ removeTemp tmp
|
||||
let mi =mi0 {mflags=mflags mi0 `addOptions` opts, msrc=file0}
|
||||
optCoding' = renameEncoding `fmap` flag optEncoding (mflags mi0)
|
||||
case (optCoding,optCoding') of
|
||||
(Nothing,Nothing) ->
|
||||
unless (BS.all isAscii raw) $
|
||||
ePutStrLn $ file0++":\n Warning: default encoding has changed from Latin-1 to UTF-8"
|
||||
(_,Just coding') ->
|
||||
when (coding/=coding') $
|
||||
raise $ "Encoding mismatch: "++coding++" /= "++coding'
|
||||
where coding = maybe defaultEncoding renameEncoding optCoding
|
||||
_ -> return ()
|
||||
--lift $ transcodeModule' (i,mi) -- old lexer
|
||||
return (i,mi) -- new lexer
|
||||
|
||||
{-
|
||||
transcodeModule sm00 =
|
||||
do enc <- mkTextEncoding (renameEncoding (flag optEncoding (mflags (snd sm00))))
|
||||
do enc <- mkTextEncoding (getEncoding (mflags (snd sm00)))
|
||||
let sm = decodeStringsInModule enc sm00
|
||||
return sm
|
||||
|
||||
transcodeModule' sm00 =
|
||||
do let enc = utf8
|
||||
let sm = decodeStringsInModule enc sm00
|
||||
return sm
|
||||
-}
|
||||
|
||||
runPreprocessor :: Temporary -> String -> IO Temporary
|
||||
runPreprocessor tmp0 p =
|
||||
|
||||
@@ -21,7 +21,8 @@
|
||||
module GF.Compile.ReadFiles
|
||||
( getAllFiles,ModName,ModEnv,importsOfModule,
|
||||
gfoFile,gfFile,isGFO,gf2gfo,
|
||||
getOptionsFromFile) where
|
||||
parseSource,lift,
|
||||
getOptionsFromFile,getPragmas) where
|
||||
|
||||
import Prelude hiding (catch)
|
||||
import GF.System.Catch
|
||||
@@ -34,6 +35,10 @@ import GF.Grammar.Parser
|
||||
import GF.Grammar.Grammar
|
||||
import GF.Grammar.Binary
|
||||
|
||||
import System.IO(mkTextEncoding)
|
||||
import qualified Data.ByteString.UTF8 as UTF8
|
||||
import GF.Text.Coding(decodeUnicodeIO)
|
||||
|
||||
import Control.Monad
|
||||
import Data.Maybe(isJust)
|
||||
import qualified Data.ByteString.Char8 as BS
|
||||
@@ -50,7 +55,7 @@ type ModEnv = Map.Map ModName (UTCTime,[ModName])
|
||||
|
||||
-- | Returns a list of all files to be compiled in topological order i.e.
|
||||
-- the low level (leaf) modules are first.
|
||||
getAllFiles :: (MonadIO m,ErrorMonad m) => Options -> [InitPath] -> ModEnv -> FileName -> m [FullPath]
|
||||
--getAllFiles :: (MonadIO m,ErrorMonad m) => Options -> [InitPath] -> ModEnv -> FileName -> m [FullPath]
|
||||
getAllFiles opts ps env file = do
|
||||
-- read module headers from all files recursively
|
||||
ds <- liftM reverse $ get [] [] (justModuleName file)
|
||||
@@ -117,14 +122,10 @@ getAllFiles opts ps env file = do
|
||||
Just mo -> return (st,importsOfModule mo)
|
||||
Nothing
|
||||
| isGFO file -> raise (file ++ " is compiled with different GF version and I can't find the source file")
|
||||
| otherwise -> do s <- liftIO $ BS.readFile file
|
||||
case runP pModHeader s of
|
||||
Left (Pn l c,msg) -> raise (file ++ ":" ++ show l ++ ":" ++ show c ++ ": " ++ msg)
|
||||
Right mo -> return (CSComp,importsOfModule mo)
|
||||
CSComp -> do s <- liftIO $ BS.readFile file
|
||||
case runP pModHeader s of
|
||||
Left (Pn l c,msg) -> raise (file ++ ":" ++ show l ++ ":" ++ show c ++ ": " ++ msg)
|
||||
Right mo -> return (st,importsOfModule mo)
|
||||
| otherwise -> do mo <- parseModHeader opts file
|
||||
return (CSComp,importsOfModule mo)
|
||||
CSComp -> do mo <- parseModHeader opts file
|
||||
return (st,importsOfModule mo)
|
||||
testErr (mname == name)
|
||||
("module name" +++ mname +++ "differs from file name" +++ name)
|
||||
return (name,st,t,isJust gfTime,imps,dropFileName file)
|
||||
@@ -209,17 +210,55 @@ importsOfModule (m,mi) = (modName m,depModInfo mi [])
|
||||
|
||||
modName = showIdent
|
||||
|
||||
|
||||
parseModHeader opts file =
|
||||
do --ePutStrLn file
|
||||
(_,parsed) <- parseSource opts pModHeader =<< lift (BS.readFile file)
|
||||
case parsed of
|
||||
Right mo -> return mo
|
||||
Left (Pn l c,msg) ->
|
||||
raise (file ++ ":" ++ show l ++ ":" ++ show c ++ ": " ++ msg)
|
||||
|
||||
parseSource opts p raw =
|
||||
do (coding,utf8) <- toUTF8 opts raw
|
||||
return (coding,runP p utf8)
|
||||
|
||||
toUTF8 opts0 raw =
|
||||
do opts <- getPragmas raw
|
||||
let given = flag optEncoding opts -- explicitly given encoding
|
||||
coding = getEncoding $ opts0 `addOptions` opts
|
||||
utf8 <- if coding=="UTF-8"
|
||||
then return raw
|
||||
else lift $ do --ePutStrLn $ "toUTF8 from "++coding
|
||||
enc <- mkTextEncoding coding
|
||||
-- decodeUnicodeIO uses a lot of stack space,
|
||||
-- so we need to split the file into smaller pieces
|
||||
ls <- mapM (decodeUnicodeIO enc) (BS.lines raw)
|
||||
return $ UTF8.fromString (unlines ls)
|
||||
return (given,utf8)
|
||||
|
||||
--lift io = ioe (fmap Ok io `catch` (return . Bad . show))
|
||||
lift io = liftIO io
|
||||
|
||||
-- | options can be passed to the compiler by comments in @--#@, in the main file
|
||||
getOptionsFromFile :: (MonadIO m,ErrorMonad m) => FilePath -> m Options
|
||||
getOptionsFromFile file = do
|
||||
s <- either (\_ -> raise $ "File " ++ file ++ " does not exist") return =<<
|
||||
liftIO (try $ BS.readFile file)
|
||||
let ls = filter (BS.isPrefixOf (BS.pack "--#")) $ BS.lines s
|
||||
fs = map (BS.unpack . BS.unwords . BS.words . BS.drop 3) ls
|
||||
parseModuleOptions fs
|
||||
opts <- getPragmas s
|
||||
-- The coding flag should not be inherited by other files
|
||||
return (addOptions opts (modifyFlags $ \ f -> f{optEncoding=Nothing}))
|
||||
|
||||
getFilePath :: MonadIO m => [FilePath] -> String -> m (Maybe FilePath)
|
||||
getFilePath paths file = liftIO $ get paths
|
||||
|
||||
getPragmas :: (ErrorMonad m) => BS.ByteString -> m Options
|
||||
getPragmas = parseModuleOptions .
|
||||
map (BS.unpack . BS.unwords . BS.words . BS.drop 3) .
|
||||
filter (BS.isPrefixOf (BS.pack "--#")) . BS.lines
|
||||
|
||||
--getFilePath :: MonadIO m => [FilePath] -> String -> m (Maybe FilePath)
|
||||
getFilePath paths file =
|
||||
liftIO $ do --ePutStrLn $ "getFilePath "++show paths++" "++show file
|
||||
get paths
|
||||
where
|
||||
get [] = return Nothing
|
||||
get (p:ps) = do
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -1,3 +1,4 @@
|
||||
-- -*- haskell -*-
|
||||
{
|
||||
module GF.Grammar.Lexer
|
||||
( Token(..), Posn(..)
|
||||
@@ -6,9 +7,14 @@ module GF.Grammar.Lexer
|
||||
) where
|
||||
|
||||
import GF.Infra.Ident
|
||||
--import GF.Data.Operations
|
||||
import qualified Data.ByteString.Char8 as BS
|
||||
import qualified Data.ByteString as WBS
|
||||
import qualified Data.ByteString.Internal as BS(w2c)
|
||||
import qualified Data.ByteString.UTF8 as UTF8
|
||||
import qualified Data.Map as Map
|
||||
|
||||
import Data.Word(Word8)
|
||||
--import Debug.Trace(trace)
|
||||
}
|
||||
|
||||
|
||||
@@ -17,7 +23,7 @@ $c = [A-Z\192-\221] # [\215]
|
||||
$s = [a-z\222-\255] # [\247]
|
||||
$d = [0-9] -- digit
|
||||
$i = [$l $d _ '] -- identifier character
|
||||
$u = [\0-\255] -- universal: any character
|
||||
$u = [.\n] -- universal: any character
|
||||
|
||||
@rsyms = -- symbols and non-identifier-like reserved words
|
||||
\; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/
|
||||
@@ -27,17 +33,20 @@ $u = [\0-\255] -- universal: any character
|
||||
"{-" ([$u # \-] | \- [$u # \}])* ("-")+ "}" ;
|
||||
|
||||
$white+ ;
|
||||
@rsyms { tok (eitherResIdent (T_Ident . identC . rawIdentC)) }
|
||||
\' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (eitherResIdent (T_Ident . identC . rawIdentS . unescapeInitTail . BS.unpack)) }
|
||||
(\_ | $l)($l | $d | \_ | \')* { tok (eitherResIdent (T_Ident . identC . rawIdentC)) }
|
||||
@rsyms { tok (res (T_Ident . identS)) }
|
||||
\' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (eitherResIdent (T_Ident . identC . rawIdentS . unescapeInitTail . unpack)) }
|
||||
(\_ | $l)($l | $d | \_ | \')* { tok (res (T_Ident . identS)) }
|
||||
|
||||
\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \" { tok (T_String . unescapeInitTail . BS.unpack) }
|
||||
\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \" { tok (T_String . unescapeInitTail . unpack) }
|
||||
|
||||
(\-)? $d+ { tok (T_Integer . read . BS.unpack) }
|
||||
(\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . BS.unpack) }
|
||||
(\-)? $d+ { tok (T_Integer . read . unpack) }
|
||||
(\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . unpack) }
|
||||
|
||||
{
|
||||
--unpack = BS.unpack
|
||||
unpack = id
|
||||
|
||||
tok :: (String->Token) -> Posn -> String -> Token
|
||||
tok f p s = f s
|
||||
|
||||
data Token
|
||||
@@ -114,15 +123,17 @@ data Token
|
||||
| T_Double Double -- double precision float literals
|
||||
| T_Ident Ident
|
||||
| T_EOF
|
||||
-- deriving Show -- debug
|
||||
|
||||
eitherResIdent :: (BS.ByteString -> Token) -> BS.ByteString -> Token
|
||||
res = eitherResIdent
|
||||
eitherResIdent :: (String -> Token) -> String -> Token
|
||||
eitherResIdent tv s =
|
||||
case Map.lookup s resWords of
|
||||
Just t -> t
|
||||
Nothing -> tv s
|
||||
|
||||
isReservedWord :: BS.ByteString -> Bool
|
||||
isReservedWord s = Map.member s resWords
|
||||
isReservedWord s = Map.member (BS.unpack s) resWords
|
||||
|
||||
resWords = Map.fromList
|
||||
[ b "!" T_exclmark
|
||||
@@ -194,7 +205,7 @@ resWords = Map.fromList
|
||||
, b "where" T_where
|
||||
, b "with" T_with
|
||||
]
|
||||
where b s t = (BS.pack s, t)
|
||||
where b s t = (s, t)
|
||||
|
||||
unescapeInitTail :: String -> String
|
||||
unescapeInitTail = unesc . tail where
|
||||
@@ -202,7 +213,7 @@ unescapeInitTail = unesc . tail where
|
||||
'\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs
|
||||
'\\':'n':cs -> '\n' : unesc cs
|
||||
'\\':'t':cs -> '\t' : unesc cs
|
||||
'"':[] -> []
|
||||
'\"':[] -> []
|
||||
'\'':[] -> []
|
||||
c:cs -> c : unesc cs
|
||||
_ -> []
|
||||
@@ -219,13 +230,14 @@ alexMove :: Posn -> Char -> Posn
|
||||
alexMove (Pn l c) '\n' = Pn (l+1) 1
|
||||
alexMove (Pn l c) _ = Pn l (c+1)
|
||||
|
||||
alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
|
||||
alexGetChar (AI p _ s) =
|
||||
case BS.uncons s of
|
||||
alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
|
||||
alexGetByte (AI p _ s) =
|
||||
case WBS.uncons s of
|
||||
Nothing -> Nothing
|
||||
Just (c,s) ->
|
||||
Just (w,s) ->
|
||||
let p' = alexMove p c
|
||||
in p' `seq` Just (c, (AI p' c s))
|
||||
c = BS.w2c w
|
||||
in p' `seq` Just (w, (AI p' c s))
|
||||
|
||||
alexInputPrevChar :: AlexInput -> Char
|
||||
alexInputPrevChar (AI p c s) = c
|
||||
@@ -260,12 +272,13 @@ failLoc pos msg = P $ \_ -> PFailed pos msg
|
||||
lexer :: (Token -> P a) -> P a
|
||||
lexer cont = P go
|
||||
where
|
||||
--cont' t = trace (show t) (cont t)
|
||||
go inp@(AI pos _ str) =
|
||||
case alexScan inp 0 of
|
||||
AlexEOF -> unP (cont T_EOF) inp
|
||||
AlexError (AI pos _ _) -> PFailed pos "lexical error"
|
||||
AlexSkip inp' len -> go inp'
|
||||
AlexToken inp' len act -> unP (cont (act pos (BS.take len str))) inp'
|
||||
AlexSkip inp' len -> {-trace (show len) $-} go inp'
|
||||
AlexToken inp' len act -> unP (cont (act pos (UTF8.toString (UTF8.take len str)))) inp'
|
||||
|
||||
getPosn :: P Posn
|
||||
getPosn = P $ \inp@(AI pos _ _) -> POk pos
|
||||
@@ -17,8 +17,8 @@ import GF.Grammar.Grammar
|
||||
import GF.Grammar.Macros
|
||||
import GF.Grammar.Lexer
|
||||
import GF.Compile.Update (buildAnyTree)
|
||||
import Codec.Binary.UTF8.String(decodeString)
|
||||
import Data.Char(toLower)
|
||||
--import Codec.Binary.UTF8.String(decodeString)
|
||||
--import Data.Char(toLower)
|
||||
}
|
||||
|
||||
%name pModDef ModDef
|
||||
@@ -616,9 +616,9 @@ happyError = fail "syntax error"
|
||||
|
||||
-- Quick fix to render error messages from UTF-8-encoded source files correctly.
|
||||
optDecode opts =
|
||||
if map toLower (flag optEncoding opts) `elem` ["utf8","utf-8"]
|
||||
{-if map toLower (getEncoding opts) `elem` ["utf8","utf-8"]
|
||||
then decodeString
|
||||
else id
|
||||
else-} id
|
||||
|
||||
mkListId,mkConsId,mkBaseId :: Ident -> Ident
|
||||
mkListId = prefixIdent "List"
|
||||
|
||||
@@ -20,7 +20,7 @@ module GF.Infra.Option
|
||||
helpMessage,
|
||||
-- * Checking specific options
|
||||
flag, cfgTransform, haskellOption, readOutputFormat,
|
||||
isLexicalCat, isLiteralCat, renameEncoding,
|
||||
isLexicalCat, isLiteralCat, renameEncoding, getEncoding, defaultEncoding,
|
||||
-- * Setting specific options
|
||||
setOptimization, setCFGTransform,
|
||||
-- * Convenience methods for checking options
|
||||
@@ -157,7 +157,7 @@ data Flags = Flags {
|
||||
optRetainResource :: Bool,
|
||||
optName :: Maybe String,
|
||||
optPreprocessors :: [String],
|
||||
optEncoding :: String,
|
||||
optEncoding :: Maybe String,
|
||||
optPMCFG :: Bool,
|
||||
optOptimizations :: Set Optimization,
|
||||
optOptimizePGF :: Bool,
|
||||
@@ -213,7 +213,7 @@ fixRelativeLibPaths curr_dir lib_dir (Options o) = Options (fixPathFlags . o)
|
||||
-- | Pretty-print the options that are preserved in .gfo files.
|
||||
optionsGFO :: Options -> [(String,Literal)]
|
||||
optionsGFO opts = optionsPGF opts
|
||||
++ [("coding", LStr (flag optEncoding opts))]
|
||||
++ [("coding", LStr (getEncoding opts))]
|
||||
|
||||
-- | Pretty-print the options that are preserved in .pgf files.
|
||||
optionsPGF :: Options -> [(String,Literal)]
|
||||
@@ -241,6 +241,10 @@ concatOptions = foldr addOptions noOptions
|
||||
modifyFlags :: (Flags -> Flags) -> Options
|
||||
modifyFlags = Options
|
||||
|
||||
getEncoding :: Options -> String
|
||||
getEncoding = renameEncoding . maybe defaultEncoding id . flag optEncoding
|
||||
defaultEncoding = "UTF-8"
|
||||
|
||||
-- Default options
|
||||
|
||||
defaultFlags :: Flags
|
||||
@@ -264,7 +268,7 @@ defaultFlags = Flags {
|
||||
|
||||
optName = Nothing,
|
||||
optPreprocessors = [],
|
||||
optEncoding = "latin1",
|
||||
optEncoding = Nothing,
|
||||
optPMCFG = True,
|
||||
optOptimizations = Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize],
|
||||
optOptimizePGF = False,
|
||||
@@ -419,7 +423,7 @@ optDescr =
|
||||
addLibDir x = set $ \o -> o { optLibraryPath = x:optLibraryPath o }
|
||||
setLibPath x = set $ \o -> o { optLibraryPath = splitInModuleSearchPath x }
|
||||
preproc x = set $ \o -> o { optPreprocessors = optPreprocessors o ++ [x] }
|
||||
coding x = set $ \o -> o { optEncoding = x }
|
||||
coding x = set $ \o -> o { optEncoding = Just x }
|
||||
startcat x = set $ \o -> o { optStartCat = Just x }
|
||||
language x = set $ \o -> o { optSpeechLanguage = Just x }
|
||||
lexer x = set $ \o -> o { optLexer = Just x }
|
||||
|
||||
@@ -31,7 +31,7 @@ encodeUnicode enc s =
|
||||
(cbuf,bbuf) <- cod cbuf bbuf
|
||||
#endif
|
||||
if isEmptyBuffer bbuf
|
||||
then ioe_invalidCharacter
|
||||
then ioe_invalidCharacter1
|
||||
else do let bs = PS (bufRaw bbuf) (bufL bbuf) (bufR bbuf-bufL bbuf)
|
||||
bss <- translate cod cbuf
|
||||
return (bs:bss)
|
||||
@@ -41,8 +41,9 @@ encodeUnicode enc s =
|
||||
w = bufR cbuf
|
||||
|
||||
decodeUnicode :: TextEncoding -> ByteString -> String
|
||||
decodeUnicode enc (PS fptr l len) =
|
||||
unsafePerformIO $ do
|
||||
decodeUnicode enc bs = unsafePerformIO $ decodeUnicodeIO enc bs
|
||||
|
||||
decodeUnicodeIO enc (PS fptr l len) = do
|
||||
let bbuf = Buffer{bufRaw=fptr, bufState=ReadBuffer, bufSize=len, bufL=l, bufR=l+len}
|
||||
cbuf <- newCharBuffer 128 WriteBuffer
|
||||
case enc of
|
||||
@@ -59,7 +60,7 @@ decodeUnicode enc (PS fptr l len) =
|
||||
(bbuf,cbuf) <- cod bbuf cbuf
|
||||
#endif
|
||||
if isEmptyBuffer cbuf
|
||||
then ioe_invalidCharacter
|
||||
then ioe_invalidCharacter2
|
||||
else unpack cod bbuf cbuf
|
||||
| otherwise = return []
|
||||
where
|
||||
@@ -75,6 +76,10 @@ decodeUnicode enc (PS fptr l len) =
|
||||
i = bufL cbuf
|
||||
w = bufR cbuf
|
||||
|
||||
ioe_invalidCharacter = ioException
|
||||
ioe_invalidCharacter1 = ioException
|
||||
(IOError Nothing InvalidArgument ""
|
||||
("invalid byte sequence for this encoding") Nothing Nothing)
|
||||
|
||||
ioe_invalidCharacter2 = ioException
|
||||
(IOError Nothing InvalidArgument ""
|
||||
("invalid byte sequence for this decoding") Nothing Nothing)
|
||||
|
||||
@@ -24,7 +24,7 @@ import qualified Data.ByteString.Lazy as BSL
|
||||
import System.FilePath
|
||||
import System.IO
|
||||
import Control.Exception
|
||||
|
||||
import Control.Monad(unless)
|
||||
|
||||
mainGFC :: Options -> [FilePath] -> IO ()
|
||||
mainGFC opts fs = do
|
||||
@@ -46,9 +46,8 @@ compileSourceFiles :: Options -> [FilePath] -> IOE ()
|
||||
compileSourceFiles opts fs =
|
||||
do gr <- batchCompile opts fs
|
||||
let cnc = justModuleName (last fs)
|
||||
if flag optStopAfterPhase opts == Compile
|
||||
then return ()
|
||||
else do pgf <- link opts (identS cnc) gr
|
||||
unless (flag optStopAfterPhase opts == Compile) $
|
||||
do pgf <- link opts (identS cnc) gr
|
||||
writePGF opts pgf
|
||||
writeByteCode opts pgf
|
||||
writeOutputs opts pgf
|
||||
@@ -57,11 +56,9 @@ compileCFFiles :: Options -> [FilePath] -> IOE ()
|
||||
compileCFFiles opts fs =
|
||||
do s <- liftIO $ fmap unlines $ mapM readFile fs
|
||||
let cnc = justModuleName (last fs)
|
||||
gf <- getCF cnc s
|
||||
gr <- compileSourceGrammar opts gf
|
||||
if flag optStopAfterPhase opts == Compile
|
||||
then return ()
|
||||
else do pgf <- link opts (identS cnc) gr
|
||||
gr <- compileSourceGrammar opts =<< getCF cnc s
|
||||
unless (flag optStopAfterPhase opts == Compile) $
|
||||
do pgf <- link opts (identS cnc) gr
|
||||
writePGF opts pgf
|
||||
writeOutputs opts pgf
|
||||
|
||||
|
||||
Reference in New Issue
Block a user