forked from GitHub/gf-core
Change how GF deals with character encodings in grammar files
1. The default encoding is changed from Latin-1 to UTF-8. 2. Alternate encodings should be specified as "--# -coding=enc", the old "flags coding=enc" declarations have no effect but are still checked for consistency. 3. A transitional warning is generated for files that contain non-ASCII characters without specifying a character encoding: "Warning: default encoding has changed from Latin-1 to UTF-8" 4. Conversion to Unicode is now done *before* lexing. This makes it possible to allow arbitrary Unicode characters in identifiers. But identifiers are still stored as ByteStrings, so they are limited to Latin-1 characters for now. 5. Lexer.hs is no longer part of the repository. We now generate the lexer from Lexer.x with alex>=3. Some workarounds for bugs in alex-3.0 were needed. These bugs might already be fixed in newer versions of alex, but we should be compatible with what is shipped in the Haskell Platform.
This commit is contained in:
6
gf.cabal
6
gf.cabal
@@ -1,5 +1,5 @@
|
|||||||
name: gf
|
name: gf
|
||||||
version: 3.5-darcs
|
version: 3.5.11-darcs
|
||||||
|
|
||||||
cabal-version: >= 1.8
|
cabal-version: >= 1.8
|
||||||
build-type: Custom
|
build-type: Custom
|
||||||
@@ -140,8 +140,8 @@ Executable gf
|
|||||||
if flag(new-comp)
|
if flag(new-comp)
|
||||||
cpp-options: -DNEW_COMP
|
cpp-options: -DNEW_COMP
|
||||||
|
|
||||||
build-tools: happy
|
build-tools: happy, alex>=3
|
||||||
--, alex>=2 && <3 -- tricky to install in Ubuntu 12.04
|
|
||||||
if os(windows)
|
if os(windows)
|
||||||
build-depends: Win32
|
build-depends: Win32
|
||||||
else
|
else
|
||||||
|
|||||||
@@ -18,40 +18,61 @@ import Prelude hiding (catch)
|
|||||||
|
|
||||||
import GF.Data.Operations
|
import GF.Data.Operations
|
||||||
|
|
||||||
import GF.System.Catch
|
--import GF.System.Catch
|
||||||
import GF.Infra.UseIO
|
import GF.Infra.UseIO
|
||||||
import GF.Infra.Option(Options,optPreprocessors,addOptions,optEncoding,flag,renameEncoding)
|
import GF.Infra.Option(Options,optPreprocessors,addOptions,renameEncoding,optEncoding,flag,defaultEncoding)
|
||||||
import GF.Grammar.Lexer
|
import GF.Grammar.Lexer
|
||||||
import GF.Grammar.Parser
|
import GF.Grammar.Parser
|
||||||
import GF.Grammar.Grammar
|
import GF.Grammar.Grammar
|
||||||
import GF.Compile.Coding
|
--import GF.Compile.Coding
|
||||||
|
import GF.Compile.ReadFiles(parseSource,lift)
|
||||||
|
--import GF.Text.Coding(decodeUnicodeIO)
|
||||||
|
|
||||||
import qualified Data.ByteString.Char8 as BS
|
import qualified Data.ByteString.Char8 as BS
|
||||||
import Control.Monad (foldM)
|
import Data.Char(isAscii)
|
||||||
|
import Control.Monad (foldM,when,unless)
|
||||||
import System.Cmd (system)
|
import System.Cmd (system)
|
||||||
import System.IO(mkTextEncoding)
|
--import System.IO(mkTextEncoding) --,utf8
|
||||||
import System.Directory(removeFile)
|
import System.Directory(removeFile)
|
||||||
|
|
||||||
getSourceModule :: Options -> FilePath -> IOE SourceModule
|
getSourceModule :: Options -> FilePath -> IOE SourceModule
|
||||||
getSourceModule opts file0 = ioe $
|
getSourceModule opts file0 =
|
||||||
do tmp <- foldM runPreprocessor (Source file0) (flag optPreprocessors opts)
|
errIn file0 $
|
||||||
content <- keepTemp tmp
|
do tmp <- lift $ foldM runPreprocessor (Source file0) (flag optPreprocessors opts)
|
||||||
case runP pModDef content of
|
raw <- lift $ keepTemp tmp
|
||||||
Left (Pn l c,msg) -> do file <- writeTemp tmp
|
--ePutStrLn $ "1 "++file0
|
||||||
|
(optCoding,parsed) <- parseSource opts pModDef raw
|
||||||
|
case parsed of
|
||||||
|
Left (Pn l c,msg) -> do file <- lift $ writeTemp tmp
|
||||||
let location = file++":"++show l++":"++show c
|
let location = file++":"++show l++":"++show c
|
||||||
return (Bad (location++":\n "++msg))
|
raise (location++":\n "++msg)
|
||||||
Right (i,mi00) ->
|
Right (i,mi0) ->
|
||||||
do removeTemp tmp
|
do lift $ removeTemp tmp
|
||||||
let mi0 =mi00 {mflags=mflags mi00 `addOptions` opts, msrc=file0}
|
let mi =mi0 {mflags=mflags mi0 `addOptions` opts, msrc=file0}
|
||||||
mi <- transcodeModule (i,mi0)
|
optCoding' = renameEncoding `fmap` flag optEncoding (mflags mi0)
|
||||||
return (Ok mi)
|
case (optCoding,optCoding') of
|
||||||
`catch` (return . Bad . show)
|
(Nothing,Nothing) ->
|
||||||
|
unless (BS.all isAscii raw) $
|
||||||
|
ePutStrLn $ file0++":\n Warning: default encoding has changed from Latin-1 to UTF-8"
|
||||||
|
(_,Just coding') ->
|
||||||
|
when (coding/=coding') $
|
||||||
|
raise $ "Encoding mismatch: "++coding++" /= "++coding'
|
||||||
|
where coding = maybe defaultEncoding renameEncoding optCoding
|
||||||
|
_ -> return ()
|
||||||
|
--lift $ transcodeModule' (i,mi) -- old lexer
|
||||||
|
return (i,mi) -- new lexer
|
||||||
|
|
||||||
|
{-
|
||||||
transcodeModule sm00 =
|
transcodeModule sm00 =
|
||||||
do enc <- mkTextEncoding (renameEncoding (flag optEncoding (mflags (snd sm00))))
|
do enc <- mkTextEncoding (getEncoding (mflags (snd sm00)))
|
||||||
let sm = decodeStringsInModule enc sm00
|
let sm = decodeStringsInModule enc sm00
|
||||||
return sm
|
return sm
|
||||||
|
|
||||||
|
transcodeModule' sm00 =
|
||||||
|
do let enc = utf8
|
||||||
|
let sm = decodeStringsInModule enc sm00
|
||||||
|
return sm
|
||||||
|
-}
|
||||||
|
|
||||||
runPreprocessor :: Temporary -> String -> IO Temporary
|
runPreprocessor :: Temporary -> String -> IO Temporary
|
||||||
runPreprocessor tmp0 p =
|
runPreprocessor tmp0 p =
|
||||||
|
|||||||
@@ -21,7 +21,8 @@
|
|||||||
module GF.Compile.ReadFiles
|
module GF.Compile.ReadFiles
|
||||||
( getAllFiles,ModName,ModEnv,importsOfModule,
|
( getAllFiles,ModName,ModEnv,importsOfModule,
|
||||||
gfoFile,gfFile,isGFO,gf2gfo,
|
gfoFile,gfFile,isGFO,gf2gfo,
|
||||||
getOptionsFromFile) where
|
parseSource,lift,
|
||||||
|
getOptionsFromFile,getPragmas) where
|
||||||
|
|
||||||
import Prelude hiding (catch)
|
import Prelude hiding (catch)
|
||||||
import GF.System.Catch
|
import GF.System.Catch
|
||||||
@@ -34,6 +35,10 @@ import GF.Grammar.Parser
|
|||||||
import GF.Grammar.Grammar
|
import GF.Grammar.Grammar
|
||||||
import GF.Grammar.Binary
|
import GF.Grammar.Binary
|
||||||
|
|
||||||
|
import System.IO(mkTextEncoding)
|
||||||
|
import qualified Data.ByteString.UTF8 as UTF8
|
||||||
|
import GF.Text.Coding(decodeUnicodeIO)
|
||||||
|
|
||||||
import Control.Monad
|
import Control.Monad
|
||||||
import Data.Maybe(isJust)
|
import Data.Maybe(isJust)
|
||||||
import qualified Data.ByteString.Char8 as BS
|
import qualified Data.ByteString.Char8 as BS
|
||||||
@@ -50,7 +55,7 @@ type ModEnv = Map.Map ModName (UTCTime,[ModName])
|
|||||||
|
|
||||||
-- | Returns a list of all files to be compiled in topological order i.e.
|
-- | Returns a list of all files to be compiled in topological order i.e.
|
||||||
-- the low level (leaf) modules are first.
|
-- the low level (leaf) modules are first.
|
||||||
getAllFiles :: (MonadIO m,ErrorMonad m) => Options -> [InitPath] -> ModEnv -> FileName -> m [FullPath]
|
--getAllFiles :: (MonadIO m,ErrorMonad m) => Options -> [InitPath] -> ModEnv -> FileName -> m [FullPath]
|
||||||
getAllFiles opts ps env file = do
|
getAllFiles opts ps env file = do
|
||||||
-- read module headers from all files recursively
|
-- read module headers from all files recursively
|
||||||
ds <- liftM reverse $ get [] [] (justModuleName file)
|
ds <- liftM reverse $ get [] [] (justModuleName file)
|
||||||
@@ -117,14 +122,10 @@ getAllFiles opts ps env file = do
|
|||||||
Just mo -> return (st,importsOfModule mo)
|
Just mo -> return (st,importsOfModule mo)
|
||||||
Nothing
|
Nothing
|
||||||
| isGFO file -> raise (file ++ " is compiled with different GF version and I can't find the source file")
|
| isGFO file -> raise (file ++ " is compiled with different GF version and I can't find the source file")
|
||||||
| otherwise -> do s <- liftIO $ BS.readFile file
|
| otherwise -> do mo <- parseModHeader opts file
|
||||||
case runP pModHeader s of
|
return (CSComp,importsOfModule mo)
|
||||||
Left (Pn l c,msg) -> raise (file ++ ":" ++ show l ++ ":" ++ show c ++ ": " ++ msg)
|
CSComp -> do mo <- parseModHeader opts file
|
||||||
Right mo -> return (CSComp,importsOfModule mo)
|
return (st,importsOfModule mo)
|
||||||
CSComp -> do s <- liftIO $ BS.readFile file
|
|
||||||
case runP pModHeader s of
|
|
||||||
Left (Pn l c,msg) -> raise (file ++ ":" ++ show l ++ ":" ++ show c ++ ": " ++ msg)
|
|
||||||
Right mo -> return (st,importsOfModule mo)
|
|
||||||
testErr (mname == name)
|
testErr (mname == name)
|
||||||
("module name" +++ mname +++ "differs from file name" +++ name)
|
("module name" +++ mname +++ "differs from file name" +++ name)
|
||||||
return (name,st,t,isJust gfTime,imps,dropFileName file)
|
return (name,st,t,isJust gfTime,imps,dropFileName file)
|
||||||
@@ -209,17 +210,55 @@ importsOfModule (m,mi) = (modName m,depModInfo mi [])
|
|||||||
|
|
||||||
modName = showIdent
|
modName = showIdent
|
||||||
|
|
||||||
|
|
||||||
|
parseModHeader opts file =
|
||||||
|
do --ePutStrLn file
|
||||||
|
(_,parsed) <- parseSource opts pModHeader =<< lift (BS.readFile file)
|
||||||
|
case parsed of
|
||||||
|
Right mo -> return mo
|
||||||
|
Left (Pn l c,msg) ->
|
||||||
|
raise (file ++ ":" ++ show l ++ ":" ++ show c ++ ": " ++ msg)
|
||||||
|
|
||||||
|
parseSource opts p raw =
|
||||||
|
do (coding,utf8) <- toUTF8 opts raw
|
||||||
|
return (coding,runP p utf8)
|
||||||
|
|
||||||
|
toUTF8 opts0 raw =
|
||||||
|
do opts <- getPragmas raw
|
||||||
|
let given = flag optEncoding opts -- explicitly given encoding
|
||||||
|
coding = getEncoding $ opts0 `addOptions` opts
|
||||||
|
utf8 <- if coding=="UTF-8"
|
||||||
|
then return raw
|
||||||
|
else lift $ do --ePutStrLn $ "toUTF8 from "++coding
|
||||||
|
enc <- mkTextEncoding coding
|
||||||
|
-- decodeUnicodeIO uses a lot of stack space,
|
||||||
|
-- so we need to split the file into smaller pieces
|
||||||
|
ls <- mapM (decodeUnicodeIO enc) (BS.lines raw)
|
||||||
|
return $ UTF8.fromString (unlines ls)
|
||||||
|
return (given,utf8)
|
||||||
|
|
||||||
|
--lift io = ioe (fmap Ok io `catch` (return . Bad . show))
|
||||||
|
lift io = liftIO io
|
||||||
|
|
||||||
-- | options can be passed to the compiler by comments in @--#@, in the main file
|
-- | options can be passed to the compiler by comments in @--#@, in the main file
|
||||||
getOptionsFromFile :: (MonadIO m,ErrorMonad m) => FilePath -> m Options
|
getOptionsFromFile :: (MonadIO m,ErrorMonad m) => FilePath -> m Options
|
||||||
getOptionsFromFile file = do
|
getOptionsFromFile file = do
|
||||||
s <- either (\_ -> raise $ "File " ++ file ++ " does not exist") return =<<
|
s <- either (\_ -> raise $ "File " ++ file ++ " does not exist") return =<<
|
||||||
liftIO (try $ BS.readFile file)
|
liftIO (try $ BS.readFile file)
|
||||||
let ls = filter (BS.isPrefixOf (BS.pack "--#")) $ BS.lines s
|
opts <- getPragmas s
|
||||||
fs = map (BS.unpack . BS.unwords . BS.words . BS.drop 3) ls
|
-- The coding flag should not be inherited by other files
|
||||||
parseModuleOptions fs
|
return (addOptions opts (modifyFlags $ \ f -> f{optEncoding=Nothing}))
|
||||||
|
|
||||||
getFilePath :: MonadIO m => [FilePath] -> String -> m (Maybe FilePath)
|
|
||||||
getFilePath paths file = liftIO $ get paths
|
getPragmas :: (ErrorMonad m) => BS.ByteString -> m Options
|
||||||
|
getPragmas = parseModuleOptions .
|
||||||
|
map (BS.unpack . BS.unwords . BS.words . BS.drop 3) .
|
||||||
|
filter (BS.isPrefixOf (BS.pack "--#")) . BS.lines
|
||||||
|
|
||||||
|
--getFilePath :: MonadIO m => [FilePath] -> String -> m (Maybe FilePath)
|
||||||
|
getFilePath paths file =
|
||||||
|
liftIO $ do --ePutStrLn $ "getFilePath "++show paths++" "++show file
|
||||||
|
get paths
|
||||||
where
|
where
|
||||||
get [] = return Nothing
|
get [] = return Nothing
|
||||||
get (p:ps) = do
|
get (p:ps) = do
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -1,3 +1,4 @@
|
|||||||
|
-- -*- haskell -*-
|
||||||
{
|
{
|
||||||
module GF.Grammar.Lexer
|
module GF.Grammar.Lexer
|
||||||
( Token(..), Posn(..)
|
( Token(..), Posn(..)
|
||||||
@@ -6,9 +7,14 @@ module GF.Grammar.Lexer
|
|||||||
) where
|
) where
|
||||||
|
|
||||||
import GF.Infra.Ident
|
import GF.Infra.Ident
|
||||||
|
--import GF.Data.Operations
|
||||||
import qualified Data.ByteString.Char8 as BS
|
import qualified Data.ByteString.Char8 as BS
|
||||||
|
import qualified Data.ByteString as WBS
|
||||||
|
import qualified Data.ByteString.Internal as BS(w2c)
|
||||||
|
import qualified Data.ByteString.UTF8 as UTF8
|
||||||
import qualified Data.Map as Map
|
import qualified Data.Map as Map
|
||||||
|
import Data.Word(Word8)
|
||||||
|
--import Debug.Trace(trace)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -17,7 +23,7 @@ $c = [A-Z\192-\221] # [\215]
|
|||||||
$s = [a-z\222-\255] # [\247]
|
$s = [a-z\222-\255] # [\247]
|
||||||
$d = [0-9] -- digit
|
$d = [0-9] -- digit
|
||||||
$i = [$l $d _ '] -- identifier character
|
$i = [$l $d _ '] -- identifier character
|
||||||
$u = [\0-\255] -- universal: any character
|
$u = [.\n] -- universal: any character
|
||||||
|
|
||||||
@rsyms = -- symbols and non-identifier-like reserved words
|
@rsyms = -- symbols and non-identifier-like reserved words
|
||||||
\; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/
|
\; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/
|
||||||
@@ -27,17 +33,20 @@ $u = [\0-\255] -- universal: any character
|
|||||||
"{-" ([$u # \-] | \- [$u # \}])* ("-")+ "}" ;
|
"{-" ([$u # \-] | \- [$u # \}])* ("-")+ "}" ;
|
||||||
|
|
||||||
$white+ ;
|
$white+ ;
|
||||||
@rsyms { tok (eitherResIdent (T_Ident . identC . rawIdentC)) }
|
@rsyms { tok (res (T_Ident . identS)) }
|
||||||
\' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (eitherResIdent (T_Ident . identC . rawIdentS . unescapeInitTail . BS.unpack)) }
|
\' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (eitherResIdent (T_Ident . identC . rawIdentS . unescapeInitTail . unpack)) }
|
||||||
(\_ | $l)($l | $d | \_ | \')* { tok (eitherResIdent (T_Ident . identC . rawIdentC)) }
|
(\_ | $l)($l | $d | \_ | \')* { tok (res (T_Ident . identS)) }
|
||||||
|
|
||||||
\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \" { tok (T_String . unescapeInitTail . BS.unpack) }
|
\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \" { tok (T_String . unescapeInitTail . unpack) }
|
||||||
|
|
||||||
(\-)? $d+ { tok (T_Integer . read . BS.unpack) }
|
(\-)? $d+ { tok (T_Integer . read . unpack) }
|
||||||
(\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . BS.unpack) }
|
(\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . unpack) }
|
||||||
|
|
||||||
{
|
{
|
||||||
|
--unpack = BS.unpack
|
||||||
|
unpack = id
|
||||||
|
|
||||||
|
tok :: (String->Token) -> Posn -> String -> Token
|
||||||
tok f p s = f s
|
tok f p s = f s
|
||||||
|
|
||||||
data Token
|
data Token
|
||||||
@@ -114,15 +123,17 @@ data Token
|
|||||||
| T_Double Double -- double precision float literals
|
| T_Double Double -- double precision float literals
|
||||||
| T_Ident Ident
|
| T_Ident Ident
|
||||||
| T_EOF
|
| T_EOF
|
||||||
|
-- deriving Show -- debug
|
||||||
|
|
||||||
eitherResIdent :: (BS.ByteString -> Token) -> BS.ByteString -> Token
|
res = eitherResIdent
|
||||||
|
eitherResIdent :: (String -> Token) -> String -> Token
|
||||||
eitherResIdent tv s =
|
eitherResIdent tv s =
|
||||||
case Map.lookup s resWords of
|
case Map.lookup s resWords of
|
||||||
Just t -> t
|
Just t -> t
|
||||||
Nothing -> tv s
|
Nothing -> tv s
|
||||||
|
|
||||||
isReservedWord :: BS.ByteString -> Bool
|
isReservedWord :: BS.ByteString -> Bool
|
||||||
isReservedWord s = Map.member s resWords
|
isReservedWord s = Map.member (BS.unpack s) resWords
|
||||||
|
|
||||||
resWords = Map.fromList
|
resWords = Map.fromList
|
||||||
[ b "!" T_exclmark
|
[ b "!" T_exclmark
|
||||||
@@ -194,7 +205,7 @@ resWords = Map.fromList
|
|||||||
, b "where" T_where
|
, b "where" T_where
|
||||||
, b "with" T_with
|
, b "with" T_with
|
||||||
]
|
]
|
||||||
where b s t = (BS.pack s, t)
|
where b s t = (s, t)
|
||||||
|
|
||||||
unescapeInitTail :: String -> String
|
unescapeInitTail :: String -> String
|
||||||
unescapeInitTail = unesc . tail where
|
unescapeInitTail = unesc . tail where
|
||||||
@@ -202,7 +213,7 @@ unescapeInitTail = unesc . tail where
|
|||||||
'\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs
|
'\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs
|
||||||
'\\':'n':cs -> '\n' : unesc cs
|
'\\':'n':cs -> '\n' : unesc cs
|
||||||
'\\':'t':cs -> '\t' : unesc cs
|
'\\':'t':cs -> '\t' : unesc cs
|
||||||
'"':[] -> []
|
'\"':[] -> []
|
||||||
'\'':[] -> []
|
'\'':[] -> []
|
||||||
c:cs -> c : unesc cs
|
c:cs -> c : unesc cs
|
||||||
_ -> []
|
_ -> []
|
||||||
@@ -219,13 +230,14 @@ alexMove :: Posn -> Char -> Posn
|
|||||||
alexMove (Pn l c) '\n' = Pn (l+1) 1
|
alexMove (Pn l c) '\n' = Pn (l+1) 1
|
||||||
alexMove (Pn l c) _ = Pn l (c+1)
|
alexMove (Pn l c) _ = Pn l (c+1)
|
||||||
|
|
||||||
alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
|
alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
|
||||||
alexGetChar (AI p _ s) =
|
alexGetByte (AI p _ s) =
|
||||||
case BS.uncons s of
|
case WBS.uncons s of
|
||||||
Nothing -> Nothing
|
Nothing -> Nothing
|
||||||
Just (c,s) ->
|
Just (w,s) ->
|
||||||
let p' = alexMove p c
|
let p' = alexMove p c
|
||||||
in p' `seq` Just (c, (AI p' c s))
|
c = BS.w2c w
|
||||||
|
in p' `seq` Just (w, (AI p' c s))
|
||||||
|
|
||||||
alexInputPrevChar :: AlexInput -> Char
|
alexInputPrevChar :: AlexInput -> Char
|
||||||
alexInputPrevChar (AI p c s) = c
|
alexInputPrevChar (AI p c s) = c
|
||||||
@@ -260,12 +272,13 @@ failLoc pos msg = P $ \_ -> PFailed pos msg
|
|||||||
lexer :: (Token -> P a) -> P a
|
lexer :: (Token -> P a) -> P a
|
||||||
lexer cont = P go
|
lexer cont = P go
|
||||||
where
|
where
|
||||||
|
--cont' t = trace (show t) (cont t)
|
||||||
go inp@(AI pos _ str) =
|
go inp@(AI pos _ str) =
|
||||||
case alexScan inp 0 of
|
case alexScan inp 0 of
|
||||||
AlexEOF -> unP (cont T_EOF) inp
|
AlexEOF -> unP (cont T_EOF) inp
|
||||||
AlexError (AI pos _ _) -> PFailed pos "lexical error"
|
AlexError (AI pos _ _) -> PFailed pos "lexical error"
|
||||||
AlexSkip inp' len -> go inp'
|
AlexSkip inp' len -> {-trace (show len) $-} go inp'
|
||||||
AlexToken inp' len act -> unP (cont (act pos (BS.take len str))) inp'
|
AlexToken inp' len act -> unP (cont (act pos (UTF8.toString (UTF8.take len str)))) inp'
|
||||||
|
|
||||||
getPosn :: P Posn
|
getPosn :: P Posn
|
||||||
getPosn = P $ \inp@(AI pos _ _) -> POk pos
|
getPosn = P $ \inp@(AI pos _ _) -> POk pos
|
||||||
@@ -17,8 +17,8 @@ import GF.Grammar.Grammar
|
|||||||
import GF.Grammar.Macros
|
import GF.Grammar.Macros
|
||||||
import GF.Grammar.Lexer
|
import GF.Grammar.Lexer
|
||||||
import GF.Compile.Update (buildAnyTree)
|
import GF.Compile.Update (buildAnyTree)
|
||||||
import Codec.Binary.UTF8.String(decodeString)
|
--import Codec.Binary.UTF8.String(decodeString)
|
||||||
import Data.Char(toLower)
|
--import Data.Char(toLower)
|
||||||
}
|
}
|
||||||
|
|
||||||
%name pModDef ModDef
|
%name pModDef ModDef
|
||||||
@@ -616,9 +616,9 @@ happyError = fail "syntax error"
|
|||||||
|
|
||||||
-- Quick fix to render error messages from UTF-8-encoded source files correctly.
|
-- Quick fix to render error messages from UTF-8-encoded source files correctly.
|
||||||
optDecode opts =
|
optDecode opts =
|
||||||
if map toLower (flag optEncoding opts) `elem` ["utf8","utf-8"]
|
{-if map toLower (getEncoding opts) `elem` ["utf8","utf-8"]
|
||||||
then decodeString
|
then decodeString
|
||||||
else id
|
else-} id
|
||||||
|
|
||||||
mkListId,mkConsId,mkBaseId :: Ident -> Ident
|
mkListId,mkConsId,mkBaseId :: Ident -> Ident
|
||||||
mkListId = prefixIdent "List"
|
mkListId = prefixIdent "List"
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ module GF.Infra.Option
|
|||||||
helpMessage,
|
helpMessage,
|
||||||
-- * Checking specific options
|
-- * Checking specific options
|
||||||
flag, cfgTransform, haskellOption, readOutputFormat,
|
flag, cfgTransform, haskellOption, readOutputFormat,
|
||||||
isLexicalCat, isLiteralCat, renameEncoding,
|
isLexicalCat, isLiteralCat, renameEncoding, getEncoding, defaultEncoding,
|
||||||
-- * Setting specific options
|
-- * Setting specific options
|
||||||
setOptimization, setCFGTransform,
|
setOptimization, setCFGTransform,
|
||||||
-- * Convenience methods for checking options
|
-- * Convenience methods for checking options
|
||||||
@@ -157,7 +157,7 @@ data Flags = Flags {
|
|||||||
optRetainResource :: Bool,
|
optRetainResource :: Bool,
|
||||||
optName :: Maybe String,
|
optName :: Maybe String,
|
||||||
optPreprocessors :: [String],
|
optPreprocessors :: [String],
|
||||||
optEncoding :: String,
|
optEncoding :: Maybe String,
|
||||||
optPMCFG :: Bool,
|
optPMCFG :: Bool,
|
||||||
optOptimizations :: Set Optimization,
|
optOptimizations :: Set Optimization,
|
||||||
optOptimizePGF :: Bool,
|
optOptimizePGF :: Bool,
|
||||||
@@ -213,7 +213,7 @@ fixRelativeLibPaths curr_dir lib_dir (Options o) = Options (fixPathFlags . o)
|
|||||||
-- | Pretty-print the options that are preserved in .gfo files.
|
-- | Pretty-print the options that are preserved in .gfo files.
|
||||||
optionsGFO :: Options -> [(String,Literal)]
|
optionsGFO :: Options -> [(String,Literal)]
|
||||||
optionsGFO opts = optionsPGF opts
|
optionsGFO opts = optionsPGF opts
|
||||||
++ [("coding", LStr (flag optEncoding opts))]
|
++ [("coding", LStr (getEncoding opts))]
|
||||||
|
|
||||||
-- | Pretty-print the options that are preserved in .pgf files.
|
-- | Pretty-print the options that are preserved in .pgf files.
|
||||||
optionsPGF :: Options -> [(String,Literal)]
|
optionsPGF :: Options -> [(String,Literal)]
|
||||||
@@ -241,6 +241,10 @@ concatOptions = foldr addOptions noOptions
|
|||||||
modifyFlags :: (Flags -> Flags) -> Options
|
modifyFlags :: (Flags -> Flags) -> Options
|
||||||
modifyFlags = Options
|
modifyFlags = Options
|
||||||
|
|
||||||
|
getEncoding :: Options -> String
|
||||||
|
getEncoding = renameEncoding . maybe defaultEncoding id . flag optEncoding
|
||||||
|
defaultEncoding = "UTF-8"
|
||||||
|
|
||||||
-- Default options
|
-- Default options
|
||||||
|
|
||||||
defaultFlags :: Flags
|
defaultFlags :: Flags
|
||||||
@@ -264,7 +268,7 @@ defaultFlags = Flags {
|
|||||||
|
|
||||||
optName = Nothing,
|
optName = Nothing,
|
||||||
optPreprocessors = [],
|
optPreprocessors = [],
|
||||||
optEncoding = "latin1",
|
optEncoding = Nothing,
|
||||||
optPMCFG = True,
|
optPMCFG = True,
|
||||||
optOptimizations = Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize],
|
optOptimizations = Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize],
|
||||||
optOptimizePGF = False,
|
optOptimizePGF = False,
|
||||||
@@ -419,7 +423,7 @@ optDescr =
|
|||||||
addLibDir x = set $ \o -> o { optLibraryPath = x:optLibraryPath o }
|
addLibDir x = set $ \o -> o { optLibraryPath = x:optLibraryPath o }
|
||||||
setLibPath x = set $ \o -> o { optLibraryPath = splitInModuleSearchPath x }
|
setLibPath x = set $ \o -> o { optLibraryPath = splitInModuleSearchPath x }
|
||||||
preproc x = set $ \o -> o { optPreprocessors = optPreprocessors o ++ [x] }
|
preproc x = set $ \o -> o { optPreprocessors = optPreprocessors o ++ [x] }
|
||||||
coding x = set $ \o -> o { optEncoding = x }
|
coding x = set $ \o -> o { optEncoding = Just x }
|
||||||
startcat x = set $ \o -> o { optStartCat = Just x }
|
startcat x = set $ \o -> o { optStartCat = Just x }
|
||||||
language x = set $ \o -> o { optSpeechLanguage = Just x }
|
language x = set $ \o -> o { optSpeechLanguage = Just x }
|
||||||
lexer x = set $ \o -> o { optLexer = Just x }
|
lexer x = set $ \o -> o { optLexer = Just x }
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ encodeUnicode enc s =
|
|||||||
(cbuf,bbuf) <- cod cbuf bbuf
|
(cbuf,bbuf) <- cod cbuf bbuf
|
||||||
#endif
|
#endif
|
||||||
if isEmptyBuffer bbuf
|
if isEmptyBuffer bbuf
|
||||||
then ioe_invalidCharacter
|
then ioe_invalidCharacter1
|
||||||
else do let bs = PS (bufRaw bbuf) (bufL bbuf) (bufR bbuf-bufL bbuf)
|
else do let bs = PS (bufRaw bbuf) (bufL bbuf) (bufR bbuf-bufL bbuf)
|
||||||
bss <- translate cod cbuf
|
bss <- translate cod cbuf
|
||||||
return (bs:bss)
|
return (bs:bss)
|
||||||
@@ -41,8 +41,9 @@ encodeUnicode enc s =
|
|||||||
w = bufR cbuf
|
w = bufR cbuf
|
||||||
|
|
||||||
decodeUnicode :: TextEncoding -> ByteString -> String
|
decodeUnicode :: TextEncoding -> ByteString -> String
|
||||||
decodeUnicode enc (PS fptr l len) =
|
decodeUnicode enc bs = unsafePerformIO $ decodeUnicodeIO enc bs
|
||||||
unsafePerformIO $ do
|
|
||||||
|
decodeUnicodeIO enc (PS fptr l len) = do
|
||||||
let bbuf = Buffer{bufRaw=fptr, bufState=ReadBuffer, bufSize=len, bufL=l, bufR=l+len}
|
let bbuf = Buffer{bufRaw=fptr, bufState=ReadBuffer, bufSize=len, bufL=l, bufR=l+len}
|
||||||
cbuf <- newCharBuffer 128 WriteBuffer
|
cbuf <- newCharBuffer 128 WriteBuffer
|
||||||
case enc of
|
case enc of
|
||||||
@@ -59,7 +60,7 @@ decodeUnicode enc (PS fptr l len) =
|
|||||||
(bbuf,cbuf) <- cod bbuf cbuf
|
(bbuf,cbuf) <- cod bbuf cbuf
|
||||||
#endif
|
#endif
|
||||||
if isEmptyBuffer cbuf
|
if isEmptyBuffer cbuf
|
||||||
then ioe_invalidCharacter
|
then ioe_invalidCharacter2
|
||||||
else unpack cod bbuf cbuf
|
else unpack cod bbuf cbuf
|
||||||
| otherwise = return []
|
| otherwise = return []
|
||||||
where
|
where
|
||||||
@@ -75,6 +76,10 @@ decodeUnicode enc (PS fptr l len) =
|
|||||||
i = bufL cbuf
|
i = bufL cbuf
|
||||||
w = bufR cbuf
|
w = bufR cbuf
|
||||||
|
|
||||||
ioe_invalidCharacter = ioException
|
ioe_invalidCharacter1 = ioException
|
||||||
(IOError Nothing InvalidArgument ""
|
(IOError Nothing InvalidArgument ""
|
||||||
("invalid byte sequence for this encoding") Nothing Nothing)
|
("invalid byte sequence for this encoding") Nothing Nothing)
|
||||||
|
|
||||||
|
ioe_invalidCharacter2 = ioException
|
||||||
|
(IOError Nothing InvalidArgument ""
|
||||||
|
("invalid byte sequence for this decoding") Nothing Nothing)
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ import qualified Data.ByteString.Lazy as BSL
|
|||||||
import System.FilePath
|
import System.FilePath
|
||||||
import System.IO
|
import System.IO
|
||||||
import Control.Exception
|
import Control.Exception
|
||||||
|
import Control.Monad(unless)
|
||||||
|
|
||||||
mainGFC :: Options -> [FilePath] -> IO ()
|
mainGFC :: Options -> [FilePath] -> IO ()
|
||||||
mainGFC opts fs = do
|
mainGFC opts fs = do
|
||||||
@@ -46,9 +46,8 @@ compileSourceFiles :: Options -> [FilePath] -> IOE ()
|
|||||||
compileSourceFiles opts fs =
|
compileSourceFiles opts fs =
|
||||||
do gr <- batchCompile opts fs
|
do gr <- batchCompile opts fs
|
||||||
let cnc = justModuleName (last fs)
|
let cnc = justModuleName (last fs)
|
||||||
if flag optStopAfterPhase opts == Compile
|
unless (flag optStopAfterPhase opts == Compile) $
|
||||||
then return ()
|
do pgf <- link opts (identS cnc) gr
|
||||||
else do pgf <- link opts (identS cnc) gr
|
|
||||||
writePGF opts pgf
|
writePGF opts pgf
|
||||||
writeByteCode opts pgf
|
writeByteCode opts pgf
|
||||||
writeOutputs opts pgf
|
writeOutputs opts pgf
|
||||||
@@ -57,11 +56,9 @@ compileCFFiles :: Options -> [FilePath] -> IOE ()
|
|||||||
compileCFFiles opts fs =
|
compileCFFiles opts fs =
|
||||||
do s <- liftIO $ fmap unlines $ mapM readFile fs
|
do s <- liftIO $ fmap unlines $ mapM readFile fs
|
||||||
let cnc = justModuleName (last fs)
|
let cnc = justModuleName (last fs)
|
||||||
gf <- getCF cnc s
|
gr <- compileSourceGrammar opts =<< getCF cnc s
|
||||||
gr <- compileSourceGrammar opts gf
|
unless (flag optStopAfterPhase opts == Compile) $
|
||||||
if flag optStopAfterPhase opts == Compile
|
do pgf <- link opts (identS cnc) gr
|
||||||
then return ()
|
|
||||||
else do pgf <- link opts (identS cnc) gr
|
|
||||||
writePGF opts pgf
|
writePGF opts pgf
|
||||||
writeOutputs opts pgf
|
writeOutputs opts pgf
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user