diff --git a/gf.cabal b/gf.cabal index 6bfcebcc5..1ce5e75f9 100644 --- a/gf.cabal +++ b/gf.cabal @@ -1,5 +1,5 @@ name: gf -version: 3.5-darcs +version: 3.5.11-darcs cabal-version: >= 1.8 build-type: Custom @@ -140,8 +140,8 @@ Executable gf if flag(new-comp) cpp-options: -DNEW_COMP - build-tools: happy - --, alex>=2 && <3 -- tricky to install in Ubuntu 12.04 + build-tools: happy, alex>=3 + if os(windows) build-depends: Win32 else diff --git a/src/compiler/GF/Compile/GetGrammar.hs b/src/compiler/GF/Compile/GetGrammar.hs index 2f40d0242..10a857bf9 100644 --- a/src/compiler/GF/Compile/GetGrammar.hs +++ b/src/compiler/GF/Compile/GetGrammar.hs @@ -18,40 +18,61 @@ import Prelude hiding (catch) import GF.Data.Operations -import GF.System.Catch +--import GF.System.Catch import GF.Infra.UseIO -import GF.Infra.Option(Options,optPreprocessors,addOptions,optEncoding,flag,renameEncoding) +import GF.Infra.Option(Options,optPreprocessors,addOptions,renameEncoding,optEncoding,flag,defaultEncoding) import GF.Grammar.Lexer import GF.Grammar.Parser import GF.Grammar.Grammar -import GF.Compile.Coding +--import GF.Compile.Coding +import GF.Compile.ReadFiles(parseSource,lift) +--import GF.Text.Coding(decodeUnicodeIO) import qualified Data.ByteString.Char8 as BS -import Control.Monad (foldM) +import Data.Char(isAscii) +import Control.Monad (foldM,when,unless) import System.Cmd (system) -import System.IO(mkTextEncoding) +--import System.IO(mkTextEncoding) --,utf8 import System.Directory(removeFile) getSourceModule :: Options -> FilePath -> IOE SourceModule -getSourceModule opts file0 = ioe $ - do tmp <- foldM runPreprocessor (Source file0) (flag optPreprocessors opts) - content <- keepTemp tmp - case runP pModDef content of - Left (Pn l c,msg) -> do file <- writeTemp tmp +getSourceModule opts file0 = + errIn file0 $ + do tmp <- lift $ foldM runPreprocessor (Source file0) (flag optPreprocessors opts) + raw <- lift $ keepTemp tmp + --ePutStrLn $ "1 "++file0 + (optCoding,parsed) <- parseSource opts pModDef raw + case parsed of + Left (Pn l c,msg) -> do file <- lift $ writeTemp tmp let location = file++":"++show l++":"++show c - return (Bad (location++":\n "++msg)) - Right (i,mi00) -> - do removeTemp tmp - let mi0 =mi00 {mflags=mflags mi00 `addOptions` opts, msrc=file0} - mi <- transcodeModule (i,mi0) - return (Ok mi) - `catch` (return . Bad . show) + raise (location++":\n "++msg) + Right (i,mi0) -> + do lift $ removeTemp tmp + let mi =mi0 {mflags=mflags mi0 `addOptions` opts, msrc=file0} + optCoding' = renameEncoding `fmap` flag optEncoding (mflags mi0) + case (optCoding,optCoding') of + (Nothing,Nothing) -> + unless (BS.all isAscii raw) $ + ePutStrLn $ file0++":\n Warning: default encoding has changed from Latin-1 to UTF-8" + (_,Just coding') -> + when (coding/=coding') $ + raise $ "Encoding mismatch: "++coding++" /= "++coding' + where coding = maybe defaultEncoding renameEncoding optCoding + _ -> return () + --lift $ transcodeModule' (i,mi) -- old lexer + return (i,mi) -- new lexer +{- transcodeModule sm00 = - do enc <- mkTextEncoding (renameEncoding (flag optEncoding (mflags (snd sm00)))) + do enc <- mkTextEncoding (getEncoding (mflags (snd sm00))) let sm = decodeStringsInModule enc sm00 return sm +transcodeModule' sm00 = + do let enc = utf8 + let sm = decodeStringsInModule enc sm00 + return sm +-} runPreprocessor :: Temporary -> String -> IO Temporary runPreprocessor tmp0 p = diff --git a/src/compiler/GF/Compile/ReadFiles.hs b/src/compiler/GF/Compile/ReadFiles.hs index 5e65dcba6..70b0d6ee6 100644 --- a/src/compiler/GF/Compile/ReadFiles.hs +++ b/src/compiler/GF/Compile/ReadFiles.hs @@ -21,7 +21,8 @@ module GF.Compile.ReadFiles ( getAllFiles,ModName,ModEnv,importsOfModule, gfoFile,gfFile,isGFO,gf2gfo, - getOptionsFromFile) where + parseSource,lift, + getOptionsFromFile,getPragmas) where import Prelude hiding (catch) import GF.System.Catch @@ -34,6 +35,10 @@ import GF.Grammar.Parser import GF.Grammar.Grammar import GF.Grammar.Binary +import System.IO(mkTextEncoding) +import qualified Data.ByteString.UTF8 as UTF8 +import GF.Text.Coding(decodeUnicodeIO) + import Control.Monad import Data.Maybe(isJust) import qualified Data.ByteString.Char8 as BS @@ -50,7 +55,7 @@ type ModEnv = Map.Map ModName (UTCTime,[ModName]) -- | Returns a list of all files to be compiled in topological order i.e. -- the low level (leaf) modules are first. -getAllFiles :: (MonadIO m,ErrorMonad m) => Options -> [InitPath] -> ModEnv -> FileName -> m [FullPath] +--getAllFiles :: (MonadIO m,ErrorMonad m) => Options -> [InitPath] -> ModEnv -> FileName -> m [FullPath] getAllFiles opts ps env file = do -- read module headers from all files recursively ds <- liftM reverse $ get [] [] (justModuleName file) @@ -117,14 +122,10 @@ getAllFiles opts ps env file = do Just mo -> return (st,importsOfModule mo) Nothing | isGFO file -> raise (file ++ " is compiled with different GF version and I can't find the source file") - | otherwise -> do s <- liftIO $ BS.readFile file - case runP pModHeader s of - Left (Pn l c,msg) -> raise (file ++ ":" ++ show l ++ ":" ++ show c ++ ": " ++ msg) - Right mo -> return (CSComp,importsOfModule mo) - CSComp -> do s <- liftIO $ BS.readFile file - case runP pModHeader s of - Left (Pn l c,msg) -> raise (file ++ ":" ++ show l ++ ":" ++ show c ++ ": " ++ msg) - Right mo -> return (st,importsOfModule mo) + | otherwise -> do mo <- parseModHeader opts file + return (CSComp,importsOfModule mo) + CSComp -> do mo <- parseModHeader opts file + return (st,importsOfModule mo) testErr (mname == name) ("module name" +++ mname +++ "differs from file name" +++ name) return (name,st,t,isJust gfTime,imps,dropFileName file) @@ -209,17 +210,55 @@ importsOfModule (m,mi) = (modName m,depModInfo mi []) modName = showIdent + +parseModHeader opts file = + do --ePutStrLn file + (_,parsed) <- parseSource opts pModHeader =<< lift (BS.readFile file) + case parsed of + Right mo -> return mo + Left (Pn l c,msg) -> + raise (file ++ ":" ++ show l ++ ":" ++ show c ++ ": " ++ msg) + +parseSource opts p raw = + do (coding,utf8) <- toUTF8 opts raw + return (coding,runP p utf8) + +toUTF8 opts0 raw = + do opts <- getPragmas raw + let given = flag optEncoding opts -- explicitly given encoding + coding = getEncoding $ opts0 `addOptions` opts + utf8 <- if coding=="UTF-8" + then return raw + else lift $ do --ePutStrLn $ "toUTF8 from "++coding + enc <- mkTextEncoding coding + -- decodeUnicodeIO uses a lot of stack space, + -- so we need to split the file into smaller pieces + ls <- mapM (decodeUnicodeIO enc) (BS.lines raw) + return $ UTF8.fromString (unlines ls) + return (given,utf8) + +--lift io = ioe (fmap Ok io `catch` (return . Bad . show)) +lift io = liftIO io + -- | options can be passed to the compiler by comments in @--#@, in the main file getOptionsFromFile :: (MonadIO m,ErrorMonad m) => FilePath -> m Options getOptionsFromFile file = do s <- either (\_ -> raise $ "File " ++ file ++ " does not exist") return =<< liftIO (try $ BS.readFile file) - let ls = filter (BS.isPrefixOf (BS.pack "--#")) $ BS.lines s - fs = map (BS.unpack . BS.unwords . BS.words . BS.drop 3) ls - parseModuleOptions fs + opts <- getPragmas s + -- The coding flag should not be inherited by other files + return (addOptions opts (modifyFlags $ \ f -> f{optEncoding=Nothing})) -getFilePath :: MonadIO m => [FilePath] -> String -> m (Maybe FilePath) -getFilePath paths file = liftIO $ get paths + +getPragmas :: (ErrorMonad m) => BS.ByteString -> m Options +getPragmas = parseModuleOptions . + map (BS.unpack . BS.unwords . BS.words . BS.drop 3) . + filter (BS.isPrefixOf (BS.pack "--#")) . BS.lines + +--getFilePath :: MonadIO m => [FilePath] -> String -> m (Maybe FilePath) +getFilePath paths file = + liftIO $ do --ePutStrLn $ "getFilePath "++show paths++" "++show file + get paths where get [] = return Nothing get (p:ps) = do diff --git a/src/compiler/GF/Grammar/Lexer.hs b/src/compiler/GF/Grammar/Lexer.hs deleted file mode 100644 index ebb14a31f..000000000 --- a/src/compiler/GF/Grammar/Lexer.hs +++ /dev/null @@ -1,481 +0,0 @@ -{-# LANGUAGE CPP,MagicHash,BangPatterns #-} -{-# LINE 1 "lexer/Lexer.x" #-} - -module GF.Grammar.Lexer - ( Token(..), Posn(..) - , P, runP, lexer, getPosn, failLoc - , isReservedWord - ) where - -import GF.Infra.Ident -import qualified Data.ByteString.Char8 as BS -import qualified Data.Map as Map - - -#if __GLASGOW_HASKELL__ >= 603 -#include "ghcconfig.h" -#elif defined(__GLASGOW_HASKELL__) -#include "config.h" -#endif -#if __GLASGOW_HASKELL__ >= 503 -import Data.Array -import Data.Char (ord) -import Data.Array.Base (unsafeAt) -#else -import Array -import Char (ord) -#endif -#if __GLASGOW_HASKELL__ >= 503 -import GHC.Exts -#else -import GlaExts -#endif -alex_base :: AlexAddr -alex_base = AlexA# "\x01\x00\x00\x00\x38\x00\x00\x00\x39\x00\x00\x00\x17\x00\x00\x00\x18\x00\x00\x00\x00\x00\x00\x00\x44\x00\x00\x00\x45\x00\x00\x00\x19\x00\x00\x00\x24\x00\x00\x00\x25\x00\x00\x00\x4d\x00\x00\x00\x00\x00\x00\x00\x29\x00\x00\x00\x30\x00\x00\x00\xf8\xff\xff\xff\x26\x00\x00\x00\x9c\x00\x00\x00\x00\x00\x00\x00\xd0\x00\x00\x00\xd2\x00\x00\x00\x9e\x00\x00\x00\x6c\x01\x00\x00\x3c\x02\x00\x00\x00\x00\x00\x00\x17\x01\x00\x00\xe7\x01\x00\x00\xa5\x01\x00\x00\xf2\x00\x00\x00\xb7\x01\x00\x00\xff\x00\x00\x00\x0a\x01\x00\x00\x18\x01\x00\x00\x22\x01\x00\x00"# - -alex_table :: AlexAddr -alex_table = AlexA# "\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x0b\x00\x0b\x00\x0b\x00\x0b\x00\x0b\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x0b\x00\x0c\x00\x19\x00\x0c\x00\x0c\x00\x0c\x00\xff\xff\x14\x00\x0c\x00\x0c\x00\x0d\x00\x0e\x00\x0c\x00\x03\x00\x0c\x00\x0c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x0c\x00\x0c\x00\x0c\x00\x10\x00\x0c\x00\x0c\x00\x0c\x00\xff\xff\xff\xff\x01\x00\x07\x00\x07\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x07\x00\x08\x00\x0c\x00\x0c\x00\x0c\x00\x0b\x00\x0b\x00\x0b\x00\x0b\x00\x0b\x00\x0c\x00\x0c\x00\x0f\x00\x0c\x00\xff\xff\x11\x00\xff\xff\x00\x00\x00\x00\x0c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x07\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x0c\x00\x0c\x00\x0c\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x04\x00\x05\x00\x17\x00\x00\x00\x13\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x00\x00\xff\xff\x00\x00\xff\xff\x00\x00\xff\xff\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x12\x00\xff\xff\xff\xff\x13\x00\x17\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1f\x00\xff\xff\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x15\x00\x00\x00\x15\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x18\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x00\x00\x21\x00\x00\x00\x00\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x1b\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x1a\x00\x00\x00\x00\x00\x00\x00\x17\x00\x1a\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x18\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x1b\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x00\x00\x00\x00\x00\x00\x17\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00"# - -alex_check :: AlexAddr -alex_check = AlexA# "\xff\xff\x00\x00\x01\x00\x02\x00\x03\x00\x04\x00\x05\x00\x06\x00\x07\x00\x08\x00\x09\x00\x0a\x00\x0b\x00\x0c\x00\x0d\x00\x0e\x00\x0f\x00\x10\x00\x11\x00\x12\x00\x13\x00\x14\x00\x15\x00\x16\x00\x17\x00\x18\x00\x19\x00\x1a\x00\x1b\x00\x1c\x00\x1d\x00\x1e\x00\x1f\x00\x20\x00\x21\x00\x22\x00\x23\x00\x24\x00\x25\x00\x26\x00\x27\x00\x28\x00\x29\x00\x2a\x00\x2b\x00\x2c\x00\x2d\x00\x2e\x00\x2f\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\x3a\x00\x3b\x00\x3c\x00\x3d\x00\x3e\x00\x3f\x00\x40\x00\x0a\x00\x0a\x00\x2d\x00\x2d\x00\x2d\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\x2d\x00\x2d\x00\x2a\x00\x5c\x00\x3e\x00\x09\x00\x0a\x00\x0b\x00\x0c\x00\x0d\x00\x2b\x00\x5b\x00\x5c\x00\x5d\x00\x5e\x00\x5f\x00\x60\x00\xff\xff\xff\xff\x3e\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x20\x00\xff\xff\xff\xff\xff\xff\x2d\x00\x2d\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x7b\x00\x7c\x00\x7d\x00\x7e\x00\x7f\x00\x80\x00\x81\x00\x82\x00\x83\x00\x84\x00\x85\x00\x86\x00\x87\x00\x88\x00\x89\x00\x8a\x00\x8b\x00\x8c\x00\x8d\x00\x8e\x00\x8f\x00\x90\x00\x91\x00\x92\x00\x93\x00\x94\x00\x95\x00\x96\x00\x97\x00\x98\x00\x99\x00\x9a\x00\x9b\x00\x9c\x00\x9d\x00\x9e\x00\x9f\x00\xa0\x00\xa1\x00\xa2\x00\xa3\x00\xa4\x00\xa5\x00\xa6\x00\xa7\x00\xa8\x00\xa9\x00\xaa\x00\xab\x00\xac\x00\xad\x00\xae\x00\xaf\x00\xb0\x00\xb1\x00\xb2\x00\xb3\x00\xb4\x00\xb5\x00\xb6\x00\xb7\x00\xb8\x00\xb9\x00\xba\x00\xbb\x00\xbc\x00\xbd\x00\xbe\x00\xbf\x00\x7d\x00\x7d\x00\x27\x00\xff\xff\x27\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\xff\xff\xff\xff\xd7\x00\xff\xff\x0a\x00\xff\xff\x0a\x00\x41\x00\x42\x00\x43\x00\x44\x00\x45\x00\x46\x00\x47\x00\x48\x00\x49\x00\x4a\x00\x4b\x00\x4c\x00\x4d\x00\x4e\x00\x4f\x00\x50\x00\x51\x00\x52\x00\x53\x00\x54\x00\x55\x00\x56\x00\x57\x00\x58\x00\x59\x00\x5a\x00\x27\x00\xf7\x00\x27\x00\x5c\x00\x5f\x00\xff\xff\x61\x00\x62\x00\x63\x00\x64\x00\x65\x00\x66\x00\x67\x00\x68\x00\x69\x00\x6a\x00\x6b\x00\x6c\x00\x6d\x00\x6e\x00\x6f\x00\x70\x00\x71\x00\x72\x00\x73\x00\x74\x00\x75\x00\x76\x00\x77\x00\x78\x00\x79\x00\x7a\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x2e\x00\x0a\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\x5c\x00\xff\xff\x5c\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\x22\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\xff\xff\x2d\x00\xff\xff\xff\xff\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\xc0\x00\xc1\x00\xc2\x00\xc3\x00\xc4\x00\xc5\x00\xc6\x00\xc7\x00\xc8\x00\xc9\x00\xca\x00\xcb\x00\xcc\x00\xcd\x00\xce\x00\xcf\x00\xd0\x00\xd1\x00\xd2\x00\xd3\x00\xd4\x00\xd5\x00\xd6\x00\x5c\x00\xd8\x00\xd9\x00\xda\x00\xdb\x00\xdc\x00\xdd\x00\xde\x00\xdf\x00\xe0\x00\xe1\x00\xe2\x00\xe3\x00\xe4\x00\xe5\x00\xe6\x00\xe7\x00\xe8\x00\xe9\x00\xea\x00\xeb\x00\xec\x00\xed\x00\xee\x00\xef\x00\xf0\x00\xf1\x00\xf2\x00\xf3\x00\xf4\x00\xf5\x00\xf6\x00\x27\x00\xf8\x00\xf9\x00\xfa\x00\xfb\x00\xfc\x00\xfd\x00\xfe\x00\xff\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x41\x00\x42\x00\x43\x00\x44\x00\x45\x00\x46\x00\x47\x00\x48\x00\x49\x00\x4a\x00\x4b\x00\x4c\x00\x4d\x00\x4e\x00\x4f\x00\x50\x00\x51\x00\x52\x00\x53\x00\x54\x00\x55\x00\x56\x00\x57\x00\x58\x00\x59\x00\x5a\x00\x22\x00\xff\xff\xff\xff\xff\xff\x5f\x00\x27\x00\x61\x00\x62\x00\x63\x00\x64\x00\x65\x00\x66\x00\x67\x00\x68\x00\x69\x00\x6a\x00\x6b\x00\x6c\x00\x6d\x00\x6e\x00\x6f\x00\x70\x00\x71\x00\x72\x00\x73\x00\x74\x00\x75\x00\x76\x00\x77\x00\x78\x00\x79\x00\x7a\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\x0a\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x5c\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x22\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x6e\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x74\x00\xff\xff\xff\xff\x65\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xc0\x00\xc1\x00\xc2\x00\xc3\x00\xc4\x00\xc5\x00\xc6\x00\xc7\x00\xc8\x00\xc9\x00\xca\x00\xcb\x00\xcc\x00\xcd\x00\xce\x00\xcf\x00\xd0\x00\xd1\x00\xd2\x00\xd3\x00\xd4\x00\xd5\x00\xd6\x00\x5c\x00\xd8\x00\xd9\x00\xda\x00\xdb\x00\xdc\x00\xdd\x00\xde\x00\xdf\x00\xe0\x00\xe1\x00\xe2\x00\xe3\x00\xe4\x00\xe5\x00\xe6\x00\xe7\x00\xe8\x00\xe9\x00\xea\x00\xeb\x00\xec\x00\xed\x00\xee\x00\xef\x00\xf0\x00\xf1\x00\xf2\x00\xf3\x00\xf4\x00\xf5\x00\xf6\x00\x27\x00\xf8\x00\xf9\x00\xfa\x00\xfb\x00\xfc\x00\xfd\x00\xfe\x00\xff\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x41\x00\x42\x00\x43\x00\x44\x00\x45\x00\x46\x00\x47\x00\x48\x00\x49\x00\x4a\x00\x4b\x00\x4c\x00\x4d\x00\x4e\x00\x4f\x00\x50\x00\x51\x00\x52\x00\x53\x00\x54\x00\x55\x00\x56\x00\x57\x00\x58\x00\x59\x00\x5a\x00\xff\xff\xff\xff\xff\xff\xff\xff\x5f\x00\xff\xff\x61\x00\x62\x00\x63\x00\x64\x00\x65\x00\x66\x00\x67\x00\x68\x00\x69\x00\x6a\x00\x6b\x00\x6c\x00\x6d\x00\x6e\x00\x6f\x00\x70\x00\x71\x00\x72\x00\x73\x00\x74\x00\x75\x00\x76\x00\x77\x00\x78\x00\x79\x00\x7a\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xc0\x00\xc1\x00\xc2\x00\xc3\x00\xc4\x00\xc5\x00\xc6\x00\xc7\x00\xc8\x00\xc9\x00\xca\x00\xcb\x00\xcc\x00\xcd\x00\xce\x00\xcf\x00\xd0\x00\xd1\x00\xd2\x00\xd3\x00\xd4\x00\xd5\x00\xd6\x00\xff\xff\xd8\x00\xd9\x00\xda\x00\xdb\x00\xdc\x00\xdd\x00\xde\x00\xdf\x00\xe0\x00\xe1\x00\xe2\x00\xe3\x00\xe4\x00\xe5\x00\xe6\x00\xe7\x00\xe8\x00\xe9\x00\xea\x00\xeb\x00\xec\x00\xed\x00\xee\x00\xef\x00\xf0\x00\xf1\x00\xf2\x00\xf3\x00\xf4\x00\xf5\x00\xf6\x00\xff\xff\xf8\x00\xf9\x00\xfa\x00\xfb\x00\xfc\x00\xfd\x00\xfe\x00\xff\x00"# - -alex_deflt :: AlexAddr -alex_deflt = AlexA# "\x16\x00\x02\x00\x02\x00\xff\xff\x09\x00\xff\xff\x09\x00\x09\x00\x09\x00\x09\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x13\x00\x13\x00\xff\xff\xff\xff\xff\xff\xff\xff\x1a\x00\x1a\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"# - -alex_accept = listArray (0::Int,33) [[],[(AlexAccSkip)],[(AlexAccSkip)],[(AlexAcc (alex_action_3))],[(AlexAccSkip)],[(AlexAccSkip)],[],[],[],[],[(AlexAcc (alex_action_3))],[(AlexAccSkip)],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_4))],[],[],[],[(AlexAcc (alex_action_5))],[(AlexAcc (alex_action_5))],[(AlexAcc (alex_action_6))],[],[],[],[(AlexAcc (alex_action_7))],[(AlexAcc (alex_action_8))],[(AlexAcc (alex_action_8))],[],[],[]] -{-# LINE 39 "lexer/Lexer.x" #-} - - -tok f p s = f s - -data Token - = T_exclmark - | T_patt - | T_int_label - | T_oparen - | T_cparen - | T_tilde - | T_star - | T_starstar - | T_plus - | T_plusplus - | T_comma - | T_minus - | T_rarrow - | T_dot - | T_alt - | T_colon - | T_semicolon - | T_less - | T_equal - | T_big_rarrow - | T_great - | T_questmark - | T_obrack - | T_lam - | T_lamlam - | T_cbrack - | T_ocurly - | T_bar - | T_ccurly - | T_underscore - | T_at - | T_PType - | T_Str - | T_Strs - | T_Tok - | T_Type - | T_abstract - | T_case - | T_cat - | T_concrete - | T_data - | T_def - | T_flags - | T_fn - | T_fun - | T_in - | T_incomplete - | T_instance - | T_interface - | T_let - | T_lin - | T_lincat - | T_lindef - | T_linref - | T_of - | T_open - | T_oper - | T_param - | T_pattern - | T_pre - | T_printname - | T_resource - | T_strs - | T_table - | T_transfer - | T_variants - | T_where - | T_with - | T_String String -- string literals - | T_Integer Int -- integer literals - | T_Double Double -- double precision float literals - | T_Ident Ident - | T_EOF - -eitherResIdent :: (BS.ByteString -> Token) -> BS.ByteString -> Token -eitherResIdent tv s = - case Map.lookup s resWords of - Just t -> t - Nothing -> tv s - -isReservedWord :: BS.ByteString -> Bool -isReservedWord s = Map.member s resWords - -resWords = Map.fromList - [ b "!" T_exclmark - , b "#" T_patt - , b "$" T_int_label - , b "(" T_oparen - , b ")" T_cparen - , b "~" T_tilde - , b "*" T_star - , b "**" T_starstar - , b "+" T_plus - , b "++" T_plusplus - , b "," T_comma - , b "-" T_minus - , b "->" T_rarrow - , b "." T_dot - , b "/" T_alt - , b ":" T_colon - , b ";" T_semicolon - , b "<" T_less - , b "=" T_equal - , b "=>" T_big_rarrow - , b ">" T_great - , b "?" T_questmark - , b "[" T_obrack - , b "]" T_cbrack - , b "\\" T_lam - , b "\\\\" T_lamlam - , b "{" T_ocurly - , b "}" T_ccurly - , b "|" T_bar - , b "_" T_underscore - , b "@" T_at - , b "PType" T_PType - , b "Str" T_Str - , b "Strs" T_Strs - , b "Tok" T_Tok - , b "Type" T_Type - , b "abstract" T_abstract - , b "case" T_case - , b "cat" T_cat - , b "concrete" T_concrete - , b "data" T_data - , b "def" T_def - , b "flags" T_flags - , b "fn" T_fn - , b "fun" T_fun - , b "in" T_in - , b "incomplete" T_incomplete - , b "instance" T_instance - , b "interface" T_interface - , b "let" T_let - , b "lin" T_lin - , b "lincat" T_lincat - , b "lindef" T_lindef - , b "linref" T_linref - , b "of" T_of - , b "open" T_open - , b "oper" T_oper - , b "param" T_param - , b "pattern" T_pattern - , b "pre" T_pre - , b "printname" T_printname - , b "resource" T_resource - , b "strs" T_strs - , b "table" T_table - , b "transfer" T_transfer - , b "variants" T_variants - , b "where" T_where - , b "with" T_with - ] - where b s t = (BS.pack s, t) - -unescapeInitTail :: String -> String -unescapeInitTail = unesc . tail where - unesc s = case s of - '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs - '\\':'n':cs -> '\n' : unesc cs - '\\':'t':cs -> '\t' : unesc cs - '"':[] -> [] - '\'':[] -> [] - c:cs -> c : unesc cs - _ -> [] - -------------------------------------------------------------------- --- Alex wrapper code. --- A modified "posn" wrapper. -------------------------------------------------------------------- - -data Posn = Pn {-# UNPACK #-} !Int - {-# UNPACK #-} !Int - -alexMove :: Posn -> Char -> Posn -alexMove (Pn l c) '\n' = Pn (l+1) 1 -alexMove (Pn l c) _ = Pn l (c+1) - -alexGetChar :: AlexInput -> Maybe (Char,AlexInput) -alexGetChar (AI p _ s) = - case BS.uncons s of - Nothing -> Nothing - Just (c,s) -> - let p' = alexMove p c - in p' `seq` Just (c, (AI p' c s)) - -alexInputPrevChar :: AlexInput -> Char -alexInputPrevChar (AI p c s) = c - -data AlexInput = AI {-# UNPACK #-} !Posn -- current position, - {-# UNPACK #-} !Char -- previous char - {-# UNPACK #-} !BS.ByteString -- current input string - -data ParseResult a - = POk a - | PFailed Posn -- The position of the error - String -- The error message - -newtype P a = P { unP :: AlexInput -> ParseResult a } - -instance Monad P where - return a = a `seq` (P $ \s -> POk a) - (P m) >>= k = P $ \ s -> case m s of - POk a -> unP (k a) s - PFailed posn err -> PFailed posn err - fail msg = P $ \(AI posn _ _) -> PFailed posn msg - -runP :: P a -> BS.ByteString -> Either (Posn,String) a -runP (P f) txt = - case f (AI (Pn 1 0) ' ' txt) of - POk x -> Right x - PFailed pos msg -> Left (pos,msg) - -failLoc :: Posn -> String -> P a -failLoc pos msg = P $ \_ -> PFailed pos msg - -lexer :: (Token -> P a) -> P a -lexer cont = P go - where - go inp@(AI pos _ str) = - case alexScan inp 0 of - AlexEOF -> unP (cont T_EOF) inp - AlexError (AI pos _ _) -> PFailed pos "lexical error" - AlexSkip inp' len -> go inp' - AlexToken inp' len act -> unP (cont (act pos (BS.take len str))) inp' - -getPosn :: P Posn -getPosn = P $ \inp@(AI pos _ _) -> POk pos - - -alex_action_3 = tok (eitherResIdent (T_Ident . identC . rawIdentC)) -alex_action_4 = tok (eitherResIdent (T_Ident . identC . rawIdentS . unescapeInitTail . BS.unpack)) -alex_action_5 = tok (eitherResIdent (T_Ident . identC . rawIdentC)) -alex_action_6 = tok (T_String . unescapeInitTail . BS.unpack) -alex_action_7 = tok (T_Integer . read . BS.unpack) -alex_action_8 = tok (T_Double . read . BS.unpack) -{-# LINE 1 "templates/GenericTemplate.hs" #-} -{-# LINE 1 "templates/GenericTemplate.hs" #-} -{-# LINE 1 "" #-} -{-# LINE 1 "" #-} -{-# LINE 1 "templates/GenericTemplate.hs" #-} --- ----------------------------------------------------------------------------- --- ALEX TEMPLATE --- --- This code is in the PUBLIC DOMAIN; you may copy it freely and use --- it for any purpose whatsoever. - --- ----------------------------------------------------------------------------- --- INTERNALS and main scanner engine - -{-# LINE 37 "templates/GenericTemplate.hs" #-} - -{-# LINE 47 "templates/GenericTemplate.hs" #-} - - -data AlexAddr = AlexA# Addr# - -#if __GLASGOW_HASKELL__ < 503 -uncheckedShiftL# = shiftL# -#endif - -{-# INLINE alexIndexInt16OffAddr #-} -alexIndexInt16OffAddr (AlexA# arr) off = -#ifdef WORDS_BIGENDIAN - narrow16Int# i - where - !i = word2Int# ((high `uncheckedShiftL#` 8#) `or#` low) - !high = int2Word# (ord# (indexCharOffAddr# arr (off' +# 1#))) - !low = int2Word# (ord# (indexCharOffAddr# arr off')) - !off' = off *# 2# -#else - indexInt16OffAddr# arr off -#endif - - - - - -{-# INLINE alexIndexInt32OffAddr #-} -alexIndexInt32OffAddr (AlexA# arr) off = -#ifdef WORDS_BIGENDIAN - narrow32Int# i - where - !i = word2Int# ((b3 `uncheckedShiftL#` 24#) `or#` - (b2 `uncheckedShiftL#` 16#) `or#` - (b1 `uncheckedShiftL#` 8#) `or#` b0) - !b3 = int2Word# (ord# (indexCharOffAddr# arr (off' +# 3#))) - !b2 = int2Word# (ord# (indexCharOffAddr# arr (off' +# 2#))) - !b1 = int2Word# (ord# (indexCharOffAddr# arr (off' +# 1#))) - !b0 = int2Word# (ord# (indexCharOffAddr# arr off')) - !off' = off *# 4# -#else - indexInt32OffAddr# arr off -#endif - - - - - -#if __GLASGOW_HASKELL__ < 503 -quickIndex arr i = arr ! i -#else --- GHC >= 503, unsafeAt is available from Data.Array.Base. -quickIndex = unsafeAt -#endif - - - - --- ----------------------------------------------------------------------------- --- Main lexing routines - -data AlexReturn a - = AlexEOF - | AlexError !AlexInput - | AlexSkip !AlexInput !Int - | AlexToken !AlexInput !Int a - --- alexScan :: AlexInput -> StartCode -> AlexReturn a -alexScan input (I# (sc)) - = alexScanUser undefined input (I# (sc)) - -alexScanUser user input (I# (sc)) - = case alex_scan_tkn user input 0# input sc AlexNone of - (AlexNone, input') -> - case alexGetChar input of - Nothing -> - - - - AlexEOF - Just _ -> - - - - AlexError input' - - (AlexLastSkip input'' len, _) -> - - - - AlexSkip input'' len - - (AlexLastAcc k input''' len, _) -> - - - - AlexToken input''' len k - - --- Push the input through the DFA, remembering the most recent accepting --- state it encountered. - -alex_scan_tkn user orig_input len input s last_acc = - input `seq` -- strict in the input - let - new_acc = check_accs (alex_accept `quickIndex` (I# (s))) - in - new_acc `seq` - case alexGetChar input of - Nothing -> (new_acc, input) - Just (c, new_input) -> - - - - let - (!(base)) = alexIndexInt32OffAddr alex_base s - (!((I# (ord_c)))) = ord c - (!(offset)) = (base +# ord_c) - (!(check)) = alexIndexInt16OffAddr alex_check offset - - (!(new_s)) = if (offset >=# 0#) && (check ==# ord_c) - then alexIndexInt16OffAddr alex_table offset - else alexIndexInt16OffAddr alex_deflt s - in - case new_s of - -1# -> (new_acc, input) - -- on an error, we want to keep the input *before* the - -- character that failed, not after. - _ -> alex_scan_tkn user orig_input (len +# 1#) - new_input new_s new_acc - - where - check_accs [] = last_acc - check_accs (AlexAcc a : _) = AlexLastAcc a input (I# (len)) - check_accs (AlexAccSkip : _) = AlexLastSkip input (I# (len)) - check_accs (AlexAccPred a predx : rest) - | predx user orig_input (I# (len)) input - = AlexLastAcc a input (I# (len)) - check_accs (AlexAccSkipPred predx : rest) - | predx user orig_input (I# (len)) input - = AlexLastSkip input (I# (len)) - check_accs (_ : rest) = check_accs rest - -data AlexLastAcc a - = AlexNone - | AlexLastAcc a !AlexInput !Int - | AlexLastSkip !AlexInput !Int - -data AlexAcc a user - = AlexAcc a - | AlexAccSkip - | AlexAccPred a (AlexAccPred user) - | AlexAccSkipPred (AlexAccPred user) - -type AlexAccPred user = user -> AlexInput -> Int -> AlexInput -> Bool - --- ----------------------------------------------------------------------------- --- Predicates on a rule - -alexAndPred p1 p2 user in1 len in2 - = p1 user in1 len in2 && p2 user in1 len in2 - ---alexPrevCharIsPred :: Char -> AlexAccPred _ -alexPrevCharIs c _ input _ _ = c == alexInputPrevChar input - ---alexPrevCharIsOneOfPred :: Array Char Bool -> AlexAccPred _ -alexPrevCharIsOneOf arr _ input _ _ = arr ! alexInputPrevChar input - ---alexRightContext :: Int -> AlexAccPred _ -alexRightContext (I# (sc)) user _ _ input = - case alex_scan_tkn user input 0# input sc AlexNone of - (AlexNone, _) -> False - _ -> True - -- TODO: there's no need to find the longest - -- match when checking the right context, just - -- the first match will do. - --- used by wrappers -iUnbox (I# (i)) = i diff --git a/src/compiler/GF/Grammar/lexer/Lexer.x b/src/compiler/GF/Grammar/Lexer.x similarity index 80% rename from src/compiler/GF/Grammar/lexer/Lexer.x rename to src/compiler/GF/Grammar/Lexer.x index 460e7f452..60c51f814 100644 --- a/src/compiler/GF/Grammar/lexer/Lexer.x +++ b/src/compiler/GF/Grammar/Lexer.x @@ -1,3 +1,4 @@ +-- -*- haskell -*- { module GF.Grammar.Lexer ( Token(..), Posn(..) @@ -6,9 +7,14 @@ module GF.Grammar.Lexer ) where import GF.Infra.Ident +--import GF.Data.Operations import qualified Data.ByteString.Char8 as BS +import qualified Data.ByteString as WBS +import qualified Data.ByteString.Internal as BS(w2c) +import qualified Data.ByteString.UTF8 as UTF8 import qualified Data.Map as Map - +import Data.Word(Word8) +--import Debug.Trace(trace) } @@ -17,7 +23,7 @@ $c = [A-Z\192-\221] # [\215] $s = [a-z\222-\255] # [\247] $d = [0-9] -- digit $i = [$l $d _ '] -- identifier character -$u = [\0-\255] -- universal: any character +$u = [.\n] -- universal: any character @rsyms = -- symbols and non-identifier-like reserved words \; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/ @@ -27,17 +33,20 @@ $u = [\0-\255] -- universal: any character "{-" ([$u # \-] | \- [$u # \}])* ("-")+ "}" ; $white+ ; -@rsyms { tok (eitherResIdent (T_Ident . identC . rawIdentC)) } -\' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (eitherResIdent (T_Ident . identC . rawIdentS . unescapeInitTail . BS.unpack)) } -(\_ | $l)($l | $d | \_ | \')* { tok (eitherResIdent (T_Ident . identC . rawIdentC)) } +@rsyms { tok (res (T_Ident . identS)) } +\' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (eitherResIdent (T_Ident . identC . rawIdentS . unescapeInitTail . unpack)) } +(\_ | $l)($l | $d | \_ | \')* { tok (res (T_Ident . identS)) } -\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \" { tok (T_String . unescapeInitTail . BS.unpack) } +\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \" { tok (T_String . unescapeInitTail . unpack) } -(\-)? $d+ { tok (T_Integer . read . BS.unpack) } -(\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . BS.unpack) } +(\-)? $d+ { tok (T_Integer . read . unpack) } +(\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . unpack) } { +--unpack = BS.unpack +unpack = id +tok :: (String->Token) -> Posn -> String -> Token tok f p s = f s data Token @@ -114,15 +123,17 @@ data Token | T_Double Double -- double precision float literals | T_Ident Ident | T_EOF +-- deriving Show -- debug -eitherResIdent :: (BS.ByteString -> Token) -> BS.ByteString -> Token +res = eitherResIdent +eitherResIdent :: (String -> Token) -> String -> Token eitherResIdent tv s = case Map.lookup s resWords of Just t -> t Nothing -> tv s isReservedWord :: BS.ByteString -> Bool -isReservedWord s = Map.member s resWords +isReservedWord s = Map.member (BS.unpack s) resWords resWords = Map.fromList [ b "!" T_exclmark @@ -194,7 +205,7 @@ resWords = Map.fromList , b "where" T_where , b "with" T_with ] - where b s t = (BS.pack s, t) + where b s t = (s, t) unescapeInitTail :: String -> String unescapeInitTail = unesc . tail where @@ -202,7 +213,7 @@ unescapeInitTail = unesc . tail where '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs '\\':'n':cs -> '\n' : unesc cs '\\':'t':cs -> '\t' : unesc cs - '"':[] -> [] + '\"':[] -> [] '\'':[] -> [] c:cs -> c : unesc cs _ -> [] @@ -219,13 +230,14 @@ alexMove :: Posn -> Char -> Posn alexMove (Pn l c) '\n' = Pn (l+1) 1 alexMove (Pn l c) _ = Pn l (c+1) -alexGetChar :: AlexInput -> Maybe (Char,AlexInput) -alexGetChar (AI p _ s) = - case BS.uncons s of +alexGetByte :: AlexInput -> Maybe (Word8,AlexInput) +alexGetByte (AI p _ s) = + case WBS.uncons s of Nothing -> Nothing - Just (c,s) -> + Just (w,s) -> let p' = alexMove p c - in p' `seq` Just (c, (AI p' c s)) + c = BS.w2c w + in p' `seq` Just (w, (AI p' c s)) alexInputPrevChar :: AlexInput -> Char alexInputPrevChar (AI p c s) = c @@ -260,12 +272,13 @@ failLoc pos msg = P $ \_ -> PFailed pos msg lexer :: (Token -> P a) -> P a lexer cont = P go where + --cont' t = trace (show t) (cont t) go inp@(AI pos _ str) = case alexScan inp 0 of AlexEOF -> unP (cont T_EOF) inp AlexError (AI pos _ _) -> PFailed pos "lexical error" - AlexSkip inp' len -> go inp' - AlexToken inp' len act -> unP (cont (act pos (BS.take len str))) inp' + AlexSkip inp' len -> {-trace (show len) $-} go inp' + AlexToken inp' len act -> unP (cont (act pos (UTF8.toString (UTF8.take len str)))) inp' getPosn :: P Posn getPosn = P $ \inp@(AI pos _ _) -> POk pos diff --git a/src/compiler/GF/Grammar/Parser.y b/src/compiler/GF/Grammar/Parser.y index bc2a394b1..028da18c6 100644 --- a/src/compiler/GF/Grammar/Parser.y +++ b/src/compiler/GF/Grammar/Parser.y @@ -17,8 +17,8 @@ import GF.Grammar.Grammar import GF.Grammar.Macros import GF.Grammar.Lexer import GF.Compile.Update (buildAnyTree) -import Codec.Binary.UTF8.String(decodeString) -import Data.Char(toLower) +--import Codec.Binary.UTF8.String(decodeString) +--import Data.Char(toLower) } %name pModDef ModDef @@ -616,9 +616,9 @@ happyError = fail "syntax error" -- Quick fix to render error messages from UTF-8-encoded source files correctly. optDecode opts = - if map toLower (flag optEncoding opts) `elem` ["utf8","utf-8"] + {-if map toLower (getEncoding opts) `elem` ["utf8","utf-8"] then decodeString - else id + else-} id mkListId,mkConsId,mkBaseId :: Ident -> Ident mkListId = prefixIdent "List" diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs index 115665419..08f0df18b 100644 --- a/src/compiler/GF/Infra/Option.hs +++ b/src/compiler/GF/Infra/Option.hs @@ -20,7 +20,7 @@ module GF.Infra.Option helpMessage, -- * Checking specific options flag, cfgTransform, haskellOption, readOutputFormat, - isLexicalCat, isLiteralCat, renameEncoding, + isLexicalCat, isLiteralCat, renameEncoding, getEncoding, defaultEncoding, -- * Setting specific options setOptimization, setCFGTransform, -- * Convenience methods for checking options @@ -157,7 +157,7 @@ data Flags = Flags { optRetainResource :: Bool, optName :: Maybe String, optPreprocessors :: [String], - optEncoding :: String, + optEncoding :: Maybe String, optPMCFG :: Bool, optOptimizations :: Set Optimization, optOptimizePGF :: Bool, @@ -213,7 +213,7 @@ fixRelativeLibPaths curr_dir lib_dir (Options o) = Options (fixPathFlags . o) -- | Pretty-print the options that are preserved in .gfo files. optionsGFO :: Options -> [(String,Literal)] optionsGFO opts = optionsPGF opts - ++ [("coding", LStr (flag optEncoding opts))] + ++ [("coding", LStr (getEncoding opts))] -- | Pretty-print the options that are preserved in .pgf files. optionsPGF :: Options -> [(String,Literal)] @@ -241,6 +241,10 @@ concatOptions = foldr addOptions noOptions modifyFlags :: (Flags -> Flags) -> Options modifyFlags = Options +getEncoding :: Options -> String +getEncoding = renameEncoding . maybe defaultEncoding id . flag optEncoding +defaultEncoding = "UTF-8" + -- Default options defaultFlags :: Flags @@ -264,7 +268,7 @@ defaultFlags = Flags { optName = Nothing, optPreprocessors = [], - optEncoding = "latin1", + optEncoding = Nothing, optPMCFG = True, optOptimizations = Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize], optOptimizePGF = False, @@ -419,7 +423,7 @@ optDescr = addLibDir x = set $ \o -> o { optLibraryPath = x:optLibraryPath o } setLibPath x = set $ \o -> o { optLibraryPath = splitInModuleSearchPath x } preproc x = set $ \o -> o { optPreprocessors = optPreprocessors o ++ [x] } - coding x = set $ \o -> o { optEncoding = x } + coding x = set $ \o -> o { optEncoding = Just x } startcat x = set $ \o -> o { optStartCat = Just x } language x = set $ \o -> o { optSpeechLanguage = Just x } lexer x = set $ \o -> o { optLexer = Just x } diff --git a/src/compiler/GF/Text/Coding.hs b/src/compiler/GF/Text/Coding.hs index e347730e0..3669733d0 100644 --- a/src/compiler/GF/Text/Coding.hs +++ b/src/compiler/GF/Text/Coding.hs @@ -31,7 +31,7 @@ encodeUnicode enc s = (cbuf,bbuf) <- cod cbuf bbuf #endif if isEmptyBuffer bbuf - then ioe_invalidCharacter + then ioe_invalidCharacter1 else do let bs = PS (bufRaw bbuf) (bufL bbuf) (bufR bbuf-bufL bbuf) bss <- translate cod cbuf return (bs:bss) @@ -41,8 +41,9 @@ encodeUnicode enc s = w = bufR cbuf decodeUnicode :: TextEncoding -> ByteString -> String -decodeUnicode enc (PS fptr l len) = - unsafePerformIO $ do +decodeUnicode enc bs = unsafePerformIO $ decodeUnicodeIO enc bs + +decodeUnicodeIO enc (PS fptr l len) = do let bbuf = Buffer{bufRaw=fptr, bufState=ReadBuffer, bufSize=len, bufL=l, bufR=l+len} cbuf <- newCharBuffer 128 WriteBuffer case enc of @@ -59,7 +60,7 @@ decodeUnicode enc (PS fptr l len) = (bbuf,cbuf) <- cod bbuf cbuf #endif if isEmptyBuffer cbuf - then ioe_invalidCharacter + then ioe_invalidCharacter2 else unpack cod bbuf cbuf | otherwise = return [] where @@ -75,6 +76,10 @@ decodeUnicode enc (PS fptr l len) = i = bufL cbuf w = bufR cbuf -ioe_invalidCharacter = ioException +ioe_invalidCharacter1 = ioException (IOError Nothing InvalidArgument "" ("invalid byte sequence for this encoding") Nothing Nothing) + +ioe_invalidCharacter2 = ioException + (IOError Nothing InvalidArgument "" + ("invalid byte sequence for this decoding") Nothing Nothing) diff --git a/src/compiler/GFC.hs b/src/compiler/GFC.hs index dd9f1771b..99156e16d 100644 --- a/src/compiler/GFC.hs +++ b/src/compiler/GFC.hs @@ -24,7 +24,7 @@ import qualified Data.ByteString.Lazy as BSL import System.FilePath import System.IO import Control.Exception - +import Control.Monad(unless) mainGFC :: Options -> [FilePath] -> IO () mainGFC opts fs = do @@ -46,9 +46,8 @@ compileSourceFiles :: Options -> [FilePath] -> IOE () compileSourceFiles opts fs = do gr <- batchCompile opts fs let cnc = justModuleName (last fs) - if flag optStopAfterPhase opts == Compile - then return () - else do pgf <- link opts (identS cnc) gr + unless (flag optStopAfterPhase opts == Compile) $ + do pgf <- link opts (identS cnc) gr writePGF opts pgf writeByteCode opts pgf writeOutputs opts pgf @@ -57,11 +56,9 @@ compileCFFiles :: Options -> [FilePath] -> IOE () compileCFFiles opts fs = do s <- liftIO $ fmap unlines $ mapM readFile fs let cnc = justModuleName (last fs) - gf <- getCF cnc s - gr <- compileSourceGrammar opts gf - if flag optStopAfterPhase opts == Compile - then return () - else do pgf <- link opts (identS cnc) gr + gr <- compileSourceGrammar opts =<< getCF cnc s + unless (flag optStopAfterPhase opts == Compile) $ + do pgf <- link opts (identS cnc) gr writePGF opts pgf writeOutputs opts pgf