From 021b5f06d3900fe2b10d5d3ccf6ac286a779ef16 Mon Sep 17 00:00:00 2001 From: hallgren Date: Thu, 19 Sep 2013 20:48:10 +0000 Subject: [PATCH] Introduce type RawIdent; only 9 imports of Data.ByteString.Char8 remain The fact that identifiers are represented as ByteStrings is now an internal implentation detail in module GF.Infra.Ident. Conversion between ByteString and identifiers is only needed in the lexer and the Binary instances. --- src/compiler/GF/Command/Importing.hs | 5 +-- src/compiler/GF/Compile.hs | 3 +- src/compiler/GF/Compile/GrammarToPGF.hs | 4 +- src/compiler/GF/Compile/SubExOpt.hs | 9 ++-- src/compiler/GF/Grammar/Binary.hs | 5 ++- src/compiler/GF/Grammar/Grammar.hs | 14 +++--- src/compiler/GF/Grammar/Lexer.hs | 10 ++--- src/compiler/GF/Grammar/Lockfield.hs | 8 ++-- src/compiler/GF/Grammar/MMacros.hs | 11 +++-- src/compiler/GF/Grammar/Parser.y | 4 +- src/compiler/GF/Grammar/lexer/Lexer.x | 4 +- src/compiler/GF/Infra/Dependencies.hs | 2 +- src/compiler/GF/Infra/Ident.hs | 57 ++++++++++++++++--------- src/compiler/GFI.hs | 2 +- 14 files changed, 74 insertions(+), 64 deletions(-) diff --git a/src/compiler/GF/Command/Importing.hs b/src/compiler/GF/Command/Importing.hs index d8b7f0e0c..ce06156e4 100644 --- a/src/compiler/GF/Command/Importing.hs +++ b/src/compiler/GF/Command/Importing.hs @@ -5,7 +5,7 @@ import PGF.Data import GF.Compile import GF.Compile.Multi (readMulti) -import GF.Grammar (identC, SourceGrammar) -- for cc command +import GF.Grammar (identS, SourceGrammar) -- for cc command import GF.Grammar.CF import GF.Grammar.EBNF import GF.Infra.UseIO @@ -13,7 +13,6 @@ import GF.Infra.Option import GF.Data.ErrM import Data.List (nubBy) -import qualified Data.ByteString.Char8 as BS import System.FilePath -- import a grammar in an environment where it extends an existing grammar @@ -59,7 +58,7 @@ importCF opts files get = do Ok gf -> return gf Bad s -> error s ---- Ok gr <- appIOE $ compileSourceGrammar opts gf - epgf <- appIOE $ link opts (identC (BS.pack (justModuleName (last files) ++ "Abs"))) gr + epgf <- appIOE $ link opts (identS (justModuleName (last files) ++ "Abs")) gr case epgf of Ok pgf -> return pgf Bad s -> error s ---- diff --git a/src/compiler/GF/Compile.hs b/src/compiler/GF/Compile.hs index 67fd750a2..e572920df 100644 --- a/src/compiler/GF/Compile.hs +++ b/src/compiler/GF/Compile.hs @@ -35,7 +35,6 @@ import qualified Data.Map as Map --import qualified Data.Set as Set import Data.List(nub) import Data.Maybe (isNothing) -import qualified Data.ByteString.Char8 as BS import Text.PrettyPrint import PGF.CId @@ -50,7 +49,7 @@ compileToPGF :: Options -> [FilePath] -> IOE PGF compileToPGF opts fs = do gr <- batchCompile opts fs let name = justModuleName (last fs) - link opts (identC (BS.pack name)) gr + link opts (identS name) gr link :: Options -> Ident -> SourceGrammar -> IOE PGF link opts cnc gr = do diff --git a/src/compiler/GF/Compile/GrammarToPGF.hs b/src/compiler/GF/Compile/GrammarToPGF.hs index 5ec7f9774..b1a2c5d33 100644 --- a/src/compiler/GF/Compile/GrammarToPGF.hs +++ b/src/compiler/GF/Compile/GrammarToPGF.hs @@ -30,7 +30,6 @@ import Data.Char (isDigit,isSpace) import qualified Data.Set as Set import qualified Data.Map as Map import qualified Data.IntMap as IntMap -import qualified Data.ByteString.Char8 as BS import Data.Array.IArray import Text.PrettyPrint import Control.Monad.Identity @@ -250,8 +249,7 @@ genCncFuns gr am cm seqs0 cdefs fid_cnt cnccats = ctxt = mapM (mkCtxt lindefs) hargs_C fids = map (mkFId arg_C) fid0s - mkLinDefId id = - identC (BS.append (BS.pack "lindef ") (ident2bs id)) + mkLinDefId id = prefixIdent "lindef " id toLinDef res offs lindefs (Production fid0 funid0 _) = IntMap.insertWith (++) fid [offs+funid0] lindefs diff --git a/src/compiler/GF/Compile/SubExOpt.hs b/src/compiler/GF/Compile/SubExOpt.hs index bfa2a1334..4c056f479 100644 --- a/src/compiler/GF/Compile/SubExOpt.hs +++ b/src/compiler/GF/Compile/SubExOpt.hs @@ -32,8 +32,7 @@ import GF.Data.Operations import Control.Monad import Data.Map (Map) import qualified Data.Map as Map -import qualified Data.ByteString.Char8 as BS -import Data.List + subexpModule :: SourceModule -> SourceModule subexpModule (n,mo) = errVal (n,mo) $ do @@ -133,9 +132,9 @@ collectSubterms mo t = case t of return t --- only because of composOp operIdent :: Int -> Ident -operIdent i = identC (operPrefix `BS.append` (BS.pack (show i))) --- +operIdent i = identC (operPrefix `prefixRawIdent` (rawIdentS (show i))) --- isOperIdent :: Ident -> Bool -isOperIdent id = BS.isPrefixOf operPrefix (ident2bs id) +isOperIdent id = isPrefixOf operPrefix (ident2raw id) -operPrefix = BS.pack ("A''") +operPrefix = rawIdentS ("A''") diff --git a/src/compiler/GF/Grammar/Binary.hs b/src/compiler/GF/Grammar/Binary.hs index ae0c72809..fab63a7ba 100644 --- a/src/compiler/GF/Grammar/Binary.hs +++ b/src/compiler/GF/Grammar/Binary.hs @@ -34,7 +34,7 @@ instance Binary Ident where get = do bs <- get if bs == BS.pack "_" then return identW - else return (identC bs) + else return (identC (rawIdentC bs)) instance Binary SourceGrammar where put = put . modules @@ -289,6 +289,9 @@ instance Binary Label where 1 -> fmap LVar get _ -> decodingError +instance Binary RawIdent where + put = put . rawId2bs + get = fmap rawIdentC get putGFOVersion = mapM_ (putWord8 . fromIntegral . ord) gfoVersion getGFOVersion = replicateM (length gfoVersion) (fmap (chr . fromIntegral) getWord8) diff --git a/src/compiler/GF/Grammar/Grammar.hs b/src/compiler/GF/Grammar/Grammar.hs index c59cd809e..2efec220b 100644 --- a/src/compiler/GF/Grammar/Grammar.hs +++ b/src/compiler/GF/Grammar/Grammar.hs @@ -74,7 +74,6 @@ import Data.Array.Unboxed import qualified Data.Map as Map import qualified Data.Set as Set import qualified Data.IntMap as IntMap -import qualified Data.ByteString.Char8 as BS import Text.PrettyPrint import System.FilePath import Control.Monad.Identity @@ -472,7 +471,7 @@ data TInfo = -- | record label data Label = - LIdent BS.ByteString + LIdent RawIdent | LVar Int deriving (Show, Eq, Ord) @@ -497,16 +496,15 @@ varLabel :: Int -> Label varLabel = LVar tupleLabel, linLabel :: Int -> Label -tupleLabel i = LIdent $! BS.pack ('p':show i) -linLabel i = LIdent $! BS.pack ('s':show i) +tupleLabel i = LIdent $! rawIdentS ('p':show i) +linLabel i = LIdent $! rawIdentS ('s':show i) theLinLabel :: Label -theLinLabel = LIdent (BS.singleton 's') +theLinLabel = LIdent (rawIdentS "s") ident2label :: Ident -> Label -ident2label c = LIdent (ident2bs c) +ident2label c = LIdent (ident2raw c) label2ident :: Label -> Ident label2ident (LIdent s) = identC s -label2ident (LVar i) = identC (BS.pack ('$':show i)) - +label2ident (LVar i) = identS ('$':show i) diff --git a/src/compiler/GF/Grammar/Lexer.hs b/src/compiler/GF/Grammar/Lexer.hs index 3ab9eda61..a9fef2cc4 100644 --- a/src/compiler/GF/Grammar/Lexer.hs +++ b/src/compiler/GF/Grammar/Lexer.hs @@ -278,12 +278,12 @@ getPosn :: P Posn getPosn = P $ \inp@(AI pos _ _) -> POk pos -alex_action_3 = tok (eitherResIdent (T_Ident . identC)) +alex_action_3 = tok (eitherResIdent (T_Ident . identC . rawIdentC)) alex_action_4 = tok (eitherResIdent (T_LString . BS.unpack)) -alex_action_5 = tok (eitherResIdent (T_Ident . identC)) -alex_action_6 = tok (T_String . unescapeInitTail . BS.unpack) -alex_action_7 = tok (T_Integer . read . BS.unpack) -alex_action_8 = tok (T_Double . read . BS.unpack) +alex_action_5 = tok (eitherResIdent (T_Ident . identC . rawIdentC)) +alex_action_6 = tok (T_String . unescapeInitTail . BS.unpack) +alex_action_7 = tok (T_Integer . read . BS.unpack) +alex_action_8 = tok (T_Double . read . BS.unpack) {-# LINE 1 "templates/GenericTemplate.hs" #-} {-# LINE 1 "templates/GenericTemplate.hs" #-} {-# LINE 1 "" #-} diff --git a/src/compiler/GF/Grammar/Lockfield.hs b/src/compiler/GF/Grammar/Lockfield.hs index 8b0798527..5c2f5d0f0 100644 --- a/src/compiler/GF/Grammar/Lockfield.hs +++ b/src/compiler/GF/Grammar/Lockfield.hs @@ -16,8 +16,6 @@ module GF.Grammar.Lockfield (lockRecType, unlockRecord, lockLabel, isLockLabel) where -import qualified Data.ByteString.Char8 as BS - import GF.Infra.Ident import GF.Grammar.Grammar import GF.Grammar.Macros @@ -41,12 +39,12 @@ unlockRecord c ft = do _ -> return $ mkAbs xs (ExtR t lock) lockLabel :: Ident -> Label -lockLabel c = LIdent $! BS.append lockPrefix (ident2bs c) +lockLabel c = LIdent $! prefixRawIdent lockPrefix (ident2raw c) isLockLabel :: Label -> Bool isLockLabel l = case l of - LIdent c -> BS.isPrefixOf lockPrefix c + LIdent c -> isPrefixOf lockPrefix c _ -> False -lockPrefix = BS.pack "lock_" +lockPrefix = rawIdentS "lock_" diff --git a/src/compiler/GF/Grammar/MMacros.hs b/src/compiler/GF/Grammar/MMacros.hs index f1d2f172a..1b9060003 100644 --- a/src/compiler/GF/Grammar/MMacros.hs +++ b/src/compiler/GF/Grammar/MMacros.hs @@ -26,7 +26,6 @@ import GF.Grammar.Values import GF.Grammar.Macros import Control.Monad -import qualified Data.ByteString.Char8 as BS import Text.PrettyPrint {- @@ -238,11 +237,11 @@ qualifTerm m = qualif [] where Cn c -> Q (m,c) Con c -> QC (m,c) _ -> composSafeOp (qualif xs) t - chV x = string2var $ ident2bs x + chV x = string2var $ ident2raw x -string2var :: BS.ByteString -> Ident -string2var s = case BS.unpack s of - c:'_':i -> identV (BS.singleton c) (readIntArg i) --- +string2var :: RawIdent -> Ident +string2var s = case showRawIdent s of + c:'_':i -> identV (rawIdentS [c]) (readIntArg i) --- _ -> identC s -- | reindex variables so that they tell nesting depth level @@ -254,7 +253,7 @@ reindexTerm = qualif (0,[]) where Vr x -> Vr $ look x g _ -> composSafeOp (qualif dg) t look x = maybe x id . lookup x --- if x is not in scope it is unchanged - ind x d = identC $ ident2bs x `BS.append` BS.singleton '_' `BS.append` BS.pack (show d) + ind x d = identC $ ident2raw x `prefixRawIdent` rawIdentS "_" `prefixRawIdent` rawIdentS (show d) {- -- this method works for context-free abstract syntax diff --git a/src/compiler/GF/Grammar/Parser.y b/src/compiler/GF/Grammar/Parser.y index a84db6ffd..e5a7f359c 100644 --- a/src/compiler/GF/Grammar/Parser.y +++ b/src/compiler/GF/Grammar/Parser.y @@ -507,11 +507,11 @@ Patt3 PattAss :: { [(Label,Patt)] } PattAss - : ListIdent '=' Patt { [(LIdent (ident2bs i),$3) | i <- $1] } + : ListIdent '=' Patt { [(LIdent (ident2raw i),$3) | i <- $1] } Label :: { Label } Label - : Ident { LIdent (ident2bs $1) } + : Ident { LIdent (ident2raw $1) } | '$' Integer { LVar (fromIntegral $2) } Sort :: { Ident } diff --git a/src/compiler/GF/Grammar/lexer/Lexer.x b/src/compiler/GF/Grammar/lexer/Lexer.x index ca796808b..4050f4854 100644 --- a/src/compiler/GF/Grammar/lexer/Lexer.x +++ b/src/compiler/GF/Grammar/lexer/Lexer.x @@ -30,9 +30,9 @@ $u = [\0-\255] -- universal: any character "{-" ([$u # \-] | \- [$u # \}])* ("-")+ "}" ; $white+ ; -@rsyms { tok (eitherResIdent (T_Ident . identC)) } +@rsyms { tok (eitherResIdent (T_Ident . identC . rawIdentC)) } \' ($u # \')* \' { tok (eitherResIdent (T_LString . BS.unpack)) } -(\_ | $l)($l | $d | \_ | \')* { tok (eitherResIdent (T_Ident . identC)) } +(\_ | $l)($l | $d | \_ | \')* { tok (eitherResIdent (T_Ident . identC . rawIdentC)) } \" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \" { tok (T_String . unescapeInitTail . BS.unpack) } diff --git a/src/compiler/GF/Infra/Dependencies.hs b/src/compiler/GF/Infra/Dependencies.hs index d90cbbae6..8c3d6666f 100644 --- a/src/compiler/GF/Infra/Dependencies.hs +++ b/src/compiler/GF/Infra/Dependencies.hs @@ -3,7 +3,7 @@ module GF.Infra.Dependencies ( ) where import GF.Grammar.Grammar -import GF.Infra.Ident +import GF.Infra.Ident(Ident,showIdent) import Data.List (nub,isPrefixOf) diff --git a/src/compiler/GF/Infra/Ident.hs b/src/compiler/GF/Infra/Ident.hs index bb26ea98c..102ceedd3 100644 --- a/src/compiler/GF/Infra/Ident.hs +++ b/src/compiler/GF/Infra/Ident.hs @@ -17,6 +17,9 @@ module GF.Infra.Ident (-- * Identifiers identS, identC, identV, identA, identAV, identW, argIdent, isArgIdent, getArgIndex, varStr, varX, isWildIdent, varIndex, + -- * Raw Identifiers + RawIdent, rawIdentS, rawIdentC, ident2raw, prefixRawIdent, + isPrefixOf, showRawIdent, rawId2bs, -- * refreshing identifiers IdState, initIdStateN, initIdState, lookVar, refVar, refVarPlus @@ -31,25 +34,37 @@ import Text.PrettyPrint -- | the constructors labelled /INTERNAL/ are -- internal representation never returned by the parser data Ident = - IC {-# UNPACK #-} !BS.ByteString -- ^ raw identifier after parsing, resolved in Rename - | IW -- ^ wildcard + IC {-# UNPACK #-} !RawIdent -- ^ raw identifier after parsing, resolved in Rename + | IW -- ^ wildcard -- -- below this constructor: internal representation never returned by the parser - | IV {-# UNPACK #-} !BS.ByteString {-# UNPACK #-} !Int -- ^ /INTERNAL/ variable - | IA {-# UNPACK #-} !BS.ByteString {-# UNPACK #-} !Int -- ^ /INTERNAL/ argument of cat at position - | IAV {-# UNPACK #-} !BS.ByteString {-# UNPACK #-} !Int {-# UNPACK #-} !Int -- ^ /INTERNAL/ argument of cat with bindings at position + | IV {-# UNPACK #-} !RawIdent {-# UNPACK #-} !Int -- ^ /INTERNAL/ variable + | IA {-# UNPACK #-} !RawIdent {-# UNPACK #-} !Int -- ^ /INTERNAL/ argument of cat at position + | IAV {-# UNPACK #-} !RawIdent {-# UNPACK #-} !Int {-# UNPACK #-} !Int -- ^ /INTERNAL/ argument of cat with bindings at position -- deriving (Eq, Ord, Show, Read) +newtype RawIdent = Id { rawId2bs :: BS.ByteString } + deriving (Eq, Ord, Show, Read) + +rawIdentS = Id . BS.pack +rawIdentC = Id +showRawIdent = BS.unpack . rawId2bs + +prefixRawIdent (Id x) (Id y) = Id (BS.append x y) +isPrefixOf (Id x) (Id y) = BS.isPrefixOf x y + ident2bs :: Ident -> BS.ByteString ident2bs i = case i of - IC s -> s - IV s n -> BS.append s (BS.pack ('_':show n)) - IA s j -> BS.append s (BS.pack ('_':show j)) - IAV s b j -> BS.append s (BS.pack ('_':show b ++ '_':show j)) + IC (Id s) -> s + IV (Id s) n -> BS.append s (BS.pack ('_':show n)) + IA (Id s) j -> BS.append s (BS.pack ('_':show j)) + IAV (Id s) b j -> BS.append s (BS.pack ('_':show b ++ '_':show j)) IW -> BS.pack "_" +ident2raw = Id . ident2bs + showIdent :: Ident -> String showIdent i = BS.unpack $! ident2bs i @@ -57,19 +72,19 @@ ppIdent :: Ident -> Doc ppIdent = text . showIdent identS :: String -> Ident -identS = identC . BS.pack +identS = identC . rawIdentS -identC :: BS.ByteString -> Ident -identV :: BS.ByteString -> Int -> Ident -identA :: BS.ByteString -> Int -> Ident -identAV:: BS.ByteString -> Int -> Int -> Ident +identC :: RawIdent -> Ident +identV :: RawIdent -> Int -> Ident +identA :: RawIdent -> Int -> Ident +identAV:: RawIdent -> Int -> Int -> Ident identW :: Ident (identC, identV, identA, identAV, identW) = (IC, IV, IA, IAV, IW) prefixIdent :: String -> Ident -> Ident -prefixIdent pref = identC . BS.append (BS.pack pref) . ident2bs +prefixIdent pref = identC . Id . BS.append (BS.pack pref) . ident2bs -- normal identifier -- ident s = IC s @@ -85,24 +100,26 @@ isArgIdent _ = False getArgIndex (IA _ i) = Just i getArgIndex (IAV _ _ i) = Just i -getArgIndex (IC s) +getArgIndex (IC (Id s)) | isDigit (BS.last s) = (Just . read . BS.unpack . snd . BS.spanEnd isDigit) s getArgIndex x = Nothing -- | used in lin defaults varStr :: Ident -varStr = identA (BS.pack "str") 0 +varStr = identA (rawIdentS "str") 0 -- | refreshing variables varX :: Int -> Ident -varX = identV (BS.pack "x") +varX = identV (rawIdentS "x") isWildIdent :: Ident -> Bool isWildIdent x = case x of IW -> True - IC s | s == BS.pack "_" -> True + IC s | s == wild -> True _ -> False +wild = Id (BS.pack "_") + varIndex :: Ident -> Int varIndex (IV _ n) = n varIndex _ = -1 --- other than IV should not count @@ -129,7 +146,7 @@ refVar :: Ident -> STM IdState Ident ----refVar IW = return IW --- no update of wildcard refVar x = do (_,m) <- readSTM - let x' = IV (ident2bs x) m + let x' = IV (ident2raw x) m updateSTM (\(sys,mx) -> ((x, x'):sys, mx + 1)) return x' diff --git a/src/compiler/GFI.hs b/src/compiler/GFI.hs index bd67f29bc..7c53ed8d8 100644 --- a/src/compiler/GFI.hs +++ b/src/compiler/GFI.hs @@ -10,7 +10,7 @@ import GF.Command.Abstract import GF.Command.Parse(readCommandLine,pCommand) import GF.Data.ErrM import GF.Data.Operations (chunks,err) -import GF.Grammar hiding (Ident) +import GF.Grammar hiding (Ident,isPrefixOf) import GF.Grammar.Analyse import GF.Grammar.Parser (runP, pExp) import GF.Grammar.Printer (ppGrammar, ppModule)