mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-13 13:12:51 -06:00
Also store Pre prefixes in token map. Introduce IntMapBuilder data structure.
Storing of prefixes uses show/read, which isn't a great solution but avoids having yet another token map.
This commit is contained in:
@@ -9,6 +9,7 @@ import qualified GF.Grammar.Canonical as C
|
||||
import GF.Compile.GrammarToCanonical (grammar2canonical)
|
||||
|
||||
import GF.Data.Operations (ErrorMonad (..))
|
||||
import qualified GF.Data.IntMapBuilder as IntMapBuilder
|
||||
import GF.Infra.Option (Options)
|
||||
import GF.Infra.UseIO (IOE)
|
||||
import GF.Text.Pretty (pp, render)
|
||||
@@ -24,7 +25,6 @@ import qualified Data.Map.Strict as Map
|
||||
import Data.Maybe (fromJust, isJust)
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import Data.Tuple (swap)
|
||||
import System.Environment (lookupEnv)
|
||||
import System.FilePath ((</>), (<.>))
|
||||
import Text.Printf (printf)
|
||||
@@ -292,38 +292,36 @@ isRecordType _ = False
|
||||
|
||||
-- | Find all token strings, put them in a map and replace with token indexes
|
||||
extractStrings :: L.Concrete -> L.Concrete
|
||||
extractStrings concr = L.Concrete { L.toks = strMap, L.lins = lins'}
|
||||
extractStrings concr = L.Concrete { L.toks = toks', L.lins = lins' }
|
||||
where
|
||||
state = (
|
||||
L.toks concr, -- the IntMap we're building
|
||||
Map.fromList (map swap (IntMap.toList (L.toks concr))) -- a reversed map for lookups
|
||||
)
|
||||
(lins',(strMap,_)) = CMS.runState (go0 (L.lins concr)) state
|
||||
imb = IntMapBuilder.fromIntMap (L.toks concr)
|
||||
(lins',imb') = CMS.runState (go0 (L.lins concr)) imb
|
||||
toks' = IntMapBuilder.toIntMap imb'
|
||||
|
||||
go0 :: Map.Map CId L.LinFun -> CMS.State (IntMap.IntMap Text, Map.Map Text Int) (Map.Map CId L.LinFun)
|
||||
go0 :: Map.Map CId L.LinFun -> CMS.State (IntMapBuilder.IMB Text) (Map.Map CId L.LinFun)
|
||||
go0 mp = do
|
||||
xs <- mapM (\(cid,lin) -> go lin >>= \lin' -> return (cid,lin')) (Map.toList mp)
|
||||
return $ Map.fromList xs
|
||||
|
||||
go :: L.LinFun -> CMS.State (IntMap.IntMap Text, Map.Map Text Int) L.LinFun
|
||||
go :: L.LinFun -> CMS.State (IntMapBuilder.IMB Text) L.LinFun
|
||||
go lf = case lf of
|
||||
L.Token str -> do
|
||||
(intMap,revMap) <- CMS.get
|
||||
case Map.lookup str revMap of
|
||||
Just i -> return $ L.TokenIx i
|
||||
Nothing -> do
|
||||
-- add string to map and replace with index
|
||||
let i = IntMap.size intMap
|
||||
let intMap' = IntMap.insert i str intMap
|
||||
let revMap' = Map.insert str i revMap
|
||||
CMS.put (intMap',revMap')
|
||||
return $ L.TokenIx i
|
||||
imb <- CMS.get
|
||||
let (ix,imb') = IntMapBuilder.insert' str imb
|
||||
CMS.put imb'
|
||||
return $ L.TokenIx ix
|
||||
|
||||
-- TODO big savings can be made by putting prefixes in string map too
|
||||
L.Pre pts df -> do
|
||||
pts' <- mapM (\(pfxs,lv) -> go lv >>= \lv' -> return (pfxs,lv')) pts
|
||||
-- pts' <- mapM (\(pfxs,lv) -> go lv >>= \lv' -> return (pfxs,lv')) pts
|
||||
pts' <- forM pts $ \(pfxs,lv) -> do
|
||||
imb <- CMS.get
|
||||
let str = T.pack $ show pfxs
|
||||
let (ix,imb') = IntMapBuilder.insert' str imb
|
||||
CMS.put imb'
|
||||
lv' <- go lv
|
||||
return (ix,lv')
|
||||
df' <- go df
|
||||
return $ L.Pre pts' df'
|
||||
return $ L.PreIx pts' df'
|
||||
L.Concat s t -> do
|
||||
s' <- go s
|
||||
t' <- go t
|
||||
|
||||
57
src/compiler/GF/Data/IntMapBuilder.hs
Normal file
57
src/compiler/GF/Data/IntMapBuilder.hs
Normal file
@@ -0,0 +1,57 @@
|
||||
-- | In order to build an IntMap in one pass, we need a map data structure with
|
||||
-- fast lookup in both keys and values.
|
||||
-- This is achieved by keeping a separate reversed map of values to keys during building.
|
||||
module GF.Data.IntMapBuilder where
|
||||
|
||||
import Data.IntMap (IntMap)
|
||||
import qualified Data.IntMap as IntMap
|
||||
import Data.Hashable (Hashable)
|
||||
import Data.HashMap.Strict (HashMap)
|
||||
import qualified Data.HashMap.Strict as HashMap
|
||||
import Data.Tuple (swap)
|
||||
import Prelude hiding (lookup)
|
||||
|
||||
data IMB a = IMB {
|
||||
intMap :: IntMap a,
|
||||
valMap :: HashMap a Int
|
||||
}
|
||||
|
||||
-- | An empty IMB
|
||||
empty :: (Eq a, Hashable a) => IMB a
|
||||
empty = IMB {
|
||||
intMap = IntMap.empty,
|
||||
valMap = HashMap.empty
|
||||
}
|
||||
|
||||
-- | Lookup a value
|
||||
lookup :: (Eq a, Hashable a) => a -> IMB a -> Maybe Int
|
||||
lookup a IMB { valMap = vm } = HashMap.lookup a vm
|
||||
|
||||
-- | Insert without any lookup
|
||||
insert :: (Eq a, Hashable a) => a -> IMB a -> (Int, IMB a)
|
||||
insert a IMB { intMap = im, valMap = vm } =
|
||||
let
|
||||
ix = IntMap.size im
|
||||
im' = IntMap.insert ix a im
|
||||
vm' = HashMap.insert a ix vm
|
||||
imb' = IMB { intMap = im', valMap = vm' }
|
||||
in
|
||||
(ix, imb')
|
||||
|
||||
-- | Insert only when lookup fails
|
||||
insert' :: (Eq a, Hashable a) => a -> IMB a -> (Int, IMB a)
|
||||
insert' a imb =
|
||||
case lookup a imb of
|
||||
Just ix -> (ix, imb)
|
||||
Nothing -> insert a imb
|
||||
|
||||
-- | Build IMB from existing IntMap
|
||||
fromIntMap :: (Eq a, Hashable a) => IntMap a -> IMB a
|
||||
fromIntMap im = IMB {
|
||||
intMap = im,
|
||||
valMap = HashMap.fromList (map swap (IntMap.toList im))
|
||||
}
|
||||
|
||||
-- | Get IntMap from IMB
|
||||
toIntMap :: (Eq a, Hashable a) => IMB a -> IntMap a
|
||||
toIntMap = intMap
|
||||
Reference in New Issue
Block a user