From 832f25fc2ac19c58476f55ebdc7001945104f023 Mon Sep 17 00:00:00 2001 From: bjorn Date: Wed, 26 Nov 2008 16:19:54 +0000 Subject: [PATCH] Don't use string sharing in LexGF. Profiling showed that when loading a large .gfo file, shareString was responsible for 15-18% of the CPU time, and a lot of the allocation. Since we already use ByteStrings for reading the source files, shareString mostly has the effect of creating lots of small ByteStrings instead of one large one. Since the plain size of the .gfo is seldom a problem (unlike when it was read as a String), it is ok to keep the whole file as one ByteString in RAM, and have all tokens point into that. Profiling after the change showed 15-20% reduction in CPU time and in total allocation. --- src/GF/Source/LexGF.hs | 4 ++-- src/GF/Source/LexGF.x | 4 ++-- src/GF/Source/Makefile | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/GF/Source/LexGF.hs b/src/GF/Source/LexGF.hs index 316da3afd..c45e03f78 100644 --- a/src/GF/Source/LexGF.hs +++ b/src/GF/Source/LexGF.hs @@ -4,7 +4,7 @@ {-# OPTIONS -fno-warn-incomplete-patterns #-} module GF.Source.LexGF where -import GF.Source.SharedString + import qualified Data.ByteString.Char8 as BS #if __GLASGOW_HASKELL__ >= 603 @@ -44,7 +44,7 @@ alex_accept = listArray (0::Int,34) [[],[],[(AlexAccSkip)],[(AlexAccSkip)],[],[( tok f p s = f p s share :: BS.ByteString -> BS.ByteString -share = shareString +share = id data Tok = TS !BS.ByteString !Int -- reserved words and symbols diff --git a/src/GF/Source/LexGF.x b/src/GF/Source/LexGF.x index 46419df74..3579d0cd8 100644 --- a/src/GF/Source/LexGF.x +++ b/src/GF/Source/LexGF.x @@ -4,7 +4,7 @@ {-# OPTIONS -fno-warn-incomplete-patterns #-} module GF.Source.LexGF where -import GF.Source.SharedString + import qualified Data.ByteString.Char8 as BS } @@ -39,7 +39,7 @@ $d+ \. $d+ (e (\-)? $d+)? { tok (\p s -> PT p (TD $ share s)) } tok f p s = f p s share :: BS.ByteString -> BS.ByteString -share = shareString +share = id data Tok = TS !BS.ByteString !Int -- reserved words and symbols diff --git a/src/GF/Source/Makefile b/src/GF/Source/Makefile index a07f78ba5..082efa5ef 100644 --- a/src/GF/Source/Makefile +++ b/src/GF/Source/Makefile @@ -1,5 +1,5 @@ all: - cd ../.. && bnfc -p GF.Source -bytestrings -sharestrings GF/Source/GF.cf + cd ../.. && bnfc -p GF.Source -bytestrings GF/Source/GF.cf rm ErrM.hs perl -i -pe 's/%name pModHeader ModHeader/%partial pModHeader ModHeader/' ParGF.y perl -i -pe 's/GF.Source.ErrM/GF.Data.ErrM/' *.hs *.x *.y