GF source lexer: allow numeric character escapes in string literals

This makes the output from PGF.showExpr (and other Haskell code that uses
the Prelude.show function to show strings) parsable as GF source code in
more cases.

This is a workaround for the problem that GHC's implementation of the show
function uses numeric escapes for printable non-ASCII characters, e.g.
show "dålig" = "d\229lig"...
This commit is contained in:
hallgren
2015-09-29 12:18:35 +00:00
parent 35be182824
commit 1ccdd0d9fd

View File

@@ -16,6 +16,7 @@ import qualified Data.ByteString.Internal as BS(w2c)
import qualified Data.ByteString.UTF8 as UTF8
import qualified Data.Map as Map
import Data.Word(Word8)
import Data.Char(readLitChar)
--import Debug.Trace(trace)
}
@@ -39,7 +40,7 @@ $white+ ;
\' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (T_Ident . identS . unescapeInitTail . unpack) }
(\_ | $l)($l | $d | \_ | \')* { tok ident }
\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \" { tok (T_String . unescapeInitTail . unpack) }
\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t | $d+)))* \" { tok (T_String . unescapeInitTail . unpack) }
(\-)? $d+ { tok (T_Integer . read . unpack) }
(\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . unpack) }
@@ -217,13 +218,11 @@ resWords = Map.fromList
-- | Strip the surrounding quotes from a quoted token and decode its escapes.
--
-- The argument is the raw token text including its delimiters, e.g.
-- @"d\\229lig"@ in double quotes or a single-quoted identifier.  The first
-- character (the opening quote) is dropped, and decoding stops at a quote
-- character that is the very last character of the input (the closing quote).
--
-- Escape sequences are decoded with 'readLitChar', so in addition to
-- @\\"@, @\\\\@, @\\'@, @\\n@ and @\\t@ this also accepts numeric escapes
-- such as @\\229@, which is what 'show' produces for non-ASCII characters.
unescapeInitTail :: String -> String
unescapeInitTail = unesc . drop 1  -- drop 1, not tail: empty input gives ""
  where
    unesc s = case s of
      []     -> []
      ['"']  -> []   -- closing quote of a string literal
      ['\''] -> []   -- closing quote of a quoted identifier
      c : cs -> case readLitChar s of
        [(decoded, rest)] -> decoded : unesc rest
        -- Not a valid Haskell character escape (or ambiguous): keep the
        -- character as-is instead of failing with a pattern-match error.
        _                 -> c : unesc cs
-------------------------------------------------------------------
-- Alex wrapper code.