From 1ccdd0d9fdea3a4f457101ba205c37f28abb8e70 Mon Sep 17 00:00:00 2001
From: hallgren
Date: Tue, 29 Sep 2015 12:18:35 +0000
Subject: [PATCH] GF source lexer: allow numeric character escapes in string literals
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This makes the output from PGF.showExpr (and other Haskell code that uses
the Prelude.show function to show strings) parsable as GF source code in
more cases.

This is a workaround for the problem that GHC's implementation of the show
function uses numeric escapes for printable non-ASCII characters, e.g.
show "dålig" = "d\229lig"...

Escapes that readLitChar cannot decode (e.g. a numeric escape beyond the
largest Char) are passed through unchanged instead of aborting the lexer
with an incomplete pattern match.
---
 src/compiler/GF/Grammar/Lexer.x | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/compiler/GF/Grammar/Lexer.x b/src/compiler/GF/Grammar/Lexer.x
index c2cbb4c47..f073bcdfc 100644
--- a/src/compiler/GF/Grammar/Lexer.x
+++ b/src/compiler/GF/Grammar/Lexer.x
@@ -16,6 +16,7 @@ import qualified Data.ByteString.Internal as BS(w2c)
 import qualified Data.ByteString.UTF8 as UTF8
 import qualified Data.Map as Map
 import Data.Word(Word8)
+import Data.Char(readLitChar)
 --import Debug.Trace(trace)
 }
 
@@ -39,7 +40,7 @@ $white+ ;
 \' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (T_Ident . identS . unescapeInitTail . unpack) }
 (\_ | $l)($l | $d | \_ | \')* { tok ident }
 
-\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \" { tok (T_String . unescapeInitTail . unpack) }
+\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t | $d+)))* \" { tok (T_String . unescapeInitTail . unpack) }
 
 (\-)? $d+ { tok (T_Integer . read . unpack) }
 (\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . unpack) }
@@ -217,13 +218,14 @@ resWords = Map.fromList
 unescapeInitTail :: String -> String
 unescapeInitTail = unesc . tail where
   unesc s = case s of
-    '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs
-    '\\':'n':cs -> '\n' : unesc cs
-    '\\':'t':cs -> '\t' : unesc cs
+    [] -> []
     '\"':[] -> []
     '\'':[] -> []
-    c:cs -> c : unesc cs
-    _ -> []
+    c:cs -> case readLitChar s of
+      [(c',cs')] -> c' : unesc cs'
+      -- No unique parse (e.g. a numeric escape above maxBound :: Char):
+      -- keep the character as written rather than crash the lexer.
+      _ -> c : unesc cs
 
 -------------------------------------------------------------------
 -- Alex wrapper code.