diff --git a/src/GF/Canon/LexGFC.hs b/src/GF/Canon/LexGFC.hs index d27e31657..56376894b 100644 --- a/src/GF/Canon/LexGFC.hs +++ b/src/GF/Canon/LexGFC.hs @@ -1,7 +1,7 @@ {-# OPTIONS -fglasgow-exts -cpp #-} {-# LINE 3 "LexGFC.x" #-} module LexGFC where - +import SharedString import ErrM #if __GLASGOW_HASKELL__ >= 503 @@ -35,10 +35,10 @@ alex_accept = listArray (0::Int,14) [[],[],[(AlexAccSkip)],[(AlexAcc (alex_actio tok f p s = f p s data Tok = - TS String -- reserved words - | TL String -- string literals + TS !String -- reserved words + | TL !String -- string literals | TI String -- integer literals - | TV String -- identifiers + | TV !String -- identifiers -- H | TD String -- double precision float literals | TC String -- character literals @@ -130,9 +130,9 @@ alexGetChar (p, _, (c:s)) = alexInputPrevChar :: AlexInput -> Char alexInputPrevChar (p, c, s) = c -alex_action_1 = tok (\p s -> PT p (TS s)) -alex_action_2 = tok (\p s -> PT p (eitherResIdent TV s)) -alex_action_3 = tok (\p s -> PT p (TL $ unescapeInitTail s)) +alex_action_1 = tok (\p s -> PT p (TS (shareString s))) +alex_action_2 = tok (\p s -> PT p (eitherResIdent (TV . shareString) s)) +alex_action_3 = tok (\p s -> PT p (TL $ shareString $ unescapeInitTail s)) alex_action_4 = tok (\p s -> PT p (TI s)) {-# LINE 1 "GenericTemplate.hs" #-} {-# LINE 1 "" #-} diff --git a/src/GF/Canon/LexGFC.x b/src/GF/Canon/LexGFC.x new file mode 100644 index 000000000..c9697c270 --- /dev/null +++ b/src/GF/Canon/LexGFC.x @@ -0,0 +1,131 @@ +-- -*- haskell -*- +-- This Alex file was machine-generated by the BNF converter +-- Lines with -- H have been hacked for greater performance +{ +module LexGFC where +import SharedString -- H +import ErrM +} + + +$l = [a-zA-Z\192 - \255] # [\215 \247] -- isolatin1 letter FIXME +$c = [A-Z\192-\221] # [\215] -- capital isolatin1 letter FIXME +$s = [a-z\222-\255] # [\247] -- small isolatin1 letter FIXME +$d = [0-9] -- digit +$i = [$l $d _ '] -- identifier character +$u = [\0-\255] -- universal: any character + +@rsyms = -- reserved words consisting of special symbols + \; | \= | \{ | \} | \: | \- \> | \* \* | \[ | \] | \\ | \. | \( | \) | \_ | \< | \> | \$ | \? | \= \> | \! | \+ \+ | \/ | \@ | \+ | \| | \, + +:- + +$white+ ; +@rsyms { tok (\p s -> PT p (TS (shareString s))) } -- H + +$l $i* { tok (\p s -> PT p (eitherResIdent (TV . shareString) s)) } -- H +\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \"{ tok (\p s -> PT p (TL $ shareString $ unescapeInitTail s)) } -- H + +$d+ { tok (\p s -> PT p (TI s)) } + + +{ + +tok f p s = f p s + +data Tok = + TS !String -- reserved words -- H + | TL !String -- string literals -- H + | TI String -- integer literals + | TV !String -- identifiers -- H + | TD String -- double precision float literals + | TC String -- character literals + + deriving (Eq,Show,Ord) + +data Token = + PT Posn Tok + | Err Posn + deriving (Eq,Show,Ord) + +tokenPos (PT (Pn _ l _) _ :_) = "line " ++ show l +tokenPos (Err (Pn _ l _) :_) = "line " ++ show l +tokenPos _ = "end of file" + +posLineCol (Pn _ l c) = (l,c) +mkPosToken t@(PT p _) = (posLineCol p, prToken t) + +prToken t = case t of + PT _ (TS s) -> s + PT _ (TI s) -> s + PT _ (TV s) -> s + PT _ (TD s) -> s + PT _ (TC s) -> s + + _ -> show t + +eitherResIdent :: (String -> Tok) -> String -> Tok +eitherResIdent tv s = if isResWord s then (TS s) else (tv s) where + isResWord s = isInTree s $ + B "lin" (B "concrete" (B "Type" (B "Str" (B "Ints" N N) N) (B "cat" (B "abstract" N N) N)) (B "fun" (B "flags" (B "data" N N) N) (B "in" (B "grammar" N N) N))) (B "pre" (B "open" (B "of" (B "lincat" N N) N) (B "param" (B "oper" N N) N)) (B "transfer" (B "table" (B "resource" N N) N) (B "variants" N N))) + +data BTree = N | B String BTree BTree deriving (Show) + +isInTree :: String -> BTree -> Bool +isInTree x tree = case tree of + N -> False + B a left right + | x < a -> isInTree x left + | x > a -> isInTree x right + | x == a -> True + +unescapeInitTail :: String -> String +unescapeInitTail = unesc . tail where + unesc s = case s of + '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs + '\\':'n':cs -> '\n' : unesc cs + '\\':'t':cs -> '\t' : unesc cs + '"':[] -> [] + c:cs -> c : unesc cs + _ -> [] + +------------------------------------------------------------------- +-- Alex wrapper code. +-- A modified "posn" wrapper. +------------------------------------------------------------------- + +data Posn = Pn !Int !Int !Int + deriving (Eq, Show,Ord) + +alexStartPos :: Posn +alexStartPos = Pn 0 1 1 + +alexMove :: Posn -> Char -> Posn +alexMove (Pn a l c) '\t' = Pn (a+1) l (((c+7) `div` 8)*8+1) +alexMove (Pn a l c) '\n' = Pn (a+1) (l+1) 1 +alexMove (Pn a l c) _ = Pn (a+1) l (c+1) + +type AlexInput = (Posn, -- current position, + Char, -- previous char + String) -- current input string + +tokens :: String -> [Token] +tokens str = go (alexStartPos, '\n', str) + where + go :: (Posn, Char, String) -> [Token] + go inp@(pos, _, str) = + case alexScan inp 0 of + AlexEOF -> [] + AlexError (pos, _, _) -> fail $ show pos ++ ": lexical error" + AlexSkip inp' len -> go inp' + AlexToken inp' len act -> act pos (take len str) : (go inp') + +alexGetChar :: AlexInput -> Maybe (Char,AlexInput) +alexGetChar (p, c, []) = Nothing +alexGetChar (p, _, (c:s)) = + let p' = alexMove p c + in p' `seq` Just (c, (p', c, s)) + +alexInputPrevChar :: AlexInput -> Char +alexInputPrevChar (p, c, s) = c +} diff --git a/src/GF/Data/SharedString.hs b/src/GF/Data/SharedString.hs new file mode 100644 index 000000000..f03f6f469 --- /dev/null +++ b/src/GF/Data/SharedString.hs @@ -0,0 +1,18 @@ +module SharedString (shareString) where + +import Data.HashTable as H +import System.IO.Unsafe (unsafePerformIO) + +{-# NOINLINE stringPool #-} +stringPool :: HashTable String String +stringPool = unsafePerformIO $ new (==) hashString + +{-# NOINLINE shareString #-} +shareString :: String -> String +shareString s = unsafePerformIO $ do + mv <- H.lookup stringPool s + case mv of + Just s' -> return s' + Nothing -> do + H.insert stringPool s s + return s