forked from GitHub/gf-core
274 lines
6.4 KiB
Plaintext
274 lines
6.4 KiB
Plaintext
{
|
|
module GF.Grammar.Lexer
|
|
( Token(..), Posn(..)
|
|
, P, runP, lexer, getPosn, failLoc
|
|
, isReservedWord
|
|
) where
|
|
|
|
import GF.Infra.Ident
|
|
import qualified Data.ByteString.Char8 as BS
|
|
import qualified Data.Map as Map
|
|
|
|
}
|
|
|
|
|
|
$l = [a-zA-Z\192 - \255] # [\215 \247]
|
|
$c = [A-Z\192-\221] # [\215]
|
|
$s = [a-z\222-\255] # [\247]
|
|
$d = [0-9] -- digit
|
|
$i = [$l $d _ '] -- identifier character
|
|
$u = [\0-\255] -- universal: any character
|
|
|
|
@rsyms = -- symbols and non-identifier-like reserved words
|
|
\; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/
|
|
|
|
:-
|
|
"--" [.]* ; -- Toss single line comments
|
|
"{-" ([$u # \-] | \- [$u # \}])* ("-")+ "}" ;
|
|
|
|
$white+ ;
|
|
@rsyms { tok (eitherResIdent (T_Ident . identC . rawIdentC)) }
|
|
\' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (eitherResIdent (T_Ident . identC . rawIdentS . unescapeInitTail . BS.unpack)) }
|
|
(\_ | $l)($l | $d | \_ | \')* { tok (eitherResIdent (T_Ident . identC . rawIdentC)) }
|
|
|
|
\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \" { tok (T_String . unescapeInitTail . BS.unpack) }
|
|
|
|
(\-)? $d+ { tok (T_Integer . read . BS.unpack) }
|
|
(\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . BS.unpack) }
|
|
|
|
{
|
|
|
|
tok f p s = f s
|
|
|
|
data Token
|
|
= T_exclmark
|
|
| T_patt
|
|
| T_int_label
|
|
| T_oparen
|
|
| T_cparen
|
|
| T_tilde
|
|
| T_star
|
|
| T_starstar
|
|
| T_plus
|
|
| T_plusplus
|
|
| T_comma
|
|
| T_minus
|
|
| T_rarrow
|
|
| T_dot
|
|
| T_alt
|
|
| T_colon
|
|
| T_semicolon
|
|
| T_less
|
|
| T_equal
|
|
| T_big_rarrow
|
|
| T_great
|
|
| T_questmark
|
|
| T_obrack
|
|
| T_lam
|
|
| T_lamlam
|
|
| T_cbrack
|
|
| T_ocurly
|
|
| T_bar
|
|
| T_ccurly
|
|
| T_underscore
|
|
| T_at
|
|
| T_PType
|
|
| T_Str
|
|
| T_Strs
|
|
| T_Tok
|
|
| T_Type
|
|
| T_abstract
|
|
| T_case
|
|
| T_cat
|
|
| T_concrete
|
|
| T_data
|
|
| T_def
|
|
| T_flags
|
|
| T_fn
|
|
| T_fun
|
|
| T_in
|
|
| T_incomplete
|
|
| T_instance
|
|
| T_interface
|
|
| T_let
|
|
| T_lin
|
|
| T_lincat
|
|
| T_lindef
|
|
| T_linref
|
|
| T_of
|
|
| T_open
|
|
| T_oper
|
|
| T_param
|
|
| T_pattern
|
|
| T_pre
|
|
| T_printname
|
|
| T_resource
|
|
| T_strs
|
|
| T_table
|
|
| T_transfer
|
|
| T_variants
|
|
| T_where
|
|
| T_with
|
|
| T_String String -- string literals
|
|
| T_Integer Int -- integer literals
|
|
| T_Double Double -- double precision float literals
|
|
| T_Ident Ident
|
|
| T_EOF
|
|
|
|
eitherResIdent :: (BS.ByteString -> Token) -> BS.ByteString -> Token
|
|
eitherResIdent tv s =
|
|
case Map.lookup s resWords of
|
|
Just t -> t
|
|
Nothing -> tv s
|
|
|
|
isReservedWord :: BS.ByteString -> Bool
|
|
isReservedWord s = Map.member s resWords
|
|
|
|
resWords = Map.fromList
|
|
[ b "!" T_exclmark
|
|
, b "#" T_patt
|
|
, b "$" T_int_label
|
|
, b "(" T_oparen
|
|
, b ")" T_cparen
|
|
, b "~" T_tilde
|
|
, b "*" T_star
|
|
, b "**" T_starstar
|
|
, b "+" T_plus
|
|
, b "++" T_plusplus
|
|
, b "," T_comma
|
|
, b "-" T_minus
|
|
, b "->" T_rarrow
|
|
, b "." T_dot
|
|
, b "/" T_alt
|
|
, b ":" T_colon
|
|
, b ";" T_semicolon
|
|
, b "<" T_less
|
|
, b "=" T_equal
|
|
, b "=>" T_big_rarrow
|
|
, b ">" T_great
|
|
, b "?" T_questmark
|
|
, b "[" T_obrack
|
|
, b "]" T_cbrack
|
|
, b "\\" T_lam
|
|
, b "\\\\" T_lamlam
|
|
, b "{" T_ocurly
|
|
, b "}" T_ccurly
|
|
, b "|" T_bar
|
|
, b "_" T_underscore
|
|
, b "@" T_at
|
|
, b "PType" T_PType
|
|
, b "Str" T_Str
|
|
, b "Strs" T_Strs
|
|
, b "Tok" T_Tok
|
|
, b "Type" T_Type
|
|
, b "abstract" T_abstract
|
|
, b "case" T_case
|
|
, b "cat" T_cat
|
|
, b "concrete" T_concrete
|
|
, b "data" T_data
|
|
, b "def" T_def
|
|
, b "flags" T_flags
|
|
, b "fn" T_fn
|
|
, b "fun" T_fun
|
|
, b "in" T_in
|
|
, b "incomplete" T_incomplete
|
|
, b "instance" T_instance
|
|
, b "interface" T_interface
|
|
, b "let" T_let
|
|
, b "lin" T_lin
|
|
, b "lincat" T_lincat
|
|
, b "lindef" T_lindef
|
|
, b "linref" T_linref
|
|
, b "of" T_of
|
|
, b "open" T_open
|
|
, b "oper" T_oper
|
|
, b "param" T_param
|
|
, b "pattern" T_pattern
|
|
, b "pre" T_pre
|
|
, b "printname" T_printname
|
|
, b "resource" T_resource
|
|
, b "strs" T_strs
|
|
, b "table" T_table
|
|
, b "transfer" T_transfer
|
|
, b "variants" T_variants
|
|
, b "where" T_where
|
|
, b "with" T_with
|
|
]
|
|
where b s t = (BS.pack s, t)
|
|
|
|
unescapeInitTail :: String -> String
|
|
unescapeInitTail = unesc . tail where
|
|
unesc s = case s of
|
|
'\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs
|
|
'\\':'n':cs -> '\n' : unesc cs
|
|
'\\':'t':cs -> '\t' : unesc cs
|
|
'"':[] -> []
|
|
'\'':[] -> []
|
|
c:cs -> c : unesc cs
|
|
_ -> []
|
|
|
|
-------------------------------------------------------------------
|
|
-- Alex wrapper code.
|
|
-- A modified "posn" wrapper.
|
|
-------------------------------------------------------------------
|
|
|
|
data Posn = Pn {-# UNPACK #-} !Int
|
|
{-# UNPACK #-} !Int
|
|
|
|
alexMove :: Posn -> Char -> Posn
|
|
alexMove (Pn l c) '\n' = Pn (l+1) 1
|
|
alexMove (Pn l c) _ = Pn l (c+1)
|
|
|
|
alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
|
|
alexGetChar (AI p _ s) =
|
|
case BS.uncons s of
|
|
Nothing -> Nothing
|
|
Just (c,s) ->
|
|
let p' = alexMove p c
|
|
in p' `seq` Just (c, (AI p' c s))
|
|
|
|
alexInputPrevChar :: AlexInput -> Char
|
|
alexInputPrevChar (AI p c s) = c
|
|
|
|
data AlexInput = AI {-# UNPACK #-} !Posn -- current position,
|
|
{-# UNPACK #-} !Char -- previous char
|
|
{-# UNPACK #-} !BS.ByteString -- current input string
|
|
|
|
data ParseResult a
|
|
= POk a
|
|
| PFailed Posn -- The position of the error
|
|
String -- The error message
|
|
|
|
newtype P a = P { unP :: AlexInput -> ParseResult a }
|
|
|
|
instance Monad P where
|
|
return a = a `seq` (P $ \s -> POk a)
|
|
(P m) >>= k = P $ \ s -> case m s of
|
|
POk a -> unP (k a) s
|
|
PFailed posn err -> PFailed posn err
|
|
fail msg = P $ \(AI posn _ _) -> PFailed posn msg
|
|
|
|
runP :: P a -> BS.ByteString -> Either (Posn,String) a
|
|
runP (P f) txt =
|
|
case f (AI (Pn 1 0) ' ' txt) of
|
|
POk x -> Right x
|
|
PFailed pos msg -> Left (pos,msg)
|
|
|
|
failLoc :: Posn -> String -> P a
|
|
failLoc pos msg = P $ \_ -> PFailed pos msg
|
|
|
|
lexer :: (Token -> P a) -> P a
|
|
lexer cont = P go
|
|
where
|
|
go inp@(AI pos _ str) =
|
|
case alexScan inp 0 of
|
|
AlexEOF -> unP (cont T_EOF) inp
|
|
AlexError (AI pos _ _) -> PFailed pos "lexical error"
|
|
AlexSkip inp' len -> go inp'
|
|
AlexToken inp' len act -> unP (cont (act pos (BS.take len str))) inp'
|
|
|
|
getPosn :: P Posn
|
|
getPosn = P $ \inp@(AI pos _ _) -> POk pos
|
|
|
|
}
|