more correct lexer

This commit is contained in:
crumbtoo
2023-11-21 14:52:17 -07:00
parent 337b2f2c8f
commit 00a265fda1

View File

@@ -5,6 +5,7 @@ module Core.Lex
, lexCore' , lexCore'
, CoreToken(..) , CoreToken(..)
, lexTmp , lexTmp
, ParserError
) )
where where
import Data.Char (chr) import Data.Char (chr)
@@ -77,13 +78,13 @@ $white_no_nl+ { skip }
-- see commentary on the layout system -- see commentary on the layout system
<0> <0>
{ {
"let" { constTok TokenLet `andBegin` layout_keyword } "let" { constTok TokenLet `andBegin` layout }
"letrec" { constTok TokenLet `andBegin` layout_keyword } "letrec" { constTok TokenLet `andBegin` layout }
"of" { constTok TokenOf `andBegin` layout_keyword } "of" { constTok TokenOf `andBegin` layout }
"case" { constTok TokenCase } "case" { constTok TokenCase }
"module" { constTok TokenModule } "module" { constTok TokenModule }
"in" { letin } "in" { letin }
"where" { constTok TokenWhere } "where" { constTok TokenWhere `andBegin` layout }
} }
-- reserved symbols -- reserved symbols
@@ -110,7 +111,7 @@ $white_no_nl+ { skip }
() { doBol `andBegin` 0 } () { doBol `andBegin` 0 }
} }
<layout_keyword> <layout>
{ {
$white { skip } $white { skip }
\{ { lbrace `andBegin` 0 } \{ { lbrace `andBegin` 0 }
@@ -144,6 +145,8 @@ data CoreToken = TokenLet
| TokenRParen | TokenRParen
| TokenLBrace | TokenLBrace
| TokenRBrace | TokenRBrace
| TokenLBraceV -- virtual brace inserted by layout
| TokenRBraceV -- virtual brace inserted by layout
| TokenIndent Int | TokenIndent Int
| TokenDedent Int | TokenDedent Int
| TokenSemicolon | TokenSemicolon
@@ -198,8 +201,7 @@ lexStream = do
_ -> (l:) <$> lexStream _ -> (l:) <$> lexStream
lexCore :: String -> Either String [Located CoreToken] lexCore :: String -> Either String [Located CoreToken]
lexCore s = runAlex s (alexSetStartCode layout_keyword *> lexStream) lexCore s = runAlex s (alexSetStartCode 0 *> lexStream)
-- temp; does not support module header
lexCore' :: String -> Either String [CoreToken] lexCore' :: String -> Either String [CoreToken]
lexCore' s = fmap f <$> lexCore s lexCore' s = fmap f <$> lexCore s
@@ -226,10 +228,10 @@ rbrace (p,_,_,_) _ = do
popContext popContext
pure $ Located p TokenRBrace pure $ Located p TokenRBrace
insRBrace :: AlexPosn -> Alex (Located CoreToken) insRBraceV :: AlexPosn -> Alex (Located CoreToken)
insRBrace p = do insRBraceV p = do
popContext popContext
pure $ Located p TokenRBrace pure $ Located p TokenRBraceV
insSemi :: AlexPosn -> Alex (Located CoreToken) insSemi :: AlexPosn -> Alex (Located CoreToken)
insSemi p = do insSemi p = do
@@ -251,7 +253,7 @@ noBrace :: Lexer
noBrace (p,_,_,_) l = do noBrace (p,_,_,_) l = do
col <- getSrcCol col <- getSrcCol
pushContext (Layout col) pushContext (Layout col)
pure $ Located p TokenLBrace pure $ Located p TokenLBraceV
getOffside :: Alex Ordering getOffside :: Alex Ordering
getOffside = do getOffside = do
@@ -266,7 +268,7 @@ doBol (p,c,_,s) l = do
off <- getOffside off <- getOffside
col <- getSrcCol col <- getSrcCol
case off of case off of
LT -> insRBrace p LT -> insRBraceV p
EQ -> insSemi p EQ -> insSemi p
_ -> lexToken _ -> lexToken
@@ -281,4 +283,7 @@ lexTmp = do
case lexCore' s of case lexCore' s of
Left e -> error e Left e -> error e
Right a -> pure a Right a -> pure a
data ParserError
} }