layout lexing is good enough

This commit is contained in:
crumbtoo
2023-11-20 12:51:04 -07:00
parent 717effc579
commit c15e67869e

View File

@@ -76,11 +76,11 @@ $white_no_nl+ { skip }
-- see commentary on the layout system -- see commentary on the layout system
<0> <0>
{ {
"module" { constTok TokenModule } "let" { constTok TokenLet `andBegin` layout_keyword }
"let" { constTok TokenLet `andBegin` layout } "letrec" { constTok TokenLet `andBegin` layout_keyword }
"letrec" { constTok TokenLet `andBegin` layout } "of" { constTok TokenOf `andBegin` layout_keyword }
"case" { constTok TokenCase } "case" { constTok TokenCase }
"of" { constTok TokenOf `andBegin` layout } "module" { constTok TokenModule }
"in" { constTok TokenIn } "in" { constTok TokenIn }
"where" { constTok TokenWhere } "where" { constTok TokenWhere }
} }
@@ -89,6 +89,7 @@ $white_no_nl+ { skip }
<0> <0>
{ {
"=" { constTok TokenEquals } "=" { constTok TokenEquals }
"->" { constTok TokenArrow }
} }
-- identifiers -- identifiers
@@ -105,15 +106,14 @@ $white_no_nl+ { skip }
<bol> <bol>
{ {
\n { skip } \n { skip }
() { doBOL } () { doBol `andBegin` 0 }
} }
<layout> <layout_keyword>
{ {
-- TODO: does not respect comments nor pragmas $white { skip }
\{ { doLayoutBrace } \{ { lbrace `andBegin` 0 }
\n { skip } () { noBrace `andBegin` 0 }
() { newLayoutContext }
} }
{ {
@@ -145,30 +145,26 @@ data CoreToken = TokenLet
| TokenRParen | TokenRParen
| TokenLBrace | TokenLBrace
| TokenRBrace | TokenRBrace
| TokenIndent Int
| TokenDedent Int
| TokenSemicolon | TokenSemicolon
| TokenEOF | TokenEOF
deriving Show deriving Show
data LayoutContext = Layout Int data LayoutContext = Layout Int
| NoLayout | NoLayout
deriving Show
data AlexUserState = AlexUserState data AlexUserState = AlexUserState
{ _ausContext :: [LayoutContext] { _ausContext :: [LayoutContext]
, _ausStack :: [Int]
} }
ausContext :: Lens' AlexUserState [LayoutContext] ausContext :: Lens' AlexUserState [LayoutContext]
ausContext f (AlexUserState ctx stk) ausContext f (AlexUserState ctx)
= fmap = fmap
(\a -> AlexUserState a stk) (f ctx) (\a -> AlexUserState a) (f ctx)
{-# INLINE ausContext #-} {-# INLINE ausContext #-}
ausStack :: Lens' AlexUserState [Int]
ausStack f (AlexUserState ctx stk)
= fmap
(\a -> AlexUserState ctx a) (f stk)
{-# INLINE ausStack #-}
pushContext :: LayoutContext -> Alex () pushContext :: LayoutContext -> Alex ()
pushContext c = do pushContext c = do
st <- alexGetUserState st <- alexGetUserState
@@ -190,7 +186,7 @@ alexEOF :: Alex (Located CoreToken)
alexEOF = Alex $ \ st@(AlexState { alex_pos = p }) -> Right (st, Located p TokenEOF) alexEOF = Alex $ \ st@(AlexState { alex_pos = p }) -> Right (st, Located p TokenEOF)
alexInitUserState :: AlexUserState alexInitUserState :: AlexUserState
alexInitUserState = AlexUserState [] [bol,0] alexInitUserState = AlexUserState [Layout 1]
nestedComment :: Lexer nestedComment :: Lexer
nestedComment _ _ = undefined nestedComment _ _ = undefined
@@ -203,7 +199,8 @@ lexStream = do
_ -> (l:) <$> lexStream _ -> (l:) <$> lexStream
lexCore :: String -> Either String [Located CoreToken] lexCore :: String -> Either String [Located CoreToken]
lexCore s = runAlex s lexStream lexCore s = runAlex s (alexSetStartCode layout_keyword *> lexStream)
-- temp; does not support module header
lexCore' :: String -> Either String [CoreToken] lexCore' :: String -> Either String [CoreToken]
lexCore' s = fmap f <$> lexCore s lexCore' s = fmap f <$> lexCore s
@@ -220,9 +217,6 @@ getSrcCol = Alex $ \ st ->
let AlexPn _ _ col = alex_pos st let AlexPn _ _ col = alex_pos st
in Right (st, col) in Right (st, col)
doLayoutBrace :: Lexer
doLayoutBrace (p,_,_,s) _ = undefined
lbrace :: Lexer lbrace :: Lexer
lbrace (p,_,_,_) _ = do lbrace (p,_,_,_) _ = do
pushContext NoLayout pushContext NoLayout
@@ -233,8 +227,14 @@ rbrace (p,_,_,_) _ = do
popContext popContext
pure $ Located p TokenRBrace pure $ Located p TokenRBrace
setLexState :: Int -> Alex () insRBrace :: AlexPosn -> Alex (Located CoreToken)
setLexState n = Alex $ \st -> Right (st { alex_scd = n }, ()) insRBrace p = do
popContext
pure $ Located p TokenRBrace
insSemi :: AlexPosn -> Alex (Located CoreToken)
insSemi p = do
pure $ Located p TokenSemicolon
modifyUst :: (AlexUserState -> AlexUserState) -> Alex () modifyUst :: (AlexUserState -> AlexUserState) -> Alex ()
modifyUst f = do modifyUst f = do
@@ -244,32 +244,38 @@ modifyUst f = do
getUst :: Alex AlexUserState getUst :: Alex AlexUserState
getUst = alexGetUserState getUst = alexGetUserState
pushLexState :: Int -> Alex ()
pushLexState n = modifyUst (ausStack %~ (n:)) *> setLexState n
popLexState :: Alex Int
popLexState = do
modifyUst (ausStack %~ drop 1)
ust <- getUst
let s = case ust ^. ausStack of
(a:_) -> a
_ -> 0
setLexState s
pure s
newLayoutContext :: Lexer newLayoutContext :: Lexer
newLayoutContext (p,_,_,_) _ = do newLayoutContext (p,_,_,_) _ = do
_ <- popLexState undefined
ctx <- getContext
off <- getSrcCol noBrace :: Lexer
case ctx of noBrace (p,_,_,_) l = do
Layout prev : _ | off <= prev col <- getSrcCol
-> error $ show prev pushContext (Layout col)
_ -> do
pushContext $ Layout off
pure $ Located p TokenLBrace pure $ Located p TokenLBrace
doBOL :: Lexer getOffside :: Alex Ordering
doBOL = undefined getOffside = do
ctx <- getContext
m <- getSrcCol
case ctx of
Layout n : _ -> pure $ m `compare` n
_ -> pure GT
doBol :: Lexer
doBol (p,c,_,s) l = do
off <- getOffside
-- traceM $ show (p, c, s)
col <- getSrcCol
traceM $ show (s, p, col, off)
case off of
LT -> insRBrace p
EQ -> insSemi p
_ -> lexToken
testTmp :: IO (Either String [CoreToken])
testTmp = do
s <- readFile "/tmp/t.hs"
pure $ lexCore' s
} }