temporarily remove layout support

the core language doesn't need it. let's just keep the core simple for now while i focus on more important things
This commit is contained in:
crumbtoo
2023-11-24 14:13:20 -07:00
parent 1cd7b5a25b
commit 0b72bc8f11
6 changed files with 521 additions and 202 deletions

View File

@@ -1,6 +1,4 @@
{
-- TODO: layout semicolons are not inserted at EOf.
{-# LANGUAGE TemplateHaskell #-}
module Core.Lex
( lexCore
, lexCore'
@@ -18,7 +16,7 @@ import Lens.Micro
import Lens.Micro.TH
}
%wrapper "monadUserState"
%wrapper "monad"
$whitechar = [ \t\n\r\f\v]
$special = [\(\)\,\;\[\]\{\}]
@@ -57,81 +55,37 @@ $white_no_nl = $white # $nl
rlp :-
-- everywhere: skip whitespace
$white_no_nl+ { skip }
-- TODO: `--` could begin an operator
"--"[^$nl]* { skip }
"--"\-*[^$symbol].* { skip }
"{-" { nestedComment }
-- syntactic symbols
<0>
{
"(" { constTok TokenLParen }
")" { constTok TokenRParen }
"{" { lbrace }
"}" { rbrace }
"{" { constTok TokenLBrace }
"}" { constTok TokenRBrace }
";" { constTok TokenSemicolon }
"," { constTok TokenComma }
}
-- keywords
-- see commentary on the layout system
<0>
{
"let" { constTok TokenLet `andBegin` layout }
"letrec" { constTok TokenLetrec `andBegin` layout }
"of" { constTok TokenOf `andBegin` layout }
"let" { constTok TokenLet }
"letrec" { constTok TokenLetrec }
"of" { constTok TokenOf }
"case" { constTok TokenCase }
"module" { constTok TokenModule }
"in" { letin }
"where" { constTok TokenWhere `andBegin` layout }
}
"in" { constTok TokenIn }
"where" { constTok TokenWhere }
-- reserved symbols
<0>
{
"\\" { constTok TokenLambda }
"λ" { constTok TokenLambda }
"=" { constTok TokenEquals }
"->" { constTok TokenArrow }
}
-- identifiers
<0>
{
-- TODO: qualified names
@varname { lexWith TokenVarName }
@conname { lexWith TokenConName }
@varsym { lexWith TokenVarSym }
}
@consym { lexWith TokenConSym }
-- literals
<0>
{
@decimal { lexWith (TokenLitInt . read @Int) }
}
<0> \n { begin bol }
<initial>
{
$white { skip }
\n { skip }
() { topLevelOff `andBegin` 0 }
}
<bol>
{
\n { skip }
() { doBol `andBegin` 0 }
}
<layout>
{
$white { skip }
\{ { lbrace `andBegin` 0 }
() { noBrace `andBegin` 0 }
}
{
@@ -161,57 +115,14 @@ data CoreToken = TokenLet
| TokenRParen
| TokenLBrace
| TokenRBrace
| TokenLBraceV -- virtual brace inserted by layout
| TokenRBraceV -- virtual brace inserted by layout
| TokenIndent Int
| TokenDedent Int
| TokenSemicolon
| TokenEOF
deriving Show
data LayoutContext = Layout Int
| NoLayout
deriving Show
data AlexUserState = AlexUserState
{ _ausContext :: [LayoutContext]
}
ausContext :: Lens' AlexUserState [LayoutContext]
ausContext f (AlexUserState ctx)
= fmap
(\a -> AlexUserState a) (f ctx)
{-# INLINE ausContext #-}
pushContext :: LayoutContext -> Alex ()
pushContext c = do
st <- alexGetUserState
alexSetUserState $ st { _ausContext = c : _ausContext st }
popContext :: Alex ()
popContext = do
st <- alexGetUserState
alexSetUserState $ st { _ausContext = drop 1 (_ausContext st) }
getContext :: Alex [LayoutContext]
getContext = do
st <- alexGetUserState
pure $ _ausContext st
type Lexer = AlexInput -> Int -> Alex (Located CoreToken)
alexInitUserState :: AlexUserState
alexInitUserState = AlexUserState []
nestedComment :: Lexer
nestedComment _ _ = undefined
lexStream :: Alex [Located CoreToken]
lexStream = do
l <- alexMonadScan
case l of
Located _ _ _ TokenEOF -> pure [l]
_ -> (l:) <$> lexStream
lexWith :: (String -> CoreToken) -> Lexer
lexWith f (AlexPn _ y x,_,_,s) l = pure $ Located y x l (f $ take l s)
-- | The main lexer driver.
lexCore :: String -> RLPC ParseError [Located CoreToken]
@@ -224,7 +135,7 @@ lexCore s = case m of
}
Right ts -> pure ts
where
m = runAlex s (alexSetStartCode initial *> lexStream)
m = runAlex s lexStream
-- | @lexCore@, but the tokens are stripped of location info. Useful for
-- debugging
@@ -232,84 +143,20 @@ lexCore' :: String -> RLPC ParseError [CoreToken]
lexCore' s = fmap f <$> lexCore s
where f (Located _ _ _ t) = t
lexStream :: Alex [Located CoreToken]
lexStream = do
l <- alexMonadScan
case l of
Located _ _ _ TokenEOF -> pure [l]
_ -> (l:) <$> lexStream
data ParseError = ParErrLexical String
| ParErrParse
deriving Show
lexWith :: (String -> CoreToken) -> Lexer
lexWith f (AlexPn _ y x,_,_,s) l = pure $ Located y x l (f $ take l s)
lexToken :: Alex (Located CoreToken)
lexToken = alexMonadScan
getSrcCol :: Alex Int
getSrcCol = Alex $ \ st ->
let AlexPn _ _ col = alex_pos st
in Right (st, col)
lbrace :: Lexer
lbrace (AlexPn _ y x,_,_,_) l = do
pushContext NoLayout
pure $ Located y x l TokenLBrace
rbrace :: Lexer
rbrace (AlexPn _ y x,_,_,_) l = do
popContext
pure $ Located y x l TokenRBrace
insRBraceV :: AlexPosn -> Alex (Located CoreToken)
insRBraceV (AlexPn _ y x) = do
popContext
pure $ Located y x 0 TokenRBraceV
insSemi :: AlexPosn -> Alex (Located CoreToken)
insSemi (AlexPn _ y x) = do
pure $ Located y x 0 TokenSemicolon
modifyUst :: (AlexUserState -> AlexUserState) -> Alex ()
modifyUst f = do
st <- alexGetUserState
alexSetUserState $ f st
getUst :: Alex AlexUserState
getUst = alexGetUserState
newLayoutContext :: Lexer
newLayoutContext (p,_,_,_) _ = do
undefined
noBrace :: Lexer
noBrace (AlexPn _ y x,_,_,_) l = do
col <- getSrcCol
pushContext (Layout col)
pure $ Located y x l TokenLBraceV
getOffside :: Alex Ordering
getOffside = do
ctx <- getContext
m <- getSrcCol
case ctx of
Layout n : _ -> pure $ m `compare` n
_ -> pure GT
doBol :: Lexer
doBol (p,c,_,s) _ = do
off <- getOffside
case off of
LT -> insRBraceV p
EQ -> insSemi p
_ -> lexToken
letin :: Lexer
letin (AlexPn _ y x,_,_,_) l = do
popContext
pure $ Located y x l TokenIn
topLevelOff :: Lexer
topLevelOff = noBrace
alexEOF :: Alex (Located CoreToken)
alexEOF = Alex $ \ st@(AlexState { alex_pos = AlexPn _ y x }) ->
Right (st, Located y x 0 TokenEOF)
}