Files
rlp/src/Core/Lex.x.old
crumbtoo 0b72bc8f11 temporarily remove layout support
the core language doesn't need it. let's just keep the core simple for now while i focus on more important things
2023-11-24 14:13:20 -07:00

316 lines
7.6 KiB
Plaintext

{
-- TODO: layout semicolons are not inserted at EOf.
{-# LANGUAGE TemplateHaskell #-}
module Core.Lex
( lexCore
, lexCore'
, CoreToken(..)
, ParseError(..)
, Located(..)
, AlexPosn(..)
)
where
import Data.Char (chr)
import Debug.Trace
import Core.Syntax
import Compiler.RLPC
import Lens.Micro
import Lens.Micro.TH
}
%wrapper "monadUserState"
$whitechar = [ \t\n\r\f\v]
$special = [\(\)\,\;\[\]\{\}]
$digit = 0-9
$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
$unisymbol = [] -- TODO
$symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\']
$large = [A-Z \xc0-\xd6 \xd8-\xde]
$small = [a-z \xdf-\xf6 \xf8-\xff \_]
$alpha = [$small $large]
$graphic = [$small $large $symbol $digit $special \:\"\']
$octit = 0-7
$hexit = [0-9 A-F a-f]
$namechar = [$alpha $digit \' \#]
$symchar = [$symbol \:]
$nl = [\n\r]
$white_no_nl = $white # $nl
@reservedid =
case|data|do|import|in|let|letrec|module|of|where
@reservedop =
"=" | \\ | "->"
@varname = $small $namechar*
@conname = $large $namechar*
@varsym = $symbol $symchar*
@consym = \: $symchar*
@decimal = $digit+
rlp :-
-- everywhere: skip whitespace
$white_no_nl+ { skip }
-- TODO: `--` could begin an operator
"--"[^$nl]* { skip }
"--"\-*[^$symbol].* { skip }
"{-" { nestedComment }
-- syntactic symbols
<0>
{
"(" { constTok TokenLParen }
")" { constTok TokenRParen }
"{" { lbrace }
"}" { rbrace }
";" { constTok TokenSemicolon }
"," { constTok TokenComma }
}
-- keywords
-- see commentary on the layout system
<0>
{
"let" { constTok TokenLet `andBegin` layout }
"letrec" { constTok TokenLetrec `andBegin` layout }
"of" { constTok TokenOf `andBegin` layout }
"case" { constTok TokenCase }
"module" { constTok TokenModule }
"in" { letin }
"where" { constTok TokenWhere `andBegin` layout }
}
-- reserved symbols
<0>
{
"=" { constTok TokenEquals }
"->" { constTok TokenArrow }
}
-- identifiers
<0>
{
-- TODO: qualified names
@varname { lexWith TokenVarName }
@conname { lexWith TokenConName }
@varsym { lexWith TokenVarSym }
}
-- literals
<0>
{
@decimal { lexWith (TokenLitInt . read @Int) }
}
<0> \n { begin bol }
<initial>
{
$white { skip }
\n { skip }
() { topLevelOff `andBegin` 0 }
}
<bol>
{
\n { skip }
() { doBol `andBegin` 0 }
}
<layout>
{
$white { skip }
\{ { lbrace `andBegin` 0 }
() { noBrace `andBegin` 0 }
}
{
data Located a = Located Int Int Int a
deriving Show
constTok :: t -> AlexInput -> Int -> Alex (Located t)
constTok t (AlexPn _ y x,_,_,_) l = pure $ Located y x l t
data CoreToken = TokenLet
| TokenLetrec
| TokenIn
| TokenModule
| TokenWhere
| TokenComma
| TokenCase
| TokenOf
| TokenLambda
| TokenArrow
| TokenLitInt Int
| TokenVarName Name
| TokenConName Name
| TokenVarSym Name
| TokenConSym Name
| TokenEquals
| TokenLParen
| TokenRParen
| TokenLBrace
| TokenRBrace
| TokenLBraceV -- virtual brace inserted by layout
| TokenRBraceV -- virtual brace inserted by layout
| TokenIndent Int
| TokenDedent Int
| TokenSemicolon
| TokenEOF
deriving Show
data LayoutContext = Layout Int
| NoLayout
deriving Show
data AlexUserState = AlexUserState
{ _ausContext :: [LayoutContext]
}
ausContext :: Lens' AlexUserState [LayoutContext]
ausContext f (AlexUserState ctx)
= fmap
(\a -> AlexUserState a) (f ctx)
{-# INLINE ausContext #-}
pushContext :: LayoutContext -> Alex ()
pushContext c = do
st <- alexGetUserState
alexSetUserState $ st { _ausContext = c : _ausContext st }
popContext :: Alex ()
popContext = do
st <- alexGetUserState
alexSetUserState $ st { _ausContext = drop 1 (_ausContext st) }
getContext :: Alex [LayoutContext]
getContext = do
st <- alexGetUserState
pure $ _ausContext st
type Lexer = AlexInput -> Int -> Alex (Located CoreToken)
alexInitUserState :: AlexUserState
alexInitUserState = AlexUserState []
nestedComment :: Lexer
nestedComment _ _ = undefined
lexStream :: Alex [Located CoreToken]
lexStream = do
l <- alexMonadScan
case l of
Located _ _ _ TokenEOF -> pure [l]
_ -> (l:) <$> lexStream
-- | The main lexer driver.
lexCore :: String -> RLPC ParseError [Located CoreToken]
lexCore s = case m of
Left e -> addFatal err
where err = SrcError
{ _errSpan = (0,0,0) -- TODO: location
, _errSeverity = Error
, _errDiagnostic = ParErrLexical e
}
Right ts -> pure ts
where
m = runAlex s (alexSetStartCode initial *> lexStream)
-- | @lexCore@, but the tokens are stripped of location info. Useful for
-- debugging
lexCore' :: String -> RLPC ParseError [CoreToken]
lexCore' s = fmap f <$> lexCore s
where f (Located _ _ _ t) = t
data ParseError = ParErrLexical String
| ParErrParse
deriving Show
lexWith :: (String -> CoreToken) -> Lexer
lexWith f (AlexPn _ y x,_,_,s) l = pure $ Located y x l (f $ take l s)
lexToken :: Alex (Located CoreToken)
lexToken = alexMonadScan
getSrcCol :: Alex Int
getSrcCol = Alex $ \ st ->
let AlexPn _ _ col = alex_pos st
in Right (st, col)
lbrace :: Lexer
lbrace (AlexPn _ y x,_,_,_) l = do
pushContext NoLayout
pure $ Located y x l TokenLBrace
rbrace :: Lexer
rbrace (AlexPn _ y x,_,_,_) l = do
popContext
pure $ Located y x l TokenRBrace
insRBraceV :: AlexPosn -> Alex (Located CoreToken)
insRBraceV (AlexPn _ y x) = do
popContext
pure $ Located y x 0 TokenRBraceV
insSemi :: AlexPosn -> Alex (Located CoreToken)
insSemi (AlexPn _ y x) = do
pure $ Located y x 0 TokenSemicolon
modifyUst :: (AlexUserState -> AlexUserState) -> Alex ()
modifyUst f = do
st <- alexGetUserState
alexSetUserState $ f st
getUst :: Alex AlexUserState
getUst = alexGetUserState
newLayoutContext :: Lexer
newLayoutContext (p,_,_,_) _ = do
undefined
noBrace :: Lexer
noBrace (AlexPn _ y x,_,_,_) l = do
col <- getSrcCol
pushContext (Layout col)
pure $ Located y x l TokenLBraceV
getOffside :: Alex Ordering
getOffside = do
ctx <- getContext
m <- getSrcCol
case ctx of
Layout n : _ -> pure $ m `compare` n
_ -> pure GT
doBol :: Lexer
doBol (p,c,_,s) _ = do
off <- getOffside
case off of
LT -> insRBraceV p
EQ -> insSemi p
_ -> lexToken
letin :: Lexer
letin (AlexPn _ y x,_,_,_) l = do
popContext
pure $ Located y x l TokenIn
topLevelOff :: Lexer
topLevelOff = noBrace
alexEOF :: Alex (Located CoreToken)
alexEOF = Alex $ \ st@(AlexState { alex_pos = AlexPn _ y x }) ->
Right (st, Located y x 0 TokenEOF)
}