rc #13
126
src/Rlp/Lex.x
126
src/Rlp/Lex.x
@@ -1,4 +1,5 @@
|
|||||||
{
|
{
|
||||||
|
{-# LANGUAGE ViewPatterns #-}
|
||||||
{-# LANGUAGE GeneralisedNewtypeDeriving #-}
|
{-# LANGUAGE GeneralisedNewtypeDeriving #-}
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
module Rlp.Lex
|
module Rlp.Lex
|
||||||
@@ -49,21 +50,21 @@ rlp :-
|
|||||||
$white_no_nl+ ;
|
$white_no_nl+ ;
|
||||||
-- TODO: don't treat operators like (-->) as comments
|
-- TODO: don't treat operators like (-->) as comments
|
||||||
"--".* ;
|
"--".* ;
|
||||||
";" { constToken TokenSemicolon }
|
|
||||||
-- "{" { explicitLBrace }
|
|
||||||
-- "}" { explicitRBrace }
|
|
||||||
|
|
||||||
<0>
|
<0>
|
||||||
{
|
{
|
||||||
\n { begin bol }
|
\n { beginPush bol }
|
||||||
}
|
|
||||||
|
|
||||||
<one>
|
|
||||||
{
|
|
||||||
@varname { tokenWith TokenVarName }
|
@varname { tokenWith TokenVarName }
|
||||||
@digits { tokenWith (TokenLitInt . readInt) }
|
@digits { tokenWith (TokenLitInt . readInt) }
|
||||||
"=" { constToken TokenEquals }
|
"=" { constToken TokenEquals }
|
||||||
\n { begin bol }
|
}
|
||||||
|
|
||||||
|
-- control characters
|
||||||
|
<0>
|
||||||
|
{
|
||||||
|
"{" { explicitLBrace }
|
||||||
|
"}" { explicitRBrace }
|
||||||
|
";" { constToken TokenSemicolon }
|
||||||
}
|
}
|
||||||
|
|
||||||
-- consume all whitespace leaving us at the beginning of the next non-empty
|
-- consume all whitespace leaving us at the beginning of the next non-empty
|
||||||
@@ -76,19 +77,47 @@ rlp :-
|
|||||||
() { doBol }
|
() { doBol }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
<layout_top>
|
||||||
|
{
|
||||||
|
\n ;
|
||||||
|
"{" { explicitLBrace `thenDo` popLexState }
|
||||||
|
() { doLayout }
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
|
||||||
begin :: Int -> LexerAction a
|
-- | @andBegin@, with the subtle difference that the start code is set
|
||||||
begin = undefined
|
-- /after/ the action
|
||||||
|
thenBegin :: LexerAction a -> Int -> LexerAction a
|
||||||
|
thenBegin act c inp l = do
|
||||||
|
a <- act inp l
|
||||||
|
psLexState . _head .= c
|
||||||
|
pure a
|
||||||
|
|
||||||
|
andBegin :: LexerAction a -> Int -> LexerAction a
|
||||||
|
andBegin act c inp l = do
|
||||||
|
psLexState . _head .= c
|
||||||
|
act inp l
|
||||||
|
|
||||||
|
beginPush :: Int -> LexerAction (Located RlpToken)
|
||||||
|
beginPush n _ _ = pushLexState n >> lexToken
|
||||||
|
|
||||||
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
|
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
|
||||||
alexGetByte inp = case inp ^. aiBytes of
|
alexGetByte inp = case inp ^. aiBytes of
|
||||||
[] -> do
|
[] -> do
|
||||||
(c,t) <- T.uncons (inp ^. aiSource)
|
(c,t) <- T.uncons (inp ^. aiSource)
|
||||||
let (b:bs) = encodeChar c
|
let (b:bs) = encodeChar c
|
||||||
|
-- tail the source
|
||||||
inp' = inp & aiSource .~ t
|
inp' = inp & aiSource .~ t
|
||||||
|
-- record the excess bytes for successive calls
|
||||||
& aiBytes .~ bs
|
& aiBytes .~ bs
|
||||||
|
-- report the previous char
|
||||||
& aiPrevChar .~ c
|
& aiPrevChar .~ c
|
||||||
|
-- update the position
|
||||||
|
& aiPos %~ \ (ln,col) ->
|
||||||
|
if (inp ^. aiPrevChar) == '\n'
|
||||||
|
then (ln+1,1)
|
||||||
|
else (ln,col+1)
|
||||||
pure (b, inp')
|
pure (b, inp')
|
||||||
|
|
||||||
_ -> Just (head bs, inp')
|
_ -> Just (head bs, inp')
|
||||||
@@ -104,17 +133,13 @@ getLexState = use (psLexState . singular _head)
|
|||||||
alexInputPrevChar :: AlexInput -> Char
|
alexInputPrevChar :: AlexInput -> Char
|
||||||
alexInputPrevChar = view aiPrevChar
|
alexInputPrevChar = view aiPrevChar
|
||||||
|
|
||||||
|
pushLexState :: Int -> P ()
|
||||||
|
pushLexState n = psLexState %= (n:)
|
||||||
|
|
||||||
readInt :: Text -> Int
|
readInt :: Text -> Int
|
||||||
readInt = T.foldr f 0 where
|
readInt = T.foldr f 0 where
|
||||||
f c n = digitToInt c + 10*n
|
f c n = digitToInt c + 10*n
|
||||||
|
|
||||||
-- | @andBegin@, with the subtle difference that the start code is set
|
|
||||||
-- /after/ the action
|
|
||||||
thenBegin :: LexerAction a -> Int -> LexerAction a
|
|
||||||
thenBegin act c inp l = do
|
|
||||||
a <- act inp l
|
|
||||||
undefined
|
|
||||||
|
|
||||||
constToken :: RlpToken -> LexerAction (Located RlpToken)
|
constToken :: RlpToken -> LexerAction (Located RlpToken)
|
||||||
constToken t inp l = do
|
constToken t inp l = do
|
||||||
pos <- use (psInput . aiPos)
|
pos <- use (psInput . aiPos)
|
||||||
@@ -144,27 +169,37 @@ execP' p s = execP p st where
|
|||||||
initParseState :: Text -> ParseState
|
initParseState :: Text -> ParseState
|
||||||
initParseState s = ParseState
|
initParseState s = ParseState
|
||||||
{ _psLayoutStack = []
|
{ _psLayoutStack = []
|
||||||
, _psLexState = [one,bol,0]
|
-- IMPORTANT: the initial state is `bol` to begin the top-level layout,
|
||||||
|
-- which then returns to state 0 which continues the normal lexing process.
|
||||||
|
, _psLexState = [layout_top,0]
|
||||||
, _psInput = initAlexInput s
|
, _psInput = initAlexInput s
|
||||||
}
|
}
|
||||||
|
|
||||||
initAlexInput :: Text -> AlexInput
|
initAlexInput :: Text -> AlexInput
|
||||||
initAlexInput s = AlexInput
|
initAlexInput (unconsBytes -> (b,s)) = AlexInput
|
||||||
{ _aiPrevChar = '\0'
|
{ _aiPrevChar = '\0'
|
||||||
, _aiSource = s
|
, _aiSource = s
|
||||||
, _aiBytes = []
|
, _aiBytes = b
|
||||||
, _aiPos = (1,1)
|
, _aiPos = (1,1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unconsBytes :: Text -> ([Word8], Text)
|
||||||
|
unconsBytes s = (encodeChar c, t) where
|
||||||
|
(c,t) = fromJust $ T.uncons s
|
||||||
|
|
||||||
lexToken :: P (Located RlpToken)
|
lexToken :: P (Located RlpToken)
|
||||||
lexToken = do
|
lexToken = do
|
||||||
inp <- getInput
|
inp <- getInput
|
||||||
c <- getLexState
|
c <- getLexState
|
||||||
|
st <- use id
|
||||||
|
traceM $ "st: " <> show st
|
||||||
case alexScan inp c of
|
case alexScan inp c of
|
||||||
AlexEOF -> pure $ Located (inp ^. aiPos, 0) TokenEOF
|
AlexEOF -> pure $ Located (inp ^. aiPos, 0) TokenEOF
|
||||||
|
AlexSkip inp' l -> do
|
||||||
|
psInput .= inp'
|
||||||
|
lexToken
|
||||||
AlexToken inp' l act -> do
|
AlexToken inp' l act -> do
|
||||||
psInput .= inp'
|
psInput .= inp'
|
||||||
traceM $ "l: " <> show l
|
|
||||||
traceShowM inp'
|
traceShowM inp'
|
||||||
act inp l
|
act inp l
|
||||||
|
|
||||||
@@ -178,8 +213,8 @@ lexStream = do
|
|||||||
Located _ TokenEOF -> pure [TokenEOF]
|
Located _ TokenEOF -> pure [TokenEOF]
|
||||||
Located _ t -> (t:) <$> lexStream
|
Located _ t -> (t:) <$> lexStream
|
||||||
|
|
||||||
lexTest :: Text -> Either String [RlpToken]
|
lexTest :: Text -> Maybe [RlpToken]
|
||||||
lexTest = undefined
|
lexTest s = execP' lexStream s
|
||||||
|
|
||||||
indentLevel :: P Int
|
indentLevel :: P Int
|
||||||
indentLevel = do
|
indentLevel = do
|
||||||
@@ -195,11 +230,20 @@ popLayout :: P Layout
|
|||||||
popLayout = do
|
popLayout = do
|
||||||
traceM "pop layout"
|
traceM "pop layout"
|
||||||
ctx <- preuse (psLayoutStack . _head)
|
ctx <- preuse (psLayoutStack . _head)
|
||||||
modifying psLayoutStack (drop 1)
|
psLayoutStack %= (drop 1)
|
||||||
case ctx of
|
case ctx of
|
||||||
Just l -> pure l
|
Just l -> pure l
|
||||||
Nothing -> error "uhh"
|
Nothing -> error "uhh"
|
||||||
|
|
||||||
|
pushLayout :: Layout -> P ()
|
||||||
|
pushLayout l = do
|
||||||
|
traceM "push layout"
|
||||||
|
psLayoutStack %= (l:)
|
||||||
|
|
||||||
|
popLexState :: P ()
|
||||||
|
popLexState = do
|
||||||
|
psLexState %= tail
|
||||||
|
|
||||||
insertSemicolon, insertLBrace, insertRBrace :: P (Located RlpToken)
|
insertSemicolon, insertLBrace, insertRBrace :: P (Located RlpToken)
|
||||||
insertSemicolon = traceM "inserting semi" >> insertToken TokenSemicolonV
|
insertSemicolon = traceM "inserting semi" >> insertToken TokenSemicolonV
|
||||||
insertLBrace = traceM "inserting lbrace" >> insertToken TokenLBraceV
|
insertLBrace = traceM "inserting lbrace" >> insertToken TokenLBraceV
|
||||||
@@ -209,23 +253,47 @@ cmpLayout :: P Ordering
|
|||||||
cmpLayout = do
|
cmpLayout = do
|
||||||
i <- indentLevel
|
i <- indentLevel
|
||||||
ctx <- preuse (psLayoutStack . _head)
|
ctx <- preuse (psLayoutStack . _head)
|
||||||
case ctx ^. non (Implicit 1) of
|
case ctx of
|
||||||
Implicit n -> pure (i `compare` n)
|
Just (Implicit n) -> pure (i `compare` n)
|
||||||
Explicit -> pure GT
|
_ -> pure GT
|
||||||
|
|
||||||
doBol :: LexerAction (Located RlpToken)
|
doBol :: LexerAction (Located RlpToken)
|
||||||
doBol inp l = do
|
doBol inp l = do
|
||||||
off <- cmpLayout
|
off <- cmpLayout
|
||||||
|
i <- indentLevel
|
||||||
|
traceM $ "i: " <> show i
|
||||||
|
-- important that we pop the lex state lest we find our lexer diverging
|
||||||
|
popLexState
|
||||||
case off of
|
case off of
|
||||||
-- the line is aligned with the previous. it therefore belongs to the
|
-- the line is aligned with the previous. it therefore belongs to the
|
||||||
-- same list
|
-- same list
|
||||||
EQ -> insertSemicolon
|
EQ -> insertSemicolon
|
||||||
-- the line is indented further than the previous, so we assume it is a
|
-- the line is indented further than the previous, so we assume it is a
|
||||||
-- line continuation. ignore it and move on!
|
-- line continuation. ignore it and move on!
|
||||||
GT -> undefined -- alexSetStartCode one >> lexToken
|
GT -> lexToken
|
||||||
-- the line is indented less than the previous, pop the layout stack and
|
-- the line is indented less than the previous, pop the layout stack and
|
||||||
-- insert a closing brace.
|
-- insert a closing brace.
|
||||||
LT -> popLayout >> insertRBrace
|
LT -> popLayout >> insertRBrace
|
||||||
|
|
||||||
|
thenDo :: LexerAction a -> P b -> LexerAction a
|
||||||
|
thenDo act p inp l = act inp l <* p
|
||||||
|
|
||||||
|
explicitLBrace :: LexerAction (Located RlpToken)
|
||||||
|
explicitLBrace inp l = do
|
||||||
|
pushLayout Explicit
|
||||||
|
constToken TokenLBrace inp l
|
||||||
|
|
||||||
|
explicitRBrace :: LexerAction (Located RlpToken)
|
||||||
|
explicitRBrace inp l = do
|
||||||
|
popLayout
|
||||||
|
constToken TokenRBrace inp l
|
||||||
|
|
||||||
|
doLayout :: LexerAction (Located RlpToken)
|
||||||
|
doLayout _ _ = do
|
||||||
|
i <- indentLevel
|
||||||
|
pushLayout (Implicit i)
|
||||||
|
popLexState
|
||||||
|
insertLBrace
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -84,6 +84,7 @@ data ParseState = ParseState
|
|||||||
, _psLexState :: [Int]
|
, _psLexState :: [Int]
|
||||||
, _psInput :: AlexInput
|
, _psInput :: AlexInput
|
||||||
}
|
}
|
||||||
|
deriving Show
|
||||||
|
|
||||||
data Layout = Explicit
|
data Layout = Explicit
|
||||||
| Implicit Int
|
| Implicit Int
|
||||||
|
|||||||
Reference in New Issue
Block a user