From c92d8fac65c3dce3421406d6126c7a75d5955f0d Mon Sep 17 00:00:00 2001 From: crumbtoo Date: Mon, 15 Jan 2024 09:44:26 -0700 Subject: [PATCH] we're so back --- src/Rlp/Lex.x | 126 +++++++++++++++++++++++++++++++---------- src/Rlp/Parse/Types.hs | 1 + 2 files changed, 98 insertions(+), 29 deletions(-) diff --git a/src/Rlp/Lex.x b/src/Rlp/Lex.x index d81e432..e5a8805 100644 --- a/src/Rlp/Lex.x +++ b/src/Rlp/Lex.x @@ -1,4 +1,5 @@ { +{-# LANGUAGE ViewPatterns #-} {-# LANGUAGE GeneralisedNewtypeDeriving #-} {-# LANGUAGE OverloadedStrings #-} module Rlp.Lex @@ -49,21 +50,21 @@ rlp :- $white_no_nl+ ; -- TODO: don't treat operators like (-->) as comments "--".* ; - ";" { constToken TokenSemicolon } - -- "{" { explicitLBrace } - -- "}" { explicitRBrace } <0> { - \n { begin bol } -} - - -{ + \n { beginPush bol } @varname { tokenWith TokenVarName } @digits { tokenWith (TokenLitInt . readInt) } "=" { constToken TokenEquals } - \n { begin bol } +} + +-- control characters +<0> +{ + "{" { explicitLBrace } + "}" { explicitRBrace } + ";" { constToken TokenSemicolon } } -- consume all whitespace leaving us at the beginning of the next non-empty @@ -76,19 +77,47 @@ rlp :- () { doBol } } + +{ + \n ; + "{" { explicitLBrace `thenDo` popLexState } + () { doLayout } +} + { -begin :: Int -> LexerAction a -begin = undefined +-- | @andBegin@, with the subtle difference that the start code is set +-- /after/ the action +thenBegin :: LexerAction a -> Int -> LexerAction a +thenBegin act c inp l = do + a <- act inp l + psLexState . _head .= c + pure a + +andBegin :: LexerAction a -> Int -> LexerAction a +andBegin act c inp l = do + psLexState . _head .= c + act inp l + +beginPush :: Int -> LexerAction (Located RlpToken) +beginPush n _ _ = pushLexState n >> lexToken alexGetByte :: AlexInput -> Maybe (Word8, AlexInput) alexGetByte inp = case inp ^. aiBytes of [] -> do (c,t) <- T.uncons (inp ^. aiSource) let (b:bs) = encodeChar c + -- tail the source inp' = inp & aiSource .~ t + -- record the excess bytes for successive calls & aiBytes .~ bs + -- report the previous char & aiPrevChar .~ c + -- update the position + & aiPos %~ \ (ln,col) -> + if (inp ^. aiPrevChar) == '\n' + then (ln+1,1) + else (ln,col+1) pure (b, inp') _ -> Just (head bs, inp') @@ -104,17 +133,13 @@ getLexState = use (psLexState . singular _head) alexInputPrevChar :: AlexInput -> Char alexInputPrevChar = view aiPrevChar +pushLexState :: Int -> P () +pushLexState n = psLexState %= (n:) + readInt :: Text -> Int readInt = T.foldr f 0 where f c n = digitToInt c + 10*n --- | @andBegin@, with the subtle difference that the start code is set --- /after/ the action -thenBegin :: LexerAction a -> Int -> LexerAction a -thenBegin act c inp l = do - a <- act inp l - undefined - constToken :: RlpToken -> LexerAction (Located RlpToken) constToken t inp l = do pos <- use (psInput . aiPos) @@ -144,27 +169,37 @@ execP' p s = execP p st where initParseState :: Text -> ParseState initParseState s = ParseState { _psLayoutStack = [] - , _psLexState = [one,bol,0] + -- IMPORTANT: the initial state is `bol` to begin the top-level layout, + -- which then returns to state 0 which continues the normal lexing process. + , _psLexState = [layout_top,0] , _psInput = initAlexInput s } initAlexInput :: Text -> AlexInput -initAlexInput s = AlexInput +initAlexInput (unconsBytes -> (b,s)) = AlexInput { _aiPrevChar = '\0' , _aiSource = s - , _aiBytes = [] + , _aiBytes = b , _aiPos = (1,1) } +unconsBytes :: Text -> ([Word8], Text) +unconsBytes s = (encodeChar c, t) where + (c,t) = fromJust $ T.uncons s + lexToken :: P (Located RlpToken) lexToken = do inp <- getInput c <- getLexState + st <- use id + traceM $ "st: " <> show st case alexScan inp c of AlexEOF -> pure $ Located (inp ^. aiPos, 0) TokenEOF + AlexSkip inp' l -> do + psInput .= inp' + lexToken AlexToken inp' l act -> do psInput .= inp' - traceM $ "l: " <> show l traceShowM inp' act inp l @@ -178,8 +213,8 @@ lexStream = do Located _ TokenEOF -> pure [TokenEOF] Located _ t -> (t:) <$> lexStream -lexTest :: Text -> Either String [RlpToken] -lexTest = undefined +lexTest :: Text -> Maybe [RlpToken] +lexTest s = execP' lexStream s indentLevel :: P Int indentLevel = do @@ -195,11 +230,20 @@ popLayout :: P Layout popLayout = do traceM "pop layout" ctx <- preuse (psLayoutStack . _head) - modifying psLayoutStack (drop 1) + psLayoutStack %= (drop 1) case ctx of Just l -> pure l Nothing -> error "uhh" +pushLayout :: Layout -> P () +pushLayout l = do + traceM "push layout" + psLayoutStack %= (l:) + +popLexState :: P () +popLexState = do + psLexState %= tail + insertSemicolon, insertLBrace, insertRBrace :: P (Located RlpToken) insertSemicolon = traceM "inserting semi" >> insertToken TokenSemicolonV insertLBrace = traceM "inserting lbrace" >> insertToken TokenLBraceV @@ -209,23 +253,47 @@ cmpLayout :: P Ordering cmpLayout = do i <- indentLevel ctx <- preuse (psLayoutStack . _head) - case ctx ^. non (Implicit 1) of - Implicit n -> pure (i `compare` n) - Explicit -> pure GT + case ctx of + Just (Implicit n) -> pure (i `compare` n) + _ -> pure GT doBol :: LexerAction (Located RlpToken) doBol inp l = do off <- cmpLayout + i <- indentLevel + traceM $ "i: " <> show i + -- important that we pop the lex state lest we find our lexer diverging + popLexState case off of -- the line is aligned with the previous. it therefore belongs to the -- same list EQ -> insertSemicolon -- the line is indented further than the previous, so we assume it is a -- line continuation. ignore it and move on! - GT -> undefined -- alexSetStartCode one >> lexToken + GT -> lexToken -- the line is indented less than the previous, pop the layout stack and -- insert a closing brace. LT -> popLayout >> insertRBrace +thenDo :: LexerAction a -> P b -> LexerAction a +thenDo act p inp l = act inp l <* p + +explicitLBrace :: LexerAction (Located RlpToken) +explicitLBrace inp l = do + pushLayout Explicit + constToken TokenLBrace inp l + +explicitRBrace :: LexerAction (Located RlpToken) +explicitRBrace inp l = do + popLayout + constToken TokenRBrace inp l + +doLayout :: LexerAction (Located RlpToken) +doLayout _ _ = do + i <- indentLevel + pushLayout (Implicit i) + popLexState + insertLBrace + } diff --git a/src/Rlp/Parse/Types.hs b/src/Rlp/Parse/Types.hs index d25c27b..90b5524 100644 --- a/src/Rlp/Parse/Types.hs +++ b/src/Rlp/Parse/Types.hs @@ -84,6 +84,7 @@ data ParseState = ParseState , _psLexState :: [Int] , _psInput :: AlexInput } + deriving Show data Layout = Explicit | Implicit Int