works

2024-01-15 13:31:15 -07:00
parent c0236dc079
commit 1c035d092a
5 changed files with 103 additions and 42 deletions
--- a/src/Rlp/Lex.x
+++ b/src/Rlp/Lex.x
@@ -1,5 +1,5 @@
 {
-{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE ViewPatterns, LambdaCase #-}
 {-# LANGUAGE GeneralisedNewtypeDeriving #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Rlp.Lex
@@ -7,7 +7,8 @@ module Rlp.Lex
    , RlpToken(..)
    , Located(..)
    , lexToken
-    , lexerCont
+    , lexDebug
+    , lexCont
    )
    where
 import Codec.Binary.UTF8.String (encodeChar)
@@ -30,33 +31,60 @@ import Rlp.Parse.Types

 $whitechar      = [ \t\n\r\f\v]

+$nl             = [\n\r]
+$white_no_nl    = $white # $nl
+
 $lower          = [a-z \_]
 $upper          = [A-Z]
 $alpha          = [$lower $upper]
 $digit          = 0-9

-$nl             = [\n\r]
-$white_no_nl    = $white # $nl
-
+$special        = [\(\)\,\;\[\]\{\}]
 $namechar       = [$alpha $digit \' \#]
+$asciisym       = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~\:]
+
+@decimal        = $digit+

@varname        = $lower $namechar*
+@conname        = $upper $namechar*
+@consym         = \: $asciisym*
+@varsym         = $asciisym+

-@digits         = $digit+
+@reservedname = 
+    case|data|do|import|in|let|letrec|module|of|where
+
+@reservedop =
+    "=" | \\ | "->" | "|"

 rlp :-
    
-    -- skip whitespace
-    $white_no_nl+       ;
-    -- TODO: don't treat operators like (-->) as comments
-    "--".*              ;
+-- everywhere: skip whitespace
+$white_no_nl+       ;

+-- everywhere: skip comments
+-- TODO: don't treat operators like (-->) as comments
+"--".*              ;
+
+-- we are indentation-sensitive! do not skip NLs!. upon encountering a newline,
+-- we check indentation and potentially insert extra tokens. search this file
+-- for the definition of `doBol`
+<0> \n                  { beginPush bol }
+
+-- scan various identifiers and reserved words. order is important here!
 <0>
 {
-    \n                  { beginPush bol }
+    @reservedname       { tokenWith lexReservedName }
+    @conname            { tokenWith TokenConName }
    @varname            { tokenWith TokenVarName }
-    @digits             { tokenWith (TokenLitInt . readInt) }
-    "="                 { constToken TokenEquals }
+    @reservedop         { tokenWith lexReservedOp }
+    @consym             { tokenWith TokenConSym }
+    @varsym             { tokenWith TokenVarSym }
+}
+
+-- literals -- currently this is just unsigned integer literals
+<0>
+{
+    @decimal            { tokenWith (TokenLitInt . readInt) }
 }

 -- control characters
@@ -86,6 +114,20 @@ rlp :-

 {

+lexReservedName :: Text -> RlpToken
+lexReservedName = \case
+    "data"      -> TokenData
+    "case"      -> TokenCase
+    "of"        -> TokenOf
+    "let"       -> TokenLet
+    "in"        -> TokenIn
+
+lexReservedOp :: Text -> RlpToken
+lexReservedOp = \case
+    "="     -> TokenEquals
+    "::"    -> TokenHasType
+    "|"     -> TokenPipe
+
 -- | @andBegin@, with the subtle difference that the start code is set
 --   /after/ the action
 thenBegin :: LexerAction a -> Int -> LexerAction a
@@ -173,6 +215,7 @@ initParseState s = ParseState
    -- which then returns to state 0 which continues the normal lexing process.
    , _psLexState = [layout_top,0]
    , _psInput = initAlexInput s
+    , _psOpTable = mempty
    }

 initAlexInput :: Text -> AlexInput
@@ -188,7 +231,7 @@ lexToken = do
    inp <- getInput
    c <- getLexState
    st <- use id
-    traceM $ "st: " <> show st
+    -- traceM $ "st: " <> show st
    case alexScan inp c of
        AlexEOF -> pure $ Located (inp ^. aiPos, 0) TokenEOF
        AlexSkip inp' l -> do
@@ -196,11 +239,10 @@ lexToken = do
            lexToken
        AlexToken inp' l act -> do
            psInput .= inp'
-            traceShowM inp'
            act inp l

-lexerCont :: (Located RlpToken -> P a) -> P a
-lexerCont = undefined
+lexCont :: (Located RlpToken -> P a) -> P a
+lexCont = (lexToken >>=)

 lexStream :: P [RlpToken]
 lexStream = do
@@ -209,6 +251,12 @@ lexStream = do
        Located _ TokenEOF -> pure [TokenEOF]
        Located _ t        -> (t:) <$> lexStream

+lexDebug :: (Located RlpToken -> P a) -> P a
+lexDebug k = do
+    t <- lexToken
+    traceM $ "token: " <> show t
+    k t
+
 lexTest :: Text -> Maybe [RlpToken]
 lexTest s = execP' lexStream s

@@ -224,7 +272,7 @@ insertToken t = do

 popLayout :: P Layout
 popLayout = do
-    traceM "pop layout"
+    -- traceM "pop layout"
    ctx <- preuse (psLayoutStack . _head)
    psLayoutStack %= (drop 1)
    case ctx of
@@ -233,7 +281,7 @@ popLayout = do

 pushLayout :: Layout -> P ()
 pushLayout l = do
-    traceM "push layout"
+    -- traceM "push layout"
    psLayoutStack %= (l:)

 popLexState :: P ()
@@ -241,9 +289,9 @@ popLexState = do
    psLexState %= tail

 insertSemicolon, insertLBrace, insertRBrace :: P (Located RlpToken)
-insertSemicolon = traceM "inserting semi"   >> insertToken TokenSemicolonV
-insertLBrace    = traceM "inserting lbrace" >> insertToken TokenLBraceV
-insertRBrace    = traceM "inserting rbrace" >> insertToken TokenRBraceV
+insertSemicolon = {- traceM "inserting semi"   >> -} insertToken TokenSemicolonV
+insertLBrace    = {- traceM "inserting lbrace" >> -} insertToken TokenLBraceV
+insertRBrace    = {- traceM "inserting rbrace" >> -} insertToken TokenRBraceV

 cmpLayout :: P Ordering
 cmpLayout = do