works

2024-01-15 13:31:15 -07:00
parent c0236dc079
commit 1c035d092a
5 changed files with 103 additions and 42 deletions
--- a/4
+++ b/4
@@ -1,7 +1,7 @@
 HAPPY = happy
-HAPPY_OPTS =
+HAPPY_OPTS = -a -g -c
 ALEX = alex
-ALEX_OPTS = -d
+ALEX_OPTS = -g

 SRC = src
 CABAL_BUILD = dist-newstyle/build/x86_64-osx/ghc-9.6.2/rlp-0.1.0.0/build
--- a/src/Rlp/Lex.x
+++ b/src/Rlp/Lex.x
@@ -1,5 +1,5 @@
 {
-{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE ViewPatterns, LambdaCase #-}
 {-# LANGUAGE GeneralisedNewtypeDeriving #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Rlp.Lex
@@ -7,7 +7,8 @@ module Rlp.Lex
    , RlpToken(..)
    , Located(..)
    , lexToken
-    , lexerCont
+    , lexDebug
+    , lexCont
    )
    where
 import Codec.Binary.UTF8.String (encodeChar)
@@ -30,33 +31,60 @@ import Rlp.Parse.Types

 $whitechar      = [ \t\n\r\f\v]

+$nl             = [\n\r]
+$white_no_nl    = $white # $nl
+
 $lower          = [a-z \_]
 $upper          = [A-Z]
 $alpha          = [$lower $upper]
 $digit          = 0-9

-$nl             = [\n\r]
-$white_no_nl    = $white # $nl
-
+$special        = [\(\)\,\;\[\]\{\}]
 $namechar       = [$alpha $digit \' \#]
+$asciisym       = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~\:]
+
+@decimal        = $digit+

@varname        = $lower $namechar*
+@conname        = $upper $namechar*
+@consym         = \: $asciisym*
+@varsym         = $asciisym+

-@digits         = $digit+
+@reservedname = 
+    case|data|do|import|in|let|letrec|module|of|where
+
+@reservedop =
+    "=" | \\ | "->" | "|"

 rlp :-
    
-    -- skip whitespace
+-- everywhere: skip whitespace
 $white_no_nl+       ;
+
+-- everywhere: skip comments
 -- TODO: don't treat operators like (-->) as comments
 "--".*              ;

+-- we are indentation-sensitive! do not skip NLs!. upon encountering a newline,
+-- we check indentation and potentially insert extra tokens. search this file
+-- for the definition of `doBol`
+<0> \n                  { beginPush bol }
+
+-- scan various identifiers and reserved words. order is important here!
 <0>
 {
-    \n                  { beginPush bol }
+    @reservedname       { tokenWith lexReservedName }
+    @conname            { tokenWith TokenConName }
    @varname            { tokenWith TokenVarName }
-    @digits             { tokenWith (TokenLitInt . readInt) }
-    "="                 { constToken TokenEquals }
+    @reservedop         { tokenWith lexReservedOp }
+    @consym             { tokenWith TokenConSym }
+    @varsym             { tokenWith TokenVarSym }
+}
+
+-- literals -- currently this is just unsigned integer literals
+<0>
+{
+    @decimal            { tokenWith (TokenLitInt . readInt) }
 }

 -- control characters
@@ -86,6 +114,20 @@ rlp :-

 {

+lexReservedName :: Text -> RlpToken
+lexReservedName = \case
+    "data"      -> TokenData
+    "case"      -> TokenCase
+    "of"        -> TokenOf
+    "let"       -> TokenLet
+    "in"        -> TokenIn
+
+lexReservedOp :: Text -> RlpToken
+lexReservedOp = \case
+    "="     -> TokenEquals
+    "::"    -> TokenHasType
+    "|"     -> TokenPipe
+
 -- | @andBegin@, with the subtle difference that the start code is set
 --   /after/ the action
 thenBegin :: LexerAction a -> Int -> LexerAction a
@@ -173,6 +215,7 @@ initParseState s = ParseState
    -- which then returns to state 0 which continues the normal lexing process.
    , _psLexState = [layout_top,0]
    , _psInput = initAlexInput s
+    , _psOpTable = mempty
    }

 initAlexInput :: Text -> AlexInput
@@ -188,7 +231,7 @@ lexToken = do
    inp <- getInput
    c <- getLexState
    st <- use id
-    traceM $ "st: " <> show st
+    -- traceM $ "st: " <> show st
    case alexScan inp c of
        AlexEOF -> pure $ Located (inp ^. aiPos, 0) TokenEOF
        AlexSkip inp' l -> do
@@ -196,11 +239,10 @@ lexToken = do
            lexToken
        AlexToken inp' l act -> do
            psInput .= inp'
-            traceShowM inp'
            act inp l

-lexerCont :: (Located RlpToken -> P a) -> P a
-lexerCont = undefined
+lexCont :: (Located RlpToken -> P a) -> P a
+lexCont = (lexToken >>=)

 lexStream :: P [RlpToken]
 lexStream = do
@@ -209,6 +251,12 @@ lexStream = do
        Located _ TokenEOF -> pure [TokenEOF]
        Located _ t        -> (t:) <$> lexStream

+lexDebug :: (Located RlpToken -> P a) -> P a
+lexDebug k = do
+    t <- lexToken
+    traceM $ "token: " <> show t
+    k t
+
 lexTest :: Text -> Maybe [RlpToken]
 lexTest s = execP' lexStream s

@@ -224,7 +272,7 @@ insertToken t = do

 popLayout :: P Layout
 popLayout = do
-    traceM "pop layout"
+    -- traceM "pop layout"
    ctx <- preuse (psLayoutStack . _head)
    psLayoutStack %= (drop 1)
    case ctx of
@@ -233,7 +281,7 @@ popLayout = do

 pushLayout :: Layout -> P ()
 pushLayout l = do
-    traceM "push layout"
+    -- traceM "push layout"
    psLayoutStack %= (l:)

 popLexState :: P ()
@@ -241,9 +289,9 @@ popLexState = do
    psLexState %= tail

 insertSemicolon, insertLBrace, insertRBrace :: P (Located RlpToken)
-insertSemicolon = traceM "inserting semi"   >> insertToken TokenSemicolonV
-insertLBrace    = traceM "inserting lbrace" >> insertToken TokenLBraceV
-insertRBrace    = traceM "inserting rbrace" >> insertToken TokenRBraceV
+insertSemicolon = {- traceM "inserting semi"   >> -} insertToken TokenSemicolonV
+insertLBrace    = {- traceM "inserting lbrace" >> -} insertToken TokenLBraceV
+insertRBrace    = {- traceM "inserting rbrace" >> -} insertToken TokenRBraceV

 cmpLayout :: P Ordering
 cmpLayout = do
--- a/src/Rlp/Parse.y
+++ b/src/Rlp/Parse.y
@@ -1,62 +1,72 @@
 {
 module Rlp.Parse
    ( parseRlpProgram
+    , parseTest
    )
    where
 import Rlp.Lex
 import Rlp.Syntax
 import Rlp.Parse.Types
+import Data.Fix
+import Data.Functor.Const
 }

 %name parseRlpProgram StandaloneProgram
+%name parseTest VL

 %monad { P }
-%lexer { lexerCont } { Located _ TokenEOF }
+%lexer { lexDebug } { Located _ TokenEOF }
 %error { parseError }
 %tokentype { Located RlpToken }

 %token
    varname         { Located _ (TokenVarName $$) }
+    conname         { Located _ (TokenConName $$) }
+    data            { Located _ TokenData }
    litint          { Located _ (TokenLitInt $$) }
    '='             { Located _ TokenEquals }
+    '|'             { Located _ TokenPipe }
    ';'             { Located _ TokenSemicolon }
-    ';?'            { Located _ TokenSemicolonV }
+    vsemi           { Located _ TokenSemicolonV }
    '{'             { Located _ TokenLBrace }
    '}'             { Located _ TokenRBrace }
-    '{?'            { Located _ TokenLBraceV }
-    '?}'            { Located _ TokenRBraceV }
-    eof             { Located _ TokenEOF }
+    vlbrace         { Located _ TokenLBraceV }
+    vrbrace         { Located _ TokenRBraceV }

 %%

 StandaloneProgram   :: { [PartialDecl'] }
-StandaloneProgram   : VL Decls VR eof       { $2 }
+StandaloneProgram   : '{' Decls '}'         { $2 }
+                    | VL  Decls VR          { $2 }

 VL  :: { () }
-VL  : '{?'          { () }
+VL  : vlbrace       { () }

 VR  :: { () }
-VR  : '?}'          { () }
+VR  : vrbrace       { () }
    | error         { () }

 Decls               :: { [PartialDecl'] }
-Decls               : Decl Semi Decls       { $1 : $3 }
-                    | Decl Semi             { [$1] }
+Decls               : Decl VS Decls         { $1 : $3 }
+                    | Decl VS               { [$1] }
                    | Decl                  { [$1] }

 Semi                :: { Located RlpToken }
 Semi                : ';'                   { $1 }
-                    | ';?'                  { $1 }
+
+VS                  :: { Located RlpToken }
+VS                  : ';'                   { $1 }
+                    | vsemi                 { $1 }

 Decl        :: { PartialDecl' }
-Decl        : FunDecl                   { undefined }
+Decl        : FunDecl                   { $1 }

 FunDecl     :: { PartialDecl' }
-FunDecl     : varname '=' Expr          { undefined }
+FunDecl     : Var '=' Expr              { FunD $1 [] (Const $3) Nothing }

-Expr        :: { RlpExpr' }
-Expr        : Literal                   { LitE $1 }
-            | Var                       { VarE $1 }
+Expr        :: { PartialExpr' }
+Expr        : Literal                   { Fix . E $ LitEF $1 }
+            | Var                       { Fix . E $ VarEF $1 }

 Literal     :: { Lit' }
 Literal     : litint                    { IntL $1 }
--- a/src/Rlp/Parse/Types.hs
+++ b/src/Rlp/Parse/Types.hs
@@ -42,8 +42,10 @@ data RlpToken
    | TokenConName Name
    | TokenVarSym Name
    | TokenConSym Name
-    -- keywords
+    -- reserved words
    | TokenData
+    | TokenCase
+    | TokenOf
    | TokenLet
    | TokenIn
    -- reserved ops
@@ -87,6 +89,7 @@ data ParseState = ParseState
    { _psLayoutStack        :: [Layout]
    , _psLexState           :: [Int]
    , _psInput              :: AlexInput
+    , _psOpTable            :: OpTable
    }
    deriving Show

--- a/src/Rlp/Syntax.hs
+++ b/src/Rlp/Syntax.hs
@@ -65,7 +65,7 @@ type RlpProgram' = RlpProgram Name
 -- accounted for, we may complete the parsing task and get a proper @[Decl
 -- RlpExpr Name]@.

-data Decl e b = FunD    VarId [Pat b] (e b) (Where b)
+data Decl e b = FunD    VarId [Pat b] (e b) (Maybe (Where b))
              | TySigD  [VarId] Type
              | DataD   ConId [Name] [ConAlt]
              | InfixD  Assoc Int Name