works

2024-01-15 13:31:15 -07:00
parent c0236dc079
commit 1c035d092a
5 changed files with 103 additions and 42 deletions
--- a/4
+++ b/4
@@ -1,7 +1,7 @@
 HAPPY = happy
-HAPPY_OPTS =
+HAPPY_OPTS = -a -g -c
 ALEX = alex
-ALEX_OPTS = -d
+ALEX_OPTS = -g
 SRC = src
 CABAL_BUILD = dist-newstyle/build/x86_64-osx/ghc-9.6.2/rlp-0.1.0.0/build
--- a/src/Rlp/Lex.x
+++ b/src/Rlp/Lex.x
@@ -1,5 +1,5 @@
 {
-{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE ViewPatterns, LambdaCase #-}
 {-# LANGUAGE GeneralisedNewtypeDeriving #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Rlp.Lex
@@ -7,7 +7,8 @@ module Rlp.Lex
    , RlpToken(..)
    , Located(..)
    , lexToken
-    , lexerCont
+    , lexDebug
    , lexCont
    )
    where
 import Codec.Binary.UTF8.String (encodeChar)
@@ -30,33 +31,60 @@ import Rlp.Parse.Types
 $whitechar      = [ \t\n\r\f\v]
 $nl             = [\n\r]
 $white_no_nl    = $white # $nl
 $lower          = [a-z \_]
 $upper          = [A-Z]
 $alpha          = [$lower $upper]
 $digit          = 0-9
-$nl             = [\n\r]
+$special        = [\(\)\,\;\[\]\{\}]
 $white_no_nl    = $white # $nl
 $namechar       = [$alpha $digit \' \#]
 $asciisym       = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~\:]
@decimal        = $digit+
@varname        = $lower $namechar*
@conname        = $upper $namechar*
@consym         = \: $asciisym*
@varsym         = $asciisym+
-@digits         = $digit+
+@reservedname = 
    case|data|do|import|in|let|letrec|module|of|where
@reservedop =
    "=" | \\ | "->" | "|"
 rlp :-
-    -- skip whitespace
+-- everywhere: skip whitespace
 $white_no_nl+       ;
 -- everywhere: skip comments
 -- TODO: don't treat operators like (-->) as comments
 "--".*              ;
 -- we are indentation-sensitive! do not skip NLs!. upon encountering a newline,
 -- we check indentation and potentially insert extra tokens. search this file
 -- for the definition of `doBol`
 <0> \n                  { beginPush bol }
 -- scan various identifiers and reserved words. order is important here!
 <0>
 {
-    \n                  { beginPush bol }
+    @reservedname       { tokenWith lexReservedName }
    @conname            { tokenWith TokenConName }
    @varname            { tokenWith TokenVarName }
-    @digits             { tokenWith (TokenLitInt . readInt) }
+    @reservedop         { tokenWith lexReservedOp }
-    "="                 { constToken TokenEquals }
+    @consym             { tokenWith TokenConSym }
    @varsym             { tokenWith TokenVarSym }
 }
 -- literals -- currently this is just unsigned integer literals
 <0>
 {
    @decimal            { tokenWith (TokenLitInt . readInt) }
 }
 -- control characters
@@ -86,6 +114,20 @@ rlp :-
 {
 lexReservedName :: Text -> RlpToken
 lexReservedName = \case
    "data"      -> TokenData
    "case"      -> TokenCase
    "of"        -> TokenOf
    "let"       -> TokenLet
    "in"        -> TokenIn
 lexReservedOp :: Text -> RlpToken
 lexReservedOp = \case
    "="     -> TokenEquals
    "::"    -> TokenHasType
    "|"     -> TokenPipe
 -- | @andBegin@, with the subtle difference that the start code is set
 --   /after/ the action
 thenBegin :: LexerAction a -> Int -> LexerAction a
@@ -173,6 +215,7 @@ initParseState s = ParseState
    -- which then returns to state 0 which continues the normal lexing process.
    , _psLexState = [layout_top,0]
    , _psInput = initAlexInput s
    , _psOpTable = mempty
    }
 initAlexInput :: Text -> AlexInput
@@ -188,7 +231,7 @@ lexToken = do
    inp <- getInput
    c <- getLexState
    st <- use id
-    traceM $ "st: " <> show st
+    -- traceM $ "st: " <> show st
    case alexScan inp c of
        AlexEOF -> pure $ Located (inp ^. aiPos, 0) TokenEOF
        AlexSkip inp' l -> do
@@ -196,11 +239,10 @@ lexToken = do
            lexToken
        AlexToken inp' l act -> do
            psInput .= inp'
            traceShowM inp'
            act inp l
-lexerCont :: (Located RlpToken -> P a) -> P a
+lexCont :: (Located RlpToken -> P a) -> P a
-lexerCont = undefined
+lexCont = (lexToken >>=)
 lexStream :: P [RlpToken]
 lexStream = do
@@ -209,6 +251,12 @@ lexStream = do
        Located _ TokenEOF -> pure [TokenEOF]
        Located _ t        -> (t:) <$> lexStream
 lexDebug :: (Located RlpToken -> P a) -> P a
 lexDebug k = do
    t <- lexToken
    traceM $ "token: " <> show t
    k t
 lexTest :: Text -> Maybe [RlpToken]
 lexTest s = execP' lexStream s
@@ -224,7 +272,7 @@ insertToken t = do
 popLayout :: P Layout
 popLayout = do
-    traceM "pop layout"
+    -- traceM "pop layout"
    ctx <- preuse (psLayoutStack . _head)
    psLayoutStack %= (drop 1)
    case ctx of
@@ -233,7 +281,7 @@ popLayout = do
 pushLayout :: Layout -> P ()
 pushLayout l = do
-    traceM "push layout"
+    -- traceM "push layout"
    psLayoutStack %= (l:)
 popLexState :: P ()
@@ -241,9 +289,9 @@ popLexState = do
    psLexState %= tail
 insertSemicolon, insertLBrace, insertRBrace :: P (Located RlpToken)
-insertSemicolon = traceM "inserting semi"   >> insertToken TokenSemicolonV
+insertSemicolon = {- traceM "inserting semi"   >> -} insertToken TokenSemicolonV
-insertLBrace    = traceM "inserting lbrace" >> insertToken TokenLBraceV
+insertLBrace    = {- traceM "inserting lbrace" >> -} insertToken TokenLBraceV
-insertRBrace    = traceM "inserting rbrace" >> insertToken TokenRBraceV
+insertRBrace    = {- traceM "inserting rbrace" >> -} insertToken TokenRBraceV
 cmpLayout :: P Ordering
 cmpLayout = do
--- a/src/Rlp/Parse.y
+++ b/src/Rlp/Parse.y
@@ -1,62 +1,72 @@
 {
 module Rlp.Parse
    ( parseRlpProgram
    , parseTest
    )
    where
 import Rlp.Lex
 import Rlp.Syntax
 import Rlp.Parse.Types
 import Data.Fix
 import Data.Functor.Const
 }
 %name parseRlpProgram StandaloneProgram
 %name parseTest VL
 %monad { P }
-%lexer { lexerCont } { Located _ TokenEOF }
+%lexer { lexDebug } { Located _ TokenEOF }
 %error { parseError }
 %tokentype { Located RlpToken }
 %token
    varname         { Located _ (TokenVarName $$) }
    conname         { Located _ (TokenConName $$) }
    data            { Located _ TokenData }
    litint          { Located _ (TokenLitInt $$) }
    '='             { Located _ TokenEquals }
    '|'             { Located _ TokenPipe }
    ';'             { Located _ TokenSemicolon }
-    ';?'            { Located _ TokenSemicolonV }
+    vsemi           { Located _ TokenSemicolonV }
    '{'             { Located _ TokenLBrace }
    '}'             { Located _ TokenRBrace }
-    '{?'            { Located _ TokenLBraceV }
+    vlbrace         { Located _ TokenLBraceV }
-    '?}'            { Located _ TokenRBraceV }
+    vrbrace         { Located _ TokenRBraceV }
    eof             { Located _ TokenEOF }
 %%
 StandaloneProgram   :: { [PartialDecl'] }
-StandaloneProgram   : VL Decls VR eof       { $2 }
+StandaloneProgram   : '{' Decls '}'         { $2 }
                    | VL  Decls VR          { $2 }
 VL  :: { () }
-VL  : '{?'          { () }
+VL  : vlbrace       { () }
 VR  :: { () }
-VR  : '?}'          { () }
+VR  : vrbrace       { () }
    | error         { () }
 Decls               :: { [PartialDecl'] }
-Decls               : Decl Semi Decls       { $1 : $3 }
+Decls               : Decl VS Decls         { $1 : $3 }
-                    | Decl Semi             { [$1] }
+                    | Decl VS               { [$1] }
                    | Decl                  { [$1] }
 Semi                :: { Located RlpToken }
 Semi                : ';'                   { $1 }
-                    | ';?'                  { $1 }
+
 VS                  :: { Located RlpToken }
 VS                  : ';'                   { $1 }
                    | vsemi                 { $1 }
 Decl        :: { PartialDecl' }
-Decl        : FunDecl                   { undefined }
+Decl        : FunDecl                   { $1 }
 FunDecl     :: { PartialDecl' }
-FunDecl     : varname '=' Expr          { undefined }
+FunDecl     : Var '=' Expr              { FunD $1 [] (Const $3) Nothing }
-Expr        :: { RlpExpr' }
+Expr        :: { PartialExpr' }
-Expr        : Literal                   { LitE $1 }
+Expr        : Literal                   { Fix . E $ LitEF $1 }
-            | Var                       { VarE $1 }
+            | Var                       { Fix . E $ VarEF $1 }
 Literal     :: { Lit' }
 Literal     : litint                    { IntL $1 }
--- a/src/Rlp/Parse/Types.hs
+++ b/src/Rlp/Parse/Types.hs
@@ -42,8 +42,10 @@ data RlpToken
    | TokenConName Name
    | TokenVarSym Name
    | TokenConSym Name
-    -- keywords
+    -- reserved words
    | TokenData
    | TokenCase
    | TokenOf
    | TokenLet
    | TokenIn
    -- reserved ops
@@ -87,6 +89,7 @@ data ParseState = ParseState
    { _psLayoutStack        :: [Layout]
    , _psLexState           :: [Int]
    , _psInput              :: AlexInput
    , _psOpTable            :: OpTable
    }
    deriving Show
--- a/src/Rlp/Syntax.hs
+++ b/src/Rlp/Syntax.hs
@@ -65,7 +65,7 @@ type RlpProgram' = RlpProgram Name
 -- accounted for, we may complete the parsing task and get a proper @[Decl
 -- RlpExpr Name]@.
-data Decl e b = FunD    VarId [Pat b] (e b) (Where b)
+data Decl e b = FunD    VarId [Pat b] (e b) (Maybe (Where b))
              | TySigD  [VarId] Type
              | DataD   ConId [Name] [ConAlt]
              | InfixD  Assoc Int Name