diff --git a/Makefile_happysrcs b/Makefile_happysrcs index 35c2ca8..e0dc43e 100644 --- a/Makefile_happysrcs +++ b/Makefile_happysrcs @@ -1,7 +1,7 @@ HAPPY = happy -HAPPY_OPTS = +HAPPY_OPTS = -a -g -c ALEX = alex -ALEX_OPTS = -d +ALEX_OPTS = -g SRC = src CABAL_BUILD = dist-newstyle/build/x86_64-osx/ghc-9.6.2/rlp-0.1.0.0/build diff --git a/src/Rlp/Lex.x b/src/Rlp/Lex.x index 18592f8..55b0191 100644 --- a/src/Rlp/Lex.x +++ b/src/Rlp/Lex.x @@ -1,5 +1,5 @@ { -{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE ViewPatterns, LambdaCase #-} {-# LANGUAGE GeneralisedNewtypeDeriving #-} {-# LANGUAGE OverloadedStrings #-} module Rlp.Lex @@ -7,7 +7,8 @@ module Rlp.Lex , RlpToken(..) , Located(..) , lexToken - , lexerCont + , lexDebug + , lexCont ) where import Codec.Binary.UTF8.String (encodeChar) @@ -30,33 +31,60 @@ import Rlp.Parse.Types $whitechar = [ \t\n\r\f\v] +$nl = [\n\r] +$white_no_nl = $white # $nl + $lower = [a-z \_] $upper = [A-Z] $alpha = [$lower $upper] $digit = 0-9 -$nl = [\n\r] -$white_no_nl = $white # $nl - +$special = [\(\)\,\;\[\]\{\}] $namechar = [$alpha $digit \' \#] +$asciisym = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~\:] + +@decimal = $digit+ @varname = $lower $namechar* +@conname = $upper $namechar* +@consym = \: $asciisym* +@varsym = $asciisym+ -@digits = $digit+ +@reservedname = + case|data|do|import|in|let|letrec|module|of|where + +@reservedop = + "=" | \\ | "->" | "|" rlp :- - -- skip whitespace - $white_no_nl+ ; - -- TODO: don't treat operators like (-->) as comments - "--".* ; +-- everywhere: skip whitespace +$white_no_nl+ ; +-- everywhere: skip comments +-- TODO: don't treat operators like (-->) as comments +"--".* ; + +-- we are indentation-sensitive! do not skip NLs!. upon encountering a newline, +-- we check indentation and potentially insert extra tokens. search this file +-- for the definition of `doBol` +<0> \n { beginPush bol } + +-- scan various identifiers and reserved words. order is important here! <0> { - \n { beginPush bol } + @reservedname { tokenWith lexReservedName } + @conname { tokenWith TokenConName } @varname { tokenWith TokenVarName } - @digits { tokenWith (TokenLitInt . readInt) } - "=" { constToken TokenEquals } + @reservedop { tokenWith lexReservedOp } + @consym { tokenWith TokenConSym } + @varsym { tokenWith TokenVarSym } +} + +-- literals -- currently this is just unsigned integer literals +<0> +{ + @decimal { tokenWith (TokenLitInt . readInt) } } -- control characters @@ -86,6 +114,20 @@ rlp :- { +lexReservedName :: Text -> RlpToken +lexReservedName = \case + "data" -> TokenData + "case" -> TokenCase + "of" -> TokenOf + "let" -> TokenLet + "in" -> TokenIn + +lexReservedOp :: Text -> RlpToken +lexReservedOp = \case + "=" -> TokenEquals + "::" -> TokenHasType + "|" -> TokenPipe + -- | @andBegin@, with the subtle difference that the start code is set -- /after/ the action thenBegin :: LexerAction a -> Int -> LexerAction a @@ -173,6 +215,7 @@ initParseState s = ParseState -- which then returns to state 0 which continues the normal lexing process. , _psLexState = [layout_top,0] , _psInput = initAlexInput s + , _psOpTable = mempty } initAlexInput :: Text -> AlexInput @@ -188,7 +231,7 @@ lexToken = do inp <- getInput c <- getLexState st <- use id - traceM $ "st: " <> show st + -- traceM $ "st: " <> show st case alexScan inp c of AlexEOF -> pure $ Located (inp ^. aiPos, 0) TokenEOF AlexSkip inp' l -> do @@ -196,11 +239,10 @@ lexToken = do lexToken AlexToken inp' l act -> do psInput .= inp' - traceShowM inp' act inp l -lexerCont :: (Located RlpToken -> P a) -> P a -lexerCont = undefined +lexCont :: (Located RlpToken -> P a) -> P a +lexCont = (lexToken >>=) lexStream :: P [RlpToken] lexStream = do @@ -209,6 +251,12 @@ lexStream = do Located _ TokenEOF -> pure [TokenEOF] Located _ t -> (t:) <$> lexStream +lexDebug :: (Located RlpToken -> P a) -> P a +lexDebug k = do + t <- lexToken + traceM $ "token: " <> show t + k t + lexTest :: Text -> Maybe [RlpToken] lexTest s = execP' lexStream s @@ -224,7 +272,7 @@ insertToken t = do popLayout :: P Layout popLayout = do - traceM "pop layout" + -- traceM "pop layout" ctx <- preuse (psLayoutStack . _head) psLayoutStack %= (drop 1) case ctx of @@ -233,7 +281,7 @@ popLayout = do pushLayout :: Layout -> P () pushLayout l = do - traceM "push layout" + -- traceM "push layout" psLayoutStack %= (l:) popLexState :: P () @@ -241,9 +289,9 @@ popLexState = do psLexState %= tail insertSemicolon, insertLBrace, insertRBrace :: P (Located RlpToken) -insertSemicolon = traceM "inserting semi" >> insertToken TokenSemicolonV -insertLBrace = traceM "inserting lbrace" >> insertToken TokenLBraceV -insertRBrace = traceM "inserting rbrace" >> insertToken TokenRBraceV +insertSemicolon = {- traceM "inserting semi" >> -} insertToken TokenSemicolonV +insertLBrace = {- traceM "inserting lbrace" >> -} insertToken TokenLBraceV +insertRBrace = {- traceM "inserting rbrace" >> -} insertToken TokenRBraceV cmpLayout :: P Ordering cmpLayout = do diff --git a/src/Rlp/Parse.y b/src/Rlp/Parse.y index 3205988..8152f66 100644 --- a/src/Rlp/Parse.y +++ b/src/Rlp/Parse.y @@ -1,62 +1,72 @@ { module Rlp.Parse ( parseRlpProgram + , parseTest ) where import Rlp.Lex import Rlp.Syntax import Rlp.Parse.Types +import Data.Fix +import Data.Functor.Const } %name parseRlpProgram StandaloneProgram +%name parseTest VL %monad { P } -%lexer { lexerCont } { Located _ TokenEOF } +%lexer { lexDebug } { Located _ TokenEOF } %error { parseError } %tokentype { Located RlpToken } %token varname { Located _ (TokenVarName $$) } + conname { Located _ (TokenConName $$) } + data { Located _ TokenData } litint { Located _ (TokenLitInt $$) } '=' { Located _ TokenEquals } + '|' { Located _ TokenPipe } ';' { Located _ TokenSemicolon } - ';?' { Located _ TokenSemicolonV } + vsemi { Located _ TokenSemicolonV } '{' { Located _ TokenLBrace } '}' { Located _ TokenRBrace } - '{?' { Located _ TokenLBraceV } - '?}' { Located _ TokenRBraceV } - eof { Located _ TokenEOF } + vlbrace { Located _ TokenLBraceV } + vrbrace { Located _ TokenRBraceV } %% StandaloneProgram :: { [PartialDecl'] } -StandaloneProgram : VL Decls VR eof { $2 } +StandaloneProgram : '{' Decls '}' { $2 } + | VL Decls VR { $2 } VL :: { () } -VL : '{?' { () } +VL : vlbrace { () } VR :: { () } -VR : '?}' { () } +VR : vrbrace { () } | error { () } Decls :: { [PartialDecl'] } -Decls : Decl Semi Decls { $1 : $3 } - | Decl Semi { [$1] } +Decls : Decl VS Decls { $1 : $3 } + | Decl VS { [$1] } | Decl { [$1] } Semi :: { Located RlpToken } Semi : ';' { $1 } - | ';?' { $1 } + +VS :: { Located RlpToken } +VS : ';' { $1 } + | vsemi { $1 } Decl :: { PartialDecl' } -Decl : FunDecl { undefined } +Decl : FunDecl { $1 } FunDecl :: { PartialDecl' } -FunDecl : varname '=' Expr { undefined } +FunDecl : Var '=' Expr { FunD $1 [] (Const $3) Nothing } -Expr :: { RlpExpr' } -Expr : Literal { LitE $1 } - | Var { VarE $1 } +Expr :: { PartialExpr' } +Expr : Literal { Fix . E $ LitEF $1 } + | Var { Fix . E $ VarEF $1 } Literal :: { Lit' } Literal : litint { IntL $1 } diff --git a/src/Rlp/Parse/Types.hs b/src/Rlp/Parse/Types.hs index 03f24f8..2ec6079 100644 --- a/src/Rlp/Parse/Types.hs +++ b/src/Rlp/Parse/Types.hs @@ -42,8 +42,10 @@ data RlpToken | TokenConName Name | TokenVarSym Name | TokenConSym Name - -- keywords + -- reserved words | TokenData + | TokenCase + | TokenOf | TokenLet | TokenIn -- reserved ops @@ -87,6 +89,7 @@ data ParseState = ParseState { _psLayoutStack :: [Layout] , _psLexState :: [Int] , _psInput :: AlexInput + , _psOpTable :: OpTable } deriving Show diff --git a/src/Rlp/Syntax.hs b/src/Rlp/Syntax.hs index bf35445..18de5a1 100644 --- a/src/Rlp/Syntax.hs +++ b/src/Rlp/Syntax.hs @@ -65,7 +65,7 @@ type RlpProgram' = RlpProgram Name -- accounted for, we may complete the parsing task and get a proper @[Decl -- RlpExpr Name]@. -data Decl e b = FunD VarId [Pat b] (e b) (Where b) +data Decl e b = FunD VarId [Pat b] (e b) (Maybe (Where b)) | TySigD [VarId] Type | DataD ConId [Name] [ConAlt] | InfixD Assoc Int Name