This commit is contained in:
crumbtoo
2024-01-15 13:31:15 -07:00
parent c0236dc079
commit 1c035d092a
5 changed files with 103 additions and 42 deletions

View File

@@ -1,7 +1,7 @@
HAPPY = happy HAPPY = happy
HAPPY_OPTS = HAPPY_OPTS = -a -g -c
ALEX = alex ALEX = alex
ALEX_OPTS = -d ALEX_OPTS = -g
SRC = src SRC = src
CABAL_BUILD = dist-newstyle/build/x86_64-osx/ghc-9.6.2/rlp-0.1.0.0/build CABAL_BUILD = dist-newstyle/build/x86_64-osx/ghc-9.6.2/rlp-0.1.0.0/build

View File

@@ -1,5 +1,5 @@
{ {
{-# LANGUAGE ViewPatterns #-} {-# LANGUAGE ViewPatterns, LambdaCase #-}
{-# LANGUAGE GeneralisedNewtypeDeriving #-} {-# LANGUAGE GeneralisedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Rlp.Lex module Rlp.Lex
@@ -7,7 +7,8 @@ module Rlp.Lex
, RlpToken(..) , RlpToken(..)
, Located(..) , Located(..)
, lexToken , lexToken
, lexerCont , lexDebug
, lexCont
) )
where where
import Codec.Binary.UTF8.String (encodeChar) import Codec.Binary.UTF8.String (encodeChar)
@@ -30,33 +31,60 @@ import Rlp.Parse.Types
$whitechar = [ \t\n\r\f\v] $whitechar = [ \t\n\r\f\v]
$nl = [\n\r]
$white_no_nl = $white # $nl
$lower = [a-z \_] $lower = [a-z \_]
$upper = [A-Z] $upper = [A-Z]
$alpha = [$lower $upper] $alpha = [$lower $upper]
$digit = 0-9 $digit = 0-9
$nl = [\n\r] $special = [\(\)\,\;\[\]\{\}]
$white_no_nl = $white # $nl
$namechar = [$alpha $digit \' \#] $namechar = [$alpha $digit \' \#]
$asciisym = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~\:]
@decimal = $digit+
@varname = $lower $namechar* @varname = $lower $namechar*
@conname = $upper $namechar*
@consym = \: $asciisym*
@varsym = $asciisym+
@digits = $digit+ @reservedname =
case|data|do|import|in|let|letrec|module|of|where
@reservedop =
"=" | \\ | "->" | "|"
rlp :- rlp :-
-- skip whitespace -- everywhere: skip whitespace
$white_no_nl+ ; $white_no_nl+ ;
-- everywhere: skip comments
-- TODO: don't treat operators like (-->) as comments -- TODO: don't treat operators like (-->) as comments
"--".* ; "--".* ;
-- we are indentation-sensitive! do not skip NLs!. upon encountering a newline,
-- we check indentation and potentially insert extra tokens. search this file
-- for the definition of `doBol`
<0> \n { beginPush bol }
-- scan various identifiers and reserved words. order is important here!
<0> <0>
{ {
\n { beginPush bol } @reservedname { tokenWith lexReservedName }
@conname { tokenWith TokenConName }
@varname { tokenWith TokenVarName } @varname { tokenWith TokenVarName }
@digits { tokenWith (TokenLitInt . readInt) } @reservedop { tokenWith lexReservedOp }
"=" { constToken TokenEquals } @consym { tokenWith TokenConSym }
@varsym { tokenWith TokenVarSym }
}
-- literals -- currently this is just unsigned integer literals
<0>
{
@decimal { tokenWith (TokenLitInt . readInt) }
} }
-- control characters -- control characters
@@ -86,6 +114,20 @@ rlp :-
{ {
lexReservedName :: Text -> RlpToken
lexReservedName = \case
"data" -> TokenData
"case" -> TokenCase
"of" -> TokenOf
"let" -> TokenLet
"in" -> TokenIn
lexReservedOp :: Text -> RlpToken
lexReservedOp = \case
"=" -> TokenEquals
"::" -> TokenHasType
"|" -> TokenPipe
-- | @andBegin@, with the subtle difference that the start code is set -- | @andBegin@, with the subtle difference that the start code is set
-- /after/ the action -- /after/ the action
thenBegin :: LexerAction a -> Int -> LexerAction a thenBegin :: LexerAction a -> Int -> LexerAction a
@@ -173,6 +215,7 @@ initParseState s = ParseState
-- which then returns to state 0 which continues the normal lexing process. -- which then returns to state 0 which continues the normal lexing process.
, _psLexState = [layout_top,0] , _psLexState = [layout_top,0]
, _psInput = initAlexInput s , _psInput = initAlexInput s
, _psOpTable = mempty
} }
initAlexInput :: Text -> AlexInput initAlexInput :: Text -> AlexInput
@@ -188,7 +231,7 @@ lexToken = do
inp <- getInput inp <- getInput
c <- getLexState c <- getLexState
st <- use id st <- use id
traceM $ "st: " <> show st -- traceM $ "st: " <> show st
case alexScan inp c of case alexScan inp c of
AlexEOF -> pure $ Located (inp ^. aiPos, 0) TokenEOF AlexEOF -> pure $ Located (inp ^. aiPos, 0) TokenEOF
AlexSkip inp' l -> do AlexSkip inp' l -> do
@@ -196,11 +239,10 @@ lexToken = do
lexToken lexToken
AlexToken inp' l act -> do AlexToken inp' l act -> do
psInput .= inp' psInput .= inp'
traceShowM inp'
act inp l act inp l
lexerCont :: (Located RlpToken -> P a) -> P a lexCont :: (Located RlpToken -> P a) -> P a
lexerCont = undefined lexCont = (lexToken >>=)
lexStream :: P [RlpToken] lexStream :: P [RlpToken]
lexStream = do lexStream = do
@@ -209,6 +251,12 @@ lexStream = do
Located _ TokenEOF -> pure [TokenEOF] Located _ TokenEOF -> pure [TokenEOF]
Located _ t -> (t:) <$> lexStream Located _ t -> (t:) <$> lexStream
lexDebug :: (Located RlpToken -> P a) -> P a
lexDebug k = do
t <- lexToken
traceM $ "token: " <> show t
k t
lexTest :: Text -> Maybe [RlpToken] lexTest :: Text -> Maybe [RlpToken]
lexTest s = execP' lexStream s lexTest s = execP' lexStream s
@@ -224,7 +272,7 @@ insertToken t = do
popLayout :: P Layout popLayout :: P Layout
popLayout = do popLayout = do
traceM "pop layout" -- traceM "pop layout"
ctx <- preuse (psLayoutStack . _head) ctx <- preuse (psLayoutStack . _head)
psLayoutStack %= (drop 1) psLayoutStack %= (drop 1)
case ctx of case ctx of
@@ -233,7 +281,7 @@ popLayout = do
pushLayout :: Layout -> P () pushLayout :: Layout -> P ()
pushLayout l = do pushLayout l = do
traceM "push layout" -- traceM "push layout"
psLayoutStack %= (l:) psLayoutStack %= (l:)
popLexState :: P () popLexState :: P ()
@@ -241,9 +289,9 @@ popLexState = do
psLexState %= tail psLexState %= tail
insertSemicolon, insertLBrace, insertRBrace :: P (Located RlpToken) insertSemicolon, insertLBrace, insertRBrace :: P (Located RlpToken)
insertSemicolon = traceM "inserting semi" >> insertToken TokenSemicolonV insertSemicolon = {- traceM "inserting semi" >> -} insertToken TokenSemicolonV
insertLBrace = traceM "inserting lbrace" >> insertToken TokenLBraceV insertLBrace = {- traceM "inserting lbrace" >> -} insertToken TokenLBraceV
insertRBrace = traceM "inserting rbrace" >> insertToken TokenRBraceV insertRBrace = {- traceM "inserting rbrace" >> -} insertToken TokenRBraceV
cmpLayout :: P Ordering cmpLayout :: P Ordering
cmpLayout = do cmpLayout = do

View File

@@ -1,62 +1,72 @@
{ {
module Rlp.Parse module Rlp.Parse
( parseRlpProgram ( parseRlpProgram
, parseTest
) )
where where
import Rlp.Lex import Rlp.Lex
import Rlp.Syntax import Rlp.Syntax
import Rlp.Parse.Types import Rlp.Parse.Types
import Data.Fix
import Data.Functor.Const
} }
%name parseRlpProgram StandaloneProgram %name parseRlpProgram StandaloneProgram
%name parseTest VL
%monad { P } %monad { P }
%lexer { lexerCont } { Located _ TokenEOF } %lexer { lexDebug } { Located _ TokenEOF }
%error { parseError } %error { parseError }
%tokentype { Located RlpToken } %tokentype { Located RlpToken }
%token %token
varname { Located _ (TokenVarName $$) } varname { Located _ (TokenVarName $$) }
conname { Located _ (TokenConName $$) }
data { Located _ TokenData }
litint { Located _ (TokenLitInt $$) } litint { Located _ (TokenLitInt $$) }
'=' { Located _ TokenEquals } '=' { Located _ TokenEquals }
'|' { Located _ TokenPipe }
';' { Located _ TokenSemicolon } ';' { Located _ TokenSemicolon }
';?' { Located _ TokenSemicolonV } vsemi { Located _ TokenSemicolonV }
'{' { Located _ TokenLBrace } '{' { Located _ TokenLBrace }
'}' { Located _ TokenRBrace } '}' { Located _ TokenRBrace }
'{?' { Located _ TokenLBraceV } vlbrace { Located _ TokenLBraceV }
'?}' { Located _ TokenRBraceV } vrbrace { Located _ TokenRBraceV }
eof { Located _ TokenEOF }
%% %%
StandaloneProgram :: { [PartialDecl'] } StandaloneProgram :: { [PartialDecl'] }
StandaloneProgram : VL Decls VR eof { $2 } StandaloneProgram : '{' Decls '}' { $2 }
| VL Decls VR { $2 }
VL :: { () } VL :: { () }
VL : '{?' { () } VL : vlbrace { () }
VR :: { () } VR :: { () }
VR : '?}' { () } VR : vrbrace { () }
| error { () } | error { () }
Decls :: { [PartialDecl'] } Decls :: { [PartialDecl'] }
Decls : Decl Semi Decls { $1 : $3 } Decls : Decl VS Decls { $1 : $3 }
| Decl Semi { [$1] } | Decl VS { [$1] }
| Decl { [$1] } | Decl { [$1] }
Semi :: { Located RlpToken } Semi :: { Located RlpToken }
Semi : ';' { $1 } Semi : ';' { $1 }
| ';?' { $1 }
VS :: { Located RlpToken }
VS : ';' { $1 }
| vsemi { $1 }
Decl :: { PartialDecl' } Decl :: { PartialDecl' }
Decl : FunDecl { undefined } Decl : FunDecl { $1 }
FunDecl :: { PartialDecl' } FunDecl :: { PartialDecl' }
FunDecl : varname '=' Expr { undefined } FunDecl : Var '=' Expr { FunD $1 [] (Const $3) Nothing }
Expr :: { RlpExpr' } Expr :: { PartialExpr' }
Expr : Literal { LitE $1 } Expr : Literal { Fix . E $ LitEF $1 }
| Var { VarE $1 } | Var { Fix . E $ VarEF $1 }
Literal :: { Lit' } Literal :: { Lit' }
Literal : litint { IntL $1 } Literal : litint { IntL $1 }

View File

@@ -42,8 +42,10 @@ data RlpToken
| TokenConName Name | TokenConName Name
| TokenVarSym Name | TokenVarSym Name
| TokenConSym Name | TokenConSym Name
-- keywords -- reserved words
| TokenData | TokenData
| TokenCase
| TokenOf
| TokenLet | TokenLet
| TokenIn | TokenIn
-- reserved ops -- reserved ops
@@ -87,6 +89,7 @@ data ParseState = ParseState
{ _psLayoutStack :: [Layout] { _psLayoutStack :: [Layout]
, _psLexState :: [Int] , _psLexState :: [Int]
, _psInput :: AlexInput , _psInput :: AlexInput
, _psOpTable :: OpTable
} }
deriving Show deriving Show

View File

@@ -65,7 +65,7 @@ type RlpProgram' = RlpProgram Name
-- accounted for, we may complete the parsing task and get a proper @[Decl -- accounted for, we may complete the parsing task and get a proper @[Decl
-- RlpExpr Name]@. -- RlpExpr Name]@.
data Decl e b = FunD VarId [Pat b] (e b) (Where b) data Decl e b = FunD VarId [Pat b] (e b) (Maybe (Where b))
| TySigD [VarId] Type | TySigD [VarId] Type
| DataD ConId [Name] [ConAlt] | DataD ConId [Name] [ConAlt]
| InfixD Assoc Int Name | InfixD Assoc Int Name