This commit is contained in:
crumbtoo
2024-01-15 13:31:15 -07:00
parent c0236dc079
commit 1c035d092a
5 changed files with 103 additions and 42 deletions

View File

@@ -1,7 +1,7 @@
HAPPY = happy
HAPPY_OPTS =
HAPPY_OPTS = -a -g -c
ALEX = alex
ALEX_OPTS = -d
ALEX_OPTS = -g
SRC = src
CABAL_BUILD = dist-newstyle/build/x86_64-osx/ghc-9.6.2/rlp-0.1.0.0/build

View File

@@ -1,5 +1,5 @@
{
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE ViewPatterns, LambdaCase #-}
{-# LANGUAGE GeneralisedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings #-}
module Rlp.Lex
@@ -7,7 +7,8 @@ module Rlp.Lex
, RlpToken(..)
, Located(..)
, lexToken
, lexerCont
, lexDebug
, lexCont
)
where
import Codec.Binary.UTF8.String (encodeChar)
@@ -30,33 +31,60 @@ import Rlp.Parse.Types
$whitechar = [ \t\n\r\f\v]
$nl = [\n\r]
$white_no_nl = $white # $nl
$lower = [a-z \_]
$upper = [A-Z]
$alpha = [$lower $upper]
$digit = 0-9
$nl = [\n\r]
$white_no_nl = $white # $nl
$special = [\(\)\,\;\[\]\{\}]
$namechar = [$alpha $digit \' \#]
$asciisym = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~\:]
@decimal = $digit+
@varname = $lower $namechar*
@conname = $upper $namechar*
@consym = \: $asciisym*
@varsym = $asciisym+
@digits = $digit+
@reservedname =
case|data|do|import|in|let|letrec|module|of|where
@reservedop =
"=" | \\ | "->" | "|"
rlp :-
-- skip whitespace
$white_no_nl+ ;
-- TODO: don't treat operators like (-->) as comments
"--".* ;
-- everywhere: skip whitespace
$white_no_nl+ ;
-- everywhere: skip comments
-- TODO: don't treat operators like (-->) as comments
"--".* ;
-- we are indentation-sensitive! do not skip NLs!. upon encountering a newline,
-- we check indentation and potentially insert extra tokens. search this file
-- for the definition of `doBol`
<0> \n { beginPush bol }
-- scan various identifiers and reserved words. order is important here!
<0>
{
\n { beginPush bol }
@reservedname { tokenWith lexReservedName }
@conname { tokenWith TokenConName }
@varname { tokenWith TokenVarName }
@digits { tokenWith (TokenLitInt . readInt) }
"=" { constToken TokenEquals }
@reservedop { tokenWith lexReservedOp }
@consym { tokenWith TokenConSym }
@varsym { tokenWith TokenVarSym }
}
-- literals -- currently this is just unsigned integer literals
<0>
{
@decimal { tokenWith (TokenLitInt . readInt) }
}
-- control characters
@@ -86,6 +114,20 @@ rlp :-
{
lexReservedName :: Text -> RlpToken
lexReservedName = \case
"data" -> TokenData
"case" -> TokenCase
"of" -> TokenOf
"let" -> TokenLet
"in" -> TokenIn
lexReservedOp :: Text -> RlpToken
lexReservedOp = \case
"=" -> TokenEquals
"::" -> TokenHasType
"|" -> TokenPipe
-- | @andBegin@, with the subtle difference that the start code is set
-- /after/ the action
thenBegin :: LexerAction a -> Int -> LexerAction a
@@ -173,6 +215,7 @@ initParseState s = ParseState
-- which then returns to state 0 which continues the normal lexing process.
, _psLexState = [layout_top,0]
, _psInput = initAlexInput s
, _psOpTable = mempty
}
initAlexInput :: Text -> AlexInput
@@ -188,7 +231,7 @@ lexToken = do
inp <- getInput
c <- getLexState
st <- use id
traceM $ "st: " <> show st
-- traceM $ "st: " <> show st
case alexScan inp c of
AlexEOF -> pure $ Located (inp ^. aiPos, 0) TokenEOF
AlexSkip inp' l -> do
@@ -196,11 +239,10 @@ lexToken = do
lexToken
AlexToken inp' l act -> do
psInput .= inp'
traceShowM inp'
act inp l
lexerCont :: (Located RlpToken -> P a) -> P a
lexerCont = undefined
lexCont :: (Located RlpToken -> P a) -> P a
lexCont = (lexToken >>=)
lexStream :: P [RlpToken]
lexStream = do
@@ -209,6 +251,12 @@ lexStream = do
Located _ TokenEOF -> pure [TokenEOF]
Located _ t -> (t:) <$> lexStream
lexDebug :: (Located RlpToken -> P a) -> P a
lexDebug k = do
t <- lexToken
traceM $ "token: " <> show t
k t
lexTest :: Text -> Maybe [RlpToken]
lexTest s = execP' lexStream s
@@ -224,7 +272,7 @@ insertToken t = do
popLayout :: P Layout
popLayout = do
traceM "pop layout"
-- traceM "pop layout"
ctx <- preuse (psLayoutStack . _head)
psLayoutStack %= (drop 1)
case ctx of
@@ -233,7 +281,7 @@ popLayout = do
pushLayout :: Layout -> P ()
pushLayout l = do
traceM "push layout"
-- traceM "push layout"
psLayoutStack %= (l:)
popLexState :: P ()
@@ -241,9 +289,9 @@ popLexState = do
psLexState %= tail
insertSemicolon, insertLBrace, insertRBrace :: P (Located RlpToken)
insertSemicolon = traceM "inserting semi" >> insertToken TokenSemicolonV
insertLBrace = traceM "inserting lbrace" >> insertToken TokenLBraceV
insertRBrace = traceM "inserting rbrace" >> insertToken TokenRBraceV
insertSemicolon = {- traceM "inserting semi" >> -} insertToken TokenSemicolonV
insertLBrace = {- traceM "inserting lbrace" >> -} insertToken TokenLBraceV
insertRBrace = {- traceM "inserting rbrace" >> -} insertToken TokenRBraceV
cmpLayout :: P Ordering
cmpLayout = do

View File

@@ -1,62 +1,72 @@
{
module Rlp.Parse
( parseRlpProgram
, parseTest
)
where
import Rlp.Lex
import Rlp.Syntax
import Rlp.Parse.Types
import Data.Fix
import Data.Functor.Const
}
%name parseRlpProgram StandaloneProgram
%name parseTest VL
%monad { P }
%lexer { lexerCont } { Located _ TokenEOF }
%lexer { lexDebug } { Located _ TokenEOF }
%error { parseError }
%tokentype { Located RlpToken }
%token
varname { Located _ (TokenVarName $$) }
conname { Located _ (TokenConName $$) }
data { Located _ TokenData }
litint { Located _ (TokenLitInt $$) }
'=' { Located _ TokenEquals }
'|' { Located _ TokenPipe }
';' { Located _ TokenSemicolon }
';?' { Located _ TokenSemicolonV }
vsemi { Located _ TokenSemicolonV }
'{' { Located _ TokenLBrace }
'}' { Located _ TokenRBrace }
'{?' { Located _ TokenLBraceV }
'?}' { Located _ TokenRBraceV }
eof { Located _ TokenEOF }
vlbrace { Located _ TokenLBraceV }
vrbrace { Located _ TokenRBraceV }
%%
StandaloneProgram :: { [PartialDecl'] }
StandaloneProgram : VL Decls VR eof { $2 }
StandaloneProgram : '{' Decls '}' { $2 }
| VL Decls VR { $2 }
VL :: { () }
VL : '{?' { () }
VL : vlbrace { () }
VR :: { () }
VR : '?}' { () }
VR : vrbrace { () }
| error { () }
Decls :: { [PartialDecl'] }
Decls : Decl Semi Decls { $1 : $3 }
| Decl Semi { [$1] }
Decls : Decl VS Decls { $1 : $3 }
| Decl VS { [$1] }
| Decl { [$1] }
Semi :: { Located RlpToken }
Semi : ';' { $1 }
| ';?' { $1 }
VS :: { Located RlpToken }
VS : ';' { $1 }
| vsemi { $1 }
Decl :: { PartialDecl' }
Decl : FunDecl { undefined }
Decl : FunDecl { $1 }
FunDecl :: { PartialDecl' }
FunDecl : varname '=' Expr { undefined }
FunDecl : Var '=' Expr { FunD $1 [] (Const $3) Nothing }
Expr :: { RlpExpr' }
Expr : Literal { LitE $1 }
| Var { VarE $1 }
Expr :: { PartialExpr' }
Expr : Literal { Fix . E $ LitEF $1 }
| Var { Fix . E $ VarEF $1 }
Literal :: { Lit' }
Literal : litint { IntL $1 }

View File

@@ -42,8 +42,10 @@ data RlpToken
| TokenConName Name
| TokenVarSym Name
| TokenConSym Name
-- keywords
-- reserved words
| TokenData
| TokenCase
| TokenOf
| TokenLet
| TokenIn
-- reserved ops
@@ -87,6 +89,7 @@ data ParseState = ParseState
{ _psLayoutStack :: [Layout]
, _psLexState :: [Int]
, _psInput :: AlexInput
, _psOpTable :: OpTable
}
deriving Show

View File

@@ -65,7 +65,7 @@ type RlpProgram' = RlpProgram Name
-- accounted for, we may complete the parsing task and get a proper @[Decl
-- RlpExpr Name]@.
data Decl e b = FunD VarId [Pat b] (e b) (Where b)
data Decl e b = FunD VarId [Pat b] (e b) (Maybe (Where b))
| TySigD [VarId] Type
| DataD ConId [Name] [ConAlt]
| InfixD Assoc Int Name