works
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
HAPPY = happy
|
||||
HAPPY_OPTS =
|
||||
HAPPY_OPTS = -a -g -c
|
||||
ALEX = alex
|
||||
ALEX_OPTS = -d
|
||||
ALEX_OPTS = -g
|
||||
|
||||
SRC = src
|
||||
CABAL_BUILD = dist-newstyle/build/x86_64-osx/ghc-9.6.2/rlp-0.1.0.0/build
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
{-# LANGUAGE ViewPatterns #-}
|
||||
{-# LANGUAGE ViewPatterns, LambdaCase #-}
|
||||
{-# LANGUAGE GeneralisedNewtypeDeriving #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
module Rlp.Lex
|
||||
@@ -7,7 +7,8 @@ module Rlp.Lex
|
||||
, RlpToken(..)
|
||||
, Located(..)
|
||||
, lexToken
|
||||
, lexerCont
|
||||
, lexDebug
|
||||
, lexCont
|
||||
)
|
||||
where
|
||||
import Codec.Binary.UTF8.String (encodeChar)
|
||||
@@ -30,33 +31,60 @@ import Rlp.Parse.Types
|
||||
|
||||
$whitechar = [ \t\n\r\f\v]
|
||||
|
||||
$nl = [\n\r]
|
||||
$white_no_nl = $white # $nl
|
||||
|
||||
$lower = [a-z \_]
|
||||
$upper = [A-Z]
|
||||
$alpha = [$lower $upper]
|
||||
$digit = 0-9
|
||||
|
||||
$nl = [\n\r]
|
||||
$white_no_nl = $white # $nl
|
||||
|
||||
$special = [\(\)\,\;\[\]\{\}]
|
||||
$namechar = [$alpha $digit \' \#]
|
||||
$asciisym = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~\:]
|
||||
|
||||
@decimal = $digit+
|
||||
|
||||
@varname = $lower $namechar*
|
||||
@conname = $upper $namechar*
|
||||
@consym = \: $asciisym*
|
||||
@varsym = $asciisym+
|
||||
|
||||
@digits = $digit+
|
||||
@reservedname =
|
||||
case|data|do|import|in|let|letrec|module|of|where
|
||||
|
||||
@reservedop =
|
||||
"=" | \\ | "->" | "|"
|
||||
|
||||
rlp :-
|
||||
|
||||
-- skip whitespace
|
||||
-- everywhere: skip whitespace
|
||||
$white_no_nl+ ;
|
||||
|
||||
-- everywhere: skip comments
|
||||
-- TODO: don't treat operators like (-->) as comments
|
||||
"--".* ;
|
||||
|
||||
-- we are indentation-sensitive! do not skip NLs!. upon encountering a newline,
|
||||
-- we check indentation and potentially insert extra tokens. search this file
|
||||
-- for the definition of `doBol`
|
||||
<0> \n { beginPush bol }
|
||||
|
||||
-- scan various identifiers and reserved words. order is important here!
|
||||
<0>
|
||||
{
|
||||
\n { beginPush bol }
|
||||
@reservedname { tokenWith lexReservedName }
|
||||
@conname { tokenWith TokenConName }
|
||||
@varname { tokenWith TokenVarName }
|
||||
@digits { tokenWith (TokenLitInt . readInt) }
|
||||
"=" { constToken TokenEquals }
|
||||
@reservedop { tokenWith lexReservedOp }
|
||||
@consym { tokenWith TokenConSym }
|
||||
@varsym { tokenWith TokenVarSym }
|
||||
}
|
||||
|
||||
-- literals -- currently this is just unsigned integer literals
|
||||
<0>
|
||||
{
|
||||
@decimal { tokenWith (TokenLitInt . readInt) }
|
||||
}
|
||||
|
||||
-- control characters
|
||||
@@ -86,6 +114,20 @@ rlp :-
|
||||
|
||||
{
|
||||
|
||||
lexReservedName :: Text -> RlpToken
|
||||
lexReservedName = \case
|
||||
"data" -> TokenData
|
||||
"case" -> TokenCase
|
||||
"of" -> TokenOf
|
||||
"let" -> TokenLet
|
||||
"in" -> TokenIn
|
||||
|
||||
lexReservedOp :: Text -> RlpToken
|
||||
lexReservedOp = \case
|
||||
"=" -> TokenEquals
|
||||
"::" -> TokenHasType
|
||||
"|" -> TokenPipe
|
||||
|
||||
-- | @andBegin@, with the subtle difference that the start code is set
|
||||
-- /after/ the action
|
||||
thenBegin :: LexerAction a -> Int -> LexerAction a
|
||||
@@ -173,6 +215,7 @@ initParseState s = ParseState
|
||||
-- which then returns to state 0 which continues the normal lexing process.
|
||||
, _psLexState = [layout_top,0]
|
||||
, _psInput = initAlexInput s
|
||||
, _psOpTable = mempty
|
||||
}
|
||||
|
||||
initAlexInput :: Text -> AlexInput
|
||||
@@ -188,7 +231,7 @@ lexToken = do
|
||||
inp <- getInput
|
||||
c <- getLexState
|
||||
st <- use id
|
||||
traceM $ "st: " <> show st
|
||||
-- traceM $ "st: " <> show st
|
||||
case alexScan inp c of
|
||||
AlexEOF -> pure $ Located (inp ^. aiPos, 0) TokenEOF
|
||||
AlexSkip inp' l -> do
|
||||
@@ -196,11 +239,10 @@ lexToken = do
|
||||
lexToken
|
||||
AlexToken inp' l act -> do
|
||||
psInput .= inp'
|
||||
traceShowM inp'
|
||||
act inp l
|
||||
|
||||
lexerCont :: (Located RlpToken -> P a) -> P a
|
||||
lexerCont = undefined
|
||||
lexCont :: (Located RlpToken -> P a) -> P a
|
||||
lexCont = (lexToken >>=)
|
||||
|
||||
lexStream :: P [RlpToken]
|
||||
lexStream = do
|
||||
@@ -209,6 +251,12 @@ lexStream = do
|
||||
Located _ TokenEOF -> pure [TokenEOF]
|
||||
Located _ t -> (t:) <$> lexStream
|
||||
|
||||
lexDebug :: (Located RlpToken -> P a) -> P a
|
||||
lexDebug k = do
|
||||
t <- lexToken
|
||||
traceM $ "token: " <> show t
|
||||
k t
|
||||
|
||||
lexTest :: Text -> Maybe [RlpToken]
|
||||
lexTest s = execP' lexStream s
|
||||
|
||||
@@ -224,7 +272,7 @@ insertToken t = do
|
||||
|
||||
popLayout :: P Layout
|
||||
popLayout = do
|
||||
traceM "pop layout"
|
||||
-- traceM "pop layout"
|
||||
ctx <- preuse (psLayoutStack . _head)
|
||||
psLayoutStack %= (drop 1)
|
||||
case ctx of
|
||||
@@ -233,7 +281,7 @@ popLayout = do
|
||||
|
||||
pushLayout :: Layout -> P ()
|
||||
pushLayout l = do
|
||||
traceM "push layout"
|
||||
-- traceM "push layout"
|
||||
psLayoutStack %= (l:)
|
||||
|
||||
popLexState :: P ()
|
||||
@@ -241,9 +289,9 @@ popLexState = do
|
||||
psLexState %= tail
|
||||
|
||||
insertSemicolon, insertLBrace, insertRBrace :: P (Located RlpToken)
|
||||
insertSemicolon = traceM "inserting semi" >> insertToken TokenSemicolonV
|
||||
insertLBrace = traceM "inserting lbrace" >> insertToken TokenLBraceV
|
||||
insertRBrace = traceM "inserting rbrace" >> insertToken TokenRBraceV
|
||||
insertSemicolon = {- traceM "inserting semi" >> -} insertToken TokenSemicolonV
|
||||
insertLBrace = {- traceM "inserting lbrace" >> -} insertToken TokenLBraceV
|
||||
insertRBrace = {- traceM "inserting rbrace" >> -} insertToken TokenRBraceV
|
||||
|
||||
cmpLayout :: P Ordering
|
||||
cmpLayout = do
|
||||
|
||||
@@ -1,62 +1,72 @@
|
||||
{
|
||||
module Rlp.Parse
|
||||
( parseRlpProgram
|
||||
, parseTest
|
||||
)
|
||||
where
|
||||
import Rlp.Lex
|
||||
import Rlp.Syntax
|
||||
import Rlp.Parse.Types
|
||||
import Data.Fix
|
||||
import Data.Functor.Const
|
||||
}
|
||||
|
||||
%name parseRlpProgram StandaloneProgram
|
||||
%name parseTest VL
|
||||
|
||||
%monad { P }
|
||||
%lexer { lexerCont } { Located _ TokenEOF }
|
||||
%lexer { lexDebug } { Located _ TokenEOF }
|
||||
%error { parseError }
|
||||
%tokentype { Located RlpToken }
|
||||
|
||||
%token
|
||||
varname { Located _ (TokenVarName $$) }
|
||||
conname { Located _ (TokenConName $$) }
|
||||
data { Located _ TokenData }
|
||||
litint { Located _ (TokenLitInt $$) }
|
||||
'=' { Located _ TokenEquals }
|
||||
'|' { Located _ TokenPipe }
|
||||
';' { Located _ TokenSemicolon }
|
||||
';?' { Located _ TokenSemicolonV }
|
||||
vsemi { Located _ TokenSemicolonV }
|
||||
'{' { Located _ TokenLBrace }
|
||||
'}' { Located _ TokenRBrace }
|
||||
'{?' { Located _ TokenLBraceV }
|
||||
'?}' { Located _ TokenRBraceV }
|
||||
eof { Located _ TokenEOF }
|
||||
vlbrace { Located _ TokenLBraceV }
|
||||
vrbrace { Located _ TokenRBraceV }
|
||||
|
||||
%%
|
||||
|
||||
StandaloneProgram :: { [PartialDecl'] }
|
||||
StandaloneProgram : VL Decls VR eof { $2 }
|
||||
StandaloneProgram : '{' Decls '}' { $2 }
|
||||
| VL Decls VR { $2 }
|
||||
|
||||
VL :: { () }
|
||||
VL : '{?' { () }
|
||||
VL : vlbrace { () }
|
||||
|
||||
VR :: { () }
|
||||
VR : '?}' { () }
|
||||
VR : vrbrace { () }
|
||||
| error { () }
|
||||
|
||||
Decls :: { [PartialDecl'] }
|
||||
Decls : Decl Semi Decls { $1 : $3 }
|
||||
| Decl Semi { [$1] }
|
||||
Decls : Decl VS Decls { $1 : $3 }
|
||||
| Decl VS { [$1] }
|
||||
| Decl { [$1] }
|
||||
|
||||
Semi :: { Located RlpToken }
|
||||
Semi : ';' { $1 }
|
||||
| ';?' { $1 }
|
||||
|
||||
VS :: { Located RlpToken }
|
||||
VS : ';' { $1 }
|
||||
| vsemi { $1 }
|
||||
|
||||
Decl :: { PartialDecl' }
|
||||
Decl : FunDecl { undefined }
|
||||
Decl : FunDecl { $1 }
|
||||
|
||||
FunDecl :: { PartialDecl' }
|
||||
FunDecl : varname '=' Expr { undefined }
|
||||
FunDecl : Var '=' Expr { FunD $1 [] (Const $3) Nothing }
|
||||
|
||||
Expr :: { RlpExpr' }
|
||||
Expr : Literal { LitE $1 }
|
||||
| Var { VarE $1 }
|
||||
Expr :: { PartialExpr' }
|
||||
Expr : Literal { Fix . E $ LitEF $1 }
|
||||
| Var { Fix . E $ VarEF $1 }
|
||||
|
||||
Literal :: { Lit' }
|
||||
Literal : litint { IntL $1 }
|
||||
|
||||
@@ -42,8 +42,10 @@ data RlpToken
|
||||
| TokenConName Name
|
||||
| TokenVarSym Name
|
||||
| TokenConSym Name
|
||||
-- keywords
|
||||
-- reserved words
|
||||
| TokenData
|
||||
| TokenCase
|
||||
| TokenOf
|
||||
| TokenLet
|
||||
| TokenIn
|
||||
-- reserved ops
|
||||
@@ -87,6 +89,7 @@ data ParseState = ParseState
|
||||
{ _psLayoutStack :: [Layout]
|
||||
, _psLexState :: [Int]
|
||||
, _psInput :: AlexInput
|
||||
, _psOpTable :: OpTable
|
||||
}
|
||||
deriving Show
|
||||
|
||||
|
||||
@@ -65,7 +65,7 @@ type RlpProgram' = RlpProgram Name
|
||||
-- accounted for, we may complete the parsing task and get a proper @[Decl
|
||||
-- RlpExpr Name]@.
|
||||
|
||||
data Decl e b = FunD VarId [Pat b] (e b) (Where b)
|
||||
data Decl e b = FunD VarId [Pat b] (e b) (Maybe (Where b))
|
||||
| TySigD [VarId] Type
|
||||
| DataD ConId [Name] [ConAlt]
|
||||
| InfixD Assoc Int Name
|
||||
|
||||
Reference in New Issue
Block a user