rc #13

Merged
crumbtoo merged 196 commits from dev into main 2024-02-13 13:22:23 -07:00
4 changed files with 390 additions and 148 deletions
Showing only changes of commit 17ddf3530c - Show all commits

View File

@@ -6,18 +6,19 @@ module Rlp.Lex
( P(..) ( P(..)
, RlpToken(..) , RlpToken(..)
, Located(..) , Located(..)
, AlexPosn
, lexer , lexer
, lexerCont , lexerCont
) )
where where
import Control.Monad import Control.Monad
import Data.Functor.Identity import Data.Functor.Identity
import Data.Char (digitToInt)
import Core.Syntax (Name) import Core.Syntax (Name)
import Data.Monoid (First) import Data.Monoid (First)
import Data.Maybe import Data.Maybe
import Data.Text (Text) import Data.Text (Text)
import Data.Text qualified as T import Data.Text qualified as T
import Data.Word
import Data.Default import Data.Default
import Lens.Micro.Mtl import Lens.Micro.Mtl
import Lens.Micro import Lens.Micro
@@ -26,8 +27,6 @@ import Lens.Micro.TH
import Debug.Trace import Debug.Trace
} }
%wrapper "monadUserState-strict-text"
$whitechar = [ \t\n\r\f\v] $whitechar = [ \t\n\r\f\v]
$lower = [a-z \_] $lower = [a-z \_]
@@ -42,6 +41,8 @@ $namechar = [$alpha $digit \' \#]
@varname = $lower $namechar* @varname = $lower $namechar*
@digits = $digit+
rlp :- rlp :-
-- skip whitespace -- skip whitespace
@@ -54,14 +55,13 @@ rlp :-
<0> <0>
{ {
\n ; \n { begin bol }
"{" { explicitLBrace `thenBegin` one }
() { doLayout `thenBegin` one }
} }
<one> <one>
{ {
@varname { tokenWith TokenVarName } @varname { tokenWith TokenVarName }
@digits { tokenWith (TokenLitInt . readInt) }
"=" { constToken TokenEquals } "=" { constToken TokenEquals }
\n { begin bol } \n { begin bol }
} }
@@ -73,29 +73,50 @@ rlp :-
{ {
$whitechar ; $whitechar ;
\n ; \n ;
() { doBol `andBegin` one } () { doBol }
} }
{ {
begin = undefined
type LexerAction a = AlexInput -> Int -> P a
type AlexInput =
( Char -- prev char
, Text -- input
)
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte (_,s) = undefined
getInput :: P AlexInput
getInput = undefined
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar = (^. _1)
readInt :: Text -> Int
readInt = T.foldr f 0 where
f c n = digitToInt c + 10*n
-- | @andBegin@, with the subtle difference that the start code is set -- | @andBegin@, with the subtle difference that the start code is set
-- /after/ the action -- /after/ the action
thenBegin :: AlexAction a -> Int -> AlexAction a thenBegin :: LexerAction a -> Int -> LexerAction a
thenBegin act c inp l = do thenBegin act c inp l = do
a <- act inp l a <- act inp l
alexSetStartCode c undefined
pure a
constToken :: RlpToken -> AlexAction (Located RlpToken) constToken :: RlpToken -> LexerAction (Located RlpToken)
constToken t inp _ = pure $ Located (inp ^. _1) t constToken t inp _ = undefined
tokenWith :: (Text -> RlpToken) -> AlexAction (Located RlpToken) tokenWith :: (Text -> RlpToken) -> LexerAction (Located RlpToken)
tokenWith tf (p,_,_,s) l = pure $ Located p (tf $ T.take l s) tokenWith tf inp l = undefined
alexEOF :: Alex (Located RlpToken) alexEOF :: P (Located RlpToken)
alexEOF = do alexEOF = do
inp <- alexGetInput inp <- getInput
pure (Located (inp ^. _1) TokenEOF) pure (Located undefined TokenEOF)
data RlpToken data RlpToken
-- literals -- literals
@@ -123,156 +144,59 @@ data RlpToken
| TokenEOF | TokenEOF
deriving (Show) deriving (Show)
newtype P a = P { runP :: ParseState -> Alex (ParseState, a) } newtype P a = P { runP :: ParseState -> (ParseState, Maybe a) }
deriving (Functor) deriving (Functor)
execP :: P a -> ParseState -> Text -> Either String a execP :: P a -> ParseState -> Either String a
execP p st s = snd <$> runAlex s (runP p st) execP p st = undefined
data ParseState = ParseState { } execP' :: P a -> Text -> Either String a
execP' p s = execP p st where
st = initParseState s
instance Default ParseState where initParseState :: Text -> ParseState
def = ParseState { } initParseState s = ParseState
{ _psLayoutStack = []
, _psLexState = [bol,0]
, _psInput = (undefined, s)
}
data ParseState = ParseState
{ _psLayoutStack :: [Layout]
, _psLexState :: [Int]
, _psInput :: AlexInput
}
instance Applicative P where instance Applicative P where
pure a = P $ \st -> pure (st,a) pure a = P $ \st -> (st,Just a)
liftA2 = liftM2 liftA2 = liftM2
instance Monad P where instance Monad P where
p >>= k = P $ \st -> do p >>= k = undefined
(st',a) <- runP p st
runP (k a) st'
data AlexUserState = AlexUserState
-- the layout context, along with a start code to return to when the layout
-- ends
{ _ausLayoutStack :: [(Layout, Int)]
}
deriving Show
alexInitUserState :: AlexUserState
alexInitUserState = AlexUserState
{ _ausLayoutStack = []
}
data Layout = Explicit data Layout = Explicit
| Implicit Int | Implicit Int
deriving (Show, Eq) deriving (Show, Eq)
data Located a = Located AlexPosn a data Located a = Located (Int, Int) a
deriving (Show) deriving (Show)
ausLayoutStack :: Lens' AlexUserState [(Layout, Int)]
ausLayoutStack = lens _ausLayoutStack
(\ s l -> s { _ausLayoutStack = l })
lexer :: P (Located RlpToken) lexer :: P (Located RlpToken)
lexer = P $ \st -> (st,) <$> lexToken lexer = undefined
lexerCont :: (Located RlpToken -> P a) -> P a lexerCont :: (Located RlpToken -> P a) -> P a
lexerCont = (lexer >>=) lexerCont = undefined
lexStream :: Alex [RlpToken] lexStream :: P [RlpToken]
lexStream = do lexStream = undefined
t <- lexToken
case t of
Located _ TokenEOF -> pure [TokenEOF]
Located _ a -> (a:) <$> lexStream
lexTest :: Text -> Either String [RlpToken] lexTest :: Text -> Either String [RlpToken]
lexTest = flip runAlex lexStream lexTest = undefined
lexToken :: Alex (Located RlpToken) lexToken :: P (Located RlpToken)
lexToken = alexMonadScan lexToken = undefined
getsAus :: (AlexUserState -> b) -> Alex b doBol = undefined
getsAus k = alexGetUserState <&> k
useAus :: Getting a AlexUserState a -> Alex a
useAus l = do
aus <- alexGetUserState
pure (aus ^. l)
preuseAus :: Getting (First a) AlexUserState a -> Alex (Maybe a)
preuseAus l = do
aus <- alexGetUserState
pure (aus ^? l)
modifyingAus :: ASetter' AlexUserState a -> (a -> a) -> Alex ()
modifyingAus l f = do
aus <- alexGetUserState
alexSetUserState (aus & l %~ f)
indentLevel :: Alex Int
indentLevel = do
inp <- alexGetInput
let col = inp ^. _1
& \ (AlexPn _ _ c) -> c
pure col
cmpLayout :: Alex Ordering
cmpLayout = do
i <- indentLevel
ctx <- preuseAus (ausLayoutStack . _head)
case (ctx <&> fst) ^. non (Implicit 1) of
Implicit n -> pure (i `compare` n)
Explicit -> pure GT
insertToken :: RlpToken -> Alex (Located RlpToken)
insertToken t = do
inp <- alexGetInput
pure (Located (inp ^. _1) t)
insertSemicolon, insertLBrace, insertRBrace :: Alex (Located RlpToken)
insertSemicolon = traceM "inserting semi" >> insertToken TokenSemicolonV
insertLBrace = traceM "inserting lbrace" >> insertToken TokenLBraceV
insertRBrace = traceM "inserting rbrace" >> insertToken TokenRBraceV
-- pop the layout stack and jump to the popped return code
popLayout :: Alex ()
popLayout = do
traceM "pop layout"
ctx <- preuseAus (ausLayoutStack . _head)
modifyingAus ausLayoutStack (drop 1)
case ctx of
Just (l,c) -> alexSetStartCode c
Nothing -> pure ()
pushLayout :: Layout -> Alex ()
pushLayout l = do
traceM "push layout"
c <- alexGetStartCode
modifyingAus ausLayoutStack ((l,c):)
doBol :: AlexAction (Located RlpToken)
doBol inp len = do
off <- cmpLayout
case off of
-- the line is aligned with the previous. it therefore belongs to the
-- same list
EQ -> insertSemicolon
-- the line is indented further than the previous, so we assume it is a
-- line continuation. ignore it and move on!
GT -> undefined -- alexSetStartCode one >> lexToken
-- the line is indented less than the previous, pop the layout stack and
-- insert a closing brace.
LT -> insertRBrace >> popLayout >> lexToken
explicitLBrace, explicitRBrace :: AlexAction (Located RlpToken)
explicitLBrace _ _ = do
pushLayout Explicit
insertToken TokenLBrace
explicitRBrace _ _ = do
popLayout
insertToken TokenRBrace
doLayout :: AlexAction (Located RlpToken)
doLayout _ _ = do
i <- indentLevel
pushLayout (Implicit i)
insertLBrace
} }

280
src/Rlp/Lex.x.old Normal file
View File

@@ -0,0 +1,280 @@
{
{-# LANGUAGE GeneralisedNewtypeDeriving #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings #-}
module Rlp.Lex
( P(..)
, RlpToken(..)
, Located(..)
, AlexPosn
, lexer
, lexerCont
)
where
import Control.Monad
import Data.Functor.Identity
import Data.Char (digitToInt)
import Core.Syntax (Name)
import Data.Monoid (First)
import Data.Maybe
import Data.Text (Text)
import Data.Text qualified as T
import Data.Default
import Lens.Micro.Mtl
import Lens.Micro
import Lens.Micro.TH
import Debug.Trace
}
$whitechar = [ \t\n\r\f\v]
$lower = [a-z \_]
$upper = [A-Z]
$alpha = [$lower $upper]
$digit = 0-9
$nl = [\n\r]
$white_no_nl = $white # $nl
$namechar = [$alpha $digit \' \#]
@varname = $lower $namechar*
@digits = $digit+
rlp :-
-- skip whitespace
$white_no_nl+ ;
-- TODO: don't treat operators like (-->) as comments
"--".* ;
";" { constToken TokenSemicolon }
-- "{" { explicitLBrace }
-- "}" { explicitRBrace }
<0>
{
\n { begin bol }
}
<one>
{
@varname { tokenWith TokenVarName }
@digits { tokenWith (TokenLitInt . readInt) }
"=" { constToken TokenEquals }
\n { begin bol }
}
-- consume all whitespace leaving us at the beginning of the next non-empty
-- line. we then compare the indentation of that line to the enclosing layout
-- context and proceed accordingly
<bol>
{
$whitechar ;
\n ;
() { doBol `andBegin` one }
}
{
readInt :: Text -> Int
readInt = T.foldr f 0 where
f c n = digitToInt c + 10*n
-- | @andBegin@, with the subtle difference that the start code is set
-- /after/ the action
thenBegin :: AlexAction a -> Int -> AlexAction a
thenBegin act c inp l = do
a <- act inp l
alexSetStartCode c
pure a
constToken :: RlpToken -> AlexAction (Located RlpToken)
constToken t inp _ = pure $ Located (inp ^. _1) t
tokenWith :: (Text -> RlpToken) -> AlexAction (Located RlpToken)
tokenWith tf (p,_,_,s) l = pure $ Located p (tf $ T.take l s)
alexEOF :: Alex (Located RlpToken)
alexEOF = do
inp <- alexGetInput
pure (Located (inp ^. _1) TokenEOF)
data RlpToken
-- literals
= TokenLitInt Int
-- identifiers
| TokenVarName Name
| TokenConName Name
| TokenVarSym Name
| TokenConSym Name
-- keywords
| TokenData
| TokenPipe
| TokenLet
| TokenIn
-- control symbols
| TokenEquals
| TokenSemicolon
| TokenLBrace
| TokenRBrace
-- 'virtual' control symbols, inserted by the lexer without any correlation
-- to a specific symbol
| TokenSemicolonV
| TokenLBraceV
| TokenRBraceV
| TokenEOF
deriving (Show)
newtype P a = P { runP :: ParseState -> Alex (ParseState, Maybe a) }
deriving (Functor)
execP :: P a -> ParseState -> Text -> Either String a
execP p st s = snd <$> runAlex s (runP p st)
execP' :: P a -> Text -> Either String a
execP' p = execP p def
data ParseState = ParseState
{ _psLayoutStack :: [Layout]
, _psLexState :: [Int]
}
instance Default ParseState where
def = ParseState { }
instance Applicative P where
pure a = P $ \st -> pure (st,a)
liftA2 = liftM2
instance Monad P where
p >>= k = P $ \st -> do
(st',a) <- runP p st
runP (k a) st'
data Layout = Explicit
| Implicit Int
deriving (Show, Eq)
data Located a = Located AlexPosn a
deriving (Show)
psLayoutStack :: Lens' AlexUserState [Layout]
psLayoutStack = lens _psLayoutStack
(\ s l -> s { _psLayoutStack = l })
lexer :: P (Located RlpToken)
lexer = P $ \st -> (st,) <$> lexToken
lexerCont :: (Located RlpToken -> P a) -> P a
lexerCont = (lexer >>=)
lexStream :: Alex [RlpToken]
lexStream = do
t <- lexToken
case t of
Located _ TokenEOF -> pure [TokenEOF]
Located _ a -> (a:) <$> lexStream
lexTest :: Text -> Either String [RlpToken]
lexTest = flip runAlex lexStream
lexToken :: Alex (Located RlpToken)
lexToken = alexMonadScan
getsAus :: (AlexUserState -> b) -> Alex b
getsAus k = alexGetUserState <&> k
useAus :: Getting a AlexUserState a -> Alex a
useAus l = do
aus <- alexGetUserState
pure (aus ^. l)
preuseAus :: Getting (First a) AlexUserState a -> Alex (Maybe a)
preuseAus l = do
aus <- alexGetUserState
pure (aus ^? l)
modifyingAus :: ASetter' AlexUserState a -> (a -> a) -> Alex ()
modifyingAus l f = do
aus <- alexGetUserState
alexSetUserState (aus & l %~ f)
indentLevel :: Alex Int
indentLevel = do
inp <- alexGetInput
let col = inp ^. _1
& \ (AlexPn _ _ c) -> c
pure col
cmpLayout :: Alex Ordering
cmpLayout = do
i <- indentLevel
ctx <- preuseAus (ausLayoutStack . _head)
case ctx ^. non (Implicit 1) of
Implicit n -> pure (i `compare` n)
Explicit -> pure GT
insertToken :: RlpToken -> Alex (Located RlpToken)
insertToken t = do
inp <- alexGetInput
pure (Located (inp ^. _1) t)
insertSemicolon, insertLBrace, insertRBrace :: Alex (Located RlpToken)
insertSemicolon = traceM "inserting semi" >> insertToken TokenSemicolonV
insertLBrace = traceM "inserting lbrace" >> insertToken TokenLBraceV
insertRBrace = traceM "inserting rbrace" >> insertToken TokenRBraceV
-- pop the layout stack and jump to the popped return code
popLayout :: Alex Layout
popLayout = do
traceM "pop layout"
ctx <- preuseAus (ausLayoutStack . _head)
modifyingAus ausLayoutStack (drop 1)
case ctx of
Just l -> pure l
Nothing -> error "uhh"
pushLayout :: Layout -> Alex ()
pushLayout l = do
traceM "push layout"
modifyingAus ausLayoutStack (l:)
pushLexState :: Alex ()
pushLexState = do
undefined
doBol :: AlexAction (Located RlpToken)
doBol inp len = do
off <- cmpLayout
case off of
-- the line is aligned with the previous. it therefore belongs to the
-- same list
EQ -> insertSemicolon
-- the line is indented further than the previous, so we assume it is a
-- line continuation. ignore it and move on!
GT -> undefined -- alexSetStartCode one >> lexToken
-- the line is indented less than the previous, pop the layout stack and
-- insert a closing brace.
LT -> popLayout >> insertRBrace
explicitLBrace, explicitRBrace :: AlexAction (Located RlpToken)
explicitLBrace _ _ = do
pushLayout Explicit
insertToken TokenLBrace
explicitRBrace _ _ = do
popLayout
insertToken TokenRBrace
doLayout :: AlexAction (Located RlpToken)
doLayout _ _ = do
i <- indentLevel
pushLayout (Implicit i)
traceM $ "layout " <> show i
insertLBrace
}

View File

@@ -1,6 +1,6 @@
{ {
module Rlp.Parse module Rlp.Parse
( ( parseRlpProgram
) )
where where
import Rlp.Lex import Rlp.Lex
@@ -8,7 +8,8 @@ import Rlp.Syntax
import Rlp.Parse.Types import Rlp.Parse.Types
} }
%name rlp %name parseRlpProgram StandaloneProgram
%monad { P } %monad { P }
%lexer { lexerCont } { Located _ TokenEOF } %lexer { lexerCont } { Located _ TokenEOF }
%error { parseError } %error { parseError }
@@ -16,11 +17,37 @@ import Rlp.Parse.Types
%token %token
varname { Located _ (TokenVarName $$) } varname { Located _ (TokenVarName $$) }
litint { Located _ (TokenLitInt $$) }
'=' { Located _ TokenEquals } '=' { Located _ TokenEquals }
';' { Located _ TokenSemicolon }
';?' { Located _ TokenSemicolonV }
'{' { Located _ TokenLBrace }
'}' { Located _ TokenRBrace }
'{?' { Located _ TokenLBraceV }
'?}' { Located _ TokenRBraceV }
eof { Located _ TokenEOF } eof { Located _ TokenEOF }
%% %%
StandaloneProgram :: { [PartialDecl'] }
StandaloneProgram : VL Decls VR eof { $2 }
VL :: { () }
VL : '{?' { () }
VR :: { () }
VR : '?}' { () }
| error { () }
Decls :: { [PartialDecl'] }
Decls : Decl Semi Decls { $1 : $3 }
| Decl Semi { [$1] }
| Decl { [$1] }
Semi :: { Located RlpToken }
Semi : ';' { $1 }
| ';?' { $1 }
Decl :: { PartialDecl' } Decl :: { PartialDecl' }
Decl : FunDecl { undefined } Decl : FunDecl { undefined }
@@ -28,11 +55,18 @@ FunDecl :: { PartialDecl' }
FunDecl : varname '=' Expr { undefined } FunDecl : varname '=' Expr { undefined }
Expr :: { RlpExpr' } Expr :: { RlpExpr' }
Expr : { undefined } Expr : Literal { LitE $1 }
| Var { VarE $1 }
Literal :: { Lit' }
Literal : litint { IntL $1 }
Var :: { VarId }
Var : varname { NameVar $1 }
{ {
parseError :: Located RlpToken -> P a parseError :: Located RlpToken -> P a
parseError = error "aaaaah" parseError = error . show
} }

View File

@@ -5,6 +5,8 @@
{-# LANGUAGE OverloadedStrings, PatternSynonyms #-} {-# LANGUAGE OverloadedStrings, PatternSynonyms #-}
module Rlp.Syntax module Rlp.Syntax
( RlpModule(..) ( RlpModule(..)
, RlpProgram(..)
, RlpProgram'
, rlpmodName , rlpmodName
, rlpmodProgram , rlpmodProgram
, RlpExpr(..) , RlpExpr(..)
@@ -54,6 +56,8 @@ data RlpModule b = RlpModule
newtype RlpProgram b = RlpProgram [Decl RlpExpr b] newtype RlpProgram b = RlpProgram [Decl RlpExpr b]
type RlpProgram' = RlpProgram Name
-- | The @e@ parameter is used for partial results. When parsing an input, we -- | The @e@ parameter is used for partial results. When parsing an input, we
-- first parse all top-level declarations in order to extract infix[lr] -- first parse all top-level declarations in order to extract infix[lr]
-- declarations. This process yields a @[Decl (Const Text) Name]@, where @Const -- declarations. This process yields a @[Decl (Const Text) Name]@, where @Const