rc #13

Merged
crumbtoo merged 196 commits from dev into main 2024-02-13 13:22:23 -07:00
4 changed files with 390 additions and 148 deletions
Showing only changes of commit 17ddf3530c - Show all commits

View File

@@ -6,18 +6,19 @@ module Rlp.Lex
( P(..)
, RlpToken(..)
, Located(..)
, AlexPosn
, lexer
, lexerCont
)
where
import Control.Monad
import Data.Functor.Identity
import Data.Char (digitToInt)
import Core.Syntax (Name)
import Data.Monoid (First)
import Data.Maybe
import Data.Text (Text)
import Data.Text qualified as T
import Data.Word
import Data.Default
import Lens.Micro.Mtl
import Lens.Micro
@@ -26,8 +27,6 @@ import Lens.Micro.TH
import Debug.Trace
}
%wrapper "monadUserState-strict-text"
$whitechar = [ \t\n\r\f\v]
$lower = [a-z \_]
@@ -42,6 +41,8 @@ $namechar = [$alpha $digit \' \#]
@varname = $lower $namechar*
@digits = $digit+
rlp :-
-- skip whitespace
@@ -54,14 +55,13 @@ rlp :-
<0>
{
\n ;
"{" { explicitLBrace `thenBegin` one }
() { doLayout `thenBegin` one }
\n { begin bol }
}
<one>
{
@varname { tokenWith TokenVarName }
@digits { tokenWith (TokenLitInt . readInt) }
"=" { constToken TokenEquals }
\n { begin bol }
}
@@ -73,29 +73,50 @@ rlp :-
{
$whitechar ;
\n ;
() { doBol `andBegin` one }
() { doBol }
}
{
begin = undefined
type LexerAction a = AlexInput -> Int -> P a
type AlexInput =
( Char -- prev char
, Text -- input
)
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte (_,s) = undefined
getInput :: P AlexInput
getInput = undefined
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar = (^. _1)
readInt :: Text -> Int
readInt = T.foldr f 0 where
f c n = digitToInt c + 10*n
-- | @andBegin@, with the subtle difference that the start code is set
-- /after/ the action
thenBegin :: AlexAction a -> Int -> AlexAction a
thenBegin :: LexerAction a -> Int -> LexerAction a
thenBegin act c inp l = do
a <- act inp l
alexSetStartCode c
pure a
undefined
constToken :: RlpToken -> AlexAction (Located RlpToken)
constToken t inp _ = pure $ Located (inp ^. _1) t
constToken :: RlpToken -> LexerAction (Located RlpToken)
constToken t inp _ = undefined
tokenWith :: (Text -> RlpToken) -> AlexAction (Located RlpToken)
tokenWith tf (p,_,_,s) l = pure $ Located p (tf $ T.take l s)
tokenWith :: (Text -> RlpToken) -> LexerAction (Located RlpToken)
tokenWith tf inp l = undefined
alexEOF :: Alex (Located RlpToken)
alexEOF :: P (Located RlpToken)
alexEOF = do
inp <- alexGetInput
pure (Located (inp ^. _1) TokenEOF)
inp <- getInput
pure (Located undefined TokenEOF)
data RlpToken
-- literals
@@ -123,156 +144,59 @@ data RlpToken
| TokenEOF
deriving (Show)
newtype P a = P { runP :: ParseState -> Alex (ParseState, a) }
newtype P a = P { runP :: ParseState -> (ParseState, Maybe a) }
deriving (Functor)
execP :: P a -> ParseState -> Text -> Either String a
execP p st s = snd <$> runAlex s (runP p st)
execP :: P a -> ParseState -> Either String a
execP p st = undefined
data ParseState = ParseState { }
execP' :: P a -> Text -> Either String a
execP' p s = execP p st where
st = initParseState s
instance Default ParseState where
def = ParseState { }
initParseState :: Text -> ParseState
initParseState s = ParseState
{ _psLayoutStack = []
, _psLexState = [bol,0]
, _psInput = (undefined, s)
}
data ParseState = ParseState
{ _psLayoutStack :: [Layout]
, _psLexState :: [Int]
, _psInput :: AlexInput
}
instance Applicative P where
pure a = P $ \st -> pure (st,a)
pure a = P $ \st -> (st,Just a)
liftA2 = liftM2
instance Monad P where
p >>= k = P $ \st -> do
(st',a) <- runP p st
runP (k a) st'
data AlexUserState = AlexUserState
-- the layout context, along with a start code to return to when the layout
-- ends
{ _ausLayoutStack :: [(Layout, Int)]
}
deriving Show
alexInitUserState :: AlexUserState
alexInitUserState = AlexUserState
{ _ausLayoutStack = []
}
p >>= k = undefined
data Layout = Explicit
| Implicit Int
deriving (Show, Eq)
data Located a = Located AlexPosn a
data Located a = Located (Int, Int) a
deriving (Show)
ausLayoutStack :: Lens' AlexUserState [(Layout, Int)]
ausLayoutStack = lens _ausLayoutStack
(\ s l -> s { _ausLayoutStack = l })
lexer :: P (Located RlpToken)
lexer = P $ \st -> (st,) <$> lexToken
lexer = undefined
lexerCont :: (Located RlpToken -> P a) -> P a
lexerCont = (lexer >>=)
lexerCont = undefined
lexStream :: Alex [RlpToken]
lexStream = do
t <- lexToken
case t of
Located _ TokenEOF -> pure [TokenEOF]
Located _ a -> (a:) <$> lexStream
lexStream :: P [RlpToken]
lexStream = undefined
lexTest :: Text -> Either String [RlpToken]
lexTest = flip runAlex lexStream
lexTest = undefined
lexToken :: Alex (Located RlpToken)
lexToken = alexMonadScan
lexToken :: P (Located RlpToken)
lexToken = undefined
getsAus :: (AlexUserState -> b) -> Alex b
getsAus k = alexGetUserState <&> k
useAus :: Getting a AlexUserState a -> Alex a
useAus l = do
aus <- alexGetUserState
pure (aus ^. l)
preuseAus :: Getting (First a) AlexUserState a -> Alex (Maybe a)
preuseAus l = do
aus <- alexGetUserState
pure (aus ^? l)
modifyingAus :: ASetter' AlexUserState a -> (a -> a) -> Alex ()
modifyingAus l f = do
aus <- alexGetUserState
alexSetUserState (aus & l %~ f)
indentLevel :: Alex Int
indentLevel = do
inp <- alexGetInput
let col = inp ^. _1
& \ (AlexPn _ _ c) -> c
pure col
cmpLayout :: Alex Ordering
cmpLayout = do
i <- indentLevel
ctx <- preuseAus (ausLayoutStack . _head)
case (ctx <&> fst) ^. non (Implicit 1) of
Implicit n -> pure (i `compare` n)
Explicit -> pure GT
insertToken :: RlpToken -> Alex (Located RlpToken)
insertToken t = do
inp <- alexGetInput
pure (Located (inp ^. _1) t)
insertSemicolon, insertLBrace, insertRBrace :: Alex (Located RlpToken)
insertSemicolon = traceM "inserting semi" >> insertToken TokenSemicolonV
insertLBrace = traceM "inserting lbrace" >> insertToken TokenLBraceV
insertRBrace = traceM "inserting rbrace" >> insertToken TokenRBraceV
-- pop the layout stack and jump to the popped return code
popLayout :: Alex ()
popLayout = do
traceM "pop layout"
ctx <- preuseAus (ausLayoutStack . _head)
modifyingAus ausLayoutStack (drop 1)
case ctx of
Just (l,c) -> alexSetStartCode c
Nothing -> pure ()
pushLayout :: Layout -> Alex ()
pushLayout l = do
traceM "push layout"
c <- alexGetStartCode
modifyingAus ausLayoutStack ((l,c):)
doBol :: AlexAction (Located RlpToken)
doBol inp len = do
off <- cmpLayout
case off of
-- the line is aligned with the previous. it therefore belongs to the
-- same list
EQ -> insertSemicolon
-- the line is indented further than the previous, so we assume it is a
-- line continuation. ignore it and move on!
GT -> undefined -- alexSetStartCode one >> lexToken
-- the line is indented less than the previous, pop the layout stack and
-- insert a closing brace.
LT -> insertRBrace >> popLayout >> lexToken
explicitLBrace, explicitRBrace :: AlexAction (Located RlpToken)
explicitLBrace _ _ = do
pushLayout Explicit
insertToken TokenLBrace
explicitRBrace _ _ = do
popLayout
insertToken TokenRBrace
doLayout :: AlexAction (Located RlpToken)
doLayout _ _ = do
i <- indentLevel
pushLayout (Implicit i)
insertLBrace
doBol = undefined
}

280
src/Rlp/Lex.x.old Normal file
View File

@@ -0,0 +1,280 @@
{
{-# LANGUAGE GeneralisedNewtypeDeriving #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings #-}
module Rlp.Lex
( P(..)
, RlpToken(..)
, Located(..)
, AlexPosn
, lexer
, lexerCont
)
where
import Control.Monad
import Data.Functor.Identity
import Data.Char (digitToInt)
import Core.Syntax (Name)
import Data.Monoid (First)
import Data.Maybe
import Data.Text (Text)
import Data.Text qualified as T
import Data.Default
import Lens.Micro.Mtl
import Lens.Micro
import Lens.Micro.TH
import Debug.Trace
}
$whitechar = [ \t\n\r\f\v]
$lower = [a-z \_]
$upper = [A-Z]
$alpha = [$lower $upper]
$digit = 0-9
$nl = [\n\r]
$white_no_nl = $white # $nl
$namechar = [$alpha $digit \' \#]
@varname = $lower $namechar*
@digits = $digit+
rlp :-
-- skip whitespace
$white_no_nl+ ;
-- TODO: don't treat operators like (-->) as comments
"--".* ;
";" { constToken TokenSemicolon }
-- "{" { explicitLBrace }
-- "}" { explicitRBrace }
<0>
{
\n { begin bol }
}
<one>
{
@varname { tokenWith TokenVarName }
@digits { tokenWith (TokenLitInt . readInt) }
"=" { constToken TokenEquals }
\n { begin bol }
}
-- consume all whitespace leaving us at the beginning of the next non-empty
-- line. we then compare the indentation of that line to the enclosing layout
-- context and proceed accordingly
<bol>
{
$whitechar ;
\n ;
() { doBol `andBegin` one }
}
{
readInt :: Text -> Int
readInt = T.foldr f 0 where
f c n = digitToInt c + 10*n
-- | @andBegin@, with the subtle difference that the start code is set
-- /after/ the action
thenBegin :: AlexAction a -> Int -> AlexAction a
thenBegin act c inp l = do
a <- act inp l
alexSetStartCode c
pure a
constToken :: RlpToken -> AlexAction (Located RlpToken)
constToken t inp _ = pure $ Located (inp ^. _1) t
tokenWith :: (Text -> RlpToken) -> AlexAction (Located RlpToken)
tokenWith tf (p,_,_,s) l = pure $ Located p (tf $ T.take l s)
alexEOF :: Alex (Located RlpToken)
alexEOF = do
inp <- alexGetInput
pure (Located (inp ^. _1) TokenEOF)
data RlpToken
-- literals
= TokenLitInt Int
-- identifiers
| TokenVarName Name
| TokenConName Name
| TokenVarSym Name
| TokenConSym Name
-- keywords
| TokenData
| TokenPipe
| TokenLet
| TokenIn
-- control symbols
| TokenEquals
| TokenSemicolon
| TokenLBrace
| TokenRBrace
-- 'virtual' control symbols, inserted by the lexer without any correlation
-- to a specific symbol
| TokenSemicolonV
| TokenLBraceV
| TokenRBraceV
| TokenEOF
deriving (Show)
newtype P a = P { runP :: ParseState -> Alex (ParseState, Maybe a) }
deriving (Functor)
execP :: P a -> ParseState -> Text -> Either String a
execP p st s = snd <$> runAlex s (runP p st)
execP' :: P a -> Text -> Either String a
execP' p = execP p def
data ParseState = ParseState
{ _psLayoutStack :: [Layout]
, _psLexState :: [Int]
}
instance Default ParseState where
def = ParseState { }
instance Applicative P where
pure a = P $ \st -> pure (st,a)
liftA2 = liftM2
instance Monad P where
p >>= k = P $ \st -> do
(st',a) <- runP p st
runP (k a) st'
data Layout = Explicit
| Implicit Int
deriving (Show, Eq)
data Located a = Located AlexPosn a
deriving (Show)
psLayoutStack :: Lens' AlexUserState [Layout]
psLayoutStack = lens _psLayoutStack
(\ s l -> s { _psLayoutStack = l })
lexer :: P (Located RlpToken)
lexer = P $ \st -> (st,) <$> lexToken
lexerCont :: (Located RlpToken -> P a) -> P a
lexerCont = (lexer >>=)
lexStream :: Alex [RlpToken]
lexStream = do
t <- lexToken
case t of
Located _ TokenEOF -> pure [TokenEOF]
Located _ a -> (a:) <$> lexStream
lexTest :: Text -> Either String [RlpToken]
lexTest = flip runAlex lexStream
lexToken :: Alex (Located RlpToken)
lexToken = alexMonadScan
getsAus :: (AlexUserState -> b) -> Alex b
getsAus k = alexGetUserState <&> k
useAus :: Getting a AlexUserState a -> Alex a
useAus l = do
aus <- alexGetUserState
pure (aus ^. l)
preuseAus :: Getting (First a) AlexUserState a -> Alex (Maybe a)
preuseAus l = do
aus <- alexGetUserState
pure (aus ^? l)
modifyingAus :: ASetter' AlexUserState a -> (a -> a) -> Alex ()
modifyingAus l f = do
aus <- alexGetUserState
alexSetUserState (aus & l %~ f)
indentLevel :: Alex Int
indentLevel = do
inp <- alexGetInput
let col = inp ^. _1
& \ (AlexPn _ _ c) -> c
pure col
cmpLayout :: Alex Ordering
cmpLayout = do
i <- indentLevel
ctx <- preuseAus (ausLayoutStack . _head)
case ctx ^. non (Implicit 1) of
Implicit n -> pure (i `compare` n)
Explicit -> pure GT
insertToken :: RlpToken -> Alex (Located RlpToken)
insertToken t = do
inp <- alexGetInput
pure (Located (inp ^. _1) t)
insertSemicolon, insertLBrace, insertRBrace :: Alex (Located RlpToken)
insertSemicolon = traceM "inserting semi" >> insertToken TokenSemicolonV
insertLBrace = traceM "inserting lbrace" >> insertToken TokenLBraceV
insertRBrace = traceM "inserting rbrace" >> insertToken TokenRBraceV
-- pop the layout stack and jump to the popped return code
popLayout :: Alex Layout
popLayout = do
traceM "pop layout"
ctx <- preuseAus (ausLayoutStack . _head)
modifyingAus ausLayoutStack (drop 1)
case ctx of
Just l -> pure l
Nothing -> error "uhh"
pushLayout :: Layout -> Alex ()
pushLayout l = do
traceM "push layout"
modifyingAus ausLayoutStack (l:)
pushLexState :: Alex ()
pushLexState = do
undefined
doBol :: AlexAction (Located RlpToken)
doBol inp len = do
off <- cmpLayout
case off of
-- the line is aligned with the previous. it therefore belongs to the
-- same list
EQ -> insertSemicolon
-- the line is indented further than the previous, so we assume it is a
-- line continuation. ignore it and move on!
GT -> undefined -- alexSetStartCode one >> lexToken
-- the line is indented less than the previous, pop the layout stack and
-- insert a closing brace.
LT -> popLayout >> insertRBrace
explicitLBrace, explicitRBrace :: AlexAction (Located RlpToken)
explicitLBrace _ _ = do
pushLayout Explicit
insertToken TokenLBrace
explicitRBrace _ _ = do
popLayout
insertToken TokenRBrace
doLayout :: AlexAction (Located RlpToken)
doLayout _ _ = do
i <- indentLevel
pushLayout (Implicit i)
traceM $ "layout " <> show i
insertLBrace
}

View File

@@ -1,6 +1,6 @@
{
module Rlp.Parse
(
( parseRlpProgram
)
where
import Rlp.Lex
@@ -8,7 +8,8 @@ import Rlp.Syntax
import Rlp.Parse.Types
}
%name rlp
%name parseRlpProgram StandaloneProgram
%monad { P }
%lexer { lexerCont } { Located _ TokenEOF }
%error { parseError }
@@ -16,23 +17,56 @@ import Rlp.Parse.Types
%token
varname { Located _ (TokenVarName $$) }
litint { Located _ (TokenLitInt $$) }
'=' { Located _ TokenEquals }
';' { Located _ TokenSemicolon }
';?' { Located _ TokenSemicolonV }
'{' { Located _ TokenLBrace }
'}' { Located _ TokenRBrace }
'{?' { Located _ TokenLBraceV }
'?}' { Located _ TokenRBraceV }
eof { Located _ TokenEOF }
%%
StandaloneProgram :: { [PartialDecl'] }
StandaloneProgram : VL Decls VR eof { $2 }
VL :: { () }
VL : '{?' { () }
VR :: { () }
VR : '?}' { () }
| error { () }
Decls :: { [PartialDecl'] }
Decls : Decl Semi Decls { $1 : $3 }
| Decl Semi { [$1] }
| Decl { [$1] }
Semi :: { Located RlpToken }
Semi : ';' { $1 }
| ';?' { $1 }
Decl :: { PartialDecl' }
Decl : FunDecl { undefined }
FunDecl :: { PartialDecl' }
FunDecl : varname '=' Expr { undefined }
Expr :: { RlpExpr' }
Expr : { undefined }
Expr :: { RlpExpr' }
Expr : Literal { LitE $1 }
| Var { VarE $1 }
Literal :: { Lit' }
Literal : litint { IntL $1 }
Var :: { VarId }
Var : varname { NameVar $1 }
{
parseError :: Located RlpToken -> P a
parseError = error "aaaaah"
parseError = error . show
}

View File

@@ -5,6 +5,8 @@
{-# LANGUAGE OverloadedStrings, PatternSynonyms #-}
module Rlp.Syntax
( RlpModule(..)
, RlpProgram(..)
, RlpProgram'
, rlpmodName
, rlpmodProgram
, RlpExpr(..)
@@ -54,6 +56,8 @@ data RlpModule b = RlpModule
newtype RlpProgram b = RlpProgram [Decl RlpExpr b]
type RlpProgram' = RlpProgram Name
-- | The @e@ parameter is used for partial results. When parsing an input, we
-- first parse all top-level declarations in order to extract infix[lr]
-- declarations. This process yields a @[Decl (Const Text) Name]@, where @Const