68 Commits

Author SHA1 Message Date
crumbtoo
a4c0c3a71a rlp2core 2024-01-18 17:21:04 -07:00
crumbtoo
f22d4238f5 Merge branch 'dev' into frontend-parser 2024-01-17 10:28:59 -07:00
crumbtoo
4e1c9dd750 rename rlp 2024-01-17 10:19:48 -07:00
crumbtoo
d6ac991105 renamerlp 2024-01-17 10:19:16 -07:00
crumbtoo
d5663c1aad remove debug flags 2024-01-17 10:11:48 -07:00
crumbtoo
7e6bee3d4a infix exprs 2024-01-17 10:08:57 -07:00
crumbtoo
5ec625e0fd i really need to learn git proper 2024-01-15 15:20:04 -07:00
crumbtoo
9196e20e08 Merge branch 'frontend-parser' into happy-frontend 2024-01-15 15:18:29 -07:00
crumbtoo
a1a50bd013 now we're fucking GETTING SOMEWHERE 2024-01-15 14:58:26 -07:00
crumbtoo
1c035d092a works 2024-01-15 13:31:15 -07:00
crumbtoo
c0236dc079 oh my god 2024-01-15 11:11:43 -07:00
crumbtoo
9a4f24ec10 Merge commit '4f66e71' into happy-frontend 2024-01-15 11:06:37 -07:00
crumbtoo
4f66e71b9a FIX REAL 2024-01-15 11:05:10 -07:00
crumbtoo
bdf74ac6c9 cool 2024-01-15 10:35:11 -07:00
crumbtoo
3dfadc17ec fixy 2024-01-15 10:33:09 -07:00
crumbtoo
c92d8fac65 we're so back 2024-01-15 09:44:26 -07:00
crumbtoo
a38381f6ca version bounds 2024-01-15 07:53:40 -07:00
crumbtoo
6390ca80d8 see previous commit and scale back the part where i'm joking 2024-01-15 07:47:23 -07:00
crumbtoo
17ddf3530c kitten i'll be honest mommy's about to kill herself 2024-01-15 07:47:23 -07:00
crumbtoo
e597ecbfc6 okay layouts kinda 2024-01-15 07:47:23 -07:00
crumbtoo
2496589346 aagh 2024-01-15 07:47:23 -07:00
crumbtoo
681a394312 man this sucks 2024-01-15 07:47:23 -07:00
crumbtoo
aff1c6b4c6 decent starting point 2024-01-15 07:47:23 -07:00
crumbtoo
bec376b7c7 threaded lexer 2024-01-15 07:47:23 -07:00
crumbtoo
eaa04c4a59 its fine 2024-01-15 07:47:23 -07:00
crumbtoo
ea2fb4dcaa tysigs 2024-01-15 07:47:23 -07:00
crumbtoo
ab2cb59526 i did not realise my fs is case insensitive 2024-01-15 07:47:23 -07:00
crumbtoo
ec4902b2d4 layout
layouts

oh my layouts
2024-01-15 07:47:23 -07:00
crumbtoo
1fc45b70b4 replace uses of many+satisfy with takeWhileP 2024-01-15 07:47:23 -07:00
crumbtoo
4b9a570c72 finally in a decent state 2024-01-15 07:47:23 -07:00
crumbtoo
65b967689c decls fix 2024-01-15 07:47:23 -07:00
crumbtoo
ed60ec8b32 aaaaa 2024-01-15 07:47:23 -07:00
crumbtoo
d0dbdbbd9b cool 2024-01-15 07:47:23 -07:00
crumbtoo
cae0939f0c where 2024-01-15 07:47:23 -07:00
crumbtoo
3292998c42 expr fixups 2024-01-15 07:47:23 -07:00
crumbtoo
84c1122995 infix decl 2024-01-15 07:47:21 -07:00
crumbtoo
97ce9b48ae labels 2024-01-15 07:46:23 -07:00
crumbtoo
936f24148f works 2024-01-15 07:46:23 -07:00
crumbtoo
2a159232c7 fixation fufilled - back to work! 2024-01-15 07:46:23 -07:00
crumbtoo
4ee9785239 Show1 instances 2024-01-15 07:46:20 -07:00
crumbtoo
cbe4276061 goofy 2024-01-15 07:44:17 -07:00
crumbtoo
c5c06fa6cb something 2024-01-15 07:44:17 -07:00
crumbtoo
0f04e2decf application and lits
appl
2024-01-15 07:44:17 -07:00
crumbtoo
6130a91668 oh boy am i going to hate this code in 12 hours 2024-01-15 07:44:17 -07:00
crumbtoo
c15f9b6546 4:00 AM psychopath code 2024-01-15 07:44:17 -07:00
crumbtoo
bb6aca094c grammar reference 2024-01-15 07:44:17 -07:00
crumbtoo
245b12a96e add version bounds 2024-01-15 07:43:59 -07:00
crumbtoo
cb9ec43c14 tysigs 2024-01-10 15:11:26 -07:00
crumbtoo
8ad967fac0 i did not realise my fs is case insensitive 2024-01-10 14:33:03 -07:00
crumbtoo
55dbc9de70 layout
layouts

oh my layouts
2024-01-10 14:23:37 -07:00
crumbtoo
05226373ee replace uses of many+satisfy with takeWhileP 2024-01-10 11:33:27 -07:00
crumbtoo
981c5d8a83 finally in a decent state 2024-01-10 11:26:17 -07:00
crumbtoo
86cd1075ca decls fix 2024-01-10 11:03:06 -07:00
crumbtoo
1d43c1d304 aaaaa 2024-01-10 10:46:53 -07:00
crumbtoo
4b44f57066 cool 2024-01-09 22:57:14 -07:00
crumbtoo
90a9594e8f where 2024-01-09 14:24:51 -07:00
crumbtoo
074350768c expr fixups 2024-01-09 12:26:53 -07:00
crumbtoo
37d9e6f219 infix decl 2024-01-09 11:39:26 -07:00
crumbtoo
cb7cdf7ed7 labels 2024-01-08 20:14:18 -07:00
crumbtoo
2f783d96e8 works 2024-01-08 18:56:14 -07:00
crumbtoo
a71c099fe0 fixation fufilled - back to work! 2024-01-08 13:39:12 -07:00
crumbtoo
d1e64eb12d Show1 instances 2024-01-03 10:04:42 -07:00
crumbtoo
f31726b43d goofy 2024-01-02 08:43:34 -07:00
crumbtoo
8aa9bb843f something 2024-01-02 08:04:49 -07:00
crumbtoo
9a357a99b7 application and lits
appl
2024-01-02 07:04:27 -07:00
crumbtoo
060d48f9e1 oh boy am i going to hate this code in 12 hours 2024-01-02 06:26:48 -07:00
crumbtoo
bf4abeb8b4 4:00 AM psychopath code 2024-01-02 05:34:11 -07:00
crumbtoo
7ed565fc24 grammar reference 2024-01-02 02:33:31 -07:00
14 changed files with 1185 additions and 83 deletions

18
.ghci Normal file
View File

@@ -0,0 +1,18 @@
:set -XOverloadedStrings
:set -package process
:{
import System.Exit qualified
import System.Process qualified
_reload_and_make _ = do
p <- System.Process.spawnCommand "make -f Makefile_happysrcs"
r <- System.Process.waitForProcess p
case r of
System.Exit.ExitSuccess -> pure ":reload"
_ -> pure ""
:}
:def! r _reload_and_make

19
Makefile_happysrcs Normal file
View File

@@ -0,0 +1,19 @@
# Regenerates the happy parser and alex lexer sources directly into cabal's
# build directory (used by the `:r` macro in .ghci).
#
# Every variable uses ?= so it can be overridden from the environment or the
# command line; in particular CABAL_BUILD encodes a platform and compiler
# version and will differ on other machines.
HAPPY       ?= happy
HAPPY_OPTS  ?= -a -g -c
ALEX        ?= alex
ALEX_OPTS   ?= -g
SRC         ?= src
CABAL_BUILD ?= dist-newstyle/build/x86_64-osx/ghc-9.6.2/rlp-0.1.0.0/build

# these targets name groups of work, not files
.PHONY: all parsers lexers

all: parsers lexers

parsers: $(CABAL_BUILD)/Rlp/Parse.hs
lexers: $(CABAL_BUILD)/Rlp/Lex.hs

$(CABAL_BUILD)/Rlp/Parse.hs: $(SRC)/Rlp/Parse.y
	@mkdir -p $(@D)
	$(HAPPY) $(HAPPY_OPTS) $< -o $@

$(CABAL_BUILD)/Rlp/Lex.hs: $(SRC)/Rlp/Lex.x
	@mkdir -p $(@D)
	$(ALEX) $(ALEX_OPTS) $< -o $@

View File

@@ -32,6 +32,7 @@ html_theme = 'alabaster'
imgmath_latex_preamble = r'''
\usepackage{amsmath}
\usepackage{tabularray}
\usepackage{syntax}
\newcommand{\transrule}[2]
{\begin{tblr}{|rrrlc|}

View File

@@ -0,0 +1,67 @@
The Complete Syntax of rl'
==========================
WIP.
Provided is the complete syntax of rl' in (pseudo) EBNF. {A} represents zero or
more A's, [A] means optional A, and terminals are wrapped in 'single-quotes'.
.. math
:nowrap:
\setlength{\grammarparsep}{20pt plus 1pt minus 1pt}
\setlength{\grammarindent}{12em}
\begin{grammar}
<Decl> ::= <InfixDecl>
\alt <DataDecl>
\alt <TypeSig>
\alt <FunDef>
<InfixDecl> ::= <InfixWord> `litint' <Name>
<InfixWord> ::= `infix'
\alt `infixl'
\alt `infixr'
<DataDecl> ::= `data' `conname' {}
\end{grammar}
.. code-block:: bnf
Decl ::= InfixDecl
| DataDecl
| TypeSig
| FunDef
InfixDecl ::= InfixWord 'litint' Operator
InfixWord ::= 'infix'
| 'infixl'
| 'infixr'
DataDecl ::= 'data' 'conname' {'name'} '=' DataCons
DataCons ::= 'conname' {Type1} ['|' DataCons]
TypeSig ::= Var '::' Type
FunDef ::= Var {Pat1} '=' Expr
Type ::= Type1 {Type1}
-- note that (->) is right-associative,
-- and extends as far as possible
| Type '->' Type
Type1 ::= '(' Type ')'
| 'conname'
Pat ::= 'conname' Pat1 {Pat1}
| Pat 'consym' Pat
Pat1 ::= Literal
| 'conname'
| '(' Pat ')'
Literal ::= 'litint'
Var ::= 'varname'
| '(' 'varsym' ')'
Con ::= 'conname'
| '(' 'consym' ')'

View File

@@ -12,6 +12,7 @@ category: Language
build-type: Simple
extra-doc-files: README.md
-- extra-source-files:
tested-with: GHC==9.6.2
common warnings
-- ghc-options: -Wall -Wno-incomplete-uni-patterns -Wno-unused-top-binds
@@ -30,37 +31,46 @@ library
, Core.TH
, Core.HindleyMilner
, Control.Monad.Errorful
, Rlp.Syntax
-- , Rlp.Parse.Decls
, Rlp.Parse
, Rlp.Parse.Associate
, Rlp.Lex
, Rlp.Parse.Types
, Rlp.TH
other-modules: Data.Heap
, Data.Pretty
, Core.Parse
, Core.Lex
, Core2Core
, Rlp2Core
, Control.Monad.Utils
, RLP.Syntax
build-tool-depends: happy:happy, alex:alex
-- other-extensions:
build-depends: base ^>=4.18.0.0
, containers
, microlens
, microlens-mtl
, microlens-th
, microlens-platform
, mtl
, template-haskell
-- required for happy
, array
, data-default-class
, unordered-containers
, hashable
, pretty
-- TODO: either learn recursion-schemes, or stop depending
-- on it.
, recursion-schemes
, megaparsec
, text
, array >= 0.5.5 && < 0.6
, containers >= 0.6.7 && < 0.7
, template-haskell >= 2.20.0 && < 2.21
, pretty >= 1.1.3 && < 1.2
, data-default >= 0.7.1 && < 0.8
, data-default-class >= 0.1.2 && < 0.2
, hashable >= 1.4.3 && < 1.5
, mtl >= 2.3.1 && < 2.4
, text >= 2.0.2 && < 2.1
, megaparsec >= 9.6.1 && < 9.7
, microlens >= 0.4.13 && < 0.5
, microlens-mtl >= 0.2.0 && < 0.3
, microlens-platform >= 0.4.3 && < 0.5
, microlens-th >= 0.4.3 && < 0.5
, unordered-containers >= 0.2.20 && < 0.3
, recursion-schemes >= 5.2.2 && < 5.3
, data-fix >= 0.3.2 && < 0.4
, utf8-string >= 1.0.2 && < 1.1
, extra >= 1.7.0 && < 2
hs-source-dirs: src
default-language: GHC2021
@@ -72,12 +82,12 @@ executable rlpc
-- other-extensions:
build-depends: base ^>=4.18.0.0
, rlp
, optparse-applicative
, microlens
, microlens-mtl
, mtl
, unordered-containers
, text
, optparse-applicative >= 0.18.1 && < 0.19
, microlens >= 0.4.13 && < 0.5
, microlens-mtl >= 0.2.0 && < 0.3
, mtl >= 2.3.1 && < 2.4
, unordered-containers >= 0.2.20 && < 0.3
, text >= 2.0.2 && < 2.1
hs-source-dirs: app
default-language: GHC2021

View File

@@ -1,59 +0,0 @@
{-# LANGUAGE OverloadedStrings #-}
module RLP.Syntax
( RlpExpr
)
where
----------------------------------------------------------------------------------
import Data.Text (Text)
import Lens.Micro
import Core (HasRHS(..), HasLHS(..))
----------------------------------------------------------------------------------
newtype RlpProgram b = RlpProgram [Decl b]
data Decl b = InfixD InfixAssoc Int VarId
| FunD VarId [Pat b] (RlpExpr b)
| DataD ConId [ConId] [ConAlt]
data ConAlt = ConAlt ConId [ConId]
data InfixAssoc = Assoc | AssocL | AssocR
data RlpExpr b = LetE [Bind b] (RlpExpr b)
| VarE VarId
| ConE ConId
| LamE [Pat b] (RlpExpr b)
| CaseE (RlpExpr b) [Alt b]
| IfE (RlpExpr b) (RlpExpr b) (RlpExpr b)
| AppE (RlpExpr b) (RlpExpr b)
| LitE (Lit b)
-- do we want guards?
data Alt b = AltA (Pat b) (RlpExpr b)
data Bind b = PatB (Pat b) (RlpExpr b)
| FunB VarId [Pat b] (RlpExpr b)
data VarId = NameVar Text
| SymVar Text
data ConId = NameCon Text
| SymCon Text
data Pat b = VarP VarId
| LitP (Lit b)
| ConP ConId [Pat b]
data Lit b = IntL Int
| CharL Char
| ListL [RlpExpr b]
-- instance HasLHS Alt Alt Pat Pat where
-- _lhs = lens
-- (\ (AltA p _) -> p)
-- (\ (AltA _ e) p' -> AltA p' e)
-- instance HasRHS Alt Alt RlpExpr RlpExpr where
-- _rhs = lens
-- (\ (AltA _ e) -> e)
-- (\ (AltA p _) e' -> AltA p e')

347
src/Rlp/Lex.x Normal file
View File

@@ -0,0 +1,347 @@
{
{-# LANGUAGE ViewPatterns, LambdaCase #-}
{-# LANGUAGE GeneralisedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings #-}
module Rlp.Lex
( P(..)
, RlpToken(..)
, Located(..)
, lexToken
, lexDebug
, lexCont
, execP
, execP'
)
where
import Codec.Binary.UTF8.String (encodeChar)
import Control.Monad
import Core.Syntax (Name)
import Data.Functor.Identity
import Data.Char (digitToInt)
import Data.Monoid (First)
import Data.Maybe
import Data.Text (Text)
import Data.Text qualified as T
import Data.Word
import Data.Default
import Lens.Micro.Mtl
import Lens.Micro
import Debug.Trace
import Rlp.Parse.Types
}
$whitechar = [ \t\n\r\f\v]
$nl = [\n\r]
$white_no_nl = $white # $nl
$lower = [a-z \_]
$upper = [A-Z]
$alpha = [$lower $upper]
$digit = 0-9
$special = [\(\)\,\;\[\]\{\}]
$namechar = [$alpha $digit \' \#]
$asciisym = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~\:]
@decimal = $digit+
@varname = $lower $namechar*
@conname = $upper $namechar*
@consym = \: $asciisym*
@varsym = $asciisym+
@reservedname =
case|data|do|import|in|let|letrec|module|of|where
@reservedop =
"=" | \\ | "->" | "|" | "::"
rlp :-
-- everywhere: skip whitespace
$white_no_nl+ ;
-- everywhere: skip comments
-- TODO: don't treat operators like (-->) as comments
"--".* ;
-- we are indentation-sensitive! do not skip NLs!. upon encountering a newline,
-- we check indentation and potentially insert extra tokens. search this file
-- for the definition of `doBol`
<0> \n { beginPush bol }
-- scan various identifiers and reserved words. order is important here!
<0>
{
@reservedname { tokenWith lexReservedName }
@conname { tokenWith TokenConName }
@varname { tokenWith TokenVarName }
@reservedop { tokenWith lexReservedOp }
@consym { tokenWith TokenConSym }
@varsym { tokenWith TokenVarSym }
}
-- literals -- currently this is just unsigned integer literals
<0>
{
@decimal { tokenWith (TokenLitInt . readInt) }
}
-- control characters
<0>
{
"(" { constToken TokenLParen }
")" { constToken TokenRParen }
"{" { explicitLBrace }
"}" { explicitRBrace }
";" { constToken TokenSemicolon }
}
-- consume all whitespace leaving us at the beginning of the next non-empty
-- line. we then compare the indentation of that line to the enclosing layout
-- context and proceed accordingly
<bol>
{
$whitechar ;
\n ;
() { doBol }
}
<layout_top>
{
\n ;
"{" { explicitLBrace `thenDo` popLexState }
() { doLayout }
}
{
-- | Map the text of a reserved word to its token.
--
-- The @\@reservedname@ macro also matches words that have no token
-- constructor yet (@do@, @import@, @letrec@, @module@, @where@); those now
-- fail with a message naming the word instead of an anonymous
-- pattern-match failure.
--
-- NOTE(review): the infix keywords are accepted here because the parser
-- expects 'TokenInfixL', 'TokenInfixR' and 'TokenInfix', but the
-- @\@reservedname@ macro does not list them, so the lexer will not reach
-- those clauses until it does.
lexReservedName :: Text -> RlpToken
lexReservedName = \case
    "data"   -> TokenData
    "case"   -> TokenCase
    "of"     -> TokenOf
    "let"    -> TokenLet
    "in"     -> TokenIn
    "infixl" -> TokenInfixL
    "infixr" -> TokenInfixR
    "infix"  -> TokenInfix
    w        -> error $
        "Rlp.Lex.lexReservedName: reserved word without a token: "
            <> T.unpack w
-- | Map the text of a reserved operator to its token.
--
-- Covers every alternative of the @\@reservedop@ macro, including @->@ and
-- the lambda backslash, which previously had no clause and crashed the lexer
-- with a pattern-match failure.
lexReservedOp :: Text -> RlpToken
lexReservedOp = \case
    "="  -> TokenEquals
    "::" -> TokenHasType
    "|"  -> TokenPipe
    "->" -> TokenArrow
    "\\" -> TokenLambda
    -- only reachable if @reservedop grows without this function following
    op   -> error $
        "Rlp.Lex.lexReservedOp: reserved operator without a token: "
            <> T.unpack op
-- | Like 'andBegin', except that the start code on top of the lexer-state
-- stack is replaced /after/ the wrapped action has run. The action's result
-- is returned unchanged.
thenBegin :: LexerAction a -> Int -> LexerAction a
thenBegin act c inp l = act inp l <* (psLexState . _head .= c)
-- | Replace the start code on top of the lexer-state stack with @c@, then run
-- the wrapped action on the same input and match length.
andBegin :: LexerAction a -> Int -> LexerAction a
andBegin act c inp l = (psLexState . _head .= c) >> act inp l
beginPush :: Int -> LexerAction (Located RlpToken)
beginPush n _ _ = pushLexState n >> lexToken
-- | Byte source for the generated scanner.
--
-- When bytes of the previously decoded character are still pending in
-- 'aiBytes', the next one is handed out. Otherwise the next character is
-- taken from 'aiSource' and UTF-8 encoded: its first byte is returned and the
-- rest are stored in 'aiBytes' for the following calls. Returns 'Nothing'
-- once the source text is exhausted.
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte inp = case inp ^. aiBytes of
    (pending:rest) -> Just (pending, inp & aiBytes .~ rest)
    [] -> case T.uncons (inp ^. aiSource) of
        Nothing        -> Nothing
        Just (c, src') ->
            let (b:bs) = encodeChar c
                -- a newline starts the next line at column 1; any other
                -- character advances the column by one
                advance (ln, col)
                    | c == '\n' = (ln + 1, 1)
                    | otherwise = (ln, col + 1)
                inp' = inp & aiSource   .~ src'
                           & aiBytes    .~ bs
                           & aiPrevChar .~ c
                           & aiPos      %~ advance
            in Just (b, inp')
getInput :: P AlexInput
getInput = use psInput
getLexState :: P Int
getLexState = use (psLexState . singular _head)
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar = view aiPrevChar
pushLexState :: Int -> P ()
pushLexState n = psLexState %= (n:)
-- | Read a string of decimal digits as an 'Int'.
--
-- The text is folded strictly from the left, so the most significant digit
-- comes first (@"123"@ reads as 123; a right fold would yield 321). The empty
-- string reads as 0. Characters are not validated here: the caller is
-- expected to pass digits only, as the @\@decimal@ rule does.
readInt :: Text -> Int
readInt = T.foldl' step 0
  where
    step acc c = 10 * acc + digitToInt c
-- | Lexer action that ignores the matched text and emits the fixed token @t@,
-- located at the start of the match with the match length @l@.
--
-- The position is taken from @inp@, the input as it was before the match.
-- 'lexToken' stores the post-match input in the parser state before running
-- the action, so reading the position from the state would report where the
-- token ends rather than where it begins.
constToken :: RlpToken -> LexerAction (Located RlpToken)
constToken t inp l = pure (Located (inp ^. aiPos, l) t)
-- | Lexer action that builds a token from the matched text: the first @l@
-- characters of the pre-match input are passed to @tf@.
--
-- The token is located at the start of the match. The position comes from
-- @inp@ rather than from the parser state, because 'lexToken' has already
-- advanced the state past the token by the time the action runs.
tokenWith :: (Text -> RlpToken) -> LexerAction (Located RlpToken)
tokenWith tf inp l = pure (Located (inp ^. aiPos, l) tok)
  where
    tok = tf (T.take l (inp ^. aiSource))
getPos :: P Position
getPos = use (psInput . aiPos)
-- | The end-of-file token, located at the current input position with
-- length 0. This is the same location the 'AlexEOF' branch of 'lexToken'
-- uses.
--
-- The location used to be @undefined@, which made the token crash as soon as
-- it was shown (for example by the parser's error handler).
alexEOF :: P (Located RlpToken)
alexEOF = do
    inp <- getInput
    pure (Located (inp ^. aiPos, 0) TokenEOF)
execP :: P a -> ParseState -> Maybe a
execP p st = runP p st & snd
execP' :: P a -> Text -> Maybe a
execP' p s = execP p st where
st = initParseState s
-- | Build the initial parser state for a source text: empty layout stack,
-- empty operator table, and an input created by 'initAlexInput'.
initParseState :: Text -> ParseState
initParseState s = ParseState
{ _psLayoutStack = []
-- IMPORTANT: the initial state is `layout_top`, which opens the top-level
-- layout and then pops itself, leaving state 0 to continue the normal lexing
-- process.
, _psLexState = [layout_top,0]
, _psInput = initAlexInput s
, _psOpTable = mempty
}
initAlexInput :: Text -> AlexInput
initAlexInput s = AlexInput
{ _aiPrevChar = '\0'
, _aiSource = s
, _aiBytes = []
, _aiPos = (1,1)
}
-- | Scan the next token from the current input, using the start code on top
-- of the lexer-state stack.
--
-- * At end of input, a 'TokenEOF' located at the current position is
--   returned.
-- * If no rule matches, the parse fails (the 'P' computation yields
--   'Nothing') instead of crashing on an unhandled 'AlexError'.
-- * Skipped input (whitespace, comments) is consumed and scanning continues.
-- * For a token, the state is advanced past the match and the rule's action
--   is run.
lexToken :: P (Located RlpToken)
lexToken = do
    inp <- getInput
    sc  <- getLexState
    case alexScan inp sc of
        AlexEOF -> pure $ Located (inp ^. aiPos, 0) TokenEOF
        AlexError _ -> P $ \st -> (st, Nothing)
        AlexSkip inp' _ -> do
            psInput .= inp'
            lexToken
        AlexToken inp' l act -> do
            psInput .= inp'
            -- the action is given the input as it was /before/ the match, so
            -- it can read the matched text from it
            act inp l
lexCont :: (Located RlpToken -> P a) -> P a
lexCont = (lexToken >>=)
-- | Lex the remaining input into a list of tokens with their locations
-- dropped. The list ends with 'TokenEOF'.
lexStream :: P [RlpToken]
lexStream = lexToken >>= \case
    Located _ TokenEOF -> pure [TokenEOF]
    Located _ tok      -> (tok :) <$> lexStream
lexDebug :: (Located RlpToken -> P a) -> P a
lexDebug k = do
t <- lexToken
traceM $ "token: " <> show t
k t
lexTest :: Text -> Maybe [RlpToken]
lexTest s = execP' lexStream s
indentLevel :: P Int
indentLevel = do
pos <- use (psInput . aiPos)
pure (pos ^. _2)
insertToken :: RlpToken -> P (Located RlpToken)
insertToken t = do
pos <- use (psInput . aiPos)
pure (Located (pos, 0) t)
-- | Remove the innermost layout context from the layout stack and return it.
-- Calls 'error' if the stack is empty.
popLayout :: P Layout
popLayout = do
    -- traceM "pop layout"
    top <- preuse (psLayoutStack . _head)
    psLayoutStack %= drop 1
    maybe (error "uhh") pure top
pushLayout :: Layout -> P ()
pushLayout l = do
-- traceM "push layout"
psLayoutStack %= (l:)
popLexState :: P ()
popLexState = do
psLexState %= tail
insertSemicolon, insertLBrace, insertRBrace :: P (Located RlpToken)
insertSemicolon = {- traceM "inserting semi" >> -} insertToken TokenSemicolonV
insertLBrace = {- traceM "inserting lbrace" >> -} insertToken TokenLBraceV
insertRBrace = {- traceM "inserting rbrace" >> -} insertToken TokenRBraceV
-- | Compare the current column with the indentation of the innermost layout
-- context. When that context is not an implicit one (it is explicit, or the
-- stack is empty), the result is 'GT'.
cmpLayout :: P Ordering
cmpLayout = do
    col <- indentLevel
    top <- preuse (psLayoutStack . _head)
    pure $ case top of
        Just (Implicit n) -> compare col n
        _                 -> GT
-- | Action run at the beginning of a non-empty line (start code @bol@).
-- Compares the line's indentation with the enclosing layout context and
-- inserts the virtual token that the comparison calls for.
--
-- The leftover debug trace that printed the indentation on every line has
-- been removed.
doBol :: LexerAction (Located RlpToken)
doBol _ _ = do
    off <- cmpLayout
    -- important that we pop the lex state lest we find our lexer diverging
    popLexState
    case off of
        -- the line is aligned with the previous. it therefore belongs to the
        -- same list
        EQ -> insertSemicolon
        -- the line is indented further than the previous, so we assume it is
        -- a line continuation. ignore it and move on!
        GT -> lexToken
        -- the line is indented less than the previous, pop the layout stack
        -- and insert a closing brace.
        LT -> popLayout >> insertRBrace
thenDo :: LexerAction a -> P b -> LexerAction a
thenDo act p inp l = act inp l <* p
explicitLBrace :: LexerAction (Located RlpToken)
explicitLBrace inp l = do
pushLayout Explicit
constToken TokenLBrace inp l
explicitRBrace :: LexerAction (Located RlpToken)
explicitRBrace inp l = do
popLayout
constToken TokenRBrace inp l
doLayout :: LexerAction (Located RlpToken)
doLayout _ _ = do
i <- indentLevel
pushLayout (Implicit i)
popLexState
insertLBrace
}

184
src/Rlp/Parse.y Normal file
View File

@@ -0,0 +1,184 @@
{
{-# LANGUAGE LambdaCase #-}
module Rlp.Parse
( parseRlpProg
, execP
, execP'
)
where
import Rlp.Lex
import Rlp.Syntax
import Rlp.Parse.Types
import Rlp.Parse.Associate
import Lens.Micro
import Lens.Micro.Mtl
import Lens.Micro.Platform ()
import Data.List.Extra
import Data.Fix
import Data.Functor.Const
}
%name parseRlpProg StandaloneProgram
%monad { P }
%lexer { lexCont } { Located _ TokenEOF }
%error { parseError }
%tokentype { Located RlpToken }
%token
varname { Located _ (TokenVarName $$) }
conname { Located _ (TokenConName $$) }
consym { Located _ (TokenConSym $$) }
varsym { Located _ (TokenVarSym $$) }
data { Located _ TokenData }
litint { Located _ (TokenLitInt $$) }
'::' { Located _ TokenHasType }
'=' { Located _ TokenEquals }
'|' { Located _ TokenPipe }
';' { Located _ TokenSemicolon }
'(' { Located _ TokenLParen }
')' { Located _ TokenRParen }
'->' { Located _ TokenArrow }
vsemi { Located _ TokenSemicolonV }
'{' { Located _ TokenLBrace }
'}' { Located _ TokenRBrace }
vlbrace { Located _ TokenLBraceV }
vrbrace { Located _ TokenRBraceV }
infixl { Located _ TokenInfixL }
infixr { Located _ TokenInfixR }
infix { Located _ TokenInfix }
%right '->'
%%
StandaloneProgram :: { RlpProgram' }
StandaloneProgram : '{' Decls '}' {% mkProgram $2 }
| VL DeclsV VR {% mkProgram $2 }
VL :: { () }
VL : vlbrace { () }
VR :: { () }
VR : vrbrace { () }
| error { () }
Decls :: { [PartialDecl'] }
Decls : Decl ';' Decls { $1 : $3 }
| Decl ';' { [$1] }
| Decl { [$1] }
-- Declarations inside an implicit (layout) block. Separators may be explicit
-- or virtual semicolons (see VS). The recursive case must recurse into DeclsV
-- itself: recursing into Decls would only accept explicit ';' after the first
-- declaration.
DeclsV      :: { [PartialDecl'] }
DeclsV      : Decl VS DeclsV            { $1 : $3 }
            | Decl VS                   { [$1] }
            | Decl                      { [$1] }
VS :: { Located RlpToken }
VS : ';' { $1 }
| vsemi { $1 }
Decl :: { PartialDecl' }
: FunDecl { $1 }
| TySigDecl { $1 }
| DataDecl { $1 }
| InfixDecl { $1 }
-- TODO: multiple vars
TySigDecl :: { PartialDecl' }
: Var '::' Type { TySigD [$1] $3 }
InfixDecl :: { PartialDecl' }
: InfixWord litint InfixOp {% mkInfixD $1 $2 $3 }
InfixWord :: { Assoc }
: infixl { InfixL }
| infixr { InfixR }
| infix { Infix }
DataDecl :: { PartialDecl' }
: data Con TyParams '=' DataCons { DataD $2 $3 $5 }
TyParams :: { [Name] }
: {- epsilon -} { [] }
| TyParams varname { $1 `snoc` $2 }
DataCons :: { [ConAlt] }
: DataCons '|' DataCon { $1 `snoc` $3 }
| DataCon { [$1] }
DataCon :: { ConAlt }
: Con Type1s { ConAlt $1 $2 }
Type1s :: { [Type] }
: {- epsilon -} { [] }
| Type1s Type1 { $1 `snoc` $2 }
Type1 :: { Type }
: '(' Type ')' { $2 }
| conname { TyCon $1 }
| varname { TyVar $1 }
Type :: { Type }
: Type '->' Type { $1 :-> $3 }
| Type1 { $1 }
FunDecl :: { PartialDecl' }
FunDecl : Var Params '=' Expr { FunD $1 $2 (Const $4) Nothing }
Params :: { [Pat'] }
Params : {- epsilon -} { [] }
| Params Pat1 { $1 `snoc` $2 }
Pat1 :: { Pat' }
: Var { VarP $1 }
| Lit { LitP $1 }
Expr :: { PartialExpr' }
: Expr1 varsym Expr { Fix $ B $2 (unFix $1) (unFix $3) }
| Expr1 { $1 }
Expr1 :: { PartialExpr' }
: '(' Expr ')' { wrapFix . Par . unwrapFix $ $2 }
| Lit { Fix . E $ LitEF $1 }
| Var { Fix . E $ VarEF $1 }
-- TODO: happy prefers left-associativity. doing such would require adjusting
-- the code in Rlp.Parse.Associate to expect left-associative input rather than
-- right.
InfixExpr :: { PartialExpr' }
: Expr1 varsym Expr { Fix $ B $2 (unFix $1) (unFix $3) }
InfixOp :: { Name }
: consym { $1 }
| varsym { $1 }
Lit :: { Lit' }
Lit : litint { IntL $1 }
Var :: { VarId }
Var : varname { NameVar $1 }
Con :: { ConId }
: conname { NameCon $1 }
{
mkProgram :: [PartialDecl'] -> P RlpProgram'
mkProgram ds = do
pt <- use psOpTable
pure $ RlpProgram (associate pt <$> ds)
parseError :: Located RlpToken -> P a
parseError = error . show
-- | Record an infix declaration in the parser's operator table and return
-- the corresponding declaration node.
--
-- @a@ is the associativity, @p@ the precedence and @n@ the operator name.
-- Declaring the same operator twice is currently a fatal error.
mkInfixD :: Assoc -> Int -> Name -> P PartialDecl'
mkInfixD a p n = do
    previous <- use (psOpTable . at n)
    case previous of
        Just _  -> error "(TODO: non-fatal) duplicate infix decls"
        Nothing -> psOpTable . at n .= Just (a, p)
    pure $ InfixD a p n
}

100
src/Rlp/Parse/Associate.hs Normal file
View File

@@ -0,0 +1,100 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternSynonyms, ViewPatterns, ImplicitParams #-}
module Rlp.Parse.Associate
( associate
)
where
--------------------------------------------------------------------------------
import Data.HashMap.Strict qualified as H
import Data.Functor.Foldable
import Data.Functor.Const
import Lens.Micro
import Rlp.Parse.Types
import Rlp.Syntax
--------------------------------------------------------------------------------
associate :: OpTable -> PartialDecl' -> Decl' RlpExpr
associate pt (FunD n as b w) = FunD n as b' w
where b' = let ?pt = pt in completeExpr (getConst b)
associate pt (TySigD ns t) = TySigD ns t
associate pt (DataD n as cs) = DataD n as cs
associate pt (InfixD a p n) = InfixD a p n
completeExpr :: (?pt :: OpTable) => PartialExpr' -> RlpExpr'
completeExpr = cata completePartial
-- | Algebra used by 'completeExpr' (via 'cata'): turns one layer of a partial
-- expression, whose recursive positions already hold complete expressions,
-- into a complete expression.
completePartial :: (?pt :: OpTable) => PartialE -> RlpExpr'
-- an ordinary expression node is re-embedded unchanged
completePartial (E e) = completeRlpExpr e
-- NOTE(review): 'completeB' itself begins with @case build p of@, so the tree
-- passes through 'build' twice on this path — confirm that is intended.
completePartial p@(B o l r) = completeB (build p)
-- parentheses carry no meaning of their own once their contents are complete
completePartial (Par e) = completePartial e
completeRlpExpr :: (?pt :: OpTable) => RlpExprF' RlpExpr' -> RlpExpr'
completeRlpExpr = embed
completeB :: (?pt :: OpTable) => PartialE -> RlpExpr'
completeB p = case build p of
B o l r -> (o' `AppE` l') `AppE` r'
where
-- TODO: how do we know it's symbolic?
o' = VarE (SymVar o)
l' = completeB l
r' = completeB r
Par e -> completeB e
E e -> completeRlpExpr e
build :: (?pt :: OpTable) => PartialE -> PartialE
build e = go id e (rightmost e) where
rightmost :: PartialE -> PartialE
rightmost (B _ _ r) = rightmost r
rightmost p@(E _) = p
rightmost p@(Par _) = p
go :: (?pt :: OpTable)
=> (PartialE -> PartialE)
-> PartialE -> PartialE -> PartialE
go f p@(WithInfo o _ r) = case r of
E _ -> mkHole o (f . f')
Par _ -> mkHole o (f . f')
B _ _ _ -> go (mkHole o (f . f')) r
where f' r' = p & pR .~ r'
go f _ = id
mkHole :: (?pt :: OpTable)
=> OpInfo
-> (PartialE -> PartialE)
-> PartialE
-> PartialE
mkHole _ hole p@(Par _) = hole p
mkHole _ hole p@(E _) = hole p
mkHole (a,d) hole p@(WithInfo (a',d') _ _)
| d' < d = above
| d' > d = below
| d == d' = case (a,a') of
-- left-associative operators of equal precedence are
-- associated left
(InfixL,InfixL) -> above
-- right-associative operators are handled similarly
(InfixR,InfixR) -> below
-- non-associative operators of equal precedence, or equal
-- precedence operators of different associativities are
-- invalid
(_, _) -> error "invalid expression"
where
above = p & pL %~ hole
below = hole p
examplePrecTable :: OpTable
examplePrecTable = H.fromList
[ ("+", (InfixL,6))
, ("*", (InfixL,7))
, ("^", (InfixR,8))
, (".", (InfixR,7))
, ("~", (Infix, 9))
, ("=", (Infix, 4))
, ("&&", (Infix, 3))
, ("||", (Infix, 2))
, ("$", (InfixR,0))
, ("&", (InfixL,0))
]

163
src/Rlp/Parse/Types.hs Normal file
View File

@@ -0,0 +1,163 @@
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ImplicitParams, ViewPatterns, PatternSynonyms #-}
{-# LANGUAGE LambdaCase #-}
module Rlp.Parse.Types where
--------------------------------------------------------------------------------
import Core.Syntax (Name)
import Control.Monad
import Control.Monad.State.Class
import Data.Text (Text)
import Data.Maybe
import Data.Fix
import Data.Functor.Foldable
import Data.Functor.Const
import Data.Functor.Classes
import Data.HashMap.Strict qualified as H
import Data.Word (Word8)
import Lens.Micro.TH
import Lens.Micro
import Rlp.Syntax
--------------------------------------------------------------------------------
type LexerAction a = AlexInput -> Int -> P a
data AlexInput = AlexInput
{ _aiPrevChar :: Char
, _aiSource :: Text
, _aiBytes :: [Word8]
, _aiPos :: Position
}
deriving Show
type Position =
( Int -- line
, Int -- column
)
data RlpToken
-- literals
= TokenLitInt Int
-- identifiers
| TokenVarName Name
| TokenConName Name
| TokenVarSym Name
| TokenConSym Name
-- reserved words
| TokenData
| TokenCase
| TokenOf
| TokenLet
| TokenIn
| TokenInfixL
| TokenInfixR
| TokenInfix
-- reserved ops
| TokenArrow
| TokenPipe
| TokenHasType
| TokenLambda
| TokenEquals
-- control symbols
| TokenSemicolon
| TokenLBrace
| TokenRBrace
| TokenLParen
| TokenRParen
-- 'virtual' control symbols, inserted by the lexer without any correlation
-- to a specific symbol
| TokenSemicolonV
| TokenLBraceV
| TokenRBraceV
| TokenEOF
deriving (Show)
newtype P a = P { runP :: ParseState -> (ParseState, Maybe a) }
deriving (Functor)
instance Applicative P where
pure a = P $ \st -> (st,Just a)
liftA2 = liftM2
instance Monad P where
p >>= k = P $ \st ->
let (st',a) = runP p st
in case a of
Just x -> runP (k x) st'
Nothing -> (st', Nothing)
instance MonadState ParseState P where
state f = P $ \st ->
let (a,st') = f st
in (st', Just a)
data ParseState = ParseState
{ _psLayoutStack :: [Layout]
, _psLexState :: [Int]
, _psInput :: AlexInput
, _psOpTable :: OpTable
}
deriving Show
data Layout = Explicit
| Implicit Int
deriving (Show, Eq)
data Located a = Located (Position, Int) a
deriving (Show)
type OpTable = H.HashMap Name OpInfo
type OpInfo = (Assoc, Int)
-- data WithLocation a = WithLocation [String] a
data RlpParseError = RlpParErrOutOfBoundsPrecedence Int
| RlpParErrDuplicateInfixD
deriving (Eq, Ord, Show)
----------------------------------------------------------------------------------
-- absolute psycho shit (partial ASTs)
type PartialDecl' = Decl (Const PartialExpr') Name
data Partial a = E (RlpExprF Name a)
| B Name (Partial a) (Partial a)
| Par (Partial a)
deriving (Show, Functor)
pL :: Traversal' (Partial a) (Partial a)
pL k (B o l r) = (\l' -> B o l' r) <$> k l
pL _ x = pure x
pR :: Traversal' (Partial a) (Partial a)
pR k (B o l r) = (\r' -> B o l r') <$> k r
pR _ x = pure x
type PartialE = Partial RlpExpr'
-- i love you haskell
pattern WithInfo :: (?pt :: OpTable) => OpInfo -> PartialE -> PartialE -> PartialE
pattern WithInfo p l r <- B (opInfoOrDef -> p) l r
opInfoOrDef :: (?pt :: OpTable) => Name -> OpInfo
opInfoOrDef c = fromMaybe (InfixL,9) $ H.lookup c ?pt
-- required to satisfy constraint on Fix's show instance
instance Show1 Partial where
liftShowsPrec :: forall a. (Int -> a -> ShowS)
-> ([a] -> ShowS)
-> Int -> Partial a -> ShowS
liftShowsPrec sp sl p m = case m of
(E e) -> showsUnaryWith lshow "E" p e
(B f a b) -> showsTernaryWith showsPrec lshow lshow "B" p f a b
(Par e) -> showsUnaryWith lshow "Par" p e
where
lshow :: forall f. (Show1 f) => Int -> f a -> ShowS
lshow = liftShowsPrec sp sl
type PartialExpr' = Fix Partial
makeLenses ''AlexInput
makeLenses ''ParseState

178
src/Rlp/Syntax.hs Normal file
View File

@@ -0,0 +1,178 @@
-- recursion-schemes
{-# LANGUAGE DeriveFunctor, DeriveFoldable, DeriveTraversable #-}
-- recursion-schemes
{-# LANGUAGE TemplateHaskell, TypeFamilies #-}
{-# LANGUAGE OverloadedStrings, PatternSynonyms #-}
module Rlp.Syntax
( RlpModule(..)
, RlpProgram(..)
, RlpProgram'
, rlpmodName
, rlpmodProgram
, RlpExpr(..)
, RlpExpr'
, RlpExprF(..)
, RlpExprF'
, Decl(..)
, Decl'
, Bind(..)
, Where
, Where'
, ConAlt(..)
, Type(..)
, pattern (:->)
, Assoc(..)
, VarId(..)
, ConId(..)
, Pat(..)
, Pat'
, Lit(..)
, Lit'
, Name
-- TODO: ugh move this somewhere else later
, showsTernaryWith
-- * Convenience re-exports
, Text
)
where
----------------------------------------------------------------------------------
import Data.Text (Text)
import Data.Text qualified as T
import Data.String (IsString(..))
import Data.Functor.Foldable.TH (makeBaseFunctor)
import Data.Functor.Classes
import Lens.Micro
import Lens.Micro.TH
import Language.Haskell.TH.Syntax (Lift)
import Core.Syntax hiding (Lit)
import Core (HasRHS(..), HasLHS(..))
----------------------------------------------------------------------------------
data RlpModule b = RlpModule
{ _rlpmodName :: Text
, _rlpmodProgram :: RlpProgram b
}
newtype RlpProgram b = RlpProgram [Decl RlpExpr b]
deriving (Show, Lift)
type RlpProgram' = RlpProgram Name
-- | A top-level declaration. The @e@ parameter is the representation used for
-- function bodies ('FunD' stores an @e b@) and exists to support partial
-- results. The parser first produces declarations of type
-- @Decl (Const PartialExpr') Name@ (the @PartialDecl'@ alias in
-- "Rlp.Parse.Types"), in which infix expressions have not yet been
-- re-associated, while recording infix[lr] declarations in the operator
-- table. Once those are known, @associate@ from "Rlp.Parse.Associate"
-- completes each body, giving a proper @Decl RlpExpr Name@.
data Decl e b = FunD VarId [Pat b] (e b) (Maybe (Where b))
| TySigD [VarId] Type
| DataD ConId [Name] [ConAlt]
| InfixD Assoc Int Name
deriving (Show, Lift)
type Decl' e = Decl e Name
data Assoc = InfixL
| InfixR
| Infix
deriving (Show, Lift)
data ConAlt = ConAlt ConId [Type]
deriving (Show, Lift)
data RlpExpr b = LetE [Bind b] (RlpExpr b)
| VarE VarId
| ConE ConId
| LamE [Pat b] (RlpExpr b)
| CaseE (RlpExpr b) [(Alt b, Where b)]
| IfE (RlpExpr b) (RlpExpr b) (RlpExpr b)
| AppE (RlpExpr b) (RlpExpr b)
| LitE (Lit b)
deriving (Show, Lift)
type RlpExpr' = RlpExpr Name
type Where b = [Bind b]
type Where' = [Bind Name]
-- do we want guards?
data Alt b = AltA (Pat b) (RlpExpr b)
deriving (Show, Lift)
data Bind b = PatB (Pat b) (RlpExpr b)
| FunB VarId [Pat b] (RlpExpr b)
deriving (Show, Lift)
data VarId = NameVar Text
| SymVar Text
deriving (Show, Lift)
instance IsString VarId where
-- TODO: use symvar if it's an operator
fromString = NameVar . T.pack
data ConId = NameCon Text
| SymCon Text
deriving (Show, Lift)
data Pat b = VarP VarId
| LitP (Lit b)
| ConP ConId [Pat b]
deriving (Show, Lift)
type Pat' = Pat Name
data Lit b = IntL Int
| CharL Char
| ListL [RlpExpr b]
deriving (Show, Lift)
type Lit' = Lit Name
-- instance HasLHS Alt Alt Pat Pat where
-- _lhs = lens
-- (\ (AltA p _) -> p)
-- (\ (AltA _ e) p' -> AltA p' e)
-- instance HasRHS Alt Alt RlpExpr RlpExpr where
-- _rhs = lens
-- (\ (AltA _ e) -> e)
-- (\ (AltA p _) e' -> AltA p e')
makeBaseFunctor ''RlpExpr
deriving instance (Show b, Show a) => Show (RlpExprF b a)
type RlpExprF' = RlpExprF Name
-- society if derivable Show1
instance (Show b) => Show1 (RlpExprF b) where
liftShowsPrec sp _ p m = case m of
(LetEF bs e) -> showsBinaryWith showsPrec sp "LetEF" p bs e
(VarEF n) -> showsUnaryWith showsPrec "VarEF" p n
(ConEF n) -> showsUnaryWith showsPrec "ConEF" p n
(LamEF bs e) -> showsBinaryWith showsPrec sp "LamEF" p bs e
(CaseEF e as) -> showsBinaryWith sp showsPrec "CaseEF" p e as
(IfEF a b c) -> showsTernaryWith sp sp sp "IfEF" p a b c
(AppEF f x) -> showsBinaryWith sp sp "AppEF" p f x
(LitEF l) -> showsUnaryWith showsPrec "LitEF" p l
-- | Three-argument counterpart of 'showsUnaryWith' and 'showsBinaryWith':
-- show a constructor named @name@ applied to three arguments, each rendered
-- by its own function at precedence 11. The result is parenthesised when the
-- surrounding precedence @p@ is greater than 10.
showsTernaryWith :: (Int -> x -> ShowS)
                 -> (Int -> y -> ShowS)
                 -> (Int -> z -> ShowS)
                 -> String -> Int
                 -> x -> y -> z
                 -> ShowS
showsTernaryWith sa sb sc name p a b c = showParen (p > 10) body
  where
    body = showString name . arg sa a . arg sb b . arg sc c
    -- one space-separated argument in application position
    arg shw v = showChar ' ' . shw 11 v
--------------------------------------------------------------------------------
makeLenses ''RlpModule

30
src/Rlp/TH.hs Normal file
View File

@@ -0,0 +1,30 @@
module Rlp.TH
( rlpProg
)
where
--------------------------------------------------------------------------------
import Language.Haskell.TH
import Language.Haskell.TH.Syntax hiding (Module)
import Language.Haskell.TH.Quote
import Control.Monad ((>=>))
import Compiler.RLPC
import Data.Default.Class (def)
import Data.Text qualified as T
import Rlp.Parse
--------------------------------------------------------------------------------
rlpProg :: QuasiQuoter
rlpProg = QuasiQuoter
{ quoteExp = qRlpProg
, quotePat = error "rlp quasiquotes may only be used in expressions"
, quoteType = error "rlp quasiquotes may only be used in expressions"
, quoteDec = error "rlp quasiquotes may only be used in expressions"
}
-- | Expression quoter for rlp programs: parse the quoted text with
-- 'parseRlpProg' and lift the resulting program into a Haskell expression.
--
-- A parse failure is reported through 'fail', which turns it into an
-- ordinary compile-time error at the quasiquote's use site.
qRlpProg :: String -> Q Exp
qRlpProg s = maybe parseFailure lift (parse (T.pack s))
  where
    parse        = execP' parseRlpProg
    parseFailure = fail "rlp quasiquote: could not parse the quoted program"

44
src/Rlp2Core.hs Normal file
View File

@@ -0,0 +1,44 @@
{-# LANGUAGE LambdaCase #-}
module Rlp2Core
( rlp2core
)
where
--------------------------------------------------------------------------------
import Core.Syntax as Core
import Rlp.Syntax as Rlp
import Data.Foldable
import Data.HashMap.Strict qualified as H
import Control.Monad.State
import Lens.Micro.Platform
--------------------------------------------------------------------------------
rlp2core :: RlpProgram' -> Program'
rlp2core (RlpProgram ds) = execState (decl2core `traverse_` ds) init
where
init = Program
{ _programScDefs = mempty
, _programTypeSigs = mempty
}
type GenCoreProg b = State (Program b)
type GenCoreProg' = GenCoreProg Name
emitTypeSig :: Name -> Type -> GenCoreProg' ()
emitTypeSig b t = do
let tl :: Lens' Program' (Maybe Type)
tl = programTypeSigs . at b
tl <~ (use tl >>= \case
-- TODO: non-fatal error
Just o -> error "(TODO: non-fatal) duplicate type sigs"
Nothing -> pure (Just t)
)
-- | Translate one rlp declaration, adding its contribution to the Core
-- program being accumulated in the state.
--
-- Only type signatures are translated so far. The remaining declaration forms
-- stop with a message naming what is missing, rather than an anonymous
-- pattern-match failure or @undefined@.
decl2core :: Decl' RlpExpr -> GenCoreProg' ()
decl2core (TySigD vs t) = mkSig `traverse_` vs
  where
    -- a signature may name an ordinary variable or an operator; both carry
    -- the name under which the signature is recorded
    mkSig :: VarId -> GenCoreProg' ()
    mkSig (NameVar n) = emitTypeSig n t
    mkSig (SymVar n)  = emitTypeSig n t
decl2core DataD{}  =
    error "Rlp2Core.decl2core: data declarations are not yet implemented"
decl2core FunD{}   =
    error "Rlp2Core.decl2core: function declarations are not yet implemented"
decl2core InfixD{} =
    error "Rlp2Core.decl2core: infix declarations are not yet implemented"

View File