From c39a8436607b3ab51487276a19e800ab4a5d3589 Mon Sep 17 00:00:00 2001 From: crumbtoo Date: Wed, 15 Nov 2023 17:38:08 -0700 Subject: [PATCH 1/7] core quasiquoter --- rlp.cabal | 2 ++ src/Control/Parser.hs | 5 +++++ src/Core/Parse.hs | 4 ++++ src/Core/Syntax.hs | 17 +++++++++-------- src/Core/TH.hs | 39 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 59 insertions(+), 8 deletions(-) create mode 100644 src/Core/TH.hs diff --git a/rlp.cabal b/rlp.cabal index e25a5ab..d1d2d71 100644 --- a/rlp.cabal +++ b/rlp.cabal @@ -26,12 +26,14 @@ library , Core.Syntax , Core.Parse , Core.Lex + , Core.TH -- other-extensions: build-depends: base ^>=4.18.0.0 , containers , microlens , microlens-th + , template-haskell hs-source-dirs: src default-language: GHC2021 diff --git a/src/Control/Parser.hs b/src/Control/Parser.hs index 332aeaf..b1b2caa 100644 --- a/src/Control/Parser.hs +++ b/src/Control/Parser.hs @@ -62,6 +62,11 @@ instance (MonadFail m) => MonadFail (ParserT i m) where ---------------------------------------------------------------------------------- +eof :: (MonadPlus m) => ParserT [a] m () +eof = ParserT $ \case + [] -> pure ([], ()) + _ -> empty + -- TODO: generalise to non-lists satisfy :: (MonadPlus m) => (a -> Bool) -> ParserT [a] m a satisfy p = ParserT $ \case diff --git a/src/Core/Parse.hs b/src/Core/Parse.hs index b9f4576..8cb9497 100644 --- a/src/Core/Parse.hs +++ b/src/Core/Parse.hs @@ -1,6 +1,7 @@ {-# LANGUAGE LambdaCase, BlockArguments #-} module Core.Parse ( parseCore + , parseCoreExpr ) where ---------------------------------------------------------------------------------- @@ -15,6 +16,9 @@ type CoreParser = ParserT [CoreToken] Result parseCore :: [CoreToken] -> Result Program parseCore = fmap snd . runParserT program +parseCoreExpr :: [CoreToken] -> Result Expr +parseCoreExpr = fmap snd . 
runParserT expr + program :: CoreParser Program program = Program <$> termMany (char TokSemicolon) scdef diff --git a/src/Core/Syntax.hs b/src/Core/Syntax.hs index 46c9c8e..b08bf1d 100644 --- a/src/Core/Syntax.hs +++ b/src/Core/Syntax.hs @@ -18,9 +18,10 @@ module Core.Syntax ---------------------------------------------------------------------------------- import Data.Coerce import Data.Pretty -import Data.List (intersperse) -import Data.Function ((&)) +import Data.List (intersperse) +import Data.Function ((&)) import Data.String +import Language.Haskell.TH.Syntax (Lift) ---------------------------------------------------------------------------------- data Expr = Var Name @@ -30,14 +31,14 @@ data Expr = Var Name | Lam [Name] Expr | App Expr Expr | IntE Int - deriving Show + deriving (Show, Lift) infixl 2 :$ pattern (:$) :: Expr -> Expr -> Expr pattern f :$ x = App f x data Binding = Binding Name Expr - deriving Show + deriving (Show, Lift) infixl 1 := pattern (:=) :: Name -> Expr -> Binding @@ -45,18 +46,18 @@ pattern k := v = Binding k v data Rec = Rec | NonRec - deriving (Show, Eq) + deriving (Show, Eq, Lift) data Alter = Alter Int [Name] Expr - deriving Show + deriving (Show, Lift) type Name = String data ScDef = ScDef Name [Name] Expr - deriving (Show) + deriving (Show, Lift) newtype Program = Program [ScDef] - deriving (Show) + deriving (Show, Lift) instance IsString Expr where fromString = Var diff --git a/src/Core/TH.hs b/src/Core/TH.hs new file mode 100644 index 0000000..f455bb1 --- /dev/null +++ b/src/Core/TH.hs @@ -0,0 +1,39 @@ +module Core.TH + ( coreExpr + , core + ) + where +---------------------------------------------------------------------------------- +import Language.Haskell.TH +import Language.Haskell.TH.Syntax +import Language.Haskell.TH.Quote +import Core.Parse +import Core.Lex +---------------------------------------------------------------------------------- + +core :: QuasiQuoter +core = QuasiQuoter + { quoteExp = qCore + , quotePat = 
error "core quasiquotes may only be used in expressions" + , quoteType = error "core quasiquotes may only be used in expressions" + , quoteDec = error "core quasiquotes may only be used in expressions" + } + +coreExpr :: QuasiQuoter +coreExpr = QuasiQuoter + { quoteExp = qCoreExpr + , quotePat = error "core quasiquotes may only be used in expressions" + , quoteType = error "core quasiquotes may only be used in expressions" + , quoteDec = error "core quasiquotes may only be used in expressions" + } + +qCore :: String -> Q Exp +qCore s = case lexCore s >>= parseCore of + Success a -> lift a + Error e _ _ -> error e + +qCoreExpr :: String -> Q Exp +qCoreExpr s = case lexCore s >>= parseCoreExpr of + Success a -> lift a + Error e _ _ -> error e + From ac55a1908c394cdb7fcc2d5d835e14de0123fa7e Mon Sep 17 00:00:00 2001 From: crumbtoo Date: Thu, 16 Nov 2023 19:11:01 -0700 Subject: [PATCH 2/7] unproductive day i fear --- docs/src/commentary/layout-lexing.rst | 3 + docs/src/commentary/parser.rst | 3 - rlp.cabal | 8 +- src/Control/Parser.hs | 106 ------------------ src/Core/Examples.hs | 22 ++++ src/Core/Lex.hs | 141 ------------------------ src/Core/Lex.x | 107 ++++++++++++++++++ src/Core/Parse.hs | 99 ----------------- src/Core/Parse.y | 82 ++++++++++++++ src/Core/Syntax.hs | 30 ++++-- src/Core/TH.hs | 19 ++-- src/TIM.hs | 150 +++++++++++++------------- 12 files changed, 330 insertions(+), 440 deletions(-) create mode 100644 docs/src/commentary/layout-lexing.rst delete mode 100644 docs/src/commentary/parser.rst delete mode 100644 src/Control/Parser.hs create mode 100644 src/Core/Examples.hs delete mode 100644 src/Core/Lex.hs create mode 100644 src/Core/Lex.x delete mode 100644 src/Core/Parse.hs create mode 100644 src/Core/Parse.y diff --git a/docs/src/commentary/layout-lexing.rst b/docs/src/commentary/layout-lexing.rst new file mode 100644 index 0000000..5a1f8ab --- /dev/null +++ b/docs/src/commentary/layout-lexing.rst @@ -0,0 +1,3 @@ +Parsing and the Layout System 
+============================= + diff --git a/docs/src/commentary/parser.rst b/docs/src/commentary/parser.rst deleted file mode 100644 index 27d0213..0000000 --- a/docs/src/commentary/parser.rst +++ /dev/null @@ -1,3 +0,0 @@ -Parser Combinators -================== - diff --git a/rlp.cabal b/rlp.cabal index d1d2d71..7b4a2c5 100644 --- a/rlp.cabal +++ b/rlp.cabal @@ -22,11 +22,13 @@ library , TIM other-modules: Data.Heap , Data.Pretty - , Control.Parser , Core.Syntax , Core.Parse - , Core.Lex , Core.TH + , Core.Examples + , Core.Lex + + build-tool-depends: happy:happy, alex:alex -- other-extensions: build-depends: base ^>=4.18.0.0 @@ -34,6 +36,8 @@ library , microlens , microlens-th , template-haskell + -- required for happy + , array hs-source-dirs: src default-language: GHC2021 diff --git a/src/Control/Parser.hs b/src/Control/Parser.hs deleted file mode 100644 index b1b2caa..0000000 --- a/src/Control/Parser.hs +++ /dev/null @@ -1,106 +0,0 @@ -{-| -Module : Control.Parser -Description : Parser combinators - -This module implements an interface for parser *types*, used in lexical analysis -and parsing. For the implementation of the rlp language's parser, see 'Parse'. 
--} -{-# LANGUAGE GeneralisedNewtypeDeriving #-} -{-# LANGUAGE BlockArguments, LambdaCase #-} -module Control.Parser - ( ParserT - , runParserT - - , satisfy - , char - , spaces - , surround - , string - , match - , termMany - , sepSome - - -- * Control.Applicative re-exports - , (<|>) - , many - , some - , empty - ) - where ----------------------------------------------------------------------------------- -import Control.Applicative -import Control.Arrow ((***)) -import Control.Monad -import Data.Char ----------------------------------------------------------------------------------- - -newtype ParserT i m o = ParserT { runParserT :: i -> m (i, o) } - deriving (Functor) - -instance (Monad m) => Applicative (ParserT i m) where - pure a = ParserT \i -> pure (i, a) - - m <*> k = ParserT \i -> do - (i',f) <- runParserT m i - fmap (id *** f) $ runParserT k i' - -instance (MonadPlus m) => Alternative (ParserT i m) where - empty = ParserT $ const empty - - ParserT m <|> ParserT k = ParserT $ \i -> - m i <|> k i - -instance (MonadPlus m) => MonadPlus (ParserT i m) - -instance (Monad m) => Monad (ParserT i m) where - m >>= k = ParserT $ \i -> do - (i',a) <- runParserT m i - runParserT (k a) i' - -instance (MonadFail m) => MonadFail (ParserT i m) where - fail s = ParserT $ \i -> fail s - ----------------------------------------------------------------------------------- - -eof :: (MonadPlus m) => ParserT [a] m () -eof = ParserT $ \case - [] -> pure ([], ()) - _ -> empty - --- TODO: generalise to non-lists -satisfy :: (MonadPlus m) => (a -> Bool) -> ParserT [a] m a -satisfy p = ParserT $ \case - (x:xs) | p x -> pure (xs,x) - _ -> empty - -match :: (MonadPlus m) => (a -> Maybe b) -> ParserT [a] m b -match f = ParserT $ \case - (x:xs) -> case f x of - Just b -> pure (xs,b) - Nothing -> empty - [] -> empty - -termMany :: (MonadPlus m) => ParserT i m t -> ParserT i m o -> ParserT i m [o] -termMany t a = many (a <* t) - -sepSome :: (MonadPlus m) => ParserT i m t -> ParserT i m o 
-> ParserT i m [o] -sepSome s a = (:) <$> a <*> many (s *> a) - -char :: (MonadPlus m, Eq a) => a -> ParserT [a] m a -char c = satisfy (==c) - -string :: (MonadPlus m, Eq a) => [a] -> ParserT [a] m [a] -string s = sequenceA $ char <$> s - ----------------------------------------------------------------------------------- - -surround :: (MonadPlus m) - => ParserT i m l - -> ParserT i m r - -> ParserT i m c - -> ParserT i m c -surround l r c = l *> c <* r - -spaces :: (MonadPlus m) => ParserT String m Int -spaces = length <$> many (satisfy (==' ')) - diff --git a/src/Core/Examples.hs b/src/Core/Examples.hs new file mode 100644 index 0000000..51a1461 --- /dev/null +++ b/src/Core/Examples.hs @@ -0,0 +1,22 @@ +{-# LANGUAGE QuasiQuotes #-} +module Core.Examples where +---------------------------------------------------------------------------------- +import Core.Syntax +import Core.TH +---------------------------------------------------------------------------------- + +{- + +letrecExample :: Program +letrecExample = [core| +pair x y f = f x y; +fst p = p k; +snd p = p k1; +f x y = letrec + { a = pair x b; + ; b = pair y a + } in fst (snd (snd (snd a))); +main = f 3 4; +|] + +-} diff --git a/src/Core/Lex.hs b/src/Core/Lex.hs deleted file mode 100644 index 2dca505..0000000 --- a/src/Core/Lex.hs +++ /dev/null @@ -1,141 +0,0 @@ -{-| -Module : Core.Lex -Description : Core language lexer --} -module Core.Lex - ( CoreToken(..) - , Result(..) 
- , lexCore - ) - where ----------------------------------------------------------------------------------- -import Control.Parser -import Control.Applicative -import Control.Monad -import Data.Char -import Data.Functor -import Core.Syntax (Name) ----------------------------------------------------------------------------------- - -type CoreLexer = ParserT String Result - -data Result a = Success a - | Error String Int Int - deriving (Show) - --- TODO: whitespace-sensitive layout -data CoreToken = TokLitInt Int - | TokEquals - | TokLBrace - | TokRBrace - | TokSemicolon - | TokLParen - | TokRParen - | TokLambda - | TokArrow - | TokCase - | TokOf - | TokLet - | TokLetRec - | TokIn - | TokCName Name - | TokName Name - deriving (Show, Eq) - -instance Functor Result where - fmap f (Success a) = Success (f a) - fmap _ (Error s l c) = Error s l c - -instance Foldable Result where - foldr f z (Success a) = a `f` z - foldr _ z (Error _ _ _) = z - -instance Traversable Result where - traverse k (Success a) = fmap Success (k a) - traverse _ (Error s l c) = pure $ Error s l c - -instance Applicative Result where - pure = Success - - liftA2 f (Success a) (Success b) = Success $ f a b - liftA2 _ (Error s l c) _ = Error s l c - liftA2 _ _ (Error s l c) = Error s l c - -instance Alternative Result where - empty = Error "some error! this is a temporary system lol" 0 0 - - (Success a) <|> _ = Success a - _ <|> b = b - -instance Monad Result where - Success a >>= k = k a - Error s l c >>= _ = Error s l c - -instance MonadPlus Result - -instance MonadFail Result where - fail s = Error s 0 0 - ----------------------------------------------------------------------------------- - -lexCore :: String -> Result [CoreToken] -lexCore = fmap snd . 
runParserT (many (token <* spaces)) - -token :: CoreLexer CoreToken -token = litInt - <|> lbrace - <|> rbrace - <|> semicolon - <|> lparen - <|> rparen - <|> equals - <|> lambda - <|> arrow - <|> _case - <|> _of - <|> letrec - <|> _let - <|> _in - <|> cName - <|> name - ----------------------------------------------------------------------------------- - -litInt, equals, lparen, rparen, lambda, - arrow, _case, _of, _let, letrec, _in, cName, name :: CoreLexer CoreToken - -litInt = TokLitInt . value <$> some (satisfy isDigit) - where - value = foldl (\acc a -> 10*acc + digitToInt a) 0 - -semicolon = (semis <|> nls) $> TokSemicolon - where - nls = head <$> some (char '\n') - semis = char ';' <* many (char '\n') -equals = char '=' $> TokEquals -lbrace = char '{' $> TokLBrace -rbrace = char '}' $> TokRBrace -lparen = char '(' $> TokLParen -rparen = char ')' $> TokRParen -lambda = (char '\\' <|> char 'λ') $> TokLambda -arrow = string "->" $> TokArrow -_case = string "case" $> TokCase -_of = string "of" $> TokOf -_let = string "let" $> TokLet -letrec = string "letrec" $> TokLetRec -_in = string "in" $> TokIn - -cName = TokCName <$> ((:) <$> cNameHead <*> properNameTail) - where cNameHead = satisfy isUpper - -name = some (satisfy p) <&> TokName - where p c = not (isSpace c) && c `notElem` ";{}()" - -properName :: CoreLexer Name -properName = (:) <$> nameHead <*> properNameTail - where nameHead = satisfy isLetter - -properNameTail :: CoreLexer Name -properNameTail = many . satisfy $ \c -> - isLetter c || isDigit c || c == '_' - diff --git a/src/Core/Lex.x b/src/Core/Lex.x new file mode 100644 index 0000000..d5cfa67 --- /dev/null +++ b/src/Core/Lex.x @@ -0,0 +1,107 @@ +{ +module Core.Lex + ( CoreToken(..) + , lexCore + ) + where + +import Core.Syntax +import Lens.Micro +} + +%wrapper "monadUserState" + +$digit = 0-9 +$alpha = [a-zA-Z] + +$special = [\*\^\%\$#@!\<\>\+\-\=\/&\|\\\.] 
+ +$nameTail = [ $alpha $digit \_ \' ] + +rlp :- + +-- tokens :- +-- $white+ ; +-- "--" ~$special .* ; +-- module { const TokenModule } +-- where { const TokenWhere } +-- let { const TokenLet } +-- letrec { const TokenLetrec } +-- in { const TokenIn } +-- case { const TokenCase } +-- of { const TokenOf } +-- $digit+ { TokenLitInt . read @Int } +-- "," { const TokenComma } +-- "(" { const TokenLParen } +-- ")" { const TokenRParen } +-- "{" { const TokenLBrace } +-- "}" { const TokenRBrace } +-- "\\" { const TokenLambda } +-- "λ" { const TokenLambda } +-- ";" { const TokenSemicolon } +-- $special+ { lexSym } +-- $alpha $nameTail* { TokenName } + +<0> \n {begin bol} + + +{ + \n ; + () { doBOL } +} + +{ + +data CoreToken = TokenLet + | TokenLetrec + | TokenIn + | TokenModule + | TokenWhere + | TokenComma + | TokenCase + | TokenOf + | TokenLambda + | TokenArrow + | TokenLitInt Int + | TokenName Name + | TokenSym Name + | TokenEquals + | TokenLParen + | TokenRParen + | TokenLBrace + | TokenRBrace + | TokenSemicolon + deriving Show + +data LayoutContext = NoLayout + | Layout Int + +data AlexUserState = AlexUserState + { _ausContext :: [LayoutContext] + } + +ausContext :: Lens' AlexUserState [LayoutContext] +ausContext = lens _ausContext (\s b -> s { _ausContext = b }) + +alexInitUserState = AlexUserState + { _ausContext = [] + } + +-- lexCore :: String -> [CoreToken] +lexCore = alexScanTokens + +-- lexSym :: String -> CoreToken +-- lexSym "=" = TokenEquals +-- lexSym "\\" = TokenLambda +-- lexSym "->" = TokenArrow +-- lexSym s = TokenSym s + +lexSym = undefined + +doBOL = undefined + +alexEOF = undefined + +alexScanTokens = undefined + +} diff --git a/src/Core/Parse.hs b/src/Core/Parse.hs deleted file mode 100644 index 8cb9497..0000000 --- a/src/Core/Parse.hs +++ /dev/null @@ -1,99 +0,0 @@ -{-# LANGUAGE LambdaCase, BlockArguments #-} -module Core.Parse - ( parseCore - , parseCoreExpr - ) - where 
----------------------------------------------------------------------------------- -import Control.Parser -import Data.Functor ((<&>), ($>)) -import Core.Lex -import Core.Syntax ----------------------------------------------------------------------------------- - -type CoreParser = ParserT [CoreToken] Result - -parseCore :: [CoreToken] -> Result Program -parseCore = fmap snd . runParserT program - -parseCoreExpr :: [CoreToken] -> Result Expr -parseCoreExpr = fmap snd . runParserT expr - -program :: CoreParser Program -program = Program <$> termMany (char TokSemicolon) scdef - -scdef :: CoreParser ScDef -scdef = ScDef <$> f <*> (xs <* eq) <*> body - where - f = name - xs = many name - eq = char TokEquals - body = expr - -expr :: CoreParser Expr -expr = letE - <|> app - <|> lam - <|> atom - -atom :: CoreParser Expr -atom = var - <|> con - <|> parenE - <|> lit - where - var = Var <$> name - parenE = surround (char TokLParen) (char TokRParen) expr - lit = IntE <$> litInt - -lam :: CoreParser Expr -lam = Lam <$> (l *> bs) <*> (arrow *> expr) - where - l = char TokLambda - arrow = char TokArrow - bs = some name - -app :: CoreParser Expr -app = foldl App <$> atom <*> some atom - -con :: CoreParser Expr -con = pack *> (Con <$> (l *> tag) <*> (arity <* r)) - where - l = char TokLBrace - r = char TokRBrace - tag = litInt - arity = litInt - pack = match \case - TokCName "Pack" -> Just () - _ -> Nothing - -letE :: CoreParser Expr -letE = Let <$> word <*> defs <*> (char TokIn *> expr) - where - word = char TokLet $> NonRec - <|> char TokLetRec $> Rec - defs = surround (char TokLBrace) (char TokRBrace) bindings - -bindings :: CoreParser [Binding] -bindings = sepSome (char TokSemicolon) binding - -binding :: CoreParser Binding -binding = Binding <$> name <*> (char TokEquals *> expr) - ----------------------------------------------------------------------------------- - -name :: CoreParser Name -name = match \case - TokName n -> Just n - _ -> Nothing - -cName :: CoreParser Name 
-cName = match \case - TokCName n -> Just n - _ -> Nothing - -litInt :: CoreParser Int -litInt = match \case - TokLitInt n -> Just n - _ -> Nothing - diff --git a/src/Core/Parse.y b/src/Core/Parse.y new file mode 100644 index 0000000..4077528 --- /dev/null +++ b/src/Core/Parse.y @@ -0,0 +1,82 @@ +{ +module Core.Parse + ( parseCore + -- , parseCoreExpr + , module Core.Lex -- temp convenience + ) + where + +import Data.Foldable (foldl') +import Core.Syntax +import Core.Lex +} + +%name parseCore +%name parseCoreExpr Expr +%tokentype { CoreToken } +%error { parseError } + +%token + let { TokenLet } + letrec { TokenLetrec } + module { TokenModule } + where { TokenWhere } + ',' { TokenComma } + in { TokenIn } + litint { TokenLitInt $$ } + name { TokenName $$ } + sym { TokenSym $$ } + 'λ' { TokenLambda } + '->' { TokenArrow } + '=' { TokenEquals } + '(' { TokenLParen } + ')' { TokenRParen } + '{' { TokenLBrace } + '}' { TokenRBrace } + ';' { TokenSemicolon } + +%% + +ExportList :: { [Name] } +ExportList : '(' Exports ')' { $2 } + +Exports :: { [Name] } +Exports : Var ',' Exports { $1 : $3 } + | Var { [$1] } + +Expr :: { Expr } +Expr : let Bindings in Expr { Let NonRec $2 $4 } + | letrec Bindings in Expr { Let Rec $2 $4 } + | 'λ' Binders '->' Expr { Lam $2 $4 } + | Application { $1 } + | Expr1 { $1 } + +Binders :: { [Name] } +Binders : Var Binders { $1 : $2 } + | Var { [$1] } + +Application :: { Expr } +Application : Expr1 AppArgs { foldl' App $1 $2 } + +-- TODO: Application can probably be written as a single rule, without AppArgs +AppArgs :: { [Expr] } +AppArgs : Expr1 AppArgs { $1 : $2 } + | Expr1 { [$1] } + +Expr1 :: { Expr } +Expr1 : litint { IntE $1 } + | Var { Var $1 } + | '(' Expr ')' { $2 } + +Var :: { Name } +Var : '(' sym ')' { $2 } + | name { $1 } + +Bindings :: { [Binding] } +Bindings : Var '=' Expr { [$1 := $3] } + +{ +parseError :: [CoreToken] -> a +parseError _ = error "fuuckk!" 
+} + diff --git a/src/Core/Syntax.hs b/src/Core/Syntax.hs index b08bf1d..cc4aa86 100644 --- a/src/Core/Syntax.hs +++ b/src/Core/Syntax.hs @@ -8,11 +8,13 @@ module Core.Syntax , Alter(..) , Name , ScDef(..) + , Module(..) , Program(..) , corePrelude , bindersOf , rhssOf , isAtomic + , insertModule ) where ---------------------------------------------------------------------------------- @@ -56,6 +58,9 @@ type Name = String data ScDef = ScDef Name [Name] Expr deriving (Show, Lift) +data Module = Module (Maybe (Name, [Name])) Program + deriving (Show, Lift) + newtype Program = Program [ScDef] deriving (Show, Lift) @@ -64,6 +69,15 @@ instance IsString Expr where ---------------------------------------------------------------------------------- +instance Pretty Program where + -- TODO: module header + prettyPrec (Program ss) _ = mconcat $ intersperse "\n\n" $ fmap pretty ss + +instance Pretty ScDef where + prettyPrec (ScDef n as e) _ = + mconcat (intersperse " " $ fmap IStr (n:as)) + <> " = " <> pretty e <> IBreak + instance Pretty Expr where prettyPrec (Var k) = withPrec maxBound $ IStr k prettyPrec (IntE n) = withPrec maxBound $ iShow n @@ -106,7 +120,7 @@ instance Pretty Binding where ---------------------------------------------------------------------------------- instance Semigroup Program where - (<>) = coerce $ (++) @ScDef + (<>) = coerce $ (<>) @[ScDef] instance Monoid Program where mempty = Program [] @@ -125,15 +139,19 @@ isAtomic _ = False ---------------------------------------------------------------------------------- -corePrelude :: Program -corePrelude = Program +corePrelude :: Module +corePrelude = Module (Just ("Prelude", [])) $ Program [ ScDef "id" ["x"] (Var "x") - , ScDef "K" ["x", "y"] (Var "x") - , ScDef "K1" ["x", "y"] (Var "y") - , ScDef "S" ["f", "g", "x"] (Var "f" :$ Var "x" :$ (Var "g" :$ Var "x")) + , ScDef "k" ["x", "y"] (Var "x") + , ScDef "k1" ["x", "y"] (Var "y") + , ScDef "succ" ["f", "g", "x"] (Var "f" :$ Var "x" :$ (Var "g" :$ Var 
"x")) , ScDef "compose" ["f", "g", "x"] (Var "f" :$ (Var "g" :$ Var "x")) , ScDef "twice" ["f", "x"] (Var "f" :$ (Var "f" :$ Var "x")) , ScDef "False" [] $ Con 0 0 , ScDef "True" [] $ Con 1 0 ] +-- TODO: export list awareness +insertModule :: Module -> Program -> Program +insertModule (Module _ m) p = p <> m + diff --git a/src/Core/TH.hs b/src/Core/TH.hs index f455bb1..d700275 100644 --- a/src/Core/TH.hs +++ b/src/Core/TH.hs @@ -27,13 +27,16 @@ coreExpr = QuasiQuoter , quoteDec = error "core quasiquotes may only be used in expressions" } -qCore :: String -> Q Exp -qCore s = case lexCore s >>= parseCore of - Success a -> lift a - Error e _ _ -> error e +qCore = undefined +qCoreExpr = undefined -qCoreExpr :: String -> Q Exp -qCoreExpr s = case lexCore s >>= parseCoreExpr of - Success a -> lift a - Error e _ _ -> error e +-- qCore :: String -> Q Exp +-- qCore s = case lexCore s >>= parseCore of +-- Success a -> lift a +-- Error e _ _ -> error e + +-- qCoreExpr :: String -> Q Exp +-- qCoreExpr s = case lexCore s >>= parseCoreExpr of +-- Success a -> lift a +-- Error e _ _ -> error e diff --git a/src/TIM.hs b/src/TIM.hs index 5e0ac15..73f846e 100644 --- a/src/TIM.hs +++ b/src/TIM.hs @@ -71,7 +71,7 @@ compile prog = Just $ TiState s d h g stats s = [mainAddr] d = [] (h,g) = buildInitialHeap defs - defs = prog <> corePrelude + defs = insertModule corePrelude prog stats = Stats 0 0 0 mainAddr = fromJust $ lookup "main" g @@ -422,91 +422,91 @@ hdbgProg p hio = do TiState [resAddr] _ h _ sts = last p' res = hLookupUnsafe resAddr h -letrecExample :: Program -letrecExample = Program - [ ScDef "pair" ["x","y","f"] $ "f" :$ "x" :$ "y" - , ScDef "fst" ["p"] $ "p" :$ "K" - , ScDef "snd" ["p"] $ "p" :$ "K1" - , ScDef "f" ["x","y"] $ - Let Rec - [ "a" := "pair" :$ "x" :$ "b" - , "b" := "pair" :$ "y" :$ "a" - ] - ("fst" :$ ("snd" :$ ("snd" :$ ("snd" :$ "a")))) - , ScDef "main" [] $ "f" :$ IntE 3 :$ IntE 4 - ] +-- letrecExample :: Program +-- letrecExample = Program +-- [ ScDef "pair" 
["x","y","f"] $ "f" :$ "x" :$ "y" +-- , ScDef "fst" ["p"] $ "p" :$ "K" +-- , ScDef "snd" ["p"] $ "p" :$ "K1" +-- , ScDef "f" ["x","y"] $ +-- Let Rec +-- [ "a" := "pair" :$ "x" :$ "b" +-- , "b" := "pair" :$ "y" :$ "a" +-- ] +-- ("fst" :$ ("snd" :$ ("snd" :$ ("snd" :$ "a")))) +-- , ScDef "main" [] $ "f" :$ IntE 3 :$ IntE 4 +-- ] -idExample :: Program -idExample = Program - [ ScDef "main" [] $ "id" :$ IntE 3 - ] +-- idExample :: Program +-- idExample = Program +-- [ ScDef "main" [] $ "id" :$ IntE 3 +-- ] -indExample1 :: Program -indExample1 = Program - [ ScDef "main" [] $ "twice" :$ "twice" :$ "id" :$ IntE 3 - ] +-- indExample1 :: Program +-- indExample1 = Program +-- [ ScDef "main" [] $ "twice" :$ "twice" :$ "id" :$ IntE 3 +-- ] -indExample2 :: Program -indExample2 = Program - [ ScDef "main" [] $ "twice" :$ "twice" :$ "twice" :$ "id" :$ IntE 3 - ] +-- indExample2 :: Program +-- indExample2 = Program +-- [ ScDef "main" [] $ "twice" :$ "twice" :$ "twice" :$ "id" :$ IntE 3 +-- ] -indExample3 :: Program -indExample3 = Program - [ ScDef "main" [] $ - Let Rec - [ "x" := IntE 2 - , "y" := "f" :$ "x" :$ "x" - ] - ("g" :$ "y" :$ "y") - , ScDef "f" ["a","b"] $ "b" - , ScDef "g" ["a","b"] $ "a" - ] +-- indExample3 :: Program +-- indExample3 = Program +-- [ ScDef "main" [] $ +-- Let Rec +-- [ "x" := IntE 2 +-- , "y" := "f" :$ "x" :$ "x" +-- ] +-- ("g" :$ "y" :$ "y") +-- , ScDef "f" ["a","b"] $ "b" +-- , ScDef "g" ["a","b"] $ "a" +-- ] -negExample1 :: Program -negExample1 = Program - [ ScDef "main" [] $ - "negate#" :$ ("id" :$ IntE 3) - ] +-- negExample1 :: Program +-- negExample1 = Program +-- [ ScDef "main" [] $ +-- "negate#" :$ ("id" :$ IntE 3) +-- ] -negExample2 :: Program -negExample2 = Program - [ ScDef "main" [] $ - "negate#" :$ IntE 3 - ] +-- negExample2 :: Program +-- negExample2 = Program +-- [ ScDef "main" [] $ +-- "negate#" :$ IntE 3 +-- ] -negExample3 :: Program -negExample3 = Program - [ ScDef "main" [] $ - "twice" :$ "negate#" :$ IntE 3 - ] +-- negExample3 :: 
Program +-- negExample3 = Program +-- [ ScDef "main" [] $ +-- "twice" :$ "negate#" :$ IntE 3 +-- ] -arithExample1 :: Program -arithExample1 = Program - [ ScDef "main" [] $ - "+#" :$ (IntE 3) :$ ("negate#" :$ (IntE 2)) - ] +-- arithExample1 :: Program +-- arithExample1 = Program +-- [ ScDef "main" [] $ +-- "+#" :$ (IntE 3) :$ ("negate#" :$ (IntE 2)) +-- ] -arithExample2 :: Program -arithExample2 = Program - [ ScDef "main" [] $ - "negate#" :$ ("+#" :$ (IntE 2) :$ ("*#" :$ IntE 5 :$ IntE 3)) - ] +-- arithExample2 :: Program +-- arithExample2 = Program +-- [ ScDef "main" [] $ +-- "negate#" :$ ("+#" :$ (IntE 2) :$ ("*#" :$ IntE 5 :$ IntE 3)) +-- ] -ifExample :: Program -ifExample = Program - [ ScDef "main" [] $ - "if#" :$ "True" :$ IntE 2 :$ IntE 3 - ] +-- ifExample :: Program +-- ifExample = Program +-- [ ScDef "main" [] $ +-- "if#" :$ "True" :$ IntE 2 :$ IntE 3 +-- ] -facExample :: Program -facExample = Program - [ ScDef "fac" ["n"] $ - "if#" :$ ("==#" :$ "n" :$ IntE 0) - :$ (IntE 1) - :$ ("*#" :$ "n" :$ ("fac" :$ ("-#" :$ "n" :$ IntE 1))) - , ScDef "main" [] $ "fac" :$ IntE 3 - ] +-- facExample :: Program +-- facExample = Program +-- [ ScDef "fac" ["n"] $ +-- "if#" :$ ("==#" :$ "n" :$ IntE 0) +-- :$ (IntE 1) +-- :$ ("*#" :$ "n" :$ ("fac" :$ ("-#" :$ "n" :$ IntE 1))) +-- , ScDef "main" [] $ "fac" :$ IntE 3 +-- ] ---------------------------------------------------------------------------------- From 8694ff2307c514214593270c344067bbcc63522c Mon Sep 17 00:00:00 2001 From: crumbtoo Date: Fri, 17 Nov 2023 16:42:06 -0700 Subject: [PATCH 3/7] i hate programming i hate computers i am going to drop out i am going to abandon this project --- src/Core/Lex.x | 270 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 219 insertions(+), 51 deletions(-) diff --git a/src/Core/Lex.x b/src/Core/Lex.x index d5cfa67..6ebb0ae 100644 --- a/src/Core/Lex.x +++ b/src/Core/Lex.x @@ -1,56 +1,127 @@ { +{-# LANGUAGE TemplateHaskell #-} module Core.Lex - ( CoreToken(..) 
- , lexCore + ( lexCore + , lexCore' + , CoreToken(..) ) where - +import Data.Char (chr) +import Debug.Trace import Core.Syntax import Lens.Micro +import Lens.Micro.TH } %wrapper "monadUserState" -$digit = 0-9 -$alpha = [a-zA-Z] +$whitechar = [ \t\n\r\f\v] +$special = [\(\)\,\;\[\]\{\}] -$special = [\*\^\%\$#@!\<\>\+\-\=\/&\|\\\.] +$ascdigit = 0-9 +$unidigit = [] -- TODO +$digit = [$ascdigit $unidigit] -$nameTail = [ $alpha $digit \_ \' ] +$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~] +$unisymbol = [] -- TODO +$symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\'] + +$large = [A-Z \xc0-\xd6 \xd8-\xde] +$small = [a-z \xdf-\xf6 \xf8-\xff \_] +$alpha = [$small $large] + +$graphic = [$small $large $symbol $digit $special \:\"\'] + +$octit = 0-7 +$hexit = [0-9 A-F a-f] +$namechar = [$alpha $digit \'] +$symchar = [$symbol \:] +$nl = [\n\r] +$white_no_nl = $white # $nl + +@reservedid = + case|data|do|import|in|let|letrec|module|of|where + +@reservedop = + "=" | \\ | "->" + +@varname = $small $namechar* +@conname = $large $namechar* +@varsym = $symbol $symchar* +@consym = \: $symchar* + +@decimal = $digit+ rlp :- --- tokens :- --- $white+ ; --- "--" ~$special .* ; --- module { const TokenModule } --- where { const TokenWhere } --- let { const TokenLet } --- letrec { const TokenLetrec } --- in { const TokenIn } --- case { const TokenCase } --- of { const TokenOf } --- $digit+ { TokenLitInt . 
read @Int } --- "," { const TokenComma } --- "(" { const TokenLParen } --- ")" { const TokenRParen } --- "{" { const TokenLBrace } --- "}" { const TokenRBrace } --- "\\" { const TokenLambda } --- "λ" { const TokenLambda } --- ";" { const TokenSemicolon } --- $special+ { lexSym } --- $alpha $nameTail* { TokenName } +-- everywhere: skip whitespace +$white_no_nl+ { skip } -<0> \n {begin bol} +"--"\-*[^$symbol].* { skip } + +"{-" { nestedComment } + +-- syntactic symbols +<0> +{ + "(" { constTok TokenLParen } + ")" { constTok TokenRParen } + "{" { lbrace } + "}" { rbrace } + ";" { constTok TokenSemicolon } + "," { constTok TokenComma } +} + +-- keywords +-- see commentary on the layout system +<0> +{ + "module" { constTok TokenModule } + "let" { constTok TokenLet `andBegin` layout } + "letrec" { constTok TokenLet `andBegin` layout } + "case" { constTok TokenCase } + "of" { constTok TokenOf `andBegin` layout } + "in" { constTok TokenIn } + "where" { constTok TokenWhere } +} + +-- reserved symbols +<0> +{ + "=" { constTok TokenEquals } +} + +-- identifiers +<0> +{ + -- TODO: qualified names + @varname { lexWith TokenVarName } + @conname { lexWith TokenConName } + @varsym { lexWith TokenVarSym } +} + +<0> \n { begin bol } { - \n ; - () { doBOL } + \n { skip } + () { doBOL } +} + + +{ + -- TODO: does not respect comments nor pragmas + \{ { doLayoutBrace } + \n { skip } + () { newLayoutContext } } { +data Located a = Located AlexPosn a + deriving Show + +constTok :: t -> AlexInput -> Int -> Alex (Located t) +constTok t (p,_,_,_) _ = pure $ Located p t data CoreToken = TokenLet | TokenLetrec @@ -63,45 +134,142 @@ data CoreToken = TokenLet | TokenLambda | TokenArrow | TokenLitInt Int - | TokenName Name - | TokenSym Name + | TokenVarName Name + | TokenConName Name + | TokenName Name -- temp + | TokenVarSym Name + | TokenConSym Name + | TokenSym Name -- temp | TokenEquals | TokenLParen | TokenRParen | TokenLBrace | TokenRBrace | TokenSemicolon + | TokenEOF deriving Show -data 
LayoutContext = NoLayout - | Layout Int +data LayoutContext = Layout Int + | NoLayout data AlexUserState = AlexUserState { _ausContext :: [LayoutContext] + , _ausStack :: [Int] } ausContext :: Lens' AlexUserState [LayoutContext] -ausContext = lens _ausContext (\s b -> s { _ausContext = b }) +ausContext f (AlexUserState ctx stk) + = fmap + (\a -> AlexUserState a stk) (f ctx) +{-# INLINE ausContext #-} -alexInitUserState = AlexUserState - { _ausContext = [] - } +ausStack :: Lens' AlexUserState [Int] +ausStack f (AlexUserState ctx stk) + = fmap + (\a -> AlexUserState ctx a) (f stk) +{-# INLINE ausStack #-} --- lexCore :: String -> [CoreToken] -lexCore = alexScanTokens +pushContext :: LayoutContext -> Alex () +pushContext c = do + st <- alexGetUserState + alexSetUserState $ st { _ausContext = c : _ausContext st } --- lexSym :: String -> CoreToken --- lexSym "=" = TokenEquals --- lexSym "\\" = TokenLambda --- lexSym "->" = TokenArrow --- lexSym s = TokenSym s +popContext :: Alex () +popContext = do + st <- alexGetUserState + alexSetUserState $ st { _ausContext = drop 1 (_ausContext st) } -lexSym = undefined +getContext :: Alex [LayoutContext] +getContext = do + st <- alexGetUserState + pure $ _ausContext st +type Lexer = AlexInput -> Int -> Alex (Located CoreToken) + +alexEOF :: Alex (Located CoreToken) +alexEOF = Alex $ \ st@(AlexState { alex_pos = p }) -> Right (st, Located p TokenEOF) + +alexInitUserState :: AlexUserState +alexInitUserState = AlexUserState [] [bol,0] + +nestedComment :: Lexer +nestedComment _ _ = undefined + +lexStream :: Alex [Located CoreToken] +lexStream = do + l <- alexMonadScan + case l of + Located _ TokenEOF -> pure [l] + _ -> (l:) <$> lexStream + +lexCore :: String -> Either String [Located CoreToken] +lexCore s = runAlex s lexStream + +lexCore' :: String -> Either String [CoreToken] +lexCore' s = fmap f <$> lexCore s + where f (Located _ t) = t + +lexWith :: (String -> CoreToken) -> Lexer +lexWith f (p,_,_,s) l = pure $ Located p (f $ take l 
s) + +lexToken :: Alex (Located CoreToken) +lexToken = alexMonadScan + +getSrcCol :: Alex Int +getSrcCol = Alex $ \ st -> + let AlexPn _ _ col = alex_pos st + in Right (st, col) + +doLayoutBrace :: Lexer +doLayoutBrace (p,_,_,s) _ = undefined + +lbrace :: Lexer +lbrace (p,_,_,_) _ = do + pushContext NoLayout + pure $ Located p TokenLBrace + +rbrace :: Lexer +rbrace (p,_,_,_) _ = do + popContext + pure $ Located p TokenRBrace + +setLexState :: Int -> Alex () +setLexState n = Alex $ \st -> Right (st { alex_scd = n }, ()) + +modifyUst :: (AlexUserState -> AlexUserState) -> Alex () +modifyUst f = do + st <- alexGetUserState + alexSetUserState $ f st + +getUst :: Alex AlexUserState +getUst = alexGetUserState + +pushLexState :: Int -> Alex () +pushLexState n = modifyUst (ausStack %~ (n:)) *> setLexState n + +popLexState :: Alex Int +popLexState = do + modifyUst (ausStack %~ drop 1) + ust <- getUst + let s = case ust ^. ausStack of + (a:_) -> a + _ -> 0 + setLexState s + pure s + +newLayoutContext :: Lexer +newLayoutContext (p,_,_,_) _ = do + _ <- popLexState + ctx <- getContext + off <- getSrcCol + case ctx of + Layout prev : _ | off <= prev + -> error $ show prev + _ -> do + pushContext $ Layout off + pure $ Located p TokenLBrace + +doBOL :: Lexer doBOL = undefined -alexEOF = undefined - -alexScanTokens = undefined - } From 48aa05caad5c6281f525668f0f9bc71494d596da Mon Sep 17 00:00:00 2001 From: crumbtoo Date: Mon, 20 Nov 2023 11:17:27 -0700 Subject: [PATCH 4/7] some writing on layouts --- docs/src/commentary/layout-lexing.rst | 169 +++++++++++++++++++++++++- docs/src/glossary.rst | 15 +++ docs/src/index.rst | 6 + 3 files changed, 188 insertions(+), 2 deletions(-) create mode 100644 docs/src/glossary.rst diff --git a/docs/src/commentary/layout-lexing.rst b/docs/src/commentary/layout-lexing.rst index 5a1f8ab..4c14735 100644 --- a/docs/src/commentary/layout-lexing.rst +++ b/docs/src/commentary/layout-lexing.rst @@ -1,3 +1,168 @@ -Parsing and the Layout System 
-============================= +Lexing, Parsing, and Layouts +============================ +The C-style languages of my previous experiences have all had quite trivial +lexical analysis stages, peaking in complexity when I streamed tokens lazily in +C. The task of tokenising a C-style language is very simple in description: you +ignore all whitespace and point out what you recognise. If you don't recognise +something, check if it's a literal or an identifier. Should it be neither, +return an error. + +On paper, both lexing and parsing a Haskell-like language seem to pose a few +greater challenges. Listed by ascending intimidation factor, some of the +potential roadblocks on my mind before making an attempt were: + +* Operators; Haskell has not only user-defined infix operators, but user-defined + precedence levels and associativities. I recall using an algorithm that looked + up infix, prefix, postfix, and even mixfix operators up in a global table to + call their appropriate parser (if their precedence was appropriate, also + stored in the table). I never modified the table at runtime, however this + could be a very nice solution for Haskell. + +* Context-sensitive keywords; Haskell allows for some words to be used as identifiers in + appropriate contexts, such as :code:`family`, :code:`role`, :code:`as`. + Reading a note_ found in `GHC's lexer + `_, + it appears that keywords are only considered in bodies for which their use is + relevant, e.g. :code:`family` and :code:`role` in type declarations, + :code:`as` after :code:`case`; :code:`if`, :code:`then`, and :code:`else` in + expressions, etc. + +* Whitespace sensitivity; While I was comfortable with the idea of a system + similar to Python's INDENT/DEDENT tokens, Haskell seemed to use whitespace to + section code in a way that *felt* different. + +.. 
_note: https://gitlab.haskell.org/ghc/ghc/-/wikis/commentary/coding-style#2-using-notes + +After a bit of thought and research, whitespace sensitivity in the form of +*layouts* as Haskell and I will refer to them as, are easily the scariest thing +on this list -- however they are achievable! + +A Lexical Primer: Python +************************ + +We will compare and contrast with Python's lexical analysis. Much to my dismay, +Python uses newlines and indentation to separate statements and resolve scope +instead of the traditional semicolons and braces found in C-style languages (we +may generally refer to these C-style languages as *explicitly-sectioned*). +Internally during tokenisation, when the Python lexer begins a new line, they +compare the indentation of the new line with that of the previous and apply the +following rules: + +1. If the new line has greater indentation than the previous, insert an INDENT + token and push the new line's indentation level onto the indentation stack + (the stack is initialised with an indentation level of zero). + +2. If the new line has lesser indentation than the previous, pop the stack until + the top of the stack is greater than the new line's indentation level. A + DEDENT token is inserted for each level popped. + +3. If the indentation is equal, insert a NEWLINE token to terminate the previous + line, and leave it at that! + +Parsing Python with the INDENT, DEDENT, and NEWLINE tokens is identical to +parsing a language with braces and semicolons. This is a solution pretty in line +with Python's philosophy of the "one correct answer" (TODO: this needs a +source). In developing our *layout* rules, we will follow in the pattern of +translating the whitespace-sensitive source language to an explicitly sectioned +language. + +But What About Haskell? +*********************** + +We saw that Python, the most notable example of an implicitly sectioned +language, is pretty simple to lex. 
Why then am I so afraid of Haskell's layouts?
+To be frank, I'm far less scared after asking myself this -- however there are
+certainly some new complexities that needn't concern Python. Haskell has
+implicit line *continuation*: forms written over multiple lines; indentation
+styles often seen in Haskell are somewhat esoteric compared to Python's
+"s/[{};]//".
+
+.. code-block:: haskell
+
+    -- line continuation
+    something = this is a
+        single expression
+
+    -- an extremely common style found in haskell
+    data Python = Users
+        { are :: Crying
+        , right :: About
+        , now :: Sorry
+        }
+
+    -- another formatting oddity
+    -- note that this is not line continuation!
+    -- `look at`, `this`, and `alignment`
+    -- are all separate expressions!
+    anotherThing = do look at
+                      this
+                      alignment
+
+But enough fear, let's actually think about implementation. Firstly, some
+formality: what do we mean when we say layout? We will define layout as the
+rules we apply to an implicitly-sectioned language in order to yield one that is
+explicitly-sectioned. We will also define indentation of a lexeme as the column
+number of its first character.
+
+Thankfully for us, our entry point is quite clear; layouts only appear after a
+select few keywords, (with a minor exception; TODO: elaborate) being :code:`let`
+(followed by supercombinators), :code:`where` (followed by supercombinators),
+:code:`do` (followed by expressions), and :code:`of` (followed by alternatives)
+(TODO: all of these terms need linked glossary entries). Under this assumption,
+we give the following rule:
+
+1. If a :code:`let`, :code:`where`, :code:`do`, or :code:`of` keyword is not
+   followed by the lexeme :code:`{`, the token :math:`\{n\}` is inserted after
+   the keyword, where :math:`n` is the indentation of the next lexeme if there
+   is one, or 0 if the end of file has been reached. 
+
+Henceforth :math:`\{n\}` will denote the token representing the beginning of a
+layout; similar in function to a brace, but it stores the indentation level for
+subsequent lines to compare with. We must introduce an additional input to the
+function handling layouts. Obviously, such a function would require the input
+string, but a helpful book-keeping tool which we will make good use of is a
+stack of "layout contexts", describing the current cascade of layouts. Each
+element is either a :code:`NoLayout`, indicating an explicit layout (i.e. the
+programmer inserted semicolons and braces herself) or a :code:`Layout n` where
+:code:`n` is a non-negative integer representing the indentation level of the
+enclosing context.
+
+.. code-block:: haskell
+
+    f x -- layout stack: []
+        = let -- layout keyword; remember indentation of next token
+            y = w * w -- layout stack: [Layout 10]
+            w = x + x
+          in do -- layout keyword; next token is a brace!
+            { -- layout stack: [NoLayout]
+            pure }
+
+In the code seen above, notice that :code:`let` allows for multiple definitions,
+separated by a newline. We accommodate this with a token :math:`\langle n
+\rangle` which complements :math:`\{n\}` in how it functions as a closing brace
+that stores indentation. We give a rule to describe the source of such a token:
+
+2. When the first lexeme on a line is preceded by only whitespace a
+   :math:`\langle n \rangle` token is inserted before the lexeme, where
+   :math:`n` is the indentation of the lexeme, provided that it is not, as a
+   consequence of rule 1 or rule 3 (as we'll see), preceded by {n}.
+
+Lastly, to handle the top level we will initialise the stack with a
+:math:`\{n\}` where :math:`n` is the indentation of the first lexeme.
+
+3. If the first lexeme of a module is not '{' or :code:`module`, then it is
+   preceded by :math:`\{n\}` where :math:`n` is the indentation of the lexeme. 
+
+For a more pedantic description of the layout system, see `chapter 10
+`_ of the
+2010 Haskell Report, which I **heavily** referenced here.
+
+References
+----------
+
+* `Python's lexical analysis
+  `_
+
+* `Haskell Syntax Reference
+  `_
diff --git a/docs/src/glossary.rst b/docs/src/glossary.rst
new file mode 100644
index 0000000..4cb9aaf
--- /dev/null
+++ b/docs/src/glossary.rst
@@ -0,0 +1,15 @@
+Glossary
+========
+
+Haskell and Haskell culture are infamous for using scary mathematical terms for
+simple ideas. Please excuse us, it's really fun :3.
+
+.. glossary::
+
+    supercombinator
+        An expression with no free variables. For most purposes, just think of a
+        top-level definition.
+
+    case alternative
+        A possible match in a case expression (TODO: example)
+
diff --git a/docs/src/index.rst b/docs/src/index.rst
index a0ab572..707ee37 100644
--- a/docs/src/index.rst
+++ b/docs/src/index.rst
@@ -6,6 +6,12 @@ Contents
 .. toctree::
    :maxdepth: 2
+   :caption: Index
+
+   glossary.rst
+
+.. toctree::
+   :maxdepth: 1
    :caption: Commentary
    :glob:

From 717effc57990a16d3a9f8edb0b58f62219125391 Mon Sep 17 00:00:00 2001
From: crumbtoo
Date: Mon, 20 Nov 2023 11:29:16 -0700
Subject: [PATCH 5/7] uh

---
 docs/src/commentary/layout-lexing.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/src/commentary/layout-lexing.rst b/docs/src/commentary/layout-lexing.rst
index 4c14735..b369475 100644
--- a/docs/src/commentary/layout-lexing.rst
+++ b/docs/src/commentary/layout-lexing.rst
@@ -154,7 +154,9 @@ Lastly, to handle the top level we will initialise the stack with a
 3. If the first lexeme of a module is not '{' or :code:`module`, then it is
    preceded by :math:`\{n\}` where :math:`n` is the indentation of the lexeme.
 
-For a more pedantic description of the layout system, see `chapter 10
+This set of rules is adequate enough to satisfy our basic concerns about line
+continuations and layout lists. 
For a more pedantic description of the layout +system, see `chapter 10 `_ of the 2010 Haskell Report, which I **heavily** referenced here. From c15e67869ec114f4ae2030d0dd69ace140d964aa Mon Sep 17 00:00:00 2001 From: crumbtoo Date: Mon, 20 Nov 2023 12:51:04 -0700 Subject: [PATCH 6/7] layout lexing is good enough --- src/Core/Lex.x | 106 ++++++++++++++++++++++++++----------------------- 1 file changed, 56 insertions(+), 50 deletions(-) diff --git a/src/Core/Lex.x b/src/Core/Lex.x index 6ebb0ae..090dd95 100644 --- a/src/Core/Lex.x +++ b/src/Core/Lex.x @@ -76,11 +76,11 @@ $white_no_nl+ { skip } -- see commentary on the layout system <0> { - "module" { constTok TokenModule } - "let" { constTok TokenLet `andBegin` layout } - "letrec" { constTok TokenLet `andBegin` layout } + "let" { constTok TokenLet `andBegin` layout_keyword } + "letrec" { constTok TokenLet `andBegin` layout_keyword } + "of" { constTok TokenOf `andBegin` layout_keyword } "case" { constTok TokenCase } - "of" { constTok TokenOf `andBegin` layout } + "module" { constTok TokenModule } "in" { constTok TokenIn } "where" { constTok TokenWhere } } @@ -89,6 +89,7 @@ $white_no_nl+ { skip } <0> { "=" { constTok TokenEquals } + "->" { constTok TokenArrow } } -- identifiers @@ -105,15 +106,14 @@ $white_no_nl+ { skip } { \n { skip } - () { doBOL } + () { doBol `andBegin` 0 } } - + { - -- TODO: does not respect comments nor pragmas - \{ { doLayoutBrace } - \n { skip } - () { newLayoutContext } + $white { skip } + \{ { lbrace `andBegin` 0 } + () { noBrace `andBegin` 0 } } { @@ -145,30 +145,26 @@ data CoreToken = TokenLet | TokenRParen | TokenLBrace | TokenRBrace + | TokenIndent Int + | TokenDedent Int | TokenSemicolon | TokenEOF deriving Show data LayoutContext = Layout Int | NoLayout + deriving Show data AlexUserState = AlexUserState { _ausContext :: [LayoutContext] - , _ausStack :: [Int] } ausContext :: Lens' AlexUserState [LayoutContext] -ausContext f (AlexUserState ctx stk) +ausContext f (AlexUserState ctx) = fmap 
- (\a -> AlexUserState a stk) (f ctx) + (\a -> AlexUserState a) (f ctx) {-# INLINE ausContext #-} -ausStack :: Lens' AlexUserState [Int] -ausStack f (AlexUserState ctx stk) - = fmap - (\a -> AlexUserState ctx a) (f stk) -{-# INLINE ausStack #-} - pushContext :: LayoutContext -> Alex () pushContext c = do st <- alexGetUserState @@ -190,7 +186,7 @@ alexEOF :: Alex (Located CoreToken) alexEOF = Alex $ \ st@(AlexState { alex_pos = p }) -> Right (st, Located p TokenEOF) alexInitUserState :: AlexUserState -alexInitUserState = AlexUserState [] [bol,0] +alexInitUserState = AlexUserState [Layout 1] nestedComment :: Lexer nestedComment _ _ = undefined @@ -203,7 +199,8 @@ lexStream = do _ -> (l:) <$> lexStream lexCore :: String -> Either String [Located CoreToken] -lexCore s = runAlex s lexStream +lexCore s = runAlex s (alexSetStartCode layout_keyword *> lexStream) +-- temp; does not support module header lexCore' :: String -> Either String [CoreToken] lexCore' s = fmap f <$> lexCore s @@ -220,9 +217,6 @@ getSrcCol = Alex $ \ st -> let AlexPn _ _ col = alex_pos st in Right (st, col) -doLayoutBrace :: Lexer -doLayoutBrace (p,_,_,s) _ = undefined - lbrace :: Lexer lbrace (p,_,_,_) _ = do pushContext NoLayout @@ -233,8 +227,14 @@ rbrace (p,_,_,_) _ = do popContext pure $ Located p TokenRBrace -setLexState :: Int -> Alex () -setLexState n = Alex $ \st -> Right (st { alex_scd = n }, ()) +insRBrace :: AlexPosn -> Alex (Located CoreToken) +insRBrace p = do + popContext + pure $ Located p TokenRBrace + +insSemi :: AlexPosn -> Alex (Located CoreToken) +insSemi p = do + pure $ Located p TokenSemicolon modifyUst :: (AlexUserState -> AlexUserState) -> Alex () modifyUst f = do @@ -244,32 +244,38 @@ modifyUst f = do getUst :: Alex AlexUserState getUst = alexGetUserState -pushLexState :: Int -> Alex () -pushLexState n = modifyUst (ausStack %~ (n:)) *> setLexState n - -popLexState :: Alex Int -popLexState = do - modifyUst (ausStack %~ drop 1) - ust <- getUst - let s = case ust ^. 
ausStack of - (a:_) -> a - _ -> 0 - setLexState s - pure s - newLayoutContext :: Lexer newLayoutContext (p,_,_,_) _ = do - _ <- popLexState - ctx <- getContext - off <- getSrcCol - case ctx of - Layout prev : _ | off <= prev - -> error $ show prev - _ -> do - pushContext $ Layout off - pure $ Located p TokenLBrace + undefined -doBOL :: Lexer -doBOL = undefined +noBrace :: Lexer +noBrace (p,_,_,_) l = do + col <- getSrcCol + pushContext (Layout col) + pure $ Located p TokenLBrace + +getOffside :: Alex Ordering +getOffside = do + ctx <- getContext + m <- getSrcCol + case ctx of + Layout n : _ -> pure $ m `compare` n + _ -> pure GT + +doBol :: Lexer +doBol (p,c,_,s) l = do + off <- getOffside + -- traceM $ show (p, c, s) + col <- getSrcCol + traceM $ show (s, p, col, off) + case off of + LT -> insRBrace p + EQ -> insSemi p + _ -> lexToken + +testTmp :: IO (Either String [CoreToken]) +testTmp = do + s <- readFile "/tmp/t.hs" + pure $ lexCore' s } From 45952ef30ebfc6467cecb7feaaff4e28b7875ddf Mon Sep 17 00:00:00 2001 From: crumbtoo Date: Mon, 20 Nov 2023 14:08:18 -0700 Subject: [PATCH 7/7] good enough. good enough. good enough. good enoguh.jjhbjhfsfbsjhdsfjbhsdbjhdfsbhjlbahjfdaojuai --- src/Compiler/RLPC.hs | 10 ++++++ src/Core/Lex.x | 21 +++++++----- src/Core/Parse.y | 82 ++++++++++++++++++++++++++++++++++---------- 3 files changed, 86 insertions(+), 27 deletions(-) create mode 100644 src/Compiler/RLPC.hs diff --git a/src/Compiler/RLPC.hs b/src/Compiler/RLPC.hs new file mode 100644 index 0000000..08d991e --- /dev/null +++ b/src/Compiler/RLPC.hs @@ -0,0 +1,10 @@ +{-# LANGUAGE GeneralisedNewtypeDeriving #-} +module Compiler.RLPC + ( RLPC(..) + ) + where + +-- TODO: fancy errors +newtype RLPC a = RLPC { runRLPC :: Either String a } + deriving (Functor, Applicative, Monad) + diff --git a/src/Core/Lex.x b/src/Core/Lex.x index 090dd95..fa40921 100644 --- a/src/Core/Lex.x +++ b/src/Core/Lex.x @@ -4,6 +4,7 @@ module Core.Lex ( lexCore , lexCore' , CoreToken(..) 
+ , lexTmp ) where import Data.Char (chr) @@ -81,7 +82,7 @@ $white_no_nl+ { skip } "of" { constTok TokenOf `andBegin` layout_keyword } "case" { constTok TokenCase } "module" { constTok TokenModule } - "in" { constTok TokenIn } + "in" { letin } "where" { constTok TokenWhere } } @@ -136,10 +137,8 @@ data CoreToken = TokenLet | TokenLitInt Int | TokenVarName Name | TokenConName Name - | TokenName Name -- temp | TokenVarSym Name | TokenConSym Name - | TokenSym Name -- temp | TokenEquals | TokenLParen | TokenRParen @@ -265,17 +264,21 @@ getOffside = do doBol :: Lexer doBol (p,c,_,s) l = do off <- getOffside - -- traceM $ show (p, c, s) col <- getSrcCol - traceM $ show (s, p, col, off) case off of LT -> insRBrace p EQ -> insSemi p _ -> lexToken -testTmp :: IO (Either String [CoreToken]) -testTmp = do - s <- readFile "/tmp/t.hs" - pure $ lexCore' s +letin :: Lexer +letin (p,_,_,_) l = do + popContext + pure $ Located p TokenIn +lexTmp :: IO [CoreToken] +lexTmp = do + s <- readFile "/tmp/t.hs" + case lexCore' s of + Left e -> error e + Right a -> pure a } diff --git a/src/Core/Parse.y b/src/Core/Parse.y index 4077528..342acf4 100644 --- a/src/Core/Parse.y +++ b/src/Core/Parse.y @@ -1,20 +1,24 @@ +-- TODO: resolve shift/reduce conflicts { module Core.Parse ( parseCore - -- , parseCoreExpr + , parseCoreExpr , module Core.Lex -- temp convenience + , parseTmp ) where import Data.Foldable (foldl') import Core.Syntax import Core.Lex +import Compiler.RLPC } -%name parseCore +%name parseCore Module %name parseCoreExpr Expr %tokentype { CoreToken } %error { parseError } +%monad { RLPC } %token let { TokenLet } @@ -24,8 +28,10 @@ import Core.Lex ',' { TokenComma } in { TokenIn } litint { TokenLitInt $$ } - name { TokenName $$ } - sym { TokenSym $$ } + varname { TokenVarName $$ } + varsym { TokenVarSym $$ } + conname { TokenConName $$ } + consym { TokenConSym $$ } 'λ' { TokenLambda } '->' { TokenArrow } '=' { TokenEquals } @@ -34,23 +40,43 @@ import Core.Lex '{' { TokenLBrace } '}' { 
TokenRBrace } ';' { TokenSemicolon } + eof { TokenEOF } %% -ExportList :: { [Name] } -ExportList : '(' Exports ')' { $2 } +Module :: { Module } +Module : module conname where Program Eof { Module (Just ($2, [])) $4 } + | Program Eof { Module Nothing $1 } -Exports :: { [Name] } -Exports : Var ',' Exports { $1 : $3 } - | Var { [$1] } +Eof :: { () } +Eof : eof { () } + | error { () } + +Program :: { Program } +Program : '{' ScDefs Close { Program $2 } + +ScDefs :: { [ScDef] } +ScDefs : ScDef ';' ScDefs { $1 : $3 } + | {- epsilon -} { [] } + +ScDef :: { ScDef } +ScDef : Var ParList '=' Expr { ScDef $1 $2 $4 } + +ParList :: { [Name] } +ParList : Var ParList { $1 : $2 } + | {- epsilon -} { [] } Expr :: { Expr } -Expr : let Bindings in Expr { Let NonRec $2 $4 } - | letrec Bindings in Expr { Let Rec $2 $4 } +Expr : let '{' Bindings Close in Expr { Let NonRec $3 $6 } + | letrec '{' Bindings Close in Expr { Let Rec $3 $6 } | 'λ' Binders '->' Expr { Lam $2 $4 } | Application { $1 } | Expr1 { $1 } +Close :: { () } +Close : '}' { () } + | error { () } + Binders :: { [Name] } Binders : Var Binders { $1 : $2 } | Var { [$1] } @@ -65,18 +91,38 @@ AppArgs : Expr1 AppArgs { $1 : $2 } Expr1 :: { Expr } Expr1 : litint { IntE $1 } - | Var { Var $1 } + | Id { Var $1 } | '(' Expr ')' { $2 } -Var :: { Name } -Var : '(' sym ')' { $2 } - | name { $1 } - Bindings :: { [Binding] } -Bindings : Var '=' Expr { [$1 := $3] } +Bindings : Binding ';' Bindings { $1 : $3 } + | Binding ';' { [$1] } + | Binding { [$1] } + +Binding :: { Binding } +Binding : Var '=' Expr { $1 := $3 } + +Id :: { Name } +Id : Var { $1 } + | Con { $1 } + +Var :: { Name } +Var : '(' varsym ')' { $2 } + | varname { $1 } + +Con :: { Name } +Con : '(' consym ')' { $2 } + | conname { $1 } { parseError :: [CoreToken] -> a -parseError _ = error "fuuckk!" +parseError ts = error $ "parse error at token: " <> show (head ts) + +parseTmp :: IO (Module) +parseTmp = do + s <- readFile "/tmp/t.hs" + case lexCore' s >>= runRLPC . 
parseCore of + Left e -> error e + Right a -> pure a }