From c39a8436607b3ab51487276a19e800ab4a5d3589 Mon Sep 17 00:00:00 2001 From: crumbtoo Date: Wed, 15 Nov 2023 17:38:08 -0700 Subject: [PATCH 1/7] core quasiquoter --- rlp.cabal | 2 ++ src/Control/Parser.hs | 5 +++++ src/Core/Parse.hs | 4 ++++ src/Core/Syntax.hs | 17 +++++++++-------- src/Core/TH.hs | 39 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 59 insertions(+), 8 deletions(-) create mode 100644 src/Core/TH.hs diff --git a/rlp.cabal b/rlp.cabal index e25a5ab..d1d2d71 100644 --- a/rlp.cabal +++ b/rlp.cabal @@ -26,12 +26,14 @@ library , Core.Syntax , Core.Parse , Core.Lex + , Core.TH -- other-extensions: build-depends: base ^>=4.18.0.0 , containers , microlens , microlens-th + , template-haskell hs-source-dirs: src default-language: GHC2021 diff --git a/src/Control/Parser.hs b/src/Control/Parser.hs index 332aeaf..b1b2caa 100644 --- a/src/Control/Parser.hs +++ b/src/Control/Parser.hs @@ -62,6 +62,11 @@ instance (MonadFail m) => MonadFail (ParserT i m) where ---------------------------------------------------------------------------------- +eof :: (MonadPlus m) => ParserT [a] m () +eof = ParserT $ \case + [] -> pure ([], ()) + _ -> empty + -- TODO: generalise to non-lists satisfy :: (MonadPlus m) => (a -> Bool) -> ParserT [a] m a satisfy p = ParserT $ \case diff --git a/src/Core/Parse.hs b/src/Core/Parse.hs index b9f4576..8cb9497 100644 --- a/src/Core/Parse.hs +++ b/src/Core/Parse.hs @@ -1,6 +1,7 @@ {-# LANGUAGE LambdaCase, BlockArguments #-} module Core.Parse ( parseCore + , parseCoreExpr ) where ---------------------------------------------------------------------------------- @@ -15,6 +16,9 @@ type CoreParser = ParserT [CoreToken] Result parseCore :: [CoreToken] -> Result Program parseCore = fmap snd . runParserT program +parseCoreExpr :: [CoreToken] -> Result Expr +parseCoreExpr = fmap snd . 
runParserT expr + program :: CoreParser Program program = Program <$> termMany (char TokSemicolon) scdef diff --git a/src/Core/Syntax.hs b/src/Core/Syntax.hs index 46c9c8e..b08bf1d 100644 --- a/src/Core/Syntax.hs +++ b/src/Core/Syntax.hs @@ -18,9 +18,10 @@ module Core.Syntax ---------------------------------------------------------------------------------- import Data.Coerce import Data.Pretty -import Data.List (intersperse) -import Data.Function ((&)) +import Data.List (intersperse) +import Data.Function ((&)) import Data.String +import Language.Haskell.TH.Syntax (Lift) ---------------------------------------------------------------------------------- data Expr = Var Name @@ -30,14 +31,14 @@ data Expr = Var Name | Lam [Name] Expr | App Expr Expr | IntE Int - deriving Show + deriving (Show, Lift) infixl 2 :$ pattern (:$) :: Expr -> Expr -> Expr pattern f :$ x = App f x data Binding = Binding Name Expr - deriving Show + deriving (Show, Lift) infixl 1 := pattern (:=) :: Name -> Expr -> Binding @@ -45,18 +46,18 @@ pattern k := v = Binding k v data Rec = Rec | NonRec - deriving (Show, Eq) + deriving (Show, Eq, Lift) data Alter = Alter Int [Name] Expr - deriving Show + deriving (Show, Lift) type Name = String data ScDef = ScDef Name [Name] Expr - deriving (Show) + deriving (Show, Lift) newtype Program = Program [ScDef] - deriving (Show) + deriving (Show, Lift) instance IsString Expr where fromString = Var diff --git a/src/Core/TH.hs b/src/Core/TH.hs new file mode 100644 index 0000000..f455bb1 --- /dev/null +++ b/src/Core/TH.hs @@ -0,0 +1,39 @@ +module Core.TH + ( coreExpr + , core + ) + where +---------------------------------------------------------------------------------- +import Language.Haskell.TH +import Language.Haskell.TH.Syntax +import Language.Haskell.TH.Quote +import Core.Parse +import Core.Lex +---------------------------------------------------------------------------------- + +core :: QuasiQuoter +core = QuasiQuoter + { quoteExp = qCore + , quotePat = 
error "core quasiquotes may only be used in expressions" + , quoteType = error "core quasiquotes may only be used in expressions" + , quoteDec = error "core quasiquotes may only be used in expressions" + } + +coreExpr :: QuasiQuoter +coreExpr = QuasiQuoter + { quoteExp = qCoreExpr + , quotePat = error "core quasiquotes may only be used in expressions" + , quoteType = error "core quasiquotes may only be used in expressions" + , quoteDec = error "core quasiquotes may only be used in expressions" + } + +qCore :: String -> Q Exp +qCore s = case lexCore s >>= parseCore of + Success a -> lift a + Error e _ _ -> error e + +qCoreExpr :: String -> Q Exp +qCoreExpr s = case lexCore s >>= parseCoreExpr of + Success a -> lift a + Error e _ _ -> error e + From ac55a1908c394cdb7fcc2d5d835e14de0123fa7e Mon Sep 17 00:00:00 2001 From: crumbtoo Date: Thu, 16 Nov 2023 19:11:01 -0700 Subject: [PATCH 2/7] unproductive day i fear --- docs/src/commentary/layout-lexing.rst | 3 + docs/src/commentary/parser.rst | 3 - rlp.cabal | 8 +- src/Control/Parser.hs | 106 ------------------ src/Core/Examples.hs | 22 ++++ src/Core/Lex.hs | 141 ------------------------ src/Core/Lex.x | 107 ++++++++++++++++++ src/Core/Parse.hs | 99 ----------------- src/Core/Parse.y | 82 ++++++++++++++ src/Core/Syntax.hs | 30 ++++-- src/Core/TH.hs | 19 ++-- src/TIM.hs | 150 +++++++++++++------------- 12 files changed, 330 insertions(+), 440 deletions(-) create mode 100644 docs/src/commentary/layout-lexing.rst delete mode 100644 docs/src/commentary/parser.rst delete mode 100644 src/Control/Parser.hs create mode 100644 src/Core/Examples.hs delete mode 100644 src/Core/Lex.hs create mode 100644 src/Core/Lex.x delete mode 100644 src/Core/Parse.hs create mode 100644 src/Core/Parse.y diff --git a/docs/src/commentary/layout-lexing.rst b/docs/src/commentary/layout-lexing.rst new file mode 100644 index 0000000..5a1f8ab --- /dev/null +++ b/docs/src/commentary/layout-lexing.rst @@ -0,0 +1,3 @@ +Parsing and the Layout System 
+============================= + diff --git a/docs/src/commentary/parser.rst b/docs/src/commentary/parser.rst deleted file mode 100644 index 27d0213..0000000 --- a/docs/src/commentary/parser.rst +++ /dev/null @@ -1,3 +0,0 @@ -Parser Combinators -================== - diff --git a/rlp.cabal b/rlp.cabal index d1d2d71..7b4a2c5 100644 --- a/rlp.cabal +++ b/rlp.cabal @@ -22,11 +22,13 @@ library , TIM other-modules: Data.Heap , Data.Pretty - , Control.Parser , Core.Syntax , Core.Parse - , Core.Lex , Core.TH + , Core.Examples + , Core.Lex + + build-tool-depends: happy:happy, alex:alex -- other-extensions: build-depends: base ^>=4.18.0.0 @@ -34,6 +36,8 @@ library , microlens , microlens-th , template-haskell + -- required for happy + , array hs-source-dirs: src default-language: GHC2021 diff --git a/src/Control/Parser.hs b/src/Control/Parser.hs deleted file mode 100644 index b1b2caa..0000000 --- a/src/Control/Parser.hs +++ /dev/null @@ -1,106 +0,0 @@ -{-| -Module : Control.Parser -Description : Parser combinators - -This module implements an interface for parser *types*, used in lexical analysis -and parsing. For the implementation of the rlp language's parser, see 'Parse'. 
--} -{-# LANGUAGE GeneralisedNewtypeDeriving #-} -{-# LANGUAGE BlockArguments, LambdaCase #-} -module Control.Parser - ( ParserT - , runParserT - - , satisfy - , char - , spaces - , surround - , string - , match - , termMany - , sepSome - - -- * Control.Applicative re-exports - , (<|>) - , many - , some - , empty - ) - where ----------------------------------------------------------------------------------- -import Control.Applicative -import Control.Arrow ((***)) -import Control.Monad -import Data.Char ----------------------------------------------------------------------------------- - -newtype ParserT i m o = ParserT { runParserT :: i -> m (i, o) } - deriving (Functor) - -instance (Monad m) => Applicative (ParserT i m) where - pure a = ParserT \i -> pure (i, a) - - m <*> k = ParserT \i -> do - (i',f) <- runParserT m i - fmap (id *** f) $ runParserT k i' - -instance (MonadPlus m) => Alternative (ParserT i m) where - empty = ParserT $ const empty - - ParserT m <|> ParserT k = ParserT $ \i -> - m i <|> k i - -instance (MonadPlus m) => MonadPlus (ParserT i m) - -instance (Monad m) => Monad (ParserT i m) where - m >>= k = ParserT $ \i -> do - (i',a) <- runParserT m i - runParserT (k a) i' - -instance (MonadFail m) => MonadFail (ParserT i m) where - fail s = ParserT $ \i -> fail s - ----------------------------------------------------------------------------------- - -eof :: (MonadPlus m) => ParserT [a] m () -eof = ParserT $ \case - [] -> pure ([], ()) - _ -> empty - --- TODO: generalise to non-lists -satisfy :: (MonadPlus m) => (a -> Bool) -> ParserT [a] m a -satisfy p = ParserT $ \case - (x:xs) | p x -> pure (xs,x) - _ -> empty - -match :: (MonadPlus m) => (a -> Maybe b) -> ParserT [a] m b -match f = ParserT $ \case - (x:xs) -> case f x of - Just b -> pure (xs,b) - Nothing -> empty - [] -> empty - -termMany :: (MonadPlus m) => ParserT i m t -> ParserT i m o -> ParserT i m [o] -termMany t a = many (a <* t) - -sepSome :: (MonadPlus m) => ParserT i m t -> ParserT i m o 
-> ParserT i m [o] -sepSome s a = (:) <$> a <*> many (s *> a) - -char :: (MonadPlus m, Eq a) => a -> ParserT [a] m a -char c = satisfy (==c) - -string :: (MonadPlus m, Eq a) => [a] -> ParserT [a] m [a] -string s = sequenceA $ char <$> s - ----------------------------------------------------------------------------------- - -surround :: (MonadPlus m) - => ParserT i m l - -> ParserT i m r - -> ParserT i m c - -> ParserT i m c -surround l r c = l *> c <* r - -spaces :: (MonadPlus m) => ParserT String m Int -spaces = length <$> many (satisfy (==' ')) - diff --git a/src/Core/Examples.hs b/src/Core/Examples.hs new file mode 100644 index 0000000..51a1461 --- /dev/null +++ b/src/Core/Examples.hs @@ -0,0 +1,22 @@ +{-# LANGUAGE QuasiQuotes #-} +module Core.Examples where +---------------------------------------------------------------------------------- +import Core.Syntax +import Core.TH +---------------------------------------------------------------------------------- + +{- + +letrecExample :: Program +letrecExample = [core| +pair x y f = f x y; +fst p = p k; +snd p = p k1; +f x y = letrec + { a = pair x b; + ; b = pair y a + } in fst (snd (snd (snd a))); +main = f 3 4; +|] + +-} diff --git a/src/Core/Lex.hs b/src/Core/Lex.hs deleted file mode 100644 index 2dca505..0000000 --- a/src/Core/Lex.hs +++ /dev/null @@ -1,141 +0,0 @@ -{-| -Module : Core.Lex -Description : Core language lexer --} -module Core.Lex - ( CoreToken(..) - , Result(..) 
- , lexCore - ) - where ----------------------------------------------------------------------------------- -import Control.Parser -import Control.Applicative -import Control.Monad -import Data.Char -import Data.Functor -import Core.Syntax (Name) ----------------------------------------------------------------------------------- - -type CoreLexer = ParserT String Result - -data Result a = Success a - | Error String Int Int - deriving (Show) - --- TODO: whitespace-sensitive layout -data CoreToken = TokLitInt Int - | TokEquals - | TokLBrace - | TokRBrace - | TokSemicolon - | TokLParen - | TokRParen - | TokLambda - | TokArrow - | TokCase - | TokOf - | TokLet - | TokLetRec - | TokIn - | TokCName Name - | TokName Name - deriving (Show, Eq) - -instance Functor Result where - fmap f (Success a) = Success (f a) - fmap _ (Error s l c) = Error s l c - -instance Foldable Result where - foldr f z (Success a) = a `f` z - foldr _ z (Error _ _ _) = z - -instance Traversable Result where - traverse k (Success a) = fmap Success (k a) - traverse _ (Error s l c) = pure $ Error s l c - -instance Applicative Result where - pure = Success - - liftA2 f (Success a) (Success b) = Success $ f a b - liftA2 _ (Error s l c) _ = Error s l c - liftA2 _ _ (Error s l c) = Error s l c - -instance Alternative Result where - empty = Error "some error! this is a temporary system lol" 0 0 - - (Success a) <|> _ = Success a - _ <|> b = b - -instance Monad Result where - Success a >>= k = k a - Error s l c >>= _ = Error s l c - -instance MonadPlus Result - -instance MonadFail Result where - fail s = Error s 0 0 - ----------------------------------------------------------------------------------- - -lexCore :: String -> Result [CoreToken] -lexCore = fmap snd . 
runParserT (many (token <* spaces)) - -token :: CoreLexer CoreToken -token = litInt - <|> lbrace - <|> rbrace - <|> semicolon - <|> lparen - <|> rparen - <|> equals - <|> lambda - <|> arrow - <|> _case - <|> _of - <|> letrec - <|> _let - <|> _in - <|> cName - <|> name - ----------------------------------------------------------------------------------- - -litInt, equals, lparen, rparen, lambda, - arrow, _case, _of, _let, letrec, _in, cName, name :: CoreLexer CoreToken - -litInt = TokLitInt . value <$> some (satisfy isDigit) - where - value = foldl (\acc a -> 10*acc + digitToInt a) 0 - -semicolon = (semis <|> nls) $> TokSemicolon - where - nls = head <$> some (char '\n') - semis = char ';' <* many (char '\n') -equals = char '=' $> TokEquals -lbrace = char '{' $> TokLBrace -rbrace = char '}' $> TokRBrace -lparen = char '(' $> TokLParen -rparen = char ')' $> TokRParen -lambda = (char '\\' <|> char 'λ') $> TokLambda -arrow = string "->" $> TokArrow -_case = string "case" $> TokCase -_of = string "of" $> TokOf -_let = string "let" $> TokLet -letrec = string "letrec" $> TokLetRec -_in = string "in" $> TokIn - -cName = TokCName <$> ((:) <$> cNameHead <*> properNameTail) - where cNameHead = satisfy isUpper - -name = some (satisfy p) <&> TokName - where p c = not (isSpace c) && c `notElem` ";{}()" - -properName :: CoreLexer Name -properName = (:) <$> nameHead <*> properNameTail - where nameHead = satisfy isLetter - -properNameTail :: CoreLexer Name -properNameTail = many . satisfy $ \c -> - isLetter c || isDigit c || c == '_' - diff --git a/src/Core/Lex.x b/src/Core/Lex.x new file mode 100644 index 0000000..d5cfa67 --- /dev/null +++ b/src/Core/Lex.x @@ -0,0 +1,107 @@ +{ +module Core.Lex + ( CoreToken(..) + , lexCore + ) + where + +import Core.Syntax +import Lens.Micro +} + +%wrapper "monadUserState" + +$digit = 0-9 +$alpha = [a-zA-Z] + +$special = [\*\^\%\$#@!\<\>\+\-\=\/&\|\\\.] 
+ +$nameTail = [ $alpha $digit \_ \' ] + +rlp :- + +-- tokens :- +-- $white+ ; +-- "--" ~$special .* ; +-- module { const TokenModule } +-- where { const TokenWhere } +-- let { const TokenLet } +-- letrec { const TokenLetrec } +-- in { const TokenIn } +-- case { const TokenCase } +-- of { const TokenOf } +-- $digit+ { TokenLitInt . read @Int } +-- "," { const TokenComma } +-- "(" { const TokenLParen } +-- ")" { const TokenRParen } +-- "{" { const TokenLBrace } +-- "}" { const TokenRBrace } +-- "\\" { const TokenLambda } +-- "λ" { const TokenLambda } +-- ";" { const TokenSemicolon } +-- $special+ { lexSym } +-- $alpha $nameTail* { TokenName } + +<0> \n {begin bol} + + +{ + \n ; + () { doBOL } +} + +{ + +data CoreToken = TokenLet + | TokenLetrec + | TokenIn + | TokenModule + | TokenWhere + | TokenComma + | TokenCase + | TokenOf + | TokenLambda + | TokenArrow + | TokenLitInt Int + | TokenName Name + | TokenSym Name + | TokenEquals + | TokenLParen + | TokenRParen + | TokenLBrace + | TokenRBrace + | TokenSemicolon + deriving Show + +data LayoutContext = NoLayout + | Layout Int + +data AlexUserState = AlexUserState + { _ausContext :: [LayoutContext] + } + +ausContext :: Lens' AlexUserState [LayoutContext] +ausContext = lens _ausContext (\s b -> s { _ausContext = b }) + +alexInitUserState = AlexUserState + { _ausContext = [] + } + +-- lexCore :: String -> [CoreToken] +lexCore = alexScanTokens + +-- lexSym :: String -> CoreToken +-- lexSym "=" = TokenEquals +-- lexSym "\\" = TokenLambda +-- lexSym "->" = TokenArrow +-- lexSym s = TokenSym s + +lexSym = undefined + +doBOL = undefined + +alexEOF = undefined + +alexScanTokens = undefined + +} diff --git a/src/Core/Parse.hs b/src/Core/Parse.hs deleted file mode 100644 index 8cb9497..0000000 --- a/src/Core/Parse.hs +++ /dev/null @@ -1,99 +0,0 @@ -{-# LANGUAGE LambdaCase, BlockArguments #-} -module Core.Parse - ( parseCore - , parseCoreExpr - ) - where 
----------------------------------------------------------------------------------- -import Control.Parser -import Data.Functor ((<&>), ($>)) -import Core.Lex -import Core.Syntax ----------------------------------------------------------------------------------- - -type CoreParser = ParserT [CoreToken] Result - -parseCore :: [CoreToken] -> Result Program -parseCore = fmap snd . runParserT program - -parseCoreExpr :: [CoreToken] -> Result Expr -parseCoreExpr = fmap snd . runParserT expr - -program :: CoreParser Program -program = Program <$> termMany (char TokSemicolon) scdef - -scdef :: CoreParser ScDef -scdef = ScDef <$> f <*> (xs <* eq) <*> body - where - f = name - xs = many name - eq = char TokEquals - body = expr - -expr :: CoreParser Expr -expr = letE - <|> app - <|> lam - <|> atom - -atom :: CoreParser Expr -atom = var - <|> con - <|> parenE - <|> lit - where - var = Var <$> name - parenE = surround (char TokLParen) (char TokRParen) expr - lit = IntE <$> litInt - -lam :: CoreParser Expr -lam = Lam <$> (l *> bs) <*> (arrow *> expr) - where - l = char TokLambda - arrow = char TokArrow - bs = some name - -app :: CoreParser Expr -app = foldl App <$> atom <*> some atom - -con :: CoreParser Expr -con = pack *> (Con <$> (l *> tag) <*> (arity <* r)) - where - l = char TokLBrace - r = char TokRBrace - tag = litInt - arity = litInt - pack = match \case - TokCName "Pack" -> Just () - _ -> Nothing - -letE :: CoreParser Expr -letE = Let <$> word <*> defs <*> (char TokIn *> expr) - where - word = char TokLet $> NonRec - <|> char TokLetRec $> Rec - defs = surround (char TokLBrace) (char TokRBrace) bindings - -bindings :: CoreParser [Binding] -bindings = sepSome (char TokSemicolon) binding - -binding :: CoreParser Binding -binding = Binding <$> name <*> (char TokEquals *> expr) - ----------------------------------------------------------------------------------- - -name :: CoreParser Name -name = match \case - TokName n -> Just n - _ -> Nothing - -cName :: CoreParser Name 
-cName = match \case - TokCName n -> Just n - _ -> Nothing - -litInt :: CoreParser Int -litInt = match \case - TokLitInt n -> Just n - _ -> Nothing - diff --git a/src/Core/Parse.y b/src/Core/Parse.y new file mode 100644 index 0000000..4077528 --- /dev/null +++ b/src/Core/Parse.y @@ -0,0 +1,82 @@ +{ +module Core.Parse + ( parseCore + -- , parseCoreExpr + , module Core.Lex -- temp convenience + ) + where + +import Data.Foldable (foldl') +import Core.Syntax +import Core.Lex +} + +%name parseCore +%name parseCoreExpr Expr +%tokentype { CoreToken } +%error { parseError } + +%token + let { TokenLet } + letrec { TokenLetrec } + module { TokenModule } + where { TokenWhere } + ',' { TokenComma } + in { TokenIn } + litint { TokenLitInt $$ } + name { TokenName $$ } + sym { TokenSym $$ } + 'λ' { TokenLambda } + '->' { TokenArrow } + '=' { TokenEquals } + '(' { TokenLParen } + ')' { TokenRParen } + '{' { TokenLBrace } + '}' { TokenRBrace } + ';' { TokenSemicolon } + +%% + +ExportList :: { [Name] } +ExportList : '(' Exports ')' { $2 } + +Exports :: { [Name] } +Exports : Var ',' Exports { $1 : $3 } + | Var { [$1] } + +Expr :: { Expr } +Expr : let Bindings in Expr { Let NonRec $2 $4 } + | letrec Bindings in Expr { Let Rec $2 $4 } + | 'λ' Binders '->' Expr { Lam $2 $4 } + | Application { $1 } + | Expr1 { $1 } + +Binders :: { [Name] } +Binders : Var Binders { $1 : $2 } + | Var { [$1] } + +Application :: { Expr } +Application : Expr1 AppArgs { foldl' App $1 $2 } + +-- TODO: Application can probably be written as a single rule, without AppArgs +AppArgs :: { [Expr] } +AppArgs : Expr1 AppArgs { $1 : $2 } + | Expr1 { [$1] } + +Expr1 :: { Expr } +Expr1 : litint { IntE $1 } + | Var { Var $1 } + | '(' Expr ')' { $2 } + +Var :: { Name } +Var : '(' sym ')' { $2 } + | name { $1 } + +Bindings :: { [Binding] } +Bindings : Var '=' Expr { [$1 := $3] } + +{ +parseError :: [CoreToken] -> a +parseError _ = error "fuuckk!" 
+} + diff --git a/src/Core/Syntax.hs b/src/Core/Syntax.hs index b08bf1d..cc4aa86 100644 --- a/src/Core/Syntax.hs +++ b/src/Core/Syntax.hs @@ -8,11 +8,13 @@ module Core.Syntax , Alter(..) , Name , ScDef(..) + , Module(..) , Program(..) , corePrelude , bindersOf , rhssOf , isAtomic + , insertModule ) where ---------------------------------------------------------------------------------- @@ -56,6 +58,9 @@ type Name = String data ScDef = ScDef Name [Name] Expr deriving (Show, Lift) +data Module = Module (Maybe (Name, [Name])) Program + deriving (Show, Lift) + newtype Program = Program [ScDef] deriving (Show, Lift) @@ -64,6 +69,15 @@ instance IsString Expr where ---------------------------------------------------------------------------------- +instance Pretty Program where + -- TODO: module header + prettyPrec (Program ss) _ = mconcat $ intersperse "\n\n" $ fmap pretty ss + +instance Pretty ScDef where + prettyPrec (ScDef n as e) _ = + mconcat (intersperse " " $ fmap IStr (n:as)) + <> " = " <> pretty e <> IBreak + instance Pretty Expr where prettyPrec (Var k) = withPrec maxBound $ IStr k prettyPrec (IntE n) = withPrec maxBound $ iShow n @@ -106,7 +120,7 @@ instance Pretty Binding where ---------------------------------------------------------------------------------- instance Semigroup Program where - (<>) = coerce $ (++) @ScDef + (<>) = coerce $ (<>) @[ScDef] instance Monoid Program where mempty = Program [] @@ -125,15 +139,19 @@ isAtomic _ = False ---------------------------------------------------------------------------------- -corePrelude :: Program -corePrelude = Program +corePrelude :: Module +corePrelude = Module (Just ("Prelude", [])) $ Program [ ScDef "id" ["x"] (Var "x") - , ScDef "K" ["x", "y"] (Var "x") - , ScDef "K1" ["x", "y"] (Var "y") - , ScDef "S" ["f", "g", "x"] (Var "f" :$ Var "x" :$ (Var "g" :$ Var "x")) + , ScDef "k" ["x", "y"] (Var "x") + , ScDef "k1" ["x", "y"] (Var "y") + , ScDef "succ" ["f", "g", "x"] (Var "f" :$ Var "x" :$ (Var "g" :$ Var 
"x")) , ScDef "compose" ["f", "g", "x"] (Var "f" :$ (Var "g" :$ Var "x")) , ScDef "twice" ["f", "x"] (Var "f" :$ (Var "f" :$ Var "x")) , ScDef "False" [] $ Con 0 0 , ScDef "True" [] $ Con 1 0 ] +-- TODO: export list awareness +insertModule :: Module -> Program -> Program +insertModule (Module _ m) p = p <> m + diff --git a/src/Core/TH.hs b/src/Core/TH.hs index f455bb1..d700275 100644 --- a/src/Core/TH.hs +++ b/src/Core/TH.hs @@ -27,13 +27,16 @@ coreExpr = QuasiQuoter , quoteDec = error "core quasiquotes may only be used in expressions" } -qCore :: String -> Q Exp -qCore s = case lexCore s >>= parseCore of - Success a -> lift a - Error e _ _ -> error e +qCore = undefined +qCoreExpr = undefined -qCoreExpr :: String -> Q Exp -qCoreExpr s = case lexCore s >>= parseCoreExpr of - Success a -> lift a - Error e _ _ -> error e +-- qCore :: String -> Q Exp +-- qCore s = case lexCore s >>= parseCore of +-- Success a -> lift a +-- Error e _ _ -> error e + +-- qCoreExpr :: String -> Q Exp +-- qCoreExpr s = case lexCore s >>= parseCoreExpr of +-- Success a -> lift a +-- Error e _ _ -> error e diff --git a/src/TIM.hs b/src/TIM.hs index 5e0ac15..73f846e 100644 --- a/src/TIM.hs +++ b/src/TIM.hs @@ -71,7 +71,7 @@ compile prog = Just $ TiState s d h g stats s = [mainAddr] d = [] (h,g) = buildInitialHeap defs - defs = prog <> corePrelude + defs = insertModule corePrelude prog stats = Stats 0 0 0 mainAddr = fromJust $ lookup "main" g @@ -422,91 +422,91 @@ hdbgProg p hio = do TiState [resAddr] _ h _ sts = last p' res = hLookupUnsafe resAddr h -letrecExample :: Program -letrecExample = Program - [ ScDef "pair" ["x","y","f"] $ "f" :$ "x" :$ "y" - , ScDef "fst" ["p"] $ "p" :$ "K" - , ScDef "snd" ["p"] $ "p" :$ "K1" - , ScDef "f" ["x","y"] $ - Let Rec - [ "a" := "pair" :$ "x" :$ "b" - , "b" := "pair" :$ "y" :$ "a" - ] - ("fst" :$ ("snd" :$ ("snd" :$ ("snd" :$ "a")))) - , ScDef "main" [] $ "f" :$ IntE 3 :$ IntE 4 - ] +-- letrecExample :: Program +-- letrecExample = Program +-- [ ScDef "pair" 
["x","y","f"] $ "f" :$ "x" :$ "y" +-- , ScDef "fst" ["p"] $ "p" :$ "K" +-- , ScDef "snd" ["p"] $ "p" :$ "K1" +-- , ScDef "f" ["x","y"] $ +-- Let Rec +-- [ "a" := "pair" :$ "x" :$ "b" +-- , "b" := "pair" :$ "y" :$ "a" +-- ] +-- ("fst" :$ ("snd" :$ ("snd" :$ ("snd" :$ "a")))) +-- , ScDef "main" [] $ "f" :$ IntE 3 :$ IntE 4 +-- ] -idExample :: Program -idExample = Program - [ ScDef "main" [] $ "id" :$ IntE 3 - ] +-- idExample :: Program +-- idExample = Program +-- [ ScDef "main" [] $ "id" :$ IntE 3 +-- ] -indExample1 :: Program -indExample1 = Program - [ ScDef "main" [] $ "twice" :$ "twice" :$ "id" :$ IntE 3 - ] +-- indExample1 :: Program +-- indExample1 = Program +-- [ ScDef "main" [] $ "twice" :$ "twice" :$ "id" :$ IntE 3 +-- ] -indExample2 :: Program -indExample2 = Program - [ ScDef "main" [] $ "twice" :$ "twice" :$ "twice" :$ "id" :$ IntE 3 - ] +-- indExample2 :: Program +-- indExample2 = Program +-- [ ScDef "main" [] $ "twice" :$ "twice" :$ "twice" :$ "id" :$ IntE 3 +-- ] -indExample3 :: Program -indExample3 = Program - [ ScDef "main" [] $ - Let Rec - [ "x" := IntE 2 - , "y" := "f" :$ "x" :$ "x" - ] - ("g" :$ "y" :$ "y") - , ScDef "f" ["a","b"] $ "b" - , ScDef "g" ["a","b"] $ "a" - ] +-- indExample3 :: Program +-- indExample3 = Program +-- [ ScDef "main" [] $ +-- Let Rec +-- [ "x" := IntE 2 +-- , "y" := "f" :$ "x" :$ "x" +-- ] +-- ("g" :$ "y" :$ "y") +-- , ScDef "f" ["a","b"] $ "b" +-- , ScDef "g" ["a","b"] $ "a" +-- ] -negExample1 :: Program -negExample1 = Program - [ ScDef "main" [] $ - "negate#" :$ ("id" :$ IntE 3) - ] +-- negExample1 :: Program +-- negExample1 = Program +-- [ ScDef "main" [] $ +-- "negate#" :$ ("id" :$ IntE 3) +-- ] -negExample2 :: Program -negExample2 = Program - [ ScDef "main" [] $ - "negate#" :$ IntE 3 - ] +-- negExample2 :: Program +-- negExample2 = Program +-- [ ScDef "main" [] $ +-- "negate#" :$ IntE 3 +-- ] -negExample3 :: Program -negExample3 = Program - [ ScDef "main" [] $ - "twice" :$ "negate#" :$ IntE 3 - ] +-- negExample3 :: 
Program +-- negExample3 = Program +-- [ ScDef "main" [] $ +-- "twice" :$ "negate#" :$ IntE 3 +-- ] -arithExample1 :: Program -arithExample1 = Program - [ ScDef "main" [] $ - "+#" :$ (IntE 3) :$ ("negate#" :$ (IntE 2)) - ] +-- arithExample1 :: Program +-- arithExample1 = Program +-- [ ScDef "main" [] $ +-- "+#" :$ (IntE 3) :$ ("negate#" :$ (IntE 2)) +-- ] -arithExample2 :: Program -arithExample2 = Program - [ ScDef "main" [] $ - "negate#" :$ ("+#" :$ (IntE 2) :$ ("*#" :$ IntE 5 :$ IntE 3)) - ] +-- arithExample2 :: Program +-- arithExample2 = Program +-- [ ScDef "main" [] $ +-- "negate#" :$ ("+#" :$ (IntE 2) :$ ("*#" :$ IntE 5 :$ IntE 3)) +-- ] -ifExample :: Program -ifExample = Program - [ ScDef "main" [] $ - "if#" :$ "True" :$ IntE 2 :$ IntE 3 - ] +-- ifExample :: Program +-- ifExample = Program +-- [ ScDef "main" [] $ +-- "if#" :$ "True" :$ IntE 2 :$ IntE 3 +-- ] -facExample :: Program -facExample = Program - [ ScDef "fac" ["n"] $ - "if#" :$ ("==#" :$ "n" :$ IntE 0) - :$ (IntE 1) - :$ ("*#" :$ "n" :$ ("fac" :$ ("-#" :$ "n" :$ IntE 1))) - , ScDef "main" [] $ "fac" :$ IntE 3 - ] +-- facExample :: Program +-- facExample = Program +-- [ ScDef "fac" ["n"] $ +-- "if#" :$ ("==#" :$ "n" :$ IntE 0) +-- :$ (IntE 1) +-- :$ ("*#" :$ "n" :$ ("fac" :$ ("-#" :$ "n" :$ IntE 1))) +-- , ScDef "main" [] $ "fac" :$ IntE 3 +-- ] ---------------------------------------------------------------------------------- From 8694ff2307c514214593270c344067bbcc63522c Mon Sep 17 00:00:00 2001 From: crumbtoo Date: Fri, 17 Nov 2023 16:42:06 -0700 Subject: [PATCH 3/7] i hate programming i hate computers i am going to drop out i am going to abandon this project --- src/Core/Lex.x | 270 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 219 insertions(+), 51 deletions(-) diff --git a/src/Core/Lex.x b/src/Core/Lex.x index d5cfa67..6ebb0ae 100644 --- a/src/Core/Lex.x +++ b/src/Core/Lex.x @@ -1,56 +1,127 @@ { +{-# LANGUAGE TemplateHaskell #-} module Core.Lex - ( CoreToken(..) 
- , lexCore + ( lexCore + , lexCore' + , CoreToken(..) ) where - +import Data.Char (chr) +import Debug.Trace import Core.Syntax import Lens.Micro +import Lens.Micro.TH } %wrapper "monadUserState" -$digit = 0-9 -$alpha = [a-zA-Z] +$whitechar = [ \t\n\r\f\v] +$special = [\(\)\,\;\[\]\{\}] -$special = [\*\^\%\$#@!\<\>\+\-\=\/&\|\\\.] +$ascdigit = 0-9 +$unidigit = [] -- TODO +$digit = [$ascdigit $unidigit] -$nameTail = [ $alpha $digit \_ \' ] +$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~] +$unisymbol = [] -- TODO +$symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\'] + +$large = [A-Z \xc0-\xd6 \xd8-\xde] +$small = [a-z \xdf-\xf6 \xf8-\xff \_] +$alpha = [$small $large] + +$graphic = [$small $large $symbol $digit $special \:\"\'] + +$octit = 0-7 +$hexit = [0-9 A-F a-f] +$namechar = [$alpha $digit \'] +$symchar = [$symbol \:] +$nl = [\n\r] +$white_no_nl = $white # $nl + +@reservedid = + case|data|do|import|in|let|letrec|module|of|where + +@reservedop = + "=" | \\ | "->" + +@varname = $small $namechar* +@conname = $large $namechar* +@varsym = $symbol $symchar* +@consym = \: $symchar* + +@decimal = $digit+ rlp :- --- tokens :- --- $white+ ; --- "--" ~$special .* ; --- module { const TokenModule } --- where { const TokenWhere } --- let { const TokenLet } --- letrec { const TokenLetrec } --- in { const TokenIn } --- case { const TokenCase } --- of { const TokenOf } --- $digit+ { TokenLitInt . 
read @Int } --- "," { const TokenComma } --- "(" { const TokenLParen } --- ")" { const TokenRParen } --- "{" { const TokenLBrace } --- "}" { const TokenRBrace } --- "\\" { const TokenLambda } --- "λ" { const TokenLambda } --- ";" { const TokenSemicolon } --- $special+ { lexSym } --- $alpha $nameTail* { TokenName } +-- everywhere: skip whitespace +$white_no_nl+ { skip } -<0> \n {begin bol} +"--"\-*[^$symbol].* { skip } + +"{-" { nestedComment } + +-- syntactic symbols +<0> +{ + "(" { constTok TokenLParen } + ")" { constTok TokenRParen } + "{" { lbrace } + "}" { rbrace } + ";" { constTok TokenSemicolon } + "," { constTok TokenComma } +} + +-- keywords +-- see commentary on the layout system +<0> +{ + "module" { constTok TokenModule } + "let" { constTok TokenLet `andBegin` layout } + "letrec" { constTok TokenLet `andBegin` layout } + "case" { constTok TokenCase } + "of" { constTok TokenOf `andBegin` layout } + "in" { constTok TokenIn } + "where" { constTok TokenWhere } +} + +-- reserved symbols +<0> +{ + "=" { constTok TokenEquals } +} + +-- identifiers +<0> +{ + -- TODO: qualified names + @varname { lexWith TokenVarName } + @conname { lexWith TokenConName } + @varsym { lexWith TokenVarSym } +} + +<0> \n { begin bol } { - \n ; - () { doBOL } + \n { skip } + () { doBOL } +} + + +{ + -- TODO: does not respect comments nor pragmas + \{ { doLayoutBrace } + \n { skip } + () { newLayoutContext } } { +data Located a = Located AlexPosn a + deriving Show + +constTok :: t -> AlexInput -> Int -> Alex (Located t) +constTok t (p,_,_,_) _ = pure $ Located p t data CoreToken = TokenLet | TokenLetrec @@ -63,45 +134,142 @@ data CoreToken = TokenLet | TokenLambda | TokenArrow | TokenLitInt Int - | TokenName Name - | TokenSym Name + | TokenVarName Name + | TokenConName Name + | TokenName Name -- temp + | TokenVarSym Name + | TokenConSym Name + | TokenSym Name -- temp | TokenEquals | TokenLParen | TokenRParen | TokenLBrace | TokenRBrace | TokenSemicolon + | TokenEOF deriving Show -data 
LayoutContext = NoLayout - | Layout Int +data LayoutContext = Layout Int + | NoLayout data AlexUserState = AlexUserState { _ausContext :: [LayoutContext] + , _ausStack :: [Int] } ausContext :: Lens' AlexUserState [LayoutContext] -ausContext = lens _ausContext (\s b -> s { _ausContext = b }) +ausContext f (AlexUserState ctx stk) + = fmap + (\a -> AlexUserState a stk) (f ctx) +{-# INLINE ausContext #-} -alexInitUserState = AlexUserState - { _ausContext = [] - } +ausStack :: Lens' AlexUserState [Int] +ausStack f (AlexUserState ctx stk) + = fmap + (\a -> AlexUserState ctx a) (f stk) +{-# INLINE ausStack #-} --- lexCore :: String -> [CoreToken] -lexCore = alexScanTokens +pushContext :: LayoutContext -> Alex () +pushContext c = do + st <- alexGetUserState + alexSetUserState $ st { _ausContext = c : _ausContext st } --- lexSym :: String -> CoreToken --- lexSym "=" = TokenEquals --- lexSym "\\" = TokenLambda --- lexSym "->" = TokenArrow --- lexSym s = TokenSym s +popContext :: Alex () +popContext = do + st <- alexGetUserState + alexSetUserState $ st { _ausContext = drop 1 (_ausContext st) } -lexSym = undefined +getContext :: Alex [LayoutContext] +getContext = do + st <- alexGetUserState + pure $ _ausContext st +type Lexer = AlexInput -> Int -> Alex (Located CoreToken) + +alexEOF :: Alex (Located CoreToken) +alexEOF = Alex $ \ st@(AlexState { alex_pos = p }) -> Right (st, Located p TokenEOF) + +alexInitUserState :: AlexUserState +alexInitUserState = AlexUserState [] [bol,0] + +nestedComment :: Lexer +nestedComment _ _ = undefined + +lexStream :: Alex [Located CoreToken] +lexStream = do + l <- alexMonadScan + case l of + Located _ TokenEOF -> pure [l] + _ -> (l:) <$> lexStream + +lexCore :: String -> Either String [Located CoreToken] +lexCore s = runAlex s lexStream + +lexCore' :: String -> Either String [CoreToken] +lexCore' s = fmap f <$> lexCore s + where f (Located _ t) = t + +lexWith :: (String -> CoreToken) -> Lexer +lexWith f (p,_,_,s) l = pure $ Located p (f $ take l 
s) + +lexToken :: Alex (Located CoreToken) +lexToken = alexMonadScan + +getSrcCol :: Alex Int +getSrcCol = Alex $ \ st -> + let AlexPn _ _ col = alex_pos st + in Right (st, col) + +doLayoutBrace :: Lexer +doLayoutBrace (p,_,_,s) _ = undefined + +lbrace :: Lexer +lbrace (p,_,_,_) _ = do + pushContext NoLayout + pure $ Located p TokenLBrace + +rbrace :: Lexer +rbrace (p,_,_,_) _ = do + popContext + pure $ Located p TokenRBrace + +setLexState :: Int -> Alex () +setLexState n = Alex $ \st -> Right (st { alex_scd = n }, ()) + +modifyUst :: (AlexUserState -> AlexUserState) -> Alex () +modifyUst f = do + st <- alexGetUserState + alexSetUserState $ f st + +getUst :: Alex AlexUserState +getUst = alexGetUserState + +pushLexState :: Int -> Alex () +pushLexState n = modifyUst (ausStack %~ (n:)) *> setLexState n + +popLexState :: Alex Int +popLexState = do + modifyUst (ausStack %~ drop 1) + ust <- getUst + let s = case ust ^. ausStack of + (a:_) -> a + _ -> 0 + setLexState s + pure s + +newLayoutContext :: Lexer +newLayoutContext (p,_,_,_) _ = do + _ <- popLexState + ctx <- getContext + off <- getSrcCol + case ctx of + Layout prev : _ | off <= prev + -> error $ show prev + _ -> do + pushContext $ Layout off + pure $ Located p TokenLBrace + +doBOL :: Lexer doBOL = undefined -alexEOF = undefined - -alexScanTokens = undefined - } From 48aa05caad5c6281f525668f0f9bc71494d596da Mon Sep 17 00:00:00 2001 From: crumbtoo Date: Mon, 20 Nov 2023 11:17:27 -0700 Subject: [PATCH 4/7] some writing on layouts --- docs/src/commentary/layout-lexing.rst | 169 +++++++++++++++++++++++++- docs/src/glossary.rst | 15 +++ docs/src/index.rst | 6 + 3 files changed, 188 insertions(+), 2 deletions(-) create mode 100644 docs/src/glossary.rst diff --git a/docs/src/commentary/layout-lexing.rst b/docs/src/commentary/layout-lexing.rst index 5a1f8ab..4c14735 100644 --- a/docs/src/commentary/layout-lexing.rst +++ b/docs/src/commentary/layout-lexing.rst @@ -1,3 +1,168 @@ -Parsing and the Layout System 
-============================= +Lexing, Parsing, and Layouts +============================ +The C-style languages of my previous experiences have all had quite trivial +lexical analysis stages, peaking in complexity when I streamed tokens lazily in +C. The task of tokenising a C-style language is very simple in description: you +ignore all whitespace and point out what you recognise. If you don't recognise +something, check if it's a literal or an identifier. Should it be neither, +return an error. + +On paper, both lexing and parsing a Haskell-like language seem to pose a few +greater challenges. Listed by ascending intimidation factor, some of the +potential roadblocks on my mind before making an attempt were: + +* Operators; Haskell has not only user-defined infix operators, but user-defined + precedence levels and associativities. I recall using an algorithm that looked + up infix, prefix, postfix, and even mixfix operators up in a global table to + call their appropriate parser (if their precedence was appropriate, also + stored in the table). I never modified the table at runtime, however this + could be a very nice solution for Haskell. + +* Context-sensitive keywords; Haskell allows for some words to be used as identifiers in + appropriate contexts, such as :code:`family`, :code:`role`, :code:`as`. + Reading a note_ found in `GHC's lexer + `_, + it appears that keywords are only considered in bodies for which their use is + relevant, e.g. :code:`family` and :code:`role` in type declarations, + :code:`as` after :code:`case`; :code:`if`, :code:`then`, and :code:`else` in + expressions, etc. + +* Whitespace sensitivity; While I was comfortable with the idea of a system + similar to Python's INDENT/DEDENT tokens, Haskell seemed to use whitespace to + section code in a way that *felt* different. + +.. 
_note: https://gitlab.haskell.org/ghc/ghc/-/wikis/commentary/coding-style#2-using-notes + +After a bit of thought and research, whitespace sensitivity in the form of +*layouts* as Haskell and I will refer to them as, are easily the scariest thing +on this list -- however they are achievable! + +A Lexical Primer: Python +************************ + +We will compare and contrast with Python's lexical analysis. Much to my dismay, +Python uses newlines and indentation to separate statements and resolve scope +instead of the traditional semicolons and braces found in C-style languages (we +may generally refer to these C-style languages as *explicitly-sectioned*). +Internally during tokenisation, when the Python lexer begins a new line, they +compare the indentation of the new line with that of the previous and apply the +following rules: + +1. If the new line has greater indentation than the previous, insert an INDENT + token and push the new line's indentation level onto the indentation stack + (the stack is initialised with an indentation level of zero). + +2. If the new line has lesser indentation than the previous, pop the stack until + the top of the stack is greater than the new line's indentation level. A + DEDENT token is inserted for each level popped. + +3. If the indentation is equal, insert a NEWLINE token to terminate the previous + line, and leave it at that! + +Parsing Python with the INDENT, DEDENT, and NEWLINE tokens is identical to +parsing a language with braces and semicolons. This is a solution pretty in line +with Python's philosophy of the "one correct answer" (TODO: this needs a +source). In developing our *layout* rules, we will follow in the pattern of +translating the whitespace-sensitive source language to an explicitly sectioned +language. + +But What About Haskell? +*********************** + +We saw that Python, the most notable example of an implicitly sectioned +language, is pretty simple to lex. 
Why then am I so afraid of Haskell's layouts?
+To be frank, I'm far less scared after asking myself this -- however there are
+certainly some new complexities that needn't concern Python. Haskell has
+implicit line *continuation*: forms written over multiple lines; indentation
+styles often seen in Haskell are somewhat esoteric compared to Python's
+"s/[{};]//".
+
+.. code-block:: haskell
+
+    -- line continuation
+    something = this is a
+        single expression
+
+    -- an extremely common style found in haskell
+    data Python = Users
+        { are :: Crying
+        , right :: About
+        , now :: Sorry
+        }
+
+    -- another formatting oddity
+    -- note that this is not line continuation!
+    -- `look at`, `this`, and `alignment`
+    -- are all separate expressions!
+    anotherThing = do look at
+                      this
+                      alignment
+
+But enough fear, let's actually think about implementation. Firstly, some
+formality: what do we mean when we say layout? We will define layout as the
+rules we apply to an implicitly-sectioned language in order to yield one that is
+explicitly-sectioned. We will also define indentation of a lexeme as the column
+number of its first character.
+
+Thankfully for us, our entry point is quite clear; layouts only appear after a
+select few keywords, (with a minor exception; TODO: elaborate) being :code:`let`
+(followed by supercombinators), :code:`where` (followed by supercombinators),
+:code:`do` (followed by expressions), and :code:`of` (followed by alternatives)
+(TODO: all of these terms need linked glossary entries). Under this assumption,
+we give the following rule:
+
+1. If a :code:`let`, :code:`where`, :code:`do`, or :code:`of` keyword is not
+   followed by the lexeme :code:`{`, the token :math:`\{n\}` is inserted after
+   the keyword, where :math:`n` is the indentation of the next lexeme if there
+   is one, or 0 if the end of file has been reached. 
+
+Henceforth :math:`\{n\}` will denote the token representing the beginning of a
+layout; similar in function to a brace, but it stores the indentation level for
+subsequent lines to compare with. We must introduce an additional input to the
+function handling layouts. Obviously, such a function would require the input
+string, but a helpful book-keeping tool which we will make good use of is a
+stack of "layout contexts", describing the current cascade of layouts. Each
+element is either a :code:`NoLayout`, indicating an explicit layout (i.e. the
+programmer inserted semicolons and braces herself) or a :code:`Layout n` where
+:code:`n` is a non-negative integer representing the indentation level of the
+enclosing context.
+
+.. code-block:: haskell
+
+    f x -- layout stack: []
+        = let -- layout keyword; remember indentation of next token
+            y = w * w -- layout stack: [Layout 10]
+            w = x + x
+          in do -- layout keyword; next token is a brace!
+            { -- layout stack: [NoLayout]
+            pure }
+
+In the code seen above, notice that :code:`let` allows for multiple definitions,
+separated by a newline. We accommodate this with a token :math:`\langle n
+\rangle` which complements :math:`\{n\}` in how it functions as a closing brace
+that stores indentation. We give a rule to describe the source of such a token:
+
+2. When the first lexeme on a line is preceded by only whitespace a
+   :math:`\langle n \rangle` token is inserted before the lexeme, where
+   :math:`n` is the indentation of the lexeme, provided that it is not, as a
+   consequence of rule 1 or rule 3 (as we'll see), preceded by {n}.
+
+Lastly, to handle the top level we will initialise the stack with a
+:math:`\{n\}` where :math:`n` is the indentation of the first lexeme.
+
+3. If the first lexeme of a module is not '{' or :code:`module`, then it is
+   preceded by :math:`\{n\}` where :math:`n` is the indentation of the lexeme. 
+
+For a more pedantic description of the layout system, see `chapter 10
+`_ of the
+2010 Haskell Report, which I **heavily** referenced here.
+
+References
+----------
+
+* `Python's lexical analysis
+  `_
+
+* `Haskell Syntax Reference
+  `_
diff --git a/docs/src/glossary.rst b/docs/src/glossary.rst
new file mode 100644
index 0000000..4cb9aaf
--- /dev/null
+++ b/docs/src/glossary.rst
@@ -0,0 +1,15 @@
+Glossary
+========
+
+Haskell and Haskell culture are infamous for using scary mathematical terms for
+simple ideas. Please excuse us, it's really fun :3.
+
+.. glossary::
+
+    supercombinator
+        An expression with no free variables. For most purposes, just think of a
+        top-level definition.
+
+    case alternative
+        A possible match in a case expression (TODO: example)
+
diff --git a/docs/src/index.rst b/docs/src/index.rst
index a0ab572..707ee37 100644
--- a/docs/src/index.rst
+++ b/docs/src/index.rst
@@ -6,6 +6,12 @@ Contents
 .. toctree::
    :maxdepth: 2
+   :caption: Index
+
+   glossary.rst
+
+.. toctree::
+   :maxdepth: 1
    :caption: Commentary
    :glob:

From 717effc57990a16d3a9f8edb0b58f62219125391 Mon Sep 17 00:00:00 2001
From: crumbtoo
Date: Mon, 20 Nov 2023 11:29:16 -0700
Subject: [PATCH 5/7] uh

---
 docs/src/commentary/layout-lexing.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/src/commentary/layout-lexing.rst b/docs/src/commentary/layout-lexing.rst
index 4c14735..b369475 100644
--- a/docs/src/commentary/layout-lexing.rst
+++ b/docs/src/commentary/layout-lexing.rst
@@ -154,7 +154,9 @@ Lastly, to handle the top level we will initialise the stack with a
 3. If the first lexeme of a module is not '{' or :code:`module`, then it is
    preceded by :math:`\{n\}` where :math:`n` is the indentation of the lexeme.
 
-For a more pedantic description of the layout system, see `chapter 10
+This set of rules is adequate enough to satisfy our basic concerns about line
+continuations and layout lists. 
For a more pedantic description of the layout +system, see `chapter 10 `_ of the 2010 Haskell Report, which I **heavily** referenced here. From c15e67869ec114f4ae2030d0dd69ace140d964aa Mon Sep 17 00:00:00 2001 From: crumbtoo Date: Mon, 20 Nov 2023 12:51:04 -0700 Subject: [PATCH 6/7] layout lexing is good enough --- src/Core/Lex.x | 106 ++++++++++++++++++++++++++----------------------- 1 file changed, 56 insertions(+), 50 deletions(-) diff --git a/src/Core/Lex.x b/src/Core/Lex.x index 6ebb0ae..090dd95 100644 --- a/src/Core/Lex.x +++ b/src/Core/Lex.x @@ -76,11 +76,11 @@ $white_no_nl+ { skip } -- see commentary on the layout system <0> { - "module" { constTok TokenModule } - "let" { constTok TokenLet `andBegin` layout } - "letrec" { constTok TokenLet `andBegin` layout } + "let" { constTok TokenLet `andBegin` layout_keyword } + "letrec" { constTok TokenLet `andBegin` layout_keyword } + "of" { constTok TokenOf `andBegin` layout_keyword } "case" { constTok TokenCase } - "of" { constTok TokenOf `andBegin` layout } + "module" { constTok TokenModule } "in" { constTok TokenIn } "where" { constTok TokenWhere } } @@ -89,6 +89,7 @@ $white_no_nl+ { skip } <0> { "=" { constTok TokenEquals } + "->" { constTok TokenArrow } } -- identifiers @@ -105,15 +106,14 @@ $white_no_nl+ { skip } { \n { skip } - () { doBOL } + () { doBol `andBegin` 0 } } - + { - -- TODO: does not respect comments nor pragmas - \{ { doLayoutBrace } - \n { skip } - () { newLayoutContext } + $white { skip } + \{ { lbrace `andBegin` 0 } + () { noBrace `andBegin` 0 } } { @@ -145,30 +145,26 @@ data CoreToken = TokenLet | TokenRParen | TokenLBrace | TokenRBrace + | TokenIndent Int + | TokenDedent Int | TokenSemicolon | TokenEOF deriving Show data LayoutContext = Layout Int | NoLayout + deriving Show data AlexUserState = AlexUserState { _ausContext :: [LayoutContext] - , _ausStack :: [Int] } ausContext :: Lens' AlexUserState [LayoutContext] -ausContext f (AlexUserState ctx stk) +ausContext f (AlexUserState ctx) = fmap 
- (\a -> AlexUserState a stk) (f ctx) + (\a -> AlexUserState a) (f ctx) {-# INLINE ausContext #-} -ausStack :: Lens' AlexUserState [Int] -ausStack f (AlexUserState ctx stk) - = fmap - (\a -> AlexUserState ctx a) (f stk) -{-# INLINE ausStack #-} - pushContext :: LayoutContext -> Alex () pushContext c = do st <- alexGetUserState @@ -190,7 +186,7 @@ alexEOF :: Alex (Located CoreToken) alexEOF = Alex $ \ st@(AlexState { alex_pos = p }) -> Right (st, Located p TokenEOF) alexInitUserState :: AlexUserState -alexInitUserState = AlexUserState [] [bol,0] +alexInitUserState = AlexUserState [Layout 1] nestedComment :: Lexer nestedComment _ _ = undefined @@ -203,7 +199,8 @@ lexStream = do _ -> (l:) <$> lexStream lexCore :: String -> Either String [Located CoreToken] -lexCore s = runAlex s lexStream +lexCore s = runAlex s (alexSetStartCode layout_keyword *> lexStream) +-- temp; does not support module header lexCore' :: String -> Either String [CoreToken] lexCore' s = fmap f <$> lexCore s @@ -220,9 +217,6 @@ getSrcCol = Alex $ \ st -> let AlexPn _ _ col = alex_pos st in Right (st, col) -doLayoutBrace :: Lexer -doLayoutBrace (p,_,_,s) _ = undefined - lbrace :: Lexer lbrace (p,_,_,_) _ = do pushContext NoLayout @@ -233,8 +227,14 @@ rbrace (p,_,_,_) _ = do popContext pure $ Located p TokenRBrace -setLexState :: Int -> Alex () -setLexState n = Alex $ \st -> Right (st { alex_scd = n }, ()) +insRBrace :: AlexPosn -> Alex (Located CoreToken) +insRBrace p = do + popContext + pure $ Located p TokenRBrace + +insSemi :: AlexPosn -> Alex (Located CoreToken) +insSemi p = do + pure $ Located p TokenSemicolon modifyUst :: (AlexUserState -> AlexUserState) -> Alex () modifyUst f = do @@ -244,32 +244,38 @@ modifyUst f = do getUst :: Alex AlexUserState getUst = alexGetUserState -pushLexState :: Int -> Alex () -pushLexState n = modifyUst (ausStack %~ (n:)) *> setLexState n - -popLexState :: Alex Int -popLexState = do - modifyUst (ausStack %~ drop 1) - ust <- getUst - let s = case ust ^. 
ausStack of - (a:_) -> a - _ -> 0 - setLexState s - pure s - newLayoutContext :: Lexer newLayoutContext (p,_,_,_) _ = do - _ <- popLexState - ctx <- getContext - off <- getSrcCol - case ctx of - Layout prev : _ | off <= prev - -> error $ show prev - _ -> do - pushContext $ Layout off - pure $ Located p TokenLBrace + undefined -doBOL :: Lexer -doBOL = undefined +noBrace :: Lexer +noBrace (p,_,_,_) l = do + col <- getSrcCol + pushContext (Layout col) + pure $ Located p TokenLBrace + +getOffside :: Alex Ordering +getOffside = do + ctx <- getContext + m <- getSrcCol + case ctx of + Layout n : _ -> pure $ m `compare` n + _ -> pure GT + +doBol :: Lexer +doBol (p,c,_,s) l = do + off <- getOffside + -- traceM $ show (p, c, s) + col <- getSrcCol + traceM $ show (s, p, col, off) + case off of + LT -> insRBrace p + EQ -> insSemi p + _ -> lexToken + +testTmp :: IO (Either String [CoreToken]) +testTmp = do + s <- readFile "/tmp/t.hs" + pure $ lexCore' s } From 45952ef30ebfc6467cecb7feaaff4e28b7875ddf Mon Sep 17 00:00:00 2001 From: crumbtoo Date: Mon, 20 Nov 2023 14:08:18 -0700 Subject: [PATCH 7/7] good enough. good enough. good enough. good enoguh.jjhbjhfsfbsjhdsfjbhsdbjhdfsbhjlbahjfdaojuai --- src/Compiler/RLPC.hs | 10 ++++++ src/Core/Lex.x | 21 +++++++----- src/Core/Parse.y | 82 ++++++++++++++++++++++++++++++++++---------- 3 files changed, 86 insertions(+), 27 deletions(-) create mode 100644 src/Compiler/RLPC.hs diff --git a/src/Compiler/RLPC.hs b/src/Compiler/RLPC.hs new file mode 100644 index 0000000..08d991e --- /dev/null +++ b/src/Compiler/RLPC.hs @@ -0,0 +1,10 @@ +{-# LANGUAGE GeneralisedNewtypeDeriving #-} +module Compiler.RLPC + ( RLPC(..) + ) + where + +-- TODO: fancy errors +newtype RLPC a = RLPC { runRLPC :: Either String a } + deriving (Functor, Applicative, Monad) + diff --git a/src/Core/Lex.x b/src/Core/Lex.x index 090dd95..fa40921 100644 --- a/src/Core/Lex.x +++ b/src/Core/Lex.x @@ -4,6 +4,7 @@ module Core.Lex ( lexCore , lexCore' , CoreToken(..) 
+ , lexTmp ) where import Data.Char (chr) @@ -81,7 +82,7 @@ $white_no_nl+ { skip } "of" { constTok TokenOf `andBegin` layout_keyword } "case" { constTok TokenCase } "module" { constTok TokenModule } - "in" { constTok TokenIn } + "in" { letin } "where" { constTok TokenWhere } } @@ -136,10 +137,8 @@ data CoreToken = TokenLet | TokenLitInt Int | TokenVarName Name | TokenConName Name - | TokenName Name -- temp | TokenVarSym Name | TokenConSym Name - | TokenSym Name -- temp | TokenEquals | TokenLParen | TokenRParen @@ -265,17 +264,21 @@ getOffside = do doBol :: Lexer doBol (p,c,_,s) l = do off <- getOffside - -- traceM $ show (p, c, s) col <- getSrcCol - traceM $ show (s, p, col, off) case off of LT -> insRBrace p EQ -> insSemi p _ -> lexToken -testTmp :: IO (Either String [CoreToken]) -testTmp = do - s <- readFile "/tmp/t.hs" - pure $ lexCore' s +letin :: Lexer +letin (p,_,_,_) l = do + popContext + pure $ Located p TokenIn +lexTmp :: IO [CoreToken] +lexTmp = do + s <- readFile "/tmp/t.hs" + case lexCore' s of + Left e -> error e + Right a -> pure a } diff --git a/src/Core/Parse.y b/src/Core/Parse.y index 4077528..342acf4 100644 --- a/src/Core/Parse.y +++ b/src/Core/Parse.y @@ -1,20 +1,24 @@ +-- TODO: resolve shift/reduce conflicts { module Core.Parse ( parseCore - -- , parseCoreExpr + , parseCoreExpr , module Core.Lex -- temp convenience + , parseTmp ) where import Data.Foldable (foldl') import Core.Syntax import Core.Lex +import Compiler.RLPC } -%name parseCore +%name parseCore Module %name parseCoreExpr Expr %tokentype { CoreToken } %error { parseError } +%monad { RLPC } %token let { TokenLet } @@ -24,8 +28,10 @@ import Core.Lex ',' { TokenComma } in { TokenIn } litint { TokenLitInt $$ } - name { TokenName $$ } - sym { TokenSym $$ } + varname { TokenVarName $$ } + varsym { TokenVarSym $$ } + conname { TokenConName $$ } + consym { TokenConSym $$ } 'λ' { TokenLambda } '->' { TokenArrow } '=' { TokenEquals } @@ -34,23 +40,43 @@ import Core.Lex '{' { TokenLBrace } '}' { 
TokenRBrace } ';' { TokenSemicolon } + eof { TokenEOF } %% -ExportList :: { [Name] } -ExportList : '(' Exports ')' { $2 } +Module :: { Module } +Module : module conname where Program Eof { Module (Just ($2, [])) $4 } + | Program Eof { Module Nothing $1 } -Exports :: { [Name] } -Exports : Var ',' Exports { $1 : $3 } - | Var { [$1] } +Eof :: { () } +Eof : eof { () } + | error { () } + +Program :: { Program } +Program : '{' ScDefs Close { Program $2 } + +ScDefs :: { [ScDef] } +ScDefs : ScDef ';' ScDefs { $1 : $3 } + | {- epsilon -} { [] } + +ScDef :: { ScDef } +ScDef : Var ParList '=' Expr { ScDef $1 $2 $4 } + +ParList :: { [Name] } +ParList : Var ParList { $1 : $2 } + | {- epsilon -} { [] } Expr :: { Expr } -Expr : let Bindings in Expr { Let NonRec $2 $4 } - | letrec Bindings in Expr { Let Rec $2 $4 } +Expr : let '{' Bindings Close in Expr { Let NonRec $3 $6 } + | letrec '{' Bindings Close in Expr { Let Rec $3 $6 } | 'λ' Binders '->' Expr { Lam $2 $4 } | Application { $1 } | Expr1 { $1 } +Close :: { () } +Close : '}' { () } + | error { () } + Binders :: { [Name] } Binders : Var Binders { $1 : $2 } | Var { [$1] } @@ -65,18 +91,38 @@ AppArgs : Expr1 AppArgs { $1 : $2 } Expr1 :: { Expr } Expr1 : litint { IntE $1 } - | Var { Var $1 } + | Id { Var $1 } | '(' Expr ')' { $2 } -Var :: { Name } -Var : '(' sym ')' { $2 } - | name { $1 } - Bindings :: { [Binding] } -Bindings : Var '=' Expr { [$1 := $3] } +Bindings : Binding ';' Bindings { $1 : $3 } + | Binding ';' { [$1] } + | Binding { [$1] } + +Binding :: { Binding } +Binding : Var '=' Expr { $1 := $3 } + +Id :: { Name } +Id : Var { $1 } + | Con { $1 } + +Var :: { Name } +Var : '(' varsym ')' { $2 } + | varname { $1 } + +Con :: { Name } +Con : '(' consym ')' { $2 } + | conname { $1 } { parseError :: [CoreToken] -> a -parseError _ = error "fuuckk!" +parseError ts = error $ "parse error at token: " <> show (head ts) + +parseTmp :: IO (Module) +parseTmp = do + s <- readFile "/tmp/t.hs" + case lexCore' s >>= runRLPC . 
parseCore of + Left e -> error e + Right a -> pure a }