From b28daefc3c3b69e755343146c325458d6ea7b785 Mon Sep 17 00:00:00 2001 From: crumbtoo Date: Wed, 15 Nov 2023 14:31:27 -0700 Subject: [PATCH] lexer --- docs/src/commentary/parser.rst | 3 + src/Control/Parser.hs | 50 ++++++++++-- src/Core/Lex.hs | 137 ++++++++++++++++++++++++++++++++- src/Core/Parse.hs | 7 ++ 4 files changed, 190 insertions(+), 7 deletions(-) create mode 100644 docs/src/commentary/parser.rst diff --git a/docs/src/commentary/parser.rst b/docs/src/commentary/parser.rst new file mode 100644 index 0000000..27d0213 --- /dev/null +++ b/docs/src/commentary/parser.rst @@ -0,0 +1,3 @@ +Parser Combinators +================== + diff --git a/src/Control/Parser.hs b/src/Control/Parser.hs index 74d78de..56860bb 100644 --- a/src/Control/Parser.hs +++ b/src/Control/Parser.hs @@ -1,6 +1,6 @@ {-| Module : Control.Parser -Description : The parser *object* +Description : Parser combinators This module implements an interface for parser *types*, used in lexical analysis and parsing. For the implementation of the rlp language's parser, see 'Parse'. @@ -8,13 +8,27 @@ and parsing. For the implementation of the rlp language's parser, see 'Parse'. {-# LANGUAGE GeneralisedNewtypeDeriving #-} {-# LANGUAGE BlockArguments, LambdaCase #-} module Control.Parser - ( + ( ParserT + , runParserT + + , satisfy + , char + , spaces + , nl + , surround + , string + + -- * Control.Applicative re-exports + , (<|>) + , many + , some ) where ---------------------------------------------------------------------------------- import Control.Applicative import Control.Arrow ((***)) import Control.Monad +import Data.Char ---------------------------------------------------------------------------------- newtype ParserT i m o = ParserT { runParserT :: i -> m (i, o) } @@ -40,11 +54,35 @@ instance (Monad m) => Monad (ParserT i m) where (i',a) <- runParserT m i runParserT (k a) i' +instance (MonadFail m) => MonadFail (ParserT i m) where + fail s = ParserT $ \i -> fail s ---------------------------------------------------------------------------------- -- TODO: generalise to non-lists -satisfy :: (MonadPlus m, Eq a) => a -> ParserT [a] m a -satisfy c = ParserT $ \case - (x:xs) | x == c -> pure (xs,x) - _ -> empty +satisfy :: (MonadPlus m) => (a -> Bool) -> ParserT [a] m a +satisfy p = ParserT $ \case + (x:xs) | p x -> pure (xs,x) + _ -> empty + + +char :: (MonadPlus m, Eq a) => a -> ParserT [a] m a +char c = satisfy (==c) + +string :: (MonadPlus m, Eq a) => [a] -> ParserT [a] m [a] +string s = sequenceA $ char <$> s + +---------------------------------------------------------------------------------- + +surround :: (MonadPlus m) + => ParserT i m l + -> ParserT i m r + -> ParserT i m c + -> ParserT i m c +surround l r c = l *> c <* r + +spaces :: (MonadPlus m) => ParserT String m Int +spaces = length <$> many (satisfy (==' ')) + +nl :: (MonadPlus m) => ParserT String m Int +nl = undefined diff --git a/src/Core/Lex.hs b/src/Core/Lex.hs index a8c5903..eb82be3 100644 --- a/src/Core/Lex.hs +++ b/src/Core/Lex.hs @@ -1,6 +1,141 @@ +{-| +Module : Core.Lex +Description : Core language lexer +-} module Core.Lex - ( + ( CoreToken + , Result + , lexCore ) where ---------------------------------------------------------------------------------- +import Control.Parser +import Control.Applicative +import Control.Monad +import Data.Char +import Data.Functor +import Core.Syntax (Name) +---------------------------------------------------------------------------------- + +type CoreLexer = ParserT String Result + +data Result a = Success a + | Error String Int Int + deriving (Show) + +-- TODO: whitespace-sensitive layout +data CoreToken = TokLitInt Int + | TokEquals + | TokLBrace + | TokRBrace + | TokSemicolon + | TokLParen + | TokRParen + | TokLambda + | TokArrow + | TokCase + | TokOf + | TokLet + | TokLetRec + | TokIn + | TokCName Name + | TokName Name + deriving (Show) + +instance Functor Result where + fmap f (Success a) = Success (f a) + fmap _ (Error s l c) = Error s l c + +instance Foldable Result where + foldr f z (Success a) = a `f` z + foldr _ z (Error _ _ _) = z + +instance Traversable Result where + traverse k (Success a) = fmap Success (k a) + traverse _ (Error s l c) = pure $ Error s l c + +instance Applicative Result where + pure = Success + + liftA2 f (Success a) (Success b) = Success $ f a b + liftA2 _ (Error s l c) _ = Error s l c + liftA2 _ _ (Error s l c) = Error s l c + +instance Alternative Result where + empty = Error "unknown failure" 0 0 + + (Success a) <|> _ = Success a + _ <|> b = b + +instance Monad Result where + Success a >>= k = k a + Error s l c >>= _ = Error s l c + +instance MonadPlus Result + +instance MonadFail Result where + fail s = Error s 0 0 + +---------------------------------------------------------------------------------- + +lexCore :: String -> Result [CoreToken] +lexCore = fmap snd . runParserT (many (token <* spaces)) + +token :: CoreLexer CoreToken +token = litInt + <|> lbrace + <|> rbrace + <|> semicolon + <|> lparen + <|> rparen + <|> equals + <|> lambda + <|> arrow + <|> _case + <|> _of + <|> _let + <|> letrec + <|> _in + <|> cName + <|> name + +---------------------------------------------------------------------------------- + +litInt, equals, lparen, rparen, lambda, + arrow, _case, _of, _let, letrec, _in, cName, name :: CoreLexer CoreToken + +litInt = TokLitInt . value <$> some (satisfy isDigit) + where + value = foldl (\acc a -> 10*acc + digitToInt a) 0 + +semicolon = (semis <|> nls) $> TokSemicolon + where + nls = head <$> some (char '\n') + semis = char ';' <* many (char '\n') +equals = char '=' $> TokEquals +lbrace = char '{' $> TokLBrace +rbrace = char '}' $> TokRBrace +lparen = char '(' $> TokLParen +rparen = char ')' $> TokRParen +lambda = (char '\\' <|> char 'λ') $> TokLambda +arrow = string "->" $> TokArrow +_case = string "case" $> TokCase +_of = string "of" $> TokOf +_let = string "let" $> TokLet +letrec = string "letrec" $> TokLetRec +_in = string "in" $> TokIn + +cName = TokCName <$> ((:) <$> cNameHead <*> properNameTail) + where cNameHead = satisfy isUpper + +name = some (satisfy p) <&> TokName + where p c = not (isSpace c) && c `notElem` ";{}" + +properName :: CoreLexer Name +properName = (:) <$> nameHead <*> properNameTail + where nameHead = satisfy isLetter + +properNameTail :: CoreLexer Name +properNameTail = many . satisfy $ \c -> + isLetter c || isDigit c || c == '_' diff --git a/src/Core/Parse.hs b/src/Core/Parse.hs index bc70a3a..f67c513 100644 --- a/src/Core/Parse.hs +++ b/src/Core/Parse.hs @@ -3,4 +3,11 @@ module Core.Parse ) where ---------------------------------------------------------------------------------- +import Control.Parser +import Core.Lex +import Core.Syntax +---------------------------------------------------------------------------------- + +parseCore :: [CoreToken] -> Result Program +parseCore = undefined