This commit is contained in:
crumbtoo
2023-11-15 14:31:27 -07:00
parent 86b1538d3d
commit b28daefc3c
4 changed files with 190 additions and 7 deletions

View File

@@ -0,0 +1,3 @@
Parser Combinators
==================

View File

@@ -1,6 +1,6 @@
{-|
Module : Control.Parser
Description : The parser *object*
Description : Parser combinators
This module implements an interface for parser *types*, used in lexical analysis
and parsing. For the implementation of the rlp language's parser, see 'Parse'.
@@ -8,13 +8,27 @@ and parsing. For the implementation of the rlp language's parser, see 'Parse'.
{-# LANGUAGE GeneralisedNewtypeDeriving #-}
{-# LANGUAGE BlockArguments, LambdaCase #-}
module Control.Parser
(
( ParserT
, runParserT
, satisfy
, char
, spaces
, nl
, surround
, string
-- * Control.Applicative re-exports
, (<|>)
, many
, some
)
where
----------------------------------------------------------------------------------
import Control.Applicative
import Control.Arrow ((***))
import Control.Monad
import Data.Char
----------------------------------------------------------------------------------
newtype ParserT i m o = ParserT { runParserT :: i -> m (i, o) }
@@ -40,11 +54,35 @@ instance (Monad m) => Monad (ParserT i m) where
(i',a) <- runParserT m i
runParserT (k a) i'
instance (MonadFail m) => MonadFail (ParserT i m) where
fail s = ParserT $ \i -> fail s
----------------------------------------------------------------------------------
-- TODO: generalise to non-lists
satisfy :: (MonadPlus m, Eq a) => a -> ParserT [a] m a
satisfy c = ParserT $ \case
(x:xs) | x == c -> pure (xs,x)
_ -> empty
satisfy :: (MonadPlus m) => (a -> Bool) -> ParserT [a] m a
satisfy p = ParserT $ \case
(x:xs) | p x -> pure (xs,x)
_ -> empty
char :: (MonadPlus m, Eq a) => a -> ParserT [a] m a
char c = satisfy (==c)
string :: (MonadPlus m, Eq a) => [a] -> ParserT [a] m [a]
string s = sequenceA $ char <$> s
----------------------------------------------------------------------------------
surround :: (MonadPlus m)
=> ParserT i m l
-> ParserT i m r
-> ParserT i m c
-> ParserT i m c
surround l r c = l *> c <* r
spaces :: (MonadPlus m) => ParserT String m Int
spaces = length <$> many (satisfy (==' '))
nl :: (MonadPlus m) => ParserT String m Int
nl = undefined

View File

@@ -1,6 +1,141 @@
{-|
Module : Core.Lex
Description : Core language lexer
-}
module Core.Lex
(
( CoreToken
, Result
, lexCore
)
where
----------------------------------------------------------------------------------
import Control.Parser
import Control.Applicative
import Control.Monad
import Data.Char
import Data.Functor
import Core.Syntax (Name)
----------------------------------------------------------------------------------
type CoreLexer = ParserT String Result
data Result a = Success a
| Error String Int Int
deriving (Show)
-- TODO: whitespace-sensitive layout
data CoreToken = TokLitInt Int
| TokEquals
| TokLBrace
| TokRBrace
| TokSemicolon
| TokLParen
| TokRParen
| TokLambda
| TokArrow
| TokCase
| TokOf
| TokLet
| TokLetRec
| TokIn
| TokCName Name
| TokName Name
deriving (Show)
instance Functor Result where
fmap f (Success a) = Success (f a)
fmap _ (Error s l c) = Error s l c
instance Foldable Result where
foldr f z (Success a) = a `f` z
foldr _ z (Error _ _ _) = z
instance Traversable Result where
traverse k (Success a) = fmap Success (k a)
traverse _ (Error s l c) = pure $ Error s l c
instance Applicative Result where
pure = Success
liftA2 f (Success a) (Success b) = Success $ f a b
liftA2 _ (Error s l c) _ = Error s l c
liftA2 _ _ (Error s l c) = Error s l c
instance Alternative Result where
empty = Error "unknown failure" 0 0
(Success a) <|> _ = Success a
_ <|> b = b
instance Monad Result where
Success a >>= k = k a
Error s l c >>= _ = Error s l c
instance MonadPlus Result
instance MonadFail Result where
fail s = Error s 0 0
----------------------------------------------------------------------------------
lexCore :: String -> Result [CoreToken]
lexCore = fmap snd . runParserT (many (token <* spaces))
token :: CoreLexer CoreToken
token = litInt
<|> lbrace
<|> rbrace
<|> semicolon
<|> lparen
<|> rparen
<|> equals
<|> lambda
<|> arrow
<|> _case
<|> _of
<|> _let
<|> letrec
<|> _in
<|> cName
<|> name
----------------------------------------------------------------------------------
litInt, equals, lparen, rparen, lambda,
arrow, _case, _of, _let, letrec, _in, cName, name :: CoreLexer CoreToken
litInt = TokLitInt . value <$> some (satisfy isDigit)
where
value = foldl (\acc a -> 10*acc + digitToInt a) 0
semicolon = (semis <|> nls) $> TokSemicolon
where
nls = head <$> some (char '\n')
semis = char ';' <* many (char '\n')
equals = char '=' $> TokEquals
lbrace = char '{' $> TokLBrace
rbrace = char '}' $> TokRBrace
lparen = char '(' $> TokLParen
rparen = char ')' $> TokRParen
lambda = (char '\\' <|> char 'λ') $> TokLambda
arrow = string "->" $> TokArrow
_case = string "case" $> TokCase
_of = string "of" $> TokOf
_let = string "let" $> TokLet
letrec = string "letrec" $> TokLetRec
_in = string "in" $> TokIn
cName = TokCName <$> ((:) <$> cNameHead <*> properNameTail)
where cNameHead = satisfy isUpper
name = some (satisfy p) <&> TokName
where p c = not (isSpace c) && c `notElem` ";{}"
properName :: CoreLexer Name
properName = (:) <$> nameHead <*> properNameTail
where nameHead = satisfy isLetter
properNameTail :: CoreLexer Name
properNameTail = many . satisfy $ \c ->
isLetter c || isDigit c || c == '_'

View File

@@ -3,4 +3,11 @@ module Core.Parse
)
where
----------------------------------------------------------------------------------
import Control.Parser
import Core.Lex
import Core.Syntax
----------------------------------------------------------------------------------
parseCore :: [CoreToken] -> Result Program
parseCore = undefined