lexer
This commit is contained in:
3
docs/src/commentary/parser.rst
Normal file
3
docs/src/commentary/parser.rst
Normal file
@@ -0,0 +1,3 @@
|
||||
Parser Combinators
|
||||
==================
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{-|
|
||||
Module : Control.Parser
|
||||
Description : The parser *object*
|
||||
Description : Parser combinators
|
||||
|
||||
This module implements an interface for parser *types*, used in lexical analysis
|
||||
and parsing. For the implementation of the rlp language's parser, see 'Parse'.
|
||||
@@ -8,13 +8,27 @@ and parsing. For the implementation of the rlp language's parser, see 'Parse'.
|
||||
{-# LANGUAGE GeneralisedNewtypeDeriving #-}
|
||||
{-# LANGUAGE BlockArguments, LambdaCase #-}
|
||||
module Control.Parser
|
||||
(
|
||||
( ParserT
|
||||
, runParserT
|
||||
|
||||
, satisfy
|
||||
, char
|
||||
, spaces
|
||||
, nl
|
||||
, surround
|
||||
, string
|
||||
|
||||
-- * Control.Applicative re-exports
|
||||
, (<|>)
|
||||
, many
|
||||
, some
|
||||
)
|
||||
where
|
||||
----------------------------------------------------------------------------------
|
||||
import Control.Applicative
|
||||
import Control.Arrow ((***))
|
||||
import Control.Monad
|
||||
import Data.Char
|
||||
----------------------------------------------------------------------------------
|
||||
|
||||
newtype ParserT i m o = ParserT { runParserT :: i -> m (i, o) }
|
||||
@@ -40,11 +54,35 @@ instance (Monad m) => Monad (ParserT i m) where
|
||||
(i',a) <- runParserT m i
|
||||
runParserT (k a) i'
|
||||
|
||||
instance (MonadFail m) => MonadFail (ParserT i m) where
|
||||
fail s = ParserT $ \i -> fail s
|
||||
----------------------------------------------------------------------------------
|
||||
|
||||
-- TODO: generalise to non-lists
|
||||
satisfy :: (MonadPlus m, Eq a) => a -> ParserT [a] m a
|
||||
satisfy c = ParserT $ \case
|
||||
(x:xs) | x == c -> pure (xs,x)
|
||||
satisfy :: (MonadPlus m) => (a -> Bool) -> ParserT [a] m a
|
||||
satisfy p = ParserT $ \case
|
||||
(x:xs) | p x -> pure (xs,x)
|
||||
_ -> empty
|
||||
|
||||
|
||||
char :: (MonadPlus m, Eq a) => a -> ParserT [a] m a
|
||||
char c = satisfy (==c)
|
||||
|
||||
string :: (MonadPlus m, Eq a) => [a] -> ParserT [a] m [a]
|
||||
string s = sequenceA $ char <$> s
|
||||
|
||||
----------------------------------------------------------------------------------
|
||||
|
||||
surround :: (MonadPlus m)
|
||||
=> ParserT i m l
|
||||
-> ParserT i m r
|
||||
-> ParserT i m c
|
||||
-> ParserT i m c
|
||||
surround l r c = l *> c <* r
|
||||
|
||||
spaces :: (MonadPlus m) => ParserT String m Int
|
||||
spaces = length <$> many (satisfy (==' '))
|
||||
|
||||
nl :: (MonadPlus m) => ParserT String m Int
|
||||
nl = undefined
|
||||
|
||||
|
||||
137
src/Core/Lex.hs
137
src/Core/Lex.hs
@@ -1,6 +1,141 @@
|
||||
{-|
|
||||
Module : Core.Lex
|
||||
Description : Core language lexer
|
||||
-}
|
||||
module Core.Lex
|
||||
(
|
||||
( CoreToken
|
||||
, Result
|
||||
, lexCore
|
||||
)
|
||||
where
|
||||
----------------------------------------------------------------------------------
|
||||
import Control.Parser
|
||||
import Control.Applicative
|
||||
import Control.Monad
|
||||
import Data.Char
|
||||
import Data.Functor
|
||||
import Core.Syntax (Name)
|
||||
----------------------------------------------------------------------------------
|
||||
|
||||
type CoreLexer = ParserT String Result
|
||||
|
||||
data Result a = Success a
|
||||
| Error String Int Int
|
||||
deriving (Show)
|
||||
|
||||
-- TODO: whitespace-sensitive layout
|
||||
data CoreToken = TokLitInt Int
|
||||
| TokEquals
|
||||
| TokLBrace
|
||||
| TokRBrace
|
||||
| TokSemicolon
|
||||
| TokLParen
|
||||
| TokRParen
|
||||
| TokLambda
|
||||
| TokArrow
|
||||
| TokCase
|
||||
| TokOf
|
||||
| TokLet
|
||||
| TokLetRec
|
||||
| TokIn
|
||||
| TokCName Name
|
||||
| TokName Name
|
||||
deriving (Show)
|
||||
|
||||
instance Functor Result where
|
||||
fmap f (Success a) = Success (f a)
|
||||
fmap _ (Error s l c) = Error s l c
|
||||
|
||||
instance Foldable Result where
|
||||
foldr f z (Success a) = a `f` z
|
||||
foldr _ z (Error _ _ _) = z
|
||||
|
||||
instance Traversable Result where
|
||||
traverse k (Success a) = fmap Success (k a)
|
||||
traverse _ (Error s l c) = pure $ Error s l c
|
||||
|
||||
instance Applicative Result where
|
||||
pure = Success
|
||||
|
||||
liftA2 f (Success a) (Success b) = Success $ f a b
|
||||
liftA2 _ (Error s l c) _ = Error s l c
|
||||
liftA2 _ _ (Error s l c) = Error s l c
|
||||
|
||||
instance Alternative Result where
|
||||
empty = Error "unknown failure" 0 0
|
||||
|
||||
(Success a) <|> _ = Success a
|
||||
_ <|> b = b
|
||||
|
||||
instance Monad Result where
|
||||
Success a >>= k = k a
|
||||
Error s l c >>= _ = Error s l c
|
||||
|
||||
instance MonadPlus Result
|
||||
|
||||
instance MonadFail Result where
|
||||
fail s = Error s 0 0
|
||||
|
||||
----------------------------------------------------------------------------------
|
||||
|
||||
lexCore :: String -> Result [CoreToken]
|
||||
lexCore = fmap snd . runParserT (many (token <* spaces))
|
||||
|
||||
token :: CoreLexer CoreToken
|
||||
token = litInt
|
||||
<|> lbrace
|
||||
<|> rbrace
|
||||
<|> semicolon
|
||||
<|> lparen
|
||||
<|> rparen
|
||||
<|> equals
|
||||
<|> lambda
|
||||
<|> arrow
|
||||
<|> _case
|
||||
<|> _of
|
||||
<|> _let
|
||||
<|> letrec
|
||||
<|> _in
|
||||
<|> cName
|
||||
<|> name
|
||||
|
||||
----------------------------------------------------------------------------------
|
||||
|
||||
litInt, equals, lparen, rparen, lambda,
|
||||
arrow, _case, _of, _let, letrec, _in, cName, name :: CoreLexer CoreToken
|
||||
|
||||
litInt = TokLitInt . value <$> some (satisfy isDigit)
|
||||
where
|
||||
value = foldl (\acc a -> 10*acc + digitToInt a) 0
|
||||
|
||||
semicolon = (semis <|> nls) $> TokSemicolon
|
||||
where
|
||||
nls = head <$> some (char '\n')
|
||||
semis = char ';' <* many (char '\n')
|
||||
equals = char '=' $> TokEquals
|
||||
lbrace = char '{' $> TokLBrace
|
||||
rbrace = char '}' $> TokRBrace
|
||||
lparen = char '(' $> TokLParen
|
||||
rparen = char ')' $> TokRParen
|
||||
lambda = (char '\\' <|> char 'λ') $> TokLambda
|
||||
arrow = string "->" $> TokArrow
|
||||
_case = string "case" $> TokCase
|
||||
_of = string "of" $> TokOf
|
||||
_let = string "let" $> TokLet
|
||||
letrec = string "letrec" $> TokLetRec
|
||||
_in = string "in" $> TokIn
|
||||
|
||||
cName = TokCName <$> ((:) <$> cNameHead <*> properNameTail)
|
||||
where cNameHead = satisfy isUpper
|
||||
|
||||
name = some (satisfy p) <&> TokName
|
||||
where p c = not (isSpace c) && c `notElem` ";{}"
|
||||
|
||||
properName :: CoreLexer Name
|
||||
properName = (:) <$> nameHead <*> properNameTail
|
||||
where nameHead = satisfy isLetter
|
||||
|
||||
properNameTail :: CoreLexer Name
|
||||
properNameTail = many . satisfy $ \c ->
|
||||
isLetter c || isDigit c || c == '_'
|
||||
|
||||
|
||||
@@ -3,4 +3,11 @@ module Core.Parse
|
||||
)
|
||||
where
|
||||
----------------------------------------------------------------------------------
|
||||
import Control.Parser
|
||||
import Core.Lex
|
||||
import Core.Syntax
|
||||
----------------------------------------------------------------------------------
|
||||
|
||||
parseCore :: [CoreToken] -> Result Program
|
||||
parseCore = undefined
|
||||
|
||||
|
||||
Reference in New Issue
Block a user