more correct lexer

This commit is contained in:
crumbtoo
2023-11-21 17:59:11 -07:00
parent 00a265fda1
commit 878e92395a
5 changed files with 204 additions and 54 deletions

View File

@@ -35,6 +35,7 @@ library
, containers , containers
, microlens , microlens
, microlens-th , microlens-th
, mtl
, template-haskell , template-haskell
-- required for happy -- required for happy
, array , array

View File

@@ -1,10 +1,71 @@
{-# LANGUAGE GeneralisedNewtypeDeriving #-} {-# LANGUAGE GeneralisedNewtypeDeriving #-}
{-# LANGUAGE TemplateHaskell #-}
module Compiler.RLPC module Compiler.RLPC
( RLPC(..) ( RLPC(..)
, RLPCOptions(RLPCOptions)
, addFatal
, addWound
, Severity(..)
, SrcError(..)
, evalRLPCT
, evalRLPC
) )
where where
----------------------------------------------------------------------------------
import Control.Arrow ((>>>))
import Control.Monad.Reader
import Control.Monad.Errorful
import Data.Functor.Identity
import Data.Coerce
import Lens.Micro
import Lens.Micro.TH
----------------------------------------------------------------------------------
-- TODO: fancy errors -- TODO: fancy errors
newtype RLPC a = RLPC { runRLPC :: Either String a } newtype RLPCT e m a = RLPC {
deriving (Functor, Applicative, Monad) runRLPCT :: ReaderT RLPCOptions (ErrorfulT (SrcError e) m) a
}
deriving (Functor, Applicative, Monad, MonadReader RLPCOptions)
type RLPC e = RLPCT e Identity
evalRLPCT :: RLPCOptions
-> RLPCT e m a
-> m (Either (SrcError e) (a, [SrcError e]))
evalRLPCT o = runRLPCT >>> flip runReaderT o >>> runErrorfulT
evalRLPC :: RLPCOptions
-> RLPC e a
-> Either (SrcError e) (a, [SrcError e])
evalRLPC o m = coerce $ evalRLPCT o m
data RLPCOptions = RLPCOptions
data SrcError e = SrcError
{ _errLocation :: (Int, Int)
, _errSeverity :: Severity
, _errDiagnostic :: e
}
deriving instance (Show e) => Show (SrcError e)
data Severity = Error
| Warning
| Debug
deriving Show
-- temporary until we have a new doc building system
type ErrorDoc = String
class Diagnostic e where
errorDoc :: e -> ErrorDoc
-- makeLenses ''RLPCOptions
makeLenses ''SrcError
pure []
instance MonadErrorful (SrcError e) (RLPC e) where
addWound = RLPC . lift . addWound
addFatal = RLPC . lift . addFatal

View File

@@ -0,0 +1,62 @@
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE TupleSections, PatternSynonyms #-}
module Control.Monad.Errorful
( ErrorfulT
, runErrorfulT
, Errorful
, runErrorful
, MonadErrorful(..)
)
where
----------------------------------------------------------------------------------
import Control.Monad.Trans
import Control.Monad.Except
import Data.Functor.Identity
import Data.Coerce
import Lens.Micro
----------------------------------------------------------------------------------
newtype ErrorfulT e m a = ErrorfulT { runErrorfulT :: m (Either e (a, [e])) }
type Errorful e = ErrorfulT e Identity
pattern Errorful :: (Either e (a, [e])) -> Errorful e a
pattern Errorful a = ErrorfulT (Identity a)
runErrorful :: Errorful e a -> Either e (a, [e])
runErrorful m = coerce (runErrorfulT m)
class (Applicative m) => MonadErrorful e m | m -> e where
addWound :: e -> m ()
addFatal :: e -> m a
-- not sure if i want to add this yet...
-- catchWound :: m a -> (e -> m a) -> m a
instance (Applicative m) => MonadErrorful e (ErrorfulT e m) where
addWound e = ErrorfulT $ pure . Right $ ((), [e])
addFatal e = ErrorfulT $ pure . Left $ e
instance MonadTrans (ErrorfulT e) where
lift m = ErrorfulT (Right . (,[]) <$> m)
instance (Functor m) => Functor (ErrorfulT e m) where
fmap f (ErrorfulT m) = ErrorfulT $ fmap (_1 %~ f) <$> m
instance (Applicative m) => Applicative (ErrorfulT e m) where
pure a = ErrorfulT (pure . Right $ (a, []))
m <*> a = ErrorfulT (m' `apply` a')
where
m' = runErrorfulT m
a' = runErrorfulT a
-- TODO: strict concatenation
apply = liftA2 $ liftA2 (\ (f,e1) (x,e2) -> (f x, e1 ++ e2))
instance (Monad m) => Monad (ErrorfulT e m) where
ErrorfulT m >>= k = ErrorfulT $ do
m' <- m
case m' of
Right (a,es) -> runErrorfulT (k a)
Left e -> pure (Left e)

View File

@@ -1,16 +1,19 @@
{ {
-- TODO: layout semicolons are not inserted at EOf.
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
module Core.Lex module Core.Lex
( lexCore ( lexCore
, lexCore' , lexCore'
, CoreToken(..) , CoreToken(..)
, lexTmp , ParseError(..)
, ParserError , Located(..)
, AlexPosn(..)
) )
where where
import Data.Char (chr) import Data.Char (chr)
import Debug.Trace import Debug.Trace
import Core.Syntax import Core.Syntax
import Compiler.RLPC
import Lens.Micro import Lens.Micro
import Lens.Micro.TH import Lens.Micro.TH
} }
@@ -184,9 +187,6 @@ getContext = do
type Lexer = AlexInput -> Int -> Alex (Located CoreToken) type Lexer = AlexInput -> Int -> Alex (Located CoreToken)
alexEOF :: Alex (Located CoreToken)
alexEOF = Alex $ \ st@(AlexState { alex_pos = p }) -> Right (st, Located p TokenEOF)
alexInitUserState :: AlexUserState alexInitUserState :: AlexUserState
alexInitUserState = AlexUserState [Layout 1] alexInitUserState = AlexUserState [Layout 1]
@@ -200,13 +200,29 @@ lexStream = do
Located _ TokenEOF -> pure [l] Located _ TokenEOF -> pure [l]
_ -> (l:) <$> lexStream _ -> (l:) <$> lexStream
lexCore :: String -> Either String [Located CoreToken] -- | The main lexer driver.
lexCore s = runAlex s (alexSetStartCode 0 *> lexStream) lexCore :: String -> RLPC ParseError [Located CoreToken]
lexCore s = case m of
Left e -> addFatal err
where err = SrcError
{ _errLocation = undefined -- TODO: location
, _errSeverity = Error
, _errDiagnostic = ParErrLexical e
}
Right ts -> pure ts
where
m = runAlex s (alexSetStartCode 0 *> lexStream)
lexCore' :: String -> Either String [CoreToken] -- | @lexCore@, but the tokens are stripped of location info. Useful for
-- debugging
lexCore' :: String -> RLPC ParseError [CoreToken]
lexCore' s = fmap f <$> lexCore s lexCore' s = fmap f <$> lexCore s
where f (Located _ t) = t where f (Located _ t) = t
data ParseError = ParErrLexical String
| ParErrParse
deriving Show
lexWith :: (String -> CoreToken) -> Lexer lexWith :: (String -> CoreToken) -> Lexer
lexWith f (p,_,_,s) l = pure $ Located p (f $ take l s) lexWith f (p,_,_,s) l = pure $ Located p (f $ take l s)
@@ -266,7 +282,6 @@ getOffside = do
doBol :: Lexer doBol :: Lexer
doBol (p,c,_,s) l = do doBol (p,c,_,s) l = do
off <- getOffside off <- getOffside
col <- getSrcCol
case off of case off of
LT -> insRBraceV p LT -> insRBraceV p
EQ -> insSemi p EQ -> insSemi p
@@ -277,13 +292,7 @@ letin (p,_,_,_) l = do
popContext popContext
pure $ Located p TokenIn pure $ Located p TokenIn
lexTmp :: IO [CoreToken] alexEOF :: Alex (Located CoreToken)
lexTmp = do alexEOF = Alex $ \ st@(AlexState { alex_pos = p }) -> Right (st, Located p TokenEOF)
s <- readFile "/tmp/t.hs"
case lexCore' s of
Left e -> error e
Right a -> pure a
data ParserError
} }

View File

@@ -1,4 +1,3 @@
-- TODO: resolve shift/reduce conflicts
{ {
module Core.Parse module Core.Parse
( parseCore ( parseCore
@@ -8,6 +7,7 @@ module Core.Parse
) )
where where
import Control.Monad ((>=>))
import Data.Foldable (foldl') import Data.Foldable (foldl')
import Core.Syntax import Core.Syntax
import Core.Lex import Core.Lex
@@ -16,31 +16,33 @@ import Compiler.RLPC
%name parseCore Module %name parseCore Module
%name parseCoreExpr Expr %name parseCoreExpr Expr
%tokentype { CoreToken } %tokentype { Located CoreToken }
%error { parseError } %error { parseError }
%monad { RLPC } %monad { RLPC ParseError }
%token %token
let { TokenLet } let { Located _ TokenLet }
letrec { TokenLetrec } letrec { Located _ TokenLetrec }
module { TokenModule } module { Located _ TokenModule }
where { TokenWhere } where { Located _ TokenWhere }
',' { TokenComma } ',' { Located _ TokenComma }
in { TokenIn } in { Located _ TokenIn }
litint { TokenLitInt $$ } litint { Located _ (TokenLitInt $$) }
varname { TokenVarName $$ } varname { Located _ (TokenVarName $$) }
varsym { TokenVarSym $$ } varsym { Located _ (TokenVarSym $$) }
conname { TokenConName $$ } conname { Located _ (TokenConName $$) }
consym { TokenConSym $$ } consym { Located _ (TokenConSym $$) }
'λ' { TokenLambda } 'λ' { Located _ TokenLambda }
'->' { TokenArrow } '->' { Located _ TokenArrow }
'=' { TokenEquals } '=' { Located _ TokenEquals }
'(' { TokenLParen } '(' { Located _ TokenLParen }
')' { TokenRParen } ')' { Located _ TokenRParen }
'{' { TokenLBrace } '{' { Located _ TokenLBrace }
'}' { TokenRBrace } '}' { Located _ TokenRBrace }
';' { TokenSemicolon } vl { Located _ TokenLBraceV }
eof { TokenEOF } vr { Located _ TokenRBraceV }
';' { Located _ TokenSemicolon }
eof { Located _ TokenEOF }
%% %%
@@ -53,7 +55,14 @@ Eof : eof { () }
| error { () } | error { () }
Program :: { Program } Program :: { Program }
Program : '{' ScDefs Close { Program $2 } Program : VOpen ScDefs VClose { Program $2 }
VOpen :: { () }
VOpen : vl { () }
VClose :: { () }
VClose : vr { () }
| error { () }
ScDefs :: { [ScDef] } ScDefs :: { [ScDef] }
ScDefs : ScDef ';' ScDefs { $1 : $3 } ScDefs : ScDef ';' ScDefs { $1 : $3 }
@@ -67,15 +76,16 @@ ParList : Var ParList { $1 : $2 }
| {- epsilon -} { [] } | {- epsilon -} { [] }
Expr :: { Expr } Expr :: { Expr }
Expr : let '{' Bindings Close in Expr { Let NonRec $3 $6 } Expr : LetExpr { $1 }
| letrec '{' Bindings Close in Expr { Let Rec $3 $6 }
| 'λ' Binders '->' Expr { Lam $2 $4 } | 'λ' Binders '->' Expr { Lam $2 $4 }
| Application { $1 } | Application { $1 }
| Expr1 { $1 } | Expr1 { $1 }
Close :: { () } LetExpr :: { Expr }
Close : '}' { () } LetExpr : let VOpen Bindings VClose in Expr { Let NonRec $3 $6 }
| error { () } | letrec VOpen Bindings VClose in Expr { Let Rec $3 $6 }
| let '{' Bindings '}' in Expr { Let NonRec $3 $6 }
| letrec '{' Bindings '}' in Expr { Let Rec $3 $6 }
Binders :: { [Name] } Binders :: { [Name] }
Binders : Var Binders { $1 : $2 } Binders : Var Binders { $1 : $2 }
@@ -115,14 +125,21 @@ Con : '(' consym ')' { $2 }
| conname { $1 } | conname { $1 }
{ {
parseError :: [CoreToken] -> a parseError :: [Located CoreToken] -> RLPC ParseError a
parseError ts = error $ "parse error at token: " <> show (head ts) parseError (Located (AlexPn _ x y) _ : _) = addFatal err
where err = SrcError
parseTmp :: IO (Module) { _errLocation = (x, y)
parseTmp = do , _errSeverity = Error
s <- readFile "/tmp/t.hs" , _errDiagnostic = ParErrParse
case lexCore' s >>= runRLPC . parseCore of }
Left e -> error e
Right a -> pure a parseTmp :: IO Module
parseTmp = do
s <- readFile "/tmp/t.hs"
case parse s of
Left e -> error (show e)
Right (ts,_) -> pure ts
where
parse = evalRLPC RLPCOptions . (lexCore >=> parseCore)
} }