more correct lexer

This commit is contained in:
crumbtoo
2023-11-21 17:59:11 -07:00
parent 00a265fda1
commit 878e92395a
5 changed files with 204 additions and 54 deletions

View File

@@ -1,10 +1,71 @@
{-# LANGUAGE GeneralisedNewtypeDeriving #-}
{-# LANGUAGE TemplateHaskell #-}
module Compiler.RLPC
( RLPC(..)
, RLPCOptions(RLPCOptions)
, addFatal
, addWound
, Severity(..)
, SrcError(..)
, evalRLPCT
, evalRLPC
)
where
----------------------------------------------------------------------------------
import Control.Arrow ((>>>))
import Control.Monad.Reader
import Control.Monad.Errorful
import Data.Functor.Identity
import Data.Coerce
import Lens.Micro
import Lens.Micro.TH
----------------------------------------------------------------------------------
-- TODO: fancy errors
newtype RLPC a = RLPC { runRLPC :: Either String a }
deriving (Functor, Applicative, Monad)
newtype RLPCT e m a = RLPC {
runRLPCT :: ReaderT RLPCOptions (ErrorfulT (SrcError e) m) a
}
deriving (Functor, Applicative, Monad, MonadReader RLPCOptions)
type RLPC e = RLPCT e Identity
evalRLPCT :: RLPCOptions
-> RLPCT e m a
-> m (Either (SrcError e) (a, [SrcError e]))
evalRLPCT o = runRLPCT >>> flip runReaderT o >>> runErrorfulT
evalRLPC :: RLPCOptions
-> RLPC e a
-> Either (SrcError e) (a, [SrcError e])
evalRLPC o m = coerce $ evalRLPCT o m
data RLPCOptions = RLPCOptions
data SrcError e = SrcError
{ _errLocation :: (Int, Int)
, _errSeverity :: Severity
, _errDiagnostic :: e
}
deriving instance (Show e) => Show (SrcError e)
data Severity = Error
| Warning
| Debug
deriving Show
-- temporary until we have a new doc building system
type ErrorDoc = String
class Diagnostic e where
errorDoc :: e -> ErrorDoc
-- makeLenses ''RLPCOptions
makeLenses ''SrcError
pure []
instance MonadErrorful (SrcError e) (RLPC e) where
addWound = RLPC . lift . addWound
addFatal = RLPC . lift . addFatal

View File

@@ -0,0 +1,62 @@
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE TupleSections, PatternSynonyms #-}
module Control.Monad.Errorful
( ErrorfulT
, runErrorfulT
, Errorful
, runErrorful
, MonadErrorful(..)
)
where
----------------------------------------------------------------------------------
import Control.Monad.Trans
import Control.Monad.Except
import Data.Functor.Identity
import Data.Coerce
import Lens.Micro
----------------------------------------------------------------------------------
newtype ErrorfulT e m a = ErrorfulT { runErrorfulT :: m (Either e (a, [e])) }
type Errorful e = ErrorfulT e Identity
pattern Errorful :: (Either e (a, [e])) -> Errorful e a
pattern Errorful a = ErrorfulT (Identity a)
runErrorful :: Errorful e a -> Either e (a, [e])
runErrorful m = coerce (runErrorfulT m)
class (Applicative m) => MonadErrorful e m | m -> e where
addWound :: e -> m ()
addFatal :: e -> m a
-- not sure if i want to add this yet...
-- catchWound :: m a -> (e -> m a) -> m a
instance (Applicative m) => MonadErrorful e (ErrorfulT e m) where
addWound e = ErrorfulT $ pure . Right $ ((), [e])
addFatal e = ErrorfulT $ pure . Left $ e
instance MonadTrans (ErrorfulT e) where
lift m = ErrorfulT (Right . (,[]) <$> m)
instance (Functor m) => Functor (ErrorfulT e m) where
fmap f (ErrorfulT m) = ErrorfulT $ fmap (_1 %~ f) <$> m
instance (Applicative m) => Applicative (ErrorfulT e m) where
pure a = ErrorfulT (pure . Right $ (a, []))
m <*> a = ErrorfulT (m' `apply` a')
where
m' = runErrorfulT m
a' = runErrorfulT a
-- TODO: strict concatenation
apply = liftA2 $ liftA2 (\ (f,e1) (x,e2) -> (f x, e1 ++ e2))
instance (Monad m) => Monad (ErrorfulT e m) where
ErrorfulT m >>= k = ErrorfulT $ do
m' <- m
case m' of
Right (a,es) -> runErrorfulT (k a)
Left e -> pure (Left e)

View File

@@ -1,16 +1,19 @@
{
-- TODO: layout semicolons are not inserted at EOf.
{-# LANGUAGE TemplateHaskell #-}
module Core.Lex
( lexCore
, lexCore'
, CoreToken(..)
, lexTmp
, ParserError
, ParseError(..)
, Located(..)
, AlexPosn(..)
)
where
import Data.Char (chr)
import Debug.Trace
import Core.Syntax
import Compiler.RLPC
import Lens.Micro
import Lens.Micro.TH
}
@@ -184,9 +187,6 @@ getContext = do
type Lexer = AlexInput -> Int -> Alex (Located CoreToken)
alexEOF :: Alex (Located CoreToken)
alexEOF = Alex $ \ st@(AlexState { alex_pos = p }) -> Right (st, Located p TokenEOF)
alexInitUserState :: AlexUserState
alexInitUserState = AlexUserState [Layout 1]
@@ -200,13 +200,29 @@ lexStream = do
Located _ TokenEOF -> pure [l]
_ -> (l:) <$> lexStream
lexCore :: String -> Either String [Located CoreToken]
lexCore s = runAlex s (alexSetStartCode 0 *> lexStream)
-- | The main lexer driver.
lexCore :: String -> RLPC ParseError [Located CoreToken]
lexCore s = case m of
Left e -> addFatal err
where err = SrcError
{ _errLocation = undefined -- TODO: location
, _errSeverity = Error
, _errDiagnostic = ParErrLexical e
}
Right ts -> pure ts
where
m = runAlex s (alexSetStartCode 0 *> lexStream)
lexCore' :: String -> Either String [CoreToken]
-- | @lexCore@, but the tokens are stripped of location info. Useful for
-- debugging
lexCore' :: String -> RLPC ParseError [CoreToken]
lexCore' s = fmap f <$> lexCore s
where f (Located _ t) = t
data ParseError = ParErrLexical String
| ParErrParse
deriving Show
lexWith :: (String -> CoreToken) -> Lexer
lexWith f (p,_,_,s) l = pure $ Located p (f $ take l s)
@@ -266,7 +282,6 @@ getOffside = do
doBol :: Lexer
doBol (p,c,_,s) l = do
off <- getOffside
col <- getSrcCol
case off of
LT -> insRBraceV p
EQ -> insSemi p
@@ -277,13 +292,7 @@ letin (p,_,_,_) l = do
popContext
pure $ Located p TokenIn
lexTmp :: IO [CoreToken]
lexTmp = do
s <- readFile "/tmp/t.hs"
case lexCore' s of
Left e -> error e
Right a -> pure a
data ParserError
alexEOF :: Alex (Located CoreToken)
alexEOF = Alex $ \ st@(AlexState { alex_pos = p }) -> Right (st, Located p TokenEOF)
}

View File

@@ -1,4 +1,3 @@
-- TODO: resolve shift/reduce conflicts
{
module Core.Parse
( parseCore
@@ -8,6 +7,7 @@ module Core.Parse
)
where
import Control.Monad ((>=>))
import Data.Foldable (foldl')
import Core.Syntax
import Core.Lex
@@ -16,31 +16,33 @@ import Compiler.RLPC
%name parseCore Module
%name parseCoreExpr Expr
%tokentype { CoreToken }
%tokentype { Located CoreToken }
%error { parseError }
%monad { RLPC }
%monad { RLPC ParseError }
%token
let { TokenLet }
letrec { TokenLetrec }
module { TokenModule }
where { TokenWhere }
',' { TokenComma }
in { TokenIn }
litint { TokenLitInt $$ }
varname { TokenVarName $$ }
varsym { TokenVarSym $$ }
conname { TokenConName $$ }
consym { TokenConSym $$ }
'λ' { TokenLambda }
'->' { TokenArrow }
'=' { TokenEquals }
'(' { TokenLParen }
')' { TokenRParen }
'{' { TokenLBrace }
'}' { TokenRBrace }
';' { TokenSemicolon }
eof { TokenEOF }
let { Located _ TokenLet }
letrec { Located _ TokenLetrec }
module { Located _ TokenModule }
where { Located _ TokenWhere }
',' { Located _ TokenComma }
in { Located _ TokenIn }
litint { Located _ (TokenLitInt $$) }
varname { Located _ (TokenVarName $$) }
varsym { Located _ (TokenVarSym $$) }
conname { Located _ (TokenConName $$) }
consym { Located _ (TokenConSym $$) }
'λ' { Located _ TokenLambda }
'->' { Located _ TokenArrow }
'=' { Located _ TokenEquals }
'(' { Located _ TokenLParen }
')' { Located _ TokenRParen }
'{' { Located _ TokenLBrace }
'}' { Located _ TokenRBrace }
vl { Located _ TokenLBraceV }
vr { Located _ TokenRBraceV }
';' { Located _ TokenSemicolon }
eof { Located _ TokenEOF }
%%
@@ -53,7 +55,14 @@ Eof : eof { () }
| error { () }
Program :: { Program }
Program : '{' ScDefs Close { Program $2 }
Program : VOpen ScDefs VClose { Program $2 }
VOpen :: { () }
VOpen : vl { () }
VClose :: { () }
VClose : vr { () }
| error { () }
ScDefs :: { [ScDef] }
ScDefs : ScDef ';' ScDefs { $1 : $3 }
@@ -67,15 +76,16 @@ ParList : Var ParList { $1 : $2 }
| {- epsilon -} { [] }
Expr :: { Expr }
Expr : let '{' Bindings Close in Expr { Let NonRec $3 $6 }
| letrec '{' Bindings Close in Expr { Let Rec $3 $6 }
Expr : LetExpr { $1 }
| 'λ' Binders '->' Expr { Lam $2 $4 }
| Application { $1 }
| Expr1 { $1 }
Close :: { () }
Close : '}' { () }
| error { () }
LetExpr :: { Expr }
LetExpr : let VOpen Bindings VClose in Expr { Let NonRec $3 $6 }
| letrec VOpen Bindings VClose in Expr { Let Rec $3 $6 }
| let '{' Bindings '}' in Expr { Let NonRec $3 $6 }
| letrec '{' Bindings '}' in Expr { Let Rec $3 $6 }
Binders :: { [Name] }
Binders : Var Binders { $1 : $2 }
@@ -115,14 +125,21 @@ Con : '(' consym ')' { $2 }
| conname { $1 }
{
parseError :: [CoreToken] -> a
parseError ts = error $ "parse error at token: " <> show (head ts)
parseError :: [Located CoreToken] -> RLPC ParseError a
parseError (Located (AlexPn _ x y) _ : _) = addFatal err
where err = SrcError
{ _errLocation = (x, y)
, _errSeverity = Error
, _errDiagnostic = ParErrParse
}
parseTmp :: IO (Module)
parseTmp :: IO Module
parseTmp = do
s <- readFile "/tmp/t.hs"
case lexCore' s >>= runRLPC . parseCore of
Left e -> error e
Right a -> pure a
case parse s of
Left e -> error (show e)
Right (ts,_) -> pure ts
where
parse = evalRLPC RLPCOptions . (lexCore >=> parseCore)
}