296 lines
10 KiB
Haskell
296 lines
10 KiB
Haskell
{-# LANGUAGE OverloadedStrings, LambdaCase, GeneralisedNewtypeDeriving #-}
|
|
module CoreSyntax
|
|
( ProgramSrc(..)
|
|
, congruentSrc
|
|
, unparseCoreProg
|
|
)
|
|
where
|
|
----------------------------------------------------------------------------------
|
|
import Core.Syntax
|
|
import Compiler.JustRun (justParseSrc)
|
|
import Control.Arrow ((>>>), (&&&))
|
|
import Control.Monad
|
|
import Data.List (intersperse)
|
|
import Data.Coerce (coerce)
|
|
import Data.Text (Text)
|
|
import Data.Text qualified as T
|
|
import Data.HashMap.Strict qualified as H
|
|
import Test.QuickCheck
|
|
import Text.PrettyPrint hiding ((<>))
|
|
import Data.Functor ((<&>))
|
|
import Data.Function ((&), on)
|
|
import Data.String (IsString(..))
|
|
import Lens.Micro.Platform
|
|
import Lens.Micro.Platform.Internal (IsText(..))
|
|
----------------------------------------------------------------------------------
|
|
|
|
newtype ProgramSrc = ProgramSrc Text
|
|
deriving (Show, Eq, Semigroup, Monoid, IsString)
|
|
|
|
instance Arbitrary ProgramSrc where
|
|
arbitrary = sized genProg where
|
|
|
|
genProg :: Int -> Gen ProgramSrc
|
|
genProg n = do
|
|
-- in generating a program, we create a random list of sc names and
|
|
-- assign them type signatures and definitions in random order.
|
|
ns <- replicateM n genName
|
|
-- generate a typesig and def for each name
|
|
ns & each %~ (genTySig &&& genScDef)
|
|
-- [(typesig, scdef)] -> [typesigs and scdefs]
|
|
& uncurry (++) . unzip
|
|
-- [Gen Text] -> Gen [Text]
|
|
& sequenceA
|
|
-- shuffle order of tysigs and scdefs
|
|
>>= shuffle
|
|
-- terminate each tysig and scdef with a semicolon with a blank
|
|
-- line for legibility
|
|
<&> intersperse ";\n\n"
|
|
-- mconcat into a single body of text
|
|
<&> mconcat
|
|
-- she's done! put a bow on her! :D
|
|
<&> ProgramSrc
|
|
|
|
genTySig :: Name -> Gen Text
|
|
genTySig n = conseq [pure n, ws, pure "::", ws, genTy]
|
|
|
|
genScDef :: Name -> Gen Text
|
|
genScDef n = conseq [pure n, ws, pure "=", ws, genExpr]
|
|
|
|
genExpr :: Gen Text
|
|
genExpr = gen 4 0 where
|
|
gen 0 _ = oneof
|
|
[ genVar
|
|
, genLit
|
|
]
|
|
gen n p = oneof
|
|
[ gen 0 p
|
|
, wrapParens <$> gen n' 0
|
|
, genApp n p
|
|
, genLet n p
|
|
, genLam n p
|
|
, genCase n p
|
|
]
|
|
where n' = next n
|
|
|
|
genVar = oneof
|
|
[ genName
|
|
, genCon
|
|
, wrapParens <$> genSymName
|
|
, wrapParens <$> genSymCon
|
|
]
|
|
|
|
genCase n p = conseq [ pure "case", ws1, gen n' 0, ws1, pure "of"
|
|
, pure "{", alts, pure "}"
|
|
]
|
|
<&> pprec 0 p
|
|
where
|
|
n' = next n
|
|
alts = chooseSize (1,6) (listOf1 alt)
|
|
<&> intersperse ";"
|
|
<&> mconcat
|
|
alt = conseq [ tag, ws, pure "->", ws1, gen n' 0 ]
|
|
tag = T.pack . show <$> chooseInt (0,maxBound)
|
|
|
|
genLit = T.pack . show <$> chooseInt (0,maxBound)
|
|
|
|
genApp n p = chooseSize (2,10) (listOf1 (gen n' 1))
|
|
<&> pprec 0 p . mconcat . intersperse " "
|
|
where
|
|
n' = next n
|
|
|
|
genLet n p = conseq [ letw, ws, pure "{", ws, binds
|
|
, ws, pure "}", ws, pure "in"
|
|
, ws1, gen n' 0
|
|
]
|
|
where
|
|
letw = arbitrary <&> \case
|
|
Rec -> "letrec"
|
|
NonRec -> "let"
|
|
binds = chooseSize (1,6) (listOf1 bind)
|
|
<&> intersperse ";"
|
|
<&> mconcat
|
|
bind = conseq [var, ws, pure "=", ws, gen n' 0]
|
|
var = oneof [genName, wrapParens <$> genSymName]
|
|
n' = next n
|
|
|
|
genLam n p = conseq [l, ws, bs, pure "->", ws, gen n' 0]
|
|
<&> pprec 0 p
|
|
where
|
|
-- whitespace because reserved op shenanigans :3
|
|
l = elements [" \\ ", "λ"]
|
|
n' = next n
|
|
bs = chooseSize (0,6) (listOf1 genName)
|
|
<&> mconcat
|
|
|
|
next = (`div` 2)
|
|
|
|
genTy :: Gen Text
|
|
genTy = gen 4 where
|
|
gen 0 = genCon
|
|
gen n = oneof
|
|
[ gen 0
|
|
-- function types
|
|
, conseq [gen n', ws, pure "->", ws, gen n']
|
|
-- TODO: type applications (remember precedence lol)
|
|
]
|
|
where n' = n `div` 2
|
|
|
|
instance Arbitrary Rec where
|
|
arbitrary = elements [Rec,NonRec]
|
|
|
|
chooseSize :: (Int, Int) -> Gen a -> Gen a
|
|
chooseSize (a,b) g = do
|
|
n <- chooseInt (a,b)
|
|
resize n g
|
|
|
|
-- | @pprec q p s@ wraps @s@ with parens when @p <= q@
|
|
pprec :: (IsString a, Monoid a) => Int -> Int -> a -> a
|
|
pprec maxp p
|
|
| p <= maxp = id
|
|
| otherwise = wrapParens
|
|
|
|
wrapParens :: (IsString a, Monoid a) => a -> a
|
|
wrapParens t = "(" <> t <> ")"
|
|
|
|
conseq :: (Applicative f, Monoid m, Traversable t)
|
|
=> t (f m)
|
|
-> f m
|
|
conseq tfm = sequenceA tfm <&> the_cool_kid's_concat
|
|
-- me when `concat` is generalised in the container but specialised in the
|
|
-- value, and `mconcat` is specialised in the container but generalised in
|
|
-- the value. shoutout `foldMap id`
|
|
where the_cool_kid's_concat = foldMap id
|
|
|
|
genName :: Gen Name
|
|
genName = T.pack <$> liftA2 (:) small namechars where
|
|
small = elements ['a'..'z']
|
|
|
|
genCon :: Gen Name
|
|
genCon = T.pack <$> liftA2 (:) large namechars where
|
|
large = elements ['A'..'Z']
|
|
|
|
genSymName :: Gen Name
|
|
genSymName = T.pack <$> liftA2 (:) symbol symchars where
|
|
symbol = elements nameSymbols
|
|
|
|
genSymCon :: Gen Name
|
|
genSymCon = T.pack . (':' :) <$> symchars
|
|
|
|
namechars :: Gen String
|
|
namechars = liftArbitrary namechar where
|
|
namechar :: Gen Char
|
|
namechar = elements $ ['a'..'z'] <> ['A'..'Z'] <> ['0'..'9'] <> "'"
|
|
|
|
nameSymbols :: [Char]
|
|
nameSymbols = "!#$%&*+./<=>?@^|-~"
|
|
|
|
symchars :: Gen String
|
|
symchars = liftArbitrary symchar where
|
|
symchar = elements $ ':' : nameSymbols
|
|
|
|
txt :: (IsText t) => t -> Doc
|
|
txt t = t ^. unpacked & text
|
|
|
|
ws :: (IsString a) => Gen a
|
|
ws = elements [""," ", " "]
|
|
|
|
ws1 :: (IsString a) => Gen a
|
|
ws1 = elements [" ", " "]
|
|
|
|
----------------------------------------------------------------------------------
|
|
|
|
-- | Two bodies of source code are considered congruent iff the parser produces
|
|
-- identical ASTs for both.
|
|
congruentSrc :: ProgramSrc -> ProgramSrc -> Bool
|
|
congruentSrc = (==) `on` (justParseSrc . T.unpack . coerce)
|
|
|
|
----------------------------------------------------------------------------------
|
|
|
|
-- | @unparseCoreProg@ should be inverse to @parseCoreProg@ up to source code
|
|
-- congruency, newtype coercion and errors handling.
|
|
unparseCoreProg :: Program' -> ProgramSrc
|
|
unparseCoreProg p = unparseTypeSigs (p ^. programTypeSigs)
|
|
<> unparseScDefs (p ^. programScDefs)
|
|
|
|
unparseTypeSigs :: H.HashMap Name Type -> ProgramSrc
|
|
unparseTypeSigs = H.foldrWithKey f mempty
|
|
where f k v a = unparseTypeSig k v <> ";\n\n" <> a
|
|
|
|
unparseTypeSig :: Name -> Type -> ProgramSrc
|
|
unparseTypeSig n t = unparseName n <> " :: " <> unparseType t
|
|
|
|
unparseName :: Name -> ProgramSrc
|
|
unparseName = coerce
|
|
|
|
unparseType :: Type -> ProgramSrc
|
|
unparseType = go 0 where
|
|
go :: Int -> Type -> ProgramSrc
|
|
-- (:->) is a special case of TyApp, but we want the infix syntax
|
|
go p (a :-> b) = a : assocFun b
|
|
<&> go 1
|
|
& coerce (T.intercalate " -> ")
|
|
& pprec 0 p
|
|
go p a@(TyApp f x) = assocApp a
|
|
<&> go 1
|
|
& coerce (T.intercalate " ")
|
|
& pprec 1 p
|
|
go _ TyFun = "(->)"
|
|
go _ (TyCon a) = unparseName a
|
|
go _ (TyVar a) = unparseName a
|
|
|
|
assocFun :: Type -> [Type]
|
|
assocFun (a :-> b) = a : assocFun b
|
|
assocFun x = [x]
|
|
|
|
assocApp :: Type -> [Type]
|
|
assocApp (TyApp f x) = assocApp f ++ [x]
|
|
assocApp x = [x]
|
|
|
|
unparseScDefs :: [ScDef'] -> ProgramSrc
|
|
unparseScDefs = foldr f mempty where
|
|
f sc a = unparseScDef sc <> ";\n\n" <> a
|
|
|
|
unparseScDef :: ScDef' -> ProgramSrc
|
|
unparseScDef (ScDef n as e) = (unparseName <$> (n:as)) <> ["=", unparseExpr e]
|
|
& coerce (T.intercalate " ")
|
|
|
|
unparseExpr :: Expr' -> ProgramSrc
|
|
unparseExpr = go 0 where
|
|
go :: Int -> Expr' -> ProgramSrc
|
|
go _ (Var n) = unparseName n
|
|
go _ (Con t a) = mconcat ["Pack{",srcShow t," ",srcShow a,"}"]
|
|
go _ (Lit l) = unparseLit l
|
|
go p a@(App _ _) = srci " " (go 1 <$> assocApp a)
|
|
& pprec 0 p
|
|
go p (Lam bs e) = "λ" <> srci " " (unparseName <$> bs)
|
|
<> " -> " <> go 0 e
|
|
& pprec 0 p
|
|
go p (Let r bs e) = mconcat [lw," { ",bs'," } in ",go 0 e]
|
|
where
|
|
lw = case r of { NonRec -> "let"; Rec -> "letrec" }
|
|
bs' = srci "; " $ unparseBinding <$> bs
|
|
go p (Case e as) = mconcat ["case ",go 0 e," of {",as',"}"]
|
|
& pprec 0 p
|
|
where as' = srci "; " (unparseAlter <$> as)
|
|
|
|
assocApp (App f x) = assocApp f ++ [x]
|
|
assocApp f = [f]
|
|
|
|
srci :: ProgramSrc -> [ProgramSrc] -> ProgramSrc
|
|
srci = coerce T.intercalate
|
|
|
|
unparseBinding :: Binding' -> ProgramSrc
|
|
unparseBinding (k := v) = mconcat [unparseName k, " = ", unparseExpr v]
|
|
|
|
unparseLit :: Lit -> ProgramSrc
|
|
unparseLit (IntL n) = srcShow n
|
|
|
|
srcShow :: (Show a) => a -> ProgramSrc
|
|
srcShow = coerce . T.pack . show
|
|
|
|
unparseAlter :: Alter' -> ProgramSrc
|
|
unparseAlter (Alter (AltData t) as e) = srcShow t <> " " <> coerce (T.unwords as)
|
|
<> " -> " <> unparseExpr e
|
|
|