From 7ef3bf108289a87ff771245a05727dae8e9617c9 Mon Sep 17 00:00:00 2001 From: crumbtoo Date: Mon, 4 Dec 2023 11:21:00 -0700 Subject: [PATCH] prim arith hooray --- docs/src/references/gm-state-transitions.rst | 643 ++++++++++--------- src/GM.hs | 236 +++++-- 2 files changed, 518 insertions(+), 361 deletions(-) diff --git a/docs/src/references/gm-state-transitions.rst b/docs/src/references/gm-state-transitions.rst index ef4daf9..ac39602 100644 --- a/docs/src/references/gm-state-transitions.rst +++ b/docs/src/references/gm-state-transitions.rst @@ -6,296 +6,345 @@ G-Machine State Transition Rules Core Transition Rules ********************* -1. Lookup a global by name and push its value onto the stack +#. Lookup a global by name and push its value onto the stack -.. math:: - \gmrule - { \mathtt{PushGlobal} \; f : i - & s - & d - & h - & m - \begin{bmatrix} - f : a - \end{bmatrix} - } - { i - & a : s - & d - & h - & m - } + .. math:: + \gmrule + { \mathtt{PushGlobal} \; f : i + & s + & d + & h + & m + \begin{bmatrix} + f : a + \end{bmatrix} + } + { i + & a : s + & d + & h + & m + } -2. Allocate an int node on the heap, and push the address of the newly created +#. Allocate an int node on the heap, and push the address of the newly created node onto the stack -.. math:: - \gmrule - { \mathtt{PushInt} \; n : i - & s - & d - & h - & m - } - { i - & a : s - & d - & h - \begin{bmatrix} - a : \mathtt{NNum} \; n - \end{bmatrix} - & m - } + .. math:: + \gmrule + { \mathtt{PushInt} \; n : i + & s + & d + & h + & m + } + { i + & a : s + & d + & h + \begin{bmatrix} + a : \mathtt{NNum} \; n + \end{bmatrix} + & m + } -3. Allocate an application node on the heap, applying the top of the stack to +#. Allocate an application node on the heap, applying the top of the stack to the address directly below it. The address of the application node is pushed onto the stack. -.. math:: - \gmrule - { \mathtt{MkAp} : i - & f : x : s - & d - & h - & m - } - { i - & a : s - & d - & h - \begin{bmatrix} - a : \mathtt{NAp} \; f \; x - \end{bmatrix} - & m - } + .. math:: + \gmrule + { \mathtt{MkAp} : i + & f : x : s + & d + & h + & m + } + { i + & a : s + & d + & h + \begin{bmatrix} + a : \mathtt{NAp} \; f \; x + \end{bmatrix} + & m + } -4. Push a function's argument onto the stack +#. Push a function's argument onto the stack -.. math:: - \gmrule - { \mathtt{Push} \; n : i - & a_0 : \ldots : a_n : s - & d - & h - & m - } - { i - & a_n : a_0 : \ldots : a_n : s - & d - & h - & m - } + .. math:: + \gmrule + { \mathtt{Push} \; n : i + & a_0 : \ldots : a_n : s + & d + & h + & m + } + { i + & a_n : a_0 : \ldots : a_n : s + & d + & h + & m + } -5. Tidy up the stack after instantiating a supercombinator +#. Tidy up the stack after instantiating a supercombinator -.. math:: - \gmrule - { \mathtt{Slide} \; n : i - & a_0 : \ldots : a_n : s - & d - & h - & m - } - { i - & a_0 : s - & d - & h - & m - } + .. math:: + \gmrule + { \mathtt{Slide} \; n : i + & a_0 : \ldots : a_n : s + & d + & h + & m + } + { i + & a_0 : s + & d + & h + & m + } -6. If a number is on top of the stack, :code:`Unwind` leaves the machine in a - halt state +#. If the top of the stack is in WHNF (currently this just means a number) is on + top of the stack, :code:`Unwind` considers evaluation complete. In the case + where the dump is **not** empty, the instruction queue and stack is restored + from the top. -.. math:: - \gmrule - { \mathtt{Unwind} : \nillist - & a : s - & d - & h - \begin{bmatrix} - a : \mathtt{NNum} \; n - \end{bmatrix} - & m - } - { \nillist - & a : s - & d - & h - & m - } + .. math:: + \gmrule + { \mathtt{Unwind} : \nillist + & a : s + & \langle i', s' \rangle : d + & h + \begin{bmatrix} + a : \mathtt{NNum} \; n + \end{bmatrix} + & m + } + { i' + & a : s' + & d + & h + & m + } -7. If an application is on top of the stack, :code:`Unwind` continues unwinding +#. Bulding on the previous rule, in the case where the dump **is** empty, leave + the machine in a halt state (i.e. with an empty instruction queue). -.. math:: - \gmrule - { \mathtt{Unwind} : \nillist - & a : s - & d - & h - \begin{bmatrix} - a : \mathtt{NAp} \; f \; x - \end{bmatrix} - & m - } - { \mathtt{Unwind} : \nillist - & f : a : s - & d - & h - & m - } + .. math:: + \gmrule + { \mathtt{Unwind} : \nillist + & a : s + & \nillist + & h + \begin{bmatrix} + a : \mathtt{NNum} \; n + \end{bmatrix} + & m + } + { \nillist + & a : s + & \nillist + & h + & m + } -8. When a supercombinator is on top of the stack (and the correct number of +#. If an application is on top of the stack, :code:`Unwind` continues unwinding + + .. math:: + \gmrule + { \mathtt{Unwind} : \nillist + & a : s + & d + & h + \begin{bmatrix} + a : \mathtt{NAp} \; f \; x + \end{bmatrix} + & m + } + { \mathtt{Unwind} : \nillist + & f : a : s + & d + & h + & m + } + +#. When a supercombinator is on top of the stack (and the correct number of arguments have been provided), :code:`Unwind` sets up the stack and jumps to the supercombinator's code (:math:`\beta`-reduction) -.. math:: - \gmrule - { \mathtt{Unwind} : \nillist - & a_0 : \ldots : a_n : s - & d - & h - \begin{bmatrix} - a_0 : \mathtt{NGlobal} \; n \; c \\ - a_1 : \mathtt{NAp} \; a_0 \; e_1 \\ - \vdots \\ - a_n : \mathtt{NAp} \; a_{n-1} \; e_n \\ - \end{bmatrix} - & m - } - { c - & e_1 : \ldots : e_n : a_n : s - & d - & h - & m - } + .. math:: + \gmrule + { \mathtt{Unwind} : \nillist + & a_0 : \ldots : a_n : s + & d + & h + \begin{bmatrix} + a_0 : \mathtt{NGlobal} \; n \; c \\ + a_1 : \mathtt{NAp} \; a_0 \; e_1 \\ + \vdots \\ + a_n : \mathtt{NAp} \; a_{n-1} \; e_n \\ + \end{bmatrix} + & m + } + { c + & e_1 : \ldots : e_n : a_n : s + & d + & h + & m + } -9. Pop the stack, and update the nth node to point to the popped address +#. Pop the stack, and update the nth node to point to the popped address -.. math:: - \gmrule - { \mathtt{Update} \; n : i - & e : f : a_1 : \ldots : a_n : s - & d - & h - \begin{bmatrix} - a_1 : \mathtt{NAp} \; f \; e \\ - \vdots \\ - a_n : \mathtt{NAp} \; a_{n-1} \; e_n - \end{bmatrix} - & m - } - { i - & f : a_1 : \ldots : a_n : s - & d - & h - \begin{bmatrix} - a_n : \mathtt{NInd} \; e - \end{bmatrix} - & m - } + .. math:: + \gmrule + { \mathtt{Update} \; n : i + & e : f : a_1 : \ldots : a_n : s + & d + & h + \begin{bmatrix} + a_1 : \mathtt{NAp} \; f \; e \\ + \vdots \\ + a_n : \mathtt{NAp} \; a_{n-1} \; e_n + \end{bmatrix} + & m + } + { i + & f : a_1 : \ldots : a_n : s + & d + & h + \begin{bmatrix} + a_n : \mathtt{NInd} \; e + \end{bmatrix} + & m + } -10. Pop the stack. +#. Pop the stack. -.. math:: - \gmrule - { \mathtt{Pop} \; n : i - & a_1 : \ldots : a_n : s - & d - & h - & m - } - { i - & s - & d - & h - & m - } + .. math:: + \gmrule + { \mathtt{Pop} \; n : i + & a_1 : \ldots : a_n : s + & d + & h + & m + } + { i + & s + & d + & h + & m + } -11. Follow indirections while unwinding +#. Follow indirections while unwinding -.. math:: - \gmrule - { \mathtt{Unwind} : \nillist - & a : s - & d - & h - \begin{bmatrix} - a : \mathtt{NInd} \; a' - \end{bmatrix} - & m - } - { \mathtt{Unwind} : \nillist - & a' : s - & d - & h - & m - } + .. math:: + \gmrule + { \mathtt{Unwind} : \nillist + & a : s + & d + & h + \begin{bmatrix} + a : \mathtt{NInd} \; a' + \end{bmatrix} + & m + } + { \mathtt{Unwind} : \nillist + & a' : s + & d + & h + & m + } -12. Allocate uninitialised heap space +#. Allocate uninitialised heap space -.. math:: - \gmrule - { \mathtt{Alloc} \; n : i - & s - & d - & h - & m - } - { i - & a_1 : \ldots : a_n : s - & d - & h - \begin{bmatrix} - a_1 : \mathtt{NUninitialised} \\ - \vdots \\ - a_n : \mathtt{NUninitialised} \\ - \end{bmatrix} - & m - } + .. math:: + \gmrule + { \mathtt{Alloc} \; n : i + & s + & d + & h + & m + } + { i + & a_1 : \ldots : a_n : s + & d + & h + \begin{bmatrix} + a_1 : \mathtt{NUninitialised} \\ + \vdots \\ + a_n : \mathtt{NUninitialised} \\ + \end{bmatrix} + & m + } -13. When unwinding, if the top of the stack is in WHNF (currently this just - means a number), pop the dump +#. Evaluate the top of the stack to WHNF -.. math:: - \gmrule - { \mathtt{Unwind} : \nillist - & a : s - & \langle i', s' \rangle : d - & h - \begin{bmatrix} - a : \mathtt{NNum} \; n - \end{bmatrix} - & m - } - { i' - & a : s' - & d - & h - & m - } + .. math:: + \gmrule + { \mathtt{Eval} : i + & a : s + & d + & h + & m + } + { \mathtt{Unwind} : \nillist + & a : \nillist + & \langle i, s \rangle : d + & h + & m + } -14. Evaluate the top of the stack to WHNF +#. Reduce a primitive binary operator :math:`*`. -.. math:: - \gmrule - { \mathtt{Eval} : i - & a : s - & d - & h - & m - } - { \mathtt{Unwind} : \nillist - & a : \nillist - & \langle i, s \rangle : d - & h - & m - } + .. math:: + \gmrule + { * : i + & a_1 : a_2 : s + & d + & h + \begin{bmatrix} + a_1 : x \\ + a_2 : y + \end{bmatrix} + & m + } + { i + & a' : s + & d + & h + \begin{bmatrix} + a' : (x * y) + \end{bmatrix} + & m + } +#. Reduce a primitive unary operator :math:`\neg`. + + .. math:: + \gmrule + { \neg : i + & a : s + & d + & h + \begin{bmatrix} + a : x + \end{bmatrix} + & m + } + { i + & a' : s + & d + & h + \begin{bmatrix} + a' : (\neg x) + \end{bmatrix} + & m + } + *************** Extension Rules *************** -1. A sneaky trick to enable sharing of :code:`NNum` nodes. We note that the +#. A sneaky trick to enable sharing of :code:`NNum` nodes. We note that the global environment is a mapping of plain old strings to heap addresses. Strings of digits are not considered valid identifiers, so putting them on the global environment will never conflict with a supercombinator! We abuse @@ -303,51 +352,51 @@ Extension Rules node's address. Consider how this rule might impact garbage collection (remember that the environment is intended for *globals*). -.. math:: - \gmrule - { \mathtt{PushInt} \; n : i - & s - & d - & h - & m - } - { i - & a : s - & d - & h - \begin{bmatrix} - a : \mathtt{NNum} \; n - \end{bmatrix} - & m - \begin{bmatrix} - n' : a - \end{bmatrix} - \\ - \SetCell[c=5]{c} - \text{where $n'$ is the base-10 string rep. of $n$} - } + .. math:: + \gmrule + { \mathtt{PushInt} \; n : i + & s + & d + & h + & m + } + { i + & a : s + & d + & h + \begin{bmatrix} + a : \mathtt{NNum} \; n + \end{bmatrix} + & m + \begin{bmatrix} + n' : a + \end{bmatrix} + \\ + \SetCell[c=5]{c} + \text{where $n'$ is the base-10 string rep. of $n$} + } -2. In order for Extension Rule 1. to be effective, we are also required to take +#. In order for the previous rule to be effective, we are also required to take action when a number already exists in the environment: -.. math:: - \gmrule - { \mathtt{PushInt} \; n : i - & s - & d - & h - & m - \begin{bmatrix} - n' : a - \end{bmatrix} - } - { i - & a : s - & d - & h - & m - \\ - \SetCell[c=5]{c} - \text{where $n'$ is the base-10 string rep. of $n$} - } + .. math:: + \gmrule + { \mathtt{PushInt} \; n : i + & s + & d + & h + & m + \begin{bmatrix} + n' : a + \end{bmatrix} + } + { i + & a : s + & d + & h + & m + \\ + \SetCell[c=5]{c} + \text{where $n'$ is the base-10 string rep. of $n$} + } diff --git a/src/GM.hs b/src/GM.hs index a91bdfb..71d2947 100644 --- a/src/GM.hs +++ b/src/GM.hs @@ -20,9 +20,9 @@ import Text.Printf import Text.PrettyPrint hiding ((<>)) import Text.PrettyPrint.HughesPJ (maybeParens) import Data.Foldable (traverse_) -import Debug.Trace +import Control.Arrow ((>>>)) import System.IO (Handle, hPutStrLn) -import Data.Heap as Heap +import Data.Heap import Core ---------------------------------------------------------------------------------- @@ -52,8 +52,12 @@ data Instr = Unwind | Pop Int | Alloc Int | Eval + -- primitive ops + | Neg | Add + | Sub | Mul + | Div deriving (Show, Eq) data Node = NNum Int @@ -123,21 +127,27 @@ isFinal st = null $ st ^. gmCode step :: GmState -> GmState step st = case head (st ^. gmCode) of - Unwind -> unwind st - PushGlobal n -> pushGlobal n st - PushInt n -> pushInt n st - Push n -> push n st - MkAp -> mkAp st - Slide n -> slide n st - Pop n -> pop n st - Update n -> update n st - Alloc n -> alloc n st + Unwind -> unwindI st + PushGlobal n -> pushGlobalI n st + PushInt n -> pushIntI n st + Push n -> pushI n st + MkAp -> mkApI st + Slide n -> slideI n st + Pop n -> popI n st + Update n -> updateI n st + Alloc n -> allocI n st + Eval -> evalI st + Neg -> negI st + Add -> addI st + Sub -> subI st + Mul -> mulI st + Div -> divI st where - pushGlobal :: Name -> GmState -> GmState - pushGlobal k st = st - & gmCode %~ drop 1 - & gmStack .~ s' + pushGlobalI :: Name -> GmState -> GmState + pushGlobalI k st = st + & gmCode %~ drop 1 + & gmStack .~ s' where s = st ^. gmStack m = st ^. gmEnv @@ -147,8 +157,8 @@ step st = case head (st ^. gmCode) of $ lookup k m -- Extension Rules 1,2 (sharing) - pushInt :: Int -> GmState -> GmState - pushInt n st = case lookup n' m of + pushIntI :: Int -> GmState -> GmState + pushIntI n st = case lookup n' m of Just a -> st & gmCode %~ drop 1 & gmStack .~ s' @@ -163,7 +173,7 @@ step st = case head (st ^. gmCode) of & gmStats . stsAllocations %~ succ -- where s' = a : s - (h',a) = Heap.alloc h (NNum n) + (h',a) = alloc h (NNum n) m' = (n',a) : m where m = st ^. gmEnv @@ -172,12 +182,12 @@ step st = case head (st ^. gmCode) of n' = show n -- Core Rule 2. (no sharing) - -- pushInt :: Int -> GmState -> GmState - -- pushInt n st = st - -- & gmCode %~ drop 1 - -- & gmStack .~ s' - -- & gmHeap .~ h' - -- & gmStats . stsAllocations %~ succ + -- pushIntI :: Int -> GmState -> GmState + -- pushIntI n st = st + -- & gmCode %~ drop 1 + -- & gmStack .~ s' + -- & gmHeap .~ h' + -- & gmStats . stsAllocations %~ succ -- where -- s = st ^. gmStack -- h = st ^. gmHeap @@ -185,26 +195,26 @@ step st = case head (st ^. gmCode) of -- s' = a : s -- (h',a) = alloc h (NNum n) - mkAp :: GmState -> GmState - mkAp st = st - & gmCode %~ drop 1 - & gmStack .~ s' - & gmHeap .~ h' - -- record the application we allocated - & gmStats . stsAllocations %~ succ + mkApI :: GmState -> GmState + mkApI st = st + & gmCode %~ drop 1 + & gmStack .~ s' + & gmHeap .~ h' + -- record the application we allocated + & gmStats . stsAllocations %~ succ where (f:x:ss) = st ^. gmStack h = st ^. gmHeap s' = a : ss - (h',a) = Heap.alloc h (NAp f x) + (h',a) = alloc h (NAp f x) -- a `Push n` instruction pushes the address of (n+1)-th argument onto -- the stack. - push :: Int -> GmState -> GmState - push n st = st - & gmCode %~ drop 1 - & gmStack %~ (a:) + pushI :: Int -> GmState -> GmState + pushI n st = st + & gmCode %~ drop 1 + & gmStack %~ (a:) where h = st ^. gmHeap s = st ^. gmStack @@ -218,35 +228,35 @@ step st = case head (st ^. gmCode) of -- 1: f 1: f x y -- 2: f x -- 3: f x y - slide :: Int -> GmState -> GmState - slide n st = st - & gmCode %~ drop 1 - & gmStack .~ s' + slideI :: Int -> GmState -> GmState + slideI n st = st + & gmCode %~ drop 1 + & gmStack .~ s' where (a:s) = st ^. gmStack s' = a : drop n s - update :: Int -> GmState -> GmState - update n st = st - & gmCode %~ drop 1 - & gmStack .~ s - & gmHeap .~ h' - where + updateI :: Int -> GmState -> GmState + updateI n st = st + & gmCode %~ drop 1 + & gmStack .~ s + & gmHeap .~ h' + where (e:s) = st ^. gmStack an = s !! n h' = st ^. gmHeap - & Heap.update an (NInd e) + & update an (NInd e) - pop :: Int -> GmState -> GmState - pop n st = st - & gmCode %~ drop 1 - & gmStack %~ drop n + popI :: Int -> GmState -> GmState + popI n st = st + & gmCode %~ drop 1 + & gmStack %~ drop n - alloc :: Int -> GmState -> GmState - alloc n st = st - & gmCode %~ drop 1 - & gmStack .~ s' - & gmHeap .~ h' + allocI :: Int -> GmState -> GmState + allocI n st = st + & gmCode %~ drop 1 + & gmStack .~ s' + & gmHeap .~ h' where s = st ^. gmStack h = st ^. gmHeap @@ -256,14 +266,47 @@ step st = case head (st ^. gmCode) of allocNode :: Int -> GmHeap -> (GmHeap, [Addr]) allocNode 0 g = (g,[]) allocNode k g = allocNode (k-1) g' & _2 %~ (a:) - where (g',a) = Heap.alloc g NUninitialised + where (g',a) = alloc g NUninitialised + + evalI :: GmState -> GmState + evalI st = st + -- Unwind performs the actual evaluation; we just set the stage + -- so Unwind knows what to do + & gmCode .~ [Unwind] + -- leave lone scrutinee on stk to be eval'd by Unwind + & gmStack .~ [a] + -- push remaining code & stk to dump + & gmDump %~ ((i,s):) + where + (_:i) = st ^. gmCode + (a:s) = st ^. gmStack + + negI :: GmState -> GmState + negI = primitive1 boxInt unboxInt negate + + addI, subI, mulI, divI :: GmState -> GmState + addI = primitive2 boxInt unboxInt (+) + subI = primitive2 boxInt unboxInt (-) + mulI = primitive2 boxInt unboxInt (*) + divI = primitive2 boxInt unboxInt div -- the complex heart of the G-machine - unwind :: GmState -> GmState - unwind st = case hLookupUnsafe a h of + unwindI :: GmState -> GmState + unwindI st = case hLookupUnsafe a h of NNum n -> st - -- halt; discard all further instructions - & gmCode .~ [] + & gmCode .~ i' + & gmStack .~ s' + & gmDump .~ d' + where + s = st ^. gmStack + (i',s',d') = case st ^. gmDump of + -- if the dump is non-empty, restore the instruction + -- queue and stack, and pop the dump + ((ii,ss):d) -> (ii,a:ss,d) + -- if the dump is empty, clear the instruction queue and + -- leave the stack as is + [] -> ([], s, []) + NAp f x -> st -- leave the Unwind instr; continue unwinding & gmStack %~ (f:) @@ -293,6 +336,56 @@ step st = case head (st ^. gmCode) of a = head s h = st ^. gmHeap + +-- TODO: this desperately needs documentation +primitive1 :: (GmState -> b -> GmState) -- boxing function + -> (Addr -> GmState -> a) -- unboxing function + -> (a -> b) -- operator + -> GmState -> GmState -- state transition +primitive1 box unbox f st + = st + & unbox a + & f + & box (st & gmStack .~ s) + & advanceCode + where + putNewStack = gmStack .~ s + (a:s) = st ^. gmStack + r = box (putNewStack st) (f (unbox a st)) + +-- TODO: this desperately needs documentation +primitive2 :: (GmState -> b -> GmState) -- boxing function + -> (Addr -> GmState -> a) -- unboxing function + -> (a -> a -> b) -- operator + -> GmState -> GmState -- state transition +primitive2 box unbox f st + = st' + & advanceCode + where + (ax:ay:s) = st ^. gmStack + putNewStack = gmStack .~ s + x = unbox ax st + y = unbox ay st + st' = box (putNewStack st) (f x y) + +boxInt :: GmState -> Int -> GmState +boxInt st n = st + & gmHeap .~ h' + & gmStack %~ (a:) + where + h = st ^. gmHeap + (h',a) = alloc h (NNum n) + +unboxInt :: Addr -> GmState -> Int +unboxInt a st = case hLookup a h of + Just (NNum n) -> n + Just _ -> error "unboxInt received a non-int" + Nothing -> error "unboxInt received an invalid address" + where h = st ^. gmHeap + +advanceCode :: GmState -> GmState +advanceCode = gmCode %~ drop 1 + ---------------------------------------------------------------------------------- compile :: Program -> GmState @@ -305,10 +398,25 @@ compile p = GmState c [] [] h g sts type CompiledSC = (Name, Int, Code) -buildInitialHeap :: Program -> (GmHeap, Env) -buildInitialHeap (Program ss) = mapAccumL allocateSc mempty compiled +compiledPrims :: [CompiledSC] +compiledPrims = + [ ("whnf#", 1, [Push 0, Eval, Update 1, Pop 1, Unwind]) + -- , unop "negate#" Neg + , ("negate#", 1, [Push 0, Eval, Neg, Update 1, Pop 1, Unwind]) + , binop "+#" Add + , binop "-#" Sub + , binop "*#" Mul + , binop "/#" Div + ] where - compiled = fmap compileSc ss + unop k i = (k, 1, [Push 0, Eval, i, Update 1, Pop 1, Unwind]) + + binop k i = (k, 2, [Push 1, Eval, Push 1, Eval, i, Update 2, Pop 2, Unwind]) + +buildInitialHeap :: Program -> (GmHeap, Env) +buildInitialHeap (Program ss) = mapAccumL allocateSc mempty compiledScs + where + compiledScs = fmap compileSc ss <> compiledPrims -- note that we don't count sc allocations in the stats allocateSc :: GmHeap -> CompiledSC -> (GmHeap, (Name, Addr))