diff --git a/app/Gyehoek/ANF/Syntax.hs b/app/Gyehoek/ANF/Syntax.hs index 128ae8b..dca3463 100644 --- a/app/Gyehoek/ANF/Syntax.hs +++ b/app/Gyehoek/ANF/Syntax.hs @@ -48,6 +48,8 @@ import Data.InvertibleGrammar.Base ((:-)((:-))) import qualified Gyehoek.Sexp import Control.Lens.Unsound import qualified Data.Bits +import qualified GHC.IO.Encoding as T +import qualified Data.Text.Encoding as T data Val @@ -275,12 +277,36 @@ lowerInt = QBE.ValConst . QBE.CInt . fromIntegral lowerVal - :: forall es. (GenSym :> es) + :: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es) => Val -> (QBE.Val -> Eff es BlockBuilder) -> Eff es BlockBuilder lowerVal (ValLit (LitInt n)) k = k . lowerInt $ n + +lowerVal (ValLit (LitString s)) k = do + rawString <- gensym + r <- gensym + let bs = T.encodeUtf8 s + len = lengthOf each bs + tell . pure $ + QBE.DataDef [] rawString Nothing + [QBE.FieldExtTy QBE.Byte [QBE.String bs]] + Emit (alloc r rawString len) <$> k (QBE.ValTemporary r) + where + alloc r rs len = + [ QBE.Call + (Just (r, QBE.AbiBaseTy QBE.Long)) + (QBE.ValGlobal "scm_from_utf8_string") + Nothing + [ QBE.Arg (QBE.AbiBaseTy QBE.Long) (QBE.ValGlobal rs) + -- N.b. The C function declares this argument as size_t, which + -- /is/ long on my system. + , QBE.Arg (QBE.AbiBaseTy QBE.Long) (lowerInt' len) + ] + [] + ] + lowerVal (ValLit _) k = error "todo" lowerVal (ValVar x) k = k . QBE.ValTemporary . lowerName $ x @@ -303,6 +329,7 @@ lowerArithmetic r p = QBE.BinaryOp r bop x y PrimMul a b -> (QBE.Mul,a,b) _ -> _ +sizeofScm :: Int sizeofScm = 8 lowerCons @@ -417,7 +444,7 @@ lowerCdr r x e k = do <$> lower' e k lowerPrim - :: forall es. (GenSym :> es) + :: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es) => Name -> Prim Val -> Exp -> (QBE.Val -> Eff es BlockBuilder) -> Eff es BlockBuilder @@ -435,7 +462,7 @@ lowerPrim r p e k = PrimWrite x -> lowerWrite r x e k lower' - :: forall es. (GenSym :> es) + :: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es) => Exp -> (QBE.Val -> Eff es BlockBuilder) -> Eff es BlockBuilder @@ -460,7 +487,11 @@ lower' (ExpBegin (x:xs)) k = fold1 <$> traverse low (x:|xs) lower' _ k = _ -lower :: GenSym :> es => QBE.Ident QBE.Label -> Exp -> Eff es QBE.Block +lower + :: (GenSym :> es, Writer (Vector QBE.DataDef) :> es) + => QBE.Ident QBE.Label + -> Exp + -> Eff es QBE.Block lower n e = buildBlock n <$> lower' e (pure . Exit . QBE.Ret . Just) lowerProgram @@ -471,17 +502,17 @@ lowerProgram anfs = -- hack for dev convenience: if there's only one expression, let -- it be the entry point. [e] -> do - b <- lower "start" e + (b,dataDefs) <- runWriter . lower "start" $ e let f = wrapFunction @NonEmpty "main" [b] - pure $ QBE.Program [] [] [f] + pure $ QBE.Program [] (dataDefs ^.. each) [f] _ -> do let low e = do bl <- gensym' "b" fl <- gensym' "f" b <- lower bl e pure $ wrapFunction @NonEmpty fl [b] - fs <- traverse low anfs - pure $ QBE.Program [] [] (fs ^.. traversed) + (fs,dataDefs) <- runWriter $ traverse low anfs + pure $ QBE.Program [] (dataDefs ^.. each) (fs ^.. traversed) wrapFunction :: Foldable1 t diff --git a/app/Gyehoek/Scheme/Syntax.hs b/app/Gyehoek/Scheme/Syntax.hs index a2354ba..61aa494 100644 --- a/app/Gyehoek/Scheme/Syntax.hs +++ b/app/Gyehoek/Scheme/Syntax.hs @@ -38,6 +38,7 @@ data Lit = LitInt Int | LitNil | LitBool Bool + | LitString Text deriving (Show, Generic) data Define @@ -83,6 +84,7 @@ instance SexpIso Lit where $ With (. sexpIso) $ With (. sym "nil") $ With (. sexpIso) + $ With (. sexpIso) $ End instance SexpIso Define where diff --git a/cabal.project b/cabal.project index 48b9e47..ebc155e 100644 --- a/cabal.project +++ b/cabal.project @@ -3,5 +3,5 @@ packages: *.cabal source-repository-package type: git location: https://git.deertopia.net/msyds/qbe-hs.git - tag: ab7cc053a4d58fde841e910f251b8e48b54466ad - --sha256: 0n2jqr6vymlyr0gwzbv3cljhqxnzcq1pzf7m92b16jalkymbcwgy + tag: 64be0096355a8fd23cc1a4910ed5c8e6075aeca9 + --sha256: 0x507fmpyzyvg3f27wss94d7fkrbv6r05jknlphgyi53pscazr9r diff --git a/example/ascii-string-literal b/example/ascii-string-literal new file mode 100755 index 0000000..a0f0665 Binary files /dev/null and b/example/ascii-string-literal differ diff --git a/example/ascii-string-literal.anf b/example/ascii-string-literal.anf new file mode 100644 index 0000000..a25a687 --- /dev/null +++ b/example/ascii-string-literal.anf @@ -0,0 +1,4 @@ +;;; -*- mode:scheme -*- + +(let ((x0 (prim:write "wawa"))) x0) + diff --git a/example/ascii-string-literal.s b/example/ascii-string-literal.s new file mode 100644 index 0000000..38d65f3 --- /dev/null +++ b/example/ascii-string-literal.s @@ -0,0 +1,23 @@ +.data +.balign 8 +.1: + .ascii "wawa" +/* end data */ + +.text +.globl main +main: + pushq %rbp + movq %rsp, %rbp + movl $4, %esi + leaq .1(%rip), %rdi + callq scm_from_utf8_string + movq %rax, %rdi + callq scm_write + leave + ret +.type main, @function +.size main, .-main +/* end function main */ + +.section .note.GNU-stack,"",@progbits diff --git a/example/ascii-string-literal.scm b/example/ascii-string-literal.scm new file mode 100644 index 0000000..81525d1 --- /dev/null +++ b/example/ascii-string-literal.scm @@ -0,0 +1 @@ +(prim:write "wawa") diff --git a/example/ascii-string-literal.ssa b/example/ascii-string-literal.ssa new file mode 100644 index 0000000..2a1f3cd --- /dev/null +++ b/example/ascii-string-literal.ssa @@ -0,0 +1,10 @@ + +data $.1 = +{b "wawa"} +export +function w $main () { +@start + %.2 =l call $scm_from_utf8_string (l $.1, l 4) + %x0 =l call $scm_write (l %.2) + ret %x0 +} \ No newline at end of file diff --git a/example/closure.scm b/example/pseudo/closure.scm similarity index 100% rename from example/closure.scm rename to example/pseudo/closure.scm diff --git a/example/t.s b/example/pseudo/t.s similarity index 100% rename from example/t.s rename to example/pseudo/t.s diff --git a/example/t.scm b/example/pseudo/t.scm similarity index 100% rename from example/t.scm rename to example/pseudo/t.scm diff --git a/example/t.ssa b/example/pseudo/t.ssa similarity index 100% rename from example/t.ssa rename to example/pseudo/t.ssa diff --git a/example/string-literal b/example/string-literal new file mode 100755 index 0000000..7530ee0 Binary files /dev/null and b/example/string-literal differ diff --git a/example/string-literal.anf b/example/string-literal.anf new file mode 100644 index 0000000..bcdd017 --- /dev/null +++ b/example/string-literal.anf @@ -0,0 +1,4 @@ +;;; -*- mode:scheme -*- + +(let ((x0 (prim:write "안녕하세요"))) x0) + diff --git a/example/string-literal.s b/example/string-literal.s new file mode 100644 index 0000000..68a9c79 --- /dev/null +++ b/example/string-literal.s @@ -0,0 +1,23 @@ +.data +.balign 8 +.1: + .ascii "\354\225\210\353\205\225\355\225\230\354\204\270\354\232\224" +/* end data */ + +.text +.globl main +main: + pushq %rbp + movq %rsp, %rbp + movl $15, %esi + leaq .1(%rip), %rdi + callq scm_from_utf8_string + movq %rax, %rdi + callq scm_write + leave + ret +.type main, @function +.size main, .-main +/* end function main */ + +.section .note.GNU-stack,"",@progbits diff --git a/example/string-literal.scm b/example/string-literal.scm new file mode 100644 index 0000000..70b4480 --- /dev/null +++ b/example/string-literal.scm @@ -0,0 +1 @@ +(prim:write "안녕하세요") diff --git a/example/string-literal.ssa b/example/string-literal.ssa new file mode 100644 index 0000000..6f84a5d --- /dev/null +++ b/example/string-literal.ssa @@ -0,0 +1,10 @@ + +data $.1 = +{b "\354\225\210\353\205\225\355\225\230\354\204\270\354\232\224"} +export +function w $main () { +@start + %.2 =l call $scm_from_utf8_string (l $.1, l 15) + %x0 =l call $scm_write (l %.2) + ret %x0 +} \ No newline at end of file diff --git a/play/a.out b/play/a.out index 114fc12..7b7318f 100755 Binary files a/play/a.out and b/play/a.out differ diff --git a/play/string b/play/string new file mode 100755 index 0000000..092ae8e Binary files /dev/null and b/play/string differ diff --git a/play/string.anf b/play/string.anf new file mode 100644 index 0000000..ebed362 --- /dev/null +++ b/play/string.anf @@ -0,0 +1,4 @@ +;;; -*- mode:scheme -*- + +(let ((x0 (prim:write "abc"))) x0) + diff --git a/play/string.s b/play/string.s new file mode 100644 index 0000000..4f9f709 --- /dev/null +++ b/play/string.s @@ -0,0 +1,23 @@ +.data +.balign 8 +.1: + .ascii "abc" +/* end data */ + +.text +.globl main +main: + pushq %rbp + movq %rsp, %rbp + movl $3, %esi + leaq .1(%rip), %rdi + callq scm_from_utf8_string + movq %rax, %rdi + callq scm_write + leave + ret +.type main, @function +.size main, .-main +/* end function main */ + +.section .note.GNU-stack,"",@progbits diff --git a/play/string.scm b/play/string.scm new file mode 100644 index 0000000..8ef0d31 --- /dev/null +++ b/play/string.scm @@ -0,0 +1 @@ +(prim:write "abc") diff --git a/play/string.ssa b/play/string.ssa new file mode 100644 index 0000000..21c48d9 --- /dev/null +++ b/play/string.ssa @@ -0,0 +1,10 @@ + +data $.1 = +{b "abc"} +export +function w $main () { +@start + %.2 =l call $scm_from_utf8_string (l $.1, l 3) + %x0 =l call $scm_write (l %.2) + ret %x0 +} \ No newline at end of file diff --git a/play/t b/play/t index 7234ac0..0f013d1 100755 Binary files a/play/t and b/play/t differ diff --git a/play/t.anf b/play/t.anf index 2ab7790..8f81c6e 100644 --- a/play/t.anf +++ b/play/t.anf @@ -1,4 +1,4 @@ ;;; -*- mode:scheme -*- -(let ((x0 (prim:cons 4 2)) (x1 (prim:cdr x0)) (x2 (prim:* 3 x1)) (x3 (prim:write x2))) x3) +(let ((x0 (prim:cons 4 2)) (x1 (prim:write x0))) x1) diff --git a/play/t.s b/play/t.s index 2f1821f..7ca6047 100644 --- a/play/t.s +++ b/play/t.s @@ -5,9 +5,9 @@ main: movq %rsp, %rbp movl $16, %edi callq GC_malloc - movq $18, (%rax) - movq $10, 8(%rax) - movl $26, %edi + movq %rax, %rdi + movq $18, (%rdi) + movq $10, 8(%rdi) callq scm_write leave ret diff --git a/play/t.scm b/play/t.scm index ba57787..0f24db8 100644 --- a/play/t.scm +++ b/play/t.scm @@ -1,2 +1 @@ -(prim:write (prim:* 3 - (prim:cdr (prim:cons 4 2)))) +(prim:write (prim:cons 4 2)) diff --git a/play/t.ssa b/play/t.ssa index 2b98039..3cc3c6c 100644 --- a/play/t.ssa +++ b/play/t.ssa @@ -2,16 +2,9 @@ export function w $main () { @start %x0 =l call $GC_malloc (l 16) - %.4 =l add %x0, 8 + %.2 =l add %x0, 8 storel 18, %x0 - storel 10, %.4 - %.5 =l add %x0, 8 - %x1 =l loadl %.5 - %.6 =l shr 14, 2 - %.7 =l shr %x1, 2 - %.8 =l mul %.6, %.7 - %.9 =l shl %.8, 2 - %x2 =l add %.9, 2 - %x3 =l call $scm_write (l %x2) - ret %x3 + storel 10, %.2 + %x1 =l call $scm_write (l %x0) + ret %x1 } \ No newline at end of file diff --git a/play/wtf.s b/play/wtf.s new file mode 100644 index 0000000..f5c3363 --- /dev/null +++ b/play/wtf.s @@ -0,0 +1,31 @@ +.data +.balign 8 +fstr: + .ascii "%s" + .byte 0 +/* end data */ + +.data +.balign 8 +str: + .ascii "안녕하세요" + .byte 0 +/* end data */ + +.text +.globl main +main: + pushq %rbp + movq %rsp, %rbp + leaq str(%rip), %rsi + leaq fstr(%rip), %rdi + movl $0, %eax + callq printf + movl $0, %eax + leave + ret +.type main, @function +.size main, .-main +/* end function main */ + +.section .note.GNU-stack,"",@progbits diff --git a/play/wtf.ssa b/play/wtf.ssa new file mode 100644 index 0000000..44fca0e --- /dev/null +++ b/play/wtf.ssa @@ -0,0 +1,8 @@ +data $fstr = { b "%s", b 0 } +data $str = { b "안녕하세요", b 0 } + +export function w $main () { +@start + call $printf (l $fstr, ..., l $str) + ret 0 +} \ No newline at end of file diff --git a/runtime/gyehoek.c b/runtime/gyehoek.c index 09cccd3..96ba5a4 100644 --- a/runtime/gyehoek.c +++ b/runtime/gyehoek.c @@ -2,20 +2,54 @@ #include #include "gyehoek.h" + + +const long scm_tc3_cons = 0; + +const long scm_tc7_obarray = 0x55; +const long scm_tc7_symbol = 0x05; +const long scm_tc7_string = 0x15; + + + SCM scm_newline () { putc ('\n', stdout); return SCM_PACK(NULL); } +static void scm_write_string (SCM x) { + const size_t len = SCM_CELL_WORD (x, 1); + const char *s = (const char *) SCM_UNPACK_POINTER (SCM_CELL_OBJECT (x, 2)); + /* FIXME: this is a very naïve implementation with no escaping. */ + printf ("some unrelated unicode lol: %s\n", "왜 하냐??"); + printf ("\"%.*s\"", (int) len, s); +} + SCM scm_write (SCM x) { if (SCM_IMP (x)) { printf ("%ld", SCM_UNPACK (x) >> 2); + } else if (SCM_CONSP (x)) { + printf ("("); + scm_write (scm_car (x)); + printf (" . "); + scm_write (scm_cdr (x)); + printf (")"); + } else if (SCM_STRINGP (x)) { + scm_write_string (x); } else { printf ("#", SCM_UNPACK (x)); } return SCM_PACK(NULL); } +SCM scm_car (SCM x) { + return SCM_CELL_OBJECT (x, 0); +} + +SCM scm_cdr (SCM x) { + return SCM_CELL_OBJECT (x, 1); +} + SCM scm_words (scm_t_bits word_0, uint32_t n_words) { scm_t_bits *r = GC_malloc (n_words * sizeof (scm_t_bits)); r[0] = word_0; @@ -24,11 +58,18 @@ SCM scm_words (scm_t_bits word_0, uint32_t n_words) { SCM scm_from_utf8_string (const char *str, size_t len) { SCM r = scm_words (scm_tc7_string, 3); - const char *s = GC_malloc (len); SCM_SET_CELL_WORD (r, 1, len); - SCM_SET_CELL_WORD (r, 2, s); + SCM_SET_CELL_WORD (r, 2, str); + printf ("str: %p\n", str); return r; } -SCM scm_from_utf8_symbol (const char *s) { +SCM scm_from_utf8_symbol (const char *s, size_t len) { +} + +SCM scm_cons (SCM car, SCM cdr) { + scm_t_bits *r = GC_malloc (2 * sizeof (scm_t_bits)); + r[0] = SCM_UNPACK (car); + r[1] = SCM_UNPACK (cdr); + return SCM_PACK (r); } diff --git a/runtime/gyehoek.h b/runtime/gyehoek.h index 49302b8..9a33566 100644 --- a/runtime/gyehoek.h +++ b/runtime/gyehoek.h @@ -1,6 +1,7 @@ #ifndef GYEHOEK_H #define GYEHOEK_H +#include #include @@ -38,7 +39,7 @@ typedef union SCM { struct { scm_t_bits n; } n; } SCM; Guile also known as a cons-cell): This is done by first checking that the SCM variable holds a heap object, and second, by checking that tc1==0 holds for the SCM_CELL_TYPE of the SCM variable. */ -#define SCM_I_CONSP(x) (!SCM_IMP (x) && ((1 & SCM_CELL_TYPE (x)) == 0)) +#define SCM_CONSP(x) (!SCM_IMP (x) && ((1 & SCM_CELL_TYPE (x)) == 0)) @@ -49,7 +50,12 @@ typedef union SCM { struct { scm_t_bits n; } n; } SCM; #define SCM_ITAG3(x) (7 & SCM_UNPACK (x)) #define SCM_TYP3(x) (7 & SCM_CELL_TYPE (x)) -#define scm_tc3_cons 0 +/* #define scm_tc3_cons 0 */ +extern const long scm_tc3_cons; + +SCM scm_cons (SCM car, SCM cdr); +SCM scm_car (SCM x); +SCM scm_cdr (SCM x); @@ -64,22 +70,29 @@ typedef union SCM { struct { scm_t_bits n; } n; } SCM; #define SCM_SET_CELL_WORD(x, n, v) \ (SCM_SET_CELL_OBJECT ((x), (n), SCM_PACK (v))) -#define SCM_CELL_TYPE(x) SCM_CELL_WORD (x) +#define SCM_CELL_TYPE(x) SCM_CELL_WORD (x, 0) #define SCM_TYP7(x) (0x7f & SCM_CELL_TYPE (x)) #define SCM_HAS_HEAP_TYPE(x, type, tag) \ (SCM_NIMP (x) && type (x) == (tag)) #define SCM_HAS_TYP7(x, tag) (SCM_HAS_HEAP_TYPE (x, SCM_TYP7, tag)) -#define scm_tc7_obarray 0x55 -#define scm_tc7_symbol 0x05 -#define scm_tc7_string 0x15 +extern const long scm_tc7_obarray; +extern const long scm_tc7_symbol; +extern const long scm_tc7_string; + + + +#define SCM_STRINGP(x) (SCM_TYP7 (x) == scm_tc7_string) SCM scm_words (scm_t_bits word_0, uint32_t n_words); -/* Construct a symbol from a UTF-8 string. */ -SCM scm_from_utf8 (const char *); +/* Construct a Scheme string. */ +SCM scm_from_utf8_string (const char *str, size_t len); + +/* Intern a symbol with a UTF-8 string name. */ +SCM scm_from_utf8_symbol (const char *s, size_t len);