From 13827f880e5b5707e1995bdb4063f61b37d4abe9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Madeleine=20Sydney=20=C5=9Alaga?= Date: Tue, 26 May 2026 02:23:01 -0600 Subject: [PATCH] interned symbols --- app/Gyehoek/ANF/Syntax.hs | 47 ++++++++++++++++----- app/Gyehoek/Scheme/Syntax.hs | 3 ++ play/symbol.scm | 3 ++ runtime/src/capi.rs | 16 ++++---- runtime/src/lib.rs | 2 +- runtime/src/obarray.rs | 23 ----------- runtime/src/obarray/fnv1a.rs | 34 ---------------- runtime/src/primitives.rs | 8 ++++ runtime/src/scm.rs | 79 ++++++++++++++++++++++++++---------- runtime/src/var.rs | 37 +++++++++-------- runtime/test.rs | 5 +++ 11 files changed, 143 insertions(+), 114 deletions(-) create mode 100644 play/symbol.scm delete mode 100644 runtime/src/obarray.rs delete mode 100644 runtime/src/obarray/fnv1a.rs create mode 100644 runtime/test.rs diff --git a/app/Gyehoek/ANF/Syntax.hs b/app/Gyehoek/ANF/Syntax.hs index 4904f36..869b12e 100644 --- a/app/Gyehoek/ANF/Syntax.hs +++ b/app/Gyehoek/ANF/Syntax.hs @@ -276,17 +276,10 @@ lowerInt = QBE.ValConst . QBE.CInt . (Data.Bits..<<. 2) . fromIntegral -lowerVal +lowerString :: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es) - => Val - -> (QBE.Val -> Eff es BlockBuilder) - -> Eff es BlockBuilder - -lowerVal (ValLit (LitInt n)) k = k . lowerInt $ n - --- lowerVal (ValLit (LitQuote (SexpSymbol s))) k = _aaa - -lowerVal (ValLit (LitString s)) k = do + => Text -> (QBE.Val -> Eff es BlockBuilder) -> Eff es BlockBuilder +lowerString s k = do rawString <- gensym r <- gensym let bs = T.encodeUtf8 s @@ -309,6 +302,31 @@ lowerVal (ValLit (LitString s)) k = do [] ] +lowerVal + :: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es) + => Val + -> (QBE.Val -> Eff es BlockBuilder) + -> Eff es BlockBuilder + +lowerVal (ValLit (LitInt n)) k = k . lowerInt $ n + +lowerVal (ValLit (LitQuote (Lam.SexpSymbol s))) k = + lowerString s \s' -> do + r <- gensym + Emit (intern r s') <$> k (QBE.ValTemporary r) + where + intern r s' = + [ QBE.Call + (Just (r, QBE.AbiBaseTy QBE.Long)) + (QBE.ValGlobal "scm_string_to_symbol") + Nothing + [ QBE.Arg (QBE.AbiBaseTy QBE.Long) s' + ] + [] + ] + +lowerVal (ValLit (LitString s)) k = lowerString s k + lowerVal (ValLit _) k = error "todo" lowerVal (ValVar x) k = k . QBE.ValTemporary . lowerName $ x @@ -445,6 +463,14 @@ lowerCdr r x e k = do ] <$> lower' e k +lowerNewline r k = + Emit [ QBE.Call (Just (lowerName r, QBE.AbiBaseTy QBE.Long)) + (QBE.ValGlobal "scm_newline") Nothing + [] + [] + ] + <$> k (QBE.ValTemporary (lowerName r)) + lowerPrim :: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es) => Name -> Prim Val -> Exp @@ -462,6 +488,7 @@ lowerPrim r p e k = PrimCar x -> lowerCar r x e k PrimCdr x -> lowerCdr r x e k PrimWrite x -> lowerWrite r x e k + PrimNewline -> lowerNewline r k lower' :: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es) diff --git a/app/Gyehoek/Scheme/Syntax.hs b/app/Gyehoek/Scheme/Syntax.hs index 639738d..6edd27d 100644 --- a/app/Gyehoek/Scheme/Syntax.hs +++ b/app/Gyehoek/Scheme/Syntax.hs @@ -40,6 +40,7 @@ data Prim e | PrimConsP e | PrimIntegerP e | PrimWrite e + | PrimNewline deriving (Show, Generic, Functor, Foldable, Traversable) instance Each (Prim e) (Prim e') e e' @@ -90,9 +91,11 @@ instance SexpIso a => SexpIso (Prim a) where $ With (. unop "cons?") $ With (. unop "integer?") $ With (. unop "write") + $ With (. nullop "newline") $ End where primname = ("prim:" <>) + nullop s = list $ el (sym (primname s)) unop s = list $ el (sym (primname s)) >>> el sexpIso binop s = list $ el (sym (primname s)) >>> el sexpIso >>> el sexpIso diff --git a/play/symbol.scm b/play/symbol.scm new file mode 100644 index 0000000..56ae711 --- /dev/null +++ b/play/symbol.scm @@ -0,0 +1,3 @@ +(begin (prim:write 'abc) + (prim:newline) + (prim:write 'abc)) diff --git a/runtime/src/capi.rs b/runtime/src/capi.rs index 41c877e..1f30f79 100644 --- a/runtime/src/capi.rs +++ b/runtime/src/capi.rs @@ -12,11 +12,13 @@ pub extern "C" fn scm_from_utf8_string ( scm::make_string (str::from_utf8 (bytes).unwrap ()) } -#[unsafe(no_mangle)] -pub extern "C" fn scm_hash (ptr : *const u8, len : usize) -> u64 { - let bytes = unsafe { slice::from_raw_parts (ptr, len) }; - crate::obarray::hash (str::from_utf8 (bytes).unwrap ()) -} - // #[unsafe(no_mangle)] -// pub extern "C" fn scm_lookup (name : scm_) +// pub extern "C" fn scm_hash (ptr : *const u8, len : usize) -> u64 { +// let bytes = unsafe { slice::from_raw_parts (ptr, len) }; +// crate::obarray::hash (str::from_utf8 (bytes).unwrap ()) +// } + +#[unsafe(no_mangle)] +pub extern "C" fn scm_string_to_symbol (str : scm_bits) -> scm_bits { + crate::scm::string_to_symbol (str) +} diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 285646a..07f245f 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -4,6 +4,6 @@ mod gc; mod scm; mod primitives; -mod obarray; +// mod obarray; mod capi; mod var; diff --git a/runtime/src/obarray.rs b/runtime/src/obarray.rs deleted file mode 100644 index 9c17031..0000000 --- a/runtime/src/obarray.rs +++ /dev/null @@ -1,23 +0,0 @@ -use std::{ops::DerefMut, sync::{LazyLock, Mutex, RwLock}}; -use internment::Intern; - -// mod fnv1a; -// use fnv1a::{FNV1a, SymbolHash}; -// use string_interner::{StringInterner, symbol::SymbolU32}; - -pub struct Obarray ( - LazyLock >> -); - -impl Obarray { - pub const fn new () -> Obarray { - Obarray (LazyLock::new (|| RwLock::new (StringInterner::new ()))) - } - - pub fn intern (&self, name : &str) -> SymbolU32 { - let mut r = symbols.0.write ().unwrap (); - r.deref_mut ().get_or_intern (name) - } -} - -pub static symbols : Obarray = Obarray::new (); diff --git a/runtime/src/obarray/fnv1a.rs b/runtime/src/obarray/fnv1a.rs deleted file mode 100644 index 9c0e015..0000000 --- a/runtime/src/obarray/fnv1a.rs +++ /dev/null @@ -1,34 +0,0 @@ -use std::{collections::HashMap, hash::{BuildHasher, Hasher}}; - -pub struct FNV1a (u64); -pub struct SymbolHash (); - -impl Default for FNV1a { - fn default () -> Self { - FNV1a (offset_basis) - } -} - -impl Hasher for FNV1a { - fn finish (&self) -> u64 { - self.0 - } - - fn write (&mut self, bytes: &[u8]) { - for b in bytes { - self.0 ^= *b as u64; - self.0 *= prime; - } - } -} - -impl BuildHasher for SymbolHash { - type Hasher = FNV1a; - - fn build_hasher (&self) -> Self::Hasher { - FNV1a (offset_basis) - } -} - -pub const offset_basis : u64 = 0xcbf29ce484222325; -pub const prime : u64 = 0x100000001b3; diff --git a/runtime/src/primitives.rs b/runtime/src/primitives.rs index bb74b82..36f2d9e 100644 --- a/runtime/src/primitives.rs +++ b/runtime/src/primitives.rs @@ -17,7 +17,15 @@ pub extern "C" fn scm_write (x: scm_bits) -> scm_bits { SCM::Nil => print! ("()"), SCM::False => print! ("#f"), SCM::True => print! ("#t"), + SCM::Symbol (_s) => print! ("{x:#016x}"), + // SCM::Symbol (s) => print! ("{s}"), }; let _ = stdout ().flush (); return 0; } + +#[unsafe(no_mangle)] +pub extern "C" fn scm_newline () -> scm_bits { + print! ("\n"); + 0 +} diff --git a/runtime/src/scm.rs b/runtime/src/scm.rs index 0535522..374f636 100644 --- a/runtime/src/scm.rs +++ b/runtime/src/scm.rs @@ -5,7 +5,7 @@ use std::slice; use internment::Intern; -use crate::{gc, obarray}; +use crate::{gc}; pub type scm_bits = u64; @@ -23,7 +23,7 @@ pub enum SCM { SmallInt (i64), Cons (scm_bits, scm_bits), String (String), - Symbol (usize), + Symbol (String), Nil, False, True, @@ -33,6 +33,21 @@ pub enum SCM { // pub fn pack (x : SCM) -> scm_bits { // } +#[inline(always)] +pub fn unpack_string (x : scm_bits) -> String { + let len = unsafe { cell_word (x, 1) }; + let str_beginning = (x as *const scm_bits).wrapping_add (2) as *const u8; + let slice = unsafe { + str::from_utf8 ( + slice::from_raw_parts ( + str_beginning, + len.try_into ().unwrap () + ) + ).unwrap () + }; + String::from (slice) +} + // super duper important for this to inline. we want to eliminate the // SCM type at runtime as much as possible. the hope is for inlining // to lead to a case-of-case–esque transformation. @@ -44,17 +59,10 @@ pub fn unpack (x : scm_bits) -> SCM { // `car` x and `cdr` x are safe iff `is_cons` x. unsafe { SCM::Cons (car (x), cdr (x)) } } else if is_string (x) { - let len = unsafe { cell_word (x, 1) }; - let str_beginning = (x as *const scm_bits).wrapping_add (2) as *const u8; - let slice = unsafe { - str::from_utf8 ( - slice::from_raw_parts ( - str_beginning, - len.try_into ().unwrap () - ) - ).unwrap () - }; - return SCM::String (String::from (slice)); + SCM::String (unpack_string (x)) + } else if is_symbol (x) { + let s = unpack_string (unsafe { cell_word (x, 1) }); + SCM::Symbol (s) } else { // concat_panic! ("don't know how to unpack: ", x) panic! ("don't know how to unpack {x:#016x}") @@ -81,6 +89,10 @@ fn is_cons (x: scm_bits) -> bool { } } +fn is_symbol (x : scm_bits) -> bool { + has_tc7 (x, tc7_symbol) +} + fn has_tc7 (x: scm_bits, tc7: u64) -> bool { unsafe { ! is_immediate (x) && (0x7f & cell_type (x)) == tc7 @@ -149,18 +161,43 @@ pub fn make_string (s : &str) -> scm_bits { -pub fn make_symbol (name : &str) -> scm_bits { - let r = unsafe { words (tc7_symbol, 2) }; - let sym = obarray::symbols.intern (name).to_usize (); - unsafe { set_word (r, 1, sym.try_into ().unwrap ()) }; - pack_ptr (r) +// pub fn make_symbol (name : &str) -> scm_bits { +// let r = unsafe { words (tc7_symbol, 2) }; +// let sym = obarray::symbols.intern (name).to_usize (); +// unsafe { set_word (r, 1, sym.try_into ().unwrap ()) }; +// pack_ptr (r) +// } + +struct Symbol ([scm_bits; 2]); + +impl PartialEq for Symbol { + fn eq (&self, other: &Self) -> bool { + if let (SCM::String (s1), SCM::String (s2)) + = (unpack (self.0[1]), unpack (other.0[1])) { + s1 == s2 + } else { + panic! ("not a symbol") + } + } } -pub fn make_symbol_off_heap (name : scm_bits) -> [scm_bits; 2] { - [ tc7_symbol, name ] +impl Eq for Symbol {} + +impl std::hash::Hash for Symbol { + fn hash (&self, state: &mut H) { + if let SCM::String (s) = unpack (self.0[1]) { + s.hash (state) + } else { + panic! ("not a symbol") + } + } +} + +pub fn make_symbol_off_heap (name : scm_bits) -> Symbol { + Symbol ([ tc7_symbol, name ]) } pub fn string_to_symbol (str : scm_bits) -> scm_bits { let r = Intern::new (make_symbol_off_heap (str)); - pack_ptr (r.as_ref () as *const scm_bits) + pack_ptr (r.0.as_ptr ()) } diff --git a/runtime/src/var.rs b/runtime/src/var.rs index f4e31de..d9a6f7d 100644 --- a/runtime/src/var.rs +++ b/runtime/src/var.rs @@ -1,25 +1,26 @@ use std::{collections::HashMap, ops::DerefMut as _, sync::{LazyLock, RwLock}}; -use string_interner::symbol::SymbolU32; use crate::scm::scm_bits; -struct Vars ( - LazyLock >> -); +// struct Vars ( +// LazyLock >> +// ); -impl Vars { - pub const fn new () -> Vars { - Vars (LazyLock::new (|| RwLock::new (HashMap::new ()))) - } +// impl Vars { +// pub const fn new () -> Vars { +// Vars (LazyLock::new (|| RwLock::new (HashMap::new ()))) +// } - pub fn lookup (&self, name : SymbolU32) -> Option { - let r = self.0.write ().unwrap (); - (*r).get (&name).map (|x| *x) - } +// pub fn lookup (&self, name : SymbolU32) -> Option { +// // let r = self.0.write ().unwrap (); +// // (*r).get (&name).map (|x| *x) +// todo! () +// } - pub fn define (&self, name : SymbolU32, value : scm_bits) { - let mut r = self.0.write ().unwrap (); - r.deref_mut ().insert (name, value); - } -} +// pub fn define (&self, name : SymbolU32, value : scm_bits) { +// // let mut r = self.0.write ().unwrap (); +// // r.deref_mut ().insert (name, value); +// todo! () +// } +// } -static vars : Vars = Vars::new (); +// static vars : Vars = Vars::new (); diff --git a/runtime/test.rs b/runtime/test.rs new file mode 100644 index 0000000..0343983 --- /dev/null +++ b/runtime/test.rs @@ -0,0 +1,5 @@ +use gyehoek::scm::*; + +pub fn main () { + +}