interned symbols

This commit is contained in:
2026-05-26 02:23:01 -06:00
parent aca410fbc2
commit 13827f880e
11 changed files with 143 additions and 114 deletions

View File

@@ -276,17 +276,10 @@ lowerInt = QBE.ValConst . QBE.CInt
. (Data.Bits..<<. 2) . (Data.Bits..<<. 2)
. fromIntegral . fromIntegral
lowerVal lowerString
:: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es) :: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es)
=> Val => Text -> (QBE.Val -> Eff es BlockBuilder) -> Eff es BlockBuilder
-> (QBE.Val -> Eff es BlockBuilder) lowerString s k = do
-> Eff es BlockBuilder
lowerVal (ValLit (LitInt n)) k = k . lowerInt $ n
-- lowerVal (ValLit (LitQuote (SexpSymbol s))) k = _aaa
lowerVal (ValLit (LitString s)) k = do
rawString <- gensym rawString <- gensym
r <- gensym r <- gensym
let bs = T.encodeUtf8 s let bs = T.encodeUtf8 s
@@ -309,6 +302,31 @@ lowerVal (ValLit (LitString s)) k = do
[] []
] ]
lowerVal
:: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es)
=> Val
-> (QBE.Val -> Eff es BlockBuilder)
-> Eff es BlockBuilder
lowerVal (ValLit (LitInt n)) k = k . lowerInt $ n
lowerVal (ValLit (LitQuote (Lam.SexpSymbol s))) k =
lowerString s \s' -> do
r <- gensym
Emit (intern r s') <$> k (QBE.ValTemporary r)
where
intern r s' =
[ QBE.Call
(Just (r, QBE.AbiBaseTy QBE.Long))
(QBE.ValGlobal "scm_string_to_symbol")
Nothing
[ QBE.Arg (QBE.AbiBaseTy QBE.Long) s'
]
[]
]
lowerVal (ValLit (LitString s)) k = lowerString s k
lowerVal (ValLit _) k = error "todo" lowerVal (ValLit _) k = error "todo"
lowerVal (ValVar x) k = k . QBE.ValTemporary . lowerName $ x lowerVal (ValVar x) k = k . QBE.ValTemporary . lowerName $ x
@@ -445,6 +463,14 @@ lowerCdr r x e k = do
] ]
<$> lower' e k <$> lower' e k
lowerNewline r k =
Emit [ QBE.Call (Just (lowerName r, QBE.AbiBaseTy QBE.Long))
(QBE.ValGlobal "scm_newline") Nothing
[]
[]
]
<$> k (QBE.ValTemporary (lowerName r))
lowerPrim lowerPrim
:: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es) :: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es)
=> Name -> Prim Val -> Exp => Name -> Prim Val -> Exp
@@ -462,6 +488,7 @@ lowerPrim r p e k =
PrimCar x -> lowerCar r x e k PrimCar x -> lowerCar r x e k
PrimCdr x -> lowerCdr r x e k PrimCdr x -> lowerCdr r x e k
PrimWrite x -> lowerWrite r x e k PrimWrite x -> lowerWrite r x e k
PrimNewline -> lowerNewline r k
lower' lower'
:: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es) :: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es)

View File

@@ -40,6 +40,7 @@ data Prim e
| PrimConsP e | PrimConsP e
| PrimIntegerP e | PrimIntegerP e
| PrimWrite e | PrimWrite e
| PrimNewline
deriving (Show, Generic, Functor, Foldable, Traversable) deriving (Show, Generic, Functor, Foldable, Traversable)
instance Each (Prim e) (Prim e') e e' instance Each (Prim e) (Prim e') e e'
@@ -90,9 +91,11 @@ instance SexpIso a => SexpIso (Prim a) where
$ With (. unop "cons?") $ With (. unop "cons?")
$ With (. unop "integer?") $ With (. unop "integer?")
$ With (. unop "write") $ With (. unop "write")
$ With (. nullop "newline")
$ End $ End
where where
primname = ("prim:" <>) primname = ("prim:" <>)
nullop s = list $ el (sym (primname s))
unop s = list $ el (sym (primname s)) >>> el sexpIso unop s = list $ el (sym (primname s)) >>> el sexpIso
binop s = list $ el (sym (primname s)) >>> el sexpIso >>> el sexpIso binop s = list $ el (sym (primname s)) >>> el sexpIso >>> el sexpIso

3
play/symbol.scm Normal file
View File

@@ -0,0 +1,3 @@
(begin (prim:write 'abc)
(prim:newline)
(prim:write 'abc))

View File

@@ -12,11 +12,13 @@ pub extern "C" fn scm_from_utf8_string (
scm::make_string (str::from_utf8 (bytes).unwrap ()) scm::make_string (str::from_utf8 (bytes).unwrap ())
} }
#[unsafe(no_mangle)]
pub extern "C" fn scm_hash (ptr : *const u8, len : usize) -> u64 {
let bytes = unsafe { slice::from_raw_parts (ptr, len) };
crate::obarray::hash (str::from_utf8 (bytes).unwrap ())
}
// #[unsafe(no_mangle)] // #[unsafe(no_mangle)]
// pub extern "C" fn scm_lookup (name : scm_) // pub extern "C" fn scm_hash (ptr : *const u8, len : usize) -> u64 {
// let bytes = unsafe { slice::from_raw_parts (ptr, len) };
// crate::obarray::hash (str::from_utf8 (bytes).unwrap ())
// }
#[unsafe(no_mangle)]
pub extern "C" fn scm_string_to_symbol (str : scm_bits) -> scm_bits {
crate::scm::string_to_symbol (str)
}

View File

@@ -4,6 +4,6 @@
mod gc; mod gc;
mod scm; mod scm;
mod primitives; mod primitives;
mod obarray; // mod obarray;
mod capi; mod capi;
mod var; mod var;

View File

@@ -1,23 +0,0 @@
use std::{ops::DerefMut, sync::{LazyLock, Mutex, RwLock}};
use internment::Intern;
// mod fnv1a;
// use fnv1a::{FNV1a, SymbolHash};
// use string_interner::{StringInterner, symbol::SymbolU32};
pub struct Obarray (
LazyLock <RwLock < <string_interner::DefaultBackend>>>
);
impl Obarray {
pub const fn new () -> Obarray {
Obarray (LazyLock::new (|| RwLock::new (StringInterner::new ())))
}
pub fn intern (&self, name : &str) -> SymbolU32 {
let mut r = symbols.0.write ().unwrap ();
r.deref_mut ().get_or_intern (name)
}
}
pub static symbols : Obarray = Obarray::new ();

View File

@@ -1,34 +0,0 @@
use std::{collections::HashMap, hash::{BuildHasher, Hasher}};
pub struct FNV1a (u64);
pub struct SymbolHash ();
impl Default for FNV1a {
fn default () -> Self {
FNV1a (offset_basis)
}
}
impl Hasher for FNV1a {
fn finish (&self) -> u64 {
self.0
}
fn write (&mut self, bytes: &[u8]) {
for b in bytes {
self.0 ^= *b as u64;
self.0 *= prime;
}
}
}
impl BuildHasher for SymbolHash {
type Hasher = FNV1a;
fn build_hasher (&self) -> Self::Hasher {
FNV1a (offset_basis)
}
}
pub const offset_basis : u64 = 0xcbf29ce484222325;
pub const prime : u64 = 0x100000001b3;

View File

@@ -17,7 +17,15 @@ pub extern "C" fn scm_write (x: scm_bits) -> scm_bits {
SCM::Nil => print! ("()"), SCM::Nil => print! ("()"),
SCM::False => print! ("#f"), SCM::False => print! ("#f"),
SCM::True => print! ("#t"), SCM::True => print! ("#t"),
SCM::Symbol (_s) => print! ("{x:#016x}"),
// SCM::Symbol (s) => print! ("{s}"),
}; };
let _ = stdout ().flush (); let _ = stdout ().flush ();
return 0; return 0;
} }
#[unsafe(no_mangle)]
pub extern "C" fn scm_newline () -> scm_bits {
print! ("\n");
0
}

View File

@@ -5,7 +5,7 @@ use std::slice;
use internment::Intern; use internment::Intern;
use crate::{gc, obarray}; use crate::{gc};
pub type scm_bits = u64; pub type scm_bits = u64;
@@ -23,7 +23,7 @@ pub enum SCM {
SmallInt (i64), SmallInt (i64),
Cons (scm_bits, scm_bits), Cons (scm_bits, scm_bits),
String (String), String (String),
Symbol (usize), Symbol (String),
Nil, Nil,
False, False,
True, True,
@@ -33,6 +33,21 @@ pub enum SCM {
// pub fn pack (x : SCM) -> scm_bits { // pub fn pack (x : SCM) -> scm_bits {
// } // }
#[inline(always)]
pub fn unpack_string (x : scm_bits) -> String {
let len = unsafe { cell_word (x, 1) };
let str_beginning = (x as *const scm_bits).wrapping_add (2) as *const u8;
let slice = unsafe {
str::from_utf8 (
slice::from_raw_parts (
str_beginning,
len.try_into ().unwrap ()
)
).unwrap ()
};
String::from (slice)
}
// super duper important for this to inline. we want to eliminate the // super duper important for this to inline. we want to eliminate the
// SCM type at runtime as much as possible. the hope is for inlining // SCM type at runtime as much as possible. the hope is for inlining
// to lead to a case-of-caseesque transformation. // to lead to a case-of-caseesque transformation.
@@ -44,17 +59,10 @@ pub fn unpack (x : scm_bits) -> SCM {
// `car` x and `cdr` x are safe iff `is_cons` x. // `car` x and `cdr` x are safe iff `is_cons` x.
unsafe { SCM::Cons (car (x), cdr (x)) } unsafe { SCM::Cons (car (x), cdr (x)) }
} else if is_string (x) { } else if is_string (x) {
let len = unsafe { cell_word (x, 1) }; SCM::String (unpack_string (x))
let str_beginning = (x as *const scm_bits).wrapping_add (2) as *const u8; } else if is_symbol (x) {
let slice = unsafe { let s = unpack_string (unsafe { cell_word (x, 1) });
str::from_utf8 ( SCM::Symbol (s)
slice::from_raw_parts (
str_beginning,
len.try_into ().unwrap ()
)
).unwrap ()
};
return SCM::String (String::from (slice));
} else { } else {
// concat_panic! ("don't know how to unpack: ", x) // concat_panic! ("don't know how to unpack: ", x)
panic! ("don't know how to unpack {x:#016x}") panic! ("don't know how to unpack {x:#016x}")
@@ -81,6 +89,10 @@ fn is_cons (x: scm_bits) -> bool {
} }
} }
fn is_symbol (x : scm_bits) -> bool {
has_tc7 (x, tc7_symbol)
}
fn has_tc7 (x: scm_bits, tc7: u64) -> bool { fn has_tc7 (x: scm_bits, tc7: u64) -> bool {
unsafe { unsafe {
! is_immediate (x) && (0x7f & cell_type (x)) == tc7 ! is_immediate (x) && (0x7f & cell_type (x)) == tc7
@@ -149,18 +161,43 @@ pub fn make_string (s : &str) -> scm_bits {
pub fn make_symbol (name : &str) -> scm_bits { // pub fn make_symbol (name : &str) -> scm_bits {
let r = unsafe { words (tc7_symbol, 2) }; // let r = unsafe { words (tc7_symbol, 2) };
let sym = obarray::symbols.intern (name).to_usize (); // let sym = obarray::symbols.intern (name).to_usize ();
unsafe { set_word (r, 1, sym.try_into ().unwrap ()) }; // unsafe { set_word (r, 1, sym.try_into ().unwrap ()) };
pack_ptr (r) // pack_ptr (r)
// }
struct Symbol ([scm_bits; 2]);
impl PartialEq for Symbol {
fn eq (&self, other: &Self) -> bool {
if let (SCM::String (s1), SCM::String (s2))
= (unpack (self.0[1]), unpack (other.0[1])) {
s1 == s2
} else {
panic! ("not a symbol")
}
}
} }
pub fn make_symbol_off_heap (name : scm_bits) -> [scm_bits; 2] { impl Eq for Symbol {}
[ tc7_symbol, name ]
impl std::hash::Hash for Symbol {
fn hash <H: std::hash::Hasher> (&self, state: &mut H) {
if let SCM::String (s) = unpack (self.0[1]) {
s.hash (state)
} else {
panic! ("not a symbol")
}
}
}
pub fn make_symbol_off_heap (name : scm_bits) -> Symbol {
Symbol ([ tc7_symbol, name ])
} }
pub fn string_to_symbol (str : scm_bits) -> scm_bits { pub fn string_to_symbol (str : scm_bits) -> scm_bits {
let r = Intern::new (make_symbol_off_heap (str)); let r = Intern::new (make_symbol_off_heap (str));
pack_ptr (r.as_ref () as *const scm_bits) pack_ptr (r.0.as_ptr ())
} }

View File

@@ -1,25 +1,26 @@
use std::{collections::HashMap, ops::DerefMut as _, sync::{LazyLock, RwLock}}; use std::{collections::HashMap, ops::DerefMut as _, sync::{LazyLock, RwLock}};
use string_interner::symbol::SymbolU32;
use crate::scm::scm_bits; use crate::scm::scm_bits;
struct Vars ( // struct Vars (
LazyLock <RwLock <HashMap <SymbolU32, scm_bits>>> // LazyLock <RwLock <HashMap <SymbolU32, scm_bits>>>
); // );
impl Vars { // impl Vars {
pub const fn new () -> Vars { // pub const fn new () -> Vars {
Vars (LazyLock::new (|| RwLock::new (HashMap::new ()))) // Vars (LazyLock::new (|| RwLock::new (HashMap::new ())))
} // }
pub fn lookup (&self, name : SymbolU32) -> Option <scm_bits> { // pub fn lookup (&self, name : SymbolU32) -> Option <scm_bits> {
let r = self.0.write ().unwrap (); // // let r = self.0.write ().unwrap ();
(*r).get (&name).map (|x| *x) // // (*r).get (&name).map (|x| *x)
} // todo! ()
// }
pub fn define (&self, name : SymbolU32, value : scm_bits) { // pub fn define (&self, name : SymbolU32, value : scm_bits) {
let mut r = self.0.write ().unwrap (); // // let mut r = self.0.write ().unwrap ();
r.deref_mut ().insert (name, value); // // r.deref_mut ().insert (name, value);
} // todo! ()
} // }
// }
static vars : Vars = Vars::new (); // static vars : Vars = Vars::new ();

5
runtime/test.rs Normal file
View File

@@ -0,0 +1,5 @@
use gyehoek::scm::*;
pub fn main () {
}