interned symbols
This commit is contained in:
@@ -276,17 +276,10 @@ lowerInt = QBE.ValConst . QBE.CInt
|
|||||||
. (Data.Bits..<<. 2)
|
. (Data.Bits..<<. 2)
|
||||||
. fromIntegral
|
. fromIntegral
|
||||||
|
|
||||||
lowerVal
|
lowerString
|
||||||
:: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es)
|
:: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es)
|
||||||
=> Val
|
=> Text -> (QBE.Val -> Eff es BlockBuilder) -> Eff es BlockBuilder
|
||||||
-> (QBE.Val -> Eff es BlockBuilder)
|
lowerString s k = do
|
||||||
-> Eff es BlockBuilder
|
|
||||||
|
|
||||||
lowerVal (ValLit (LitInt n)) k = k . lowerInt $ n
|
|
||||||
|
|
||||||
-- lowerVal (ValLit (LitQuote (SexpSymbol s))) k = _aaa
|
|
||||||
|
|
||||||
lowerVal (ValLit (LitString s)) k = do
|
|
||||||
rawString <- gensym
|
rawString <- gensym
|
||||||
r <- gensym
|
r <- gensym
|
||||||
let bs = T.encodeUtf8 s
|
let bs = T.encodeUtf8 s
|
||||||
@@ -309,6 +302,31 @@ lowerVal (ValLit (LitString s)) k = do
|
|||||||
[]
|
[]
|
||||||
]
|
]
|
||||||
|
|
||||||
|
lowerVal
|
||||||
|
:: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es)
|
||||||
|
=> Val
|
||||||
|
-> (QBE.Val -> Eff es BlockBuilder)
|
||||||
|
-> Eff es BlockBuilder
|
||||||
|
|
||||||
|
lowerVal (ValLit (LitInt n)) k = k . lowerInt $ n
|
||||||
|
|
||||||
|
lowerVal (ValLit (LitQuote (Lam.SexpSymbol s))) k =
|
||||||
|
lowerString s \s' -> do
|
||||||
|
r <- gensym
|
||||||
|
Emit (intern r s') <$> k (QBE.ValTemporary r)
|
||||||
|
where
|
||||||
|
intern r s' =
|
||||||
|
[ QBE.Call
|
||||||
|
(Just (r, QBE.AbiBaseTy QBE.Long))
|
||||||
|
(QBE.ValGlobal "scm_string_to_symbol")
|
||||||
|
Nothing
|
||||||
|
[ QBE.Arg (QBE.AbiBaseTy QBE.Long) s'
|
||||||
|
]
|
||||||
|
[]
|
||||||
|
]
|
||||||
|
|
||||||
|
lowerVal (ValLit (LitString s)) k = lowerString s k
|
||||||
|
|
||||||
lowerVal (ValLit _) k = error "todo"
|
lowerVal (ValLit _) k = error "todo"
|
||||||
lowerVal (ValVar x) k = k . QBE.ValTemporary . lowerName $ x
|
lowerVal (ValVar x) k = k . QBE.ValTemporary . lowerName $ x
|
||||||
|
|
||||||
@@ -445,6 +463,14 @@ lowerCdr r x e k = do
|
|||||||
]
|
]
|
||||||
<$> lower' e k
|
<$> lower' e k
|
||||||
|
|
||||||
|
lowerNewline r k =
|
||||||
|
Emit [ QBE.Call (Just (lowerName r, QBE.AbiBaseTy QBE.Long))
|
||||||
|
(QBE.ValGlobal "scm_newline") Nothing
|
||||||
|
[]
|
||||||
|
[]
|
||||||
|
]
|
||||||
|
<$> k (QBE.ValTemporary (lowerName r))
|
||||||
|
|
||||||
lowerPrim
|
lowerPrim
|
||||||
:: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es)
|
:: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es)
|
||||||
=> Name -> Prim Val -> Exp
|
=> Name -> Prim Val -> Exp
|
||||||
@@ -462,6 +488,7 @@ lowerPrim r p e k =
|
|||||||
PrimCar x -> lowerCar r x e k
|
PrimCar x -> lowerCar r x e k
|
||||||
PrimCdr x -> lowerCdr r x e k
|
PrimCdr x -> lowerCdr r x e k
|
||||||
PrimWrite x -> lowerWrite r x e k
|
PrimWrite x -> lowerWrite r x e k
|
||||||
|
PrimNewline -> lowerNewline r k
|
||||||
|
|
||||||
lower'
|
lower'
|
||||||
:: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es)
|
:: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es)
|
||||||
|
|||||||
@@ -40,6 +40,7 @@ data Prim e
|
|||||||
| PrimConsP e
|
| PrimConsP e
|
||||||
| PrimIntegerP e
|
| PrimIntegerP e
|
||||||
| PrimWrite e
|
| PrimWrite e
|
||||||
|
| PrimNewline
|
||||||
deriving (Show, Generic, Functor, Foldable, Traversable)
|
deriving (Show, Generic, Functor, Foldable, Traversable)
|
||||||
|
|
||||||
instance Each (Prim e) (Prim e') e e'
|
instance Each (Prim e) (Prim e') e e'
|
||||||
@@ -90,9 +91,11 @@ instance SexpIso a => SexpIso (Prim a) where
|
|||||||
$ With (. unop "cons?")
|
$ With (. unop "cons?")
|
||||||
$ With (. unop "integer?")
|
$ With (. unop "integer?")
|
||||||
$ With (. unop "write")
|
$ With (. unop "write")
|
||||||
|
$ With (. nullop "newline")
|
||||||
$ End
|
$ End
|
||||||
where
|
where
|
||||||
primname = ("prim:" <>)
|
primname = ("prim:" <>)
|
||||||
|
nullop s = list $ el (sym (primname s))
|
||||||
unop s = list $ el (sym (primname s)) >>> el sexpIso
|
unop s = list $ el (sym (primname s)) >>> el sexpIso
|
||||||
binop s = list $ el (sym (primname s)) >>> el sexpIso >>> el sexpIso
|
binop s = list $ el (sym (primname s)) >>> el sexpIso >>> el sexpIso
|
||||||
|
|
||||||
|
|||||||
3
play/symbol.scm
Normal file
3
play/symbol.scm
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
(begin (prim:write 'abc)
|
||||||
|
(prim:newline)
|
||||||
|
(prim:write 'abc))
|
||||||
@@ -12,11 +12,13 @@ pub extern "C" fn scm_from_utf8_string (
|
|||||||
scm::make_string (str::from_utf8 (bytes).unwrap ())
|
scm::make_string (str::from_utf8 (bytes).unwrap ())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[unsafe(no_mangle)]
|
|
||||||
pub extern "C" fn scm_hash (ptr : *const u8, len : usize) -> u64 {
|
|
||||||
let bytes = unsafe { slice::from_raw_parts (ptr, len) };
|
|
||||||
crate::obarray::hash (str::from_utf8 (bytes).unwrap ())
|
|
||||||
}
|
|
||||||
|
|
||||||
// #[unsafe(no_mangle)]
|
// #[unsafe(no_mangle)]
|
||||||
// pub extern "C" fn scm_lookup (name : scm_)
|
// pub extern "C" fn scm_hash (ptr : *const u8, len : usize) -> u64 {
|
||||||
|
// let bytes = unsafe { slice::from_raw_parts (ptr, len) };
|
||||||
|
// crate::obarray::hash (str::from_utf8 (bytes).unwrap ())
|
||||||
|
// }
|
||||||
|
|
||||||
|
#[unsafe(no_mangle)]
|
||||||
|
pub extern "C" fn scm_string_to_symbol (str : scm_bits) -> scm_bits {
|
||||||
|
crate::scm::string_to_symbol (str)
|
||||||
|
}
|
||||||
|
|||||||
@@ -4,6 +4,6 @@
|
|||||||
mod gc;
|
mod gc;
|
||||||
mod scm;
|
mod scm;
|
||||||
mod primitives;
|
mod primitives;
|
||||||
mod obarray;
|
// mod obarray;
|
||||||
mod capi;
|
mod capi;
|
||||||
mod var;
|
mod var;
|
||||||
|
|||||||
@@ -1,23 +0,0 @@
|
|||||||
use std::{ops::DerefMut, sync::{LazyLock, Mutex, RwLock}};
|
|
||||||
use internment::Intern;
|
|
||||||
|
|
||||||
// mod fnv1a;
|
|
||||||
// use fnv1a::{FNV1a, SymbolHash};
|
|
||||||
// use string_interner::{StringInterner, symbol::SymbolU32};
|
|
||||||
|
|
||||||
pub struct Obarray (
|
|
||||||
LazyLock <RwLock < <string_interner::DefaultBackend>>>
|
|
||||||
);
|
|
||||||
|
|
||||||
impl Obarray {
|
|
||||||
pub const fn new () -> Obarray {
|
|
||||||
Obarray (LazyLock::new (|| RwLock::new (StringInterner::new ())))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn intern (&self, name : &str) -> SymbolU32 {
|
|
||||||
let mut r = symbols.0.write ().unwrap ();
|
|
||||||
r.deref_mut ().get_or_intern (name)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub static symbols : Obarray = Obarray::new ();
|
|
||||||
@@ -1,34 +0,0 @@
|
|||||||
use std::{collections::HashMap, hash::{BuildHasher, Hasher}};
|
|
||||||
|
|
||||||
pub struct FNV1a (u64);
|
|
||||||
pub struct SymbolHash ();
|
|
||||||
|
|
||||||
impl Default for FNV1a {
|
|
||||||
fn default () -> Self {
|
|
||||||
FNV1a (offset_basis)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Hasher for FNV1a {
|
|
||||||
fn finish (&self) -> u64 {
|
|
||||||
self.0
|
|
||||||
}
|
|
||||||
|
|
||||||
fn write (&mut self, bytes: &[u8]) {
|
|
||||||
for b in bytes {
|
|
||||||
self.0 ^= *b as u64;
|
|
||||||
self.0 *= prime;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl BuildHasher for SymbolHash {
|
|
||||||
type Hasher = FNV1a;
|
|
||||||
|
|
||||||
fn build_hasher (&self) -> Self::Hasher {
|
|
||||||
FNV1a (offset_basis)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub const offset_basis : u64 = 0xcbf29ce484222325;
|
|
||||||
pub const prime : u64 = 0x100000001b3;
|
|
||||||
@@ -17,7 +17,15 @@ pub extern "C" fn scm_write (x: scm_bits) -> scm_bits {
|
|||||||
SCM::Nil => print! ("()"),
|
SCM::Nil => print! ("()"),
|
||||||
SCM::False => print! ("#f"),
|
SCM::False => print! ("#f"),
|
||||||
SCM::True => print! ("#t"),
|
SCM::True => print! ("#t"),
|
||||||
|
SCM::Symbol (_s) => print! ("{x:#016x}"),
|
||||||
|
// SCM::Symbol (s) => print! ("{s}"),
|
||||||
};
|
};
|
||||||
let _ = stdout ().flush ();
|
let _ = stdout ().flush ();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[unsafe(no_mangle)]
|
||||||
|
pub extern "C" fn scm_newline () -> scm_bits {
|
||||||
|
print! ("\n");
|
||||||
|
0
|
||||||
|
}
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ use std::slice;
|
|||||||
|
|
||||||
use internment::Intern;
|
use internment::Intern;
|
||||||
|
|
||||||
use crate::{gc, obarray};
|
use crate::{gc};
|
||||||
|
|
||||||
pub type scm_bits = u64;
|
pub type scm_bits = u64;
|
||||||
|
|
||||||
@@ -23,7 +23,7 @@ pub enum SCM {
|
|||||||
SmallInt (i64),
|
SmallInt (i64),
|
||||||
Cons (scm_bits, scm_bits),
|
Cons (scm_bits, scm_bits),
|
||||||
String (String),
|
String (String),
|
||||||
Symbol (usize),
|
Symbol (String),
|
||||||
Nil,
|
Nil,
|
||||||
False,
|
False,
|
||||||
True,
|
True,
|
||||||
@@ -33,6 +33,21 @@ pub enum SCM {
|
|||||||
// pub fn pack (x : SCM) -> scm_bits {
|
// pub fn pack (x : SCM) -> scm_bits {
|
||||||
// }
|
// }
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn unpack_string (x : scm_bits) -> String {
|
||||||
|
let len = unsafe { cell_word (x, 1) };
|
||||||
|
let str_beginning = (x as *const scm_bits).wrapping_add (2) as *const u8;
|
||||||
|
let slice = unsafe {
|
||||||
|
str::from_utf8 (
|
||||||
|
slice::from_raw_parts (
|
||||||
|
str_beginning,
|
||||||
|
len.try_into ().unwrap ()
|
||||||
|
)
|
||||||
|
).unwrap ()
|
||||||
|
};
|
||||||
|
String::from (slice)
|
||||||
|
}
|
||||||
|
|
||||||
// super duper important for this to inline. we want to eliminate the
|
// super duper important for this to inline. we want to eliminate the
|
||||||
// SCM type at runtime as much as possible. the hope is for inlining
|
// SCM type at runtime as much as possible. the hope is for inlining
|
||||||
// to lead to a case-of-case–esque transformation.
|
// to lead to a case-of-case–esque transformation.
|
||||||
@@ -44,17 +59,10 @@ pub fn unpack (x : scm_bits) -> SCM {
|
|||||||
// `car` x and `cdr` x are safe iff `is_cons` x.
|
// `car` x and `cdr` x are safe iff `is_cons` x.
|
||||||
unsafe { SCM::Cons (car (x), cdr (x)) }
|
unsafe { SCM::Cons (car (x), cdr (x)) }
|
||||||
} else if is_string (x) {
|
} else if is_string (x) {
|
||||||
let len = unsafe { cell_word (x, 1) };
|
SCM::String (unpack_string (x))
|
||||||
let str_beginning = (x as *const scm_bits).wrapping_add (2) as *const u8;
|
} else if is_symbol (x) {
|
||||||
let slice = unsafe {
|
let s = unpack_string (unsafe { cell_word (x, 1) });
|
||||||
str::from_utf8 (
|
SCM::Symbol (s)
|
||||||
slice::from_raw_parts (
|
|
||||||
str_beginning,
|
|
||||||
len.try_into ().unwrap ()
|
|
||||||
)
|
|
||||||
).unwrap ()
|
|
||||||
};
|
|
||||||
return SCM::String (String::from (slice));
|
|
||||||
} else {
|
} else {
|
||||||
// concat_panic! ("don't know how to unpack: ", x)
|
// concat_panic! ("don't know how to unpack: ", x)
|
||||||
panic! ("don't know how to unpack {x:#016x}")
|
panic! ("don't know how to unpack {x:#016x}")
|
||||||
@@ -81,6 +89,10 @@ fn is_cons (x: scm_bits) -> bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn is_symbol (x : scm_bits) -> bool {
|
||||||
|
has_tc7 (x, tc7_symbol)
|
||||||
|
}
|
||||||
|
|
||||||
fn has_tc7 (x: scm_bits, tc7: u64) -> bool {
|
fn has_tc7 (x: scm_bits, tc7: u64) -> bool {
|
||||||
unsafe {
|
unsafe {
|
||||||
! is_immediate (x) && (0x7f & cell_type (x)) == tc7
|
! is_immediate (x) && (0x7f & cell_type (x)) == tc7
|
||||||
@@ -149,18 +161,43 @@ pub fn make_string (s : &str) -> scm_bits {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
pub fn make_symbol (name : &str) -> scm_bits {
|
// pub fn make_symbol (name : &str) -> scm_bits {
|
||||||
let r = unsafe { words (tc7_symbol, 2) };
|
// let r = unsafe { words (tc7_symbol, 2) };
|
||||||
let sym = obarray::symbols.intern (name).to_usize ();
|
// let sym = obarray::symbols.intern (name).to_usize ();
|
||||||
unsafe { set_word (r, 1, sym.try_into ().unwrap ()) };
|
// unsafe { set_word (r, 1, sym.try_into ().unwrap ()) };
|
||||||
pack_ptr (r)
|
// pack_ptr (r)
|
||||||
|
// }
|
||||||
|
|
||||||
|
struct Symbol ([scm_bits; 2]);
|
||||||
|
|
||||||
|
impl PartialEq for Symbol {
|
||||||
|
fn eq (&self, other: &Self) -> bool {
|
||||||
|
if let (SCM::String (s1), SCM::String (s2))
|
||||||
|
= (unpack (self.0[1]), unpack (other.0[1])) {
|
||||||
|
s1 == s2
|
||||||
|
} else {
|
||||||
|
panic! ("not a symbol")
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn make_symbol_off_heap (name : scm_bits) -> [scm_bits; 2] {
|
impl Eq for Symbol {}
|
||||||
[ tc7_symbol, name ]
|
|
||||||
|
impl std::hash::Hash for Symbol {
|
||||||
|
fn hash <H: std::hash::Hasher> (&self, state: &mut H) {
|
||||||
|
if let SCM::String (s) = unpack (self.0[1]) {
|
||||||
|
s.hash (state)
|
||||||
|
} else {
|
||||||
|
panic! ("not a symbol")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn make_symbol_off_heap (name : scm_bits) -> Symbol {
|
||||||
|
Symbol ([ tc7_symbol, name ])
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn string_to_symbol (str : scm_bits) -> scm_bits {
|
pub fn string_to_symbol (str : scm_bits) -> scm_bits {
|
||||||
let r = Intern::new (make_symbol_off_heap (str));
|
let r = Intern::new (make_symbol_off_heap (str));
|
||||||
pack_ptr (r.as_ref () as *const scm_bits)
|
pack_ptr (r.0.as_ptr ())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,25 +1,26 @@
|
|||||||
use std::{collections::HashMap, ops::DerefMut as _, sync::{LazyLock, RwLock}};
|
use std::{collections::HashMap, ops::DerefMut as _, sync::{LazyLock, RwLock}};
|
||||||
use string_interner::symbol::SymbolU32;
|
|
||||||
use crate::scm::scm_bits;
|
use crate::scm::scm_bits;
|
||||||
|
|
||||||
struct Vars (
|
// struct Vars (
|
||||||
LazyLock <RwLock <HashMap <SymbolU32, scm_bits>>>
|
// LazyLock <RwLock <HashMap <SymbolU32, scm_bits>>>
|
||||||
);
|
// );
|
||||||
|
|
||||||
impl Vars {
|
// impl Vars {
|
||||||
pub const fn new () -> Vars {
|
// pub const fn new () -> Vars {
|
||||||
Vars (LazyLock::new (|| RwLock::new (HashMap::new ())))
|
// Vars (LazyLock::new (|| RwLock::new (HashMap::new ())))
|
||||||
}
|
// }
|
||||||
|
|
||||||
pub fn lookup (&self, name : SymbolU32) -> Option <scm_bits> {
|
// pub fn lookup (&self, name : SymbolU32) -> Option <scm_bits> {
|
||||||
let r = self.0.write ().unwrap ();
|
// // let r = self.0.write ().unwrap ();
|
||||||
(*r).get (&name).map (|x| *x)
|
// // (*r).get (&name).map (|x| *x)
|
||||||
}
|
// todo! ()
|
||||||
|
// }
|
||||||
|
|
||||||
pub fn define (&self, name : SymbolU32, value : scm_bits) {
|
// pub fn define (&self, name : SymbolU32, value : scm_bits) {
|
||||||
let mut r = self.0.write ().unwrap ();
|
// // let mut r = self.0.write ().unwrap ();
|
||||||
r.deref_mut ().insert (name, value);
|
// // r.deref_mut ().insert (name, value);
|
||||||
}
|
// todo! ()
|
||||||
}
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
static vars : Vars = Vars::new ();
|
// static vars : Vars = Vars::new ();
|
||||||
|
|||||||
5
runtime/test.rs
Normal file
5
runtime/test.rs
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
use gyehoek::scm::*;
|
||||||
|
|
||||||
|
pub fn main () {
|
||||||
|
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user