Compare commits

..

6 Commits

Author SHA1 Message Date
37b97f9eb3 fuuuuck! 2026-05-26 18:10:09 -06:00
8345763bee reuse string lits 2026-05-26 07:06:49 -06:00
13827f880e interned symbols 2026-05-26 02:23:08 -06:00
aca410fbc2 2026-05-25 23:13:33 -06:00
198a85afe4 2026-05-25 22:18:41 -06:00
1558c38185 2026-05-24 12:53:29 -06:00
12 changed files with 229 additions and 80 deletions

View File

@@ -50,6 +50,9 @@ import Control.Lens.Unsound
import qualified Data.Bits
import qualified GHC.IO.Encoding as T
import qualified Data.Text.Encoding as T
import Data.HashMap.Strict (HashMap)
import Effectful.State.Static.Local
import qualified Data.HashMap.Strict as HM
data Val
@@ -276,26 +279,24 @@ lowerInt = QBE.ValConst . QBE.CInt
. (Data.Bits..<<. 2)
. fromIntegral
lowerVal
:: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es)
=> Val
-> (QBE.Val -> Eff es BlockBuilder)
-> Eff es BlockBuilder
lowerVal (ValLit (LitInt n)) k = k . lowerInt $ n
-- lowerVal (ValLit (LitQuote (SexpSymbol s))) k = _aaa
lowerVal (ValLit (LitString s)) k = do
rawString <- gensym
lowerString
:: forall es. (GenSym :> es, State StringLiterals :> es)
=> Text -> (QBE.Val -> Eff es BlockBuilder) -> Eff es BlockBuilder
lowerString s k = do
let len = lengthOf each $ T.encodeUtf8 s
rawString <- getRawString
r <- gensym
let bs = T.encodeUtf8 s
len = lengthOf each bs
tell . pure $
QBE.DataDef [] rawString Nothing
[QBE.FieldExtTy QBE.Byte [QBE.String bs]]
Emit (alloc r rawString len) <$> k (QBE.ValTemporary r)
where
-- getRawString
-- :: forall es. (GenSym :> es, State StringLiterals :> es)
-- => Eff es _
getRawString = do
x <- get
case x ^. at s of
Just s' -> pure s'
Nothing -> do r <- gensym
state \lits -> (r, HM.insert s r lits)
alloc r rs len =
[ QBE.Call
(Just (r, QBE.AbiBaseTy QBE.Long))
@@ -309,6 +310,33 @@ lowerVal (ValLit (LitString s)) k = do
[]
]
type StringLiterals = HashMap Text (QBE.Ident QBE.Global)
lowerVal
:: forall es. (GenSym :> es, State StringLiterals :> es)
=> Val
-> (QBE.Val -> Eff es BlockBuilder)
-> Eff es BlockBuilder
lowerVal (ValLit (LitInt n)) k = k . lowerInt $ n
lowerVal (ValLit (LitQuote (Lam.SexpSymbol s))) k =
lowerString s \s' -> do
r <- gensym
Emit (intern r s') <$> k (QBE.ValTemporary r)
where
intern r s' =
[ QBE.Call
(Just (r, QBE.AbiBaseTy QBE.Long))
(QBE.ValGlobal "scm_string_to_symbol")
Nothing
[ QBE.Arg (QBE.AbiBaseTy QBE.Long) s'
]
[]
]
lowerVal (ValLit (LitString s)) k = lowerString s k
lowerVal (ValLit _) k = error "todo"
lowerVal (ValVar x) k = k . QBE.ValTemporary . lowerName $ x
@@ -335,7 +363,7 @@ sizeofScm :: Integral a => a
sizeofScm = 8
lowerCons
:: (GenSym :> es, Writer (Vector QBE.DataDef) :> es)
:: (GenSym :> es, State StringLiterals :> es)
=> Name -> QBE.Val -> QBE.Val -> Exp
-> (QBE.Val -> Eff es BlockBuilder)
-> Eff es BlockBuilder
@@ -424,7 +452,7 @@ smallIntMask :: Integer
smallIntMask = 2 ^ (sizeofScm * 8) - 2
lowerCar
:: (GenSym :> es, Writer (Vector QBE.DataDef) :> es)
:: (GenSym :> es, State StringLiterals :> es)
=> Name -> QBE.Val -> _
-> (QBE.Val -> Eff es BlockBuilder) -> Eff es BlockBuilder
lowerCar r x e k = do
@@ -433,7 +461,7 @@ lowerCar r x e k = do
<$> lower' e k
lowerCdr
:: (GenSym :> es, Writer (Vector QBE.DataDef) :> es)
:: (GenSym :> es, State StringLiterals :> es)
=> Name -> QBE.Val -> Exp
-> (QBE.Val -> Eff es BlockBuilder) -> Eff es BlockBuilder
lowerCdr r x e k = do
@@ -445,8 +473,16 @@ lowerCdr r x e k = do
]
<$> lower' e k
lowerNewline r k =
Emit [ QBE.Call (Just (lowerName r, QBE.AbiBaseTy QBE.Long))
(QBE.ValGlobal "scm_newline") Nothing
[]
[]
]
<$> k (QBE.ValTemporary (lowerName r))
lowerPrim
:: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es)
:: forall es. (GenSym :> es, State StringLiterals :> es)
=> Name -> Prim Val -> Exp
-> (QBE.Val -> Eff es BlockBuilder)
-> Eff es BlockBuilder
@@ -462,9 +498,10 @@ lowerPrim r p e k =
PrimCar x -> lowerCar r x e k
PrimCdr x -> lowerCdr r x e k
PrimWrite x -> lowerWrite r x e k
PrimNewline -> lowerNewline r k
lower'
:: forall es. (GenSym :> es, Writer (Vector QBE.DataDef) :> es)
:: forall es. (GenSym :> es, State StringLiterals :> es)
=> Exp
-> (QBE.Val -> Eff es BlockBuilder)
-> Eff es BlockBuilder
@@ -490,12 +527,17 @@ lower' (ExpBegin (x:xs)) k = fold1 <$> traverse low (x:|xs)
lower' _ k = _
lower
:: (GenSym :> es, Writer (Vector QBE.DataDef) :> es)
:: (GenSym :> es, State StringLiterals :> es)
=> QBE.Ident QBE.Label
-> Exp
-> Eff es QBE.Block
lower n e = buildBlock n <$> lower' e (pure . Exit . QBE.Ret . Just)
lowerStringLiterals =
ifoldMapOf itraversed \s v ->
[ QBE.DataDef [] v Nothing
[QBE.FieldExtTy QBE.Byte [QBE.String (T.encodeUtf8 s)]]]
lowerProgram
:: (GenSym :> es, Traversable t)
=> t Exp -> Eff es QBE.Program
@@ -504,17 +546,18 @@ lowerProgram anfs =
-- hack for dev convenience: if there's only one expression, let
-- it be the entry point.
[e] -> do
(b,dataDefs) <- runWriter . lower "start" $ e
(b,stringLits) <- runState mempty . lower "start" $ e
let f = wrapFunction @NonEmpty "main" [b]
pure $ QBE.Program [] (dataDefs ^.. each) [f]
dataDefs = lowerStringLiterals stringLits
pure $ QBE.Program [] dataDefs [f]
_ -> do
let low e = do
bl <- gensym' "b"
fl <- gensym' "f"
b <- lower bl e
pure $ wrapFunction @NonEmpty fl [b]
(fs,dataDefs) <- runWriter $ traverse low anfs
pure $ QBE.Program [] (dataDefs ^.. each) (fs ^.. traversed)
(fs,stringLits) <- runState mempty $ traverse low anfs
pure $ QBE.Program [] (lowerStringLiterals stringLits) (fs ^.. traversed)
wrapFunction
:: Foldable1 t

View File

@@ -40,6 +40,7 @@ data Prim e
| PrimConsP e
| PrimIntegerP e
| PrimWrite e
| PrimNewline
deriving (Show, Generic, Functor, Foldable, Traversable)
instance Each (Prim e) (Prim e') e e'
@@ -90,9 +91,11 @@ instance SexpIso a => SexpIso (Prim a) where
$ With (. unop "cons?")
$ With (. unop "integer?")
$ With (. unop "write")
$ With (. nullop "newline")
$ End
where
primname = ("prim:" <>)
nullop s = list $ el (sym (primname s))
unop s = list $ el (sym (primname s)) >>> el sexpIso
binop s = list $ el (sym (primname s)) >>> el sexpIso >>> el sexpIso

3
play/symbol.scm Normal file
View File

@@ -0,0 +1,3 @@
(begin (prim:write 'abc)
(prim:newline)
(prim:write 'abc))

39
runtime/Cargo.lock generated
View File

@@ -2,6 +2,12 @@
# It is not intended for manual editing.
version = 4
[[package]]
name = "allocator-api2"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]]
name = "bdwgc-alloc"
version = "0.6.13"
@@ -39,21 +45,54 @@ dependencies = [
"typewit",
]
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "find-msvc-tools"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
[[package]]
name = "foldhash"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
[[package]]
name = "gyehoek"
version = "0.1.0"
dependencies = [
"bdwgc-alloc",
"const_panic",
"internment",
"libc",
]
[[package]]
name = "hashbrown"
version = "0.15.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
dependencies = [
"allocator-api2",
"equivalent",
"foldhash",
]
[[package]]
name = "internment"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "636d4b0f6a39fd684effe2a73f5310df16a3fa7954c26d36833e98f44d1977a2"
dependencies = [
"hashbrown",
]
[[package]]
name = "libc"
version = "0.2.186"

View File

@@ -13,6 +13,7 @@ bdwgc-alloc = { version = "0.6.13"
, default-features = false
, features = ["cmake"] }
const_panic = "0.2.15"
internment = "0.8.6"
libc = "0.2.186"
[patch.crates-io]

View File

@@ -11,3 +11,14 @@ pub extern "C" fn scm_from_utf8_string (
let bytes = unsafe { slice::from_raw_parts (ptr, len) };
scm::make_string (str::from_utf8 (bytes).unwrap ())
}
// #[unsafe(no_mangle)]
// pub extern "C" fn scm_hash (ptr : *const u8, len : usize) -> u64 {
// let bytes = unsafe { slice::from_raw_parts (ptr, len) };
// crate::obarray::hash (str::from_utf8 (bytes).unwrap ())
// }
#[unsafe(no_mangle)]
pub extern "C" fn scm_string_to_symbol (str : scm_bits) -> scm_bits {
crate::scm::string_to_symbol (str)
}

View File

@@ -4,5 +4,6 @@
mod gc;
mod scm;
mod primitives;
mod obarray;
// mod obarray;
mod capi;
mod var;

View File

@@ -1,10 +0,0 @@
use std::{collections::HashMap, hash::{BuildHasher, Hasher}};
use crate::scm::scm_bits;
use crate::scm;
mod fnv1a;
pub struct Obarray (HashMap <String, scm_bits>);
pub fn hash () {
}

View File

@@ -1,28 +0,0 @@
use std::{collections::HashMap, hash::{BuildHasher, Hasher}};
pub struct FNV1a (u64);
pub struct SymbolHash;
impl Hasher for FNV1a {
fn finish (&self) -> u64 {
self.0
}
fn write (&mut self, bytes: &[u8]) {
for b in bytes {
self.0 ^= *b as u64;
self.0 *= prime;
}
}
}
impl BuildHasher for SymbolHash {
type Hasher = FNV1a;
fn build_hasher (&self) -> Self::Hasher {
FNV1a (offset_basis)
}
}
const offset_basis : u64 = 0xcbf29ce484222325;
const prime : u64 = 0x100000001b3;

View File

@@ -17,7 +17,15 @@ pub extern "C" fn scm_write (x: scm_bits) -> scm_bits {
SCM::Nil => print! ("()"),
SCM::False => print! ("#f"),
SCM::True => print! ("#t"),
SCM::Symbol (_s) => print! ("{x:#016x}"),
// SCM::Symbol (s) => print! ("{s}"),
};
let _ = stdout ().flush ();
return 0;
}
#[unsafe(no_mangle)]
pub extern "C" fn scm_newline () -> scm_bits {
print! ("\n");
0
}

View File

@@ -3,13 +3,15 @@
use std::slice;
use internment::Intern;
use crate::gc;
pub type scm_bits = u64;
pub const tc2_int : u64 = 2;
pub const tc3_cons : u64 = 0;
pub const tc7_weak_set : u64 = 0x55;
pub const tc7_obarray : u64 = 0x55;
pub const tc7_symbol : u64 = 0x05;
pub const tc7_string : u64 = 0x15;
@@ -21,6 +23,7 @@ pub enum SCM {
SmallInt (i64),
Cons (scm_bits, scm_bits),
String (String),
Symbol (String),
Nil,
False,
True,
@@ -30,6 +33,21 @@ pub enum SCM {
// pub fn pack (x : SCM) -> scm_bits {
// }
#[inline(always)]
pub fn unpack_string (x : scm_bits) -> String {
let len = unsafe { cell_word (x, 1) };
let str_beginning = (x as *const scm_bits).wrapping_add (2) as *const u8;
let slice = unsafe {
str::from_utf8 (
slice::from_raw_parts (
str_beginning,
len.try_into ().unwrap ()
)
).unwrap ()
};
String::from (slice)
}
// super duper important for this to inline. we want to eliminate the
// SCM type at runtime as much as possible. the hope is for inlining
// to lead to a case-of-caseesque transformation.
@@ -41,17 +59,10 @@ pub fn unpack (x : scm_bits) -> SCM {
// `car` x and `cdr` x are safe iff `is_cons` x.
unsafe { SCM::Cons (car (x), cdr (x)) }
} else if is_string (x) {
let len = unsafe { cell_word (x, 1) };
let str_beginning = (x as *const scm_bits).wrapping_add (2) as *const u8;
let slice = unsafe {
str::from_utf8 (
slice::from_raw_parts (
str_beginning,
len.try_into ().unwrap ()
)
).unwrap ()
};
return SCM::String (String::from (slice));
SCM::String (unpack_string (x))
} else if is_symbol (x) {
let s = unpack_string (unsafe { cell_word (x, 1) });
SCM::Symbol (s)
} else {
// concat_panic! ("don't know how to unpack: ", x)
panic! ("don't know how to unpack {x:#016x}")
@@ -78,6 +89,10 @@ fn is_cons (x: scm_bits) -> bool {
}
}
fn is_symbol (x : scm_bits) -> bool {
has_tc7 (x, tc7_symbol)
}
fn has_tc7 (x: scm_bits, tc7: u64) -> bool {
unsafe {
! is_immediate (x) && (0x7f & cell_type (x)) == tc7
@@ -146,6 +161,43 @@ pub fn make_string (s : &str) -> scm_bits {
pub fn make_symbol (name : &str) -> scm_bits {
todo! ()
// pub fn make_symbol (name : &str) -> scm_bits {
// let r = unsafe { words (tc7_symbol, 2) };
// let sym = obarray::symbols.intern (name).to_usize ();
// unsafe { set_word (r, 1, sym.try_into ().unwrap ()) };
// pack_ptr (r)
// }
struct Symbol ([scm_bits; 2]);
impl PartialEq for Symbol {
fn eq (&self, other: &Self) -> bool {
if let (SCM::String (s1), SCM::String (s2))
= (unpack (self.0[1]), unpack (other.0[1])) {
s1 == s2
} else {
panic! ("not a symbol")
}
}
}
impl Eq for Symbol {}
impl std::hash::Hash for Symbol {
fn hash <H: std::hash::Hasher> (&self, state: &mut H) {
if let SCM::String (s) = unpack (self.0[1]) {
s.hash (state)
} else {
panic! ("not a symbol")
}
}
}
fn make_symbol_off_heap (name : scm_bits) -> Symbol {
Symbol ([ tc7_symbol, name ])
}
pub fn string_to_symbol (str : scm_bits) -> scm_bits {
let r = Intern::new (make_symbol_off_heap (str));
pack_ptr (r.0.as_ptr ())
}

26
runtime/src/var.rs Normal file
View File

@@ -0,0 +1,26 @@
use std::{collections::HashMap, ops::DerefMut as _, sync::{LazyLock, RwLock}};
use crate::scm::scm_bits;
struct Vars (
LazyLock <RwLock <HashMap <String, scm_bits>>>
);
impl Vars {
pub const fn new () -> Vars {
Vars (LazyLock::new (|| RwLock::new (HashMap::new ())))
}
pub fn lookup (&self, name : String) -> Option <scm_bits> {
// let r = self.0.write ().unwrap ();
// (*r).get (&name).map (|x| *x)
todo! ()
}
pub fn define (&self, name : String, value : scm_bits) {
// let mut r = self.0.write ().unwrap ();
// r.deref_mut ().insert (name, value);
todo! ()
}
}
static vars : Vars = Vars::new ();