This commit is contained in:
2026-05-18 17:11:04 -06:00
parent fbcb129437
commit c1851fe242
11 changed files with 243 additions and 42 deletions

View File

@@ -27,7 +27,7 @@ import Data.Vector.Strict (Vector)
import Data.Function (fix)
import Effectful.Writer.Static.Local
import Gyehoek.Scheme.Syntax qualified as Lam
import Gyehoek.Scheme.Syntax (Name, Prim(..), Lit(..))
import Gyehoek.Scheme.Syntax (Name, Prim(..), Lit(..), Sexp (..))
import Gyehoek.GenSym
import Control.Monad.Cont
import Data.Foldable
@@ -284,6 +284,8 @@ lowerVal
lowerVal (ValLit (LitInt n)) k = k . lowerInt $ n
lowerVal (ValLit (LitQuote (SexpSymbol s))) k = _aaa
lowerVal (ValLit (LitString s)) k = do
rawString <- gensym
r <- gensym

View File

@@ -6,9 +6,9 @@ module Gyehoek.Scheme.Syntax
( Name
, Prim(..)
, Lit(..)
, Prim(..)
, Define(..)
, Exp(..)
, Sexp(..)
)
where

Binary file not shown.

BIN
play/hash Executable file

Binary file not shown.

37
play/hash.c Normal file
View File

@@ -0,0 +1,37 @@
#include <stdio.h>
#include "../runtime/gyehoek.h"
#include "../runtime/weak-set.h"
static int
symbol_lookup_predicate_fn (SCM sym, void *closure) {
SCM other = SCM_PACK_POINTER (closure);
return 1;
}
int main () {
/* const char s[] = "wormy"; */
/* const SCM str = scm_from_utf8_string (s, sizeof(s)); */
/* const unsigned long hash = scm_c_hash (str); */
/* printf ("%ld\n", hash); */
SCM set = scm_c_make_weak_set (31);
SCM str = scm_from_cstring ("my-symbol");
SCM sym = scm_make_symbol (str, scm_c_hash (str));
scm_c_weak_set_insert (set, scm_c_hash (str),
SCM_PACK (SCM_MAKE_SMALL_INT (123)),
symbol_lookup_predicate_fn,
SCM_UNPACK_POINTER (sym));
printf ("%ld\n", weak_set_count (set));
scm_c_weak_set_insert (set, scm_c_hash (str),
SCM_PACK (SCM_MAKE_SMALL_INT (123)),
symbol_lookup_predicate_fn,
SCM_UNPACK_POINTER (sym));
printf ("%ld\n", weak_set_count (set));
SCM str2 = scm_from_cstring ("my-symbol2");
SCM sym2 = scm_make_symbol (str2, scm_c_hash (str2));
scm_c_weak_set_insert (set, scm_c_hash (str2),
SCM_PACK (SCM_MAKE_SMALL_INT (456)),
symbol_lookup_predicate_fn,
SCM_UNPACK_POINTER (sym2));
printf ("%ld\n", weak_set_count (set));
}

View File

@@ -1,12 +1,13 @@
#include <stdio.h>
#include <gc.h>
#include <string.h>
#include "gyehoek.h"
const long scm_tc3_cons = 0;
const long scm_tc7_obarray = 0x55;
const long scm_tc7_weak_set = 0x55;
const long scm_tc7_symbol = 0x05;
const long scm_tc7_string = 0x15;
@@ -17,9 +18,17 @@ SCM scm_newline () {
return SCM_PACK(NULL);
}
size_t scm_c_string_length (SCM str) {
return SCM_CELL_WORD (str, 1);
}
const char *scm_c_string_chars (SCM str) {
return (const char *) SCM_UNPACK_POINTER (SCM_CELL_OBJECT (str, 2));
}
static void scm_write_string (SCM x) {
const size_t len = SCM_CELL_WORD (x, 1);
const char *s = (const char *) SCM_UNPACK_POINTER (SCM_CELL_OBJECT (x, 2));
const size_t len = scm_c_string_length (x);
const char *s = scm_c_string_chars (x);
/* FIXME: this is a very naïve implementation with no escaping. */
printf ("some unrelated unicode lol: %s\n", "왜 하냐??");
printf ("\"%.*s\"", (int) len, s);
@@ -60,10 +69,41 @@ SCM scm_from_utf8_string (const char *str, size_t len) {
SCM r = scm_words (scm_tc7_string, 3);
SCM_SET_CELL_WORD (r, 1, len);
SCM_SET_CELL_WORD (r, 2, str);
printf ("str: %p\n", str);
return r;
}
SCM scm_from_cstring (const char *str) {
return scm_from_utf8_string (str, strlen (str));
}
unsigned long scm_c_hash (SCM str) {
const unsigned long offset_basis = 0xcbf29ce484222325;
const unsigned long prime = 0x100000001b3;
const size_t len = scm_c_string_length (str);
const char *bytes = scm_c_string_chars (str);
unsigned long hash = offset_basis;
for (const char *c = bytes; c < bytes + len; c++) {
hash ^= *c;
hash *= prime;
}
// mask off MSB, since we use it to mark tombstones in weak-set.c.
/* hash &= 0x7fffffffffffffff; */
// shift off MSB, since we use it to mark tombstones in weak-set.c.
/* hash >>= 1; */
/* ensure hash is non-zero */
hash |= hash == 0;
return hash;
}
SCM scm_str_to_symbol (SCM str) {
}
SCM scm_from_utf8_symbol (const char *s, size_t len) {
}
@@ -73,3 +113,10 @@ SCM scm_cons (SCM car, SCM cdr) {
r[1] = SCM_UNPACK (cdr);
return SCM_PACK (r);
}
SCM scm_make_symbol (SCM name, unsigned long hash) {
SCM r = scm_words(scm_tc7_symbol, 3);
SCM_SET_CELL_WORD (r, 1, hash);
SCM_SET_CELL_OBJECT (r, 2, name);
return r;
}

View File

@@ -32,7 +32,7 @@ typedef union SCM { struct { scm_t_bits n; } n; } SCM;
: ((x) >> (y)))
#endif
#define SCM_MAKE_SMALL_INT(x) (SCM_SRS ((SCM_PACK (x)), 2) + 2)
#define SCM_MAKE_SMALL_INT(x) (SCM_SRS ((long)(x), 2) + 2)
#define SCM_GET_SMALL_INT(x) (SCM_UNPACK (x) >> 2)
/* Checking if a SCM variable holds a pair (for historical reasons, in
@@ -76,26 +76,32 @@ SCM scm_cdr (SCM x);
(SCM_NIMP (x) && type (x) == (tag))
#define SCM_HAS_TYP7(x, tag) (SCM_HAS_HEAP_TYPE (x, SCM_TYP7, tag))
extern const long scm_tc7_obarray;
extern const long scm_tc7_weak_set;
extern const long scm_tc7_symbol;
extern const long scm_tc7_string;
#define SCM_STRINGP(x) (SCM_TYP7 (x) == scm_tc7_string)
#define SCM_SYMBOLP(x) (SCM_TYP7 (x) == scm_tc7_symbol)
unsigned long scm_c_hash (SCM str);
SCM scm_words (scm_t_bits word_0, uint32_t n_words);
/* Construct a Scheme string. */
SCM scm_from_utf8_string (const char *str, size_t len);
SCM scm_from_cstring (const char *str);
/* Intern a symbol with a UTF-8 string name. */
SCM scm_from_utf8_symbol (const char *s, size_t len);
SCM scm_make_symbol (SCM name, unsigned long hash);
SCM scm_write (SCM);

View File

@@ -1,24 +0,0 @@
#include "gyehoek.h"
#include "obarray.h"
typedef struct {
unsigned long hash;
scm_t_bits key;
} scm_t_weak_entry;
struct weak_entry_data {
scm_t_weak_entry *in;
scm_t_weak_entry *out;
};
typedef struct {
scm_t_weak_entry *entries; /* the data */
scm_i_pthread_mutex_t lock; /* the lock */
unsigned long size; /* total number of slots. */
unsigned long n_items; /* number of items in set */
unsigned long lower; /* when to shrink */
unsigned long upper; /* when to grow */
int size_index; /* index into hashset_size */
int min_size_index; /* minimum size_index */
} scm_t_weak_set;

View File

@@ -1,8 +0,0 @@
#ifndef OBARRAY_H
#define OBARRAY_H
#define SCM_OBARRAY_P(x) (SCM_HAS_TYP7 (x, scm_tc7_weak_set))
SCM scm_obarray_insert ()
#endif /* OBARRAY_H */

121
runtime/weak-set.c Normal file
View File

@@ -0,0 +1,121 @@
#include <gc.h>
#include <gc/gc.h>
#include <string.h>
#include "gyehoek.h"
#include "weak-set.h"
typedef struct {
unsigned long hash;
scm_t_bits key;
} scm_t_weak_entry;
/* struct weak_entry_data { */
/* scm_t_weak_entry *in; */
/* scm_t_weak_entry *out; */
/* }; */
typedef struct {
scm_t_weak_entry *entries; /* the data */
unsigned long size; /* total number of slots. */
unsigned long n_items; /* number of items in set */
unsigned long lower; /* when to shrink */
unsigned long upper; /* when to grow */
int size_index; /* index into hashset_size */
int min_size_index; /* minimum size_index */
} scm_t_weak_set;
/* Growing or shrinking is triggered when the load factor
*
* L = N / S (N: number of items in set, S: bucket vector length)
*
* passes an upper limit of 0.9 or a lower limit of 0.2.
*
* The implementation stores the upper and lower number of items which
* trigger a resize in the hashset object.
*
* Possible hash set sizes (primes) are stored in the array
* hashset_size.
*/
static unsigned long hashset_size[] = {
31, 61, 113, 223, 443, 883, 1759, 3517, 7027, 14051, 28099, 56197, 112363,
224717, 449419, 898823, 1797641, 3595271, 7190537, 14381041, 28762081,
57524111, 115048217, 230096423
};
#define HASHSET_SIZE_N (sizeof(hashset_size)/sizeof(unsigned long))
SCM scm_c_make_weak_set (unsigned long k) {
scm_t_weak_set *set = GC_malloc (sizeof (scm_t_weak_set));
/* i ripped this from guile and i'm not sure what it's for ^w^. */
int i = 0, n = k ? k : 31;
while (i + 1 < HASHSET_SIZE_N && n > hashset_size[i])
++i;
n = hashset_size[i];
set->entries = GC_malloc_atomic (n * sizeof(scm_t_weak_entry));
memset (set->entries, 0, n * sizeof(scm_t_weak_entry));
set->n_items = 0;
set->size = n;
set->lower = 0;
set->upper = 9 * n / 10;
set->size_index = i;
set->min_size_index = i;
SCM r = scm_words (scm_tc7_weak_set, 2);
SCM_SET_CELL_WORD (r, 1, set);
return r;
}
static int
apply_pred (scm_t_weak_entry *entry, scm_t_set_predicate_fn pred
, void *closure) {
scm_t_weak_entry copy;
memcpy (&copy, entry, sizeof (scm_t_weak_entry));
return pred (SCM_PACK (copy.key), closure);
}
static SCM
find_bucket (scm_t_weak_set *set, unsigned long hash,
SCM key, scm_t_set_predicate_fn pred, void *closure,
unsigned long p) {
const unsigned long other_hash = set->entries[p].hash;
if (other_hash == 0) {
set->entries[p].hash = hash;
set->entries[p].key = SCM_UNPACK (key);
set->n_items++;
return key;
} else if (hash == other_hash
// guile passes a copy.. is this important for weak
// references...
&& pred (SCM_PACK (set->entries[p].key), closure)) {
return SCM_PACK (set->entries[p].key);
} else {
return find_bucket (set, hash, key, pred, closure,
(p + 1) % set->size);
}
}
#define SCM_WEAK_SET(x) ((scm_t_weak_set *) SCM_CELL_WORD (x, 1))
SCM scm_c_weak_set_insert (SCM set, unsigned long hash,
SCM key, scm_t_set_predicate_fn pred,
void* closure) {
scm_t_weak_set *s = SCM_WEAK_SET (set);
unsigned long size = s->size;
if (s->n_items > s->upper) {
/* resize */
}
return find_bucket (s, hash, key, pred, closure, hash % size);
}
SCM scm_c_weak_set_lookup (scm_t_weak_set *set, unsigned long hash, SCM dflt) {
}
unsigned long weak_set_count (SCM set) {
return (SCM_WEAK_SET (set))->n_items;
}

20
runtime/weak-set.h Normal file
View File

@@ -0,0 +1,20 @@
#ifndef WEAK_SET_H
#define WEAK_SET_H
#include "gyehoek.h"
#define SCM_WEAK_SET_P(x) (SCM_HAS_TYP7 (x, scm_tc7_weak_set))
/* Function that returns nonzero if the given object is the one we are
looking for. */
typedef int (*scm_t_set_predicate_fn) (SCM obj, void *closure);
SCM scm_c_make_weak_set (unsigned long k);
SCM scm_c_weak_set_insert (SCM set, unsigned long hash,
SCM key, scm_t_set_predicate_fn pred,
void* closure);
unsigned long weak_set_count (SCM set);
#endif /* WEAK_SET_H */