This commit is contained in:
2026-05-18 17:11:04 -06:00
parent fbcb129437
commit c1851fe242
11 changed files with 243 additions and 42 deletions

View File

@@ -1,12 +1,13 @@
#include <stdio.h>
#include <gc.h>
#include <string.h>
#include "gyehoek.h"
const long scm_tc3_cons = 0;
const long scm_tc7_obarray = 0x55;
const long scm_tc7_weak_set = 0x55;
const long scm_tc7_symbol = 0x05;
const long scm_tc7_string = 0x15;
@@ -17,9 +18,17 @@ SCM scm_newline () {
return SCM_PACK(NULL);
}
size_t scm_c_string_length (SCM str) {
return SCM_CELL_WORD (str, 1);
}
const char *scm_c_string_chars (SCM str) {
return (const char *) SCM_UNPACK_POINTER (SCM_CELL_OBJECT (str, 2));
}
static void scm_write_string (SCM x) {
const size_t len = SCM_CELL_WORD (x, 1);
const char *s = (const char *) SCM_UNPACK_POINTER (SCM_CELL_OBJECT (x, 2));
const size_t len = scm_c_string_length (x);
const char *s = scm_c_string_chars (x);
/* FIXME: this is a very naïve implementation with no escaping. */
printf ("some unrelated unicode lol: %s\n", "왜 하냐??");
printf ("\"%.*s\"", (int) len, s);
@@ -60,10 +69,41 @@ SCM scm_from_utf8_string (const char *str, size_t len) {
SCM r = scm_words (scm_tc7_string, 3);
SCM_SET_CELL_WORD (r, 1, len);
SCM_SET_CELL_WORD (r, 2, str);
printf ("str: %p\n", str);
return r;
}
SCM scm_from_cstring (const char *str) {
return scm_from_utf8_string (str, strlen (str));
}
unsigned long scm_c_hash (SCM str) {
const unsigned long offset_basis = 0xcbf29ce484222325;
const unsigned long prime = 0x100000001b3;
const size_t len = scm_c_string_length (str);
const char *bytes = scm_c_string_chars (str);
unsigned long hash = offset_basis;
for (const char *c = bytes; c < bytes + len; c++) {
hash ^= *c;
hash *= prime;
}
// mask off MSB, since we use it to mark tombstones in weak-set.c.
/* hash &= 0x7fffffffffffffff; */
// shift off MSB, since we use it to mark tombstones in weak-set.c.
/* hash >>= 1; */
/* ensure hash is non-zero */
hash |= hash == 0;
return hash;
}
SCM scm_str_to_symbol (SCM str) {
}
SCM scm_from_utf8_symbol (const char *s, size_t len) {
}
@@ -73,3 +113,10 @@ SCM scm_cons (SCM car, SCM cdr) {
r[1] = SCM_UNPACK (cdr);
return SCM_PACK (r);
}
SCM scm_make_symbol (SCM name, unsigned long hash) {
SCM r = scm_words(scm_tc7_symbol, 3);
SCM_SET_CELL_WORD (r, 1, hash);
SCM_SET_CELL_OBJECT (r, 2, name);
return r;
}

View File

@@ -32,7 +32,7 @@ typedef union SCM { struct { scm_t_bits n; } n; } SCM;
: ((x) >> (y)))
#endif
#define SCM_MAKE_SMALL_INT(x) (SCM_SRS ((SCM_PACK (x)), 2) + 2)
#define SCM_MAKE_SMALL_INT(x) (SCM_SRS ((long)(x), 2) + 2)
#define SCM_GET_SMALL_INT(x) (SCM_UNPACK (x) >> 2)
/* Checking if a SCM variable holds a pair (for historical reasons, in
@@ -76,26 +76,32 @@ SCM scm_cdr (SCM x);
(SCM_NIMP (x) && type (x) == (tag))
#define SCM_HAS_TYP7(x, tag) (SCM_HAS_HEAP_TYPE (x, SCM_TYP7, tag))
extern const long scm_tc7_obarray;
extern const long scm_tc7_weak_set;
extern const long scm_tc7_symbol;
extern const long scm_tc7_string;
#define SCM_STRINGP(x) (SCM_TYP7 (x) == scm_tc7_string)
#define SCM_SYMBOLP(x) (SCM_TYP7 (x) == scm_tc7_symbol)
unsigned long scm_c_hash (SCM str);
SCM scm_words (scm_t_bits word_0, uint32_t n_words);
/* Construct a Scheme string. */
SCM scm_from_utf8_string (const char *str, size_t len);
SCM scm_from_cstring (const char *str);
/* Intern a symbol with a UTF-8 string name. */
SCM scm_from_utf8_symbol (const char *s, size_t len);
SCM scm_make_symbol (SCM name, unsigned long hash);
SCM scm_write (SCM);

View File

@@ -1,24 +0,0 @@
#include "gyehoek.h"
#include "obarray.h"
typedef struct {
unsigned long hash;
scm_t_bits key;
} scm_t_weak_entry;
struct weak_entry_data {
scm_t_weak_entry *in;
scm_t_weak_entry *out;
};
typedef struct {
scm_t_weak_entry *entries; /* the data */
scm_i_pthread_mutex_t lock; /* the lock */
unsigned long size; /* total number of slots. */
unsigned long n_items; /* number of items in set */
unsigned long lower; /* when to shrink */
unsigned long upper; /* when to grow */
int size_index; /* index into hashset_size */
int min_size_index; /* minimum size_index */
} scm_t_weak_set;

View File

@@ -1,8 +0,0 @@
#ifndef OBARRAY_H
#define OBARRAY_H
#define SCM_OBARRAY_P(x) (SCM_HAS_TYP7 (x, scm_tc7_weak_set))
SCM scm_obarray_insert ()
#endif /* OBARRAY_H */

121
runtime/weak-set.c Normal file
View File

@@ -0,0 +1,121 @@
#include <gc.h>
#include <gc/gc.h>
#include <string.h>
#include "gyehoek.h"
#include "weak-set.h"
typedef struct {
unsigned long hash;
scm_t_bits key;
} scm_t_weak_entry;
/* struct weak_entry_data { */
/* scm_t_weak_entry *in; */
/* scm_t_weak_entry *out; */
/* }; */
typedef struct {
scm_t_weak_entry *entries; /* the data */
unsigned long size; /* total number of slots. */
unsigned long n_items; /* number of items in set */
unsigned long lower; /* when to shrink */
unsigned long upper; /* when to grow */
int size_index; /* index into hashset_size */
int min_size_index; /* minimum size_index */
} scm_t_weak_set;
/* Growing or shrinking is triggered when the load factor
*
* L = N / S (N: number of items in set, S: bucket vector length)
*
* passes an upper limit of 0.9 or a lower limit of 0.2.
*
* The implementation stores the upper and lower number of items which
* trigger a resize in the hashset object.
*
* Possible hash set sizes (primes) are stored in the array
* hashset_size.
*/
static unsigned long hashset_size[] = {
31, 61, 113, 223, 443, 883, 1759, 3517, 7027, 14051, 28099, 56197, 112363,
224717, 449419, 898823, 1797641, 3595271, 7190537, 14381041, 28762081,
57524111, 115048217, 230096423
};
#define HASHSET_SIZE_N (sizeof(hashset_size)/sizeof(unsigned long))
SCM scm_c_make_weak_set (unsigned long k) {
scm_t_weak_set *set = GC_malloc (sizeof (scm_t_weak_set));
/* i ripped this from guile and i'm not sure what it's for ^w^. */
int i = 0, n = k ? k : 31;
while (i + 1 < HASHSET_SIZE_N && n > hashset_size[i])
++i;
n = hashset_size[i];
set->entries = GC_malloc_atomic (n * sizeof(scm_t_weak_entry));
memset (set->entries, 0, n * sizeof(scm_t_weak_entry));
set->n_items = 0;
set->size = n;
set->lower = 0;
set->upper = 9 * n / 10;
set->size_index = i;
set->min_size_index = i;
SCM r = scm_words (scm_tc7_weak_set, 2);
SCM_SET_CELL_WORD (r, 1, set);
return r;
}
static int
apply_pred (scm_t_weak_entry *entry, scm_t_set_predicate_fn pred
, void *closure) {
scm_t_weak_entry copy;
memcpy (&copy, entry, sizeof (scm_t_weak_entry));
return pred (SCM_PACK (copy.key), closure);
}
static SCM
find_bucket (scm_t_weak_set *set, unsigned long hash,
SCM key, scm_t_set_predicate_fn pred, void *closure,
unsigned long p) {
const unsigned long other_hash = set->entries[p].hash;
if (other_hash == 0) {
set->entries[p].hash = hash;
set->entries[p].key = SCM_UNPACK (key);
set->n_items++;
return key;
} else if (hash == other_hash
// guile passes a copy.. is this important for weak
// references...
&& pred (SCM_PACK (set->entries[p].key), closure)) {
return SCM_PACK (set->entries[p].key);
} else {
return find_bucket (set, hash, key, pred, closure,
(p + 1) % set->size);
}
}
#define SCM_WEAK_SET(x) ((scm_t_weak_set *) SCM_CELL_WORD (x, 1))
SCM scm_c_weak_set_insert (SCM set, unsigned long hash,
SCM key, scm_t_set_predicate_fn pred,
void* closure) {
scm_t_weak_set *s = SCM_WEAK_SET (set);
unsigned long size = s->size;
if (s->n_items > s->upper) {
/* resize */
}
return find_bucket (s, hash, key, pred, closure, hash % size);
}
SCM scm_c_weak_set_lookup (scm_t_weak_set *set, unsigned long hash, SCM dflt) {
}
unsigned long weak_set_count (SCM set) {
return (SCM_WEAK_SET (set))->n_items;
}

20
runtime/weak-set.h Normal file
View File

@@ -0,0 +1,20 @@
#ifndef WEAK_SET_H
#define WEAK_SET_H
#include "gyehoek.h"
#define SCM_WEAK_SET_P(x) (SCM_HAS_TYP7 (x, scm_tc7_weak_set))
/* Function that returns nonzero if the given object is the one we are
looking for. */
typedef int (*scm_t_set_predicate_fn) (SCM obj, void *closure);
SCM scm_c_make_weak_set (unsigned long k);
SCM scm_c_weak_set_insert (SCM set, unsigned long hash,
SCM key, scm_t_set_predicate_fn pred,
void* closure);
unsigned long weak_set_count (SCM set);
#endif /* WEAK_SET_H */