This commit is contained in:
@@ -1,12 +1,13 @@
|
||||
#include <stdio.h>
|
||||
#include <gc.h>
|
||||
#include <string.h>
|
||||
#include "gyehoek.h"
|
||||
|
||||
|
||||
|
||||
const long scm_tc3_cons = 0;
|
||||
|
||||
const long scm_tc7_obarray = 0x55;
|
||||
const long scm_tc7_weak_set = 0x55;
|
||||
const long scm_tc7_symbol = 0x05;
|
||||
const long scm_tc7_string = 0x15;
|
||||
|
||||
@@ -17,9 +18,17 @@ SCM scm_newline () {
|
||||
return SCM_PACK(NULL);
|
||||
}
|
||||
|
||||
size_t scm_c_string_length (SCM str) {
|
||||
return SCM_CELL_WORD (str, 1);
|
||||
}
|
||||
|
||||
const char *scm_c_string_chars (SCM str) {
|
||||
return (const char *) SCM_UNPACK_POINTER (SCM_CELL_OBJECT (str, 2));
|
||||
}
|
||||
|
||||
static void scm_write_string (SCM x) {
|
||||
const size_t len = SCM_CELL_WORD (x, 1);
|
||||
const char *s = (const char *) SCM_UNPACK_POINTER (SCM_CELL_OBJECT (x, 2));
|
||||
const size_t len = scm_c_string_length (x);
|
||||
const char *s = scm_c_string_chars (x);
|
||||
/* FIXME: this is a very naïve implementation with no escaping. */
|
||||
printf ("some unrelated unicode lol: %s\n", "왜 하냐??");
|
||||
printf ("\"%.*s\"", (int) len, s);
|
||||
@@ -60,10 +69,41 @@ SCM scm_from_utf8_string (const char *str, size_t len) {
|
||||
SCM r = scm_words (scm_tc7_string, 3);
|
||||
SCM_SET_CELL_WORD (r, 1, len);
|
||||
SCM_SET_CELL_WORD (r, 2, str);
|
||||
printf ("str: %p\n", str);
|
||||
return r;
|
||||
}
|
||||
|
||||
SCM scm_from_cstring (const char *str) {
|
||||
return scm_from_utf8_string (str, strlen (str));
|
||||
}
|
||||
|
||||
unsigned long scm_c_hash (SCM str) {
|
||||
const unsigned long offset_basis = 0xcbf29ce484222325;
|
||||
const unsigned long prime = 0x100000001b3;
|
||||
const size_t len = scm_c_string_length (str);
|
||||
const char *bytes = scm_c_string_chars (str);
|
||||
unsigned long hash = offset_basis;
|
||||
|
||||
for (const char *c = bytes; c < bytes + len; c++) {
|
||||
hash ^= *c;
|
||||
hash *= prime;
|
||||
}
|
||||
|
||||
// mask off MSB, since we use it to mark tombstones in weak-set.c.
|
||||
/* hash &= 0x7fffffffffffffff; */
|
||||
|
||||
// shift off MSB, since we use it to mark tombstones in weak-set.c.
|
||||
/* hash >>= 1; */
|
||||
|
||||
/* ensure hash is non-zero */
|
||||
hash |= hash == 0;
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
SCM scm_str_to_symbol (SCM str) {
|
||||
|
||||
}
|
||||
|
||||
SCM scm_from_utf8_symbol (const char *s, size_t len) {
|
||||
}
|
||||
|
||||
@@ -73,3 +113,10 @@ SCM scm_cons (SCM car, SCM cdr) {
|
||||
r[1] = SCM_UNPACK (cdr);
|
||||
return SCM_PACK (r);
|
||||
}
|
||||
|
||||
SCM scm_make_symbol (SCM name, unsigned long hash) {
|
||||
SCM r = scm_words(scm_tc7_symbol, 3);
|
||||
SCM_SET_CELL_WORD (r, 1, hash);
|
||||
SCM_SET_CELL_OBJECT (r, 2, name);
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -32,7 +32,7 @@ typedef union SCM { struct { scm_t_bits n; } n; } SCM;
|
||||
: ((x) >> (y)))
|
||||
#endif
|
||||
|
||||
#define SCM_MAKE_SMALL_INT(x) (SCM_SRS ((SCM_PACK (x)), 2) + 2)
|
||||
#define SCM_MAKE_SMALL_INT(x) (SCM_SRS ((long)(x), 2) + 2)
|
||||
#define SCM_GET_SMALL_INT(x) (SCM_UNPACK (x) >> 2)
|
||||
|
||||
/* Checking if a SCM variable holds a pair (for historical reasons, in
|
||||
@@ -76,26 +76,32 @@ SCM scm_cdr (SCM x);
|
||||
(SCM_NIMP (x) && type (x) == (tag))
|
||||
#define SCM_HAS_TYP7(x, tag) (SCM_HAS_HEAP_TYPE (x, SCM_TYP7, tag))
|
||||
|
||||
extern const long scm_tc7_obarray;
|
||||
extern const long scm_tc7_weak_set;
|
||||
extern const long scm_tc7_symbol;
|
||||
extern const long scm_tc7_string;
|
||||
|
||||
|
||||
|
||||
#define SCM_STRINGP(x) (SCM_TYP7 (x) == scm_tc7_string)
|
||||
#define SCM_SYMBOLP(x) (SCM_TYP7 (x) == scm_tc7_symbol)
|
||||
|
||||
|
||||
|
||||
unsigned long scm_c_hash (SCM str);
|
||||
|
||||
SCM scm_words (scm_t_bits word_0, uint32_t n_words);
|
||||
|
||||
/* Construct a Scheme string. */
|
||||
SCM scm_from_utf8_string (const char *str, size_t len);
|
||||
SCM scm_from_cstring (const char *str);
|
||||
|
||||
/* Intern a symbol with a UTF-8 string name. */
|
||||
SCM scm_from_utf8_symbol (const char *s, size_t len);
|
||||
|
||||
|
||||
|
||||
SCM scm_make_symbol (SCM name, unsigned long hash);
|
||||
|
||||
|
||||
|
||||
SCM scm_write (SCM);
|
||||
|
||||
|
||||
|
||||
@@ -1,24 +0,0 @@
|
||||
#include "gyehoek.h"
|
||||
#include "obarray.h"
|
||||
|
||||
typedef struct {
|
||||
unsigned long hash;
|
||||
scm_t_bits key;
|
||||
} scm_t_weak_entry;
|
||||
|
||||
struct weak_entry_data {
|
||||
scm_t_weak_entry *in;
|
||||
scm_t_weak_entry *out;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
scm_t_weak_entry *entries; /* the data */
|
||||
scm_i_pthread_mutex_t lock; /* the lock */
|
||||
unsigned long size; /* total number of slots. */
|
||||
unsigned long n_items; /* number of items in set */
|
||||
unsigned long lower; /* when to shrink */
|
||||
unsigned long upper; /* when to grow */
|
||||
int size_index; /* index into hashset_size */
|
||||
int min_size_index; /* minimum size_index */
|
||||
} scm_t_weak_set;
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
#ifndef OBARRAY_H
|
||||
#define OBARRAY_H
|
||||
|
||||
#define SCM_OBARRAY_P(x) (SCM_HAS_TYP7 (x, scm_tc7_weak_set))
|
||||
|
||||
SCM scm_obarray_insert ()
|
||||
|
||||
#endif /* OBARRAY_H */
|
||||
121
runtime/weak-set.c
Normal file
121
runtime/weak-set.c
Normal file
@@ -0,0 +1,121 @@
|
||||
#include <gc.h>
|
||||
#include <gc/gc.h>
|
||||
#include <string.h>
|
||||
#include "gyehoek.h"
|
||||
#include "weak-set.h"
|
||||
|
||||
typedef struct {
|
||||
unsigned long hash;
|
||||
scm_t_bits key;
|
||||
} scm_t_weak_entry;
|
||||
|
||||
/* struct weak_entry_data { */
|
||||
/* scm_t_weak_entry *in; */
|
||||
/* scm_t_weak_entry *out; */
|
||||
/* }; */
|
||||
|
||||
typedef struct {
|
||||
scm_t_weak_entry *entries; /* the data */
|
||||
unsigned long size; /* total number of slots. */
|
||||
unsigned long n_items; /* number of items in set */
|
||||
unsigned long lower; /* when to shrink */
|
||||
unsigned long upper; /* when to grow */
|
||||
int size_index; /* index into hashset_size */
|
||||
int min_size_index; /* minimum size_index */
|
||||
} scm_t_weak_set;
|
||||
|
||||
/* Growing or shrinking is triggered when the load factor
|
||||
*
|
||||
* L = N / S (N: number of items in set, S: bucket vector length)
|
||||
*
|
||||
* passes an upper limit of 0.9 or a lower limit of 0.2.
|
||||
*
|
||||
* The implementation stores the upper and lower number of items which
|
||||
* trigger a resize in the hashset object.
|
||||
*
|
||||
* Possible hash set sizes (primes) are stored in the array
|
||||
* hashset_size.
|
||||
*/
|
||||
|
||||
static unsigned long hashset_size[] = {
|
||||
31, 61, 113, 223, 443, 883, 1759, 3517, 7027, 14051, 28099, 56197, 112363,
|
||||
224717, 449419, 898823, 1797641, 3595271, 7190537, 14381041, 28762081,
|
||||
57524111, 115048217, 230096423
|
||||
};
|
||||
|
||||
#define HASHSET_SIZE_N (sizeof(hashset_size)/sizeof(unsigned long))
|
||||
|
||||
SCM scm_c_make_weak_set (unsigned long k) {
|
||||
scm_t_weak_set *set = GC_malloc (sizeof (scm_t_weak_set));
|
||||
|
||||
/* i ripped this from guile and i'm not sure what it's for ^w^. */
|
||||
int i = 0, n = k ? k : 31;
|
||||
while (i + 1 < HASHSET_SIZE_N && n > hashset_size[i])
|
||||
++i;
|
||||
n = hashset_size[i];
|
||||
|
||||
set->entries = GC_malloc_atomic (n * sizeof(scm_t_weak_entry));
|
||||
memset (set->entries, 0, n * sizeof(scm_t_weak_entry));
|
||||
set->n_items = 0;
|
||||
set->size = n;
|
||||
set->lower = 0;
|
||||
set->upper = 9 * n / 10;
|
||||
set->size_index = i;
|
||||
set->min_size_index = i;
|
||||
|
||||
SCM r = scm_words (scm_tc7_weak_set, 2);
|
||||
SCM_SET_CELL_WORD (r, 1, set);
|
||||
return r;
|
||||
}
|
||||
|
||||
static int
|
||||
apply_pred (scm_t_weak_entry *entry, scm_t_set_predicate_fn pred
|
||||
, void *closure) {
|
||||
scm_t_weak_entry copy;
|
||||
memcpy (©, entry, sizeof (scm_t_weak_entry));
|
||||
return pred (SCM_PACK (copy.key), closure);
|
||||
}
|
||||
|
||||
static SCM
|
||||
find_bucket (scm_t_weak_set *set, unsigned long hash,
|
||||
SCM key, scm_t_set_predicate_fn pred, void *closure,
|
||||
unsigned long p) {
|
||||
const unsigned long other_hash = set->entries[p].hash;
|
||||
if (other_hash == 0) {
|
||||
set->entries[p].hash = hash;
|
||||
set->entries[p].key = SCM_UNPACK (key);
|
||||
set->n_items++;
|
||||
return key;
|
||||
} else if (hash == other_hash
|
||||
// guile passes a copy.. is this important for weak
|
||||
// references...
|
||||
&& pred (SCM_PACK (set->entries[p].key), closure)) {
|
||||
return SCM_PACK (set->entries[p].key);
|
||||
} else {
|
||||
return find_bucket (set, hash, key, pred, closure,
|
||||
(p + 1) % set->size);
|
||||
}
|
||||
}
|
||||
|
||||
#define SCM_WEAK_SET(x) ((scm_t_weak_set *) SCM_CELL_WORD (x, 1))
|
||||
|
||||
SCM scm_c_weak_set_insert (SCM set, unsigned long hash,
|
||||
SCM key, scm_t_set_predicate_fn pred,
|
||||
void* closure) {
|
||||
scm_t_weak_set *s = SCM_WEAK_SET (set);
|
||||
unsigned long size = s->size;
|
||||
|
||||
if (s->n_items > s->upper) {
|
||||
/* resize */
|
||||
}
|
||||
|
||||
return find_bucket (s, hash, key, pred, closure, hash % size);
|
||||
}
|
||||
|
||||
SCM scm_c_weak_set_lookup (scm_t_weak_set *set, unsigned long hash, SCM dflt) {
|
||||
|
||||
}
|
||||
|
||||
unsigned long weak_set_count (SCM set) {
|
||||
return (SCM_WEAK_SET (set))->n_items;
|
||||
}
|
||||
20
runtime/weak-set.h
Normal file
20
runtime/weak-set.h
Normal file
@@ -0,0 +1,20 @@
|
||||
#ifndef WEAK_SET_H
|
||||
#define WEAK_SET_H
|
||||
|
||||
#include "gyehoek.h"
|
||||
|
||||
#define SCM_WEAK_SET_P(x) (SCM_HAS_TYP7 (x, scm_tc7_weak_set))
|
||||
|
||||
/* Function that returns nonzero if the given object is the one we are
|
||||
looking for. */
|
||||
typedef int (*scm_t_set_predicate_fn) (SCM obj, void *closure);
|
||||
|
||||
SCM scm_c_make_weak_set (unsigned long k);
|
||||
|
||||
SCM scm_c_weak_set_insert (SCM set, unsigned long hash,
|
||||
SCM key, scm_t_set_predicate_fn pred,
|
||||
void* closure);
|
||||
|
||||
unsigned long weak_set_count (SCM set);
|
||||
|
||||
#endif /* WEAK_SET_H */
|
||||
Reference in New Issue
Block a user