mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-02 15:52:50 -06:00
Merge with master and drop the Haskell runtime completely
This commit is contained in:
@@ -68,6 +68,7 @@ libpgf_la_SOURCES = \
|
||||
pgf/data.h \
|
||||
pgf/expr.c \
|
||||
pgf/expr.h \
|
||||
pgf/scanner.c \
|
||||
pgf/parser.c \
|
||||
pgf/lookup.c \
|
||||
pgf/jit.c \
|
||||
|
||||
@@ -64,6 +64,8 @@
|
||||
|
||||
#ifdef GU_ALIGNOF
|
||||
# define gu_alignof GU_ALIGNOF
|
||||
#elif defined(_MSC_VER)
|
||||
# define gu_alignof __alignof
|
||||
#else
|
||||
# define gu_alignof(t_) \
|
||||
((size_t)(offsetof(struct { char c_; t_ e_; }, e_)))
|
||||
@@ -77,7 +79,7 @@
|
||||
|
||||
#define GU_COMMA ,
|
||||
|
||||
#define GU_ARRAY_LEN(t,a) (sizeof((const t[])a) / sizeof(t))
|
||||
// Number of elements in the array expression `a`. Only meaningful for true
// arrays (not pointers). The argument is parenthesised before indexing so
// that any expression can be passed safely.
#define GU_ARRAY_LEN(a) (sizeof(a) / sizeof((a)[0]))
|
||||
|
||||
#define GU_ID(...) __VA_ARGS__
|
||||
|
||||
@@ -183,9 +185,13 @@ typedef union {
|
||||
void (*fp)();
|
||||
} GuMaxAlign;
|
||||
|
||||
// gu_alloca(N): obtain N bytes of temporary storage.
// With MSVC this forwards to alloca() from <malloc.h>; elsewhere it yields
// the byte buffer of an anonymous union compound literal whose GuMaxAlign
// member gives the buffer the alignment of GuMaxAlign.
#ifdef _MSC_VER
# include <malloc.h>
# define gu_alloca(N) alloca(N)
#else
# define gu_alloca(N) \
    (((union { GuMaxAlign align_; uint8_t buf_[N]; }){{0}}).buf_)
#endif
|
||||
|
||||
// For Doxygen
|
||||
#define GU_PRIVATE /** @private */
|
||||
|
||||
@@ -7,6 +7,9 @@
|
||||
|
||||
typedef struct GuMapData GuMapData;
|
||||
|
||||
#define SKIP_DELETED 1
|
||||
#define SKIP_NONE 2
|
||||
|
||||
struct GuMapData {
|
||||
uint8_t* keys;
|
||||
uint8_t* values;
|
||||
@@ -19,6 +22,7 @@ struct GuMap {
|
||||
GuHasher* hasher;
|
||||
size_t key_size;
|
||||
size_t value_size;
|
||||
size_t cell_size; // cell_size = GU_MAX(value_size,sizeof(uint8_t))
|
||||
const void* default_value;
|
||||
GuMapData data;
|
||||
|
||||
@@ -30,9 +34,7 @@ gu_map_finalize(GuFinalizer* fin)
|
||||
{
|
||||
GuMap* map = gu_container(fin, GuMap, fin);
|
||||
gu_mem_buf_free(map->data.keys);
|
||||
if (map->value_size) {
|
||||
gu_mem_buf_free(map->data.values);
|
||||
}
|
||||
gu_mem_buf_free(map->data.values);
|
||||
}
|
||||
|
||||
static const GuWord gu_map_empty_key = 0;
|
||||
@@ -68,7 +70,7 @@ gu_map_entry_is_free(GuMap* map, GuMapData* data, size_t idx)
|
||||
}
|
||||
|
||||
static bool
|
||||
gu_map_lookup(GuMap* map, const void* key, size_t* idx_out)
|
||||
gu_map_lookup(GuMap* map, const void* key, uint8_t del, size_t* idx_out)
|
||||
{
|
||||
size_t n = map->data.n_entries;
|
||||
if (map->hasher == gu_addr_hasher) {
|
||||
@@ -78,13 +80,17 @@ gu_map_lookup(GuMap* map, const void* key, size_t* idx_out)
|
||||
while (true) {
|
||||
const void* entry_key =
|
||||
((const void**)map->data.keys)[idx];
|
||||
|
||||
if (entry_key == NULL && map->data.zero_idx != idx) {
|
||||
*idx_out = idx;
|
||||
return false;
|
||||
if (map->data.values[idx * map->cell_size] != del) { //skip deleted
|
||||
*idx_out = idx;
|
||||
return false;
|
||||
}
|
||||
} else if (entry_key == key) {
|
||||
*idx_out = idx;
|
||||
return true;
|
||||
}
|
||||
|
||||
idx = (idx + offset) % n;
|
||||
}
|
||||
} else if (map->hasher == gu_word_hasher) {
|
||||
@@ -156,33 +162,18 @@ gu_map_resize(GuMap* map, size_t req_entries)
|
||||
size_t key_size = map->key_size;
|
||||
size_t key_alloc = 0;
|
||||
data->keys = gu_mem_buf_alloc(req_entries * key_size, &key_alloc);
|
||||
memset(data->keys, 0, key_alloc);
|
||||
|
||||
size_t value_size = map->value_size;
|
||||
size_t value_alloc = 0;
|
||||
if (value_size) {
|
||||
data->values = gu_mem_buf_alloc(req_entries * value_size,
|
||||
&value_alloc);
|
||||
memset(data->values, 0, value_alloc);
|
||||
}
|
||||
|
||||
data->n_entries = gu_twin_prime_inf(value_size ?
|
||||
GU_MIN(key_alloc / key_size,
|
||||
value_alloc / value_size)
|
||||
: key_alloc / key_size);
|
||||
if (map->hasher == gu_addr_hasher) {
|
||||
for (size_t i = 0; i < data->n_entries; i++) {
|
||||
((const void**)data->keys)[i] = NULL;
|
||||
}
|
||||
} else if (map->hasher == gu_string_hasher) {
|
||||
for (size_t i = 0; i < data->n_entries; i++) {
|
||||
((GuString*)data->keys)[i] = NULL;
|
||||
}
|
||||
} else {
|
||||
memset(data->keys, 0, key_alloc);
|
||||
}
|
||||
size_t cell_size = map->cell_size;
|
||||
data->values = gu_mem_buf_alloc(req_entries * cell_size, &value_alloc);
|
||||
memset(data->values, 0, value_alloc);
|
||||
|
||||
data->n_entries = gu_twin_prime_inf(
|
||||
GU_MIN(key_alloc / key_size,
|
||||
value_alloc / cell_size));
|
||||
gu_assert(data->n_entries > data->n_occupied);
|
||||
|
||||
|
||||
data->n_occupied = 0;
|
||||
data->zero_idx = SIZE_MAX;
|
||||
|
||||
@@ -196,16 +187,14 @@ gu_map_resize(GuMap* map, size_t req_entries)
|
||||
} else if (map->hasher == gu_string_hasher) {
|
||||
old_key = (void*) *(GuString*)old_key;
|
||||
}
|
||||
void* old_value = &old_data.values[i * value_size];
|
||||
void* old_value = &old_data.values[i * cell_size];
|
||||
|
||||
memcpy(gu_map_insert(map, old_key),
|
||||
old_value, map->value_size);
|
||||
}
|
||||
|
||||
gu_mem_buf_free(old_data.keys);
|
||||
if (value_size) {
|
||||
gu_mem_buf_free(old_data.values);
|
||||
}
|
||||
gu_mem_buf_free(old_data.values);
|
||||
}
|
||||
|
||||
|
||||
@@ -226,9 +215,9 @@ GU_API void*
|
||||
gu_map_find(GuMap* map, const void* key)
|
||||
{
|
||||
size_t idx;
|
||||
bool found = gu_map_lookup(map, key, &idx);
|
||||
bool found = gu_map_lookup(map, key, SKIP_DELETED, &idx);
|
||||
if (found) {
|
||||
return &map->data.values[idx * map->value_size];
|
||||
return &map->data.values[idx * map->cell_size];
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
@@ -244,7 +233,7 @@ GU_API const void*
|
||||
gu_map_find_key(GuMap* map, const void* key)
|
||||
{
|
||||
size_t idx;
|
||||
bool found = gu_map_lookup(map, key, &idx);
|
||||
bool found = gu_map_lookup(map, key, SKIP_DELETED, &idx);
|
||||
if (found) {
|
||||
return &map->data.keys[idx * map->key_size];
|
||||
}
|
||||
@@ -255,17 +244,17 @@ GU_API bool
|
||||
gu_map_has(GuMap* ht, const void* key)
|
||||
{
|
||||
size_t idx;
|
||||
return gu_map_lookup(ht, key, &idx);
|
||||
return gu_map_lookup(ht, key, SKIP_DELETED, &idx);
|
||||
}
|
||||
|
||||
GU_API void*
|
||||
gu_map_insert(GuMap* map, const void* key)
|
||||
{
|
||||
size_t idx;
|
||||
bool found = gu_map_lookup(map, key, &idx);
|
||||
bool found = gu_map_lookup(map, key, SKIP_NONE, &idx);
|
||||
if (!found) {
|
||||
if (gu_map_maybe_resize(map)) {
|
||||
found = gu_map_lookup(map, key, &idx);
|
||||
found = gu_map_lookup(map, key, SKIP_NONE, &idx);
|
||||
gu_assert(!found);
|
||||
}
|
||||
if (map->hasher == gu_addr_hasher) {
|
||||
@@ -277,7 +266,7 @@ gu_map_insert(GuMap* map, const void* key)
|
||||
key, map->key_size);
|
||||
}
|
||||
if (map->default_value) {
|
||||
memcpy(&map->data.values[idx * map->value_size],
|
||||
memcpy(&map->data.values[idx * map->cell_size],
|
||||
map->default_value, map->value_size);
|
||||
}
|
||||
if (gu_map_entry_is_free(map, &map->data, idx)) {
|
||||
@@ -286,7 +275,32 @@ gu_map_insert(GuMap* map, const void* key)
|
||||
}
|
||||
map->data.n_occupied++;
|
||||
}
|
||||
return &map->data.values[idx * map->value_size];
|
||||
return &map->data.values[idx * map->cell_size];
|
||||
}
|
||||
|
||||
GU_API void
|
||||
gu_map_delete(GuMap* map, const void* key)
|
||||
{
|
||||
size_t idx;
|
||||
bool found = gu_map_lookup(map, key, SKIP_NONE, &idx);
|
||||
if (found) {
|
||||
if (map->hasher == gu_addr_hasher) {
|
||||
((const void**)map->data.keys)[idx] = NULL;
|
||||
} else if (map->hasher == gu_string_hasher) {
|
||||
((GuString*)map->data.keys)[idx] = NULL;
|
||||
} else {
|
||||
memset(&map->data.keys[idx * map->key_size],
|
||||
0, map->key_size);
|
||||
}
|
||||
map->data.values[idx * map->cell_size] = SKIP_DELETED;
|
||||
|
||||
if (gu_map_buf_is_zero(&map->data.keys[idx * map->key_size],
|
||||
map->key_size)) {
|
||||
map->data.zero_idx = SIZE_MAX;
|
||||
}
|
||||
|
||||
map->data.n_occupied--;
|
||||
}
|
||||
}
|
||||
|
||||
GU_API void
|
||||
@@ -297,7 +311,7 @@ gu_map_iter(GuMap* map, GuMapItor* itor, GuExn* err)
|
||||
continue;
|
||||
}
|
||||
const void* key = &map->data.keys[i * map->key_size];
|
||||
void* value = &map->data.values[i * map->value_size];
|
||||
void* value = &map->data.values[i * map->cell_size];
|
||||
if (map->hasher == gu_addr_hasher) {
|
||||
key = *(const void* const*) key;
|
||||
} else if (map->hasher == gu_string_hasher) {
|
||||
@@ -307,47 +321,30 @@ gu_map_iter(GuMap* map, GuMapItor* itor, GuExn* err)
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
GuEnum en;
|
||||
GuMap* ht;
|
||||
size_t i;
|
||||
GuMapKeyValue x;
|
||||
} GuMapEnum;
|
||||
|
||||
static void
|
||||
gu_map_enum_next(GuEnum* self, void* to, GuPool* pool)
|
||||
GU_API bool
|
||||
gu_map_next(GuMap* map, size_t* pi, void** pkey, void* pvalue)
|
||||
{
|
||||
*((GuMapKeyValue**) to) = NULL;
|
||||
|
||||
size_t i;
|
||||
GuMapEnum* en = (GuMapEnum*) self;
|
||||
for (i = en->i; i < en->ht->data.n_entries; i++) {
|
||||
if (gu_map_entry_is_free(en->ht, &en->ht->data, i)) {
|
||||
while (*pi < map->data.n_entries) {
|
||||
if (gu_map_entry_is_free(map, &map->data, *pi)) {
|
||||
(*pi)++;
|
||||
continue;
|
||||
}
|
||||
en->x.key = &en->ht->data.keys[i * en->ht->key_size];
|
||||
en->x.value = &en->ht->data.values[i * en->ht->value_size];
|
||||
if (en->ht->hasher == gu_addr_hasher) {
|
||||
en->x.key = *(const void* const*) en->x.key;
|
||||
} else if (en->ht->hasher == gu_string_hasher) {
|
||||
en->x.key = *(GuString*) en->x.key;
|
||||
|
||||
*pkey = &map->data.keys[*pi * map->key_size];
|
||||
if (map->hasher == gu_addr_hasher) {
|
||||
*pkey = *(void**) *pkey;
|
||||
} else if (map->hasher == gu_string_hasher) {
|
||||
*pkey = *(void**) *pkey;
|
||||
}
|
||||
|
||||
*((GuMapKeyValue**) to) = &en->x;
|
||||
break;
|
||||
}
|
||||
|
||||
en->i = i+1;
|
||||
}
|
||||
memcpy(pvalue, &map->data.values[*pi * map->cell_size],
|
||||
map->value_size);
|
||||
|
||||
GU_API GuEnum*
|
||||
gu_map_enum(GuMap* ht, GuPool* pool)
|
||||
{
|
||||
GuMapEnum* en = gu_new(GuMapEnum, pool);
|
||||
en->en.next = gu_map_enum_next;
|
||||
en->ht = ht;
|
||||
en->i = 0;
|
||||
return &en->en;
|
||||
(*pi)++;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
GU_API size_t
|
||||
@@ -363,8 +360,6 @@ gu_map_count(GuMap* map)
|
||||
return count;
|
||||
}
|
||||
|
||||
static const uint8_t gu_map_no_values[1] = { 0 };
|
||||
|
||||
GU_API GuMap*
|
||||
gu_make_map(size_t key_size, GuHasher* hasher,
|
||||
size_t value_size, const void* default_value,
|
||||
@@ -375,7 +370,7 @@ gu_make_map(size_t key_size, GuHasher* hasher,
|
||||
.n_occupied = 0,
|
||||
.n_entries = 0,
|
||||
.keys = NULL,
|
||||
.values = value_size ? NULL : (uint8_t*) gu_map_no_values,
|
||||
.values = NULL,
|
||||
.zero_idx = SIZE_MAX
|
||||
};
|
||||
GuMap* map = gu_new(GuMap, pool);
|
||||
@@ -384,6 +379,7 @@ gu_make_map(size_t key_size, GuHasher* hasher,
|
||||
map->data = data;
|
||||
map->key_size = key_size;
|
||||
map->value_size = value_size;
|
||||
map->cell_size = GU_MAX(value_size,sizeof(uint8_t));
|
||||
map->fin.fn = gu_map_finalize;
|
||||
gu_pool_finally(pool, &map->fin);
|
||||
|
||||
|
||||
@@ -62,6 +62,9 @@ gu_map_has(GuMap* ht, const void* key);
|
||||
GU_API_DECL void*
|
||||
gu_map_insert(GuMap* ht, const void* key);
|
||||
|
||||
GU_API_DECL void
|
||||
gu_map_delete(GuMap* ht, const void* key);
|
||||
|
||||
#define gu_map_put(MAP, KEYP, V, VAL) \
|
||||
GU_BEGIN \
|
||||
V* gu_map_put_p_ = gu_map_insert((MAP), (KEYP)); \
|
||||
@@ -71,13 +74,8 @@ gu_map_insert(GuMap* ht, const void* key);
|
||||
GU_API_DECL void
|
||||
gu_map_iter(GuMap* ht, GuMapItor* itor, GuExn* err);
|
||||
|
||||
typedef struct {
|
||||
const void* key;
|
||||
void* value;
|
||||
} GuMapKeyValue;
|
||||
|
||||
GU_API_DECL GuEnum*
|
||||
gu_map_enum(GuMap* ht, GuPool* pool);
|
||||
GU_API bool
|
||||
gu_map_next(GuMap* map, size_t* pi, void** pkey, void* pvalue);
|
||||
|
||||
typedef GuMap GuIntMap;
|
||||
|
||||
|
||||
@@ -8,6 +8,10 @@
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#endif
|
||||
#if defined(__MINGW32__) || defined(_MSC_VER)
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER)
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
@@ -108,6 +112,39 @@ gu_mem_buf_alloc(size_t min_size, size_t* real_size_out)
|
||||
return gu_mem_buf_realloc(NULL, min_size, real_size_out);
|
||||
}
|
||||
|
||||
#if defined(__MINGW32__) || defined(_MSC_VER)
#include <windows.h>

// Replacement for getpagesize() on MinGW/MSVC builds: reports the
// dwPageSize value returned by GetSystemInfo().
static int
getpagesize()
{
    SYSTEM_INFO info;
    GetSystemInfo(&info);
    return info.dwPageSize;
}
#endif
|
||||
|
||||
/**
 * Allocate a buffer large enough for `min_size` bytes, rounded up to a
 * whole number of memory pages.
 *
 * The allocation is page-aligned where memalign()/posix_memalign() is used;
 * the MinGW/MSVC branch falls back to plain malloc(). Any failure, including
 * a request so large that rounding up would overflow size_t, is reported
 * through gu_fatal().
 *
 * @param min_size       minimum number of usable bytes
 * @param real_size_out  receives the actual (page-rounded) size
 * @return pointer to the allocated memory
 */
GU_API void*
gu_mem_page_alloc(size_t min_size, size_t* real_size_out)
{
    size_t page_size = getpagesize();

    // Unsigned arithmetic wraps on overflow; a wrapped sum is smaller than
    // the original request and must not be turned into a tiny allocation.
    size_t padded = min_size + (page_size - 1);
    if (padded < min_size) {
        gu_fatal("Memory allocation failed");
    }
    size_t size = (padded / page_size) * page_size;
    void *page = NULL;

#if defined(ANDROID)
    if ((page = memalign(page_size, size)) == NULL) {
#elif defined(__MINGW32__) || defined(_MSC_VER)
    if ((page = malloc(size)) == NULL) {
#else
    if (posix_memalign(&page, page_size, size) != 0) {
#endif
        gu_fatal("Memory allocation failed");
    }

    *real_size_out = size;
    return page;
}
|
||||
|
||||
GU_API void
|
||||
gu_mem_buf_free(void* buf)
|
||||
{
|
||||
@@ -132,6 +169,7 @@ struct GuFinalizerNode {
|
||||
enum GuPoolType {
|
||||
GU_POOL_HEAP,
|
||||
GU_POOL_LOCAL,
|
||||
GU_POOL_PAGE,
|
||||
GU_POOL_MMAP
|
||||
};
|
||||
|
||||
@@ -180,6 +218,16 @@ gu_new_pool(void)
|
||||
return pool;
|
||||
}
|
||||
|
||||
// Create a pool of type GU_POOL_PAGE whose initial buffer is obtained from
// gu_mem_page_alloc().
GU_API GuPool*
gu_new_page_pool(void)
{
    size_t wanted = GU_FLEX_SIZE(GuPool, init_buf, gu_mem_pool_initial_size);
    size_t granted = 0;
    uint8_t* mem = gu_mem_page_alloc(wanted, &granted);

    // Hand the whole page-rounded buffer to the pool.
    GuPool* pool = gu_init_pool(mem, granted);
    pool->type = GU_POOL_PAGE;
    return pool;
}
|
||||
|
||||
GU_API GuPool*
|
||||
gu_mmap_pool(char* fpath, void* addr, size_t size, void**pptr)
|
||||
{
|
||||
@@ -238,7 +286,10 @@ gu_pool_expand(GuPool* pool, size_t req)
|
||||
gu_mem_chunk_max_size));
|
||||
gu_assert(real_req >= sizeof(GuMemChunk));
|
||||
size_t size = 0;
|
||||
GuMemChunk* chunk = gu_mem_buf_alloc(real_req, &size);
|
||||
GuMemChunk* chunk =
|
||||
(pool->type == GU_POOL_PAGE)
|
||||
? gu_mem_page_alloc(real_req, &size)
|
||||
: gu_mem_buf_alloc(real_req, &size);
|
||||
chunk->next = pool->chunks;
|
||||
pool->chunks = chunk;
|
||||
pool->curr_buf = (uint8_t*) chunk;
|
||||
@@ -309,6 +360,7 @@ gu_malloc_prefixed(GuPool* pool, size_t pre_align, size_t pre_size,
|
||||
size_t full_size = gu_mem_advance(offsetof(GuMemChunk, data),
|
||||
pre_align, pre_size, align, size);
|
||||
if (full_size > gu_mem_max_shared_alloc &&
|
||||
pool->type != GU_POOL_PAGE &&
|
||||
pool->type != GU_POOL_MMAP) {
|
||||
GuMemChunk* chunk = gu_mem_alloc(full_size);
|
||||
chunk->next = pool->chunks;
|
||||
|
||||
@@ -55,6 +55,11 @@ gu_local_pool_(uint8_t* init_buf, size_t sz);
|
||||
* should not be used in the bodies of recursive functions.
|
||||
*/
|
||||
|
||||
/// Create a pool where each chunk corresponds to one or
|
||||
/// more pages.
|
||||
GU_API_DECL GuPool*
|
||||
gu_new_page_pool(void);
|
||||
|
||||
/// Create a pool stored in a memory mapped file.
|
||||
GU_API_DECL GuPool*
|
||||
gu_mmap_pool(char* fpath, void* addr, size_t size, void**pptr);
|
||||
@@ -198,6 +203,9 @@ gu_mem_buf_realloc(
|
||||
size_t min_size,
|
||||
size_t* real_size_out);
|
||||
|
||||
/// Allocate enough memory pages to contain min_size bytes.
|
||||
GU_API_DECL void*
|
||||
gu_mem_page_alloc(size_t min_size, size_t* real_size_out);
|
||||
|
||||
/// Free a memory buffer.
|
||||
GU_API_DECL void
|
||||
|
||||
@@ -100,6 +100,11 @@ gu_seq_free(GuSeq* seq)
|
||||
gu_mem_buf_free(seq);
|
||||
}
|
||||
|
||||
static void
|
||||
gu_dummy_finalizer(GuFinalizer* self)
|
||||
{
|
||||
}
|
||||
|
||||
GU_API void
|
||||
gu_buf_require(GuBuf* buf, size_t req_len)
|
||||
{
|
||||
@@ -109,7 +114,9 @@ gu_buf_require(GuBuf* buf, size_t req_len)
|
||||
|
||||
size_t req_size = sizeof(GuSeq) + buf->elem_size * req_len;
|
||||
size_t real_size;
|
||||
|
||||
|
||||
gu_require(buf->fin.fn != gu_dummy_finalizer);
|
||||
|
||||
if (buf->seq == NULL || buf->seq == gu_empty_seq()) {
|
||||
buf->seq = gu_mem_buf_alloc(req_size, &real_size);
|
||||
buf->seq->len = 0;
|
||||
@@ -164,6 +171,24 @@ gu_buf_freeze(GuBuf* buf, GuPool* pool)
|
||||
return seq;
|
||||
}
|
||||
|
||||
GU_API void
|
||||
gu_buf_evacuate(GuBuf* buf, GuPool* pool)
|
||||
{
|
||||
if (buf->seq != gu_empty_seq()) {
|
||||
size_t len = gu_buf_length(buf);
|
||||
|
||||
GuSeq* seq = gu_make_seq(buf->elem_size, len, pool);
|
||||
void* bufdata = gu_buf_data(buf);
|
||||
void* seqdata = gu_seq_data(seq);
|
||||
memcpy(seqdata, bufdata, buf->elem_size * len);
|
||||
gu_mem_buf_free(buf->seq);
|
||||
|
||||
buf->seq = seq;
|
||||
buf->fin.fn = gu_dummy_finalizer;
|
||||
buf->avail_len = len;
|
||||
}
|
||||
}
|
||||
|
||||
GU_API void*
|
||||
gu_buf_insert(GuBuf* buf, size_t index)
|
||||
{
|
||||
@@ -335,13 +360,8 @@ GU_API void
|
||||
gu_buf_heap_pop(GuBuf *buf, GuOrder *order, void* data_out)
|
||||
{
|
||||
const void* last = gu_buf_trim(buf); // raises an error if empty
|
||||
|
||||
if (gu_buf_length(buf) > 0) {
|
||||
memcpy(data_out, buf->seq->data, buf->elem_size);
|
||||
gu_heap_siftup(buf, order, last, 0);
|
||||
} else {
|
||||
memcpy(data_out, last, buf->elem_size);
|
||||
}
|
||||
memcpy(data_out, buf->seq->data, buf->elem_size);
|
||||
gu_heap_siftup(buf, order, last, 0);
|
||||
}
|
||||
|
||||
GU_API void
|
||||
|
||||
@@ -182,6 +182,9 @@ gu_buf_heapify(GuBuf *buf, GuOrder *order);
|
||||
|
||||
GU_API_DECL GuSeq*
|
||||
gu_buf_freeze(GuBuf* buf, GuPool* pool);
|
||||
|
||||
GU_API_DECL void
|
||||
gu_buf_evacuate(GuBuf* buf, GuPool* pool);
|
||||
#endif // GU_SEQ_H_
|
||||
|
||||
#ifdef GU_STRING_H_
|
||||
|
||||
@@ -344,8 +344,9 @@ struct PgfCCat {
|
||||
PgfCncFuns* linrefs;
|
||||
size_t n_synprods;
|
||||
PgfProductionSeq* prods;
|
||||
float viterbi_prob;
|
||||
prob_t viterbi_prob;
|
||||
int fid;
|
||||
int chunk_count;
|
||||
PgfItemConts* conts;
|
||||
struct PgfAnswers* answers;
|
||||
GuFinalizer fin[0];
|
||||
|
||||
@@ -198,16 +198,16 @@ pgf_literal_hash(GuHash h, PgfLiteral lit);
|
||||
PGF_API_DECL GuHash
|
||||
pgf_expr_hash(GuHash h, PgfExpr e);
|
||||
|
||||
PGF_API size_t
|
||||
PGF_API_DECL size_t
|
||||
pgf_expr_size(PgfExpr expr);
|
||||
|
||||
PGF_API GuSeq*
|
||||
PGF_API_DECL GuSeq*
|
||||
pgf_expr_functions(PgfExpr expr, GuPool* pool);
|
||||
|
||||
PGF_API PgfExpr
|
||||
PGF_API_DECL PgfExpr
|
||||
pgf_expr_substitute(PgfExpr expr, GuSeq* meta_values, GuPool* pool);
|
||||
|
||||
PGF_API PgfType*
|
||||
PGF_API_DECL PgfType*
|
||||
pgf_type_substitute(PgfType* type, GuSeq* meta_values, GuPool* pool);
|
||||
|
||||
typedef struct PgfPrintContext PgfPrintContext;
|
||||
|
||||
@@ -5,9 +5,6 @@
|
||||
#include <pgf/reasoner.h>
|
||||
#include <pgf/reader.h>
|
||||
#include "lightning.h"
|
||||
#if defined(__MINGW32__) || defined(_MSC_VER)
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
//#define PGF_JIT_DEBUG
|
||||
|
||||
@@ -43,18 +40,6 @@ typedef struct {
|
||||
#define JIT_VSTATE JIT_V1
|
||||
#define JIT_VCLOS JIT_V2
|
||||
|
||||
#if defined(__MINGW32__) || defined(_MSC_VER)
|
||||
#include <windows.h>
|
||||
|
||||
static int
|
||||
getpagesize()
|
||||
{
|
||||
SYSTEM_INFO system_info;
|
||||
GetSystemInfo(&system_info);
|
||||
return system_info.dwPageSize;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
static void
|
||||
pgf_jit_finalize_page(GuFinalizer* self)
|
||||
@@ -65,19 +50,8 @@ pgf_jit_finalize_page(GuFinalizer* self)
|
||||
static void
|
||||
pgf_jit_alloc_page(PgfReader* rdr)
|
||||
{
|
||||
void *page;
|
||||
|
||||
size_t page_size = getpagesize();
|
||||
|
||||
#if defined(ANDROID)
|
||||
if ((page = memalign(page_size, page_size)) == NULL) {
|
||||
#elif defined(__MINGW32__) || defined(_MSC_VER)
|
||||
if ((page = malloc(page_size)) == NULL) {
|
||||
#else
|
||||
if (posix_memalign(&page, page_size, page_size) != 0) {
|
||||
#endif
|
||||
gu_fatal("Memory allocation failed");
|
||||
}
|
||||
size_t page_size;
|
||||
void *page = gu_mem_page_alloc(sizeof(GuFinalizer), &page_size);
|
||||
|
||||
GuFinalizer* fin = page;
|
||||
fin->fn = pgf_jit_finalize_page;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -162,6 +162,22 @@ PGF_API_DECL void
|
||||
pgf_lookup_morpho(PgfConcr *concr, GuString sentence,
|
||||
PgfMorphoCallback* callback, GuExn* err);
|
||||
|
||||
typedef struct {
|
||||
size_t pos;
|
||||
GuString ptr;
|
||||
} PgfCohortSpot;
|
||||
|
||||
typedef struct {
|
||||
PgfCohortSpot start;
|
||||
PgfCohortSpot end;
|
||||
GuBuf* buf;
|
||||
} PgfCohortRange;
|
||||
|
||||
PGF_API_DECL GuEnum*
|
||||
pgf_lookup_cohorts(PgfConcr *concr, GuString sentence,
|
||||
PgfMorphoCallback* callback,
|
||||
GuPool* pool, GuExn* err);
|
||||
|
||||
typedef struct PgfFullFormEntry PgfFullFormEntry;
|
||||
|
||||
PGF_API_DECL GuEnum*
|
||||
|
||||
@@ -98,6 +98,74 @@ pgf_print_fid(int fid, GuOut* out, GuExn* err)
|
||||
gu_printf(out, err, "C%d", fid);
|
||||
}
|
||||
|
||||
PGF_INTERNAL void
|
||||
pgf_print_production_args(PgfPArgs* args,
|
||||
GuOut* out, GuExn* err)
|
||||
{
|
||||
size_t n_args = gu_seq_length(args);
|
||||
for (size_t j = 0; j < n_args; j++) {
|
||||
if (j > 0)
|
||||
gu_putc(',',out,err);
|
||||
|
||||
PgfPArg arg = gu_seq_get(args, PgfPArg, j);
|
||||
|
||||
if (arg.hypos != NULL &&
|
||||
gu_seq_length(arg.hypos) > 0) {
|
||||
size_t n_hypos = gu_seq_length(arg.hypos);
|
||||
for (size_t k = 0; k < n_hypos; k++) {
|
||||
PgfCCat *hypo = gu_seq_get(arg.hypos, PgfCCat*, k);
|
||||
pgf_print_fid(hypo->fid, out, err);
|
||||
gu_putc(' ',out,err);
|
||||
}
|
||||
gu_puts("-> ",out,err);
|
||||
}
|
||||
|
||||
pgf_print_fid(arg.ccat->fid, out, err);
|
||||
}
|
||||
}
|
||||
|
||||
PGF_INTERNAL void
|
||||
pgf_print_production(int fid, PgfProduction prod,
|
||||
GuOut *out, GuExn* err)
|
||||
{
|
||||
pgf_print_fid(fid, out, err);
|
||||
gu_puts(" -> ", out, err);
|
||||
|
||||
GuVariantInfo i = gu_variant_open(prod);
|
||||
switch (i.tag) {
|
||||
case PGF_PRODUCTION_APPLY: {
|
||||
PgfProductionApply* papp = i.data;
|
||||
gu_printf(out,err,"F%d(",papp->fun->funid);
|
||||
if (papp->fun->ep != NULL) {
|
||||
pgf_print_expr(papp->fun->ep->expr, NULL, 0, out, err);
|
||||
} else {
|
||||
PgfPArg* parg = gu_seq_index(papp->args, PgfPArg, 0);
|
||||
gu_printf(out,err,"linref %s", parg->ccat->cnccat->abscat->name);
|
||||
}
|
||||
gu_printf(out,err,")[");
|
||||
pgf_print_production_args(papp->args,out,err);
|
||||
gu_printf(out,err,"]\n");
|
||||
break;
|
||||
}
|
||||
case PGF_PRODUCTION_COERCE: {
|
||||
PgfProductionCoerce* pcoerce = i.data;
|
||||
gu_puts("_[",out,err);
|
||||
pgf_print_fid(pcoerce->coerce->fid, out, err);
|
||||
gu_puts("]\n",out,err);
|
||||
break;
|
||||
}
|
||||
case PGF_PRODUCTION_EXTERN: {
|
||||
PgfProductionExtern* pext = i.data;
|
||||
gu_printf(out,err,"<extern>(");
|
||||
pgf_print_expr(pext->ep->expr, NULL, 0, out, err);
|
||||
gu_printf(out,err,")[]\n");
|
||||
break;
|
||||
}
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_print_productions(GuMapItor* fn, const void* key, void* value,
|
||||
GuExn* err)
|
||||
@@ -111,48 +179,7 @@ pgf_print_productions(GuMapItor* fn, const void* key, void* value,
|
||||
size_t n_prods = gu_seq_length(ccat->prods);
|
||||
for (size_t i = 0; i < n_prods; i++) {
|
||||
PgfProduction prod = gu_seq_get(ccat->prods, PgfProduction, i);
|
||||
|
||||
gu_puts(" ", out, err);
|
||||
pgf_print_fid(fid, out, err);
|
||||
gu_puts(" -> ", out, err);
|
||||
|
||||
GuVariantInfo i = gu_variant_open(prod);
|
||||
switch (i.tag) {
|
||||
case PGF_PRODUCTION_APPLY: {
|
||||
PgfProductionApply* papp = i.data;
|
||||
gu_printf(out,err,"F%d[",papp->fun->funid);
|
||||
size_t n_args = gu_seq_length(papp->args);
|
||||
for (size_t j = 0; j < n_args; j++) {
|
||||
if (j > 0)
|
||||
gu_putc(',',out,err);
|
||||
|
||||
PgfPArg arg = gu_seq_get(papp->args, PgfPArg, j);
|
||||
|
||||
if (arg.hypos != NULL) {
|
||||
size_t n_hypos = gu_seq_length(arg.hypos);
|
||||
for (size_t k = 0; k < n_hypos; k++) {
|
||||
if (k > 0)
|
||||
gu_putc(' ',out,err);
|
||||
PgfCCat *hypo = gu_seq_get(arg.hypos, PgfCCat*, k);
|
||||
pgf_print_fid(hypo->fid, out, err);
|
||||
}
|
||||
}
|
||||
|
||||
pgf_print_fid(arg.ccat->fid, out, err);
|
||||
}
|
||||
gu_printf(out,err,"]\n");
|
||||
break;
|
||||
}
|
||||
case PGF_PRODUCTION_COERCE: {
|
||||
PgfProductionCoerce* pcoerce = i.data;
|
||||
gu_puts("_[", out, err);
|
||||
pgf_print_fid(pcoerce->coerce->fid, out, err);
|
||||
gu_puts("]\n", out, err);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
pgf_print_production(fid, prod, out, err);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -328,16 +328,20 @@ pgf_read_patt(PgfReader* rdr)
|
||||
uint8_t tag = pgf_read_tag(rdr);
|
||||
switch (tag) {
|
||||
case PGF_PATT_APP: {
|
||||
PgfCId ctor = pgf_read_cid(rdr, rdr->opool);
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
|
||||
size_t n_args = pgf_read_len(rdr);
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
|
||||
PgfPattApp *papp =
|
||||
gu_new_variant(PGF_PATT_APP,
|
||||
PgfPattApp,
|
||||
&patt, rdr->opool);
|
||||
papp->ctor = pgf_read_cid(rdr, rdr->opool);
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
|
||||
papp->n_args = pgf_read_len(rdr);
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
|
||||
gu_new_flex_variant(PGF_PATT_APP,
|
||||
PgfPattApp,
|
||||
args, n_args,
|
||||
&patt, rdr->opool);
|
||||
papp->ctor = ctor;
|
||||
papp->n_args = n_args;
|
||||
|
||||
for (size_t i = 0; i < papp->n_args; i++) {
|
||||
papp->args[i] = pgf_read_patt(rdr);
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
@@ -840,6 +844,7 @@ pgf_read_fid(PgfReader* rdr, PgfConcr* concr)
|
||||
ccat->prods = NULL;
|
||||
ccat->viterbi_prob = 0;
|
||||
ccat->fid = fid;
|
||||
ccat->chunk_count = 1;
|
||||
ccat->conts = NULL;
|
||||
ccat->answers = NULL;
|
||||
|
||||
@@ -1077,6 +1082,7 @@ pgf_read_cnccat(PgfReader* rdr, PgfAbstr* abstr, PgfConcr* concr, PgfCId name)
|
||||
ccat->prods = NULL;
|
||||
ccat->viterbi_prob = 0;
|
||||
ccat->fid = fid;
|
||||
ccat->chunk_count = 1;
|
||||
ccat->conts = NULL;
|
||||
ccat->answers = NULL;
|
||||
|
||||
|
||||
516
src/runtime/c/pgf/scanner.c
Normal file
516
src/runtime/c/pgf/scanner.c
Normal file
@@ -0,0 +1,516 @@
|
||||
#include <pgf/data.h>
|
||||
#include <pgf/expr.h>
|
||||
#include <pgf/linearizer.h>
|
||||
#include <gu/utf8.h>
|
||||
|
||||
PGF_INTERNAL int
|
||||
cmp_string(PgfCohortSpot* spot, GuString tok,
|
||||
bool case_sensitive)
|
||||
{
|
||||
for (;;) {
|
||||
GuUCS c2 = gu_utf8_decode((const uint8_t**) &tok);
|
||||
if (c2 == 0)
|
||||
return 0;
|
||||
|
||||
const uint8_t* p = (uint8_t*) spot->ptr;
|
||||
GuUCS c1 = gu_utf8_decode(&p);
|
||||
if (c1 == 0)
|
||||
return -1;
|
||||
|
||||
if (!case_sensitive) {
|
||||
c1 = gu_ucs_to_lower(c1);
|
||||
c2 = gu_ucs_to_lower(c2);
|
||||
}
|
||||
|
||||
if (c1 != c2)
|
||||
return (c1-c2);
|
||||
|
||||
spot->ptr = (GuString) p;
|
||||
spot->pos++;
|
||||
}
|
||||
}
|
||||
|
||||
PGF_INTERNAL bool
|
||||
skip_space(GuString* psent, size_t* ppos)
|
||||
{
|
||||
const uint8_t* p = (uint8_t*) *psent;
|
||||
if (!gu_ucs_is_space(gu_utf8_decode(&p)))
|
||||
return false;
|
||||
|
||||
*psent = (GuString) p;
|
||||
(*ppos)++;
|
||||
return true;
|
||||
}
|
||||
|
||||
PGF_INTERNAL int
|
||||
pgf_symbols_cmp(PgfCohortSpot* spot,
|
||||
PgfSymbols* syms, size_t* sym_idx,
|
||||
bool case_sensitive)
|
||||
{
|
||||
size_t n_syms = gu_seq_length(syms);
|
||||
while (*sym_idx < n_syms) {
|
||||
PgfSymbol sym = gu_seq_get(syms, PgfSymbol, *sym_idx);
|
||||
|
||||
if (*sym_idx > 0) {
|
||||
if (!skip_space(&spot->ptr,&spot->pos)) {
|
||||
if (*spot->ptr == 0)
|
||||
return -1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
while (*spot->ptr != 0) {
|
||||
if (!skip_space(&spot->ptr,&spot->pos))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
GuVariantInfo inf = gu_variant_open(sym);
|
||||
switch (inf.tag) {
|
||||
case PGF_SYMBOL_CAT:
|
||||
case PGF_SYMBOL_LIT:
|
||||
case PGF_SYMBOL_VAR: {
|
||||
if (*spot->ptr == 0)
|
||||
return -1;
|
||||
return 1;
|
||||
}
|
||||
case PGF_SYMBOL_KS: {
|
||||
PgfSymbolKS* pks = inf.data;
|
||||
if (*spot->ptr == 0)
|
||||
return -1;
|
||||
|
||||
int cmp = cmp_string(spot,pks->token, case_sensitive);
|
||||
if (cmp != 0)
|
||||
return cmp;
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_KP:
|
||||
case PGF_SYMBOL_BIND:
|
||||
case PGF_SYMBOL_NE:
|
||||
case PGF_SYMBOL_SOFT_BIND:
|
||||
case PGF_SYMBOL_SOFT_SPACE:
|
||||
case PGF_SYMBOL_CAPIT:
|
||||
case PGF_SYMBOL_ALL_CAPIT: {
|
||||
return -1;
|
||||
}
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
|
||||
(*sym_idx)++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_morpho_iter(PgfProductionIdx* idx,
|
||||
PgfMorphoCallback* callback,
|
||||
GuExn* err)
|
||||
{
|
||||
size_t n_entries = gu_buf_length(idx);
|
||||
for (size_t i = 0; i < n_entries; i++) {
|
||||
PgfProductionIdxEntry* entry =
|
||||
gu_buf_index(idx, PgfProductionIdxEntry, i);
|
||||
|
||||
PgfCId lemma = entry->papp->fun->absfun->name;
|
||||
GuString analysis = entry->ccat->cnccat->labels[entry->lin_idx];
|
||||
|
||||
prob_t prob = entry->ccat->cnccat->abscat->prob +
|
||||
entry->papp->fun->absfun->ep.prob;
|
||||
callback->callback(callback,
|
||||
lemma, analysis, prob, err);
|
||||
if (!gu_ok(err))
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
GuOrder order;
|
||||
bool case_sensitive;
|
||||
} PgfSequenceOrder;
|
||||
|
||||
PGF_INTERNAL bool
|
||||
pgf_is_case_sensitive(PgfConcr* concr)
|
||||
{
|
||||
PgfFlag* flag =
|
||||
gu_seq_binsearch(concr->cflags, pgf_flag_order, PgfFlag, "case_sensitive");
|
||||
if (flag != NULL) {
|
||||
GuVariantInfo inf = gu_variant_open(flag->value);
|
||||
if (inf.tag == PGF_LITERAL_STR) {
|
||||
PgfLiteralStr* lstr = inf.data;
|
||||
if (strcmp(lstr->val, "off") == 0)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static int
|
||||
pgf_sequence_cmp_fn(GuOrder* order, const void* p1, const void* p2)
|
||||
{
|
||||
PgfSequenceOrder* self = gu_container(order, PgfSequenceOrder, order);
|
||||
|
||||
PgfCohortSpot spot = {0, (GuString) p1};
|
||||
|
||||
const PgfSequence* sp2 = p2;
|
||||
|
||||
size_t sym_idx = 0;
|
||||
int res = pgf_symbols_cmp(&spot, sp2->syms, &sym_idx, self->case_sensitive);
|
||||
if (res == 0 && (*spot.ptr != 0 || sym_idx != gu_seq_length(sp2->syms))) {
|
||||
res = 1;
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
PGF_API void
|
||||
pgf_lookup_morpho(PgfConcr *concr, GuString sentence,
|
||||
PgfMorphoCallback* callback, GuExn* err)
|
||||
{
|
||||
if (concr->sequences == NULL) {
|
||||
GuExnData* err_data = gu_raise(err, PgfExn);
|
||||
if (err_data) {
|
||||
err_data->data = "The concrete syntax is not loaded";
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
size_t index = 0;
|
||||
PgfSequenceOrder order = { { pgf_sequence_cmp_fn },
|
||||
pgf_is_case_sensitive(concr) };
|
||||
if (gu_seq_binsearch_index(concr->sequences, &order.order,
|
||||
PgfSequence, (void*) sentence,
|
||||
&index)) {
|
||||
PgfSequence* seq = NULL;
|
||||
|
||||
/* If the match is case-insensitive then there might be more
|
||||
* matches around the current index. We must check the neighbour
|
||||
* sequences for matching as well.
|
||||
*/
|
||||
|
||||
if (!order.case_sensitive) {
|
||||
size_t i = index;
|
||||
while (i > 0) {
|
||||
seq = gu_seq_index(concr->sequences, PgfSequence, i-1);
|
||||
|
||||
size_t sym_idx = 0;
|
||||
PgfCohortSpot spot = {0, sentence};
|
||||
if (pgf_symbols_cmp(&spot, seq->syms, &sym_idx, order.case_sensitive) != 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (seq->idx != NULL)
|
||||
pgf_morpho_iter(seq->idx, callback, err);
|
||||
|
||||
i--;
|
||||
}
|
||||
}
|
||||
|
||||
seq = gu_seq_index(concr->sequences, PgfSequence, index);
|
||||
if (seq->idx != NULL)
|
||||
pgf_morpho_iter(seq->idx, callback, err);
|
||||
|
||||
if (!order.case_sensitive) {
|
||||
size_t i = index+1;
|
||||
while (i < gu_seq_length(concr->sequences)) {
|
||||
seq = gu_seq_index(concr->sequences, PgfSequence, i);
|
||||
|
||||
size_t sym_idx = 0;
|
||||
PgfCohortSpot spot = {0, sentence};
|
||||
if (pgf_symbols_cmp(&spot, seq->syms, &sym_idx, order.case_sensitive) != 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (seq->idx != NULL)
|
||||
pgf_morpho_iter(seq->idx, callback, err);
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
GuEnum en;
|
||||
PgfConcr* concr;
|
||||
GuString sentence;
|
||||
GuString current;
|
||||
size_t len;
|
||||
PgfMorphoCallback* callback;
|
||||
GuExn* err;
|
||||
bool case_sensitive;
|
||||
GuBuf* spots;
|
||||
GuBuf* found;
|
||||
} PgfCohortsState;
|
||||
|
||||
static int
|
||||
cmp_cohort_spot(GuOrder* self, const void* a, const void* b)
|
||||
{
|
||||
PgfCohortSpot *s1 = (PgfCohortSpot *) a;
|
||||
PgfCohortSpot *s2 = (PgfCohortSpot *) b;
|
||||
|
||||
return (s1->ptr-s2->ptr);
|
||||
}
|
||||
|
||||
static GuOrder
|
||||
pgf_cohort_spot_order[1] = {{ cmp_cohort_spot }};
|
||||
|
||||
static void
|
||||
pgf_lookup_cohorts_helper(PgfCohortsState *state, PgfCohortSpot* spot,
|
||||
int i, int j, ptrdiff_t min, ptrdiff_t max)
|
||||
{
|
||||
// This is a variation of a binary search algorithm which
|
||||
// can retrieve all prefixes of a string with minimal
|
||||
// comparisons, i.e. there is no need to lookup every
|
||||
// prefix separately.
|
||||
|
||||
while (i <= j) {
|
||||
int k = (i+j) / 2;
|
||||
PgfSequence* seq = gu_seq_index(state->concr->sequences, PgfSequence, k);
|
||||
|
||||
PgfCohortSpot current = *spot;
|
||||
|
||||
size_t sym_idx = 0;
|
||||
int cmp = pgf_symbols_cmp(¤t, seq->syms, &sym_idx, state->case_sensitive);
|
||||
if (cmp < 0) {
|
||||
j = k-1;
|
||||
} else if (cmp > 0) {
|
||||
ptrdiff_t len = current.ptr - spot->ptr;
|
||||
|
||||
if (min <= len)
|
||||
pgf_lookup_cohorts_helper(state, spot, i, k-1, min, len);
|
||||
|
||||
if (len+1 <= max)
|
||||
pgf_lookup_cohorts_helper(state, spot, k+1, j, len+1, max);
|
||||
|
||||
break;
|
||||
} else {
|
||||
ptrdiff_t len = current.ptr - spot->ptr;
|
||||
|
||||
if (min <= len)
|
||||
pgf_lookup_cohorts_helper(state, spot, i, k-1, min, len);
|
||||
|
||||
if (seq->idx != NULL && gu_buf_length(seq->idx) > 0) {
|
||||
PgfCohortRange* range = gu_buf_insert(state->found, 0);
|
||||
range->start = *spot;
|
||||
range->end = current;
|
||||
range->buf = seq->idx;
|
||||
}
|
||||
|
||||
while (*current.ptr != 0) {
|
||||
if (!skip_space(¤t.ptr, ¤t.pos))
|
||||
break;
|
||||
}
|
||||
|
||||
gu_buf_heap_push(state->spots, pgf_cohort_spot_order, ¤t);
|
||||
|
||||
if (len <= max)
|
||||
pgf_lookup_cohorts_helper(state, spot, k+1, j, len, max);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_lookup_cohorts_enum_next(GuEnum* self, void* to, GuPool* pool)
|
||||
{
|
||||
PgfCohortsState* state = gu_container(self, PgfCohortsState, en);
|
||||
|
||||
while (gu_buf_length(state->found) == 0 &&
|
||||
gu_buf_length(state->spots) > 0) {
|
||||
PgfCohortSpot spot;
|
||||
gu_buf_heap_pop(state->spots, pgf_cohort_spot_order, &spot);
|
||||
|
||||
if (spot.ptr == state->current)
|
||||
continue;
|
||||
|
||||
if (*spot.ptr == 0)
|
||||
break;
|
||||
|
||||
pgf_lookup_cohorts_helper
|
||||
(state, &spot,
|
||||
0, gu_seq_length(state->concr->sequences)-1,
|
||||
1, (state->sentence+state->len)-spot.ptr);
|
||||
|
||||
if (gu_buf_length(state->found) == 0) {
|
||||
// skip one character and try again
|
||||
gu_utf8_decode((const uint8_t**) &spot.ptr);
|
||||
spot.pos++;
|
||||
gu_buf_heap_push(state->spots, pgf_cohort_spot_order, &spot);
|
||||
}
|
||||
}
|
||||
|
||||
PgfCohortRange* pRes = (PgfCohortRange*)to;
|
||||
|
||||
if (gu_buf_length(state->found) == 0) {
|
||||
pRes->start.pos = 0;
|
||||
pRes->start.ptr = NULL;
|
||||
pRes->end.pos = 0;
|
||||
pRes->end.ptr = NULL;
|
||||
pRes->buf = NULL;
|
||||
state->current = NULL;
|
||||
return;
|
||||
} else do {
|
||||
*pRes = gu_buf_pop(state->found, PgfCohortRange);
|
||||
state->current = pRes->start.ptr;
|
||||
pgf_morpho_iter(pRes->buf, state->callback, state->err);
|
||||
} while (gu_buf_length(state->found) > 0 &&
|
||||
gu_buf_index_last(state->found, PgfCohortRange)->end.ptr == pRes->end.ptr);
|
||||
|
||||
}
|
||||
|
||||
/* Creates an enumeration over the cohorts of `sentence`.
 *
 * concr    - concrete syntax whose sequences are searched
 * sentence - the input string; it is not copied, the state keeps the
 *            pointer
 * callback - receives the analyses of each range as it is enumerated
 * pool     - the state and its buffers are allocated from this pool
 * err      - receives a PgfExn when the concrete syntax is not loaded;
 *            also stored in the state and passed to the callback later
 *
 * Returns the enumeration, or NULL when the concrete syntax is not
 * loaded. Each step yields a PgfCohortRange.
 */
PGF_API GuEnum*
pgf_lookup_cohorts(PgfConcr *concr, GuString sentence,
                   PgfMorphoCallback* callback,
                   GuPool* pool, GuExn* err)
{
	if (concr->sequences == NULL) {
		GuExnData* err_data = gu_raise(err, PgfExn);
		if (err_data) {
			err_data->data = "The concrete syntax is not loaded";
		}
		/* Leave even when gu_raise() handed back no data record:
		 * the enumeration cannot work without sequences. */
		return NULL;
	}

	PgfCohortsState* state = gu_new(PgfCohortsState, pool);
	state->en.next = pgf_lookup_cohorts_enum_next;
	state->concr   = concr;
	state->sentence= sentence;
	/* The first enumeration step compares against `current`, so it must
	 * not be left to whatever the fresh allocation contains. */
	state->current = NULL;
	state->len     = strlen(sentence);
	state->callback= callback;
	state->err     = err;
	state->case_sensitive = pgf_is_case_sensitive(concr);
	state->spots   = gu_new_buf(PgfCohortSpot, pool);
	state->found   = gu_new_buf(PgfCohortRange, pool);

	/* The first start position is the first non-space character. */
	PgfCohortSpot spot = {0,sentence};
	while (*spot.ptr != 0) {
		if (!skip_space(&spot.ptr, &spot.pos))
			break;
	}

	gu_buf_heap_push(state->spots, pgf_cohort_spot_order, &spot);

	return &state->en;
}
|
||||
|
||||
typedef struct {
|
||||
GuEnum en;
|
||||
PgfSequences* sequences;
|
||||
GuString prefix;
|
||||
size_t seq_idx;
|
||||
bool case_sensitive;
|
||||
} PgfFullFormState;
|
||||
|
||||
struct PgfFullFormEntry {
|
||||
GuString tokens;
|
||||
PgfProductionIdx* idx;
|
||||
};
|
||||
|
||||
static void
|
||||
gu_fullform_enum_next(GuEnum* self, void* to, GuPool* pool)
|
||||
{
|
||||
PgfFullFormState* st = gu_container(self, PgfFullFormState, en);
|
||||
PgfFullFormEntry* entry = NULL;
|
||||
|
||||
if (st->sequences != NULL) {
|
||||
size_t n_seqs = gu_seq_length(st->sequences);
|
||||
while (st->seq_idx < n_seqs) {
|
||||
PgfSequence* seq = gu_seq_index(st->sequences, PgfSequence, st->seq_idx);
|
||||
GuString tokens = pgf_get_tokens(seq->syms, 0, pool);
|
||||
|
||||
PgfCohortSpot spot = {0, st->prefix};
|
||||
if (cmp_string(&spot, tokens, st->case_sensitive) > 0 || *spot.ptr != 0) {
|
||||
st->seq_idx = n_seqs;
|
||||
break;
|
||||
}
|
||||
|
||||
if (*tokens != 0 && seq->idx != NULL) {
|
||||
entry = gu_new(PgfFullFormEntry, pool);
|
||||
entry->tokens = tokens;
|
||||
entry->idx = seq->idx;
|
||||
|
||||
st->seq_idx++;
|
||||
break;
|
||||
}
|
||||
|
||||
st->seq_idx++;
|
||||
}
|
||||
}
|
||||
|
||||
*((PgfFullFormEntry**) to) = entry;
|
||||
}
|
||||
|
||||
/* Creates an enumeration over the full-form lexicon of `concr`.
 *
 * The walk starts at the first sequence, uses the empty prefix and
 * compares case-sensitively. The state is allocated from `pool`.
 * Each step yields a PgfFullFormEntry*, NULL at the end.
 */
PGF_API GuEnum*
pgf_fullform_lexicon(PgfConcr *concr, GuPool* pool)
{
	PgfFullFormState* state = gu_new(PgfFullFormState, pool);

	state->sequences      = concr->sequences;
	state->prefix         = "";
	state->seq_idx        = 0;
	state->case_sensitive = true;
	state->en.next        = gu_fullform_enum_next;

	return &state->en;
}
|
||||
|
||||
PGF_API GuString
|
||||
pgf_fullform_get_string(PgfFullFormEntry* entry)
|
||||
{
|
||||
return entry->tokens;
|
||||
}
|
||||
|
||||
PGF_API void
|
||||
pgf_fullform_get_analyses(PgfFullFormEntry* entry,
|
||||
PgfMorphoCallback* callback, GuExn* err)
|
||||
{
|
||||
pgf_morpho_iter(entry->idx, callback, err);
|
||||
}
|
||||
|
||||
/* Creates an enumeration over the lexicon entries related to `prefix`.
 *
 * concr  - concrete syntax whose sequences are searched
 * prefix - the prefix to look for; it is not copied, the state keeps
 *          the pointer
 * pool   - the enumeration state is allocated from this pool
 * err    - receives a PgfExn when the concrete syntax is not loaded
 *
 * Returns the enumeration, or NULL when the concrete syntax is not
 * loaded. Each step yields a PgfFullFormEntry*, NULL at the end.
 */
PGF_API GuEnum*
pgf_lookup_word_prefix(PgfConcr *concr, GuString prefix,
                       GuPool* pool, GuExn* err)
{
	if (concr->sequences == NULL) {
		GuExnData* err_data = gu_raise(err, PgfExn);
		if (err_data) {
			err_data->data = "The concrete syntax is not loaded";
		}
		/* Leave even when gu_raise() handed back no data record:
		 * the binary search below needs the sequences. */
		return NULL;
	}

	PgfFullFormState* state = gu_new(PgfFullFormState, pool);
	state->en.next   = gu_fullform_enum_next;
	state->sequences = concr->sequences;
	state->prefix    = prefix;
	state->seq_idx   = 0;
	state->case_sensitive = pgf_is_case_sensitive(concr);

	/* Position seq_idx with a binary search for the prefix. */
	PgfSequenceOrder order = { { pgf_sequence_cmp_fn },
	                           state->case_sensitive };
	if (!gu_seq_binsearch_index(concr->sequences, &order.order,
	                            PgfSequence, (void*) prefix,
	                            &state->seq_idx)) {
		/* No exact hit: start one past the index the search left. */
		state->seq_idx++;
	} else if (!state->case_sensitive) {
		/* If the match is case-insensitive then there might be more
		 * matches around the current index. Since we scroll down
		 * anyway, it is enough to search upwards now.
		 */

		while (state->seq_idx > 0) {
			PgfSequence* seq =
				gu_seq_index(concr->sequences, PgfSequence, state->seq_idx-1);

			size_t sym_idx = 0;
			PgfCohortSpot spot = {0, state->prefix};
			if (pgf_symbols_cmp(&spot, seq->syms, &sym_idx, state->case_sensitive) > 0 || *spot.ptr != 0) {
				break;
			}

			state->seq_idx--;
		}
	}

	return &state->en;
}
|
||||
@@ -499,14 +499,17 @@ store_expr(SgSG* sg,
|
||||
PgfExprLit* elit = ei.data;
|
||||
|
||||
Mem mem[2];
|
||||
size_t len = 0;
|
||||
|
||||
GuVariantInfo li = gu_variant_open(elit->lit);
|
||||
switch (li.tag) {
|
||||
case PGF_LITERAL_STR: {
|
||||
PgfLiteralStr* lstr = li.data;
|
||||
|
||||
len = strlen(lstr->val);
|
||||
|
||||
mem[0].flags = MEM_Str;
|
||||
mem[0].n = strlen(lstr->val);
|
||||
mem[0].n = len;
|
||||
mem[0].z = lstr->val;
|
||||
break;
|
||||
}
|
||||
@@ -515,6 +518,7 @@ store_expr(SgSG* sg,
|
||||
|
||||
mem[0].flags = MEM_Int;
|
||||
mem[0].u.i = lint->val;
|
||||
len = sizeof(mem[0].u.i);
|
||||
break;
|
||||
}
|
||||
case PGF_LITERAL_FLT: {
|
||||
@@ -522,6 +526,7 @@ store_expr(SgSG* sg,
|
||||
|
||||
mem[0].flags = MEM_Real;
|
||||
mem[0].u.r = lflt->val;
|
||||
len = sizeof(mem[0].u.r);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@@ -556,7 +561,7 @@ store_expr(SgSG* sg,
|
||||
int serial_type_arg = sqlite3BtreeSerialType(&mem[1], file_format);
|
||||
int serial_type_arg_hdr_len = sqlite3BtreeVarintLen(serial_type_arg);
|
||||
|
||||
unsigned char* buf = malloc(1+serial_type_lit_hdr_len+(serial_type_arg_hdr_len > 1 ? serial_type_arg_hdr_len : 1)+mem[0].n+8);
|
||||
unsigned char* buf = malloc(1+serial_type_lit_hdr_len+(serial_type_arg_hdr_len > 1 ? serial_type_arg_hdr_len : 1)+len+8);
|
||||
unsigned char* p = buf;
|
||||
*p++ = 1+serial_type_lit_hdr_len+serial_type_arg_hdr_len;
|
||||
p += putVarint32(p, serial_type_lit);
|
||||
|
||||
@@ -4835,7 +4835,6 @@ SQLITE_PRIVATE int sqlite3HeaderSizeBtree(void);
|
||||
SQLITE_PRIVATE void sqlite3VdbeRecordUnpack(KeyInfo*,int,const void*,UnpackedRecord*);
|
||||
SQLITE_PRIVATE UnpackedRecord *sqlite3VdbeAllocUnpackedRecord(KeyInfo *, char *, int, char **);
|
||||
|
||||
typedef int (*RecordCompare)(int,const void*,UnpackedRecord*);
|
||||
SQLITE_PRIVATE RecordCompare sqlite3VdbeFindCompare(UnpackedRecord*);
|
||||
|
||||
/************** End of btreeInt.h ********************************************/
|
||||
|
||||
Reference in New Issue
Block a user