mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-08 02:32:50 -06:00
Merge with master and drop the Haskell runtime completely
This commit is contained in:
@@ -68,6 +68,7 @@ libpgf_la_SOURCES = \
|
||||
pgf/data.h \
|
||||
pgf/expr.c \
|
||||
pgf/expr.h \
|
||||
pgf/scanner.c \
|
||||
pgf/parser.c \
|
||||
pgf/lookup.c \
|
||||
pgf/jit.c \
|
||||
|
||||
@@ -64,6 +64,8 @@
|
||||
|
||||
#ifdef GU_ALIGNOF
|
||||
# define gu_alignof GU_ALIGNOF
|
||||
#elif defined(_MSC_VER)
|
||||
# define gu_alignof __alignof
|
||||
#else
|
||||
# define gu_alignof(t_) \
|
||||
((size_t)(offsetof(struct { char c_; t_ e_; }, e_)))
|
||||
@@ -77,7 +79,7 @@
|
||||
|
||||
#define GU_COMMA ,
|
||||
|
||||
#define GU_ARRAY_LEN(t,a) (sizeof((const t[])a) / sizeof(t))
|
||||
#define GU_ARRAY_LEN(a) (sizeof(a) / sizeof(a[0]))
|
||||
|
||||
#define GU_ID(...) __VA_ARGS__
|
||||
|
||||
@@ -183,9 +185,13 @@ typedef union {
|
||||
void (*fp)();
|
||||
} GuMaxAlign;
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <malloc.h>
|
||||
#define gu_alloca(N) alloca(N)
|
||||
#else
|
||||
#define gu_alloca(N) \
|
||||
(((union { GuMaxAlign align_; uint8_t buf_[N]; }){{0}}).buf_)
|
||||
|
||||
#endif
|
||||
|
||||
// For Doxygen
|
||||
#define GU_PRIVATE /** @private */
|
||||
|
||||
@@ -7,6 +7,9 @@
|
||||
|
||||
typedef struct GuMapData GuMapData;
|
||||
|
||||
#define SKIP_DELETED 1
|
||||
#define SKIP_NONE 2
|
||||
|
||||
struct GuMapData {
|
||||
uint8_t* keys;
|
||||
uint8_t* values;
|
||||
@@ -19,6 +22,7 @@ struct GuMap {
|
||||
GuHasher* hasher;
|
||||
size_t key_size;
|
||||
size_t value_size;
|
||||
size_t cell_size; // cell_size = GU_MAX(value_size,sizeof(uint8_t))
|
||||
const void* default_value;
|
||||
GuMapData data;
|
||||
|
||||
@@ -30,9 +34,7 @@ gu_map_finalize(GuFinalizer* fin)
|
||||
{
|
||||
GuMap* map = gu_container(fin, GuMap, fin);
|
||||
gu_mem_buf_free(map->data.keys);
|
||||
if (map->value_size) {
|
||||
gu_mem_buf_free(map->data.values);
|
||||
}
|
||||
gu_mem_buf_free(map->data.values);
|
||||
}
|
||||
|
||||
static const GuWord gu_map_empty_key = 0;
|
||||
@@ -68,7 +70,7 @@ gu_map_entry_is_free(GuMap* map, GuMapData* data, size_t idx)
|
||||
}
|
||||
|
||||
static bool
|
||||
gu_map_lookup(GuMap* map, const void* key, size_t* idx_out)
|
||||
gu_map_lookup(GuMap* map, const void* key, uint8_t del, size_t* idx_out)
|
||||
{
|
||||
size_t n = map->data.n_entries;
|
||||
if (map->hasher == gu_addr_hasher) {
|
||||
@@ -78,13 +80,17 @@ gu_map_lookup(GuMap* map, const void* key, size_t* idx_out)
|
||||
while (true) {
|
||||
const void* entry_key =
|
||||
((const void**)map->data.keys)[idx];
|
||||
|
||||
if (entry_key == NULL && map->data.zero_idx != idx) {
|
||||
*idx_out = idx;
|
||||
return false;
|
||||
if (map->data.values[idx * map->cell_size] != del) { //skip deleted
|
||||
*idx_out = idx;
|
||||
return false;
|
||||
}
|
||||
} else if (entry_key == key) {
|
||||
*idx_out = idx;
|
||||
return true;
|
||||
}
|
||||
|
||||
idx = (idx + offset) % n;
|
||||
}
|
||||
} else if (map->hasher == gu_word_hasher) {
|
||||
@@ -156,33 +162,18 @@ gu_map_resize(GuMap* map, size_t req_entries)
|
||||
size_t key_size = map->key_size;
|
||||
size_t key_alloc = 0;
|
||||
data->keys = gu_mem_buf_alloc(req_entries * key_size, &key_alloc);
|
||||
memset(data->keys, 0, key_alloc);
|
||||
|
||||
size_t value_size = map->value_size;
|
||||
size_t value_alloc = 0;
|
||||
if (value_size) {
|
||||
data->values = gu_mem_buf_alloc(req_entries * value_size,
|
||||
&value_alloc);
|
||||
memset(data->values, 0, value_alloc);
|
||||
}
|
||||
|
||||
data->n_entries = gu_twin_prime_inf(value_size ?
|
||||
GU_MIN(key_alloc / key_size,
|
||||
value_alloc / value_size)
|
||||
: key_alloc / key_size);
|
||||
if (map->hasher == gu_addr_hasher) {
|
||||
for (size_t i = 0; i < data->n_entries; i++) {
|
||||
((const void**)data->keys)[i] = NULL;
|
||||
}
|
||||
} else if (map->hasher == gu_string_hasher) {
|
||||
for (size_t i = 0; i < data->n_entries; i++) {
|
||||
((GuString*)data->keys)[i] = NULL;
|
||||
}
|
||||
} else {
|
||||
memset(data->keys, 0, key_alloc);
|
||||
}
|
||||
size_t cell_size = map->cell_size;
|
||||
data->values = gu_mem_buf_alloc(req_entries * cell_size, &value_alloc);
|
||||
memset(data->values, 0, value_alloc);
|
||||
|
||||
data->n_entries = gu_twin_prime_inf(
|
||||
GU_MIN(key_alloc / key_size,
|
||||
value_alloc / cell_size));
|
||||
gu_assert(data->n_entries > data->n_occupied);
|
||||
|
||||
|
||||
data->n_occupied = 0;
|
||||
data->zero_idx = SIZE_MAX;
|
||||
|
||||
@@ -196,16 +187,14 @@ gu_map_resize(GuMap* map, size_t req_entries)
|
||||
} else if (map->hasher == gu_string_hasher) {
|
||||
old_key = (void*) *(GuString*)old_key;
|
||||
}
|
||||
void* old_value = &old_data.values[i * value_size];
|
||||
void* old_value = &old_data.values[i * cell_size];
|
||||
|
||||
memcpy(gu_map_insert(map, old_key),
|
||||
old_value, map->value_size);
|
||||
}
|
||||
|
||||
gu_mem_buf_free(old_data.keys);
|
||||
if (value_size) {
|
||||
gu_mem_buf_free(old_data.values);
|
||||
}
|
||||
gu_mem_buf_free(old_data.values);
|
||||
}
|
||||
|
||||
|
||||
@@ -226,9 +215,9 @@ GU_API void*
|
||||
gu_map_find(GuMap* map, const void* key)
|
||||
{
|
||||
size_t idx;
|
||||
bool found = gu_map_lookup(map, key, &idx);
|
||||
bool found = gu_map_lookup(map, key, SKIP_DELETED, &idx);
|
||||
if (found) {
|
||||
return &map->data.values[idx * map->value_size];
|
||||
return &map->data.values[idx * map->cell_size];
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
@@ -244,7 +233,7 @@ GU_API const void*
|
||||
gu_map_find_key(GuMap* map, const void* key)
|
||||
{
|
||||
size_t idx;
|
||||
bool found = gu_map_lookup(map, key, &idx);
|
||||
bool found = gu_map_lookup(map, key, SKIP_DELETED, &idx);
|
||||
if (found) {
|
||||
return &map->data.keys[idx * map->key_size];
|
||||
}
|
||||
@@ -255,17 +244,17 @@ GU_API bool
|
||||
gu_map_has(GuMap* ht, const void* key)
|
||||
{
|
||||
size_t idx;
|
||||
return gu_map_lookup(ht, key, &idx);
|
||||
return gu_map_lookup(ht, key, SKIP_DELETED, &idx);
|
||||
}
|
||||
|
||||
GU_API void*
|
||||
gu_map_insert(GuMap* map, const void* key)
|
||||
{
|
||||
size_t idx;
|
||||
bool found = gu_map_lookup(map, key, &idx);
|
||||
bool found = gu_map_lookup(map, key, SKIP_NONE, &idx);
|
||||
if (!found) {
|
||||
if (gu_map_maybe_resize(map)) {
|
||||
found = gu_map_lookup(map, key, &idx);
|
||||
found = gu_map_lookup(map, key, SKIP_NONE, &idx);
|
||||
gu_assert(!found);
|
||||
}
|
||||
if (map->hasher == gu_addr_hasher) {
|
||||
@@ -277,7 +266,7 @@ gu_map_insert(GuMap* map, const void* key)
|
||||
key, map->key_size);
|
||||
}
|
||||
if (map->default_value) {
|
||||
memcpy(&map->data.values[idx * map->value_size],
|
||||
memcpy(&map->data.values[idx * map->cell_size],
|
||||
map->default_value, map->value_size);
|
||||
}
|
||||
if (gu_map_entry_is_free(map, &map->data, idx)) {
|
||||
@@ -286,7 +275,32 @@ gu_map_insert(GuMap* map, const void* key)
|
||||
}
|
||||
map->data.n_occupied++;
|
||||
}
|
||||
return &map->data.values[idx * map->value_size];
|
||||
return &map->data.values[idx * map->cell_size];
|
||||
}
|
||||
|
||||
GU_API void
|
||||
gu_map_delete(GuMap* map, const void* key)
|
||||
{
|
||||
size_t idx;
|
||||
bool found = gu_map_lookup(map, key, SKIP_NONE, &idx);
|
||||
if (found) {
|
||||
if (map->hasher == gu_addr_hasher) {
|
||||
((const void**)map->data.keys)[idx] = NULL;
|
||||
} else if (map->hasher == gu_string_hasher) {
|
||||
((GuString*)map->data.keys)[idx] = NULL;
|
||||
} else {
|
||||
memset(&map->data.keys[idx * map->key_size],
|
||||
0, map->key_size);
|
||||
}
|
||||
map->data.values[idx * map->cell_size] = SKIP_DELETED;
|
||||
|
||||
if (gu_map_buf_is_zero(&map->data.keys[idx * map->key_size],
|
||||
map->key_size)) {
|
||||
map->data.zero_idx = SIZE_MAX;
|
||||
}
|
||||
|
||||
map->data.n_occupied--;
|
||||
}
|
||||
}
|
||||
|
||||
GU_API void
|
||||
@@ -297,7 +311,7 @@ gu_map_iter(GuMap* map, GuMapItor* itor, GuExn* err)
|
||||
continue;
|
||||
}
|
||||
const void* key = &map->data.keys[i * map->key_size];
|
||||
void* value = &map->data.values[i * map->value_size];
|
||||
void* value = &map->data.values[i * map->cell_size];
|
||||
if (map->hasher == gu_addr_hasher) {
|
||||
key = *(const void* const*) key;
|
||||
} else if (map->hasher == gu_string_hasher) {
|
||||
@@ -307,47 +321,30 @@ gu_map_iter(GuMap* map, GuMapItor* itor, GuExn* err)
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
GuEnum en;
|
||||
GuMap* ht;
|
||||
size_t i;
|
||||
GuMapKeyValue x;
|
||||
} GuMapEnum;
|
||||
|
||||
static void
|
||||
gu_map_enum_next(GuEnum* self, void* to, GuPool* pool)
|
||||
GU_API bool
|
||||
gu_map_next(GuMap* map, size_t* pi, void** pkey, void* pvalue)
|
||||
{
|
||||
*((GuMapKeyValue**) to) = NULL;
|
||||
|
||||
size_t i;
|
||||
GuMapEnum* en = (GuMapEnum*) self;
|
||||
for (i = en->i; i < en->ht->data.n_entries; i++) {
|
||||
if (gu_map_entry_is_free(en->ht, &en->ht->data, i)) {
|
||||
while (*pi < map->data.n_entries) {
|
||||
if (gu_map_entry_is_free(map, &map->data, *pi)) {
|
||||
(*pi)++;
|
||||
continue;
|
||||
}
|
||||
en->x.key = &en->ht->data.keys[i * en->ht->key_size];
|
||||
en->x.value = &en->ht->data.values[i * en->ht->value_size];
|
||||
if (en->ht->hasher == gu_addr_hasher) {
|
||||
en->x.key = *(const void* const*) en->x.key;
|
||||
} else if (en->ht->hasher == gu_string_hasher) {
|
||||
en->x.key = *(GuString*) en->x.key;
|
||||
|
||||
*pkey = &map->data.keys[*pi * map->key_size];
|
||||
if (map->hasher == gu_addr_hasher) {
|
||||
*pkey = *(void**) *pkey;
|
||||
} else if (map->hasher == gu_string_hasher) {
|
||||
*pkey = *(void**) *pkey;
|
||||
}
|
||||
|
||||
*((GuMapKeyValue**) to) = &en->x;
|
||||
break;
|
||||
}
|
||||
|
||||
en->i = i+1;
|
||||
}
|
||||
memcpy(pvalue, &map->data.values[*pi * map->cell_size],
|
||||
map->value_size);
|
||||
|
||||
GU_API GuEnum*
|
||||
gu_map_enum(GuMap* ht, GuPool* pool)
|
||||
{
|
||||
GuMapEnum* en = gu_new(GuMapEnum, pool);
|
||||
en->en.next = gu_map_enum_next;
|
||||
en->ht = ht;
|
||||
en->i = 0;
|
||||
return &en->en;
|
||||
(*pi)++;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
GU_API size_t
|
||||
@@ -363,8 +360,6 @@ gu_map_count(GuMap* map)
|
||||
return count;
|
||||
}
|
||||
|
||||
static const uint8_t gu_map_no_values[1] = { 0 };
|
||||
|
||||
GU_API GuMap*
|
||||
gu_make_map(size_t key_size, GuHasher* hasher,
|
||||
size_t value_size, const void* default_value,
|
||||
@@ -375,7 +370,7 @@ gu_make_map(size_t key_size, GuHasher* hasher,
|
||||
.n_occupied = 0,
|
||||
.n_entries = 0,
|
||||
.keys = NULL,
|
||||
.values = value_size ? NULL : (uint8_t*) gu_map_no_values,
|
||||
.values = NULL,
|
||||
.zero_idx = SIZE_MAX
|
||||
};
|
||||
GuMap* map = gu_new(GuMap, pool);
|
||||
@@ -384,6 +379,7 @@ gu_make_map(size_t key_size, GuHasher* hasher,
|
||||
map->data = data;
|
||||
map->key_size = key_size;
|
||||
map->value_size = value_size;
|
||||
map->cell_size = GU_MAX(value_size,sizeof(uint8_t));
|
||||
map->fin.fn = gu_map_finalize;
|
||||
gu_pool_finally(pool, &map->fin);
|
||||
|
||||
|
||||
@@ -62,6 +62,9 @@ gu_map_has(GuMap* ht, const void* key);
|
||||
GU_API_DECL void*
|
||||
gu_map_insert(GuMap* ht, const void* key);
|
||||
|
||||
GU_API_DECL void
|
||||
gu_map_delete(GuMap* ht, const void* key);
|
||||
|
||||
#define gu_map_put(MAP, KEYP, V, VAL) \
|
||||
GU_BEGIN \
|
||||
V* gu_map_put_p_ = gu_map_insert((MAP), (KEYP)); \
|
||||
@@ -71,13 +74,8 @@ gu_map_insert(GuMap* ht, const void* key);
|
||||
GU_API_DECL void
|
||||
gu_map_iter(GuMap* ht, GuMapItor* itor, GuExn* err);
|
||||
|
||||
typedef struct {
|
||||
const void* key;
|
||||
void* value;
|
||||
} GuMapKeyValue;
|
||||
|
||||
GU_API_DECL GuEnum*
|
||||
gu_map_enum(GuMap* ht, GuPool* pool);
|
||||
GU_API bool
|
||||
gu_map_next(GuMap* map, size_t* pi, void** pkey, void* pvalue);
|
||||
|
||||
typedef GuMap GuIntMap;
|
||||
|
||||
|
||||
@@ -8,6 +8,10 @@
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#endif
|
||||
#if defined(__MINGW32__) || defined(_MSC_VER)
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER)
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
@@ -108,6 +112,39 @@ gu_mem_buf_alloc(size_t min_size, size_t* real_size_out)
|
||||
return gu_mem_buf_realloc(NULL, min_size, real_size_out);
|
||||
}
|
||||
|
||||
#if defined(__MINGW32__) || defined(_MSC_VER)
|
||||
#include <windows.h>
|
||||
|
||||
static int
|
||||
getpagesize()
|
||||
{
|
||||
SYSTEM_INFO system_info;
|
||||
GetSystemInfo(&system_info);
|
||||
return system_info.dwPageSize;
|
||||
}
|
||||
#endif
|
||||
|
||||
GU_API void*
|
||||
gu_mem_page_alloc(size_t min_size, size_t* real_size_out)
|
||||
{
|
||||
size_t page_size = getpagesize();
|
||||
size_t size = ((min_size + page_size - 1) / page_size) * page_size;
|
||||
void *page = NULL;
|
||||
|
||||
#if defined(ANDROID)
|
||||
if ((page = memalign(page_size, size)) == NULL) {
|
||||
#elif defined(__MINGW32__) || defined(_MSC_VER)
|
||||
if ((page = malloc(size)) == NULL) {
|
||||
#else
|
||||
if (posix_memalign(&page, page_size, size) != 0) {
|
||||
#endif
|
||||
gu_fatal("Memory allocation failed");
|
||||
}
|
||||
|
||||
*real_size_out = size;
|
||||
return page;
|
||||
}
|
||||
|
||||
GU_API void
|
||||
gu_mem_buf_free(void* buf)
|
||||
{
|
||||
@@ -132,6 +169,7 @@ struct GuFinalizerNode {
|
||||
enum GuPoolType {
|
||||
GU_POOL_HEAP,
|
||||
GU_POOL_LOCAL,
|
||||
GU_POOL_PAGE,
|
||||
GU_POOL_MMAP
|
||||
};
|
||||
|
||||
@@ -180,6 +218,16 @@ gu_new_pool(void)
|
||||
return pool;
|
||||
}
|
||||
|
||||
GU_API GuPool*
|
||||
gu_new_page_pool(void)
|
||||
{
|
||||
size_t sz = GU_FLEX_SIZE(GuPool, init_buf, gu_mem_pool_initial_size);
|
||||
uint8_t* buf = gu_mem_page_alloc(sz, &sz);
|
||||
GuPool* pool = gu_init_pool(buf, sz);
|
||||
pool->type = GU_POOL_PAGE;
|
||||
return pool;
|
||||
}
|
||||
|
||||
GU_API GuPool*
|
||||
gu_mmap_pool(char* fpath, void* addr, size_t size, void**pptr)
|
||||
{
|
||||
@@ -238,7 +286,10 @@ gu_pool_expand(GuPool* pool, size_t req)
|
||||
gu_mem_chunk_max_size));
|
||||
gu_assert(real_req >= sizeof(GuMemChunk));
|
||||
size_t size = 0;
|
||||
GuMemChunk* chunk = gu_mem_buf_alloc(real_req, &size);
|
||||
GuMemChunk* chunk =
|
||||
(pool->type == GU_POOL_PAGE)
|
||||
? gu_mem_page_alloc(real_req, &size)
|
||||
: gu_mem_buf_alloc(real_req, &size);
|
||||
chunk->next = pool->chunks;
|
||||
pool->chunks = chunk;
|
||||
pool->curr_buf = (uint8_t*) chunk;
|
||||
@@ -309,6 +360,7 @@ gu_malloc_prefixed(GuPool* pool, size_t pre_align, size_t pre_size,
|
||||
size_t full_size = gu_mem_advance(offsetof(GuMemChunk, data),
|
||||
pre_align, pre_size, align, size);
|
||||
if (full_size > gu_mem_max_shared_alloc &&
|
||||
pool->type != GU_POOL_PAGE &&
|
||||
pool->type != GU_POOL_MMAP) {
|
||||
GuMemChunk* chunk = gu_mem_alloc(full_size);
|
||||
chunk->next = pool->chunks;
|
||||
|
||||
@@ -55,6 +55,11 @@ gu_local_pool_(uint8_t* init_buf, size_t sz);
|
||||
* should not be used in the bodies of recursive functions.
|
||||
*/
|
||||
|
||||
/// Create a pool where each chunk is corresponds to one or
|
||||
/// more pages.
|
||||
GU_API_DECL GuPool*
|
||||
gu_new_page_pool(void);
|
||||
|
||||
/// Create a pool stored in a memory mapped file.
|
||||
GU_API_DECL GuPool*
|
||||
gu_mmap_pool(char* fpath, void* addr, size_t size, void**pptr);
|
||||
@@ -198,6 +203,9 @@ gu_mem_buf_realloc(
|
||||
size_t min_size,
|
||||
size_t* real_size_out);
|
||||
|
||||
/// Allocate enough memory pages to contain min_size bytes.
|
||||
GU_API_DECL void*
|
||||
gu_mem_page_alloc(size_t min_size, size_t* real_size_out);
|
||||
|
||||
/// Free a memory buffer.
|
||||
GU_API_DECL void
|
||||
|
||||
@@ -100,6 +100,11 @@ gu_seq_free(GuSeq* seq)
|
||||
gu_mem_buf_free(seq);
|
||||
}
|
||||
|
||||
static void
|
||||
gu_dummy_finalizer(GuFinalizer* self)
|
||||
{
|
||||
}
|
||||
|
||||
GU_API void
|
||||
gu_buf_require(GuBuf* buf, size_t req_len)
|
||||
{
|
||||
@@ -109,7 +114,9 @@ gu_buf_require(GuBuf* buf, size_t req_len)
|
||||
|
||||
size_t req_size = sizeof(GuSeq) + buf->elem_size * req_len;
|
||||
size_t real_size;
|
||||
|
||||
|
||||
gu_require(buf->fin.fn != gu_dummy_finalizer);
|
||||
|
||||
if (buf->seq == NULL || buf->seq == gu_empty_seq()) {
|
||||
buf->seq = gu_mem_buf_alloc(req_size, &real_size);
|
||||
buf->seq->len = 0;
|
||||
@@ -164,6 +171,24 @@ gu_buf_freeze(GuBuf* buf, GuPool* pool)
|
||||
return seq;
|
||||
}
|
||||
|
||||
GU_API void
|
||||
gu_buf_evacuate(GuBuf* buf, GuPool* pool)
|
||||
{
|
||||
if (buf->seq != gu_empty_seq()) {
|
||||
size_t len = gu_buf_length(buf);
|
||||
|
||||
GuSeq* seq = gu_make_seq(buf->elem_size, len, pool);
|
||||
void* bufdata = gu_buf_data(buf);
|
||||
void* seqdata = gu_seq_data(seq);
|
||||
memcpy(seqdata, bufdata, buf->elem_size * len);
|
||||
gu_mem_buf_free(buf->seq);
|
||||
|
||||
buf->seq = seq;
|
||||
buf->fin.fn = gu_dummy_finalizer;
|
||||
buf->avail_len = len;
|
||||
}
|
||||
}
|
||||
|
||||
GU_API void*
|
||||
gu_buf_insert(GuBuf* buf, size_t index)
|
||||
{
|
||||
@@ -335,13 +360,8 @@ GU_API void
|
||||
gu_buf_heap_pop(GuBuf *buf, GuOrder *order, void* data_out)
|
||||
{
|
||||
const void* last = gu_buf_trim(buf); // raises an error if empty
|
||||
|
||||
if (gu_buf_length(buf) > 0) {
|
||||
memcpy(data_out, buf->seq->data, buf->elem_size);
|
||||
gu_heap_siftup(buf, order, last, 0);
|
||||
} else {
|
||||
memcpy(data_out, last, buf->elem_size);
|
||||
}
|
||||
memcpy(data_out, buf->seq->data, buf->elem_size);
|
||||
gu_heap_siftup(buf, order, last, 0);
|
||||
}
|
||||
|
||||
GU_API void
|
||||
|
||||
@@ -182,6 +182,9 @@ gu_buf_heapify(GuBuf *buf, GuOrder *order);
|
||||
|
||||
GU_API_DECL GuSeq*
|
||||
gu_buf_freeze(GuBuf* buf, GuPool* pool);
|
||||
|
||||
GU_API_DECL void
|
||||
gu_buf_evacuate(GuBuf* buf, GuPool* pool);
|
||||
#endif // GU_SEQ_H_
|
||||
|
||||
#ifdef GU_STRING_H_
|
||||
|
||||
@@ -344,8 +344,9 @@ struct PgfCCat {
|
||||
PgfCncFuns* linrefs;
|
||||
size_t n_synprods;
|
||||
PgfProductionSeq* prods;
|
||||
float viterbi_prob;
|
||||
prob_t viterbi_prob;
|
||||
int fid;
|
||||
int chunk_count;
|
||||
PgfItemConts* conts;
|
||||
struct PgfAnswers* answers;
|
||||
GuFinalizer fin[0];
|
||||
|
||||
@@ -198,16 +198,16 @@ pgf_literal_hash(GuHash h, PgfLiteral lit);
|
||||
PGF_API_DECL GuHash
|
||||
pgf_expr_hash(GuHash h, PgfExpr e);
|
||||
|
||||
PGF_API size_t
|
||||
PGF_API_DECL size_t
|
||||
pgf_expr_size(PgfExpr expr);
|
||||
|
||||
PGF_API GuSeq*
|
||||
PGF_API_DECL GuSeq*
|
||||
pgf_expr_functions(PgfExpr expr, GuPool* pool);
|
||||
|
||||
PGF_API PgfExpr
|
||||
PGF_API_DECL PgfExpr
|
||||
pgf_expr_substitute(PgfExpr expr, GuSeq* meta_values, GuPool* pool);
|
||||
|
||||
PGF_API PgfType*
|
||||
PGF_API_DECL PgfType*
|
||||
pgf_type_substitute(PgfType* type, GuSeq* meta_values, GuPool* pool);
|
||||
|
||||
typedef struct PgfPrintContext PgfPrintContext;
|
||||
|
||||
@@ -5,9 +5,6 @@
|
||||
#include <pgf/reasoner.h>
|
||||
#include <pgf/reader.h>
|
||||
#include "lightning.h"
|
||||
#if defined(__MINGW32__) || defined(_MSC_VER)
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
//#define PGF_JIT_DEBUG
|
||||
|
||||
@@ -43,18 +40,6 @@ typedef struct {
|
||||
#define JIT_VSTATE JIT_V1
|
||||
#define JIT_VCLOS JIT_V2
|
||||
|
||||
#if defined(__MINGW32__) || defined(_MSC_VER)
|
||||
#include <windows.h>
|
||||
|
||||
static int
|
||||
getpagesize()
|
||||
{
|
||||
SYSTEM_INFO system_info;
|
||||
GetSystemInfo(&system_info);
|
||||
return system_info.dwPageSize;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
static void
|
||||
pgf_jit_finalize_page(GuFinalizer* self)
|
||||
@@ -65,19 +50,8 @@ pgf_jit_finalize_page(GuFinalizer* self)
|
||||
static void
|
||||
pgf_jit_alloc_page(PgfReader* rdr)
|
||||
{
|
||||
void *page;
|
||||
|
||||
size_t page_size = getpagesize();
|
||||
|
||||
#if defined(ANDROID)
|
||||
if ((page = memalign(page_size, page_size)) == NULL) {
|
||||
#elif defined(__MINGW32__) || defined(_MSC_VER)
|
||||
if ((page = malloc(page_size)) == NULL) {
|
||||
#else
|
||||
if (posix_memalign(&page, page_size, page_size) != 0) {
|
||||
#endif
|
||||
gu_fatal("Memory allocation failed");
|
||||
}
|
||||
size_t page_size;
|
||||
void *page = gu_mem_page_alloc(sizeof(GuFinalizer), &page_size);
|
||||
|
||||
GuFinalizer* fin = page;
|
||||
fin->fn = pgf_jit_finalize_page;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -162,6 +162,22 @@ PGF_API_DECL void
|
||||
pgf_lookup_morpho(PgfConcr *concr, GuString sentence,
|
||||
PgfMorphoCallback* callback, GuExn* err);
|
||||
|
||||
typedef struct {
|
||||
size_t pos;
|
||||
GuString ptr;
|
||||
} PgfCohortSpot;
|
||||
|
||||
typedef struct {
|
||||
PgfCohortSpot start;
|
||||
PgfCohortSpot end;
|
||||
GuBuf* buf;
|
||||
} PgfCohortRange;
|
||||
|
||||
PGF_API_DECL GuEnum*
|
||||
pgf_lookup_cohorts(PgfConcr *concr, GuString sentence,
|
||||
PgfMorphoCallback* callback,
|
||||
GuPool* pool, GuExn* err);
|
||||
|
||||
typedef struct PgfFullFormEntry PgfFullFormEntry;
|
||||
|
||||
PGF_API_DECL GuEnum*
|
||||
|
||||
@@ -98,6 +98,74 @@ pgf_print_fid(int fid, GuOut* out, GuExn* err)
|
||||
gu_printf(out, err, "C%d", fid);
|
||||
}
|
||||
|
||||
PGF_INTERNAL void
|
||||
pgf_print_production_args(PgfPArgs* args,
|
||||
GuOut* out, GuExn* err)
|
||||
{
|
||||
size_t n_args = gu_seq_length(args);
|
||||
for (size_t j = 0; j < n_args; j++) {
|
||||
if (j > 0)
|
||||
gu_putc(',',out,err);
|
||||
|
||||
PgfPArg arg = gu_seq_get(args, PgfPArg, j);
|
||||
|
||||
if (arg.hypos != NULL &&
|
||||
gu_seq_length(arg.hypos) > 0) {
|
||||
size_t n_hypos = gu_seq_length(arg.hypos);
|
||||
for (size_t k = 0; k < n_hypos; k++) {
|
||||
PgfCCat *hypo = gu_seq_get(arg.hypos, PgfCCat*, k);
|
||||
pgf_print_fid(hypo->fid, out, err);
|
||||
gu_putc(' ',out,err);
|
||||
}
|
||||
gu_puts("-> ",out,err);
|
||||
}
|
||||
|
||||
pgf_print_fid(arg.ccat->fid, out, err);
|
||||
}
|
||||
}
|
||||
|
||||
PGF_INTERNAL void
|
||||
pgf_print_production(int fid, PgfProduction prod,
|
||||
GuOut *out, GuExn* err)
|
||||
{
|
||||
pgf_print_fid(fid, out, err);
|
||||
gu_puts(" -> ", out, err);
|
||||
|
||||
GuVariantInfo i = gu_variant_open(prod);
|
||||
switch (i.tag) {
|
||||
case PGF_PRODUCTION_APPLY: {
|
||||
PgfProductionApply* papp = i.data;
|
||||
gu_printf(out,err,"F%d(",papp->fun->funid);
|
||||
if (papp->fun->ep != NULL) {
|
||||
pgf_print_expr(papp->fun->ep->expr, NULL, 0, out, err);
|
||||
} else {
|
||||
PgfPArg* parg = gu_seq_index(papp->args, PgfPArg, 0);
|
||||
gu_printf(out,err,"linref %s", parg->ccat->cnccat->abscat->name);
|
||||
}
|
||||
gu_printf(out,err,")[");
|
||||
pgf_print_production_args(papp->args,out,err);
|
||||
gu_printf(out,err,"]\n");
|
||||
break;
|
||||
}
|
||||
case PGF_PRODUCTION_COERCE: {
|
||||
PgfProductionCoerce* pcoerce = i.data;
|
||||
gu_puts("_[",out,err);
|
||||
pgf_print_fid(pcoerce->coerce->fid, out, err);
|
||||
gu_puts("]\n",out,err);
|
||||
break;
|
||||
}
|
||||
case PGF_PRODUCTION_EXTERN: {
|
||||
PgfProductionExtern* pext = i.data;
|
||||
gu_printf(out,err,"<extern>(");
|
||||
pgf_print_expr(pext->ep->expr, NULL, 0, out, err);
|
||||
gu_printf(out,err,")[]\n");
|
||||
break;
|
||||
}
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_print_productions(GuMapItor* fn, const void* key, void* value,
|
||||
GuExn* err)
|
||||
@@ -111,48 +179,7 @@ pgf_print_productions(GuMapItor* fn, const void* key, void* value,
|
||||
size_t n_prods = gu_seq_length(ccat->prods);
|
||||
for (size_t i = 0; i < n_prods; i++) {
|
||||
PgfProduction prod = gu_seq_get(ccat->prods, PgfProduction, i);
|
||||
|
||||
gu_puts(" ", out, err);
|
||||
pgf_print_fid(fid, out, err);
|
||||
gu_puts(" -> ", out, err);
|
||||
|
||||
GuVariantInfo i = gu_variant_open(prod);
|
||||
switch (i.tag) {
|
||||
case PGF_PRODUCTION_APPLY: {
|
||||
PgfProductionApply* papp = i.data;
|
||||
gu_printf(out,err,"F%d[",papp->fun->funid);
|
||||
size_t n_args = gu_seq_length(papp->args);
|
||||
for (size_t j = 0; j < n_args; j++) {
|
||||
if (j > 0)
|
||||
gu_putc(',',out,err);
|
||||
|
||||
PgfPArg arg = gu_seq_get(papp->args, PgfPArg, j);
|
||||
|
||||
if (arg.hypos != NULL) {
|
||||
size_t n_hypos = gu_seq_length(arg.hypos);
|
||||
for (size_t k = 0; k < n_hypos; k++) {
|
||||
if (k > 0)
|
||||
gu_putc(' ',out,err);
|
||||
PgfCCat *hypo = gu_seq_get(arg.hypos, PgfCCat*, k);
|
||||
pgf_print_fid(hypo->fid, out, err);
|
||||
}
|
||||
}
|
||||
|
||||
pgf_print_fid(arg.ccat->fid, out, err);
|
||||
}
|
||||
gu_printf(out,err,"]\n");
|
||||
break;
|
||||
}
|
||||
case PGF_PRODUCTION_COERCE: {
|
||||
PgfProductionCoerce* pcoerce = i.data;
|
||||
gu_puts("_[", out, err);
|
||||
pgf_print_fid(pcoerce->coerce->fid, out, err);
|
||||
gu_puts("]\n", out, err);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
pgf_print_production(fid, prod, out, err);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -328,16 +328,20 @@ pgf_read_patt(PgfReader* rdr)
|
||||
uint8_t tag = pgf_read_tag(rdr);
|
||||
switch (tag) {
|
||||
case PGF_PATT_APP: {
|
||||
PgfCId ctor = pgf_read_cid(rdr, rdr->opool);
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
|
||||
size_t n_args = pgf_read_len(rdr);
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
|
||||
PgfPattApp *papp =
|
||||
gu_new_variant(PGF_PATT_APP,
|
||||
PgfPattApp,
|
||||
&patt, rdr->opool);
|
||||
papp->ctor = pgf_read_cid(rdr, rdr->opool);
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
|
||||
papp->n_args = pgf_read_len(rdr);
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
|
||||
gu_new_flex_variant(PGF_PATT_APP,
|
||||
PgfPattApp,
|
||||
args, n_args,
|
||||
&patt, rdr->opool);
|
||||
papp->ctor = ctor;
|
||||
papp->n_args = n_args;
|
||||
|
||||
for (size_t i = 0; i < papp->n_args; i++) {
|
||||
papp->args[i] = pgf_read_patt(rdr);
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
@@ -840,6 +844,7 @@ pgf_read_fid(PgfReader* rdr, PgfConcr* concr)
|
||||
ccat->prods = NULL;
|
||||
ccat->viterbi_prob = 0;
|
||||
ccat->fid = fid;
|
||||
ccat->chunk_count = 1;
|
||||
ccat->conts = NULL;
|
||||
ccat->answers = NULL;
|
||||
|
||||
@@ -1077,6 +1082,7 @@ pgf_read_cnccat(PgfReader* rdr, PgfAbstr* abstr, PgfConcr* concr, PgfCId name)
|
||||
ccat->prods = NULL;
|
||||
ccat->viterbi_prob = 0;
|
||||
ccat->fid = fid;
|
||||
ccat->chunk_count = 1;
|
||||
ccat->conts = NULL;
|
||||
ccat->answers = NULL;
|
||||
|
||||
|
||||
516
src/runtime/c/pgf/scanner.c
Normal file
516
src/runtime/c/pgf/scanner.c
Normal file
@@ -0,0 +1,516 @@
|
||||
#include <pgf/data.h>
|
||||
#include <pgf/expr.h>
|
||||
#include <pgf/linearizer.h>
|
||||
#include <gu/utf8.h>
|
||||
|
||||
PGF_INTERNAL int
|
||||
cmp_string(PgfCohortSpot* spot, GuString tok,
|
||||
bool case_sensitive)
|
||||
{
|
||||
for (;;) {
|
||||
GuUCS c2 = gu_utf8_decode((const uint8_t**) &tok);
|
||||
if (c2 == 0)
|
||||
return 0;
|
||||
|
||||
const uint8_t* p = (uint8_t*) spot->ptr;
|
||||
GuUCS c1 = gu_utf8_decode(&p);
|
||||
if (c1 == 0)
|
||||
return -1;
|
||||
|
||||
if (!case_sensitive) {
|
||||
c1 = gu_ucs_to_lower(c1);
|
||||
c2 = gu_ucs_to_lower(c2);
|
||||
}
|
||||
|
||||
if (c1 != c2)
|
||||
return (c1-c2);
|
||||
|
||||
spot->ptr = (GuString) p;
|
||||
spot->pos++;
|
||||
}
|
||||
}
|
||||
|
||||
PGF_INTERNAL bool
|
||||
skip_space(GuString* psent, size_t* ppos)
|
||||
{
|
||||
const uint8_t* p = (uint8_t*) *psent;
|
||||
if (!gu_ucs_is_space(gu_utf8_decode(&p)))
|
||||
return false;
|
||||
|
||||
*psent = (GuString) p;
|
||||
(*ppos)++;
|
||||
return true;
|
||||
}
|
||||
|
||||
PGF_INTERNAL int
|
||||
pgf_symbols_cmp(PgfCohortSpot* spot,
|
||||
PgfSymbols* syms, size_t* sym_idx,
|
||||
bool case_sensitive)
|
||||
{
|
||||
size_t n_syms = gu_seq_length(syms);
|
||||
while (*sym_idx < n_syms) {
|
||||
PgfSymbol sym = gu_seq_get(syms, PgfSymbol, *sym_idx);
|
||||
|
||||
if (*sym_idx > 0) {
|
||||
if (!skip_space(&spot->ptr,&spot->pos)) {
|
||||
if (*spot->ptr == 0)
|
||||
return -1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
while (*spot->ptr != 0) {
|
||||
if (!skip_space(&spot->ptr,&spot->pos))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
GuVariantInfo inf = gu_variant_open(sym);
|
||||
switch (inf.tag) {
|
||||
case PGF_SYMBOL_CAT:
|
||||
case PGF_SYMBOL_LIT:
|
||||
case PGF_SYMBOL_VAR: {
|
||||
if (*spot->ptr == 0)
|
||||
return -1;
|
||||
return 1;
|
||||
}
|
||||
case PGF_SYMBOL_KS: {
|
||||
PgfSymbolKS* pks = inf.data;
|
||||
if (*spot->ptr == 0)
|
||||
return -1;
|
||||
|
||||
int cmp = cmp_string(spot,pks->token, case_sensitive);
|
||||
if (cmp != 0)
|
||||
return cmp;
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_KP:
|
||||
case PGF_SYMBOL_BIND:
|
||||
case PGF_SYMBOL_NE:
|
||||
case PGF_SYMBOL_SOFT_BIND:
|
||||
case PGF_SYMBOL_SOFT_SPACE:
|
||||
case PGF_SYMBOL_CAPIT:
|
||||
case PGF_SYMBOL_ALL_CAPIT: {
|
||||
return -1;
|
||||
}
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
|
||||
(*sym_idx)++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_morpho_iter(PgfProductionIdx* idx,
|
||||
PgfMorphoCallback* callback,
|
||||
GuExn* err)
|
||||
{
|
||||
size_t n_entries = gu_buf_length(idx);
|
||||
for (size_t i = 0; i < n_entries; i++) {
|
||||
PgfProductionIdxEntry* entry =
|
||||
gu_buf_index(idx, PgfProductionIdxEntry, i);
|
||||
|
||||
PgfCId lemma = entry->papp->fun->absfun->name;
|
||||
GuString analysis = entry->ccat->cnccat->labels[entry->lin_idx];
|
||||
|
||||
prob_t prob = entry->ccat->cnccat->abscat->prob +
|
||||
entry->papp->fun->absfun->ep.prob;
|
||||
callback->callback(callback,
|
||||
lemma, analysis, prob, err);
|
||||
if (!gu_ok(err))
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
GuOrder order;
|
||||
bool case_sensitive;
|
||||
} PgfSequenceOrder;
|
||||
|
||||
PGF_INTERNAL bool
|
||||
pgf_is_case_sensitive(PgfConcr* concr)
|
||||
{
|
||||
PgfFlag* flag =
|
||||
gu_seq_binsearch(concr->cflags, pgf_flag_order, PgfFlag, "case_sensitive");
|
||||
if (flag != NULL) {
|
||||
GuVariantInfo inf = gu_variant_open(flag->value);
|
||||
if (inf.tag == PGF_LITERAL_STR) {
|
||||
PgfLiteralStr* lstr = inf.data;
|
||||
if (strcmp(lstr->val, "off") == 0)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static int
|
||||
pgf_sequence_cmp_fn(GuOrder* order, const void* p1, const void* p2)
|
||||
{
|
||||
PgfSequenceOrder* self = gu_container(order, PgfSequenceOrder, order);
|
||||
|
||||
PgfCohortSpot spot = {0, (GuString) p1};
|
||||
|
||||
const PgfSequence* sp2 = p2;
|
||||
|
||||
size_t sym_idx = 0;
|
||||
int res = pgf_symbols_cmp(&spot, sp2->syms, &sym_idx, self->case_sensitive);
|
||||
if (res == 0 && (*spot.ptr != 0 || sym_idx != gu_seq_length(sp2->syms))) {
|
||||
res = 1;
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
PGF_API void
|
||||
pgf_lookup_morpho(PgfConcr *concr, GuString sentence,
|
||||
PgfMorphoCallback* callback, GuExn* err)
|
||||
{
|
||||
if (concr->sequences == NULL) {
|
||||
GuExnData* err_data = gu_raise(err, PgfExn);
|
||||
if (err_data) {
|
||||
err_data->data = "The concrete syntax is not loaded";
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
size_t index = 0;
|
||||
PgfSequenceOrder order = { { pgf_sequence_cmp_fn },
|
||||
pgf_is_case_sensitive(concr) };
|
||||
if (gu_seq_binsearch_index(concr->sequences, &order.order,
|
||||
PgfSequence, (void*) sentence,
|
||||
&index)) {
|
||||
PgfSequence* seq = NULL;
|
||||
|
||||
/* If the match is case-insensitive then there might be more
|
||||
* matches around the current index. We must check the neighbour
|
||||
* sequences for matching as well.
|
||||
*/
|
||||
|
||||
if (!order.case_sensitive) {
|
||||
size_t i = index;
|
||||
while (i > 0) {
|
||||
seq = gu_seq_index(concr->sequences, PgfSequence, i-1);
|
||||
|
||||
size_t sym_idx = 0;
|
||||
PgfCohortSpot spot = {0, sentence};
|
||||
if (pgf_symbols_cmp(&spot, seq->syms, &sym_idx, order.case_sensitive) != 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (seq->idx != NULL)
|
||||
pgf_morpho_iter(seq->idx, callback, err);
|
||||
|
||||
i--;
|
||||
}
|
||||
}
|
||||
|
||||
seq = gu_seq_index(concr->sequences, PgfSequence, index);
|
||||
if (seq->idx != NULL)
|
||||
pgf_morpho_iter(seq->idx, callback, err);
|
||||
|
||||
if (!order.case_sensitive) {
|
||||
size_t i = index+1;
|
||||
while (i < gu_seq_length(concr->sequences)) {
|
||||
seq = gu_seq_index(concr->sequences, PgfSequence, i);
|
||||
|
||||
size_t sym_idx = 0;
|
||||
PgfCohortSpot spot = {0, sentence};
|
||||
if (pgf_symbols_cmp(&spot, seq->syms, &sym_idx, order.case_sensitive) != 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (seq->idx != NULL)
|
||||
pgf_morpho_iter(seq->idx, callback, err);
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
GuEnum en;
|
||||
PgfConcr* concr;
|
||||
GuString sentence;
|
||||
GuString current;
|
||||
size_t len;
|
||||
PgfMorphoCallback* callback;
|
||||
GuExn* err;
|
||||
bool case_sensitive;
|
||||
GuBuf* spots;
|
||||
GuBuf* found;
|
||||
} PgfCohortsState;
|
||||
|
||||
static int
|
||||
cmp_cohort_spot(GuOrder* self, const void* a, const void* b)
|
||||
{
|
||||
PgfCohortSpot *s1 = (PgfCohortSpot *) a;
|
||||
PgfCohortSpot *s2 = (PgfCohortSpot *) b;
|
||||
|
||||
return (s1->ptr-s2->ptr);
|
||||
}
|
||||
|
||||
static GuOrder
|
||||
pgf_cohort_spot_order[1] = {{ cmp_cohort_spot }};
|
||||
|
||||
static void
|
||||
pgf_lookup_cohorts_helper(PgfCohortsState *state, PgfCohortSpot* spot,
|
||||
int i, int j, ptrdiff_t min, ptrdiff_t max)
|
||||
{
|
||||
// This is a variation of a binary search algorithm which
|
||||
// can retrieve all prefixes of a string with minimal
|
||||
// comparisons, i.e. there is no need to lookup every
|
||||
// prefix separately.
|
||||
|
||||
while (i <= j) {
|
||||
int k = (i+j) / 2;
|
||||
PgfSequence* seq = gu_seq_index(state->concr->sequences, PgfSequence, k);
|
||||
|
||||
PgfCohortSpot current = *spot;
|
||||
|
||||
size_t sym_idx = 0;
|
||||
int cmp = pgf_symbols_cmp(¤t, seq->syms, &sym_idx, state->case_sensitive);
|
||||
if (cmp < 0) {
|
||||
j = k-1;
|
||||
} else if (cmp > 0) {
|
||||
ptrdiff_t len = current.ptr - spot->ptr;
|
||||
|
||||
if (min <= len)
|
||||
pgf_lookup_cohorts_helper(state, spot, i, k-1, min, len);
|
||||
|
||||
if (len+1 <= max)
|
||||
pgf_lookup_cohorts_helper(state, spot, k+1, j, len+1, max);
|
||||
|
||||
break;
|
||||
} else {
|
||||
ptrdiff_t len = current.ptr - spot->ptr;
|
||||
|
||||
if (min <= len)
|
||||
pgf_lookup_cohorts_helper(state, spot, i, k-1, min, len);
|
||||
|
||||
if (seq->idx != NULL && gu_buf_length(seq->idx) > 0) {
|
||||
PgfCohortRange* range = gu_buf_insert(state->found, 0);
|
||||
range->start = *spot;
|
||||
range->end = current;
|
||||
range->buf = seq->idx;
|
||||
}
|
||||
|
||||
while (*current.ptr != 0) {
|
||||
if (!skip_space(¤t.ptr, ¤t.pos))
|
||||
break;
|
||||
}
|
||||
|
||||
gu_buf_heap_push(state->spots, pgf_cohort_spot_order, ¤t);
|
||||
|
||||
if (len <= max)
|
||||
pgf_lookup_cohorts_helper(state, spot, k+1, j, len, max);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_lookup_cohorts_enum_next(GuEnum* self, void* to, GuPool* pool)
|
||||
{
|
||||
PgfCohortsState* state = gu_container(self, PgfCohortsState, en);
|
||||
|
||||
while (gu_buf_length(state->found) == 0 &&
|
||||
gu_buf_length(state->spots) > 0) {
|
||||
PgfCohortSpot spot;
|
||||
gu_buf_heap_pop(state->spots, pgf_cohort_spot_order, &spot);
|
||||
|
||||
if (spot.ptr == state->current)
|
||||
continue;
|
||||
|
||||
if (*spot.ptr == 0)
|
||||
break;
|
||||
|
||||
pgf_lookup_cohorts_helper
|
||||
(state, &spot,
|
||||
0, gu_seq_length(state->concr->sequences)-1,
|
||||
1, (state->sentence+state->len)-spot.ptr);
|
||||
|
||||
if (gu_buf_length(state->found) == 0) {
|
||||
// skip one character and try again
|
||||
gu_utf8_decode((const uint8_t**) &spot.ptr);
|
||||
spot.pos++;
|
||||
gu_buf_heap_push(state->spots, pgf_cohort_spot_order, &spot);
|
||||
}
|
||||
}
|
||||
|
||||
PgfCohortRange* pRes = (PgfCohortRange*)to;
|
||||
|
||||
if (gu_buf_length(state->found) == 0) {
|
||||
pRes->start.pos = 0;
|
||||
pRes->start.ptr = NULL;
|
||||
pRes->end.pos = 0;
|
||||
pRes->end.ptr = NULL;
|
||||
pRes->buf = NULL;
|
||||
state->current = NULL;
|
||||
return;
|
||||
} else do {
|
||||
*pRes = gu_buf_pop(state->found, PgfCohortRange);
|
||||
state->current = pRes->start.ptr;
|
||||
pgf_morpho_iter(pRes->buf, state->callback, state->err);
|
||||
} while (gu_buf_length(state->found) > 0 &&
|
||||
gu_buf_index_last(state->found, PgfCohortRange)->end.ptr == pRes->end.ptr);
|
||||
|
||||
}
|
||||
|
||||
PGF_API GuEnum*
|
||||
pgf_lookup_cohorts(PgfConcr *concr, GuString sentence,
|
||||
PgfMorphoCallback* callback,
|
||||
GuPool* pool, GuExn* err)
|
||||
{
|
||||
if (concr->sequences == NULL) {
|
||||
GuExnData* err_data = gu_raise(err, PgfExn);
|
||||
if (err_data) {
|
||||
err_data->data = "The concrete syntax is not loaded";
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
PgfCohortsState* state = gu_new(PgfCohortsState, pool);
|
||||
state->en.next = pgf_lookup_cohorts_enum_next;
|
||||
state->concr = concr;
|
||||
state->sentence= sentence;
|
||||
state->len = strlen(sentence);
|
||||
state->callback= callback;
|
||||
state->err = err;
|
||||
state->case_sensitive = pgf_is_case_sensitive(concr);
|
||||
state->spots = gu_new_buf(PgfCohortSpot, pool);
|
||||
state->found = gu_new_buf(PgfCohortRange, pool);
|
||||
|
||||
PgfCohortSpot spot = {0,sentence};
|
||||
while (*spot.ptr != 0) {
|
||||
if (!skip_space(&spot.ptr, &spot.pos))
|
||||
break;
|
||||
}
|
||||
|
||||
gu_buf_heap_push(state->spots, pgf_cohort_spot_order, &spot);
|
||||
|
||||
return &state->en;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
GuEnum en;
|
||||
PgfSequences* sequences;
|
||||
GuString prefix;
|
||||
size_t seq_idx;
|
||||
bool case_sensitive;
|
||||
} PgfFullFormState;
|
||||
|
||||
struct PgfFullFormEntry {
|
||||
GuString tokens;
|
||||
PgfProductionIdx* idx;
|
||||
};
|
||||
|
||||
static void
|
||||
gu_fullform_enum_next(GuEnum* self, void* to, GuPool* pool)
|
||||
{
|
||||
PgfFullFormState* st = gu_container(self, PgfFullFormState, en);
|
||||
PgfFullFormEntry* entry = NULL;
|
||||
|
||||
if (st->sequences != NULL) {
|
||||
size_t n_seqs = gu_seq_length(st->sequences);
|
||||
while (st->seq_idx < n_seqs) {
|
||||
PgfSequence* seq = gu_seq_index(st->sequences, PgfSequence, st->seq_idx);
|
||||
GuString tokens = pgf_get_tokens(seq->syms, 0, pool);
|
||||
|
||||
PgfCohortSpot spot = {0, st->prefix};
|
||||
if (cmp_string(&spot, tokens, st->case_sensitive) > 0 || *spot.ptr != 0) {
|
||||
st->seq_idx = n_seqs;
|
||||
break;
|
||||
}
|
||||
|
||||
if (*tokens != 0 && seq->idx != NULL) {
|
||||
entry = gu_new(PgfFullFormEntry, pool);
|
||||
entry->tokens = tokens;
|
||||
entry->idx = seq->idx;
|
||||
|
||||
st->seq_idx++;
|
||||
break;
|
||||
}
|
||||
|
||||
st->seq_idx++;
|
||||
}
|
||||
}
|
||||
|
||||
*((PgfFullFormEntry**) to) = entry;
|
||||
}
|
||||
|
||||
PGF_API GuEnum*
|
||||
pgf_fullform_lexicon(PgfConcr *concr, GuPool* pool)
|
||||
{
|
||||
PgfFullFormState* st = gu_new(PgfFullFormState, pool);
|
||||
st->en.next = gu_fullform_enum_next;
|
||||
st->sequences = concr->sequences;
|
||||
st->prefix = "";
|
||||
st->seq_idx = 0;
|
||||
st->case_sensitive = true;
|
||||
return &st->en;
|
||||
}
|
||||
|
||||
PGF_API GuString
|
||||
pgf_fullform_get_string(PgfFullFormEntry* entry)
|
||||
{
|
||||
return entry->tokens;
|
||||
}
|
||||
|
||||
PGF_API void
|
||||
pgf_fullform_get_analyses(PgfFullFormEntry* entry,
|
||||
PgfMorphoCallback* callback, GuExn* err)
|
||||
{
|
||||
pgf_morpho_iter(entry->idx, callback, err);
|
||||
}
|
||||
|
||||
PGF_API GuEnum*
|
||||
pgf_lookup_word_prefix(PgfConcr *concr, GuString prefix,
|
||||
GuPool* pool, GuExn* err)
|
||||
{
|
||||
if (concr->sequences == NULL) {
|
||||
GuExnData* err_data = gu_raise(err, PgfExn);
|
||||
if (err_data) {
|
||||
err_data->data = "The concrete syntax is not loaded";
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
PgfFullFormState* state = gu_new(PgfFullFormState, pool);
|
||||
state->en.next = gu_fullform_enum_next;
|
||||
state->sequences = concr->sequences;
|
||||
state->prefix = prefix;
|
||||
state->seq_idx = 0;
|
||||
state->case_sensitive = pgf_is_case_sensitive(concr);
|
||||
|
||||
PgfSequenceOrder order = { { pgf_sequence_cmp_fn },
|
||||
state->case_sensitive };
|
||||
if (!gu_seq_binsearch_index(concr->sequences, &order.order,
|
||||
PgfSequence, (void*) prefix,
|
||||
&state->seq_idx)) {
|
||||
state->seq_idx++;
|
||||
} else if (!state->case_sensitive) {
|
||||
/* If the match is case-insensitive then there might be more
|
||||
* matches around the current index. Since we scroll down
|
||||
* anyway, it is enough to search upwards now.
|
||||
*/
|
||||
|
||||
while (state->seq_idx > 0) {
|
||||
PgfSequence* seq =
|
||||
gu_seq_index(concr->sequences, PgfSequence, state->seq_idx-1);
|
||||
|
||||
size_t sym_idx = 0;
|
||||
PgfCohortSpot spot = {0, state->prefix};
|
||||
if (pgf_symbols_cmp(&spot, seq->syms, &sym_idx, state->case_sensitive) > 0 || *spot.ptr != 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
state->seq_idx--;
|
||||
}
|
||||
}
|
||||
|
||||
return &state->en;
|
||||
}
|
||||
@@ -499,14 +499,17 @@ store_expr(SgSG* sg,
|
||||
PgfExprLit* elit = ei.data;
|
||||
|
||||
Mem mem[2];
|
||||
size_t len = 0;
|
||||
|
||||
GuVariantInfo li = gu_variant_open(elit->lit);
|
||||
switch (li.tag) {
|
||||
case PGF_LITERAL_STR: {
|
||||
PgfLiteralStr* lstr = li.data;
|
||||
|
||||
len = strlen(lstr->val);
|
||||
|
||||
mem[0].flags = MEM_Str;
|
||||
mem[0].n = strlen(lstr->val);
|
||||
mem[0].n = len;
|
||||
mem[0].z = lstr->val;
|
||||
break;
|
||||
}
|
||||
@@ -515,6 +518,7 @@ store_expr(SgSG* sg,
|
||||
|
||||
mem[0].flags = MEM_Int;
|
||||
mem[0].u.i = lint->val;
|
||||
len = sizeof(mem[0].u.i);
|
||||
break;
|
||||
}
|
||||
case PGF_LITERAL_FLT: {
|
||||
@@ -522,6 +526,7 @@ store_expr(SgSG* sg,
|
||||
|
||||
mem[0].flags = MEM_Real;
|
||||
mem[0].u.r = lflt->val;
|
||||
len = sizeof(mem[0].u.r);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@@ -556,7 +561,7 @@ store_expr(SgSG* sg,
|
||||
int serial_type_arg = sqlite3BtreeSerialType(&mem[1], file_format);
|
||||
int serial_type_arg_hdr_len = sqlite3BtreeVarintLen(serial_type_arg);
|
||||
|
||||
unsigned char* buf = malloc(1+serial_type_lit_hdr_len+(serial_type_arg_hdr_len > 1 ? serial_type_arg_hdr_len : 1)+mem[0].n+8);
|
||||
unsigned char* buf = malloc(1+serial_type_lit_hdr_len+(serial_type_arg_hdr_len > 1 ? serial_type_arg_hdr_len : 1)+len+8);
|
||||
unsigned char* p = buf;
|
||||
*p++ = 1+serial_type_lit_hdr_len+serial_type_arg_hdr_len;
|
||||
p += putVarint32(p, serial_type_lit);
|
||||
|
||||
@@ -4835,7 +4835,6 @@ SQLITE_PRIVATE int sqlite3HeaderSizeBtree(void);
|
||||
SQLITE_PRIVATE void sqlite3VdbeRecordUnpack(KeyInfo*,int,const void*,UnpackedRecord*);
|
||||
SQLITE_PRIVATE UnpackedRecord *sqlite3VdbeAllocUnpackedRecord(KeyInfo *, char *, int, char **);
|
||||
|
||||
typedef int (*RecordCompare)(int,const void*,UnpackedRecord*);
|
||||
SQLITE_PRIVATE RecordCompare sqlite3VdbeFindCompare(UnpackedRecord*);
|
||||
|
||||
/************** End of btreeInt.h ********************************************/
|
||||
|
||||
@@ -66,7 +66,7 @@ module PGF2 (-- * PGF
|
||||
-- ** Generation
|
||||
generateAll,
|
||||
-- ** Morphological Analysis
|
||||
MorphoAnalysis, lookupMorpho, fullFormLexicon,
|
||||
MorphoAnalysis, lookupMorpho, lookupCohorts, fullFormLexicon,
|
||||
-- ** Visualizations
|
||||
GraphvizOptions(..), graphvizDefaults,
|
||||
graphvizAbstractTree, graphvizParseTree,
|
||||
@@ -168,8 +168,6 @@ showPGF p =
|
||||
languages :: PGF -> Map.Map ConcName Concr
|
||||
languages p = langs p
|
||||
|
||||
-- | The abstract language name is the name of the top-level
|
||||
-- abstract module
|
||||
concreteName :: Concr -> ConcName
|
||||
concreteName c = unsafePerformIO (peekUtf8CString =<< pgf_concrete_name (concr c))
|
||||
|
||||
@@ -893,8 +891,23 @@ newGraphvizOptions pool opts = do
|
||||
-- Functions using Concr
|
||||
-- Morpho analyses, parsing & linearization
|
||||
|
||||
type MorphoAnalysis = (Fun,Cat,Float)
|
||||
-- | This triple is returned by all functions that deal with
|
||||
-- the grammar's lexicon. Its first element is the name of an abstract
|
||||
-- lexical function which can produce a given word or
|
||||
-- a multiword expression (i.e. this is the lemma).
|
||||
-- After that follows a string which describes
|
||||
-- the particular inflection form.
|
||||
--
|
||||
-- The last element is a logarithm from the
|
||||
-- the probability of the function. The probability is not
|
||||
-- conditionalized on the category of the function. This makes it
|
||||
-- possible to compare the likelihood of two functions even if they
|
||||
-- have different types.
|
||||
type MorphoAnalysis = (Fun,String,Float)
|
||||
|
||||
-- | 'lookupMorpho' takes a string which must be a single word or
|
||||
-- a multiword expression. It then computes the list of all possible
|
||||
-- morphological analyses.
|
||||
lookupMorpho :: Concr -> String -> [MorphoAnalysis]
|
||||
lookupMorpho (Concr concr master) sent =
|
||||
unsafePerformIO $
|
||||
@@ -908,6 +921,45 @@ lookupMorpho (Concr concr master) sent =
|
||||
freeHaskellFunPtr fptr
|
||||
readIORef ref
|
||||
|
||||
-- | 'lookupCohorts' takes an arbitrary string an produces
|
||||
-- a list of all places where lexical items from the grammar have been
|
||||
-- identified (i.e. cohorts). The list consists of triples of the format @(start,ans,end)@,
|
||||
-- where @start-end@ identifies the span in the text and @ans@ is
|
||||
-- the list of possible morphological analyses similar to 'lookupMorpho'.
|
||||
--
|
||||
-- The list is sorted first by the @start@ position and after than
|
||||
-- by the @end@ position. This can be used for instance if you want to
|
||||
-- filter only the longest matches.
|
||||
lookupCohorts :: Concr -> String -> [(Int,[MorphoAnalysis],Int)]
|
||||
lookupCohorts lang@(Concr concr master) sent =
|
||||
unsafePerformIO $
|
||||
do pl <- gu_new_pool
|
||||
ref <- newIORef []
|
||||
cback <- gu_malloc pl (#size PgfMorphoCallback)
|
||||
fptr <- wrapLookupMorphoCallback (getAnalysis ref)
|
||||
(#poke PgfMorphoCallback, callback) cback fptr
|
||||
c_sent <- newUtf8CString sent pl
|
||||
enum <- pgf_lookup_cohorts concr c_sent cback pl nullPtr
|
||||
fpl <- newForeignPtr gu_pool_finalizer pl
|
||||
fromCohortRange enum fpl fptr ref
|
||||
where
|
||||
fromCohortRange enum fpl fptr ref =
|
||||
allocaBytes (#size PgfCohortRange) $ \ptr ->
|
||||
withForeignPtr fpl $ \pl ->
|
||||
do gu_enum_next enum ptr pl
|
||||
buf <- (#peek PgfCohortRange, buf) ptr
|
||||
if buf == nullPtr
|
||||
then do finalizeForeignPtr fpl
|
||||
freeHaskellFunPtr fptr
|
||||
touchConcr lang
|
||||
return []
|
||||
else do start <- (#peek PgfCohortRange, start.pos) ptr
|
||||
end <- (#peek PgfCohortRange, end.pos) ptr
|
||||
ans <- readIORef ref
|
||||
writeIORef ref []
|
||||
cohs <- unsafeInterleaveIO (fromCohortRange enum fpl fptr ref)
|
||||
return ((start,ans,end):cohs)
|
||||
|
||||
fullFormLexicon :: Concr -> [(String, [MorphoAnalysis])]
|
||||
fullFormLexicon lang =
|
||||
unsafePerformIO $
|
||||
@@ -1393,11 +1445,13 @@ bracketedLinearize lang e = unsafePerformIO $
|
||||
|
||||
end_phrase ref _ c_cat c_fid c_lindex c_fun = do
|
||||
(bs':stack,bs) <- readIORef ref
|
||||
cat <- peekUtf8CString c_cat
|
||||
let fid = fromIntegral c_fid
|
||||
let lindex = fromIntegral c_lindex
|
||||
fun <- peekUtf8CString c_fun
|
||||
writeIORef ref (stack, Bracket cat fid lindex fun (reverse bs) : bs')
|
||||
if null bs
|
||||
then writeIORef ref (stack, bs')
|
||||
else do cat <- peekUtf8CString c_cat
|
||||
let fid = fromIntegral c_fid
|
||||
let lindex = fromIntegral c_lindex
|
||||
fun <- peekUtf8CString c_fun
|
||||
writeIORef ref (stack, Bracket cat fid lindex fun (reverse bs) : bs')
|
||||
|
||||
symbol_ne exn _ = do
|
||||
gu_exn_raise exn gu_exn_type_PgfLinNonExist
|
||||
|
||||
@@ -6,7 +6,9 @@ import System.IO.Unsafe(unsafePerformIO)
|
||||
import Foreign hiding (unsafePerformIO)
|
||||
import Foreign.C
|
||||
import Data.IORef
|
||||
import Data.Data
|
||||
import PGF2.FFI
|
||||
import Data.Maybe(fromJust)
|
||||
|
||||
type Cat = String -- ^ Name of syntactic category
|
||||
type Fun = String -- ^ Name of function
|
||||
@@ -36,6 +38,20 @@ instance Eq Expr where
|
||||
e1_touch >> e2_touch
|
||||
return (res /= 0)
|
||||
|
||||
instance Data Expr where
|
||||
gfoldl f z e = z (fromJust . readExpr) `f` (showExpr [] e)
|
||||
toConstr _ = readExprConstr
|
||||
gunfold k z c = case constrIndex c of
|
||||
1 -> k (z (fromJust . readExpr))
|
||||
_ -> error "gunfold"
|
||||
dataTypeOf _ = exprDataType
|
||||
|
||||
readExprConstr :: Constr
|
||||
readExprConstr = mkConstr exprDataType "(fromJust . readExpr)" [] Prefix
|
||||
|
||||
exprDataType :: DataType
|
||||
exprDataType = mkDataType "PGF2.Expr" [readExprConstr]
|
||||
|
||||
-- | Constructs an expression by lambda abstraction
|
||||
mkAbs :: BindType -> String -> Expr -> Expr
|
||||
mkAbs bind_type var (Expr body bodyTouch) =
|
||||
|
||||
@@ -100,7 +100,7 @@ foreign import ccall unsafe "gu/string.h gu_string_buf_out"
|
||||
foreign import ccall unsafe "gu/file.h gu_file_in"
|
||||
gu_file_in :: Ptr () -> Ptr GuPool -> IO (Ptr GuIn)
|
||||
|
||||
foreign import ccall unsafe "gu/enum.h gu_enum_next"
|
||||
foreign import ccall safe "gu/enum.h gu_enum_next"
|
||||
gu_enum_next :: Ptr a -> Ptr (Ptr b) -> Ptr GuPool -> IO ()
|
||||
|
||||
foreign import ccall unsafe "gu/string.h gu_string_buf_freeze"
|
||||
@@ -409,6 +409,9 @@ foreign import ccall "pgf/pgf.h pgf_parse_with_oracle"
|
||||
foreign import ccall "pgf/pgf.h pgf_lookup_morpho"
|
||||
pgf_lookup_morpho :: Ptr PgfConcr -> CString -> Ptr PgfMorphoCallback -> Ptr GuExn -> IO ()
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_lookup_cohorts"
|
||||
pgf_lookup_cohorts :: Ptr PgfConcr -> CString -> Ptr PgfMorphoCallback -> Ptr GuPool -> Ptr GuExn -> IO (Ptr GuEnum)
|
||||
|
||||
type LookupMorphoCallback = Ptr PgfMorphoCallback -> CString -> CString -> Float -> Ptr GuExn -> IO ()
|
||||
|
||||
foreign import ccall "wrapper"
|
||||
|
||||
@@ -16,6 +16,9 @@ module PGF2.Internal(-- * Access the internal structures
|
||||
eAbs, eApp, eMeta, eFun, eVar, eLit, eTyped, eImplArg, dTyp, hypo,
|
||||
AbstrInfo, newAbstr, ConcrInfo, newConcr, newPGF,
|
||||
|
||||
-- * Expose PGF and Concr for FFI with C
|
||||
PGF(..), Concr(..),
|
||||
|
||||
-- * Write an in-memory PGF to a file
|
||||
unionPGF, writePGF, writeConcr,
|
||||
|
||||
@@ -592,17 +595,17 @@ newAbstr aflags cats funs = unsafePerformIO $ do
|
||||
|
||||
data ConcrInfo = ConcrInfo (Ptr GuSeq) (Ptr GuMap) (Ptr GuMap) (Ptr GuSeq) (Ptr GuSeq) (Ptr GuMap) (Ptr PgfConcr -> Ptr GuPool -> IO ()) CInt
|
||||
|
||||
newConcr :: (?builder :: Builder s) => B s AbstrInfo ->
|
||||
[(String,Literal)] -> -- ^ Concrete syntax flags
|
||||
[(String,String)] -> -- ^ Printnames
|
||||
[(FId,[FunId])] -> -- ^ Lindefs
|
||||
[(FId,[FunId])] -> -- ^ Linrefs
|
||||
[(FId,[Production])] -> -- ^ Productions
|
||||
[(Fun,[SeqId])] -> -- ^ Concrete functions (must be sorted by Fun)
|
||||
[[Symbol]] -> -- ^ Sequences (must be sorted)
|
||||
[(Cat,FId,FId,[String])] -> -- ^ Concrete categories
|
||||
FId -> -- ^ The total count of the categories
|
||||
B s ConcrInfo
|
||||
newConcr :: (?builder :: Builder s) => B s AbstrInfo
|
||||
-> [(String,Literal)] -- ^ Concrete syntax flags
|
||||
-> [(String,String)] -- ^ Printnames
|
||||
-> [(FId,[FunId])] -- ^ Lindefs
|
||||
-> [(FId,[FunId])] -- ^ Linrefs
|
||||
-> [(FId,[Production])] -- ^ Productions
|
||||
-> [(Fun,[SeqId])] -- ^ Concrete functions (must be sorted by Fun)
|
||||
-> [[Symbol]] -- ^ Sequences (must be sorted)
|
||||
-> [(Cat,FId,FId,[String])] -- ^ Concrete categories
|
||||
-> FId -- ^ The total count of the categories
|
||||
-> B s ConcrInfo
|
||||
newConcr (B (AbstrInfo _ _ abscats _ absfuns c_abs_lin_fun c_non_lexical_buf _)) cflags printnames lindefs linrefs prods cncfuns sequences cnccats total_cats = unsafePerformIO $ do
|
||||
c_cflags <- newFlags cflags pool
|
||||
c_printname <- newMap (#size GuString) gu_string_hasher newUtf8CString
|
||||
|
||||
@@ -100,7 +100,7 @@ hspgf_predict_callback(PgfOracleCallback* self,
|
||||
size_t offset)
|
||||
{
|
||||
HSPgfOracleCallback* oracle = gu_container(self, HSPgfOracleCallback, oracle);
|
||||
oracle->predict(cat,label,hspgf_offset2hs(oracle->sentence, offset));
|
||||
return oracle->predict(cat,label,hspgf_offset2hs(oracle->sentence, offset));
|
||||
}
|
||||
|
||||
static bool
|
||||
@@ -110,7 +110,7 @@ hspgf_complete_callback(PgfOracleCallback* self,
|
||||
size_t offset)
|
||||
{
|
||||
HSPgfOracleCallback* oracle = gu_container(self, HSPgfOracleCallback, oracle);
|
||||
oracle->complete(cat,label,hspgf_offset2hs(oracle->sentence, offset));
|
||||
return oracle->complete(cat,label,hspgf_offset2hs(oracle->sentence, offset));
|
||||
}
|
||||
|
||||
static PgfExprProb*
|
||||
|
||||
@@ -371,7 +371,7 @@ browse pgf id = fmap (\def -> (def,producers,consumers)) definition
|
||||
Just (hyps,_,_) -> Just $ render (text "cat" <+> ppCId id <+> hsep (snd (mapAccumL (ppHypo 4) [] hyps)))
|
||||
Nothing -> Nothing
|
||||
|
||||
(producers,consumers) = Map.foldWithKey accum ([],[]) (funs (abstract pgf))
|
||||
(producers,consumers) = Map.foldrWithKey accum ([],[]) (funs (abstract pgf))
|
||||
where
|
||||
accum f (ty,_,_,_) (plist,clist) =
|
||||
let !plist' = if id `elem` ps then f : plist else plist
|
||||
|
||||
@@ -58,8 +58,8 @@ bracketedTokn :: Maybe Int -> Forest -> BracketedTokn
|
||||
bracketedTokn dp f@(Forest abs cnc forest root) =
|
||||
case [computeSeq isTrusted seq (map (render forest) args) | (seq,args) <- root] of
|
||||
([bs@(Bracket_{})]:_) -> bs
|
||||
(bss:_) -> Bracket_ wildCId 0 0 wildCId [] bss
|
||||
[] -> Bracket_ wildCId 0 0 wildCId [] []
|
||||
(bss:_) -> Bracket_ wildCId 0 0 0 wildCId [] bss
|
||||
[] -> Bracket_ wildCId 0 0 0 wildCId [] []
|
||||
where
|
||||
isTrusted (_,fid) = IntSet.member fid trusted
|
||||
|
||||
@@ -190,7 +190,7 @@ foldForest :: (FunId -> [PArg] -> b -> b) -> (Expr -> [String] -> b -> b) -> b -
|
||||
foldForest f g b fcat forest =
|
||||
case IntMap.lookup fcat forest of
|
||||
Nothing -> b
|
||||
Just set -> Set.fold foldProd b set
|
||||
Just set -> Set.foldr foldProd b set
|
||||
where
|
||||
foldProd (PCoerce fcat) b = foldForest f g b fcat forest
|
||||
foldProd (PApply funid args) b = f funid args b
|
||||
|
||||
@@ -33,6 +33,7 @@ fromStr = from False id
|
||||
from space cap ts =
|
||||
case ts of
|
||||
[] -> []
|
||||
TK "":ts -> from space cap ts
|
||||
TK s:ts -> put s++from True cap ts
|
||||
BIND:ts -> from False cap ts
|
||||
SOFT_BIND:ts -> from False cap ts
|
||||
|
||||
@@ -137,7 +137,7 @@ cidVar = mkCId "__gfVar"
|
||||
-- mark the beginning and the end of each constituent.
|
||||
data BracketedString
|
||||
= Leaf Token -- ^ this is the leaf i.e. a single token
|
||||
| Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedString]
|
||||
| Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedString]
|
||||
-- ^ this is a bracket. The 'CId' is the category of
|
||||
-- the phrase. The 'FId' is an unique identifier for
|
||||
-- every phrase in the sentence. For context-free grammars
|
||||
@@ -151,7 +151,7 @@ data BracketedString
|
||||
-- that represents the same constituent.
|
||||
|
||||
data BracketedTokn
|
||||
= Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedTokn] -- Invariant: the list is not empty
|
||||
= Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedTokn] -- Invariant: the list is not empty
|
||||
| LeafKS Token
|
||||
| LeafNE
|
||||
| LeafBIND
|
||||
@@ -169,12 +169,12 @@ showBracketedString :: BracketedString -> String
|
||||
showBracketedString = render . ppBracketedString
|
||||
|
||||
ppBracketedString (Leaf t) = text t
|
||||
ppBracketedString (Bracket cat fid index _ _ bss) = parens (ppCId cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
|
||||
ppBracketedString (Bracket cat fid fid' index _ _ bss) = parens (ppCId cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
|
||||
|
||||
-- | The length of the bracketed string in number of tokens.
|
||||
lengthBracketedString :: BracketedString -> Int
|
||||
lengthBracketedString (Leaf _) = 1
|
||||
lengthBracketedString (Bracket _ _ _ _ _ bss) = sum (map lengthBracketedString bss)
|
||||
lengthBracketedString (Leaf _) = 1
|
||||
lengthBracketedString (Bracket _ _ _ _ _ _ bss) = sum (map lengthBracketedString bss)
|
||||
|
||||
untokn :: Maybe String -> [BracketedTokn] -> (Maybe String,[BracketedString])
|
||||
untokn nw bss =
|
||||
@@ -183,10 +183,10 @@ untokn nw bss =
|
||||
Just bss -> (nw,concat bss)
|
||||
Nothing -> (nw,[])
|
||||
where
|
||||
untokn nw (Bracket_ cat fid index fun es bss) =
|
||||
untokn nw (Bracket_ cat fid fid' index fun es bss) =
|
||||
let (nw',bss') = mapAccumR untokn nw bss
|
||||
in case sequence bss' of
|
||||
Just bss -> (nw',Just [Bracket cat fid index fun es (concat bss)])
|
||||
Just bss -> (nw',Just [Bracket cat fid fid' index fun es (concat bss)])
|
||||
Nothing -> (Nothing, Nothing)
|
||||
untokn nw (LeafKS t)
|
||||
| null t = (nw,Just [])
|
||||
@@ -227,16 +227,16 @@ computeSeq filter seq args = concatMap compute seq
|
||||
|
||||
getArg d r
|
||||
| not (null arg_lin) &&
|
||||
filter ct = [Bracket_ cat fid r fun es arg_lin]
|
||||
filter ct = [Bracket_ cat fid fid' r fun es arg_lin]
|
||||
| otherwise = arg_lin
|
||||
where
|
||||
arg_lin = lin ! r
|
||||
(ct@(cat,fid),_,fun,es,(_xs,lin)) = args !! d
|
||||
arg_lin = lin ! r
|
||||
(ct@(cat,fid),fid',fun,es,(_xs,lin)) = args !! d
|
||||
|
||||
getVar d r = [LeafKS (showCId (xs !! r))]
|
||||
where
|
||||
(_ct,_,_fun,_es,(xs,_lin)) = args !! d
|
||||
|
||||
flattenBracketedString :: BracketedString -> [String]
|
||||
flattenBracketedString (Leaf w) = [w]
|
||||
flattenBracketedString (Bracket _ _ _ _ _ bss) = concatMap flattenBracketedString bss
|
||||
flattenBracketedString (Leaf w) = [w]
|
||||
flattenBracketedString (Bracket _ _ _ _ _ _ bss) = concatMap flattenBracketedString bss
|
||||
|
||||
@@ -198,7 +198,7 @@ recoveryStates open_types (EState abs cnc chart) =
|
||||
Nothing -> []
|
||||
|
||||
complete open_fcats items ac =
|
||||
foldl (Set.fold (\(Active j' ppos funid seqid args keyc) ->
|
||||
foldl (Set.foldr (\(Active j' ppos funid seqid args keyc) ->
|
||||
(:) (Active j' (ppos+1) funid seqid args keyc)))
|
||||
items
|
||||
[set | fcat <- open_fcats, (set,_) <- lookupACByFCat fcat ac]
|
||||
@@ -363,7 +363,7 @@ process flit ftok cnc (item@(Active j ppos funid seqid args key0):items) acc cha
|
||||
|
||||
items2 = case lookupAC key0 ((active chart:actives chart) !! (k-j)) of
|
||||
Nothing -> items
|
||||
Just (set,sc) -> Set.fold (\(Active j' ppos funid seqid args keyc) ->
|
||||
Just (set,sc) -> Set.foldr (\(Active j' ppos funid seqid args keyc) ->
|
||||
let SymCat d _ = unsafeAt (unsafeAt (sequences cnc) seqid) ppos
|
||||
PArg hypos _ = args !! d
|
||||
in (:) (Active j' (ppos+1) funid seqid (updateAt d (PArg hypos fid) args) keyc)) items set
|
||||
@@ -395,7 +395,7 @@ process flit ftok cnc (item@(Active j ppos funid seqid args key0):items) acc cha
|
||||
predict flit ftok cnc forest key0 key@(AK fid lbl) k acc items =
|
||||
let (acc1,items1) = case IntMap.lookup fid forest of
|
||||
Nothing -> (acc,items)
|
||||
Just set -> Set.fold foldProd (acc,items) set
|
||||
Just set -> Set.foldr foldProd (acc,items) set
|
||||
|
||||
(acc2,items2) = case IntMap.lookup fid (lexicon cnc) >>= IntMap.lookup lbl of
|
||||
Just tmap -> let (mb_v,toks) = TrieMap.decompose (TrieMap.map (toItems key0 k) tmap)
|
||||
|
||||
@@ -79,12 +79,12 @@ unionsWith f = foldl (unionWith f) empty
|
||||
elems :: TrieMap k v -> [v]
|
||||
elems tr = collect tr []
|
||||
where
|
||||
collect (Tr mb_v m) xs = maybe id (:) mb_v (Map.fold collect xs m)
|
||||
collect (Tr mb_v m) xs = maybe id (:) mb_v (Map.foldr collect xs m)
|
||||
|
||||
toList :: TrieMap k v -> [([k],v)]
|
||||
toList tr = collect [] tr []
|
||||
where
|
||||
collect ks (Tr mb_v m) xs = maybe id (\v -> (:) (ks,v)) mb_v (Map.foldWithKey (\k -> collect (k:ks)) xs m)
|
||||
collect ks (Tr mb_v m) xs = maybe id (\v -> (:) (ks,v)) mb_v (Map.foldrWithKey (\k -> collect (k:ks)) xs m)
|
||||
|
||||
fromListWith :: Ord k => (v -> v -> v) -> [([k],v)] -> TrieMap k v
|
||||
fromListWith f xs = foldl' (\trie (ks,v) -> insertWith f ks v trie) empty xs
|
||||
|
||||
@@ -34,8 +34,9 @@ import PGF.Macros (lookValCat, BracketedString(..))
|
||||
|
||||
import qualified Data.Map as Map
|
||||
--import qualified Data.IntMap as IntMap
|
||||
import Data.List (intersperse,nub,mapAccumL,find,groupBy)
|
||||
--import Data.Char (isDigit)
|
||||
import Data.List (intersperse,nub,mapAccumL,find,groupBy,sortBy,partition)
|
||||
import Data.Ord (comparing)
|
||||
import Data.Char (isDigit)
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Text.PrettyPrint
|
||||
|
||||
@@ -131,6 +132,7 @@ graphvizDependencyTree format debug mlab mclab pgf lang t =
|
||||
"latex" -> render . ppLaTeX $ conll2latex' conll
|
||||
"svg" -> render . ppSVG . toSVG $ conll2latex' conll
|
||||
"conll" -> printCoNLL conll
|
||||
"conllu" -> printCoNLL ([["# text = " ++ linearize pgf lang t], ["# tree = " ++ showExpr [] t]] ++ conll)
|
||||
"malt_tab" -> render $ vcat (map (hcat . intersperse (char '\t') . (\ws -> [ws !! 0,ws !! 1,ws !! 3,ws !! 6,ws !! 7])) wnodes)
|
||||
"malt_input" -> render $ vcat (map (hcat . intersperse (char '\t') . take 6) wnodes)
|
||||
_ -> render $ text "digraph {" $$
|
||||
@@ -144,16 +146,16 @@ graphvizDependencyTree format debug mlab mclab pgf lang t =
|
||||
conll = maybe conll0 (\ls -> fixCoNLL ls conll0) mclab
|
||||
conll0 = (map.map) render wnodes
|
||||
nodes = map mkNode leaves
|
||||
links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun),_,w) <- tail leaves]
|
||||
links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun,_),_,w) <- tail leaves]
|
||||
|
||||
-- CoNLL format: ID FORM LEMMA PLEMMA POS PPOS FEAT PFEAT HEAD PHEAD DEPREL PDEPREL
|
||||
-- P variants are automatically predicted rather than gold standard
|
||||
|
||||
wnodes = [[int i, maltws ws, ppCId fun, ppCId (posCat cat), ppCId cat, unspec, int parent, text lab, unspec, unspec] |
|
||||
((cat,fid,fun),i,ws) <- tail leaves,
|
||||
wnodes = [[int i, maltws ws, ppCId fun, ppCId (posCat cat), ppCId cat, int lind, int parent, text lab, unspec, unspec] |
|
||||
((cat,fid,fun,lind),i,ws) <- tail leaves,
|
||||
let (lab,parent) = fromMaybe (dep_lbl,0)
|
||||
(do (lbl,fid) <- lookup fid deps
|
||||
(_,i,_) <- find (\((_,fid1,_),i,_) -> fid == fid1) leaves
|
||||
(_,i,_) <- find (\((_,fid1,_,_),i,_) -> fid == fid1) leaves
|
||||
return (lbl,i))
|
||||
]
|
||||
maltws = text . concat . intersperse "+" . words -- no spaces in column 2
|
||||
@@ -162,7 +164,7 @@ graphvizDependencyTree format debug mlab mclab pgf lang t =
|
||||
|
||||
bss = bracketedLinearize pgf lang t
|
||||
|
||||
root = (wildCId,nil,wildCId)
|
||||
root = (wildCId,nil,wildCId,0)
|
||||
|
||||
leaves = (root,0,root_lbl) : (groupAndIndexIt 1 . concatMap (getLeaves root)) bss
|
||||
deps = let (_,(h,deps)) = getDeps 0 [] t []
|
||||
@@ -180,10 +182,10 @@ graphvizDependencyTree format debug mlab mclab pgf lang t =
|
||||
|
||||
getLeaves parent bs =
|
||||
case bs of
|
||||
Leaf w -> [(parent,w)]
|
||||
Bracket cat fid _ fun _ bss -> concatMap (getLeaves (cat,fid,fun)) bss
|
||||
Leaf w -> [(parent,w)]
|
||||
Bracket cat fid _ lind fun _ bss -> concatMap (getLeaves (cat,fid,fun,lind)) bss
|
||||
|
||||
mkNode ((_,p,_),i,w) =
|
||||
mkNode ((_,p,_,_),i,w) =
|
||||
tag p <+> brackets (text "label = " <> doubleQuotes (int i <> char '.' <+> text w)) <+> semi
|
||||
|
||||
mkLink (x,(lbl,y)) = tag y <+> text "->" <+> tag x <+> text "[label = " <> doubleQuotes (text lbl) <> text "] ;"
|
||||
@@ -234,10 +236,18 @@ graphvizDependencyTree format debug mlab mclab pgf lang t =
|
||||
root_lbl = "ROOT"
|
||||
unspec = text "_"
|
||||
|
||||
-- auxiliaries for UD conversion PK 15/12/2018
|
||||
rmcomments :: String -> String
|
||||
rmcomments [] = []
|
||||
rmcomments ('-':'-':xs) = []
|
||||
rmcomments ('-':x :xs) = '-':rmcomments (x:xs)
|
||||
rmcomments (x:xs) = x:rmcomments xs
|
||||
|
||||
-- | Prepare lines obtained from a configuration file for labels for
|
||||
-- use with 'graphvizDependencyTree'. Format per line /fun/ /label/@*@.
|
||||
getDepLabels :: String -> Labels
|
||||
getDepLabels s = Map.fromList [(mkCId f,ls) | f:ls <- map words (lines s)]
|
||||
-- getDepLabels s = Map.fromList [(mkCId f,ls) | f:ls <- map words (lines s)]
|
||||
getDepLabels s = Map.fromList [(mkCId f,ls) | f:ls <- map (words . rmcomments) (lines s)]
|
||||
|
||||
-- the old function, without dependencies
|
||||
graphvizParseTree :: PGF -> Language -> GraphvizOptions -> Tree -> String
|
||||
@@ -291,13 +301,13 @@ graphvizBracketedString opts mbl tree bss = render graphviz_code
|
||||
getInternals [] = []
|
||||
getInternals nodes
|
||||
= nub [(parent, fid, mkNode fun cat) |
|
||||
(parent, Bracket cat fid _ fun _ _) <- nodes]
|
||||
(parent, Bracket cat fid _ _ fun _ _) <- nodes]
|
||||
: getInternals [(fid, child) |
|
||||
(_, Bracket _ fid _ _ _ children) <- nodes,
|
||||
(_, Bracket _ fid _ _ _ _ children) <- nodes,
|
||||
child <- children]
|
||||
|
||||
getLeaves cat parent (Leaf word) = [(parent, (cat, word))] -- the lowest cat before the word
|
||||
getLeaves _ parent (Bracket cat fid i _ _ children)
|
||||
getLeaves _ parent (Bracket cat fid _ i _ _ children)
|
||||
= concatMap (getLeaves cat fid) children
|
||||
|
||||
mkLevel nodes
|
||||
@@ -401,8 +411,8 @@ genPreAlignment pgf langs = lin2align . linsBracketed
|
||||
|
||||
getLeaves parent bs =
|
||||
case bs of
|
||||
Leaf w -> [(parent,w)]
|
||||
Bracket _ fid _ _ _ bss -> concatMap (getLeaves fid) bss
|
||||
Leaf w -> [(parent,w)]
|
||||
Bracket _ fid _ _ _ _ bss -> concatMap (getLeaves fid) bss
|
||||
|
||||
mkLayers (cs:css:rest) = let (lrest, rrest) = mkLayers (css:rest)
|
||||
in ((fields cs) : lrest, (map (mkLinks css) cs) : rrest)
|
||||
@@ -512,7 +522,7 @@ conll2latex' = dep2latex . conll2dep'
|
||||
|
||||
data Dep = Dep {
|
||||
wordLength :: Int -> Double -- length of word at position int -- was: fixed width, millimetres (>= 20.0)
|
||||
, tokens :: [(String,String)] -- word, pos (0..)
|
||||
, tokens :: [(String,(String,String))] -- word, (pos,features) (0..)
|
||||
, deps :: [((Int,Int),String)] -- from, to, label
|
||||
, root :: Int -- root word position
|
||||
}
|
||||
@@ -552,7 +562,8 @@ dep2latex d =
|
||||
[Comment (unwords (map fst (tokens d))),
|
||||
Picture defaultUnit (width,height) (
|
||||
[Put (wpos rwld i,0) (Text w) | (i,w) <- zip [0..] (map fst (tokens d))] -- words
|
||||
++ [Put (wpos rwld i,15) (TinyText w) | (i,w) <- zip [0..] (map snd (tokens d))] -- pos tags 15u above bottom
|
||||
++ [Put (wpos rwld i,15) (TinyText w) | (i,(w,_)) <- zip [0..] (map snd (tokens d))] -- pos tags 15u above bottom
|
||||
--- ++ [Put (wpos rwld i,-15) (TinyText w) | (i,(_,w)) <- zip [0..] (map snd (tokens d))] -- features 15u below bottom -> DON'T SHOW
|
||||
++ concat [putArc rwld (aheight x y) x y label | ((x,y),label) <- deps d] -- arcs and labels
|
||||
++ [Put (wpos rwld (root d) + 15,height) (ArrowDown (height-arcbase))]
|
||||
++ [Put (wpos rwld (root d) + 20,height - 10) (TinyText "ROOT")]
|
||||
@@ -583,8 +594,8 @@ conll2dep' ls = Dep {
|
||||
, root = head $ [read x-1 | x:_:_:_:_:_:"0":_ <- ls] ++ [1]
|
||||
}
|
||||
where
|
||||
wld i = maximum (0:[charWidth * fromIntegral (length w) | w <- let (tok,pos) = toks !! i in [tok,pos]])
|
||||
toks = [(w,c) | _:w:_:c:_ <- ls]
|
||||
wld i = maximum (0:[charWidth * fromIntegral (length w) | w <- let (tok,(pos,feat)) = toks !! i in [tok,pos {-,feat-}]]) --- feat not shown
|
||||
toks = [(w,(c,m)) | _:w:_:c:_:m:_ <- ls]
|
||||
dps = [((read y-1, read x-1),lab) | x:_:_:_:_:_:y:lab:_ <- ls, y /="0"]
|
||||
--maxdist = maximum [abs (x-y) | ((x,y),_) <- dps]
|
||||
|
||||
@@ -749,18 +760,26 @@ ppSVG svg =
|
||||
-- UseComp {"not"} PART neg head
|
||||
-- UseComp {*} AUX cop head
|
||||
|
||||
type CncLabels = [(String, String -> Maybe (String -> String,String,String))]
|
||||
-- (fun, word -> (pos,label,target))
|
||||
-- the pos can remain unchanged, as in the current notation in the article
|
||||
type CncLabels = [
|
||||
Either
|
||||
(String, String -> Maybe (String -> String,String,String))
|
||||
-- (fun, word -> (pos,label,target))
|
||||
-- the pos can remain unchanged, as in the current notation in the article
|
||||
(String,[String])
|
||||
-- (category, morphological forms)
|
||||
]
|
||||
|
||||
fixCoNLL :: CncLabels -> CoNLL -> CoNLL
|
||||
fixCoNLL labels conll = map fixc conll where
|
||||
fixCoNLL cncLabels conll = map fixc conll where
|
||||
labels = [l | Left l <- cncLabels]
|
||||
flabels = [r | Right r <- cncLabels]
|
||||
|
||||
fixc row = case row of
|
||||
(i:word:fun:pos:cat:x_:"0":"dep":xs) -> (i:word:fun:pos:cat:x_:"0":"root":xs) --- change the root label from dep to root
|
||||
(i:word:fun:pos:cat:x_:"0":"dep":xs) -> (i:word:fun:pos:cat:(feat cat word x_):"0":"root":xs) --- change the root label from dep to root
|
||||
(i:word:fun:pos:cat:x_:j:label:xs) -> case look (fun,word) of
|
||||
Just (pos',label',"head") -> (i:word:fun:pos' pos:cat:x_:j :label':xs)
|
||||
Just (pos',label',target) -> (i:word:fun:pos' pos:cat:x_: getDep j target:label':xs)
|
||||
_ -> row
|
||||
Just (pos',label',"head") -> (i:word:fun:pos' pos:cat:(feat cat word x_):j :label':xs)
|
||||
Just (pos',label',target) -> (i:word:fun:pos' pos:cat:(feat cat word x_): getDep j target:label':xs)
|
||||
_ -> (i:word:fun:pos:cat:(feat cat word x_):j:label:xs)
|
||||
_ -> row
|
||||
|
||||
look (fun,word) = case lookup fun labels of
|
||||
@@ -775,16 +794,48 @@ fixCoNLL labels conll = map fixc conll where
|
||||
|
||||
getDep j label = maybe j id $ lookup (label,j) [((label,j),i) | i:word:fun:pos:cat:x_:j:label:xs <- conll]
|
||||
|
||||
feat cat word x = case lookup cat flabels of
|
||||
Just tags | all isDigit x && length tags > read x -> tags !! read x
|
||||
_ -> case lookup (show word) flabels of
|
||||
Just (t:_) -> t
|
||||
_ -> cat ++ "-" ++ x
|
||||
|
||||
getCncDepLabels :: String -> CncLabels
|
||||
getCncDepLabels = map merge . groupBy (\ (x,_) (a,_) -> x == a) . concatMap analyse . filter choose . lines where
|
||||
getCncDepLabels s = wlabels ws ++ flabels fs
|
||||
where
|
||||
wlabels =
|
||||
map Left .
|
||||
map merge .
|
||||
groupBy (\ (x,_) (a,_) -> x == a) .
|
||||
sortBy (comparing fst) .
|
||||
concatMap analyse .
|
||||
filter chooseW
|
||||
|
||||
flabels =
|
||||
map Right .
|
||||
map collectTags .
|
||||
map words
|
||||
|
||||
(fs,ws) = partition chooseF $ map uncomment $ lines s
|
||||
|
||||
--- choose is for compatibility with the general notation
|
||||
choose line = notElem '(' line && elem '{' line --- ignoring non-local (with "(") and abstract (without "{") rules
|
||||
|
||||
chooseW line = notElem '(' line &&
|
||||
elem '{' line
|
||||
--- ignoring non-local (with "(") and abstract (without "{") rules
|
||||
---- TODO: this means that "(" cannot be a token
|
||||
|
||||
chooseF line = take 1 line == "@" --- feature assignments have the form e.g. @N SgNom SgGen ; no spaces inside tags
|
||||
|
||||
uncomment line = case line of
|
||||
'-':'-':_ -> ""
|
||||
c:cs -> c : uncomment cs
|
||||
_ -> line
|
||||
|
||||
analyse line = case break (=='{') line of
|
||||
(beg,_:ws) -> case break (=='}') ws of
|
||||
(toks,_:target) -> case (words beg, words target) of
|
||||
(fun:_,[ label,j]) -> [(fun, (tok, (id, label,j))) | tok <- getToks toks]
|
||||
(fun:_,[pos,label,j]) -> [(fun, (tok, (const pos,label,j))) | tok <- getToks toks]
|
||||
(toks,_:target) -> case (getToks beg, words target) of
|
||||
(funs,[ label,j]) -> [(fun, (tok, (id, label,j))) | fun <- funs, tok <- getToks toks]
|
||||
(funs,[pos,label,j]) -> [(fun, (tok, (const pos,label,j))) | fun <- funs, tok <- getToks toks]
|
||||
_ -> []
|
||||
_ -> []
|
||||
_ -> []
|
||||
@@ -793,8 +844,13 @@ getCncDepLabels = map merge . groupBy (\ (x,_) (a,_) -> x == a) . concatMap ana
|
||||
Just new -> return new
|
||||
_ -> lookup "*" (map snd rules)
|
||||
)
|
||||
getToks = words . map (\c -> if elem c "\"," then ' ' else c)
|
||||
getToks = map unquote . filter (/=",") . toks
|
||||
toks s = case lex s of [(t,"")] -> [t] ; [(t,cc)] -> t:toks cc ; _ -> []
|
||||
unquote s = case s of '"':cc@(_:_) | last cc == '"' -> init cc ; _ -> s
|
||||
|
||||
collectTags (w:ws) = (tail w,ws)
|
||||
|
||||
-- added init to remove the last \n. otherwise, two empty lines are in between each sentence PK 17/12/2018
|
||||
printCoNLL :: CoNLL -> String
|
||||
printCoNLL = unlines . map (concat . intersperse "\t")
|
||||
printCoNLL = init . unlines . map (concat . intersperse "\t")
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
name: pgf
|
||||
version: 3.9-git
|
||||
version: 3.10
|
||||
|
||||
cabal-version: >= 1.20
|
||||
build-type: Simple
|
||||
@@ -12,11 +12,6 @@ bug-reports: https://github.com/GrammaticalFramework/GF/issues
|
||||
maintainer: Thomas Hallgren
|
||||
tested-with: GHC==7.6.3, GHC==7.8.3, GHC==7.10.3, GHC==8.0.2
|
||||
|
||||
flag custom-binary
|
||||
Description: Use a customised version of the binary package
|
||||
Default: True
|
||||
Manual: True
|
||||
|
||||
Library
|
||||
default-language: Haskell2010
|
||||
build-depends: base >= 4.6 && <5,
|
||||
@@ -29,18 +24,14 @@ Library
|
||||
mtl,
|
||||
exceptions
|
||||
|
||||
if flag(custom-binary)
|
||||
hs-source-dirs: ., binary
|
||||
other-modules:
|
||||
-- not really part of GF but I have changed the original binary library
|
||||
-- and we have to keep the copy for now.
|
||||
Data.Binary
|
||||
Data.Binary.Put
|
||||
Data.Binary.Get
|
||||
Data.Binary.Builder
|
||||
Data.Binary.IEEE754
|
||||
else
|
||||
build-depends: binary, data-binary-ieee754
|
||||
other-modules:
|
||||
-- not really part of GF but I have changed the original binary library
|
||||
-- and we have to keep the copy for now.
|
||||
Data.Binary
|
||||
Data.Binary.Put
|
||||
Data.Binary.Get
|
||||
Data.Binary.Builder
|
||||
Data.Binary.IEEE754
|
||||
|
||||
--ghc-options: -fwarn-unused-imports
|
||||
--if impl(ghc>=7.8)
|
||||
|
||||
@@ -1,29 +1,37 @@
|
||||
INSTALL_PATH = /usr/local
|
||||
|
||||
C_SOURCES = jpgf.c jsg.c jni_utils.c
|
||||
JAVA_SOURCES = $(wildcard org/grammaticalframework/pgf/*.java) \
|
||||
$(wildcard org/grammaticalframework/sg/*.java)
|
||||
|
||||
JNI_INCLUDES = $(if $(wildcard /usr/lib/jvm/default-java/include/.*), -I/usr/lib/jvm/default-java/include -I/usr/lib/jvm/default-java/include/linux, \
|
||||
$(if $(wildcard /System/Library/Frameworks/JavaVM.framework/Versions/A/Headers/.*), -I/System/Library/Frameworks/JavaVM.framework/Versions/A/Headers, \
|
||||
$(if $(wildcard /Library/Java/Home/include/.*), -I/Library/Java/Home/include/ -I/Library/Java/Home/include/darwin, \
|
||||
$(error No JNI headers found))))
|
||||
$(if $(wildcard /usr/lib/jvm/java-1.11.0-openjdk-amd64/include/.*), -I/usr/lib/jvm/java-1.11.0-openjdk-amd64/include/ -I/usr/lib/jvm/java-1.11.0-openjdk-amd64/include/linux, \
|
||||
$(if $(wildcard /System/Library/Frameworks/JavaVM.framework/Versions/A/Headers/.*), -I/System/Library/Frameworks/JavaVM.framework/Versions/A/Headers, \
|
||||
$(if $(wildcard /Library/Java/Home/include/.*), -I/Library/Java/Home/include/ -I/Library/Java/Home/include/darwin, \
|
||||
$(error No JNI headers found)))))
|
||||
|
||||
# For Windows replace the previous line with something like this:
|
||||
# For compilation on Windows replace the previous line with something like this:
|
||||
#
|
||||
# JNI_INCLUDES = -I "C:/Program Files/Java/jdk1.8.0_171/include" -I "C:/Program Files/Java/jdk1.8.0_171/include/win32" -I "C:/MinGW/msys/1.0/local/include"
|
||||
# WINDOWS_FLAGS = -L"C:/MinGW/msys/1.0/local/lib" -no-undefined
|
||||
# WINDOWS_LDFLAGS = -L"C:/MinGW/msys/1.0/local/lib" -no-undefined
|
||||
|
||||
INSTALL_PATH = /usr/local/lib
|
||||
LIBTOOL = glibtool --tag=CC
|
||||
GCC = gcc
|
||||
LIBTOOL = $(if $(shell command -v glibtool 2>/dev/null), glibtool, libtool) --tag=CC
|
||||
|
||||
LIBTOOL = $(if $(shell command -v glibtool 2>/dev/null), glibtool --tag=CC, libtool)
|
||||
# For cross-compilation from Linux to Windows replace the previous two lines with:
|
||||
#
|
||||
# GCC = x86_64-w64-mingw32-gcc
|
||||
# LIBTOOL = ../c/libtool
|
||||
# WINDOWS_CCFLAGS = -I$(INSTALL_PATH)/include
|
||||
# WINDOWS_LDFLAGS = -L$(INSTALL_PATH)/lib -no-undefined
|
||||
|
||||
all: libjpgf.la jpgf.jar
|
||||
|
||||
libjpgf.la: $(patsubst %.c, %.lo, $(C_SOURCES))
|
||||
$(LIBTOOL) --mode=link gcc $(CFLAGS) -g -O -o libjpgf.la -shared $^ -rpath $(INSTALL_PATH) -lgu -lpgf -lsg $(WINDOWS_FLAGS)
|
||||
$(LIBTOOL) --mode=link $(GCC) $(CFLAGS) -g -O -o libjpgf.la -shared $^ -rpath $(INSTALL_PATH)/lib -lgu -lpgf -lsg $(WINDOWS_LDFLAGS)
|
||||
|
||||
%.lo : %.c
|
||||
$(LIBTOOL) --mode=compile gcc $(CFLAGS) -g -O -c $(JNI_INCLUDES) -std=c99 -shared $< -o $@
|
||||
$(LIBTOOL) --mode=compile $(GCC) $(CFLAGS) -g -O -c $(JNI_INCLUDES) $(WINDOWS_CCFLAGS) -std=c99 -shared $< -o $@
|
||||
|
||||
jpgf.jar: $(patsubst %.java, %.class, $(JAVA_SOURCES))
|
||||
jar -cf $@ org/grammaticalframework/pgf/*.class org/grammaticalframework/sg/*.class
|
||||
@@ -32,8 +40,8 @@ jpgf.jar: $(patsubst %.java, %.class, $(JAVA_SOURCES))
|
||||
javac $<
|
||||
|
||||
install: libjpgf.la jpgf.jar
|
||||
$(LIBTOOL) --mode=install install -s libjpgf.la $(INSTALL_PATH)
|
||||
install jpgf.jar $(INSTALL_PATH)
|
||||
$(LIBTOOL) --mode=install install -s libjpgf.la $(INSTALL_PATH)/lib
|
||||
install jpgf.jar $(INSTALL_PATH)/lib
|
||||
|
||||
|
||||
doc:
|
||||
|
||||
4
src/runtime/javascript/DEPRECATED.md
Normal file
4
src/runtime/javascript/DEPRECATED.md
Normal file
@@ -0,0 +1,4 @@
|
||||
# Deprecation notice
|
||||
|
||||
As of June 2019, this JavaScript version of the GF runtime is considered deprecated,
|
||||
in favour of the TypeScript version in <https://github.com/GrammaticalFramework/gf-typescript>.
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
@@ -32,7 +32,7 @@
|
||||
</script>
|
||||
<title>Web-based GF Translator</title>
|
||||
</head>
|
||||
<body onload="populateLangs(Food, 'fromLang', 'toLang')">
|
||||
<body onload="populateLangs(grammar, 'fromLang', 'toLang')">
|
||||
<form id="translate">
|
||||
<p>
|
||||
<input type="text" name="inputText" id="inputText" value="this cheese is warm" size="50" />
|
||||
|
||||
7
src/runtime/typescript/MOVED.md
Normal file
7
src/runtime/typescript/MOVED.md
Normal file
@@ -0,0 +1,7 @@
|
||||
# Project moved
|
||||
|
||||
The GF TypeScript runtime has been moved to the repository:
|
||||
<https://github.com/GrammaticalFramework/gf-typescript>
|
||||
|
||||
If you are looking for an updated version of the JavaScript runtime,
|
||||
you should also look there.
|
||||
337
src/runtime/typescript/gflib.d.ts
vendored
337
src/runtime/typescript/gflib.d.ts
vendored
@@ -1,337 +0,0 @@
|
||||
/**
|
||||
* gflib.dt.s
|
||||
*
|
||||
* by John J. Camilleri
|
||||
*
|
||||
* TypeScript type definitions for the "original" JS GF runtime (GF:src/runtime/javascript/gflib.js)
|
||||
*/
|
||||
|
||||
// Note: the String prototype is extended with:
|
||||
// String.prototype.tag = "";
|
||||
// String.prototype.setTag = function (tag) { this.tag = tag; };
|
||||
|
||||
/**
|
||||
* A GF grammar is one abstract and multiple concretes
|
||||
*/
|
||||
declare class GFGrammar {
|
||||
abstract: GFAbstract
|
||||
concretes: {[key: string]: GFConcrete}
|
||||
|
||||
constructor(abstract: GFAbstract, concretes: {[key: string]: GFConcrete})
|
||||
|
||||
translate(
|
||||
input: string,
|
||||
fromLang: string,
|
||||
toLang: string
|
||||
): {[key: string]: {[key: string]: string}}
|
||||
}
|
||||
|
||||
/**
|
||||
* Abstract Syntax Tree
|
||||
*/
|
||||
declare class Fun {
|
||||
name: string
|
||||
args: Fun[]
|
||||
|
||||
constructor(name: string, ...args: Fun[])
|
||||
|
||||
print(): string
|
||||
show(): string
|
||||
getArg(i: number): Fun
|
||||
setArg(i: number, c: Fun): void
|
||||
isMeta(): boolean
|
||||
isComplete(): boolean
|
||||
isLiteral(): boolean
|
||||
isString(): boolean
|
||||
isInt(): boolean
|
||||
isFloat(): boolean
|
||||
isEqual(obj: any): boolean
|
||||
}
|
||||
|
||||
/**
|
||||
* Abstract syntax
|
||||
*/
|
||||
declare class GFAbstract {
|
||||
startcat: string
|
||||
types: {[key: string]: Type} // key is function name
|
||||
|
||||
constructor(startcat: string, types: {[key: string]: Type})
|
||||
|
||||
addType(fun: string, args: string[], cat: string): void
|
||||
getArgs(fun: string): string[]
|
||||
getCat(fun: string): string
|
||||
annotate(tree: Fun, type: string): Fun
|
||||
handleLiterals(tree: Fun, type: Type): Fun
|
||||
copyTree(x: Fun): Fun
|
||||
parseTree(str: string, type: string): Fun
|
||||
parseTree_(tokens: string[], prec: number): Fun
|
||||
}
|
||||
|
||||
/**
|
||||
* Type
|
||||
*/
|
||||
declare class Type {
|
||||
args: string[]
|
||||
cat: string
|
||||
|
||||
constructor(args: string[], cat: string)
|
||||
}
|
||||
|
||||
type ApplyOrCoerce = Apply | Coerce
|
||||
|
||||
/**
|
||||
* Concrete syntax
|
||||
*/
|
||||
declare class GFConcrete {
|
||||
flags: {[key: string]: string}
|
||||
productions: {[key: number]: ApplyOrCoerce[]}
|
||||
functions: CncFun[]
|
||||
sequences: Array<Array<Sym>>
|
||||
startCats: {[key: string]: {s: number, e: number}}
|
||||
totalFIds: number
|
||||
pproductions: {[key: number]: ApplyOrCoerce[]}
|
||||
lproductions: {[key: string]: {fid: FId, fun: CncFun}}
|
||||
|
||||
constructor(
|
||||
flags: {[key: string]: string},
|
||||
productions: {[key: number]: ApplyOrCoerce[]},
|
||||
functions: CncFun[],
|
||||
sequences: Array<Array<Sym>>,
|
||||
startCats: {[key: string]: {s: number, e: number}},
|
||||
totalFIds: number
|
||||
)
|
||||
|
||||
linearizeSyms(tree: Fun, tag: string): Array<{fid: FId, table: any}>
|
||||
syms2toks(syms: Sym[]): string[]
|
||||
linearizeAll(tree: Fun): string[]
|
||||
linearize(tree: Fun): string
|
||||
tagAndLinearize(tree: Fun): string[]
|
||||
unlex(ts: string): string
|
||||
tagIt(obj: any, tag: string): any
|
||||
// showRules(): string // Uncaught TypeError: Cannot read property 'length' of undefined at gflib.js:451
|
||||
tokenize(string: string): string[]
|
||||
parseString(string: string, cat: string): Fun[]
|
||||
complete(
|
||||
input: string,
|
||||
cat: string
|
||||
): {consumed: string[], suggestions: string[]}
|
||||
}
|
||||
|
||||
/**
|
||||
* Function ID
|
||||
*/
|
||||
type FId = number
|
||||
|
||||
/**
|
||||
* Apply
|
||||
*/
|
||||
declare class Apply {
|
||||
id: string
|
||||
fun: FId
|
||||
args: PArg[]
|
||||
|
||||
constructor(fun: FId, args: PArg[])
|
||||
|
||||
show(cat: string): string
|
||||
isEqual(obj: any): boolean
|
||||
}
|
||||
|
||||
/**
|
||||
* PArg
|
||||
*/
|
||||
declare class PArg {
|
||||
fid: FId
|
||||
hypos: any[]
|
||||
|
||||
constructor(fid: FId, ...hypos: any[])
|
||||
}
|
||||
|
||||
/**
|
||||
* Coerce
|
||||
*/
|
||||
declare class Coerce {
|
||||
id: string
|
||||
arg: FId
|
||||
|
||||
constructor(arg: FId)
|
||||
|
||||
show(cat: string): string
|
||||
}
|
||||
|
||||
/**
|
||||
* Const
|
||||
*/
|
||||
declare class Const {
|
||||
id: string
|
||||
lit: Fun
|
||||
toks: any[]
|
||||
|
||||
constructor(lit: Fun, toks: any[])
|
||||
|
||||
show(cat: string): string
|
||||
isEqual(obj: any): boolean
|
||||
}
|
||||
|
||||
/**
|
||||
* CncFun
|
||||
*/
|
||||
declare class CncFun {
|
||||
name: string
|
||||
lins: FId[]
|
||||
|
||||
constructor(name: string, lins: FId[])
|
||||
}
|
||||
|
||||
type Sym = SymCat | SymKS | SymKP | SymLit
|
||||
|
||||
/**
|
||||
* SymCat
|
||||
*/
|
||||
declare class SymCat {
|
||||
id: string
|
||||
i: number
|
||||
label: number
|
||||
|
||||
constructor(i: number, label: number)
|
||||
|
||||
getId(): string
|
||||
getArgNum(): number
|
||||
show(): string
|
||||
}
|
||||
|
||||
/**
|
||||
* SymKS
|
||||
*/
|
||||
declare class SymKS {
|
||||
id: string
|
||||
tokens: string[]
|
||||
|
||||
constructor(...tokens: string[])
|
||||
|
||||
getId(): string
|
||||
show(): string
|
||||
}
|
||||
|
||||
/**
|
||||
* SymKP
|
||||
*/
|
||||
declare class SymKP {
|
||||
id: string
|
||||
tokens: string[]
|
||||
alts: Alt[]
|
||||
|
||||
constructor(tokens: string[], alts: Alt[])
|
||||
|
||||
getId(): string
|
||||
show(): string
|
||||
}
|
||||
|
||||
/**
|
||||
* Alt
|
||||
*/
|
||||
declare class Alt {
|
||||
tokens: string[]
|
||||
prefixes: string[]
|
||||
|
||||
constructor(tokens: string[], prefixes: string[])
|
||||
}
|
||||
|
||||
/**
|
||||
* SymLit
|
||||
*/
|
||||
declare class SymLit {
|
||||
id: string
|
||||
i: number
|
||||
label: number
|
||||
|
||||
constructor(i: number, label: number)
|
||||
|
||||
getId(): string
|
||||
show(): string
|
||||
}
|
||||
|
||||
/**
|
||||
* Trie
|
||||
*/
|
||||
declare class Trie {
|
||||
value: any
|
||||
items: Trie[]
|
||||
|
||||
insertChain(keys, obj): void
|
||||
insertChain1(keys, obj): void
|
||||
lookup(key, obj): any
|
||||
isEmpty(): boolean
|
||||
}
|
||||
|
||||
/**
|
||||
* ParseState
|
||||
*/
|
||||
declare class ParseState {
|
||||
concrete: GFConcrete
|
||||
startCat: string
|
||||
items: Trie
|
||||
chart: Chart
|
||||
|
||||
constructor(concrete: GFConcrete, startCat: string)
|
||||
|
||||
next(token: string): boolean
|
||||
complete(correntToken: string): Trie
|
||||
extractTrees(): any[]
|
||||
process(
|
||||
agenda,
|
||||
literalCallback: (fid: FId) => any,
|
||||
tokenCallback: (tokens: string[], item: any) => any
|
||||
): void
|
||||
}
|
||||
|
||||
/**
|
||||
* Chart
|
||||
*/
|
||||
declare class Chart {
|
||||
active: any
|
||||
actives: {[key: number]: any}
|
||||
passive: any
|
||||
forest: {[key: number]: ApplyOrCoerce[]}
|
||||
nextId: number
|
||||
offset: number
|
||||
|
||||
constructor(concrete: GFConcrete)
|
||||
|
||||
lookupAC(fid: FId,label)
|
||||
lookupACo(offset, fid: FId, label)
|
||||
|
||||
labelsAC(fid: FId)
|
||||
insertAC(fid: FId, label, items): void
|
||||
|
||||
lookupPC(fid: FId, label, offset)
|
||||
insertPC(fid1: FId, label, offset, fid2: FId): void
|
||||
shift(): void
|
||||
expandForest(fid: FId): any[]
|
||||
}
|
||||
|
||||
/**
|
||||
* ActiveItem
|
||||
*/
|
||||
declare class ActiveItem {
|
||||
offset: number
|
||||
dot: number
|
||||
fun: CncFun
|
||||
seq: Array<Sym>
|
||||
args: PArg[]
|
||||
fid: FId
|
||||
lbl: number
|
||||
|
||||
constructor(
|
||||
offset: number,
|
||||
dot: number,
|
||||
fun: CncFun,
|
||||
seq: Array<Sym>,
|
||||
args: PArg[],
|
||||
fid: FId,
|
||||
lbl: number
|
||||
)
|
||||
|
||||
isEqual(obj: any): boolean
|
||||
shiftOverArg(i: number, fid: FId): ActiveItem
|
||||
shiftOverTokn(): ActiveItem
|
||||
}
|
||||
Reference in New Issue
Block a user