Merge with master and drop the Haskell runtime completely

This commit is contained in:
krangelov
2019-09-19 22:01:57 +02:00
488 changed files with 8762 additions and 39251 deletions

View File

@@ -68,6 +68,7 @@ libpgf_la_SOURCES = \
pgf/data.h \
pgf/expr.c \
pgf/expr.h \
pgf/scanner.c \
pgf/parser.c \
pgf/lookup.c \
pgf/jit.c \

View File

@@ -64,6 +64,8 @@
#ifdef GU_ALIGNOF
# define gu_alignof GU_ALIGNOF
#elif defined(_MSC_VER)
# define gu_alignof __alignof
#else
# define gu_alignof(t_) \
((size_t)(offsetof(struct { char c_; t_ e_; }, e_)))
@@ -77,7 +79,7 @@
#define GU_COMMA ,
#define GU_ARRAY_LEN(t,a) (sizeof((const t[])a) / sizeof(t))
#define GU_ARRAY_LEN(a) (sizeof(a) / sizeof(a[0]))
#define GU_ID(...) __VA_ARGS__
@@ -183,9 +185,13 @@ typedef union {
void (*fp)();
} GuMaxAlign;
#if defined(_MSC_VER)
#include <malloc.h>
#define gu_alloca(N) alloca(N)
#else
#define gu_alloca(N) \
(((union { GuMaxAlign align_; uint8_t buf_[N]; }){{0}}).buf_)
#endif
// For Doxygen
#define GU_PRIVATE /** @private */

View File

@@ -7,6 +7,9 @@
typedef struct GuMapData GuMapData;
#define SKIP_DELETED 1
#define SKIP_NONE 2
struct GuMapData {
uint8_t* keys;
uint8_t* values;
@@ -19,6 +22,7 @@ struct GuMap {
GuHasher* hasher;
size_t key_size;
size_t value_size;
size_t cell_size; // cell_size = GU_MAX(value_size,sizeof(uint8_t))
const void* default_value;
GuMapData data;
@@ -30,9 +34,7 @@ gu_map_finalize(GuFinalizer* fin)
{
GuMap* map = gu_container(fin, GuMap, fin);
gu_mem_buf_free(map->data.keys);
if (map->value_size) {
gu_mem_buf_free(map->data.values);
}
gu_mem_buf_free(map->data.values);
}
static const GuWord gu_map_empty_key = 0;
@@ -68,7 +70,7 @@ gu_map_entry_is_free(GuMap* map, GuMapData* data, size_t idx)
}
static bool
gu_map_lookup(GuMap* map, const void* key, size_t* idx_out)
gu_map_lookup(GuMap* map, const void* key, uint8_t del, size_t* idx_out)
{
size_t n = map->data.n_entries;
if (map->hasher == gu_addr_hasher) {
@@ -78,13 +80,17 @@ gu_map_lookup(GuMap* map, const void* key, size_t* idx_out)
while (true) {
const void* entry_key =
((const void**)map->data.keys)[idx];
if (entry_key == NULL && map->data.zero_idx != idx) {
*idx_out = idx;
return false;
if (map->data.values[idx * map->cell_size] != del) { //skip deleted
*idx_out = idx;
return false;
}
} else if (entry_key == key) {
*idx_out = idx;
return true;
}
idx = (idx + offset) % n;
}
} else if (map->hasher == gu_word_hasher) {
@@ -156,33 +162,18 @@ gu_map_resize(GuMap* map, size_t req_entries)
size_t key_size = map->key_size;
size_t key_alloc = 0;
data->keys = gu_mem_buf_alloc(req_entries * key_size, &key_alloc);
memset(data->keys, 0, key_alloc);
size_t value_size = map->value_size;
size_t value_alloc = 0;
if (value_size) {
data->values = gu_mem_buf_alloc(req_entries * value_size,
&value_alloc);
memset(data->values, 0, value_alloc);
}
data->n_entries = gu_twin_prime_inf(value_size ?
GU_MIN(key_alloc / key_size,
value_alloc / value_size)
: key_alloc / key_size);
if (map->hasher == gu_addr_hasher) {
for (size_t i = 0; i < data->n_entries; i++) {
((const void**)data->keys)[i] = NULL;
}
} else if (map->hasher == gu_string_hasher) {
for (size_t i = 0; i < data->n_entries; i++) {
((GuString*)data->keys)[i] = NULL;
}
} else {
memset(data->keys, 0, key_alloc);
}
size_t cell_size = map->cell_size;
data->values = gu_mem_buf_alloc(req_entries * cell_size, &value_alloc);
memset(data->values, 0, value_alloc);
data->n_entries = gu_twin_prime_inf(
GU_MIN(key_alloc / key_size,
value_alloc / cell_size));
gu_assert(data->n_entries > data->n_occupied);
data->n_occupied = 0;
data->zero_idx = SIZE_MAX;
@@ -196,16 +187,14 @@ gu_map_resize(GuMap* map, size_t req_entries)
} else if (map->hasher == gu_string_hasher) {
old_key = (void*) *(GuString*)old_key;
}
void* old_value = &old_data.values[i * value_size];
void* old_value = &old_data.values[i * cell_size];
memcpy(gu_map_insert(map, old_key),
old_value, map->value_size);
}
gu_mem_buf_free(old_data.keys);
if (value_size) {
gu_mem_buf_free(old_data.values);
}
gu_mem_buf_free(old_data.values);
}
@@ -226,9 +215,9 @@ GU_API void*
gu_map_find(GuMap* map, const void* key)
{
size_t idx;
bool found = gu_map_lookup(map, key, &idx);
bool found = gu_map_lookup(map, key, SKIP_DELETED, &idx);
if (found) {
return &map->data.values[idx * map->value_size];
return &map->data.values[idx * map->cell_size];
}
return NULL;
}
@@ -244,7 +233,7 @@ GU_API const void*
gu_map_find_key(GuMap* map, const void* key)
{
size_t idx;
bool found = gu_map_lookup(map, key, &idx);
bool found = gu_map_lookup(map, key, SKIP_DELETED, &idx);
if (found) {
return &map->data.keys[idx * map->key_size];
}
@@ -255,17 +244,17 @@ GU_API bool
gu_map_has(GuMap* ht, const void* key)
{
size_t idx;
return gu_map_lookup(ht, key, &idx);
return gu_map_lookup(ht, key, SKIP_DELETED, &idx);
}
GU_API void*
gu_map_insert(GuMap* map, const void* key)
{
size_t idx;
bool found = gu_map_lookup(map, key, &idx);
bool found = gu_map_lookup(map, key, SKIP_NONE, &idx);
if (!found) {
if (gu_map_maybe_resize(map)) {
found = gu_map_lookup(map, key, &idx);
found = gu_map_lookup(map, key, SKIP_NONE, &idx);
gu_assert(!found);
}
if (map->hasher == gu_addr_hasher) {
@@ -277,7 +266,7 @@ gu_map_insert(GuMap* map, const void* key)
key, map->key_size);
}
if (map->default_value) {
memcpy(&map->data.values[idx * map->value_size],
memcpy(&map->data.values[idx * map->cell_size],
map->default_value, map->value_size);
}
if (gu_map_entry_is_free(map, &map->data, idx)) {
@@ -286,7 +275,32 @@ gu_map_insert(GuMap* map, const void* key)
}
map->data.n_occupied++;
}
return &map->data.values[idx * map->value_size];
return &map->data.values[idx * map->cell_size];
}
/**
 * Remove `key` from `map` if it is present; do nothing otherwise.
 *
 * The key slot is cleared and the first byte of the value cell is set
 * to SKIP_DELETED so that lookups can tell a deleted slot from a slot
 * that was never used.
 *
 * @param map  the map to modify
 * @param key  the key to remove (interpreted according to map->hasher)
 */
GU_API void
gu_map_delete(GuMap* map, const void* key)
{
	size_t idx;
	// NOTE(review): the lookup is done with SKIP_NONE, i.e. probing
	// stops at a deleted slot. Confirm that a key stored behind a
	// deleted slot in the same probe chain is still expected to be
	// found here.
	if (!gu_map_lookup(map, key, SKIP_NONE, &idx))
		return;

	// Clear the key slot using the representation of the hasher in use.
	if (map->hasher == gu_addr_hasher) {
		((const void**)map->data.keys)[idx] = NULL;
	} else if (map->hasher == gu_string_hasher) {
		((GuString*)map->data.keys)[idx] = NULL;
	} else {
		memset(&map->data.keys[idx * map->key_size],
		       0, map->key_size);
	}

	// Mark the cell as deleted.
	map->data.values[idx * map->cell_size] = SKIP_DELETED;

	// Only forget the zero-key slot when it is the slot being deleted.
	// Testing the key bytes at this point would always succeed, since
	// the key has just been cleared above, and would therefore drop
	// zero_idx on every deletion.
	if (map->data.zero_idx == idx) {
		map->data.zero_idx = SIZE_MAX;
	}

	map->data.n_occupied--;
}
GU_API void
@@ -297,7 +311,7 @@ gu_map_iter(GuMap* map, GuMapItor* itor, GuExn* err)
continue;
}
const void* key = &map->data.keys[i * map->key_size];
void* value = &map->data.values[i * map->value_size];
void* value = &map->data.values[i * map->cell_size];
if (map->hasher == gu_addr_hasher) {
key = *(const void* const*) key;
} else if (map->hasher == gu_string_hasher) {
@@ -307,47 +321,30 @@ gu_map_iter(GuMap* map, GuMapItor* itor, GuExn* err)
}
}
typedef struct {
GuEnum en;
GuMap* ht;
size_t i;
GuMapKeyValue x;
} GuMapEnum;
static void
gu_map_enum_next(GuEnum* self, void* to, GuPool* pool)
GU_API bool
gu_map_next(GuMap* map, size_t* pi, void** pkey, void* pvalue)
{
*((GuMapKeyValue**) to) = NULL;
size_t i;
GuMapEnum* en = (GuMapEnum*) self;
for (i = en->i; i < en->ht->data.n_entries; i++) {
if (gu_map_entry_is_free(en->ht, &en->ht->data, i)) {
while (*pi < map->data.n_entries) {
if (gu_map_entry_is_free(map, &map->data, *pi)) {
(*pi)++;
continue;
}
en->x.key = &en->ht->data.keys[i * en->ht->key_size];
en->x.value = &en->ht->data.values[i * en->ht->value_size];
if (en->ht->hasher == gu_addr_hasher) {
en->x.key = *(const void* const*) en->x.key;
} else if (en->ht->hasher == gu_string_hasher) {
en->x.key = *(GuString*) en->x.key;
*pkey = &map->data.keys[*pi * map->key_size];
if (map->hasher == gu_addr_hasher) {
*pkey = *(void**) *pkey;
} else if (map->hasher == gu_string_hasher) {
*pkey = *(void**) *pkey;
}
*((GuMapKeyValue**) to) = &en->x;
break;
}
en->i = i+1;
}
memcpy(pvalue, &map->data.values[*pi * map->cell_size],
map->value_size);
GU_API GuEnum*
gu_map_enum(GuMap* ht, GuPool* pool)
{
GuMapEnum* en = gu_new(GuMapEnum, pool);
en->en.next = gu_map_enum_next;
en->ht = ht;
en->i = 0;
return &en->en;
(*pi)++;
return true;
}
return false;
}
GU_API size_t
@@ -363,8 +360,6 @@ gu_map_count(GuMap* map)
return count;
}
static const uint8_t gu_map_no_values[1] = { 0 };
GU_API GuMap*
gu_make_map(size_t key_size, GuHasher* hasher,
size_t value_size, const void* default_value,
@@ -375,7 +370,7 @@ gu_make_map(size_t key_size, GuHasher* hasher,
.n_occupied = 0,
.n_entries = 0,
.keys = NULL,
.values = value_size ? NULL : (uint8_t*) gu_map_no_values,
.values = NULL,
.zero_idx = SIZE_MAX
};
GuMap* map = gu_new(GuMap, pool);
@@ -384,6 +379,7 @@ gu_make_map(size_t key_size, GuHasher* hasher,
map->data = data;
map->key_size = key_size;
map->value_size = value_size;
map->cell_size = GU_MAX(value_size,sizeof(uint8_t));
map->fin.fn = gu_map_finalize;
gu_pool_finally(pool, &map->fin);

View File

@@ -62,6 +62,9 @@ gu_map_has(GuMap* ht, const void* key);
GU_API_DECL void*
gu_map_insert(GuMap* ht, const void* key);
GU_API_DECL void
gu_map_delete(GuMap* ht, const void* key);
#define gu_map_put(MAP, KEYP, V, VAL) \
GU_BEGIN \
V* gu_map_put_p_ = gu_map_insert((MAP), (KEYP)); \
@@ -71,13 +74,8 @@ gu_map_insert(GuMap* ht, const void* key);
GU_API_DECL void
gu_map_iter(GuMap* ht, GuMapItor* itor, GuExn* err);
typedef struct {
const void* key;
void* value;
} GuMapKeyValue;
GU_API_DECL GuEnum*
gu_map_enum(GuMap* ht, GuPool* pool);
GU_API bool
gu_map_next(GuMap* map, size_t* pi, void** pkey, void* pvalue);
typedef GuMap GuIntMap;

View File

@@ -8,6 +8,10 @@
#include <sys/mman.h>
#include <sys/stat.h>
#endif
#if defined(__MINGW32__) || defined(_MSC_VER)
#include <malloc.h>
#endif
#if !defined(_MSC_VER)
#include <unistd.h>
#endif
@@ -108,6 +112,39 @@ gu_mem_buf_alloc(size_t min_size, size_t* real_size_out)
return gu_mem_buf_realloc(NULL, min_size, real_size_out);
}
#if defined(__MINGW32__) || defined(_MSC_VER)
#include <windows.h>
/* Windows replacement for the POSIX getpagesize(): reports the page
 * size returned by GetSystemInfo(). */
static int
getpagesize()
{
	SYSTEM_INFO info;

	GetSystemInfo(&info);
	return info.dwPageSize;
}
#endif
/* Allocate a block whose size is min_size rounded up to a whole number
 * of pages (as reported by getpagesize()). The rounded size is stored
 * in *real_size_out. The allocator used depends on the platform:
 * memalign on Android, plain malloc on MinGW/MSVC, posix_memalign
 * elsewhere. Calls gu_fatal() if the allocation fails. */
GU_API void*
gu_mem_page_alloc(size_t min_size, size_t* real_size_out)
{
size_t page_size = getpagesize();
// round min_size up to the next multiple of page_size
size_t size = ((min_size + page_size - 1) / page_size) * page_size;
void *page = NULL;
// Each preprocessor branch opens the same `if (...) {`; the shared
// body and closing brace follow the #endif.
#if defined(ANDROID)
if ((page = memalign(page_size, size)) == NULL) {
#elif defined(__MINGW32__) || defined(_MSC_VER)
// NOTE(review): malloc is not asked for page alignment here.
if ((page = malloc(size)) == NULL) {
#else
if (posix_memalign(&page, page_size, size) != 0) {
#endif
gu_fatal("Memory allocation failed");
}
*real_size_out = size;
return page;
}
GU_API void
gu_mem_buf_free(void* buf)
{
@@ -132,6 +169,7 @@ struct GuFinalizerNode {
enum GuPoolType {
GU_POOL_HEAP,
GU_POOL_LOCAL,
GU_POOL_PAGE,
GU_POOL_MMAP
};
@@ -180,6 +218,16 @@ gu_new_pool(void)
return pool;
}
/* Create a pool of type GU_POOL_PAGE whose initial buffer is obtained
 * from gu_mem_page_alloc(). */
GU_API GuPool*
gu_new_page_pool(void)
{
	size_t wanted = GU_FLEX_SIZE(GuPool, init_buf, gu_mem_pool_initial_size);
	size_t granted = wanted;

	// gu_mem_page_alloc reports the page-rounded size actually allocated
	uint8_t* mem = gu_mem_page_alloc(wanted, &granted);

	GuPool* page_pool = gu_init_pool(mem, granted);
	page_pool->type = GU_POOL_PAGE;
	return page_pool;
}
GU_API GuPool*
gu_mmap_pool(char* fpath, void* addr, size_t size, void**pptr)
{
@@ -238,7 +286,10 @@ gu_pool_expand(GuPool* pool, size_t req)
gu_mem_chunk_max_size));
gu_assert(real_req >= sizeof(GuMemChunk));
size_t size = 0;
GuMemChunk* chunk = gu_mem_buf_alloc(real_req, &size);
GuMemChunk* chunk =
(pool->type == GU_POOL_PAGE)
? gu_mem_page_alloc(real_req, &size)
: gu_mem_buf_alloc(real_req, &size);
chunk->next = pool->chunks;
pool->chunks = chunk;
pool->curr_buf = (uint8_t*) chunk;
@@ -309,6 +360,7 @@ gu_malloc_prefixed(GuPool* pool, size_t pre_align, size_t pre_size,
size_t full_size = gu_mem_advance(offsetof(GuMemChunk, data),
pre_align, pre_size, align, size);
if (full_size > gu_mem_max_shared_alloc &&
pool->type != GU_POOL_PAGE &&
pool->type != GU_POOL_MMAP) {
GuMemChunk* chunk = gu_mem_alloc(full_size);
chunk->next = pool->chunks;

View File

@@ -55,6 +55,11 @@ gu_local_pool_(uint8_t* init_buf, size_t sz);
* should not be used in the bodies of recursive functions.
*/
/// Create a pool where each chunk corresponds to one or
/// more memory pages.
GU_API_DECL GuPool*
gu_new_page_pool(void);
/// Create a pool stored in a memory mapped file.
GU_API_DECL GuPool*
gu_mmap_pool(char* fpath, void* addr, size_t size, void**pptr);
@@ -198,6 +203,9 @@ gu_mem_buf_realloc(
size_t min_size,
size_t* real_size_out);
/// Allocate enough memory pages to contain min_size bytes.
GU_API_DECL void*
gu_mem_page_alloc(size_t min_size, size_t* real_size_out);
/// Free a memory buffer.
GU_API_DECL void

View File

@@ -100,6 +100,11 @@ gu_seq_free(GuSeq* seq)
gu_mem_buf_free(seq);
}
/* Finalizer that does nothing. gu_buf_evacuate() installs it on a
 * buffer, and gu_buf_require() checks for it. */
static void
gu_dummy_finalizer(GuFinalizer* self)
{
	(void) self;
}
GU_API void
gu_buf_require(GuBuf* buf, size_t req_len)
{
@@ -109,7 +114,9 @@ gu_buf_require(GuBuf* buf, size_t req_len)
size_t req_size = sizeof(GuSeq) + buf->elem_size * req_len;
size_t real_size;
gu_require(buf->fin.fn != gu_dummy_finalizer);
if (buf->seq == NULL || buf->seq == gu_empty_seq()) {
buf->seq = gu_mem_buf_alloc(req_size, &real_size);
buf->seq->len = 0;
@@ -164,6 +171,24 @@ gu_buf_freeze(GuBuf* buf, GuPool* pool)
return seq;
}
/* Move the contents of `buf` into a sequence allocated from `pool`.
 * The old storage is released with gu_mem_buf_free(), the buffer's
 * finalizer is replaced by gu_dummy_finalizer and avail_len is set to
 * the current length. A buffer that still uses the shared empty
 * sequence is left untouched. */
GU_API void
gu_buf_evacuate(GuBuf* buf, GuPool* pool)
{
	if (buf->seq == gu_empty_seq())
		return;

	size_t n_elems = gu_buf_length(buf);
	GuSeq* pooled = gu_make_seq(buf->elem_size, n_elems, pool);

	void* src = gu_buf_data(buf);
	void* dst = gu_seq_data(pooled);
	memcpy(dst, src, buf->elem_size * n_elems);

	gu_mem_buf_free(buf->seq);

	buf->seq = pooled;
	buf->avail_len = n_elems;
	buf->fin.fn = gu_dummy_finalizer;
}
GU_API void*
gu_buf_insert(GuBuf* buf, size_t index)
{
@@ -335,13 +360,8 @@ GU_API void
gu_buf_heap_pop(GuBuf *buf, GuOrder *order, void* data_out)
{
const void* last = gu_buf_trim(buf); // raises an error if empty
if (gu_buf_length(buf) > 0) {
memcpy(data_out, buf->seq->data, buf->elem_size);
gu_heap_siftup(buf, order, last, 0);
} else {
memcpy(data_out, last, buf->elem_size);
}
memcpy(data_out, buf->seq->data, buf->elem_size);
gu_heap_siftup(buf, order, last, 0);
}
GU_API void

View File

@@ -182,6 +182,9 @@ gu_buf_heapify(GuBuf *buf, GuOrder *order);
GU_API_DECL GuSeq*
gu_buf_freeze(GuBuf* buf, GuPool* pool);
GU_API_DECL void
gu_buf_evacuate(GuBuf* buf, GuPool* pool);
#endif // GU_SEQ_H_
#ifdef GU_STRING_H_

View File

@@ -344,8 +344,9 @@ struct PgfCCat {
PgfCncFuns* linrefs;
size_t n_synprods;
PgfProductionSeq* prods;
float viterbi_prob;
prob_t viterbi_prob;
int fid;
int chunk_count;
PgfItemConts* conts;
struct PgfAnswers* answers;
GuFinalizer fin[0];

View File

@@ -198,16 +198,16 @@ pgf_literal_hash(GuHash h, PgfLiteral lit);
PGF_API_DECL GuHash
pgf_expr_hash(GuHash h, PgfExpr e);
PGF_API size_t
PGF_API_DECL size_t
pgf_expr_size(PgfExpr expr);
PGF_API GuSeq*
PGF_API_DECL GuSeq*
pgf_expr_functions(PgfExpr expr, GuPool* pool);
PGF_API PgfExpr
PGF_API_DECL PgfExpr
pgf_expr_substitute(PgfExpr expr, GuSeq* meta_values, GuPool* pool);
PGF_API PgfType*
PGF_API_DECL PgfType*
pgf_type_substitute(PgfType* type, GuSeq* meta_values, GuPool* pool);
typedef struct PgfPrintContext PgfPrintContext;

View File

@@ -5,9 +5,6 @@
#include <pgf/reasoner.h>
#include <pgf/reader.h>
#include "lightning.h"
#if defined(__MINGW32__) || defined(_MSC_VER)
#include <malloc.h>
#endif
//#define PGF_JIT_DEBUG
@@ -43,18 +40,6 @@ typedef struct {
#define JIT_VSTATE JIT_V1
#define JIT_VCLOS JIT_V2
#if defined(__MINGW32__) || defined(_MSC_VER)
#include <windows.h>
static int
getpagesize()
{
SYSTEM_INFO system_info;
GetSystemInfo(&system_info);
return system_info.dwPageSize;
}
#endif
static void
pgf_jit_finalize_page(GuFinalizer* self)
@@ -65,19 +50,8 @@ pgf_jit_finalize_page(GuFinalizer* self)
static void
pgf_jit_alloc_page(PgfReader* rdr)
{
void *page;
size_t page_size = getpagesize();
#if defined(ANDROID)
if ((page = memalign(page_size, page_size)) == NULL) {
#elif defined(__MINGW32__) || defined(_MSC_VER)
if ((page = malloc(page_size)) == NULL) {
#else
if (posix_memalign(&page, page_size, page_size) != 0) {
#endif
gu_fatal("Memory allocation failed");
}
size_t page_size;
void *page = gu_mem_page_alloc(sizeof(GuFinalizer), &page_size);
GuFinalizer* fin = page;
fin->fn = pgf_jit_finalize_page;

File diff suppressed because it is too large Load Diff

View File

@@ -162,6 +162,22 @@ PGF_API_DECL void
pgf_lookup_morpho(PgfConcr *concr, GuString sentence,
PgfMorphoCallback* callback, GuExn* err);
/// A position inside a sentence being scanned.
typedef struct {
size_t pos;   ///< position counter; the scanner increments it once per decoded character
GuString ptr; ///< pointer to the corresponding place in the sentence text
} PgfCohortSpot;
/// A span of the sentence reported by the cohort lookup.
typedef struct {
PgfCohortSpot start; ///< where the span begins
PgfCohortSpot end;   ///< where the span ends
GuBuf* buf;          ///< the index entries of the sequence matched on this span
} PgfCohortRange;
/// Create an enumeration over the spans of `sentence` that match
/// entries of the concrete syntax. Each step of the enumeration fills
/// in a PgfCohortRange and reports the analyses through `callback`.
/// NOTE(review): may return NULL (with `err` raised) when the concrete
/// syntax is not loaded -- confirm with the implementation.
PGF_API_DECL GuEnum*
pgf_lookup_cohorts(PgfConcr *concr, GuString sentence,
PgfMorphoCallback* callback,
GuPool* pool, GuExn* err);
typedef struct PgfFullFormEntry PgfFullFormEntry;
PGF_API_DECL GuEnum*

View File

@@ -98,6 +98,74 @@ pgf_print_fid(int fid, GuOut* out, GuExn* err)
gu_printf(out, err, "C%d", fid);
}
/* Print the argument list of a production as a comma-separated list.
 * An argument with a non-empty hypothesis list is printed as
 * "Ch1 Ch2 -> Carg", otherwise only the fid of its category is
 * printed. */
PGF_INTERNAL void
pgf_print_production_args(PgfPArgs* args,
                          GuOut* out, GuExn* err)
{
	size_t arg_count = gu_seq_length(args);
	size_t a = 0;

	while (a < arg_count) {
		if (a != 0)
			gu_putc(',',out,err);

		PgfPArg parg = gu_seq_get(args, PgfPArg, a);

		size_t hypo_count =
			(parg.hypos != NULL) ? gu_seq_length(parg.hypos) : 0;
		if (hypo_count > 0) {
			for (size_t h = 0; h < hypo_count; h++) {
				PgfCCat *hypo_cat = gu_seq_get(parg.hypos, PgfCCat*, h);
				pgf_print_fid(hypo_cat->fid, out, err);
				gu_putc(' ',out,err);
			}
			gu_puts("-> ",out,err);
		}

		pgf_print_fid(parg.ccat->fid, out, err);
		a++;
	}
}
/* Print one production of the category `fid` on a single line, in the
 * form "<fid> -> <right-hand side>\n". The right-hand side depends on
 * the kind of production (application, coercion or external). */
PGF_INTERNAL void
pgf_print_production(int fid, PgfProduction prod,
GuOut *out, GuExn* err)
{
pgf_print_fid(fid, out, err);
gu_puts(" -> ", out, err);
GuVariantInfo i = gu_variant_open(prod);
switch (i.tag) {
case PGF_PRODUCTION_APPLY: {
// F<funid>(<expr or linref>)[<args>]
PgfProductionApply* papp = i.data;
gu_printf(out,err,"F%d(",papp->fun->funid);
if (papp->fun->ep != NULL) {
pgf_print_expr(papp->fun->ep->expr, NULL, 0, out, err);
} else {
// no expression attached: print the abstract category name of
// the first argument, prefixed with "linref"
PgfPArg* parg = gu_seq_index(papp->args, PgfPArg, 0);
gu_printf(out,err,"linref %s", parg->ccat->cnccat->abscat->name);
}
gu_printf(out,err,")[");
pgf_print_production_args(papp->args,out,err);
gu_printf(out,err,"]\n");
break;
}
case PGF_PRODUCTION_COERCE: {
// _[<fid of the category coerced from>]
PgfProductionCoerce* pcoerce = i.data;
gu_puts("_[",out,err);
pgf_print_fid(pcoerce->coerce->fid, out, err);
gu_puts("]\n",out,err);
break;
}
case PGF_PRODUCTION_EXTERN: {
// <extern>(<expr>)[]
PgfProductionExtern* pext = i.data;
gu_printf(out,err,"<extern>(");
pgf_print_expr(pext->ep->expr, NULL, 0, out, err);
gu_printf(out,err,")[]\n");
break;
}
default:
gu_impossible();
}
}
static void
pgf_print_productions(GuMapItor* fn, const void* key, void* value,
GuExn* err)
@@ -111,48 +179,7 @@ pgf_print_productions(GuMapItor* fn, const void* key, void* value,
size_t n_prods = gu_seq_length(ccat->prods);
for (size_t i = 0; i < n_prods; i++) {
PgfProduction prod = gu_seq_get(ccat->prods, PgfProduction, i);
gu_puts(" ", out, err);
pgf_print_fid(fid, out, err);
gu_puts(" -> ", out, err);
GuVariantInfo i = gu_variant_open(prod);
switch (i.tag) {
case PGF_PRODUCTION_APPLY: {
PgfProductionApply* papp = i.data;
gu_printf(out,err,"F%d[",papp->fun->funid);
size_t n_args = gu_seq_length(papp->args);
for (size_t j = 0; j < n_args; j++) {
if (j > 0)
gu_putc(',',out,err);
PgfPArg arg = gu_seq_get(papp->args, PgfPArg, j);
if (arg.hypos != NULL) {
size_t n_hypos = gu_seq_length(arg.hypos);
for (size_t k = 0; k < n_hypos; k++) {
if (k > 0)
gu_putc(' ',out,err);
PgfCCat *hypo = gu_seq_get(arg.hypos, PgfCCat*, k);
pgf_print_fid(hypo->fid, out, err);
}
}
pgf_print_fid(arg.ccat->fid, out, err);
}
gu_printf(out,err,"]\n");
break;
}
case PGF_PRODUCTION_COERCE: {
PgfProductionCoerce* pcoerce = i.data;
gu_puts("_[", out, err);
pgf_print_fid(pcoerce->coerce->fid, out, err);
gu_puts("]\n", out, err);
break;
}
default:
gu_impossible();
}
pgf_print_production(fid, prod, out, err);
}
}
}

View File

@@ -328,16 +328,20 @@ pgf_read_patt(PgfReader* rdr)
uint8_t tag = pgf_read_tag(rdr);
switch (tag) {
case PGF_PATT_APP: {
PgfCId ctor = pgf_read_cid(rdr, rdr->opool);
gu_return_on_exn(rdr->err, gu_null_variant);
size_t n_args = pgf_read_len(rdr);
gu_return_on_exn(rdr->err, gu_null_variant);
PgfPattApp *papp =
gu_new_variant(PGF_PATT_APP,
PgfPattApp,
&patt, rdr->opool);
papp->ctor = pgf_read_cid(rdr, rdr->opool);
gu_return_on_exn(rdr->err, gu_null_variant);
papp->n_args = pgf_read_len(rdr);
gu_return_on_exn(rdr->err, gu_null_variant);
gu_new_flex_variant(PGF_PATT_APP,
PgfPattApp,
args, n_args,
&patt, rdr->opool);
papp->ctor = ctor;
papp->n_args = n_args;
for (size_t i = 0; i < papp->n_args; i++) {
papp->args[i] = pgf_read_patt(rdr);
gu_return_on_exn(rdr->err, gu_null_variant);
@@ -840,6 +844,7 @@ pgf_read_fid(PgfReader* rdr, PgfConcr* concr)
ccat->prods = NULL;
ccat->viterbi_prob = 0;
ccat->fid = fid;
ccat->chunk_count = 1;
ccat->conts = NULL;
ccat->answers = NULL;
@@ -1077,6 +1082,7 @@ pgf_read_cnccat(PgfReader* rdr, PgfAbstr* abstr, PgfConcr* concr, PgfCId name)
ccat->prods = NULL;
ccat->viterbi_prob = 0;
ccat->fid = fid;
ccat->chunk_count = 1;
ccat->conts = NULL;
ccat->answers = NULL;

516
src/runtime/c/pgf/scanner.c Normal file
View File

@@ -0,0 +1,516 @@
#include <pgf/data.h>
#include <pgf/expr.h>
#include <pgf/linearizer.h>
#include <gu/utf8.h>
/* Compare the text at `spot` with the token `tok`, one UTF-8 character
 * at a time, optionally folding both sides to lower case.
 *
 * Returns 0 as soon as `tok` is exhausted, -1 if the text at `spot`
 * ends first, and otherwise the difference of the first pair of
 * characters that differ. `spot` is advanced over every character
 * that matched. */
PGF_INTERNAL int
cmp_string(PgfCohortSpot* spot, GuString tok,
           bool case_sensitive)
{
	const uint8_t* tok_cursor = (const uint8_t*) tok;

	while (true) {
		GuUCS tok_ch = gu_utf8_decode(&tok_cursor);
		if (tok_ch == 0)
			return 0;

		const uint8_t* text_cursor = (uint8_t*) spot->ptr;
		GuUCS text_ch = gu_utf8_decode(&text_cursor);
		if (text_ch == 0)
			return -1;

		if (!case_sensitive) {
			text_ch = gu_ucs_to_lower(text_ch);
			tok_ch  = gu_ucs_to_lower(tok_ch);
		}

		if (text_ch != tok_ch)
			return (text_ch-tok_ch);

		// the characters agree: consume one character of the text
		spot->ptr = (GuString) text_cursor;
		spot->pos++;
	}
}
/* If the character at *psent is a space, step over it, increment *ppos
 * and return true. Otherwise leave both untouched and return false. */
PGF_INTERNAL bool
skip_space(GuString* psent, size_t* ppos)
{
	const uint8_t* cursor = (uint8_t*) *psent;
	GuUCS ch = gu_utf8_decode(&cursor);

	if (gu_ucs_is_space(ch)) {
		*psent = (GuString) cursor;
		(*ppos)++;
		return true;
	}
	return false;
}
/* Compare the text at `spot` with the symbols of `syms`, starting at
 * symbol number *sym_idx.
 *
 * `spot` is advanced over the text that matched and *sym_idx over the
 * symbols that matched, so after the call both tell how far the
 * comparison got.
 *
 * Returns 0 when every remaining symbol matched (the text may still
 * continue after the match), and a non-zero value at the first symbol
 * that does not match. */
PGF_INTERNAL int
pgf_symbols_cmp(PgfCohortSpot* spot,
PgfSymbols* syms, size_t* sym_idx,
bool case_sensitive)
{
size_t n_syms = gu_seq_length(syms);
while (*sym_idx < n_syms) {
PgfSymbol sym = gu_seq_get(syms, PgfSymbol, *sym_idx);
// Every symbol except the one at index 0 must be preceded by
// at least one space in the text.
if (*sym_idx > 0) {
if (!skip_space(&spot->ptr,&spot->pos)) {
// no space: -1 when the text has ended, 1 otherwise
if (*spot->ptr == 0)
return -1;
return 1;
}
// swallow any further spaces
while (*spot->ptr != 0) {
if (!skip_space(&spot->ptr,&spot->pos))
break;
}
}
GuVariantInfo inf = gu_variant_open(sym);
switch (inf.tag) {
case PGF_SYMBOL_CAT:
case PGF_SYMBOL_LIT:
case PGF_SYMBOL_VAR: {
// these symbols never match text: -1 at the end of the
// text, 1 otherwise
if (*spot->ptr == 0)
return -1;
return 1;
}
case PGF_SYMBOL_KS: {
// a plain token: compare it with the text
PgfSymbolKS* pks = inf.data;
if (*spot->ptr == 0)
return -1;
int cmp = cmp_string(spot,pks->token, case_sensitive);
if (cmp != 0)
return cmp;
break;
}
case PGF_SYMBOL_KP:
case PGF_SYMBOL_BIND:
case PGF_SYMBOL_NE:
case PGF_SYMBOL_SOFT_BIND:
case PGF_SYMBOL_SOFT_SPACE:
case PGF_SYMBOL_CAPIT:
case PGF_SYMBOL_ALL_CAPIT: {
// these symbol kinds are not compared against the text
return -1;
}
default:
gu_impossible();
}
(*sym_idx)++;
}
return 0;
}
/* Report every entry of the production index `idx` through `callback`.
 * For each entry the callback receives the name of the abstract
 * function, the label of the linearization field and the sum of the
 * category and function probabilities. Iteration stops as soon as
 * `err` is no longer ok after a callback. */
static void
pgf_morpho_iter(PgfProductionIdx* idx,
                PgfMorphoCallback* callback,
                GuExn* err)
{
	size_t total = gu_buf_length(idx);
	size_t n = 0;

	while (n < total) {
		PgfProductionIdxEntry* e =
			gu_buf_index(idx, PgfProductionIdxEntry, n);

		PgfCId lemma = e->papp->fun->absfun->name;
		GuString analysis = e->ccat->cnccat->labels[e->lin_idx];
		prob_t prob = e->ccat->cnccat->abscat->prob +
		              e->papp->fun->absfun->ep.prob;

		callback->callback(callback,
		                   lemma, analysis, prob, err);
		if (!gu_ok(err))
			break;

		n++;
	}
}
/* A GuOrder extended with the case-sensitivity setting.
 * pgf_sequence_cmp_fn recovers this structure from the GuOrder
 * pointer it is given in order to read `case_sensitive`. */
typedef struct {
GuOrder order;
bool case_sensitive;
} PgfSequenceOrder;
/* Tell whether lookups in `concr` are case sensitive. They are, unless
 * the concrete syntax has a string-valued flag "case_sensitive" whose
 * value is "off". */
PGF_INTERNAL bool
pgf_is_case_sensitive(PgfConcr* concr)
{
	PgfFlag* flag =
		gu_seq_binsearch(concr->cflags, pgf_flag_order, PgfFlag, "case_sensitive");
	if (flag == NULL)
		return true;

	GuVariantInfo info = gu_variant_open(flag->value);
	if (info.tag != PGF_LITERAL_STR)
		return true;

	PgfLiteralStr* str_lit = info.data;
	return strcmp(str_lit->val, "off") != 0;
}
/* Comparison function for searching a sequence table by string.
 * `p1` is the string searched for and `p2` points to a PgfSequence.
 * The result is whatever pgf_symbols_cmp reports, except that a match
 * which leaves text or symbols unconsumed counts as 1. */
static int
pgf_sequence_cmp_fn(GuOrder* order, const void* p1, const void* p2)
{
	PgfSequenceOrder* self = gu_container(order, PgfSequenceOrder, order);
	const PgfSequence* seq = p2;

	PgfCohortSpot cursor = {0, (GuString) p1};
	size_t matched = 0;

	int cmp = pgf_symbols_cmp(&cursor, seq->syms, &matched, self->case_sensitive);
	if (cmp != 0)
		return cmp;

	// only a complete match, on both sides, counts as equal
	if (*cursor.ptr != 0 || matched != gu_seq_length(seq->syms))
		return 1;

	return 0;
}
/**
 * Report all morphological analyses of `sentence` through `callback`.
 *
 * The sequence table of the concrete syntax is searched for an entry
 * that matches `sentence`. When the grammar is case insensitive,
 * several neighbouring entries may compare equal, so the entries
 * before and after the hit are examined as well. The order of the
 * reports is: the equal entries before the hit (nearest first), the
 * hit itself, then the equal entries after it.
 *
 * @param concr     the concrete syntax to search
 * @param sentence  the string to analyse
 * @param callback  receives each analysis
 * @param err       raised with PgfExn when the concrete syntax is not
 *                  loaded; the lookup also stops as soon as `err` is
 *                  no longer ok after a callback
 */
PGF_API void
pgf_lookup_morpho(PgfConcr *concr, GuString sentence,
                  PgfMorphoCallback* callback, GuExn* err)
{
	if (concr->sequences == NULL) {
		GuExnData* err_data = gu_raise(err, PgfExn);
		if (err_data) {
			err_data->data = "The concrete syntax is not loaded";
		}
		// Return even when gu_raise gave us no data record: there is
		// no sequence table to search.
		return;
	}

	size_t index = 0;
	PgfSequenceOrder order = { { pgf_sequence_cmp_fn },
	                           pgf_is_case_sensitive(concr) };
	if (!gu_seq_binsearch_index(concr->sequences, &order.order,
	                            PgfSequence, (void*) sentence,
	                            &index)) {
		return;
	}

	PgfSequence* seq = NULL;

	/* If the match is case-insensitive then there might be more
	 * matches around the current index. We must check the neighbour
	 * sequences for matching as well. The neighbours are tested with
	 * the same comparison as the binary search, so that a sequence
	 * which merely matches a prefix of the sentence is not reported.
	 */
	if (!order.case_sensitive) {
		size_t i = index;
		while (i > 0) {
			seq = gu_seq_index(concr->sequences, PgfSequence, i-1);

			if (pgf_sequence_cmp_fn(&order.order, sentence, seq) != 0) {
				break;
			}

			if (seq->idx != NULL) {
				pgf_morpho_iter(seq->idx, callback, err);
				if (!gu_ok(err))
					return;
			}

			i--;
		}
	}

	seq = gu_seq_index(concr->sequences, PgfSequence, index);
	if (seq->idx != NULL) {
		pgf_morpho_iter(seq->idx, callback, err);
		if (!gu_ok(err))
			return;
	}

	if (!order.case_sensitive) {
		size_t n_seqs = gu_seq_length(concr->sequences);
		for (size_t i = index+1; i < n_seqs; i++) {
			seq = gu_seq_index(concr->sequences, PgfSequence, i);

			if (pgf_sequence_cmp_fn(&order.order, sentence, seq) != 0) {
				break;
			}

			if (seq->idx != NULL) {
				pgf_morpho_iter(seq->idx, callback, err);
				if (!gu_ok(err))
					return;
			}
		}
	}
}
/* State of the enumeration returned by pgf_lookup_cohorts. */
typedef struct {
GuEnum en;                   // enumeration header; the state is recovered from it
PgfConcr* concr;             // concrete syntax whose sequences are searched
GuString sentence;           // the sentence being scanned
GuString current;            // start of the range returned last; NULL once nothing was found
size_t len;                  // strlen(sentence)
PgfMorphoCallback* callback; // receives the analyses of each returned range
GuExn* err;                  // passed on to the callback
bool case_sensitive;         // result of pgf_is_case_sensitive(concr)
GuBuf* spots;                // heap of PgfCohortSpot: positions still to be examined
GuBuf* found;                // buffer of PgfCohortRange: matches not yet returned
} PgfCohortsState;
/**
 * Order two PgfCohortSpot values by the address they point to.
 *
 * Returns a negative, zero or positive value when `a` points before,
 * at, or after `b`. The result is computed from two comparisons rather
 * than from the pointer difference, because that difference is a
 * ptrdiff_t and could be truncated when converted to int.
 */
static int
cmp_cohort_spot(GuOrder* self, const void* a, const void* b)
{
	const PgfCohortSpot *s1 = (const PgfCohortSpot *) a;
	const PgfCohortSpot *s2 = (const PgfCohortSpot *) b;

	(void) self;
	return (s1->ptr > s2->ptr) - (s1->ptr < s2->ptr);
}
/* The ordering used for the heap of spots: by text address. */
static GuOrder
pgf_cohort_spot_order[1] = {{ cmp_cohort_spot }};
/* Search the sequences with indices i..j (inclusive) for entries that
 * match a prefix of the text at `spot`. Matches with a non-empty index
 * are inserted at the front of state->found, and the position after
 * each match (with trailing spaces skipped) is pushed onto the heap
 * state->spots.
 *
 * NOTE(review): `min` and `max` appear to bound the length in bytes of
 * the prefixes that can still be found in the given index range; they
 * are only used to decide whether a sub-range needs to be visited --
 * confirm. */
static void
pgf_lookup_cohorts_helper(PgfCohortsState *state, PgfCohortSpot* spot,
int i, int j, ptrdiff_t min, ptrdiff_t max)
{
// This is a variation of a binary search algorithm which
// can retrieve all prefixes of a string with minimal
// comparisons, i.e. there is no need to lookup every
// prefix separately.
while (i <= j) {
int k = (i+j) / 2;
PgfSequence* seq = gu_seq_index(state->concr->sequences, PgfSequence, k);
// compare on a copy so that `spot` itself is not moved
PgfCohortSpot current = *spot;
size_t sym_idx = 0;
int cmp = pgf_symbols_cmp(&current, seq->syms, &sym_idx, state->case_sensitive);
if (cmp < 0) {
// continue in the lower half only
j = k-1;
} else if (cmp > 0) {
// len = number of bytes consumed before the mismatch
ptrdiff_t len = current.ptr - spot->ptr;
if (min <= len)
pgf_lookup_cohorts_helper(state, spot, i, k-1, min, len);
if (len+1 <= max)
pgf_lookup_cohorts_helper(state, spot, k+1, j, len+1, max);
break;
} else {
// sequence k matches the text at `spot`
ptrdiff_t len = current.ptr - spot->ptr;
if (min <= len)
pgf_lookup_cohorts_helper(state, spot, i, k-1, min, len);
// record the match only when it has index entries
if (seq->idx != NULL && gu_buf_length(seq->idx) > 0) {
PgfCohortRange* range = gu_buf_insert(state->found, 0);
range->start = *spot;
range->end = current;
range->buf = seq->idx;
}
// skip the spaces after the match and remember that position
// as a spot to be examined later
while (*current.ptr != 0) {
if (!skip_space(&current.ptr, &current.pos))
break;
}
gu_buf_heap_push(state->spots, pgf_cohort_spot_order, &current);
if (len <= max)
pgf_lookup_cohorts_helper(state, spot, k+1, j, len, max);
break;
}
}
}
/* Advance the cohort enumeration by one step. `to` must point to a
 * PgfCohortRange, which receives the result. When nothing more is
 * found, all of its fields are set to 0/NULL. */
static void
pgf_lookup_cohorts_enum_next(GuEnum* self, void* to, GuPool* pool)
{
PgfCohortsState* state = gu_container(self, PgfCohortsState, en);
// As long as no match is pending, take the next spot from the heap
// and search for matches that start there.
while (gu_buf_length(state->found) == 0 &&
gu_buf_length(state->spots) > 0) {
PgfCohortSpot spot;
gu_buf_heap_pop(state->spots, pgf_cohort_spot_order, &spot);
// skip a spot equal to the start of the range returned last
if (spot.ptr == state->current)
continue;
// end of the sentence
if (*spot.ptr == 0)
break;
pgf_lookup_cohorts_helper
(state, &spot,
0, gu_seq_length(state->concr->sequences)-1,
1, (state->sentence+state->len)-spot.ptr);
if (gu_buf_length(state->found) == 0) {
// skip one character and try again
gu_utf8_decode((const uint8_t**) &spot.ptr);
spot.pos++;
gu_buf_heap_push(state->spots, pgf_cohort_spot_order, &spot);
}
}
PgfCohortRange* pRes = (PgfCohortRange*)to;
if (gu_buf_length(state->found) == 0) {
// nothing left: return an empty range
pRes->start.pos = 0;
pRes->start.ptr = NULL;
pRes->end.pos = 0;
pRes->end.ptr = NULL;
pRes->buf = NULL;
state->current = NULL;
return;
} else do {
// Pop a range and report its analyses; keep going while the
// range now on top of the buffer ends at the same place.
*pRes = gu_buf_pop(state->found, PgfCohortRange);
state->current = pRes->start.ptr;
pgf_morpho_iter(pRes->buf, state->callback, state->err);
} while (gu_buf_length(state->found) > 0 &&
gu_buf_index_last(state->found, PgfCohortRange)->end.ptr == pRes->end.ptr);
}
/**
 * Create an enumeration over the spans of `sentence` that match
 * entries of the concrete syntax. Each step of the enumeration fills
 * in a PgfCohortRange and reports its analyses through `callback`.
 *
 * @param concr     the concrete syntax to search
 * @param sentence  the text to scan; it must stay valid while the
 *                  enumeration is in use, since the state keeps
 *                  pointers into it
 * @param callback  receives the analyses of every returned range
 * @param pool      the enumeration state is allocated from this pool
 * @param err       raised with PgfExn when the concrete syntax is not
 *                  loaded; also handed to `callback`
 * @return the enumeration, or NULL when the concrete syntax is not
 *         loaded
 */
PGF_API GuEnum*
pgf_lookup_cohorts(PgfConcr *concr, GuString sentence,
                   PgfMorphoCallback* callback,
                   GuPool* pool, GuExn* err)
{
	if (concr->sequences == NULL) {
		GuExnData* err_data = gu_raise(err, PgfExn);
		if (err_data) {
			err_data->data = "The concrete syntax is not loaded";
		}
		// Return even when gu_raise gave us no data record: there is
		// no sequence table to search.
		return NULL;
	}

	PgfCohortsState* state = gu_new(PgfCohortsState, pool);
	state->en.next = pgf_lookup_cohorts_enum_next;
	state->concr   = concr;
	state->sentence= sentence;
	// The enumeration compares every spot with `current` before any
	// range has been returned, so it must start with a defined value.
	state->current = NULL;
	state->len     = strlen(sentence);
	state->callback= callback;
	state->err     = err;
	state->case_sensitive = pgf_is_case_sensitive(concr);
	state->spots = gu_new_buf(PgfCohortSpot, pool);
	state->found = gu_new_buf(PgfCohortRange, pool);

	// The first spot is the first non-space character of the sentence.
	PgfCohortSpot spot = {0,sentence};
	while (*spot.ptr != 0) {
		if (!skip_space(&spot.ptr, &spot.pos))
			break;
	}

	gu_buf_heap_push(state->spots, pgf_cohort_spot_order, &spot);

	return &state->en;
}
/* State of the enumerations returned by pgf_fullform_lexicon and
 * pgf_lookup_word_prefix. */
typedef struct {
GuEnum en;               // enumeration header; the state is recovered from it
PgfSequences* sequences; // the sequence table being walked (may be NULL)
GuString prefix;         // every returned entry must start with this string
size_t seq_idx;          // index of the next sequence to examine
bool case_sensitive;     // whether the prefix is compared case sensitively
} PgfFullFormState;
/* One entry produced by the full-form enumeration. */
struct PgfFullFormEntry {
GuString tokens;       // the tokens of the sequence, as returned by pgf_get_tokens
PgfProductionIdx* idx; // the index entries of the sequence
};
/* Advance the full-form enumeration by one step. `to` must point to a
 * PgfFullFormEntry*, which receives a freshly allocated entry, or NULL
 * when the enumeration is finished. */
static void
gu_fullform_enum_next(GuEnum* self, void* to, GuPool* pool)
{
PgfFullFormState* st = gu_container(self, PgfFullFormState, en);
PgfFullFormEntry* entry = NULL;
if (st->sequences != NULL) {
size_t n_seqs = gu_seq_length(st->sequences);
while (st->seq_idx < n_seqs) {
PgfSequence* seq = gu_seq_index(st->sequences, PgfSequence, st->seq_idx);
GuString tokens = pgf_get_tokens(seq->syms, 0, pool);
// Compare the prefix with the tokens. When the comparison is
// positive, or part of the prefix is left unconsumed, the
// enumeration is ended by moving seq_idx past the last sequence.
PgfCohortSpot spot = {0, st->prefix};
if (cmp_string(&spot, tokens, st->case_sensitive) > 0 || *spot.ptr != 0) {
st->seq_idx = n_seqs;
break;
}
// return only sequences with non-empty tokens and an index
if (*tokens != 0 && seq->idx != NULL) {
entry = gu_new(PgfFullFormEntry, pool);
entry->tokens = tokens;
entry->idx = seq->idx;
st->seq_idx++;
break;
}
st->seq_idx++;
}
}
*((PgfFullFormEntry**) to) = entry;
}
/* Create an enumeration over the sequence table of `concr`, starting
 * at the first sequence, with an empty prefix and case-sensitive
 * comparison. The state is allocated from `pool`. */
PGF_API GuEnum*
pgf_fullform_lexicon(PgfConcr *concr, GuPool* pool)
{
	PgfFullFormState* state = gu_new(PgfFullFormState, pool);

	state->sequences      = concr->sequences;
	state->seq_idx        = 0;
	state->prefix         = "";
	state->case_sensitive = true;
	state->en.next        = gu_fullform_enum_next;

	return &state->en;
}
/* Return the token string stored in a full-form entry. */
PGF_API GuString
pgf_fullform_get_string(PgfFullFormEntry* entry)
{
	GuString tokens = entry->tokens;
	return tokens;
}
/* Report the analyses stored in a full-form entry through `callback`. */
PGF_API void
pgf_fullform_get_analyses(PgfFullFormEntry* entry,
                          PgfMorphoCallback* callback, GuExn* err)
{
	PgfProductionIdx* analyses = entry->idx;
	pgf_morpho_iter(analyses, callback, err);
}
/**
 * Create an enumeration over the entries of the sequence table that
 * start with `prefix`.
 *
 * @param concr   the concrete syntax to search
 * @param prefix  the prefix to look for; it must stay valid while the
 *                enumeration is in use, since the state keeps the
 *                pointer
 * @param pool    the enumeration state is allocated from this pool
 * @param err     raised with PgfExn when the concrete syntax is not
 *                loaded
 * @return the enumeration, or NULL when the concrete syntax is not
 *         loaded
 */
PGF_API GuEnum*
pgf_lookup_word_prefix(PgfConcr *concr, GuString prefix,
                       GuPool* pool, GuExn* err)
{
	if (concr->sequences == NULL) {
		GuExnData* err_data = gu_raise(err, PgfExn);
		if (err_data) {
			err_data->data = "The concrete syntax is not loaded";
		}
		// Return even when gu_raise gave us no data record: there is
		// no sequence table to search.
		return NULL;
	}

	PgfFullFormState* state = gu_new(PgfFullFormState, pool);
	state->en.next   = gu_fullform_enum_next;
	state->sequences = concr->sequences;
	state->prefix    = prefix;
	state->seq_idx   = 0;
	state->case_sensitive = pgf_is_case_sensitive(concr);

	// Position seq_idx by a binary search for the prefix itself.
	PgfSequenceOrder order = { { pgf_sequence_cmp_fn },
	                           state->case_sensitive };
	if (!gu_seq_binsearch_index(concr->sequences, &order.order,
	                            PgfSequence, (void*) prefix,
	                            &state->seq_idx)) {
		// NOTE(review): this assumes that on failure the search
		// leaves the index of the entry just before the place where
		// the prefix would be -- confirm against
		// gu_seq_binsearch_index.
		state->seq_idx++;
	} else if (!state->case_sensitive) {
		/* If the match is case-insensitive then there might be more
		 * matches around the current index. Since we scroll down
		 * anyway, it is enough to search upwards now.
		 */
		while (state->seq_idx > 0) {
			PgfSequence* seq =
				gu_seq_index(concr->sequences, PgfSequence, state->seq_idx-1);

			size_t sym_idx = 0;
			PgfCohortSpot spot = {0, state->prefix};
			if (pgf_symbols_cmp(&spot, seq->syms, &sym_idx, state->case_sensitive) > 0 || *spot.ptr != 0) {
				break;
			}

			state->seq_idx--;
		}
	}

	return &state->en;
}

View File

@@ -499,14 +499,17 @@ store_expr(SgSG* sg,
PgfExprLit* elit = ei.data;
Mem mem[2];
size_t len = 0;
GuVariantInfo li = gu_variant_open(elit->lit);
switch (li.tag) {
case PGF_LITERAL_STR: {
PgfLiteralStr* lstr = li.data;
len = strlen(lstr->val);
mem[0].flags = MEM_Str;
mem[0].n = strlen(lstr->val);
mem[0].n = len;
mem[0].z = lstr->val;
break;
}
@@ -515,6 +518,7 @@ store_expr(SgSG* sg,
mem[0].flags = MEM_Int;
mem[0].u.i = lint->val;
len = sizeof(mem[0].u.i);
break;
}
case PGF_LITERAL_FLT: {
@@ -522,6 +526,7 @@ store_expr(SgSG* sg,
mem[0].flags = MEM_Real;
mem[0].u.r = lflt->val;
len = sizeof(mem[0].u.r);
break;
}
default:
@@ -556,7 +561,7 @@ store_expr(SgSG* sg,
int serial_type_arg = sqlite3BtreeSerialType(&mem[1], file_format);
int serial_type_arg_hdr_len = sqlite3BtreeVarintLen(serial_type_arg);
unsigned char* buf = malloc(1+serial_type_lit_hdr_len+(serial_type_arg_hdr_len > 1 ? serial_type_arg_hdr_len : 1)+mem[0].n+8);
unsigned char* buf = malloc(1+serial_type_lit_hdr_len+(serial_type_arg_hdr_len > 1 ? serial_type_arg_hdr_len : 1)+len+8);
unsigned char* p = buf;
*p++ = 1+serial_type_lit_hdr_len+serial_type_arg_hdr_len;
p += putVarint32(p, serial_type_lit);

View File

@@ -4835,7 +4835,6 @@ SQLITE_PRIVATE int sqlite3HeaderSizeBtree(void);
SQLITE_PRIVATE void sqlite3VdbeRecordUnpack(KeyInfo*,int,const void*,UnpackedRecord*);
SQLITE_PRIVATE UnpackedRecord *sqlite3VdbeAllocUnpackedRecord(KeyInfo *, char *, int, char **);
typedef int (*RecordCompare)(int,const void*,UnpackedRecord*);
SQLITE_PRIVATE RecordCompare sqlite3VdbeFindCompare(UnpackedRecord*);
/************** End of btreeInt.h ********************************************/

View File

@@ -66,7 +66,7 @@ module PGF2 (-- * PGF
-- ** Generation
generateAll,
-- ** Morphological Analysis
MorphoAnalysis, lookupMorpho, fullFormLexicon,
MorphoAnalysis, lookupMorpho, lookupCohorts, fullFormLexicon,
-- ** Visualizations
GraphvizOptions(..), graphvizDefaults,
graphvizAbstractTree, graphvizParseTree,
@@ -168,8 +168,6 @@ showPGF p =
languages :: PGF -> Map.Map ConcName Concr
languages p = langs p
-- | The abstract language name is the name of the top-level
-- abstract module
concreteName :: Concr -> ConcName
concreteName c = unsafePerformIO (peekUtf8CString =<< pgf_concrete_name (concr c))
@@ -893,8 +891,23 @@ newGraphvizOptions pool opts = do
-- Functions using Concr
-- Morpho analyses, parsing & linearization
type MorphoAnalysis = (Fun,Cat,Float)
-- | This triple is returned by all functions that deal with
-- the grammar's lexicon. Its first element is the name of an abstract
-- lexical function which can produce a given word or
-- a multiword expression (i.e. this is the lemma).
-- After that follows a string which describes
-- the particular inflection form.
--
-- The last element is the logarithm of
-- the probability of the function. The probability is not
-- conditionalized on the category of the function. This makes it
-- possible to compare the likelihood of two functions even if they
-- have different types.
type MorphoAnalysis = (Fun,String,Float)
-- | 'lookupMorpho' takes a string which must be a single word or
-- a multiword expression. It then computes the list of all possible
-- morphological analyses.
lookupMorpho :: Concr -> String -> [MorphoAnalysis]
lookupMorpho (Concr concr master) sent =
unsafePerformIO $
@@ -908,6 +921,45 @@ lookupMorpho (Concr concr master) sent =
freeHaskellFunPtr fptr
readIORef ref
-- | 'lookupCohorts' takes an arbitrary string and produces
-- a list of all places where lexical items from the grammar have been
-- identified (i.e. cohorts). The list consists of triples of the format @(start,ans,end)@,
-- where @start-end@ identifies the span in the text and @ans@ is
-- the list of possible morphological analyses similar to 'lookupMorpho'.
--
-- The list is sorted first by the @start@ position and after that
-- by the @end@ position. This can be used for instance if you want to
-- filter only the longest matches.
lookupCohorts :: Concr -> String -> [(Int,[MorphoAnalysis],Int)]
lookupCohorts lang@(Concr concr master) sent =
unsafePerformIO $
-- Set-up: a fresh pool, an IORef that collects the analyses of the cohort
-- currently being produced, and a C callback struct whose 'callback' field
-- points at the wrapped Haskell function.
-- NOTE(review): 'getAnalysis' is defined outside this block; presumably it
-- prepends each reported analysis to 'ref' -- confirm.
do pl <- gu_new_pool
ref <- newIORef []
cback <- gu_malloc pl (#size PgfMorphoCallback)
fptr <- wrapLookupMorphoCallback (getAnalysis ref)
(#poke PgfMorphoCallback, callback) cback fptr
c_sent <- newUtf8CString sent pl
-- No exception object is passed to the C side (nullPtr).
enum <- pgf_lookup_cohorts concr c_sent cback pl nullPtr
-- From here on the pool is owned by a ForeignPtr with gu_pool_finalizer.
fpl <- newForeignPtr gu_pool_finalizer pl
fromCohortRange enum fpl fptr ref
where
-- Pulls one PgfCohortRange from the enumeration and lazily produces the rest.
fromCohortRange enum fpl fptr ref =
allocaBytes (#size PgfCohortRange) $ \ptr ->
withForeignPtr fpl $ \pl ->
do gu_enum_next enum ptr pl
buf <- (#peek PgfCohortRange, buf) ptr
-- A null 'buf' marks the end of the enumeration: release the pool and the
-- callback pointer, and keep the grammar alive up to this point.
if buf == nullPtr
then do finalizeForeignPtr fpl
freeHaskellFunPtr fptr
touchConcr lang
return []
else do start <- (#peek PgfCohortRange, start.pos) ptr
end <- (#peek PgfCohortRange, end.pos) ptr
-- Take the analyses gathered for this cohort and reset the accumulator
-- before the next step runs.
ans <- readIORef ref
writeIORef ref []
-- The remaining cohorts are only computed when the list tail is demanded.
cohs <- unsafeInterleaveIO (fromCohortRange enum fpl fptr ref)
return ((start,ans,end):cohs)
fullFormLexicon :: Concr -> [(String, [MorphoAnalysis])]
fullFormLexicon lang =
unsafePerformIO $
@@ -1393,11 +1445,13 @@ bracketedLinearize lang e = unsafePerformIO $
end_phrase ref _ c_cat c_fid c_lindex c_fun = do
(bs':stack,bs) <- readIORef ref
cat <- peekUtf8CString c_cat
let fid = fromIntegral c_fid
let lindex = fromIntegral c_lindex
fun <- peekUtf8CString c_fun
writeIORef ref (stack, Bracket cat fid lindex fun (reverse bs) : bs')
if null bs
then writeIORef ref (stack, bs')
else do cat <- peekUtf8CString c_cat
let fid = fromIntegral c_fid
let lindex = fromIntegral c_lindex
fun <- peekUtf8CString c_fun
writeIORef ref (stack, Bracket cat fid lindex fun (reverse bs) : bs')
symbol_ne exn _ = do
gu_exn_raise exn gu_exn_type_PgfLinNonExist

View File

@@ -6,7 +6,9 @@ import System.IO.Unsafe(unsafePerformIO)
import Foreign hiding (unsafePerformIO)
import Foreign.C
import Data.IORef
import Data.Data
import PGF2.FFI
import Data.Maybe(fromJust)
type Cat = String -- ^ Name of syntactic category
type Fun = String -- ^ Name of function
@@ -36,6 +38,20 @@ instance Eq Expr where
e1_touch >> e2_touch
return (res /= 0)
-- | 'Data' instance that presents an 'Expr' as a single pseudo-constructor
-- (@fromJust . readExpr@) applied to the expression's textual form.
instance Data Expr where
-- Fold over the one "field": the string produced by 'showExpr'.
gfoldl f z e = z (fromJust . readExpr) `f` (showExpr [] e)
toConstr _ = readExprConstr
-- Only constructor index 1 exists; any other index is an error.
gunfold k z c = case constrIndex c of
1 -> k (z (fromJust . readExpr))
_ -> error "gunfold"
dataTypeOf _ = exprDataType
-- | The sole constructor representation used by the 'Data' instance above.
readExprConstr :: Constr
readExprConstr = mkConstr exprDataType "(fromJust . readExpr)" [] Prefix
-- | Data type representation for 'Expr', listing 'readExprConstr' only.
exprDataType :: DataType
exprDataType = mkDataType "PGF2.Expr" [readExprConstr]
-- | Constructs an expression by lambda abstraction
mkAbs :: BindType -> String -> Expr -> Expr
mkAbs bind_type var (Expr body bodyTouch) =

View File

@@ -100,7 +100,7 @@ foreign import ccall unsafe "gu/string.h gu_string_buf_out"
foreign import ccall unsafe "gu/file.h gu_file_in"
gu_file_in :: Ptr () -> Ptr GuPool -> IO (Ptr GuIn)
foreign import ccall unsafe "gu/enum.h gu_enum_next"
foreign import ccall safe "gu/enum.h gu_enum_next"
gu_enum_next :: Ptr a -> Ptr (Ptr b) -> Ptr GuPool -> IO ()
foreign import ccall unsafe "gu/string.h gu_string_buf_freeze"
@@ -409,6 +409,9 @@ foreign import ccall "pgf/pgf.h pgf_parse_with_oracle"
foreign import ccall "pgf/pgf.h pgf_lookup_morpho"
pgf_lookup_morpho :: Ptr PgfConcr -> CString -> Ptr PgfMorphoCallback -> Ptr GuExn -> IO ()
foreign import ccall "pgf/pgf.h pgf_lookup_cohorts"
pgf_lookup_cohorts :: Ptr PgfConcr -> CString -> Ptr PgfMorphoCallback -> Ptr GuPool -> Ptr GuExn -> IO (Ptr GuEnum)
type LookupMorphoCallback = Ptr PgfMorphoCallback -> CString -> CString -> Float -> Ptr GuExn -> IO ()
foreign import ccall "wrapper"

View File

@@ -16,6 +16,9 @@ module PGF2.Internal(-- * Access the internal structures
eAbs, eApp, eMeta, eFun, eVar, eLit, eTyped, eImplArg, dTyp, hypo,
AbstrInfo, newAbstr, ConcrInfo, newConcr, newPGF,
-- * Expose PGF and Concr for FFI with C
PGF(..), Concr(..),
-- * Write an in-memory PGF to a file
unionPGF, writePGF, writeConcr,
@@ -592,17 +595,17 @@ newAbstr aflags cats funs = unsafePerformIO $ do
data ConcrInfo = ConcrInfo (Ptr GuSeq) (Ptr GuMap) (Ptr GuMap) (Ptr GuSeq) (Ptr GuSeq) (Ptr GuMap) (Ptr PgfConcr -> Ptr GuPool -> IO ()) CInt
newConcr :: (?builder :: Builder s) => B s AbstrInfo ->
[(String,Literal)] -> -- ^ Concrete syntax flags
[(String,String)] -> -- ^ Printnames
[(FId,[FunId])] -> -- ^ Lindefs
[(FId,[FunId])] -> -- ^ Linrefs
[(FId,[Production])] -> -- ^ Productions
[(Fun,[SeqId])] -> -- ^ Concrete functions (must be sorted by Fun)
[[Symbol]] -> -- ^ Sequences (must be sorted)
[(Cat,FId,FId,[String])] -> -- ^ Concrete categories
FId -> -- ^ The total count of the categories
B s ConcrInfo
newConcr :: (?builder :: Builder s) => B s AbstrInfo
-> [(String,Literal)] -- ^ Concrete syntax flags
-> [(String,String)] -- ^ Printnames
-> [(FId,[FunId])] -- ^ Lindefs
-> [(FId,[FunId])] -- ^ Linrefs
-> [(FId,[Production])] -- ^ Productions
-> [(Fun,[SeqId])] -- ^ Concrete functions (must be sorted by Fun)
-> [[Symbol]] -- ^ Sequences (must be sorted)
-> [(Cat,FId,FId,[String])] -- ^ Concrete categories
-> FId -- ^ The total count of the categories
-> B s ConcrInfo
newConcr (B (AbstrInfo _ _ abscats _ absfuns c_abs_lin_fun c_non_lexical_buf _)) cflags printnames lindefs linrefs prods cncfuns sequences cnccats total_cats = unsafePerformIO $ do
c_cflags <- newFlags cflags pool
c_printname <- newMap (#size GuString) gu_string_hasher newUtf8CString

View File

@@ -100,7 +100,7 @@ hspgf_predict_callback(PgfOracleCallback* self,
size_t offset)
{
HSPgfOracleCallback* oracle = gu_container(self, HSPgfOracleCallback, oracle);
oracle->predict(cat,label,hspgf_offset2hs(oracle->sentence, offset));
return oracle->predict(cat,label,hspgf_offset2hs(oracle->sentence, offset));
}
static bool
@@ -110,7 +110,7 @@ hspgf_complete_callback(PgfOracleCallback* self,
size_t offset)
{
HSPgfOracleCallback* oracle = gu_container(self, HSPgfOracleCallback, oracle);
oracle->complete(cat,label,hspgf_offset2hs(oracle->sentence, offset));
return oracle->complete(cat,label,hspgf_offset2hs(oracle->sentence, offset));
}
static PgfExprProb*

View File

@@ -371,7 +371,7 @@ browse pgf id = fmap (\def -> (def,producers,consumers)) definition
Just (hyps,_,_) -> Just $ render (text "cat" <+> ppCId id <+> hsep (snd (mapAccumL (ppHypo 4) [] hyps)))
Nothing -> Nothing
(producers,consumers) = Map.foldWithKey accum ([],[]) (funs (abstract pgf))
(producers,consumers) = Map.foldrWithKey accum ([],[]) (funs (abstract pgf))
where
accum f (ty,_,_,_) (plist,clist) =
let !plist' = if id `elem` ps then f : plist else plist

View File

@@ -58,8 +58,8 @@ bracketedTokn :: Maybe Int -> Forest -> BracketedTokn
bracketedTokn dp f@(Forest abs cnc forest root) =
case [computeSeq isTrusted seq (map (render forest) args) | (seq,args) <- root] of
([bs@(Bracket_{})]:_) -> bs
(bss:_) -> Bracket_ wildCId 0 0 wildCId [] bss
[] -> Bracket_ wildCId 0 0 wildCId [] []
(bss:_) -> Bracket_ wildCId 0 0 0 wildCId [] bss
[] -> Bracket_ wildCId 0 0 0 wildCId [] []
where
isTrusted (_,fid) = IntSet.member fid trusted
@@ -190,7 +190,7 @@ foldForest :: (FunId -> [PArg] -> b -> b) -> (Expr -> [String] -> b -> b) -> b -
foldForest f g b fcat forest =
case IntMap.lookup fcat forest of
Nothing -> b
Just set -> Set.fold foldProd b set
Just set -> Set.foldr foldProd b set
where
foldProd (PCoerce fcat) b = foldForest f g b fcat forest
foldProd (PApply funid args) b = f funid args b

View File

@@ -33,6 +33,7 @@ fromStr = from False id
from space cap ts =
case ts of
[] -> []
TK "":ts -> from space cap ts
TK s:ts -> put s++from True cap ts
BIND:ts -> from False cap ts
SOFT_BIND:ts -> from False cap ts

View File

@@ -137,7 +137,7 @@ cidVar = mkCId "__gfVar"
-- mark the beginning and the end of each constituent.
data BracketedString
= Leaf Token -- ^ this is the leaf i.e. a single token
| Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedString]
| Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedString]
-- ^ this is a bracket. The 'CId' is the category of
-- the phrase. The 'FId' is an unique identifier for
-- every phrase in the sentence. For context-free grammars
@@ -151,7 +151,7 @@ data BracketedString
-- that represents the same constituent.
data BracketedTokn
= Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedTokn] -- Invariant: the list is not empty
= Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedTokn] -- Invariant: the list is not empty
| LeafKS Token
| LeafNE
| LeafBIND
@@ -169,12 +169,12 @@ showBracketedString :: BracketedString -> String
showBracketedString = render . ppBracketedString
ppBracketedString (Leaf t) = text t
ppBracketedString (Bracket cat fid index _ _ bss) = parens (ppCId cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
ppBracketedString (Bracket cat fid fid' index _ _ bss) = parens (ppCId cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
-- | The length of the bracketed string in number of tokens.
lengthBracketedString :: BracketedString -> Int
lengthBracketedString (Leaf _) = 1
lengthBracketedString (Bracket _ _ _ _ _ bss) = sum (map lengthBracketedString bss)
lengthBracketedString (Leaf _) = 1
lengthBracketedString (Bracket _ _ _ _ _ _ bss) = sum (map lengthBracketedString bss)
untokn :: Maybe String -> [BracketedTokn] -> (Maybe String,[BracketedString])
untokn nw bss =
@@ -183,10 +183,10 @@ untokn nw bss =
Just bss -> (nw,concat bss)
Nothing -> (nw,[])
where
untokn nw (Bracket_ cat fid index fun es bss) =
untokn nw (Bracket_ cat fid fid' index fun es bss) =
let (nw',bss') = mapAccumR untokn nw bss
in case sequence bss' of
Just bss -> (nw',Just [Bracket cat fid index fun es (concat bss)])
Just bss -> (nw',Just [Bracket cat fid fid' index fun es (concat bss)])
Nothing -> (Nothing, Nothing)
untokn nw (LeafKS t)
| null t = (nw,Just [])
@@ -227,16 +227,16 @@ computeSeq filter seq args = concatMap compute seq
getArg d r
| not (null arg_lin) &&
filter ct = [Bracket_ cat fid r fun es arg_lin]
filter ct = [Bracket_ cat fid fid' r fun es arg_lin]
| otherwise = arg_lin
where
arg_lin = lin ! r
(ct@(cat,fid),_,fun,es,(_xs,lin)) = args !! d
arg_lin = lin ! r
(ct@(cat,fid),fid',fun,es,(_xs,lin)) = args !! d
getVar d r = [LeafKS (showCId (xs !! r))]
where
(_ct,_,_fun,_es,(xs,_lin)) = args !! d
flattenBracketedString :: BracketedString -> [String]
flattenBracketedString (Leaf w) = [w]
flattenBracketedString (Bracket _ _ _ _ _ bss) = concatMap flattenBracketedString bss
flattenBracketedString (Leaf w) = [w]
flattenBracketedString (Bracket _ _ _ _ _ _ bss) = concatMap flattenBracketedString bss

View File

@@ -198,7 +198,7 @@ recoveryStates open_types (EState abs cnc chart) =
Nothing -> []
complete open_fcats items ac =
foldl (Set.fold (\(Active j' ppos funid seqid args keyc) ->
foldl (Set.foldr (\(Active j' ppos funid seqid args keyc) ->
(:) (Active j' (ppos+1) funid seqid args keyc)))
items
[set | fcat <- open_fcats, (set,_) <- lookupACByFCat fcat ac]
@@ -363,7 +363,7 @@ process flit ftok cnc (item@(Active j ppos funid seqid args key0):items) acc cha
items2 = case lookupAC key0 ((active chart:actives chart) !! (k-j)) of
Nothing -> items
Just (set,sc) -> Set.fold (\(Active j' ppos funid seqid args keyc) ->
Just (set,sc) -> Set.foldr (\(Active j' ppos funid seqid args keyc) ->
let SymCat d _ = unsafeAt (unsafeAt (sequences cnc) seqid) ppos
PArg hypos _ = args !! d
in (:) (Active j' (ppos+1) funid seqid (updateAt d (PArg hypos fid) args) keyc)) items set
@@ -395,7 +395,7 @@ process flit ftok cnc (item@(Active j ppos funid seqid args key0):items) acc cha
predict flit ftok cnc forest key0 key@(AK fid lbl) k acc items =
let (acc1,items1) = case IntMap.lookup fid forest of
Nothing -> (acc,items)
Just set -> Set.fold foldProd (acc,items) set
Just set -> Set.foldr foldProd (acc,items) set
(acc2,items2) = case IntMap.lookup fid (lexicon cnc) >>= IntMap.lookup lbl of
Just tmap -> let (mb_v,toks) = TrieMap.decompose (TrieMap.map (toItems key0 k) tmap)

View File

@@ -79,12 +79,12 @@ unionsWith f = foldl (unionWith f) empty
elems :: TrieMap k v -> [v]
elems tr = collect tr []
where
collect (Tr mb_v m) xs = maybe id (:) mb_v (Map.fold collect xs m)
collect (Tr mb_v m) xs = maybe id (:) mb_v (Map.foldr collect xs m)
toList :: TrieMap k v -> [([k],v)]
toList tr = collect [] tr []
where
collect ks (Tr mb_v m) xs = maybe id (\v -> (:) (ks,v)) mb_v (Map.foldWithKey (\k -> collect (k:ks)) xs m)
collect ks (Tr mb_v m) xs = maybe id (\v -> (:) (ks,v)) mb_v (Map.foldrWithKey (\k -> collect (k:ks)) xs m)
fromListWith :: Ord k => (v -> v -> v) -> [([k],v)] -> TrieMap k v
fromListWith f xs = foldl' (\trie (ks,v) -> insertWith f ks v trie) empty xs

View File

@@ -34,8 +34,9 @@ import PGF.Macros (lookValCat, BracketedString(..))
import qualified Data.Map as Map
--import qualified Data.IntMap as IntMap
import Data.List (intersperse,nub,mapAccumL,find,groupBy)
--import Data.Char (isDigit)
import Data.List (intersperse,nub,mapAccumL,find,groupBy,sortBy,partition)
import Data.Ord (comparing)
import Data.Char (isDigit)
import Data.Maybe (fromMaybe)
import Text.PrettyPrint
@@ -131,6 +132,7 @@ graphvizDependencyTree format debug mlab mclab pgf lang t =
"latex" -> render . ppLaTeX $ conll2latex' conll
"svg" -> render . ppSVG . toSVG $ conll2latex' conll
"conll" -> printCoNLL conll
"conllu" -> printCoNLL ([["# text = " ++ linearize pgf lang t], ["# tree = " ++ showExpr [] t]] ++ conll)
"malt_tab" -> render $ vcat (map (hcat . intersperse (char '\t') . (\ws -> [ws !! 0,ws !! 1,ws !! 3,ws !! 6,ws !! 7])) wnodes)
"malt_input" -> render $ vcat (map (hcat . intersperse (char '\t') . take 6) wnodes)
_ -> render $ text "digraph {" $$
@@ -144,16 +146,16 @@ graphvizDependencyTree format debug mlab mclab pgf lang t =
conll = maybe conll0 (\ls -> fixCoNLL ls conll0) mclab
conll0 = (map.map) render wnodes
nodes = map mkNode leaves
links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun),_,w) <- tail leaves]
links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun,_),_,w) <- tail leaves]
-- CoNLL format: ID FORM LEMMA PLEMMA POS PPOS FEAT PFEAT HEAD PHEAD DEPREL PDEPREL
-- P variants are automatically predicted rather than gold standard
wnodes = [[int i, maltws ws, ppCId fun, ppCId (posCat cat), ppCId cat, unspec, int parent, text lab, unspec, unspec] |
((cat,fid,fun),i,ws) <- tail leaves,
wnodes = [[int i, maltws ws, ppCId fun, ppCId (posCat cat), ppCId cat, int lind, int parent, text lab, unspec, unspec] |
((cat,fid,fun,lind),i,ws) <- tail leaves,
let (lab,parent) = fromMaybe (dep_lbl,0)
(do (lbl,fid) <- lookup fid deps
(_,i,_) <- find (\((_,fid1,_),i,_) -> fid == fid1) leaves
(_,i,_) <- find (\((_,fid1,_,_),i,_) -> fid == fid1) leaves
return (lbl,i))
]
maltws = text . concat . intersperse "+" . words -- no spaces in column 2
@@ -162,7 +164,7 @@ graphvizDependencyTree format debug mlab mclab pgf lang t =
bss = bracketedLinearize pgf lang t
root = (wildCId,nil,wildCId)
root = (wildCId,nil,wildCId,0)
leaves = (root,0,root_lbl) : (groupAndIndexIt 1 . concatMap (getLeaves root)) bss
deps = let (_,(h,deps)) = getDeps 0 [] t []
@@ -180,10 +182,10 @@ graphvizDependencyTree format debug mlab mclab pgf lang t =
getLeaves parent bs =
case bs of
Leaf w -> [(parent,w)]
Bracket cat fid _ fun _ bss -> concatMap (getLeaves (cat,fid,fun)) bss
Leaf w -> [(parent,w)]
Bracket cat fid _ lind fun _ bss -> concatMap (getLeaves (cat,fid,fun,lind)) bss
mkNode ((_,p,_),i,w) =
mkNode ((_,p,_,_),i,w) =
tag p <+> brackets (text "label = " <> doubleQuotes (int i <> char '.' <+> text w)) <+> semi
mkLink (x,(lbl,y)) = tag y <+> text "->" <+> tag x <+> text "[label = " <> doubleQuotes (text lbl) <> text "] ;"
@@ -234,10 +236,18 @@ graphvizDependencyTree format debug mlab mclab pgf lang t =
root_lbl = "ROOT"
unspec = text "_"
-- auxiliaries for UD conversion PK 15/12/2018
-- | Cut a line at its first @--@: everything from the two dashes onwards
-- is dropped, while the text before them (a lone @-@ included) is kept.
rmcomments :: String -> String
rmcomments str = case str of
  '-':'-':_ -> []
  c:rest    -> c : rmcomments rest
  []        -> []
-- | Prepare lines obtained from a configuration file for labels for
-- use with 'graphvizDependencyTree'. Format per line /fun/ /label/@*@.
getDepLabels :: String -> Labels
getDepLabels s = Map.fromList [(mkCId f,ls) | f:ls <- map words (lines s)]
-- getDepLabels s = Map.fromList [(mkCId f,ls) | f:ls <- map words (lines s)]
getDepLabels s = Map.fromList [(mkCId f,ls) | f:ls <- map (words . rmcomments) (lines s)]
-- the old function, without dependencies
graphvizParseTree :: PGF -> Language -> GraphvizOptions -> Tree -> String
@@ -291,13 +301,13 @@ graphvizBracketedString opts mbl tree bss = render graphviz_code
getInternals [] = []
getInternals nodes
= nub [(parent, fid, mkNode fun cat) |
(parent, Bracket cat fid _ fun _ _) <- nodes]
(parent, Bracket cat fid _ _ fun _ _) <- nodes]
: getInternals [(fid, child) |
(_, Bracket _ fid _ _ _ children) <- nodes,
(_, Bracket _ fid _ _ _ _ children) <- nodes,
child <- children]
getLeaves cat parent (Leaf word) = [(parent, (cat, word))] -- the lowest cat before the word
getLeaves _ parent (Bracket cat fid i _ _ children)
getLeaves _ parent (Bracket cat fid _ i _ _ children)
= concatMap (getLeaves cat fid) children
mkLevel nodes
@@ -401,8 +411,8 @@ genPreAlignment pgf langs = lin2align . linsBracketed
getLeaves parent bs =
case bs of
Leaf w -> [(parent,w)]
Bracket _ fid _ _ _ bss -> concatMap (getLeaves fid) bss
Leaf w -> [(parent,w)]
Bracket _ fid _ _ _ _ bss -> concatMap (getLeaves fid) bss
mkLayers (cs:css:rest) = let (lrest, rrest) = mkLayers (css:rest)
in ((fields cs) : lrest, (map (mkLinks css) cs) : rrest)
@@ -512,7 +522,7 @@ conll2latex' = dep2latex . conll2dep'
data Dep = Dep {
wordLength :: Int -> Double -- length of word at position int -- was: fixed width, millimetres (>= 20.0)
, tokens :: [(String,String)] -- word, pos (0..)
, tokens :: [(String,(String,String))] -- word, (pos,features) (0..)
, deps :: [((Int,Int),String)] -- from, to, label
, root :: Int -- root word position
}
@@ -552,7 +562,8 @@ dep2latex d =
[Comment (unwords (map fst (tokens d))),
Picture defaultUnit (width,height) (
[Put (wpos rwld i,0) (Text w) | (i,w) <- zip [0..] (map fst (tokens d))] -- words
++ [Put (wpos rwld i,15) (TinyText w) | (i,w) <- zip [0..] (map snd (tokens d))] -- pos tags 15u above bottom
++ [Put (wpos rwld i,15) (TinyText w) | (i,(w,_)) <- zip [0..] (map snd (tokens d))] -- pos tags 15u above bottom
--- ++ [Put (wpos rwld i,-15) (TinyText w) | (i,(_,w)) <- zip [0..] (map snd (tokens d))] -- features 15u below bottom -> DON'T SHOW
++ concat [putArc rwld (aheight x y) x y label | ((x,y),label) <- deps d] -- arcs and labels
++ [Put (wpos rwld (root d) + 15,height) (ArrowDown (height-arcbase))]
++ [Put (wpos rwld (root d) + 20,height - 10) (TinyText "ROOT")]
@@ -583,8 +594,8 @@ conll2dep' ls = Dep {
, root = head $ [read x-1 | x:_:_:_:_:_:"0":_ <- ls] ++ [1]
}
where
wld i = maximum (0:[charWidth * fromIntegral (length w) | w <- let (tok,pos) = toks !! i in [tok,pos]])
toks = [(w,c) | _:w:_:c:_ <- ls]
wld i = maximum (0:[charWidth * fromIntegral (length w) | w <- let (tok,(pos,feat)) = toks !! i in [tok,pos {-,feat-}]]) --- feat not shown
toks = [(w,(c,m)) | _:w:_:c:_:m:_ <- ls]
dps = [((read y-1, read x-1),lab) | x:_:_:_:_:_:y:lab:_ <- ls, y /="0"]
--maxdist = maximum [abs (x-y) | ((x,y),_) <- dps]
@@ -749,18 +760,26 @@ ppSVG svg =
-- UseComp {"not"} PART neg head
-- UseComp {*} AUX cop head
type CncLabels = [(String, String -> Maybe (String -> String,String,String))]
-- (fun, word -> (pos,label,target))
-- the pos can remain unchanged, as in the current notation in the article
type CncLabels = [
Either
(String, String -> Maybe (String -> String,String,String))
-- (fun, word -> (pos,label,target))
-- the pos can remain unchanged, as in the current notation in the article
(String,[String])
-- (category, morphological forms)
]
fixCoNLL :: CncLabels -> CoNLL -> CoNLL
fixCoNLL labels conll = map fixc conll where
fixCoNLL cncLabels conll = map fixc conll where
labels = [l | Left l <- cncLabels]
flabels = [r | Right r <- cncLabels]
fixc row = case row of
(i:word:fun:pos:cat:x_:"0":"dep":xs) -> (i:word:fun:pos:cat:x_:"0":"root":xs) --- change the root label from dep to root
(i:word:fun:pos:cat:x_:"0":"dep":xs) -> (i:word:fun:pos:cat:(feat cat word x_):"0":"root":xs) --- change the root label from dep to root
(i:word:fun:pos:cat:x_:j:label:xs) -> case look (fun,word) of
Just (pos',label',"head") -> (i:word:fun:pos' pos:cat:x_:j :label':xs)
Just (pos',label',target) -> (i:word:fun:pos' pos:cat:x_: getDep j target:label':xs)
_ -> row
Just (pos',label',"head") -> (i:word:fun:pos' pos:cat:(feat cat word x_):j :label':xs)
Just (pos',label',target) -> (i:word:fun:pos' pos:cat:(feat cat word x_): getDep j target:label':xs)
_ -> (i:word:fun:pos:cat:(feat cat word x_):j:label:xs)
_ -> row
look (fun,word) = case lookup fun labels of
@@ -775,16 +794,48 @@ fixCoNLL labels conll = map fixc conll where
getDep j label = maybe j id $ lookup (label,j) [((label,j),i) | i:word:fun:pos:cat:x_:j:label:xs <- conll]
feat cat word x = case lookup cat flabels of
Just tags | all isDigit x && length tags > read x -> tags !! read x
_ -> case lookup (show word) flabels of
Just (t:_) -> t
_ -> cat ++ "-" ++ x
getCncDepLabels :: String -> CncLabels
getCncDepLabels = map merge . groupBy (\ (x,_) (a,_) -> x == a) . concatMap analyse . filter choose . lines where
getCncDepLabels s = wlabels ws ++ flabels fs
where
wlabels =
map Left .
map merge .
groupBy (\ (x,_) (a,_) -> x == a) .
sortBy (comparing fst) .
concatMap analyse .
filter chooseW
flabels =
map Right .
map collectTags .
map words
(fs,ws) = partition chooseF $ map uncomment $ lines s
--- choose is for compatibility with the general notation
choose line = notElem '(' line && elem '{' line --- ignoring non-local (with "(") and abstract (without "{") rules
chooseW line = notElem '(' line &&
elem '{' line
--- ignoring non-local (with "(") and abstract (without "{") rules
---- TODO: this means that "(" cannot be a token
chooseF line = take 1 line == "@" --- feature assignments have the form e.g. @N SgNom SgGen ; no spaces inside tags
uncomment line = case line of
'-':'-':_ -> ""
c:cs -> c : uncomment cs
_ -> line
analyse line = case break (=='{') line of
(beg,_:ws) -> case break (=='}') ws of
(toks,_:target) -> case (words beg, words target) of
(fun:_,[ label,j]) -> [(fun, (tok, (id, label,j))) | tok <- getToks toks]
(fun:_,[pos,label,j]) -> [(fun, (tok, (const pos,label,j))) | tok <- getToks toks]
(toks,_:target) -> case (getToks beg, words target) of
(funs,[ label,j]) -> [(fun, (tok, (id, label,j))) | fun <- funs, tok <- getToks toks]
(funs,[pos,label,j]) -> [(fun, (tok, (const pos,label,j))) | fun <- funs, tok <- getToks toks]
_ -> []
_ -> []
_ -> []
@@ -793,8 +844,13 @@ getCncDepLabels = map merge . groupBy (\ (x,_) (a,_) -> x == a) . concatMap ana
Just new -> return new
_ -> lookup "*" (map snd rules)
)
getToks = words . map (\c -> if elem c "\"," then ' ' else c)
getToks = map unquote . filter (/=",") . toks
toks s = case lex s of [(t,"")] -> [t] ; [(t,cc)] -> t:toks cc ; _ -> []
unquote s = case s of '"':cc@(_:_) | last cc == '"' -> init cc ; _ -> s
collectTags (w:ws) = (tail w,ws)
-- added init to remove the last \n. otherwise, two empty lines are in between each sentence PK 17/12/2018
printCoNLL :: CoNLL -> String
printCoNLL = unlines . map (concat . intersperse "\t")
printCoNLL = init . unlines . map (concat . intersperse "\t")

View File

@@ -1,5 +1,5 @@
name: pgf
version: 3.9-git
version: 3.10
cabal-version: >= 1.20
build-type: Simple
@@ -12,11 +12,6 @@ bug-reports: https://github.com/GrammaticalFramework/GF/issues
maintainer: Thomas Hallgren
tested-with: GHC==7.6.3, GHC==7.8.3, GHC==7.10.3, GHC==8.0.2
flag custom-binary
Description: Use a customised version of the binary package
Default: True
Manual: True
Library
default-language: Haskell2010
build-depends: base >= 4.6 && <5,
@@ -29,18 +24,14 @@ Library
mtl,
exceptions
if flag(custom-binary)
hs-source-dirs: ., binary
other-modules:
-- not really part of GF but I have changed the original binary library
-- and we have to keep the copy for now.
Data.Binary
Data.Binary.Put
Data.Binary.Get
Data.Binary.Builder
Data.Binary.IEEE754
else
build-depends: binary, data-binary-ieee754
other-modules:
-- not really part of GF but I have changed the original binary library
-- and we have to keep the copy for now.
Data.Binary
Data.Binary.Put
Data.Binary.Get
Data.Binary.Builder
Data.Binary.IEEE754
--ghc-options: -fwarn-unused-imports
--if impl(ghc>=7.8)

View File

@@ -1,29 +1,37 @@
INSTALL_PATH = /usr/local
C_SOURCES = jpgf.c jsg.c jni_utils.c
JAVA_SOURCES = $(wildcard org/grammaticalframework/pgf/*.java) \
$(wildcard org/grammaticalframework/sg/*.java)
JNI_INCLUDES = $(if $(wildcard /usr/lib/jvm/default-java/include/.*), -I/usr/lib/jvm/default-java/include -I/usr/lib/jvm/default-java/include/linux, \
$(if $(wildcard /System/Library/Frameworks/JavaVM.framework/Versions/A/Headers/.*), -I/System/Library/Frameworks/JavaVM.framework/Versions/A/Headers, \
$(if $(wildcard /Library/Java/Home/include/.*), -I/Library/Java/Home/include/ -I/Library/Java/Home/include/darwin, \
$(error No JNI headers found))))
$(if $(wildcard /usr/lib/jvm/java-1.11.0-openjdk-amd64/include/.*), -I/usr/lib/jvm/java-1.11.0-openjdk-amd64/include/ -I/usr/lib/jvm/java-1.11.0-openjdk-amd64/include/linux, \
$(if $(wildcard /System/Library/Frameworks/JavaVM.framework/Versions/A/Headers/.*), -I/System/Library/Frameworks/JavaVM.framework/Versions/A/Headers, \
$(if $(wildcard /Library/Java/Home/include/.*), -I/Library/Java/Home/include/ -I/Library/Java/Home/include/darwin, \
$(error No JNI headers found)))))
# For Windows replace the previous line with something like this:
# For compilation on Windows replace the previous line with something like this:
#
# JNI_INCLUDES = -I "C:/Program Files/Java/jdk1.8.0_171/include" -I "C:/Program Files/Java/jdk1.8.0_171/include/win32" -I "C:/MinGW/msys/1.0/local/include"
# WINDOWS_FLAGS = -L"C:/MinGW/msys/1.0/local/lib" -no-undefined
# WINDOWS_LDFLAGS = -L"C:/MinGW/msys/1.0/local/lib" -no-undefined
INSTALL_PATH = /usr/local/lib
LIBTOOL = glibtool --tag=CC
GCC = gcc
LIBTOOL = $(if $(shell command -v glibtool 2>/dev/null), glibtool, libtool) --tag=CC
LIBTOOL = $(if $(shell command -v glibtool 2>/dev/null), glibtool --tag=CC, libtool)
# For cross-compilation from Linux to Windows replace the previous two lines with:
#
# GCC = x86_64-w64-mingw32-gcc
# LIBTOOL = ../c/libtool
# WINDOWS_CCFLAGS = -I$(INSTALL_PATH)/include
# WINDOWS_LDFLAGS = -L$(INSTALL_PATH)/lib -no-undefined
all: libjpgf.la jpgf.jar
libjpgf.la: $(patsubst %.c, %.lo, $(C_SOURCES))
$(LIBTOOL) --mode=link gcc $(CFLAGS) -g -O -o libjpgf.la -shared $^ -rpath $(INSTALL_PATH) -lgu -lpgf -lsg $(WINDOWS_FLAGS)
$(LIBTOOL) --mode=link $(GCC) $(CFLAGS) -g -O -o libjpgf.la -shared $^ -rpath $(INSTALL_PATH)/lib -lgu -lpgf -lsg $(WINDOWS_LDFLAGS)
%.lo : %.c
$(LIBTOOL) --mode=compile gcc $(CFLAGS) -g -O -c $(JNI_INCLUDES) -std=c99 -shared $< -o $@
$(LIBTOOL) --mode=compile $(GCC) $(CFLAGS) -g -O -c $(JNI_INCLUDES) $(WINDOWS_CCFLAGS) -std=c99 -shared $< -o $@
jpgf.jar: $(patsubst %.java, %.class, $(JAVA_SOURCES))
jar -cf $@ org/grammaticalframework/pgf/*.class org/grammaticalframework/sg/*.class
@@ -32,8 +40,8 @@ jpgf.jar: $(patsubst %.java, %.class, $(JAVA_SOURCES))
javac $<
install: libjpgf.la jpgf.jar
$(LIBTOOL) --mode=install install -s libjpgf.la $(INSTALL_PATH)
install jpgf.jar $(INSTALL_PATH)
$(LIBTOOL) --mode=install install -s libjpgf.la $(INSTALL_PATH)/lib
install jpgf.jar $(INSTALL_PATH)/lib
doc:

View File

@@ -0,0 +1,4 @@
# Deprecation notice
As of June 2019, this JavaScript version of the GF runtime is considered deprecated,
in favour of the TypeScript version in <https://github.com/GrammaticalFramework/gf-typescript>.

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@@ -32,7 +32,7 @@
</script>
<title>Web-based GF Translator</title>
</head>
<body onload="populateLangs(Food, 'fromLang', 'toLang')">
<body onload="populateLangs(grammar, 'fromLang', 'toLang')">
<form id="translate">
<p>
<input type="text" name="inputText" id="inputText" value="this cheese is warm" size="50" />

View File

@@ -0,0 +1,7 @@
# Project moved
The GF TypeScript runtime has been moved to the repository:
<https://github.com/GrammaticalFramework/gf-typescript>
If you are looking for an updated version of the JavaScript runtime,
you should also look there.

View File

@@ -1,337 +0,0 @@
/**
* gflib.d.ts
*
* by John J. Camilleri
*
* TypeScript type definitions for the "original" JS GF runtime (GF:src/runtime/javascript/gflib.js)
*/
// Note: the String prototype is extended with:
// String.prototype.tag = "";
// String.prototype.setTag = function (tag) { this.tag = tag; };
/**
 * A GF grammar: one abstract syntax plus multiple concrete syntaxes.
 *
 * `concretes` is a dictionary of `GFConcrete` values indexed by string keys
 * (presumably language names, matching `fromLang` / `toLang` below — confirm
 * against gflib.js).
 */
declare class GFGrammar {
  constructor(abstract: GFAbstract, concretes: Record<string, GFConcrete>)

  abstract: GFAbstract
  concretes: Record<string, GFConcrete>

  /**
   * Takes an input string together with a source and a target language key
   * and returns a two-level dictionary of strings.
   * NOTE(review): the meaning of the two key levels is not visible in these
   * declarations; check gflib.js before relying on it.
   */
  translate(
    input: string,
    fromLang: string,
    toLang: string
  ): Record<string, Record<string, string>>
}
/**
 * Abstract syntax tree node: a `name` plus a list of argument subtrees,
 * each of which is itself a `Fun`.
 *
 * Only signatures are declared here; the behaviour of the printers and
 * predicates is implemented in gflib.js.
 */
declare class Fun {
  constructor(name: string, ...args: Fun[])

  name: string
  args: Fun[]

  // String renderings of the tree
  print(): string
  show(): string

  // Indexed access to the argument list
  getArg(i: number): Fun
  setArg(i: number, c: Fun): void

  // Boolean predicates on the node
  isMeta(): boolean
  isComplete(): boolean
  isLiteral(): boolean
  isString(): boolean
  isInt(): boolean
  isFloat(): boolean

  // Comparison against an arbitrary value
  isEqual(obj: any): boolean
}
/**
 * Abstract syntax of a grammar: a start category plus a table of `Type`
 * signatures keyed by function name.
 *
 * NOTE(review): only the signatures are visible here; what each tree
 * operation does exactly must be checked in gflib.js.
 */
declare class GFAbstract {
  constructor(startcat: string, types: Record<string, Type>)

  startcat: string
  /** Dictionary of types; the key is the function name. */
  types: Record<string, Type>

  // Access to the type table, addressed by function name
  addType(fun: string, args: string[], cat: string): void
  getArgs(fun: string): string[]
  getCat(fun: string): string

  // Operations that take a tree and return a tree
  annotate(tree: Fun, type: string): Fun
  handleLiterals(tree: Fun, type: Type): Fun
  copyTree(x: Fun): Fun

  // Building a tree from a string or from a token list
  parseTree(str: string, type: string): Fun
  parseTree_(tokens: string[], prec: number): Fun
}
/**
 * Type: a list of argument names (`args`) together with a single
 * category name (`cat`). Used as the value type of `GFAbstract.types`.
 */
declare class Type {
  constructor(args: string[], cat: string)

  args: string[]
  cat: string
}
/**
 * Either an `Apply` or a `Coerce`. This is the element type of the
 * production lists in `GFConcrete` and of `Chart.forest`.
 */
type ApplyOrCoerce =
  | Apply
  | Coerce
/**
 * Concrete syntax.
 *
 * Holds flags, productions, concrete functions, symbol sequences and start
 * categories, and declares the linearization, tokenization, parsing and
 * completion entry points. Only signatures are visible here; the behaviour
 * is implemented in gflib.js.
 */
declare class GFConcrete {
  constructor(
    flags: Record<string, string>,
    productions: Record<number, ApplyOrCoerce[]>,
    functions: CncFun[],
    sequences: Sym[][],
    startCats: Record<string, { s: number; e: number }>,
    totalFIds: number
  )

  // Fields with a same-named constructor argument
  flags: Record<string, string>
  productions: Record<number, ApplyOrCoerce[]>
  functions: CncFun[]
  sequences: Sym[][]
  startCats: Record<string, { s: number; e: number }>
  totalFIds: number

  // Fields without a matching constructor argument
  pproductions: Record<number, ApplyOrCoerce[]>
  lproductions: Record<string, { fid: FId; fun: CncFun }>

  // Tree -> symbols / tokens / strings
  linearizeSyms(tree: Fun, tag: string): { fid: FId; table: any }[]
  syms2toks(syms: Sym[]): string[]
  linearizeAll(tree: Fun): string[]
  linearize(tree: Fun): string
  tagAndLinearize(tree: Fun): string[]
  unlex(ts: string): string
  tagIt(obj: any, tag: string): any
  // showRules(): string // Uncaught TypeError: Cannot read property 'length' of undefined at gflib.js:451

  // String -> tokens / trees / completions
  tokenize(string: string): string[]
  parseString(string: string, cat: string): Fun[]
  complete(
    input: string,
    cat: string
  ): { consumed: string[]; suggestions: string[] }
}
/**
 * Function ID: a plain numeric identifier.
 *
 * Within these declarations it is the type of `Apply.fun`, `Coerce.arg`,
 * `PArg.fid`, the entries of `CncFun.lins`, and the `fid` arguments of the
 * `Chart` methods.
 */
type FId = number
/**
 * Apply
 *
 * One of the two members of `ApplyOrCoerce`. Carries a string `id`, the
 * `FId` stored in `fun`, and a list of `PArg` arguments.
 */
declare class Apply {
  constructor(fun: FId, args: PArg[])

  id: string
  fun: FId
  args: PArg[]

  /** Returns a string for the given category name. */
  show(cat: string): string
  /** Compares this object against an arbitrary value. */
  isEqual(obj: any): boolean
}
/**
 * PArg
 *
 * Argument record used by `Apply.args` and `ActiveItem.args`: an `FId`
 * plus a `hypos` list, which is left untyped (`any[]`) in these
 * declarations.
 */
declare class PArg {
  /** The first argument is the `FId`; all remaining arguments form `hypos`. */
  constructor(fid: FId, ...hypos: any[])

  fid: FId
  hypos: any[]
}
/**
 * Coerce
 *
 * One of the two members of `ApplyOrCoerce`. Carries a string `id` and a
 * single `FId` in `arg`.
 */
declare class Coerce {
  constructor(arg: FId)

  id: string
  arg: FId

  /** Returns a string for the given category name. */
  show(cat: string): string
}
/**
 * Const
 *
 * Pairs a literal tree (`lit`, a `Fun`) with a list of tokens (`toks`,
 * untyped in these declarations). Note that `Const` is not a member of the
 * `ApplyOrCoerce` union.
 */
declare class Const {
  constructor(lit: Fun, toks: any[])

  id: string
  lit: Fun
  toks: any[]

  /** Returns a string for the given category name. */
  show(cat: string): string
  /** Compares this object against an arbitrary value. */
  isEqual(obj: any): boolean
}
/**
 * CncFun
 *
 * A named entry of `GFConcrete.functions`: a `name` plus `lins`, a list of
 * `FId` values.
 */
declare class CncFun {
  constructor(name: string, lins: FId[])

  name: string
  lins: FId[]
}
/**
 * Any of the four symbol classes declared in this file. This is the element
 * type of `GFConcrete.sequences` and of `ActiveItem.seq`.
 */
type Sym =
  | SymCat
  | SymKS
  | SymKP
  | SymLit
/**
 * SymCat
 *
 * Member of the `Sym` union that carries two numbers, `i` and `label`, in
 * addition to its string `id`.
 */
declare class SymCat {
  constructor(i: number, label: number)

  id: string
  i: number
  label: number

  getId(): string
  getArgNum(): number
  show(): string
}
/**
 * SymKS
 *
 * Member of the `Sym` union that carries a list of token strings. Every
 * constructor argument is one token.
 */
declare class SymKS {
  constructor(...tokens: string[])

  id: string
  tokens: string[]

  getId(): string
  show(): string
}
/**
 * SymKP
 *
 * Member of the `Sym` union that carries a list of token strings plus a
 * list of `Alt` alternatives.
 */
declare class SymKP {
  constructor(tokens: string[], alts: Alt[])

  id: string
  tokens: string[]
  alts: Alt[]

  getId(): string
  show(): string
}
/**
 * Alt
 *
 * Element type of `SymKP.alts`: a list of token strings paired with a list
 * of prefix strings.
 */
declare class Alt {
  constructor(tokens: string[], prefixes: string[])

  tokens: string[]
  prefixes: string[]
}
/**
 * SymLit
 *
 * Member of the `Sym` union that carries two numbers, `i` and `label`, in
 * addition to its string `id`.
 */
declare class SymLit {
  constructor(i: number, label: number)

  id: string
  i: number
  label: number

  getId(): string
  show(): string
}
/**
 * Trie
 *
 * A node holds an untyped `value` and a list of child `Trie` nodes in
 * `items`.
 *
 * The key and payload parameters were previously declared without a type,
 * i.e. as implicit `any`, which is rejected under `noImplicitAny` /
 * `strict`. They are spelled out as explicit `any` here so the declarations
 * compile under strict settings without changing what callers may pass.
 * NOTE(review): tighten these once the real key/payload types have been
 * confirmed against gflib.js.
 */
declare class Trie {
  value: any
  items: Trie[]
  insertChain(keys: any, obj: any): void
  insertChain1(keys: any, obj: any): void
  lookup(key: any, obj: any): any
  isEmpty(): boolean
}
/**
 * ParseState
 *
 * Holds the concrete syntax and start category it was constructed with,
 * together with a `Trie` of items and a `Chart`.
 *
 * `agenda` was previously declared without a type (implicit `any`, rejected
 * under `noImplicitAny`); it is now an explicit `any`, so callers see the
 * same type. The parameter of `complete` was spelled `correntToken`; the
 * name is corrected, which does not affect callers because arguments are
 * passed positionally.
 */
declare class ParseState {
  concrete: GFConcrete
  startCat: string
  items: Trie
  chart: Chart
  constructor(concrete: GFConcrete, startCat: string)
  /** Takes one token and returns a boolean. */
  next(token: string): boolean
  /** Takes the current token and returns a `Trie`. */
  complete(currentToken: string): Trie
  extractTrees(): any[]
  /**
   * Takes an agenda and two callbacks.
   * NOTE(review): the shape of `agenda` and of the callbacks' `item` and
   * return values is not visible in these declarations; confirm against
   * gflib.js before narrowing the types.
   */
  process(
    agenda: any,
    literalCallback: (fid: FId) => any,
    tokenCallback: (tokens: string[], item: any) => any
  ): void
}
/**
 * Chart
 *
 * Holds the `active`, `actives` and `passive` tables, a `forest` of
 * productions indexed by number, and the `nextId` / `offset` counters.
 *
 * Several parameters and return types were previously left off, making them
 * implicit `any`, which is rejected under `noImplicitAny` / `strict`. They
 * are written as explicit `any` here so the declarations compile under
 * strict settings while callers see exactly the same types.
 * NOTE(review): `label` and `offset` are presumably numbers (compare
 * `Chart.offset` and `ActiveItem.lbl`) — confirm against gflib.js before
 * narrowing.
 */
declare class Chart {
  active: any
  actives: {[key: number]: any}
  passive: any
  forest: {[key: number]: ApplyOrCoerce[]}
  nextId: number
  offset: number
  constructor(concrete: GFConcrete)
  lookupAC(fid: FId, label: any): any
  lookupACo(offset: any, fid: FId, label: any): any
  labelsAC(fid: FId): any
  insertAC(fid: FId, label: any, items: any): void
  lookupPC(fid: FId, label: any, offset: any): any
  insertPC(fid1: FId, label: any, offset: any, fid2: FId): void
  shift(): void
  expandForest(fid: FId): any[]
}
/**
* ActiveItem
*/
declare class ActiveItem {
offset: number
dot: number
fun: CncFun
seq: Array<Sym>
args: PArg[]
fid: FId
lbl: number
constructor(
offset: number,
dot: number,
fun: CncFun,
seq: Array<Sym>,
args: PArg[],
fid: FId,
lbl: number
)
isEqual(obj: any): boolean
shiftOverArg(i: number, fid: FId): ActiveItem
shiftOverTokn(): ActiveItem
}