GuString is now an ordinary C string - it makes life easier. In addition, PgfSymbolKS, PgfExprFun and PgfLiteralStr now keep their strings as embedded flexible arrays. The latter change gives us the same compactness as the old representation but it is a lot easier to use.

This commit is contained in:
kr.angelov
2013-10-04 12:04:39 +00:00
parent 27091048ce
commit e8335806af
25 changed files with 412 additions and 779 deletions

View File

@@ -4,11 +4,13 @@
#include <gu/map.h>
#include <gu/assert.h>
#include <gu/prime.h>
#include <gu/string.h>
typedef enum {
GU_MAP_GENERIC,
GU_MAP_ADDR,
GU_MAP_WORD
GU_MAP_WORD,
GU_MAP_STRING
} GuMapKind;
typedef struct GuMapData GuMapData;
@@ -66,6 +68,9 @@ gu_map_entry_is_free(GuMap* map, GuMapData* data, size_t idx)
} else if (map->kind == GU_MAP_WORD) {
GuWord key = ((GuWord*)data->keys)[idx];
return key == 0;
} else if (map->kind == GU_MAP_STRING) {
GuString key = ((GuString*)data->keys)[idx];
return key == NULL;
}
gu_assert(map->kind == GU_MAP_GENERIC);
const void* key = &data->keys[idx * map->key_size];
@@ -137,6 +142,27 @@ gu_map_lookup(GuMap* map, const void* key, size_t* idx_out)
gu_impossible();
break;
}
// String-keyed maps: the key slots hold GuString pointers, and keys are
// hashed and compared through the map's hasher rather than by address.
case GU_MAP_STRING: {
GuHasher* hasher = map->hasher;
// The hasher is reused as the equality object via a plain cast.
// NOTE(review): presumably GuHasher starts with a GuEquality member -- confirm.
GuEquality* eq = (GuEquality*) hasher;
GuHash hash = hasher->hash(hasher, key);
// Probe sequence: start at hash % n and advance by a fixed step in
// the range 1..n-2, so the step is never zero.
size_t idx = hash % n;
size_t offset = (hash % (n - 2)) + 1;
while (true) {
GuString entry_key =
((GuString*)map->data.keys)[idx];
// A NULL slot that is not the recorded zero_idx is free: the key is
// absent, and idx is reported as the place where it could be stored.
if (entry_key == NULL && map->data.zero_idx != idx) {
*idx_out = idx;
return false;
// NOTE(review): if entry_key is NULL and idx == zero_idx, NULL reaches
// is_equal here; verify that a string map can never record a zero_idx.
} else if (eq->is_equal(eq, key, entry_key)) {
*idx_out = idx;
return true;
}
idx = (idx + offset) % n;
}
// Not reached: the loop above only leaves through a return.
gu_impossible();
break;
}
default:
gu_impossible();
}
@@ -179,6 +205,11 @@ gu_map_resize(GuMap* map)
((const void**)data->keys)[i] = NULL;
}
break;
case GU_MAP_STRING:
for (size_t i = 0; i < data->n_entries; i++) {
((GuString*)data->keys)[i] = NULL;
}
break;
default:
gu_impossible();
}
@@ -195,6 +226,8 @@ gu_map_resize(GuMap* map)
void* old_key = &old_data.keys[i * key_size];
if (map->kind == GU_MAP_ADDR) {
old_key = *(void**)old_key;
} else if (map->kind == GU_MAP_STRING) {
old_key = (void*) *(GuString*)old_key;
}
void* old_value = &old_data.values[i * value_size];
@@ -268,6 +301,8 @@ gu_map_insert(GuMap* map, const void* key)
}
if (map->kind == GU_MAP_ADDR) {
((const void**)map->data.keys)[idx] = key;
} else if (map->kind == GU_MAP_STRING) {
((GuString*)map->data.keys)[idx] = key;
} else {
memcpy(&map->data.keys[idx * map->key_size],
key, map->key_size);
@@ -296,6 +331,8 @@ gu_map_iter(GuMap* map, GuMapItor* itor, GuExn* err)
void* value = &map->data.values[i * map->value_size];
if (map->kind == GU_MAP_ADDR) {
key = *(const void* const*) key;
} else if (map->kind == GU_MAP_STRING) {
key = *(GuString*) key;
}
itor->fn(itor, key, value, err);
}
@@ -323,8 +360,10 @@ gu_map_enum_next(GuEnum* self, void* to, GuPool* pool)
en->x.value = &en->ht->data.values[i * en->ht->value_size];
if (en->ht->kind == GU_MAP_ADDR) {
en->x.key = *(const void* const*) en->x.key;
} else if (en->ht->kind == GU_MAP_STRING) {
en->x.key = *(GuString*) en->x.key;
}
*((GuMapKeyValue**) to) = &en->x;
break;
}
@@ -365,10 +404,12 @@ gu_make_map(size_t key_size, GuHasher* hasher,
GuMapKind kind =
((!hasher || hasher == gu_addr_hasher)
? GU_MAP_ADDR
: (hasher == gu_string_hasher)
? GU_MAP_STRING
: (key_size == sizeof(GuWord) && hasher == gu_word_hasher)
? GU_MAP_WORD
: GU_MAP_GENERIC);
if (kind == GU_MAP_ADDR) {
if (kind == GU_MAP_ADDR || kind == GU_MAP_STRING) {
key_size = sizeof(GuWord);
}
GuMapData data = {

View File

@@ -427,13 +427,3 @@ gu_buf_out(GuBuf* buf, GuPool* pool)
GU_DEFINE_KIND(GuSeq, GuOpaque);
GU_DEFINE_KIND(GuBuf, abstract);
char*
gu_char_buf_str(GuCharBuf* chars, GuPool* pool)
{
size_t len = gu_buf_length(chars);
char* data = gu_buf_data(chars);
char* str = gu_new_str(len, pool);
memcpy(str, data, len);
return str;
}

View File

@@ -135,13 +135,6 @@ gu_buf_heapify(GuBuf *buf, GuOrder *order);
GuSeq*
gu_buf_freeze(GuBuf* buf, GuPool* pool);
typedef GuBuf GuCharBuf;
typedef GuBuf GuByteBuf;
char*
gu_char_buf_str(GuCharBuf* chars, GuPool* pool);
#endif // GU_SEQ_H_
#if defined(GU_OUT_H_) && !defined(GU_SEQ_H_OUT_)

View File

@@ -7,21 +7,17 @@
#include <gu/assert.h>
#include <stdlib.h>
const GuString gu_empty_string = { 1 };
struct GuStringBuf {
GuByteBuf* bbuf;
GuBuf* buf;
GuOut* out;
};
GuStringBuf*
gu_string_buf(GuPool* pool)
{
GuBuf* buf = gu_new_buf(uint8_t, pool);
GuOut* out = gu_buf_out(buf, pool);
GuStringBuf* sbuf = gu_new(GuStringBuf, pool);
sbuf->bbuf = buf;
sbuf->out = out;
sbuf->buf = gu_new_buf(char, pool);
sbuf->out = gu_buf_out(sbuf->buf, pool);
return sbuf;
}
@@ -31,176 +27,64 @@ gu_string_buf_out(GuStringBuf* sb)
return sb->out;
}
static GuString
gu_utf8_string(const uint8_t* buf, size_t sz, GuPool* pool)
{
if (sz < GU_MIN(sizeof(GuWord), 128)) {
GuWord w = 0;
for (size_t n = 0; n < sz; n++) {
w = w << 8 | buf[n];
}
w = w << 8 | (sz << 1) | 1;
return (GuString) { w };
}
uint8_t* p = NULL;
if (sz < 256) {
p = gu_malloc_aligned(pool, 1 + sz, 2);
p[0] = (uint8_t) sz;
} else {
p = gu_malloc_prefixed(pool, gu_alignof(size_t),
sizeof(size_t), 1, 1 + sz);
((size_t*) p)[-1] = sz;
p[0] = 0;
}
memcpy(&p[1], buf, sz);
return (GuString) { (GuWord) (void*) p };
}
GuString
gu_string_buf_freeze(GuStringBuf* sb, GuPool* pool)
{
gu_out_flush(sb->out, NULL);
uint8_t* data = gu_buf_data(sb->bbuf);
size_t len = gu_buf_length(sb->bbuf);
return gu_utf8_string(data, len, pool);
char* data = gu_buf_data(sb->buf);
size_t len = gu_buf_length(sb->buf);
char* p = gu_malloc_aligned(pool, len+1, 2);
memcpy(p, data, len);
p[len] = 0;
return p;
}
GuIn*
gu_string_in(GuString s, GuPool* pool)
{
GuWord w = s.w_;
uint8_t* buf = NULL;
size_t len = 0;
if (w & 1) {
len = (w & 0xff) >> 1;
buf = gu_new_n(uint8_t, len, pool);
for (int i = len - 1; i >= 0; i--) {
w >>= 8;
buf[i] = w & 0xff;
}
} else {
uint8_t* p = (void*) w;
len = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
buf = &p[1];
}
return gu_data_in(buf, len, pool);
}
static bool
gu_string_is_long(GuString s)
{
return !(s.w_ & 1);
}
bool
gu_string_is_stable(GuString s)
{
return !gu_string_is_long(s);
}
static size_t
gu_string_long_length(GuString s)
{
gu_assert(gu_string_is_long(s));
uint8_t* p = (void*) s.w_;
uint8_t len = p[0];
if (len > 0) {
return len;
}
return ((size_t*) p)[-1];
}
size_t
gu_string_length(GuString s)
{
if (gu_string_is_long(s)) {
return gu_string_long_length(s);
}
return (s.w_ & 0xff) >> 1;
}
static uint8_t*
gu_string_long_data(GuString s)
{
gu_require(gu_string_is_long(s));
uint8_t* p = (void*) s.w_;
return &p[1];
return gu_data_in((uint8_t*) s, strlen(s), pool);
}
GuString
gu_string_copy(GuString string, GuPool* pool)
{
if (gu_string_is_long(string)) {
uint8_t* data = gu_string_long_data(string);
size_t len = gu_string_long_length(string);
return gu_utf8_string(data, len, pool);
} else {
return string;
}
size_t len = strlen(string);
char* p = gu_malloc_aligned(pool, len+1, 2);
memcpy(p, string, len+1);
return p;
}
void
gu_string_write(GuString s, GuOut* out, GuExn* err)
{
GuWord w = s.w_;
uint8_t buf[sizeof(GuWord)];
uint8_t* src;
size_t sz;
if (w & 1) {
sz = (w & 0xff) >> 1;
gu_assert(sz <= sizeof(GuWord));
size_t i = sz;
while (i > 0) {
w >>= 8;
buf[--i] = w & 0xff;
}
src = buf;
} else {
uint8_t* p = (void*) w;
sz = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
src = &p[1];
}
gu_out_bytes(out, src, sz, err);
gu_out_bytes(out, (uint8_t*) s, strlen(s), err);
}
GuString
gu_string_read(size_t len, GuPool* pool, GuIn* in, GuExn* err)
{
uint8_t* buf = alloca(len*4);
uint8_t* p = buf;
char* buf = alloca(len*6+1);
char* p = buf;
for (size_t i = 0; i < len; i++) {
gu_in_utf8_buf(&p, in, err);
gu_in_utf8_buf((uint8_t**) &p, in, err);
}
return gu_utf8_string(buf, p-buf, pool);
*p++ = 0;
p = gu_malloc_aligned(pool, p-buf, 2);
strcpy(p, buf);
return p;
}
GuString
gu_string_read_latin1(size_t len, GuPool* pool, GuIn* in, GuExn* err)
{
if (len < GU_MIN(sizeof(GuWord), 128)) {
GuWord w = 0;
for (size_t n = 0; n < len; n++) {
w = w << 8 | gu_in_u8(in, err);
}
w = w << 8 | (len << 1) | 1;
return (GuString) { w };
}
uint8_t* p = NULL;
if (len < 256) {
p = gu_malloc_aligned(pool, 1 + len, 2);
p[0] = (uint8_t) len;
} else {
p = gu_malloc_prefixed(pool, gu_alignof(size_t),
sizeof(size_t), 1, 1 + len);
((size_t*) p)[-1] = len;
p[0] = 0;
}
gu_in_bytes(in, &p[1], len, err);
return (GuString) { (GuWord) (void*) p };
char* p = gu_malloc_aligned(pool, len+1, 2);
gu_in_bytes(in, (uint8_t*)p, len, err);
p[len] = 0;
return p;
}
GuString
@@ -226,52 +110,24 @@ gu_format_string(GuPool* pool, const char* fmt, ...)
return s;
}
GuString
gu_str_string(const char* str, GuPool* pool)
{
return gu_utf8_string((const uint8_t*) str, strlen(str), pool);
}
bool
gu_string_to_int(GuString s, int *res)
{
GuWord w = s.w_;
uint8_t buf[sizeof(GuWord)];
char* src;
size_t sz;
if (w & 1) {
sz = (w & 0xff) >> 1;
gu_assert(sz <= sizeof(GuWord));
size_t i = sz;
while (i > 0) {
w >>= 8;
buf[--i] = w & 0xff;
}
src = (char*) buf;
} else {
uint8_t* p = (void*) w;
sz = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
src = (char*) &p[1];
}
size_t i = 0;
bool neg = false;
if (src[i] == '-') {
if (*s == '-') {
neg = true;
i++;
s++;
}
if (i >= sz)
if (*s == 0)
return false;
int n = 0;
for (; i < sz; i++) {
if (src[i] < '0' || src[i] > '9')
for (; *s; s++) {
if (*s < '0' || *s > '9')
return false;
n = n * 10 + (src[i] - '0');
n = n * 10 + (*s - '0');
}
*res = neg ? -n : n;
@@ -281,54 +137,33 @@ gu_string_to_int(GuString s, int *res)
bool
gu_string_to_double(GuString s, double *res)
{
GuWord w = s.w_;
uint8_t buf[sizeof(GuWord)];
char* src;
size_t sz;
if (w & 1) {
sz = (w & 0xff) >> 1;
gu_assert(sz <= sizeof(GuWord));
size_t i = sz;
while (i > 0) {
w >>= 8;
buf[--i] = w & 0xff;
}
src = (char*) buf;
} else {
uint8_t* p = (void*) w;
sz = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
src = (char*) &p[1];
}
size_t i = 0;
bool neg = false;
bool dec = false;
double exp = 1;
if (src[i] == '-') {
if (*s == '-') {
neg = true;
i++;
s++;
}
if (i >= sz)
if (*s == 0)
return false;
double d = 0;
for (; i < sz; i++) {
if (src[i] == '.') {
for (; *s; s++) {
if (*s == '.') {
if (dec) return false;
dec = true;
continue;
}
if (src[i] < '0' || src[i] > '9')
if (*s < '0' || *s > '9')
return false;
if (dec) exp = exp * 10;
d = d * 10 + (src[i] - '0');
d = d * 10 + (*s - '0');
}
*res = (neg ? -d : d) / exp;
@@ -338,54 +173,18 @@ gu_string_to_double(GuString s, double *res)
bool
gu_string_is_prefix(GuString s1, GuString s2)
{
GuWord w1 = s1.w_;
uint8_t buf1[sizeof(GuWord)];
size_t sz1;
char* str1;
if (w1 & 1) {
sz1 = (w1 & 0xff) >> 1;
gu_assert(sz1 <= sizeof(GuWord));
size_t i = sz1;
while (i > 0) {
w1 >>= 8;
buf1[--i] = w1 & 0xff;
}
str1 = (char*) buf1;
} else {
uint8_t* p = (void*) w1;
sz1 = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
str1 = (char*) &p[1];
}
size_t len1 = strlen(s1);
size_t len2 = strlen(s2);
GuWord w2 = s2.w_;
uint8_t buf2[sizeof(GuWord)];
size_t sz2;
char* str2;
if (w2 & 1) {
sz2 = (w2 & 0xff) >> 1;
gu_assert(sz2 <= sizeof(GuWord));
size_t i = sz2;
while (i > 0) {
w2 >>= 8;
buf2[--i] = w2 & 0xff;
}
str2 = (char*) buf2;
} else {
uint8_t* p = (void*) w2;
sz2 = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
str2 = (char*) &p[1];
}
if (sz1 > sz2)
if (len1 > len2)
return false;
for (size_t sz = sz1; sz--; sz > 0) {
if (*str1 != *str2)
// Compare exactly len1 characters; the loop test and the step are now in
// their conventional positions (the old header used len-- as the test and
// a no-effect "len > 0" as the step).
for (size_t len = len1; len > 0; len--) {
if (*s1 != *s2)
return false;
str1++;
str2++;
s1++;
s2++;
}
return true;
@@ -394,108 +193,23 @@ gu_string_is_prefix(GuString s1, GuString s2)
GuHash
gu_string_hash(GuHash h, GuString s)
{
if (s.w_ & 1) {
return h*101 + s.w_;
}
size_t len = gu_string_length(s);
uint8_t* data = gu_string_long_data(s);
return gu_hash_bytes(h, data, len);
}
bool
gu_string_eq(GuString s1, GuString s2)
{
if (s1.w_ == s2.w_) {
return true;
} else if (gu_string_is_long(s1) && gu_string_is_long(s2)) {
size_t len1 = gu_string_long_length(s1);
size_t len2 = gu_string_long_length(s2);
if (len1 != len2) {
return false;
}
uint8_t* data1 = gu_string_long_data(s1);
uint8_t* data2 = gu_string_long_data(s2);
return (memcmp(data1, data2, len1) == 0);
}
return false;
return gu_hash_bytes(h, (uint8_t*)s, strlen(s));
}
static bool
gu_string_eq_fn(GuEquality* self, const void* p1, const void* p2)
{
(void) self;
const GuString* sp1 = p1;
const GuString* sp2 = p2;
return gu_string_eq(*sp1, *sp2);
return strcmp((GuString) p1, (GuString) p2) == 0;
}
GuEquality gu_string_equality[1] = { { gu_string_eq_fn } };
int
gu_string_cmp(GuString s1, GuString s2)
{
uint8_t buf1[sizeof(GuWord)];
char* src1;
size_t sz1;
if (s1.w_ & 1) {
sz1 = (s1.w_ & 0xff) >> 1;
gu_assert(sz1 <= sizeof(GuWord));
size_t i = sz1;
while (i > 0) {
s1.w_ >>= 8;
buf1[--i] = s1.w_ & 0xff;
}
src1 = (char*) buf1;
} else {
uint8_t* p = (void*) s1.w_;
sz1 = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
src1 = (char*) &p[1];
}
uint8_t buf2[sizeof(GuWord)];
char* src2;
size_t sz2;
if (s2.w_ & 1) {
sz2 = (s2.w_ & 0xff) >> 1;
gu_assert(sz2 <= sizeof(GuWord));
size_t i = sz2;
while (i > 0) {
s2.w_ >>= 8;
buf2[--i] = s2.w_ & 0xff;
}
src2 = (char*) buf2;
} else {
uint8_t* p = (void*) s2.w_;
sz2 = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
src2 = (char*) &p[1];
}
for (size_t i = 0; ; i++) {
if (sz1 == i && i == sz2)
break;
if (sz1 <= i)
return -1;
if (i >= sz2)
return 1;
if (src1[i] > src2[i])
return 1;
else if (src1[i] < src2[i])
return -1;
}
return 0;
}
static int
gu_string_cmp_fn(GuOrder* self, const void* p1, const void* p2)
{
(void) self;
const GuString* sp1 = p1;
const GuString* sp2 = p2;
return gu_string_cmp(*sp1, *sp2);
return strcmp((GuString) p1, (GuString) p2);
}
GuOrder gu_string_order[1] = { { gu_string_cmp_fn } };
@@ -504,8 +218,7 @@ static GuHash
gu_string_hasher_hash(GuHasher* self, const void* p)
{
(void) self;
const GuString* sp = p;
return gu_string_hash(0, *sp);
return gu_string_hash(0, (GuString) p);
}
GuHasher gu_string_hasher[1] = {
@@ -516,5 +229,5 @@ GuHasher gu_string_hasher[1] = {
};
GU_DEFINE_TYPE(GuString, GuOpaque, _);
GU_DEFINE_KIND(GuString, pointer);
GU_DEFINE_KIND(GuStringMap, GuMap);

View File

@@ -1,22 +1,3 @@
/*
* Copyright 2011 University of Helsinki.
*
* This file is part of libgu.
*
* Libgu is free software: you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the
* Free Software Foundation, either version 3 of the License, or (at your
* option) any later version.
*
* Libgu is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with libgu. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef GU_STRING_H_
#define GU_STRING_H_
@@ -24,9 +5,7 @@
#include <gu/in.h>
#include <gu/out.h>
typedef GuOpaque() GuString;
extern const GuString gu_empty_string;
typedef const char* GuString;
GuString
gu_string_copy(GuString string, GuPool* pool);
@@ -43,12 +22,6 @@ gu_string_read_latin1(size_t len, GuPool* pool, GuIn* in, GuExn* err);
GuIn*
gu_string_in(GuString string, GuPool* pool);
bool
gu_string_is_stable(GuString string);
GuString
gu_ucs_string(const GuUCS* ubuf, size_t len, GuPool* pool);
typedef struct GuStringBuf GuStringBuf;
GuStringBuf*
@@ -66,34 +39,21 @@ gu_format_string_v(const char* fmt, va_list args, GuPool* pool);
GuString
gu_format_string(GuPool* pool, const char* fmt, ...);
GuString
gu_str_string(const char* str, GuPool* pool);
bool
gu_string_to_int(GuString s, int *res);
bool
gu_string_to_double(GuString s, double *res);
bool
gu_string_is_prefix(GuString s1, GuString s2);
size_t
gu_string_length(GuString s);
#endif // GU_STRING_H_
#if defined(GU_FUN_H_) && !defined(GU_STRING_H_FUN_)
#define GU_STRING_H_FUN_
bool
gu_string_eq(GuString s1, GuString s2);
extern GuEquality gu_string_equality[1];
int
gu_string_cmp(GuString s1, GuString s2);
extern GuOrder gu_string_order[1];
#endif
@@ -110,7 +70,7 @@ extern GuHasher gu_string_hasher[1];
# ifndef GU_STRING_H_TYPE_
# define GU_STRING_H_TYPE_
extern GU_DECLARE_TYPE(GuString, GuOpaque);
extern GU_DECLARE_KIND(GuString);
# endif
# if defined(GU_MAP_H_TYPE_) && !defined(GU_STRING_H_MAP_TYPE_)
@@ -132,8 +92,6 @@ typedef GuType_GuMap GuType_GuStringMap;
#define GU_STRING_H_SEQ_
typedef GuSeq GuStrings;
// typedef GuBuf GuStringBuf;
#endif

View File

@@ -15,7 +15,7 @@ pgf_tokens_equal(PgfTokens* t1, PgfTokens* t2)
for (size_t i = 0; i < len1; i++) {
GuString s1 = gu_seq_get(t1, PgfToken, i);
GuString s2 = gu_seq_get(t2, PgfToken, i);
if (!gu_string_eq(s1, s2)) {
if (strcmp(s1, s2) != 0) {
return false;
}
}
@@ -51,7 +51,9 @@ GU_DEFINE_TYPE(PgfMetaChildMap, GuMap,
GU_DEFINE_TYPE(PgfAbsCat, abstract);
static GuString empty_string = "";
GU_DEFINE_TYPE(
PgfPrintNames, PgfCIdMap, gu_type(GuString), &gu_empty_string);
PgfPrintNames, PgfCIdMap, gu_type(GuString), &empty_string);
GU_DEFINE_TYPE(PgfConcr, abstract);

View File

@@ -190,7 +190,7 @@ typedef struct {
typedef PgfSymbolIdx PgfSymbolCat, PgfSymbolLit, PgfSymbolVar;
typedef struct {
PgfToken token;
char token[0]; // a flexible array that contains the token
} PgfSymbolKS;
typedef struct PgfSymbolKP

View File

@@ -152,7 +152,7 @@ struct PgfExprParser {
GuPool* expr_pool;
GuPool* tmp_pool;
PGF_TOKEN_TAG token_tag;
GuCharBuf* token_value;
GuBuf* token_value;
int ch;
};
@@ -232,7 +232,7 @@ pgf_expr_parser_token(PgfExprParser* parser)
parser->token_tag = PGF_TOKEN_WILD;
break;
default: {
GuCharBuf* chars = gu_new_buf(char, parser->tmp_pool);
GuBuf* chars = gu_new_buf(char, parser->tmp_pool);
if (isalpha(parser->ch)) {
while (isalnum(parser->ch) ||
@@ -241,6 +241,7 @@ pgf_expr_parser_token(PgfExprParser* parser)
gu_buf_push(chars, char, parser->ch);
pgf_expr_parser_getc(parser);
}
gu_buf_push(chars, char, 0);
parser->token_tag = PGF_TOKEN_IDENT;
parser->token_value = chars;
} else if (isdigit(parser->ch)) {
@@ -257,9 +258,11 @@ pgf_expr_parser_token(PgfExprParser* parser)
gu_buf_push(chars, char, parser->ch);
pgf_expr_parser_getc(parser);
}
gu_buf_push(chars, char, 0);
parser->token_tag = PGF_TOKEN_FLT;
parser->token_value = chars;
} else {
gu_buf_push(chars, char, 0);
parser->token_tag = PGF_TOKEN_INT;
parser->token_value = chars;
}
@@ -273,6 +276,7 @@ pgf_expr_parser_token(PgfExprParser* parser)
if (parser->ch == '"') {
pgf_expr_parser_getc(parser);
gu_buf_push(chars, char, 0);
parser->token_tag = PGF_TOKEN_STR;
parser->token_value = chars;
}
@@ -341,18 +345,20 @@ pgf_expr_parser_term(PgfExprParser* parser)
0);
}
case PGF_TOKEN_IDENT: {
char* str =
gu_char_buf_str(parser->token_value, parser->tmp_pool);
PgfCId id = gu_str_string(str, parser->expr_pool);
PgfCId id = gu_buf_data(parser->token_value);
pgf_expr_parser_token(parser);
return gu_new_variant_i(parser->expr_pool,
PGF_EXPR_FUN,
PgfExprFun,
id);
PgfExpr e;
PgfExprFun* fun =
gu_new_flex_variant(PGF_EXPR_FUN,
PgfExprFun,
fun, strlen(id)+1,
&e, parser->expr_pool);
strcpy(fun->fun, id);
return e;
}
case PGF_TOKEN_INT: {
char* str =
gu_char_buf_str(parser->token_value, parser->tmp_pool);
gu_buf_data(parser->token_value);
int n = atoi(str);
pgf_expr_parser_token(parser);
PgfLiteral lit =
@@ -367,22 +373,23 @@ pgf_expr_parser_term(PgfExprParser* parser)
}
case PGF_TOKEN_STR: {
char* str =
gu_char_buf_str(parser->token_value, parser->tmp_pool);
GuString s = gu_str_string(str, parser->expr_pool);
gu_buf_data(parser->token_value);
pgf_expr_parser_token(parser);
PgfLiteral lit =
gu_new_variant_i(parser->expr_pool,
PGF_LITERAL_STR,
PgfLiteralStr,
s);
PgfLiteral lit;
PgfLiteralStr* plit =
gu_new_flex_variant(PGF_LITERAL_STR,
PgfLiteralStr,
val, strlen(str)+1,
&lit, parser->expr_pool);
strcpy(plit->val, str);
return gu_new_variant_i(parser->expr_pool,
PGF_EXPR_LIT,
PgfExprLit,
lit);
PGF_EXPR_LIT,
PgfExprLit,
lit);
}
case PGF_TOKEN_FLT: {
char* str =
gu_char_buf_str(parser->token_value, parser->tmp_pool);
gu_buf_data(parser->token_value);
double d = atof(str);
pgf_expr_parser_token(parser);
PgfLiteral lit =
@@ -442,12 +449,11 @@ pgf_expr_parser_bind(PgfExprParser* parser, GuBuf* binds)
for (;;) {
if (parser->token_tag == PGF_TOKEN_IDENT) {
char* str =
gu_char_buf_str(parser->token_value, parser->tmp_pool);
var = gu_str_string(str, parser->expr_pool);
var =
gu_string_copy(gu_buf_data(parser->token_value), parser->expr_pool);
pgf_expr_parser_token(parser);
} else if (parser->token_tag == PGF_TOKEN_WILD) {
var = gu_str_string("_", parser->expr_pool);
var = "_";
pgf_expr_parser_token(parser);
} else {
return false;
@@ -562,12 +568,11 @@ pgf_expr_parser_hypos(PgfExprParser* parser, GuBuf* hypos)
}
if (parser->token_tag == PGF_TOKEN_IDENT) {
char* str =
gu_char_buf_str(parser->token_value, parser->tmp_pool);
var = gu_str_string(str, parser->expr_pool);
var =
gu_string_copy(gu_buf_data(parser->token_value), parser->expr_pool);
pgf_expr_parser_token(parser);
} else if (parser->token_tag == PGF_TOKEN_WILD) {
var = gu_str_string("_", parser->expr_pool);
var = "_";
pgf_expr_parser_token(parser);
} else {
return false;
@@ -603,9 +608,8 @@ pgf_expr_parser_atom(PgfExprParser* parser)
if (parser->token_tag != PGF_TOKEN_IDENT)
return NULL;
char* str =
gu_char_buf_str(parser->token_value, parser->tmp_pool);
PgfCId cid = gu_str_string(str, parser->expr_pool);
PgfCId cid =
gu_string_copy(gu_buf_data(parser->token_value), parser->expr_pool);
pgf_expr_parser_token(parser);
GuBuf* args = gu_new_buf(PgfExpr, parser->tmp_pool);
@@ -663,7 +667,7 @@ pgf_expr_parser_type(PgfExprParser* parser)
} else {
PgfHypo* hypo = gu_buf_extend(hypos);
hypo->bind_type = PGF_BIND_TYPE_EXPLICIT;
hypo->cid = gu_str_string("_", parser->expr_pool);
hypo->cid = "_";
hypo->type = NULL;
}
@@ -699,7 +703,7 @@ pgf_expr_parser_type(PgfExprParser* parser)
PgfHypo* hypo = gu_buf_extend(hypos);
hypo->bind_type = PGF_BIND_TYPE_EXPLICIT;
hypo->cid = gu_str_string("_", parser->expr_pool);
hypo->cid = "_";
hypo->type = type;
}
}
@@ -761,7 +765,7 @@ pgf_literal_eq(PgfLiteral lit1, PgfLiteral lit2)
case PGF_LITERAL_STR: {
PgfLiteralStr* lit1 = ei1.data;
PgfLiteralStr* lit2 = ei2.data;
return gu_string_eq(lit1->val, lit2->val);
return strcmp(lit1->val, lit2->val) == 0;
}
case PGF_LITERAL_INT: {
PgfLiteralInt* lit1 = ei1.data;
@@ -793,7 +797,7 @@ pgf_expr_eq(PgfExpr e1, PgfExpr e2)
case PGF_EXPR_ABS: {
PgfExprAbs* abs1 = ei1.data;
PgfExprAbs* abs2 = ei2.data;
return gu_string_eq(abs1->id, abs2->id) &&
return strcmp(abs1->id, abs2->id) == 0 &&
pgf_expr_eq(abs1->body, abs2->body);
}
case PGF_EXPR_APP: {
@@ -815,7 +819,7 @@ pgf_expr_eq(PgfExpr e1, PgfExpr e2)
case PGF_EXPR_FUN: {
PgfExprFun* fun1 = ei1.data;
PgfExprFun* fun2 = ei2.data;
return gu_string_eq(fun1->fun, fun2->fun);
return strcmp(fun1->fun, fun2->fun) == 0;
}
case PGF_EXPR_VAR: {
PgfExprVar* var1 = ei1.data;
@@ -1076,9 +1080,8 @@ pgf_print_hypo(PgfHypo *hypo, PgfPrintContext* ctxt, int prec,
gu_puts(")", out, err);
} else {
GuPool* tmp_pool = gu_new_pool();
GuString tmp = gu_str_string("_", tmp_pool);
if (!gu_string_eq(hypo->cid, tmp)) {
if (strcmp(hypo->cid, "_") != 0) {
gu_puts("(", out, err);
gu_string_write(hypo->cid, out, err);
gu_puts(" : ", out, err);
@@ -1158,14 +1161,14 @@ pgf_type_eq(PgfType* t1, PgfType* t2)
if (hypo1->bind_type != hypo2->bind_type)
return false;
if (!gu_string_eq(hypo1->cid, hypo2->cid))
if (strcmp(hypo1->cid, hypo2->cid) != 0)
return false;
if (!pgf_type_eq(hypo1->type, hypo2->type))
return false;
}
if (!gu_string_eq(t1->cid, t2->cid))
if (strcmp(t1->cid, t2->cid) != 0)
return false;
if (t1->n_exprs != t2->n_exprs)

View File

@@ -37,7 +37,7 @@ typedef enum {
} PgfLiteralTag;
typedef struct {
GuString val;
char val[0]; // a flexible array that contains the value
} PgfLiteralStr;
typedef struct {
@@ -102,7 +102,7 @@ typedef struct {
} PgfExprMeta;
typedef struct {
PgfCId fun;
char fun[0];
} PgfExprFun;
typedef struct {

View File

@@ -112,7 +112,6 @@ typedef struct {
size_t level;
GuBuf* internals;
GuBuf* leaves;
GuString wildcard;
} PgfBracketLznState;
static void
@@ -167,7 +166,7 @@ pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, int linde
{
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
if (gu_string_eq(cat, state->wildcard))
if (strcmp(cat, "_") == 0)
return;
state->level++;
@@ -203,7 +202,7 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, int lindex,
{
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
if (gu_string_eq(cat, state->wildcard))
if (strcmp(cat, "_") == 0)
return;
state->level--;
@@ -281,7 +280,6 @@ pgf_graphviz_parse_tree(PgfConcr* concr, PgfExpr expr, GuOut* out, GuExn* err)
state.level = -1;
state.internals = gu_new_buf(GuBuf*, tmp_pool);
state.leaves = gu_new_buf(PgfParseNode*, tmp_pool);
state.wildcard = gu_str_string("_", tmp_pool);
pgf_lzr_linearize(concr, ctree, 0, &state.funcs);
size_t len = gu_buf_length(state.internals);

View File

@@ -201,7 +201,7 @@ pgf_jit_predicate(PgfJitState* state, PgfCIdMap* abscats,
// call the predicate for the category in hypo->type->cid
PgfAbsCat* arg =
gu_map_get(abscats, &hypo->type->cid, PgfAbsCat*);
gu_map_get(abscats, hypo->type->cid, PgfAbsCat*);
#ifdef PGF_JIT_DEBUG
gu_puts(" CALL ", wtr, err);
@@ -314,7 +314,7 @@ pgf_jit_done(PgfJitState* state, PgfAbstr* abstr)
PgfCallPatch* patch =
gu_buf_index(state->patches, PgfCallPatch, i);
PgfAbsCat* arg =
gu_map_get(abstr->cats, &patch->cid, PgfAbsCat*);
gu_map_get(abstr->cats, patch->cid, PgfAbsCat*);
gu_assert(arg != NULL);
jit_patch_calli(patch->ref,(jit_insn*) arg->predicate);

View File

@@ -108,7 +108,7 @@ pgf_new_simple_lexer(GuIn *in, GuPool *pool)
{
PgfSimpleLexer* lexer = gu_new(PgfSimpleLexer, pool);
lexer->base.read_token = pgf_simple_lexer_read_token;
lexer->base.tok = gu_empty_string;
lexer->base.tok = "";
lexer->in = in;
lexer->pool = pool;
lexer->ucs = ' ';

View File

@@ -45,12 +45,12 @@ pgf_lzr_index(PgfConcr* concr,
case PGF_PRODUCTION_APPLY: {
PgfProductionApply* papply = data;
PgfCncOverloadMap* overl_table =
gu_map_get(concr->fun_indices, &papply->fun->absfun->name,
gu_map_get(concr->fun_indices, papply->fun->absfun->name,
PgfCncOverloadMap*);
if (!overl_table) {
overl_table = gu_map_type_new(PgfCncOverloadMap, pool);
gu_map_put(concr->fun_indices,
&papply->fun->absfun->name, PgfCncOverloadMap*, overl_table);
papply->fun->absfun->name, PgfCncOverloadMap*, overl_table);
}
pgf_lzr_add_overl_entry(overl_table, ccat, papply, pool);
break;
@@ -227,11 +227,12 @@ pgf_lzn_resolve_def(PgfLzn* lzn, PgfCncFuns* lindefs, GuString s, GuPool* pool)
PgfCncTreeLit,
&lit, pool);
clit->fid = lzn->fid++;
clit->lit =
gu_new_variant_i(pool,
PGF_LITERAL_STR,
PgfLiteralStr,
s);
PgfLiteralStr* lit_str =
gu_new_flex_variant(PGF_LITERAL_STR,
PgfLiteralStr,
val, strlen(s),
&clit->lit, pool);
strcpy((char*) lit_str->val, (char*) s);
if (lindefs == NULL)
return lit;
@@ -322,8 +323,7 @@ pgf_lzn_resolve(PgfLzn* lzn, PgfExpr expr, PgfCCat* ccat, GuPool* pool)
goto done;
}
GuString s = gu_str_string("?", pool);
ret = pgf_lzn_resolve_def(lzn, ccat->lindefs, s, pool);
ret = pgf_lzn_resolve_def(lzn, ccat->lindefs, "?", pool);
goto done;
}
}
@@ -331,7 +331,7 @@ pgf_lzn_resolve(PgfLzn* lzn, PgfExpr expr, PgfCCat* ccat, GuPool* pool)
PgfExprFun* efun = i.data;
PgfCncOverloadMap* overl_table =
gu_map_get(lzn->concr->fun_indices, &efun->fun, PgfCncOverloadMap*);
gu_map_get(lzn->concr->fun_indices, efun->fun, PgfCncOverloadMap*);
if (overl_table == NULL) {
if (ccat != NULL && ccat->lindefs == NULL) {
goto done;
@@ -345,7 +345,7 @@ pgf_lzn_resolve(PgfLzn* lzn, PgfExpr expr, PgfCCat* ccat, GuPool* pool)
gu_putc('[', out, err);
gu_string_write(efun->fun, out, err);
gu_putc(']', out, err);
GuString s = gu_string_buf_freeze(sbuf, pool);
GuString s = gu_string_buf_freeze(sbuf, tmp_pool);
if (ccat != NULL) {
ret = pgf_lzn_resolve_def(lzn, ccat->lindefs, s, pool);
@@ -356,10 +356,11 @@ pgf_lzn_resolve(PgfLzn* lzn, PgfExpr expr, PgfCCat* ccat, GuPool* pool)
&ret, pool);
clit->fid = lzn->fid++;
PgfLiteralStr* lit =
gu_new_variant(PGF_LITERAL_STR,
PgfLiteralStr,
&clit->lit, pool);
lit->val = s;
gu_new_flex_variant(PGF_LITERAL_STR,
PgfLiteralStr,
val, strlen(s)+1,
&clit->lit, pool);
strcpy(lit->val, s);
}
gu_pool_free(tmp_pool);
@@ -557,7 +558,7 @@ pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs
if (fns->begin_phrase) {
fns->begin_phrase(fnsp,
cat, flit->fid, 0,
gu_empty_string);
"");
}
if (fns->expr_literal) {
@@ -567,7 +568,7 @@ pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs
if (fns->end_phrase) {
fns->end_phrase(fnsp,
cat, flit->fid, 0,
gu_empty_string);
"");
}
break;
@@ -697,7 +698,7 @@ pgf_get_tokens(PgfSequence* seq, uint16_t seq_idx, GuPool* pool)
pgf_lzr_linearize_sequence(NULL, NULL, seq, seq_idx, &flin.funcs);
GuString tokens = gu_ok(err) ? gu_string_buf_freeze(sbuf, pool)
: gu_empty_string;
: "";
gu_pool_free(tmp_pool);

View File

@@ -39,10 +39,11 @@ pgf_match_string_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
PgfExprLit,
&ep->expr, pool);
PgfLiteralStr *lit_str =
gu_new_variant(PGF_LITERAL_STR,
PgfLiteralStr,
&expr_lit->lit, pool);
lit_str->val = sks->token;
gu_new_flex_variant(PGF_LITERAL_STR,
PgfLiteralStr,
val, strlen(sks->token)+1,
&expr_lit->lit, pool);
strcpy(lit_str->val, sks->token);
*out_ep = ep;
accepted = false;
@@ -185,12 +186,10 @@ pgf_match_name_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
GuString hyp = gu_str_string("-", tmp_pool);
bool iscap = false;
if (gu_string_eq(tok, hyp)) {
if (strcmp(tok, "-") == 0) {
iscap = true;
} else if (!gu_string_eq(tok, gu_empty_string)) {
} else if (*tok) {
GuIn* in = gu_string_in(tok, tmp_pool);
iscap = iswupper(gu_in_utf8(in, err));
}
@@ -218,21 +217,24 @@ pgf_match_name_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
gu_new_variant(PGF_EXPR_APP,
PgfExprApp,
&ep->expr, pool);
GuString con = "MkSymb";
PgfExprFun *expr_fun =
gu_new_variant(PGF_EXPR_FUN,
PgfExprFun,
&expr_app->fun, pool);
expr_fun->fun = gu_str_string("MkSymb", pool);
gu_new_flex_variant(PGF_EXPR_FUN,
PgfExprFun,
fun, strlen(con)+1,
&expr_app->fun, pool);
strcpy(expr_fun->fun, con);
PgfExprLit *expr_lit =
gu_new_variant(PGF_EXPR_LIT,
PgfExprLit,
&expr_app->arg, pool);
GuString val = gu_string_buf_freeze(sbuf, tmp_pool);
PgfLiteralStr *lit_str =
gu_new_variant(PGF_LITERAL_STR,
PgfLiteralStr,
&expr_lit->lit, pool);
lit_str->val = gu_string_buf_freeze(sbuf, pool);
gu_new_flex_variant(PGF_LITERAL_STR,
PgfLiteralStr,
val, strlen(val)+1,
&expr_lit->lit, pool);
strcpy(lit_str->val, val);
*out_ep = ep;
} else {
*out_ep = NULL;

View File

@@ -145,24 +145,7 @@ GU_DEFINE_TYPE(PgfLeftcornerTokIdx, GuStringMap,
static PgfSymbol
pgf_prev_extern_sym(PgfSymbol sym)
{
GuVariantInfo i = gu_variant_open(sym);
switch (i.tag) {
case PGF_SYMBOL_CAT:
return *((PgfSymbol*) (((PgfSymbolCat*) i.data)+1));
case PGF_SYMBOL_KP:
return *((PgfSymbol*) (((PgfSymbolKP*) i.data)+1));
case PGF_SYMBOL_KS:
return *((PgfSymbol*) (((PgfSymbolKS*) i.data)+1));
case PGF_SYMBOL_LIT:
return *((PgfSymbol*) (((PgfSymbolLit*) i.data)+1));
case PGF_SYMBOL_VAR:
return *((PgfSymbol*) (((PgfSymbolVar*) i.data)+1));
case PGF_SYMBOL_NE:
return *((PgfSymbol*) (((PgfSymbolNE*) i.data)+1));
default:
gu_impossible();
return gu_null_variant;
}
return *(((PgfSymbol*) gu_variant_data(sym))-1);
}
size_t
@@ -1126,19 +1109,19 @@ pgf_parsing_meta_scan(PgfParseState* before, PgfParseState* after,
PgfItem* meta_item, prob_t meta_prob)
{
PgfToken tok = after->ts->fn->get_token(after->ts);
if (!gu_string_eq(tok, gu_empty_string)) {
// Scan only when there is a non-empty token to consume; this keeps the
// meaning of the earlier !gu_string_eq(tok, gu_empty_string) test.
if (*tok != 0) {
PgfItem* item = pgf_item_copy(meta_item, before->ps->pool, before->ps);
item->inside_prob += meta_prob;
PgfSymbol prev = item->curr_sym;
PgfSymbolKS* sks = (PgfSymbolKS*)
gu_alloc_variant(PGF_SYMBOL_KS,
sizeof(PgfSymbolKS)+sizeof(PgfSymbol),
sizeof(PgfSymbol)+sizeof(PgfSymbolKS)+strlen(tok)+1,
gu_alignof(PgfSymbolKS),
&item->curr_sym, after->ps->pool);
*((PgfSymbol*)(sks+1)) = prev;
sks->token = tok;
*(((PgfSymbol*) sks)-1) = prev;
strcpy((char*) sks->token, (char*) tok);
gu_buf_heap_push(before->agenda, &pgf_item_prob_order, &item);
}
@@ -1162,7 +1145,7 @@ pgf_parsing_meta_predict(GuMapItor* fn, const void* key, void* value, GuExn* err
PgfItem* meta_item = clo->meta_item;
PgfCncCat* cnccat =
gu_map_get(state->ps->concr->cnccats, &abscat->name, PgfCncCat*);
gu_map_get(state->ps->concr->cnccats, abscat->name, PgfCncCat*);
if (cnccat == NULL)
return;
@@ -1193,7 +1176,7 @@ pgf_parsing_meta_predict(GuMapItor* fn, const void* key, void* value, GuExn* err
sizeof(PgfSymbolCat)+sizeof(PgfSymbol),
gu_alignof(PgfSymbolCat),
&item->curr_sym, state->ps->pool);
*((PgfSymbol*)(scat+1)) = prev;
*(((PgfSymbol*)scat)-1) = prev;
scat->d = nargs;
scat->r = lin_idx;
@@ -1342,6 +1325,10 @@ pgf_parsing_symbol(PgfParseState* before, PgfParseState* after,
pgf_item_free(before, after, item);
break;
}
case PGF_SYMBOL_BIND: {
pgf_item_free(before, after, item);
break;
}
default:
gu_impossible();
}
@@ -1412,7 +1399,7 @@ pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item)
} else {
PgfToken tok = (after != NULL)
? after->ts->fn->get_token(after->ts)
: gu_empty_string;
: "";
PgfExprProb *ep = NULL;
bool accepted =
@@ -1428,11 +1415,11 @@ pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item)
PgfSymbol prev = item->curr_sym;
PgfSymbolKS* sks = (PgfSymbolKS*)
gu_alloc_variant(PGF_SYMBOL_KS,
sizeof(PgfSymbolKS)+sizeof(PgfSymbol),
sizeof(PgfSymbol)+sizeof(PgfSymbolKS)+strlen(tok)+1,
gu_alignof(PgfSymbolKS),
&item->curr_sym, after->ps->pool);
*((PgfSymbol*)(sks+1)) = prev;
sks->token = tok;
*(((PgfSymbol*) sks)-1) = prev;
strcpy((char*) sks->token, (char*) tok);
item->seq_idx++;
pgf_parsing_add_transition(before, after, tok, item);
@@ -1546,9 +1533,7 @@ pgf_parsing_proceed(PgfParseState* state)
static prob_t
pgf_parsing_default_beam_size(PgfConcr* concr)
{
GuPool* tmp_pool = gu_new_pool();
PgfCId flag_name = gu_str_string("beam_size", tmp_pool);
PgfLiteral lit = gu_map_get(concr->cflags, &flag_name, PgfLiteral);
PgfLiteral lit = gu_map_get(concr->cflags, "beam_size", PgfLiteral);
if (gu_variant_is_null(lit))
return 0;
@@ -1677,7 +1662,7 @@ typedef struct {
// Reports whether `tok` is equal to the token held by the
// PgfRealTokenState that embeds `ts`. `item` is not used.
static bool
pgf_real_match_token(PgfTokenState* ts, PgfToken tok, PgfItem* item)
{
// The two returns are the old (gu_string_eq) and new (strcmp) forms of the
// same comparison; the hunk's +/- markers were stripped.
return gu_string_eq(gu_container(ts, PgfRealTokenState, ts)->tok, tok);
return strcmp(gu_container(ts, PgfRealTokenState, ts)->tok, tok) == 0;
}
static PgfToken
@@ -1707,7 +1692,7 @@ pgf_parser_next_state(PgfParseState* prev, PgfToken tok)
pgf_new_token_state(PgfRealTokenState, prev->ps->pool);
ts->tok = tok;
ts->lexicon_idx = gu_map_get(prev->ps->concr->leftcorner_tok_idx,
&tok, PgfProductionIdx*);
tok, PgfProductionIdx*);
if (ts->lexicon_idx != NULL) {
PgfLexiconFn clo = { { pgf_parser_compute_lexicon_prob }, &ts->ts };
gu_map_iter(ts->lexicon_idx, &clo.fn, NULL);
@@ -1758,7 +1743,7 @@ pgf_prefix_match_token(PgfTokenState* ts0, PgfToken tok, PgfItem* item)
// Always yields the empty token; `ts` is not consulted.
static PgfToken
pgf_prefix_get_token(PgfTokenState* ts) {
// Old form (gu_empty_string) followed by its replacement (a "" literal).
return gu_empty_string;
return "";
}
static PgfProductionIdx*
@@ -2165,7 +2150,7 @@ pgf_parser_init_state(PgfConcr* concr, PgfCId cat, size_t lin_idx,
GuPool* pool, GuPool* out_pool)
{
PgfCncCat* cnccat =
gu_map_get(concr->cnccats, &cat, PgfCncCat*);
gu_map_get(concr->cnccats, cat, PgfCncCat*);
if (!cnccat)
return NULL;
@@ -2226,7 +2211,7 @@ pgf_parser_add_literal(PgfConcr *concr, PgfCId cat,
PgfLiteralCallback* callback)
{
PgfCncCat* cnccat =
gu_map_get(concr->cnccats, &cat, PgfCncCat*);
gu_map_get(concr->cnccats, cat, PgfCncCat*);
if (cnccat == NULL)
return;
@@ -2281,7 +2266,7 @@ pgf_morpho_iter(GuMapItor* fn, const void* key, void* value, GuExn* err)
PgfToken tok1 = symks->token;
PgfToken tok2 = gu_seq_get(clo->tokens, PgfToken, pos++);
if (!gu_string_eq(tok1, tok2))
if (strcmp(tok1, tok2) != 0)
goto cont;
}
default:
@@ -2320,7 +2305,7 @@ pgf_lookup_morpho(PgfConcr *concr, PgfLexer *lexer,
}
PgfProductionIdx* lexicon_idx =
gu_map_get(concr->leftcorner_tok_idx, &tok, PgfProductionIdx*);
gu_map_get(concr->leftcorner_tok_idx, tok, PgfProductionIdx*);
if (lexicon_idx == NULL) {
gu_pool_free(tmp_pool);
return;
@@ -2374,10 +2359,10 @@ pgf_fullform_iter(GuMapItor* fn, const void* key, void* value, GuExn* err)
// create a new production index with keys that
// are multiword units
PgfProductionIdx* lexicon_idx =
gu_map_get(st->new_idx, &tokens, PgfProductionIdx*);
gu_map_get(st->new_idx, tokens, PgfProductionIdx*);
if (lexicon_idx == NULL) {
lexicon_idx = gu_map_type_new(PgfProductionIdx, st->pool);
gu_map_put(st->new_idx, &tokens, PgfProductionIdx*, lexicon_idx);
gu_map_put(st->new_idx, tokens, PgfProductionIdx*, lexicon_idx);
}
PgfProductionBuf* prods =
@@ -2443,7 +2428,7 @@ pgf_fullform_lexicon(PgfConcr *concr, GuPool* pool)
// Returns the string key of a full-form lexicon entry.
GuString
pgf_fullform_get_string(PgfFullFormEntry* entry)
{
// Old form: the key is a pointer to a GuString and is dereferenced.
return *((GuString*) entry->key);
// New form: the key pointer itself is the string.
return (GuString) entry->key;
}
void
@@ -2462,10 +2447,10 @@ pgf_parser_index_token(PgfConcr* concr,
GuPool *pool)
{
PgfProductionIdx* set =
gu_map_get(concr->leftcorner_tok_idx, &tok, PgfProductionIdx*);
gu_map_get(concr->leftcorner_tok_idx, tok, PgfProductionIdx*);
if (set == NULL) {
set = gu_map_type_new(PgfProductionIdx, pool);
gu_map_put(concr->leftcorner_tok_idx, &tok, PgfProductionIdx*, set);
gu_map_put(concr->leftcorner_tok_idx, tok, PgfProductionIdx*, set);
}
PgfCFCat cfc = {ccat, lin_idx};
@@ -2527,6 +2512,7 @@ pgf_parser_index_symbol(PgfConcr* concr, PgfSymbol sym,
case PGF_SYMBOL_CAT:
case PGF_SYMBOL_LIT:
case PGF_SYMBOL_NE:
case PGF_SYMBOL_BIND:
case PGF_SYMBOL_VAR:
// Nothing to be done here
break;

View File

@@ -111,7 +111,7 @@ pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_id
if (phrase->start == start &&
phrase->end == end &&
gu_string_eq(phrase->cat, cat) &&
strcmp(phrase->cat, cat) == 0 &&
phrase->lin_idx == lin_idx) {
state->matches++;
break;

View File

@@ -49,30 +49,28 @@ pgf_load_meta_child_probs(PgfPGF* pgf, const char* fpath,
GuPool* tmp_pool = gu_new_pool();
for (;;) {
char cat1_s[21];
char cat2_s[21];
char cat1[21];
char cat2[21];
prob_t prob;
if (fscanf(fp, "%20s\t%20s\t%f", cat1_s, cat2_s, &prob) < 3)
if (fscanf(fp, "%20s\t%20s\t%f", cat1, cat2, &prob) < 3)
break;
prob = - log(prob);
GuString cat1 = gu_str_string(cat1_s, tmp_pool);
PgfAbsCat* abscat1 =
gu_map_get(pgf->abstract.cats, &cat1, PgfAbsCat*);
gu_map_get(pgf->abstract.cats, cat1, PgfAbsCat*);
if (abscat1 == NULL) {
gu_raise(err, PgfExn);
goto close;
}
if (strcmp(cat2_s, "*") == 0) {
if (strcmp(cat2, "*") == 0) {
abscat1->meta_prob = prob;
} else if (strcmp(cat2_s, "_") == 0) {
} else if (strcmp(cat2, "_") == 0) {
abscat1->meta_token_prob = prob;
} else {
GuString cat2 = gu_str_string(cat2_s, tmp_pool);
PgfAbsCat* abscat2 = gu_map_get(pgf->abstract.cats, &cat2, PgfAbsCat*);
PgfAbsCat* abscat2 = gu_map_get(pgf->abstract.cats, cat2, PgfAbsCat*);
if (abscat2 == NULL) {
gu_raise(err, PgfExn);
goto close;
@@ -107,7 +105,7 @@ pgf_iter_languages(PgfPGF* pgf, GuMapItor* fn, GuExn* err)
// Looks up the concrete syntax named `lang` in pgf->concretes.
// Presumably yields NULL when there is no such language (callers in this
// diff test the result against NULL) - confirm against gu_map_get.
PgfConcr*
pgf_get_language(PgfPGF* pgf, PgfCId lang)
{
// Old form passes the address of the key, the new form passes the string
// itself as the key.
return gu_map_get(pgf->concretes, &lang, PgfConcr*);
return gu_map_get(pgf->concretes, lang, PgfConcr*);
}
GuString
@@ -123,16 +121,13 @@ pgf_iter_categories(PgfPGF* pgf, GuMapItor* fn, GuExn* err)
}
PgfCId
pgf_start_cat(PgfPGF* pgf, GuPool* pool)
pgf_start_cat(PgfPGF* pgf)
{
GuPool* tmp_pool = gu_local_pool();
GuString s = gu_str_string("startcat", tmp_pool);
PgfLiteral lit =
gu_map_get(pgf->abstract.aflags, &s, PgfLiteral);
gu_map_get(pgf->abstract.aflags, "startcat", PgfLiteral);
if (gu_variant_is_null(lit))
return gu_str_string("S", pool);
return "S";
GuVariantInfo i = gu_variant_open(lit);
switch (i.tag) {
@@ -142,20 +137,17 @@ pgf_start_cat(PgfPGF* pgf, GuPool* pool)
}
}
return gu_str_string("S", pool);
return "S";
}
GuString
pgf_language_code(PgfConcr* concr)
{
GuPool* tmp_pool = gu_local_pool();
GuString s = gu_str_string("language", tmp_pool);
PgfLiteral lit =
gu_map_get(concr->cflags, &s, PgfLiteral);
gu_map_get(concr->cflags, "language", PgfLiteral);
if (gu_variant_is_null(lit))
return gu_empty_string;
return "";
GuVariantInfo i = gu_variant_open(lit);
switch (i.tag) {
@@ -165,7 +157,7 @@ pgf_language_code(PgfConcr* concr)
}
}
return gu_empty_string;
return "";
}
void
@@ -188,8 +180,8 @@ pgf_filter_by_cat(GuMapItor* fn, const void* key, void* value, GuExn* err)
PgfFunByCatIter* clo = (PgfFunByCatIter*) fn;
PgfAbsFun* absfun = *((PgfAbsFun**) value);
if (gu_string_eq(absfun->type->cid, clo->catname)) {
clo->client_fn->fn(clo->client_fn, &absfun->name, NULL, err);
if (strcmp(absfun->type->cid, clo->catname) == 0) {
clo->client_fn->fn(clo->client_fn, absfun->name, NULL, err);
}
}
@@ -205,10 +197,10 @@ PgfType*
pgf_function_type(PgfPGF* pgf, PgfCId funname)
{
// Find the abstract function named `funname`; the two lookups are the old
// (&funname) and new (funname) forms of the same map access.
PgfAbsFun* absfun =
gu_map_get(pgf->abstract.funs, &funname, PgfAbsFun*);
gu_map_get(pgf->abstract.funs, funname, PgfAbsFun*);
// Unknown function: report that with NULL instead of a type.
if (absfun == NULL)
return NULL;
return absfun->type;
}
@@ -216,8 +208,8 @@ GuString
pgf_print_name(PgfConcr* concr, PgfCId id)
{
// Look up the print name registered for `id`; the old lookup passes &id,
// the new one passes id itself.
PgfCId name =
gu_map_get(concr->printnames, &id, PgfCId);
// Old emptiness test.
if (gu_string_eq(name, gu_empty_string))
gu_map_get(concr->printnames, id, PgfCId);
// New emptiness test.
// NOTE(review): this dereferences `name` without a NULL check - confirm
// that the map's default value for a missing key is a valid empty string.
if (*name == 0)
// An empty print name falls back to the identifier itself.
name = id;
return name;
}
@@ -226,7 +218,7 @@ void
pgf_linearize(PgfConcr* concr, PgfExpr expr, GuOut* out, GuExn* err)
{
GuPool* tmp_pool = gu_local_pool();
GuEnum* cts =
pgf_lzr_concretize(concr, expr, tmp_pool);
PgfCncTree ctree = gu_next(cts, PgfCncTree, tmp_pool);

View File

@@ -102,7 +102,7 @@ void
pgf_iter_categories(PgfPGF* pgf, GuMapItor* fn, GuExn* err);
PgfCId
pgf_start_cat(PgfPGF* pgf, GuPool* pool);
pgf_start_cat(PgfPGF* pgf);
void
pgf_iter_functions(PgfPGF* pgf, GuMapItor* fn, GuExn* err);

View File

@@ -13,7 +13,7 @@
#include <gu/exn.h>
#include <gu/utf8.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
//
// PgfReader
@@ -116,11 +116,21 @@ pgf_read_literal(PgfReader* rdr)
uint8_t tag = pgf_read_tag(rdr);
switch (tag) {
case PGF_LITERAL_STR: {
GuLength len = pgf_read_len(rdr);
uint8_t* buf = alloca(len*6+1);
uint8_t* p = buf;
for (size_t i = 0; i < len; i++) {
gu_in_utf8_buf(&p, rdr->in, rdr->err);
gu_return_on_exn(rdr->err, gu_null_variant);
}
*p++ = 0;
PgfLiteralStr *lit_str =
gu_new_variant(PGF_LITERAL_STR,
PgfLiteralStr,
&lit, rdr->opool);
lit_str->val = pgf_read_string(rdr);
gu_new_flex_variant(PGF_LITERAL_STR,
PgfLiteralStr,
val, p-buf,
&lit, rdr->opool);
strcpy((char*) lit_str->val, (char*) buf);
break;
}
case PGF_LITERAL_INT: {
@@ -160,7 +170,7 @@ pgf_read_flags(PgfReader* rdr)
PgfLiteral value = pgf_read_literal(rdr);
gu_return_on_exn(rdr->err, NULL);
gu_map_put(flags, &name, PgfLiteral, value);
gu_map_put(flags, name, PgfLiteral, value);
}
return flags;
@@ -224,11 +234,16 @@ pgf_read_expr_(PgfReader* rdr)
break;
}
case PGF_EXPR_FUN: {
size_t len = pgf_read_len(rdr);
PgfExprFun *efun =
gu_new_variant(PGF_EXPR_FUN,
PgfExprFun,
&expr, rdr->opool);
efun->fun = pgf_read_cid(rdr, rdr->opool);
gu_new_flex_variant(PGF_EXPR_FUN,
PgfExprFun,
fun, len+1,
&expr, rdr->opool);
gu_in_bytes(rdr->in, (uint8_t*)efun->fun, len, rdr->err);
efun->fun[len] = 0;
gu_return_on_exn(rdr->err, gu_null_variant);
break;
}
@@ -403,7 +418,17 @@ pgf_read_absfun(PgfReader* rdr)
{
PgfAbsFun* absfun = gu_new(PgfAbsFun, rdr->opool);
absfun->name = pgf_read_cid(rdr, rdr->opool);
size_t len = pgf_read_len(rdr);
PgfExprFun *efun =
gu_new_flex_variant(PGF_EXPR_FUN,
PgfExprFun,
fun, len+1,
&absfun->ep.expr, rdr->opool);
gu_in_bytes(rdr->in, (uint8_t*)efun->fun, len, rdr->err);
efun->fun[len] = 0;
absfun->name = efun->fun;
gu_return_on_exn(rdr->err, NULL);
absfun->type = pgf_read_type_(rdr);
@@ -449,12 +474,6 @@ pgf_read_absfun(PgfReader* rdr)
absfun->ep.prob = - log(gu_in_f64be(rdr->in, rdr->err));
PgfExprFun* expr_fun =
gu_new_variant(PGF_EXPR_FUN,
PgfExprFun,
&absfun->ep.expr, rdr->opool);
expr_fun->fun = absfun->name;
return absfun;
}
@@ -474,7 +493,7 @@ pgf_read_absfuns(PgfReader* rdr)
PgfAbsFun* absfun = pgf_read_absfun(rdr);
gu_return_on_exn(rdr->err, NULL);
gu_map_put(absfuns, &absfun->name, PgfAbsFun*, absfun);
gu_map_put(absfuns, absfun->name, PgfAbsFun*, absfun);
}
return absfuns;
@@ -514,7 +533,8 @@ pgf_read_abscat(PgfReader* rdr, PgfAbstr* abstr, PgfCIdMap* abscats)
gu_return_on_exn(rdr->err, NULL);
PgfAbsFun* absfun =
gu_map_get(abstr->funs, &name, PgfAbsFun*);
gu_map_get(abstr->funs, name, PgfAbsFun*);
assert(absfun != NULL);
gu_buf_push(functions, PgfAbsFun*, absfun);
}
@@ -539,7 +559,7 @@ pgf_read_abscats(PgfReader* rdr, PgfAbstr* abstr)
PgfAbsCat* abscat = pgf_read_abscat(rdr, abstr, abscats);
gu_return_on_exn(rdr->err, NULL);
gu_map_put(abscats, &abscat->name, PgfAbsCat*, abscat);
gu_map_put(abscats, abscat->name, PgfAbsCat*, abscat);
}
return abscats;
@@ -567,7 +587,7 @@ pgf_read_printnames(PgfReader* rdr)
GuMapType* map_type = (GuMapType*)
GU_TYPE_LIT(GuStringMap, _,
gu_type(GuString),
&gu_empty_string);
&"");
PgfCIdMap* printnames = gu_map_type_make(map_type, rdr->opool);
size_t len = pgf_read_len(rdr);
@@ -580,7 +600,7 @@ pgf_read_printnames(PgfReader* rdr)
GuString printname = pgf_read_string(rdr);
gu_return_on_exn(rdr->err, NULL);
gu_map_put(printnames, &name, GuString, printname);
gu_map_put(printnames, name, GuString, printname);
}
return printnames;
@@ -654,12 +674,21 @@ pgf_read_symbol(PgfReader* rdr)
break;
}
case PGF_SYMBOL_KS: {
GuLength len = pgf_read_len(rdr);
uint8_t* buf = alloca(len*6+1);
uint8_t* p = buf;
for (size_t i = 0; i < len; i++) {
gu_in_utf8_buf(&p, rdr->in, rdr->err);
gu_return_on_exn(rdr->err, gu_null_variant);
}
*p++ = 0;
PgfSymbolKS *sym_ks =
gu_new_variant(PGF_SYMBOL_KS,
PgfSymbolKS,
&sym, rdr->opool);
sym_ks->token = pgf_read_string(rdr);
gu_return_on_exn(rdr->err, gu_null_variant);
gu_new_flex_variant(PGF_SYMBOL_KS,
PgfSymbolKS,
token, p-buf,
&sym, rdr->opool);
strcpy((char*) sym_ks->token, (char*) buf);
break;
}
case PGF_SYMBOL_KP: {
@@ -747,7 +776,7 @@ pgf_read_cncfun(PgfReader* rdr, PgfAbstr* abstr, PgfConcr* concr, int funid)
gu_return_on_exn(rdr->err, NULL);
PgfAbsFun* absfun =
gu_map_get(abstr->funs, &name, PgfAbsFun*);
gu_map_get(abstr->funs, name, PgfAbsFun*);
PgfCncFun* cncfun = gu_new_flex(rdr->opool, PgfCncFun, lins, len);
cncfun->absfun = absfun;
@@ -956,7 +985,7 @@ pgf_read_cnccat(PgfReader* rdr, PgfAbstr* abstr, PgfConcr* concr, PgfCId name)
gu_malloc(rdr->opool, sizeof(PgfCncCat)+n_lins*sizeof(GuString));
cnccat->abscat =
gu_map_get(abstr->cats, &name, PgfAbsCat*);
gu_map_get(abstr->cats, name, PgfAbsCat*);
gu_assert(cnccat->abscat != NULL);
int len = last + 1 - first;
@@ -1011,7 +1040,7 @@ pgf_read_cnccats(PgfReader* rdr, PgfAbstr* abstr, PgfConcr* concr)
pgf_read_cnccat(rdr, abstr, concr, name);
gu_return_on_exn(rdr->err, NULL);
gu_map_put(cnccats, &name, PgfCncCat*, cnccat);
gu_map_put(cnccats, name, PgfCncCat*, cnccat);
}
return cnccats;
@@ -1100,7 +1129,7 @@ pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr)
pgf_read_cid(rdr, rdr->opool);
gu_return_on_exn(rdr->err, NULL);
concr->cflags =
concr->cflags =
pgf_read_flags(rdr);
gu_return_on_exn(rdr->err, NULL);
@@ -1150,7 +1179,7 @@ pgf_read_concretes(PgfReader* rdr, PgfAbstr* abstr)
PgfConcr* concr = pgf_read_concrete(rdr, abstr);
gu_return_on_exn(rdr->err, NULL);
gu_map_put(concretes, &concr->name, PgfConcr*, concr);
gu_map_put(concretes, concr->name, PgfConcr*, concr);
}
return concretes;

View File

@@ -230,14 +230,14 @@ pgf_reasoner_try_first(PgfReasoner* rs, PgfExprState* parent, PgfAbsFun* absfun)
{
PgfCId cat = absfun->type->cid;
PgfAnswers* answers = gu_map_get(rs->table, &cat, PgfAnswers*);
PgfAnswers* answers = gu_map_get(rs->table, cat, PgfAnswers*);
if (answers == NULL) {
answers = gu_new(PgfAnswers, rs->tmp_pool);
answers->parents = gu_new_buf(PgfExprState*, rs->tmp_pool);
answers->exprs = gu_new_buf(PgfExprProb*, rs->tmp_pool);
answers->outside_prob = parent->base.prob;
gu_map_put(rs->table, &cat, PgfAnswers*, answers);
gu_map_put(rs->table, cat, PgfAnswers*, answers);
}
gu_buf_push(answers->parents, PgfExprState*, parent);
@@ -397,9 +397,9 @@ pgf_generate_all(PgfPGF* pgf, PgfCId cat, GuPool* pool)
answers->parents = gu_new_buf(PgfExprState*, rs->tmp_pool);
answers->exprs = rs->exprs;
answers->outside_prob = 0;
gu_map_put(rs->table, &cat, PgfAnswers*, answers);
gu_map_put(rs->table, cat, PgfAnswers*, answers);
PgfAbsCat* abscat = gu_map_get(rs->abstract->cats, &cat, PgfAbsCat*);
PgfAbsCat* abscat = gu_map_get(rs->abstract->cats, cat, PgfAbsCat*);
if (abscat != NULL) {
((PgfPredicate) abscat->predicate)(rs, NULL);
}

View File

@@ -28,8 +28,8 @@ int main(int argc, char* argv[]) {
goto fail;
}
char* filename = argv[1];
GuString cat = gu_str_string(argv[2], pool);
GuString lang = gu_str_string(argv[3], pool);
GuString cat = argv[2];
GuString lang = argv[3];
double heuristics = 0.95;
if (argc == 5) {
@@ -61,7 +61,7 @@ int main(int argc, char* argv[]) {
}
/* // Register a callback for the literal category Symbol */
/* pgf_parser_add_literal(from_concr, gu_str_string("Symb", pool), */
/* pgf_parser_add_literal(from_concr, "Symb", */
/* &pgf_nerc_literal_callback); */
clock_t end = clock();
@@ -110,7 +110,7 @@ int main(int argc, char* argv[]) {
clock_t start = clock();
GuIn *in = gu_string_in(gu_str_string(line, ppool), ppool);
GuIn *in = gu_string_in(line, ppool);
PgfLexer *lexer = pgf_new_simple_lexer(in, ppool);
GuEnum* result = pgf_parse_with_heuristics(concr, cat, lexer, heuristics, ppool, ppool);

View File

@@ -200,8 +200,8 @@ int main ()
goto fail;
}
GuString cat = gu_str_string("Phr", pool);
GuString from_lang = gu_str_string("ParseEng", pool);
GuString cat = "Phr";
GuString from_lang = "ParseEng";
PgfConcr* from_concr =
pgf_get_language(pgf, from_lang);
if (!from_concr) {
@@ -210,7 +210,7 @@ int main ()
}
// Register a callback for the literal category Symbol
pgf_parser_add_literal(from_concr, gu_str_string("Symb", pool),
pgf_parser_add_literal(from_concr, "Symb",
&pgf_nerc_literal_callback);
while (FCGI_Accept() >= 0) {
@@ -233,7 +233,7 @@ int main ()
PgfConcr* to_concr = NULL;
if (strlen(to_lang_buf) > 0) {
GuString to_lang = gu_str_string(to_lang_buf, ppool);
GuString to_lang = to_lang_buf;
to_concr =
pgf_get_language(pgf, to_lang);
if (!to_concr) {
@@ -248,7 +248,7 @@ int main ()
sentence[len+1] = '\0';
GuReader *rdr =
gu_string_reader(gu_str_string(sentence, ppool), ppool);
gu_string_reader(sentence, ppool);
PgfLexer *lexer =
pgf_new_simple_lexer(rdr, ppool);

View File

@@ -53,10 +53,10 @@ int main(int argc, char* argv[]) {
}
char* filename = argv[1];
GuString cat = gu_str_string(argv[2], pool);
GuString cat = argv[2];
GuString from_lang = gu_str_string(argv[3], pool);
GuString to_lang = gu_str_string(argv[4], pool);
GuString from_lang = argv[3];
GuString to_lang = argv[4];
// Create an exception frame that catches all errors.
GuExn* err = gu_new_exn(NULL, gu_kind(type), pool);
@@ -91,7 +91,7 @@ int main(int argc, char* argv[]) {
}
// Register a callback for the literal category Symbol
pgf_parser_add_literal(from_concr, gu_str_string("Symb", pool),
pgf_parser_add_literal(from_concr, "Symb",
&pgf_nerc_literal_callback);
// Create an output stream for stdout
@@ -154,7 +154,7 @@ int main(int argc, char* argv[]) {
ppool = gu_new_pool();
GuIn *in =
gu_string_in(gu_str_string(line, ppool), ppool);
gu_string_in(line, ppool);
PgfLexer *lexer =
pgf_new_simple_lexer(in, ppool);
@@ -166,7 +166,7 @@ int main(int argc, char* argv[]) {
PgfToken tok =
pgf_lexer_current_token(lexer);
if (gu_string_eq(tok, gu_empty_string))
if (*tok == 0)
gu_puts("Couldn't begin parsing", out, err);
else {
gu_puts("Unexpected token: \"", out, err);

View File

@@ -11,32 +11,13 @@
static jstring
gu2j_string(JNIEnv *env, GuString s) {
GuWord w = s.w_;
uint8_t buf[sizeof(GuWord)];
uint8_t* utf8;
size_t len;
if (w & 1) {
len = (w & 0xff) >> 1;
gu_assert(len <= sizeof(GuWord));
size_t i = len;
while (i > 0) {
w >>= 8;
buf[--i] = w & 0xff;
}
utf8 = buf;
} else {
uint8_t* p = (void*) w;
len = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
utf8 = &p[1];
}
const uint8_t* src = utf8;
const char* utf8 = s;
size_t len = strlen(s);
jchar* utf16 = alloca(len*sizeof(jchar));
jchar* dst = utf16;
while (src-utf8 < len) {
GuUCS ucs = gu_utf8_decode(&src);
while (s-utf8 < len) {
GuUCS ucs = gu_utf8_decode((const uint8_t**) &s);
if (ucs <= 0xFFFF) {
*dst++ = ucs;
@@ -52,10 +33,10 @@ gu2j_string(JNIEnv *env, GuString s) {
// Converts a Java string into a GuString allocated from `pool`.
//
// The characters are obtained with GetStringUTFChars, copied into `pool`,
// and the JNI buffer is released again, so the result does not depend on
// the lifetime of that buffer. Returns NULL when the JVM could not
// provide the characters.
static GuString
j2gu_string(JNIEnv *env, jstring s, GuPool* pool) {
    const char *utf = (*env)->GetStringUTFChars(env, s, 0);
    if (utf == NULL)
        return NULL;
    GuString str = gu_string_copy(utf, pool);
    // Release the pointer that JNI handed out, not the pool copy: the
    // copy must stay alive and was never owned by the JVM.
    (*env)->ReleaseStringUTFChars(env, s, utf);
    return str;
}
static void*
@@ -223,10 +204,7 @@ Java_org_grammaticalframework_pgf_PGF_getAbstractName(JNIEnv* env, jobject self)
// JNI entry point: returns the grammar's start category as a Java string.
// The grammar is the native reference obtained from `self` via get_ref().
JNIEXPORT jstring JNICALL
Java_org_grammaticalframework_pgf_PGF_getStartCat(JNIEnv* env, jobject self)
{
// Old form: pgf_start_cat() took a pool, so a temporary pool was created
// for the call and freed once the Java string had been built.
GuPool* tmp_pool = gu_local_pool();
jstring jname = gu2j_string(env, pgf_start_cat(get_ref(env, self), tmp_pool));
gu_pool_free(tmp_pool);
return jname;
// New form: pgf_start_cat() takes no pool, so the result is converted
// directly.
return gu2j_string(env, pgf_start_cat(get_ref(env, self)));
}
typedef struct {
@@ -313,7 +291,7 @@ Java_org_grammaticalframework_pgf_Parser_parse
PgfToken tok =
pgf_lexer_current_token(lexer);
if (gu_string_eq(tok, gu_empty_string))
if (*tok == 0)
throw_string_exception(env, "org/grammaticalframework/pgf/PGFError", "The sentence cannot be parsed");
else
throw_jstring_exception(env, "org/grammaticalframework/pgf/ParseError", gu2j_string(env, tok));

View File

@@ -12,31 +12,6 @@ static PyObject* PGFError;
static PyObject* ParseError;
// Builds a Python string from a GuString in its packed word representation.
// Two encodings are decoded below, selected by the low bit of the word.
static PyObject*
gu2py_string(GuString s) {
GuWord w = s.w_;
uint8_t buf[sizeof(GuWord)];
char* src;
size_t len;
if (w & 1) {
// Inline form: bits 1..7 of the low byte hold the length and the
// remaining bytes of the word hold the characters.
len = (w & 0xff) >> 1;
gu_assert(len <= sizeof(GuWord));
size_t i = len;
// Unpack one byte per 8-bit shift, filling `buf` from the end.
while (i > 0) {
w >>= 8;
buf[--i] = w & 0xff;
}
src = (char*) buf;
} else {
// Pointer form: the word is the address of a length-prefixed buffer.
// A zero length byte means the length is the size_t stored just before
// the buffer; the characters start at p[1].
uint8_t* p = (void*) w;
len = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
src = (char*) &p[1];
}
return PyString_FromStringAndSize(src, len);
}
typedef struct {
PyObject_HEAD
GuPool* pool;
@@ -129,7 +104,7 @@ Expr_repr(ExprObject *self)
pgf_print_expr(self->expr, NULL, 0, out, err);
GuString str = gu_string_buf_freeze(sbuf, tmp_pool);
PyObject* pystr = gu2py_string(str);
PyObject* pystr = PyString_FromString(str);
gu_pool_free(tmp_pool);
return pystr;
@@ -258,11 +233,13 @@ Expr_initLiteral(ExprObject *self, PyObject *lit)
e->lit = gu_null_variant;
if (PyString_Check(lit)) {
GuString s = PyString_AsString(lit);
PgfLiteralStr* slit =
gu_new_variant(PGF_LITERAL_STR,
PgfLiteralStr,
&e->lit, self->pool);
slit->val = gu_str_string(PyString_AsString(lit), self->pool);
gu_new_flex_variant(PGF_LITERAL_STR,
PgfLiteralStr,
val, strlen(s)+1,
&e->lit, self->pool);
strcpy(slit->val, s);
} else if (PyInt_Check(lit)) {
PgfLiteralInt* ilit =
gu_new_variant(PGF_LITERAL_INT,
@@ -293,10 +270,11 @@ Expr_initApp(ExprObject *self, const char* fname, PyObject *args)
self->pool = gu_new_pool();
PgfExprFun* e =
gu_new_variant(PGF_EXPR_FUN,
PgfExprFun,
&self->expr, self->pool);
e->fun = gu_str_string(fname, self->pool);
gu_new_flex_variant(PGF_EXPR_FUN,
PgfExprFun,
fun, strlen(fname)+1,
&self->expr, self->pool);
strcpy(e->fun, fname);
for (Py_ssize_t i = 0; i < n_args; i++) {
PyObject* obj = PyList_GetItem(args, i);
@@ -345,7 +323,7 @@ Expr_unpack(ExprObject* self, PyObject *fargs)
PyObject* py_bindtype =
(eabs->bind_type == PGF_BIND_TYPE_EXPLICIT) ? Py_True
: Py_False;
PyObject* py_var = gu2py_string(eabs->id);
PyObject* py_var = PyString_FromString(eabs->id);
PyObject* res =
Py_BuildValue("OOOO", py_bindtype, py_var, py_body, args);
Py_DECREF(py_var);
@@ -383,7 +361,7 @@ Expr_unpack(ExprObject* self, PyObject *fargs)
switch (i.tag) {
case PGF_LITERAL_STR: {
PgfLiteralStr* lstr = i.data;
return gu2py_string(lstr->val);
return PyString_FromString(lstr->val);
}
case PGF_LITERAL_INT: {
PgfLiteralInt* lint = i.data;
@@ -405,7 +383,7 @@ Expr_unpack(ExprObject* self, PyObject *fargs)
}
case PGF_EXPR_FUN: {
PgfExprFun* efun = i.data;
PyObject* fun = gu2py_string(efun->fun);
PyObject* fun = PyString_FromString(efun->fun);
PyObject* res = Py_BuildValue("OO", fun, args);
Py_DECREF(fun);
Py_DECREF(args);
@@ -482,7 +460,7 @@ redo:;
}
case PGF_LITERAL_STR: {
PgfLiteralStr* lstr = i.data;
return gu2py_string(lstr->val);
return PyString_FromString(lstr->val);
}
}
}
@@ -497,7 +475,7 @@ redo:;
case PGF_EXPR_FUN: {
PgfExprFun* efun = i.data;
if (strcmp(name, "name") == 0) {
return gu2py_string(efun->fun);
return PyString_FromString(efun->fun);
}
break;
}
@@ -604,7 +582,7 @@ Type_init(TypeObject *self, PyObject *args, PyObject *kwds)
if (obj->ob_type == &pgf_TypeType) {
py_bindtype = Py_True;
cid = gu_str_string("_", self->pool);
cid = "_";
py_type = obj;
} else {
if (!PyTuple_Check(obj) ||
@@ -624,7 +602,7 @@ Type_init(TypeObject *self, PyObject *args, PyObject *kwds)
PyErr_SetString(PyExc_TypeError, "the arguments in the first list must be triples of (boolean,string,pgf.Type)");
return -1;
}
cid = gu_str_string(PyString_AsString(py_var), self->pool);
cid = gu_string_copy(PyString_AsString(py_var), self->pool);
py_type = PyTuple_GetItem(obj, 2);
if (py_type->ob_type != &pgf_TypeType) {
@@ -644,7 +622,7 @@ Type_init(TypeObject *self, PyObject *args, PyObject *kwds)
Py_INCREF(py_type);
}
self->type->cid = gu_str_string(catname_s, self->pool);
self->type->cid = gu_string_copy(catname_s, self->pool);
self->type->n_exprs = n_exprs;
for (Py_ssize_t i = 0; i < n_exprs; i++) {
@@ -675,7 +653,7 @@ Type_repr(TypeObject *self)
pgf_print_type(self->type, NULL, 0, out, err);
GuString str = gu_string_buf_freeze(sbuf, tmp_pool);
PyObject* pystr = gu2py_string(str);
PyObject* pystr = PyString_FromString(str);
gu_pool_free(tmp_pool);
return pystr;
@@ -713,7 +691,7 @@ Type_getHypos(TypeObject *self, void *closure)
(hypo->bind_type == PGF_BIND_TYPE_EXPLICIT) ? Py_True
: Py_False;
PyObject* py_var = gu2py_string(hypo->cid);
PyObject* py_var = PyString_FromString(hypo->cid);
if (py_var == NULL)
goto fail;
@@ -752,7 +730,7 @@ fail:
// Getter: returns the category identifier of the wrapped type as a Python
// string. `closure` is not used.
static PyObject*
Type_getCat(TypeObject *self, void *closure)
{
// Old conversion (gu2py_string) followed by its replacement.
return gu2py_string(self->type->cid);
return PyString_FromString(self->type->cid);
}
static PyObject*
@@ -920,8 +898,8 @@ Iter_fetch_token(IterObject* self)
if (tp == NULL)
return NULL;
PyObject* py_tok = gu2py_string(tp->tok);
PyObject* py_cat = gu2py_string(tp->cat);
PyObject* py_tok = PyString_FromString(tp->tok);
PyObject* py_cat = PyString_FromString(tp->cat);
PyObject* res = Py_BuildValue("(f,O,O)", tp->prob, py_tok, py_cat);
Py_DECREF(py_tok);
@@ -1061,16 +1039,11 @@ Concr_init(ConcrObject *self, PyObject *args, PyObject *kwds)
// Python method: takes one string argument and returns the print name that
// pgf_print_name() reports for it in this concrete syntax.
// Returns NULL when the argument tuple cannot be parsed.
static PyObject*
Concr_printName(ConcrObject* self, PyObject *args)
{
// Old argument parsing: read a C string first, convert it further down.
const char *name_s;
if (!PyArg_ParseTuple(args, "s", &name_s))
// New argument parsing: the parsed C string is used as the GuString.
GuString name;
if (!PyArg_ParseTuple(args, "s", &name))
return NULL;
// Old body: convert the name through a temporary pool, build the Python
// string, then free the pool.
GuPool *tmp_pool = gu_local_pool();
GuString name = gu_str_string(name_s, tmp_pool);
PyObject* pyname = gu2py_string(pgf_print_name(self->concr, name));
gu_pool_free(tmp_pool);
return pyname;
// New body: no pool is involved.
return PyString_FromString(pgf_print_name(self->concr, name));
}
typedef struct {
@@ -1085,7 +1058,7 @@ static PgfToken
pypgf_python_lexer_read_token(PgfLexer *base, GuExn* err)
{
PgfPythonLexer* lexer = (PgfPythonLexer*) base;
lexer->base.tok = gu_empty_string;
lexer->base.tok = "";
PyObject* item = PyIter_Next(lexer->pylexer);
if (item == NULL)
@@ -1098,7 +1071,7 @@ pypgf_python_lexer_read_token(PgfLexer *base, GuExn* err)
if (str == NULL)
gu_raise(err, PyPgfLexerExn);
else
lexer->base.tok = gu_str_string(str, lexer->pool);
lexer->base.tok = gu_string_copy(str, lexer->pool);
}
return lexer->base.tok;
@@ -1109,7 +1082,7 @@ pypgf_new_python_lexer(PyObject* pylexer, GuPool* pool)
{
PgfPythonLexer* lexer = gu_new(PgfPythonLexer, pool);
lexer->base.read_token = pypgf_python_lexer_read_token;
lexer->base.tok = gu_empty_string;
lexer->base.tok = "";
lexer->pylexer = pylexer;
lexer->pool = pool;
return ((PgfLexer*) lexer);
@@ -1146,11 +1119,11 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
int len;
const uint8_t *buf = NULL;
PyObject* py_lexer = NULL;
const char *catname_s = NULL;
PgfCId catname = pgf_start_cat(self->grammar->pgf);
int max_count = -1;
double heuristics = -1;
if (!PyArg_ParseTupleAndKeywords(args, keywds, "|s#Osid", kwlist,
&buf, &len, &py_lexer, &catname_s, &max_count, &heuristics))
&buf, &len, &py_lexer, &catname, &max_count, &heuristics))
return NULL;
if ((buf == NULL && py_lexer == NULL) ||
@@ -1187,10 +1160,6 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
pyres->counter = 0;
pyres->fetch = Iter_fetch_expr;
GuString catname =
(catname_s == NULL) ? pgf_start_cat(self->grammar->pgf, pyres->pool)
: gu_str_string(catname_s, pyres->pool);
PgfLexer *lexer = NULL;
if (buf != NULL) {
GuIn* in = gu_data_in(buf, len, pyres->pool);
@@ -1208,10 +1177,10 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
PgfToken tok =
pgf_lexer_current_token(lexer);
if (gu_string_eq(tok, gu_empty_string))
if (*tok == 0)
PyErr_SetString(PGFError, "The sentence cannot be parsed");
else {
PyObject* py_tok = gu2py_string(tok);
PyObject* py_tok = PyString_FromString(tok);
PyObject_SetAttrString(ParseError, "token", py_tok);
PyErr_Format(ParseError, "Unexpected token: \"%s\"",
PyString_AsString(py_tok));
@@ -1236,12 +1205,12 @@ Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds)
int len;
const uint8_t *buf = NULL;
PyObject* py_lexer = NULL;
const char *catname_s = NULL;
const char *prefix_s = NULL;
GuString catname = pgf_start_cat(self->grammar->pgf);
GuString prefix = "";
int max_count = -1;
if (!PyArg_ParseTupleAndKeywords(args, keywds, "|s#Ossi", kwlist,
&buf, &len, &py_lexer, &catname_s,
&prefix_s, &max_count))
&buf, &len, &py_lexer, &catname,
&prefix, &max_count))
return NULL;
if ((buf == NULL && py_lexer == NULL) ||
@@ -1276,14 +1245,6 @@ Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds)
GuPool *tmp_pool = gu_local_pool();
GuString catname =
(catname_s == NULL) ? pgf_start_cat(self->grammar->pgf, tmp_pool)
: gu_str_string(catname_s, tmp_pool);
GuString prefix =
(prefix_s == NULL) ? gu_empty_string
: gu_str_string(prefix_s, pyres->pool);
PgfLexer *lexer = NULL;
if (buf != NULL) {
GuIn* in = gu_data_in(buf, len, tmp_pool);
@@ -1303,10 +1264,10 @@ Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds)
PgfToken tok =
pgf_lexer_current_token(lexer);
if (gu_string_eq(tok, gu_empty_string))
if (*tok == 0)
PyErr_SetString(PGFError, "The sentence cannot be parsed");
else {
PyObject* py_tok = gu2py_string(tok);
PyObject* py_tok = PyString_FromString(tok);
PyObject_SetAttrString(ParseError, "token", py_tok);
PyErr_Format(ParseError, "Unexpected token: \"%s\"",
PyString_AsString(py_tok));
@@ -1323,14 +1284,12 @@ Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds)
static PyObject*
Concr_parseval(ConcrObject* self, PyObject *args) {
ExprObject* pyexpr = NULL;
const char* s_cat = NULL;
if (!PyArg_ParseTuple(args, "O!s", &pgf_ExprType, &pyexpr, &s_cat))
PgfCId cat = "";
if (!PyArg_ParseTuple(args, "O!s", &pgf_ExprType, &pyexpr, &cat))
return NULL;
GuPool* tmp_pool = gu_local_pool();
PgfCId cat = gu_str_string(s_cat, tmp_pool);
double precision = 0;
double recall = 0;
double exact = 0;
@@ -1387,7 +1346,7 @@ Concr_linearize(ConcrObject* self, PyObject *args)
}
GuString str = gu_string_buf_freeze(sbuf, tmp_pool);
PyObject* pystr = gu2py_string(str);
PyObject* pystr = PyString_FromString(str);
gu_pool_free(tmp_pool);
return pystr;
@@ -1526,7 +1485,7 @@ pgf_bracket_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
{
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
PyObject* str = gu2py_string(tok);
PyObject* str = PyString_FromString(tok);
PyList_Append(state->list, str);
Py_DECREF(str);
}
@@ -1540,7 +1499,7 @@ pgf_bracket_lzn_expr_literal(PgfLinFuncs** funcs, PgfLiteral lit)
switch (i.tag) {
case PGF_LITERAL_STR: {
PgfLiteralStr* lstr = i.data;
PyObject* str = gu2py_string(lstr->val);
PyObject* str = PyString_FromString(lstr->val);
PyList_Append(state->list, str);
Py_DECREF(str);
break;
@@ -1584,10 +1543,10 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, int lindex,
BracketObject* bracket = (BracketObject *)
pgf_BracketType.tp_alloc(&pgf_BracketType, 0);
if (bracket != NULL) {
bracket->cat = gu2py_string(cat);
bracket->cat = PyString_FromString(cat);
bracket->fid = fid;
bracket->lindex = lindex;
bracket->fun = gu2py_string(fun);
bracket->fun = PyString_FromString(fun);
bracket->children = state->list;
PyList_Append(parent, (PyObject*) bracket);
Py_DECREF(bracket);
@@ -1652,13 +1611,13 @@ Concr_bracketedLinearize(ConcrObject* self, PyObject *args)
// Returns the result of pgf_concrete_name(self->concr) converted to a
// Python string object. The `closure` argument is not used.
// NOTE(review): the (self, closure) signature looks like a Python getset
// getter -- confirm against the type's getset table, which is not visible here.
static PyObject*
Concr_getName(ConcrObject *self, void *closure)
{
// NOTE(review): this listing is a diff rendered without +/- markers, so the
// two returns below are presumably the removed line (gu2py_string) followed
// by its replacement (PyString_FromString), not two sequential statements.
return gu2py_string(pgf_concrete_name(self->concr));
return PyString_FromString(pgf_concrete_name(self->concr));
}
// Returns the result of pgf_language_code(self->concr) converted to a
// Python string object. The `closure` argument is not used.
// NOTE(review): the (self, closure) signature looks like a Python getset
// getter -- confirm against the type's getset table, which is not visible here.
static PyObject*
Concr_getLanguageCode(ConcrObject *self, void *closure)
{
// NOTE(review): this listing is a diff rendered without +/- markers, so the
// two returns below are presumably the removed line (gu2py_string) followed
// by its replacement (PyString_FromString), not two sequential statements.
return gu2py_string(pgf_language_code(self->concr));
return PyString_FromString(pgf_language_code(self->concr));
}
static PyObject*
@@ -1679,7 +1638,7 @@ Concr_graphvizParseTree(ConcrObject* self, PyObject *args) {
}
GuString str = gu_string_buf_freeze(sbuf, tmp_pool);
PyObject* pystr = gu2py_string(str);
PyObject* pystr = PyString_FromString(str);
gu_pool_free(tmp_pool);
return pystr;
@@ -1697,8 +1656,8 @@ pypgf_collect_morpho(PgfMorphoCallback* self,
{
PyMorphoCallback* callback = (PyMorphoCallback*) self;
PyObject* py_lemma = gu2py_string(lemma);
PyObject* py_analysis = gu2py_string(analysis);
PyObject* py_lemma = PyString_FromString(lemma);
PyObject* py_analysis = PyString_FromString(analysis);
PyObject* res =
Py_BuildValue("OOf", py_lemma, py_analysis, prob);
@@ -1780,7 +1739,7 @@ Iter_fetch_fullform(IterObject* self)
GuString tokens =
pgf_fullform_get_string(entry);
py_tokens = gu2py_string(tokens);
py_tokens = PyString_FromString(tokens);
if (py_tokens == NULL)
goto done;
@@ -1936,7 +1895,7 @@ PGF_dealloc(PGFObject* self)
// Returns the result of pgf_abstract_name(self->pgf) converted to a
// Python string object. The `closure` argument is not used.
// NOTE(review): the (self, closure) signature looks like a Python getset
// getter -- confirm against the type's getset table, which is not visible here.
static PyObject*
PGF_getAbstractName(PGFObject *self, void *closure)
{
// NOTE(review): this listing is a diff rendered without +/- markers, so the
// two returns below are presumably the removed line (gu2py_string) followed
// by its replacement (PyString_FromString), not two sequential statements.
return gu2py_string(pgf_abstract_name(self->pgf));
return PyString_FromString(pgf_abstract_name(self->pgf));
}
typedef struct {
@@ -1948,14 +1907,14 @@ typedef struct {
static void
pgf_collect_langs(GuMapItor* fn, const void* key, void* value, GuExn* err)
{
PgfCId name = *((PgfCId*) key);
PgfCId name = (PgfCId) key;
PgfConcr* concr = *((PgfConcr**) value);
PyPGFClosure* clo = (PyPGFClosure*) fn;
PyObject* py_name = NULL;
PyObject* py_lang = NULL;
py_name = gu2py_string(name);
py_name = PyString_FromString(name);
if (py_name == NULL) {
gu_raise(err, PgfExn);
goto end;
@@ -2012,12 +1971,12 @@ PGF_getLanguages(PGFObject *self, void *closure)
static void
pgf_collect_cats(GuMapItor* fn, const void* key, void* value, GuExn* err)
{
PgfCId name = *((PgfCId*) key);
PgfCId name = (PgfCId) key;
PyPGFClosure* clo = (PyPGFClosure*) fn;
PyObject* py_name = NULL;
py_name = gu2py_string(name);
py_name = PyString_FromString(name);
if (py_name == NULL) {
gu_raise(err, PgfExn);
goto end;
@@ -2059,21 +2018,18 @@ PGF_getCategories(PGFObject *self, void *closure)
// Returns the grammar's start category, as reported by pgf_start_cat for
// self->pgf, converted to a Python string object. The `closure` argument is
// not used.
// NOTE(review): this listing is a diff rendered without +/- markers. The
// first four body lines (temporary pool, two-argument pgf_start_cat,
// gu2py_string, `return pyname`) are presumably the removed implementation,
// and the final `return PyString_FromString(...)` line its replacement; they
// are not meant to be read as one sequential body.
static PyObject*
PGF_getStartCat(PGFObject *self, void *closure)
{
// Presumed old version: the name is produced in a temporary pool, which is
// freed after the Python string has been created from it.
GuPool* tmp_pool = gu_local_pool();
PyObject* pyname = gu2py_string(pgf_start_cat(self->pgf, tmp_pool));
gu_pool_free(tmp_pool);
return pyname;
// Presumed new version: pgf_start_cat takes only the grammar and its result
// is passed straight to PyString_FromString, so no pool is created here.
// NOTE(review): PyString_FromString requires a non-NULL argument -- confirm
// that pgf_start_cat can never return NULL.
return PyString_FromString(pgf_start_cat(self->pgf));
}
static void
pgf_collect_funs(GuMapItor* fn, const void* key, void* value, GuExn* err)
{
PgfCId name = *((PgfCId*) key);
PgfCId name = (PgfCId) key;
PyPGFClosure* clo = (PyPGFClosure*) fn;
PyObject* py_name = NULL;
py_name = gu2py_string(name);
py_name = PyString_FromString(name);
if (py_name == NULL) {
gu_raise(err, PgfExn);
goto end;
@@ -2115,19 +2071,17 @@ PGF_getFunctions(PGFObject *self, void *closure)
static PyObject*
PGF_functionsByCat(PGFObject* self, PyObject *args)
{
const char *catname_s;
if (!PyArg_ParseTuple(args, "s", &catname_s))
PgfCId catname;
if (!PyArg_ParseTuple(args, "s", &catname))
return NULL;
GuPool *tmp_pool = gu_local_pool();
GuString catname = gu_str_string(catname_s, tmp_pool);
PyObject* functions = PyList_New(0);
if (functions == NULL) {
gu_pool_free(tmp_pool);
return NULL;
}
GuPool *tmp_pool = gu_local_pool();
// Create an exception frame that catches all errors.
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
@@ -2146,20 +2100,14 @@ PGF_functionsByCat(PGFObject* self, PyObject *args)
static TypeObject*
PGF_functionType(PGFObject* self, PyObject *args)
{
const char *funname_s;
if (!PyArg_ParseTuple(args, "s", &funname_s))
PgfCId funname;
if (!PyArg_ParseTuple(args, "s", &funname))
return NULL;
GuPool *tmp_pool = gu_local_pool();
GuString funname = gu_str_string(funname_s, tmp_pool);
PgfType* type =
pgf_function_type(self->pgf, funname);
gu_pool_free(tmp_pool);
if (type == NULL) {
PyErr_Format(PyExc_KeyError, "Function '%s' is not defined", funname_s);
PyErr_Format(PyExc_KeyError, "Function '%s' is not defined", funname);
return NULL;
}
@@ -2179,10 +2127,10 @@ PGF_generateAll(PGFObject* self, PyObject *args, PyObject *keywds)
{
static char *kwlist[] = {"cat", "n", NULL};
const char *catname_s;
PgfCId catname;
int max_count = -1;
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|i", kwlist,
&catname_s, &max_count))
&catname, &max_count))
return NULL;
IterObject* pyres = (IterObject*)
@@ -2201,7 +2149,6 @@ PGF_generateAll(PGFObject* self, PyObject *args, PyObject *keywds)
pyres->container = (PyObject*) pyres;
GuPool *tmp_pool = gu_local_pool();
GuString catname = gu_str_string(catname_s, tmp_pool);
pyres->res =
pgf_generate_all(self->pgf, catname, pyres->pool);
@@ -2245,7 +2192,7 @@ PGF_graphvizAbstractTree(PGFObject* self, PyObject *args) {
}
GuString str = gu_string_buf_freeze(sbuf, tmp_pool);
PyObject* pystr = gu2py_string(str);
PyObject* pystr = PyString_FromString(str);
gu_pool_free(tmp_pool);
return pystr;