GuString is now an ordinary C string - this makes life easier. In addition, PgfSymbolKS, PgfExprFun and PgfLiteralStr now keep their strings as embedded flexible arrays. This latest change gives us the same compactness as the old representation, but it is a lot easier to use.

This commit is contained in:
kr.angelov
2013-10-04 12:04:39 +00:00
parent 27091048ce
commit e8335806af
25 changed files with 412 additions and 779 deletions

View File

@@ -4,11 +4,13 @@
#include <gu/map.h>
#include <gu/assert.h>
#include <gu/prime.h>
#include <gu/string.h>
typedef enum {
GU_MAP_GENERIC,
GU_MAP_ADDR,
GU_MAP_WORD
GU_MAP_WORD,
GU_MAP_STRING
} GuMapKind;
typedef struct GuMapData GuMapData;
@@ -66,6 +68,9 @@ gu_map_entry_is_free(GuMap* map, GuMapData* data, size_t idx)
} else if (map->kind == GU_MAP_WORD) {
GuWord key = ((GuWord*)data->keys)[idx];
return key == 0;
} else if (map->kind == GU_MAP_STRING) {
GuString key = ((GuString*)data->keys)[idx];
return key == NULL;
}
gu_assert(map->kind == GU_MAP_GENERIC);
const void* key = &data->keys[idx * map->key_size];
@@ -137,6 +142,27 @@ gu_map_lookup(GuMap* map, const void* key, size_t* idx_out)
gu_impossible();
break;
}
case GU_MAP_STRING: {
GuHasher* hasher = map->hasher;
GuEquality* eq = (GuEquality*) hasher;
GuHash hash = hasher->hash(hasher, key);
size_t idx = hash % n;
size_t offset = (hash % (n - 2)) + 1;
while (true) {
GuString entry_key =
((GuString*)map->data.keys)[idx];
if (entry_key == NULL && map->data.zero_idx != idx) {
*idx_out = idx;
return false;
} else if (eq->is_equal(eq, key, entry_key)) {
*idx_out = idx;
return true;
}
idx = (idx + offset) % n;
}
gu_impossible();
break;
}
default:
gu_impossible();
}
@@ -179,6 +205,11 @@ gu_map_resize(GuMap* map)
((const void**)data->keys)[i] = NULL;
}
break;
case GU_MAP_STRING:
for (size_t i = 0; i < data->n_entries; i++) {
((GuString*)data->keys)[i] = NULL;
}
break;
default:
gu_impossible();
}
@@ -195,6 +226,8 @@ gu_map_resize(GuMap* map)
void* old_key = &old_data.keys[i * key_size];
if (map->kind == GU_MAP_ADDR) {
old_key = *(void**)old_key;
} else if (map->kind == GU_MAP_STRING) {
old_key = (void*) *(GuString*)old_key;
}
void* old_value = &old_data.values[i * value_size];
@@ -268,6 +301,8 @@ gu_map_insert(GuMap* map, const void* key)
}
if (map->kind == GU_MAP_ADDR) {
((const void**)map->data.keys)[idx] = key;
} else if (map->kind == GU_MAP_STRING) {
((GuString*)map->data.keys)[idx] = key;
} else {
memcpy(&map->data.keys[idx * map->key_size],
key, map->key_size);
@@ -296,6 +331,8 @@ gu_map_iter(GuMap* map, GuMapItor* itor, GuExn* err)
void* value = &map->data.values[i * map->value_size];
if (map->kind == GU_MAP_ADDR) {
key = *(const void* const*) key;
} else if (map->kind == GU_MAP_STRING) {
key = *(GuString*) key;
}
itor->fn(itor, key, value, err);
}
@@ -323,8 +360,10 @@ gu_map_enum_next(GuEnum* self, void* to, GuPool* pool)
en->x.value = &en->ht->data.values[i * en->ht->value_size];
if (en->ht->kind == GU_MAP_ADDR) {
en->x.key = *(const void* const*) en->x.key;
} else if (en->ht->kind == GU_MAP_STRING) {
en->x.key = *(GuString*) en->x.key;
}
*((GuMapKeyValue**) to) = &en->x;
break;
}
@@ -365,10 +404,12 @@ gu_make_map(size_t key_size, GuHasher* hasher,
GuMapKind kind =
((!hasher || hasher == gu_addr_hasher)
? GU_MAP_ADDR
: (hasher == gu_string_hasher)
? GU_MAP_STRING
: (key_size == sizeof(GuWord) && hasher == gu_word_hasher)
? GU_MAP_WORD
: GU_MAP_GENERIC);
if (kind == GU_MAP_ADDR) {
if (kind == GU_MAP_ADDR || kind == GU_MAP_STRING) {
key_size = sizeof(GuWord);
}
GuMapData data = {

View File

@@ -427,13 +427,3 @@ gu_buf_out(GuBuf* buf, GuPool* pool)
GU_DEFINE_KIND(GuSeq, GuOpaque);
GU_DEFINE_KIND(GuBuf, abstract);
/*
 * Copy the contents of a character buffer into a string allocated from
 * `pool`.
 *
 * Exactly gu_buf_length(chars) bytes are copied.
 * NOTE(review): the terminating NUL is presumably supplied by
 * gu_new_str() (extra zeroed byte) -- confirm against its definition.
 */
char*
gu_char_buf_str(GuCharBuf* chars, GuPool* pool)
{
	size_t n_chars = gu_buf_length(chars);
	char* src = gu_buf_data(chars);
	char* copy = gu_new_str(n_chars, pool);

	memcpy(copy, src, n_chars);
	return copy;
}

View File

@@ -135,13 +135,6 @@ gu_buf_heapify(GuBuf *buf, GuOrder *order);
GuSeq*
gu_buf_freeze(GuBuf* buf, GuPool* pool);
typedef GuBuf GuCharBuf;
typedef GuBuf GuByteBuf;
char*
gu_char_buf_str(GuCharBuf* chars, GuPool* pool);
#endif // GU_SEQ_H_
#if defined(GU_OUT_H_) && !defined(GU_SEQ_H_OUT_)

View File

@@ -7,21 +7,17 @@
#include <gu/assert.h>
#include <stdlib.h>
const GuString gu_empty_string = { 1 };
struct GuStringBuf {
GuByteBuf* bbuf;
GuBuf* buf;
GuOut* out;
};
GuStringBuf*
gu_string_buf(GuPool* pool)
{
GuBuf* buf = gu_new_buf(uint8_t, pool);
GuOut* out = gu_buf_out(buf, pool);
GuStringBuf* sbuf = gu_new(GuStringBuf, pool);
sbuf->bbuf = buf;
sbuf->out = out;
sbuf->buf = gu_new_buf(char, pool);
sbuf->out = gu_buf_out(sbuf->buf, pool);
return sbuf;
}
@@ -31,176 +27,64 @@ gu_string_buf_out(GuStringBuf* sb)
return sb->out;
}
/*
 * Build a GuString from the `sz` bytes starting at `buf`.
 *
 * Two representations are produced:
 *  - short strings are packed directly into the GuWord, with the low
 *    byte holding (sz << 1) | 1, i.e. the length and a set tag bit;
 *  - longer strings are copied into memory obtained from `pool`, behind
 *    a one-byte header, and the GuWord holds the address of that header.
 */
static GuString
gu_utf8_string(const uint8_t* buf, size_t sz, GuPool* pool)
{
// Inline form: the bytes plus the tag/length byte must fit in one word.
if (sz < GU_MIN(sizeof(GuWord), 128)) {
GuWord w = 0;
// The first byte of the string ends up in the most significant position.
for (size_t n = 0; n < sz; n++) {
w = w << 8 | buf[n];
}
// Low byte: length shifted left by one, tag bit 0 set.
w = w << 8 | (sz << 1) | 1;
return (GuString) { w };
}
uint8_t* p = NULL;
if (sz < 256) {
// Length fits in the header byte itself. It cannot be 0 here, because
// sizes below GU_MIN(sizeof(GuWord), 128) were handled above.
// NOTE(review): the trailing 2 is presumably the alignment, which would
// keep bit 0 of the address clear (the "long" tag) -- confirm.
p = gu_malloc_aligned(pool, 1 + sz, 2);
p[0] = (uint8_t) sz;
} else {
// Length does not fit in one byte: it is stored in a size_t placed
// immediately before p, and a header byte of 0 marks this layout.
p = gu_malloc_prefixed(pool, gu_alignof(size_t),
sizeof(size_t), 1, 1 + sz);
((size_t*) p)[-1] = sz;
p[0] = 0;
}
// String bytes follow the header byte; no terminator is written.
memcpy(&p[1], buf, sz);
return (GuString) { (GuWord) (void*) p };
}
GuString
gu_string_buf_freeze(GuStringBuf* sb, GuPool* pool)
{
gu_out_flush(sb->out, NULL);
uint8_t* data = gu_buf_data(sb->bbuf);
size_t len = gu_buf_length(sb->bbuf);
return gu_utf8_string(data, len, pool);
char* data = gu_buf_data(sb->buf);
size_t len = gu_buf_length(sb->buf);
char* p = gu_malloc_aligned(pool, len+1, 2);
memcpy(p, data, len);
p[len] = 0;
return p;
}
/*
 * Create an input stream over the bytes of `s`.
 *
 * For a pointer-form string (tag bit clear) the stream reads the stored
 * bytes in place. For an inline string (tag bit set) the bytes are first
 * unpacked from the word into a buffer allocated from `pool`.
 */
GuIn*
gu_string_in(GuString s, GuPool* pool)
{
	GuWord word = s.w_;

	if ((word & 1) == 0) {
		/* Header byte 0 means the length sits in the size_t before it. */
		uint8_t* hdr = (void*) word;
		size_t n_bytes = (hdr[0] == 0) ? ((size_t*) hdr)[-1] : hdr[0];
		return gu_data_in(&hdr[1], n_bytes, pool);
	}

	size_t n_bytes = (word & 0xff) >> 1;
	uint8_t* bytes = gu_new_n(uint8_t, n_bytes, pool);
	/* Bytes were packed first-byte-highest, so peel them off back to front. */
	for (size_t pos = n_bytes; pos > 0; ) {
		word >>= 8;
		bytes[--pos] = word & 0xff;
	}
	return gu_data_in(bytes, n_bytes, pool);
}
/* A string is "long" when the tag bit (bit 0) of its word is clear. */
static bool
gu_string_is_long(GuString s)
{
	return (s.w_ & 1) == 0;
}
/*
 * Report whether `string` is stable, which here means exactly "not long".
 * NOTE(review): presumably an inline string is stable because it holds
 * no pointer into pool memory -- confirm the intended meaning.
 */
bool
gu_string_is_stable(GuString s)
{
	const bool is_long = gu_string_is_long(s);
	return !is_long;
}
/*
 * Length in bytes of a long (pointer-form) string.
 *
 * A non-zero header byte is the length itself; a zero header byte means
 * the length is stored in the size_t immediately before the header.
 */
static size_t
gu_string_long_length(GuString s)
{
	gu_assert(gu_string_is_long(s));
	uint8_t* hdr = (void*) s.w_;
	uint8_t short_len = hdr[0];
	return (short_len > 0) ? (size_t) short_len : ((size_t*) hdr)[-1];
}
/*
 * Length in bytes of `s`: taken from the header for long strings,
 * otherwise decoded from the low byte of the word (length << 1 | tag).
 */
size_t
gu_string_length(GuString s)
{
	if (!gu_string_is_long(s)) {
		return (s.w_ & 0xff) >> 1;
	}
	return gu_string_long_length(s);
}
static uint8_t*
gu_string_long_data(GuString s)
{
gu_require(gu_string_is_long(s));
uint8_t* p = (void*) s.w_;
return &p[1];
return gu_data_in((uint8_t*) s, strlen(s), pool);
}
GuString
gu_string_copy(GuString string, GuPool* pool)
{
if (gu_string_is_long(string)) {
uint8_t* data = gu_string_long_data(string);
size_t len = gu_string_long_length(string);
return gu_utf8_string(data, len, pool);
} else {
return string;
}
size_t len = strlen(string);
char* p = gu_malloc_aligned(pool, len+1, 2);
memcpy(p, string, len+1);
return p;
}
void
gu_string_write(GuString s, GuOut* out, GuExn* err)
{
GuWord w = s.w_;
uint8_t buf[sizeof(GuWord)];
uint8_t* src;
size_t sz;
if (w & 1) {
sz = (w & 0xff) >> 1;
gu_assert(sz <= sizeof(GuWord));
size_t i = sz;
while (i > 0) {
w >>= 8;
buf[--i] = w & 0xff;
}
src = buf;
} else {
uint8_t* p = (void*) w;
sz = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
src = &p[1];
}
gu_out_bytes(out, src, sz, err);
gu_out_bytes(out, (uint8_t*) s, strlen(s), err);
}
GuString
gu_string_read(size_t len, GuPool* pool, GuIn* in, GuExn* err)
{
uint8_t* buf = alloca(len*4);
uint8_t* p = buf;
char* buf = alloca(len*6+1);
char* p = buf;
for (size_t i = 0; i < len; i++) {
gu_in_utf8_buf(&p, in, err);
gu_in_utf8_buf((uint8_t**) &p, in, err);
}
return gu_utf8_string(buf, p-buf, pool);
*p++ = 0;
p = gu_malloc_aligned(pool, p-buf, 2);
strcpy(p, buf);
return p;
}
GuString
gu_string_read_latin1(size_t len, GuPool* pool, GuIn* in, GuExn* err)
{
if (len < GU_MIN(sizeof(GuWord), 128)) {
GuWord w = 0;
for (size_t n = 0; n < len; n++) {
w = w << 8 | gu_in_u8(in, err);
}
w = w << 8 | (len << 1) | 1;
return (GuString) { w };
}
uint8_t* p = NULL;
if (len < 256) {
p = gu_malloc_aligned(pool, 1 + len, 2);
p[0] = (uint8_t) len;
} else {
p = gu_malloc_prefixed(pool, gu_alignof(size_t),
sizeof(size_t), 1, 1 + len);
((size_t*) p)[-1] = len;
p[0] = 0;
}
gu_in_bytes(in, &p[1], len, err);
return (GuString) { (GuWord) (void*) p };
char* p = gu_malloc_aligned(pool, len+1, 2);
gu_in_bytes(in, (uint8_t*)p, len, err);
p[len] = 0;
return p;
}
GuString
@@ -226,52 +110,24 @@ gu_format_string(GuPool* pool, const char* fmt, ...)
return s;
}
/*
 * Convert a NUL-terminated C string into a GuString. The bytes up to,
 * but not including, the terminator are passed to gu_utf8_string()
 * together with `pool`.
 */
GuString
gu_str_string(const char* str, GuPool* pool)
{
return gu_utf8_string((const uint8_t*) str, strlen(str), pool);
}
bool
gu_string_to_int(GuString s, int *res)
{
GuWord w = s.w_;
uint8_t buf[sizeof(GuWord)];
char* src;
size_t sz;
if (w & 1) {
sz = (w & 0xff) >> 1;
gu_assert(sz <= sizeof(GuWord));
size_t i = sz;
while (i > 0) {
w >>= 8;
buf[--i] = w & 0xff;
}
src = (char*) buf;
} else {
uint8_t* p = (void*) w;
sz = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
src = (char*) &p[1];
}
size_t i = 0;
bool neg = false;
if (src[i] == '-') {
if (*s == '-') {
neg = true;
i++;
s++;
}
if (i >= sz)
if (*s == 0)
return false;
int n = 0;
for (; i < sz; i++) {
if (src[i] < '0' || src[i] > '9')
for (; *s; s++) {
if (*s < '0' || *s > '9')
return false;
n = n * 10 + (src[i] - '0');
n = n * 10 + (*s - '0');
}
*res = neg ? -n : n;
@@ -281,54 +137,33 @@ gu_string_to_int(GuString s, int *res)
bool
gu_string_to_double(GuString s, double *res)
{
GuWord w = s.w_;
uint8_t buf[sizeof(GuWord)];
char* src;
size_t sz;
if (w & 1) {
sz = (w & 0xff) >> 1;
gu_assert(sz <= sizeof(GuWord));
size_t i = sz;
while (i > 0) {
w >>= 8;
buf[--i] = w & 0xff;
}
src = (char*) buf;
} else {
uint8_t* p = (void*) w;
sz = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
src = (char*) &p[1];
}
size_t i = 0;
bool neg = false;
bool dec = false;
double exp = 1;
if (src[i] == '-') {
if (*s == '-') {
neg = true;
i++;
s++;
}
if (i >= sz)
if (*s == 0)
return false;
double d = 0;
for (; i < sz; i++) {
if (src[i] == '.') {
for (; *s; s++) {
if (*s == '.') {
if (dec) return false;
dec = true;
continue;
}
if (src[i] < '0' || src[i] > '9')
if (*s < '0' || *s > '9')
return false;
if (dec) exp = exp * 10;
d = d * 10 + (src[i] - '0');
d = d * 10 + (*s - '0');
}
*res = (neg ? -d : d) / exp;
@@ -338,54 +173,18 @@ gu_string_to_double(GuString s, double *res)
bool
gu_string_is_prefix(GuString s1, GuString s2)
{
GuWord w1 = s1.w_;
uint8_t buf1[sizeof(GuWord)];
size_t sz1;
char* str1;
if (w1 & 1) {
sz1 = (w1 & 0xff) >> 1;
gu_assert(sz1 <= sizeof(GuWord));
size_t i = sz1;
while (i > 0) {
w1 >>= 8;
buf1[--i] = w1 & 0xff;
}
str1 = (char*) buf1;
} else {
uint8_t* p = (void*) w1;
sz1 = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
str1 = (char*) &p[1];
}
size_t len1 = strlen(s1);
size_t len2 = strlen(s2);
GuWord w2 = s2.w_;
uint8_t buf2[sizeof(GuWord)];
size_t sz2;
char* str2;
if (w2 & 1) {
sz2 = (w2 & 0xff) >> 1;
gu_assert(sz2 <= sizeof(GuWord));
size_t i = sz2;
while (i > 0) {
w2 >>= 8;
buf2[--i] = w2 & 0xff;
}
str2 = (char*) buf2;
} else {
uint8_t* p = (void*) w2;
sz2 = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
str2 = (char*) &p[1];
}
if (sz1 > sz2)
if (len1 > len2)
return false;
for (size_t sz = sz1; sz--; sz > 0) {
if (*str1 != *str2)
for (size_t len = len1; len--; len > 0) {
if (*s1 != *s2)
return false;
str1++;
str2++;
s1++;
s2++;
}
return true;
@@ -394,108 +193,23 @@ gu_string_is_prefix(GuString s1, GuString s2)
GuHash
gu_string_hash(GuHash h, GuString s)
{
if (s.w_ & 1) {
return h*101 + s.w_;
}
size_t len = gu_string_length(s);
uint8_t* data = gu_string_long_data(s);
return gu_hash_bytes(h, data, len);
}
bool
gu_string_eq(GuString s1, GuString s2)
{
if (s1.w_ == s2.w_) {
return true;
} else if (gu_string_is_long(s1) && gu_string_is_long(s2)) {
size_t len1 = gu_string_long_length(s1);
size_t len2 = gu_string_long_length(s2);
if (len1 != len2) {
return false;
}
uint8_t* data1 = gu_string_long_data(s1);
uint8_t* data2 = gu_string_long_data(s2);
return (memcmp(data1, data2, len1) == 0);
}
return false;
return gu_hash_bytes(h, (uint8_t*)s, strlen(s));
}
static bool
gu_string_eq_fn(GuEquality* self, const void* p1, const void* p2)
{
(void) self;
const GuString* sp1 = p1;
const GuString* sp2 = p2;
return gu_string_eq(*sp1, *sp2);
return strcmp((GuString) p1, (GuString) p2) == 0;
}
GuEquality gu_string_equality[1] = { { gu_string_eq_fn } };
/*
 * Expose the bytes of `s` as a (pointer, length) pair.
 *
 * Inline strings are unpacked into `scratch`, which must hold at least
 * sizeof(GuWord) bytes; pointer-form strings are returned in place.
 */
static const uint8_t*
gu_string_cmp_unpack(GuString s, uint8_t* scratch, size_t* sz_out)
{
	GuWord w = s.w_;
	if (w & 1) {
		size_t sz = (w & 0xff) >> 1;
		gu_assert(sz <= sizeof(GuWord));
		/* Packed first-byte-highest: peel bytes off back to front. */
		for (size_t i = sz; i > 0; ) {
			w >>= 8;
			scratch[--i] = w & 0xff;
		}
		*sz_out = sz;
		return scratch;
	}
	uint8_t* p = (void*) w;
	/* Header byte 0 means the length is in the size_t before it. */
	*sz_out = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
	return &p[1];
}

/*
 * Three-way lexicographic comparison of two strings.
 *
 * Returns -1, 0 or 1. When one string is a prefix of the other, the
 * shorter one orders first.
 *
 * Bytes are compared as unsigned values. The previous code compared them
 * through plain `char`, whose signedness is implementation-defined, so
 * the relative order of bytes >= 0x80 (every non-ASCII UTF-8 sequence)
 * varied between platforms and disagreed with memcmp()/strcmp().
 */
int
gu_string_cmp(GuString s1, GuString s2)
{
	uint8_t buf1[sizeof(GuWord)];
	uint8_t buf2[sizeof(GuWord)];
	size_t sz1 = 0;
	size_t sz2 = 0;
	const uint8_t* src1 = gu_string_cmp_unpack(s1, buf1, &sz1);
	const uint8_t* src2 = gu_string_cmp_unpack(s2, buf2, &sz2);

	size_t common = (sz1 < sz2) ? sz1 : sz2;
	for (size_t i = 0; i < common; i++) {
		if (src1[i] != src2[i]) {
			return (src1[i] > src2[i]) ? 1 : -1;
		}
	}

	if (sz1 == sz2) {
		return 0;
	}
	return (sz1 < sz2) ? -1 : 1;
}
static int
gu_string_cmp_fn(GuOrder* self, const void* p1, const void* p2)
{
(void) self;
const GuString* sp1 = p1;
const GuString* sp2 = p2;
return gu_string_cmp(*sp1, *sp2);
return strcmp((GuString) p1, (GuString) p2);
}
GuOrder gu_string_order[1] = { { gu_string_cmp_fn } };
@@ -504,8 +218,7 @@ static GuHash
gu_string_hasher_hash(GuHasher* self, const void* p)
{
(void) self;
const GuString* sp = p;
return gu_string_hash(0, *sp);
return gu_string_hash(0, (GuString) p);
}
GuHasher gu_string_hasher[1] = {
@@ -516,5 +229,5 @@ GuHasher gu_string_hasher[1] = {
};
GU_DEFINE_TYPE(GuString, GuOpaque, _);
GU_DEFINE_KIND(GuString, pointer);
GU_DEFINE_KIND(GuStringMap, GuMap);

View File

@@ -1,22 +1,3 @@
/*
* Copyright 2011 University of Helsinki.
*
* This file is part of libgu.
*
* Libgu is free software: you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the
* Free Software Foundation, either version 3 of the License, or (at your
* option) any later version.
*
* Libgu is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with libgu. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef GU_STRING_H_
#define GU_STRING_H_
@@ -24,9 +5,7 @@
#include <gu/in.h>
#include <gu/out.h>
typedef GuOpaque() GuString;
extern const GuString gu_empty_string;
typedef const char* GuString;
GuString
gu_string_copy(GuString string, GuPool* pool);
@@ -43,12 +22,6 @@ gu_string_read_latin1(size_t len, GuPool* pool, GuIn* in, GuExn* err);
GuIn*
gu_string_in(GuString string, GuPool* pool);
bool
gu_string_is_stable(GuString string);
GuString
gu_ucs_string(const GuUCS* ubuf, size_t len, GuPool* pool);
typedef struct GuStringBuf GuStringBuf;
GuStringBuf*
@@ -66,34 +39,21 @@ gu_format_string_v(const char* fmt, va_list args, GuPool* pool);
GuString
gu_format_string(GuPool* pool, const char* fmt, ...);
GuString
gu_str_string(const char* str, GuPool* pool);
bool
gu_string_to_int(GuString s, int *res);
bool
gu_string_to_double(GuString s, double *res);
bool
gu_string_is_prefix(GuString s1, GuString s2);
size_t
gu_string_length(GuString s);
#endif // GU_STRING_H_
#if defined(GU_FUN_H_) && !defined(GU_STRING_H_FUN_)
#define GU_STRING_H_FUN_
bool
gu_string_eq(GuString s1, GuString s2);
extern GuEquality gu_string_equality[1];
int
gu_string_cmp(GuString s1, GuString s2);
extern GuOrder gu_string_order[1];
#endif
@@ -110,7 +70,7 @@ extern GuHasher gu_string_hasher[1];
# ifndef GU_STRING_H_TYPE_
# define GU_STRING_H_TYPE_
extern GU_DECLARE_TYPE(GuString, GuOpaque);
extern GU_DECLARE_KIND(GuString);
# endif
# if defined(GU_MAP_H_TYPE_) && !defined(GU_STRING_H_MAP_TYPE_)
@@ -132,8 +92,6 @@ typedef GuType_GuMap GuType_GuStringMap;
#define GU_STRING_H_SEQ_
typedef GuSeq GuStrings;
// typedef GuBuf GuStringBuf;
#endif