1
0
forked from GitHub/gf-core

GuString is now an ordinary C string - it makes live easier. In addition PgfSymbolKS, PgfExprFun and PgfLiteralStr now keep their strings as embedded flexible arrays. The latest change gives us the same compactness as the old representation but it is a lot easier to use.

This commit is contained in:
kr.angelov
2013-10-04 12:04:39 +00:00
parent 27091048ce
commit e8335806af
25 changed files with 412 additions and 779 deletions

View File

@@ -7,21 +7,17 @@
#include <gu/assert.h>
#include <stdlib.h>
const GuString gu_empty_string = { 1 };
struct GuStringBuf {
GuByteBuf* bbuf;
GuBuf* buf;
GuOut* out;
};
GuStringBuf*
gu_string_buf(GuPool* pool)
{
GuBuf* buf = gu_new_buf(uint8_t, pool);
GuOut* out = gu_buf_out(buf, pool);
GuStringBuf* sbuf = gu_new(GuStringBuf, pool);
sbuf->bbuf = buf;
sbuf->out = out;
sbuf->buf = gu_new_buf(char, pool);
sbuf->out = gu_buf_out(sbuf->buf, pool);
return sbuf;
}
@@ -31,176 +27,64 @@ gu_string_buf_out(GuStringBuf* sb)
return sb->out;
}
static GuString
gu_utf8_string(const uint8_t* buf, size_t sz, GuPool* pool)
{
if (sz < GU_MIN(sizeof(GuWord), 128)) {
GuWord w = 0;
for (size_t n = 0; n < sz; n++) {
w = w << 8 | buf[n];
}
w = w << 8 | (sz << 1) | 1;
return (GuString) { w };
}
uint8_t* p = NULL;
if (sz < 256) {
p = gu_malloc_aligned(pool, 1 + sz, 2);
p[0] = (uint8_t) sz;
} else {
p = gu_malloc_prefixed(pool, gu_alignof(size_t),
sizeof(size_t), 1, 1 + sz);
((size_t*) p)[-1] = sz;
p[0] = 0;
}
memcpy(&p[1], buf, sz);
return (GuString) { (GuWord) (void*) p };
}
GuString
gu_string_buf_freeze(GuStringBuf* sb, GuPool* pool)
{
gu_out_flush(sb->out, NULL);
uint8_t* data = gu_buf_data(sb->bbuf);
size_t len = gu_buf_length(sb->bbuf);
return gu_utf8_string(data, len, pool);
char* data = gu_buf_data(sb->buf);
size_t len = gu_buf_length(sb->buf);
char* p = gu_malloc_aligned(pool, len+1, 2);
memcpy(p, data, len);
p[len] = 0;
return p;
}
GuIn*
gu_string_in(GuString s, GuPool* pool)
{
GuWord w = s.w_;
uint8_t* buf = NULL;
size_t len = 0;
if (w & 1) {
len = (w & 0xff) >> 1;
buf = gu_new_n(uint8_t, len, pool);
for (int i = len - 1; i >= 0; i--) {
w >>= 8;
buf[i] = w & 0xff;
}
} else {
uint8_t* p = (void*) w;
len = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
buf = &p[1];
}
return gu_data_in(buf, len, pool);
}
static bool
gu_string_is_long(GuString s)
{
return !(s.w_ & 1);
}
bool
gu_string_is_stable(GuString s)
{
return !gu_string_is_long(s);
}
static size_t
gu_string_long_length(GuString s)
{
gu_assert(gu_string_is_long(s));
uint8_t* p = (void*) s.w_;
uint8_t len = p[0];
if (len > 0) {
return len;
}
return ((size_t*) p)[-1];
}
size_t
gu_string_length(GuString s)
{
if (gu_string_is_long(s)) {
return gu_string_long_length(s);
}
return (s.w_ & 0xff) >> 1;
}
static uint8_t*
gu_string_long_data(GuString s)
{
gu_require(gu_string_is_long(s));
uint8_t* p = (void*) s.w_;
return &p[1];
return gu_data_in((uint8_t*) s, strlen(s), pool);
}
GuString
gu_string_copy(GuString string, GuPool* pool)
{
if (gu_string_is_long(string)) {
uint8_t* data = gu_string_long_data(string);
size_t len = gu_string_long_length(string);
return gu_utf8_string(data, len, pool);
} else {
return string;
}
size_t len = strlen(string);
char* p = gu_malloc_aligned(pool, len+1, 2);
memcpy(p, string, len+1);
return p;
}
void
gu_string_write(GuString s, GuOut* out, GuExn* err)
{
GuWord w = s.w_;
uint8_t buf[sizeof(GuWord)];
uint8_t* src;
size_t sz;
if (w & 1) {
sz = (w & 0xff) >> 1;
gu_assert(sz <= sizeof(GuWord));
size_t i = sz;
while (i > 0) {
w >>= 8;
buf[--i] = w & 0xff;
}
src = buf;
} else {
uint8_t* p = (void*) w;
sz = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
src = &p[1];
}
gu_out_bytes(out, src, sz, err);
gu_out_bytes(out, (uint8_t*) s, strlen(s), err);
}
GuString
gu_string_read(size_t len, GuPool* pool, GuIn* in, GuExn* err)
{
uint8_t* buf = alloca(len*4);
uint8_t* p = buf;
char* buf = alloca(len*6+1);
char* p = buf;
for (size_t i = 0; i < len; i++) {
gu_in_utf8_buf(&p, in, err);
gu_in_utf8_buf((uint8_t**) &p, in, err);
}
return gu_utf8_string(buf, p-buf, pool);
*p++ = 0;
p = gu_malloc_aligned(pool, p-buf, 2);
strcpy(p, buf);
return p;
}
GuString
gu_string_read_latin1(size_t len, GuPool* pool, GuIn* in, GuExn* err)
{
if (len < GU_MIN(sizeof(GuWord), 128)) {
GuWord w = 0;
for (size_t n = 0; n < len; n++) {
w = w << 8 | gu_in_u8(in, err);
}
w = w << 8 | (len << 1) | 1;
return (GuString) { w };
}
uint8_t* p = NULL;
if (len < 256) {
p = gu_malloc_aligned(pool, 1 + len, 2);
p[0] = (uint8_t) len;
} else {
p = gu_malloc_prefixed(pool, gu_alignof(size_t),
sizeof(size_t), 1, 1 + len);
((size_t*) p)[-1] = len;
p[0] = 0;
}
gu_in_bytes(in, &p[1], len, err);
return (GuString) { (GuWord) (void*) p };
char* p = gu_malloc_aligned(pool, len+1, 2);
gu_in_bytes(in, (uint8_t*)p, len, err);
p[len] = 0;
return p;
}
GuString
@@ -226,52 +110,24 @@ gu_format_string(GuPool* pool, const char* fmt, ...)
return s;
}
GuString
gu_str_string(const char* str, GuPool* pool)
{
return gu_utf8_string((const uint8_t*) str, strlen(str), pool);
}
bool
gu_string_to_int(GuString s, int *res)
{
GuWord w = s.w_;
uint8_t buf[sizeof(GuWord)];
char* src;
size_t sz;
if (w & 1) {
sz = (w & 0xff) >> 1;
gu_assert(sz <= sizeof(GuWord));
size_t i = sz;
while (i > 0) {
w >>= 8;
buf[--i] = w & 0xff;
}
src = (char*) buf;
} else {
uint8_t* p = (void*) w;
sz = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
src = (char*) &p[1];
}
size_t i = 0;
bool neg = false;
if (src[i] == '-') {
if (*s == '-') {
neg = true;
i++;
s++;
}
if (i >= sz)
if (*s == 0)
return false;
int n = 0;
for (; i < sz; i++) {
if (src[i] < '0' || src[i] > '9')
for (; *s; s++) {
if (*s < '0' || *s > '9')
return false;
n = n * 10 + (src[i] - '0');
n = n * 10 + (*s - '0');
}
*res = neg ? -n : n;
@@ -281,54 +137,33 @@ gu_string_to_int(GuString s, int *res)
bool
gu_string_to_double(GuString s, double *res)
{
GuWord w = s.w_;
uint8_t buf[sizeof(GuWord)];
char* src;
size_t sz;
if (w & 1) {
sz = (w & 0xff) >> 1;
gu_assert(sz <= sizeof(GuWord));
size_t i = sz;
while (i > 0) {
w >>= 8;
buf[--i] = w & 0xff;
}
src = (char*) buf;
} else {
uint8_t* p = (void*) w;
sz = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
src = (char*) &p[1];
}
size_t i = 0;
bool neg = false;
bool dec = false;
double exp = 1;
if (src[i] == '-') {
if (*s == '-') {
neg = true;
i++;
s++;
}
if (i >= sz)
if (*s == 0)
return false;
double d = 0;
for (; i < sz; i++) {
if (src[i] == '.') {
for (; *s; s++) {
if (*s == '.') {
if (dec) return false;
dec = true;
continue;
}
if (src[i] < '0' || src[i] > '9')
if (*s < '0' || *s > '9')
return false;
if (dec) exp = exp * 10;
d = d * 10 + (src[i] - '0');
d = d * 10 + (*s - '0');
}
*res = (neg ? -d : d) / exp;
@@ -338,54 +173,18 @@ gu_string_to_double(GuString s, double *res)
bool
gu_string_is_prefix(GuString s1, GuString s2)
{
GuWord w1 = s1.w_;
uint8_t buf1[sizeof(GuWord)];
size_t sz1;
char* str1;
if (w1 & 1) {
sz1 = (w1 & 0xff) >> 1;
gu_assert(sz1 <= sizeof(GuWord));
size_t i = sz1;
while (i > 0) {
w1 >>= 8;
buf1[--i] = w1 & 0xff;
}
str1 = (char*) buf1;
} else {
uint8_t* p = (void*) w1;
sz1 = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
str1 = (char*) &p[1];
}
size_t len1 = strlen(s1);
size_t len2 = strlen(s2);
GuWord w2 = s2.w_;
uint8_t buf2[sizeof(GuWord)];
size_t sz2;
char* str2;
if (w2 & 1) {
sz2 = (w2 & 0xff) >> 1;
gu_assert(sz2 <= sizeof(GuWord));
size_t i = sz2;
while (i > 0) {
w2 >>= 8;
buf2[--i] = w2 & 0xff;
}
str2 = (char*) buf2;
} else {
uint8_t* p = (void*) w2;
sz2 = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
str2 = (char*) &p[1];
}
if (sz1 > sz2)
if (len1 > len2)
return false;
for (size_t sz = sz1; sz--; sz > 0) {
if (*str1 != *str2)
for (size_t len = len1; len--; len > 0) {
if (*s1 != *s2)
return false;
str1++;
str2++;
s1++;
s2++;
}
return true;
@@ -394,108 +193,23 @@ gu_string_is_prefix(GuString s1, GuString s2)
GuHash
gu_string_hash(GuHash h, GuString s)
{
if (s.w_ & 1) {
return h*101 + s.w_;
}
size_t len = gu_string_length(s);
uint8_t* data = gu_string_long_data(s);
return gu_hash_bytes(h, data, len);
}
bool
gu_string_eq(GuString s1, GuString s2)
{
if (s1.w_ == s2.w_) {
return true;
} else if (gu_string_is_long(s1) && gu_string_is_long(s2)) {
size_t len1 = gu_string_long_length(s1);
size_t len2 = gu_string_long_length(s2);
if (len1 != len2) {
return false;
}
uint8_t* data1 = gu_string_long_data(s1);
uint8_t* data2 = gu_string_long_data(s2);
return (memcmp(data1, data2, len1) == 0);
}
return false;
return gu_hash_bytes(h, (uint8_t*)s, strlen(s));
}
static bool
gu_string_eq_fn(GuEquality* self, const void* p1, const void* p2)
{
(void) self;
const GuString* sp1 = p1;
const GuString* sp2 = p2;
return gu_string_eq(*sp1, *sp2);
return strcmp((GuString) p1, (GuString) p2) == 0;
}
GuEquality gu_string_equality[1] = { { gu_string_eq_fn } };
int
gu_string_cmp(GuString s1, GuString s2)
{
uint8_t buf1[sizeof(GuWord)];
char* src1;
size_t sz1;
if (s1.w_ & 1) {
sz1 = (s1.w_ & 0xff) >> 1;
gu_assert(sz1 <= sizeof(GuWord));
size_t i = sz1;
while (i > 0) {
s1.w_ >>= 8;
buf1[--i] = s1.w_ & 0xff;
}
src1 = (char*) buf1;
} else {
uint8_t* p = (void*) s1.w_;
sz1 = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
src1 = (char*) &p[1];
}
uint8_t buf2[sizeof(GuWord)];
char* src2;
size_t sz2;
if (s2.w_ & 1) {
sz2 = (s2.w_ & 0xff) >> 1;
gu_assert(sz2 <= sizeof(GuWord));
size_t i = sz2;
while (i > 0) {
s2.w_ >>= 8;
buf2[--i] = s2.w_ & 0xff;
}
src2 = (char*) buf2;
} else {
uint8_t* p = (void*) s2.w_;
sz2 = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
src2 = (char*) &p[1];
}
for (size_t i = 0; ; i++) {
if (sz1 == i && i == sz2)
break;
if (sz1 <= i)
return -1;
if (i >= sz2)
return 1;
if (src1[i] > src2[i])
return 1;
else if (src1[i] < src2[i])
return -1;
}
return 0;
}
static int
gu_string_cmp_fn(GuOrder* self, const void* p1, const void* p2)
{
(void) self;
const GuString* sp1 = p1;
const GuString* sp2 = p2;
return gu_string_cmp(*sp1, *sp2);
return strcmp((GuString) p1, (GuString) p2);
}
GuOrder gu_string_order[1] = { { gu_string_cmp_fn } };
@@ -504,8 +218,7 @@ static GuHash
gu_string_hasher_hash(GuHasher* self, const void* p)
{
(void) self;
const GuString* sp = p;
return gu_string_hash(0, *sp);
return gu_string_hash(0, (GuString) p);
}
GuHasher gu_string_hasher[1] = {
@@ -516,5 +229,5 @@ GuHasher gu_string_hasher[1] = {
};
GU_DEFINE_TYPE(GuString, GuOpaque, _);
GU_DEFINE_KIND(GuString, pointer);
GU_DEFINE_KIND(GuStringMap, GuMap);