diff --git a/src/runtime/c/gu/in.c b/src/runtime/c/gu/in.c
index 835badfa7..ddac81102 100644
--- a/src/runtime/c/gu/in.c
+++ b/src/runtime/c/gu/in.c
@@ -253,7 +253,6 @@ gu_in_f64be(GuIn* in, GuExn* err)
 	return gu_decode_double(gu_in_u64le(in, err));
 }
 
-
 static void
 gu_in_fini(GuFinalizer* fin)
 {
diff --git a/src/runtime/c/gu/string.c b/src/runtime/c/gu/string.c
index e2708aae1..c8b443daf 100644
--- a/src/runtime/c/gu/string.c
+++ b/src/runtime/c/gu/string.c
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include
 
 const GuString gu_empty_string = { 1 };
 
@@ -165,6 +166,61 @@ gu_string_write(GuString s, GuOut* out, GuExn* err)
 	gu_out_bytes(out, src, sz, err);
 }
 
+// Read len UTF-8 encoded characters from in and return them as a
+// GuString allocated from pool.  The scratch buffer reserves 4 bytes
+// per character: gu_in_utf8_buf stores one lead byte plus at most 3
+// continuation bytes.  If a read fails, the error is left in err and
+// the bytes stored so far are still passed to gu_utf8_string.
+// NOTE(review): the scratch buffer comes from alloca, so a large len
+// can exhaust the stack -- confirm that callers bound it.
+GuString
+gu_string_read(size_t len, GuPool* pool, GuIn* in, GuExn* err)
+{
+	uint8_t* buf = alloca(len*4);
+	uint8_t* p = buf;
+	for (size_t i = 0; i < len; i++) {
+		gu_in_utf8_buf(&p, in, err);
+	}
+	return gu_utf8_string(buf, p-buf, pool);
+}
+
+// Read len bytes from in and return them as a GuString without
+// re-encoding them.
+// NOTE(review): the reader code this replaces converted every latin-1
+// byte with gu_out_utf8; here bytes >= 0x80 are stored unchanged --
+// confirm that identifiers are ASCII only.
+// Errors raised while reading are left in err; they are not checked here.
+GuString
+gu_string_read_latin1(size_t len, GuPool* pool, GuIn* in, GuExn* err)
+{
+	// Short strings are packed into the word itself: the bytes first,
+	// then (len << 1) | 1 in the lowest byte.  No memory is allocated.
+	if (len < GU_MIN(sizeof(GuWord), 128)) {
+		GuWord w = 0;
+		for (size_t n = 0; n < len; n++) {
+			w = w << 8 | gu_in_u8(in, err);
+		}
+		w = w << 8 | (len << 1) | 1;
+		return (GuString) { w };
+	}
+	// Longer strings are stored in pool memory; p[0] holds the length
+	// when it fits in a byte, otherwise p[0] is 0 and the length is kept
+	// in a size_t placed just before p.
+	uint8_t* p = NULL;
+	if (len < 256) {
+		p = gu_malloc_aligned(pool, 1 + len, 2);
+		p[0] = (uint8_t) len;
+	} else {
+		p = gu_malloc_prefixed(pool, gu_alignof(size_t),
+		                       sizeof(size_t), 1, 1 + len);
+		((size_t*) p)[-1] = len;
+		p[0] = 0;
+	}
+
+	gu_in_bytes(in, &p[1], len, err);
+	return (GuString) { (GuWord) (void*) p };
+}
+
 GuString
 gu_format_string_v(const char* fmt, va_list args, GuPool* pool)
 {
diff --git a/src/runtime/c/gu/string.h b/src/runtime/c/gu/string.h
index 310c725b5..a24fe3068 100644
--- a/src/runtime/c/gu/string.h
+++ b/src/runtime/c/gu/string.h
@@ -34,6 +34,12 @@ gu_string_copy(GuString string, GuPool* pool);
 void
 gu_string_write(GuString string, GuOut* out, GuExn* err);
 
+GuString
+gu_string_read(size_t len, GuPool* pool, GuIn* in, GuExn* err);
+
+GuString
+gu_string_read_latin1(size_t len, GuPool* pool, GuIn* in, GuExn* err);
+
 GuIn*
 gu_string_in(GuString string, GuPool* pool);
 
diff --git a/src/runtime/c/gu/utf8.c b/src/runtime/c/gu/utf8.c
index 38eb91a9f..2377f1ac2 100644
--- a/src/runtime/c/gu/utf8.c
+++ b/src/runtime/c/gu/utf8.c
@@ -72,8 +72,8 @@ fail:
 	return 0;
 }
 
-extern inline void
-gu_out_utf8(GuUCS ucs, GuOut* out, GuExn* err);
+extern inline GuUCS
+gu_in_utf8(GuIn* in, GuExn* err);
 
 static size_t
 gu_advance_utf8(GuUCS ucs, uint8_t* buf)
@@ -121,5 +121,53 @@ gu_out_utf8_(GuUCS ucs, GuOut* out, GuExn* err)
 	}
 }
 
-extern inline GuUCS
-gu_in_utf8(GuIn* in, GuExn* err);
+extern inline void
+gu_out_utf8(GuUCS ucs, GuOut* out, GuExn* err);
+
+// Read one UTF-8 encoded character from in and store its bytes,
+// unchanged, at *buf.  On success *buf is advanced past the stored
+// bytes; on failure *buf is left as it was.  Raises GuUCSExn when the
+// lead byte is not valid or when the input ends inside a character.
+// Continuation bytes are copied without being validated.
+void
+gu_in_utf8_buf(uint8_t** buf, GuIn* in, GuExn* err)
+{
+	uint8_t* p = *buf;
+
+	uint8_t c = gu_in_u8(in, err);
+	if (!gu_ok(err)) {
+		return;
+	}
+	*(p++) = c;
+	// Number of continuation bytes implied by the lead byte; -1 marks a
+	// byte that cannot start a character.
+	int len = (c < 0x80 ? 0 :
+	           c < 0xc2 ? -1 :
+	           c < 0xe0 ? 1 :
+	           c < 0xf0 ? 2 :
+	           c < 0xf5 ? 3 :
+	           -1);
+	if (len < 0) {
+		goto fail;
+	} else if (len == 0) {
+		*buf = p;
+		return;
+	}
+	// If reading the extra bytes causes EOF, it is an encoding
+	// error, not a legitimate end of character stream.
+	GuExn* tmp_err = gu_exn(err, GuEOF, NULL);
+	gu_in_bytes(in, p, len, tmp_err);
+	if (tmp_err->caught) {
+		goto fail;
+	}
+	if (!gu_ok(err)) {
+		return;
+	}
+	// Skip the continuation bytes that gu_in_bytes stored at p.
+	*buf = p + len;
+	return;
+
+fail:
+	gu_raise(err, GuUCSExn);
+	return;
+}
diff --git a/src/runtime/c/gu/utf8.h b/src/runtime/c/gu/utf8.h
index 7cf42d56a..7674c6e02 100644
--- a/src/runtime/c/gu/utf8.h
+++ b/src/runtime/c/gu/utf8.h
@@ -29,7 +29,12 @@ gu_out_utf8(GuUCS ucs, GuOut* out, GuExn* err)
 	}
 }
 
+// Helper functions used in other modules
+
 GuUCS
 gu_utf8_decode(const uint8_t** utf8);
 
+void
+gu_in_utf8_buf(uint8_t** buf, GuIn* in, GuExn* err);
+
 #endif // GU_UTF8_H_
diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c
index 1b776beec..b6d3c8854 100644
--- a/src/runtime/c/pgf/reader.c
+++ b/src/runtime/c/pgf/reader.c
@@ -91,38 +91,15 @@ pgf_read_len(PgfReader* rdr)
 static PgfCId
 pgf_read_cid(PgfReader* rdr)
 {
-	GuPool* tmp_pool = gu_new_pool();
-	GuStringBuf* sbuf = gu_string_buf(tmp_pool);
-	GuOut* out = gu_string_buf_out(sbuf);
-
 	size_t len = pgf_read_len(rdr);
-	for (size_t i = 0; i < len; i++) {
-		// CIds are in latin-1
-		GuUCS ucs = gu_in_u8(rdr->in, rdr->err);
-		gu_out_utf8(ucs, out, rdr->err);
-	}
-	GuString str = gu_string_buf_freeze(sbuf, rdr->opool);
-	gu_pool_free(tmp_pool);
-	return str;
+	return gu_string_read_latin1(len, rdr->opool, rdr->in, rdr->err);
 }
 
 static GuString
 pgf_read_string(PgfReader* rdr)
 {
-	GuPool* tmp_pool = gu_new_pool();
-	GuStringBuf* sbuf = gu_string_buf(tmp_pool);
-	GuOut* out = gu_string_buf_out(sbuf);
-
 	GuLength len = pgf_read_len(rdr);
-
-	for (size_t i = 0; i < len; i++) {
-		GuUCS ucs = gu_in_utf8(rdr->in, rdr->err);
-		gu_out_utf8(ucs, out, rdr->err);
-	}
-	GuString str = gu_string_buf_freeze(sbuf, rdr->opool);
-	gu_pool_free(tmp_pool);
-
-	return str;
+	return gu_string_read(len, rdr->opool, rdr->in, rdr->err);
 }
 
 static void