diff --git a/src/runtime/c/pgf/reader.cxx b/src/runtime/c/pgf/reader.cxx index cf939441e..1734d4397 100644 --- a/src/runtime/c/pgf/reader.cxx +++ b/src/runtime/c/pgf/reader.cxx @@ -84,7 +84,7 @@ uint64_t PgfReader::read_uint() return u; } -object PgfReader::read_name_internal(size_t struct_size) +object PgfReader::read_text_internal(size_t struct_size) { size_t size = read_len(); object offs = current_db->malloc_internal(struct_size+sizeof(PgfText)+size+1); @@ -104,51 +104,6 @@ object PgfReader::read_name_internal(size_t struct_size) return offs; } -object PgfReader::read_text_internal(size_t struct_size) -{ - size_t len = read_len(); - - char* buf = (char*) alloca(len*6+1); - char* p = buf; - for (size_t i = 0; i < len; i++) { - uint8_t c = read_uint8(); - *(p++) = (char) c; - - if (c < 0x80) { - continue; - } - if (c < 0xc2) { - throw pgf_error("utf8 decoding error"); - } - - int len = (c < 0xe0 ? 1 : - c < 0xf0 ? 2 : - c < 0xf8 ? 3 : - c < 0xfc ? 4 : - 5 - ); - // If reading the extra bytes causes EOF, it is an encoding - // error, not a legitimate end of character stream. - fread(p, len, 1, in); - if (feof(in)) - throw pgf_error("utf8 decoding error"); - if (ferror(in)) - throw pgf_error("an error occured while reading the grammar"); - - p += len; - } - - size_t size = p-buf; - *p++ = 0; - - object offs = current_db->malloc_internal(struct_size+sizeof(PgfText)+size+1); - PgfText* ptext = (PgfText*) (current_base+offs+struct_size); - ptext->size = size; - memcpy(ptext->text, buf, size+1); - - return offs; -} - template Namespace PgfReader::read_namespace(ref (PgfReader::*read_value)(), size_t len) { @@ -370,14 +325,6 @@ ref PgfReader::read_abscat() ref abscat = read_name(&PgfAbsCat::name); abscat->ref_count = 1; abscat->context = read_vector(&PgfReader::read_hypo); - - // for now we just read the set of functions per category and ignore them - size_t n_funs = read_len(); - for (size_t i = 0; i < n_funs; i++) { - read_double(); - read_name(); - } - abscat->prob = - log(read_double()); return abscat; } diff --git a/src/runtime/c/pgf/reader.h b/src/runtime/c/pgf/reader.h index fe5006eb2..6e1c5a554 100644 --- a/src/runtime/c/pgf/reader.h +++ b/src/runtime/c/pgf/reader.h @@ -22,11 +22,11 @@ public: template ref read_name(PgfText V::* field) { - return read_name_internal((size_t) &(((V*) NULL)->*field)); + return read_text_internal((size_t) &(((V*) NULL)->*field)); }; ref read_name() { - return read_name_internal(0); + return read_text_internal(0); }; template diff --git a/src/runtime/c/pgf/writer.cxx b/src/runtime/c/pgf/writer.cxx index ecb267c68..4c9b9131c 100644 --- a/src/runtime/c/pgf/writer.cxx +++ b/src/runtime/c/pgf/writer.cxx @@ -106,25 +106,9 @@ void PgfWriter::write_uint(uint64_t u) } } -void PgfWriter::write_name(PgfText *text) -{ - write_len(text->size); - size_t n_items = fwrite(&text->text, text->size, 1, out); - if (ferror(out)) - throw pgf_error("an error occured while writing out the grammar"); - if (text->size != 0 && n_items != 1) - throw pgf_error("couldn't write to the output file"); -} - void PgfWriter::write_text(PgfText *text) { - size_t len = 0; - const uint8_t* p = (const uint8_t*) &text->text; - const uint8_t* e = p + text->size; - while (p < e && pgf_utf8_decode(&p) != 0) - len++; - - write_len(len); + write_len(text->size); size_t n_items = fwrite(&text->text, text->size, 1, out); if (ferror(out)) throw pgf_error("an error occured while writing out the grammar"); @@ -273,48 +257,10 @@ void PgfWriter::write_absfun(ref absfun) write_double(expf(-absfun->prob)); } -static -void count_funs_by_cat(Namespace funs, PgfText *cat, size_t *pcount) -{ - if (funs == 0) - return; - - count_funs_by_cat(funs->left, cat, pcount); - - if (textcmp(&funs->value->name, cat) == 0) { - *pcount++; - } - - count_funs_by_cat(funs->right, cat, pcount); -} - -static -void write_funs_by_cat(Namespace funs, PgfText *cat, PgfWriter *wtr) -{ - if (funs == 0) - return; - - write_funs_by_cat(funs->left, cat, wtr); - - if (textcmp(&funs->value->name, cat) == 0) { - wtr->write_double(expf(-funs->value->prob)); - wtr->write_name(&funs->value->name); - } - - write_funs_by_cat(funs->right, cat, wtr); -} - void PgfWriter::write_abscat(ref abscat) { write_name(&abscat->name); - write_vector(abscat->context, &PgfWriter::write_hypo); - - size_t n_count = 0; - count_funs_by_cat(abstract->funs, &abscat->name, &n_count); - - write_len(n_count); - write_funs_by_cat(abstract->funs, &abscat->name, this); - + write_vector(abscat->context, &PgfWriter::write_hypo); write_double(expf(-abscat->prob)); } diff --git a/src/runtime/c/pgf/writer.h b/src/runtime/c/pgf/writer.h index 1bf7c9370..6e9b1a83a 100644 --- a/src/runtime/c/pgf/writer.h +++ b/src/runtime/c/pgf/writer.h @@ -16,7 +16,7 @@ public: void write_tag(uint8_t t) { write_uint8(t); } - void write_name(PgfText *text); + void write_name(PgfText *text) { write_text(text); }; void write_text(PgfText *text); template diff --git a/src/runtime/haskell/tests/basic.pgf b/src/runtime/haskell/tests/basic.pgf index 525ac33a0..84e8cff6c 100644 Binary files a/src/runtime/haskell/tests/basic.pgf and b/src/runtime/haskell/tests/basic.pgf differ