remove redundancies in the .pgf format kept for legacy reasons

This commit is contained in:
Krasimir Angelov
2022-02-08 19:04:08 +01:00
parent 4ee671e59d
commit fdd33b63d9
5 changed files with 6 additions and 113 deletions

View File

@@ -84,7 +84,7 @@ uint64_t PgfReader::read_uint()
return u;
}
object PgfReader::read_name_internal(size_t struct_size)
object PgfReader::read_text_internal(size_t struct_size)
{
size_t size = read_len();
object offs = current_db->malloc_internal(struct_size+sizeof(PgfText)+size+1);
@@ -104,51 +104,6 @@ object PgfReader::read_name_internal(size_t struct_size)
return offs;
}
// Reads a length-prefixed UTF-8 string from `in` and stores it as a PgfText
// placed `struct_size` bytes into a freshly allocated database object.
//
// The length prefix counts encoded characters, not bytes: for each character
// one lead byte is read and, if it is not ASCII, the number of trailing bytes
// implied by the lead byte is read after it.
//
// Returns the offset of the allocated object. Throws pgf_error on a lead byte
// in the range 0x80..0xc1, on EOF inside a multi-byte character, or on a
// stream error.
object PgfReader::read_text_internal(size_t struct_size)
{
    size_t n_chars = read_len();

    // Worst case is six bytes per character (lead byte + five trailing
    // bytes), plus one byte for the terminating NUL.
    // NOTE(review): n_chars comes straight from the input and sizes a stack
    // allocation -- confirm that callers only feed trusted files.
    char* utf8 = (char*) alloca(n_chars*6+1);
    char* cursor = utf8;

    for (size_t left = n_chars; left > 0; left--) {
        uint8_t lead = read_uint8();
        *cursor = (char) lead;
        cursor++;

        if (lead >= 0x80) {
            if (lead < 0xc2) {
                throw pgf_error("utf8 decoding error");
            }

            // Number of trailing bytes announced by the lead byte.
            int n_trail;
            if (lead < 0xe0)
                n_trail = 1;
            else if (lead < 0xf0)
                n_trail = 2;
            else if (lead < 0xf8)
                n_trail = 3;
            else if (lead < 0xfc)
                n_trail = 4;
            else
                n_trail = 5;

            // Running out of input in the middle of a character is reported
            // as an encoding error rather than as a normal end of stream.
            fread(cursor, n_trail, 1, in);
            if (feof(in))
                throw pgf_error("utf8 decoding error");
            if (ferror(in))
                throw pgf_error("an error occured while reading the grammar");

            cursor += n_trail;
        }
    }

    size_t size = cursor-utf8;
    *cursor = 0;

    // One allocation holds the caller's header (struct_size bytes), the
    // PgfText header, the bytes themselves and the terminating NUL.
    object offs = current_db->malloc_internal(struct_size+sizeof(PgfText)+size+1);
    PgfText* ptext = (PgfText*) (current_base+offs+struct_size);
    ptext->size = size;
    memcpy(ptext->text, utf8, size+1);

    return offs;
}
template<class V>
Namespace<V> PgfReader::read_namespace(ref<V> (PgfReader::*read_value)(), size_t len)
{
@@ -370,14 +325,6 @@ ref<PgfAbsCat> PgfReader::read_abscat()
ref<PgfAbsCat> abscat = read_name<PgfAbsCat>(&PgfAbsCat::name);
abscat->ref_count = 1;
abscat->context = read_vector<PgfHypo>(&PgfReader::read_hypo);
// for now we just read the set of functions per category and ignore them
size_t n_funs = read_len();
for (size_t i = 0; i < n_funs; i++) {
read_double();
read_name();
}
abscat->prob = - log(read_double());
return abscat;
}

View File

@@ -22,11 +22,11 @@ public:
template<class V>
ref<V> read_name(PgfText V::* field) {
return read_name_internal((size_t) &(((V*) NULL)->*field));
return read_text_internal((size_t) &(((V*) NULL)->*field));
};
ref<PgfText> read_name() {
return read_name_internal(0);
return read_text_internal(0);
};
template<class V>

View File

@@ -106,25 +106,9 @@ void PgfWriter::write_uint(uint64_t u)
}
}
// Writes `text` to `out` as its byte length (via write_len) followed by the
// raw bytes of the text, without a terminating NUL.
//
// Throws pgf_error if the stream reports an error or if a non-empty text
// could not be written in full.
void PgfWriter::write_name(PgfText *text)
{
    write_len(text->size);

    size_t n_written = fwrite(&text->text, text->size, 1, out);

    if (ferror(out))
        throw pgf_error("an error occured while writing out the grammar");

    // fwrite returns 0 items for a zero-sized element, so an empty text is
    // not treated as a failure.
    bool is_empty = (text->size == 0);
    if (!is_empty && n_written != 1)
        throw pgf_error("couldn't write to the output file");
}
void PgfWriter::write_text(PgfText *text)
{
size_t len = 0;
const uint8_t* p = (const uint8_t*) &text->text;
const uint8_t* e = p + text->size;
while (p < e && pgf_utf8_decode(&p) != 0)
len++;
write_len(len);
write_len(text->size);
size_t n_items = fwrite(&text->text, text->size, 1, out);
if (ferror(out))
throw pgf_error("an error occured while writing out the grammar");
@@ -273,48 +257,10 @@ void PgfWriter::write_absfun(ref<PgfAbsFun> absfun)
write_double(expf(-absfun->prob));
}
// Walks the `funs` tree in order (left subtree, node, right subtree) and adds
// one to `*pcount` for every node whose value's name compares equal to `cat`
// under textcmp. The caller is expected to initialise `*pcount`.
//
// NOTE(review): the comparison is against the function's own `name` field;
// confirm that this, rather than the function's result category, is the
// intended key.
static
void count_funs_by_cat(Namespace<PgfAbsFun> funs, PgfText *cat, size_t *pcount)
{
    if (funs == 0)
        return;

    count_funs_by_cat(funs->left, cat, pcount);

    if (textcmp(&funs->value->name, cat) == 0) {
        // The parentheses matter: `*pcount++` parses as `*(pcount++)`, which
        // only advances the local pointer and leaves the counter unchanged.
        (*pcount)++;
    }

    count_funs_by_cat(funs->right, cat, pcount);
}
// Walks the `funs` tree in order (left subtree, node, right subtree). For
// every node whose value's name compares equal to `cat` under textcmp, emits
// through `wtr` the value expf(-prob) followed by the name.
static
void write_funs_by_cat(Namespace<PgfAbsFun> funs, PgfText *cat, PgfWriter *wtr)
{
    // An empty subtree contributes nothing.
    if (funs == 0)
        return;

    write_funs_by_cat(funs->left, cat, wtr);

    bool same_name = (textcmp(&funs->value->name, cat) == 0);
    if (same_name) {
        // `prob` is turned back into expf(-prob) before it is written.
        wtr->write_double(expf(-funs->value->prob));
        wtr->write_name(&funs->value->name);
    }

    write_funs_by_cat(funs->right, cat, wtr);
}
void PgfWriter::write_abscat(ref<PgfAbsCat> abscat)
{
write_name(&abscat->name);
write_vector(abscat->context, &PgfWriter::write_hypo);
size_t n_count = 0;
count_funs_by_cat(abstract->funs, &abscat->name, &n_count);
write_len(n_count);
write_funs_by_cat(abstract->funs, &abscat->name, this);
write_vector(abscat->context, &PgfWriter::write_hypo);
write_double(expf(-abscat->prob));
}

View File

@@ -16,7 +16,7 @@ public:
void write_tag(uint8_t t) { write_uint8(t); }
void write_name(PgfText *text);
void write_name(PgfText *text) { write_text(text); };
void write_text(PgfText *text);
template<class V>

Binary file not shown.