forked from GitHub/gf-core
remove redundancies in the .pgf format kept for legacy reasons
This commit is contained in:
@@ -84,7 +84,7 @@ uint64_t PgfReader::read_uint()
|
||||
return u;
|
||||
}
|
||||
|
||||
object PgfReader::read_name_internal(size_t struct_size)
|
||||
object PgfReader::read_text_internal(size_t struct_size)
|
||||
{
|
||||
size_t size = read_len();
|
||||
object offs = current_db->malloc_internal(struct_size+sizeof(PgfText)+size+1);
|
||||
@@ -104,51 +104,6 @@ object PgfReader::read_name_internal(size_t struct_size)
|
||||
return offs;
|
||||
}
|
||||
|
||||
// Reads a text whose length prefix counts characters rather than bytes,
// and stores it in the database as a PgfText placed `struct_size` bytes
// after the start of a freshly allocated object.
//
// struct_size - number of bytes reserved in front of the PgfText.
// Returns the offset of the allocated object (not of the PgfText itself).
// Throws pgf_error when a lead byte is below 0xc2 (but not ASCII), when the
// input ends in the middle of a character, or when the stream reports a
// read error.
object PgfReader::read_text_internal(size_t struct_size)
{
    // Number of characters that follow; the byte count is only known once
    // every character has been read.
    size_t n_chars = read_len();

    // Worst case: one lead byte plus up to five extra bytes per character,
    // and one more byte for the terminating zero.
    // NOTE(review): n_chars comes straight from the input, so a corrupt
    // grammar can request an arbitrarily large stack allocation here
    // (and n_chars*6+1 can wrap around) - consider bounding it.
    char* buf = (char*) alloca(n_chars*6+1);
    char* p = buf;
    for (size_t i = 0; i < n_chars; i++) {
        uint8_t c = read_uint8();
        *(p++) = (char) c;

        // Single-byte character, nothing more to read.
        if (c < 0x80) {
            continue;
        }
        // 0x80..0xc1 cannot start a multi-byte sequence.
        if (c < 0xc2) {
            throw pgf_error("utf8 decoding error");
        }

        // Number of bytes that follow the lead byte. This used to be an
        // `int len` that shadowed the character count driving the loop.
        size_t n_extra = (c < 0xe0 ? 1 :
                          c < 0xf0 ? 2 :
                          c < 0xf8 ? 3 :
                          c < 0xfc ? 4 :
                                     5
                         );

        // If reading the extra bytes causes EOF, it is an encoding
        // error, not a legitimate end of character stream.
        if (fread(p, n_extra, 1, in) != 1) {
            if (!feof(in) && ferror(in))
                throw pgf_error("an error occured while reading the grammar");
            throw pgf_error("utf8 decoding error");
        }

        p += n_extra;
    }

    // Size in bytes, excluding the terminating zero written below.
    size_t size = p-buf;
    *p++ = 0;

    object offs = current_db->malloc_internal(struct_size+sizeof(PgfText)+size+1);
    PgfText* ptext = (PgfText*) (current_base+offs+struct_size);
    ptext->size = size;
    memcpy(ptext->text, buf, size+1);  // size+1 copies the terminator too

    return offs;
}
|
||||
|
||||
template<class V>
|
||||
Namespace<V> PgfReader::read_namespace(ref<V> (PgfReader::*read_value)(), size_t len)
|
||||
{
|
||||
@@ -370,14 +325,6 @@ ref<PgfAbsCat> PgfReader::read_abscat()
|
||||
ref<PgfAbsCat> abscat = read_name<PgfAbsCat>(&PgfAbsCat::name);
|
||||
abscat->ref_count = 1;
|
||||
abscat->context = read_vector<PgfHypo>(&PgfReader::read_hypo);
|
||||
|
||||
// for now we just read the set of functions per category and ignore them
|
||||
size_t n_funs = read_len();
|
||||
for (size_t i = 0; i < n_funs; i++) {
|
||||
read_double();
|
||||
read_name();
|
||||
}
|
||||
|
||||
abscat->prob = - log(read_double());
|
||||
return abscat;
|
||||
}
|
||||
|
||||
@@ -22,11 +22,11 @@ public:
|
||||
|
||||
template<class V>
|
||||
ref<V> read_name(PgfText V::* field) {
|
||||
return read_name_internal((size_t) &(((V*) NULL)->*field));
|
||||
return read_text_internal((size_t) &(((V*) NULL)->*field));
|
||||
};
|
||||
|
||||
ref<PgfText> read_name() {
|
||||
return read_name_internal(0);
|
||||
return read_text_internal(0);
|
||||
};
|
||||
|
||||
template<class V>
|
||||
|
||||
@@ -106,25 +106,9 @@ void PgfWriter::write_uint(uint64_t u)
|
||||
}
|
||||
}
|
||||
|
||||
// Writes a text as its size in bytes followed by the bytes themselves.
// Throws pgf_error if the output stream reports an error or if the bytes
// of a non-empty text could not be written as one item.
void PgfWriter::write_name(PgfText *text)
{
    write_len(text->size);

    const size_t written = fwrite(&text->text, text->size, 1, out);
    if (ferror(out))
        throw pgf_error("an error occured while writing out the grammar");

    // fwrite returns 0 for a zero-sized item, so an empty text is not
    // treated as a failed write.
    const bool item_missing = (written != 1);
    if (item_missing && text->size != 0)
        throw pgf_error("couldn't write to the output file");
}
|
||||
|
||||
void PgfWriter::write_text(PgfText *text)
|
||||
{
|
||||
size_t len = 0;
|
||||
const uint8_t* p = (const uint8_t*) &text->text;
|
||||
const uint8_t* e = p + text->size;
|
||||
while (p < e && pgf_utf8_decode(&p) != 0)
|
||||
len++;
|
||||
|
||||
write_len(len);
|
||||
write_len(text->size);
|
||||
size_t n_items = fwrite(&text->text, text->size, 1, out);
|
||||
if (ferror(out))
|
||||
throw pgf_error("an error occured while writing out the grammar");
|
||||
@@ -273,48 +257,10 @@ void PgfWriter::write_absfun(ref<PgfAbsFun> absfun)
|
||||
write_double(expf(-absfun->prob));
|
||||
}
|
||||
|
||||
static
|
||||
void count_funs_by_cat(Namespace<PgfAbsFun> funs, PgfText *cat, size_t *pcount)
|
||||
{
|
||||
if (funs == 0)
|
||||
return;
|
||||
|
||||
count_funs_by_cat(funs->left, cat, pcount);
|
||||
|
||||
if (textcmp(&funs->value->name, cat) == 0) {
|
||||
*pcount++;
|
||||
}
|
||||
|
||||
count_funs_by_cat(funs->right, cat, pcount);
|
||||
}
|
||||
|
||||
static
|
||||
void write_funs_by_cat(Namespace<PgfAbsFun> funs, PgfText *cat, PgfWriter *wtr)
|
||||
{
|
||||
if (funs == 0)
|
||||
return;
|
||||
|
||||
write_funs_by_cat(funs->left, cat, wtr);
|
||||
|
||||
if (textcmp(&funs->value->name, cat) == 0) {
|
||||
wtr->write_double(expf(-funs->value->prob));
|
||||
wtr->write_name(&funs->value->name);
|
||||
}
|
||||
|
||||
write_funs_by_cat(funs->right, cat, wtr);
|
||||
}
|
||||
|
||||
void PgfWriter::write_abscat(ref<PgfAbsCat> abscat)
|
||||
{
|
||||
write_name(&abscat->name);
|
||||
write_vector(abscat->context, &PgfWriter::write_hypo);
|
||||
|
||||
size_t n_count = 0;
|
||||
count_funs_by_cat(abstract->funs, &abscat->name, &n_count);
|
||||
|
||||
write_len(n_count);
|
||||
write_funs_by_cat(abstract->funs, &abscat->name, this);
|
||||
|
||||
write_vector(abscat->context, &PgfWriter::write_hypo);
|
||||
write_double(expf(-abscat->prob));
|
||||
}
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ public:
|
||||
|
||||
void write_tag(uint8_t t) { write_uint8(t); }
|
||||
|
||||
void write_name(PgfText *text);
|
||||
void write_name(PgfText *text) { write_text(text); };
|
||||
void write_text(PgfText *text);
|
||||
|
||||
template<class V>
|
||||
|
||||
Binary file not shown.
Reference in New Issue
Block a user