mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-20 08:32:50 -06:00
first draft of an LR parser
This commit is contained in:
@@ -42,7 +42,9 @@ libpgf_la_SOURCES = \
|
|||||||
pgf/probspace.cxx \
|
pgf/probspace.cxx \
|
||||||
pgf/probspace.h \
|
pgf/probspace.h \
|
||||||
pgf/generator.cxx \
|
pgf/generator.cxx \
|
||||||
pgf/generator.h
|
pgf/generator.h \
|
||||||
|
pgf/md5.cxx \
|
||||||
|
pgf/md5.h
|
||||||
|
|
||||||
libpgf_la_LDFLAGS = -no-undefined -version-info 4:0:0
|
libpgf_la_LDFLAGS = -no-undefined -version-info 4:0:0
|
||||||
libpgf_la_CXXFLAGS = -fno-rtti -std=c++11 -DCOMPILING_PGF
|
libpgf_la_CXXFLAGS = -fno-rtti -std=c++11 -DCOMPILING_PGF
|
||||||
|
|||||||
@@ -48,9 +48,9 @@ void PgfConcr::release(ref<PgfConcr> concr)
|
|||||||
void PgfConcrLincat::release(ref<PgfConcrLincat> lincat)
|
void PgfConcrLincat::release(ref<PgfConcrLincat> lincat)
|
||||||
{
|
{
|
||||||
for (size_t i = 0; i < lincat->fields->len; i++) {
|
for (size_t i = 0; i < lincat->fields->len; i++) {
|
||||||
PgfLincatField::release(vector_elem(lincat->fields, i));
|
text_db_release(*vector_elem(lincat->fields, i));
|
||||||
}
|
}
|
||||||
Vector<PgfLincatField>::release(lincat->fields);
|
Vector<ref<PgfText>>::release(lincat->fields);
|
||||||
|
|
||||||
for (size_t i = 0; i < lincat->args->len; i++) {
|
for (size_t i = 0; i < lincat->args->len; i++) {
|
||||||
PgfLParam::release(vector_elem(lincat->args, i)->param);
|
PgfLParam::release(vector_elem(lincat->args, i)->param);
|
||||||
@@ -67,13 +67,6 @@ void PgfConcrLincat::release(ref<PgfConcrLincat> lincat)
|
|||||||
PgfDB::free(lincat, lincat->name.size+1);
|
PgfDB::free(lincat, lincat->name.size+1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PgfLincatField::release(ref<PgfLincatField> field)
|
|
||||||
{
|
|
||||||
text_db_release(field->name);
|
|
||||||
if (field->backrefs != 0)
|
|
||||||
Vector<PgfLincatBackref>::release(field->backrefs);
|
|
||||||
}
|
|
||||||
|
|
||||||
void PgfLParam::release(ref<PgfLParam> param)
|
void PgfLParam::release(ref<PgfLParam> param)
|
||||||
{
|
{
|
||||||
PgfDB::free(param, param->n_terms*sizeof(param->terms[0]));
|
PgfDB::free(param, param->n_terms*sizeof(param->terms[0]));
|
||||||
|
|||||||
@@ -224,19 +224,6 @@ struct PGF_INTERNAL_DECL PgfSymbolALLCAPIT {
|
|||||||
static const uint8_t tag = 10;
|
static const uint8_t tag = 10;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct PGF_INTERNAL_DECL PgfConcrLincat;
|
|
||||||
struct PGF_INTERNAL_DECL PgfLincatBackref;
|
|
||||||
struct PGF_INTERNAL_DECL PgfLincatEpsilon;
|
|
||||||
|
|
||||||
struct PGF_INTERNAL_DECL PgfLincatField {
|
|
||||||
ref<PgfConcrLincat> lincat;
|
|
||||||
ref<PgfText> name;
|
|
||||||
ref<Vector<PgfLincatBackref>> backrefs;
|
|
||||||
ref<Vector<PgfLincatEpsilon>> epsilons;
|
|
||||||
|
|
||||||
static void release(ref<PgfLincatField> field);
|
|
||||||
};
|
|
||||||
|
|
||||||
struct PGF_INTERNAL_DECL PgfConcrLincat {
|
struct PGF_INTERNAL_DECL PgfConcrLincat {
|
||||||
static const uint8_t tag = 0;
|
static const uint8_t tag = 0;
|
||||||
|
|
||||||
@@ -246,7 +233,7 @@ struct PGF_INTERNAL_DECL PgfConcrLincat {
|
|||||||
ref<Vector<PgfPArg>> args;
|
ref<Vector<PgfPArg>> args;
|
||||||
ref<Vector<ref<PgfPResult>>> res;
|
ref<Vector<ref<PgfPResult>>> res;
|
||||||
ref<Vector<ref<PgfSequence>>> seqs;
|
ref<Vector<ref<PgfSequence>>> seqs;
|
||||||
ref<Vector<PgfLincatField>> fields;
|
ref<Vector<ref<PgfText>>> fields;
|
||||||
|
|
||||||
PgfText name;
|
PgfText name;
|
||||||
|
|
||||||
@@ -268,18 +255,6 @@ struct PGF_INTERNAL_DECL PgfConcrLin {
|
|||||||
static void release(ref<PgfConcrLin> lin);
|
static void release(ref<PgfConcrLin> lin);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct PGF_INTERNAL_DECL PgfLinSeqIndex {
|
|
||||||
ref<PgfConcrLin> lin;
|
|
||||||
size_t seq_index;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct PGF_INTERNAL_DECL PgfLincatBackref : public PgfLinSeqIndex {
|
|
||||||
size_t dot;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct PGF_INTERNAL_DECL PgfLincatEpsilon : public PgfLinSeqIndex {
|
|
||||||
};
|
|
||||||
|
|
||||||
struct PGF_INTERNAL_DECL PgfConcrPrintname {
|
struct PGF_INTERNAL_DECL PgfConcrPrintname {
|
||||||
ref<PgfText> printname;
|
ref<PgfText> printname;
|
||||||
PgfText name;
|
PgfText name;
|
||||||
@@ -287,6 +262,25 @@ struct PGF_INTERNAL_DECL PgfConcrPrintname {
|
|||||||
static void release(ref<PgfConcrPrintname> printname);
|
static void release(ref<PgfConcrPrintname> printname);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// One shift entry of an LR state (stored in PgfLRState::shifts).
// NOTE(review): the field meanings below are inferred from names and from
// how (lincat, r) pairs are used elsewhere in this change -- confirm against
// the table builder.
struct PGF_INTERNAL_DECL PgfLRShift {
|
||||||
|
// presumably the index of the target state in the PgfLRTable vector
size_t next_state;
|
||||||
|
// linearization category associated with this shift
ref<PgfConcrLincat> lincat;
|
||||||
|
// presumably a field index inside `lincat` -- TODO confirm
size_t r;
|
||||||
|
// presumably set when the shifted constituent is empty -- TODO confirm
bool is_epsilon;
|
||||||
|
};
|
||||||
|
|
||||||
|
// One reduce entry of an LR state (stored in PgfLRState::reductions).
struct PGF_INTERNAL_DECL PgfLRReduce {
|
||||||
|
// untyped database object; presumably a tagged reference to the
// linearization being reduced -- TODO confirm
object lin_obj;
|
||||||
|
// index of the sequence within that object
size_t seq_index;
|
||||||
|
};
|
||||||
|
|
||||||
|
// A single state of the LR table: the shift entries and the reduce entries
// that belong to it, each kept in its own database vector.
struct PGF_INTERNAL_DECL PgfLRState {
|
||||||
|
ref<Vector<PgfLRShift>> shifts;
|
||||||
|
ref<Vector<PgfLRReduce>> reductions;
|
||||||
|
};
|
||||||
|
|
||||||
|
// The whole LR table is a vector of states.
typedef Vector<PgfLRState> PgfLRTable;
|
||||||
|
|
||||||
struct PGF_INTERNAL_DECL PgfConcr {
|
struct PGF_INTERNAL_DECL PgfConcr {
|
||||||
static const uint8_t tag = 1;
|
static const uint8_t tag = 1;
|
||||||
|
|
||||||
@@ -296,6 +290,8 @@ struct PGF_INTERNAL_DECL PgfConcr {
|
|||||||
PgfPhrasetable phrasetable;
|
PgfPhrasetable phrasetable;
|
||||||
Namespace<PgfConcrPrintname> printnames;
|
Namespace<PgfConcrPrintname> printnames;
|
||||||
|
|
||||||
|
ref<PgfLRTable> lrtable;
|
||||||
|
|
||||||
PgfText name;
|
PgfText name;
|
||||||
|
|
||||||
static void release(ref<PgfConcr> pgf);
|
static void release(ref<PgfConcr> pgf);
|
||||||
|
|||||||
@@ -287,7 +287,7 @@ void PgfLinearizer::TreeLinNode::check_category(PgfLinearizer *linearizer, PgfTe
|
|||||||
void PgfLinearizer::TreeLinNode::linearize(PgfLinearizationOutputIface *out, PgfLinearizer *linearizer, size_t lindex)
|
void PgfLinearizer::TreeLinNode::linearize(PgfLinearizationOutputIface *out, PgfLinearizer *linearizer, size_t lindex)
|
||||||
{
|
{
|
||||||
PgfText *cat = &lin->absfun->type->name;
|
PgfText *cat = &lin->absfun->type->name;
|
||||||
PgfText *field = &*(vector_elem(lin->lincat->fields, lindex)->name);
|
PgfText *field = &**vector_elem(lin->lincat->fields, lindex);
|
||||||
|
|
||||||
if (linearizer->pre_stack == NULL)
|
if (linearizer->pre_stack == NULL)
|
||||||
out->begin_phrase(cat, fid, field, &lin->name);
|
out->begin_phrase(cat, fid, field, &lin->name);
|
||||||
@@ -390,7 +390,7 @@ void PgfLinearizer::TreeLindefNode::linearize_arg(PgfLinearizationOutputIface *o
|
|||||||
void PgfLinearizer::TreeLindefNode::linearize(PgfLinearizationOutputIface *out, PgfLinearizer *linearizer, size_t lindex)
|
void PgfLinearizer::TreeLindefNode::linearize(PgfLinearizationOutputIface *out, PgfLinearizer *linearizer, size_t lindex)
|
||||||
{
|
{
|
||||||
if (lincat != 0) {
|
if (lincat != 0) {
|
||||||
PgfText *field = &*(vector_elem(lincat->fields, lindex)->name);
|
PgfText *field = &**vector_elem(lincat->fields, lindex);
|
||||||
if (linearizer->pre_stack == NULL)
|
if (linearizer->pre_stack == NULL)
|
||||||
out->begin_phrase(&lincat->name, fid, field, fun);
|
out->begin_phrase(&lincat->name, fid, field, fun);
|
||||||
else {
|
else {
|
||||||
@@ -543,7 +543,7 @@ void PgfLinearizer::TreeLitNode::linearize(PgfLinearizationOutputIface *out, Pgf
|
|||||||
{
|
{
|
||||||
PgfText *field = NULL;
|
PgfText *field = NULL;
|
||||||
if (lincat != 0) {
|
if (lincat != 0) {
|
||||||
field = &*(vector_elem(lincat->fields, lindex)->name);
|
field = &**vector_elem(lincat->fields, lindex);
|
||||||
}
|
}
|
||||||
|
|
||||||
linearizer->flush_pre_stack(out, literal);
|
linearizer->flush_pre_stack(out, literal);
|
||||||
|
|||||||
197
src/runtime/c/pgf/md5.cxx
Normal file
197
src/runtime/c/pgf/md5.cxx
Normal file
@@ -0,0 +1,197 @@
|
|||||||
|
/*
|
||||||
|
* Derived from the RSA Data Security, Inc. MD5 Message-Digest Algorithm
|
||||||
|
* and modified slightly to be functionally identical but condensed into control structures.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "data.h"
|
||||||
|
#include "md5.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Constants defined by the MD5 algorithm
|
||||||
|
*/
|
||||||
|
#define A 0x67452301
|
||||||
|
#define B 0xefcdab89
|
||||||
|
#define C 0x98badcfe
|
||||||
|
#define D 0x10325476
|
||||||
|
|
||||||
|
static uint32_t S[] = {7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
|
||||||
|
5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20,
|
||||||
|
4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
|
||||||
|
6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21};
|
||||||
|
|
||||||
|
static uint32_t K[] = {0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
|
||||||
|
0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
|
||||||
|
0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
|
||||||
|
0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
|
||||||
|
0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
|
||||||
|
0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
|
||||||
|
0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
|
||||||
|
0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
|
||||||
|
0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
|
||||||
|
0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
|
||||||
|
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
|
||||||
|
0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
|
||||||
|
0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
|
||||||
|
0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
|
||||||
|
0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
|
||||||
|
0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Padding used to make the size (in bits) of the input congruent to 448 mod 512
|
||||||
|
*/
|
||||||
|
static uint8_t PADDING[] = {0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Bit-manipulation functions defined by the MD5 algorithm
|
||||||
|
*/
|
||||||
|
#define F(X, Y, Z) ((X & Y) | (~X & Z))
|
||||||
|
#define G(X, Y, Z) ((X & Z) | (Y & ~Z))
|
||||||
|
#define H(X, Y, Z) (X ^ Y ^ Z)
|
||||||
|
#define I(X, Y, Z) (Y ^ (X | ~Z))
|
||||||
|
|
||||||
|
/*
 * Rotates a 32-bit word left by n bits.
 *
 * The rotation count is taken modulo 32, so n == 0 (or any multiple of 32)
 * returns x unchanged. Both shift counts are masked to the range 0..31
 * because shifting a 32-bit value by 32 or more is undefined behaviour.
 */
uint32_t rotateLeft(uint32_t x, uint32_t n){
    n &= 31;
    return (x << n) | (x >> ((32 - n) & 31));
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialize a context
|
||||||
|
*/
|
||||||
|
MD5Context::MD5Context()
|
||||||
|
{
|
||||||
|
size = (uint64_t)0;
|
||||||
|
|
||||||
|
buffer[0] = (uint32_t)A;
|
||||||
|
buffer[1] = (uint32_t)B;
|
||||||
|
buffer[2] = (uint32_t)C;
|
||||||
|
buffer[3] = (uint32_t)D;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Step on 512 bits of input with the main MD5 algorithm.
|
||||||
|
*/
|
||||||
|
static
|
||||||
|
void md5Step(uint32_t *buffer, uint32_t *input){
|
||||||
|
uint32_t AA = buffer[0];
|
||||||
|
uint32_t BB = buffer[1];
|
||||||
|
uint32_t CC = buffer[2];
|
||||||
|
uint32_t DD = buffer[3];
|
||||||
|
|
||||||
|
uint32_t E;
|
||||||
|
|
||||||
|
unsigned int j;
|
||||||
|
|
||||||
|
for(unsigned int i = 0; i < 64; ++i){
|
||||||
|
switch(i / 16){
|
||||||
|
case 0:
|
||||||
|
E = F(BB, CC, DD);
|
||||||
|
j = i;
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
E = G(BB, CC, DD);
|
||||||
|
j = ((i * 5) + 1) % 16;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
E = H(BB, CC, DD);
|
||||||
|
j = ((i * 3) + 5) % 16;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
E = I(BB, CC, DD);
|
||||||
|
j = (i * 7) % 16;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t temp = DD;
|
||||||
|
DD = CC;
|
||||||
|
CC = BB;
|
||||||
|
BB = BB + rotateLeft(AA + E + K[i] + input[j], S[i]);
|
||||||
|
AA = temp;
|
||||||
|
}
|
||||||
|
|
||||||
|
buffer[0] += AA;
|
||||||
|
buffer[1] += BB;
|
||||||
|
buffer[2] += CC;
|
||||||
|
buffer[3] += DD;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Add some amount of input to the context
|
||||||
|
*
|
||||||
|
* If the input fills out a block of 512 bits, apply the algorithm (md5Step)
|
||||||
|
* and save the result in the buffer. Also updates the overall size.
|
||||||
|
*/
|
||||||
|
void MD5Context::update(uint8_t *input_buffer, size_t input_len)
|
||||||
|
{
|
||||||
|
uint32_t input[16];
|
||||||
|
unsigned int offset = this->size % 64;
|
||||||
|
this->size += (uint64_t)input_len;
|
||||||
|
|
||||||
|
// Copy each byte in input_buffer into the next space in our context input
|
||||||
|
for (unsigned int i = 0; i < input_len; ++i) {
|
||||||
|
this->input[offset++] = (uint8_t)*(input_buffer + i);
|
||||||
|
|
||||||
|
// If we've filled our context input, copy it into our local array input
|
||||||
|
// then reset the offset to 0 and fill in a new buffer.
|
||||||
|
// Every time we fill out a chunk, we run it through the algorithm
|
||||||
|
// to enable some back and forth between cpu and i/o
|
||||||
|
if (offset % 64 == 0){
|
||||||
|
for (unsigned int j = 0; j < 16; ++j) {
|
||||||
|
// Convert to little-endian
|
||||||
|
// The local variable `input` our 512-bit chunk separated into 32-bit words
|
||||||
|
// we can use in calculations
|
||||||
|
input[j] = (uint32_t)(this->input[(j * 4) + 3]) << 24 |
|
||||||
|
(uint32_t)(this->input[(j * 4) + 2]) << 16 |
|
||||||
|
(uint32_t)(this->input[(j * 4) + 1]) << 8 |
|
||||||
|
(uint32_t)(this->input[(j * 4)]);
|
||||||
|
}
|
||||||
|
md5Step(this->buffer, input);
|
||||||
|
offset = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Pad the current input to get to 448 bytes, append the size in bits to the very end,
|
||||||
|
* and save the result of the final iteration into digest.
|
||||||
|
*/
|
||||||
|
void MD5Context::finalize(MD5Digest *digest)
|
||||||
|
{
|
||||||
|
uint32_t input[16];
|
||||||
|
unsigned int offset = this->size % 64;
|
||||||
|
unsigned int padding_length = offset < 56 ? 56 - offset : (56 + 64) - offset;
|
||||||
|
|
||||||
|
// Fill in the padding and undo the changes to size that resulted from the update
|
||||||
|
update(PADDING, padding_length);
|
||||||
|
this->size -= (uint64_t)padding_length;
|
||||||
|
|
||||||
|
// Do a final update (internal to this function)
|
||||||
|
// Last two 32-bit words are the two halves of the size (converted from bytes to bits)
|
||||||
|
for(unsigned int j = 0; j < 14; ++j)
|
||||||
|
{
|
||||||
|
input[j] = (uint32_t)(this->input[(j * 4) + 3]) << 24 |
|
||||||
|
(uint32_t)(this->input[(j * 4) + 2]) << 16 |
|
||||||
|
(uint32_t)(this->input[(j * 4) + 1]) << 8 |
|
||||||
|
(uint32_t)(this->input[(j * 4)]);
|
||||||
|
}
|
||||||
|
input[14] = (uint32_t)(this->size * 8);
|
||||||
|
input[15] = (uint32_t)((this->size * 8) >> 32);
|
||||||
|
|
||||||
|
md5Step(this->buffer, input);
|
||||||
|
|
||||||
|
// Move the result into digest (convert from little-endian)
|
||||||
|
for(unsigned int i = 0; i < 4; ++i){
|
||||||
|
digest->b[(i * 4) + 0] = (uint8_t)((this->buffer[i] & 0x000000FF));
|
||||||
|
digest->b[(i * 4) + 1] = (uint8_t)((this->buffer[i] & 0x0000FF00) >> 8);
|
||||||
|
digest->b[(i * 4) + 2] = (uint8_t)((this->buffer[i] & 0x00FF0000) >> 16);
|
||||||
|
digest->b[(i * 4) + 3] = (uint8_t)((this->buffer[i] & 0xFF000000) >> 24);
|
||||||
|
}
|
||||||
|
}
|
||||||
30
src/runtime/c/pgf/md5.h
Normal file
30
src/runtime/c/pgf/md5.h
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
#ifndef MD5_H
|
||||||
|
#define MD5_H
|
||||||
|
|
||||||
|
// The result of an MD5 computation: 16 raw bytes (128 bits).
struct PGF_INTERNAL_DECL MD5Digest {
|
||||||
|
uint8_t b[16];
|
||||||
|
};
|
||||||
|
|
||||||
|
// Orders two digests by byte-wise comparison of their contents, which
// makes MD5Digest usable as a key in ordered containers.
inline bool operator < (const MD5Digest &d1, const MD5Digest &d2) {
    const int order = memcmp(d1.b, d2.b, sizeof(d1.b));
    return order < 0;
}
|
||||||
|
|
||||||
|
// Incremental MD5 computation: construct, feed data with update(),
// then obtain the result with finalize().
// NOTE(review): this header uses uint8_t/uint32_t/uint64_t, size_t and
// memcmp without including anything itself -- it presumably relies on
// being included after a header that provides them.
class PGF_INTERNAL_DECL MD5Context {
|
||||||
|
uint64_t size; // Size of input in bytes
|
||||||
|
uint32_t buffer[4]; // Current accumulation of hash
|
||||||
|
uint8_t input[64]; // Input to be used in the next step
|
||||||
|
|
||||||
|
public:
|
||||||
|
MD5Context();
|
||||||
|
// Feed input_len bytes starting at input into the hash.
void update(uint8_t *input, size_t input_len);
|
||||||
|
|
||||||
|
// Convenience overload: hashes the sizeof(T) bytes that make up the
// object itself. Any padding bytes inside T are hashed as well, and
// data that T merely points to is not followed.
template <class T>
|
||||||
|
void update(T &input)
|
||||||
|
{
|
||||||
|
update((uint8_t *) &input, sizeof(T));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pad, process the last block and write the 16-byte result to *digest.
void finalize(MD5Digest *digest);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -1,59 +1,114 @@
|
|||||||
#ifndef PARSER_H
|
#ifndef LR_TABLE_H
|
||||||
#define PARSER_H
|
#define LR_TABLE_H
|
||||||
|
|
||||||
|
#include "md5.h"
|
||||||
|
|
||||||
|
// Builds the LR table (PgfLRTable) for a concrete syntax; make() is the
// only public operation besides the constructor.
// NOTE(review): State, Item, Predictions and CompareItem are only forward
// declared here, so the role comments below are inferred from names and
// container types -- confirm against the implementation file.
class PGF_INTERNAL_DECL PgfLRTableMaker
|
||||||
|
{
|
||||||
|
struct State;
|
||||||
|
struct Item;
|
||||||
|
struct Predictions;
|
||||||
|
|
||||||
|
struct CompareItem;
|
||||||
|
static const CompareItem compare_item;
|
||||||
|
|
||||||
|
// Lookup key for `predictions`: a text together with a number
// (presumably a category name and a field index -- TODO confirm).
typedef std::pair<ref<PgfText>,size_t> Key;
|
||||||
|
|
||||||
|
// Orders keys by the text first (using textcmp) and by the number
// when the texts compare equal.
struct PGF_INTERNAL_DECL CompareKey : std::less<Key> {
|
||||||
|
bool operator() (const Key& k1, const Key& k2) const {
|
||||||
|
int cmp = textcmp(k1.first,k2.first);
|
||||||
|
if (cmp < 0)
|
||||||
|
return true;
|
||||||
|
else if (cmp > 0)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// same text: the numeric component decides
return (k1.second < k2.second);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// the abstract and the concrete syntax the table is built for
ref<PgfAbstr> abstr;
|
||||||
|
ref<PgfConcr> concr;
|
||||||
|
|
||||||
|
// presumably states that still have to be processed
std::vector<State*> todo;
|
||||||
|
// states indexed by an MD5 digest (presumably of their contents, so
// that identical states are shared -- TODO confirm)
std::map<MD5Digest,State*> states;
|
||||||
|
std::map<Key,Predictions*,CompareKey> predictions;
|
||||||
|
std::map<Predictions*,State*> continuations;
|
||||||
|
std::vector<Item*> completed;
|
||||||
|
|
||||||
|
void process(Item *item);
|
||||||
|
void symbol(Item *item, PgfSymbol sym);
|
||||||
|
void predict(Item *item, ref<PgfText> cat,
|
||||||
|
ref<Vector<PgfVariableRange>> vars, PgfLParam *r);
|
||||||
|
void predict(Item *item, ref<PgfText> cat, size_t r);
|
||||||
|
void predict(ref<PgfAbsFun> absfun, Predictions *preds);
|
||||||
|
void complete(Item *item);
|
||||||
|
|
||||||
|
// presumably a debugging aid
static void print_item(Item *item);
|
||||||
|
|
||||||
|
public:
|
||||||
|
PgfLRTableMaker(ref<PgfAbstr> abstr, ref<PgfConcr> concr);
|
||||||
|
// Returns a database reference to the constructed table.
ref<PgfLRTable> make();
|
||||||
|
};
|
||||||
|
|
||||||
|
class PgfPrinter;
|
||||||
|
|
||||||
|
class PGF_INTERNAL_DECL PgfParser : public PgfPhraseScanner, public PgfExprEnum
|
||||||
|
{
|
||||||
|
ref<PgfConcr> concr;
|
||||||
|
PgfText *sentence;
|
||||||
|
PgfMarshaller *m;
|
||||||
|
PgfUnmarshaller *u;
|
||||||
|
|
||||||
|
struct Choice;
|
||||||
|
struct Production;
|
||||||
|
struct StackNode;
|
||||||
|
struct ParseState;
|
||||||
|
struct ExprState;
|
||||||
|
struct ExprInstance;
|
||||||
|
struct Result;
|
||||||
|
struct CompareExprState : std::less<ExprState*> {
|
||||||
|
bool operator() (const ExprState *state1, const ExprState *state2) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
ParseState *before, *after, *ahead;
|
||||||
|
std::priority_queue<ExprState*, std::vector<ExprState*>, CompareExprState> queue;
|
||||||
|
int last_fid;
|
||||||
|
|
||||||
|
Result *top_res;
|
||||||
|
size_t top_res_index;
|
||||||
|
|
||||||
|
void shift(StackNode *parent, ref<PgfConcrLincat> lincat, size_t r, Production *prod,
|
||||||
|
ParseState *state);
|
||||||
|
void reduce(StackNode *parent, ref<PgfConcrLin> lin, size_t seq_index,
|
||||||
|
size_t n, std::vector<Choice*> &args);
|
||||||
|
void complete(StackNode *parent, ref<PgfConcrLincat> lincat, size_t seq_index,
|
||||||
|
size_t n, std::vector<Choice*> &args);
|
||||||
|
void reduce_all(StackNode *state);
|
||||||
|
void print_prod(Choice *choice, Production *prod);
|
||||||
|
void print_transition(StackNode *source, StackNode *target, ParseState *state);
|
||||||
|
|
||||||
|
typedef std::map<std::pair<Choice*,Choice*>,Choice*> intersection_map;
|
||||||
|
|
||||||
|
Choice *intersect_choice(Choice *choice1, Choice *choice2, intersection_map &im);
|
||||||
|
|
||||||
|
void print_expr_state_before(PgfPrinter *printer, ExprState *state);
|
||||||
|
void print_expr_state_after(PgfPrinter *printer, ExprState *state);
|
||||||
|
void print_expr_state(ExprState *state);
|
||||||
|
|
||||||
|
void predict_expr_states(Choice *choice, prob_t outside_prob);
|
||||||
|
bool process_expr_state(ExprState *state);
|
||||||
|
void complete_expr_state(ExprState *state);
|
||||||
|
void combine_expr_state(ExprState *state, ExprInstance &inst);
|
||||||
|
void release_expr_state(ExprState *state);
|
||||||
|
|
||||||
class PGF_INTERNAL_DECL PgfParser : public PgfPhraseScanner, public PgfExprEnum {
|
|
||||||
public:
|
public:
|
||||||
PgfParser(ref<PgfConcr> concr, ref<PgfConcrLincat> start, PgfText *sentence, PgfMarshaller *m, PgfUnmarshaller *u);
|
PgfParser(ref<PgfConcr> concr, ref<PgfConcrLincat> start, PgfText *sentence, PgfMarshaller *m, PgfUnmarshaller *u);
|
||||||
|
|
||||||
void space(PgfTextSpot *start, PgfTextSpot *end, PgfExn* err);
|
virtual void space(PgfTextSpot *start, PgfTextSpot *end, PgfExn* err);
|
||||||
void start_matches(PgfTextSpot *end, PgfExn* err);
|
virtual void start_matches(PgfTextSpot *end, PgfExn* err);
|
||||||
void match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err);
|
virtual void match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err);
|
||||||
void end_matches(PgfTextSpot *end, PgfExn* err);
|
virtual void end_matches(PgfTextSpot *end, PgfExn* err);
|
||||||
|
|
||||||
void prepare();
|
|
||||||
PgfExpr fetch(PgfDB *db, prob_t *prob);
|
PgfExpr fetch(PgfDB *db, prob_t *prob);
|
||||||
|
|
||||||
virtual ~PgfParser();
|
|
||||||
|
|
||||||
private:
|
|
||||||
class CFGCat;
|
|
||||||
class State;
|
|
||||||
class Choice;
|
|
||||||
class Production;
|
|
||||||
|
|
||||||
class ParseItemConts;
|
|
||||||
|
|
||||||
class Item {
|
|
||||||
public:
|
|
||||||
prob_t get_prob() { return inside_prob + outside_prob; };
|
|
||||||
|
|
||||||
virtual State *proceed(PgfParser *parser, PgfUnmarshaller *u) = 0;
|
|
||||||
virtual bool combine(PgfParser *parser, ParseItemConts *conts, PgfExpr expr, prob_t inside_prob, PgfUnmarshaller *u) = 0;
|
|
||||||
virtual void print1(PgfPrinter *printer, State *state, PgfMarshaller *m) = 0;
|
|
||||||
virtual void print2(PgfPrinter *printer, State *state, int x, PgfMarshaller *m) = 0;
|
|
||||||
virtual PgfExpr get_expr(PgfUnmarshaller *u) = 0;
|
|
||||||
|
|
||||||
void trace(State *state, PgfMarshaller *m);
|
|
||||||
|
|
||||||
protected:
|
|
||||||
prob_t inside_prob;
|
|
||||||
prob_t outside_prob;
|
|
||||||
};
|
|
||||||
|
|
||||||
class ParseItem;
|
|
||||||
class ExprItem;
|
|
||||||
class MetaItem;
|
|
||||||
|
|
||||||
ref<PgfConcr> concr;
|
|
||||||
ref<PgfConcrLincat> start;
|
|
||||||
PgfText *sentence;
|
|
||||||
|
|
||||||
size_t last_choice_id;
|
|
||||||
|
|
||||||
State *before, *after;
|
|
||||||
|
|
||||||
PgfMarshaller *m;
|
|
||||||
PgfUnmarshaller *u;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -861,9 +861,9 @@ public:
|
|||||||
|
|
||||||
virtual void match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err)
|
virtual void match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err)
|
||||||
{
|
{
|
||||||
ref<PgfLincatField> field =
|
ref<PgfText> field =
|
||||||
vector_elem(lin->lincat->fields, seq_index % lin->lincat->fields->len);
|
*vector_elem(lin->lincat->fields, seq_index % lin->lincat->fields->len);
|
||||||
callback->fn(callback, &lin->absfun->name, &(*field->name), lin->lincat->abscat->prob+lin->absfun->prob, err);
|
callback->fn(callback, &lin->absfun->name, field, lin->lincat->abscat->prob+lin->absfun->prob, err);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void end_matches(PgfTextSpot *end, PgfExn* err)
|
virtual void end_matches(PgfTextSpot *end, PgfExn* err)
|
||||||
@@ -909,9 +909,9 @@ public:
|
|||||||
|
|
||||||
virtual void match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err)
|
virtual void match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err)
|
||||||
{
|
{
|
||||||
ref<PgfLincatField> field =
|
ref<PgfText> field =
|
||||||
vector_elem(lin->lincat->fields, seq_index % lin->lincat->fields->len);
|
*vector_elem(lin->lincat->fields, seq_index % lin->lincat->fields->len);
|
||||||
callback->morpho.fn(&callback->morpho, &lin->absfun->name, &(*field->name), lin->lincat->abscat->prob+lin->absfun->prob, err);
|
callback->morpho.fn(&callback->morpho, &lin->absfun->name, field, lin->lincat->abscat->prob+lin->absfun->prob, err);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void end_matches(PgfTextSpot *end, PgfExn* err)
|
virtual void end_matches(PgfTextSpot *end, PgfExn* err)
|
||||||
@@ -976,7 +976,7 @@ PGF_API
|
|||||||
PgfText *pgf_get_lincat_field_internal(object o, size_t i)
|
PgfText *pgf_get_lincat_field_internal(object o, size_t i)
|
||||||
{
|
{
|
||||||
ref<PgfConcrLincat> lincat = o;
|
ref<PgfConcrLincat> lincat = o;
|
||||||
return &*(vector_elem(lincat->fields, i)->name);
|
return &**vector_elem(lincat->fields, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
PGF_API
|
PGF_API
|
||||||
@@ -1654,6 +1654,7 @@ class PGF_INTERNAL PgfLinBuilder : public PgfLinBuilderIface
|
|||||||
ref<Vector<ref<PgfSequence>>> seqs;
|
ref<Vector<ref<PgfSequence>>> seqs;
|
||||||
|
|
||||||
object container; // what are we building?
|
object container; // what are we building?
|
||||||
|
ref<PgfConcrLincat> container_lincat;
|
||||||
|
|
||||||
size_t var_index;
|
size_t var_index;
|
||||||
size_t arg_index;
|
size_t arg_index;
|
||||||
@@ -1712,17 +1713,15 @@ public:
|
|||||||
lincat->seqs = seqs;
|
lincat->seqs = seqs;
|
||||||
lincat->n_lindefs = n_lindefs;
|
lincat->n_lindefs = n_lindefs;
|
||||||
|
|
||||||
ref<Vector<PgfLincatField>> db_fields = vector_new<PgfLincatField>(n_fields);
|
ref<Vector<ref<PgfText>>> db_fields = vector_new<ref<PgfText>>(n_fields);
|
||||||
for (size_t i = 0; i < n_fields; i++) {
|
for (size_t i = 0; i < n_fields; i++) {
|
||||||
ref<PgfText> name = textdup_db(fields[i]);
|
ref<PgfText> name = textdup_db(fields[i]);
|
||||||
vector_elem(db_fields, i)->lincat = lincat;
|
*vector_elem(db_fields, i) = name;
|
||||||
vector_elem(db_fields, i)->name = name;
|
|
||||||
vector_elem(db_fields, i)->backrefs = 0;
|
|
||||||
vector_elem(db_fields, i)->epsilons = 0;
|
|
||||||
}
|
}
|
||||||
lincat->fields = db_fields;
|
lincat->fields = db_fields;
|
||||||
|
|
||||||
this->container = lincat.tagged();
|
this->container = lincat.tagged();
|
||||||
|
this->container_lincat = 0;
|
||||||
|
|
||||||
build->build(this, err);
|
build->build(this, err);
|
||||||
if (err->type == PGF_EXN_NONE && res_index != res->len) {
|
if (err->type == PGF_EXN_NONE && res_index != res->len) {
|
||||||
@@ -1760,6 +1759,7 @@ public:
|
|||||||
lin->seqs = seqs;
|
lin->seqs = seqs;
|
||||||
|
|
||||||
this->container = lin.tagged();
|
this->container = lin.tagged();
|
||||||
|
this->container_lincat = lincat;
|
||||||
|
|
||||||
build->build(this, err);
|
build->build(this, err);
|
||||||
if (err->type == PGF_EXN_NONE && res_index != res->len) {
|
if (err->type == PGF_EXN_NONE && res_index != res->len) {
|
||||||
@@ -2149,7 +2149,7 @@ public:
|
|||||||
|
|
||||||
PgfPhrasetable phrasetable =
|
PgfPhrasetable phrasetable =
|
||||||
phrasetable_internalize(concr->phrasetable,
|
phrasetable_internalize(concr->phrasetable,
|
||||||
seq, container, seq_index,
|
seq, container_lincat, container, seq_index,
|
||||||
&entry);
|
&entry);
|
||||||
concr->phrasetable = phrasetable;
|
concr->phrasetable = phrasetable;
|
||||||
*vector_elem(seqs, seq_index) = entry->seq;
|
*vector_elem(seqs, seq_index) = entry->seq;
|
||||||
@@ -2418,7 +2418,7 @@ PgfText **pgf_category_fields(PgfDB *db, PgfConcrRevision revision,
|
|||||||
if (fields == 0)
|
if (fields == 0)
|
||||||
throw pgf_systemerror(ENOMEM);
|
throw pgf_systemerror(ENOMEM);
|
||||||
for (size_t i = 0; i < n_fields; i++) {
|
for (size_t i = 0; i < n_fields; i++) {
|
||||||
fields[i] = textdup(vector_elem(lincat->fields, i)->name);
|
fields[i] = textdup(*vector_elem(lincat->fields, i));
|
||||||
}
|
}
|
||||||
*p_n_fields = n_fields;
|
*p_n_fields = n_fields;
|
||||||
return fields;
|
return fields;
|
||||||
@@ -2511,7 +2511,7 @@ PgfText **pgf_tabular_linearize(PgfDB *db, PgfConcrRevision revision,
|
|||||||
|
|
||||||
PgfText *text = out.get_text();
|
PgfText *text = out.get_text();
|
||||||
if (text != NULL) {
|
if (text != NULL) {
|
||||||
res[pos++] = textdup(&*(vector_elem(lincat->fields,i)->name));
|
res[pos++] = textdup(&**vector_elem(lincat->fields,i));
|
||||||
res[pos++] = text;
|
res[pos++] = text;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2550,7 +2550,7 @@ PgfText **pgf_tabular_linearize_all(PgfDB *db, PgfConcrRevision revision,
|
|||||||
|
|
||||||
PgfText *text = out.get_text();
|
PgfText *text = out.get_text();
|
||||||
if (text != NULL) {
|
if (text != NULL) {
|
||||||
res[pos++] = textdup(&*(vector_elem(lincat->fields, i)->name));
|
res[pos++] = textdup(&**vector_elem(lincat->fields, i));
|
||||||
res[pos++] = text;
|
res[pos++] = text;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2656,7 +2656,6 @@ PgfExprEnum *pgf_parse(PgfDB *db, PgfConcrRevision revision,
|
|||||||
phrasetable_lookup_cohorts(concr->phrasetable,
|
phrasetable_lookup_cohorts(concr->phrasetable,
|
||||||
sentence, case_sensitive,
|
sentence, case_sensitive,
|
||||||
parser, err);
|
parser, err);
|
||||||
parser->prepare();
|
|
||||||
return parser;
|
return parser;
|
||||||
} PGF_API_END
|
} PGF_API_END
|
||||||
|
|
||||||
|
|||||||
@@ -299,9 +299,87 @@ int text_sequence_cmp(PgfTextSpot *spot, const uint8_t *end,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
int backref_cmp(ref<PgfSequenceBackref> backref, ref<PgfConcrLincat> lincat, size_t r)
|
||||||
|
{
|
||||||
|
int cmp = 0;
|
||||||
|
switch (ref<PgfConcrLin>::get_tag(backref->container)) {
|
||||||
|
case PgfConcrLin::tag: {
|
||||||
|
ref<PgfConcrLin> lin = ref<PgfConcrLin>::untagged(backref->container);
|
||||||
|
if (lincat.as_object() < lin->lincat.as_object())
|
||||||
|
cmp = -1;
|
||||||
|
else if (lincat.as_object() > lin->lincat.as_object())
|
||||||
|
cmp = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PgfConcrLincat::tag: {
|
||||||
|
if (lincat.as_object() > 0)
|
||||||
|
cmp = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cmp == 0) {
|
||||||
|
size_t r1 =
|
||||||
|
(lincat == 0) ? 0
|
||||||
|
: backref->seq_index % lincat->fields->len;
|
||||||
|
if (r < r1)
|
||||||
|
cmp = -1;
|
||||||
|
else if (r > r1)
|
||||||
|
cmp = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return cmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the back reference vector of the phrase table node `table`
// grown by one entry (container, seq_index).
//
// For a non-empty sequence the new entry is simply appended. For the
// empty (epsilon) sequence the entries are kept sorted by lincat and
// field index, so the new entry is placed at its sorted position and the
// entries after it are moved up by one slot.
//
// The resized vector is only returned; this function does not store it
// back into the node.
static
|
||||||
|
ref<Vector<PgfSequenceBackref>> phrasetable_update_backrefs(PgfPhrasetable table,
|
||||||
|
ref<PgfConcrLincat> lincat,
|
||||||
|
object container,
|
||||||
|
size_t seq_index)
|
||||||
|
{
|
||||||
|
// number of entries already present (no vector yet counts as zero)
size_t len = (table->value.backrefs != 0)
|
||||||
|
? table->value.backrefs->len
|
||||||
|
: 0;
|
||||||
|
|
||||||
|
ref<Vector<PgfSequenceBackref>> backrefs =
|
||||||
|
vector_resize<PgfSequenceBackref>(table->value.backrefs, len+1, table->txn_id);
|
||||||
|
// [i, j] is the range of old entries still being searched. The new
// entry is always written to slot j+1, which is the fresh last slot
// (len) unless the search below moves j down.
ssize_t i = 0;
|
||||||
|
ssize_t j = len-1;
|
||||||
|
if (table->value.seq->syms.len == 0 && len > 0) {
|
||||||
|
// The backrefs for the epsilon sequence are sorted by lincat and r
|
||||||
|
|
||||||
|
size_t r = (lincat!=0) ? (seq_index % lincat->fields->len) : 0;
|
||||||
|
// Binary search combined with the shifting needed for the insertion
while (i <= j) {
|
||||||
|
ssize_t k = (i + j) / 2;
|
||||||
|
ref<PgfSequenceBackref> backref = vector_elem(backrefs, k);
|
||||||
|
|
||||||
|
int cmp = backref_cmp(backref, lincat, r);
|
||||||
|
if (cmp < 0) {
|
||||||
|
// The new entry sorts before entry k: move entries k..j up by
// one slot and continue the search below k (j ends up at k-1).
while (j >= k) {
|
||||||
|
backrefs->data[j+1] = backrefs->data[j];
|
||||||
|
j--;
|
||||||
|
}
|
||||||
|
} else if (cmp > 0) {
|
||||||
|
// The new entry sorts after entry k: continue above k.
i = k+1;
|
||||||
|
} else {
|
||||||
|
// Equal to entry k: move only the entries above k, so that the
// new entry lands directly after it.
while (j > k) {
|
||||||
|
backrefs->data[j+1] = backrefs->data[j];
|
||||||
|
j--;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Fill in the slot that is now free.
backrefs->data[j+1].container = container;
|
||||||
|
backrefs->data[j+1].seq_index = seq_index;
|
||||||
|
return backrefs;
|
||||||
|
}
|
||||||
|
|
||||||
PGF_INTERNAL
|
PGF_INTERNAL
|
||||||
PgfPhrasetable phrasetable_internalize(PgfPhrasetable table,
|
PgfPhrasetable phrasetable_internalize(PgfPhrasetable table,
|
||||||
ref<PgfSequence> seq,
|
ref<PgfSequence> seq,
|
||||||
|
ref<PgfConcrLincat> lincat,
|
||||||
object container,
|
object container,
|
||||||
size_t seq_index,
|
size_t seq_index,
|
||||||
ref<PgfPhrasetableEntry> *pentry)
|
ref<PgfPhrasetableEntry> *pentry)
|
||||||
@@ -321,6 +399,7 @@ PgfPhrasetable phrasetable_internalize(PgfPhrasetable table,
|
|||||||
if (cmp < 0) {
|
if (cmp < 0) {
|
||||||
PgfPhrasetable left = phrasetable_internalize(table->left,
|
PgfPhrasetable left = phrasetable_internalize(table->left,
|
||||||
seq,
|
seq,
|
||||||
|
lincat,
|
||||||
container,
|
container,
|
||||||
seq_index,
|
seq_index,
|
||||||
pentry);
|
pentry);
|
||||||
@@ -329,6 +408,7 @@ PgfPhrasetable phrasetable_internalize(PgfPhrasetable table,
|
|||||||
} else if (cmp > 0) {
|
} else if (cmp > 0) {
|
||||||
PgfPhrasetable right = phrasetable_internalize(table->right,
|
PgfPhrasetable right = phrasetable_internalize(table->right,
|
||||||
seq,
|
seq,
|
||||||
|
lincat,
|
||||||
container,
|
container,
|
||||||
seq_index,
|
seq_index,
|
||||||
pentry);
|
pentry);
|
||||||
@@ -342,9 +422,7 @@ PgfPhrasetable phrasetable_internalize(PgfPhrasetable table,
|
|||||||
: 0;
|
: 0;
|
||||||
|
|
||||||
ref<Vector<PgfSequenceBackref>> backrefs =
|
ref<Vector<PgfSequenceBackref>> backrefs =
|
||||||
vector_resize<PgfSequenceBackref>(table->value.backrefs, len+1, table->txn_id);
|
phrasetable_update_backrefs(table,lincat,container,seq_index);
|
||||||
backrefs->data[len].container = container;
|
|
||||||
backrefs->data[len].seq_index = seq_index;
|
|
||||||
|
|
||||||
PgfPhrasetable new_table =
|
PgfPhrasetable new_table =
|
||||||
Node<PgfPhrasetableEntry>::upd_node(table, table->left, table->right);
|
Node<PgfPhrasetableEntry>::upd_node(table, table->left, table->right);
|
||||||
@@ -356,6 +434,7 @@ PgfPhrasetable phrasetable_internalize(PgfPhrasetable table,
|
|||||||
|
|
||||||
PGF_INTERNAL
|
PGF_INTERNAL
|
||||||
ref<PgfSequence> phrasetable_relink(PgfPhrasetable table,
|
ref<PgfSequence> phrasetable_relink(PgfPhrasetable table,
|
||||||
|
ref<PgfConcrLincat> lincat,
|
||||||
object container,
|
object container,
|
||||||
size_t seq_index,
|
size_t seq_index,
|
||||||
size_t seq_id)
|
size_t seq_id)
|
||||||
@@ -370,9 +449,7 @@ ref<PgfSequence> phrasetable_relink(PgfPhrasetable table,
|
|||||||
: table->value.backrefs->len;
|
: table->value.backrefs->len;
|
||||||
|
|
||||||
ref<Vector<PgfSequenceBackref>> backrefs =
|
ref<Vector<PgfSequenceBackref>> backrefs =
|
||||||
vector_resize<PgfSequenceBackref>(table->value.backrefs, len+1, table->txn_id);
|
phrasetable_update_backrefs(table,lincat,container,seq_index);
|
||||||
backrefs->data[len].container = container;
|
|
||||||
backrefs->data[len].seq_index = seq_index;
|
|
||||||
table->value.backrefs = backrefs;
|
table->value.backrefs = backrefs;
|
||||||
|
|
||||||
return table->value.seq;
|
return table->value.seq;
|
||||||
@@ -397,12 +474,16 @@ PgfPhrasetable phrasetable_delete(PgfPhrasetable table,
|
|||||||
PgfPhrasetable left = phrasetable_delete(table->left,
|
PgfPhrasetable left = phrasetable_delete(table->left,
|
||||||
container, seq_index,
|
container, seq_index,
|
||||||
seq);
|
seq);
|
||||||
|
if (left == table->left)
|
||||||
|
return table;
|
||||||
table = Node<PgfPhrasetableEntry>::upd_node(table,left,table->right);
|
table = Node<PgfPhrasetableEntry>::upd_node(table,left,table->right);
|
||||||
return Node<PgfPhrasetableEntry>::balanceR(table);
|
return Node<PgfPhrasetableEntry>::balanceR(table);
|
||||||
} else if (cmp > 0) {
|
} else if (cmp > 0) {
|
||||||
PgfPhrasetable right = phrasetable_delete(table->right,
|
PgfPhrasetable right = phrasetable_delete(table->right,
|
||||||
container, seq_index,
|
container, seq_index,
|
||||||
seq);
|
seq);
|
||||||
|
if (right == table->right)
|
||||||
|
return table;
|
||||||
table = Node<PgfPhrasetableEntry>::upd_node(table,table->left,right);
|
table = Node<PgfPhrasetableEntry>::upd_node(table,table->left,right);
|
||||||
return Node<PgfPhrasetableEntry>::balanceL(table);
|
return Node<PgfPhrasetableEntry>::balanceL(table);
|
||||||
} else {
|
} else {
|
||||||
@@ -566,10 +647,10 @@ void finish_skipping(PgfCohortsState *state) {
|
|||||||
|
|
||||||
state->queue.pop();
|
state->queue.pop();
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
state->scanner->space(&state->spot, &state->spot,
|
state->scanner->space(&state->spot, &state->spot,
|
||||||
state->err);
|
state->err);
|
||||||
|
*/
|
||||||
state->last.pos = 0;
|
state->last.pos = 0;
|
||||||
state->last.ptr = NULL;
|
state->last.ptr = NULL;
|
||||||
state->skipping = false;
|
state->skipping = false;
|
||||||
@@ -740,6 +821,56 @@ void phrasetable_lookup_cohorts(PgfPhrasetable table,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Calls f(lin, seq_index) for every back-reference of the epsilon
// sequence whose key equals (lincat, r) according to backref_cmp.
//
// Only the leftmost node of the table is inspected; if its sequence
// contains any symbol the function returns without calling f. The
// back-references of that node are expected to be sorted by backref_cmp,
// since they are located with a binary search.
//
// Callback order: starting from the element found by the binary search,
// first the equal elements to its left (nearest first), then the element
// itself, then the equal elements to its right.
//
// NOTE(review): every matching container is handed to f as a
// ref<PgfConcrLin>. With lincat == 0 backref_cmp can also report a match
// for containers tagged PgfConcrLincat - confirm that callers never pass
// a zero lincat.
PGF_INTERNAL
void phrasetable_lookup_epsilons(PgfPhrasetable table,
                                 ref<PgfConcrLincat> lincat, size_t r,
                                 std::function<void(ref<PgfConcrLin>,size_t)> &f)
{
    // An empty table has no leftmost node to inspect.
    if (table == 0)
        return;

    while (table->left != 0) {
        table = table->left;
    }

    if (table->value.seq->syms.len > 0)
        return;

    size_t len = (table->value.backrefs != 0)
                    ? table->value.backrefs->len
                    : 0;

    ssize_t i = 0;
    ssize_t j = len-1;
    while (i <= j) {
        ssize_t k = (i + j) / 2;
        ref<PgfSequenceBackref> hit = vector_elem(table->value.backrefs, k);

        int cmp = backref_cmp(hit, lincat, r);
        if (cmp < 0) {
            j = k-1;
        } else if (cmp > 0) {
            i = k+1;
        } else {
            // Equal neighbours to the left of the hit.
            for (ssize_t p = k; p > 0; p--) {
                ref<PgfSequenceBackref> prev = vector_elem(table->value.backrefs, p-1);
                if (backref_cmp(prev, lincat, r) != 0)
                    break;
                f(ref<PgfConcrLin>::untagged(prev->container),prev->seq_index);
            }

            // The hit itself.
            f(ref<PgfConcrLin>::untagged(hit->container),hit->seq_index);

            // Equal neighbours to the right of the hit.
            for (size_t n = (size_t) k + 1; n < len; n++) {
                ref<PgfSequenceBackref> next = vector_elem(table->value.backrefs, n);
                if (backref_cmp(next, lincat, r) != 0)
                    break;
                f(ref<PgfConcrLin>::untagged(next->container),next->seq_index);
            }
            break;
        }
    }
}
|
||||||
|
|
||||||
PGF_INTERNAL
|
PGF_INTERNAL
|
||||||
void phrasetable_iter(PgfConcr *concr,
|
void phrasetable_iter(PgfConcr *concr,
|
||||||
PgfPhrasetable table,
|
PgfPhrasetable table,
|
||||||
@@ -768,10 +899,10 @@ void phrasetable_iter(PgfConcr *concr,
|
|||||||
ref<PgfConcrLincat> lincat =
|
ref<PgfConcrLincat> lincat =
|
||||||
namespace_lookup(concr->lincats, &lin->absfun->type->name);
|
namespace_lookup(concr->lincats, &lin->absfun->type->name);
|
||||||
if (lincat != 0) {
|
if (lincat != 0) {
|
||||||
ref<PgfLincatField> field =
|
ref<PgfText> field =
|
||||||
vector_elem(lincat->fields, backref.seq_index % lincat->fields->len);
|
*vector_elem(lincat->fields, backref.seq_index % lincat->fields->len);
|
||||||
|
|
||||||
callback->fn(callback, &lin->absfun->name, &(*field->name), lincat->abscat->prob+lin->absfun->prob, err);
|
callback->fn(callback, &lin->absfun->name, &*field, lincat->abscat->prob+lin->absfun->prob, err);
|
||||||
if (err->type != PGF_EXN_NONE)
|
if (err->type != PGF_EXN_NONE)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -52,15 +52,19 @@ private:
|
|||||||
#pragma GCC diagnostic pop
|
#pragma GCC diagnostic pop
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
struct PgfConcrLincat;
|
||||||
|
|
||||||
PGF_INTERNAL_DECL
|
PGF_INTERNAL_DECL
|
||||||
PgfPhrasetable phrasetable_internalize(PgfPhrasetable table,
|
PgfPhrasetable phrasetable_internalize(PgfPhrasetable table,
|
||||||
ref<PgfSequence> seq,
|
ref<PgfSequence> seq,
|
||||||
|
ref<PgfConcrLincat> lincat,
|
||||||
object container,
|
object container,
|
||||||
size_t seq_index,
|
size_t seq_index,
|
||||||
ref<PgfPhrasetableEntry> *pentry);
|
ref<PgfPhrasetableEntry> *pentry);
|
||||||
|
|
||||||
PGF_INTERNAL_DECL
|
PGF_INTERNAL_DECL
|
||||||
ref<PgfSequence> phrasetable_relink(PgfPhrasetable table,
|
ref<PgfSequence> phrasetable_relink(PgfPhrasetable table,
|
||||||
|
ref<PgfConcrLincat> lincat,
|
||||||
object container,
|
object container,
|
||||||
size_t seq_index,
|
size_t seq_index,
|
||||||
size_t seq_id);
|
size_t seq_id);
|
||||||
@@ -101,6 +105,11 @@ void phrasetable_lookup_cohorts(PgfPhrasetable table,
|
|||||||
bool case_sensitive,
|
bool case_sensitive,
|
||||||
PgfPhraseScanner *scanner, PgfExn* err);
|
PgfPhraseScanner *scanner, PgfExn* err);
|
||||||
|
|
||||||
|
PGF_INTERNAL_DECL
|
||||||
|
void phrasetable_lookup_epsilons(PgfPhrasetable table,
|
||||||
|
ref<PgfConcrLincat> lincat, size_t r,
|
||||||
|
std::function<void(ref<PgfConcrLin>, size_t)> &f);
|
||||||
|
|
||||||
PGF_INTERNAL_DECL
|
PGF_INTERNAL_DECL
|
||||||
void phrasetable_iter(PgfConcr *concr,
|
void phrasetable_iter(PgfConcr *concr,
|
||||||
PgfPhrasetable table,
|
PgfPhrasetable table,
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
#include "data.h"
|
#include "data.h"
|
||||||
#include "reader.h"
|
#include "reader.h"
|
||||||
|
#include "parser.h"
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
@@ -650,14 +651,14 @@ ref<PgfSequence> PgfReader::read_seq()
|
|||||||
return seq;
|
return seq;
|
||||||
}
|
}
|
||||||
|
|
||||||
ref<Vector<ref<PgfSequence>>> PgfReader::read_seq_ids(object container)
|
ref<Vector<ref<PgfSequence>>> PgfReader::read_seq_ids(ref<PgfConcrLincat> lincat, object container)
|
||||||
{
|
{
|
||||||
size_t len = read_len();
|
size_t len = read_len();
|
||||||
ref<Vector<ref<PgfSequence>>> vec = vector_new<ref<PgfSequence>>(len);
|
ref<Vector<ref<PgfSequence>>> vec = vector_new<ref<PgfSequence>>(len);
|
||||||
for (size_t i = 0; i < len; i++) {
|
for (size_t i = 0; i < len; i++) {
|
||||||
size_t seq_id = read_len();
|
size_t seq_id = read_len();
|
||||||
ref<PgfSequence> seq = phrasetable_relink(concrete->phrasetable,
|
ref<PgfSequence> seq = phrasetable_relink(concrete->phrasetable,
|
||||||
container, i,
|
lincat, container, i,
|
||||||
seq_id);
|
seq_id);
|
||||||
if (seq == 0) {
|
if (seq == 0) {
|
||||||
throw pgf_error("Invalid sequence id");
|
throw pgf_error("Invalid sequence id");
|
||||||
@@ -701,7 +702,7 @@ ref<PgfConcrLincat> PgfReader::read_lincat()
|
|||||||
auto n_lindefs = read_len();
|
auto n_lindefs = read_len();
|
||||||
auto args = read_vector(&PgfReader::read_parg);
|
auto args = read_vector(&PgfReader::read_parg);
|
||||||
auto res = read_vector(&PgfReader::read_presult2);
|
auto res = read_vector(&PgfReader::read_presult2);
|
||||||
auto seqs = read_seq_ids(lincat.tagged());
|
auto seqs = read_seq_ids(0, lincat.tagged());
|
||||||
|
|
||||||
lincat->abscat = namespace_lookup(abstract->cats, &lincat->name);
|
lincat->abscat = namespace_lookup(abstract->cats, &lincat->name);
|
||||||
lincat->fields = fields;
|
lincat->fields = fields;
|
||||||
@@ -712,130 +713,35 @@ ref<PgfConcrLincat> PgfReader::read_lincat()
|
|||||||
return lincat;
|
return lincat;
|
||||||
}
|
}
|
||||||
|
|
||||||
ref<Vector<PgfLincatField>> PgfReader::read_lincat_fields(ref<PgfConcrLincat> lincat)
|
ref<Vector<ref<PgfText>>> PgfReader::read_lincat_fields(ref<PgfConcrLincat> lincat)
|
||||||
{
|
{
|
||||||
size_t len = read_len();
|
size_t len = read_len();
|
||||||
ref<Vector<PgfLincatField>> fields = vector_new<PgfLincatField>(len);
|
ref<Vector<ref<PgfText>>> fields = vector_new<ref<PgfText>>(len);
|
||||||
for (size_t i = 0; i < len; i++) {
|
for (size_t i = 0; i < len; i++) {
|
||||||
auto name = read_text();
|
auto name = read_text();
|
||||||
|
*vector_elem(fields,i) = name;
|
||||||
ref<PgfLincatField> field = vector_elem(fields,i);
|
|
||||||
field->lincat = lincat;
|
|
||||||
field->name = name;
|
|
||||||
field->backrefs = 0;
|
|
||||||
field->epsilons = 0;
|
|
||||||
}
|
}
|
||||||
return fields;
|
return fields;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Registers position `dot` of sequence number `seq_index` of `lin` in the
// per-field index stored on the lincats.
//
//  * If `dot` is at or past the end of the sequence, an epsilon entry
//    (lin, seq_index) is appended to the result field of the sequence.
//    When that is the first epsilon of the field, every back-reference
//    already registered on the field is re-indexed one position further.
//  * Otherwise, if the symbol at `dot` is a PgfSymbolCat, a back-reference
//    (lin, seq_index, dot) is appended to each field of the argument's
//    lincat that the symbol's parameter can select. If any of those
//    fields already has epsilons, indexing continues at dot+1.
//  * Any other kind of symbol is ignored.
//
// Throws pgf_error when the argument category has no lincat.
static void add_to_index(ref<PgfConcr> concrete, ref<PgfConcrLin> lin, size_t seq_index, size_t dot)
{
    const size_t n_fields = lin->lincat->fields->len;
    ref<PgfSequence> seq = *vector_elem(lin->seqs, seq_index);
    ref<PgfPResult> result = *vector_elem(lin->res, seq_index / n_fields);
    ref<PgfLincatField> field = vector_elem(lin->lincat->fields, seq_index % n_fields);

    if (dot >= seq->syms.len) {
        // The whole sequence has been consumed: record an epsilon.
        ref<Vector<PgfLincatEpsilon>> eps = field->epsilons;
        const size_t old_len = (eps == 0) ? 0 : eps->len;
        eps = vector_resize(eps, old_len+1, PgfDB::get_txn_id());
        field->epsilons = eps;

        ref<PgfLincatEpsilon> last = vector_elem(eps, eps->len-1);
        last->lin = lin;
        last->seq_index = seq_index;

        // Only the first epsilon of a field advances its back-references.
        if (eps->len != 1 || field->backrefs == 0)
            return;

        for (size_t i = 0; i < field->backrefs->len; i++) {
            ref<PgfLincatBackref> waiting = vector_elem(field->backrefs, i);
            add_to_index(concrete, waiting->lin, waiting->seq_index, waiting->dot+1);
        }
        return;
    }

    PgfSymbol sym = *vector_elem(&seq->syms, dot);
    if (ref<PgfSymbol>::get_tag(sym) != PgfSymbolCat::tag)
        return;

    auto sym_cat = ref<PgfSymbolCat>::untagged(sym);

    ref<PgfHypo> hypo = vector_elem(lin->absfun->type->hypos, sym_cat->d);
    ref<PgfConcrLincat> lincat =
        namespace_lookup(concrete->lincats, &hypo->type->name);
    if (lincat == 0)
        throw pgf_error("Found a lin which uses a category without a lincat");

    // Look up the range of each variable used by the parameter and
    // count the number of value combinations.
    // NOTE(review): ranges[t] stays unset when no entry of result->vars
    // matches the term's variable - confirm this cannot happen.
    size_t max_values = 1;
    size_t *ranges = (size_t *) alloca(sym_cat->r.n_terms*sizeof(size_t));
    for (size_t t = 0; t < sym_cat->r.n_terms; t++) {
        for (size_t j = 0; j < result->vars->len; j++) {
            auto var_range = vector_elem(result->vars, j);
            if (var_range->var != sym_cat->r.terms[t].var)
                continue;
            ranges[t] = var_range->range;
            max_values *= var_range->range;
            break;
        }
    }

    bool is_epsilon = false;
    for (size_t values = 0; values < max_values; values++) {
        // Decode `values` as mixed-radix digits, one per term, and
        // compute the field index selected by that combination.
        size_t rest = values;
        size_t index = sym_cat->r.i0;
        for (size_t t = 0; t < sym_cat->r.n_terms; t++) {
            index += sym_cat->r.terms[t].factor * (rest % ranges[t]);
            rest = rest / ranges[t];
        }

        ref<PgfLincatField> target = vector_elem(lincat->fields, index);

        ref<Vector<PgfLincatBackref>> backrefs = target->backrefs;
        const size_t n_backrefs = (backrefs == 0) ? 0 : backrefs->len;
        backrefs = vector_resize(backrefs, n_backrefs+1, PgfDB::get_txn_id());
        target->backrefs = backrefs;

        ref<PgfLincatBackref> added = vector_elem(backrefs, backrefs->len-1);
        added->lin = lin;
        added->seq_index = seq_index;
        added->dot = dot;

        if (target->epsilons != 0)
            is_epsilon = true;
    }

    // The argument may be empty, so the next position is reachable too.
    if (is_epsilon)
        add_to_index(concrete, lin, seq_index, dot+1);
}
|
|
||||||
|
|
||||||
ref<PgfConcrLin> PgfReader::read_lin()
|
ref<PgfConcrLin> PgfReader::read_lin()
|
||||||
{
|
{
|
||||||
ref<PgfConcrLin> lin = read_name(&PgfConcrLin::name);
|
ref<PgfConcrLin> lin = read_name(&PgfConcrLin::name);
|
||||||
lin->absfun = namespace_lookup(abstract->funs, &lin->name);
|
lin->absfun = namespace_lookup(abstract->funs, &lin->name);
|
||||||
if (lin->absfun == 0)
|
if (lin->absfun == 0)
|
||||||
throw pgf_error("Found a lin without a fun");
|
throw pgf_error("Found a lin without a fun");
|
||||||
|
|
||||||
auto args = read_vector(&PgfReader::read_parg);
|
|
||||||
auto res = read_vector(&PgfReader::read_presult2);
|
|
||||||
auto seqs = read_seq_ids(lin.tagged());
|
|
||||||
|
|
||||||
lin->args = args;
|
|
||||||
lin->res = res;
|
|
||||||
lin->seqs = seqs;
|
|
||||||
lin->lincat =
|
lin->lincat =
|
||||||
namespace_lookup(concrete->lincats, &lin->absfun->type->name);
|
namespace_lookup(concrete->lincats, &lin->absfun->type->name);
|
||||||
if (lin->lincat == 0)
|
if (lin->lincat == 0)
|
||||||
throw pgf_error("Found a lin which uses a category without a lincat");
|
throw pgf_error("Found a lin which uses a category without a lincat");
|
||||||
|
|
||||||
for (size_t seq_index = 0; seq_index < lin->seqs->len; seq_index++) {
|
auto args = read_vector(&PgfReader::read_parg);
|
||||||
add_to_index(concrete, lin, seq_index, 0);
|
auto res = read_vector(&PgfReader::read_presult2);
|
||||||
}
|
auto seqs = read_seq_ids(lin->lincat, lin.tagged());
|
||||||
|
|
||||||
|
lin->args = args;
|
||||||
|
lin->res = res;
|
||||||
|
lin->seqs = seqs;
|
||||||
|
|
||||||
return lin;
|
return lin;
|
||||||
}
|
}
|
||||||
@@ -866,6 +772,9 @@ ref<PgfConcr> PgfReader::read_concrete()
|
|||||||
auto printnames = read_namespace<PgfConcrPrintname>(&PgfReader::read_printname);
|
auto printnames = read_namespace<PgfConcrPrintname>(&PgfReader::read_printname);
|
||||||
concrete->printnames = printnames;
|
concrete->printnames = printnames;
|
||||||
|
|
||||||
|
PgfLRTableMaker maker(abstract, concrete);
|
||||||
|
concrete->lrtable = maker.make();
|
||||||
|
|
||||||
return concrete;
|
return concrete;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -71,14 +71,14 @@ public:
|
|||||||
void merge_abstract(ref<PgfAbstr> abstract);
|
void merge_abstract(ref<PgfAbstr> abstract);
|
||||||
|
|
||||||
ref<PgfConcrLincat> read_lincat();
|
ref<PgfConcrLincat> read_lincat();
|
||||||
ref<Vector<PgfLincatField>> read_lincat_fields(ref<PgfConcrLincat> lincat);
|
ref<Vector<ref<PgfText>>> read_lincat_fields(ref<PgfConcrLincat> lincat);
|
||||||
ref<PgfLParam> read_lparam();
|
ref<PgfLParam> read_lparam();
|
||||||
void read_variable_range(ref<PgfVariableRange> var_info);
|
void read_variable_range(ref<PgfVariableRange> var_info);
|
||||||
void read_parg(ref<PgfPArg> parg);
|
void read_parg(ref<PgfPArg> parg);
|
||||||
ref<PgfPResult> read_presult();
|
ref<PgfPResult> read_presult();
|
||||||
PgfSymbol read_symbol();
|
PgfSymbol read_symbol();
|
||||||
ref<PgfSequence> read_seq();
|
ref<PgfSequence> read_seq();
|
||||||
ref<Vector<ref<PgfSequence>>> read_seq_ids(object container);
|
ref<Vector<ref<PgfSequence>>> read_seq_ids(ref<PgfConcrLincat> lincat, object container);
|
||||||
PgfPhrasetable read_phrasetable(size_t len);
|
PgfPhrasetable read_phrasetable(size_t len);
|
||||||
PgfPhrasetable read_phrasetable();
|
PgfPhrasetable read_phrasetable();
|
||||||
ref<PgfConcrLin> read_lin();
|
ref<PgfConcrLin> read_lin();
|
||||||
|
|||||||
@@ -391,9 +391,9 @@ void PgfWriter::write_lincat(ref<PgfConcrLincat> lincat)
|
|||||||
write_vector(lincat->seqs, &PgfWriter::write_seq_id);
|
write_vector(lincat->seqs, &PgfWriter::write_seq_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PgfWriter::write_lincat_field(ref<PgfLincatField> field)
|
void PgfWriter::write_lincat_field(ref<ref<PgfText>> field)
|
||||||
{
|
{
|
||||||
write_text(field->name);
|
write_text(*field);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PgfWriter::write_lin(ref<PgfConcrLin> lin)
|
void PgfWriter::write_lin(ref<PgfConcrLin> lin)
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ public:
|
|||||||
void write_abstract(ref<PgfAbstr> abstract);
|
void write_abstract(ref<PgfAbstr> abstract);
|
||||||
|
|
||||||
void write_lincat(ref<PgfConcrLincat> lincat);
|
void write_lincat(ref<PgfConcrLincat> lincat);
|
||||||
void write_lincat_field(ref<PgfLincatField> field);
|
void write_lincat_field(ref<ref<PgfText>> field);
|
||||||
void write_variable_range(ref<PgfVariableRange> var);
|
void write_variable_range(ref<PgfVariableRange> var);
|
||||||
void write_lparam(ref<PgfLParam> lparam);
|
void write_lparam(ref<PgfLParam> lparam);
|
||||||
void write_parg(ref<PgfPArg> linarg);
|
void write_parg(ref<PgfPArg> linarg);
|
||||||
|
|||||||
Reference in New Issue
Block a user