forked from GitHub/gf-core
first draft of an LR parser
This commit is contained in:
@@ -42,7 +42,9 @@ libpgf_la_SOURCES = \
|
||||
pgf/probspace.cxx \
|
||||
pgf/probspace.h \
|
||||
pgf/generator.cxx \
|
||||
pgf/generator.h
|
||||
pgf/generator.h \
|
||||
pgf/md5.cxx \
|
||||
pgf/md5.h
|
||||
|
||||
libpgf_la_LDFLAGS = -no-undefined -version-info 4:0:0
|
||||
libpgf_la_CXXFLAGS = -fno-rtti -std=c++11 -DCOMPILING_PGF
|
||||
|
||||
@@ -48,9 +48,9 @@ void PgfConcr::release(ref<PgfConcr> concr)
|
||||
void PgfConcrLincat::release(ref<PgfConcrLincat> lincat)
|
||||
{
|
||||
for (size_t i = 0; i < lincat->fields->len; i++) {
|
||||
PgfLincatField::release(vector_elem(lincat->fields, i));
|
||||
text_db_release(*vector_elem(lincat->fields, i));
|
||||
}
|
||||
Vector<PgfLincatField>::release(lincat->fields);
|
||||
Vector<ref<PgfText>>::release(lincat->fields);
|
||||
|
||||
for (size_t i = 0; i < lincat->args->len; i++) {
|
||||
PgfLParam::release(vector_elem(lincat->args, i)->param);
|
||||
@@ -67,13 +67,6 @@ void PgfConcrLincat::release(ref<PgfConcrLincat> lincat)
|
||||
PgfDB::free(lincat, lincat->name.size+1);
|
||||
}
|
||||
|
||||
void PgfLincatField::release(ref<PgfLincatField> field)
|
||||
{
|
||||
text_db_release(field->name);
|
||||
if (field->backrefs != 0)
|
||||
Vector<PgfLincatBackref>::release(field->backrefs);
|
||||
}
|
||||
|
||||
void PgfLParam::release(ref<PgfLParam> param)
|
||||
{
|
||||
PgfDB::free(param, param->n_terms*sizeof(param->terms[0]));
|
||||
|
||||
@@ -224,19 +224,6 @@ struct PGF_INTERNAL_DECL PgfSymbolALLCAPIT {
|
||||
static const uint8_t tag = 10;
|
||||
};
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfConcrLincat;
|
||||
struct PGF_INTERNAL_DECL PgfLincatBackref;
|
||||
struct PGF_INTERNAL_DECL PgfLincatEpsilon;
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfLincatField {
|
||||
ref<PgfConcrLincat> lincat;
|
||||
ref<PgfText> name;
|
||||
ref<Vector<PgfLincatBackref>> backrefs;
|
||||
ref<Vector<PgfLincatEpsilon>> epsilons;
|
||||
|
||||
static void release(ref<PgfLincatField> field);
|
||||
};
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfConcrLincat {
|
||||
static const uint8_t tag = 0;
|
||||
|
||||
@@ -246,7 +233,7 @@ struct PGF_INTERNAL_DECL PgfConcrLincat {
|
||||
ref<Vector<PgfPArg>> args;
|
||||
ref<Vector<ref<PgfPResult>>> res;
|
||||
ref<Vector<ref<PgfSequence>>> seqs;
|
||||
ref<Vector<PgfLincatField>> fields;
|
||||
ref<Vector<ref<PgfText>>> fields;
|
||||
|
||||
PgfText name;
|
||||
|
||||
@@ -268,18 +255,6 @@ struct PGF_INTERNAL_DECL PgfConcrLin {
|
||||
static void release(ref<PgfConcrLin> lin);
|
||||
};
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfLinSeqIndex {
|
||||
ref<PgfConcrLin> lin;
|
||||
size_t seq_index;
|
||||
};
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfLincatBackref : public PgfLinSeqIndex {
|
||||
size_t dot;
|
||||
};
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfLincatEpsilon : public PgfLinSeqIndex {
|
||||
};
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfConcrPrintname {
|
||||
ref<PgfText> printname;
|
||||
PgfText name;
|
||||
@@ -287,6 +262,25 @@ struct PGF_INTERNAL_DECL PgfConcrPrintname {
|
||||
static void release(ref<PgfConcrPrintname> printname);
|
||||
};
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfLRShift {
|
||||
size_t next_state;
|
||||
ref<PgfConcrLincat> lincat;
|
||||
size_t r;
|
||||
bool is_epsilon;
|
||||
};
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfLRReduce {
|
||||
object lin_obj;
|
||||
size_t seq_index;
|
||||
};
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfLRState {
|
||||
ref<Vector<PgfLRShift>> shifts;
|
||||
ref<Vector<PgfLRReduce>> reductions;
|
||||
};
|
||||
|
||||
typedef Vector<PgfLRState> PgfLRTable;
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfConcr {
|
||||
static const uint8_t tag = 1;
|
||||
|
||||
@@ -296,6 +290,8 @@ struct PGF_INTERNAL_DECL PgfConcr {
|
||||
PgfPhrasetable phrasetable;
|
||||
Namespace<PgfConcrPrintname> printnames;
|
||||
|
||||
ref<PgfLRTable> lrtable;
|
||||
|
||||
PgfText name;
|
||||
|
||||
static void release(ref<PgfConcr> pgf);
|
||||
|
||||
@@ -287,7 +287,7 @@ void PgfLinearizer::TreeLinNode::check_category(PgfLinearizer *linearizer, PgfTe
|
||||
void PgfLinearizer::TreeLinNode::linearize(PgfLinearizationOutputIface *out, PgfLinearizer *linearizer, size_t lindex)
|
||||
{
|
||||
PgfText *cat = &lin->absfun->type->name;
|
||||
PgfText *field = &*(vector_elem(lin->lincat->fields, lindex)->name);
|
||||
PgfText *field = &**vector_elem(lin->lincat->fields, lindex);
|
||||
|
||||
if (linearizer->pre_stack == NULL)
|
||||
out->begin_phrase(cat, fid, field, &lin->name);
|
||||
@@ -390,7 +390,7 @@ void PgfLinearizer::TreeLindefNode::linearize_arg(PgfLinearizationOutputIface *o
|
||||
void PgfLinearizer::TreeLindefNode::linearize(PgfLinearizationOutputIface *out, PgfLinearizer *linearizer, size_t lindex)
|
||||
{
|
||||
if (lincat != 0) {
|
||||
PgfText *field = &*(vector_elem(lincat->fields, lindex)->name);
|
||||
PgfText *field = &**vector_elem(lincat->fields, lindex);
|
||||
if (linearizer->pre_stack == NULL)
|
||||
out->begin_phrase(&lincat->name, fid, field, fun);
|
||||
else {
|
||||
@@ -543,7 +543,7 @@ void PgfLinearizer::TreeLitNode::linearize(PgfLinearizationOutputIface *out, Pgf
|
||||
{
|
||||
PgfText *field = NULL;
|
||||
if (lincat != 0) {
|
||||
field = &*(vector_elem(lincat->fields, lindex)->name);
|
||||
field = &**vector_elem(lincat->fields, lindex);
|
||||
}
|
||||
|
||||
linearizer->flush_pre_stack(out, literal);
|
||||
|
||||
197
src/runtime/c/pgf/md5.cxx
Normal file
197
src/runtime/c/pgf/md5.cxx
Normal file
@@ -0,0 +1,197 @@
|
||||
/*
|
||||
* Derived from the RSA Data Security, Inc. MD5 Message-Digest Algorithm
|
||||
* and modified slightly to be functionally identical but condensed into control structures.
|
||||
*/
|
||||
|
||||
#include "data.h"
|
||||
#include "md5.h"
|
||||
|
||||
/*
|
||||
* Constants defined by the MD5 algorithm
|
||||
*/
|
||||
#define A 0x67452301
|
||||
#define B 0xefcdab89
|
||||
#define C 0x98badcfe
|
||||
#define D 0x10325476
|
||||
|
||||
static uint32_t S[] = {7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
|
||||
5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20,
|
||||
4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
|
||||
6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21};
|
||||
|
||||
static uint32_t K[] = {0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
|
||||
0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
|
||||
0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
|
||||
0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
|
||||
0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
|
||||
0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
|
||||
0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
|
||||
0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
|
||||
0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
|
||||
0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
|
||||
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
|
||||
0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
|
||||
0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
|
||||
0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
|
||||
0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
|
||||
0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391};
|
||||
|
||||
/*
|
||||
* Padding used to make the size (in bits) of the input congruent to 448 mod 512
|
||||
*/
|
||||
static uint8_t PADDING[] = {0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
|
||||
|
||||
/*
|
||||
* Bit-manipulation functions defined by the MD5 algorithm
|
||||
*/
|
||||
#define F(X, Y, Z) ((X & Y) | (~X & Z))
|
||||
#define G(X, Y, Z) ((X & Z) | (Y & ~Z))
|
||||
#define H(X, Y, Z) (X ^ Y ^ Z)
|
||||
#define I(X, Y, Z) (Y ^ (X | ~Z))
|
||||
|
||||
/*
 * Rotates a 32-bit word left by n bits.
 *
 * The count is reduced modulo 32, so n == 0 and n >= 32 are well defined.
 * Without the reduction, n == 0 would shift right by 32 bits, which is
 * undefined behaviour for a 32-bit operand.
 */
uint32_t rotateLeft(uint32_t x, uint32_t n){
    n &= 31;
    return (x << n) | (x >> ((32 - n) & 31));
}
|
||||
|
||||
/*
|
||||
* Initialize a context
|
||||
*/
|
||||
MD5Context::MD5Context()
|
||||
{
|
||||
size = (uint64_t)0;
|
||||
|
||||
buffer[0] = (uint32_t)A;
|
||||
buffer[1] = (uint32_t)B;
|
||||
buffer[2] = (uint32_t)C;
|
||||
buffer[3] = (uint32_t)D;
|
||||
}
|
||||
|
||||
/*
|
||||
* Step on 512 bits of input with the main MD5 algorithm.
|
||||
*/
|
||||
static
|
||||
void md5Step(uint32_t *buffer, uint32_t *input){
|
||||
uint32_t AA = buffer[0];
|
||||
uint32_t BB = buffer[1];
|
||||
uint32_t CC = buffer[2];
|
||||
uint32_t DD = buffer[3];
|
||||
|
||||
uint32_t E;
|
||||
|
||||
unsigned int j;
|
||||
|
||||
for(unsigned int i = 0; i < 64; ++i){
|
||||
switch(i / 16){
|
||||
case 0:
|
||||
E = F(BB, CC, DD);
|
||||
j = i;
|
||||
break;
|
||||
case 1:
|
||||
E = G(BB, CC, DD);
|
||||
j = ((i * 5) + 1) % 16;
|
||||
break;
|
||||
case 2:
|
||||
E = H(BB, CC, DD);
|
||||
j = ((i * 3) + 5) % 16;
|
||||
break;
|
||||
default:
|
||||
E = I(BB, CC, DD);
|
||||
j = (i * 7) % 16;
|
||||
break;
|
||||
}
|
||||
|
||||
uint32_t temp = DD;
|
||||
DD = CC;
|
||||
CC = BB;
|
||||
BB = BB + rotateLeft(AA + E + K[i] + input[j], S[i]);
|
||||
AA = temp;
|
||||
}
|
||||
|
||||
buffer[0] += AA;
|
||||
buffer[1] += BB;
|
||||
buffer[2] += CC;
|
||||
buffer[3] += DD;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add some amount of input to the context
|
||||
*
|
||||
* If the input fills out a block of 512 bits, apply the algorithm (md5Step)
|
||||
* and save the result in the buffer. Also updates the overall size.
|
||||
*/
|
||||
void MD5Context::update(uint8_t *input_buffer, size_t input_len)
|
||||
{
|
||||
uint32_t input[16];
|
||||
unsigned int offset = this->size % 64;
|
||||
this->size += (uint64_t)input_len;
|
||||
|
||||
// Copy each byte in input_buffer into the next space in our context input
|
||||
for (unsigned int i = 0; i < input_len; ++i) {
|
||||
this->input[offset++] = (uint8_t)*(input_buffer + i);
|
||||
|
||||
// If we've filled our context input, copy it into our local array input
|
||||
// then reset the offset to 0 and fill in a new buffer.
|
||||
// Every time we fill out a chunk, we run it through the algorithm
|
||||
// to enable some back and forth between cpu and i/o
|
||||
if (offset % 64 == 0){
|
||||
for (unsigned int j = 0; j < 16; ++j) {
|
||||
// Convert to little-endian
|
||||
// The local variable `input` our 512-bit chunk separated into 32-bit words
|
||||
// we can use in calculations
|
||||
input[j] = (uint32_t)(this->input[(j * 4) + 3]) << 24 |
|
||||
(uint32_t)(this->input[(j * 4) + 2]) << 16 |
|
||||
(uint32_t)(this->input[(j * 4) + 1]) << 8 |
|
||||
(uint32_t)(this->input[(j * 4)]);
|
||||
}
|
||||
md5Step(this->buffer, input);
|
||||
offset = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Pad the current input to get to 448 bytes, append the size in bits to the very end,
|
||||
* and save the result of the final iteration into digest.
|
||||
*/
|
||||
void MD5Context::finalize(MD5Digest *digest)
|
||||
{
|
||||
uint32_t input[16];
|
||||
unsigned int offset = this->size % 64;
|
||||
unsigned int padding_length = offset < 56 ? 56 - offset : (56 + 64) - offset;
|
||||
|
||||
// Fill in the padding and undo the changes to size that resulted from the update
|
||||
update(PADDING, padding_length);
|
||||
this->size -= (uint64_t)padding_length;
|
||||
|
||||
// Do a final update (internal to this function)
|
||||
// Last two 32-bit words are the two halves of the size (converted from bytes to bits)
|
||||
for(unsigned int j = 0; j < 14; ++j)
|
||||
{
|
||||
input[j] = (uint32_t)(this->input[(j * 4) + 3]) << 24 |
|
||||
(uint32_t)(this->input[(j * 4) + 2]) << 16 |
|
||||
(uint32_t)(this->input[(j * 4) + 1]) << 8 |
|
||||
(uint32_t)(this->input[(j * 4)]);
|
||||
}
|
||||
input[14] = (uint32_t)(this->size * 8);
|
||||
input[15] = (uint32_t)((this->size * 8) >> 32);
|
||||
|
||||
md5Step(this->buffer, input);
|
||||
|
||||
// Move the result into digest (convert from little-endian)
|
||||
for(unsigned int i = 0; i < 4; ++i){
|
||||
digest->b[(i * 4) + 0] = (uint8_t)((this->buffer[i] & 0x000000FF));
|
||||
digest->b[(i * 4) + 1] = (uint8_t)((this->buffer[i] & 0x0000FF00) >> 8);
|
||||
digest->b[(i * 4) + 2] = (uint8_t)((this->buffer[i] & 0x00FF0000) >> 16);
|
||||
digest->b[(i * 4) + 3] = (uint8_t)((this->buffer[i] & 0xFF000000) >> 24);
|
||||
}
|
||||
}
|
||||
30
src/runtime/c/pgf/md5.h
Normal file
30
src/runtime/c/pgf/md5.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#ifndef MD5_H
|
||||
#define MD5_H
|
||||
|
||||
struct PGF_INTERNAL_DECL MD5Digest {
|
||||
uint8_t b[16];
|
||||
};
|
||||
|
||||
// Orders two digests by a byte-wise comparison of their 16 bytes.
inline bool operator < (const MD5Digest &d1, const MD5Digest &d2) {
    const int order = memcmp(d1.b, d2.b, sizeof(d1.b));
    return order < 0;
}
|
||||
|
||||
class PGF_INTERNAL_DECL MD5Context {
|
||||
uint64_t size; // Size of input in bytes
|
||||
uint32_t buffer[4]; // Current accumulation of hash
|
||||
uint8_t input[64]; // Input to be used in the next step
|
||||
|
||||
public:
|
||||
MD5Context();
|
||||
void update(uint8_t *input, size_t input_len);
|
||||
|
||||
template <class T>
|
||||
void update(T &input)
|
||||
{
|
||||
update((uint8_t *) &input, sizeof(T));
|
||||
}
|
||||
|
||||
void finalize(MD5Digest *digest);
|
||||
};
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,59 +1,114 @@
|
||||
#ifndef PARSER_H
|
||||
#define PARSER_H
|
||||
#ifndef LR_TABLE_H
|
||||
#define LR_TABLE_H
|
||||
|
||||
#include "md5.h"
|
||||
|
||||
class PGF_INTERNAL_DECL PgfLRTableMaker
|
||||
{
|
||||
struct State;
|
||||
struct Item;
|
||||
struct Predictions;
|
||||
|
||||
struct CompareItem;
|
||||
static const CompareItem compare_item;
|
||||
|
||||
typedef std::pair<ref<PgfText>,size_t> Key;
|
||||
|
||||
struct PGF_INTERNAL_DECL CompareKey : std::less<Key> {
|
||||
bool operator() (const Key& k1, const Key& k2) const {
|
||||
int cmp = textcmp(k1.first,k2.first);
|
||||
if (cmp < 0)
|
||||
return true;
|
||||
else if (cmp > 0)
|
||||
return false;
|
||||
|
||||
return (k1.second < k2.second);
|
||||
}
|
||||
};
|
||||
|
||||
ref<PgfAbstr> abstr;
|
||||
ref<PgfConcr> concr;
|
||||
|
||||
std::vector<State*> todo;
|
||||
std::map<MD5Digest,State*> states;
|
||||
std::map<Key,Predictions*,CompareKey> predictions;
|
||||
std::map<Predictions*,State*> continuations;
|
||||
std::vector<Item*> completed;
|
||||
|
||||
void process(Item *item);
|
||||
void symbol(Item *item, PgfSymbol sym);
|
||||
void predict(Item *item, ref<PgfText> cat,
|
||||
ref<Vector<PgfVariableRange>> vars, PgfLParam *r);
|
||||
void predict(Item *item, ref<PgfText> cat, size_t r);
|
||||
void predict(ref<PgfAbsFun> absfun, Predictions *preds);
|
||||
void complete(Item *item);
|
||||
|
||||
static void print_item(Item *item);
|
||||
|
||||
public:
|
||||
PgfLRTableMaker(ref<PgfAbstr> abstr, ref<PgfConcr> concr);
|
||||
ref<PgfLRTable> make();
|
||||
};
|
||||
|
||||
class PgfPrinter;
|
||||
|
||||
class PGF_INTERNAL_DECL PgfParser : public PgfPhraseScanner, public PgfExprEnum
|
||||
{
|
||||
ref<PgfConcr> concr;
|
||||
PgfText *sentence;
|
||||
PgfMarshaller *m;
|
||||
PgfUnmarshaller *u;
|
||||
|
||||
struct Choice;
|
||||
struct Production;
|
||||
struct StackNode;
|
||||
struct ParseState;
|
||||
struct ExprState;
|
||||
struct ExprInstance;
|
||||
struct Result;
|
||||
struct CompareExprState : std::less<ExprState*> {
|
||||
bool operator() (const ExprState *state1, const ExprState *state2) const;
|
||||
};
|
||||
|
||||
ParseState *before, *after, *ahead;
|
||||
std::priority_queue<ExprState*, std::vector<ExprState*>, CompareExprState> queue;
|
||||
int last_fid;
|
||||
|
||||
Result *top_res;
|
||||
size_t top_res_index;
|
||||
|
||||
void shift(StackNode *parent, ref<PgfConcrLincat> lincat, size_t r, Production *prod,
|
||||
ParseState *state);
|
||||
void reduce(StackNode *parent, ref<PgfConcrLin> lin, size_t seq_index,
|
||||
size_t n, std::vector<Choice*> &args);
|
||||
void complete(StackNode *parent, ref<PgfConcrLincat> lincat, size_t seq_index,
|
||||
size_t n, std::vector<Choice*> &args);
|
||||
void reduce_all(StackNode *state);
|
||||
void print_prod(Choice *choice, Production *prod);
|
||||
void print_transition(StackNode *source, StackNode *target, ParseState *state);
|
||||
|
||||
typedef std::map<std::pair<Choice*,Choice*>,Choice*> intersection_map;
|
||||
|
||||
Choice *intersect_choice(Choice *choice1, Choice *choice2, intersection_map &im);
|
||||
|
||||
void print_expr_state_before(PgfPrinter *printer, ExprState *state);
|
||||
void print_expr_state_after(PgfPrinter *printer, ExprState *state);
|
||||
void print_expr_state(ExprState *state);
|
||||
|
||||
void predict_expr_states(Choice *choice, prob_t outside_prob);
|
||||
bool process_expr_state(ExprState *state);
|
||||
void complete_expr_state(ExprState *state);
|
||||
void combine_expr_state(ExprState *state, ExprInstance &inst);
|
||||
void release_expr_state(ExprState *state);
|
||||
|
||||
class PGF_INTERNAL_DECL PgfParser : public PgfPhraseScanner, public PgfExprEnum {
|
||||
public:
|
||||
PgfParser(ref<PgfConcr> concr, ref<PgfConcrLincat> start, PgfText *sentence, PgfMarshaller *m, PgfUnmarshaller *u);
|
||||
|
||||
void space(PgfTextSpot *start, PgfTextSpot *end, PgfExn* err);
|
||||
void start_matches(PgfTextSpot *end, PgfExn* err);
|
||||
void match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err);
|
||||
void end_matches(PgfTextSpot *end, PgfExn* err);
|
||||
virtual void space(PgfTextSpot *start, PgfTextSpot *end, PgfExn* err);
|
||||
virtual void start_matches(PgfTextSpot *end, PgfExn* err);
|
||||
virtual void match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err);
|
||||
virtual void end_matches(PgfTextSpot *end, PgfExn* err);
|
||||
|
||||
void prepare();
|
||||
PgfExpr fetch(PgfDB *db, prob_t *prob);
|
||||
|
||||
virtual ~PgfParser();
|
||||
|
||||
private:
|
||||
class CFGCat;
|
||||
class State;
|
||||
class Choice;
|
||||
class Production;
|
||||
|
||||
class ParseItemConts;
|
||||
|
||||
class Item {
|
||||
public:
|
||||
prob_t get_prob() { return inside_prob + outside_prob; };
|
||||
|
||||
virtual State *proceed(PgfParser *parser, PgfUnmarshaller *u) = 0;
|
||||
virtual bool combine(PgfParser *parser, ParseItemConts *conts, PgfExpr expr, prob_t inside_prob, PgfUnmarshaller *u) = 0;
|
||||
virtual void print1(PgfPrinter *printer, State *state, PgfMarshaller *m) = 0;
|
||||
virtual void print2(PgfPrinter *printer, State *state, int x, PgfMarshaller *m) = 0;
|
||||
virtual PgfExpr get_expr(PgfUnmarshaller *u) = 0;
|
||||
|
||||
void trace(State *state, PgfMarshaller *m);
|
||||
|
||||
protected:
|
||||
prob_t inside_prob;
|
||||
prob_t outside_prob;
|
||||
};
|
||||
|
||||
class ParseItem;
|
||||
class ExprItem;
|
||||
class MetaItem;
|
||||
|
||||
ref<PgfConcr> concr;
|
||||
ref<PgfConcrLincat> start;
|
||||
PgfText *sentence;
|
||||
|
||||
size_t last_choice_id;
|
||||
|
||||
State *before, *after;
|
||||
|
||||
PgfMarshaller *m;
|
||||
PgfUnmarshaller *u;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -861,9 +861,9 @@ public:
|
||||
|
||||
virtual void match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err)
|
||||
{
|
||||
ref<PgfLincatField> field =
|
||||
vector_elem(lin->lincat->fields, seq_index % lin->lincat->fields->len);
|
||||
callback->fn(callback, &lin->absfun->name, &(*field->name), lin->lincat->abscat->prob+lin->absfun->prob, err);
|
||||
ref<PgfText> field =
|
||||
*vector_elem(lin->lincat->fields, seq_index % lin->lincat->fields->len);
|
||||
callback->fn(callback, &lin->absfun->name, field, lin->lincat->abscat->prob+lin->absfun->prob, err);
|
||||
}
|
||||
|
||||
virtual void end_matches(PgfTextSpot *end, PgfExn* err)
|
||||
@@ -909,9 +909,9 @@ public:
|
||||
|
||||
virtual void match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err)
|
||||
{
|
||||
ref<PgfLincatField> field =
|
||||
vector_elem(lin->lincat->fields, seq_index % lin->lincat->fields->len);
|
||||
callback->morpho.fn(&callback->morpho, &lin->absfun->name, &(*field->name), lin->lincat->abscat->prob+lin->absfun->prob, err);
|
||||
ref<PgfText> field =
|
||||
*vector_elem(lin->lincat->fields, seq_index % lin->lincat->fields->len);
|
||||
callback->morpho.fn(&callback->morpho, &lin->absfun->name, field, lin->lincat->abscat->prob+lin->absfun->prob, err);
|
||||
}
|
||||
|
||||
virtual void end_matches(PgfTextSpot *end, PgfExn* err)
|
||||
@@ -976,7 +976,7 @@ PGF_API
|
||||
PgfText *pgf_get_lincat_field_internal(object o, size_t i)
|
||||
{
|
||||
ref<PgfConcrLincat> lincat = o;
|
||||
return &*(vector_elem(lincat->fields, i)->name);
|
||||
return &**vector_elem(lincat->fields, i);
|
||||
}
|
||||
|
||||
PGF_API
|
||||
@@ -1654,6 +1654,7 @@ class PGF_INTERNAL PgfLinBuilder : public PgfLinBuilderIface
|
||||
ref<Vector<ref<PgfSequence>>> seqs;
|
||||
|
||||
object container; // what are we building?
|
||||
ref<PgfConcrLincat> container_lincat;
|
||||
|
||||
size_t var_index;
|
||||
size_t arg_index;
|
||||
@@ -1712,17 +1713,15 @@ public:
|
||||
lincat->seqs = seqs;
|
||||
lincat->n_lindefs = n_lindefs;
|
||||
|
||||
ref<Vector<PgfLincatField>> db_fields = vector_new<PgfLincatField>(n_fields);
|
||||
ref<Vector<ref<PgfText>>> db_fields = vector_new<ref<PgfText>>(n_fields);
|
||||
for (size_t i = 0; i < n_fields; i++) {
|
||||
ref<PgfText> name = textdup_db(fields[i]);
|
||||
vector_elem(db_fields, i)->lincat = lincat;
|
||||
vector_elem(db_fields, i)->name = name;
|
||||
vector_elem(db_fields, i)->backrefs = 0;
|
||||
vector_elem(db_fields, i)->epsilons = 0;
|
||||
*vector_elem(db_fields, i) = name;
|
||||
}
|
||||
lincat->fields = db_fields;
|
||||
|
||||
this->container = lincat.tagged();
|
||||
this->container_lincat = 0;
|
||||
|
||||
build->build(this, err);
|
||||
if (err->type == PGF_EXN_NONE && res_index != res->len) {
|
||||
@@ -1760,6 +1759,7 @@ public:
|
||||
lin->seqs = seqs;
|
||||
|
||||
this->container = lin.tagged();
|
||||
this->container_lincat = lincat;
|
||||
|
||||
build->build(this, err);
|
||||
if (err->type == PGF_EXN_NONE && res_index != res->len) {
|
||||
@@ -2149,7 +2149,7 @@ public:
|
||||
|
||||
PgfPhrasetable phrasetable =
|
||||
phrasetable_internalize(concr->phrasetable,
|
||||
seq, container, seq_index,
|
||||
seq, container_lincat, container, seq_index,
|
||||
&entry);
|
||||
concr->phrasetable = phrasetable;
|
||||
*vector_elem(seqs, seq_index) = entry->seq;
|
||||
@@ -2418,7 +2418,7 @@ PgfText **pgf_category_fields(PgfDB *db, PgfConcrRevision revision,
|
||||
if (fields == 0)
|
||||
throw pgf_systemerror(ENOMEM);
|
||||
for (size_t i = 0; i < n_fields; i++) {
|
||||
fields[i] = textdup(vector_elem(lincat->fields, i)->name);
|
||||
fields[i] = textdup(*vector_elem(lincat->fields, i));
|
||||
}
|
||||
*p_n_fields = n_fields;
|
||||
return fields;
|
||||
@@ -2511,7 +2511,7 @@ PgfText **pgf_tabular_linearize(PgfDB *db, PgfConcrRevision revision,
|
||||
|
||||
PgfText *text = out.get_text();
|
||||
if (text != NULL) {
|
||||
res[pos++] = textdup(&*(vector_elem(lincat->fields,i)->name));
|
||||
res[pos++] = textdup(&**vector_elem(lincat->fields,i));
|
||||
res[pos++] = text;
|
||||
}
|
||||
}
|
||||
@@ -2550,7 +2550,7 @@ PgfText **pgf_tabular_linearize_all(PgfDB *db, PgfConcrRevision revision,
|
||||
|
||||
PgfText *text = out.get_text();
|
||||
if (text != NULL) {
|
||||
res[pos++] = textdup(&*(vector_elem(lincat->fields, i)->name));
|
||||
res[pos++] = textdup(&**vector_elem(lincat->fields, i));
|
||||
res[pos++] = text;
|
||||
}
|
||||
}
|
||||
@@ -2656,7 +2656,6 @@ PgfExprEnum *pgf_parse(PgfDB *db, PgfConcrRevision revision,
|
||||
phrasetable_lookup_cohorts(concr->phrasetable,
|
||||
sentence, case_sensitive,
|
||||
parser, err);
|
||||
parser->prepare();
|
||||
return parser;
|
||||
} PGF_API_END
|
||||
|
||||
|
||||
@@ -299,9 +299,87 @@ int text_sequence_cmp(PgfTextSpot *spot, const uint8_t *end,
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
int backref_cmp(ref<PgfSequenceBackref> backref, ref<PgfConcrLincat> lincat, size_t r)
|
||||
{
|
||||
int cmp = 0;
|
||||
switch (ref<PgfConcrLin>::get_tag(backref->container)) {
|
||||
case PgfConcrLin::tag: {
|
||||
ref<PgfConcrLin> lin = ref<PgfConcrLin>::untagged(backref->container);
|
||||
if (lincat.as_object() < lin->lincat.as_object())
|
||||
cmp = -1;
|
||||
else if (lincat.as_object() > lin->lincat.as_object())
|
||||
cmp = 1;
|
||||
break;
|
||||
}
|
||||
case PgfConcrLincat::tag: {
|
||||
if (lincat.as_object() > 0)
|
||||
cmp = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (cmp == 0) {
|
||||
size_t r1 =
|
||||
(lincat == 0) ? 0
|
||||
: backref->seq_index % lincat->fields->len;
|
||||
if (r < r1)
|
||||
cmp = -1;
|
||||
else if (r > r1)
|
||||
cmp = 1;
|
||||
}
|
||||
|
||||
return cmp;
|
||||
}
|
||||
|
||||
static
|
||||
ref<Vector<PgfSequenceBackref>> phrasetable_update_backrefs(PgfPhrasetable table,
|
||||
ref<PgfConcrLincat> lincat,
|
||||
object container,
|
||||
size_t seq_index)
|
||||
{
|
||||
size_t len = (table->value.backrefs != 0)
|
||||
? table->value.backrefs->len
|
||||
: 0;
|
||||
|
||||
ref<Vector<PgfSequenceBackref>> backrefs =
|
||||
vector_resize<PgfSequenceBackref>(table->value.backrefs, len+1, table->txn_id);
|
||||
ssize_t i = 0;
|
||||
ssize_t j = len-1;
|
||||
if (table->value.seq->syms.len == 0 && len > 0) {
|
||||
// The backrefs for the epsilon sequence are sorted by lincat and r
|
||||
|
||||
size_t r = (lincat!=0) ? (seq_index % lincat->fields->len) : 0;
|
||||
while (i <= j) {
|
||||
ssize_t k = (i + j) / 2;
|
||||
ref<PgfSequenceBackref> backref = vector_elem(backrefs, k);
|
||||
|
||||
int cmp = backref_cmp(backref, lincat, r);
|
||||
if (cmp < 0) {
|
||||
while (j >= k) {
|
||||
backrefs->data[j+1] = backrefs->data[j];
|
||||
j--;
|
||||
}
|
||||
} else if (cmp > 0) {
|
||||
i = k+1;
|
||||
} else {
|
||||
while (j > k) {
|
||||
backrefs->data[j+1] = backrefs->data[j];
|
||||
j--;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
backrefs->data[j+1].container = container;
|
||||
backrefs->data[j+1].seq_index = seq_index;
|
||||
return backrefs;
|
||||
}
|
||||
|
||||
PGF_INTERNAL
|
||||
PgfPhrasetable phrasetable_internalize(PgfPhrasetable table,
|
||||
ref<PgfSequence> seq,
|
||||
ref<PgfConcrLincat> lincat,
|
||||
object container,
|
||||
size_t seq_index,
|
||||
ref<PgfPhrasetableEntry> *pentry)
|
||||
@@ -321,6 +399,7 @@ PgfPhrasetable phrasetable_internalize(PgfPhrasetable table,
|
||||
if (cmp < 0) {
|
||||
PgfPhrasetable left = phrasetable_internalize(table->left,
|
||||
seq,
|
||||
lincat,
|
||||
container,
|
||||
seq_index,
|
||||
pentry);
|
||||
@@ -329,6 +408,7 @@ PgfPhrasetable phrasetable_internalize(PgfPhrasetable table,
|
||||
} else if (cmp > 0) {
|
||||
PgfPhrasetable right = phrasetable_internalize(table->right,
|
||||
seq,
|
||||
lincat,
|
||||
container,
|
||||
seq_index,
|
||||
pentry);
|
||||
@@ -342,9 +422,7 @@ PgfPhrasetable phrasetable_internalize(PgfPhrasetable table,
|
||||
: 0;
|
||||
|
||||
ref<Vector<PgfSequenceBackref>> backrefs =
|
||||
vector_resize<PgfSequenceBackref>(table->value.backrefs, len+1, table->txn_id);
|
||||
backrefs->data[len].container = container;
|
||||
backrefs->data[len].seq_index = seq_index;
|
||||
phrasetable_update_backrefs(table,lincat,container,seq_index);
|
||||
|
||||
PgfPhrasetable new_table =
|
||||
Node<PgfPhrasetableEntry>::upd_node(table, table->left, table->right);
|
||||
@@ -356,6 +434,7 @@ PgfPhrasetable phrasetable_internalize(PgfPhrasetable table,
|
||||
|
||||
PGF_INTERNAL
|
||||
ref<PgfSequence> phrasetable_relink(PgfPhrasetable table,
|
||||
ref<PgfConcrLincat> lincat,
|
||||
object container,
|
||||
size_t seq_index,
|
||||
size_t seq_id)
|
||||
@@ -370,9 +449,7 @@ ref<PgfSequence> phrasetable_relink(PgfPhrasetable table,
|
||||
: table->value.backrefs->len;
|
||||
|
||||
ref<Vector<PgfSequenceBackref>> backrefs =
|
||||
vector_resize<PgfSequenceBackref>(table->value.backrefs, len+1, table->txn_id);
|
||||
backrefs->data[len].container = container;
|
||||
backrefs->data[len].seq_index = seq_index;
|
||||
phrasetable_update_backrefs(table,lincat,container,seq_index);
|
||||
table->value.backrefs = backrefs;
|
||||
|
||||
return table->value.seq;
|
||||
@@ -397,12 +474,16 @@ PgfPhrasetable phrasetable_delete(PgfPhrasetable table,
|
||||
PgfPhrasetable left = phrasetable_delete(table->left,
|
||||
container, seq_index,
|
||||
seq);
|
||||
if (left == table->left)
|
||||
return table;
|
||||
table = Node<PgfPhrasetableEntry>::upd_node(table,left,table->right);
|
||||
return Node<PgfPhrasetableEntry>::balanceR(table);
|
||||
} else if (cmp > 0) {
|
||||
PgfPhrasetable right = phrasetable_delete(table->right,
|
||||
container, seq_index,
|
||||
seq);
|
||||
if (right == table->right)
|
||||
return table;
|
||||
table = Node<PgfPhrasetableEntry>::upd_node(table,table->left,right);
|
||||
return Node<PgfPhrasetableEntry>::balanceL(table);
|
||||
} else {
|
||||
@@ -566,10 +647,10 @@ void finish_skipping(PgfCohortsState *state) {
|
||||
|
||||
state->queue.pop();
|
||||
}
|
||||
|
||||
/*
|
||||
state->scanner->space(&state->spot, &state->spot,
|
||||
state->err);
|
||||
|
||||
*/
|
||||
state->last.pos = 0;
|
||||
state->last.ptr = NULL;
|
||||
state->skipping = false;
|
||||
@@ -740,6 +821,56 @@ void phrasetable_lookup_cohorts(PgfPhrasetable table,
|
||||
}
|
||||
}
|
||||
|
||||
// Calls f(lin, seq_index) for every backref of the empty sequence that
// compares equal to (lincat, r) under backref_cmp.
//
// The search looks only at the leftmost node of the table, and only if
// that node's sequence has no symbols.
// NOTE(review): this presumes the empty sequence, when present, sorts
// first in the phrase table; confirm against the table's ordering.
//
// The backrefs of that node are binary-searched. Once a match is found,
// the matches before it are reported first (walking backwards), then
// the match itself, then the matches after it.
//
// An empty table (null reference) is handled by returning immediately
// instead of dereferencing it.
//
// NOTE(review): every reported container is untagged as a PgfConcrLin.
// Presumably callers pass a non-null lincat, so that only lin backrefs
// can match; verify.
PGF_INTERNAL
void phrasetable_lookup_epsilons(PgfPhrasetable table,
                                 ref<PgfConcrLincat> lincat, size_t r,
                                 std::function<void(ref<PgfConcrLin>,size_t)> &f)
{
    if (table == 0)
        return;

    while (table->left != 0) {
        table = table->left;
    }

    if (table->value.seq->syms.len > 0)
        return;

    size_t len = (table->value.backrefs != 0)
                     ? table->value.backrefs->len
                     : 0;

    ssize_t i = 0;
    ssize_t j = len-1;
    while (i <= j) {
        ssize_t k = (i + j) / 2;
        ref<PgfSequenceBackref> backref = vector_elem(table->value.backrefs, k);

        int cmp = backref_cmp(backref, lincat, r);
        if (cmp < 0) {
            j = k-1;
        } else if (cmp > 0) {
            i = k+1;
        } else {
            // Matches preceding k, nearest first.
            for (i = k; i > 0; i--) {
                ref<PgfSequenceBackref> prev = vector_elem(table->value.backrefs, i-1);
                if (backref_cmp(prev, lincat, r) != 0)
                    break;
                f(ref<PgfConcrLin>::untagged(prev->container),prev->seq_index);
            }

            // The match found by the binary search.
            f(ref<PgfConcrLin>::untagged(backref->container),backref->seq_index);

            // Matches following k. len >= 1 here, so the bound is >= 0.
            for (j = k; j < (ssize_t) len - 1; j++) {
                ref<PgfSequenceBackref> next = vector_elem(table->value.backrefs, j+1);
                if (backref_cmp(next, lincat, r) != 0)
                    break;
                f(ref<PgfConcrLin>::untagged(next->container),next->seq_index);
            }
            break;
        }
    }
}
|
||||
|
||||
PGF_INTERNAL
|
||||
void phrasetable_iter(PgfConcr *concr,
|
||||
PgfPhrasetable table,
|
||||
@@ -768,10 +899,10 @@ void phrasetable_iter(PgfConcr *concr,
|
||||
ref<PgfConcrLincat> lincat =
|
||||
namespace_lookup(concr->lincats, &lin->absfun->type->name);
|
||||
if (lincat != 0) {
|
||||
ref<PgfLincatField> field =
|
||||
vector_elem(lincat->fields, backref.seq_index % lincat->fields->len);
|
||||
ref<PgfText> field =
|
||||
*vector_elem(lincat->fields, backref.seq_index % lincat->fields->len);
|
||||
|
||||
callback->fn(callback, &lin->absfun->name, &(*field->name), lincat->abscat->prob+lin->absfun->prob, err);
|
||||
callback->fn(callback, &lin->absfun->name, &*field, lincat->abscat->prob+lin->absfun->prob, err);
|
||||
if (err->type != PGF_EXN_NONE)
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -52,15 +52,19 @@ private:
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
struct PgfConcrLincat;
|
||||
|
||||
PGF_INTERNAL_DECL
|
||||
PgfPhrasetable phrasetable_internalize(PgfPhrasetable table,
|
||||
ref<PgfSequence> seq,
|
||||
ref<PgfConcrLincat> lincat,
|
||||
object container,
|
||||
size_t seq_index,
|
||||
ref<PgfPhrasetableEntry> *pentry);
|
||||
|
||||
PGF_INTERNAL_DECL
|
||||
ref<PgfSequence> phrasetable_relink(PgfPhrasetable table,
|
||||
ref<PgfConcrLincat> lincat,
|
||||
object container,
|
||||
size_t seq_index,
|
||||
size_t seq_id);
|
||||
@@ -101,6 +105,11 @@ void phrasetable_lookup_cohorts(PgfPhrasetable table,
|
||||
bool case_sensitive,
|
||||
PgfPhraseScanner *scanner, PgfExn* err);
|
||||
|
||||
PGF_INTERNAL_DECL
|
||||
void phrasetable_lookup_epsilons(PgfPhrasetable table,
|
||||
ref<PgfConcrLincat> lincat, size_t r,
|
||||
std::function<void(ref<PgfConcrLin>, size_t)> &f);
|
||||
|
||||
PGF_INTERNAL_DECL
|
||||
void phrasetable_iter(PgfConcr *concr,
|
||||
PgfPhrasetable table,
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#include "data.h"
|
||||
#include "reader.h"
|
||||
#include "parser.h"
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
@@ -650,14 +651,14 @@ ref<PgfSequence> PgfReader::read_seq()
|
||||
return seq;
|
||||
}
|
||||
|
||||
ref<Vector<ref<PgfSequence>>> PgfReader::read_seq_ids(object container)
|
||||
ref<Vector<ref<PgfSequence>>> PgfReader::read_seq_ids(ref<PgfConcrLincat> lincat, object container)
|
||||
{
|
||||
size_t len = read_len();
|
||||
ref<Vector<ref<PgfSequence>>> vec = vector_new<ref<PgfSequence>>(len);
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
size_t seq_id = read_len();
|
||||
ref<PgfSequence> seq = phrasetable_relink(concrete->phrasetable,
|
||||
container, i,
|
||||
lincat, container, i,
|
||||
seq_id);
|
||||
if (seq == 0) {
|
||||
throw pgf_error("Invalid sequence id");
|
||||
@@ -701,7 +702,7 @@ ref<PgfConcrLincat> PgfReader::read_lincat()
|
||||
auto n_lindefs = read_len();
|
||||
auto args = read_vector(&PgfReader::read_parg);
|
||||
auto res = read_vector(&PgfReader::read_presult2);
|
||||
auto seqs = read_seq_ids(lincat.tagged());
|
||||
auto seqs = read_seq_ids(0, lincat.tagged());
|
||||
|
||||
lincat->abscat = namespace_lookup(abstract->cats, &lincat->name);
|
||||
lincat->fields = fields;
|
||||
@@ -712,130 +713,35 @@ ref<PgfConcrLincat> PgfReader::read_lincat()
|
||||
return lincat;
|
||||
}
|
||||
|
||||
ref<Vector<PgfLincatField>> PgfReader::read_lincat_fields(ref<PgfConcrLincat> lincat)
|
||||
ref<Vector<ref<PgfText>>> PgfReader::read_lincat_fields(ref<PgfConcrLincat> lincat)
|
||||
{
|
||||
size_t len = read_len();
|
||||
ref<Vector<PgfLincatField>> fields = vector_new<PgfLincatField>(len);
|
||||
ref<Vector<ref<PgfText>>> fields = vector_new<ref<PgfText>>(len);
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
auto name = read_text();
|
||||
|
||||
ref<PgfLincatField> field = vector_elem(fields,i);
|
||||
field->lincat = lincat;
|
||||
field->name = name;
|
||||
field->backrefs = 0;
|
||||
field->epsilons = 0;
|
||||
*vector_elem(fields,i) = name;
|
||||
}
|
||||
return fields;
|
||||
}
|
||||
|
||||
static void add_to_index(ref<PgfConcr> concrete, ref<PgfConcrLin> lin, size_t seq_index, size_t dot)
|
||||
{
|
||||
size_t n_fields = lin->lincat->fields->len;
|
||||
ref<PgfSequence> seq = *vector_elem(lin->seqs,seq_index);
|
||||
ref<PgfPResult> result = *vector_elem(lin->res, seq_index / n_fields);
|
||||
ref<PgfLincatField> field = vector_elem(lin->lincat->fields, seq_index % n_fields);
|
||||
|
||||
if (dot >= seq->syms.len) {
|
||||
ref<Vector<PgfLincatEpsilon>> epsilons = field->epsilons;
|
||||
epsilons =
|
||||
vector_resize(epsilons, ((epsilons == 0) ? 0 : epsilons->len)+1,
|
||||
PgfDB::get_txn_id());
|
||||
field->epsilons = epsilons;
|
||||
ref<PgfLincatEpsilon> epsilon =
|
||||
vector_elem(epsilons,epsilons->len-1);
|
||||
epsilon->lin = lin;
|
||||
epsilon->seq_index = seq_index;
|
||||
|
||||
if (epsilons->len == 1 && field->backrefs != 0) {
|
||||
for (size_t i = 0; i < field->backrefs->len; i++) {
|
||||
ref<PgfLincatBackref> backref = vector_elem(field->backrefs,i);
|
||||
add_to_index(concrete,backref->lin,backref->seq_index,backref->dot+1);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
PgfSymbol sym = *vector_elem(&seq->syms,dot);
|
||||
switch (ref<PgfSymbol>::get_tag(sym)) {
|
||||
case PgfSymbolCat::tag: {
|
||||
auto sym_cat = ref<PgfSymbolCat>::untagged(sym);
|
||||
|
||||
ref<PgfHypo> hypo =
|
||||
vector_elem(lin->absfun->type->hypos,sym_cat->d);
|
||||
ref<PgfConcrLincat> lincat =
|
||||
namespace_lookup(concrete->lincats,
|
||||
&hypo->type->name);
|
||||
if (lincat == 0)
|
||||
throw pgf_error("Found a lin which uses a category without a lincat");
|
||||
|
||||
size_t max_values = 1;
|
||||
size_t *ranges = (size_t *)
|
||||
alloca(sym_cat->r.n_terms*sizeof(size_t));
|
||||
for (size_t i = 0; i < sym_cat->r.n_terms; i++) {
|
||||
for (size_t j = 0; j < result->vars->len; j++) {
|
||||
auto var_range = vector_elem(result->vars, j);
|
||||
if (var_range->var == sym_cat->r.terms[i].var) {
|
||||
ranges[i] = vector_elem(result->vars, j)->range;
|
||||
max_values *= var_range->range;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool is_epsilon = false;
|
||||
for (size_t values = 0; values < max_values; values++) {
|
||||
size_t v = values;
|
||||
size_t index = sym_cat->r.i0;
|
||||
for (size_t i = 0; i < sym_cat->r.n_terms; i++) {
|
||||
index += sym_cat->r.terms[i].factor * (v % ranges[i]);
|
||||
v = v / ranges[i];
|
||||
}
|
||||
|
||||
ref<Vector<PgfLincatBackref>> backrefs =
|
||||
vector_elem(lincat->fields,index)->backrefs;
|
||||
backrefs =
|
||||
vector_resize(backrefs, ((backrefs == 0) ? 0 : backrefs->len)+1,
|
||||
PgfDB::get_txn_id());
|
||||
vector_elem(lincat->fields,index)->backrefs = backrefs;
|
||||
ref<PgfLincatBackref> backref =
|
||||
vector_elem(backrefs,backrefs->len-1);
|
||||
backref->lin = lin;
|
||||
backref->seq_index = seq_index;
|
||||
backref->dot = dot;
|
||||
|
||||
if (vector_elem(lincat->fields,index)->epsilons != 0)
|
||||
is_epsilon = true;
|
||||
}
|
||||
|
||||
if (is_epsilon)
|
||||
add_to_index(concrete,lin,seq_index,dot+1);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
ref<PgfConcrLin> PgfReader::read_lin()
|
||||
{
|
||||
ref<PgfConcrLin> lin = read_name(&PgfConcrLin::name);
|
||||
lin->absfun = namespace_lookup(abstract->funs, &lin->name);
|
||||
if (lin->absfun == 0)
|
||||
throw pgf_error("Found a lin without a fun");
|
||||
|
||||
auto args = read_vector(&PgfReader::read_parg);
|
||||
auto res = read_vector(&PgfReader::read_presult2);
|
||||
auto seqs = read_seq_ids(lin.tagged());
|
||||
|
||||
lin->args = args;
|
||||
lin->res = res;
|
||||
lin->seqs = seqs;
|
||||
lin->lincat =
|
||||
namespace_lookup(concrete->lincats, &lin->absfun->type->name);
|
||||
if (lin->lincat == 0)
|
||||
throw pgf_error("Found a lin which uses a category without a lincat");
|
||||
|
||||
for (size_t seq_index = 0; seq_index < lin->seqs->len; seq_index++) {
|
||||
add_to_index(concrete, lin, seq_index, 0);
|
||||
}
|
||||
auto args = read_vector(&PgfReader::read_parg);
|
||||
auto res = read_vector(&PgfReader::read_presult2);
|
||||
auto seqs = read_seq_ids(lin->lincat, lin.tagged());
|
||||
|
||||
lin->args = args;
|
||||
lin->res = res;
|
||||
lin->seqs = seqs;
|
||||
|
||||
return lin;
|
||||
}
|
||||
@@ -866,6 +772,9 @@ ref<PgfConcr> PgfReader::read_concrete()
|
||||
auto printnames = read_namespace<PgfConcrPrintname>(&PgfReader::read_printname);
|
||||
concrete->printnames = printnames;
|
||||
|
||||
PgfLRTableMaker maker(abstract, concrete);
|
||||
concrete->lrtable = maker.make();
|
||||
|
||||
return concrete;
|
||||
}
|
||||
|
||||
|
||||
@@ -71,14 +71,14 @@ public:
|
||||
void merge_abstract(ref<PgfAbstr> abstract);
|
||||
|
||||
ref<PgfConcrLincat> read_lincat();
|
||||
ref<Vector<PgfLincatField>> read_lincat_fields(ref<PgfConcrLincat> lincat);
|
||||
ref<Vector<ref<PgfText>>> read_lincat_fields(ref<PgfConcrLincat> lincat);
|
||||
ref<PgfLParam> read_lparam();
|
||||
void read_variable_range(ref<PgfVariableRange> var_info);
|
||||
void read_parg(ref<PgfPArg> parg);
|
||||
ref<PgfPResult> read_presult();
|
||||
PgfSymbol read_symbol();
|
||||
ref<PgfSequence> read_seq();
|
||||
ref<Vector<ref<PgfSequence>>> read_seq_ids(object container);
|
||||
ref<Vector<ref<PgfSequence>>> read_seq_ids(ref<PgfConcrLincat> lincat, object container);
|
||||
PgfPhrasetable read_phrasetable(size_t len);
|
||||
PgfPhrasetable read_phrasetable();
|
||||
ref<PgfConcrLin> read_lin();
|
||||
|
||||
@@ -391,9 +391,9 @@ void PgfWriter::write_lincat(ref<PgfConcrLincat> lincat)
|
||||
write_vector(lincat->seqs, &PgfWriter::write_seq_id);
|
||||
}
|
||||
|
||||
void PgfWriter::write_lincat_field(ref<PgfLincatField> field)
|
||||
void PgfWriter::write_lincat_field(ref<ref<PgfText>> field)
|
||||
{
|
||||
write_text(field->name);
|
||||
write_text(*field);
|
||||
}
|
||||
|
||||
void PgfWriter::write_lin(ref<PgfConcrLin> lin)
|
||||
|
||||
@@ -39,7 +39,7 @@ public:
|
||||
void write_abstract(ref<PgfAbstr> abstract);
|
||||
|
||||
void write_lincat(ref<PgfConcrLincat> lincat);
|
||||
void write_lincat_field(ref<PgfLincatField> field);
|
||||
void write_lincat_field(ref<ref<PgfText>> field);
|
||||
void write_variable_range(ref<PgfVariableRange> var);
|
||||
void write_lparam(ref<PgfLParam> lparam);
|
||||
void write_parg(ref<PgfPArg> linarg);
|
||||
|
||||
Reference in New Issue
Block a user