mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-13 13:12:51 -06:00
support syntagmatic words
This commit is contained in:
@@ -269,6 +269,12 @@ struct PGF_INTERNAL_DECL PgfLRShift {
|
|||||||
size_t r;
|
size_t r;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// A shift action of the LR automaton that is labelled with a run of token
// symbols (PgfSymbolKS). PgfLRState::tokens holds a vector of these entries.
// The run starts at seq->syms[sym_idx] and extends up to the first symbol
// that is not a PgfSymbolKS, or to the end of the sequence.
struct PGF_INTERNAL_DECL PgfLRShiftKS {
|
||||||
|
// Id of the LR state that is entered when the token run is shifted.
size_t next_state;
|
||||||
|
// Sequence that contains the token run.
ref<PgfSequence> seq;
|
||||||
|
// Index in seq->syms of the first token symbol of the run.
size_t sym_idx;
|
||||||
|
};
|
||||||
|
|
||||||
struct PgfLRReduceArg;
|
struct PgfLRReduceArg;
|
||||||
|
|
||||||
struct PGF_INTERNAL_DECL PgfLRProduction {
|
struct PGF_INTERNAL_DECL PgfLRProduction {
|
||||||
@@ -300,6 +306,7 @@ struct PGF_INTERNAL_DECL PgfLRReduce {
|
|||||||
|
|
||||||
struct PGF_INTERNAL_DECL PgfLRState {
|
struct PGF_INTERNAL_DECL PgfLRState {
|
||||||
ref<Vector<PgfLRShift>> shifts;
|
ref<Vector<PgfLRShift>> shifts;
|
||||||
|
ref<Vector<PgfLRShiftKS>> tokens;
|
||||||
ref<Vector<PgfLRReduce>> reductions;
|
ref<Vector<PgfLRReduce>> reductions;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -4,8 +4,8 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
//#define DEBUG_STATE_CREATION
|
//#define DEBUG_STATE_CREATION
|
||||||
//#define DEBUG_AUTOMATON
|
#define DEBUG_AUTOMATON
|
||||||
//#define DEBUG_PARSER
|
#define DEBUG_PARSER
|
||||||
//#define DEBUG_GENERATOR
|
//#define DEBUG_GENERATOR
|
||||||
|
|
||||||
struct PgfLRTableMaker::CCat {
|
struct PgfLRTableMaker::CCat {
|
||||||
@@ -356,12 +356,39 @@ void *PgfLRTableMaker::Item::operator new(size_t size, Item *item) {
|
|||||||
return new_item;
|
return new_item;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Ordering used for the keys of State::tokens.
//
// A key is a (sequence, index) pair and stands for the run of consecutive
// PgfSymbolKS symbols that starts at that index. Two keys are ordered by
// comparing their runs token by token with texticmp; only the first result
// slot (res[0]) is consulted (presumably the case-insensitive result --
// TODO confirm against texticmp). A run that is a proper prefix of another
// one sorts before it. Keys whose runs compare equal in every token are
// equivalent, i.e. they end up in the same map entry.
//
// Returns true when k1 sorts strictly before k2.
bool PgfLRTableMaker::CompareKey3::operator() (const Key3& k1, const Key3& k2) const {
    // True while position idx of key k still points at a token symbol.
    auto has_token = [](const Key3& k, size_t idx) {
        return idx < k.first->syms.len &&
               ref<PgfSymbol>::get_tag(k.first->syms.data[idx]) == PgfSymbolKS::tag;
    };

    size_t i = k1.second;
    size_t j = k2.second;
    for (;; i++, j++) {
        // k1 ran out of tokens: it is smaller only if k2 still has some.
        if (!has_token(k1, i))
            return has_token(k2, j);

        // k2 ran out first: k1 is the longer run, hence not smaller.
        if (!has_token(k2, j))
            return false;

        auto symks1 = ref<PgfSymbolKS>::untagged(k1.first->syms.data[i]);
        auto symks2 = ref<PgfSymbolKS>::untagged(k2.first->syms.data[j]);

        int res[2] = {0,0};
        texticmp(&symks1->token, &symks2->token, res);
        if (res[0] != 0)
            return res[0] < 0;
    }
    // No statement follows the loop: every way out of it is a return.
}
|
||||||
|
|
||||||
struct PgfLRTableMaker::State {
|
struct PgfLRTableMaker::State {
|
||||||
size_t id;
|
size_t id;
|
||||||
std::vector<Item*> items;
|
std::vector<Item*> items;
|
||||||
std::vector<Item*> completed;
|
std::vector<Item*> completed;
|
||||||
std::map<Key1,State*,CompareKey1> ccats1;
|
std::map<Key1,State*,CompareKey1> ccats1;
|
||||||
std::map<Key2,State*,CompareKey2> ccats2;
|
std::map<Key2,State*,CompareKey2> ccats2;
|
||||||
|
std::map<Key3,State*,CompareKey3> tokens;
|
||||||
|
|
||||||
State() {
|
State() {
|
||||||
this->id = 0;
|
this->id = 0;
|
||||||
@@ -651,9 +678,19 @@ void PgfLRTableMaker::symbol(State *state, Fold fold, Item *item, PgfSymbol sym)
|
|||||||
auto symks = ref<PgfSymbolKS>::untagged(sym);
|
auto symks = ref<PgfSymbolKS>::untagged(sym);
|
||||||
if (fold == PROBE) {
|
if (fold == PROBE) {
|
||||||
item->ccat->productive = true;
|
item->ccat->productive = true;
|
||||||
|
} else {
|
||||||
|
auto &next_state = state->tokens[Key3(item->seq,item->sym_idx)];
|
||||||
|
if (next_state == NULL) {
|
||||||
|
next_state = new State;
|
||||||
|
}
|
||||||
|
while (item->sym_idx < item->seq->syms.len) {
|
||||||
|
if (ref<PgfSymbol>::get_tag(item->seq->syms.data[item->sym_idx]) != PgfSymbolKS::tag)
|
||||||
|
break;
|
||||||
|
item->sym_idx++;
|
||||||
|
}
|
||||||
|
item->stk_size++;
|
||||||
|
next_state->push_item(item);
|
||||||
}
|
}
|
||||||
if (item->ref_cnt == 0)
|
|
||||||
delete item;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@@ -879,7 +916,7 @@ void PgfLRTableMaker::complete(State *state, Fold fold, Item *item)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void PgfLRTableMaker::transition(PgfConcrLincat *lincat, size_t lin_idx, State *&state)
|
void PgfLRTableMaker::internalize_state(State *&state)
|
||||||
{
|
{
|
||||||
MD5Context ctxt;
|
MD5Context ctxt;
|
||||||
auto begin = state->items.begin();
|
auto begin = state->items.begin();
|
||||||
@@ -912,11 +949,6 @@ void PgfLRTableMaker::transition(PgfConcrLincat *lincat, size_t lin_idx, State *
|
|||||||
delete state;
|
delete state;
|
||||||
state = next_state;
|
state = next_state;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(DEBUG_AUTOMATON)
|
|
||||||
fprintf(stderr, "%s.%zu: state %ld\n",
|
|
||||||
lincat->name.text, lin_idx, state->id);
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ref<PgfLRTable> PgfLRTableMaker::make()
|
ref<PgfLRTable> PgfLRTableMaker::make()
|
||||||
@@ -945,10 +977,38 @@ ref<PgfLRTable> PgfLRTableMaker::make()
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (auto &i : state->ccats1) {
|
for (auto &i : state->ccats1) {
|
||||||
transition(i.first.first, i.first.second, i.second);
|
internalize_state(i.second);
|
||||||
|
#if defined(DEBUG_AUTOMATON)
|
||||||
|
fprintf(stderr, "%s.%zu: state %ld\n",
|
||||||
|
i.first.first->name.text, i.first.second, i.second->id);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
for (auto &i : state->ccats2) {
|
for (auto &i : state->ccats2) {
|
||||||
transition(i.first.first->lincat, i.first.second, i.second);
|
internalize_state(i.second);
|
||||||
|
#if defined(DEBUG_AUTOMATON)
|
||||||
|
fprintf(stderr, "%s.%zu: state %ld\n",
|
||||||
|
i.first.first->lincat->name.text, i.first.second, i.second->id);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
for (auto &i : state->tokens) {
|
||||||
|
internalize_state(i.second);
|
||||||
|
#if defined(DEBUG_AUTOMATON)
|
||||||
|
PgfPrinter printer(NULL, 0, NULL);
|
||||||
|
size_t sym_idx = i.first.second;
|
||||||
|
ref<PgfSequence> seq = i.first.first;
|
||||||
|
while (sym_idx < seq->syms.len) {
|
||||||
|
PgfSymbol sym = seq->syms.data[sym_idx];
|
||||||
|
if (ref<PgfSymbol>::get_tag(sym) != PgfSymbolKS::tag)
|
||||||
|
break;
|
||||||
|
printer.symbol(sym);
|
||||||
|
sym_idx++;
|
||||||
|
}
|
||||||
|
printer.nprintf(64, ": state %ld\n", i.second->id);
|
||||||
|
|
||||||
|
PgfText *text = printer.get_text();
|
||||||
|
fputs(text->text, stderr);
|
||||||
|
free(text);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -971,6 +1031,18 @@ ref<PgfLRTable> PgfLRTableMaker::make()
|
|||||||
shift->next_state = i.second->id;
|
shift->next_state = i.second->id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ref<Vector<PgfLRShiftKS>> tokens = 0;
|
||||||
|
if (state->tokens.size() > 0) {
|
||||||
|
size_t index = 0;
|
||||||
|
tokens = vector_new<PgfLRShiftKS>(state->tokens.size());
|
||||||
|
for (auto i : state->tokens) {
|
||||||
|
ref<PgfLRShiftKS> shift = vector_elem(tokens,index++);
|
||||||
|
shift->seq = i.first.first;
|
||||||
|
shift->sym_idx = i.first.second;
|
||||||
|
shift->next_state = i.second->id;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
auto reductions = vector_new<PgfLRReduce>(state->completed.size());
|
auto reductions = vector_new<PgfLRReduce>(state->completed.size());
|
||||||
for (size_t i = 0; i < state->completed.size(); i++) {
|
for (size_t i = 0; i < state->completed.size(); i++) {
|
||||||
Item *item = state->completed[i];
|
Item *item = state->completed[i];
|
||||||
@@ -993,6 +1065,7 @@ ref<PgfLRTable> PgfLRTableMaker::make()
|
|||||||
|
|
||||||
ref<PgfLRState> lrstate = vector_elem(lrtable, state->id);
|
ref<PgfLRState> lrstate = vector_elem(lrtable, state->id);
|
||||||
lrstate->shifts = shifts;
|
lrstate->shifts = shifts;
|
||||||
|
lrstate->tokens = tokens;
|
||||||
lrstate->reductions = reductions;
|
lrstate->reductions = reductions;
|
||||||
}
|
}
|
||||||
return lrtable;
|
return lrtable;
|
||||||
@@ -1111,19 +1184,38 @@ void PgfParser::print_prod(Choice *choice, Production *prod)
|
|||||||
free(text);
|
free(text);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PgfParser::print_transition(StackNode *source, StackNode *target, Stage *stage)
|
void PgfParser::print_transition(StackNode *source, StackNode *target, Stage *stage, ref<PgfLRShiftKS> shift)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "state %ld --- ?%d ---> state %ld (position %zu-%zu, nodes %zu)\n",
|
PgfPrinter printer(NULL, 0, m);
|
||||||
source->state_id, target->choice->fid, target->state_id,
|
printer.nprintf(64, "state %ld --- ", source->state_id);
|
||||||
stage->start.pos, stage->end.pos,
|
if (target->choice != 0) {
|
||||||
stage->nodes.size());
|
printer.nprintf(32, "?%d", target->choice->fid);
|
||||||
|
}
|
||||||
|
if (shift != 0) {
|
||||||
|
size_t sym_idx = shift->sym_idx;
|
||||||
|
ref<PgfSequence> seq = shift->seq;
|
||||||
|
while (sym_idx < seq->syms.len) {
|
||||||
|
PgfSymbol sym = seq->syms.data[sym_idx];
|
||||||
|
if (ref<PgfSymbol>::get_tag(sym) != PgfSymbolKS::tag)
|
||||||
|
break;
|
||||||
|
printer.symbol(sym);
|
||||||
|
sym_idx++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
printer.nprintf(80, " ---> state %ld (position %zu-%zu, nodes %zu)\n",
|
||||||
|
target->state_id,
|
||||||
|
stage->start.pos, stage->end.pos, stage->nodes.size());
|
||||||
|
PgfText *text = printer.get_text();
|
||||||
|
fputs(text->text, stderr);
|
||||||
|
free(text);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
PgfParser::PgfParser(ref<PgfConcr> concr, ref<PgfConcrLincat> start, PgfText *sentence, PgfMarshaller *m, PgfUnmarshaller *u)
|
PgfParser::PgfParser(ref<PgfConcr> concr, ref<PgfConcrLincat> start, PgfText *sentence, bool case_sensitive, PgfMarshaller *m, PgfUnmarshaller *u)
|
||||||
{
|
{
|
||||||
this->concr = concr;
|
this->concr = concr;
|
||||||
this->sentence = sentence;
|
this->sentence = sentence;
|
||||||
|
this->case_sensitive = case_sensitive;
|
||||||
this->m = m;
|
this->m = m;
|
||||||
this->u = u;
|
this->u = u;
|
||||||
this->last_fid = 0;
|
this->last_fid = 0;
|
||||||
@@ -1134,12 +1226,12 @@ PgfParser::PgfParser(ref<PgfConcr> concr, ref<PgfConcrLincat> start, PgfText *se
|
|||||||
spot.pos = 0;
|
spot.pos = 0;
|
||||||
spot.ptr = (uint8_t*) sentence->text;
|
spot.ptr = (uint8_t*) sentence->text;
|
||||||
|
|
||||||
this->before = NULL;
|
this->before = new Stage(spot);
|
||||||
this->after = NULL;
|
this->after = NULL;
|
||||||
this->ahead = new Stage(spot);
|
this->ahead = NULL;
|
||||||
|
|
||||||
StackNode *node = new StackNode(ahead, 0);
|
StackNode *node = new StackNode(before, 0);
|
||||||
this->ahead->nodes.push_back(node);
|
this->before->nodes.push_back(node);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PgfParser::shift(StackNode *parent, ref<PgfConcrLincat> lincat, size_t r, Production *prod,
|
void PgfParser::shift(StackNode *parent, ref<PgfConcrLincat> lincat, size_t r, Production *prod,
|
||||||
@@ -1172,7 +1264,7 @@ void PgfParser::shift(StackNode *parent, ref<PgfConcrLincat> lincat, size_t r, P
|
|||||||
if (std::find(node->parents.begin(), node->parents.end(), parent) == node->parents.end()) {
|
if (std::find(node->parents.begin(), node->parents.end(), parent) == node->parents.end()) {
|
||||||
node->parents.push_back(parent);
|
node->parents.push_back(parent);
|
||||||
#ifdef DEBUG_PARSER
|
#ifdef DEBUG_PARSER
|
||||||
print_transition(parent,node,after);
|
print_transition(parent,node,after,0);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1181,6 +1273,48 @@ void PgfParser::shift(StackNode *parent, ref<PgfConcrLincat> lincat, size_t r, P
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Tries every token shift (PgfLRShiftKS) of parent's LR state against the
// sentence text that starts at before->end.
//
// For each entry whose comparison through text_sequence_cmp in SM_PARTIAL
// mode yields 0, a stack node for the entry's target state is looked up in
// after->nodes (or created and appended there), and parent is recorded as
// one of its parents. The node handling is bracketed by start_matches and
// end_matches calls on the text spot reached by the comparison.
void PgfParser::shift(StackNode *parent, Stage *before)
{
    ref<Vector<PgfLRShiftKS>> ks_shifts =
        vector_elem(concr->lrtable,parent->state_id)->tokens;
    // Nothing to do when the state carries no vector of token shifts.
    if (ks_shifts == 0)
        return;

    const uint8_t *sent_end = (const uint8_t *) &sentence->text[sentence->size];

    for (size_t k = 0; k < ks_shifts->len; k++) {
        ref<PgfLRShiftKS> ks = vector_elem(ks_shifts, k);

        // The comparison advances local copies only, so an entry that does
        // not match leaves before->end and the stored index untouched.
        PgfTextSpot spot = before->end;
        size_t sym_idx = ks->sym_idx;
        int cmp =
            text_sequence_cmp(&spot, sent_end,
                              ks->seq, &sym_idx,
                              case_sensitive, SM_PARTIAL);
        if (cmp != 0)
            continue;

        start_matches(&spot, NULL);

        // Reuse the node for (before, next_state) when it already exists.
        auto pos = std::find_if(after->nodes.begin(), after->nodes.end(),
                                [&](StackNode *n) {
                                    return n->stage == before &&
                                           n->state_id == ks->next_state;
                                });
        StackNode *node;
        if (pos != after->nodes.end()) {
            node = *pos;
        } else {
            node = new StackNode(before, ks->next_state);
            node->choice = NULL;
            after->nodes.push_back(node);
        }

        // Link parent to the node unless that edge is already present.
        bool linked =
            std::find(node->parents.begin(), node->parents.end(), parent) != node->parents.end();
        if (!linked) {
            node->parents.push_back(parent);
#ifdef DEBUG_PARSER
            print_transition(parent,node,after,ks);
#endif
        }

        end_matches(&spot, NULL);
    }
}
|
||||||
|
|
||||||
PgfParser::Choice *PgfParser::intersect_choice(Choice *choice1, Choice *choice2, intersection_map &im)
|
PgfParser::Choice *PgfParser::intersect_choice(Choice *choice1, Choice *choice2, intersection_map &im)
|
||||||
{
|
{
|
||||||
if (choice1 == NULL)
|
if (choice1 == NULL)
|
||||||
@@ -1352,6 +1486,7 @@ void PgfParser::space(PgfTextSpot *start, PgfTextSpot *end, PgfExn* err)
|
|||||||
while (i < before->nodes.size()) {
|
while (i < before->nodes.size()) {
|
||||||
StackNode *node = before->nodes[i++];
|
StackNode *node = before->nodes[i++];
|
||||||
reduce_all(node);
|
reduce_all(node);
|
||||||
|
shift(node, before);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -53,6 +53,12 @@ class PGF_INTERNAL_DECL PgfLRTableMaker
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Key of State::tokens: a sequence together with the index of a symbol in
// it. The key is used for items whose next symbol is a token (PgfSymbolKS).
typedef std::pair<ref<PgfSequence>,size_t> Key3;
|
||||||
|
|
||||||
|
// Comparator for Key3. The ordering is defined out of line by comparing the
// token symbols that start at the key's index, not by comparing the pair
// members themselves.
struct PGF_INTERNAL_DECL CompareKey3 : std::less<Key3> {
|
||||||
|
// Returns true when k1 sorts strictly before k2.
bool operator() (const Key3& k1, const Key3& k2) const;
|
||||||
|
};
|
||||||
|
|
||||||
ref<PgfAbstr> abstr;
|
ref<PgfAbstr> abstr;
|
||||||
ref<PgfConcr> concr;
|
ref<PgfConcr> concr;
|
||||||
|
|
||||||
@@ -81,7 +87,7 @@ class PGF_INTERNAL_DECL PgfLRTableMaker
|
|||||||
void print_production(CCat *ccat, Production *prod);
|
void print_production(CCat *ccat, Production *prod);
|
||||||
void print_item(Item *item);
|
void print_item(Item *item);
|
||||||
|
|
||||||
void transition(PgfConcrLincat *lincat, size_t lin_idx, State *&state);
|
void internalize_state(State *&state);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
PgfLRTableMaker(ref<PgfAbstr> abstr, ref<PgfConcr> concr);
|
PgfLRTableMaker(ref<PgfAbstr> abstr, ref<PgfConcr> concr);
|
||||||
@@ -95,6 +101,7 @@ class PGF_INTERNAL_DECL PgfParser : public PgfPhraseScanner, public PgfExprEnum
|
|||||||
{
|
{
|
||||||
ref<PgfConcr> concr;
|
ref<PgfConcr> concr;
|
||||||
PgfText *sentence;
|
PgfText *sentence;
|
||||||
|
bool case_sensitive;
|
||||||
PgfMarshaller *m;
|
PgfMarshaller *m;
|
||||||
PgfUnmarshaller *u;
|
PgfUnmarshaller *u;
|
||||||
|
|
||||||
@@ -119,6 +126,7 @@ class PGF_INTERNAL_DECL PgfParser : public PgfPhraseScanner, public PgfExprEnum
|
|||||||
|
|
||||||
void shift(StackNode *parent, ref<PgfConcrLincat> lincat, size_t r, Production *prod,
|
void shift(StackNode *parent, ref<PgfConcrLincat> lincat, size_t r, Production *prod,
|
||||||
Stage *before, Stage *after);
|
Stage *before, Stage *after);
|
||||||
|
void shift(StackNode *parent, Stage *before);
|
||||||
void reduce(StackNode *parent, ref<PgfConcrLin> lin, ref<PgfLRReduce> red,
|
void reduce(StackNode *parent, ref<PgfConcrLin> lin, ref<PgfLRReduce> red,
|
||||||
size_t n, std::vector<Choice*> &args,
|
size_t n, std::vector<Choice*> &args,
|
||||||
Stage *before, Stage *after);
|
Stage *before, Stage *after);
|
||||||
@@ -127,7 +135,7 @@ class PGF_INTERNAL_DECL PgfParser : public PgfPhraseScanner, public PgfExprEnum
|
|||||||
size_t n, std::vector<Choice*> &args);
|
size_t n, std::vector<Choice*> &args);
|
||||||
void reduce_all(StackNode *state);
|
void reduce_all(StackNode *state);
|
||||||
void print_prod(Choice *choice, Production *prod);
|
void print_prod(Choice *choice, Production *prod);
|
||||||
void print_transition(StackNode *source, StackNode *target, Stage *stage);
|
void print_transition(StackNode *source, StackNode *target, Stage *stage, ref<PgfLRShiftKS> shift);
|
||||||
|
|
||||||
typedef std::map<std::pair<Choice*,Choice*>,Choice*> intersection_map;
|
typedef std::map<std::pair<Choice*,Choice*>,Choice*> intersection_map;
|
||||||
|
|
||||||
@@ -144,7 +152,7 @@ class PGF_INTERNAL_DECL PgfParser : public PgfPhraseScanner, public PgfExprEnum
|
|||||||
void release_expr_state(ExprState *state);
|
void release_expr_state(ExprState *state);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
PgfParser(ref<PgfConcr> concr, ref<PgfConcrLincat> start, PgfText *sentence, PgfMarshaller *m, PgfUnmarshaller *u);
|
PgfParser(ref<PgfConcr> concr, ref<PgfConcrLincat> start, PgfText *sentence, bool case_sensitive, PgfMarshaller *m, PgfUnmarshaller *u);
|
||||||
|
|
||||||
virtual void space(PgfTextSpot *start, PgfTextSpot *end, PgfExn* err);
|
virtual void space(PgfTextSpot *start, PgfTextSpot *end, PgfExn* err);
|
||||||
virtual void start_matches(PgfTextSpot *end, PgfExn* err);
|
virtual void start_matches(PgfTextSpot *end, PgfExn* err);
|
||||||
|
|||||||
@@ -2743,7 +2743,7 @@ PgfExprEnum *pgf_parse(PgfDB *db, PgfConcrRevision revision,
|
|||||||
if (lincat_u.lincat == 0)
|
if (lincat_u.lincat == 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
PgfParser *parser = new PgfParser(concr, lincat_u.lincat, sentence, m, u);
|
PgfParser *parser = new PgfParser(concr, lincat_u.lincat, sentence, case_sensitive, m, u);
|
||||||
phrasetable_lookup_cohorts(concr->phrasetable,
|
phrasetable_lookup_cohorts(concr->phrasetable,
|
||||||
sentence, case_sensitive,
|
sentence, case_sensitive,
|
||||||
parser, err);
|
parser, err);
|
||||||
@@ -3170,6 +3170,24 @@ pgf_graphviz_lr_automaton(PgfDB *db, PgfConcrRevision revision,
|
|||||||
printer.efun(&shift->lincat->name);
|
printer.efun(&shift->lincat->name);
|
||||||
printer.nprintf(16, ".%zu\"];\n", shift->r);
|
printer.nprintf(16, ".%zu\"];\n", shift->r);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (size_t j = 0; j < state->tokens->len; j++) {
|
||||||
|
ref<PgfLRShiftKS> shift = vector_elem(state->tokens, j);
|
||||||
|
printer.nprintf(16, " s%zu -> s%zu [label=\"", i, shift->next_state);
|
||||||
|
size_t sym_idx = shift->sym_idx;
|
||||||
|
while (sym_idx < shift->seq->syms.len) {
|
||||||
|
if (ref<PgfSymbol>::get_tag(shift->seq->syms.data[sym_idx]) != PgfSymbolKS::tag)
|
||||||
|
break;
|
||||||
|
if (sym_idx > shift->sym_idx)
|
||||||
|
printer.puts(" ");
|
||||||
|
auto symks = ref<PgfSymbolKS>::untagged(shift->seq->syms.data[sym_idx]);
|
||||||
|
printer.puts("\\\"");
|
||||||
|
printer.put_esc_str(&symks->token);
|
||||||
|
printer.puts("\\\"");
|
||||||
|
sym_idx++;
|
||||||
|
}
|
||||||
|
printer.puts("\"];\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
printer.puts("}");
|
printer.puts("}");
|
||||||
|
|
||||||
|
|||||||
@@ -228,28 +228,33 @@ int sequence_cmp(ref<PgfSequence> seq1, ref<PgfSequence> seq2)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
PGF_INTERNAL
|
||||||
int text_sequence_cmp(PgfTextSpot *spot, const uint8_t *end,
|
int text_sequence_cmp(PgfTextSpot *spot, const uint8_t *end,
|
||||||
ref<PgfSequence> seq,
|
ref<PgfSequence> seq, size_t *p_i,
|
||||||
bool case_sensitive, bool full_match)
|
bool case_sensitive, SeqMatch sm)
|
||||||
{
|
{
|
||||||
int res1 = 0;
|
int res1 = 0;
|
||||||
|
|
||||||
size_t i = 0;
|
|
||||||
const uint8_t *s2 = NULL;
|
const uint8_t *s2 = NULL;
|
||||||
const uint8_t *e2 = NULL;
|
const uint8_t *e2 = NULL;
|
||||||
|
|
||||||
|
uint8_t t = 0xff;
|
||||||
|
if (*p_i < seq->syms.len) {
|
||||||
|
t = ref<PgfSymbol>::get_tag(seq->syms.data[*p_i]);
|
||||||
|
}
|
||||||
|
|
||||||
size_t count = 0;
|
size_t count = 0;
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if (spot->ptr >= end) {
|
if (spot->ptr >= end) {
|
||||||
if (s2 < e2 || i < seq->syms.len)
|
if (s2 < e2 || t == PgfSymbolKS::tag)
|
||||||
return -1;
|
return -1;
|
||||||
return case_sensitive ? res1 : 0;
|
return case_sensitive ? res1 : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (s2 >= e2 && i >= seq->syms.len)
|
if (s2 >= e2 && t != PgfSymbolKS::tag) {
|
||||||
return full_match ? 1 : 0;
|
return (sm == SM_FULL_MATCH) ? 1 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t ucs1 = pgf_utf8_decode(&spot->ptr); spot->pos++;
|
uint32_t ucs1 = pgf_utf8_decode(&spot->ptr); spot->pos++;
|
||||||
uint32_t ucs1i = pgf_utf8_to_upper(ucs1);
|
uint32_t ucs1i = pgf_utf8_to_upper(ucs1);
|
||||||
@@ -268,16 +273,21 @@ int text_sequence_cmp(PgfTextSpot *spot, const uint8_t *end,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t t = ref<PgfSymbol>::get_tag(seq->syms.data[i]);
|
|
||||||
if (t != PgfSymbolKS::tag) {
|
if (t != PgfSymbolKS::tag) {
|
||||||
|
if (sm == SM_PARTIAL)
|
||||||
|
return 0;
|
||||||
return ((int) PgfSymbolKS::tag) - ((int) t);
|
return ((int) PgfSymbolKS::tag) - ((int) t);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto sym_ks = ref<PgfSymbolKS>::untagged(seq->syms.data[i]);
|
auto sym_ks = ref<PgfSymbolKS>::untagged(seq->syms.data[*p_i]);
|
||||||
s2 = (uint8_t *) &sym_ks->token.text;
|
s2 = (uint8_t *) &sym_ks->token.text;
|
||||||
e2 = s2+sym_ks->token.size;
|
e2 = s2+sym_ks->token.size;
|
||||||
|
|
||||||
i++;
|
(*p_i)++;
|
||||||
|
t = 0xff;
|
||||||
|
if (*p_i < seq->syms.len) {
|
||||||
|
t = ref<PgfSymbol>::get_tag(seq->syms.data[*p_i]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t ucs2 = pgf_utf8_decode(&s2);
|
uint32_t ucs2 = pgf_utf8_decode(&s2);
|
||||||
@@ -552,7 +562,8 @@ void phrasetable_lookup(PgfPhrasetable table,
|
|||||||
current.pos = 0;
|
current.pos = 0;
|
||||||
current.ptr = (uint8_t *) sentence->text;
|
current.ptr = (uint8_t *) sentence->text;
|
||||||
const uint8_t *end = current.ptr+sentence->size;
|
const uint8_t *end = current.ptr+sentence->size;
|
||||||
int cmp = text_sequence_cmp(¤t,end,table->value.seq,case_sensitive,true);
|
size_t sym_idx = 0;
|
||||||
|
int cmp = text_sequence_cmp(¤t,end,table->value.seq,&sym_idx,case_sensitive,SM_FULL_MATCH);
|
||||||
if (cmp < 0) {
|
if (cmp < 0) {
|
||||||
phrasetable_lookup(table->left,sentence,case_sensitive,scanner,err);
|
phrasetable_lookup(table->left,sentence,case_sensitive,scanner,err);
|
||||||
} else if (cmp > 0) {
|
} else if (cmp > 0) {
|
||||||
@@ -662,7 +673,8 @@ void phrasetable_lookup_prefixes(PgfCohortsState *state,
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
PgfTextSpot current = state->spot;
|
PgfTextSpot current = state->spot;
|
||||||
int cmp = text_sequence_cmp(¤t,state->end,table->value.seq,state->case_sensitive,false);
|
size_t sym_idx = 0;
|
||||||
|
int cmp = text_sequence_cmp(¤t,state->end,table->value.seq,&sym_idx,state->case_sensitive,SM_PREFIX);
|
||||||
if (cmp < 0) {
|
if (cmp < 0) {
|
||||||
phrasetable_lookup_prefixes(state,table->left,min,max);
|
phrasetable_lookup_prefixes(state,table->left,min,max);
|
||||||
} else if (cmp > 0) {
|
} else if (cmp > 0) {
|
||||||
|
|||||||
@@ -115,4 +115,13 @@ void phrasetable_iter(PgfConcr *concr,
|
|||||||
PGF_INTERNAL_DECL
|
PGF_INTERNAL_DECL
|
||||||
void phrasetable_release(PgfPhrasetable table);
|
void phrasetable_release(PgfPhrasetable table);
|
||||||
|
|
||||||
|
// The following are used internally in the parser
|
||||||
|
|
||||||
|
// Matching mode passed to text_sequence_cmp:
//   SM_FULL_MATCH - used by phrasetable_lookup;
//   SM_PREFIX     - used by phrasetable_lookup_prefixes;
//   SM_PARTIAL    - used by the LR parser when it shifts over token symbols.
enum SeqMatch { SM_FULL_MATCH, SM_PREFIX, SM_PARTIAL };
|
||||||
|
|
||||||
|
// Compares the text that starts at *spot (and ends at `end`) with the token
// symbols of `seq`, starting from the symbol at index *p_i.
// Both *spot and *p_i are advanced as text and symbols are consumed, so
// callers that need the original position should pass copies.
// The result is negative, zero or positive; callers in this code base treat
// zero as a match and use the sign to pick a search direction.
PGF_INTERNAL_DECL
|
||||||
|
int text_sequence_cmp(PgfTextSpot *spot, const uint8_t *end,
|
||||||
|
ref<PgfSequence> seq, size_t *p_i,
|
||||||
|
bool case_sensitive, SeqMatch sm);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -45,6 +45,47 @@ void PgfPrinter::puts(const char *s)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Writes the text v to the printer, replacing the characters backslash,
// double quote, newline, carriage return, backspace, tab and NUL with the
// two-character escapes \\ \" \n \r \b \t and \0. Every other character is
// copied through unchanged, one decoded UTF-8 character at a time.
// No surrounding quotes are emitted; that is left to the caller.
void PgfPrinter::put_esc_str(PgfText *v)
{
    // Scratch PgfText with 7 bytes of room for the text of one character.
    PgfText *one_char = (PgfText *) alloca(sizeof(PgfText)+7);

    const uint8_t* cur = (uint8_t*) v->text;
    const uint8_t* const limit = cur + v->size;
    while (cur < limit) {
        // Decode one character; `next` ends up just past its encoding.
        const uint8_t* next = cur;
        const uint32_t c = pgf_utf8_decode(&next);

        // Pick the escape sequence, if the character needs one.
        const char *escape = NULL;
        switch (c) {
        case '\\': escape = "\\\\"; break;
        case '"':  escape = "\\\""; break;
        case '\n': escape = "\\n";  break;
        case '\r': escape = "\\r";  break;
        case '\b': escape = "\\b";  break;
        case '\t': escape = "\\t";  break;
        case '\0': escape = "\\0";  break;
        default:   break;
        }

        if (escape != NULL) {
            puts(escape);
        } else {
            // Copy the raw bytes of the character and print them as they are.
            one_char->size = next-cur;
            memcpy(one_char->text, cur, one_char->size);
            one_char->text[one_char->size] = 0;
            puts(one_char);
        }

        cur = next;
    }
}
|
||||||
|
|
||||||
void PgfPrinter::nprintf(size_t buf_size, const char *format, ...)
|
void PgfPrinter::nprintf(size_t buf_size, const char *format, ...)
|
||||||
{
|
{
|
||||||
again: {
|
again: {
|
||||||
@@ -348,44 +389,8 @@ PgfLiteral PgfPrinter::lflt(double v)
|
|||||||
|
|
||||||
PgfLiteral PgfPrinter::lstr(PgfText *v)
|
PgfLiteral PgfPrinter::lstr(PgfText *v)
|
||||||
{
|
{
|
||||||
PgfText *charbuf = (PgfText *) alloca(sizeof(PgfText)+7);
|
|
||||||
|
|
||||||
puts("\"");
|
puts("\"");
|
||||||
const uint8_t* start = (uint8_t*) v->text;
|
put_esc_str(v);
|
||||||
const uint8_t* end = start + v->size;
|
|
||||||
while (start < end) {
|
|
||||||
const uint8_t* s = start;
|
|
||||||
uint32_t c = pgf_utf8_decode(&s);
|
|
||||||
switch (c) {
|
|
||||||
case '\\':
|
|
||||||
puts("\\\\");
|
|
||||||
break;
|
|
||||||
case '"':
|
|
||||||
puts("\\\"");
|
|
||||||
break;
|
|
||||||
case '\n':
|
|
||||||
puts("\\n");
|
|
||||||
break;
|
|
||||||
case '\r':
|
|
||||||
puts("\\r");
|
|
||||||
break;
|
|
||||||
case '\b':
|
|
||||||
puts("\\b");
|
|
||||||
break;
|
|
||||||
case '\t':
|
|
||||||
puts("\\t");
|
|
||||||
break;
|
|
||||||
case '\0':
|
|
||||||
puts("\\0");
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
charbuf->size = s-start;
|
|
||||||
memcpy(charbuf->text, start, charbuf->size);
|
|
||||||
charbuf->text[charbuf->size] = 0;
|
|
||||||
puts(charbuf);
|
|
||||||
}
|
|
||||||
start = s;
|
|
||||||
}
|
|
||||||
puts("\"");
|
puts("\"");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -46,6 +46,8 @@ public:
|
|||||||
void puts(PgfText *s);
|
void puts(PgfText *s);
|
||||||
void puts(const char *s);
|
void puts(const char *s);
|
||||||
|
|
||||||
|
void put_esc_str(PgfText *v);
|
||||||
|
|
||||||
// buf_size is the expected buffer size. If larger is needed,
|
// buf_size is the expected buffer size. If larger is needed,
|
||||||
// it will be allocated automatically.
|
// it will be allocated automatically.
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
|
|||||||
Reference in New Issue
Block a user