forked from GitHub/gf-core
support syntagmatic words
This commit is contained in:
@@ -269,6 +269,12 @@ struct PGF_INTERNAL_DECL PgfLRShift {
|
||||
size_t r;
|
||||
};
|
||||
|
||||
// A transition of the LR automaton that is taken by matching a run of
// literal token symbols. The run is identified by its starting position:
// symbol number sym_idx inside the sequence seq.
struct PGF_INTERNAL_DECL PgfLRShiftKS {
|
||||
size_t next_state; // id of the state entered once the tokens have been matched
|
||||
ref<PgfSequence> seq; // sequence that contains the token symbols
|
||||
size_t sym_idx; // index in seq->syms of the first token symbol of the run
|
||||
};
|
||||
|
||||
struct PgfLRReduceArg;
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfLRProduction {
|
||||
@@ -300,6 +306,7 @@ struct PGF_INTERNAL_DECL PgfLRReduce {
|
||||
|
||||
// One state of the stored LR table: its outgoing transitions and the
// reductions that may be performed in it.
struct PGF_INTERNAL_DECL PgfLRState {
|
||||
ref<Vector<PgfLRShift>> shifts; // transitions labelled with a lincat and an index
|
||||
ref<Vector<PgfLRShiftKS>> tokens; // transitions on token runs; 0 when the state has none
|
||||
ref<Vector<PgfLRReduce>> reductions; // one entry per completed item of the state
|
||||
};
|
||||
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
#include <algorithm>
|
||||
|
||||
//#define DEBUG_STATE_CREATION
|
||||
//#define DEBUG_AUTOMATON
|
||||
//#define DEBUG_PARSER
|
||||
#define DEBUG_AUTOMATON
|
||||
#define DEBUG_PARSER
|
||||
//#define DEBUG_GENERATOR
|
||||
|
||||
struct PgfLRTableMaker::CCat {
|
||||
@@ -356,12 +356,39 @@ void *PgfLRTableMaker::Item::operator new(size_t size, Item *item) {
|
||||
return new_item;
|
||||
}
|
||||
|
||||
// Ordering for the keys of State::tokens. A key is a (sequence, index) pair;
// only the run of consecutive token (KS) symbols that starts at the index
// takes part in the comparison. The runs are compared token by token, and a
// run that ends first sorts before a longer run sharing the same beginning.
bool PgfLRTableMaker::CompareKey3::operator() (const Key3& k1, const Key3& k2) const {
    // True when position idx of the key's sequence exists and holds a token symbol.
    auto at_token = [](const Key3 &key, size_t idx) -> bool {
        return idx < key.first->syms.len &&
               ref<PgfSymbol>::get_tag(key.first->syms.data[idx]) == PgfSymbolKS::tag;
    };

    size_t pos1 = k1.second;
    size_t pos2 = k2.second;

    while (true) {
        // k1 has run out of tokens: it is smaller only if k2 still has some.
        if (!at_token(k1, pos1))
            return at_token(k2, pos2);

        // k1 still has a token but k2 does not, so k1 is not smaller.
        if (!at_token(k2, pos2))
            return false;

        auto tok1 = ref<PgfSymbolKS>::untagged(k1.first->syms.data[pos1]);
        auto tok2 = ref<PgfSymbolKS>::untagged(k2.first->syms.data[pos2]);

        // Only the first result slot decides the order.
        // NOTE(review): presumably that slot is the case-insensitive
        // comparison - confirm against texticmp.
        int cmp[2] = {0,0};
        texticmp(&tok1->token, &tok2->token, cmp);
        if (cmp[0] != 0)
            return cmp[0] < 0;

        pos1++;
        pos2++;
    }
}
|
||||
|
||||
struct PgfLRTableMaker::State {
|
||||
size_t id;
|
||||
std::vector<Item*> items;
|
||||
std::vector<Item*> completed;
|
||||
std::map<Key1,State*,CompareKey1> ccats1;
|
||||
std::map<Key2,State*,CompareKey2> ccats2;
|
||||
std::map<Key3,State*,CompareKey3> tokens;
|
||||
|
||||
State() {
|
||||
this->id = 0;
|
||||
@@ -651,9 +678,19 @@ void PgfLRTableMaker::symbol(State *state, Fold fold, Item *item, PgfSymbol sym)
|
||||
auto symks = ref<PgfSymbolKS>::untagged(sym);
|
||||
if (fold == PROBE) {
|
||||
item->ccat->productive = true;
|
||||
} else {
|
||||
auto &next_state = state->tokens[Key3(item->seq,item->sym_idx)];
|
||||
if (next_state == NULL) {
|
||||
next_state = new State;
|
||||
}
|
||||
while (item->sym_idx < item->seq->syms.len) {
|
||||
if (ref<PgfSymbol>::get_tag(item->seq->syms.data[item->sym_idx]) != PgfSymbolKS::tag)
|
||||
break;
|
||||
item->sym_idx++;
|
||||
}
|
||||
item->stk_size++;
|
||||
next_state->push_item(item);
|
||||
}
|
||||
if (item->ref_cnt == 0)
|
||||
delete item;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@@ -879,7 +916,7 @@ void PgfLRTableMaker::complete(State *state, Fold fold, Item *item)
|
||||
}
|
||||
}
|
||||
|
||||
void PgfLRTableMaker::transition(PgfConcrLincat *lincat, size_t lin_idx, State *&state)
|
||||
void PgfLRTableMaker::internalize_state(State *&state)
|
||||
{
|
||||
MD5Context ctxt;
|
||||
auto begin = state->items.begin();
|
||||
@@ -912,11 +949,6 @@ void PgfLRTableMaker::transition(PgfConcrLincat *lincat, size_t lin_idx, State *
|
||||
delete state;
|
||||
state = next_state;
|
||||
}
|
||||
|
||||
#if defined(DEBUG_AUTOMATON)
|
||||
fprintf(stderr, "%s.%zu: state %ld\n",
|
||||
lincat->name.text, lin_idx, state->id);
|
||||
#endif
|
||||
}
|
||||
|
||||
ref<PgfLRTable> PgfLRTableMaker::make()
|
||||
@@ -945,10 +977,38 @@ ref<PgfLRTable> PgfLRTableMaker::make()
|
||||
}
|
||||
|
||||
for (auto &i : state->ccats1) {
|
||||
transition(i.first.first, i.first.second, i.second);
|
||||
internalize_state(i.second);
|
||||
#if defined(DEBUG_AUTOMATON)
|
||||
fprintf(stderr, "%s.%zu: state %ld\n",
|
||||
i.first.first->name.text, i.first.second, i.second->id);
|
||||
#endif
|
||||
}
|
||||
for (auto &i : state->ccats2) {
|
||||
transition(i.first.first->lincat, i.first.second, i.second);
|
||||
internalize_state(i.second);
|
||||
#if defined(DEBUG_AUTOMATON)
|
||||
fprintf(stderr, "%s.%zu: state %ld\n",
|
||||
i.first.first->lincat->name.text, i.first.second, i.second->id);
|
||||
#endif
|
||||
}
|
||||
for (auto &i : state->tokens) {
|
||||
internalize_state(i.second);
|
||||
#if defined(DEBUG_AUTOMATON)
|
||||
PgfPrinter printer(NULL, 0, NULL);
|
||||
size_t sym_idx = i.first.second;
|
||||
ref<PgfSequence> seq = i.first.first;
|
||||
while (sym_idx < seq->syms.len) {
|
||||
PgfSymbol sym = seq->syms.data[sym_idx];
|
||||
if (ref<PgfSymbol>::get_tag(sym) != PgfSymbolKS::tag)
|
||||
break;
|
||||
printer.symbol(sym);
|
||||
sym_idx++;
|
||||
}
|
||||
printer.nprintf(64, ": state %ld\n", i.second->id);
|
||||
|
||||
PgfText *text = printer.get_text();
|
||||
fputs(text->text, stderr);
|
||||
free(text);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@@ -971,6 +1031,18 @@ ref<PgfLRTable> PgfLRTableMaker::make()
|
||||
shift->next_state = i.second->id;
|
||||
}
|
||||
|
||||
ref<Vector<PgfLRShiftKS>> tokens = 0;
|
||||
if (state->tokens.size() > 0) {
|
||||
size_t index = 0;
|
||||
tokens = vector_new<PgfLRShiftKS>(state->tokens.size());
|
||||
for (auto i : state->tokens) {
|
||||
ref<PgfLRShiftKS> shift = vector_elem(tokens,index++);
|
||||
shift->seq = i.first.first;
|
||||
shift->sym_idx = i.first.second;
|
||||
shift->next_state = i.second->id;
|
||||
}
|
||||
}
|
||||
|
||||
auto reductions = vector_new<PgfLRReduce>(state->completed.size());
|
||||
for (size_t i = 0; i < state->completed.size(); i++) {
|
||||
Item *item = state->completed[i];
|
||||
@@ -993,6 +1065,7 @@ ref<PgfLRTable> PgfLRTableMaker::make()
|
||||
|
||||
ref<PgfLRState> lrstate = vector_elem(lrtable, state->id);
|
||||
lrstate->shifts = shifts;
|
||||
lrstate->tokens = tokens;
|
||||
lrstate->reductions = reductions;
|
||||
}
|
||||
return lrtable;
|
||||
@@ -1111,19 +1184,38 @@ void PgfParser::print_prod(Choice *choice, Production *prod)
|
||||
free(text);
|
||||
}
|
||||
|
||||
void PgfParser::print_transition(StackNode *source, StackNode *target, Stage *stage)
|
||||
void PgfParser::print_transition(StackNode *source, StackNode *target, Stage *stage, ref<PgfLRShiftKS> shift)
|
||||
{
|
||||
fprintf(stderr, "state %ld --- ?%d ---> state %ld (position %zu-%zu, nodes %zu)\n",
|
||||
source->state_id, target->choice->fid, target->state_id,
|
||||
stage->start.pos, stage->end.pos,
|
||||
stage->nodes.size());
|
||||
PgfPrinter printer(NULL, 0, m);
|
||||
printer.nprintf(64, "state %ld --- ", source->state_id);
|
||||
if (target->choice != 0) {
|
||||
printer.nprintf(32, "?%d", target->choice->fid);
|
||||
}
|
||||
if (shift != 0) {
|
||||
size_t sym_idx = shift->sym_idx;
|
||||
ref<PgfSequence> seq = shift->seq;
|
||||
while (sym_idx < seq->syms.len) {
|
||||
PgfSymbol sym = seq->syms.data[sym_idx];
|
||||
if (ref<PgfSymbol>::get_tag(sym) != PgfSymbolKS::tag)
|
||||
break;
|
||||
printer.symbol(sym);
|
||||
sym_idx++;
|
||||
}
|
||||
}
|
||||
printer.nprintf(80, " ---> state %ld (position %zu-%zu, nodes %zu)\n",
|
||||
target->state_id,
|
||||
stage->start.pos, stage->end.pos, stage->nodes.size());
|
||||
PgfText *text = printer.get_text();
|
||||
fputs(text->text, stderr);
|
||||
free(text);
|
||||
}
|
||||
#endif
|
||||
|
||||
PgfParser::PgfParser(ref<PgfConcr> concr, ref<PgfConcrLincat> start, PgfText *sentence, PgfMarshaller *m, PgfUnmarshaller *u)
|
||||
PgfParser::PgfParser(ref<PgfConcr> concr, ref<PgfConcrLincat> start, PgfText *sentence, bool case_sensitive, PgfMarshaller *m, PgfUnmarshaller *u)
|
||||
{
|
||||
this->concr = concr;
|
||||
this->sentence = sentence;
|
||||
this->case_sensitive = case_sensitive;
|
||||
this->m = m;
|
||||
this->u = u;
|
||||
this->last_fid = 0;
|
||||
@@ -1134,12 +1226,12 @@ PgfParser::PgfParser(ref<PgfConcr> concr, ref<PgfConcrLincat> start, PgfText *se
|
||||
spot.pos = 0;
|
||||
spot.ptr = (uint8_t*) sentence->text;
|
||||
|
||||
this->before = NULL;
|
||||
this->before = new Stage(spot);
|
||||
this->after = NULL;
|
||||
this->ahead = new Stage(spot);
|
||||
this->ahead = NULL;
|
||||
|
||||
StackNode *node = new StackNode(ahead, 0);
|
||||
this->ahead->nodes.push_back(node);
|
||||
StackNode *node = new StackNode(before, 0);
|
||||
this->before->nodes.push_back(node);
|
||||
}
|
||||
|
||||
void PgfParser::shift(StackNode *parent, ref<PgfConcrLincat> lincat, size_t r, Production *prod,
|
||||
@@ -1172,7 +1264,7 @@ void PgfParser::shift(StackNode *parent, ref<PgfConcrLincat> lincat, size_t r, P
|
||||
if (std::find(node->parents.begin(), node->parents.end(), parent) == node->parents.end()) {
|
||||
node->parents.push_back(parent);
|
||||
#ifdef DEBUG_PARSER
|
||||
print_transition(parent,node,after);
|
||||
print_transition(parent,node,after,0);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -1181,6 +1273,48 @@ void PgfParser::shift(StackNode *parent, ref<PgfConcrLincat> lincat, size_t r, P
|
||||
}
|
||||
}
|
||||
|
||||
// Follows the token transitions of the LR state of `parent`. Each transition
// whose token run matches the sentence at the end position of `before`
// connects `parent` to a node for the target state in the `after` stage,
// creating that node if it does not exist yet.
void PgfParser::shift(StackNode *parent, Stage *before)
{
    ref<Vector<PgfLRShiftKS>> token_shifts =
        vector_elem(concr->lrtable,parent->state_id)->tokens;
    if (token_shifts == 0)
        return;    // this state has no token transitions

    const uint8_t *sent_end = (const uint8_t *) &sentence->text[sentence->size];

    for (size_t k = 0; k < token_shifts->len; k++) {
        ref<PgfLRShiftKS> ks_shift = vector_elem(token_shifts, k);

        // Try to read the token run from the input; `spot` is advanced
        // by the comparison.
        PgfTextSpot spot = before->end;
        size_t sym_idx = ks_shift->sym_idx;
        if (text_sequence_cmp(&spot, sent_end,
                              ks_shift->seq, &sym_idx,
                              case_sensitive, SM_PARTIAL) != 0)
            continue;

        start_matches(&spot, NULL);

        // Look for an existing node with the same origin stage and target state.
        auto found = std::find_if(after->nodes.begin(), after->nodes.end(),
                                  [&](StackNode *n) {
                                      return n->stage == before &&
                                             n->state_id == ks_shift->next_state;
                                  });

        StackNode *node;
        if (found != after->nodes.end()) {
            node = *found;
        } else {
            node = new StackNode(before, ks_shift->next_state);
            node->choice = NULL;
            after->nodes.push_back(node);
        }

        // Register the parent only once.
        bool known_parent =
            std::find(node->parents.begin(), node->parents.end(), parent) != node->parents.end();
        if (!known_parent) {
            node->parents.push_back(parent);
#ifdef DEBUG_PARSER
            print_transition(parent,node,after,ks_shift);
#endif
        }

        end_matches(&spot, NULL);
    }
}
|
||||
|
||||
PgfParser::Choice *PgfParser::intersect_choice(Choice *choice1, Choice *choice2, intersection_map &im)
|
||||
{
|
||||
if (choice1 == NULL)
|
||||
@@ -1352,6 +1486,7 @@ void PgfParser::space(PgfTextSpot *start, PgfTextSpot *end, PgfExn* err)
|
||||
while (i < before->nodes.size()) {
|
||||
StackNode *node = before->nodes[i++];
|
||||
reduce_all(node);
|
||||
shift(node, before);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -53,6 +53,12 @@ class PGF_INTERNAL_DECL PgfLRTableMaker
|
||||
}
|
||||
};
|
||||
|
||||
// Key for the token transitions of a state: a sequence together with the
// index of a symbol in it.
typedef std::pair<ref<PgfSequence>,size_t> Key3;
|
||||
|
||||
// Comparator used for the map of token transitions (State::tokens).
struct PGF_INTERNAL_DECL CompareKey3 : std::less<Key3> {
|
||||
// Returns true when k1 is ordered before k2.
bool operator() (const Key3& k1, const Key3& k2) const;
|
||||
};
|
||||
|
||||
ref<PgfAbstr> abstr;
|
||||
ref<PgfConcr> concr;
|
||||
|
||||
@@ -81,7 +87,7 @@ class PGF_INTERNAL_DECL PgfLRTableMaker
|
||||
void print_production(CCat *ccat, Production *prod);
|
||||
void print_item(Item *item);
|
||||
|
||||
void transition(PgfConcrLincat *lincat, size_t lin_idx, State *&state);
|
||||
void internalize_state(State *&state);
|
||||
|
||||
public:
|
||||
PgfLRTableMaker(ref<PgfAbstr> abstr, ref<PgfConcr> concr);
|
||||
@@ -95,6 +101,7 @@ class PGF_INTERNAL_DECL PgfParser : public PgfPhraseScanner, public PgfExprEnum
|
||||
{
|
||||
ref<PgfConcr> concr;
|
||||
PgfText *sentence;
|
||||
bool case_sensitive;
|
||||
PgfMarshaller *m;
|
||||
PgfUnmarshaller *u;
|
||||
|
||||
@@ -119,6 +126,7 @@ class PGF_INTERNAL_DECL PgfParser : public PgfPhraseScanner, public PgfExprEnum
|
||||
|
||||
void shift(StackNode *parent, ref<PgfConcrLincat> lincat, size_t r, Production *prod,
|
||||
Stage *before, Stage *after);
|
||||
void shift(StackNode *parent, Stage *before);
|
||||
void reduce(StackNode *parent, ref<PgfConcrLin> lin, ref<PgfLRReduce> red,
|
||||
size_t n, std::vector<Choice*> &args,
|
||||
Stage *before, Stage *after);
|
||||
@@ -127,7 +135,7 @@ class PGF_INTERNAL_DECL PgfParser : public PgfPhraseScanner, public PgfExprEnum
|
||||
size_t n, std::vector<Choice*> &args);
|
||||
void reduce_all(StackNode *state);
|
||||
void print_prod(Choice *choice, Production *prod);
|
||||
void print_transition(StackNode *source, StackNode *target, Stage *stage);
|
||||
void print_transition(StackNode *source, StackNode *target, Stage *stage, ref<PgfLRShiftKS> shift);
|
||||
|
||||
typedef std::map<std::pair<Choice*,Choice*>,Choice*> intersection_map;
|
||||
|
||||
@@ -144,7 +152,7 @@ class PGF_INTERNAL_DECL PgfParser : public PgfPhraseScanner, public PgfExprEnum
|
||||
void release_expr_state(ExprState *state);
|
||||
|
||||
public:
|
||||
PgfParser(ref<PgfConcr> concr, ref<PgfConcrLincat> start, PgfText *sentence, PgfMarshaller *m, PgfUnmarshaller *u);
|
||||
PgfParser(ref<PgfConcr> concr, ref<PgfConcrLincat> start, PgfText *sentence, bool case_sensitive, PgfMarshaller *m, PgfUnmarshaller *u);
|
||||
|
||||
virtual void space(PgfTextSpot *start, PgfTextSpot *end, PgfExn* err);
|
||||
virtual void start_matches(PgfTextSpot *end, PgfExn* err);
|
||||
|
||||
@@ -2743,7 +2743,7 @@ PgfExprEnum *pgf_parse(PgfDB *db, PgfConcrRevision revision,
|
||||
if (lincat_u.lincat == 0)
|
||||
return 0;
|
||||
|
||||
PgfParser *parser = new PgfParser(concr, lincat_u.lincat, sentence, m, u);
|
||||
PgfParser *parser = new PgfParser(concr, lincat_u.lincat, sentence, case_sensitive, m, u);
|
||||
phrasetable_lookup_cohorts(concr->phrasetable,
|
||||
sentence, case_sensitive,
|
||||
parser, err);
|
||||
@@ -3170,6 +3170,24 @@ pgf_graphviz_lr_automaton(PgfDB *db, PgfConcrRevision revision,
|
||||
printer.efun(&shift->lincat->name);
|
||||
printer.nprintf(16, ".%zu\"];\n", shift->r);
|
||||
}
|
||||
|
||||
for (size_t j = 0; j < state->tokens->len; j++) {
|
||||
ref<PgfLRShiftKS> shift = vector_elem(state->tokens, j);
|
||||
printer.nprintf(16, " s%zu -> s%zu [label=\"", i, shift->next_state);
|
||||
size_t sym_idx = shift->sym_idx;
|
||||
while (sym_idx < shift->seq->syms.len) {
|
||||
if (ref<PgfSymbol>::get_tag(shift->seq->syms.data[sym_idx]) != PgfSymbolKS::tag)
|
||||
break;
|
||||
if (sym_idx > shift->sym_idx)
|
||||
printer.puts(" ");
|
||||
auto symks = ref<PgfSymbolKS>::untagged(shift->seq->syms.data[sym_idx]);
|
||||
printer.puts("\\\"");
|
||||
printer.put_esc_str(&symks->token);
|
||||
printer.puts("\\\"");
|
||||
sym_idx++;
|
||||
}
|
||||
printer.puts("\"];\n");
|
||||
}
|
||||
}
|
||||
printer.puts("}");
|
||||
|
||||
|
||||
@@ -228,28 +228,33 @@ int sequence_cmp(ref<PgfSequence> seq1, ref<PgfSequence> seq2)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static
|
||||
PGF_INTERNAL
|
||||
int text_sequence_cmp(PgfTextSpot *spot, const uint8_t *end,
|
||||
ref<PgfSequence> seq,
|
||||
bool case_sensitive, bool full_match)
|
||||
ref<PgfSequence> seq, size_t *p_i,
|
||||
bool case_sensitive, SeqMatch sm)
|
||||
{
|
||||
int res1 = 0;
|
||||
|
||||
size_t i = 0;
|
||||
const uint8_t *s2 = NULL;
|
||||
const uint8_t *e2 = NULL;
|
||||
|
||||
uint8_t t = 0xff;
|
||||
if (*p_i < seq->syms.len) {
|
||||
t = ref<PgfSymbol>::get_tag(seq->syms.data[*p_i]);
|
||||
}
|
||||
|
||||
size_t count = 0;
|
||||
|
||||
for (;;) {
|
||||
if (spot->ptr >= end) {
|
||||
if (s2 < e2 || i < seq->syms.len)
|
||||
if (s2 < e2 || t == PgfSymbolKS::tag)
|
||||
return -1;
|
||||
return case_sensitive ? res1 : 0;
|
||||
}
|
||||
|
||||
if (s2 >= e2 && i >= seq->syms.len)
|
||||
return full_match ? 1 : 0;
|
||||
if (s2 >= e2 && t != PgfSymbolKS::tag) {
|
||||
return (sm == SM_FULL_MATCH) ? 1 : 0;
|
||||
}
|
||||
|
||||
uint32_t ucs1 = pgf_utf8_decode(&spot->ptr); spot->pos++;
|
||||
uint32_t ucs1i = pgf_utf8_to_upper(ucs1);
|
||||
@@ -268,16 +273,21 @@ int text_sequence_cmp(PgfTextSpot *spot, const uint8_t *end,
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t t = ref<PgfSymbol>::get_tag(seq->syms.data[i]);
|
||||
if (t != PgfSymbolKS::tag) {
|
||||
if (sm == SM_PARTIAL)
|
||||
return 0;
|
||||
return ((int) PgfSymbolKS::tag) - ((int) t);
|
||||
}
|
||||
|
||||
auto sym_ks = ref<PgfSymbolKS>::untagged(seq->syms.data[i]);
|
||||
auto sym_ks = ref<PgfSymbolKS>::untagged(seq->syms.data[*p_i]);
|
||||
s2 = (uint8_t *) &sym_ks->token.text;
|
||||
e2 = s2+sym_ks->token.size;
|
||||
|
||||
i++;
|
||||
(*p_i)++;
|
||||
t = 0xff;
|
||||
if (*p_i < seq->syms.len) {
|
||||
t = ref<PgfSymbol>::get_tag(seq->syms.data[*p_i]);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t ucs2 = pgf_utf8_decode(&s2);
|
||||
@@ -552,7 +562,8 @@ void phrasetable_lookup(PgfPhrasetable table,
|
||||
current.pos = 0;
|
||||
current.ptr = (uint8_t *) sentence->text;
|
||||
const uint8_t *end = current.ptr+sentence->size;
|
||||
int cmp = text_sequence_cmp(¤t,end,table->value.seq,case_sensitive,true);
|
||||
size_t sym_idx = 0;
|
||||
int cmp = text_sequence_cmp(¤t,end,table->value.seq,&sym_idx,case_sensitive,SM_FULL_MATCH);
|
||||
if (cmp < 0) {
|
||||
phrasetable_lookup(table->left,sentence,case_sensitive,scanner,err);
|
||||
} else if (cmp > 0) {
|
||||
@@ -662,7 +673,8 @@ void phrasetable_lookup_prefixes(PgfCohortsState *state,
|
||||
return;
|
||||
|
||||
PgfTextSpot current = state->spot;
|
||||
int cmp = text_sequence_cmp(¤t,state->end,table->value.seq,state->case_sensitive,false);
|
||||
size_t sym_idx = 0;
|
||||
int cmp = text_sequence_cmp(¤t,state->end,table->value.seq,&sym_idx,state->case_sensitive,SM_PREFIX);
|
||||
if (cmp < 0) {
|
||||
phrasetable_lookup_prefixes(state,table->left,min,max);
|
||||
} else if (cmp > 0) {
|
||||
|
||||
@@ -115,4 +115,13 @@ void phrasetable_iter(PgfConcr *concr,
|
||||
PGF_INTERNAL_DECL
|
||||
void phrasetable_release(PgfPhrasetable table);
|
||||
|
||||
// The following are used internally in the parser
|
||||
|
||||
// Matching mode for text_sequence_cmp. In this file SM_FULL_MATCH is passed by
// phrasetable_lookup, SM_PREFIX by phrasetable_lookup_prefixes and SM_PARTIAL
// by the parser when it follows a token transition.
enum SeqMatch { SM_FULL_MATCH, SM_PREFIX, SM_PARTIAL };
|
||||
|
||||
// Compares the text starting at spot->ptr (and ending at `end`) with the
// token symbols of `seq`, beginning at symbol number *p_i. Both `spot` and
// *p_i are advanced as characters and symbols are consumed. The result is
// negative, zero or positive; callers treat zero as a match.
// NOTE(review): the exact effect of each SeqMatch mode is defined in the
// implementation - confirm there before relying on it.
PGF_INTERNAL_DECL
|
||||
int text_sequence_cmp(PgfTextSpot *spot, const uint8_t *end,
|
||||
ref<PgfSequence> seq, size_t *p_i,
|
||||
bool case_sensitive, SeqMatch sm);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -45,6 +45,47 @@ void PgfPrinter::puts(const char *s)
|
||||
}
|
||||
}
|
||||
|
||||
// Prints the contents of `v` with backslash escapes for the backslash, the
// double quote, newline, carriage return, backspace, tab and the NUL
// character. All other characters are copied unchanged. No surrounding
// quotes are added.
void PgfPrinter::put_esc_str(PgfText *v)
{
    // Scratch text that receives the bytes of a single decoded character.
    // NOTE(review): assumes one character never takes more than 7 bytes.
    PgfText *scratch = (PgfText *) alloca(sizeof(PgfText)+7);

    const uint8_t *cur  = (uint8_t*) v->text;
    const uint8_t *stop = cur + v->size;
    for (; cur < stop; ) {
        const uint8_t *next = cur;
        uint32_t c = pgf_utf8_decode(&next);

        // Pick the escape sequence, if this character needs one.
        const char *escape = NULL;
        switch (c) {
        case '\\': escape = "\\\\"; break;
        case '"':  escape = "\\\""; break;
        case '\n': escape = "\\n";  break;
        case '\r': escape = "\\r";  break;
        case '\b': escape = "\\b";  break;
        case '\t': escape = "\\t";  break;
        case '\0': escape = "\\0";  break;
        default:   break;
        }

        if (escape != NULL) {
            puts(escape);
        } else {
            // Copy the raw bytes of the character and print them as they are.
            scratch->size = next-cur;
            memcpy(scratch->text, cur, scratch->size);
            scratch->text[scratch->size] = 0;
            puts(scratch);
        }

        cur = next;
    }
}
|
||||
|
||||
void PgfPrinter::nprintf(size_t buf_size, const char *format, ...)
|
||||
{
|
||||
again: {
|
||||
@@ -348,44 +389,8 @@ PgfLiteral PgfPrinter::lflt(double v)
|
||||
|
||||
// Prints the string literal `v` enclosed in double quotes. Escaping of the
// contents is delegated to put_esc_str, so they are emitted exactly once.
// Always returns 0.
PgfLiteral PgfPrinter::lstr(PgfText *v)
{
    puts("\"");
    put_esc_str(v);
    puts("\"");
    return 0;
}
|
||||
|
||||
@@ -46,6 +46,8 @@ public:
|
||||
void puts(PgfText *s);
|
||||
void puts(const char *s);
|
||||
|
||||
void put_esc_str(PgfText *v);
|
||||
|
||||
// buf_size is the expected buffer size. If larger is needed,
|
||||
// it will be allocated automatically.
|
||||
#if defined(_MSC_VER)
|
||||
|
||||
Reference in New Issue
Block a user