1
0
forked from GitHub/gf-core

an API to create unique function names

This commit is contained in:
Krasimir Angelov
2022-12-05 08:11:43 +01:00
parent a6aa6c2a5a
commit 8f7e4c084c
12 changed files with 284 additions and 84 deletions

View File

@@ -253,15 +253,16 @@ struct PGF_INTERNAL_DECL PgfConcrLin {
static void release(ref<PgfConcrLin> lin);
};
struct PGF_INTERNAL_DECL PgfLincatBackref {
struct PGF_INTERNAL_DECL PgfLinSeqIndex {
ref<PgfConcrLin> lin;
size_t seq_index;
};
struct PGF_INTERNAL_DECL PgfLincatBackref : public PgfLinSeqIndex {
size_t dot;
};
struct PGF_INTERNAL_DECL PgfLincatEpsilon {
ref<PgfConcrLin> lin;
size_t seq_index;
struct PGF_INTERNAL_DECL PgfLincatEpsilon : public PgfLinSeqIndex {
};
struct PGF_INTERNAL_DECL PgfConcrPrintname {

View File

@@ -398,6 +398,7 @@ PgfDB::PgfDB(const char* filepath, int flags, int mode) {
last_free_block = 0;
last_free_block_size = 0;
last_free_block_txn_id= 0;
seed = time(NULL);
}
PGF_INTERNAL

View File

@@ -64,6 +64,7 @@ private:
const char *filepath;
malloc_state* ms;
unsigned char* base;
unsigned long int seed;
// The following four fields are normally equal to
// the corresponding fields in the malloc_state.
@@ -133,6 +134,11 @@ public:
PGF_INTERNAL_DECL bool is_transient_object(object o);
// Advances the seed stored in the current database with one linear
// congruential step and returns a pseudo-random value in [0, 32767].
PGF_INTERNAL_DECL static int rand() {
    unsigned long int &state = current_db->seed;
    state = state * 1103515245 + 12345;
    // Drop the 16 low-order bits of the state, then keep 15 bits.
    unsigned int high_bits = (unsigned int)(state/65536);
    return high_bits % 32768;
}
private:
PGF_INTERNAL_DECL int init_state();

View File

@@ -324,6 +324,136 @@ Namespace<V> namespace_insert(Namespace<V> map, ref<V> value)
}
}
/**
 * Builds a concrete name from a pattern and inserts a freshly allocated
 * value under that name into a Namespace<V>.
 *
 * Pattern syntax handled by the constructor:
 *   %d, %x, %a  - replaced by one random character in '1'..'9'; the
 *                 directive also selects base 10, 16 or 36 for the extra
 *                 characters that allocate() inserts after a collision;
 *   %%          - a literal '%';
 *   % + other   - the '%' is copied literally and the following
 *                 character is then processed as ordinary text.
 *
 * The object owns the malloc'ed name buffer until fetch_name_value()
 * hands it over to the caller, so copying is disabled.
 */
template <class V>
class PgfNameAllocator {
    size_t available;   // characters that fit in name->text, excluding the NUL
    size_t fixed;       // index where allocate() inserts the next random character
    size_t base;        // radix chosen by the last %d/%x/%a; 0 if the pattern has none
    ref<V> value;       // object created by allocate(); 0 until then
    PgfText *name;      // working copy of the name; NULL after fetch_name_value()

public:
    /**
     * Expands name_pattern into the initial candidate name.
     * Throws pgf_systemerror(ENOMEM) if the buffer cannot be allocated.
     */
    PgfNameAllocator(PgfText *name_pattern)
    {
        available = name_pattern->size;
        fixed     = 0;
        base      = 0;
        value     = 0;

        // Every directive consumes two pattern characters and emits one,
        // so the expanded name never exceeds the pattern length.
        name = (PgfText *) malloc(sizeof(PgfText)+name_pattern->size+1);
        if (name == NULL)
            throw pgf_systemerror(ENOMEM);

        const size_t n = name_pattern->size;
        size_t i = 0, j = 0;
        while (i < n) {
            if (name_pattern->text[i] != '%') {
                name->text[j++] = name_pattern->text[i++];
                continue;
            }

            i++;
            // A '%' that ends the pattern has no directive character.
            // Do not read past the pattern; treat it as a literal '%'.
            const char directive = (i < n) ? name_pattern->text[i] : '\0';
            switch (directive) {
            case 'd': base = 10; break;
            case 'x': base = 16; break;
            case 'a': base = 36; break;
            case '%':
                name->text[j++] = '%'; i++;
                continue;
            default:
                // Unknown directive: keep the '%' and let the next
                // iteration copy the following character as text.
                name->text[j++] = '%';
                continue;
            }

            i++;
            // The first generated character is never '0'.
            name->text[j++] = '1' + PgfDB::rand() % 9;
            fixed = j;
        }
        name->size    = j;
        name->text[j] = 0;
    }

    // The name buffer is uniquely owned; a copy would free it twice.
    PgfNameAllocator(const PgfNameAllocator&) = delete;
    PgfNameAllocator& operator=(const PgfNameAllocator&) = delete;

    ~PgfNameAllocator() {
        free(name);   // free(NULL) is a no-op
    }

    /**
     * Transfers the name buffer and the allocated value to the caller.
     * The caller becomes responsible for free()-ing *pname. Afterwards
     * this object holds neither.
     */
    void fetch_name_value(PgfText **pname, ref<V> *pvalue) {
        *pname  = name;  name  = NULL;
        *pvalue = value; value = 0;
    }

    /**
     * Tries to insert a new value named `name` into `map`.
     *
     * Returns the updated (rebalanced) tree on success. Returns 0 when
     * the candidate name collides with an existing entry and the
     * collision cannot be resolved at this level.
     *
     * NOTE(review): the outermost call can also return 0 (for example
     * when the root carries exactly the candidate name), in which case
     * no value has been allocated. Callers need to check for that.
     */
    Namespace<V> allocate(Namespace<V> map)
    {
        static const char alphabet[] = "0123456789abcdefghijklmnopqrstuvwxyz";

        if (map == 0) {
            // Free slot found: create the value and copy the name into it.
            value = PgfDB::malloc<V>(name->size+1);
            memcpy(&value->name, name, sizeof(PgfText)+name->size+1);
            return Node<ref<V>>::new_node(value);
        }

        for (;;) {
            // Character-wise comparison of the candidate with this node's
            // name. `i` ends up as the index of the first difference, or
            // as the length of the shorter name.
            int cmp;
            size_t i;
            for (i = 0; ; i++) {
                if (i >= name->size) {
                    // Candidate exhausted: equal (0) or a proper prefix (-1).
                    cmp = -(i < map->value->name.size);
                    break;
                }
                if (i >= map->value->name.size) {
                    cmp = 1;
                    break;
                }

                if (name->text[i] > map->value->name.text[i]) {
                    cmp = 1;
                    break;
                } else if (name->text[i] < map->value->name.text[i]) {
                    cmp = -1;
                    break;
                }
            }

            if (cmp < 0) {
                Namespace<V> left = allocate(map->left);
                if (left != 0) {
                    map = Node<ref<V>>::upd_node(map,left,map->right);
                    return Node<ref<V>>::balanceL(map);
                }
            } else if (cmp > 0) {
                Namespace<V> right = allocate(map->right);
                if (right != 0) {
                    map = Node<ref<V>>::upd_node(map,map->left,right);
                    return Node<ref<V>>::balanceR(map);
                }
            } else {
                // Exact match with an existing name.
                return 0;
            }

            // The subtree reported a collision. If the difference with
            // this node lies at or after the insertion point, the result
            // is passed up to the caller instead of retrying here.
            if (i >= fixed)
                return 0;

            // Make room for one more character plus the terminating NUL.
            if (name->size >= available) {
                size_t new_size = name->size + 10;
                PgfText *new_name = (PgfText *)
                    realloc(name, sizeof(PgfText)+new_size+1);
                if (new_name == NULL) {
                    throw pgf_systemerror(ENOMEM);
                }
                name      = new_name;
                available = new_size;
            }

            // Shift text[fixed..size] (including the NUL) one position to
            // the right. Reaching this point implies fixed >= 1, so the
            // unsigned index cannot wrap around, and base != 0.
            i = name->size++;
            while (i >= fixed) {
                name->text[i+1] = name->text[i];
                i--;
            }
            // Insert a random character at `fixed` and retry with the
            // longer name.
            name->text[i+1] = alphabet[PgfDB::rand() % base];
            fixed++;
        }
    }
};
template <class V>
Namespace<V> namespace_delete(Namespace<V> map, PgfText* name,
ref<V> *pvalue)

View File

@@ -7,7 +7,7 @@
#include <vector>
#include <queue>
// #define PARSER_DEBUG
#define PARSER_DEBUG
class PGF_INTERNAL_DECL PgfParser::CFGCat {
public:
@@ -60,6 +60,7 @@ public:
struct PGF_INTERNAL_DECL PgfParser::ParseItemConts {
State *state;
ref<PgfLincatField> field;
size_t value;
std::vector<ParseItem*> items;
};
@@ -75,6 +76,7 @@ public:
conts = new ParseItemConts();
conts->state = this;
conts->field = field;
conts->value = value;
contss.insert(std::pair<CFGCat,ParseItemConts*>(cfg_cat, conts));
} else {
conts = itr1->second;
@@ -104,12 +106,25 @@ public:
class PGF_INTERNAL_DECL PgfParser::ParseItem : public Item
{
public:
void* operator new(size_t size, size_t n_args)
void* operator new(size_t size, PgfLinSeqIndex *r)
{
return malloc(size+sizeof(Choice*)*n_args);
size_t n_args = r->lin->absfun->type->hypos->len;
size_t ex_size = sizeof(Choice*)*n_args;
ParseItem *item = (ParseItem *) malloc(size+ex_size);
memset(item->args, 0, ex_size);
return item;
}
ParseItem(ParseItemConts *conts, ref<PgfConcrLin> lin, size_t seq_index)
// Allocation function for `new(item) ParseItem(...)`: returns storage
// that already holds a byte-for-byte copy of *item, including its
// trailing args array.
void* operator new(size_t size, ParseItem *item)
{
// The trailing array has one Choice* per hypothesis of the function type.
size_t n_args = item->lin->absfun->type->hypos->len;
size_t ex_size = sizeof(Choice*)*n_args;
// NOTE(review): the malloc result is passed to memcpy without a NULL check.
ParseItem *new_item = (ParseItem *) malloc(size+ex_size);
memcpy(new_item, item, size+ex_size);
return new_item;
}
ParseItem(ParseItemConts *conts, size_t values, ref<PgfConcrLin> lin, size_t seq_index)
{
this->outside_prob = lin->lincat->abscat->prob;
this->inside_prob = lin->absfun->prob;
@@ -117,9 +132,7 @@ public:
this->lin = lin;
this->seq_index = seq_index;
this->dot = lin->seqs->data[seq_index]->syms.len;
size_t n_args = lin->absfun->type->hypos->len;
memset(this->args, 0, sizeof(Choice*)*n_args);
this->values = values;
}
ParseItem(ParseItemConts *conts, PgfLincatBackref *backref,
@@ -131,9 +144,6 @@ public:
this->lin = backref->lin;
this->seq_index = backref->seq_index;
this->dot = backref->dot+1;
size_t n_args = backref->lin->absfun->type->hypos->len;
memset(this->args, 0, sizeof(Choice*)*n_args);
this->args[d] = choice;
}
@@ -145,24 +155,13 @@ public:
this->lin = epsilon->lin;
this->seq_index = epsilon->seq_index;
this->dot = 0;
size_t n_args = epsilon->lin->absfun->type->hypos->len;
memset(this->args, 0, sizeof(Choice*)*n_args);
}
ParseItem(ParseItem *item,
size_t d, Choice *choice)
ParseItem(size_t d, Choice *choice)
{
this->outside_prob = item->outside_prob;
this->inside_prob = item->inside_prob+choice->viterbi_prob;
this->conts = item->conts;
this->lin = item->lin;
this->seq_index = item->seq_index;
this->dot = item->dot+1;
size_t n_args = item->lin->absfun->type->hypos->len;
memcpy(this->args, item->args, sizeof(Choice*)*n_args);
this->args[d] = choice;
this->inside_prob += choice->viterbi_prob;
this->dot += 1;
this->args[d] = choice;
}
static void bu_predict(ref<PgfLincatField> field, State *state, Choice *choice)
@@ -182,9 +181,8 @@ public:
ref<PgfLincatField> up_field = vector_elem(backref->lin->lincat->fields, index);
ParseItemConts *conts = choice->conts->state->get_conts(up_field, 0);
size_t n_args = backref->lin->absfun->type->hypos->len;
state->queue.push(new(n_args) ParseItem(conts, backref,
symcat->d, choice));
state->queue.push(new(&*backref) ParseItem(conts, backref,
symcat->d, choice));
}
}
@@ -195,8 +193,7 @@ public:
for (size_t i = 0; i < field->epsilons->len; i++) {
ref<PgfLincatEpsilon> epsilon = vector_elem(field->epsilons, i);
size_t n_args = epsilon->lin->absfun->type->hypos->len;
state->queue.push(new(n_args) ParseItem(conts, epsilon, outside_prob));
state->queue.push(new(&*epsilon) ParseItem(conts, epsilon, outside_prob));
}
}
@@ -205,8 +202,7 @@ public:
ref<PgfSequence> seq = lin->seqs->data[seq_index];
PgfSymbol sym = *vector_elem(&seq->syms,dot);
auto sym_cat = ref<PgfSymbolCat>::untagged(sym);
size_t n_args = lin->absfun->type->hypos->len;
state->queue.push(new(n_args) ParseItem(this, sym_cat->d, choice));
state->queue.push(new(this) ParseItem(sym_cat->d, choice));
}
void complete(PgfParser *parser, ref<PgfSequence> seq)
@@ -251,17 +247,21 @@ public:
malloc(sizeof(Production)+sizeof(Choice*)*n_args);
prod->lin = lin;
prod->seq_index = seq_index;
memcpy(prod->args, args, sizeof(Choice*)*n_args);
memcpy(prod->args, this+1, sizeof(Choice*)*n_args);
prod->trace(choice);
choice->prods.push_back(prod);
// Bottom up prediction if it has not been done already
// If this the first time when we complete this category
if (itr2 == parser->after->choices.end()) {
// Combine with top-down predicted rules
for (ParseItem *item : conts->items) {
item->combine(parser->after,choice);
}
bu_predict(conts->field,parser->after,choice);
if (conts->state != parser->after) {
// Bottom up prediction if this is not an epsilon rule
bu_predict(conts->field,parser->after,choice);
}
}
}
@@ -275,16 +275,15 @@ public:
vector_elem(lin->absfun->type->hypos, sym_cat->d)->type;
ref<PgfConcrLincat> lincat =
namespace_lookup(parser->concr->lincats, &ty->name);
ref<PgfLincatField> field = vector_elem(lincat->fields, sym_cat->r.i0);
if (lincat != 0) {
ref<PgfLincatField> field = vector_elem(lincat->fields, sym_cat->r.i0);
ParseItemConts *conts = parser->after->get_conts(field, 0);
conts->items.push_back(this);
size_t index = seq_index / lin->lincat->fields->len;
size_t n_args = lin->args->len / lin->res->len;
ref<PgfPArg> parg = vector_elem(lin->args, index*n_args + sym_cat->d);
ParseItemConts *conts = parser->after->get_conts(field, 0);
conts->items.push_back(this);
eps_predict(field, parser->after, conts, inside_prob+outside_prob);
if (conts->items.size() == 1) {
eps_predict(field, parser->after, conts, inside_prob+outside_prob);
}
}
}
}
default:;
@@ -321,8 +320,19 @@ public:
ref<PgfDTyp> ty = lin->absfun->type;
if (res->vars != 0) {
printer->lvar_ranges(res->vars);
printer->puts(" . ");
printer->puts("{");
size_t values = this->values;
for (size_t i = 0; i < res->vars->len; i++) {
if (i > 0)
printer->puts(", ");
printer->lvar(res->vars->data[i].var);
size_t val = values / res->vars->data[i].range;
printer->nprintf(32,"=%ld",val);
values = values % res->vars->data[i].range;
}
printer->puts("} . ");
}
printer->efun(&ty->name);
@@ -373,6 +383,7 @@ private:
ref<PgfConcrLin> lin;
size_t seq_index;
size_t dot;
size_t values;
Choice *args[];
};
@@ -622,11 +633,12 @@ void PgfParser::Item::trace(State *state, PgfMarshaller *m)
void PgfParser::Choice::trace(State *state)
{
#ifdef PARSER_DEBUG
size_t seq_index = conts->field-conts->field->lincat->fields->data;
PgfPrinter printer(NULL,0,NULL);
printer.nprintf(40,"[%ld-%ld; ", conts->state->end.pos, state->start.pos);
printer.efun(&conts->field->lincat->name);
printer.puts("; ");
printer.puts(conts->field->name);
printer.nprintf(30,"(%ld); %ld", conts->value, seq_index);
printer.nprintf(40,"; ?%ld; %f]\n", id, viterbi_prob);
printer.dump();
#endif
@@ -716,11 +728,12 @@ void PgfParser::start_matches(PgfTextSpot *end, PgfExn* err)
void PgfParser::match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err)
{
size_t index = seq_index % lin->lincat->fields->len;
ref<PgfLincatField> field = vector_elem(lin->lincat->fields, index);
ref<PgfLincatField> field = vector_elem(lin->lincat->fields, seq_index % lin->lincat->fields->len);
ref<PgfPResult> result = *vector_elem(lin->res, seq_index / lin->lincat->fields->len);
ParseItemConts *conts = before->get_conts(field, 0);
after->queue.push(new(0) ParseItem(conts, lin, seq_index));
ParseItemConts *conts = before->get_conts(field, result->param.i0);
PgfLinSeqIndex r = {lin, seq_index};
after->queue.push(new(&r) ParseItem(conts, result->param.i0, lin, seq_index));
}
void PgfParser::end_matches(PgfTextSpot *end, PgfExn* err)

View File

@@ -1141,7 +1141,7 @@ void pgf_check_expr(PgfDB *db, PgfRevision revision,
ref<PgfPGF> pgf = db->revision2pgf(revision);
PgfTypechecker checker(pgf,u);
PgfTypechecker checker(pgf,m,u);
*pe = m->match_expr(&checker, *pe);
} PGF_API_END
}
@@ -1157,7 +1157,7 @@ PgfType pgf_infer_expr(PgfDB *db, PgfRevision revision,
ref<PgfPGF> pgf = db->revision2pgf(revision);
PgfTypechecker checker(pgf,u);
PgfTypechecker checker(pgf,m,u);
*pe = m->match_expr(&checker, *pe);
} PGF_API_END
@@ -1179,7 +1179,7 @@ void pgf_check_type(PgfDB *db, PgfRevision revision,
ref<PgfPGF> pgf = db->revision2pgf(revision);
PgfTypechecker checker(pgf,u);
PgfTypechecker checker(pgf,m,u);
*pty = m->match_type(&checker, *pty);
} PGF_API_END
}
@@ -1247,12 +1247,12 @@ PgfRevision pgf_checkout_revision(PgfDB *db, PgfExn *err)
}
PGF_API
void pgf_create_function(PgfDB *db, PgfRevision revision,
PgfText *name,
PgfType ty, size_t arity, char *bytecode,
prob_t prob,
PgfMarshaller *m,
PgfExn *err)
PgfText *pgf_create_function(PgfDB *db, PgfRevision revision,
PgfText *name_pattern,
PgfType ty, size_t arity, char *bytecode,
prob_t prob,
PgfMarshaller *m,
PgfExn *err)
{
PGF_API_BEGIN {
DB_scope scope(db, WRITER_SCOPE);
@@ -1260,17 +1260,25 @@ void pgf_create_function(PgfDB *db, PgfRevision revision,
PgfDBUnmarshaller u(m);
ref<PgfPGF> pgf = db->revision2pgf(revision);
ref<PgfAbsFun> absfun = PgfDB::malloc<PgfAbsFun>(name->size+1);
PgfNameAllocator<PgfAbsFun> nalloc(name_pattern);
Namespace<PgfAbsFun> funs =
nalloc.allocate(pgf->abstract.funs);
PgfText *name; ref<PgfAbsFun> absfun;
nalloc.fetch_name_value(&name, &absfun);
absfun->type = m->match_type(&u, ty);
absfun->arity = arity;
absfun->bytecode = bytecode ? PgfDB::malloc<char>(0) : 0;
absfun->prob = prob;
memcpy(&absfun->name, name, sizeof(PgfText)+name->size+1);
Namespace<PgfAbsFun> funs =
namespace_insert(pgf->abstract.funs, absfun);
pgf->abstract.funs = funs;
return name;
} PGF_API_END
return NULL;
}
PGF_API

View File

@@ -502,12 +502,12 @@ PGF_API_DECL
PgfRevision pgf_checkout_revision(PgfDB *db, PgfExn *err);
PGF_API_DECL
void pgf_create_function(PgfDB *db, PgfRevision revision,
PgfText *name,
PgfType ty, size_t arity, char *bytecode,
prob_t prob,
PgfMarshaller *m,
PgfExn *err);
PgfText *pgf_create_function(PgfDB *db, PgfRevision revision,
PgfText *name_pattern,
PgfType ty, size_t arity, char *bytecode,
prob_t prob,
PgfMarshaller *m,
PgfExn *err);
PGF_API_DECL
void pgf_drop_function(PgfDB *db, PgfRevision revision,

View File

@@ -8,6 +8,12 @@ PgfExpr PgfTypechecker::eabs(PgfBindType btype, PgfText *name, PgfExpr body)
// Visits both sides of an application through this checker and rebuilds
// the application with the wrapped unmarshaller.
PgfExpr PgfTypechecker::eapp(PgfExpr fun, PgfExpr arg)
{
fun = m->match_expr(this, fun);
// Snapshot of the state left by visiting `fun`, taken before visiting
// `arg` can overwrite it. Neither value is used yet in this function.
size_t fun_n_args = n_args;
ref<PgfDTyp> fun_type = type;
arg = m->match_expr(this, arg);
return u->eapp(fun, arg);
}
@@ -23,6 +29,13 @@ PgfExpr PgfTypechecker::emeta(PgfMetaId meta)
// Looks the function up in the abstract syntax, records its declared
// type in the checker state and forwards the name to the wrapped
// unmarshaller. Throws pgf_error if no such function exists.
PgfExpr PgfTypechecker::efun(PgfText *name)
{
    ref<PgfAbsFun> fun = namespace_lookup(gr->abstract.funs, name);
    if (fun == 0) {
        throw pgf_error("Unknown function");
    }

    // State read back by the enclosing expression after this visit.
    n_args = 0;
    type   = fun->type;

    return u->efun(name);
}
@@ -33,11 +46,13 @@ PgfExpr PgfTypechecker::evar(int index)
// Visits the annotated expression through this checker and rebuilds the
// annotation with the wrapped unmarshaller. `ty` is passed on unchanged;
// it is not visited here.
PgfExpr PgfTypechecker::etyped(PgfExpr expr, PgfType ty)
{
expr = m->match_expr(this, expr);
return u->etyped(expr,ty);
}
// Visits the wrapped expression through this checker and rebuilds the
// implicit-argument node with the wrapped unmarshaller.
PgfExpr PgfTypechecker::eimplarg(PgfExpr expr)
{
expr = m->match_expr(this, expr);
return u->eimplarg(expr);
}

View File

@@ -3,11 +3,15 @@
class PGF_INTERNAL_DECL PgfTypechecker : public PgfUnmarshaller {
ref<PgfPGF> gr;
ref<PgfDTyp> type;
size_t n_args;
PgfMarshaller *m;
PgfUnmarshaller *u;
public:
PgfTypechecker(ref<PgfPGF> gr, PgfUnmarshaller *u) {
PgfTypechecker(ref<PgfPGF> gr, PgfMarshaller *m, PgfUnmarshaller *u) {
this->gr = gr;
this->m = m;
this->u = u;
};

View File

@@ -198,7 +198,7 @@ foreign import ccall pgf_commit_transaction :: Ptr PgfDB -> Ptr PGF -> Ptr PgfEx
foreign import ccall pgf_checkout_revision :: Ptr PgfDB -> Ptr PgfExn -> IO (Ptr PGF)
foreign import ccall pgf_create_function :: Ptr PgfDB -> Ptr PGF -> Ptr PgfText -> StablePtr Type -> CSize -> Ptr CChar -> (#type prob_t) -> Ptr PgfMarshaller -> Ptr PgfExn -> IO ()
foreign import ccall pgf_create_function :: Ptr PgfDB -> Ptr PGF -> Ptr PgfText -> StablePtr Type -> CSize -> Ptr CChar -> (#type prob_t) -> Ptr PgfMarshaller -> Ptr PgfExn -> IO (Ptr PgfText)
foreign import ccall pgf_drop_function :: Ptr PgfDB -> Ptr PGF -> Ptr PgfText -> Ptr PgfExn -> IO ()

View File

@@ -145,13 +145,24 @@ checkoutPGF p = do
langs <- getConcretes (a_db p) fptr
return (PGF (a_db p) fptr langs)
createFunction :: Fun -> Type -> Int -> [[Instr]] -> Float -> Transaction PGF ()
{- | 'createFunction name ty arity bytecode prob' creates a new abstract
syntax function with the given name, type, arity, etc. If the name
contains %d, %x or %a then the pattern is replaced with a random
number in base 10, 16, or 36, which guarantees that the name is
unique. The returned name is the final name after the substitution.
-}
createFunction :: Fun -> Type -> Int -> [[Instr]] -> Float -> Transaction PGF Fun
createFunction name ty arity bytecode prob = Transaction $ \c_db _ c_revision c_exn ->
withText name $ \c_name ->
bracket (newStablePtr ty) freeStablePtr $ \c_ty ->
(if null bytecode then (\f -> f nullPtr) else (allocaBytes 0)) $ \c_bytecode ->
withForeignPtr marshaller $ \m -> do
pgf_create_function c_db c_revision c_name c_ty (fromIntegral arity) c_bytecode prob m c_exn
c_name <- pgf_create_function c_db c_revision c_name c_ty (fromIntegral arity) c_bytecode prob m c_exn
if c_name == nullPtr
then return ""
else do name <- peekText c_name
free c_name
return name
dropFunction :: Fun -> Transaction PGF ()
dropFunction name = Transaction $ \c_db _ c_revision c_exn ->

View File

@@ -134,16 +134,23 @@ Transaction_createFunction(TransactionObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "s#O!nf", &s, &size, &pgf_TypeType, &type, &arity, &prob))
return NULL;
PgfText *funname = CString_AsPgfText(s, size);
PgfText *name_pattern = CString_AsPgfText(s, size);
PgfExn err;
pgf_create_function(self->pgf->db, self->revision, funname, (PgfType) type, arity, NULL, prob, &marshaller, &err);
FreePgfText(funname);
PgfText *name =
pgf_create_function(self->pgf->db, self->revision, name_pattern, (PgfType) type, arity, NULL, prob, &marshaller, &err);
FreePgfText(name_pattern);
if (handleError(err) != PGF_EXN_NONE) {
return NULL;
}
Py_RETURN_NONE;
PyObject *py_name = PyUnicode_FromPgfText(name);
FreePgfText(name);
return py_name;
}
static PyObject *
@@ -340,7 +347,11 @@ static PyMethodDef Transaction_methods[] = {
{"__exit__", (PyCFunction)(void(*)(void))Transaction_exit, METH_FASTCALL, ""},
{"createFunction", (PyCFunction)Transaction_createFunction, METH_VARARGS,
"Create function"
"'createFunction(name,ty,arity,prob)' creates a new abstract "
"syntax function with the given name, type, arity, etc. If the name "
"contains %d, %x or %a then the pattern is replaced with a random "
"number in base 10, 16, or 36, which guarantees that the name is "
"unique. The returned name is the final name after the substitution."
},
{"dropFunction", (PyCFunction)Transaction_dropFunction, METH_VARARGS,
"Drop function"