From 84789c9fbfc634e9d98f211f3bcd2659d6eee40d Mon Sep 17 00:00:00 2001 From: krangelov Date: Thu, 5 Aug 2021 12:37:12 +0200 Subject: [PATCH] finished reading the abstract syntax --- src/runtime/c/Makefile.am | 3 +- src/runtime/c/data.h | 73 ++++++++++- src/runtime/c/db.cxx | 16 +-- src/runtime/c/db.h | 84 ++++++++++--- src/runtime/c/expr.h | 111 ++++++++++++++--- src/runtime/c/namespace.h | 14 +-- src/runtime/c/pgf.cxx | 6 +- src/runtime/c/reader.cxx | 252 +++++++++++++++++++++++++++++++++++--- src/runtime/c/reader.h | 42 ++++++- src/runtime/c/variant.h | 12 -- src/runtime/c/vector.h | 32 +++++ 11 files changed, 559 insertions(+), 86 deletions(-) delete mode 100644 src/runtime/c/variant.h create mode 100644 src/runtime/c/vector.h diff --git a/src/runtime/c/Makefile.am b/src/runtime/c/Makefile.am index c47a4fdcb..213be21e2 100644 --- a/src/runtime/c/Makefile.am +++ b/src/runtime/c/Makefile.am @@ -17,8 +17,7 @@ libpgf_la_SOURCES = \ reader.h \ data.h \ expr.h \ - namespace.h \ - variant.h + namespace.h libpgf_la_LDFLAGS = "-no-undefined" diff --git a/src/runtime/c/data.h b/src/runtime/c/data.h index 0c1c24d00..2b0e2d886 100644 --- a/src/runtime/c/data.h +++ b/src/runtime/c/data.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -11,6 +12,7 @@ #include "pgf.h" #include "db.h" #include "text.h" +#include "vector.h" #include "namespace.h" #include "expr.h" @@ -21,7 +23,7 @@ public: this->msg = msg; } - const char *what() const throw () + virtual const char *what() const throw () { return msg; } @@ -35,9 +37,76 @@ struct PGF_INTERNAL_DECL PgfFlag { PgfText name; }; +// PgfPatt + +typedef variant PgfPatt; + +struct PgfPattApp { + static const uint8_t tag = 0; + + ref ctor; + PgfVector args; +}; + +struct PgfPattVar { + static const uint8_t tag = 1; + + PgfText name; +}; + +struct PgfPattAs { + static const uint8_t tag = 2; + + PgfPatt patt; + PgfText name; +}; + +struct PgfPattWild { + static const uint8_t tag = 3; +}; + +struct 
PgfPattLit { + static const uint8_t tag = 4; + + PgfLiteral lit; +}; + +struct PgfPattImplArg { + static const uint8_t tag = 5; + + PgfPatt patt; +}; + +struct PgfPattTilde { + static const uint8_t tag = 6; + + PgfExpr expr; +}; + typedef struct { - ref name; + PgfExpr body; + PgfVector patts; +} PgfEquation; + +struct PGF_INTERNAL_DECL PgfAbsFun { + ref type; + int arity; + ref>> defns; + PgfExprProb ep; + PgfText name; +}; + +typedef struct { + ref> context; + prob_t prob; + PgfText name; +} PgfAbsCat; + +typedef struct { + ref name; Namespace aflags; + Namespace funs; + Namespace cats; } PgfAbstr; struct PGF_INTERNAL_DECL PgfPGFRoot { diff --git a/src/runtime/c/db.cxx b/src/runtime/c/db.cxx index cc5654fca..35a0a6286 100644 --- a/src/runtime/c/db.cxx +++ b/src/runtime/c/db.cxx @@ -6,8 +6,8 @@ #include "data.h" -thread_local unsigned char* current_base; -thread_local DB* current_db; +unsigned char* current_base; +DB* current_db; #ifndef DEFAULT_TOP_PAD #define DEFAULT_TOP_PAD (0) @@ -267,7 +267,7 @@ struct malloc_state /* Bitmap of bins */ unsigned int binmap[BINMAPSIZE]; /* Reference to the root object */ - size_t root_offset; + moffset root_offset; }; DB::DB(const char* pathname) { @@ -309,12 +309,12 @@ DB::~DB() { close(fd); } -moffset DB::get_root_offset() { +moffset DB::get_root_internal() { return ms->root_offset; } -void DB::set_root_offset(moffset root) { - ms->root_offset = root; +void DB::set_root_internal(moffset root_offset) { + ms->root_offset = root_offset; } void @@ -448,7 +448,7 @@ static void malloc_consolidate(malloc_state *ms) } moffset -DB::malloc(size_t bytes) +DB::malloc_internal(size_t bytes) { unsigned int idx; /* associated bin index */ mbin* bin; /* associated bin */ @@ -819,7 +819,7 @@ DB::malloc(size_t bytes) } void -DB::free(moffset o) +DB::free_internal(moffset o) { size_t size; /* its size */ moffset *fb; /* associated fastbin */ diff --git a/src/runtime/c/db.h b/src/runtime/c/db.h index a484c2399..15e408c6b 100644 --- 
a/src/runtime/c/db.h +++ b/src/runtime/c/db.h @@ -3,53 +3,99 @@ class DB; -extern thread_local PGF_INTERNAL_DECL unsigned char* current_base; -extern thread_local PGF_INTERNAL_DECL DB* current_db; +extern PGF_INTERNAL_DECL unsigned char* current_base; +extern PGF_INTERNAL_DECL DB* current_db; typedef size_t moffset; +typedef moffset variant; + struct malloc_state; template class ref { - size_t offset; +private: + moffset offset; + + friend class DB; public: ref() { } - ref(size_t o) { offset = o; } - inline A* operator->() const { return (A*) (current_base+offset); } - inline operator A*() const { return (A*) (current_base+offset); } - inline bool operator ==(ref& other) const { return offset==other->offset; } - inline operator size_t() { return offset; } + ref(moffset o) { offset = o; } + + A* operator->() const { return (A*) (current_base+offset); } + operator A*() const { return (A*) (current_base+offset); } + bool operator ==(ref& other) const { return offset==other.offset; } // compare the refs' own offsets; other->offset would look inside the pointee + bool operator ==(moffset other_offset) const { return offset==other_offset; } + + ref& operator= (const ref& r) { + offset = r.offset; + return *this; + } + + static + ref from_ptr(A *ptr) { return (((uint8_t*) ptr) - current_base); } + + static + variant tagged(ref ref) { + assert(A::tag < 2*sizeof(size_t)); + return (ref.offset | A::tag); + } + + static + ref untagged(variant v) { + return (v & ~(2*sizeof(size_t) - 1)); + } + + static + uint8_t get_tag(variant v) { + return (v & (2*sizeof(size_t) - 1)); + } + + static + ref null() { return 0; } }; class PGF_INTERNAL_DECL DB { +private: int fd; malloc_state* ms; + friend class PgfReader; + public: DB(const char* pathname); ~DB(); - template ref malloc() { - return malloc(sizeof(A)); + template + static ref malloc() { + return current_db->malloc_internal(sizeof(A)); } - moffset malloc(size_t bytes); - - template ref get_root() { - return get_root_offset(); + template + static ref malloc(size_t bytes) { + return 
current_db->malloc_internal(bytes); } - template void set_root(ref root) { - set_root_offset(root); + + template + static ref get_root() { + return current_db->get_root_internal(); + } + + template + static void set_root(ref root) { + current_db->set_root_internal(root.offset); } private: void init_state(size_t size); - void free(moffset o); + moffset malloc_internal(size_t bytes); + void free_internal(moffset o); - moffset get_root_offset(); - void set_root_offset(moffset root); + moffset get_root_internal(); + void set_root_internal(moffset root_offset); + + unsigned char* relocate(unsigned char* ptr); }; #endif diff --git a/src/runtime/c/expr.h b/src/runtime/c/expr.h index 6a077458d..1bdc5d802 100644 --- a/src/runtime/c/expr.h +++ b/src/runtime/c/expr.h @@ -1,30 +1,109 @@ #ifndef EXPR_H_ #define EXPR_H_ -#include "variant.h" +/// An abstract syntax tree +typedef variant PgfExpr; -// PgfLiteral - -typedef variant PgfLiteral; +struct PgfHypo; +struct PgfType; +typedef int PgfMetaId; typedef enum { - PGF_LITERAL_STR, - PGF_LITERAL_INT, - PGF_LITERAL_FLT, - PGF_LITERAL_NUM_TAGS -} PgfLiteralTag; + PGF_BIND_TYPE_EXPLICIT, + PGF_BIND_TYPE_IMPLICIT +} PgfBindType; -typedef struct { - char val[0]; // a flexible array that contains the value -} PgfLiteralStr; +/// A literal for an abstract syntax tree +typedef variant PgfLiteral; + +struct PgfLiteralStr { + static const uint8_t tag = 0; + + PgfText val; +} ; + +struct PgfLiteralInt { + static const uint8_t tag = 1; -typedef struct { int val; -} PgfLiteralInt; +} ; + +struct PgfLiteralFlt { + static const uint8_t tag = 2; + + double val; +}; + +struct PgfHypo { + PgfBindType bind_type; + ref cid; + ref type; +}; + +struct PgfType { + ref> hypos; + ref> exprs; + PgfText name; +}; + +struct PgfExprAbs { + static const uint8_t tag = 0; + + PgfBindType bind_type; + PgfExpr body; + PgfText name; +}; + +struct PgfExprApp { + static const uint8_t tag = 1; + + PgfExpr fun; + PgfExpr arg; +}; + +struct PgfExprLit { + static const 
uint8_t tag = 2; + + PgfLiteral lit; +}; + +struct PgfExprMeta { + static const uint8_t tag = 3; + + PgfMetaId id; +}; + +struct PgfExprFun { + static const uint8_t tag = 4; + + PgfText name; +}; + +struct PgfExprVar { + static const uint8_t tag = 5; + + int var; +}; + +struct PgfExprTyped { + static const uint8_t tag = 6; + + PgfExpr expr; + ref type; +}; + +struct PgfExprImplArg { + static const uint8_t tag = 7; + + PgfExpr expr; +}; + +typedef float prob_t; typedef struct { - double val; -} PgfLiteralFlt; + prob_t prob; + PgfExpr expr; +} PgfExprProb; #endif /* EXPR_H_ */ diff --git a/src/runtime/c/namespace.h b/src/runtime/c/namespace.h index 74014bf8e..cad4cebf3 100644 --- a/src/runtime/c/namespace.h +++ b/src/runtime/c/namespace.h @@ -182,13 +182,13 @@ Namespace namespace_insert(Namespace map, ref value) { return Node::new_node(value); int cmp = textcmp(value->name,map->value->name); - if (cmp < 0) - return Node::balanceL(map->value, - namespace_insert(map->left, value),map->right); - else if (cmp > 0) - return Node::balanceR(map->value, - map->left, namespace_insert(map->right, value)); - else + if (cmp < 0) { + Namespace left = namespace_insert(map->left, value); + return Node::balanceL(map->value,left,map->right); + } else if (cmp > 0) { + Namespace right = namespace_insert(map->right, value); + return Node::balanceR(map->value, map->left, right); + } else return Node::new_node(value,map->left,map->right); } diff --git a/src/runtime/c/pgf.cxx b/src/runtime/c/pgf.cxx index bdbb05eab..4ebbbad75 100644 --- a/src/runtime/c/pgf.cxx +++ b/src/runtime/c/pgf.cxx @@ -16,7 +16,7 @@ PgfPGF *pgf_read(const char* fpath, PgfExn* err) pgf = new PgfPGF(fpath_n.c_str()); - if (pgf->db.get_root() == 0) { + if (DB::get_root() == 0) { std::ifstream in(fpath, std::ios::binary); if (in.fail()) { throw std::system_error(errno, std::generic_category()); @@ -44,8 +44,8 @@ PgfPGF *pgf_read(const char* fpath, PgfExn* err) } void PgfPGF::set_root() { - ref root = db.malloc(); + ref 
root = DB::malloc(); root->major_version = major_version; root->minor_version = minor_version; - db.set_root(root); + DB::set_root(root); } diff --git a/src/runtime/c/reader.cxx b/src/runtime/c/reader.cxx index 32e541056..6487d293d 100644 --- a/src/runtime/c/reader.cxx +++ b/src/runtime/c/reader.cxx @@ -82,7 +82,27 @@ uint64_t PgfReader::read_uint() return u; } -moffset PgfReader::read_name(size_t struct_size) +moffset PgfReader::read_name_internal(size_t struct_size) +{ + size_t size = read_len(); + moffset offs = current_db->malloc_internal(struct_size+sizeof(PgfText)+size+1); + PgfText* ptext = (PgfText*) (current_base+offs+struct_size); + ptext->size = size; + + // If reading the extra bytes causes EOF, it is an encoding + // error, not a legitimate end of character stream. + in->read(ptext->text, size); + if (in->eof()) + throw pgf_error("utf8 decoding error"); + if (in->fail()) + throw std::system_error(errno, std::generic_category()); + + ptext->text[size] = 0; // NUL-terminate: text[size] is the last of the size+1 bytes allocated above + + return offs; +} + +moffset PgfReader::read_text_internal(size_t struct_size) { size_t len = read_len(); @@ -119,8 +139,8 @@ moffset PgfReader::read_name(size_t struct_size) size_t size = p-buf; *p++ = 0; - moffset offs = current_db->malloc(struct_size+size+1); - PgfText* ptext = (PgfText*) (current_base+offs+struct_size-sizeof(PgfText)); + moffset offs = current_db->malloc_internal(struct_size+sizeof(PgfText)+size+1); + PgfText* ptext = (PgfText*) (current_base+offs+struct_size); ptext->size = size; memcpy(ptext->text, buf, size+1); @@ -139,55 +159,259 @@ Namespace PgfReader::read_namespace(ref (PgfReader::*read_value)()) return nmsp; } +template +ref PgfReader::read_vector(PgfVector C::* field, void (PgfReader::*read_value)(ref val)) +{ + size_t len = read_len(); + ref loc = vector_new(field,len); + for (size_t i = 0; i < len; i++) { + (this->*read_value)(vector_elem(ref>::from_ptr(&(loc->*field)),i)); + } + return loc; +} + +template +ref> PgfReader::read_vector(void 
(PgfReader::*read_value)(ref val)) +{ + size_t len = read_len(); + ref> vec = vector_new(len); + for (size_t i = 0; i < len; i++) { + (this->*read_value)(vector_elem(vec,i)); + } + return vec; +} + PgfLiteral PgfReader::read_literal() { PgfLiteral lit = 0; uint8_t tag = read_tag(); switch (tag) { - case PGF_LITERAL_STR: { + case PgfLiteralStr::tag: { ref lit_str = - read_name(offsetof(PgfLiteralStr,val)); - lit = variant_close(lit_str,PGF_LITERAL_STR); + read_name(&PgfLiteralStr::val); + lit = ref::tagged(lit_str); break; } - case PGF_LITERAL_INT: { + case PgfLiteralInt::tag: { ref lit_int = - current_db->malloc(); + DB::malloc(); // allocate sizeof(PgfLiteralInt); passing tag here requested only `tag` bytes lit_int->val = read_int(); - lit = variant_close(lit_int,PGF_LITERAL_INT); + lit = ref::tagged(lit_int); break; } - case PGF_LITERAL_FLT: { + case PgfLiteralFlt::tag: { ref lit_flt = current_db->malloc(); lit_flt->val = read_double(); - lit = variant_close(lit_flt,PGF_LITERAL_FLT); + lit = ref::tagged(lit_flt); break; } default: - throw pgf_error("tag error"); + throw pgf_error("Unknown literal tag"); } return lit; } ref PgfReader::read_flag() { - ref flag = read_name(); + ref flag = read_name(&PgfFlag::name); flag->value = read_literal(); return flag; } +PgfExpr PgfReader::read_expr() +{ + PgfExpr expr = 0; + uint8_t tag = read_tag(); + + switch (tag) { + case PgfExprAbs::tag:{ + PgfBindType bind_type = (PgfBindType) read_tag(); + ref eabs = read_name(&PgfExprAbs::name); + eabs->bind_type = bind_type; + eabs->body = read_expr(); + expr = ref::tagged(eabs); + break; + } + case PgfExprApp::tag: { + ref eapp = DB::malloc(); + eapp->fun = read_expr(); + eapp->arg = read_expr(); + expr = ref::tagged(eapp); + break; + } + case PgfExprLit::tag: { + ref elit = DB::malloc(); + elit->lit = read_literal(); + expr = ref::tagged(elit); + break; + } + case PgfExprMeta::tag: { + ref emeta = DB::malloc(); + emeta->id = read_int(); + expr = ref::tagged(emeta); + break; + } + case PgfExprFun::tag: { + ref efun = read_name(&PgfExprFun::name); + expr = 
ref::tagged(efun); + break; + } + case PgfExprVar::tag: { + ref evar = DB::malloc(); + evar->var = read_int(); + expr = ref::tagged(evar); + break; + } + case PgfExprTyped::tag: { + ref etyped = DB::malloc(); + etyped->expr = read_expr(); + etyped->type = read_type(); + expr = ref::tagged(etyped); + break; + } + case PgfExprImplArg::tag: { + ref eimpl = current_db->malloc(); + eimpl->expr = read_expr(); + expr = ref::tagged(eimpl); + break; + } + default: + throw pgf_error("Unknown expression tag"); + } + + return expr; // hand back the tagged expression built above, not a null variant +} + +void PgfReader::read_hypo(ref hypo) +{ + hypo->bind_type = (PgfBindType) read_tag(); + hypo->cid = read_name(); + hypo->type = read_type(); +} + +ref PgfReader::read_type() +{ + ref> hypos = + read_vector(&PgfReader::read_hypo); + ref tp = read_name(&PgfType::name); + tp->hypos = hypos; + tp->exprs = + read_vector(&PgfReader::read_expr); + return tp; +} + +PgfPatt PgfReader::read_patt() +{ + PgfPatt patt = 0; + + uint8_t tag = read_tag(); + switch (tag) { + case PgfPattApp::tag: { + ref ctor = read_name(); + + ref papp = + read_vector(&PgfPattApp::args,&PgfReader::read_patt2); + papp->ctor = ctor; + patt = ref::tagged(papp); + break; + } + case PgfPattVar::tag: { + ref pvar = read_name(&PgfPattVar::name); + patt = ref::tagged(pvar); + break; + } + case PgfPattAs::tag: { + ref pas = read_name(&PgfPattAs::name); + pas->patt = read_patt(); + patt = ref::tagged(pas); + break; + } + case PgfPattWild::tag: { + ref pwild = DB::malloc(); + patt = ref::tagged(pwild); + break; + } + case PgfPattLit::tag: { + ref plit = DB::malloc(); + plit->lit = read_literal(); + patt = ref::tagged(plit); + break; + } + case PgfPattImplArg::tag: { + ref pimpl = DB::malloc(); + pimpl->patt = read_patt(); + patt = ref::tagged(pimpl); + break; + } + case PgfPattTilde::tag: { + ref ptilde = DB::malloc(); + ptilde->expr = read_expr(); + patt = ref::tagged(ptilde); + break; + } + default: + throw pgf_error("Unknown pattern tag"); + } + + return patt; +} + +void 
PgfReader::read_defn(ref> defn) +{ + ref eq = read_vector(&PgfEquation::patts,&PgfReader::read_patt2); + eq->body = read_expr(); + *defn = eq; +} + +ref PgfReader::read_absfun() +{ + ref absfun = + read_name(&PgfAbsFun::name); + ref efun = + ref::from_ptr((PgfExprFun*) &absfun->name); + absfun->ep.expr = ref::tagged(efun); + absfun->type = read_type(); + absfun->arity = read_int(); + + uint8_t tag = read_tag(); + switch (tag) { + case 0: + absfun->defns = 0; + break; + case 1: + absfun->defns = + read_vector>(&PgfReader::read_defn); + break; + default: + throw pgf_error("Unknown tag, 0 or 1 expected"); + } + absfun->ep.prob = - log(read_double()); + return absfun; +} + +ref PgfReader::read_abscat() +{ + ref abscat = read_name(&PgfAbsCat::name); + abscat->context = read_vector(&PgfReader::read_hypo); + abscat->prob = - log(read_double()); + return abscat; +} + void PgfReader::read_abstract(PgfAbstr* abstract) { - abstract->name = read_name(0); + abstract->name = read_name(); abstract->aflags = read_namespace(&PgfReader::read_flag); + abstract->funs = read_namespace(&PgfReader::read_absfun); + abstract->cats = read_namespace(&PgfReader::read_abscat); } void PgfReader::read_pgf(PgfPGFRoot *pgf) { pgf->major_version = read_u16be(); pgf->minor_version = read_u16be(); + pgf->gflags = read_namespace(&PgfReader::read_flag); read_abstract(&pgf->abstract); diff --git a/src/runtime/c/reader.h b/src/runtime/c/reader.h index def79dd64..ef9ba2e87 100644 --- a/src/runtime/c/reader.h +++ b/src/runtime/c/reader.h @@ -18,18 +18,53 @@ public: double read_double(); uint64_t read_uint(); int64_t read_int() { return (int64_t) read_uint(); }; - uint8_t read_tag() { return read_uint8(); } size_t read_len() { return (size_t) read_uint(); }; + uint8_t read_tag() { return read_uint8(); } + template - ref read_name() { return read_name(offsetof(V,name)); }; + ref read_name(PgfText V::* field) { + return read_name_internal((size_t) &(((V*) NULL)->*field)); + }; + + ref read_name() { + return 
read_name_internal(0); + }; + + template + ref read_text(PgfText V::* field) { + return read_text_internal((size_t) &(((V*) NULL)->*field)); + }; + + ref read_text() { + return read_text_internal(0); + }; template Namespace read_namespace(ref (PgfReader::*read_value)()); + template + ref read_vector(PgfVector C::* field, void (PgfReader::*read_value)(ref val)); + + template + ref> read_vector(void (PgfReader::*read_value)(ref val)); + PgfLiteral read_literal(); + PgfExpr read_expr(); + void read_expr(ref r) { *r = read_expr(); }; + + void read_hypo(ref hypo); + ref read_type(); + ref read_flag(); + PgfPatt read_patt(); + void read_patt2(ref r) { *r = read_patt(); }; + + void read_defn(ref> defn); + + ref read_absfun(); + ref read_abscat(); void read_abstract(PgfAbstr* abstract); void read_pgf(PgfPGFRoot* pgf); @@ -37,7 +72,8 @@ public: private: std::istream *in; - moffset read_name(size_t size); + moffset read_name_internal(size_t struct_size); + moffset read_text_internal(size_t struct_size); }; #endif diff --git a/src/runtime/c/variant.h b/src/runtime/c/variant.h deleted file mode 100644 index 4795fca48..000000000 --- a/src/runtime/c/variant.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef VARIANT_H_ -#define VARIANT_H_ - -typedef uintptr_t variant; - -template -variant variant_close(ref r, uint8_t tag) -{ - return (((moffset) r) | tag); -} - -#endif /* VARIANT_H_ */ diff --git a/src/runtime/c/vector.h b/src/runtime/c/vector.h new file mode 100644 index 000000000..f999edaaf --- /dev/null +++ b/src/runtime/c/vector.h @@ -0,0 +1,32 @@ +#ifndef VECTOR_H +#define VECTOR_H + +template +struct PgfVector { + size_t len; + A data[]; +}; + +template inline +ref> vector_new(size_t len) +{ + ref> res = DB::malloc>(sizeof(PgfVector)+len*sizeof(A)); + res->len = len; + return res; +} + +template inline +ref vector_new(PgfVector C::* field, size_t len) +{ + ref res = DB::malloc(((size_t) &(((C*) NULL)->*field))+sizeof(PgfVector)+len*sizeof(A)); + (res->*field).len = len; + return 
res; +} + +template inline +ref vector_elem(ref> v, size_t index) +{ + return ref::from_ptr(&v->data[index]); +} + +#endif // VECTOR_H