finished reading the abstract syntax

This commit is contained in:
krangelov
2021-08-05 12:37:12 +02:00
parent 17629e4821
commit 84789c9fbf
11 changed files with 559 additions and 86 deletions

View File

@@ -17,8 +17,7 @@ libpgf_la_SOURCES = \
reader.h \
data.h \
expr.h \
namespace.h \
variant.h
namespace.h
libpgf_la_LDFLAGS = "-no-undefined"

View File

@@ -4,6 +4,7 @@
#include <stdint.h>
#include <string.h>
#include <sys/types.h>
#include <assert.h>
#include <iostream>
#include <exception>
#include <stdexcept>
@@ -11,6 +12,7 @@
#include "pgf.h"
#include "db.h"
#include "text.h"
#include "vector.h"
#include "namespace.h"
#include "expr.h"
@@ -21,7 +23,7 @@ public:
this->msg = msg;
}
const char *what() const throw ()
virtual const char *what() const throw ()
{
return msg;
}
@@ -35,9 +37,76 @@ struct PGF_INTERNAL_DECL PgfFlag {
PgfText name;
};
// PgfPatt: a tagged reference to one of the PgfPatt* structs.
// PgfReader::read_patt builds these values with ref<T>::tagged(), which
// ORs the struct's static `tag` constant into the offset of the object.
typedef variant PgfPatt;
// Pattern variant with tag 0: a name (`ctor`, presumably the constructor
// being matched) together with a vector of argument patterns.
// PgfReader::read_patt allocates it through vector_new with a field
// pointer, which sizes the block as offset-of-args + vector + elements,
// so `args` has to remain the last member.
struct PgfPattApp {
static const uint8_t tag = 0;
ref<PgfText> ctor;
PgfVector<PgfPatt> args;
};
// Pattern variant with tag 1: carries only a name.
// It is allocated by read_name, which places the text bytes directly
// after the PgfText header, so `name` has to remain the last member.
struct PgfPattVar {
static const uint8_t tag = 1;
PgfText name;
};
// Pattern variant with tag 2: a name plus a nested pattern.
// PgfReader::read_patt reads the name first (allocating the struct with
// the text inline after `name`) and the nested pattern afterwards.
struct PgfPattAs {
static const uint8_t tag = 2;
PgfPatt patt;
PgfText name;
};
// Pattern variant with tag 3: has no payload; only the tag is meaningful.
struct PgfPattWild {
static const uint8_t tag = 3;
};
// Pattern variant with tag 4: wraps a literal read with read_literal().
struct PgfPattLit {
static const uint8_t tag = 4;
PgfLiteral lit;
};
// Pattern variant with tag 5: wraps a single nested pattern
// (presumably marking it as an implicit argument, going by the name).
struct PgfPattImplArg {
static const uint8_t tag = 5;
PgfPatt patt;
};
// Pattern variant with tag 6: wraps an expression read with read_expr().
struct PgfPattTilde {
static const uint8_t tag = 6;
PgfExpr expr;
};
// One defining equation of an abstract function: a vector of patterns and
// the body expression. PgfReader::read_defn allocates it via vector_new
// with a pointer to `patts`, so `patts` has to remain the last member.
// NOTE(review): `name` is not assigned by PgfReader::read_defn - confirm
// where (or whether) it is initialised.
typedef struct {
ref<char> name;
PgfExpr body;
PgfVector<PgfPatt> patts;
} PgfEquation;
// An abstract function as stored in the `funs` namespace.
//   type  - the function's type, read with read_type()
//   arity - read as an integer right after the type
//   defns - vector of equations, or 0 when the file marks none
//   ep    - ep.expr is a tagged PgfExprFun reference aliasing `name`,
//           ep.prob is -log of the double stored in the file
//   name  - allocated inline by read_name, so it must stay the last member
struct PGF_INTERNAL_DECL PgfAbsFun {
ref<PgfType> type;
int arity;
ref<PgfVector<ref<PgfEquation>>> defns;
PgfExprProb ep;
PgfText name;
};
// An abstract category as stored in the `cats` namespace.
//   context - vector of hypotheses read with read_hypo
//   prob    - -log of the double stored in the file
//   name    - allocated inline by read_name, so it must stay the last member
typedef struct {
ref<PgfVector<PgfHypo>> context;
prob_t prob;
PgfText name;
} PgfAbsCat;
// The abstract syntax of a grammar. PgfReader::read_abstract fills the
// members in declaration order: the name, then the flag, function and
// category namespaces.
typedef struct {
ref<PgfText> name;
Namespace<PgfFlag> aflags;
Namespace<PgfAbsFun> funs;
Namespace<PgfAbsCat> cats;
} PgfAbstr;
struct PGF_INTERNAL_DECL PgfPGFRoot {

View File

@@ -6,8 +6,8 @@
#include "data.h"
thread_local unsigned char* current_base;
thread_local DB* current_db;
unsigned char* current_base;
DB* current_db;
#ifndef DEFAULT_TOP_PAD
#define DEFAULT_TOP_PAD (0)
@@ -267,7 +267,7 @@ struct malloc_state
/* Bitmap of bins */
unsigned int binmap[BINMAPSIZE];
/* Reference to the root object */
size_t root_offset;
moffset root_offset;
};
DB::DB(const char* pathname) {
@@ -309,12 +309,12 @@ DB::~DB() {
close(fd);
}
// Returns the raw offset of the root object as recorded in the
// allocator state (malloc_state::root_offset).
moffset DB::get_root_offset() {
moffset DB::get_root_internal() {
return ms->root_offset;
}
// Records the raw offset of the root object in the allocator state
// (malloc_state::root_offset).
void DB::set_root_offset(moffset root) {
ms->root_offset = root;
void DB::set_root_internal(moffset root_offset) {
ms->root_offset = root_offset;
}
void
@@ -448,7 +448,7 @@ static void malloc_consolidate(malloc_state *ms)
}
moffset
DB::malloc(size_t bytes)
DB::malloc_internal(size_t bytes)
{
unsigned int idx; /* associated bin index */
mbin* bin; /* associated bin */
@@ -819,7 +819,7 @@ DB::malloc(size_t bytes)
}
void
DB::free(moffset o)
DB::free_internal(moffset o)
{
size_t size; /* its size */
moffset *fb; /* associated fastbin */

View File

@@ -3,53 +3,99 @@
class DB;
extern thread_local PGF_INTERNAL_DECL unsigned char* current_base;
extern thread_local PGF_INTERNAL_DECL DB* current_db;
extern PGF_INTERNAL_DECL unsigned char* current_base;
extern PGF_INTERNAL_DECL DB* current_db;
typedef size_t moffset;
typedef moffset variant;
struct malloc_state;
template<class A> class ref {
size_t offset;
private:
moffset offset;
friend class DB;
public:
ref<A>() { }
ref<A>(size_t o) { offset = o; }
inline A* operator->() const { return (A*) (current_base+offset); }
inline operator A*() const { return (A*) (current_base+offset); }
inline bool operator ==(ref<A>& other) const { return offset==other->offset; }
inline operator size_t() { return offset; }
ref<A>(moffset o) { offset = o; }
A* operator->() const { return (A*) (current_base+offset); }
operator A*() const { return (A*) (current_base+offset); }
bool operator ==(ref<A>& other) const { return offset==other->offset; }
bool operator ==(moffset other_offset) const { return offset==other_offset; }
ref<A>& operator= (const ref<A>& r) {
offset = r.offset;
return *this;
}
static
ref<A> from_ptr(A *ptr) { return (((uint8_t*) ptr) - current_base); }
static
variant tagged(ref<A> ref) {
assert(A::tag < 2*sizeof(size_t));
return (ref.offset | A::tag);
}
static
ref<A> untagged(variant v) {
return (v & ~(2*sizeof(size_t) - 1));
}
static
uint8_t get_tag(variant v) {
return (v & (2*sizeof(size_t) - 1));
}
static
ref<A> null() { return 0; }
};
// DB: owns the database file descriptor and the allocator state.
// The templated helpers return typed ref<A> values; the static versions
// operate on the global `current_db`.
class PGF_INTERNAL_DECL DB {
private:
// File descriptor; closed in the destructor.
int fd;
// Allocator state; also holds the root object's offset.
malloc_state* ms;
// PgfReader calls malloc_internal directly.
friend class PgfReader;
public:
DB(const char* pathname);
~DB();
// Allocates sizeof(A) bytes and returns a typed reference to them.
template<class A> ref<A> malloc() {
return malloc(sizeof(A));
template<class A>
static ref<A> malloc() {
return current_db->malloc_internal(sizeof(A));
}
moffset malloc(size_t bytes);
template<class A> ref<A> get_root() {
return get_root_offset();
// Allocates exactly `bytes` bytes and returns them typed as ref<A>.
// NOTE(review): any integral argument selects this overload, so an
// accidental argument is silently interpreted as a byte count.
template<class A>
static ref<A> malloc(size_t bytes) {
return current_db->malloc_internal(bytes);
}
template<class A> void set_root(ref<A> root) {
set_root_offset(root);
// Returns the root object's offset typed as ref<A>.
template<class A>
static ref<A> get_root() {
return current_db->get_root_internal();
}
// Stores the offset held by `root` as the root object's offset.
template<class A>
static void set_root(ref<A> root) {
current_db->set_root_internal(root.offset);
}
private:
void init_state(size_t size);
void free(moffset o);
// Untyped primitives behind the templated helpers above.
moffset malloc_internal(size_t bytes);
void free_internal(moffset o);
moffset get_root_offset();
void set_root_offset(moffset root);
moffset get_root_internal();
void set_root_internal(moffset root_offset);
unsigned char* relocate(unsigned char* ptr);
};
#endif

View File

@@ -1,30 +1,109 @@
#ifndef EXPR_H_
#define EXPR_H_
#include "variant.h"
/// An abstract syntax tree
typedef variant PgfExpr;
// PgfLiteral
typedef variant PgfLiteral;
struct PgfHypo;
struct PgfType;
typedef int PgfMetaId;
typedef enum {
PGF_LITERAL_STR,
PGF_LITERAL_INT,
PGF_LITERAL_FLT,
PGF_LITERAL_NUM_TAGS
} PgfLiteralTag;
PGF_BIND_TYPE_EXPLICIT,
PGF_BIND_TYPE_IMPLICIT
} PgfBindType;
typedef struct {
char val[0]; // a flexible array that contains the value
} PgfLiteralStr;
/// A literal for an abstract syntax tree
typedef variant PgfLiteral;
// Literal variant with tag 0: a text value.
// PgfReader::read_literal allocates it with read_name, which places the
// text bytes directly after `val`, so `val` must stay the last member.
struct PgfLiteralStr {
static const uint8_t tag = 0;
PgfText val;
} ;
struct PgfLiteralInt {
static const uint8_t tag = 1;
typedef struct {
int val;
} PgfLiteralInt;
} ;
// Literal variant with tag 2: a double, filled from read_double().
struct PgfLiteralFlt {
static const uint8_t tag = 2;
double val;
};
// One hypothesis. PgfReader::read_hypo fills the members in declaration
// order: a bind-type tag byte, a name (`cid`) and a type.
struct PgfHypo {
PgfBindType bind_type;
ref<PgfText> cid;
ref<PgfType> type;
};
// A type: a vector of hypotheses, a name and a vector of expressions.
// PgfReader::read_type reads the hypotheses first, then the name (which
// allocates this struct with the text inline, so `name` must stay the
// last member) and finally the expressions.
struct PgfType {
ref<PgfVector<PgfHypo>> hypos;
ref<PgfVector<PgfExpr>> exprs;
PgfText name;
};
// Expression variant with tag 0. PgfReader::read_expr reads a bind-type
// tag byte, then the name (allocating the struct with the text inline,
// so `name` must stay the last member), then the body expression.
struct PgfExprAbs {
static const uint8_t tag = 0;
PgfBindType bind_type;
PgfExpr body;
PgfText name;
};
// Expression variant with tag 1: two sub-expressions, read in the order
// `fun` then `arg`.
struct PgfExprApp {
static const uint8_t tag = 1;
PgfExpr fun;
PgfExpr arg;
};
// Expression variant with tag 2: wraps a literal read with read_literal().
struct PgfExprLit {
static const uint8_t tag = 2;
PgfLiteral lit;
};
// Expression variant with tag 3: an integer id, filled from read_int().
struct PgfExprMeta {
static const uint8_t tag = 3;
PgfMetaId id;
};
// Expression variant with tag 4: carries only a name.
// `name` is the sole non-static member; PgfReader::read_absfun relies on
// that when it casts the address of a PgfText to PgfExprFun*.
struct PgfExprFun {
static const uint8_t tag = 4;
PgfText name;
};
// Expression variant with tag 5: an integer, filled from read_int()
// (presumably a variable index - confirm against the consumers).
struct PgfExprVar {
static const uint8_t tag = 5;
int var;
};
// Expression variant with tag 6: an expression followed by a type, read
// in that order.
struct PgfExprTyped {
static const uint8_t tag = 6;
PgfExpr expr;
ref<PgfType> type;
};
// Expression variant with tag 7: wraps a single nested expression
// (presumably marking it as an implicit argument, going by the name).
struct PgfExprImplArg {
static const uint8_t tag = 7;
PgfExpr expr;
};
typedef float prob_t;
typedef struct {
double val;
} PgfLiteralFlt;
prob_t prob;
PgfExpr expr;
} PgfExprProb;
#endif /* EXPR_H_ */

View File

@@ -182,13 +182,13 @@ Namespace<V> namespace_insert(Namespace<V> map, ref<V> value) {
return Node<V>::new_node(value);
int cmp = textcmp(value->name,map->value->name);
if (cmp < 0)
return Node<V>::balanceL(map->value,
namespace_insert(map->left, value),map->right);
else if (cmp > 0)
return Node<V>::balanceR(map->value,
map->left, namespace_insert(map->right, value));
else
if (cmp < 0) {
Namespace<V> left = namespace_insert(map->left, value);
return Node<V>::balanceL(map->value,left,map->right);
} else if (cmp > 0) {
Namespace<V> right = namespace_insert(map->right, value);
return Node<V>::balanceR(map->value, map->left, right);
} else
return Node<V>::new_node(value,map->left,map->right);
}

View File

@@ -16,7 +16,7 @@ PgfPGF *pgf_read(const char* fpath, PgfExn* err)
pgf = new PgfPGF(fpath_n.c_str());
if (pgf->db.get_root<PgfPGFRoot>() == 0) {
if (DB::get_root<PgfPGFRoot>() == 0) {
std::ifstream in(fpath, std::ios::binary);
if (in.fail()) {
throw std::system_error(errno, std::generic_category());
@@ -44,8 +44,8 @@ PgfPGF *pgf_read(const char* fpath, PgfExn* err)
}
// Allocates a PgfPGFRoot object in the database, stores the major and
// minor version numbers in it and registers it as the database root.
void PgfPGF::set_root() {
ref<PgfPGFRoot> root = db.malloc<PgfPGFRoot>();
ref<PgfPGFRoot> root = DB::malloc<PgfPGFRoot>();
root->major_version = major_version;
root->minor_version = minor_version;
db.set_root(root);
DB::set_root(root);
}

View File

@@ -82,7 +82,27 @@ uint64_t PgfReader::read_uint()
return u;
}
moffset PgfReader::read_name(size_t struct_size)
// Reads a length-prefixed name and allocates it inline at the end of an
// enclosing structure.
//
// struct_size - offset of the PgfText member inside the enclosing
//               structure (0 for a stand-alone PgfText)
// returns     - offset of the freshly allocated block; the PgfText header
//               lives at `struct_size` bytes into it
// throws      - pgf_error when the stream ends inside the name,
//               std::system_error on any other read failure
moffset PgfReader::read_name_internal(size_t struct_size)
{
    size_t size = read_len();

    // Room for the prefix, the PgfText header, the bytes and a terminator.
    moffset offs = current_db->malloc_internal(struct_size+sizeof(PgfText)+size+1);
    PgfText* ptext = (PgfText*) (current_base+offs+struct_size);
    ptext->size = size;

    // If reading the extra bytes causes EOF, it is an encoding
    // error, not a legitimate end of character stream.
    in->read(ptext->text, size);
    if (in->eof())
        throw pgf_error("utf8 decoding error");
    if (in->fail())
        throw std::system_error(errno, std::generic_category());

    // text holds exactly size+1 bytes, so the terminator belongs at index
    // `size`; index size+1 would be one byte past the allocation and
    // would leave text[size] uninitialised.
    ptext->text[size] = 0;

    return offs;
}
moffset PgfReader::read_text_internal(size_t struct_size)
{
size_t len = read_len();
@@ -119,8 +139,8 @@ moffset PgfReader::read_name(size_t struct_size)
size_t size = p-buf;
*p++ = 0;
moffset offs = current_db->malloc(struct_size+size+1);
PgfText* ptext = (PgfText*) (current_base+offs+struct_size-sizeof(PgfText));
moffset offs = current_db->malloc_internal(struct_size+sizeof(PgfText)+size+1);
PgfText* ptext = (PgfText*) (current_base+offs+struct_size);
ptext->size = size;
memcpy(ptext->text, buf, size+1);
@@ -139,55 +159,259 @@ Namespace<V> PgfReader::read_namespace(ref<V> (PgfReader::*read_value)())
return nmsp;
}
// Reads a length-prefixed sequence into a vector that is embedded as the
// member `field` of a freshly allocated C object.
//
// field      - pointer to the PgfVector<V> member of C
// read_value - member function that reads one element into the slot it
//              is given
// returns    - reference to the new C object; only the vector is filled
template <class C, class V>
ref<C> PgfReader::read_vector(PgfVector<V> C::* field, void (PgfReader::*read_value)(ref<V> val))
{
    const size_t count = read_len();
    ref<C> owner = vector_new<C,V>(field,count);

    size_t index = 0;
    while (index < count) {
        // The vector reference is derived afresh on every iteration,
        // against the value current_base has at that point.
        ref<PgfVector<V>> vec = ref<PgfVector<V>>::from_ptr(&(owner->*field));
        ref<V> slot = vector_elem(vec,index);
        (this->*read_value)(slot);
        ++index;
    }

    return owner;
}
// Reads a length-prefixed sequence into a freshly allocated stand-alone
// vector, calling `read_value` once per element slot in index order.
template <class V>
ref<PgfVector<V>> PgfReader::read_vector(void (PgfReader::*read_value)(ref<V> val))
{
    const size_t count = read_len();
    ref<PgfVector<V>> items = vector_new<V>(count);

    for (size_t index = 0; index != count; ++index) {
        ref<V> slot = vector_elem(items,index);
        (this->*read_value)(slot);
    }

    return items;
}
// Reads one literal: a tag byte followed by the payload for that tag.
//
// returns - a tagged reference to a newly allocated PgfLiteralStr,
//           PgfLiteralInt or PgfLiteralFlt
// throws  - pgf_error when the tag is none of the three known values
PgfLiteral PgfReader::read_literal()
{
    PgfLiteral lit = 0;

    uint8_t tag = read_tag();
    switch (tag) {
    case PgfLiteralStr::tag: {
        ref<PgfLiteralStr> lit_str =
            read_name<PgfLiteralStr>(&PgfLiteralStr::val);
        lit = ref<PgfLiteralStr>::tagged(lit_str);
        break;
    }
    case PgfLiteralInt::tag: {
        // No argument here: DB::malloc<T>(n) is the byte-count overload,
        // so passing the tag would allocate `tag` bytes instead of
        // sizeof(PgfLiteralInt).
        ref<PgfLiteralInt> lit_int =
            DB::malloc<PgfLiteralInt>();
        lit_int->val = read_int();
        lit = ref<PgfLiteralInt>::tagged(lit_int);
        break;
    }
    case PgfLiteralFlt::tag: {
        ref<PgfLiteralFlt> lit_flt =
            DB::malloc<PgfLiteralFlt>();
        lit_flt->val = read_double();
        lit = ref<PgfLiteralFlt>::tagged(lit_flt);
        break;
    }
    default:
        throw pgf_error("Unknown literal tag");
    }

    return lit;
}
// Reads one flag: its name followed by a literal value.
ref<PgfFlag> PgfReader::read_flag()
{
    ref<PgfFlag> flag = read_name(&PgfFlag::name);

    // Read first, store afterwards: read_literal allocates, and `flag->`
    // must only be resolved against current_base once that is done (the
    // operand order of `=` is unspecified before C++17).
    // NOTE(review): assumes an allocation can move the mapping - confirm.
    PgfLiteral value = read_literal();
    flag->value = value;

    return flag;
}
// Reads one expression: a tag byte followed by the payload for that tag.
//
// returns - a tagged reference to the newly allocated PgfExpr* node
// throws  - pgf_error when the tag is not one of the eight known values
//
// Nested reads are stored into a local before being assigned through a
// ref: the nested read allocates, and `node->` must only be resolved
// against current_base once that is done (the operand order of `=` is
// unspecified before C++17).
// NOTE(review): assumes an allocation can move the mapping - confirm.
PgfExpr PgfReader::read_expr()
{
    PgfExpr expr = 0;

    uint8_t tag = read_tag();
    switch (tag) {
    case PgfExprAbs::tag: {
        PgfBindType bind_type = (PgfBindType) read_tag();
        ref<PgfExprAbs> eabs = read_name(&PgfExprAbs::name);
        eabs->bind_type = bind_type;
        PgfExpr body = read_expr();
        eabs->body = body;
        expr = ref<PgfExprAbs>::tagged(eabs);
        break;
    }
    case PgfExprApp::tag: {
        ref<PgfExprApp> eapp = DB::malloc<PgfExprApp>();
        PgfExpr fun = read_expr();
        eapp->fun = fun;
        PgfExpr arg = read_expr();
        eapp->arg = arg;
        expr = ref<PgfExprApp>::tagged(eapp);
        break;
    }
    case PgfExprLit::tag: {
        ref<PgfExprLit> elit = DB::malloc<PgfExprLit>();
        PgfLiteral lit = read_literal();
        elit->lit = lit;
        expr = ref<PgfExprLit>::tagged(elit);
        break;
    }
    case PgfExprMeta::tag: {
        ref<PgfExprMeta> emeta = DB::malloc<PgfExprMeta>();
        emeta->id = read_int();
        expr = ref<PgfExprMeta>::tagged(emeta);
        break;
    }
    case PgfExprFun::tag: {
        ref<PgfExprFun> efun = read_name(&PgfExprFun::name);
        expr = ref<PgfExprFun>::tagged(efun);
        break;
    }
    case PgfExprVar::tag: {
        ref<PgfExprVar> evar = DB::malloc<PgfExprVar>();
        evar->var = read_int();
        expr = ref<PgfExprVar>::tagged(evar);
        break;
    }
    case PgfExprTyped::tag: {
        ref<PgfExprTyped> etyped = DB::malloc<PgfExprTyped>();
        PgfExpr inner = read_expr();
        etyped->expr = inner;
        ref<PgfType> type = read_type();
        etyped->type = type;
        expr = ref<PgfExprTyped>::tagged(etyped);
        break;
    }
    case PgfExprImplArg::tag: {
        ref<PgfExprImplArg> eimpl = DB::malloc<PgfExprImplArg>();
        PgfExpr inner = read_expr();
        eimpl->expr = inner;
        expr = ref<PgfExprImplArg>::tagged(eimpl);
        break;
    }
    default:
        throw pgf_error("Unknown expression tag");
    }

    // Hand back the node that was built; returning 0 here would discard
    // every expression that was just parsed.
    return expr;
}
// Reads one hypothesis into the slot `hypo`: a bind-type tag byte, a name
// and a type, in that order.
//
// Each allocating read goes into a local before it is stored: `hypo->`
// must only be resolved against current_base after the read has finished
// (the operand order of `=` is unspecified before C++17).
// NOTE(review): assumes an allocation can move the mapping - confirm.
void PgfReader::read_hypo(ref<PgfHypo> hypo)
{
    hypo->bind_type = (PgfBindType) read_tag();

    ref<PgfText> cid = read_name();
    hypo->cid = cid;

    ref<PgfType> type = read_type();
    hypo->type = type;
}
// Reads one type: a vector of hypotheses, then the name, then a vector of
// expressions. The PgfType object itself is allocated while reading the
// name, which is why the hypotheses are held in a local until then.
ref<PgfType> PgfReader::read_type()
{
    ref<PgfVector<PgfHypo>> hypos =
        read_vector<PgfHypo>(&PgfReader::read_hypo);

    ref<PgfType> tp = read_name<PgfType>(&PgfType::name);
    tp->hypos = hypos;

    // Same discipline for the expressions: finish the allocating read
    // before `tp->` is resolved (operand order of `=` is unspecified
    // before C++17).
    // NOTE(review): assumes an allocation can move the mapping - confirm.
    ref<PgfVector<PgfExpr>> exprs =
        read_vector<PgfExpr>(&PgfReader::read_expr);
    tp->exprs = exprs;

    return tp;
}
// Reads one pattern: a tag byte followed by the payload for that tag.
//
// returns - a tagged reference to the newly allocated PgfPatt* node
// throws  - pgf_error when the tag is not one of the seven known values
//
// Nested reads are stored into a local before being assigned through a
// ref: the nested read allocates, and `node->` must only be resolved
// against current_base once that is done (the operand order of `=` is
// unspecified before C++17).
// NOTE(review): assumes an allocation can move the mapping - confirm.
PgfPatt PgfReader::read_patt()
{
    PgfPatt patt = 0;

    uint8_t tag = read_tag();
    switch (tag) {
    case PgfPattApp::tag: {
        // The name precedes the arguments in the stream, but the node is
        // only allocated together with its argument vector.
        ref<PgfText> ctor = read_name();
        ref<PgfPattApp> papp =
            read_vector<PgfPattApp,PgfPatt>(&PgfPattApp::args,&PgfReader::read_patt2);
        papp->ctor = ctor;
        patt = ref<PgfPattApp>::tagged(papp);
        break;
    }
    case PgfPattVar::tag: {
        ref<PgfPattVar> pvar = read_name<PgfPattVar>(&PgfPattVar::name);
        patt = ref<PgfPattVar>::tagged(pvar);
        break;
    }
    case PgfPattAs::tag: {
        ref<PgfPattAs> pas = read_name<PgfPattAs>(&PgfPattAs::name);
        PgfPatt inner = read_patt();
        pas->patt = inner;
        patt = ref<PgfPattAs>::tagged(pas);
        break;
    }
    case PgfPattWild::tag: {
        ref<PgfPattWild> pwild = DB::malloc<PgfPattWild>();
        patt = ref<PgfPattWild>::tagged(pwild);
        break;
    }
    case PgfPattLit::tag: {
        ref<PgfPattLit> plit = DB::malloc<PgfPattLit>();
        PgfLiteral lit = read_literal();
        plit->lit = lit;
        patt = ref<PgfPattLit>::tagged(plit);
        break;
    }
    case PgfPattImplArg::tag: {
        ref<PgfPattImplArg> pimpl = DB::malloc<PgfPattImplArg>();
        PgfPatt inner = read_patt();
        pimpl->patt = inner;
        patt = ref<PgfPattImplArg>::tagged(pimpl);
        break;
    }
    case PgfPattTilde::tag: {
        ref<PgfPattTilde> ptilde = DB::malloc<PgfPattTilde>();
        PgfExpr inner = read_expr();
        ptilde->expr = inner;
        patt = ref<PgfPattTilde>::tagged(ptilde);
        break;
    }
    default:
        throw pgf_error("Unknown pattern tag");
    }

    return patt;
}
// Reads one equation (a vector of patterns followed by the body
// expression) and stores a reference to it in the slot `defn`.
void PgfReader::read_defn(ref<ref<PgfEquation>> defn)
{
    ref<PgfEquation> eq = read_vector(&PgfEquation::patts,&PgfReader::read_patt2);

    // Finish the allocating read before `eq->` is resolved (operand order
    // of `=` is unspecified before C++17).
    // NOTE(review): assumes an allocation can move the mapping - confirm.
    PgfExpr body = read_expr();
    eq->body = body;

    *defn = eq;
}
// Reads one abstract function: name, type, arity, an optional vector of
// equations (announced by a 0/1 tag byte) and a probability.
//
// returns - reference to the newly allocated PgfAbsFun
// throws  - pgf_error when the equations tag is neither 0 nor 1
ref<PgfAbsFun> PgfReader::read_absfun()
{
    ref<PgfAbsFun> absfun =
        read_name<PgfAbsFun>(&PgfAbsFun::name);

    // ep.expr aliases the function's own name as a PgfExprFun node; this
    // works because PgfExprFun consists of nothing but a PgfText.
    // NOTE(review): tagged() ORs the tag into the low bits of the offset
    // of `name`; confirm that offset has those bits clear, otherwise
    // untagged()/get_tag() will not recover it.
    ref<PgfExprFun> efun =
        ref<PgfExprFun>::from_ptr((PgfExprFun*) &absfun->name);
    absfun->ep.expr = ref<PgfExprFun>::tagged(efun);

    // Allocating reads go into a local first so that `absfun->` is only
    // resolved against current_base afterwards (operand order of `=` is
    // unspecified before C++17).
    // NOTE(review): assumes an allocation can move the mapping - confirm.
    ref<PgfType> type = read_type();
    absfun->type = type;

    absfun->arity = read_int();

    uint8_t tag = read_tag();
    switch (tag) {
    case 0:
        absfun->defns = 0;
        break;
    case 1: {
        ref<PgfVector<ref<PgfEquation>>> defns =
            read_vector<ref<PgfEquation>>(&PgfReader::read_defn);
        absfun->defns = defns;
        break;
    }
    default:
        throw pgf_error("Unknown tag, 0 or 1 expected");
    }

    // The file stores a probability; it is kept as a negative logarithm.
    absfun->ep.prob = - log(read_double());
    return absfun;
}
// Reads one abstract category: name, a vector of hypotheses (the context)
// and a probability, which is kept as a negative logarithm.
ref<PgfAbsCat> PgfReader::read_abscat()
{
    ref<PgfAbsCat> abscat = read_name<PgfAbsCat>(&PgfAbsCat::name);

    // Finish the allocating read before `abscat->` is resolved (operand
    // order of `=` is unspecified before C++17).
    // NOTE(review): assumes an allocation can move the mapping - confirm.
    ref<PgfVector<PgfHypo>> context =
        read_vector<PgfHypo>(&PgfReader::read_hypo);
    abscat->context = context;

    abscat->prob = - log(read_double());
    return abscat;
}
// Reads the abstract syntax into *abstract: its name followed by the
// flag, function and category namespaces, in that order.
// NOTE(review): `abstract` is a raw pointer; if it points into the
// database mapping, confirm that it stays valid across the allocations
// performed by the reads below.
void PgfReader::read_abstract(PgfAbstr* abstract)
{
abstract->name = read_name(0);
abstract->name = read_name();
abstract->aflags = read_namespace<PgfFlag>(&PgfReader::read_flag);
abstract->funs = read_namespace<PgfAbsFun>(&PgfReader::read_absfun);
abstract->cats = read_namespace<PgfAbsCat>(&PgfReader::read_abscat);
}
void PgfReader::read_pgf(PgfPGFRoot *pgf)
{
pgf->major_version = read_u16be();
pgf->minor_version = read_u16be();
pgf->gflags = read_namespace<PgfFlag>(&PgfReader::read_flag);
read_abstract(&pgf->abstract);

View File

@@ -18,18 +18,53 @@ public:
double read_double();
uint64_t read_uint();
// Reads a value with read_uint() and casts it to a signed 64-bit integer.
int64_t read_int() { return (int64_t) read_uint(); };
// A tag is a single byte, read with read_uint8().
uint8_t read_tag() { return read_uint8(); }
// Reads a value with read_uint() and casts it to size_t for use as a
// length or element count.
size_t read_len() { return (size_t) read_uint(); };
uint8_t read_tag() { return read_uint8(); }
// Reads a name and allocates it inline inside a new V object. The byte
// offset of the PgfText member within V is passed on so that the text is
// placed at that member's position in the allocation.
template<class V>
ref<V> read_name() { return read_name(offsetof(V,name)); };
ref<V> read_name(PgfText V::* field) {
return read_name_internal((size_t) &(((V*) NULL)->*field));
};
// Reads a name as a stand-alone PgfText: there is no enclosing structure,
// so the text header sits at offset 0 of the allocation.
ref<PgfText> read_name() {
    const size_t no_prefix = 0;
    return read_name_internal(no_prefix);
}
// Reads a text and allocates it inline inside a new V object; the byte
// offset of `field` within V tells read_text_internal where the PgfText
// header belongs.
template<class V>
ref<V> read_text(PgfText V::* field) {
    const size_t field_offset = (size_t) &(((V*) NULL)->*field);
    return read_text_internal(field_offset);
}
// Reads a text as a stand-alone PgfText: there is no enclosing structure,
// so the text header sits at offset 0 of the allocation.
ref<PgfText> read_text() {
    const size_t no_prefix = 0;
    return read_text_internal(no_prefix);
}
template<class V>
Namespace<V> read_namespace(ref<V> (PgfReader::*read_value)());
template <class C, class V>
ref<C> read_vector(PgfVector<V> C::* field, void (PgfReader::*read_value)(ref<V> val));
template<class V>
ref<PgfVector<V>> read_vector(void (PgfReader::*read_value)(ref<V> val));
PgfLiteral read_literal();
PgfExpr read_expr();
// Element-reader adapter for read_vector: reads one expression into the
// slot `r`. The expression is read into a local first so that `*r` is
// only resolved against current_base after the (allocating) read is done;
// the operand order of `=` is unspecified before C++17.
// NOTE(review): assumes an allocation can move the mapping - confirm.
void read_expr(ref<PgfExpr> r) { PgfExpr e = read_expr(); *r = e; };
void read_hypo(ref<PgfHypo> hypo);
ref<PgfType> read_type();
ref<PgfFlag> read_flag();
PgfPatt read_patt();
// Element-reader adapter for read_vector: reads one pattern into the slot
// `r`. The pattern is read into a local first so that `*r` is only
// resolved against current_base after the (allocating) read is done; the
// operand order of `=` is unspecified before C++17.
// NOTE(review): assumes an allocation can move the mapping - confirm.
void read_patt2(ref<PgfPatt> r) { PgfPatt p = read_patt(); *r = p; };
void read_defn(ref<ref<PgfEquation>> defn);
ref<PgfAbsFun> read_absfun();
ref<PgfAbsCat> read_abscat();
void read_abstract(PgfAbstr* abstract);
void read_pgf(PgfPGFRoot* pgf);
@@ -37,7 +72,8 @@ public:
private:
std::istream *in;
moffset read_name(size_t size);
moffset read_name_internal(size_t struct_size);
moffset read_text_internal(size_t struct_size);
};
#endif

View File

@@ -1,12 +0,0 @@
#ifndef VARIANT_H_
#define VARIANT_H_
typedef uintptr_t variant;
// Builds a variant from a reference by OR-ing `tag` into the offset that
// `r` converts to.
template<class V>
variant variant_close(ref<V> r, uint8_t tag)
{
    const moffset offset = (moffset) r;
    return (offset | tag);
}
#endif /* VARIANT_H_ */

32
src/runtime/c/vector.h Normal file
View File

@@ -0,0 +1,32 @@
#ifndef VECTOR_H
#define VECTOR_H
template <class A>
struct PgfVector {
size_t len;
A data[];
};
// Allocates a stand-alone vector with room for `len` elements and records
// the length. The elements themselves are left untouched.
template <class A> inline
ref<PgfVector<A>> vector_new(size_t len)
{
    const size_t total_bytes = sizeof(PgfVector<A>)+len*sizeof(A);
    ref<PgfVector<A>> vec = DB::malloc<PgfVector<A>>(total_bytes);
    vec->len = len;
    return vec;
}
// Allocates a C object whose member `field` is a vector with room for
// `len` elements, and records the length in that member. The block is
// sized as offset-of-field + vector header + elements, i.e. `field` is
// treated as the last member of C. No other member is initialised.
template <class C, class A> inline
ref<C> vector_new(PgfVector<A> C::* field, size_t len)
{
    const size_t field_offset = ((size_t) &(((C*) NULL)->*field));
    const size_t total_bytes  = field_offset+sizeof(PgfVector<A>)+len*sizeof(A);
    ref<C> owner = DB::malloc<C>(total_bytes);
    (owner->*field).len = len;
    return owner;
}
// Returns a reference to element `index` of the vector `v`.
// No bounds check is performed.
template <class A> inline
ref<A> vector_elem(ref<PgfVector<A>> v, size_t index)
{
    A* slot = &v->data[index];
    return ref<A>::from_ptr(slot);
}
#endif // VECTOR_H