mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-21 10:49:33 -06:00
move the C sources to the subfolder pgf again for backwards compatibility
This commit is contained in:
131
src/runtime/c/pgf/data.h
Normal file
131
src/runtime/c/pgf/data.h
Normal file
@@ -0,0 +1,131 @@
|
||||
#ifndef PGF_DATA_H_
|
||||
#define PGF_DATA_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <assert.h>
|
||||
#include <iostream>
|
||||
#include <exception>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "pgf.h"
|
||||
#include "db.h"
|
||||
#include "text.h"
|
||||
#include "vector.h"
|
||||
#include "namespace.h"
|
||||
#include "expr.h"
|
||||
|
||||
// Exception type thrown for PGF runtime errors.
//
// std::runtime_error already copies the message into internally managed
// storage and returns that copy from what(). The previous implementation
// additionally kept the raw `const char *` passed in and returned it from
// an overridden what(); that pointer dangles whenever the caller passes a
// temporary or stack buffer. Delegating entirely to the base class fixes
// the potential dangling pointer and removes the redundant member.
class PGF_INTERNAL_DECL pgf_error : public std::runtime_error {
public:
    pgf_error(const char *msg) : std::runtime_error(msg)
    {
    }
};
|
||||
|
||||
// A named flag: maps a flag name to a literal value.
// NOTE(review): the PgfText field is placed last, presumably because it is
// a variable-length inline string — confirm against the PgfText definition.
struct PGF_INTERNAL_DECL PgfFlag {
    PgfLiteral value;
    PgfText name;
};
|
||||
|
||||
// PgfPatt

// A pattern in a function definition (see PgfEquation). Represented as a
// tagged variant; each alternative below declares the static `tag` value
// used to discriminate it.
typedef variant PgfPatt;
|
||||
|
||||
// Application pattern: a constructor applied to argument patterns,
// i.e. matches (ctor arg1 ... argN).
struct PgfPattApp {
    static const uint8_t tag = 0;  // variant discriminant

    ref<PgfText> ctor;             // constructor name
    PgfVector<PgfPatt> args;       // patterns for the arguments
};
|
||||
|
||||
// Variable pattern: matches anything and binds it to `name`.
struct PgfPattVar {
    static const uint8_t tag = 1;  // variant discriminant

    PgfText name;
};
|
||||
|
||||
// As-pattern (name@patt): binds `name` while also matching `patt`.
struct PgfPattAs {
    static const uint8_t tag = 2;  // variant discriminant

    PgfPatt patt;
    PgfText name;
};
|
||||
|
||||
// Wildcard pattern (_): matches anything, binds nothing. Carries no data.
struct PgfPattWild {
    static const uint8_t tag = 3;  // variant discriminant
};
|
||||
|
||||
// Literal pattern: matches exactly the literal `lit`.
struct PgfPattLit {
    static const uint8_t tag = 4;  // variant discriminant

    PgfLiteral lit;
};
|
||||
|
||||
// Implicit-argument pattern ({patt}): wraps a pattern for an implicit
// argument position.
struct PgfPattImplArg {
    static const uint8_t tag = 5;  // variant discriminant

    PgfPatt patt;
};
|
||||
|
||||
// Tilde pattern (~expr): holds an expression rather than a sub-pattern.
// NOTE(review): presumably GF's inaccessible/computed pattern — confirm
// against the PGF format specification.
struct PgfPattTilde {
    static const uint8_t tag = 6;  // variant discriminant

    PgfExpr expr;
};
|
||||
|
||||
// One equation of a function definition: the left-hand side patterns
// and the right-hand side body.
typedef struct {
    PgfExpr body;
    PgfVector<PgfPatt> patts;
} PgfEquation;
|
||||
|
||||
// An abstract-syntax function.
struct PGF_INTERNAL_DECL PgfAbsFun {
    ref<PgfType> type;    // the function's type signature
    int arity;            // NOTE(review): presumably the number of arguments
                          // consumed by the defining equations — confirm
    ref<PgfVector<ref<PgfEquation>>> defns;  // defining equations, if any
    PgfExprProb ep;       // expression paired with its probability
    PgfText name;         // function name (variable-length field kept last)
};
|
||||
|
||||
// An abstract-syntax category.
typedef struct {
    ref<PgfVector<PgfHypo>> context;  // hypotheses (the category's arguments)
    prob_t prob;                      // probability of the category
    PgfText name;                     // category name (variable-length, kept last)
} PgfAbsCat;
|
||||
|
||||
// The abstract syntax of a grammar: its name, abstract-level flags,
// and the namespaces of functions and categories.
typedef struct {
    ref<PgfText> name;
    Namespace<PgfFlag> aflags;
    Namespace<PgfAbsFun> funs;
    Namespace<PgfAbsCat> cats;
} PgfAbstr;
|
||||
|
||||
// The root object stored in a PGF database: format version, global
// flags and the abstract syntax. Concrete syntaxes are not stored yet
// (see the commented-out field).
struct PGF_INTERNAL_DECL PgfPGFRoot {
    uint16_t major_version;
    uint16_t minor_version;
    Namespace<PgfFlag> gflags;
    PgfAbstr abstract;
    //PgfConcrs* concretes;
};
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wattributes"
|
||||
|
||||
// A PGF database: the memory-mapped storage inherited from DB, holding
// a PgfPGFRoot as its root object. Construction and destruction simply
// delegate to DB.
struct PgfPGF : DB {
    PGF_INTERNAL_DECL PgfPGF(const char* fpath, int flags, int mode)
        : DB(fpath, flags, mode) {};
    PGF_INTERNAL_DECL ~PgfPGF() {};
};
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
#endif
|
||||
977
src/runtime/c/pgf/db.cxx
Normal file
977
src/runtime/c/pgf/db.cxx
Normal file
@@ -0,0 +1,977 @@
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <system_error>
|
||||
|
||||
#include "data.h"
|
||||
|
||||
// Thread-local bookkeeping for the currently active database mapping.
// The initial-exec TLS model gives the fastest access pattern; it assumes
// the runtime is loaded at program startup rather than dlopen()ed late.
PGF_INTERNAL __thread unsigned char* current_base __attribute__((tls_model("initial-exec"))) = NULL;
PGF_INTERNAL __thread DB* current_db __attribute__((tls_model("initial-exec"))) = NULL;
PGF_INTERNAL __thread DB_scope *last_db_scope __attribute__((tls_model("initial-exec"))) = NULL;
|
||||
|
||||
#ifndef DEFAULT_TOP_PAD
|
||||
#define DEFAULT_TOP_PAD (0)
|
||||
#endif
|
||||
|
||||
#define ptr(ms,o) ((mchunk*) (((char*) (ms)) + (o)))
|
||||
#define ofs(ms,p) (((char*) (p)) - ((char*) (ms)))
|
||||
|
||||
// Header of an allocation chunk inside the memory-mapped pool. The layout
// appears adapted from glibc malloc's malloc_chunk, except that links are
// stored as offsets (moffset) from the start of the pool instead of raw
// pointers, so the pool remains valid at whatever address it is mapped.
struct mchunk {
    size_t mchunk_prev_size;  /* Size of previous chunk (if free).  */
    size_t mchunk_size;       /* Size in bytes, including overhead. */

    moffset fd;               /* double links -- used only if free. */
    moffset bk;

    /* Only used for large blocks: pointer to next larger size.  */
    moffset fd_nextsize;      /* double links -- used only if free. */
    moffset bk_nextsize;
};
|
||||
|
||||
#define POOL_ALIGNMENT (2 * sizeof(size_t) < __alignof__ (long double) \
|
||||
? __alignof__ (long double) : 2 * sizeof(size_t))
|
||||
|
||||
/*
|
||||
Bins
|
||||
An array of bin headers for free chunks. Each bin is doubly
|
||||
linked. The bins are approximately proportionally (log) spaced.
|
||||
There are a lot of these bins (128). This may look excessive, but
|
||||
works very well in practice. Most bins hold sizes that are
|
||||
unusual as allocation request sizes, but are more usual for fragments
|
||||
and consolidated sets of chunks, which is what these bins hold, so
|
||||
they can be found quickly. All procedures maintain the invariant
|
||||
that no consolidated chunk physically borders another one, so each
|
||||
chunk in a list is known to be preceded and followed by either
|
||||
inuse chunks or the ends of memory.
|
||||
Chunks in bins are kept in size order, with ties going to the
|
||||
approximately least recently used chunk. Ordering isn't needed
|
||||
for the small bins, which all contain the same-sized chunks, but
|
||||
facilitates best-fit allocation for larger chunks. These lists
|
||||
are just sequential. Keeping them in order almost never requires
|
||||
enough traversal to warrant using fancier ordered data
|
||||
structures.
|
||||
Chunks of the same size are linked with the most
|
||||
recently freed at the front, and allocations are taken from the
|
||||
back. This results in LRU (FIFO) allocation order, which tends
|
||||
to give each chunk an equal opportunity to be consolidated with
|
||||
adjacent freed chunks, resulting in larger free chunks and less
|
||||
fragmentation.
|
||||
To simplify use in double-linked lists, each bin header acts
|
||||
as an mchunk. This avoids special-casing for headers.
|
||||
But to conserve space and improve locality, we allocate
|
||||
only the fd/bk pointers of bins, and then use repositioning tricks
|
||||
to treat these as the fields of a mchunk*.
|
||||
*/
|
||||
|
||||
typedef struct mchunk mbin;
|
||||
|
||||
/* addressing -- note that bin_at(0) does not exist */
|
||||
#define bin_at(m, i) \
|
||||
(mbin*) (((char *) &((m)->bins[((i) - 1) * 2])) \
|
||||
- offsetof (mchunk, fd))
|
||||
/* analog of ++bin */
|
||||
#define next_bin(b) ((mbin*) ((char *) (b) + (sizeof(mchunk*) << 1)))
|
||||
/* Reminders about list directionality within bins */
|
||||
#define first(b) ((b)->fd)
|
||||
#define last(b) ((b)->bk)
|
||||
|
||||
/*
|
||||
Indexing
|
||||
Bins for sizes < 512 bytes contain chunks of all the same size, spaced
|
||||
8 bytes apart. Larger bins are approximately logarithmically spaced:
|
||||
64 bins of size 8
|
||||
32 bins of size 64
|
||||
16 bins of size 512
|
||||
8 bins of size 4096
|
||||
4 bins of size 32768
|
||||
2 bins of size 262144
|
||||
1 bin of size what's left
|
||||
There is actually a little bit of slop in the numbers in bin_index
|
||||
for the sake of speed. This makes no difference elsewhere.
|
||||
The bins top out around 1MB because we expect to service large
|
||||
requests via mmap.
|
||||
Bin 0 does not exist. Bin 1 is the unordered list; if that would be
|
||||
a valid chunk size the small bins are bumped up one.
|
||||
*/
|
||||
#define NBINS 128
|
||||
#define NSMALLBINS 64
|
||||
#define SMALLBIN_WIDTH POOL_ALIGNMENT
|
||||
#define SMALLBIN_CORRECTION (POOL_ALIGNMENT > 2 * sizeof(size_t))
|
||||
#define MIN_LARGE_SIZE ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)
|
||||
|
||||
#define in_smallbin_range(sz) \
|
||||
((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)
|
||||
#define smallbin_index(sz) \
|
||||
((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
|
||||
+ SMALLBIN_CORRECTION)
|
||||
#define largebin_index_32(sz) \
|
||||
(((((unsigned long) (sz)) >> 6) <= 38) ? 56 + (((unsigned long) (sz)) >> 6) :\
|
||||
((((unsigned long) (sz)) >> 9) <= 20) ? 91 + (((unsigned long) (sz)) >> 9) :\
|
||||
((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
|
||||
((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
|
||||
((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
|
||||
126)
|
||||
#define largebin_index_32_big(sz) \
|
||||
(((((unsigned long) (sz)) >> 6) <= 45) ? 49 + (((unsigned long) (sz)) >> 6) :\
|
||||
((((unsigned long) (sz)) >> 9) <= 20) ? 91 + (((unsigned long) (sz)) >> 9) :\
|
||||
((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
|
||||
((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
|
||||
((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
|
||||
126)
|
||||
// XXX It remains to be seen whether it is good to keep the widths of
|
||||
// XXX the buckets the same or whether it should be scaled by a factor
|
||||
// XXX of two as well.
|
||||
#define largebin_index_64(sz) \
|
||||
(((((unsigned long) (sz)) >> 6) <= 48) ? 48 + (((unsigned long) (sz)) >> 6) :\
|
||||
((((unsigned long) (sz)) >> 9) <= 20) ? 91 + (((unsigned long) (sz)) >> 9) :\
|
||||
((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
|
||||
((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
|
||||
((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
|
||||
126)
|
||||
#define largebin_index(sz) \
|
||||
(sizeof(size_t) == 8 ? largebin_index_64 (sz) \
|
||||
: POOL_ALIGNMENT == 16 ? largebin_index_32_big (sz) \
|
||||
: largebin_index_32 (sz))
|
||||
|
||||
|
||||
/*
|
||||
Unsorted chunks
|
||||
All remainders from chunk splits, as well as all returned chunks,
|
||||
are first placed in the "unsorted" bin. They are then placed
|
||||
in regular bins after malloc gives them ONE chance to be used before
|
||||
binning. So, basically, the unsorted_chunks list acts as a queue,
|
||||
with chunks being placed on it in free (and pool_consolidate),
|
||||
and taken off (to be either used or placed in bins) in malloc.
|
||||
*/
|
||||
/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
|
||||
#define unsorted_chunks(M) (bin_at (M, 1))
|
||||
|
||||
/* conversion from malloc headers to user pointers, and back */
|
||||
#define chunk2mem(p) ((void*)((char*)(p) + 2*sizeof(size_t)))
|
||||
#define mem2chunk(mem) ((mchunk*)((char*)(mem) - 2*sizeof(size_t)))
|
||||
|
||||
#define MIN_CHUNK_SIZE (offsetof(mchunk, fd_nextsize))
|
||||
|
||||
#define MALLOC_ALIGN_MASK (2*sizeof(size_t) - 1)
|
||||
|
||||
/* The smallest size we can malloc is an aligned minimal chunk */
|
||||
#define MINSIZE \
|
||||
(unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
|
||||
|
||||
/* pad request bytes into a usable size -- internal version */
|
||||
#define request2size(req) \
|
||||
(((req) + sizeof(size_t) + MALLOC_ALIGN_MASK < MINSIZE) ? \
|
||||
MINSIZE : \
|
||||
((req) + sizeof(size_t) + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
|
||||
|
||||
/*
|
||||
--------------- Physical chunk operations ---------------
|
||||
*/
|
||||
/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
|
||||
#define PREV_INUSE 0x1
|
||||
/* extract inuse bit of previous chunk */
|
||||
#define prev_inuse(p) ((p)->mchunk_size & PREV_INUSE)
|
||||
|
||||
/* Get size, ignoring use bits */
|
||||
#define chunksize(p) (p->mchunk_size & ~(PREV_INUSE))
|
||||
|
||||
/* Size of the chunk below P. Only valid if !prev_inuse (P). */
|
||||
#define prev_size(p) ((p)->mchunk_prev_size)
|
||||
|
||||
/* Treat space at ptr + offset as a chunk */
|
||||
#define chunk_at_offset(p, s) ((mchunk*) (((char *) (p)) + (s)))
|
||||
|
||||
/* check/set/clear inuse bits in known places */
|
||||
#define inuse_bit_at_offset(p, s) \
|
||||
(((mchunk*) (((char *) (p)) + (s)))->mchunk_size & PREV_INUSE)
|
||||
|
||||
#define set_inuse_bit_at_offset(p, s) \
|
||||
(((mchunk*) (((char *) (p)) + (s)))->mchunk_size |= PREV_INUSE)
|
||||
|
||||
#define clear_inuse_bit_at_offset(p, s) \
|
||||
(((mchunk*) (((char *) (p)) + (s)))->mchunk_size &= ~(PREV_INUSE))
|
||||
|
||||
/* Set size/use field */
|
||||
#define set_head(p, s) ((p)->mchunk_size = (s))
|
||||
/* Set size at footer (only when chunk is not in use) */
|
||||
#define set_foot(p, s) (((mchunk*) ((char *) (p) + (s)))->mchunk_prev_size = (s))
|
||||
|
||||
/*
|
||||
Binmap
|
||||
To help compensate for the large number of bins, a one-level index
|
||||
structure is used for bin-by-bin searching. `binmap' is a
|
||||
bitvector recording whether bins are definitely empty so they can
|
||||
be skipped over during traversals. The bits are NOT always
|
||||
cleared as soon as bins are empty, but instead only
|
||||
when they are noticed to be empty during traversal in malloc.
|
||||
*/
|
||||
/* Conservatively use 32 bits per map word, even if on 64bit system */
|
||||
#define BINMAPSHIFT 5
|
||||
#define BITSPERMAP (1U << BINMAPSHIFT)
|
||||
#define BINMAPSIZE (NBINS / BITSPERMAP)
|
||||
|
||||
#define idx2block(i) ((i) >> BINMAPSHIFT)
|
||||
#define idx2bit(i) ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))
|
||||
#define mark_bin(ms, i) ((ms)->binmap[idx2block(i)] |= idx2bit (i))
|
||||
#define unmark_bin(ms, i) ((ms)->binmap[idx2block(i)] &= ~(idx2bit (i)))
|
||||
#define get_binmap(ms, i) ((ms)->binmap[idx2block(i)] & idx2bit (i))
|
||||
|
||||
/*
|
||||
Fastbins
|
||||
An array of lists holding recently freed small chunks. Fastbins
|
||||
are not doubly linked. It is faster to single-link them, and
|
||||
since chunks are never removed from the middles of these lists,
|
||||
double linking is not necessary. Also, unlike regular bins, they
|
||||
are not even processed in FIFO order (they use faster LIFO) since
|
||||
ordering doesn't much matter in the transient contexts in which
|
||||
fastbins are normally used.
|
||||
Chunks in fastbins keep their inuse bit set, so they cannot
|
||||
be consolidated with other free chunks. malloc_consolidate
|
||||
releases all chunks in fastbins and consolidates them with
|
||||
other free chunks.
|
||||
*/
|
||||
|
||||
#define DEFAULT_MXFAST (64 * sizeof(size_t) / 4)
|
||||
|
||||
/* offset 2 to use otherwise unindexable first 2 bins */
|
||||
#define fastbin_index(sz) \
|
||||
((((unsigned int) (sz)) >> (sizeof(size_t) == 8 ? 4 : 3)) - 2)
|
||||
/* The maximum fastbin request size we support */
|
||||
#define MAX_FAST_SIZE (80 * sizeof(size_t) / 4)
|
||||
#define NFASTBINS (fastbin_index (request2size (MAX_FAST_SIZE)) + 1)
|
||||
|
||||
/*
|
||||
FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
|
||||
that triggers automatic consolidation of possibly-surrounding
|
||||
fastbin chunks. This is a heuristic, so the exact value should not
|
||||
matter too much. It is defined at half the default trim threshold as a
|
||||
compromise heuristic to only attempt consolidation if it is likely
|
||||
to lead to trimming. However, it is not dynamically tunable, since
|
||||
consolidation reduces fragmentation surrounding large chunks even
|
||||
if trimming is not used.
|
||||
*/
|
||||
#define FASTBIN_CONSOLIDATION_THRESHOLD (65536UL)
|
||||
|
||||
// Allocator state stored at the very start of the memory-mapped pool.
// Adapted from glibc malloc's malloc_state, with all links kept as
// offsets (moffset) so the pool is position-independent.
struct malloc_state
{
    /* Set if the fastbin chunks contain recently inserted free blocks.  */
    bool have_fastchunks;

    /* Fastbins */
    moffset fastbins[NFASTBINS];

    /* Base of the topmost chunk -- not otherwise kept in a bin */
    moffset top;

    /* The remainder from the most recent split of a small request */
    moffset last_remainder;

    /* Normal bins packed as described above */
    moffset bins[NBINS * 2 - 2];

    /* Bitmap of bins */
    unsigned int binmap[BINMAPSIZE];

    /* Reference to the root object */
    moffset root_offset;
};
|
||||
|
||||
// Open (or create) a database. With pathname == NULL an anonymous,
// memory-only pool of one page is created; otherwise the file is opened
// with the given flags/mode, grown to one page if empty, and mapped
// shared so that changes persist.
//
// Fix: the destructor never runs when a constructor throws, so every
// error path after open()/mmap() now releases the file descriptor and
// the mapping by hand (the old code leaked them). The lseek() result is
// also kept in an off_t first, so the (off_t)-1 error check no longer
// depends on an implementation-defined size_t conversion.
DB::DB(const char* pathname, int flags, int mode) {
    size_t file_size;
    bool is_new = false;

    fd = -1;
    ms = NULL;

    if (pathname == NULL) {
        // Anonymous in-memory database: start with a single page.
        file_size = getpagesize();
        is_new = true;
    } else {
        fd = open(pathname, flags, mode);
        if (fd < 0)
            throw std::system_error(errno, std::generic_category());

        // Determine how much of the file to map.
        off_t end = lseek(fd, 0, SEEK_END);
        if (end == ((off_t) -1)) {
            int err = errno;   // close() may clobber errno
            close(fd);
            throw std::system_error(err, std::generic_category());
        }
        file_size = (size_t) end;

        is_new = false;
        if (file_size == 0) {
            // Brand new file: grow it to one page before mapping.
            file_size = getpagesize();
            if (ftruncate(fd, file_size) < 0) {
                int err = errno;
                close(fd);
                throw std::system_error(err, std::generic_category());
            }
            is_new = true;
        }
    }

    // Anonymous maps back the in-memory variant; file-backed maps are
    // shared so that updates reach the file.
    int mflags = (fd < 0) ? (MAP_PRIVATE | MAP_ANONYMOUS) : MAP_SHARED;
    ms = (malloc_state*)
        mmap(NULL, file_size, PROT_READ | PROT_WRITE, mflags, fd, 0);
    if (ms == MAP_FAILED) {
        int err = errno;
        if (fd >= 0)
            close(fd);
        throw std::system_error(err, std::generic_category());
    }

    if (is_new) {
        init_state(file_size);
    }

    int res = pthread_rwlock_init(&rwlock, NULL);
    if (res != 0) {
        // Release everything acquired above before propagating.
        munmap(ms, file_size);
        if (fd >= 0)
            close(fd);
        throw std::system_error(res, std::generic_category());
    }
}
|
||||
|
||||
// Tear down the database: unmap the pool, close the backing file (if
// any) and destroy the lock.
//
// Fix: the old code computed the mapping size as
//     size_t size = ms->top + size + sizeof(size_t);
// reading the local `size` inside its own initializer — an uninitialized
// value handed to munmap. Per the invariant established in init_state,
// the mapping size is the offset of the top chunk plus the size of the
// top chunk plus one trailing size_t of overhead.
DB::~DB() {
    if (ms != NULL) {
        size_t size =
            ms->top + chunksize(ptr(ms,ms->top)) + sizeof(size_t);

        munmap(ms,size);
    }

    if (fd >= 0)
        close(fd);

    pthread_rwlock_destroy(&rwlock);
}
|
||||
|
||||
void DB::sync()
|
||||
{
|
||||
size_t size =
|
||||
current_db->ms->top + size + sizeof(size_t);
|
||||
|
||||
int res = msync((void *) current_db->ms, size, MS_SYNC | MS_INVALIDATE);
|
||||
if (res != 0)
|
||||
throw std::system_error(errno, std::generic_category());
|
||||
}
|
||||
|
||||
// Return the pool-relative offset of the stored root object
// (0 means no root has been set yet — see init_state).
moffset DB::get_root_internal() {
    return ms->root_offset;
}
|
||||
|
||||
// Record the pool-relative offset of the root object in the pool header.
void DB::set_root_internal(moffset root_offset) {
    ms->root_offset = root_offset;
}
|
||||
|
||||
/* Initialize a freshly created pool of `size` bytes. The malloc_state
   header lives at the start of the mapping; everything after it forms
   the single "top" chunk from which allocations are carved. */
void
DB::init_state(size_t size)
{
    /* Init fastbins */
    ms->have_fastchunks = false;
    for (int i = 0; i < NFASTBINS; ++i) {
        ms->fastbins[i] = 0;
    }

    /* The top chunk starts right after the state header; the extra
       size_t accounts for the chunk header overhead (see chunk2mem). */
    mchunk* top_chunk =
        mem2chunk(((char*) ms) + sizeof(*ms) + sizeof(size_t));
    ms->top = ofs(ms,top_chunk);
    /* PREV_INUSE is set so that the (nonexistent) predecessor of the
       top chunk is never consolidated into it. */
    set_head(top_chunk, (size - sizeof(*ms)) | PREV_INUSE);

    ms->last_remainder = 0;

    /* Establish circular links for normal bins */
    for (int i = 1; i < NBINS; ++i) {
        mbin *bin = bin_at(ms, i);
        bin->fd = bin->bk = ofs(ms,bin);
    }

    /* All bins start out empty. */
    memset(ms->binmap, 0, sizeof(ms->binmap));

    /* No root object has been stored yet. */
    ms->root_offset = 0;
}
|
||||
|
||||
/* Take a chunk off a bin list. */
|
||||
/* Take a chunk off a bin list.  */
static void
unlink_chunk (malloc_state* ms, mchunk* p)
{
    /* Splice p out of the doubly linked bin list. */
    mchunk* fd = ptr(ms,p->fd);
    mchunk* bk = ptr(ms,p->bk);
    fd->bk = ofs(ms,bk);
    bk->fd = ofs(ms,fd);
    /* Large bins additionally keep a size skip list through
       fd_nextsize/bk_nextsize, linking the first chunk of each
       distinct size; repair it if p participates in it. */
    if (!in_smallbin_range(p->mchunk_size) && p->fd_nextsize != 0) {
        if (fd->fd_nextsize == 0) {
            /* fd is not on the skip list yet: it inherits p's place. */
            if (p->fd_nextsize == ofs(ms,p))
                /* p was the only entry: fd now links to itself. */
                fd->fd_nextsize = fd->bk_nextsize = ofs(ms,fd);
            else {
                fd->fd_nextsize = p->fd_nextsize;
                fd->bk_nextsize = p->bk_nextsize;
                ptr(ms,p->fd_nextsize)->bk_nextsize = ofs(ms,fd);
                ptr(ms,p->bk_nextsize)->fd_nextsize = ofs(ms,fd);
            }
        } else {
            /* fd already represents this size: just drop p. */
            ptr(ms,p->fd_nextsize)->bk_nextsize = p->bk_nextsize;
            ptr(ms,p->bk_nextsize)->fd_nextsize = p->fd_nextsize;
        }
    }
}
|
||||
|
||||
/*
|
||||
------------------------- malloc_consolidate -------------------------
|
||||
malloc_consolidate is a specialized version of free() that tears
|
||||
down chunks held in fastbins. Free itself cannot be used for this
|
||||
purpose since, among other things, it might place chunks back onto
|
||||
fastbins. So, instead, we need to use a minor variant of the same
|
||||
code.
|
||||
*/
|
||||
/* Tear down all fastbins: remove each chunk, merge it with any free
   neighbours, and place the result in the unsorted bin (or absorb it
   into the top chunk if adjacent to it). See the comment block above
   for why free() itself cannot be used for this. */
static void malloc_consolidate(malloc_state *ms)
{
    moffset*    fb;             /* current fastbin being consolidated */
    moffset*    maxfb;          /* last fastbin (for loop control) */
    mchunk*     p;              /* current chunk being consolidated */
    mchunk*     nextp;          /* next chunk to consolidate */
    mchunk*     unsorted_bin;   /* bin header */
    mchunk*     first_unsorted; /* chunk to link to */

    /* These have same use as in free() */
    mchunk*     nextchunk;
    size_t      size;
    size_t      nextsize;
    size_t      prevsize;
    int         nextinuse;

    ms->have_fastchunks = false;

    unsorted_bin = unsorted_chunks(ms);

    /*
      Remove each chunk from fast bin and consolidate it, placing it
      then in unsorted bin. Among other reasons for doing this,
      placing in unsorted bin avoids needing to calculate actual bins
      until malloc is sure that chunks aren't immediately going to be
      reused anyway.
    */

    maxfb = &ms->fastbins[NFASTBINS - 1];
    fb = &ms->fastbins[0];
    do {
        p = ptr(ms,*fb);
        *fb = 0;
        if (p != NULL) {
            do {
                nextp = ptr(ms,p->fd);

                /* Slightly streamlined version of consolidation code in free() */
                size = chunksize(p);
                nextchunk = chunk_at_offset(p, size);
                nextsize = chunksize(nextchunk);

                /* Merge with a free predecessor, if any. */
                if (!prev_inuse(p)) {
                    prevsize = prev_size(p);
                    size += prevsize;
                    p = chunk_at_offset(p, -((long) prevsize));
                    unlink_chunk (ms, p);
                }

                if (nextchunk != ptr(ms,ms->top)) {
                    /* Merge with a free successor, then queue the
                       result on the unsorted bin. */
                    nextinuse = inuse_bit_at_offset(nextchunk, nextsize);

                    if (!nextinuse) {
                        size += nextsize;
                        unlink_chunk (ms, nextchunk);
                    } else
                        clear_inuse_bit_at_offset(nextchunk, 0);

                    first_unsorted = ptr(ms,unsorted_bin->fd);
                    unsorted_bin->fd = ofs(ms,p);
                    first_unsorted->bk = ofs(ms,p);

                    if (!in_smallbin_range(size)) {
                        p->fd_nextsize = 0;
                        p->bk_nextsize = 0;
                    }

                    set_head(p, size | PREV_INUSE);
                    p->bk = ofs(ms,unsorted_bin);
                    p->fd = ofs(ms,first_unsorted);
                    set_foot(p, size);
                } else {
                    /* Adjacent to the top chunk: absorb into it. */
                    size += nextsize;
                    set_head(p, size | PREV_INUSE);
                    ms->top = ofs(ms,p);
                }
            } while ((p = nextp) != 0);
        }
    } while (fb++ != maxfb);
}
|
||||
|
||||
moffset
|
||||
DB::malloc_internal(size_t bytes)
|
||||
{
|
||||
unsigned int idx; /* associated bin index */
|
||||
mbin* bin; /* associated bin */
|
||||
mchunk* victim; /* inspected/selected chunk */
|
||||
|
||||
mchunk* remainder; /* remainder from a split */
|
||||
unsigned long remainder_size; /* its size */
|
||||
|
||||
/*
|
||||
Convert request size to internal form by adding SIZE_SZ bytes
|
||||
overhead plus possibly more to obtain necessary alignment and/or
|
||||
to obtain a size of at least MINSIZE, the smallest allocatable
|
||||
size. Also, checked_request2size traps (returning 0) request sizes
|
||||
that are so large that they wrap around zero when padded and
|
||||
aligned.
|
||||
*/
|
||||
size_t nb = request2size(bytes);
|
||||
|
||||
if (nb <= DEFAULT_MXFAST) {
|
||||
idx = fastbin_index(nb);
|
||||
|
||||
if (ms->fastbins[idx] != 0) {
|
||||
victim = ptr(ms,ms->fastbins[idx]);
|
||||
ms->fastbins[idx] = victim->fd;
|
||||
return ofs(ms,chunk2mem(victim));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
If a small request, check regular bin. Since these "smallbins"
|
||||
hold one size each, no searching within bins is necessary.
|
||||
(For a large request, we need to wait until unsorted chunks are
|
||||
processed to find best fit. But for small ones, fits are exact
|
||||
anyway, so we can check now, which is faster.)
|
||||
*/
|
||||
if (in_smallbin_range (nb)) {
|
||||
idx = smallbin_index (nb);
|
||||
bin = bin_at (ms, idx);
|
||||
if ((victim = ptr(ms,last(bin))) != bin)
|
||||
{
|
||||
moffset bck = victim->bk;
|
||||
set_inuse_bit_at_offset (victim, nb);
|
||||
bin->bk = bck;
|
||||
ptr(ms,bck)->fd = ofs(ms,bin);
|
||||
return ofs(ms,chunk2mem(victim));
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
If this is a large request, consolidate fastbins before continuing.
|
||||
While it might look excessive to kill all fastbins before
|
||||
even seeing if there is space available, this avoids
|
||||
fragmentation problems normally associated with fastbins.
|
||||
Also, in practice, programs tend to have runs of either small or
|
||||
large requests, but less often mixtures, so consolidation is not
|
||||
invoked all that often in most programs. And the programs that
|
||||
it is called frequently in otherwise tend to fragment.
|
||||
*/
|
||||
|
||||
idx = largebin_index(nb);
|
||||
if (ms->have_fastchunks)
|
||||
malloc_consolidate(ms);
|
||||
}
|
||||
|
||||
/*
|
||||
Process recently freed or remaindered chunks, taking one only if
|
||||
it is exact fit, or, if this a small request, the chunk is remainder from
|
||||
the most recent non-exact fit. Place other traversed chunks in
|
||||
bins. Note that this step is the only place in any routine where
|
||||
chunks are placed in bins.
|
||||
The outer loop here is needed because we might not realize until
|
||||
near the end of malloc that we should have consolidated, so must
|
||||
do so and retry. This happens at most once, and only when we would
|
||||
otherwise need to expand memory to service a "small" request.
|
||||
*/
|
||||
for (;;)
|
||||
{
|
||||
size_t size;
|
||||
mchunk *fwd, *bck;
|
||||
|
||||
int iters = 0;
|
||||
while ((victim = ptr(ms,unsorted_chunks(ms)->bk)) != unsorted_chunks(ms)) {
|
||||
bck = ptr(ms,victim->bk);
|
||||
size = chunksize(victim);
|
||||
mchunk *next = chunk_at_offset(victim, size);
|
||||
|
||||
/*
|
||||
If a small request, try to use last remainder if it is the
|
||||
only chunk in unsorted bin. This helps promote locality for
|
||||
runs of consecutive small requests. This is the only
|
||||
exception to best-fit, and applies only when there is
|
||||
no exact fit for a small chunk.
|
||||
*/
|
||||
|
||||
if (in_smallbin_range(nb) &&
|
||||
bck == unsorted_chunks(ms) &&
|
||||
victim == ptr(ms,ms->last_remainder) &&
|
||||
(unsigned long) (size) > (unsigned long) (nb + MINSIZE)) {
|
||||
|
||||
/* split and reattach remainder */
|
||||
remainder_size = size - nb;
|
||||
remainder = chunk_at_offset(victim, nb);
|
||||
ms->last_remainder =
|
||||
unsorted_chunks(ms)->bk =
|
||||
unsorted_chunks(ms)->fd = ofs(ms,remainder);
|
||||
remainder->bk = remainder->fd = ofs(ms,unsorted_chunks(ms));
|
||||
if (!in_smallbin_range(remainder_size)) {
|
||||
remainder->fd_nextsize = 0;
|
||||
remainder->bk_nextsize = 0;
|
||||
}
|
||||
set_head(victim, nb | PREV_INUSE);
|
||||
set_head(remainder, remainder_size | PREV_INUSE);
|
||||
set_foot(remainder, remainder_size);
|
||||
return ofs(ms,chunk2mem(victim));
|
||||
}
|
||||
|
||||
/* remove from unsorted list */
|
||||
unsorted_chunks(ms)->bk = ofs(ms,bck);
|
||||
bck->fd = ofs(ms,unsorted_chunks(ms));
|
||||
|
||||
/* Take now instead of binning if exact fit */
|
||||
if (size == nb) {
|
||||
set_inuse_bit_at_offset(victim, size);
|
||||
return ofs(ms,chunk2mem(victim));
|
||||
}
|
||||
|
||||
/* place chunk in bin */
|
||||
size_t victim_index;
|
||||
if (in_smallbin_range(size)) {
|
||||
victim_index = smallbin_index(size);
|
||||
bck = bin_at(ms, victim_index);
|
||||
fwd = ptr(ms,bck->fd);
|
||||
} else {
|
||||
victim_index = largebin_index(size);
|
||||
bck = bin_at(ms, victim_index);
|
||||
fwd = ptr(ms,bck->fd);
|
||||
|
||||
/* maintain large bins in sorted order */
|
||||
if (fwd != bck) {
|
||||
/* Or with inuse bit to speed comparisons */
|
||||
size |= PREV_INUSE;
|
||||
/* if smaller than smallest, bypass loop below */
|
||||
if ((unsigned long) (size) < (unsigned long) ptr(ms,bck->bk)->mchunk_size) {
|
||||
fwd = bck;
|
||||
bck = ptr(ms,bck->bk);
|
||||
victim->fd_nextsize = fwd->fd;
|
||||
victim->bk_nextsize = ptr(ms,fwd->fd)->bk_nextsize;
|
||||
ptr(ms,fwd->fd)->bk_nextsize = ptr(ms,victim->bk_nextsize)->fd_nextsize = ofs(ms,victim);
|
||||
} else {
|
||||
while ((unsigned long) size < fwd->mchunk_size) {
|
||||
fwd = ptr(ms,fwd->fd_nextsize);
|
||||
}
|
||||
if ((unsigned long) size == (unsigned long) fwd->mchunk_size)
|
||||
/* Always insert in the second position. */
|
||||
fwd = ptr(ms,fwd->fd);
|
||||
else {
|
||||
victim->fd_nextsize = ofs(ms,fwd);
|
||||
victim->bk_nextsize = fwd->bk_nextsize;
|
||||
fwd->bk_nextsize = ofs(ms,victim);
|
||||
ptr(ms,victim->bk_nextsize)->fd_nextsize = ofs(ms,victim);
|
||||
}
|
||||
bck = ptr(ms,fwd->bk);
|
||||
}
|
||||
} else {
|
||||
victim->fd_nextsize = victim->bk_nextsize = ofs(ms,victim);
|
||||
}
|
||||
}
|
||||
|
||||
mark_bin(ms, victim_index);
|
||||
victim->bk = ofs(ms,bck);
|
||||
victim->fd = ofs(ms,fwd);
|
||||
fwd->bk = ofs(ms,victim);
|
||||
bck->fd = ofs(ms,victim);
|
||||
|
||||
#define MAX_ITERS 10000
|
||||
if (++iters >= MAX_ITERS)
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
If a large request, scan through the chunks of current bin in
|
||||
sorted order to find smallest that fits. Use the skip list for this.
|
||||
*/
|
||||
if (!in_smallbin_range(nb)) {
|
||||
bin = bin_at(ms, idx);
|
||||
|
||||
/* skip scan if empty or largest chunk is too small */
|
||||
if ((victim = ptr(ms,first(bin))) != bin &&
|
||||
(unsigned long) victim->mchunk_size >= (unsigned long) (nb)) {
|
||||
size_t size;
|
||||
|
||||
victim = ptr(ms,victim->bk_nextsize);
|
||||
while (((unsigned long) (size = chunksize(victim)) <
|
||||
(unsigned long) (nb)))
|
||||
victim = ptr(ms,victim->bk_nextsize);
|
||||
|
||||
/* Avoid removing the first entry for a size so that the skip
|
||||
list does not have to be rerouted. */
|
||||
if (victim != ptr(ms,last(bin)) &&
|
||||
victim->mchunk_size == ptr(ms,victim->fd)->mchunk_size)
|
||||
victim = ptr(ms,victim->fd);
|
||||
|
||||
remainder_size = size - nb;
|
||||
unlink_chunk(ms, victim);
|
||||
|
||||
/* Exhaust */
|
||||
if (remainder_size < MINSIZE) {
|
||||
set_inuse_bit_at_offset(victim, size);
|
||||
} else { /* Split */
|
||||
remainder = chunk_at_offset(victim, nb);
|
||||
|
||||
/* We cannot assume the unsorted list is empty and therefore
|
||||
have to perform a complete insert here. */
|
||||
bck = unsorted_chunks(ms);
|
||||
fwd = ptr(ms,bck->fd);
|
||||
remainder->bk = ofs(ms,bck);
|
||||
remainder->fd = ofs(ms,fwd);
|
||||
bck->fd = fwd->bk = ofs(ms,remainder);
|
||||
if (!in_smallbin_range(remainder_size)) {
|
||||
remainder->fd_nextsize = 0;
|
||||
remainder->bk_nextsize = 0;
|
||||
}
|
||||
set_head (victim, nb | PREV_INUSE);
|
||||
set_head (remainder, remainder_size | PREV_INUSE);
|
||||
set_foot (remainder, remainder_size);
|
||||
}
|
||||
return ofs(ms,chunk2mem(victim));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Search for a chunk by scanning bins, starting with next largest
|
||||
bin. This search is strictly by best-fit; i.e., the smallest
|
||||
(with ties going to approximately the least recently used) chunk
|
||||
that fits is selected.
|
||||
The bitmap avoids needing to check that most blocks are nonempty.
|
||||
The particular case of skipping all bins during warm-up phases
|
||||
when no chunks have been returned yet is faster than it might look.
|
||||
*/
|
||||
|
||||
++idx;
|
||||
bin = bin_at(ms, idx);
|
||||
unsigned int block = idx2block(idx);
|
||||
unsigned int map = ms->binmap[block];
|
||||
unsigned int bit = idx2bit(idx);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
/* Skip rest of block if there are no more set bits in this block. */
|
||||
if (bit > map || bit == 0) {
|
||||
do {
|
||||
if (++block >= BINMAPSIZE) /* out of bins */
|
||||
goto use_top;
|
||||
} while ((map = ms->binmap[block]) == 0);
|
||||
bin = bin_at(ms, (block << BINMAPSHIFT));
|
||||
bit = 1;
|
||||
}
|
||||
|
||||
/* Advance to bin with set bit. There must be one. */
|
||||
while ((bit & map) == 0) {
|
||||
bin = next_bin(bin);
|
||||
bit <<= 1;
|
||||
}
|
||||
/* Inspect the bin. It is likely to be non-empty */
|
||||
victim = ptr(ms,last(bin));
|
||||
/* If a false alarm (empty bin), clear the bit. */
|
||||
if (victim == bin) {
|
||||
ms->binmap[block] = map &= ~bit; /* Write through */
|
||||
bin = next_bin(bin);
|
||||
bit <<= 1;
|
||||
} else {
|
||||
size = chunksize(victim);
|
||||
/* We know the first chunk in this bin is big enough to use. */
|
||||
remainder_size = size - nb;
|
||||
/* unlink */
|
||||
unlink_chunk (ms, victim);
|
||||
/* Exhaust */
|
||||
if (remainder_size < MINSIZE) {
|
||||
set_inuse_bit_at_offset(victim, size);
|
||||
} else { /* Split */
|
||||
remainder = chunk_at_offset(victim, nb);
|
||||
/* We cannot assume the unsorted list is empty and therefore
|
||||
have to perform a complete insert here. */
|
||||
bck = unsorted_chunks(ms);
|
||||
fwd = ptr(ms,bck->fd);
|
||||
remainder->bk = ofs(ms,bck);
|
||||
remainder->fd = ofs(ms,fwd);
|
||||
bck->fd = fwd->bk = ofs(ms,remainder);
|
||||
|
||||
/* advertise as last remainder */
|
||||
if (in_smallbin_range(nb))
|
||||
ms->last_remainder = ofs(ms,remainder);
|
||||
if (!in_smallbin_range(remainder_size)) {
|
||||
remainder->fd_nextsize = 0;
|
||||
remainder->bk_nextsize = 0;
|
||||
}
|
||||
set_head (victim, nb | PREV_INUSE);
|
||||
set_head (remainder, remainder_size | PREV_INUSE);
|
||||
set_foot (remainder, remainder_size);
|
||||
}
|
||||
return ofs(ms,chunk2mem(victim));
|
||||
}
|
||||
}
|
||||
|
||||
use_top:
|
||||
/*
|
||||
If large enough, split off the chunk bordering the end of memory
|
||||
(held in ms->top). Note that this is in accord with the best-fit
|
||||
search rule. In effect, ms->top is treated as larger (and thus
|
||||
less well fitting) than any other available chunk since it can
|
||||
be extended to be as large as necessary (up to system
|
||||
limitations).
|
||||
We require that ms->top always exists (i.e., has size >=
|
||||
MINSIZE) after initialization, so if it would otherwise be
|
||||
exhausted by current request, it is replenished. (The main
|
||||
reason for ensuring it exists is that we may need MINSIZE space
|
||||
to put in fenceposts in sysmalloc.)
|
||||
*/
|
||||
victim = ptr(ms,ms->top);
|
||||
size = chunksize(victim);
|
||||
|
||||
if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE)) {
|
||||
remainder_size = size - nb;
|
||||
remainder = chunk_at_offset(victim, nb);
|
||||
ms->top = ofs(ms,remainder);
|
||||
set_head(victim, nb | PREV_INUSE);
|
||||
set_head(remainder, remainder_size | PREV_INUSE);
|
||||
return ofs(ms,chunk2mem(victim));
|
||||
} else if (ms->have_fastchunks) {
|
||||
malloc_consolidate (ms);
|
||||
/* restore original bin index */
|
||||
if (in_smallbin_range (nb))
|
||||
idx = smallbin_index (nb);
|
||||
else
|
||||
idx = largebin_index (nb);
|
||||
} else { /* Otherwise, relay to handle system-dependent cases */
|
||||
size_t page_size = getpagesize();
|
||||
size_t alloc_size =
|
||||
((nb + MINSIZE - size + page_size - 1) / page_size) * page_size;
|
||||
|
||||
size_t old_size =
|
||||
ms->top + size + sizeof(size_t);
|
||||
size_t new_size =
|
||||
old_size + alloc_size;
|
||||
|
||||
if (fd >= 0) {
|
||||
if (ftruncate(fd, new_size) < 0)
|
||||
throw std::system_error(errno, std::generic_category());
|
||||
}
|
||||
|
||||
malloc_state* new_ms =
|
||||
(malloc_state*) mremap(ms, old_size, new_size, MREMAP_MAYMOVE);
|
||||
if (new_ms == MAP_FAILED)
|
||||
throw std::system_error(errno, std::generic_category());
|
||||
|
||||
ms = new_ms;
|
||||
current_base = (unsigned char*) ms;
|
||||
|
||||
victim = ptr(ms,ms->top);
|
||||
|
||||
size += alloc_size;
|
||||
|
||||
remainder_size = size - nb;
|
||||
remainder = chunk_at_offset(victim, nb);
|
||||
ms->top = ofs(ms,remainder);
|
||||
set_head(victim, nb | PREV_INUSE);
|
||||
set_head(remainder, remainder_size | PREV_INUSE);
|
||||
return ofs(ms,chunk2mem(victim));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Return the chunk at offset `o` to the database's internal allocator.
   The algorithm is dlmalloc/glibc-style free, adapted so that all links
   are stored as offsets (moffset) relative to the memory-mapped
   malloc_state `ms` rather than raw pointers (see ptr()/ofs()). */
void
DB::free_internal(moffset o)
{
    size_t size;             /* its size */
    moffset *fb;             /* associated fastbin */
    mchunk *nextchunk;       /* next contiguous chunk */
    size_t nextsize;         /* its size */
    int nextinuse;           /* true if nextchunk is used */
    size_t prevsize;         /* size of previous contiguous chunk */
    mchunk* bck;             /* misc temp for linking */
    mchunk* fwd;             /* misc temp for linking */

    mchunk* p = ptr(ms,o);
    size = chunksize (p);

    /*
      If eligible, place chunk on a fastbin so it can be found
      and used quickly in malloc.
    */
    if ((unsigned long)(size) <= (unsigned long)(DEFAULT_MXFAST)) {
        ms->have_fastchunks = true;
        unsigned int idx = fastbin_index(size);
        fb = &ms->fastbins[idx];
        /* Atomically link P to its fastbin: P->FD = *FB; *FB = P; */
        p->fd = *fb;
        *fb = ofs(ms,p);
    } else { /* Consolidate other chunks as they arrive. */
        nextchunk = chunk_at_offset(p, size);
        nextsize = chunksize(nextchunk);
        /* consolidate backward */
        if (!prev_inuse(p)) {
            prevsize = prev_size(p);
            size += prevsize;
            p = chunk_at_offset(p, -((long) prevsize));
            unlink_chunk (ms, p);
        }
        if (nextchunk != ptr(ms,ms->top)) {
            /* get and clear inuse bit */
            nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
            /* consolidate forward */
            if (!nextinuse) {
                unlink_chunk (ms, nextchunk);
                size += nextsize;
            } else
                clear_inuse_bit_at_offset(nextchunk, 0);
            /*
              Place the chunk in unsorted chunk list. Chunks are
              not placed into regular bins until after they have
              been given one chance to be used in malloc.
            */
            bck = unsorted_chunks(ms);
            fwd = ptr(ms,bck->fd);
            p->fd = ofs(ms,fwd);
            p->bk = ofs(ms,bck);
            /* large chunks carry nextsize links; clear them so the
               skip list is rebuilt when the chunk is later binned */
            if (!in_smallbin_range(size)) {
                p->fd_nextsize = 0;
                p->bk_nextsize = 0;
            }
            bck->fd = ofs(ms,p);
            fwd->bk = ofs(ms,p);
            set_head(p, size | PREV_INUSE);
            set_foot(p, size);
        } else {
            /*
              If the chunk borders the current high end of memory,
              consolidate into top
            */
            size += nextsize;
            set_head(p, size | PREV_INUSE);
            ms->top = ofs(ms,p);
        }

        /*
          If freeing a large space, consolidate possibly-surrounding
          chunks. Then, if the total unused topmost memory exceeds trim
          threshold, ask malloc_trim to reduce top.
          Unless max_fast is 0, we don't know if there are fastbins
          bordering top, so we cannot tell for sure whether threshold
          has been reached unless fastbins are consolidated. But we
          don't want to consolidate on each free. As a compromise,
          consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
          is reached.
        */
        if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
            if (ms->have_fastchunks)
                malloc_consolidate(ms);
        }
    }
}
|
||||
|
||||
|
||||
// Enter a database scope: take db's reader or writer lock and install
// db as the thread's current database.  Scopes nest like a stack; the
// previous current_db is remembered here and restored by ~DB_scope.
DB_scope::DB_scope(DB *db, DB_scope_mode tp)
{
    int res;
    if (tp == READER_SCOPE)
        res = pthread_rwlock_rdlock(&db->rwlock);
    else
        res = pthread_rwlock_wrlock(&db->rwlock);
    if (res != 0)
        throw std::system_error(res, std::generic_category());

    // Save the previous thread-local state, then point the
    // thread-local base at the new database's mapping.
    save_db      = current_db;
    current_db   = db;
    current_base = (unsigned char*) current_db->ms;

    // Push this scope on the thread's scope chain.
    next_scope    = last_db_scope;
    last_db_scope = this;
}
|
||||
|
||||
// Leave a database scope: release the lock taken by the constructor
// and restore the previously current database and base pointer.
DB_scope::~DB_scope()
{
    int res = pthread_rwlock_unlock(&current_db->rwlock);

    // Throwing from a destructor calls std::terminate by default in
    // C++11+; the pragma silences -Wterminate because a failed unlock
    // is treated as unrecoverable here anyway.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wterminate"
    if (res != 0)
        throw std::system_error(res, std::generic_category());
#pragma GCC diagnostic pop

    // Pop the thread-local state pushed by the constructor.
    current_db = save_db;
    current_base = current_db ? (unsigned char*) current_db->ms
                              : NULL;

    last_db_scope = next_scope;
}
|
||||
121
src/runtime/c/pgf/db.h
Normal file
121
src/runtime/c/pgf/db.h
Normal file
@@ -0,0 +1,121 @@
|
||||
#ifndef DB_H
|
||||
#define DB_H
|
||||
|
||||
class DB;
|
||||
|
||||
extern PGF_INTERNAL_DECL __thread unsigned char* current_base __attribute__((tls_model("initial-exec")));
|
||||
extern PGF_INTERNAL_DECL __thread DB* current_db __attribute__((tls_model("initial-exec")));
|
||||
|
||||
typedef size_t moffset;
|
||||
|
||||
typedef moffset variant;
|
||||
|
||||
struct malloc_state;
|
||||
|
||||
template<class A> class ref {
|
||||
private:
|
||||
moffset offset;
|
||||
|
||||
friend class DB;
|
||||
|
||||
public:
|
||||
ref<A>() { }
|
||||
ref<A>(moffset o) { offset = o; }
|
||||
|
||||
A* operator->() const { return (A*) (current_base+offset); }
|
||||
operator A*() const { return (A*) (current_base+offset); }
|
||||
bool operator ==(ref<A>& other) const { return offset==other->offset; }
|
||||
bool operator ==(moffset other_offset) const { return offset==other_offset; }
|
||||
|
||||
ref<A>& operator= (const ref<A>& r) {
|
||||
offset = r.offset;
|
||||
return *this;
|
||||
}
|
||||
|
||||
static
|
||||
ref<A> from_ptr(A *ptr) { return (((uint8_t*) ptr) - current_base); }
|
||||
|
||||
static
|
||||
variant tagged(ref<A> ref) {
|
||||
assert(A::tag < 2*sizeof(size_t));
|
||||
return (ref.offset | A::tag);
|
||||
}
|
||||
|
||||
static
|
||||
ref<A> untagged(variant v) {
|
||||
return (v & ~(2*sizeof(size_t) - 1));
|
||||
}
|
||||
|
||||
static
|
||||
uint8_t get_tag(variant v) {
|
||||
return (v & (2*sizeof(size_t) - 1));
|
||||
}
|
||||
|
||||
static
|
||||
ref<A> null() { return 0; }
|
||||
};
|
||||
|
||||
// Handle for one memory-mapped grammar database.  All allocation is
// done inside the mapped region via malloc_internal/free_internal and
// objects are addressed by offsets (moffset / ref<A>) relative to the
// mapping base.  The static helpers operate on the thread's
// current_db, which is installed by DB_scope.
class PGF_INTERNAL_DECL DB {
private:
    int fd;                   // backing file descriptor (grown with ftruncate)
    malloc_state* ms;         // mapped allocator state; also the mapping base

    pthread_rwlock_t rwlock;  // reader/writer lock taken by DB_scope

    friend class PgfReader;

public:
    DB(const char* pathname, int flags, int mode);
    ~DB();

    // Allocate sizeof(A) bytes in the current database.
    template<class A>
    static ref<A> malloc() {
        return current_db->malloc_internal(sizeof(A));
    }

    // Allocate `bytes` bytes — for variable-length structures such as
    // PgfText where the payload exceeds sizeof(A).
    template<class A>
    static ref<A> malloc(size_t bytes) {
        return current_db->malloc_internal(bytes);
    }

    // Root object stored in the database header.
    template<class A>
    static ref<A> get_root() {
        return current_db->get_root_internal();
    }

    template<class A>
    static void set_root(ref<A> root) {
        current_db->set_root_internal(root.offset);
    }

    // Flush the mapping to the backing file.
    static void sync();

private:
    void init_state(size_t size);

    moffset malloc_internal(size_t bytes);
    void free_internal(moffset o);

    moffset get_root_internal();
    void set_root_internal(moffset root_offset);

    unsigned char* relocate(unsigned char* ptr);

    friend class DB_scope;
};
|
||||
|
||||
enum DB_scope_mode {READER_SCOPE, WRITER_SCOPE};
|
||||
|
||||
class PGF_INTERNAL_DECL DB_scope {
|
||||
public:
|
||||
DB_scope(DB *db, DB_scope_mode type);
|
||||
~DB_scope();
|
||||
|
||||
private:
|
||||
DB* save_db;
|
||||
DB_scope* next_scope;
|
||||
};
|
||||
|
||||
extern PGF_INTERNAL_DECL thread_local DB_scope *last_db_scope;
|
||||
|
||||
#endif
|
||||
109
src/runtime/c/pgf/expr.h
Normal file
109
src/runtime/c/pgf/expr.h
Normal file
@@ -0,0 +1,109 @@
|
||||
#ifndef EXPR_H_
|
||||
#define EXPR_H_
|
||||
|
||||
/// An abstract syntax tree
|
||||
typedef variant PgfExpr;
|
||||
|
||||
struct PgfHypo;
|
||||
struct PgfType;
|
||||
|
||||
typedef int PgfMetaId;
|
||||
|
||||
typedef enum {
|
||||
PGF_BIND_TYPE_EXPLICIT,
|
||||
PGF_BIND_TYPE_IMPLICIT
|
||||
} PgfBindType;
|
||||
|
||||
/// A literal for an abstract syntax tree
|
||||
typedef variant PgfLiteral;
|
||||
|
||||
struct PgfLiteralStr {
|
||||
static const uint8_t tag = 0;
|
||||
|
||||
PgfText val;
|
||||
} ;
|
||||
|
||||
struct PgfLiteralInt {
|
||||
static const uint8_t tag = 1;
|
||||
|
||||
int val;
|
||||
} ;
|
||||
|
||||
struct PgfLiteralFlt {
|
||||
static const uint8_t tag = 2;
|
||||
|
||||
double val;
|
||||
};
|
||||
|
||||
// One binding (hypothesis) in a dependent function type, i.e. the
// "(x : A) ->" part: whether the argument is implicit or explicit,
// the bound variable's name, and the argument type.
struct PgfHypo {
    PgfBindType bind_type;   // implicit vs explicit binding
    ref<PgfText> cid;        // bound variable name
    ref<PgfType> type;       // argument type
};
|
||||
|
||||
// An abstract-syntax type: hypos -> name exprs, where `hypos` are the
// argument bindings, `name` is the result category, and `exprs` its
// indices.  The PgfText is stored inline as the last (variable-length)
// field.
struct PgfType {
    ref<PgfVector<PgfHypo>> hypos;
    ref<PgfVector<PgfExpr>> exprs;
    PgfText name;
};
|
||||
|
||||
struct PgfExprAbs {
|
||||
static const uint8_t tag = 0;
|
||||
|
||||
PgfBindType bind_type;
|
||||
PgfExpr body;
|
||||
PgfText name;
|
||||
};
|
||||
|
||||
struct PgfExprApp {
|
||||
static const uint8_t tag = 1;
|
||||
|
||||
PgfExpr fun;
|
||||
PgfExpr arg;
|
||||
};
|
||||
|
||||
struct PgfExprLit {
|
||||
static const uint8_t tag = 2;
|
||||
|
||||
PgfLiteral lit;
|
||||
};
|
||||
|
||||
struct PgfExprMeta {
|
||||
static const uint8_t tag = 3;
|
||||
|
||||
PgfMetaId id;
|
||||
};
|
||||
|
||||
struct PgfExprFun {
|
||||
static const uint8_t tag = 4;
|
||||
|
||||
PgfText name;
|
||||
};
|
||||
|
||||
struct PgfExprVar {
|
||||
static const uint8_t tag = 5;
|
||||
|
||||
int var;
|
||||
};
|
||||
|
||||
struct PgfExprTyped {
|
||||
static const uint8_t tag = 6;
|
||||
|
||||
PgfExpr expr;
|
||||
ref<PgfType> type;
|
||||
};
|
||||
|
||||
struct PgfExprImplArg {
|
||||
static const uint8_t tag = 7;
|
||||
|
||||
PgfExpr expr;
|
||||
};
|
||||
|
||||
typedef float prob_t;
|
||||
|
||||
typedef struct {
|
||||
prob_t prob;
|
||||
PgfExpr expr;
|
||||
} PgfExprProb;
|
||||
|
||||
#endif /* EXPR_H_ */
|
||||
251
src/runtime/c/pgf/namespace.h
Normal file
251
src/runtime/c/pgf/namespace.h
Normal file
@@ -0,0 +1,251 @@
|
||||
#ifndef NAMESPACE_H
|
||||
#define NAMESPACE_H
|
||||
|
||||
#include "db.h"
|
||||
|
||||
template <class V>
|
||||
class Node;
|
||||
|
||||
template <class V>
|
||||
using Namespace = ref<Node<V>>;
|
||||
|
||||
// Node of a persistent (copy-on-write) size-balanced binary search
// tree stored inside the database.  Insertion never mutates existing
// nodes; it allocates fresh ones along the search path.  The
// rebalancing in balanceL/balanceR appears to follow the
// weight-balanced scheme of Haskell's Data.Map (ratio constants 3
// and 2 below) — single or double rotations chosen by subtree sizes.
template <class V>
class Node {
public:
    size_t sz;         // number of entries in this subtree
    ref<V> value;      // payload; must expose a PgfText `name` field
    ref<Node> left;
    ref<Node> right;

    // Allocate a leaf node holding `value`.
    static
    ref<Node> new_node(ref<V> value) {
        ref<Node> node = current_db->malloc<Node>();
        node->sz = 1;
        node->value = value;
        node->left = 0;
        node->right = 0;
        return node;
    }

    // Allocate an interior node; size is recomputed from the children.
    static
    ref<Node> new_node(ref<V> value, ref<Node> left, ref<Node> right) {
        ref<Node> node = current_db->malloc<Node>();
        node->sz = 1+namespace_size(left)+namespace_size(right);
        node->value = value;
        node->left = left;
        node->right = right;
        return node;
    }

    // Rebuild a node whose LEFT subtree may have grown by one after an
    // insertion, restoring the balance invariant with rotations.
    static
    ref<Node> balanceL(ref<V> value, ref<Node> left, ref<Node> right) {
        if (right == 0) {
            if (left == 0) {
                return new_node(value);
            } else {
                if (left->left == 0) {
                    if (left->right == 0) {
                        return new_node(value,left,0);
                    } else {
                        // double rotation: lift left->right to the root
                        return new_node(left->right->value,
                                        new_node(left->value),
                                        new_node(value));
                    }
                } else {
                    if (left->right == 0) {
                        // single right rotation
                        return new_node(left->value,
                                        left->left,
                                        new_node(value));
                    } else {
                        if (left->right->sz < 2 * left->left->sz) {
                            return new_node(left->value,
                                            left->left,
                                            new_node(value,
                                                     left->right,
                                                     0));
                        } else {
                            return new_node(left->right->value,
                                            new_node(left->value,
                                                     left->left,
                                                     left->right->left),
                                            new_node(value,
                                                     left->right->right,
                                                     0));
                        }
                    }
                }
            }
        } else {
            if (left == 0) {
                return new_node(value,0,right);
            } else {
                // rotate only when the left side is more than 3x heavier
                if (left->sz > 3*right->sz) {
                    if (left->right->sz < 2*left->left->sz)
                        return new_node(left->value,
                                        left->left,
                                        new_node(value,
                                                 left->right,
                                                 right));
                    else
                        return new_node(left->right->value,
                                        new_node(left->value,
                                                 left->left,
                                                 left->right->left),
                                        new_node(value,
                                                 left->right->right,
                                                 right));
                } else {
                    return new_node(value,left,right);
                }
            }
        }
    }

    // Mirror image of balanceL: the RIGHT subtree may have grown.
    static
    ref<Node> balanceR(ref<V> value, ref<Node> left, ref<Node> right) {
        if (left == 0) {
            if (right == 0) {
                return new_node(value);
            } else {
                if (right->left == 0) {
                    if (right->right == 0) {
                        return new_node(value,0,right);
                    } else {
                        // single left rotation
                        Namespace<V> new_left =
                            new_node(value);
                        return new_node(right->value,
                                        new_left,
                                        right->right);
                    }
                } else {
                    if (right->right == 0) {
                        // double rotation: lift right->left to the root
                        Namespace<V> new_left =
                            new_node(value);
                        Namespace<V> new_right =
                            new_node(right->value);
                        return new_node(right->left->value,
                                        new_left,
                                        new_right);
                    } else {
                        if (right->left->sz < 2 * right->right->sz) {
                            Namespace<V> new_left =
                                new_node(value,
                                         0,
                                         right->left);
                            return new_node(right->value,
                                            new_left,
                                            right->right);
                        } else {
                            Namespace<V> new_left =
                                new_node(value,
                                         0,
                                         right->left->left);
                            Namespace<V> new_right =
                                new_node(right->value,
                                         right->left->right,
                                         right->right);
                            return new_node(right->left->value,
                                            new_left,
                                            new_right);
                        }
                    }
                }
            }
        } else {
            if (right == 0) {
                return new_node(value,left,0);
            } else {
                // rotate only when the right side is more than 3x heavier
                if (right->sz > 3*left->sz) {
                    if (right->left->sz < 2*right->right->sz) {
                        Namespace<V> new_left =
                            new_node(value,
                                     left,
                                     right->left);
                        return new_node(right->value,
                                        new_left,
                                        right->right);
                    } else {
                        Namespace<V> new_left =
                            new_node(value,
                                     left,
                                     right->left->left);
                        Namespace<V> new_right =
                            new_node(right->value,
                                     right->left->right,
                                     right->right);
                        return new_node(right->left->value,
                                        new_left,
                                        new_right
                                        );
                    }
                } else {
                    return new_node(value,left,right);
                }
            }
        }
    }
};
|
||||
|
||||
// The empty namespace is represented by the null ref (offset 0).
template <class V>
Namespace<V> namespace_empty()
{
    return 0;
}
|
||||
|
||||
// A namespace with exactly one entry.
template <class V>
Namespace<V> namespace_singleton(ref<V> value)
{
    return Node<V>::new_node(value);
}
|
||||
|
||||
// Insert `value`, keyed on its `name` field, returning a NEW tree that
// shares unchanged subtrees with `map` (persistent insert).  An entry
// with an equal name is replaced.
template <class V>
Namespace<V> namespace_insert(Namespace<V> map, ref<V> value)
{
    if (map == 0)
        return Node<V>::new_node(value);

    int cmp = textcmp(&value->name,&map->value->name);
    if (cmp < 0) {
        Namespace<V> left = namespace_insert(map->left, value);
        return Node<V>::balanceL(map->value,left,map->right);
    } else if (cmp > 0) {
        Namespace<V> right = namespace_insert(map->right, value);
        return Node<V>::balanceR(map->value, map->left, right);
    } else
        // equal key: rebuild this node with the new value
        return Node<V>::new_node(value,map->left,map->right);
}
|
||||
|
||||
template <class V>
|
||||
ref<V> namespace_lookup(Namespace<V> map, const char *name)
|
||||
{
|
||||
while (map != 0) {
|
||||
int cmp = strcmp(name,map->value->name);
|
||||
if (cmp < 0)
|
||||
map = map->left;
|
||||
else if (cmp > 0)
|
||||
map = map->right;
|
||||
else
|
||||
return map->value;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
template <class V>
|
||||
size_t namespace_size(Namespace<V> map)
|
||||
{
|
||||
if (map == 0)
|
||||
return 0;
|
||||
return map->sz;
|
||||
}
|
||||
|
||||
// In-order traversal: invokes the iterator callback once per entry,
// in ascending name order, passing the name and the value.
template <class V>
void namespace_iter(Namespace<V> map, PgfItor* itor)
{
    if (map == 0)
        return;

    namespace_iter(map->left, itor);
    itor->fn(itor, &map->value->name, &(*map->value));
    namespace_iter(map->right, itor);
}
|
||||
#endif
|
||||
197
src/runtime/c/pgf/pgf.cxx
Normal file
197
src/runtime/c/pgf/pgf.cxx
Normal file
@@ -0,0 +1,197 @@
|
||||
#include <fcntl.h>
|
||||
#include "data.h"
|
||||
#include "reader.h"
|
||||
|
||||
static void
|
||||
pgf_exn_clear(PgfExn* err)
|
||||
{
|
||||
err->type = PGF_EXN_NONE;
|
||||
err->code = 0;
|
||||
err->msg = NULL;
|
||||
}
|
||||
|
||||
// Read a .pgf grammar from `fpath` into a fresh, purely in-memory
// database.  On failure reports through `err` (PGF_EXN_SYSTEM_ERROR
// with errno, or PGF_EXN_PGF_ERROR with a strdup'ed message the
// caller must free) and returns NULL.
PGF_API
PgfPGF *pgf_read_pgf(const char* fpath, PgfExn* err)
{
    PgfPGF *pgf = NULL;

    pgf_exn_clear(err);

    try {
        // NULL path: anonymous (non-file-backed) database
        pgf = new PgfPGF(NULL, 0, 0);

        std::ifstream in(fpath, std::ios::binary);
        if (in.fail()) {
            throw std::system_error(errno, std::generic_category());
        }

        {
            // writer scope: the reader allocates into the database
            DB_scope scope(pgf, WRITER_SCOPE);

            PgfReader rdr(&in);
            ref<PgfPGFRoot> pgf_root = rdr.read_pgf();

            pgf->set_root(pgf_root);
        }

        return pgf;
    } catch (std::system_error& e) {
        err->type = PGF_EXN_SYSTEM_ERROR;
        err->code = e.code().value();
    } catch (pgf_error& e) {
        err->type = PGF_EXN_PGF_ERROR;
        err->msg = strdup(e.what());
    }

    // error path: release the partially-built database
    if (pgf != NULL)
        delete pgf;

    return NULL;
}
|
||||
|
||||
// Read a .pgf grammar and persist it into a newly-created .ngf file
// (O_EXCL: fails if ngf_path already exists).  On failure the partial
// ngf file is removed and NULL is returned with `err` filled in.
PGF_API
PgfPGF *pgf_boot_ngf(const char* pgf_path, const char* ngf_path, PgfExn* err)
{
    PgfPGF *pgf = NULL;

    pgf_exn_clear(err);

    try {
        pgf = new PgfPGF(ngf_path, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR);

        std::ifstream in(pgf_path, std::ios::binary);
        if (in.fail()) {
            throw std::system_error(errno, std::generic_category());
        }

        {
            DB_scope scope(pgf, WRITER_SCOPE);

            PgfReader rdr(&in);
            ref<PgfPGFRoot> pgf_root = rdr.read_pgf();

            pgf->set_root(pgf_root);

            // flush the unpacked grammar to the backing file
            DB::sync();
        }

        return pgf;
    } catch (std::system_error& e) {
        err->type = PGF_EXN_SYSTEM_ERROR;
        err->code = e.code().value();
    } catch (pgf_error& e) {
        err->type = PGF_EXN_PGF_ERROR;
        err->msg = strdup(e.what());
    }

    // error path: drop the handle and remove the half-written file
    if (pgf != NULL) {
        delete pgf;
        remove(ngf_path);
    }

    return NULL;
}
|
||||
|
||||
// Open an already-booted .ngf file, creating an empty grammar if the
// file does not exist yet.  A file created here is removed again on
// failure (is_new tracks that); a pre-existing file is left intact.
PGF_API
PgfPGF *pgf_read_ngf(const char *fpath, PgfExn* err)
{
    PgfPGF *pgf = NULL;

    pgf_exn_clear(err);

    bool is_new = false;
    try {
        pgf = new PgfPGF(fpath, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);

        {
            DB_scope scope(pgf, WRITER_SCOPE);

            // A root offset of 0 means the file is freshly created:
            // initialize an empty grammar skeleton.
            if (DB::get_root<PgfPGFRoot>() == 0) {
                is_new = true;
                ref<PgfPGFRoot> root = DB::malloc<PgfPGFRoot>();
                root->major_version = 2;
                root->minor_version = 0;
                root->gflags = 0;
                root->abstract.name = DB::malloc<PgfText>();
                root->abstract.name->size = 0;
                root->abstract.aflags = 0;
                root->abstract.funs = 0;
                root->abstract.cats = 0;
                DB::set_root<PgfPGFRoot>(root);
            }
        }

        return pgf;
    } catch (std::system_error& e) {
        err->type = PGF_EXN_SYSTEM_ERROR;
        err->code = e.code().value();
    } catch (pgf_error& e) {
        err->type = PGF_EXN_PGF_ERROR;
        err->msg = strdup(e.what());
    }

    if (pgf != NULL) {
        delete pgf;
        if (is_new)
            remove(fpath);
    }

    return NULL;
}
|
||||
|
||||
// Release a grammar handle obtained from pgf_read_pgf/pgf_boot_ngf/
// pgf_read_ngf.
PGF_API
void pgf_free(PgfPGF *pgf)
{
    delete pgf;
}
|
||||
|
||||
// Name of the abstract syntax.  Returns a copy made with textdup —
// presumably heap-allocated and owned by the caller; confirm against
// textdup's definition in text.h.
PGF_API
PgfText *pgf_abstract_name(PgfPGF* pgf)
{
    DB_scope scope(pgf, READER_SCOPE);

    return textdup(&(*pgf->get_root<PgfPGFRoot>()->abstract.name));
}
|
||||
|
||||
// Invoke itor->fn once per abstract category, in name order.
PGF_API
void pgf_iter_categories(PgfPGF* pgf, PgfItor* itor)
{
    DB_scope scope(pgf, READER_SCOPE);

    namespace_iter(pgf->get_root<PgfPGFRoot>()->abstract.cats, itor);
}
|
||||
|
||||
// Invoke itor->fn once per abstract function, in name order.
PGF_API
void pgf_iter_functions(PgfPGF* pgf, PgfItor* itor)
{
    DB_scope scope(pgf, READER_SCOPE);

    namespace_iter(pgf->get_root<PgfPGFRoot>()->abstract.funs, itor);
}
|
||||
|
||||
// Adapter used by pgf_iter_functions_by_cat: wraps the user's iterator
// together with the category name to filter on.
struct PgfItorHelper : PgfItor
{
    PgfText *cat;    // result category to match
    PgfItor *itor;   // user iterator to forward matching entries to
};
|
||||
|
||||
// Callback installed by pgf_iter_functions_by_cat: forwards only
// functions whose result category name equals helper->cat.
static
void iter_by_cat_helper(PgfItor* itor, PgfText* key, void* value)
{
    PgfItorHelper* helper = (PgfItorHelper*) itor;
    PgfAbsFun* absfun = (PgfAbsFun*) value;
    if (textcmp(helper->cat, &absfun->type->name) == 0)
        helper->itor->fn(helper->itor, key, value);
}
|
||||
|
||||
// Invoke itor->fn once per abstract function whose result category is
// `cat`, in name order.  Implemented by filtering the full function
// iteration through iter_by_cat_helper.
PGF_API
void pgf_iter_functions_by_cat(PgfPGF* pgf, PgfText* cat, PgfItor* itor)
{
    DB_scope scope(pgf, READER_SCOPE);

    PgfItorHelper helper;
    helper.fn   = iter_by_cat_helper;
    helper.cat  = cat;
    helper.itor = itor;
    namespace_iter(pgf->get_root<PgfPGFRoot>()->abstract.funs, &helper);
}
|
||||
117
src/runtime/c/pgf/pgf.h
Normal file
117
src/runtime/c/pgf/pgf.h
Normal file
@@ -0,0 +1,117 @@
|
||||
#ifndef PGF_H_
|
||||
#define PGF_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
#define EXTERN_C extern "C"
|
||||
#else
|
||||
#define EXTERN_C
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
|
||||
#if defined(COMPILING_PGF)
|
||||
#define PGF_API_DECL __declspec(dllexport) EXTERN_C
|
||||
#define PGF_API __declspec(dllexport) EXTERN_C
|
||||
#else
|
||||
#define PGF_API_DECL __declspec(dllimport)
|
||||
#define PGF_API ERROR_NOT_COMPILING_LIBPGF
|
||||
#endif
|
||||
#define PGF_INTERNAL_DECL
|
||||
#define PGF_INTERNAL
|
||||
|
||||
#elif defined(__MINGW32__)
|
||||
|
||||
#define PGF_API_DECL EXTERN_C
|
||||
#define PGF_API EXTERN_C
|
||||
|
||||
#define PGF_INTERNAL_DECL
|
||||
#define PGF_INTERNAL
|
||||
|
||||
#else
|
||||
|
||||
#define PGF_API_DECL EXTERN_C
|
||||
#define PGF_API EXTERN_C
|
||||
|
||||
#define PGF_INTERNAL_DECL __attribute__ ((visibility ("hidden")))
|
||||
#define PGF_INTERNAL __attribute__ ((visibility ("hidden")))
|
||||
|
||||
#endif
|
||||
|
||||
/* A generic structure to store text. The last field is variable length */
|
||||
typedef struct {
|
||||
size_t size;
|
||||
char text[];
|
||||
} PgfText;
|
||||
|
||||
/* A generic structure to pass a callback for iteration over a collection */
|
||||
typedef struct PgfItor PgfItor;
|
||||
|
||||
struct PgfItor {
|
||||
void (*fn)(PgfItor* self, PgfText* key, void *value);
|
||||
};
|
||||
|
||||
typedef struct PgfPGF PgfPGF;
|
||||
|
||||
/* All functions that may fail take a reference to a PgfExn structure.
|
||||
* It is used as follows:
|
||||
*
|
||||
* - If everything went fine, the field type will be equal to
|
||||
* PGF_EXN_NONE and all other fields will be zeroed.
|
||||
*
|
||||
* - If the exception was caused by external factors such as an error
|
||||
* from a system call, then type will be PGF_EXN_SYSTEM_ERROR and
|
||||
* the field code will contain the value of errno from the C runtime.
|
||||
*
|
||||
* - If the exception was caused by factors related to the GF runtime
|
||||
* itself, then the error type is PGF_EXN_PGF_ERROR, and the field
|
||||
* msg will contain a newly allocated string which must be freed from
|
||||
* the caller.
|
||||
*/
|
||||
|
||||
typedef enum {
|
||||
PGF_EXN_NONE,
|
||||
PGF_EXN_SYSTEM_ERROR,
|
||||
PGF_EXN_PGF_ERROR
|
||||
} PgfExnType;
|
||||
|
||||
typedef struct {
|
||||
PgfExnType type;
|
||||
int code;
|
||||
const char *msg;
|
||||
} PgfExn;
|
||||
|
||||
/* Reads a PGF file and keeps it in memory. */
|
||||
PGF_API_DECL
|
||||
PgfPGF *pgf_read_pgf(const char* fpath, PgfExn* err);
|
||||
|
||||
/* Reads a PGF file and stores the unpacked data in an NGF file
|
||||
* ready to be shared with other process, or used for quick startup.
|
||||
* The NGF file is platform dependent and should not be copied
|
||||
* between machines. */
|
||||
PGF_API_DECL
|
||||
PgfPGF *pgf_boot_ngf(const char* pgf_path, const char* ngf_path, PgfExn* err);
|
||||
|
||||
/* Tries to read the grammar from an already booted NGF file.
|
||||
* If the file does not exist then a new one is created, and the
|
||||
* grammar is set to be empty. It can later be populated with
|
||||
* rules dynamically. */
|
||||
PGF_API_DECL
|
||||
PgfPGF *pgf_read_ngf(const char* fpath, PgfExn* err);
|
||||
|
||||
/* Release the grammar when it is no longer needed. */
|
||||
PGF_API_DECL
|
||||
void pgf_free(PgfPGF *pgf);
|
||||
|
||||
PGF_API_DECL
|
||||
PgfText *pgf_abstract_name(PgfPGF* pgf);
|
||||
|
||||
PGF_API_DECL
|
||||
void pgf_iter_categories(PgfPGF* pgf, PgfItor* itor);
|
||||
|
||||
PGF_API_DECL
|
||||
void pgf_iter_functions(PgfPGF* pgf, PgfItor* itor);
|
||||
|
||||
PGF_API
|
||||
void pgf_iter_functions_by_cat(PgfPGF* pgf, PgfText* cat, PgfItor* itor);
|
||||
|
||||
#endif // PGF_H_
|
||||
430
src/runtime/c/pgf/reader.cxx
Normal file
430
src/runtime/c/pgf/reader.cxx
Normal file
@@ -0,0 +1,430 @@
|
||||
#include "data.h"
|
||||
#include "reader.h"
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
// Wrap an input stream for reading a serialized grammar.  The reader
// borrows the stream; it does not take ownership.
PgfReader::PgfReader(std::istream *in)
    : in(in)
{
}
|
||||
|
||||
// Read one byte.  EOF mid-grammar is a format error (pgf_error);
// any other stream failure is surfaced as a system_error with errno.
uint8_t PgfReader::read_uint8()
{
    uint8_t b;
    in->read((char*) &b, sizeof(b));
    if (in->eof())
        throw pgf_error("reached end of file while reading a grammar");
    if (in->fail())
        throw std::system_error(errno, std::generic_category());

    return b;
}
|
||||
|
||||
// Read a big-endian 16-bit unsigned integer.
uint16_t PgfReader::read_u16be()
{
    uint8_t buf[2];
    in->read((char*) &buf, sizeof(buf));
    if (in->eof())
        throw pgf_error("reached end of file while reading a grammar");
    if (in->fail())
        throw std::system_error(errno, std::generic_category());

    return (((uint16_t) buf[0]) << 8 | buf[1]);
}
|
||||
|
||||
uint64_t PgfReader::read_u64be()
|
||||
{
|
||||
uint8_t buf[8];
|
||||
in->read((char*) &buf, sizeof(buf));
|
||||
if (in->eof())
|
||||
throw pgf_error("reached end of file while reading a grammar");
|
||||
if (in->fail())
|
||||
throw std::system_error(errno, std::generic_category());
|
||||
|
||||
return (((uint64_t) buf[0]) << 56 |
|
||||
((uint64_t) buf[1]) << 48 |
|
||||
((uint64_t) buf[2]) << 40 |
|
||||
((uint64_t) buf[3]) << 32 |
|
||||
((uint64_t) buf[4]) << 24 |
|
||||
((uint64_t) buf[5]) << 16 |
|
||||
((uint64_t) buf[6]) << 8 |
|
||||
((uint64_t) buf[7]));
|
||||
}
|
||||
|
||||
// Decode an IEEE-754 binary64 value from its big-endian bit pattern,
// without relying on the host's float representation: the sign,
// 11-bit exponent and 52-bit mantissa are unpacked by hand.
double PgfReader::read_double()
{
    uint64_t u = read_u64be();

    bool sign = u >> 63;
    unsigned rawexp = u >> 52 & 0x7ff;
    uint64_t mantissa = u & 0xfffffffffffff;
    double ret;

    if (rawexp == 0x7ff) {
        // all-ones exponent: infinity (zero mantissa) or NaN
        ret = (mantissa == 0) ? INFINITY : NAN;
    } else {
        // normal numbers get the implicit leading 1 bit; subnormals
        // (rawexp == 0) are scaled by shifting the mantissa instead
        uint64_t m = rawexp ? 1ULL << 52 | mantissa : mantissa << 1;
        // 1075 = bias (1023) + mantissa width (52)
        ret = ldexp((double) m, rawexp - 1075);
    }
    // copysign (rather than negation) preserves the sign of NaN
    return sign ? copysign(ret, -1.0) : ret;
}
|
||||
|
||||
uint64_t PgfReader::read_uint()
|
||||
{
|
||||
uint64_t u = 0;
|
||||
int shift = 0;
|
||||
uint8_t b = 0;
|
||||
do {
|
||||
b = read_uint8();
|
||||
u |= (b & ~0x80) << shift;
|
||||
shift += 7;
|
||||
} while (b & 0x80);
|
||||
return u;
|
||||
}
|
||||
|
||||
// Read a length-prefixed byte string into the database.  The
// allocation reserves `struct_size` bytes of enclosing structure
// followed by a PgfText with `size` bytes of data plus a NUL
// terminator; the returned offset points at the enclosing structure.
moffset PgfReader::read_name_internal(size_t struct_size)
{
    size_t size = read_len();
    moffset offs = current_db->malloc_internal(struct_size+sizeof(PgfText)+size+1);
    PgfText* ptext = (PgfText*) (current_base+offs+struct_size);
    ptext->size = size;

    // If reading the extra bytes causes EOF, it is an encoding
    // error, not a legitimate end of character stream.
    in->read(ptext->text, size);
    if (in->eof())
        throw pgf_error("utf8 decoding error");
    if (in->fail())
        throw std::system_error(errno, std::generic_category());

    // BUG FIX: the terminator belongs at index `size`.  The buffer
    // holds size+1 bytes, so the previous write to text[size+1] was
    // one byte past the end of the allocation.
    ptext->text[size] = 0;

    return offs;
}
|
||||
|
||||
moffset PgfReader::read_text_internal(size_t struct_size)
{
    // Read `len` UTF-8 encoded characters from the stream into a
    // temporary buffer (validating lead bytes as we go), then copy the
    // bytes into a fresh database allocation: struct_size bytes of
    // headroom followed by an inline, NUL-terminated PgfText.
    // Returns the offset of the allocation.
    size_t len = read_len();

    // Worst case: 6 bytes per encoded character, plus the final NUL.
    char* buf = (char*) alloca(len*6+1);
    char* p = buf;
    for (size_t i = 0; i < len; i++) {
        uint8_t c = read_uint8();
        *(p++) = (char) c;

        if (c < 0x80) {
            continue; // single-byte (ASCII) character
        }
        if (c < 0xc2) {
            // 0x80..0xc1 are continuation bytes or overlong-encoding
            // lead bytes; neither may start a character.
            throw pgf_error("utf8 decoding error");
        }

        // Number of continuation bytes implied by the lead byte.
        // Fix: renamed from `len`, which shadowed the character count
        // declared above (bugprone, flagged by -Wshadow).
        int n_cont = (c < 0xe0 ? 1 :
                      c < 0xf0 ? 2 :
                      c < 0xf8 ? 3 :
                      c < 0xfc ? 4 :
                                 5
                     );
        // If reading the extra bytes causes EOF, it is an encoding
        // error, not a legitimate end of character stream.
        in->read(p, n_cont);
        if (in->eof())
            throw pgf_error("utf8 decoding error");
        if (in->fail())
            throw std::system_error(errno, std::generic_category());

        p += n_cont;
    }

    size_t size = p-buf; // total bytes, excluding the NUL
    *p++ = 0;

    moffset offs = current_db->malloc_internal(struct_size+sizeof(PgfText)+size+1);
    PgfText* ptext = (PgfText*) (current_base+offs+struct_size);
    ptext->size = size;
    memcpy(ptext->text, buf, size+1); // include the terminating NUL

    return offs;
}
|
||||
|
||||
template<class V>
|
||||
Namespace<V> PgfReader::read_namespace(ref<V> (PgfReader::*read_value)())
|
||||
{
|
||||
size_t len = read_len();
|
||||
Namespace<V> nmsp = 0;
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
ref<V> value = (this->*read_value)();
|
||||
nmsp = namespace_insert(nmsp, value);
|
||||
}
|
||||
return nmsp;
|
||||
}
|
||||
|
||||
template <class C, class V>
|
||||
ref<C> PgfReader::read_vector(PgfVector<V> C::* field, void (PgfReader::*read_value)(ref<V> val))
|
||||
{
|
||||
size_t len = read_len();
|
||||
ref<C> loc = vector_new<C,V>(field,len);
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
(this->*read_value)(vector_elem(ref<PgfVector<V>>::from_ptr(&(loc->*field)),i));
|
||||
}
|
||||
return loc;
|
||||
}
|
||||
|
||||
template <class V>
|
||||
ref<PgfVector<V>> PgfReader::read_vector(void (PgfReader::*read_value)(ref<V> val))
|
||||
{
|
||||
size_t len = read_len();
|
||||
ref<PgfVector<V>> vec = vector_new<V>(len);
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
(this->*read_value)(vector_elem(vec,i));
|
||||
}
|
||||
return vec;
|
||||
}
|
||||
|
||||
PgfLiteral PgfReader::read_literal()
{
    // Read a tagged literal (string, integer, or float) and return it
    // as a tagged reference. Throws pgf_error on an unknown tag.
    PgfLiteral lit = 0;

    uint8_t tag = read_tag();
    switch (tag) {
    case PgfLiteralStr::tag: {
        // String literal: the text is read (with UTF-8 validation)
        // straight into the allocated PgfLiteralStr.
        ref<PgfLiteralStr> lit_str =
            read_text<PgfLiteralStr>(&PgfLiteralStr::val);
        lit = ref<PgfLiteralStr>::tagged(lit_str);
        break;
    }
    case PgfLiteralInt::tag: {
        // NOTE(review): this case passes `tag` to DB::malloc, while the
        // float case below allocates via current_db->malloc() without
        // it — confirm against DB::malloc's signature that the extra
        // argument is intentional and the two forms are equivalent.
        ref<PgfLiteralInt> lit_int =
            DB::malloc<PgfLiteralInt>(tag);
        lit_int->val = read_int();
        lit = ref<PgfLiteralInt>::tagged(lit_int);
        break;
    }
    case PgfLiteralFlt::tag: {
        // Float literal: value stored as a big-endian IEEE 754 double.
        ref<PgfLiteralFlt> lit_flt =
            current_db->malloc<PgfLiteralFlt>();
        lit_flt->val = read_double();
        lit = ref<PgfLiteralFlt>::tagged(lit_flt);
        break;
    }
    default:
        throw pgf_error("Unknown literal tag");
    }
    return lit;
}
|
||||
|
||||
ref<PgfFlag> PgfReader::read_flag()
{
    // A flag is serialized as its name followed by its literal value.
    // read_name allocates the PgfFlag with the name stored inline.
    ref<PgfFlag> result = read_name(&PgfFlag::name);
    result->value = read_literal();
    return result;
}
|
||||
|
||||
PgfExpr PgfReader::read_expr()
{
    // Read a tagged abstract-syntax expression (recursively) and
    // return it as a tagged reference. Throws pgf_error on an unknown
    // tag.
    PgfExpr expr = 0;
    uint8_t tag = read_tag();

    switch (tag) {
    case PgfExprAbs::tag:{
        // Lambda abstraction: binding type, bound variable name, body.
        PgfBindType bind_type = (PgfBindType) read_tag();
        ref<PgfExprAbs> eabs = read_name(&PgfExprAbs::name);
        eabs->bind_type = bind_type;
        eabs->body = read_expr();
        expr = ref<PgfExprAbs>::tagged(eabs);
        break;
    }
    case PgfExprApp::tag: {
        // Application: function expression followed by its argument.
        ref<PgfExprApp> eapp = DB::malloc<PgfExprApp>();
        eapp->fun = read_expr();
        eapp->arg = read_expr();
        expr = ref<PgfExprApp>::tagged(eapp);
        break;
    }
    case PgfExprLit::tag: {
        // Literal expression wrapping a PgfLiteral.
        ref<PgfExprLit> elit = DB::malloc<PgfExprLit>();
        elit->lit = read_literal();
        expr = ref<PgfExprLit>::tagged(elit);
        break;
    }
    case PgfExprMeta::tag: {
        // Metavariable with a numeric id.
        ref<PgfExprMeta> emeta = DB::malloc<PgfExprMeta>();
        emeta->id = read_int();
        expr = ref<PgfExprMeta>::tagged(emeta);
        break;
    }
    case PgfExprFun::tag: {
        // Function constant; only the name is stored.
        ref<PgfExprFun> efun = read_name(&PgfExprFun::name);
        expr = ref<PgfExprFun>::tagged(efun);
        break;
    }
    case PgfExprVar::tag: {
        // De-Bruijn-style variable index.
        ref<PgfExprVar> evar = DB::malloc<PgfExprVar>();
        evar->var = read_int();
        expr = ref<PgfExprVar>::tagged(evar);
        break;
    }
    case PgfExprTyped::tag: {
        // Expression with an explicit type annotation.
        ref<PgfExprTyped> etyped = DB::malloc<PgfExprTyped>();
        etyped->expr = read_expr();
        etyped->type = read_type();
        expr = ref<PgfExprTyped>::tagged(etyped);
        break;
    }
    case PgfExprImplArg::tag: {
        ref<PgfExprImplArg> eimpl = current_db->malloc<PgfExprImplArg>();
        eimpl->expr = read_expr();
        expr = ref<PgfExprImplArg>::tagged(eimpl);
        break;
    }
    default:
        throw pgf_error("Unknown expression tag");
    }

    // Fix: the original ended with "return 0;", silently discarding
    // the expression built in every case above (and leaking its
    // allocations in the database).
    return expr;
}
|
||||
|
||||
void PgfReader::read_hypo(ref<PgfHypo> hypo)
{
    // Fill a hypothesis (one binding in a type's context) in place.
    // The three reads must stay in serialization order: binding type,
    // bound variable name, then its type.
    hypo->bind_type = (PgfBindType) read_tag();
    hypo->cid = read_name();
    hypo->type = read_type();
}
|
||||
|
||||
ref<PgfType> PgfReader::read_type()
{
    // Serialization order matters: the hypotheses come first, then the
    // category name (whose read also allocates the PgfType itself),
    // then the argument expressions.
    ref<PgfVector<PgfHypo>> context =
        read_vector<PgfHypo>(&PgfReader::read_hypo);

    ref<PgfType> ty = read_name<PgfType>(&PgfType::name);
    ty->hypos = context;
    ty->exprs = read_vector<PgfExpr>(&PgfReader::read_expr);
    return ty;
}
|
||||
|
||||
PgfPatt PgfReader::read_patt()
{
    // Read a tagged pattern (used in function definition equations,
    // recursively) and return it as a tagged reference. Throws
    // pgf_error on an unknown tag.
    PgfPatt patt = 0;

    uint8_t tag = read_tag();
    switch (tag) {
    case PgfPattApp::tag: {
        // Constructor application: the constructor name, then the
        // argument patterns. The name must be read before the vector
        // (stream order); the vector read allocates the PgfPattApp.
        ref<PgfText> ctor = read_name();

        ref<PgfPattApp> papp =
            read_vector<PgfPattApp,PgfPatt>(&PgfPattApp::args,&PgfReader::read_patt2);
        papp->ctor = ctor;
        patt = ref<PgfPattApp>::tagged(papp);
        break;
    }
    case PgfPattVar::tag: {
        // Pattern variable; only the name is stored.
        ref<PgfPattVar> pvar = read_name<PgfPattVar>(&PgfPattVar::name);
        patt = ref<PgfPattVar>::tagged(pvar);
        break;
    }
    case PgfPattAs::tag: {
        // As-pattern: a name bound to a nested pattern.
        ref<PgfPattAs> pas = read_name<PgfPattAs>(&PgfPattAs::name);
        pas->patt = read_patt();
        patt = ref<PgfPattAs>::tagged(pas);
        break;
    }
    case PgfPattWild::tag: {
        // Wildcard pattern; carries no payload.
        ref<PgfPattWild> pwild = DB::malloc<PgfPattWild>();
        patt = ref<PgfPattWild>::tagged(pwild);
        break;
    }
    case PgfPattLit::tag: {
        // Literal pattern.
        ref<PgfPattLit> plit = DB::malloc<PgfPattLit>();
        plit->lit = read_literal();
        patt = ref<PgfPattLit>::tagged(plit);
        break;
    }
    case PgfPattImplArg::tag: {
        // Implicit-argument pattern wrapping a nested pattern.
        ref<PgfPattImplArg> pimpl = DB::malloc<PgfPattImplArg>();
        pimpl->patt = read_patt();
        patt = ref<PgfPattImplArg>::tagged(pimpl);
        break;
    }
    case PgfPattTilde::tag: {
        // Tilde (inaccessible) pattern wrapping an expression.
        ref<PgfPattTilde> ptilde = DB::malloc<PgfPattTilde>();
        ptilde->expr = read_expr();
        patt = ref<PgfPattTilde>::tagged(ptilde);
        break;
    }
    default:
        throw pgf_error("Unknown pattern tag");
    }

    return patt;
}
|
||||
|
||||
void PgfReader::read_defn(ref<ref<PgfEquation>> defn)
{
    // One definition equation: its pattern vector (the vector read
    // also allocates the PgfEquation) followed by the body expression.
    ref<PgfEquation> equation = read_vector(&PgfEquation::patts,&PgfReader::read_patt2);
    equation->body = read_expr();
    *defn = equation;
}
|
||||
|
||||
ref<PgfAbsFun> PgfReader::read_absfun()
{
    // Read an abstract-syntax function: name, type, arity, optional
    // definition equations, and probability.
    ref<PgfAbsFun> absfun =
        read_name<PgfAbsFun>(&PgfAbsFun::name);

    // The function's inline name is reinterpreted as a PgfExprFun so
    // that ep.expr can point at a ready-made function expression
    // without a separate allocation — presumably PgfExprFun's layout
    // ends with the same PgfText; confirm against expr.h.
    ref<PgfExprFun> efun =
        ref<PgfExprFun>::from_ptr((PgfExprFun*) &absfun->name);
    absfun->ep.expr = ref<PgfExprFun>::tagged(efun);
    absfun->type = read_type();
    absfun->arity = read_int();

    // Optional equations: tag 0 = no definition, tag 1 = a vector of
    // equations follows.
    uint8_t tag = read_tag();
    switch (tag) {
    case 0:
        absfun->defns = 0;
        break;
    case 1:
        absfun->defns =
            read_vector<ref<PgfEquation>>(&PgfReader::read_defn);
        break;
    default:
        throw pgf_error("Unknown tag, 0 or 1 expected");
    }
    // Stored as a plain probability in the file; kept in memory as a
    // negative log-probability.
    absfun->ep.prob = - log(read_double());
    return absfun;
}
|
||||
|
||||
ref<PgfAbsCat> PgfReader::read_abscat()
{
    // Read an abstract-syntax category: name, context (hypotheses),
    // and probability.
    ref<PgfAbsCat> abscat = read_name<PgfAbsCat>(&PgfAbsCat::name);
    abscat->context = read_vector<PgfHypo>(&PgfReader::read_hypo);

    // for now we just read the set of functions per category and ignore them
    size_t n_funs = read_len();
    for (size_t i = 0; i < n_funs; i++) {
        read_double(); // per-function probability (return value discarded)
        read_name();   // function name (return value discarded)
    }

    // Stored as a plain probability in the file; kept in memory as a
    // negative log-probability.
    abscat->prob = - log(read_double());
    return abscat;
}
|
||||
|
||||
void PgfReader::read_abstract(ref<PgfAbstr> abstract)
{
    // Fill the abstract syntax in place. The reads must stay in
    // serialization order: name, flags, functions, categories.
    abstract->name = read_name();
    abstract->aflags = read_namespace<PgfFlag>(&PgfReader::read_flag);
    abstract->funs = read_namespace<PgfAbsFun>(&PgfReader::read_absfun);
    abstract->cats = read_namespace<PgfAbsCat>(&PgfReader::read_abscat);
}
|
||||
|
||||
ref<PgfPGFRoot> PgfReader::read_pgf()
{
    // Top-level entry point: read the PGF version header, the global
    // flags, and the abstract syntax into a freshly allocated root.
    ref<PgfPGFRoot> root = DB::malloc<PgfPGFRoot>();

    root->major_version = read_u16be();
    root->minor_version = read_u16be();

    root->gflags = read_namespace<PgfFlag>(&PgfReader::read_flag);

    read_abstract(ref<PgfAbstr>::from_ptr(&root->abstract));

    return root;
}
|
||||
79
src/runtime/c/pgf/reader.h
Normal file
79
src/runtime/c/pgf/reader.h
Normal file
@@ -0,0 +1,79 @@
|
||||
#ifndef READER_H_
|
||||
#define READER_H_
|
||||
|
||||
#include <fstream>
|
||||
#include <stdint.h>
|
||||
#include "db.h"
|
||||
|
||||
// reader for PGF files
|
||||
|
||||
// Deserializes a PGF grammar from an std::istream into the database.
// All read_* methods consume bytes in strict serialization order and
// throw pgf_error / std::system_error on malformed input or I/O
// failure (see reader.cxx).
class PGF_INTERNAL_DECL PgfReader
{
public:
    PgfReader(std::istream *in);

    // Fixed-size primitives; multi-byte values are big-endian.
    uint8_t read_uint8();
    uint16_t read_u16be();
    uint64_t read_u64be();
    double read_double();
    // Variable-length integer: 7 payload bits per byte, high bit set
    // means another byte follows.
    uint64_t read_uint();
    int64_t read_int() { return (int64_t) read_uint(); };
    size_t read_len() { return (size_t) read_uint(); };

    uint8_t read_tag() { return read_uint8(); }

    // Read a length-prefixed name into the PgfText member `field` of a
    // freshly allocated V; the string bytes are stored inline after
    // the struct header.
    template<class V>
    ref<V> read_name(PgfText V::* field) {
        return read_name_internal((size_t) &(((V*) NULL)->*field));
    };

    // As above but for a bare PgfText with no enclosing struct.
    ref<PgfText> read_name() {
        return read_name_internal(0);
    };

    // Like read_name, but the payload is a sequence of UTF-8
    // characters whose lead bytes are validated while reading.
    template<class V>
    ref<V> read_text(PgfText V::* field) {
        return read_text_internal((size_t) &(((V*) NULL)->*field));
    };

    ref<PgfText> read_text() {
        return read_text_internal(0);
    };

    // Read a length-prefixed collection, inserting each value (read by
    // the given member function) into a namespace.
    template<class V>
    Namespace<V> read_namespace(ref<V> (PgfReader::*read_value)());

    // Read a vector embedded as member `field` of a larger struct C.
    template <class C, class V>
    ref<C> read_vector(PgfVector<V> C::* field, void (PgfReader::*read_value)(ref<V> val));

    // Read a standalone length-prefixed vector.
    template<class V>
    ref<PgfVector<V>> read_vector(void (PgfReader::*read_value)(ref<V> val));

    PgfLiteral read_literal();
    PgfExpr read_expr();
    // In-place adapter used as a read_vector callback.
    void read_expr(ref<PgfExpr> r) { *r = read_expr(); };

    void read_hypo(ref<PgfHypo> hypo);
    ref<PgfType> read_type();

    ref<PgfFlag> read_flag();

    PgfPatt read_patt();
    // In-place adapter used as a read_vector callback.
    void read_patt2(ref<PgfPatt> r) { *r = read_patt(); };

    void read_defn(ref<ref<PgfEquation>> defn);

    ref<PgfAbsFun> read_absfun();
    ref<PgfAbsCat> read_abscat();
    void read_abstract(ref<PgfAbstr> abstract);

    // Top-level entry point: version header, global flags, abstract
    // syntax.
    ref<PgfPGFRoot> read_pgf();

private:
    std::istream *in; // borrowed stream; not owned by the reader

    // Shared implementations for read_name/read_text: struct_size is
    // the byte offset of the inline PgfText within the allocation.
    moffset read_name_internal(size_t struct_size);
    moffset read_text_internal(size_t struct_size);
};
|
||||
|
||||
#endif
|
||||
91
src/runtime/c/pgf/text.cxx
Normal file
91
src/runtime/c/pgf/text.cxx
Normal file
@@ -0,0 +1,91 @@
|
||||
#include "data.h"
|
||||
|
||||
PGF_INTERNAL
// Lexicographic, byte-wise comparison of two PgfText values
// (strcmp-style contract): negative if t1 < t2, zero if equal,
// positive if t1 > t2. A proper prefix compares less.
int textcmp(PgfText *t1, PgfText *t2)
{
    for (size_t i = 0; ; i++) {
        if (i >= t1->size)
            // Fix: return a well-defined -1/0. The original returned
            // "(int)(i - t2->size)", a size_t subtraction that wraps
            // to a huge unsigned value when t1 is the shorter string
            // and only turns negative through implementation-defined
            // narrowing to int.
            return (i >= t2->size) ? 0 : -1;
        if (i >= t2->size)
            return 1; // t2 is a proper prefix of t1

        // Byte comparison (char signedness follows the platform, as
        // in the original).
        if (t1->text[i] > t2->text[i])
            return 1;
        else if (t1->text[i] < t2->text[i])
            return -1;
    }
}
|
||||
|
||||
PGF_INTERNAL
// Make a heap-allocated (malloc) copy of t1, including the
// terminating NUL byte; the caller is responsible for freeing it.
PgfText* textdup(PgfText *t1)
{
    size_t n_bytes = sizeof(PgfText) + t1->size + 1;
    PgfText *copy = (PgfText *) malloc(n_bytes);
    copy->size = t1->size;
    memcpy(copy->text, t1->text, t1->size+1);
    return copy;
}
|
||||
|
||||
PGF_API uint32_t
// Decode one UTF-8 encoded code point starting at *src_inout and
// advance the pointer past it. NOTE(review): no validation is
// performed here — a continuation byte (0x80..0xbf) in lead position
// yields garbage rather than an error; callers presumably only pass
// already-validated text (cf. read_text_internal) — confirm.
pgf_utf8_decode(const uint8_t** src_inout)
{
    const uint8_t* src = *src_inout;
    uint8_t c = src[0];
    if (c < 0x80) {
        // Single-byte (ASCII) character.
        *src_inout = src + 1;
        return c;
    }
    // Number of continuation bytes implied by the lead byte.
    size_t len = (c < 0xe0 ? 1 :
                  c < 0xf0 ? 2 :
                  c < 0xf8 ? 3 :
                  c < 0xfc ? 4 :
                             5
                 );
    // Byte `len` of this constant (counting from the low end) is the
    // payload mask for a lead byte with `len` continuation bytes:
    // 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01.
    uint64_t mask = 0x0103070F1f7f;
    uint32_t u = c & (mask >> (len * 8));
    for (size_t i = 1; i <= len; i++) {
        c = src[i];
        u = u << 6 | (c & 0x3f); // append 6 payload bits per byte
    }
    *src_inout = &src[len + 1];
    return u;
}
|
||||
|
||||
PGF_API void
// Encode the code point `ucs` as UTF-8 at **buf (1 to 6 bytes,
// covering the full historical 31-bit UCS range) and advance *buf
// past the bytes written. The caller must provide enough room.
pgf_utf8_encode(uint32_t ucs, uint8_t** buf)
{
    uint8_t* out = *buf;
    if (ucs < 0x80) {
        // Single-byte (ASCII) character.
        *out++ = (uint8_t) ucs;
    } else {
        // Pick the number of continuation bytes and the lead-byte
        // marker from the code point's magnitude.
        unsigned n_cont;
        uint8_t lead;
        if (ucs < 0x800) {
            n_cont = 1; lead = 0xc0;
        } else if (ucs < 0x10000) {
            n_cont = 2; lead = 0xe0;
        } else if (ucs < 0x200000) {
            n_cont = 3; lead = 0xf0;
        } else if (ucs < 0x4000000) {
            n_cont = 4; lead = 0xf8;
        } else {
            n_cont = 5; lead = 0xfc;
        }
        // Lead byte carries the high-order payload bits...
        *out++ = lead | (uint8_t) (ucs >> (6*n_cont));
        // ...then each continuation byte takes the next 6 bits.
        while (n_cont > 0) {
            n_cont--;
            *out++ = 0x80 | ((ucs >> (6*n_cont)) & 0x3f);
        }
    }
    *buf = out;
}
|
||||
13
src/runtime/c/pgf/text.h
Normal file
13
src/runtime/c/pgf/text.h
Normal file
@@ -0,0 +1,13 @@
|
||||
#ifndef TEXT_H
#define TEXT_H

// strcmp-style lexicographic byte comparison of two PgfText values:
// negative / zero / positive ordering result.
PGF_INTERNAL_DECL
int textcmp(PgfText *t1, PgfText *t2);

// Heap-allocated (malloc) copy of a PgfText; the caller frees it.
PGF_INTERNAL_DECL
PgfText* textdup(PgfText *t1);

// Decode one UTF-8 code point and advance *src_inout past it.
PGF_API uint32_t
pgf_utf8_decode(const uint8_t** src_inout);

#endif
|
||||
32
src/runtime/c/pgf/vector.h
Normal file
32
src/runtime/c/pgf/vector.h
Normal file
@@ -0,0 +1,32 @@
|
||||
#ifndef VECTOR_H
|
||||
#define VECTOR_H
|
||||
|
||||
// A length-prefixed inline array: `len` elements of A stored directly
// after the header (flexible array member), so the whole vector lives
// in one allocation.
template <class A>
struct PgfVector {
    size_t len;
    A data[];
};
|
||||
|
||||
// Allocate a standalone vector of `len` elements in the database and
// set its length; the elements themselves are left uninitialized.
template <class A> inline
ref<PgfVector<A>> vector_new(size_t len)
{
    ref<PgfVector<A>> res = DB::malloc<PgfVector<A>>(sizeof(PgfVector<A>)+len*sizeof(A));
    res->len = len;
    return res;
}
|
||||
|
||||
// Allocate a struct C whose member `field` is an inline vector of
// `len` elements, sizing the allocation as offsetof(C, field) plus the
// vector. (The "&(((C*) NULL)->*field)" expression is a manual
// offsetof over a member pointer.)
template <class C, class A> inline
ref<C> vector_new(PgfVector<A> C::* field, size_t len)
{
    ref<C> res = DB::malloc<C>(((size_t) &(((C*) NULL)->*field))+sizeof(PgfVector<A>)+len*sizeof(A));
    (res->*field).len = len;
    return res;
}
|
||||
|
||||
// Reference to element `index` of a vector; no bounds checking is
// performed.
template <class A> inline
ref<A> vector_elem(ref<PgfVector<A>> v, size_t index)
{
    return ref<A>::from_ptr(&v->data[index]);
}
|
||||
|
||||
#endif // VECTOR_H
|
||||
Reference in New Issue
Block a user