// gf-core/src/runtime/c/pgf/db.cxx (snapshot of 2022-02-09)
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "data.h"
#ifndef _WIN32
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <errno.h>
#include <pthread.h>
typedef struct {
dev_t dev;
ino_t ino;
pthread_rwlock_t rwlock;
} lock_entry;
typedef struct {
pthread_mutex_t mutex;
size_t n_entries;
size_t n_max_entries;
lock_entry entries[];
} file_locks;
static char gf_runtime_locks[] = "/gf-runtime-locks";
static file_locks *locks = NULL;
static
pthread_rwlock_t *ipc_rwlock_new(const char* file_path)
{
int res;
if (file_path == NULL) {
pthread_rwlock_t *rwlock = (pthread_rwlock_t *)
malloc(sizeof(pthread_rwlock_t));
if (rwlock == NULL)
throw pgf_systemerror(ENOMEM);
if ((res = pthread_rwlock_init(rwlock, NULL)) != 0) {
throw pgf_systemerror(res);
}
return rwlock;
}
if (locks == NULL) {
int created = 0;
// Uncomment if you want a clean state
//shm_unlink(gf_runtime_locks);
int fd =
shm_open(gf_runtime_locks, O_RDWR, 0);
if (fd < 0 && errno == ENOENT) { // errno is only meaningful if shm_open failed
created = 1;
fd = shm_open(gf_runtime_locks, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
}
if (fd < 0) {
throw pgf_systemerror(errno);
}
int pagesize = getpagesize();
if (ftruncate(fd, pagesize) != 0) {
close(fd);
throw pgf_systemerror(errno);
}
locks = (file_locks *)
mmap(NULL, pagesize,
PROT_READ|PROT_WRITE,
MAP_SHARED,
fd,0);
close(fd);
if (locks == MAP_FAILED) {
locks = NULL;
throw pgf_systemerror(errno);
}
if (created) {
pthread_mutexattr_t attr;
if ((res = pthread_mutexattr_init(&attr)) != 0) {
    throw pgf_systemerror(res);
}
if ((res = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)) != 0) {
    pthread_mutexattr_destroy(&attr);
    throw pgf_systemerror(res);
}
if ((res = pthread_mutex_init(&locks->mutex, &attr)) != 0) {
    pthread_mutexattr_destroy(&attr);
    throw pgf_systemerror(res);
}
pthread_mutexattr_destroy(&attr);
locks->n_entries = 0;
locks->n_max_entries = (pagesize-sizeof(file_locks))/sizeof(lock_entry);
}
}
struct stat s;
if (stat(file_path, &s) != 0) {
throw pgf_systemerror(errno);
}
pthread_mutex_lock(&locks->mutex);
lock_entry *entry = NULL;
for (size_t i = 0; i < locks->n_entries; i++) {
if (locks->entries[i].dev == 0 && locks->entries[i].ino == 0) {
entry = &locks->entries[i];
}
if (locks->entries[i].dev == s.st_dev && locks->entries[i].ino == s.st_ino) {
entry = &locks->entries[i];
break;
}
}
if (entry == NULL) {
if (locks->n_entries >= locks->n_max_entries) {
pthread_mutex_unlock(&locks->mutex);
throw pgf_error("Too many open grammars");
}
entry = &locks->entries[locks->n_entries++];
pthread_rwlockattr_t attr;
if ((res = pthread_rwlockattr_init(&attr)) != 0) {
pthread_mutex_unlock(&locks->mutex);
throw pgf_systemerror(res);
}
if ((res = pthread_rwlockattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)) != 0) {
pthread_rwlockattr_destroy(&attr);
pthread_mutex_unlock(&locks->mutex);
throw pgf_systemerror(res);
}
if ((res = pthread_rwlock_init(&entry->rwlock, &attr)) != 0) {
pthread_rwlockattr_destroy(&attr);
pthread_mutex_unlock(&locks->mutex);
throw pgf_systemerror(res);
}
pthread_rwlockattr_destroy(&attr);
}
entry->dev = s.st_dev;
entry->ino = s.st_ino;
pthread_mutex_unlock(&locks->mutex);
return &entry->rwlock;
}
static
void ipc_rwlock_destroy(const char* file_path,
pthread_rwlock_t *rwlock)
{
if (file_path == NULL) {
pthread_rwlock_destroy(rwlock);
free(rwlock);
return;
}
if (locks == NULL)
return;
pthread_mutex_lock(&locks->mutex);
for (size_t i = 0; i < locks->n_entries; i++) {
if (&locks->entries[i].rwlock == rwlock) {
locks->entries[i].dev = 0;
locks->entries[i].ino = 0;
break;
}
}
pthread_mutex_unlock(&locks->mutex);
}
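/* Illustrative usage sketch (not part of the runtime): the pair above
   hands out one process-shared rwlock per (dev, ino) of a grammar file,
   registered in the "/gf-runtime-locks" POSIX shared-memory segment.
   A hypothetical caller (the path is made up) would do roughly:

       pthread_rwlock_t *l = ipc_rwlock_new("/tmp/foo.ngf"); // shared across processes
       pthread_rwlock_rdlock(l);   // or pthread_rwlock_wrlock for writers
       ...                         // access the mapped file
       pthread_rwlock_unlock(l);
       ipc_rwlock_destroy("/tmp/foo.ngf", l); // marks the registry slot free

   Passing file_path == NULL instead yields an ordinary heap-allocated,
   process-private rwlock. */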
#else
static
size_t getpagesize()
{
SYSTEM_INFO si;
GetSystemInfo(&si);
return si.dwPageSize;
}
#define ftruncate _chsize
#define getpid GetCurrentProcessId
static
int last_error_to_errno()
{
switch (GetLastError()) {
case ERROR_SUCCESS:
return 0;
case ERROR_OUTOFMEMORY:
return ENOMEM;
case ERROR_HANDLE_DISK_FULL:
return ENOSPC;
default:
return EINVAL;
}
}
#endif
PGF_INTERNAL __thread unsigned char* current_base __attribute__((tls_model("initial-exec"))) = NULL;
PGF_INTERNAL __thread PgfDB* current_db __attribute__((tls_model("initial-exec"))) = NULL;
PGF_INTERNAL __thread DB_scope *last_db_scope __attribute__((tls_model("initial-exec"))) = NULL;
#ifndef DEFAULT_TOP_PAD
#define DEFAULT_TOP_PAD (0)
#endif
#define ptr(ms,o) ((mchunk*) (((char*) (ms)) + (o)))
#define ofs(ms,p) (((char*) (p)) - ((char*) (ms)))
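/* All cross-references inside the mapped file are stored as byte offsets
   ("object" values) relative to the malloc_state header at the start of
   the mapping, never as raw pointers: ptr(ms, ofs(ms, p)) == p for any p
   inside the mapping. This is what keeps the data valid when the file is
   remapped at a different address (see resize_map below, which updates
   current_base). */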
struct mchunk {
size_t mchunk_prev_size; /* Size of previous chunk (if free). */
size_t mchunk_size; /* Size in bytes, including overhead. */
object fd; /* double links -- used only if free. */
object bk;
/* Only used for large blocks: pointer to next larger size. */
object fd_nextsize; /* double links -- used only if free. */
object bk_nextsize;
};
#define POOL_ALIGNMENT (2 * sizeof(size_t) < __alignof__ (long double) \
? __alignof__ (long double) : 2 * sizeof(size_t))
/*
Bins
An array of bin headers for free chunks. Each bin is doubly
linked. The bins are approximately proportionally (log) spaced.
There are a lot of these bins (128). This may look excessive, but
works very well in practice. Most bins hold sizes that are
unusual as allocation request sizes, but are more usual for fragments
and consolidated sets of chunks, which is what these bins hold, so
they can be found quickly. All procedures maintain the invariant
that no consolidated chunk physically borders another one, so each
chunk in a list is known to be preceded and followed by either
inuse chunks or the ends of memory.
Chunks in bins are kept in size order, with ties going to the
approximately least recently used chunk. Ordering isn't needed
for the small bins, which all contain the same-sized chunks, but
facilitates best-fit allocation for larger chunks. These lists
are just sequential. Keeping them in order almost never requires
enough traversal to warrant using fancier ordered data
structures.
Chunks of the same size are linked with the most
recently freed at the front, and allocations are taken from the
back. This results in LRU (FIFO) allocation order, which tends
to give each chunk an equal opportunity to be consolidated with
adjacent freed chunks, resulting in larger free chunks and less
fragmentation.
To simplify use in double-linked lists, each bin header acts
as an mchunk. This avoids special-casing for headers.
But to conserve space and improve locality, we allocate
only the fd/bk pointers of bins, and then use repositioning tricks
to treat these as the fields of a mchunk*.
*/
typedef struct mchunk mbin;
/* addressing -- note that bin_at(0) does not exist */
#define bin_at(m, i) \
(mbin*) (((char *) &((m)->bins[((i) - 1) * 2])) \
- offsetof (mchunk, fd))
/* analog of ++bin */
#define next_bin(b) ((mbin*) ((char *) (b) + (sizeof(mchunk*) << 1)))
/* Reminders about list directionality within bins */
#define first(b) ((b)->fd)
#define last(b) ((b)->bk)
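/* Worked example of the repositioning trick: only the fd/bk pair of each
   bin header is stored, in ms->bins. bin_at(m, i) points offsetof(mchunk, fd)
   bytes *before* &bins[(i-1)*2], so that for the resulting fake mchunk*,
   ->fd lands on bins[(i-1)*2] and ->bk on bins[(i-1)*2 + 1]. The size
   fields of this fake chunk overlap the preceding header fields and must
   never be read. */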
/*
Indexing
Bins for sizes < 512 bytes contain chunks of all the same size, spaced
8 bytes apart. Larger bins are approximately logarithmically spaced:
64 bins of size 8
32 bins of size 64
16 bins of size 512
8 bins of size 4096
4 bins of size 32768
2 bins of size 262144
1 bin of size what's left
There is actually a little bit of slop in the numbers in bin_index
for the sake of speed. This makes no difference elsewhere.
The bins top out around 1MB because we expect to service large
requests via mmap.
Bin 0 does not exist. Bin 1 is the unordered list; if that would be
a valid chunk size the small bins are bumped up one.
*/
#define NBINS 128
#define NSMALLBINS 64
#define SMALLBIN_WIDTH POOL_ALIGNMENT
#define SMALLBIN_CORRECTION (POOL_ALIGNMENT > 2 * sizeof(size_t))
#define MIN_LARGE_SIZE ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)
#define in_smallbin_range(sz) \
((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)
#define smallbin_index(sz) \
((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
+ SMALLBIN_CORRECTION)
#define largebin_index_32(sz) \
(((((unsigned long) (sz)) >> 6) <= 38) ? 56 + (((unsigned long) (sz)) >> 6) :\
((((unsigned long) (sz)) >> 9) <= 20) ? 91 + (((unsigned long) (sz)) >> 9) :\
((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
126)
#define largebin_index_32_big(sz) \
(((((unsigned long) (sz)) >> 6) <= 45) ? 49 + (((unsigned long) (sz)) >> 6) :\
((((unsigned long) (sz)) >> 9) <= 20) ? 91 + (((unsigned long) (sz)) >> 9) :\
((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
126)
// XXX It remains to be seen whether it is good to keep the widths of
// XXX the buckets the same or whether it should be scaled by a factor
// XXX of two as well.
#define largebin_index_64(sz) \
(((((unsigned long) (sz)) >> 6) <= 48) ? 48 + (((unsigned long) (sz)) >> 6) :\
((((unsigned long) (sz)) >> 9) <= 20) ? 91 + (((unsigned long) (sz)) >> 9) :\
((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
126)
#define largebin_index(sz) \
(sizeof(size_t) == 8 ? largebin_index_64 (sz) \
: POOL_ALIGNMENT == 16 ? largebin_index_32_big (sz) \
: largebin_index_32 (sz))
/*
Unsorted chunks
All remainders from chunk splits, as well as all returned chunks,
are first placed in the "unsorted" bin. They are then placed
in regular bins after malloc gives them ONE chance to be used before
binning. So, basically, the unsorted_chunks list acts as a queue,
with chunks being placed on it in free (and malloc_consolidate),
and taken off (to be either used or placed in bins) in malloc.
*/
/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
#define unsorted_chunks(M) (bin_at (M, 1))
/* conversion from malloc headers to user pointers, and back */
#define chunk2mem(p) ((void*)((char*)(p) + 2*sizeof(size_t)))
#define mem2chunk(mem) ((mchunk*)((char*)(mem) - 2*sizeof(size_t)))
#define MIN_CHUNK_SIZE (offsetof(mchunk, fd_nextsize))
/* The smallest size we can malloc is an aligned minimal chunk */
#define MINSIZE \
(unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
/* pad request bytes into a usable size -- internal version */
#define request2size(req) \
(((req) + sizeof(size_t) + MALLOC_ALIGN_MASK < MINSIZE) ? \
MINSIZE : \
((req) + sizeof(size_t) + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
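/* Worked example (illustrative, assuming a 64-bit build where
   sizeof(size_t) == 8, POOL_ALIGNMENT == 16 and hence, per data.h,
   MALLOC_ALIGN_MASK == 15 and MINSIZE == 32):
       request2size(100) == (100 + 8 + 15) & ~15 == 112
       in_smallbin_range(112)            // 112 < MIN_LARGE_SIZE == 1024
       smallbin_index(112)  == 112 >> 4 == 7
       largebin_index(2000) == 48 + (2000 >> 6) == 79
   The exact constants come from data.h; the arithmetic above is only a
   sketch under those assumptions. */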
/*
--------------- Physical chunk operations ---------------
*/
/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
#define PREV_INUSE 0x1
/* extract inuse bit of previous chunk */
#define prev_inuse(p) ((p)->mchunk_size & PREV_INUSE)
/* Get size, ignoring use bits */
#define chunksize(p) (p->mchunk_size & ~(PREV_INUSE))
/* Size of the chunk below P. Only valid if !prev_inuse (P). */
#define prev_size(p) ((p)->mchunk_prev_size)
/* Treat space at ptr + offset as a chunk */
#define chunk_at_offset(p, s) ((mchunk*) (((char *) (p)) + (s)))
/* extract p's inuse bit */
#define inuse(p) \
((((mchunk*) (((char *) (p)) + chunksize(p)))->mchunk_size) & PREV_INUSE)
/* check/set/clear inuse bits in known places */
#define inuse_bit_at_offset(p, s) \
(((mchunk*) (((char *) (p)) + (s)))->mchunk_size & PREV_INUSE)
#define set_inuse_bit_at_offset(p, s) \
(((mchunk*) (((char *) (p)) + (s)))->mchunk_size |= PREV_INUSE)
#define clear_inuse_bit_at_offset(p, s) \
(((mchunk*) (((char *) (p)) + (s)))->mchunk_size &= ~(PREV_INUSE))
/* Set size at head, without disturbing its use bit */
#define set_head_size(p, s) ((p)->mchunk_size = (((p)->mchunk_size & PREV_INUSE) | (s)))
/* Set size/use field */
#define set_head(p, s) ((p)->mchunk_size = (s))
/* Set size at footer (only when chunk is not in use) */
#define set_foot(p, s) (((mchunk*) ((char *) (p) + (s)))->mchunk_prev_size = (s))
/*
Binmap
To help compensate for the large number of bins, a one-level index
structure is used for bin-by-bin searching. `binmap' is a
bitvector recording whether bins are definitely empty so they can
be skipped over during traversals. The bits are NOT always
cleared as soon as bins are empty, but instead only
when they are noticed to be empty during traversal in malloc.
*/
/* Conservatively use 32 bits per map word, even on 64-bit systems */
#define BINMAPSHIFT 5
#define BITSPERMAP (1U << BINMAPSHIFT)
#define BINMAPSIZE (NBINS / BITSPERMAP)
#define idx2block(i) ((i) >> BINMAPSHIFT)
#define idx2bit(i) ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))
#define mark_bin(ms, i) ((ms)->binmap[idx2block(i)] |= idx2bit (i))
#define unmark_bin(ms, i) ((ms)->binmap[idx2block(i)] &= ~(idx2bit (i)))
#define get_binmap(ms, i) ((ms)->binmap[idx2block(i)] & idx2bit (i))
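/* Example: bin 70 lives in map word idx2block(70) == 70 >> 5 == 2, at bit
   idx2bit(70) == 1 << (70 & 31) == 1 << 6. mark_bin/unmark_bin OR in or
   mask out that bit; get_binmap tests it. */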
/*
Fastbins
An array of lists holding recently freed small chunks. Fastbins
are not doubly linked. It is faster to single-link them, and
since chunks are never removed from the middles of these lists,
double linking is not necessary. Also, unlike regular bins, they
are not even processed in FIFO order (they use faster LIFO) since
ordering doesn't much matter in the transient contexts in which
fastbins are normally used.
Chunks in fastbins keep their inuse bit set, so they cannot
be consolidated with other free chunks. malloc_consolidate
releases all chunks in fastbins and consolidates them with
other free chunks.
*/
#define DEFAULT_MXFAST (64 * sizeof(size_t) / 4)
/* offset 2 to use otherwise unindexable first 2 bins */
#define fastbin_index(sz) \
((((unsigned int) (sz)) >> (sizeof(size_t) == 8 ? 4 : 3)) - 2)
/* The maximum fastbin request size we support */
#define MAX_FAST_SIZE (80 * sizeof(size_t) / 4)
#define NFASTBINS (fastbin_index (request2size (MAX_FAST_SIZE)) + 1)
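/* On a 64-bit build these work out to (illustrative, under the same
   assumptions as the request2size example above):
       DEFAULT_MXFAST == 128, MAX_FAST_SIZE == 160,
       request2size(160) == 176, fastbin_index(176) == (176 >> 4) - 2 == 9,
   so NFASTBINS == 10 singly-linked lists covering chunks of 32..176 bytes. */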
/*
FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
that triggers automatic consolidation of possibly-surrounding
fastbin chunks. This is a heuristic, so the exact value should not
matter too much. It is defined at half the default trim threshold as a
compromise heuristic to only attempt consolidation if it is likely
to lead to trimming. However, it is not dynamically tunable, since
consolidation reduces fragmentation surrounding large chunks even
if trimming is not used.
*/
#define FASTBIN_CONSOLIDATION_THRESHOLD (65536UL)
static char slovo[5] = {'S','L','O','V','O'};
typedef struct {
#ifndef _WIN32
pid_t pid;
#else
DWORD pid;
#endif
object next;
} process_entry;
struct PGF_INTERNAL_DECL malloc_state
{
/* Each .ngf file starts with 'SLOVO' as in:
 * "V načaloto be slovoto" (In the beginning was the word).
 * This lets us detect an attempt to read a non-.ngf file.
 */
char sign[5];
/* Set if the fastbin chunks contain recently inserted free blocks. */
bool have_fastchunks;
/* Fastbins */
object fastbins[NFASTBINS];
/* Base of the topmost chunk -- not otherwise kept in a bin */
object top;
/* The remainder from the most recent split of a small request */
object last_remainder;
/* Normal bins packed as described above */
object bins[NBINS * 2 - 2];
/* Bitmap of bins */
unsigned int binmap[BINMAPSIZE];
size_t file_size;
/* The namespace of all persistent grammar revisions */
Namespace<PgfPGF> revisions;
/* A reference to the first transient revision in
 * a doubly-linked list.
 */
ref<PgfPGF> transient_revisions;
ref<PgfConcr> transient_concr_revisions;
#ifdef _WIN32
/* Stores a Reader/Writer lock for Windows */
LONG rwlock;
#endif
process_entry p;
};
PGF_INTERNAL
PgfDB::PgfDB(const char* filepath, int flags, int mode) {
bool is_new = false;
fd = -1;
ms = NULL;
ref_count = 0;
if (filepath == NULL) {
this->filepath = NULL;
mmap_size = getpagesize();
is_new = true;
} else {
fd = open(filepath, flags, mode);
if (fd < 0)
throw pgf_systemerror(errno, filepath);
mmap_size = lseek(fd, 0, SEEK_END);
if (mmap_size == ((off_t) -1)) {
int code = errno;
close(fd);
throw pgf_systemerror(code, filepath);
}
is_new = false;
if (mmap_size == 0) {
mmap_size = getpagesize();
if (ftruncate(fd, mmap_size) < 0) {
int code = errno;
close(fd);
throw pgf_systemerror(code, filepath);
}
is_new = true;
}
this->filepath = strdup(filepath);
}
int code = 0;
#ifndef _WIN32
#ifndef MREMAP_MAYMOVE
if (fd >= 0) {
ms = (malloc_state*)
mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
code = errno;
} else {
ms = (malloc_state*) ::malloc(mmap_size);
code = ENOMEM;
}
#else
int mflags = (fd < 0) ? (MAP_PRIVATE | MAP_ANONYMOUS) : MAP_SHARED;
ms = (malloc_state*)
mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, mflags, fd, 0);
code = errno;
#endif
if (ms == MAP_FAILED || ms == NULL) {
ms = NULL; // mark that ms is not created.
::free((void *) this->filepath);
close(fd);
throw pgf_systemerror(code, filepath);
}
#else
char *name;
char buf[256];
if (fd >= 0) {
BY_HANDLE_FILE_INFORMATION hInfo;
if (!GetFileInformationByHandle((HANDLE) _get_osfhandle(fd), &hInfo)) {
code = last_error_to_errno();
::free((void *) this->filepath);
close(fd);
throw pgf_systemerror(code);
}
sprintf(buf, "gf-rwevent-%lx-%lx-%lx",
hInfo.dwVolumeSerialNumber,
hInfo.nFileIndexHigh,
hInfo.nFileIndexLow);
name = buf;
hMap = CreateFileMapping((HANDLE) _get_osfhandle(fd),
NULL,
PAGE_READWRITE,
HIWORD(mmap_size), LOWORD(mmap_size),
NULL);
if (hMap != NULL) {
ms = (malloc_state*) MapViewOfFile(hMap,
FILE_MAP_WRITE,
0,0,mmap_size);
if (ms == NULL) {
code = last_error_to_errno();
CloseHandle(hMap);
hMap = INVALID_HANDLE_VALUE;
}
} else {
code = last_error_to_errno();
hMap = INVALID_HANDLE_VALUE;
ms = NULL;
}
} else {
code = ENOMEM;
name = NULL;
hMap = INVALID_HANDLE_VALUE;
ms = (malloc_state*) ::malloc(mmap_size);
}
if (ms == NULL) {
::free((void *) this->filepath);
close(fd);
throw pgf_systemerror(code, filepath);
}
hRWEvent = CreateEvent(NULL, FALSE, FALSE, name);
if (hRWEvent == NULL) {
if (fd < 0) {
::free(ms);
} else {
UnmapViewOfFile(ms);
CloseHandle(hMap);
}
::free((void *) this->filepath);
close(fd);
throw pgf_systemerror(code, filepath);
}
#endif
#ifndef _WIN32
rwlock = ipc_rwlock_new(filepath);
#endif
if (is_new) {
init_state(mmap_size);
} else {
if (strncmp(ms->sign, slovo, sizeof(ms->sign)) != 0) {
#ifndef _WIN32
#ifndef MREMAP_MAYMOVE
if (fd < 0) {
::free(ms);
} else
#endif
munmap(ms,mmap_size);
#else
if (fd < 0) {
::free(ms);
} else {
UnmapViewOfFile(ms);
CloseHandle(hMap);
}
CloseHandle(hRWEvent);
#endif
::free((void *) this->filepath);
close(fd);
throw pgf_error("Invalid file content");
}
register_process();
}
}
PGF_INTERNAL
PgfDB::~PgfDB()
{
if (ms != NULL) {
unregister_process();
#ifndef _WIN32
if (ms->p.pid == 0)
ipc_rwlock_destroy(filepath, rwlock);
#endif
size_t size =
ms->top + chunksize(ptr(ms,ms->top)) + sizeof(size_t);
#ifndef _WIN32
#ifndef MREMAP_MAYMOVE
if (fd < 0) {
::free(ms);
} else
#endif
munmap(ms,size);
#else
if (fd < 0) {
::free(ms);
} else {
UnmapViewOfFile(ms);
CloseHandle(hMap);
}
CloseHandle(hRWEvent);
#endif
}
if (fd >= 0)
close(fd);
::free((void*) filepath);
}
PGF_INTERNAL
void PgfDB::register_process()
{
process_entry *pentry = &ms->p;
object *plast = NULL;
if (ms->p.pid != 0) {
while (pentry != (process_entry *) ptr(ms,0)) {
#ifndef _WIN32
char proc_file[32];
sprintf(proc_file, "/proc/%d", pentry->pid);
bool alive = (access(proc_file, F_OK) == 0);
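// NB: this liveness probe relies on Linux-style procfs; on other POSIX
// systems kill(pentry->pid, 0) == 0 || errno != ESRCH would be the usual
// alternative (noted as an aside; not what this code does).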
#else
HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION,
FALSE,pentry->pid);
DWORD dwExitCode = STILL_ACTIVE;
if (hProcess != NULL)
GetExitCodeProcess(hProcess,&dwExitCode);
bool alive = (dwExitCode == STILL_ACTIVE);
CloseHandle(hProcess);
#endif
if (!alive) {
// if there are dead processes -> remove them
if (plast == NULL) {
if (ms->p.next == 0) {
ms->p.pid = 0;
break;
} else {
object next = pentry->next;
*pentry = *((process_entry *) ptr(ms,next));
free_internal(next);
}
} else {
*plast = pentry->next;
free_internal(ofs(ms,pentry));
pentry = (process_entry *) ptr(ms, *plast);
}
} else {
plast = &pentry->next;
pentry = (process_entry *) ptr(ms, *plast);
}
}
}
if (plast != NULL) {
*plast = malloc_internal(sizeof(process_entry));
pentry = (process_entry*) ptr(ms,*plast);
pentry->next = 0;
}
pentry->pid = getpid();
}
PGF_INTERNAL
void PgfDB::unregister_process()
{
auto pid = getpid();
process_entry *pentry = &ms->p;
object *plast = NULL;
while (pentry != (process_entry *) ptr(ms,0)) {
if (pentry->pid == pid) {
if (plast == NULL) {
if (ms->p.next == 0) {
ms->p.pid = 0;
} else {
object next = pentry->next;
*pentry = *((process_entry *) ptr(ms,next));
free_internal(next);
}
} else {
*plast = pentry->next;
free_internal(ofs(ms,pentry));
pentry = (process_entry *) ptr(ms, *plast);
}
break;
} else {
plast = &pentry->next;
pentry = (process_entry *) ptr(ms, *plast);
}
}
}
void PgfDB::cleanup_revisions()
{
if (ms->p.next == 0) {
// The first process that opens this file makes sure that
// left-over transient revisions are released.
// They may be left over from a client process that was killed,
// or the garbage collector may not have managed to run
// pgf_release_revision() before the process ended.
while (ms->transient_revisions != 0) {
pgf_free_revision(this, ms->transient_revisions.as_object());
ref<PgfPGF> pgf = ms->transient_revisions;
ref<PgfPGF> next = pgf->next;
PgfPGF::release(pgf);
ms->transient_revisions = next;
}
while (ms->transient_concr_revisions != 0) {
ref<PgfConcr> concr = ms->transient_concr_revisions;
ref<PgfConcr> next = concr->next;
concr->ref_count -= concr->ref_count_ex;
if (!concr->ref_count) {
PgfConcr::release(concr);
}
ms->transient_concr_revisions = next;
}
}
}
PGF_INTERNAL
ref<PgfPGF> PgfDB::get_revision(PgfText *name)
{
return namespace_lookup(current_db->ms->revisions, name);
}
PGF_INTERNAL
void PgfDB::set_revision(ref<PgfPGF> pgf)
{
pgf->ref_count++;
Namespace<PgfPGF> nmsp = namespace_insert(current_db->ms->revisions, pgf);
namespace_release(current_db->ms->revisions);
current_db->ms->revisions = nmsp;
}
PGF_INTERNAL
void PgfDB::init_state(size_t size)
{
memcpy(ms->sign, slovo, sizeof(ms->sign));
/* Init fastbins */
ms->have_fastchunks = false;
for (int i = 0; i < NFASTBINS; ++i) {
ms->fastbins[i] = 0;
}
size_t sz = (sizeof(*ms) + sizeof(size_t));
sz = (sz & ~MALLOC_ALIGN_MASK) + MALLOC_ALIGN_MASK + 1;
mchunk* top_chunk = mem2chunk(((char*) ms) + sz);
ms->top = ofs(ms,top_chunk);
set_head(top_chunk, (size - (sz - sizeof(size_t))) | PREV_INUSE);
ms->last_remainder = 0;
/* Establish circular links for normal bins */
for (int i = 1; i < NBINS; ++i) {
mbin *bin = bin_at(ms, i);
bin->fd = bin->bk = ofs(ms,bin);
}
memset(ms->binmap, 0, sizeof(ms->binmap));
ms->file_size = size;
ms->revisions = 0;
ms->transient_revisions = 0;
ms->transient_concr_revisions = 0;
#ifdef _WIN32
ms->rwlock = 0;
#endif
ms->p.pid = getpid();
ms->p.next = 0;
}
/* Take a chunk off a bin list. */
static void
unlink_chunk (malloc_state* ms, mchunk* p)
{
mchunk* fd = ptr(ms,p->fd);
mchunk* bk = ptr(ms,p->bk);
fd->bk = ofs(ms,bk);
bk->fd = ofs(ms,fd);
if (!in_smallbin_range(p->mchunk_size) && p->fd_nextsize != 0) {
if (fd->fd_nextsize == 0) {
if (p->fd_nextsize == ofs(ms,p))
fd->fd_nextsize = fd->bk_nextsize = ofs(ms,fd);
else {
fd->fd_nextsize = p->fd_nextsize;
fd->bk_nextsize = p->bk_nextsize;
ptr(ms,p->fd_nextsize)->bk_nextsize = ofs(ms,fd);
ptr(ms,p->bk_nextsize)->fd_nextsize = ofs(ms,fd);
}
} else {
ptr(ms,p->fd_nextsize)->bk_nextsize = p->bk_nextsize;
ptr(ms,p->bk_nextsize)->fd_nextsize = p->fd_nextsize;
}
}
}
/*
------------------------- malloc_consolidate -------------------------
malloc_consolidate is a specialized version of free() that tears
down chunks held in fastbins. Free itself cannot be used for this
purpose since, among other things, it might place chunks back onto
fastbins. So, instead, we need to use a minor variant of the same
code.
*/
static void malloc_consolidate(malloc_state *ms)
{
object* fb; /* current fastbin being consolidated */
object* maxfb; /* last fastbin (for loop control) */
mchunk* p; /* current chunk being consolidated */
object next_fb; /* next chunk to consolidate */
mchunk* unsorted_bin; /* bin header */
mchunk* first_unsorted; /* chunk to link to */
/* These have same use as in free() */
mchunk* nextchunk;
size_t size;
size_t nextsize;
size_t prevsize;
int nextinuse;
ms->have_fastchunks = false;
unsorted_bin = unsorted_chunks(ms);
/*
Remove each chunk from fast bin and consolidate it, placing it
then in unsorted bin. Among other reasons for doing this,
placing in unsorted bin avoids needing to calculate actual bins
until malloc is sure that chunks aren't immediately going to be
reused anyway.
*/
maxfb = &ms->fastbins[NFASTBINS - 1];
fb = &ms->fastbins[0];
do {
if (*fb != 0) {
p = ptr(ms,*fb);
*fb = 0;
for (;;) {
next_fb = p->fd;
/* Slightly streamlined version of consolidation code in free() */
size = chunksize(p);
nextchunk = chunk_at_offset(p, size);
nextsize = chunksize(nextchunk);
if (!prev_inuse(p)) {
prevsize = prev_size(p);
size += prevsize;
p = chunk_at_offset(p, -((long) prevsize));
unlink_chunk (ms, p);
}
if (nextchunk != ptr(ms,ms->top)) {
nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
if (!nextinuse) {
size += nextsize;
unlink_chunk (ms, nextchunk);
} else
clear_inuse_bit_at_offset(nextchunk, 0);
first_unsorted = ptr(ms,unsorted_bin->fd);
unsorted_bin->fd = ofs(ms,p);
first_unsorted->bk = ofs(ms,p);
if (!in_smallbin_range(size)) {
p->fd_nextsize = 0;
p->bk_nextsize = 0;
}
set_head(p, size | PREV_INUSE);
p->bk = ofs(ms,unsorted_bin);
p->fd = ofs(ms,first_unsorted);
set_foot(p, size);
} else {
size += nextsize;
set_head(p, size | PREV_INUSE);
ms->top = ofs(ms,p);
}
if (next_fb == 0)
break;
p = ptr(ms,next_fb);
}
}
} while (fb++ != maxfb);
}
PGF_INTERNAL
object PgfDB::malloc_internal(size_t bytes)
{
unsigned int idx; /* associated bin index */
mbin* bin; /* associated bin */
mchunk* victim; /* inspected/selected chunk */
mchunk* remainder; /* remainder from a split */
unsigned long remainder_size; /* its size */
/*
Convert the request size to internal form by adding sizeof(size_t)
bytes of overhead, plus possibly more to obtain the necessary
alignment and/or to reach MINSIZE, the smallest allocatable size.
(Unlike glibc's checked_request2size, request2size performs no
overflow check for requests so large that they would wrap around
zero when padded and aligned.)
*/
size_t nb = request2size(bytes);
if (nb <= DEFAULT_MXFAST) {
idx = fastbin_index(nb);
if (ms->fastbins[idx] != 0) {
victim = ptr(ms,ms->fastbins[idx]);
ms->fastbins[idx] = victim->fd;
return ofs(ms,chunk2mem(victim));
}
}
/*
If a small request, check regular bin. Since these "smallbins"
hold one size each, no searching within bins is necessary.
(For a large request, we need to wait until unsorted chunks are
processed to find best fit. But for small ones, fits are exact
anyway, so we can check now, which is faster.)
*/
if (in_smallbin_range (nb)) {
idx = smallbin_index (nb);
bin = bin_at (ms, idx);
if ((victim = ptr(ms,last(bin))) != bin)
{
object bck = victim->bk;
set_inuse_bit_at_offset (victim, nb);
bin->bk = bck;
ptr(ms,bck)->fd = ofs(ms,bin);
return ofs(ms,chunk2mem(victim));
}
} else {
/*
If this is a large request, consolidate fastbins before continuing.
While it might look excessive to kill all fastbins before
even seeing if there is space available, this avoids
fragmentation problems normally associated with fastbins.
Also, in practice, programs tend to have runs of either small or
large requests, but less often mixtures, so consolidation is not
invoked all that often in most programs. And the programs in which
it is called frequently tend to fragment anyway.
*/
idx = largebin_index(nb);
if (ms->have_fastchunks)
malloc_consolidate(ms);
}
/*
Process recently freed or remaindered chunks, taking one only if
it is an exact fit or, for a small request, if the chunk is the
remainder from the most recent non-exact fit. Place other traversed chunks in
bins. Note that this step is the only place in any routine where
chunks are placed in bins.
The outer loop here is needed because we might not realize until
near the end of malloc that we should have consolidated, so must
do so and retry. This happens at most once, and only when we would
otherwise need to expand memory to service a "small" request.
*/
for (;;)
{
size_t size;
mchunk *fwd, *bck;
int iters = 0;
while ((victim = ptr(ms,unsorted_chunks(ms)->bk)) != unsorted_chunks(ms)) {
bck = ptr(ms,victim->bk);
size = chunksize(victim);
mchunk *next = chunk_at_offset(victim, size);
/*
If a small request, try to use last remainder if it is the
only chunk in unsorted bin. This helps promote locality for
runs of consecutive small requests. This is the only
exception to best-fit, and applies only when there is
no exact fit for a small chunk.
*/
if (in_smallbin_range(nb) &&
bck == unsorted_chunks(ms) &&
victim == ptr(ms,ms->last_remainder) &&
(unsigned long) (size) > (unsigned long) (nb + MINSIZE)) {
/* split and reattach remainder */
remainder_size = size - nb;
remainder = chunk_at_offset(victim, nb);
ms->last_remainder =
unsorted_chunks(ms)->bk =
unsorted_chunks(ms)->fd = ofs(ms,remainder);
remainder->bk = remainder->fd = ofs(ms,unsorted_chunks(ms));
if (!in_smallbin_range(remainder_size)) {
remainder->fd_nextsize = 0;
remainder->bk_nextsize = 0;
}
set_head(victim, nb | PREV_INUSE);
set_head(remainder, remainder_size | PREV_INUSE);
set_foot(remainder, remainder_size);
return ofs(ms,chunk2mem(victim));
}
/* remove from unsorted list */
unsorted_chunks(ms)->bk = ofs(ms,bck);
bck->fd = ofs(ms,unsorted_chunks(ms));
/* Take now instead of binning if exact fit */
if (size == nb) {
set_inuse_bit_at_offset(victim, size);
return ofs(ms,chunk2mem(victim));
}
/* place chunk in bin */
size_t victim_index;
if (in_smallbin_range(size)) {
victim_index = smallbin_index(size);
bck = bin_at(ms, victim_index);
fwd = ptr(ms,bck->fd);
} else {
victim_index = largebin_index(size);
bck = bin_at(ms, victim_index);
fwd = ptr(ms,bck->fd);
/* maintain large bins in sorted order */
if (fwd != bck) {
/* Or with inuse bit to speed comparisons */
size |= PREV_INUSE;
/* if smaller than smallest, bypass loop below */
if ((unsigned long) (size) < (unsigned long) ptr(ms,bck->bk)->mchunk_size) {
fwd = bck;
bck = ptr(ms,bck->bk);
victim->fd_nextsize = fwd->fd;
victim->bk_nextsize = ptr(ms,fwd->fd)->bk_nextsize;
ptr(ms,fwd->fd)->bk_nextsize = ptr(ms,victim->bk_nextsize)->fd_nextsize = ofs(ms,victim);
} else {
while ((unsigned long) size < fwd->mchunk_size) {
fwd = ptr(ms,fwd->fd_nextsize);
}
if ((unsigned long) size == (unsigned long) fwd->mchunk_size)
/* Always insert in the second position. */
fwd = ptr(ms,fwd->fd);
else {
victim->fd_nextsize = ofs(ms,fwd);
victim->bk_nextsize = fwd->bk_nextsize;
fwd->bk_nextsize = ofs(ms,victim);
ptr(ms,victim->bk_nextsize)->fd_nextsize = ofs(ms,victim);
}
bck = ptr(ms,fwd->bk);
}
} else {
victim->fd_nextsize = victim->bk_nextsize = ofs(ms,victim);
}
}
mark_bin(ms, victim_index);
victim->bk = ofs(ms,bck);
victim->fd = ofs(ms,fwd);
fwd->bk = ofs(ms,victim);
bck->fd = ofs(ms,victim);
#define MAX_ITERS 10000
if (++iters >= MAX_ITERS)
break;
}
/*
If a large request, scan through the chunks of current bin in
sorted order to find smallest that fits. Use the skip list for this.
*/
if (!in_smallbin_range(nb)) {
bin = bin_at(ms, idx);
/* skip scan if empty or largest chunk is too small */
if ((victim = ptr(ms,first(bin))) != bin &&
(unsigned long) victim->mchunk_size >= (unsigned long) (nb)) {
size_t size;
victim = ptr(ms,victim->bk_nextsize);
while (((unsigned long) (size = chunksize(victim)) <
(unsigned long) (nb)))
victim = ptr(ms,victim->bk_nextsize);
/* Avoid removing the first entry for a size so that the skip
list does not have to be rerouted. */
if (victim != ptr(ms,last(bin)) &&
victim->mchunk_size == ptr(ms,victim->fd)->mchunk_size)
victim = ptr(ms,victim->fd);
remainder_size = size - nb;
unlink_chunk(ms, victim);
/* Exhaust */
if (remainder_size < MINSIZE) {
set_inuse_bit_at_offset(victim, size);
} else { /* Split */
remainder = chunk_at_offset(victim, nb);
/* We cannot assume the unsorted list is empty and therefore
have to perform a complete insert here. */
bck = unsorted_chunks(ms);
fwd = ptr(ms,bck->fd);
remainder->bk = ofs(ms,bck);
remainder->fd = ofs(ms,fwd);
bck->fd = fwd->bk = ofs(ms,remainder);
if (!in_smallbin_range(remainder_size)) {
remainder->fd_nextsize = 0;
remainder->bk_nextsize = 0;
}
set_head (victim, nb | PREV_INUSE);
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);
}
return ofs(ms,chunk2mem(victim));
}
}
/*
Search for a chunk by scanning bins, starting with next largest
bin. This search is strictly by best-fit; i.e., the smallest
(with ties going to approximately the least recently used) chunk
that fits is selected.
The bitmap avoids needing to check that most blocks are nonempty.
The particular case of skipping all bins during warm-up phases
when no chunks have been returned yet is faster than it might look.
*/
++idx;
bin = bin_at(ms, idx);
unsigned int block = idx2block(idx);
unsigned int map = ms->binmap[block];
unsigned int bit = idx2bit(idx);
for (;;)
{
/* Skip rest of block if there are no more set bits in this block. */
if (bit > map || bit == 0) {
do {
if (++block >= BINMAPSIZE) /* out of bins */
goto use_top;
} while ((map = ms->binmap[block]) == 0);
bin = bin_at(ms, (block << BINMAPSHIFT));
bit = 1;
}
/* Advance to bin with set bit. There must be one. */
while ((bit & map) == 0) {
bin = next_bin(bin);
bit <<= 1;
}
/* Inspect the bin. It is likely to be non-empty */
victim = ptr(ms,last(bin));
/* If a false alarm (empty bin), clear the bit. */
if (victim == bin) {
ms->binmap[block] = map &= ~bit; /* Write through */
bin = next_bin(bin);
bit <<= 1;
} else {
size = chunksize(victim);
/* We know the first chunk in this bin is big enough to use. */
remainder_size = size - nb;
/* unlink */
unlink_chunk (ms, victim);
/* Exhaust */
if (remainder_size < MINSIZE) {
set_inuse_bit_at_offset(victim, size);
} else { /* Split */
remainder = chunk_at_offset(victim, nb);
/* We cannot assume the unsorted list is empty and therefore
have to perform a complete insert here. */
bck = unsorted_chunks(ms);
fwd = ptr(ms,bck->fd);
remainder->bk = ofs(ms,bck);
remainder->fd = ofs(ms,fwd);
bck->fd = fwd->bk = ofs(ms,remainder);
/* advertise as last remainder */
if (in_smallbin_range(nb))
ms->last_remainder = ofs(ms,remainder);
if (!in_smallbin_range(remainder_size)) {
remainder->fd_nextsize = 0;
remainder->bk_nextsize = 0;
}
set_head (victim, nb | PREV_INUSE);
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);
}
return ofs(ms,chunk2mem(victim));
}
}
use_top:
/*
If large enough, split off the chunk bordering the end of memory
(held in ms->top). Note that this is in accord with the best-fit
search rule. In effect, ms->top is treated as larger (and thus
less well fitting) than any other available chunk since it can
be extended to be as large as necessary (up to system
limitations).
We require that ms->top always exists (i.e., has size >=
MINSIZE) after initialization, so if it would otherwise be
exhausted by current request, it is replenished. (The main
reason for ensuring it exists is that we may need MINSIZE space
to put in fenceposts in sysmalloc.)
*/
victim = ptr(ms,ms->top);
size = chunksize(victim);
if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE)) {
remainder_size = size - nb;
remainder = chunk_at_offset(victim, nb);
ms->top = ofs(ms,remainder);
set_head(victim, nb | PREV_INUSE);
set_head(remainder, remainder_size | PREV_INUSE);
return ofs(ms,chunk2mem(victim));
} else if (ms->have_fastchunks) {
malloc_consolidate (ms);
/* restore original bin index */
if (in_smallbin_range (nb))
idx = smallbin_index (nb);
else
idx = largebin_index (nb);
} else { /* Otherwise, relay to handle system-dependent cases */
size_t page_size = getpagesize();
size_t alloc_size =
((nb + MINSIZE - size + page_size - 1) / page_size) * page_size;
size_t new_size =
ms->file_size + alloc_size;
resize_map(new_size);
victim = ptr(ms,ms->top);
size += alloc_size;
remainder_size = size - nb;
remainder = chunk_at_offset(victim, nb);
ms->top = ofs(ms,remainder);
set_head(victim, nb | PREV_INUSE);
set_head(remainder, remainder_size | PREV_INUSE);
return ofs(ms,chunk2mem(victim));
}
}
}
PGF_INTERNAL
object PgfDB::realloc_internal(object oldo, size_t bytes)
{
if (oldo == 0)
return malloc_internal(bytes);
mchunk *newp; /* chunk to return */
size_t newsize; /* its size */
mchunk *remainder; /* extra space at end of newp */
size_t remainder_size; /* its size */
size_t nb = request2size(bytes);
mchunk *oldp = mem2chunk(ptr(ms,oldo));
size_t oldsize = chunksize(oldp);
mchunk *next = chunk_at_offset(oldp, oldsize);
size_t nextsize = chunksize(next);
if (oldsize >= nb) {
/* already big enough; split below */
newp = oldp;
newsize = oldsize;
} else {
/* Try to expand forward into top */
if (ofs(ms,next) == ms->top &&
(unsigned long) (newsize = oldsize + nextsize) >=
(unsigned long) (nb + MINSIZE))
{
set_head_size(oldp, nb);
remainder = chunk_at_offset(oldp, nb);
ms->top = ofs(ms,remainder);
set_head(remainder, (newsize - nb) | PREV_INUSE);
return oldo;
}
/* Try to expand forward into next chunk; split off remainder below */
else if (ofs(ms,next) != ms->top &&
!inuse(next) &&
(unsigned long) (newsize = oldsize + nextsize) >=
(unsigned long) (nb))
{
newp = oldp;
unlink_chunk(ms, next);
}
/* allocate, copy, free */
else
{
object newo = malloc_internal(bytes);
newp = mem2chunk(ptr(ms,newo));
newsize = chunksize(newp);
/*
Avoid copy if newp is next chunk after oldp.
*/
if (newp == next) {
newsize += oldsize;
newp = oldp;
} else {
memcpy(ptr(ms,newo), ptr(ms,oldo), oldsize - sizeof(size_t));
free_internal(oldo);
return newo;
}
}
}
/* If possible, free extra space in old or extended chunk */
assert(newsize >= nb);
remainder_size = newsize - nb;
if (remainder_size < MINSIZE) { /* not enough extra to split off */
set_head_size(newp, newsize);
set_inuse_bit_at_offset(newp, newsize);
} else { /* split remainder */
remainder = chunk_at_offset(newp, nb);
set_head_size(newp, nb);
set_head(remainder, remainder_size | PREV_INUSE);
/* Mark remainder as inuse so free() won't complain */
set_inuse_bit_at_offset(remainder, remainder_size);
free_internal(ofs(ms,chunk2mem(remainder)));
}
return ofs(ms,chunk2mem(newp));
}
PGF_INTERNAL
void PgfDB::free_internal(object o)
{
size_t size; /* its size */
object *fb; /* associated fastbin */
mchunk *nextchunk; /* next contiguous chunk */
size_t nextsize; /* its size */
int nextinuse; /* true if nextchunk is used */
size_t prevsize; /* size of previous contiguous chunk */
mchunk* bck; /* misc temp for linking */
mchunk* fwd; /* misc temp for linking */
mchunk* p = mem2chunk(ptr(ms,o));
size = chunksize(p);
/*
If eligible, place chunk on a fastbin so it can be found
and used quickly in malloc.
*/
if ((unsigned long)(size) <= (unsigned long)(DEFAULT_MXFAST)) {
ms->have_fastchunks = true;
unsigned int idx = fastbin_index(size);
fb = &ms->fastbins[idx];
/* Link P to its fastbin: P->FD = *FB; *FB = P;
   (callers hold the DB lock, so no atomics are needed here) */
p->fd = *fb;
*fb = ofs(ms,p);
} else { /* Consolidate other chunks as they arrive. */
nextchunk = chunk_at_offset(p, size);
nextsize = chunksize(nextchunk);
/* consolidate backward */
if (!prev_inuse(p)) {
prevsize = prev_size(p);
size += prevsize;
p = chunk_at_offset(p, -((long) prevsize));
unlink_chunk (ms, p);
}
if (nextchunk != ptr(ms,ms->top)) {
/* get and clear inuse bit */
nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
/* consolidate forward */
if (!nextinuse) {
unlink_chunk (ms, nextchunk);
size += nextsize;
} else
clear_inuse_bit_at_offset(nextchunk, 0);
/*
Place the chunk in unsorted chunk list. Chunks are
not placed into regular bins until after they have
been given one chance to be used in malloc.
*/
bck = unsorted_chunks(ms);
fwd = ptr(ms,bck->fd);
p->fd = ofs(ms,fwd);
p->bk = ofs(ms,bck);
if (!in_smallbin_range(size)) {
p->fd_nextsize = 0;
p->bk_nextsize = 0;
}
bck->fd = ofs(ms,p);
fwd->bk = ofs(ms,p);
set_head(p, size | PREV_INUSE);
set_foot(p, size);
} else {
/*
If the chunk borders the current high end of memory,
consolidate into top
*/
size += nextsize;
set_head(p, size | PREV_INUSE);
ms->top = ofs(ms,p);
}
/*
If freeing a large space, consolidate possibly-surrounding
chunks. Then, if the total unused topmost memory exceeds trim
threshold, ask malloc_trim to reduce top.
Unless max_fast is 0, we don't know if there are fastbins
bordering top, so we cannot tell for sure whether threshold
has been reached unless fastbins are consolidated. But we
don't want to consolidate on each free. As a compromise,
consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
is reached.
*/
if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
if (ms->have_fastchunks)
malloc_consolidate(ms);
}
}
}
PGF_INTERNAL
ref<PgfPGF> PgfDB::revision2pgf(PgfRevision revision)
{
if (revision <= sizeof(*current_db->ms) || revision >= current_db->ms->top)
throw pgf_error("Invalid revision");
mchunk *chunk = mem2chunk(ptr(current_db->ms,revision));
if (chunksize(chunk) < sizeof(PgfPGF))
throw pgf_error("Invalid revision");
ref<PgfPGF> pgf = revision;
if (chunksize(chunk) - request2size(sizeof(PgfPGF)+pgf->name.size+1) > MINSIZE)
throw pgf_error("Invalid revision");
return pgf;
}
PGF_INTERNAL
bool PgfDB::is_persistant_revision(ref<PgfPGF> pgf)
{
return (pgf->prev == 0 && pgf->next == 0 &&
current_db->ms->transient_revisions != pgf);
}
PGF_INTERNAL
void PgfDB::link_transient_revision(ref<PgfPGF> pgf)
{
pgf->next = current_db->ms->transient_revisions;
if (current_db->ms->transient_revisions != 0)
current_db->ms->transient_revisions->prev = pgf;
current_db->ms->transient_revisions = pgf;
}
PGF_INTERNAL
void PgfDB::unlink_transient_revision(ref<PgfPGF> pgf)
{
if (pgf->next != 0)
pgf->next->prev = pgf->prev;
if (pgf->prev != 0)
pgf->prev->next = pgf->next;
else if (current_db->ms->transient_revisions == pgf)
current_db->ms->transient_revisions = pgf->next;
}
PGF_INTERNAL
ref<PgfConcr> PgfDB::revision2concr(PgfConcrRevision revision)
{
if (revision <= sizeof(*current_db->ms) || revision >= current_db->ms->top)
throw pgf_error("Invalid revision");
mchunk *chunk = mem2chunk(ptr(current_db->ms,revision));
if (chunksize(chunk) < sizeof(PgfConcr))
throw pgf_error("Invalid revision");
ref<PgfConcr> concr = revision;
if (chunksize(chunk) - request2size(sizeof(PgfConcr)+concr->name.size+1) > MINSIZE)
throw pgf_error("Invalid revision");
return concr;
}
PGF_INTERNAL
bool PgfDB::is_persistant_revision(ref<PgfConcr> concr)
{
return (concr->prev == 0 && concr->next == 0 &&
current_db->ms->transient_concr_revisions != concr);
}
PGF_INTERNAL
void PgfDB::link_transient_revision(ref<PgfConcr> concr)
{
concr->next = current_db->ms->transient_concr_revisions;
if (current_db->ms->transient_concr_revisions != 0)
current_db->ms->transient_concr_revisions->prev = concr;
current_db->ms->transient_concr_revisions = concr;
}
PGF_INTERNAL
void PgfDB::unlink_transient_revision(ref<PgfConcr> concr)
{
if (concr->next != 0)
concr->next->prev = concr->prev;
if (concr->prev != 0)
concr->prev->next = concr->next;
else if (current_db->ms->transient_concr_revisions == concr)
current_db->ms->transient_concr_revisions = concr->next;
}
PGF_INTERNAL
void PgfDB::sync()
{
malloc_state *ms = current_db->ms;
size_t size =
ms->top + chunksize(ptr(ms,ms->top)) + sizeof(size_t);
int res;
#ifndef _WIN32
#ifndef MREMAP_MAYMOVE
if (current_db->fd < 0) {
res = 0;
} else
#endif
res = msync((void *) ms, size, MS_SYNC | MS_INVALIDATE);
if (res != 0)
throw pgf_systemerror(errno);
#else
if (current_db->fd >= 0) {
if (!FlushViewOfFile(ms,size)) {
throw pgf_systemerror(last_error_to_errno());
}
}
#endif
}
#ifdef _WIN32
#define MAX_SPIN 50000
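/* The Windows reader/writer lock packs its whole state into the 32-bit
   ms->rwlock word (layout inferred from the masks below):
       bits  0-14  reader count   (0x00007FFF)
       bits 15-29  waiting count  (0x3FFF8000)
       bit  30     writer flag    (0x40000000)
   All transitions are attempted with InterlockedCompareExchange and
   retried on contention. */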
__forceinline __int16 ReaderCount(unsigned __int32 lock)
{
return lock & 0x00007FFF;
}
__forceinline __int32 SetReaders(unsigned __int32 lock, unsigned __int16 readers)
{
return (lock & ~0x00007FFF) | readers;
}
__forceinline __int16 WaitingCount(unsigned __int32 lock)
{
return (__int16) ((lock & 0x3FFF8000) >> 15);
}
__forceinline __int32 SetWaiting(unsigned __int32 lock, unsigned __int16 waiting)
{
return (lock & ~0x3FFF8000) | (waiting << 15);
}
__forceinline bool Writer(unsigned __int32 lock)
{
return (lock & 0x40000000) != 0;
}
__forceinline __int32 SetWriter(unsigned __int32 lock, bool writer)
{
if(writer)
return lock | 0x40000000;
else
return lock & ~0x40000000;
}
__forceinline bool AllClear(unsigned __int32 lock)
{
return (lock & 0x40007FFF) == 0;
}
#endif
void PgfDB::lock(DB_scope_mode m)
{
#ifndef _WIN32
int res =
(m == READER_SCOPE) ? pthread_rwlock_rdlock(rwlock)
: pthread_rwlock_wrlock(rwlock);
if (res != 0)
throw pgf_systemerror(res);
#else
for (int i = 0; ; ++i) {
unsigned __int32 temp = ms->rwlock;
if (m == READER_SCOPE && !Writer(temp)) {
if (InterlockedCompareExchange(&ms->rwlock, SetReaders(temp, ReaderCount(temp) + 1), temp) == temp)
return;
else
continue;
} else if (m == WRITER_SCOPE && AllClear(temp)) {
if (InterlockedCompareExchange(&ms->rwlock, SetWriter(temp, true), temp) == temp)
return;
else
continue;
} else {
if (i < MAX_SPIN) {
YieldProcessor();
continue;
}
//The pending write operation is taking too long, so we'll drop to the kernel and wait
if (InterlockedCompareExchange(&ms->rwlock, SetWaiting(temp, WaitingCount(temp) + 1), temp) != temp)
continue;
i = 0; //Reset the spincount for the next time
WaitForSingleObject(hRWEvent, INFINITE);
do
{
temp = ms->rwlock;
} while (InterlockedCompareExchange(&ms->rwlock, SetWaiting(temp, WaitingCount(temp) - 1), temp) != temp);
}
}
#endif
// If another process has resized the file we must resize the map
if (mmap_size != ms->file_size)
resize_map(ms->file_size);
}
void PgfDB::unlock()
{
#ifndef _WIN32
pthread_rwlock_unlock(rwlock);
#else
while (true) {
unsigned __int32 temp = ms->rwlock;
if (ReaderCount(temp) > 0) {
if (ReaderCount(temp) == 1 && WaitingCount(temp) != 0) {
//Note: this isn't, nor has to be, thread-safe, as the worst a duplicate notification can do
//is cause a waiting reader to wake, spin briefly, then go back to sleep
//We're the last reader and there's a pending write
//Wake one waiting writer
SetEvent(hRWEvent);
}
//Decrement reader count
if (InterlockedCompareExchange(&ms->rwlock, SetReaders(temp, ReaderCount(temp) - 1), temp) == temp)
break;
} else {
while(true) {
temp = ms->rwlock;
assert(Writer(temp));
if (WaitingCount(temp) == 0)
break;
//Note: this is thread-safe (there's guaranteed not to be another EndWrite simultaneously)
//Wake all waiting readers or writers, loop until wake confirmation is received
SetEvent(hRWEvent);
}
//Decrement writer count
if (InterlockedCompareExchange(&ms->rwlock, SetWriter(temp, false), temp) == temp)
break;
}
}
#endif
}
void PgfDB::resize_map(size_t new_size)
{
malloc_state* new_ms;
#ifndef _WIN32
// macOS does not implement mremap or MREMAP_MAYMOVE
#ifndef MREMAP_MAYMOVE
if (fd >= 0) {
size_t old_file_size = ms->file_size;
if (munmap(ms, mmap_size) == -1)
throw pgf_systemerror(errno);
ms = NULL;
if (old_file_size != new_size) {
if (ftruncate(fd, new_size) < 0)
throw pgf_systemerror(errno, filepath);
}
new_ms =
(malloc_state*) mmap(0, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (new_ms == MAP_FAILED)
throw pgf_systemerror(errno);
} else {
new_ms = (malloc_state*) realloc(ms, new_size);
if (new_ms == NULL)
throw pgf_systemerror(ENOMEM);
}
#else
if (fd >= 0 && ms->file_size != new_size) {
if (ftruncate(fd, new_size) < 0)
throw pgf_systemerror(errno, filepath);
}
new_ms =
(malloc_state*) mremap(ms, mmap_size, new_size, MREMAP_MAYMOVE);
if (new_ms == MAP_FAILED)
throw pgf_systemerror(errno);
#endif
#else
if (fd >= 0) {
UnmapViewOfFile(ms);
CloseHandle(hMap);
ms = NULL;
hMap = CreateFileMapping((HANDLE) _get_osfhandle(fd),
NULL,
PAGE_READWRITE,
HIWORD(new_size), LOWORD(new_size),
NULL);
if (hMap == NULL) {
hMap = INVALID_HANDLE_VALUE;
throw pgf_systemerror(last_error_to_errno());
}
new_ms = (malloc_state*) MapViewOfFile(hMap,
FILE_MAP_WRITE,
0,0,new_size);
if (new_ms == NULL)
throw pgf_systemerror(last_error_to_errno());
} else {
new_ms = (malloc_state*) realloc(ms, new_size);
if (new_ms == NULL)
throw pgf_systemerror(ENOMEM);
}
#endif
ms = new_ms;
current_base = (unsigned char*) ms;
mmap_size = new_size;
ms->file_size = new_size;
}
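/* Usage sketch (illustrative): DB_scope is the RAII guard through which
   access to a PgfDB is meant to flow. It takes the lock, points the
   thread-local current_db/current_base at this database, and restores the
   previous scope on destruction:

       {
           DB_scope scope(db, WRITER_SCOPE);  // or READER_SCOPE
           ...  // work with ref<...> values, which resolve via current_base
       }        // unlocks and restores the outer scope, if any
*/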
DB_scope::DB_scope(PgfDB *db, DB_scope_mode m)
{
db->lock(m);
save_db = current_db;
current_db = db;
current_base = (unsigned char*) current_db->ms;
next_scope = last_db_scope;
last_db_scope = this;
}
DB_scope::~DB_scope()
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wterminate"
current_db->unlock();
current_db = save_db;
current_base = current_db ? (unsigned char*) current_db->ms
: NULL;
last_db_scope = next_scope;
#pragma GCC diagnostic pop
}