1
0
forked from GitHub/gf-core
Files
gf-core/src/runtime/c/pgf/db.cxx
2024-03-21 16:42:41 +01:00

1965 lines
56 KiB
C++

// #define DEBUG_MEMORY_ALLOCATOR
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include "data.h"
#ifndef _WIN32
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <errno.h>
#include <pthread.h>
#else
#include <time.h>
static
size_t getpagesize()
{
SYSTEM_INFO si;
GetSystemInfo(&si);
return si.dwPageSize;
}
#if defined(_MSC_VER)
#include <io.h>
#define open _open
#define lseek _lseeki64
#define close _close
#include <BaseTsd.h>
#endif
#define ftruncate _chsize
#define getpid GetCurrentProcessId
static
int last_error_to_errno()
{
switch (GetLastError()) {
case ERROR_SUCCESS:
return 0;
case ERROR_OUTOFMEMORY:
return ENOMEM;
case ERROR_HANDLE_DISK_FULL:
return ENOSPC;
default:
return EINVAL;
}
}
#define LODWORD(_qw) ((DWORD)(_qw))
#define HIDWORD(_qw) ((DWORD)(((_qw) >> 32) & 0xffffffff))
#endif
#if defined(_MSC_VER)
PGF_INTERNAL __declspec( thread ) unsigned char* current_base = NULL;
PGF_INTERNAL __declspec( thread ) PgfDB* current_db = NULL;
PGF_INTERNAL __declspec( thread ) DB_scope *last_db_scope = NULL;
#else
PGF_INTERNAL __thread unsigned char* current_base __attribute__((tls_model("initial-exec"))) = NULL;
PGF_INTERNAL __thread PgfDB* current_db __attribute__((tls_model("initial-exec"))) = NULL;
PGF_INTERNAL __thread DB_scope *last_db_scope __attribute__((tls_model("initial-exec"))) = NULL;
#endif
#define ptr(T,o) ((T*) (base + (o)))
/* pad request bytes into a usable size -- internal version */
#define request2size(req) \
(((req) + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
static char slovo[5] = {'S','L','O','V','O'};
typedef struct {
#ifndef _WIN32
pid_t pid;
#else
DWORD pid;
#endif
uint32_t ref_count;
object o;
txn_t txn_id;
} revision_entry;
struct PGF_INTERNAL_DECL block_descr
{
size_t sz; // the size of the binary tree
size_t block_size; // the size of the block
object o; // the block itself
object left;
object right;
object chain; // links descriptors that are not in use
txn_t block_txn_id; // transaction which released the block
txn_t descr_txn_id; // transaction which allocated the descriptor
};
struct PGF_INTERNAL_DECL malloc_state
{
/* Each .ngf file starts with 'SLOVO' as in:
* "V načaloto be slovoto" (In the beginning was the word)
* In this way we detect an attempt to read a non .ngf file.
*/
char sign[5];
/* The current size is used to detect when the file was resized
* by another process. The file size doesn't include the first 4K */
size_t file_size;
#ifndef _WIN32
pthread_mutex_t rev_mutex;
pthread_mutex_t write_mutex;
pthread_rwlock_t rwlock;
pid_t writer_pid;
#else
/* Stores a Reader/Writer lock for Windows */
LONG rwlock;
DWORD writer_pid;
#endif
txn_t curr_txn_id;
txn_t min_txn_id;
/* The top address that is not allocated yet. */
object top;
object free_blocks; // a binary tree of descriptors for free blocks
object free_descriptors;
size_t n_revisions;
object active_revision;
revision_entry revisions[];
};
static inline ssize_t get_mmap_size(size_t init_size, size_t page_size)
{
size_t mmap_size = ((init_size+page_size-1)/page_size)*page_size;
if (mmap_size < page_size*2)
mmap_size = page_size*2;
return mmap_size;
}
PGF_INTERNAL_DECL
size_t get_next_padovan(size_t min);
static inline ssize_t get_mremap_size(size_t file_size, size_t block_size, size_t free_size, size_t page_size)
{
size_t n_pages =
((file_size - free_size + block_size + page_size - 1) / page_size);
return get_next_padovan(n_pages) * page_size ;
}
PGF_INTERNAL
PgfDB::PgfDB(const char* filepath, int flags, int mode, size_t init_size) {
bool is_new = false;
fd = -1;
ms = NULL;
ref_count = 0;
page_size = getpagesize();
pid = getpid();
if (filepath == NULL) {
this->filepath = NULL;
mmap_size = get_mmap_size(init_size, page_size);
is_new = true;
} else {
fd = open(filepath, flags, mode);
if (fd < 0)
throw pgf_systemerror(errno, filepath);
mmap_size = lseek(fd, 0, SEEK_END);
if (mmap_size == ((size_t) -1)) {
int code = errno;
close(fd);
throw pgf_systemerror(code, filepath);
}
is_new = false;
if (mmap_size == 0) {
mmap_size = get_mmap_size(init_size, page_size);
if (ftruncate(fd, mmap_size) < 0) {
int code = errno;
close(fd);
throw pgf_systemerror(code, filepath);
}
is_new = true;
}
this->filepath = strdup(filepath);
if (this->filepath == NULL) {
close(fd);
throw pgf_systemerror(ENOMEM);
}
}
int code = 0;
#ifndef _WIN32
#ifndef MREMAP_MAYMOVE
if (fd >= 0) {
ms = (malloc_state*)
mmap(NULL, mmap_size, PROT_READ, MAP_SHARED, fd, 0);
if (ms == MAP_FAILED) {
code = errno;
ms = NULL; // mark that ms is not created.
base = NULL;
::free((void *) this->filepath);
close(fd);
throw pgf_systemerror(code, filepath);
}
if (mprotect(ms, page_size, PROT_READ | PROT_WRITE) == -1) {
code = errno;
munmap(ms,mmap_size);
::free((void *) this->filepath);
close(fd);
throw pgf_systemerror(code, filepath);
}
mmap_size -= page_size;
base = ((unsigned char *) ms) + page_size;
} else {
ms = (malloc_state*) ::malloc(page_size);
if (ms == NULL)
throw pgf_systemerror(ENOMEM);
mmap_size -= page_size;
base = (unsigned char *) ::malloc(mmap_size);
if (base == NULL)
throw pgf_systemerror(ENOMEM);
}
#else
int mflags = (fd < 0) ? (MAP_PRIVATE | MAP_ANONYMOUS) : MAP_SHARED;
ms = (malloc_state*)
mmap(NULL, mmap_size, PROT_READ, mflags, fd, 0);
if (ms == MAP_FAILED) {
code = errno;
ms = NULL; // mark that ms is not created.
base = NULL;
::free((void *) this->filepath);
close(fd);
throw pgf_systemerror(code, filepath);
}
if (mprotect(ms, page_size, PROT_READ | PROT_WRITE) == -1) {
code = errno;
munmap(ms,mmap_size);
::free((void *) this->filepath);
close(fd);
throw pgf_systemerror(code, filepath);
}
mmap_size -= page_size;
base = ((unsigned char *) ms) + page_size;
#endif
#else
char *rev_mutex_name;
char *write_mutex_name;
char *event_name;
char buf[256];
if (fd >= 0) {
BY_HANDLE_FILE_INFORMATION hInfo;
if (!GetFileInformationByHandle((HANDLE) _get_osfhandle(fd), &hInfo)) {
code = last_error_to_errno();
::free((void *) this->filepath);
close(fd);
throw pgf_systemerror(code);
}
rev_mutex_name = buf;
sprintf(rev_mutex_name,
"gf-rev-mutex-%lx-%lx-%lx",
hInfo.dwVolumeSerialNumber,
hInfo.nFileIndexHigh,
hInfo.nFileIndexLow);
write_mutex_name = rev_mutex_name+strlen(rev_mutex_name+1);
sprintf(write_mutex_name,
"gf-write-mutex-%lx-%lx-%lx",
hInfo.dwVolumeSerialNumber,
hInfo.nFileIndexHigh,
hInfo.nFileIndexLow);
event_name = write_mutex_name+strlen(write_mutex_name+1);
sprintf(event_name,
"gf-rwevent-%lx-%lx-%lx",
hInfo.dwVolumeSerialNumber,
hInfo.nFileIndexHigh,
hInfo.nFileIndexLow);
hMap = CreateFileMapping((HANDLE) _get_osfhandle(fd),
NULL,
PAGE_READWRITE,
HIDWORD(mmap_size), LODWORD(mmap_size),
NULL);
if (hMap != NULL) {
ms = (malloc_state*) MapViewOfFile(hMap,
FILE_MAP_WRITE,
0,0,mmap_size);
if (ms == NULL) {
code = last_error_to_errno();
CloseHandle(hMap);
::free((void *) this->filepath);
close(fd);
throw pgf_systemerror(code, filepath);
}
mmap_size -= page_size;
base = ((unsigned char *) ms) + page_size;
} else {
code = last_error_to_errno();
::free((void *) this->filepath);
close(fd);
throw pgf_systemerror(code, filepath);
}
} else {
rev_mutex_name = NULL;
write_mutex_name = NULL;
event_name = NULL;
hMap = INVALID_HANDLE_VALUE;
ms = (malloc_state*) ::malloc(mmap_size);
if (ms == NULL)
throw pgf_systemerror(ENOMEM);
mmap_size -= page_size;
base = ((unsigned char *) ms) + page_size;
}
hRevMutex = CreateMutex(NULL, FALSE, rev_mutex_name);
if (hRevMutex == NULL) {
code = last_error_to_errno();
if (fd < 0) {
::free(ms);
} else {
UnmapViewOfFile(ms);
CloseHandle(hMap);
}
::free((void *) this->filepath);
close(fd);
throw pgf_systemerror(code, filepath);
}
hWriteMutex = CreateMutex(NULL, FALSE, write_mutex_name);
if (hWriteMutex == NULL) {
CloseHandle(hRevMutex);
code = last_error_to_errno();
if (fd < 0) {
::free(ms);
} else {
UnmapViewOfFile(ms);
CloseHandle(hMap);
}
::free((void *) this->filepath);
close(fd);
throw pgf_systemerror(code, filepath);
}
hRWEvent = CreateEvent(NULL, FALSE, FALSE, event_name);
if (hRWEvent == NULL) {
CloseHandle(hWriteMutex);
CloseHandle(hRevMutex);
if (fd < 0) {
::free(ms);
} else {
UnmapViewOfFile(ms);
CloseHandle(hMap);
}
::free((void *) this->filepath);
close(fd);
throw pgf_systemerror(code, filepath);
}
#endif
if (is_new) {
code = init_state();
if (code != 0) {
#ifndef _WIN32
#ifndef MREMAP_MAYMOVE
if (fd < 0) {
::free(ms);
::free(base);
} else
#endif
munmap(ms,page_size+mmap_size);
#else
if (fd < 0) {
::free(ms);
} else {
UnmapViewOfFile(ms);
CloseHandle(hMap);
}
CloseHandle(hRWEvent);
CloseHandle(hWriteMutex);
CloseHandle(hRevMutex);
#endif
::free((void *) this->filepath);
close(fd);
throw pgf_systemerror(code, filepath);
}
} else {
if (strncmp(ms->sign, slovo, sizeof(ms->sign)) != 0) {
#ifndef _WIN32
#ifndef MREMAP_MAYMOVE
if (fd < 0) {
::free(ms);
::free(base);
} else
#endif
munmap(ms,page_size+mmap_size);
#else
if (fd < 0) {
::free(ms);
} else {
UnmapViewOfFile(ms);
CloseHandle(hMap);
}
CloseHandle(hRWEvent);
CloseHandle(hWriteMutex);
CloseHandle(hRevMutex);
#endif
::free((void *) this->filepath);
close(fd);
throw pgf_error("Invalid file content");
}
cleanup_state();
}
top = ms->top;
free_blocks = ms->free_blocks;
free_descriptors[0] = ms->free_descriptors;
free_descriptors[1] = 0;
free_descriptors[2] = 0;
last_free_block = 0;
last_free_block_size = 0;
last_free_block_txn_id= 0;
seed = (unsigned long int) time(NULL);
transaction_object = 0;
}
PGF_INTERNAL
PgfDB::~PgfDB()
{
#ifndef _WIN32
#ifndef MREMAP_MAYMOVE
if (fd < 0) {
pthread_rwlock_destroy(&ms->rwlock);
pthread_mutex_destroy(&ms->write_mutex);
pthread_mutex_destroy(&ms->rev_mutex);
::free(ms);
::free(base);
} else
#endif
{
if (ms != NULL)
munmap(ms,sizeof(*ms));
if (base != NULL)
munmap(base,mmap_size);
}
#else
if (fd < 0) {
::free(ms);
} else {
UnmapViewOfFile(ms);
CloseHandle(hMap);
}
CloseHandle(hRWEvent);
CloseHandle(hWriteMutex);
CloseHandle(hRevMutex);
#endif
if (fd >= 0)
close(fd);
::free((void*) filepath);
}
PGF_INTERNAL
txn_t PgfDB::get_txn_id() {
return current_db->ms->curr_txn_id;
}
PGF_INTERNAL
object PgfDB::register_revision(object o, txn_t txn_id)
{
#ifndef _WIN32
pthread_mutex_lock(&ms->rev_mutex);
#else
WaitForSingleObject(hRevMutex, INFINITE);
#endif
bool found = false;
revision_entry *found_entry = NULL;
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, "revisions");
#endif
ms->min_txn_id = SIZE_MAX;
for (size_t i = 0; i < ms->n_revisions; i++) {
revision_entry *entry = &ms->revisions[i];
if (entry->ref_count == 0) {
if (found_entry == NULL)
found_entry = entry;
} else {
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, " %ld:%s(%016lx):%ld",
entry->txn_id,
((entry->o & MALLOC_ALIGN_MASK) == PgfPGF::tag) ? "pgf" : "concr",
entry->o & ~MALLOC_ALIGN_MASK,
entry->ref_count);
#endif
if (entry->pid == pid &&
entry->o == o) {
entry->ref_count++;
entry->txn_id = txn_id;
found_entry = entry;
found = true;
}
if (ms->min_txn_id > entry->txn_id)
ms->min_txn_id = entry->txn_id;
}
}
if (!found) {
if (found_entry == NULL) {
size_t n_max = (page_size-sizeof(malloc_state))/sizeof(revision_entry);
if (ms->n_revisions >= n_max) {
#ifndef _WIN32
pthread_mutex_unlock(&ms->rev_mutex);
#else
ReleaseMutex(hRevMutex);
#endif
throw pgf_error("Too many retained database revisions");
}
found_entry = &ms->revisions[ms->n_revisions++];
}
found_entry->pid = pid;
found_entry->o = o;
found_entry->ref_count = 1;
found_entry->txn_id = txn_id;
if (ms->min_txn_id > txn_id)
ms->min_txn_id = txn_id;
}
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, " minimal %ld\n", ms->min_txn_id);
#endif
#ifndef _WIN32
pthread_mutex_unlock(&ms->rev_mutex);
#else
ReleaseMutex(hRevMutex);
#endif
return (found_entry - ms->revisions) + 1;
}
PGF_INTERNAL
void PgfDB::unregister_revision(object revision)
{
if (revision == 0 || revision-1 >= ms->n_revisions)
throw pgf_error("Invalid revision");
revision_entry *entry = &ms->revisions[revision-1];
if (entry->ref_count == 0)
throw pgf_error("Invalid revision");
#ifndef _WIN32
pthread_mutex_lock(&ms->rev_mutex);
#else
WaitForSingleObject(hRevMutex, INFINITE);
#endif
if (--entry->ref_count == 0) {
// Maybe this was the last revision in the list.
// Decrement n_revisions if possible.
while (ms->revisions[ms->n_revisions-1].ref_count == 0) {
ms->n_revisions--;
if (ms->n_revisions == 0)
break;
}
}
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, "revisions");
#endif
ms->min_txn_id = SIZE_MAX;
for (size_t i = 0; i < ms->n_revisions; i++) {
revision_entry *entry = &ms->revisions[i];
if (entry->ref_count > 0) {
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, " %ld:%s(%016lx):%ld",
entry->txn_id,
((entry->o & MALLOC_ALIGN_MASK) == PgfPGF::tag) ? "pgf" : "concr",
entry->o & ~MALLOC_ALIGN_MASK,
entry->ref_count);
#endif
if (ms->min_txn_id > entry->txn_id)
ms->min_txn_id = entry->txn_id;
}
}
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, " minimal %ld\n", ms->min_txn_id);
#endif
#ifndef _WIN32
pthread_mutex_unlock(&ms->rev_mutex);
#else
ReleaseMutex(hRevMutex);
#endif
}
void PgfDB::cleanup_state()
{
#ifndef _WIN32
pthread_mutex_lock(&ms->rev_mutex);
#else
WaitForSingleObject(hRevMutex, INFINITE);
#endif
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, "revisions");
#endif
size_t n_revisions = 0;
txn_t min_txn_id = SIZE_MAX;
bool dead_writer = false;
// If there are dead processes, set their reference counts to 0.
for (size_t i = 0; i < ms->n_revisions; i++) {
revision_entry *entry = &ms->revisions[i];
if (entry->ref_count > 0) {
#ifndef _WIN32
char proc_file[32];
sprintf(proc_file, "/proc/%d", entry->pid);
bool alive = (access(proc_file, F_OK) == 0);
#else
HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION,
FALSE,entry->pid);
DWORD dwExitCode = STILL_ACTIVE;
if (hProcess != NULL)
GetExitCodeProcess(hProcess,&dwExitCode);
bool alive = (dwExitCode == STILL_ACTIVE);
CloseHandle(hProcess);
#endif
if (alive) {
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, " %ld:%s(%016lx):%ld",
entry->txn_id,
((entry->o & MALLOC_ALIGN_MASK) == PgfPGF::tag) ? "pgf" : "concr",
entry->o & ~MALLOC_ALIGN_MASK,
entry->ref_count);
#endif
if (min_txn_id > entry->txn_id)
min_txn_id = entry->txn_id;
n_revisions = (i+1);
} else {
entry->ref_count = 0;
if (entry->pid == ms->writer_pid) {
dead_writer = true;
}
}
}
}
ms->n_revisions = n_revisions;
ms->min_txn_id = min_txn_id;
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, " minimal %ld\n", ms->min_txn_id);
#endif
if (dead_writer) {
// The writer has died, cleanup the writer mutex
init_write_mutex();
}
if (ms->n_revisions == 0) {
// This is the only process that has access to the database,
// time to cleanup the rwlock
init_rwlock_mutex();
}
#ifndef _WIN32
pthread_mutex_unlock(&ms->rev_mutex);
#else
ReleaseMutex(hRevMutex);
#endif
}
PGF_INTERNAL
object PgfDB::get_active_revision()
{
return current_db->ms->active_revision;
}
PGF_INTERNAL
int PgfDB::init_rev_mutex()
{
#ifndef _WIN32
int res;
pthread_mutexattr_t attr;
if ((res = pthread_mutexattr_init(&attr)) != 0) {
return res;
}
if (fd >= 0 &&
(res = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)) != 0) {
pthread_mutexattr_destroy(&attr);
return res;
}
if ((res = pthread_mutex_init(&ms->rev_mutex, &attr)) != 0) {
pthread_mutexattr_destroy(&attr);
return res;
}
pthread_mutexattr_destroy(&attr);
#endif
return 0;
}
PGF_INTERNAL
int PgfDB::init_write_mutex()
{
#ifndef _WIN32
int res;
pthread_mutexattr_t attr;
if ((res = pthread_mutexattr_init(&attr)) != 0) {
return res;
}
if (fd >= 0 &&
(res = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)) != 0) {
pthread_mutexattr_destroy(&attr);
return res;
}
if ((res = pthread_mutex_init(&ms->write_mutex, &attr)) != 0) {
pthread_mutexattr_destroy(&attr);
return res;
}
pthread_mutexattr_destroy(&attr);
#endif
ms->writer_pid = 0;
return 0;
}
PGF_INTERNAL
int PgfDB::init_rwlock_mutex()
{
#ifdef _WIN32
ms->rwlock = 0;
#else
int res;
pthread_rwlockattr_t attr;
if ((res = pthread_rwlockattr_init(&attr)) != 0) {
return res;
}
if (fd >= 0 &&
(res = pthread_rwlockattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)) != 0) {
pthread_rwlockattr_destroy(&attr);
return res;
}
if ((res = pthread_rwlock_init(&ms->rwlock, &attr)) != 0) {
pthread_rwlockattr_destroy(&attr);
return res;
}
pthread_rwlockattr_destroy(&attr);
#endif
return 0;
}
PGF_INTERNAL
int PgfDB::init_state()
{
memcpy(ms->sign, slovo, sizeof(ms->sign));
ms->top = request2size(1); // we don't want to start from 0
ms->file_size = mmap_size;
int res;
if ((res = init_rev_mutex()) != 0)
return res;
if ((res = init_write_mutex()) != 0)
return res;
if ((res = init_rwlock_mutex()) != 0)
return res;
ms->curr_txn_id = 1;
ms->min_txn_id = 1;
ms->free_blocks = 0;
ms->free_descriptors = 0;
ms->n_revisions = 0;
return 0;
}
PGF_INTERNAL
size_t PgfDB::block_descr_size(object map)
{
if (map == 0)
return 0;
return ptr(block_descr, map)->sz;
}
PGF_INTERNAL_DECL object PgfDB::new_block_descr(object o, size_t size, txn_t txn_id)
{
object odescr;
block_descr *descr;
if (free_descriptors[0] != 0) {
odescr = free_descriptors[0];
descr = ptr(block_descr, odescr);
free_descriptors[0] = descr->chain;
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, "recycled block descriptor %016lx\n", odescr);
#endif
} else {
size_t block_size = request2size(sizeof(block_descr));
size_t free_size = mmap_size - top;
if (block_size > free_size) {
size_t new_size =
get_mremap_size(ms->file_size, block_size, free_size, page_size);
resize_map(new_size, true);
}
odescr = top;
top += block_size;
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, "allocated new block descriptor %016lx\n", odescr);
#endif
descr = ptr(block_descr, odescr);
}
descr->sz = 1;
descr->left = 0;
descr->right = 0;
descr->chain = 0;
descr->o = o;
descr->block_size = size;
descr->block_txn_id = txn_id;
descr->descr_txn_id = ms->curr_txn_id;
return odescr;
}
PGF_INTERNAL
object PgfDB::upd_block_descr(object map, object left, object right)
{
block_descr *descr = ptr(block_descr, map);
if (descr->descr_txn_id != ms->curr_txn_id) {
block_descr *new_descr;
descr->chain = free_descriptors[1];
free_descriptors[1] = map;
if (free_descriptors[2] == 0)
free_descriptors[2] = map;
if (free_descriptors[0] != 0) {
map = free_descriptors[0];
new_descr = ptr(block_descr, map);
free_descriptors[0] = new_descr->chain;
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, "recycled block descriptor %016lx\n", map);
#endif
} else {
size_t block_size = request2size(sizeof(block_descr));
size_t free_size = mmap_size - top;
if (block_size > free_size) {
size_t new_size =
get_mremap_size(ms->file_size, block_size, free_size, page_size);
resize_map(new_size, true);
// refresh the pointer
descr = ptr(block_descr, map);
}
map = top;
top += block_size;
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, "allocated new block descriptor %016lx\n", map);
#endif
new_descr = ptr(block_descr, map);
}
new_descr->block_size = descr->block_size;
new_descr->o = descr->o;
new_descr->chain = 0;
new_descr->block_txn_id = descr->block_txn_id;
new_descr->descr_txn_id = ms->curr_txn_id;
descr = new_descr;
}
descr->sz = 1+block_descr_size(left)+block_descr_size(right);
descr->left = left;
descr->right = right;
return map;
}
PGF_INTERNAL
object PgfDB::balanceL_block_descriptor(object map)
{
block_descr *descr = ptr(block_descr, map);
if (descr->right == 0) {
if (descr->left == 0) {
return map;
} else {
block_descr *left = ptr(block_descr, descr->left);
if (left->left == 0) {
if (left->right == 0) {
return map;
} else {
object left_right = left->right;
object left = upd_block_descr(descr->left,0,0);
object right = upd_block_descr(map,0,0);
return upd_block_descr(left_right,
left,
right);
}
} else {
if (left->right == 0) {
object left_left = left->left;
object left = descr->left;
object right = upd_block_descr(map,0,0);
return upd_block_descr(left,
left_left,
right);
} else {
object left_left = left->left;
object left_right = left->right;
if (ptr(block_descr, left_right)->sz < 2 * ptr(block_descr, left_left)->sz) {
object left = descr->left;
object right =
upd_block_descr(map,
left_right,
0);
return upd_block_descr(left,
left_left,
right);
} else {
object left =
upd_block_descr(descr->left,
left_left,
ptr(block_descr, left_right)->left);
object right =
upd_block_descr(map,
ptr(block_descr, left_right)->right,
0);
return upd_block_descr(left_right,
left,
right);
}
}
}
}
} else {
if (descr->left == 0) {
return map;
} else {
if (ptr(block_descr, descr->left)->sz > 3*ptr(block_descr, descr->right)->sz) {
block_descr *left = ptr(block_descr, descr->left);
object left_left = left->left;
object left_right = left->right;
if (ptr(block_descr, left_right)->sz < 2*ptr(block_descr, left_left)->sz) {
object left = descr->left;
object right =
upd_block_descr(map,
left_right,
descr->right);
return upd_block_descr(left,
left_left,
right);
} else {
object left =
upd_block_descr(descr->left,
left_left,
ptr(block_descr, left_right)->left);
object right =
upd_block_descr(map,
ptr(block_descr, left_right)->right,
ptr(block_descr, map)->right);
return upd_block_descr(left_right,
left,
right);
}
} else {
return map;
}
}
}
}
PGF_INTERNAL
object PgfDB::balanceR_block_descriptor(object map)
{
block_descr *descr = ptr(block_descr, map);
if (descr->left == 0) {
if (descr->right == 0) {
return map;
} else {
block_descr *right = ptr(block_descr, descr->right);
if (right->left == 0) {
if (right->right == 0) {
return map;
} else {
object right_right = right->right;
object right = descr->right;
object left =
upd_block_descr(map,
0,
0);
return upd_block_descr(right,
left,
right_right);
}
} else {
if (right->right == 0) {
object right_left = right->left;
object right =
upd_block_descr(descr->right,0,0);
object left =
upd_block_descr(map,0,0);
return upd_block_descr(right_left,
left,
right);
} else {
object right_left = right->left;
object right_right = right->right;
if (ptr(block_descr,right_left)->sz < 2 * ptr(block_descr,right_right)->sz) {
object right = descr->right;
object left =
upd_block_descr(map,
0,
right_left);
return upd_block_descr(right,
left,
right_right);
} else {
object right =
upd_block_descr(descr->right,
ptr(block_descr,right_left)->right,
right_right);
object left =
upd_block_descr(map,
0,
ptr(block_descr,right_left)->left);
return upd_block_descr(right_left,
left,
right);
}
}
}
}
} else {
if (descr->right == 0) {
return map;
} else {
if (ptr(block_descr,descr->right)->sz > 3*ptr(block_descr,descr->left)->sz) {
block_descr *right = ptr(block_descr,descr->right);
object right_left = right->left;
object right_right = right->right;
if (ptr(block_descr,right_left)->sz < 2*ptr(block_descr,right_right)->sz) {
object right = descr->right;
object left =
upd_block_descr(map,
descr->left,
right_left);
return upd_block_descr(right,
left,
right_right);
} else {
object right =
upd_block_descr(descr->right,
ptr(block_descr,right_left)->right,
right_right);
object left =
upd_block_descr(map,
descr->left,
ptr(block_descr,right_left)->left);
return upd_block_descr(right_left,
left,
right);
}
} else {
return map;
}
}
}
}
PGF_INTERNAL
object PgfDB::pop_first_block_descriptor(object map, object *res)
{
block_descr *descr = ptr(block_descr, map);
if (map == 0) {
return 0;
} else if (descr->left == 0) {
*res = map;
return descr->right;
} else {
object right = descr->right;
object left = pop_first_block_descriptor(descr->left, res);
map = upd_block_descr(map, left, right);
return balanceR_block_descriptor(map);
}
}
PGF_INTERNAL
object PgfDB::pop_last_block_descriptor(object map, object *res)
{
block_descr *descr = ptr(block_descr, map);
if (map == 0) {
return 0;
} else if (descr->right == 0) {
*res = map;
return descr->left;
} else {
object left = descr->left;
object right = pop_last_block_descriptor(descr->right, res);
map = upd_block_descr(map, left, right);
return balanceL_block_descriptor(map);
}
}
PGF_INTERNAL
object PgfDB::delete_block_descriptor(object map, object *po, size_t *psize, txn_t *ptxn_id)
{
if (map == 0) {
*po = 0;
return 0;
}
block_descr *descr = ptr(block_descr, map);
int cmp = (*psize < descr->block_size) ? -1
: (*psize > descr->block_size) ? 1
: (ms->min_txn_id < descr->block_txn_id) ? -1
: 0;
if (cmp < 0) {
object right = descr->right;
object left = delete_block_descriptor(descr->left, po, psize, ptxn_id);
if (*po != 0) {
map = upd_block_descr(map,left,right);
map = balanceR_block_descriptor(map);
} else {
descr = ptr(block_descr, map);
if (descr->block_txn_id < ms->min_txn_id) {
*psize = descr->block_size;
goto fit;
}
}
return map;
} else if (cmp > 0) {
object left = descr->left;
object right = delete_block_descriptor(descr->right, po, psize, ptxn_id);
if (*po != 0) {
map = upd_block_descr(map,left,right);
map = balanceL_block_descriptor(map);
}
return map;
} else {
fit:
*po = descr->o;
*ptxn_id = descr->block_txn_id;
object new_map;
if (descr->left == 0) {
new_map = descr->right;
} else if (descr->right == 0) {
new_map = descr->left;
} else {
if (ptr(block_descr,descr->left)->sz > ptr(block_descr,descr->right)->sz) {
object right = descr->right;
object left = pop_last_block_descriptor(descr->left, &new_map);
new_map = upd_block_descr(new_map, left, right);
new_map = balanceR_block_descriptor(new_map);
} else {
object left = descr->left;
object right = pop_first_block_descriptor(descr->right, &new_map);
new_map = upd_block_descr(new_map, left, right);
new_map = balanceL_block_descriptor(new_map);
}
}
descr = ptr(block_descr, map);
int index = (descr->descr_txn_id != ms->curr_txn_id);
descr->chain = free_descriptors[index];
free_descriptors[index] = map;
if (index == 1 && free_descriptors[2] == 0)
free_descriptors[2] = map;
return new_map;
}
}
#ifdef DEBUG_MEMORY_ALLOCATOR
PGF_INTERNAL void PgfDB::dump_free_blocks(object map)
{
if (map == 0) {
fprintf(stderr, ".");
return;
}
block_descr *descr = ptr(block_descr, map);
if (descr->left != 0 || descr->right != 0) {
fprintf(stderr, "(");
dump_free_blocks(descr->left);
fprintf(stderr, " ");
}
fprintf(stderr, "[%016lx %ld %ld]", descr->o, descr->block_size, descr->block_txn_id);
if (descr->left != 0 || descr->right != 0) {
fprintf(stderr, " ");
dump_free_blocks(descr->right);
fprintf(stderr, ")");
}
}
#endif
PGF_INTERNAL
object PgfDB::malloc_internal(size_t bytes)
{
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, "malloc_internal(%ld)\n", bytes);
#endif
size_t block_size = request2size(bytes);
object o;
if (last_free_block != 0 &&
last_free_block_size >= block_size &&
last_free_block_txn_id < ms->min_txn_id) {
/* We can grab a piece of the last free block */
o = last_free_block;
last_free_block += block_size;
last_free_block_size -= block_size;
if (last_free_block_size == 0) {
last_free_block = 0;
last_free_block_txn_id = 0;
}
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, "grabbed from the last block %016lx %ld\n", o, block_size);
#endif
return o;
}
txn_t txn_id;
size_t alloc_size = block_size;
free_blocks = delete_block_descriptor(free_blocks, &o, &alloc_size, &txn_id);
if (o != 0) {
if (alloc_size > block_size) {
if (last_free_block != 0) {
free_blocks = insert_block_descriptor(free_blocks,
last_free_block,
last_free_block_size,
last_free_block_txn_id);
}
last_free_block = o+block_size;
last_free_block_size = alloc_size-block_size;
last_free_block_txn_id = txn_id;
}
#ifdef DEBUG_MEMORY_ALLOCATOR
dump_free_blocks(free_blocks);
fprintf(stderr, "\n");
fprintf(stderr, "recycled %016lx\n", o);
#endif
return o;
}
size_t free_size = mmap_size - top;
if (block_size > free_size) {
size_t new_size =
get_mremap_size(ms->file_size, block_size, free_size, page_size);
resize_map(new_size, true);
}
o = top;
top += block_size;
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, "allocated from top %016lx\n", o);
#endif
return o;
}
PGF_INTERNAL
object PgfDB::realloc_internal(object oldo, size_t old_bytes, size_t new_bytes, txn_t txn_id)
{
if (oldo == 0)
return malloc_internal(new_bytes);
size_t old_nb = request2size(old_bytes);
size_t new_nb = request2size(new_bytes);
if (txn_id == ms->curr_txn_id) {
if (old_nb == new_nb)
return oldo;
if (oldo + old_nb == top) {
ssize_t nb = new_nb-old_nb;
ssize_t free_size = mmap_size - top;
if (nb > free_size) {
size_t new_size =
get_mremap_size(ms->file_size, nb, free_size, page_size);
resize_map(new_size, true);
}
// If the object is at the end of the allocation area
top += nb;
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, "realloc_internal(%016lx,%ld,%ld)\n", oldo, old_bytes, new_bytes);
#endif
return oldo;
}
}
object newo = malloc_internal(new_bytes);
memcpy(base+newo, base+oldo, (old_bytes < new_bytes) ? old_bytes : new_bytes);
free_internal(oldo, old_bytes);
return newo;
}
PGF_INTERNAL
object PgfDB::insert_block_descriptor(object map, object o, size_t size, txn_t txn_id)
{
if (map == 0)
return new_block_descr(o, size, txn_id);
block_descr *descr = ptr(block_descr, map);
int cmp = (size < descr->block_size) ? -1
: (size > descr->block_size) ? 1
: (txn_id < descr->block_txn_id) ? -1
: (txn_id > descr->block_txn_id) ? 1
: ptr(block_descr,descr->left)->sz < ptr(block_descr,descr->right)->sz ? -1
: 1;
if (cmp < 0) {
object right = descr->right;
object left = insert_block_descriptor(descr->left, o, size, txn_id);
map = upd_block_descr(map,left,right);
return balanceL_block_descriptor(map);
} else {
object left = descr->left;
object right = insert_block_descriptor(descr->right, o, size, txn_id);
map = upd_block_descr(map,left,right);
return balanceR_block_descriptor(map);
}
}
PGF_INTERNAL
void PgfDB::free_internal(object o, size_t bytes)
{
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, "free_internal(%016lx,%ld)\n", o, bytes);
#endif
if (o == 0)
return;
size_t block_size = request2size(bytes);
txn_t txn_id = (o >= ms->top) ? 0 : ms->curr_txn_id;
if (txn_id == 0 && o+block_size == top) {
// The block has been allocated in the current transaction
// and it is the last just before the top area. We can
// simply decrement top;
top -= block_size;
if (last_free_block_txn_id == 0 &&
last_free_block != 0 &&
last_free_block+last_free_block_size == top) {
// After the merge of the current block, the last one is
// also on the border with top.
top -= last_free_block_size;
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, "merged with last free block and top %016lx\n", top);
#endif
last_free_block = 0;
last_free_block_size = 0;
last_free_block_txn_id = 0;
} else {
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, "merged with top %016lx\n", top);
#endif
}
return;
}
if (last_free_block != 0) {
if (last_free_block_txn_id == txn_id && last_free_block+last_free_block_size == o) {
last_free_block_size += block_size;
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, "merged block %016lx %ld\n", last_free_block, last_free_block_size);
#endif
return;
} else if (txn_id == last_free_block_txn_id && o+block_size == last_free_block) {
last_free_block = o;
last_free_block_size += block_size;
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, "merged block %016lx %ld\n", last_free_block, last_free_block_size);
#endif
return;
} else {
free_blocks = insert_block_descriptor(free_blocks,
last_free_block,
last_free_block_size,
last_free_block_txn_id);
}
}
last_free_block = o;
last_free_block_size = block_size;
last_free_block_txn_id = txn_id;
#ifdef DEBUG_MEMORY_ALLOCATOR
fprintf(stderr, "last free block %016lx (size: %ld, txn_id: %ld)\n",
last_free_block,
last_free_block_size,
last_free_block_txn_id);
dump_free_blocks(free_blocks);
fprintf(stderr, "\n");
#endif
}
PGF_INTERNAL
ref<PgfPGF> PgfDB::revision2pgf(PgfRevision revision, size_t *p_txn_id)
{
if (revision == 0 || revision-1 >= ms->n_revisions)
throw pgf_error("Invalid revision");
revision_entry *entry = &ms->revisions[revision-1];
if (entry->ref_count == 0)
throw pgf_error("Invalid revision");
if (ref<PgfPGF>::get_tag(entry->o) != PgfPGF::tag)
throw pgf_error("Invalid revision");
ref<PgfPGF> pgf = ref<PgfPGF>::untagged(entry->o);
if (pgf.as_object() >= top)
throw pgf_error("Invalid revision");
if (p_txn_id != NULL)
*p_txn_id = entry->txn_id;
return pgf;
}
PGF_INTERNAL
ref<PgfConcr> PgfDB::revision2concr(PgfConcrRevision revision, size_t *p_txn_id)
{
if (revision == 0 || revision-1 >= ms->n_revisions)
throw pgf_error("Invalid revision");
revision_entry *entry = &ms->revisions[revision-1];
if (entry->ref_count == 0)
throw pgf_error("Invalid revision");
if (ref<PgfPGF>::get_tag(entry->o) != PgfConcr::tag)
throw pgf_error("Invalid revision");
ref<PgfConcr> concr = ref<PgfConcr>::untagged(entry->o);
if (concr.as_object() >= top)
throw pgf_error("Invalid revision");
if (p_txn_id != NULL)
*p_txn_id = entry->txn_id;
return concr;
}
PGF_INTERNAL
void PgfDB::start_transaction()
{
#ifndef _WIN32
pthread_mutex_lock(&ms->write_mutex);
if (
#ifndef MREMAP_MAYMOVE
fd >= 0 &&
#endif
mprotect(base, mmap_size, PROT_READ | PROT_WRITE) == -1)
throw pgf_systemerror(errno);
#else
WaitForSingleObject(hWriteMutex, INFINITE);
#endif
ms->writer_pid = getpid();
top = ms->top;
free_blocks = ms->free_blocks;
free_descriptors[0] = ms->free_descriptors;
free_descriptors[1] = 0;
free_descriptors[2] = 0;
last_free_block = 0;
last_free_block_size = 0;
last_free_block_txn_id = 0;
}
PGF_INTERNAL
void PgfDB::set_transaction_object(object o)
{
transaction_object = o;
}
PGF_INTERNAL
void PgfDB::commit(object o)
{
if (transaction_object != o) {
return;
}
if (last_free_block != 0) {
free_blocks = insert_block_descriptor(free_blocks,
last_free_block,
last_free_block_size,
last_free_block_txn_id);
last_free_block = 0;
last_free_block_size = 0;
last_free_block_txn_id = 0;
}
malloc_state *ms = current_db->ms;
object save_top = ms->top;
object save_free_blocks = ms->free_blocks;
object save_free_descriptors = ms->free_descriptors;
object save_active_revision = ms->active_revision;
#ifndef _WIN32
int res;
#ifndef MREMAP_MAYMOVE
if (fd < 0) {
ms->active_revision = o;
ms->top = top;
ms->free_blocks = free_blocks;
if (free_descriptors[2] != 0) {
ptr(block_descr,free_descriptors[2])->chain = free_descriptors[0];
free_descriptors[0] = free_descriptors[1];
free_descriptors[1] = 0;
free_descriptors[2] = 0;
}
ms->free_descriptors = free_descriptors[0];
ms->curr_txn_id++;
res = 0;
} else {
#endif
if (free_descriptors[2] != 0) {
ptr(block_descr,free_descriptors[2])->chain = free_descriptors[0];
free_descriptors[0] = free_descriptors[1];
free_descriptors[1] = 0;
free_descriptors[2] = 0;
}
res = msync((void *) base, mmap_size, MS_SYNC | MS_INVALIDATE);
if (res != 0)
throw pgf_systemerror(errno);
ms->active_revision = o;
ms->top = top;
ms->free_blocks = free_blocks;
ms->free_descriptors = free_descriptors[0];
ms->curr_txn_id++;
if (mprotect(base, mmap_size, PROT_READ) == -1)
throw pgf_systemerror(errno);
res = msync((void *) ms, page_size, MS_SYNC | MS_INVALIDATE);
if (res != 0) {
ms->active_revision = save_active_revision;
ms->top = save_top;
ms->free_blocks = save_free_blocks;
ms->free_descriptors = save_free_descriptors;
ms->curr_txn_id--;
throw pgf_systemerror(errno);
}
#ifndef MREMAP_MAYMOVE
}
#endif
ms->writer_pid = 0;
pthread_mutex_unlock(&ms->write_mutex);
#else
if (free_descriptors[2] != 0) {
ptr(block_descr,free_descriptors[2])->chain = free_descriptors[0];
free_descriptors[0] = free_descriptors[1];
free_descriptors[1] = 0;
free_descriptors[2] = 0;
}
if (fd > 0) {
if (!FlushViewOfFile(base,mmap_size)) {
throw pgf_systemerror(last_error_to_errno());
}
ms->active_revision = o;
ms->top = top;
ms->free_blocks = free_blocks;
ms->free_descriptors = free_descriptors[0];
ms->curr_txn_id++;
if (!FlushViewOfFile(ms,page_size)) {
ms->active_revision = save_active_revision;
ms->top = save_top;
ms->free_blocks = save_free_blocks;
ms->free_descriptors = save_free_descriptors;
ms->curr_txn_id--;
throw pgf_systemerror(last_error_to_errno());
}
} else {
ms->active_revision = o;
ms->top = top;
ms->free_blocks = free_blocks;
ms->free_descriptors = free_descriptors[0];
ms->curr_txn_id++;
}
ms->writer_pid = 0;
ReleaseMutex(hWriteMutex);
#endif
transaction_object = 0;
}
PGF_INTERNAL
void PgfDB::rollback(object o)
{
if (transaction_object != o) {
return;
}
top = ms->top;
transaction_object = 0;
free_blocks = ms->free_blocks;
free_descriptors[0] = ms->free_descriptors;
free_descriptors[1] = 0;
free_descriptors[2] = 0;
last_free_block = 0;
last_free_block_size = 0;
last_free_block_txn_id = 0;
ms->writer_pid = 0;
#ifndef _WIN32
if (
#ifndef MREMAP_MAYMOVE
fd >= 0 &&
#endif
mprotect(base, mmap_size, PROT_READ) == -1)
throw pgf_systemerror(errno);
pthread_mutex_unlock(&ms->write_mutex);
#else
ReleaseMutex(hWriteMutex);
#endif
}
#ifdef _WIN32
#define MAX_SPIN 50000
__forceinline __int16 ReaderCount(unsigned __int32 lock)
{
return lock & 0x00007FFF;
}
__forceinline __int32 SetReaders(unsigned __int32 lock, unsigned __int16 readers)
{
return (lock & ~0x00007FFF) | readers;
}
__forceinline __int16 WaitingCount(unsigned __int32 lock)
{
return (__int16) ((lock & 0x3FFF8000) >> 15);
}
__forceinline __int32 SetWaiting(unsigned __int32 lock, unsigned __int16 waiting)
{
return (lock & ~0x3FFF8000) | (waiting << 15);
}
__forceinline bool Writer(unsigned __int32 lock)
{
return (lock & 0x40000000) != 0;
}
__forceinline __int32 SetWriter(unsigned __int32 lock, bool writer)
{
if(writer)
return lock | 0x40000000;
else
return lock & ~0x40000000;
}
__forceinline bool AllClear(unsigned __int32 lock)
{
return (lock & 0x40007FFF) == 0;
}
#endif
void PgfDB::lock(DB_scope_mode m)
{
// If another process has resized the file we must resize the map
if (mmap_size != ms->file_size)
resize_map(ms->file_size, m == WRITER_SCOPE);
if (m == READER_SCOPE) {
#ifndef _WIN32
int res = pthread_rwlock_rdlock(&ms->rwlock);
if (res != 0)
throw pgf_systemerror(res);
#else
for (int i = 0; ; ++i) {
unsigned __int32 temp = ms->rwlock;
if (!Writer(temp)) {
if (InterlockedCompareExchange(&ms->rwlock, SetReaders(temp, ReaderCount(temp) + 1), temp) == temp)
return;
else
continue;
} else {
if (i < MAX_SPIN) {
YieldProcessor();
continue;
}
//The pending write operation is taking too long, so we'll drop to the kernel and wait
if (InterlockedCompareExchange(&ms->rwlock, SetWaiting(temp, WaitingCount(temp) + 1), temp) != temp)
continue;
i = 0; //Reset the spincount for the next time
WaitForSingleObject(hRWEvent, INFINITE);
do
{
temp = ms->rwlock;
} while (InterlockedCompareExchange(&ms->rwlock, SetWaiting(temp, WaitingCount(temp) - 1), temp) != temp);
}
}
#endif
}
}
void PgfDB::unlock()
{
#ifndef _WIN32
pthread_rwlock_unlock(&ms->rwlock);
#else
while (true) {
unsigned __int32 temp = ms->rwlock;
if (ReaderCount(temp) > 0) {
if (ReaderCount(temp) == 1 && WaitingCount(temp) != 0) {
//Note: this isn't nor has to be thread-safe, as the worst a duplicate notification can do
//is cause a waiting to reader to wake, perform a spinlock, then go back to sleep
//We're the last reader and there's a pending write
//Wake one waiting writer
SetEvent(hRWEvent);
}
//Decrement reader count
if (InterlockedCompareExchange(&ms->rwlock, SetReaders(temp, ReaderCount(temp) - 1), temp) == temp)
break;
} else {
}
}
#endif
}
void PgfDB::resize_map(size_t new_size, bool writeable)
{
#ifndef _WIN32
int res;
unsigned char* new_base;
if ((res = pthread_rwlock_wrlock(&ms->rwlock)) != 0)
throw pgf_systemerror(res);
// OSX does not implement mremap or MREMAP_MAYMOVE
#ifndef MREMAP_MAYMOVE
if (fd >= 0) {
if (munmap(base, mmap_size) == -1)
throw pgf_systemerror(errno);
base = NULL;
if (ms->file_size != new_size) {
if (ftruncate(fd, page_size+new_size) < 0)
throw pgf_systemerror(errno, filepath);
}
int prot = writeable ? PROT_READ | PROT_WRITE : PROT_READ;
new_base =
(unsigned char *) mmap(0, new_size, prot, MAP_SHARED, fd, page_size);
if (new_base == MAP_FAILED)
throw pgf_systemerror(errno);
} else {
new_base = (unsigned char *) ::realloc(base, new_size);
if (new_base == NULL)
throw pgf_systemerror(ENOMEM);
}
#else
if (fd >= 0 && ms->file_size != new_size) {
if (ftruncate(fd, page_size+new_size) < 0)
throw pgf_systemerror(errno, filepath);
}
new_base =
(unsigned char *) mremap(base, mmap_size, new_size, MREMAP_MAYMOVE);
if (new_base == MAP_FAILED)
throw pgf_systemerror(errno);
#endif
base = new_base;
#else
for (int i = 0; ; ++i) {
unsigned __int32 temp = ms->rwlock;
if (AllClear(temp)) {
if (InterlockedCompareExchange(&ms->rwlock, SetWriter(temp, true), temp) == temp)
break;
else
continue;
} else {
if (i < MAX_SPIN) {
YieldProcessor();
continue;
}
//The pending write operation is taking too long, so we'll drop to the kernel and wait
if (InterlockedCompareExchange(&ms->rwlock, SetWaiting(temp, WaitingCount(temp) + 1), temp) != temp)
continue;
i = 0; //Reset the spincount for the next time
WaitForSingleObject(hRWEvent, INFINITE);
do
{
temp = ms->rwlock;
} while (InterlockedCompareExchange(&ms->rwlock, SetWaiting(temp, WaitingCount(temp) - 1), temp) != temp);
}
}
new_size += page_size;
if (fd >= 0) {
UnmapViewOfFile(ms);
CloseHandle(hMap);
ms = NULL;
base = NULL;
hMap = CreateFileMapping((HANDLE) _get_osfhandle(fd),
NULL,
PAGE_READWRITE,
HIDWORD(new_size), LODWORD(new_size),
NULL);
if (hMap == NULL) {
hMap = INVALID_HANDLE_VALUE;
throw pgf_systemerror(last_error_to_errno());
}
ms = (malloc_state*) MapViewOfFile(hMap,
FILE_MAP_WRITE,
0,0,new_size);
if (ms == NULL)
throw pgf_systemerror(last_error_to_errno());
} else {
malloc_state *new_ms = (malloc_state*) ::realloc(ms, new_size);
if (new_ms == NULL)
throw pgf_systemerror(ENOMEM);
ms = new_ms;
}
new_size -= page_size;
base = ((unsigned char*) ms) + page_size;
#endif
current_base = (unsigned char*) base;
mmap_size = new_size;
ms->file_size = new_size;
#ifndef _WIN32
if ((res = pthread_rwlock_unlock(&ms->rwlock)) != 0)
throw pgf_systemerror(res);
#else
while(true) {
LONG temp;
while(true) {
temp = ms->rwlock;
assert(Writer(temp));
if (WaitingCount(temp) == 0)
break;
//Note: this is thread-safe (there's guaranteed not to be another EndWrite simultaneously)
//Wake all waiting readers or writers, loop until wake confirmation is received
SetEvent(hRWEvent);
}
//Decrement writer count
if (InterlockedCompareExchange(&ms->rwlock, SetWriter(temp, false), temp) == temp)
break;
}
#endif
}
bool PgfDB::is_transient_object(object o)
{
return o > ms->top;
}
DB_scope::DB_scope(PgfDB *db, DB_scope_mode m)
{
db->lock(m);
mode = m;
save_db = current_db;
current_db = db;
current_base = db->base;
next_scope = last_db_scope;
last_db_scope = this;
}
DB_scope::~DB_scope()
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wterminate"
if (mode == READER_SCOPE)
current_db->unlock();
current_db = save_db;
current_base = current_db ? current_db->base
: NULL;
last_db_scope = next_scope;
#pragma GCC diagnostic pop
}