From 6d856b2ce03783c7eb6a36aaabe579e05995edd4 Mon Sep 17 00:00:00 2001 From: Krasimir Angelov Date: Sun, 29 Jan 2023 21:04:08 +0100 Subject: [PATCH] make it possible to control the initial NGF size --- src/compiler/GF/Command/Importing.hs | 2 +- src/compiler/GF/Compile/GrammarToPGF.hs | 4 +-- src/runtime/c/pgf/db.cxx | 13 ++++++-- src/runtime/c/pgf/db.h | 2 +- src/runtime/c/pgf/pgf.cxx | 41 +++++++++++++++---------- src/runtime/c/pgf/pgf.h | 1 + src/runtime/haskell/PGF2.hsc | 6 ++-- src/runtime/haskell/PGF2/FFI.hsc | 2 +- src/runtime/python/pypgf.c | 14 +++++---- 9 files changed, 52 insertions(+), 33 deletions(-) diff --git a/src/compiler/GF/Command/Importing.hs b/src/compiler/GF/Command/Importing.hs index 5d1f88ee7..d112acf6a 100644 --- a/src/compiler/GF/Command/Importing.hs +++ b/src/compiler/GF/Command/Importing.hs @@ -31,7 +31,7 @@ importGrammar readNGF pgf0 opts _ putStr ("(Boot image "++fname++") ") return (Just fname) else do return Nothing - pgf <- newNGF name mb_ngf_file + pgf <- newNGF name mb_ngf_file 0 return (Just pgf) importGrammar readNGF pgf0 _ [] = return pgf0 importGrammar readNGF pgf0 opts fs diff --git a/src/compiler/GF/Compile/GrammarToPGF.hs b/src/compiler/GF/Compile/GrammarToPGF.hs index 8c30f273f..a0854d297 100644 --- a/src/compiler/GF/Compile/GrammarToPGF.hs +++ b/src/compiler/GF/Compile/GrammarToPGF.hs @@ -45,9 +45,9 @@ grammar2PGF opts mb_pgf gr am probs = do then removeFile fname else return () putStr ("(Boot image "++fname++") ") - newNGF abs_name (Just fname) + newNGF abs_name (Just fname) 0 | otherwise -> - do newNGF abs_name Nothing + do newNGF abs_name Nothing 0 pgf <- modifyPGF pgf $ do sequence_ [setAbstractFlag name value | (name,value) <- optionsPGF aflags] diff --git a/src/runtime/c/pgf/db.cxx b/src/runtime/c/pgf/db.cxx index 09423b72a..dd3cda30a 100644 --- a/src/runtime/c/pgf/db.cxx +++ b/src/runtime/c/pgf/db.cxx @@ -134,9 +134,16 @@ struct PGF_INTERNAL_DECL malloc_state revision_entry revisions[]; }; +static inline ssize_t get_mmap_size(size_t init_size, size_t page_size) +{ + size_t mmap_size = ((init_size+page_size-1)/page_size)*page_size; + if (mmap_size < page_size*2) + mmap_size = page_size*2; + return mmap_size; +} PGF_INTERNAL -PgfDB::PgfDB(const char* filepath, int flags, int mode) { +PgfDB::PgfDB(const char* filepath, int flags, int mode, size_t init_size) { bool is_new = false; fd = -1; @@ -147,7 +154,7 @@ PgfDB::PgfDB(const char* filepath, int flags, int mode) { if (filepath == NULL) { this->filepath = NULL; - mmap_size = page_size*2; + mmap_size = get_mmap_size(init_size, page_size); is_new = true; } else { fd = open(filepath, flags, mode); @@ -163,7 +170,7 @@ PgfDB::PgfDB(const char* filepath, int flags, int mode) { is_new = false; if (mmap_size == 0) { - mmap_size = page_size*2; + mmap_size = get_mmap_size(init_size, page_size); if (ftruncate(fd, mmap_size) < 0) { int code = errno; close(fd); diff --git a/src/runtime/c/pgf/db.h b/src/runtime/c/pgf/db.h index bb8f373e5..d48dbb9d4 100644 --- a/src/runtime/c/pgf/db.h +++ b/src/runtime/c/pgf/db.h @@ -101,7 +101,7 @@ public: // When the count is zero we release the database. size_t ref_count; - PGF_INTERNAL_DECL PgfDB(const char* filepath, int flags, int mode); + PGF_INTERNAL_DECL PgfDB(const char* filepath, int flags, int mode, size_t init_size); PGF_INTERNAL_DECL ~PgfDB(); PGF_INTERNAL_DECL static txn_t get_txn_id(); diff --git a/src/runtime/c/pgf/pgf.cxx b/src/runtime/c/pgf/pgf.cxx index 95959b4d5..e8183e2f3 100644 --- a/src/runtime/c/pgf/pgf.cxx +++ b/src/runtime/c/pgf/pgf.cxx @@ -46,12 +46,16 @@ PgfDB *pgf_read_pgf(const char* fpath, PgfRevision *revision, FILE *in = NULL; PGF_API_BEGIN { - db = new PgfDB(NULL, 0, 0); in = fopen(fpath, "rb"); if (!in) { throw pgf_systemerror(errno, fpath); } + fseek(in, 0, SEEK_END); + size_t pgf_size = ftell(in); + fseek(in, 0, SEEK_SET); + db = new PgfDB(NULL, 0, 0, pgf_size*7); + { DB_scope scope(db, WRITER_SCOPE); @@ -71,12 +75,12 @@ PgfDB *pgf_read_pgf(const char* fpath, PgfRevision *revision, return db; } PGF_API_END - if (in != NULL) - fclose(in); - if (db != NULL) delete db; + if (in != NULL) + fclose(in); + return NULL; } @@ -90,19 +94,23 @@ PgfDB *pgf_boot_ngf(const char* pgf_path, const char* ngf_path, FILE *in = NULL; PGF_API_BEGIN { - db = new PgfDB(ngf_path, O_CREAT | O_EXCL | O_RDWR, -#ifndef _WIN32 - S_IRUSR | S_IWUSR -#else - _S_IREAD | _S_IWRITE -#endif - ); - in = fopen(pgf_path, "rb"); if (!in) { throw pgf_systemerror(errno, pgf_path); } + fseek(in, 0, SEEK_END); + size_t pgf_size = ftell(in); + fseek(in, 0, SEEK_SET); + + db = new PgfDB(ngf_path, O_CREAT | O_EXCL | O_RDWR, +#ifndef _WIN32 + S_IRUSR | S_IWUSR, +#else + _S_IREAD | _S_IWRITE, +#endif + pgf_size*7); + { DB_scope scope(db, WRITER_SCOPE); @@ -141,7 +149,7 @@ PgfDB *pgf_read_ngf(const char *fpath, PgfDB *db = NULL; PGF_API_BEGIN { - db = new PgfDB(fpath, O_RDWR, 0); + db = new PgfDB(fpath, O_RDWR, 0, 0); { DB_scope scope(db, WRITER_SCOPE); @@ -165,6 +173,7 @@ PgfDB *pgf_read_ngf(const char *fpath, PGF_API PgfDB *pgf_new_ngf(PgfText *abstract_name, const char *fpath, + size_t init_size, PgfRevision *revision, PgfExn* err) { @@ -173,11 +182,11 @@ PgfDB *pgf_new_ngf(PgfText *abstract_name, PGF_API_BEGIN { db = new PgfDB(fpath, O_CREAT | O_EXCL | O_RDWR, #ifndef _WIN32 - S_IRUSR | S_IWUSR + S_IRUSR | S_IWUSR, #else - _S_IREAD | _S_IWRITE + _S_IREAD | _S_IWRITE, #endif -); + init_size); { DB_scope scope(db, WRITER_SCOPE); diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h index 56b5b3ed9..8ddcf993a 100644 --- a/src/runtime/c/pgf/pgf.h +++ b/src/runtime/c/pgf/pgf.h @@ -276,6 +276,7 @@ PgfDB *pgf_read_ngf(const char* fpath, PGF_API_DECL PgfDB *pgf_new_ngf(PgfText *abstract_name, const char *fpath, + size_t init_size, PgfRevision *revision, PgfExn* err); diff --git a/src/runtime/haskell/PGF2.hsc b/src/runtime/haskell/PGF2.hsc index 982cc24ff..5d4fdd628 100644 --- a/src/runtime/haskell/PGF2.hsc +++ b/src/runtime/haskell/PGF2.hsc @@ -177,13 +177,13 @@ readNGF fpath = -- Aside from the name, the grammar is otherwise empty but can be later -- populated with new functions and categories. If fpath is Nothing then -- the file is not stored on the disk but only in memory. -newNGF :: AbsName -> Maybe FilePath -> IO PGF -newNGF abs_name mb_fpath = +newNGF :: AbsName -> Maybe FilePath -> Int -> IO PGF +newNGF abs_name mb_fpath init_size = withText abs_name $ \c_abs_name -> maybe (\f -> f nullPtr) withCString mb_fpath $ \c_fpath -> alloca $ \p_revision -> mask_ $ do - c_db <- withPgfExn "newNGF" (pgf_new_ngf c_abs_name c_fpath p_revision) + c_db <- withPgfExn "newNGF" (pgf_new_ngf c_abs_name c_fpath (fromIntegral init_size) p_revision) c_revision <- peek p_revision fptr <- newForeignPtrEnv pgf_free_revision c_db c_revision return (PGF c_db fptr Map.empty) diff --git a/src/runtime/haskell/PGF2/FFI.hsc b/src/runtime/haskell/PGF2/FFI.hsc index 74fb8ca91..726df010a 100644 --- a/src/runtime/haskell/PGF2/FFI.hsc +++ b/src/runtime/haskell/PGF2/FFI.hsc @@ -74,7 +74,7 @@ foreign import ccall "wrapper" wrapProbsCallback :: Wrapper ProbsCallback foreign import ccall "pgf_read_ngf" pgf_read_ngf :: CString -> Ptr (Ptr PGF) -> Ptr PgfExn -> IO (Ptr PgfDB) -foreign import ccall pgf_new_ngf :: Ptr PgfText -> CString -> Ptr (Ptr PGF) -> Ptr PgfExn -> IO (Ptr PgfDB) +foreign import ccall pgf_new_ngf :: Ptr PgfText -> CString -> CSize -> Ptr (Ptr PGF) -> Ptr PgfExn -> IO (Ptr PgfDB) foreign import ccall pgf_merge_pgf :: Ptr PgfDB -> Ptr PGF -> CString -> Ptr PgfExn -> IO () diff --git a/src/runtime/python/pypgf.c b/src/runtime/python/pypgf.c index 901105d31..5c33b8e11 100644 --- a/src/runtime/python/pypgf.c +++ b/src/runtime/python/pypgf.c @@ -523,7 +523,7 @@ PGF_writePGF(PGFObject *self, PyObject *args, PyObject *kwargs) const char *fpath; PyObject *py_langs = NULL; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|O!", &kwds[0], &fpath, &PyList_Type, &py_langs)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|O!", kwds, &fpath, &PyList_Type, &py_langs)) return NULL; PgfText **langs = NULL; @@ -1327,20 +1327,22 @@ pgf_readNGF(PyObject *self, PyObject *args) } static PGFObject * -pgf_newNGF(PyObject *self, PyObject *args) +pgf_newNGF(PyObject *self, PyObject *args, PyObject *kwargs) { + char *kwds[] = {"","file","size",NULL}; + const char *s; Py_ssize_t size; const char *fpath = NULL; - if (!PyArg_ParseTuple(args, "s#|s", &s, &size, &fpath)) + Py_ssize_t init_size = 0; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|sn", kwds, &s, &size, &fpath, &init_size)) return NULL; - PgfText *absname = CString_AsPgfText(s, size); PGFObject *py_pgf = (PGFObject *)pgf_PGFType.tp_alloc(&pgf_PGFType, 0); PgfExn err; - py_pgf->db = pgf_new_ngf(absname, fpath, &py_pgf->revision, &err); + py_pgf->db = pgf_new_ngf(absname, fpath, init_size, &py_pgf->revision, &err); FreePgfText(absname); if (handleError(err) != PGF_EXN_NONE) { Py_DECREF(py_pgf); @@ -1508,7 +1510,7 @@ static PyMethodDef module_methods[] = { "Reads a PGF file into memory and stores the unpacked data in an NGF file"}, {"readNGF", (void*)pgf_readNGF, METH_VARARGS, "Reads an NGF file into memory"}, - {"newNGF", (void*)pgf_newNGF, METH_VARARGS, + {"newNGF", (void*)pgf_newNGF, METH_VARARGS | METH_KEYWORDS, "Creates a new NGF file with the given name"}, {"readExpr", (void*)pgf_readExpr, METH_VARARGS,