From 12b4958b9955646bbb5b02293bfea93e1d520bdf Mon Sep 17 00:00:00 2001 From: krangelov Date: Wed, 22 Dec 2021 10:47:22 +0100 Subject: [PATCH] make it possible to merge PGF files in the compiler --- src/compiler/GF/Command/Importing.hs | 54 +++++++++++------------ src/compiler/GF/Compile.hs | 21 +++------ src/compiler/GF/Compile/GrammarToPGF.hs | 33 +++++++------- src/compiler/GF/Compiler.hs | 43 +++++++++++------- src/runtime/c/pgf/pgf.cxx | 27 ++++++++++++ src/runtime/c/pgf/pgf.h | 5 +++ src/runtime/c/pgf/reader.cxx | 44 ++++++++++++++++++ src/runtime/c/pgf/reader.h | 5 +++ src/runtime/haskell/PGF2/FFI.hsc | 2 + src/runtime/haskell/PGF2/Transactions.hsc | 6 +++ 10 files changed, 165 insertions(+), 75 deletions(-) diff --git a/src/compiler/GF/Command/Importing.hs b/src/compiler/GF/Command/Importing.hs index fe2105acb..fd3c0d676 100644 --- a/src/compiler/GF/Command/Importing.hs +++ b/src/compiler/GF/Command/Importing.hs @@ -1,6 +1,7 @@ module GF.Command.Importing (importGrammar, importSource) where import PGF2 +import PGF2.Transactions import GF.Compile import GF.Compile.Multi (readMulti) @@ -21,35 +22,32 @@ import Control.Monad(foldM) -- import a grammar in an environment where it extends an existing grammar importGrammar :: Maybe PGF -> Options -> [FilePath] -> IO (Maybe PGF) -importGrammar pgf0 _ [] = return pgf0 -importGrammar pgf0 opts files = - case takeExtensions (last files) of - ".cf" -> fmap Just $ importCF opts files getBNFCRules bnfc2cf - ".ebnf" -> fmap Just $ importCF opts files getEBNFRules ebnf2cf - ".gfm" -> do - ascss <- mapM readMulti files - let cs = concatMap snd ascss - importGrammar pgf0 opts cs - s | elem s [".gf",".gfo"] -> do - res <- tryIOE $ compileToPGF opts files - case res of - Ok pgf2 -> ioUnionPGF pgf0 pgf2 - Bad msg -> do putStrLn ('\n':'\n':msg) - return pgf0 - ".pgf" -> do - mapM readPGF files >>= foldM ioUnionPGF pgf0 - ".ngf" -> do - mapM readNGF files >>= foldM ioUnionPGF pgf0 - ext -> die $ "Unknown filename extension: " ++ show ext +importGrammar pgf0 _ [] = return pgf0 +importGrammar pgf0 opts fs + | all (extensionIs ".cf") fs = fmap Just $ importCF opts fs getBNFCRules bnfc2cf + | all (extensionIs ".ebnf") fs = fmap Just $ importCF opts fs getEBNFRules ebnf2cf + | all (extensionIs ".gfm") fs = do + ascss <- mapM readMulti fs + let cs = concatMap snd ascss + importGrammar pgf0 opts cs + | all (\f -> extensionIs ".gf" f || extensionIs ".gfo" f) fs = do + res <- tryIOE $ compileToPGF opts pgf0 fs + case res of + Ok pgf -> return (Just pgf) + Bad msg -> do putStrLn ('\n':'\n':msg) + return pgf0 + | all (extensionIs ".pgf") fs = foldM importPGF pgf0 fs + | all (extensionIs ".ngf") fs = do + case fs of + [f] -> fmap Just $ readNGF f + _ -> die $ "Only one .ngf file could be loaded at a time" + | otherwise = die $ "Don't know what to do with these input files: " ++ unwords fs + where + extensionIs ext = (== ext) . takeExtension -ioUnionPGF :: Maybe PGF -> PGF -> IO (Maybe PGF) -ioUnionPGF Nothing two = return (Just two) -ioUnionPGF (Just one) two = - case unionPGF one two of - Nothing -> putStrLn "Abstract changed, previous concretes discarded." >> return (Just two) - Just pgf -> return (Just pgf) - -unionPGF = error "TODO: unionPGF" +importPGF :: Maybe PGF -> FilePath -> IO (Maybe PGF) +importPGF Nothing f = fmap Just (readPGF f) +importPGF (Just pgf) f = fmap Just (modifyPGF pgf (mergePGF f)) importSource :: Options -> [FilePath] -> IO SourceGrammar importSource opts files = fmap (snd.snd) (batchCompile opts files) diff --git a/src/compiler/GF/Compile.hs b/src/compiler/GF/Compile.hs index 0251862d2..6e9215272 100644 --- a/src/compiler/GF/Compile.hs +++ b/src/compiler/GF/Compile.hs @@ -28,13 +28,13 @@ import PGF2(PGF,readProbabilitiesFromFile) -- | Compiles a number of source files and builds a 'PGF' structure for them. -- This is a composition of 'link' and 'batchCompile'. -compileToPGF :: Options -> [FilePath] -> IOE PGF -compileToPGF opts fs = link opts . snd =<< batchCompile opts fs +compileToPGF :: Options -> Maybe PGF -> [FilePath] -> IOE PGF +compileToPGF opts mb_pgf fs = link opts mb_pgf . snd =<< batchCompile opts fs -- | Link a grammar into a 'PGF' that can be used to 'PGF.linearize' and -- 'PGF.parse' with the "PGF" run-time system. -link :: Options -> (ModuleName,Grammar) -> IOE PGF -link opts (cnc,gr) = +link :: Options -> Maybe PGF -> (ModuleName,Grammar) -> IOE PGF +link opts mb_pgf (cnc,gr) = putPointE Normal opts "linking ... " $ do let abs = srcAbsName gr cnc @@ -44,7 +44,7 @@ link opts (cnc,gr) = warnOut opts warnings probs <- liftIO (maybe (return Map.empty) readProbabilitiesFromFile (flag optProbsFile opts)) - pgf <- grammar2PGF opts gr' abs probs + pgf <- grammar2PGF opts mb_pgf gr' abs probs when (verbAtLeast opts Normal) $ putStrE "OK" return pgf @@ -62,22 +62,11 @@ batchCompile opts files = do let cnc = moduleNameS (justModuleName (last files)) t = maximum . map fst $ Map.elems menv return (t,(cnc,gr)) -{- --- to compile a set of modules, e.g. an old GF or a .cf file -compileSourceGrammar :: Options -> Grammar -> IOE Grammar -compileSourceGrammar opts gr = do - cwd <- getCurrentDirectory - (_,gr',_) <- foldM (\env -> compileSourceModule opts cwd env Nothing) - emptyCompileEnv - (modules gr) - return gr' --} -- | compile with one module as starting point -- command-line options override options (marked by --#) in the file -- As for path: if it is read from file, the file path is prepended to each name. -- If from command line, it is used as it is. - compileModule :: Options -- ^ Options from program command line and shell command. -> CompileEnv -> FilePath -> IOE CompileEnv compileModule opts1 env@(_,rfs) file = diff --git a/src/compiler/GF/Compile/GrammarToPGF.hs b/src/compiler/GF/Compile/GrammarToPGF.hs index bb7d1277a..78b623d2f 100644 --- a/src/compiler/GF/Compile/GrammarToPGF.hs +++ b/src/compiler/GF/Compile/GrammarToPGF.hs @@ -29,22 +29,23 @@ import Data.Maybe(fromMaybe) import System.FilePath import System.Directory -grammar2PGF :: Options -> SourceGrammar -> ModuleName -> Map.Map PGF2.Fun Double -> IO PGF -grammar2PGF opts gr am probs = do - let abs_name = mi2i am - mb_ngf_path <- - if snd (flag optLinkTargets opts) - then do let fname = maybe id () - (flag optOutputDir opts) - (fromMaybe abs_name (flag optName opts)<.>"ngf") - exists <- doesFileExist fname - if exists - then removeFile fname - else return () - putStr ("(Boot image "++fname++") ") - return (Just fname) - else do return Nothing - pgf <- newNGF abs_name mb_ngf_path +grammar2PGF :: Options -> Maybe PGF -> SourceGrammar -> ModuleName -> Map.Map PGF2.Fun Double -> IO PGF +grammar2PGF opts mb_pgf gr am probs = do + pgf <- case mb_pgf of + Nothing -> let abs_name = mi2i am + in if snd (flag optLinkTargets opts) + then do let fname = maybe id () + (flag optOutputDir opts) + (fromMaybe abs_name (flag optName opts)<.>"ngf") + exists <- doesFileExist fname + if exists + then removeFile fname + else return () + putStr ("(Boot image "++fname++") ") + newNGF abs_name (Just fname) + else newNGF abs_name Nothing + Just pgf -> return pgf + pgf <- modifyPGF pgf $ do sequence_ [setAbstractFlag name value | (name,value) <- optionsPGF aflags] sequence_ [createCategory c ctxt p | (c,ctxt,p) <- cats] diff --git a/src/compiler/GF/Compiler.hs b/src/compiler/GF/Compiler.hs index 5d8e05a12..e5b14117b 100644 --- a/src/compiler/GF/Compiler.hs +++ b/src/compiler/GF/Compiler.hs @@ -1,6 +1,7 @@ -module GF.Compiler (mainGFC, linkGrammars, writeGrammar, writeOutputs) where +module GF.Compiler (mainGFC, writeGrammar, writeOutputs) where import PGF2 +import PGF2.Transactions import GF.Compile as S(batchCompile,link,srcAbsName) import GF.CompileInParallel as P(parallelBatchCompile) import GF.Compile.Export @@ -25,7 +26,7 @@ import qualified Data.Set as Set import qualified Data.ByteString.Lazy as BSL import GF.Grammar.CanonicalJSON (encodeJSON) import System.FilePath -import Control.Monad(when,unless,forM_) +import Control.Monad(when,unless,forM_,foldM) -- | Compile the given GF grammar files. The result is a number of @.gfo@ files -- and, depending on the options, a @.pgf@ file. (@gf -batch@, @gf -make@) @@ -95,7 +96,8 @@ compileSourceFiles opts fs = -- If a @.pgf@ file by the same name already exists and it is newer than the -- source grammar files (as indicated by the 'UTCTime' argument), it is not -- recreated. Calls 'writeGrammar' and 'writeOutputs'. -linkGrammars opts (t_src,~cnc_grs@(~(cnc,gr):_)) = +linkGrammars opts (t_src,[]) = return () +linkGrammars opts (t_src,cnc_gr@(cnc,gr):cnc_grs) = do let abs = render (srcAbsName gr cnc) pgfFile = outputPath opts (grammarName' opts abs<.>"pgf") t_pgf <- if outputJustPGF opts @@ -103,8 +105,8 @@ linkGrammars opts (t_src,~cnc_grs@(~(cnc,gr):_)) = else return Nothing if t_pgf >= Just t_src then putIfVerb opts $ pgfFile ++ " is up-to-date." - else do pgfs <- mapM (link opts) cnc_grs - let pgf = foldl1 (\one two -> fromMaybe two (unionPGF one two)) pgfs + else do pgf <- link opts Nothing cnc_gr + pgf <- foldM (link opts . Just) pgf cnc_grs writeGrammar opts pgf writeOutputs opts pgf @@ -136,18 +138,29 @@ unionPGFFiles opts fs = else doIt doIt = - do pgfs <- mapM readPGFVerbose fs - let pgf = foldl1 (\one two -> fromMaybe two (unionPGF one two)) pgfs - let pgfFile = outputPath opts (grammarName opts pgf <.> "pgf") - if pgfFile `elem` fs - then putStrLnE $ "Refusing to overwrite " ++ pgfFile - else writeGrammar opts pgf - writeOutputs opts pgf + case fs of + [] -> return () + (f:fs) -> do pgf <- if snd (flag optLinkTargets opts) + then case flag optName opts of + Just name -> do let fname = maybe id () (flag optOutputDir opts) (name<.>"ngf") + putStrLnE ("(Boot image "++fname++")") + exists <- doesFileExist fname + if exists + then removeFile fname + else return () + echo (\f -> bootNGF f fname) f + Nothing -> do putStrLnE $ "To boot from a list of .pgf files add option -name" + echo readPGF f + else echo readPGF f + pgf <- foldM (\pgf -> echo (modifyPGF pgf . mergePGF)) pgf fs + let pgfFile = outputPath opts (grammarName opts pgf <.> "pgf") + if pgfFile `elem` fs + then putStrLnE $ "Refusing to overwrite " ++ pgfFile + else writeGrammar opts pgf + writeOutputs opts pgf - readPGFVerbose f = - putPointE Normal opts ("Reading " ++ f ++ "...") $ liftIO $ readPGF f + echo read f = putPointE Normal opts ("Reading " ++ f ++ "...") (liftIO (read f)) -unionPGF = error "TODO: unionPGF" -- | Export the PGF to the 'OutputFormat's specified in the 'Options'. -- Calls 'exportPGF'. diff --git a/src/runtime/c/pgf/pgf.cxx b/src/runtime/c/pgf/pgf.cxx index fc2520cae..b15c35e5c 100644 --- a/src/runtime/c/pgf/pgf.cxx +++ b/src/runtime/c/pgf/pgf.cxx @@ -213,6 +213,33 @@ PgfDB *pgf_new_ngf(PgfText *abstract_name, return NULL; } +PGF_API +void pgf_merge_pgf(PgfDB *db, PgfRevision revision, + const char* fpath, + PgfExn* err) +{ + FILE *in = NULL; + + PGF_API_BEGIN { + in = fopen(fpath, "rb"); + if (!in) { + throw pgf_systemerror(errno, fpath); + } + + { + DB_scope scope(db, WRITER_SCOPE); + ref pgf = PgfDB::revision2pgf(revision); + + PgfReader rdr(in); + rdr.merge_pgf(pgf); + } + } PGF_API_END + +end: + if (in != NULL) + fclose(in); +} + PGF_API void pgf_write_pgf(const char* fpath, PgfDB *db, PgfRevision revision, diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h index 770c38dc4..c7e797e8d 100644 --- a/src/runtime/c/pgf/pgf.h +++ b/src/runtime/c/pgf/pgf.h @@ -257,6 +257,11 @@ PgfDB *pgf_new_ngf(PgfText *abstract_name, PgfRevision *revision, PgfExn* err); +PGF_API +void pgf_merge_pgf(PgfDB *db, PgfRevision revision, + const char* fpath, + PgfExn* err); + PGF_API_DECL void pgf_write_pgf(const char* fpath, PgfDB *db, PgfRevision revision, diff --git a/src/runtime/c/pgf/reader.cxx b/src/runtime/c/pgf/reader.cxx index 95a6f64e2..2e32f54bd 100644 --- a/src/runtime/c/pgf/reader.cxx +++ b/src/runtime/c/pgf/reader.cxx @@ -169,6 +169,16 @@ Namespace PgfReader::read_namespace(ref (PgfReader::*read_value)()) return read_namespace(read_value, len); } +template +void PgfReader::merge_namespace(ref (PgfReader::*read_value)()) +{ + size_t len = read_len(); + for (size_t i = 0; i < len; i++) { + ref value = (this->*read_value)(); + V::release(value); + } +} + template ref PgfReader::read_vector(Vector C::* field, void (PgfReader::*read_value)(ref val)) { @@ -371,6 +381,16 @@ void PgfReader::read_abstract(ref abstract) abstract->cats = read_namespace(&PgfReader::read_abscat); } +void PgfReader::merge_abstract(ref abstract) +{ + this->abstract = abstract; + + read_name(); // ? + merge_namespace(&PgfReader::read_flag); // ? + merge_namespace(&PgfReader::read_absfun); // ? + merge_namespace(&PgfReader::read_abscat); // ? +} + ref PgfReader::read_lparam() { size_t i0 = read_int(); @@ -597,3 +617,27 @@ ref PgfReader::read_pgf() return pgf; } + +void PgfReader::merge_pgf(ref pgf) +{ + uint16_t major_version = read_u16be(); + uint16_t minor_version = read_u16be(); + + if (pgf->major_version != PGF_MAJOR_VERSION || + pgf->minor_version != PGF_MINOR_VERSION) { + throw pgf_error("Unsupported format version"); + } + + merge_namespace(&PgfReader::read_flag); // ?? + + merge_abstract(ref::from_ptr(&pgf->abstract)); + + size_t len = read_len(); + for (size_t i = 0; i < len; i++) { + ref concr = PgfReader::read_concrete(); + Namespace concrs = + namespace_insert(pgf->concretes, concr); + namespace_release(pgf->concretes); + pgf->concretes = concrs; + } +} diff --git a/src/runtime/c/pgf/reader.h b/src/runtime/c/pgf/reader.h index 0e521b962..30d9b32c4 100644 --- a/src/runtime/c/pgf/reader.h +++ b/src/runtime/c/pgf/reader.h @@ -44,6 +44,9 @@ public: template Namespace read_namespace(ref (PgfReader::*read_value)()); + template + void merge_namespace(ref (PgfReader::*read_value)()); + template ref read_vector(Vector C::* field, void (PgfReader::*read_value)(ref val)); @@ -62,6 +65,7 @@ public: ref read_absfun(); ref read_abscat(); void read_abstract(ref abstract); + void merge_abstract(ref abstract); ref read_lincat(); ref read_lparam(); @@ -75,6 +79,7 @@ public: ref read_concrete(); ref read_pgf(); + void merge_pgf(ref pgf); private: FILE *in; diff --git a/src/runtime/haskell/PGF2/FFI.hsc b/src/runtime/haskell/PGF2/FFI.hsc index b39e84e15..0ec598a8b 100644 --- a/src/runtime/haskell/PGF2/FFI.hsc +++ b/src/runtime/haskell/PGF2/FFI.hsc @@ -66,6 +66,8 @@ foreign import ccall "pgf_read_ngf" foreign import ccall pgf_new_ngf :: Ptr PgfText -> CString -> Ptr (Ptr PGF) -> Ptr PgfExn -> IO (Ptr PgfDB) +foreign import ccall pgf_merge_pgf :: Ptr PgfDB -> Ptr PGF -> CString -> Ptr PgfExn -> IO () + foreign import ccall pgf_write_pgf :: CString -> Ptr PgfDB -> Ptr PGF -> Ptr PgfExn -> IO () foreign import ccall "pgf_free_revision" pgf_free_revision_ :: Ptr PgfDB -> Ptr PGF -> IO () diff --git a/src/runtime/haskell/PGF2/Transactions.hsc b/src/runtime/haskell/PGF2/Transactions.hsc index efe9dea9d..40043fa59 100644 --- a/src/runtime/haskell/PGF2/Transactions.hsc +++ b/src/runtime/haskell/PGF2/Transactions.hsc @@ -19,6 +19,7 @@ module PGF2.Transactions , createConcrete , alterConcrete , dropConcrete + , mergePGF , setConcreteFlag , createLincat , dropLincat @@ -168,6 +169,11 @@ dropConcrete name = Transaction $ \c_db _ c_revision c_exn -> withText name $ \c_name -> do pgf_drop_concrete c_db c_revision c_name c_exn +mergePGF :: FilePath -> Transaction PGF () +mergePGF fpath = Transaction $ \c_db _ c_revision c_exn -> + withCString fpath $ \c_fpath -> + pgf_merge_pgf c_db c_revision c_fpath c_exn + setGlobalFlag :: String -> Literal -> Transaction PGF () setGlobalFlag name value = Transaction $ \c_db _ c_revision c_exn -> withText name $ \c_name ->