From 5a2b2009482d3034651817ed137a2dab6a7174a3 Mon Sep 17 00:00:00 2001 From: Krasimir Angelov Date: Fri, 2 Nov 2018 14:38:44 +0100 Subject: [PATCH] manually copy the "c-runtime" branch from the old repository. --- gf.cabal | 146 +--- src/compiler/GF/Command/Abstract.hs | 22 +- src/compiler/GF/Command/CommandInfo.hs | 21 +- src/compiler/GF/Command/Commands.hs | 249 +++---- src/compiler/GF/Command/CommonCommands.hs | 19 +- src/compiler/GF/Command/Importing.hs | 32 +- src/compiler/GF/Command/Interpreter.hs | 13 +- src/compiler/GF/Compile.hs | 40 +- src/compiler/GF/Compile/CFGtoPGF.hs | 123 ++-- src/compiler/GF/Compile/CheckGrammar.hs | 1 - .../GF/Compile/Compute/ConcreteNew.hs | 1 - src/compiler/GF/Compile/Export.hs | 3 +- src/compiler/GF/Compile/GenerateBC.hs | 30 +- src/compiler/GF/Compile/GeneratePMCFG.hs | 19 +- src/compiler/GF/Compile/GetGrammar.hs | 2 - src/compiler/GF/Compile/GrammarToPGF.hs | 393 +++++----- src/compiler/GF/Compile/PGFtoHaskell.hs | 26 +- src/compiler/GF/Compile/PGFtoJS.hs | 79 +- src/compiler/GF/Compile/PGFtoProlog.hs | 136 +--- src/compiler/GF/Compile/PGFtoPython.hs | 58 +- src/compiler/GF/Compile/ToAPI.hs | 3 +- .../GF/Compile/TypeCheck/RConcrete.hs | 1 - src/compiler/GF/CompileInParallel.hs | 9 +- src/compiler/GF/Compiler.hs | 40 +- src/compiler/GF/Grammar/Binary.hs | 53 +- src/compiler/GF/Grammar/Printer.hs | 1 - src/compiler/GF/Infra/CheckM.hs | 1 - src/compiler/GF/Infra/Ident.hs | 24 +- src/compiler/GF/Infra/Location.hs | 1 - src/compiler/GF/Infra/Option.hs | 17 +- src/compiler/GF/Infra/UseIO.hs | 37 +- src/compiler/GF/Interactive.hs | 76 +- src/compiler/GF/Main.hs | 21 +- src/compiler/GF/Server.hs | 18 +- src/compiler/GF/Speech/GSL.hs | 1 - src/compiler/GF/Speech/JSGF.hs | 1 - src/compiler/GF/Speech/PGFToCFG.hs | 65 +- src/compiler/GF/Speech/SRGS_ABNF.hs | 1 - src/compiler/GF/Speech/VoiceXML.hs | 7 +- src/compiler/GF/Text/Transliterations.hs | 8 - src/runtime/c/Makefile.am | 5 +- src/runtime/c/gu/defs.h | 10 - src/runtime/c/pgf/expr.c | 55 +- src/runtime/c/pgf/expr.h | 7 +- src/runtime/c/pgf/linearizer.c | 90 ++- src/runtime/c/pgf/linearizer.h | 1 - src/runtime/c/pgf/lookup.c | 11 +- src/runtime/c/pgf/parser.c | 40 +- src/runtime/c/pgf/pgf.c | 61 +- src/runtime/c/pgf/pgf.h | 16 +- src/runtime/c/pgf/printer.c | 23 +- src/runtime/c/pgf/reader.c | 13 +- src/runtime/c/pgf/writer.c | 33 +- src/runtime/c/pgf/writer.h | 5 +- src/runtime/c/sg/sqlite3Btree.c | 159 ++-- src/runtime/haskell-bind/PGF.hs | 275 ++++++- src/runtime/haskell-bind/PGF/Internal.hs | 164 ++++- src/runtime/haskell-bind/PGF2.hsc | 693 ++++++++++++++---- src/runtime/haskell-bind/PGF2/Expr.hsc | 78 +- src/runtime/haskell-bind/PGF2/FFI.hsc | 33 +- src/runtime/haskell-bind/PGF2/Internal.hsc | 175 ++++- src/runtime/haskell-bind/PGF2/Type.hsc | 17 +- src/runtime/haskell-bind/pgf2.cabal | 18 +- src/runtime/haskell/PGF.hs | 21 +- src/runtime/haskell/PGF/ByteCode.hs | 2 +- src/runtime/haskell/PGF/Data.hs | 10 +- src/runtime/haskell/PGF/Expr.hs | 11 + src/runtime/haskell/PGF/Forest.hs | 26 +- src/runtime/haskell/PGF/Internal.hs | 174 ++++- src/runtime/haskell/PGF/Linearize.hs | 7 - src/runtime/haskell/PGF/Macros.hs | 1 - src/runtime/haskell/PGF/Morphology.hs | 3 +- src/runtime/haskell/PGF/OldBinary.hs | 4 +- src/runtime/haskell/PGF/Optimize.hs | 23 +- src/runtime/haskell/PGF/Parse.hs | 8 +- src/runtime/haskell/PGF/Printer.hs | 6 +- src/runtime/haskell/PGF/VisualizeTree.hs | 1 - src/runtime/haskell/pgf.cabal | 9 +- src/runtime/python/pypgf.c | 42 +- src/server/PGFService.hs | 17 +- 80 files changed, 2618 insertions(+), 1527 deletions(-) diff --git a/gf.cabal b/gf.cabal index bd1b04171..3a5f2822e 100644 --- a/gf.cabal +++ b/gf.cabal @@ -47,6 +47,10 @@ custom-setup filepath, process >=1.0.1.1 +--source-repository head +-- type: darcs +-- location: http://www.grammaticalframework.org/ + source-repository head type: git location: https://github.com/GrammaticalFramework/gf-core.git @@ -67,99 +71,38 @@ flag network-uri -- Description: Make -new-comp the default -- Default: True -flag custom-binary - Description: Use a customised version of the binary package - Default: True - Manual: True - flag c-runtime Description: Include functionality from the C run-time library (which must be installed already) Default: False -Library + + +executable gf + hs-source-dirs: src/programs, src/runtime/haskell/binary + main-is: gf-main.hs default-language: Haskell2010 - build-depends: base >= 4.6 && <5, - array, - containers, - bytestring, - utf8-string, - random, - pretty, - mtl, - exceptions - hs-source-dirs: src/runtime/haskell - - if flag(custom-binary) - other-modules: - -- not really part of GF but I have changed the original binary library - -- and we have to keep the copy for now. - Data.Binary - Data.Binary.Put - Data.Binary.Get - Data.Binary.Builder - Data.Binary.IEEE754 - else - build-depends: binary, data-binary-ieee754 - ---ghc-options: -fwarn-unused-imports ---if impl(ghc>=7.8) --- ghc-options: +RTS -A20M -RTS - ghc-prof-options: -fprof-auto - extensions: - - exposed-modules: - PGF - PGF.Internal - PGF.Haskell - - other-modules: - PGF.Data - PGF.Macros - PGF.Binary - PGF.Optimize - PGF.Printer - PGF.CId - PGF.Expr - PGF.Generate - PGF.Linearize - PGF.Morphology - PGF.Paraphrase - PGF.Parse - PGF.Probabilistic - PGF.SortTop - PGF.Tree - PGF.Type - PGF.TypeCheck - PGF.Forest - PGF.TrieMap - PGF.VisualizeTree - PGF.ByteCode - PGF.OldBinary - PGF.Utilities + build-depends: base, filepath, directory, time, time-compat, old-locale, pretty, mtl, array, random, + process, haskeline, parallel>=3, exceptions, bytestring, utf8-string, containers + ghc-options: -threaded if flag(c-runtime) - exposed-modules: PGF2 - other-modules: PGF2.FFI PGF2.Expr PGF2.Type - GF.Interactive2 GF.Command.Commands2 - hs-source-dirs: src/runtime/haskell-bind - build-tools: hsc2hs - extra-libraries: pgf gu - c-sources: src/runtime/haskell-bind/utils.c - cc-options: -std=c99 + build-depends: pgf2 + else + build-depends: pgf ----- GF compiler as a library: + if impl(ghc>=7.0) + ghc-options: -rtsopts -with-rtsopts=-I5 + if impl(ghc<7.8) + ghc-options: -with-rtsopts=-K64M - build-depends: filepath, directory, time, time-compat, old-locale, - process, haskeline, parallel>=3 + ghc-prof-options: -auto-all hs-source-dirs: src/compiler - exposed-modules: + + other-modules: GF GF.Support GF.Text.Pretty - GF.Text.Lexing - - other-modules: GF.Main GF.Compiler GF.Interactive GF.Compile GF.CompileInParallel GF.CompileOne GF.Compile.GetGrammar @@ -182,7 +125,6 @@ Library GF.Compile.CheckGrammar GF.Compile.Compute.AppPredefined GF.Compile.Compute.ConcreteNew --- GF.Compile.Compute.ConcreteNew1 GF.Compile.Compute.Predef GF.Compile.Compute.Value GF.Compile.ExampleBased @@ -192,6 +134,7 @@ Library GF.Compile.GrammarToPGF GF.Compile.Multi GF.Compile.Optimize + GF.Compile.OptimizePGF GF.Compile.PGFtoHaskell GF.Compile.PGFtoJava GF.Haskell @@ -268,8 +211,17 @@ Library GF.System.Signal GF.Text.Clitics GF.Text.Coding + GF.Text.Lexing GF.Text.Transliterations Paths_gf + + -- not really part of GF but I have changed the original binary library + -- and we have to keep the copy for now. + Data.Binary + Data.Binary.Put + Data.Binary.Get + Data.Binary.Builder + Data.Binary.IEEE754 if flag(c-runtime) cpp-options: -DC_RUNTIME @@ -307,7 +259,6 @@ Library if impl(ghc>=7.8) build-tools: happy>=1.19, alex>=3.1 --- ghc-options: +RTS -A20M -RTS else build-tools: happy, alex>=3 @@ -318,36 +269,13 @@ Library else build-depends: unix, terminfo>=0.4 - if impl(ghc>=8.2) - ghc-options: -fhide-source-paths -Executable gf - hs-source-dirs: src/programs - main-is: gf-main.hs +test-suite rgl-tests + type: exitcode-stdio-1.0 + main-is: run.hs + hs-source-dirs: lib/tests/ + build-depends: base, HTF, process, HUnit, filepath, directory default-language: Haskell2010 - build-depends: gf, base - ghc-options: -threaded ---ghc-options: -fwarn-unused-imports - - if impl(ghc>=7.0) - ghc-options: -rtsopts -with-rtsopts=-I5 - if impl(ghc<7.8) - ghc-options: -with-rtsopts=-K64M - - ghc-prof-options: -auto-all - - if impl(ghc>=8.2) - ghc-options: -fhide-source-paths - -executable pgf-shell ---if !flag(c-runtime) - buildable: False - main-is: pgf-shell.hs - hs-source-dirs: src/runtime/haskell-bind/examples - build-depends: gf, base, containers, mtl, lifted-base - default-language: Haskell2010 - if impl(ghc>=7.0) - ghc-options: -rtsopts test-suite gf-tests type: exitcode-stdio-1.0 diff --git a/src/compiler/GF/Command/Abstract.hs b/src/compiler/GF/Command/Abstract.hs index 25760e41f..ad6a9c5f2 100644 --- a/src/compiler/GF/Command/Abstract.hs +++ b/src/compiler/GF/Command/Abstract.hs @@ -11,7 +11,7 @@ type Pipe = [Command] data Command = Command Ident [Option] Argument - deriving (Eq,Ord,Show) + deriving Show data Option = OOpt Ident @@ -29,7 +29,7 @@ data Argument | ATerm Term | ANoArg | AMacro Ident - deriving (Eq,Ord,Show) + deriving Show valCIdOpts :: String -> CId -> [Option] -> CId valCIdOpts flag def opts = @@ -49,6 +49,24 @@ valStrOpts flag def opts = v:_ -> valueString v _ -> def +maybeCIdOpts :: String -> a -> (CId -> a) -> [Option] -> a +maybeCIdOpts flag def fn opts = + case [v | OFlag f (VId v) <- opts, f == flag] of + (v:_) -> fn (mkCId v) + _ -> def + +maybeIntOpts :: String -> a -> (Int -> a) -> [Option] -> a +maybeIntOpts flag def fn opts = + case [v | OFlag f (VInt v) <- opts, f == flag] of + (v:_) -> fn v + _ -> def + +maybeStrOpts :: String -> a -> (String -> a) -> [Option] -> a +maybeStrOpts flag def fn opts = + case listFlags flag opts of + v:_ -> fn (valueString v) + _ -> def + listFlags flag opts = [v | OFlag f v <- opts, f == flag] valueString v = diff --git a/src/compiler/GF/Command/CommandInfo.hs b/src/compiler/GF/Command/CommandInfo.hs index b0b5869c3..1e8d6bba1 100644 --- a/src/compiler/GF/Command/CommandInfo.hs +++ b/src/compiler/GF/Command/CommandInfo.hs @@ -3,8 +3,7 @@ import GF.Command.Abstract(Option,Expr,Term) import GF.Text.Pretty(render) import GF.Grammar.Printer() -- instance Pretty Term import GF.Grammar.Macros(string2term) -import qualified PGF as H(showExpr) -import qualified PGF.Internal as H(Literal(LStr),Expr(ELit)) ---- +import PGF(mkStr,unStr,showExpr) data CommandInfo m = CommandInfo { exec :: [Option] -> CommandArguments -> m CommandOutput, @@ -44,15 +43,13 @@ newtype CommandOutput = Piped (CommandArguments,String) ---- errors, etc -- ** Converting command output fromStrings ss = Piped (Strings ss, unlines ss) -fromExprs es = Piped (Exprs es,unlines (map (H.showExpr []) es)) +fromExprs es = Piped (Exprs es,unlines (map (showExpr []) es)) fromString s = Piped (Strings [s], s) pipeWithMessage es msg = Piped (Exprs es,msg) pipeMessage msg = Piped (Exprs [],msg) pipeExprs es = Piped (Exprs es,[]) -- only used in emptyCommandInfo void = Piped (Exprs [],"") -stringAsExpr = H.ELit . H.LStr -- should be a pattern macro - -- ** Converting command input toStrings args = @@ -62,22 +59,22 @@ toStrings args = Term t -> [render t] where showAsString first t = - case t of - H.ELit (H.LStr s) -> s - _ -> ['\n'|not first] ++ - H.showExpr [] t ---newline needed in other cases than the first + case unStr t of + Just s -> s + Nothing -> ['\n'|not first] ++ + showExpr [] t ---newline needed in other cases than the first toExprs args = case args of Exprs es -> es - Strings ss -> map stringAsExpr ss - Term t -> [stringAsExpr (render t)] + Strings ss -> map mkStr ss + Term t -> [mkStr (render t)] toTerm args = case args of Term t -> t Strings ss -> string2term $ unwords ss -- hmm - Exprs es -> string2term $ unwords $ map (H.showExpr []) es -- hmm + Exprs es -> string2term $ unwords $ map (showExpr []) es -- hmm -- ** Creating documentation diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs index 72e57fcf5..83fa7e0ac 100644 --- a/src/compiler/GF/Command/Commands.hs +++ b/src/compiler/GF/Command/Commands.hs @@ -3,14 +3,10 @@ module GF.Command.Commands ( PGFEnv,HasPGFEnv(..),pgf,mos,pgfEnv,pgfCommands, options,flags, ) where -import Prelude hiding (putStrLn,(<>)) -- GHC 8.4.1 clash with Text.PrettyPrint +import Prelude hiding (putStrLn) import PGF - -import PGF.Internal(lookStartCat,functionsToCat,lookValCat,restrictPGF,hasLin) -import PGF.Internal(abstract,funs,cats,Expr(EFun)) ---- -import PGF.Internal(ppFun,ppCat) -import PGF.Internal(optimizePGF) +import PGF.Internal(writePGF) import GF.Compile.Export import GF.Compile.ToAPI @@ -28,7 +24,7 @@ import GF.Command.TreeOperations ---- temporary place for typecheck and compute import GF.Data.Operations -import PGF.Internal (encodeFile) +-- import PGF.Internal (encodeFile) import Data.List(intersperse,nub) import Data.Maybe import qualified Data.Map as Map @@ -37,16 +33,22 @@ import Data.List (sort) --import Debug.Trace -data PGFEnv = Env {pgf::PGF,mos::Map.Map Language Morpho} +data PGFEnv = Env {pgf::Maybe PGF,mos::Map.Map Language Morpho} -pgfEnv pgf = Env pgf mos - where mos = Map.fromList [(la,buildMorpho pgf la) | la <- languages pgf] +pgfEnv mb_pgf = Env mb_pgf mos + where mos = case mb_pgf of + Just pgf -> Map.fromList [(la,buildMorpho pgf la) | la <- languages pgf] + Nothing -> Map.empty class (Functor m,Monad m,MonadSIO m) => HasPGFEnv m where getPGFEnv :: m PGFEnv instance (Monad m,HasPGFEnv m) => TypeCheckArg m where - typeCheckArg e = (either (fail . render . ppTcError) (return . fst) - . flip inferExpr e . pgf) =<< getPGFEnv + typeCheckArg e = do env <- getPGFEnv + case pgf env of + Just gr -> either (fail . render . ppTcError) + (return . fst) + (inferExpr gr e) + Nothing -> fail "Import a grammar before using this command" pgfCommands :: HasPGFEnv m => Map.Map String (CommandInfo m) pgfCommands = Map.fromList [ @@ -61,7 +63,7 @@ pgfCommands = Map.fromList [ "by the view flag. The target format is png, unless overridden by the", "flag -format. Results from multiple trees are combined to pdf with convert (ImageMagick)." ], - exec = getEnv $ \ opts arg (Env pgf mos) -> do + exec = needPGF $ \ opts arg pgf mos -> do let es = toExprs arg let langs = optLangs pgf opts if isOpt "giza" opts @@ -95,6 +97,7 @@ pgfCommands = Map.fromList [ ("view", "program to open the resulting file") ] }), + ("ca", emptyCommandInfo { longname = "clitic_analyse", synopsis = "print the analyses of all words into stems and clitics", @@ -105,15 +108,15 @@ pgfCommands = Map.fromList [ "by the flag '-clitics'. The list of stems is given as the list of words", "of the language given by the '-lang' flag." ], - exec = getEnv $ \opts ts env -> case opts of + exec = needPGF $ \opts ts pgf mos -> case opts of _ | isOpt "raw" opts -> return . fromString . unlines . map (unwords . map (concat . intersperse "+")) . - map (getClitics (isInMorpho (optMorpho env opts)) (optClitics opts)) . + map (getClitics (isInMorpho (optMorpho pgf mos opts)) (optClitics opts)) . concatMap words $ toStrings ts _ -> return . fromStrings . - getCliticsText (isInMorpho (optMorpho env opts)) (optClitics opts) . + getCliticsText (isInMorpho (optMorpho pgf mos opts)) (optClitics opts) . concatMap words $ toStrings ts, flags = [ ("clitics","the list of possible clitics (comma-separated, no spaces)"), @@ -146,19 +149,18 @@ pgfCommands = Map.fromList [ ], flags = [ ("file","the file to be converted (suffix .gfe must be given)"), - ("lang","the language in which to parse"), - ("probs","file with probabilities to rank the parses") + ("lang","the language in which to parse") ], - exec = getEnv $ \ opts _ env@(Env pgf mos) -> do + exec = needPGF $ \ opts _ pgf mos -> do let file = optFile opts - pgf <- optProbs opts pgf let printer = if (isOpt "api" opts) then exprToAPI else (showExpr []) - let conf = configureExBased pgf (optMorpho env opts) (optLang pgf opts) printer + let conf = configureExBased pgf (optMorpho pgf mos opts) (optLang pgf opts) printer (file',ws) <- restricted $ parseExamplesInGrammar conf file if null ws then return () else putStrLn ("unknown words: " ++ unwords ws) return (fromString ("wrote " ++ file')), needsTypeCheck = False }), + ("gr", emptyCommandInfo { longname = "generate_random", synopsis = "generate random trees in the current abstract syntax", @@ -180,11 +182,9 @@ pgfCommands = Map.fromList [ ("cat","generation category"), ("lang","uses only functions that have linearizations in all these languages"), ("number","number of trees generated"), - ("depth","the maximum generation depth"), - ("probs", "file with biased probabilities (format 'f 0.4' one by line)") + ("depth","the maximum generation depth") ], - exec = getEnv $ \ opts arg (Env pgf mos) -> do - pgf <- optProbs opts (optRestricted opts pgf) + exec = needPGF $ \ opts arg pgf mos -> do gen <- newStdGen let dp = valIntOpts "depth" 4 opts let ts = case mexp (toExprs arg) of @@ -192,6 +192,7 @@ pgfCommands = Map.fromList [ Nothing -> generateRandomDepth gen pgf (optType pgf opts) (Just dp) returnFromExprs $ take (optNum opts) ts }), + ("gt", emptyCommandInfo { longname = "generate_trees", synopsis = "generates a list of trees, by default exhaustive", @@ -213,14 +214,14 @@ pgfCommands = Map.fromList [ mkEx "gt -cat=NP -depth=2 -- trees in the category NP to depth 2", mkEx "gt (AdjCN ? (UseN ?)) -- trees of form (AdjCN ? (UseN ?))" ], - exec = getEnv $ \ opts arg (Env pgf mos) -> do - let pgfr = optRestricted opts pgf + exec = needPGF $ \opts arg pgf mos -> do let dp = valIntOpts "depth" 4 opts let ts = case mexp (toExprs arg) of - Just ex -> generateFromDepth pgfr ex (Just dp) - Nothing -> generateAllDepth pgfr (optType pgf opts) (Just dp) + Just ex -> generateFromDepth pgf ex (Just dp) + Nothing -> generateAllDepth pgf (optType pgf opts) (Just dp) returnFromExprs $ take (optNumInf opts) ts }), + ("i", emptyCommandInfo { longname = "import", synopsis = "import a grammar from source code or compiled .pgf file", @@ -241,13 +242,13 @@ pgfCommands = Map.fromList [ ("probs","file with biased probabilities for generation") ], options = [ - -- ["gfo", "src", "no-cpu", "cpu", "quiet", "verbose"] ("retain","retain operations (used for cc command)"), ("src", "force compilation from source"), ("v", "be verbose - show intermediate status information") ], needsTypeCheck = False }), + ("l", emptyCommandInfo { longname = "linearize", synopsis = "convert an abstract syntax expression to string", @@ -267,7 +268,7 @@ pgfCommands = Map.fromList [ mkEx "gr -lang=LangHin -cat=Cl | l -table -to_devanagari -- hindi table", mkEx "l -unlexer=\"LangAra=to_arabic LangHin=to_devanagari\" -- different unlexers" ], - exec = getEnv $ \ opts ts (Env pgf mos) -> return . fromStrings . optLins pgf opts $ toExprs ts, + exec = needPGF $ \ opts ts pgf mos -> return . fromStrings . optLins pgf opts $ toExprs ts, options = [ ("all", "show all forms and variants, one by line (cf. l -list)"), ("bracket","show tree structure with brackets and paths to nodes"), @@ -275,7 +276,6 @@ pgfCommands = Map.fromList [ ("list","show all forms and variants, comma-separated on one line (cf. l -all)"), ("multi","linearize to all languages (default)"), ("table","show all forms labelled by parameters"), - ("tabtreebank","show the tree and its linearizations on a tab-separated line"), ("treebank","show the tree and tag linearizations with language names") ] ++ stringOpOptions, flags = [ @@ -283,25 +283,7 @@ pgfCommands = Map.fromList [ ("unlexer","set unlexers separately to each language (space-separated)") ] }), - ("lc", emptyCommandInfo { - longname = "linearize_chunks", - synopsis = "linearize a tree that has metavariables in maximal chunks without them", - explanation = unlines [ - "A hopefully temporary command, intended to work around the type checker that fails", - "trees where a function node is a metavariable." - ], - examples = [ - mkEx "l -lang=LangSwe,LangNor -chunks ? a b (? c d)" - ], - exec = getEnv $ \ opts ts (Env pgf mos) -> return . fromStrings $ optLins pgf (opts ++ [OOpt "chunks"]) (toExprs ts), - options = [ - ("treebank","show the tree and tag linearizations with language names") - ] ++ stringOpOptions, - flags = [ - ("lang","the languages of linearization (comma-separated, no spaces)") - ], - needsTypeCheck = False - }), + ("ma", emptyCommandInfo { longname = "morpho_analyse", synopsis = "print the morphological analyses of all words in the string", @@ -309,17 +291,17 @@ pgfCommands = Map.fromList [ "Prints all the analyses of space-separated words in the input string,", "using the morphological analyser of the actual grammar (see command pg)" ], - exec = getEnv $ \opts ts env -> case opts of + exec = needPGF $ \opts ts pgf mos -> case opts of _ | isOpt "missing" opts -> return . fromString . unwords . - morphoMissing (optMorpho env opts) . + morphoMissing (optMorpho pgf mos opts) . concatMap words $ toStrings ts _ | isOpt "known" opts -> return . fromString . unwords . - morphoKnown (optMorpho env opts) . + morphoKnown (optMorpho pgf mos opts) . concatMap words $ toStrings ts _ -> return . fromString . unlines . - map prMorphoAnalysis . concatMap (morphos env opts) . + map prMorphoAnalysis . concatMap (morphos pgf mos opts) . concatMap words $ toStrings ts, flags = [ ("lang","the languages of analysis (comma-separated, no spaces)") @@ -334,18 +316,16 @@ pgfCommands = Map.fromList [ longname = "morpho_quiz", synopsis = "start a morphology quiz", syntax = "mq (-cat=CAT)? (-probs=FILE)? TREE?", - exec = getEnv $ \ opts arg (Env pgf mos) -> do + exec = needPGF $ \ opts arg pgf mos -> do let lang = optLang pgf opts let typ = optType pgf opts - pgf <- optProbs opts pgf let mt = mexp (toExprs arg) restricted $ morphologyQuiz mt pgf lang typ return void, flags = [ ("lang","language of the quiz"), ("cat","category of the quiz"), - ("number","maximum number of questions"), - ("probs","file with biased probabilities for generation") + ("number","maximum number of questions") ] }), @@ -362,7 +342,7 @@ pgfCommands = Map.fromList [ "the parser. For example if -openclass=\"A,N,V\" is given, the parser", "will accept unknown adjectives, nouns and verbs with the resource grammar." ], - exec = getEnv $ \ opts ts (Env pgf mos) -> + exec = needPGF $ \opts ts pgf mos -> return $ fromParse opts (concat [map ((,) s) (par pgf opts s) | s <- toStrings ts]), flags = [ ("cat","target category of parsing"), @@ -374,6 +354,7 @@ pgfCommands = Map.fromList [ ("bracket","prints the bracketed string from the parser") ] }), + ("pg", emptyCommandInfo { ----- longname = "print_grammar", synopsis = "print the actual grammar with the given printer", @@ -393,7 +374,7 @@ pgfCommands = Map.fromList [ " " ++ opt ++ "\t\t" ++ expl | ((opt,_),expl) <- outputFormatsExpl, take 1 expl /= "*" ]), - exec = getEnv $ \opts _ env -> prGrammar env opts, + exec = needPGF $ \opts _ pgf mos -> prGrammar pgf mos opts, flags = [ --"cat", ("file", "set the file name when printing with -pgf option"), @@ -415,6 +396,7 @@ pgfCommands = Map.fromList [ mkEx ("pg -funs | ? grep \" S ;\" -- show functions with value cat S") ] }), + ("pt", emptyCommandInfo { longname = "put_tree", syntax = "pt OPT? TREE", @@ -428,11 +410,12 @@ pgfCommands = Map.fromList [ examples = [ mkEx "pt -compute (plus one two) -- compute value" ], - exec = getEnv $ \ opts arg (Env pgf mos) -> + exec = needPGF $ \opts arg pgf mos -> returnFromExprs . takeOptNum opts . treeOps pgf opts $ toExprs arg, options = treeOpOptions undefined{-pgf-}, flags = [("number","take at most this many trees")] ++ treeOpFlags undefined{-pgf-} }), + ("rf", emptyCommandInfo { longname = "read_file", synopsis = "read string or tree input from a file", @@ -445,10 +428,9 @@ pgfCommands = Map.fromList [ ], options = [ ("lines","return the list of lines, instead of the singleton of all contents"), - ("paragraphs","return the list of paragraphs, as separated by empty lines"), ("tree","convert strings into trees") ], - exec = getEnv $ \ opts _ (Env pgf mos) -> do + exec = needPGF $ \ opts _ pgf mos -> do let file = valStrOpts "file" "_gftmp" opts let exprs [] = ([],empty) exprs ((n,s):ls) | null s @@ -471,10 +453,10 @@ pgfCommands = Map.fromList [ _ | isOpt "tree" opts -> returnFromLines [(1::Int,s)] _ | isOpt "lines" opts -> return (fromStrings $ lines s) - _ | isOpt "paragraphs" opts -> return (fromStrings $ toParagraphs $ lines s) _ -> return (fromString s), flags = [("file","the input file name")] }), + ("rt", emptyCommandInfo { longname = "rank_trees", synopsis = "show trees in an order of decreasing probability", @@ -484,18 +466,14 @@ pgfCommands = Map.fromList [ "by the file given by flag -probs=FILE, where each line has the form", "'function probability', e.g. 'youPol_Pron 0.01'." ], - exec = getEnv $ \ opts arg (Env pgf mos) -> do + exec = needPGF $ \ opts arg pgf mos -> do let ts = toExprs arg - pgf <- optProbs opts pgf let tds = rankTreesByProbs pgf ts if isOpt "v" opts then putStrLn $ unlines [showExpr [] t ++ "\t--" ++ show d | (t,d) <- tds] else return () returnFromExprs $ map fst tds, - flags = [ - ("probs","probabilities from this file (format 'f 0.6' per line)") - ], options = [ ("v","show all trees with their probability scores") ], @@ -503,24 +481,23 @@ pgfCommands = Map.fromList [ mkEx "p \"you are here\" | rt -probs=probs | pt -number=1 -- most probable result" ] }), + ("tq", emptyCommandInfo { longname = "translation_quiz", syntax = "tq -from=LANG -to=LANG (-cat=CAT)? (-probs=FILE)? TREE?", synopsis = "start a translation quiz", - exec = getEnv $ \ opts arg (Env pgf mos) -> do + exec = needPGF $ \ opts arg pgf mos -> do let from = optLangFlag "from" pgf opts let to = optLangFlag "to" pgf opts let typ = optType pgf opts let mt = mexp (toExprs arg) - pgf <- optProbs opts pgf restricted $ translationQuiz mt pgf from to typ return void, flags = [ ("from","translate from this language"), ("to","translate to this language"), ("cat","translate in this category"), - ("number","the maximum number of questions"), - ("probs","file with biased probabilities for generation") + ("number","the maximum number of questions") ], examples = [ mkEx ("tq -from=Eng -to=Swe -- any trees in startcat"), @@ -528,7 +505,6 @@ pgfCommands = Map.fromList [ ] }), - ("vd", emptyCommandInfo { longname = "visualize_dependency", synopsis = "show word dependency tree graphically", @@ -546,7 +522,7 @@ pgfCommands = Map.fromList [ "flag -format. Results from multiple trees are combined to pdf with convert (ImageMagick).", "See also 'vp -showdep' for another visualization of dependencies." ], - exec = getEnv $ \ opts arg (Env pgf mos) -> do + exec = needPGF $ \ opts arg pgf mos -> do let absname = abstractName pgf let es = toExprs arg let debug = isOpt "v" opts @@ -595,7 +571,6 @@ pgfCommands = Map.fromList [ ] }), - ("vp", emptyCommandInfo { longname = "visualize_parse", synopsis = "show parse tree graphically", @@ -607,7 +582,7 @@ pgfCommands = Map.fromList [ "by the view flag. The target format is png, unless overridden by the", "flag -format. Results from multiple trees are combined to pdf with convert (ImageMagick)." ], - exec = getEnv $ \ opts arg (Env pgf mos) -> do + exec = needPGF $ \ opts arg pgf mos -> do let es = toExprs arg let lang = optLang pgf opts let gvOptions = GraphvizOptions {noLeaves = isOpt "noleaves" opts && not (isOpt "showleaves" opts), @@ -660,7 +635,6 @@ pgfCommands = Map.fromList [ ] }), - ("vt", emptyCommandInfo { longname = "visualize_tree", synopsis = "show a set of trees graphically", @@ -673,7 +647,7 @@ pgfCommands = Map.fromList [ "flag -format. Results from multiple trees are combined to pdf with convert (ImageMagick).", "With option -mk, use for showing library style function names of form 'mkC'." ], - exec = getEnv $ \ opts arg (Env pgf mos) -> + exec = needPGF $ \ opts arg pgf mos -> let es = toExprs arg in if isOpt "mk" opts then return $ fromString $ unlines $ map (tree2mk pgf) es @@ -707,6 +681,7 @@ pgfCommands = Map.fromList [ ("view","program to open the resulting file (default \"open\")") ] }), + ("ai", emptyCommandInfo { longname = "abstract_info", syntax = "ai IDENTIFIER or ai EXPR", @@ -719,43 +694,42 @@ pgfCommands = Map.fromList [ "If a whole expression is given it prints the expression with refined", "metavariables and the type of the expression." ], - exec = getEnv $ \ opts arg (Env pgf mos) -> do + exec = needPGF $ \opts arg pgf mos -> do case toExprs arg of - [EFun id] -> case Map.lookup id (funs (abstract pgf)) of - Just fd -> do putStrLn $ render (ppFun id fd) - let (_,_,_,prob) = fd - putStrLn ("Probability: "++show prob) - return void - Nothing -> case Map.lookup id (cats (abstract pgf)) of - Just cd -> do putStrLn $ - render (ppCat id cd $$ - if null (functionsToCat pgf id) - then empty - else ' ' $$ - vcat [ppFun fid (ty,0,Just ([],[]),0) | (fid,ty) <- functionsToCat pgf id] $$ - ' ') - let (_,_,prob) = cd - putStrLn ("Probability: "++show prob) - return void - Nothing -> do putStrLn ("unknown category of function identifier "++show id) - return void - [e] -> case inferExpr pgf e of - Left tcErr -> error $ render (ppTcError tcErr) - Right (e,ty) -> do putStrLn ("Expression: "++showExpr [] e) - putStrLn ("Type: "++showType [] ty) - putStrLn ("Probability: "++show (probTree pgf e)) - return void + [e] -> case unApp e of + Just (id, []) -> case functionType pgf id of + Just ty -> do putStrLn (showFun pgf id ty) + putStrLn ("Probability: "++show (treeProbability pgf e)) + return void + Nothing -> case categoryContext pgf id of + Just hypos -> do putStrLn ("cat "++showCId id++if null hypos then "" else ' ':showContext [] hypos) + let ls = [showFun pgf fn ty | fn <- functionsByCat pgf id, Just ty <- [functionType pgf fn]] + if null ls + then return () + else putStrLn (unlines ("":ls)) + putStrLn ("Probability: "++show (categoryProbability pgf id)) + return void + Nothing -> do putStrLn ("unknown category of function identifier "++show id) + return void + _ -> case inferExpr pgf e of + Left tcErr -> error $ render (ppTcError tcErr) + Right (e,ty) -> do putStrLn ("Expression: "++showExpr [] e) + putStrLn ("Type: "++showType [] ty) + putStrLn ("Probability: "++show (treeProbability pgf e)) + return void _ -> do putStrLn "a single identifier or expression is expected from the command" return void, needsTypeCheck = False }) ] where - getEnv exec opts ts = liftSIO . exec opts ts =<< getPGFEnv + needPGF exec opts ts = do + Env mb_pgf mos <- getPGFEnv + case mb_pgf of + Just pgf -> liftSIO $ exec opts ts pgf mos + _ -> fail "Import a grammar before using this command" - par pgf opts s = case optOpenTypes opts of - [] -> [parse_ pgf lang (optType pgf opts) (Just dp) s | lang <- optLangs pgf opts] - open_typs -> [parseWithRecovery pgf lang (optType pgf opts) open_typs (Just dp) s | lang <- optLangs pgf opts] + par pgf opts s = [parse_ pgf lang (optType pgf opts) (Just dp) s | lang <- optLangs pgf opts] where dp = valIntOpts "depth" 4 opts @@ -794,9 +768,6 @@ pgfCommands = Map.fromList [ _ | isOpt "treebank" opts -> (showCId (abstractName pgf) ++ ": " ++ showExpr [] t) : [showCId lang ++ ": " ++ s | lang <- optLangs pgf opts, s<-linear pgf opts lang t] - _ | isOpt "tabtreebank" opts -> - return $ concat $ intersperse "\t" $ (showExpr [] t) : - [s | lang <- optLangs pgf opts, s <- linear pgf opts lang t] _ | isOpt "chunks" opts -> map snd $ linChunks pgf opts t _ -> [s | lang <- optLangs pgf opts, s<-linear pgf opts lang t] linChunks pgf opts t = @@ -816,9 +787,12 @@ pgfCommands = Map.fromList [ -- replace each non-atomic constructor with mkC, where C is the val cat tree2mk pgf = showExpr [] . t2m where t2m t = case unApp t of - Just (cid,ts@(_:_)) -> mkApp (mk cid) (map t2m ts) - _ -> t - mk = mkCId . ("mk" ++) . showCId . lookValCat (abstract pgf) + Just (cid,ts@(_:_)) -> mkApp (mk cid) (map t2m ts) + _ -> t + mk f = case functionType pgf f of + Just ty -> let (_,cat,_) = unType ty + in mkCId ("mk" ++ showCId cat) + Nothing -> f unlex opts lang = stringOps Nothing (getUnlex opts lang ++ map prOpt opts) ---- @@ -845,8 +819,6 @@ pgfCommands = Map.fromList [ in cod : filter (/=cod) (map prOpt opts) _ -> map prOpt opts -} - optRestricted opts pgf = - restrictPGF (\f -> and [hasLin pgf la f | la <- optLangs pgf opts]) pgf optLang = optLangFlag "lang" optLangs = optLangsFlag "lang" @@ -860,26 +832,22 @@ pgfCommands = Map.fromList [ else (mkCId (showCId (abstractName pgf) ++ la)) optLangFlag f pgf opts = head $ optLangsFlag f pgf opts ++ [wildCId] - - optOpenTypes opts = case valStrOpts "openclass" "" opts of - "" -> [] - cats -> mapMaybe readType (chunks ',' cats) - +{- optProbs opts pgf = case valStrOpts "probs" "" opts of "" -> return pgf file -> do probs <- restricted $ readProbabilitiesFromFile file pgf return (setProbabilities probs pgf) - +-} optFile opts = valStrOpts "file" "_gftmp" opts optType pgf opts = - let str = valStrOpts "cat" (showCId $ lookStartCat pgf) opts - in case readType str of - Just ty -> case checkType pgf ty of - Left tcErr -> error $ render (ppTcError tcErr) - Right ty -> ty - Nothing -> error ("Can't parse '"++str++"' as a type") + let readOpt str = case readType str of + Just ty -> case checkType pgf ty of + Left tcErr -> error $ render (ppTcError tcErr) + Right ty -> ty + Nothing -> error ("Can't parse '"++str++"' as a type") + in maybeStrOpts "cat" (startCat pgf) readOpt opts optViewFormat opts = valStrOpts "format" "png" opts optViewGraph opts = valStrOpts "view" "open" opts optNum opts = valIntOpts "number" 1 opts @@ -890,34 +858,35 @@ pgfCommands = Map.fromList [ [] -> pipeMessage "no trees found" _ -> fromExprs es - prGrammar (Env pgf mos) opts + prGrammar pgf mos opts | isOpt "pgf" opts = do - let pgf1 = if isOpt "opt" opts then optimizePGF pgf else pgf let outfile = valStrOpts "file" (showCId (abstractName pgf) ++ ".pgf") opts - restricted $ encodeFile outfile pgf1 + restricted $ writePGF outfile pgf putStrLn $ "wrote file " ++ outfile return void | isOpt "cats" opts = return $ fromString $ unwords $ map showCId $ categories pgf - | isOpt "funs" opts = return $ fromString $ unlines $ map showFun $ funsigs pgf + | isOpt "funs" opts = return $ fromString $ unlines [showFun pgf f ty | f <- functions pgf, Just ty <- [functionType pgf f]] | isOpt "fullform" opts = return $ fromString $ concatMap (morpho mos "" prFullFormLexicon) $ optLangs pgf opts | isOpt "langs" opts = return $ fromString $ unwords $ map showCId $ languages pgf | isOpt "lexc" opts = return $ fromString $ concatMap (morpho mos "" prLexcLexicon) $ optLangs pgf opts - | isOpt "missing" opts = return $ fromString $ unlines $ [unwords (showCId la:":": map showCId cs) | - la <- optLangs pgf opts, let cs = missingLins pgf la] + | isOpt "missing" opts = return $ fromString $ unlines $ [unwords (showCId la:":":[showCId f | f <- functions pgf, not (hasLinearization pgf la f)]) | + la <- optLangs pgf opts] | isOpt "words" opts = return $ fromString $ concatMap (morpho mos "" prAllWords) $ optLangs pgf opts | otherwise = do fmt <- readOutputFormat (valStrOpts "printer" "pgf_pretty" opts) return $ fromString $ concatMap snd $ exportPGF noOptions fmt pgf - funsigs pgf = [(f,ty) | (f,(ty,_,_,_)) <- Map.assocs (funs (abstract pgf))] - showFun (f,ty) = showCId f ++ " : " ++ showType [] ty ++ " ;" + showFun pgf id ty = kwd++" "++showCId id ++ " : " ++ showType [] ty + where + kwd | functionIsDataCon pgf id = "data" + | otherwise = "fun" - morphos (Env pgf mos) opts s = + morphos pgf mos opts s = [(s,morpho mos [] (\mo -> lookupMorpho mo s) la) | la <- optLangs pgf opts] morpho mos z f la = maybe z f $ Map.lookup la mos - optMorpho (Env pgf mos) opts = morpho mos (error "no morpho") id (head (optLangs pgf opts)) + optMorpho pgf mos opts = morpho mos (error "no morpho") id (head (optLangs pgf opts)) optClitics opts = case valStrOpts "clitics" "" opts of "" -> [] @@ -961,7 +930,6 @@ prLexcLexicon mo = ws -> map ('+':) ws multichars = unwords $ nub $ concat [mkTags (words p) | (w,lps) <- morpho, (l,p) <- lps] - -- thick_A+(AAdj+Posit+Gen):thick's # ; prFullFormLexicon :: Morpho -> String prFullFormLexicon mo = @@ -971,7 +939,6 @@ prAllWords :: Morpho -> String prAllWords mo = unwords [w | (w,_) <- fullFormLexicon mo] -prMorphoAnalysis :: (String,[(Lemma,Analysis)]) -> String prMorphoAnalysis (w,lps) = unlines (w:[showCId l ++ " : " ++ p | (l,p) <- lps]) diff --git a/src/compiler/GF/Command/CommonCommands.hs b/src/compiler/GF/Command/CommonCommands.hs index 0b698e79c..69ccaf325 100644 --- a/src/compiler/GF/Command/CommonCommands.hs +++ b/src/compiler/GF/Command/CommonCommands.hs @@ -3,7 +3,6 @@ -- elsewhere module GF.Command.CommonCommands where import Data.List(sort) -import Data.Char (isSpace) import GF.Command.CommandInfo import qualified Data.Map as Map import GF.Infra.SIO @@ -117,13 +116,11 @@ commonCommands = fmap (mapCommandExec liftSIO) $ Map.fromList [ let (os,fs) = optsAndFlags opts trans <- optTranslit opts - case opts of - _ | isOpt "lines" opts -> return $ fromStrings $ map (trans . stringOps (envFlag fs) (map prOpt os)) $ toStrings x - _ | isOpt "paragraphs" opts -> return $ fromStrings $ map (trans . stringOps (envFlag fs) (map prOpt os)) $ toParagraphs $ toStrings x - _ -> return ((fromString . trans . stringOps (envFlag fs) (map prOpt os) . toString) x), + if isOpt "lines" opts + then return $ fromStrings $ map (trans . stringOps (envFlag fs) (map prOpt os)) $ toStrings x + else return ((fromString . trans . stringOps (envFlag fs) (map prOpt os) . toString) x), options = [ - ("lines","apply the operation separately to each input line, returning a list of lines"), - ("paragraphs","apply separately to each input paragraph (as separated by empty lines), returning a list of lines") + ("lines","apply the operation separately to each input line, returning a list of lines") ] ++ stringOpOptions, flags = [ @@ -272,11 +269,3 @@ trie = render . pptss . H.toTrie . map H.toATree -- ** Converting command input toString = unwords . toStrings toLines = unlines . toStrings - -toParagraphs = map (unwords . words) . toParas - where - toParas ls = case break (all isSpace) ls of - ([],[]) -> [] - ([],_:ll) -> toParas ll - (l, []) -> [unwords l] - (l, _:ll) -> unwords l : toParas ll diff --git a/src/compiler/GF/Command/Importing.hs b/src/compiler/GF/Command/Importing.hs index 59f84e409..db4476687 100644 --- a/src/compiler/GF/Command/Importing.hs +++ b/src/compiler/GF/Command/Importing.hs @@ -1,7 +1,7 @@ module GF.Command.Importing (importGrammar, importSource) where import PGF -import PGF.Internal(optimizePGF,unionPGF,msgUnionPGF) +import PGF.Internal(unionPGF) import GF.Compile import GF.Compile.Multi (readMulti) @@ -17,14 +17,16 @@ import GF.Data.ErrM import System.FilePath import qualified Data.Set as Set +import qualified Data.Map as Map +import Control.Monad(foldM) -- import a grammar in an environment where it extends an existing grammar -importGrammar :: PGF -> Options -> [FilePath] -> IO PGF -importGrammar pgf0 _ [] = return pgf0 +importGrammar :: Maybe PGF -> Options -> [FilePath] -> IO (Maybe PGF) +importGrammar pgf0 _ [] = return pgf0 importGrammar pgf0 opts files = case takeExtensions (last files) of - ".cf" -> importCF opts files getBNFCRules bnfc2cf - ".ebnf" -> importCF opts files getEBNFRules ebnf2cf + ".cf" -> fmap Just $ importCF opts files getBNFCRules bnfc2cf + ".ebnf" -> fmap Just $ importCF opts files getEBNFRules ebnf2cf ".gfm" -> do ascss <- mapM readMulti files let cs = concatMap snd ascss @@ -36,14 +38,15 @@ importGrammar pgf0 opts files = Bad msg -> do putStrLn ('\n':'\n':msg) return pgf0 ".pgf" -> do - pgf2 <- mapM readPGF files >>= return . foldl1 unionPGF - ioUnionPGF pgf0 pgf2 + mapM readPGF files >>= foldM ioUnionPGF pgf0 ext -> die $ "Unknown filename extension: " ++ show ext -ioUnionPGF :: PGF -> PGF -> IO PGF -ioUnionPGF one two = case msgUnionPGF one two of - (pgf, Just msg) -> putStrLn msg >> return pgf - (pgf,_) -> return pgf +ioUnionPGF :: Maybe PGF -> PGF -> IO (Maybe PGF) +ioUnionPGF Nothing two = return (Just two) +ioUnionPGF (Just one) two = + case unionPGF one two of + Nothing -> putStrLn "Abstract changed, previous concretes discarded." >> return (Just two) + Just pgf -> return (Just pgf) importSource :: Options -> [FilePath] -> IO SourceGrammar importSource opts files = fmap (snd.snd) (batchCompile opts files) @@ -56,7 +59,6 @@ importCF opts files get convert = impCF startCat <- case rules of (Rule cat _ _ : _) -> return cat _ -> fail "empty CFG" - let pgf = cf2pgf (last files) (mkCFG startCat Set.empty rules) - probs <- maybe (return . defaultProbabilities) readProbabilitiesFromFile (flag optProbsFile opts) pgf - return $ setProbabilities probs - $ if flag optOptimizePGF opts then optimizePGF pgf else pgf + probs <- maybe (return Map.empty) readProbabilitiesFromFile (flag optProbsFile opts) + let pgf = cf2pgf opts (last files) (mkCFG startCat Set.empty rules) probs + return pgf diff --git a/src/compiler/GF/Command/Interpreter.hs b/src/compiler/GF/Command/Interpreter.hs index bcb15d238..e1f8cd6f8 100644 --- a/src/compiler/GF/Command/Interpreter.hs +++ b/src/compiler/GF/Command/Interpreter.hs @@ -6,7 +6,7 @@ module GF.Command.Interpreter ( import GF.Command.CommandInfo import GF.Command.Abstract import GF.Command.Parse -import PGF.Internal(Expr(..)) +import PGF import GF.Infra.UseIO(putStrLnE) import Control.Monad(when) @@ -53,17 +53,8 @@ interpretPipe env cs = do -- | macro definition applications: replace ?i by (exps !! i) appCommand :: CommandArguments -> Command -> Command appCommand args c@(Command i os arg) = case arg of - AExpr e -> Command i os (AExpr (app e)) + AExpr e -> Command i os (AExpr (exprSubstitute e (toExprs args))) _ -> c - where - xs = toExprs args - - app e = case e of - EAbs b x e -> EAbs b x (app e) - EApp e1 e2 -> EApp (app e1) (app e2) - ELit l -> ELit l - EMeta i -> xs !! i - EFun x -> EFun x -- | return the trees to be sent in pipe, and the output possibly printed --interpret :: CommandEnv -> [Expr] -> Command -> SIO CommandOutput diff --git a/src/compiler/GF/Compile.hs b/src/compiler/GF/Compile.hs index 95a05dc09..fcfe09168 100644 --- a/src/compiler/GF/Compile.hs +++ b/src/compiler/GF/Compile.hs @@ -1,6 +1,6 @@ module GF.Compile (compileToPGF, link, batchCompile, srcAbsName) where -import GF.Compile.GrammarToPGF(mkCanon2pgf) +import GF.Compile.GrammarToPGF(grammar2PGF) import GF.Compile.ReadFiles(ModEnv,getOptionsFromFile,getAllFiles, importsOfModule) import GF.CompileOne(compileOne) @@ -14,7 +14,7 @@ import GF.Infra.UseIO(IOE,FullPath,liftIO,getLibraryDirectory,putIfVerb, justModuleName,extendPathEnv,putStrE,putPointE) import GF.Data.Operations(raise,(+++),err) -import Control.Monad(foldM,when,(<=<),filterM,liftM) +import Control.Monad(foldM,when,(<=<)) import GF.System.Directory(doesFileExist,getModificationTime) import System.FilePath((),isRelative,dropFileName) import qualified Data.Map as Map(empty,insert,elems) --lookup @@ -22,8 +22,7 @@ import Data.List(nub) import Data.Time(UTCTime) import GF.Text.Pretty(render,($$),(<+>),nest) -import PGF.Internal(optimizePGF) -import PGF(PGF,defaultProbabilities,setProbabilities,readProbabilitiesFromFile) +import PGF(PGF,readProbabilitiesFromFile) -- | Compiles a number of source files and builds a 'PGF' structure for them. -- This is a composition of 'link' and 'batchCompile'. @@ -36,11 +35,10 @@ link :: Options -> (ModuleName,Grammar) -> IOE PGF link opts (cnc,gr) = putPointE Normal opts "linking ... " $ do let abs = srcAbsName gr cnc - pgf <- mkCanon2pgf opts gr abs - probs <- liftIO (maybe (return . defaultProbabilities) readProbabilitiesFromFile (flag optProbsFile opts) pgf) + probs <- liftIO (maybe (return Map.empty) readProbabilitiesFromFile (flag optProbsFile opts)) + pgf <- grammar2PGF opts gr abs probs when (verbAtLeast opts Normal) $ putStrE "OK" - return $ setProbabilities probs - $ if flag optOptimizePGF opts then optimizePGF pgf else pgf + return pgf -- | Returns the name of the abstract syntax corresponding to the named concrete syntax srcAbsName gr cnc = err (const cnc) id $ abstractOfConcrete gr cnc @@ -78,14 +76,10 @@ compileModule opts1 env@(_,rfs) file = do file <- getRealFile file opts0 <- getOptionsFromFile file let curr_dir = dropFileName file - lib_dirs <- getLibraryDirectory (addOptions opts0 opts1) - let opts = addOptions (fixRelativeLibPaths curr_dir lib_dirs opts0) opts1 --- putIfVerb opts $ "curr_dir:" +++ show curr_dir ---- --- putIfVerb opts $ "lib_dir:" +++ show lib_dirs ---- + lib_dir <- getLibraryDirectory (addOptions opts0 opts1) + let opts = addOptions (fixRelativeLibPaths curr_dir lib_dir opts0) opts1 ps0 <- extendPathEnv opts let ps = nub (curr_dir : ps0) --- putIfVerb opts $ "options from file: " ++ show opts0 --- putIfVerb opts $ "augmented options: " ++ show opts putIfVerb opts $ "module search path:" +++ show ps ---- files <- getAllFiles opts ps rfs file putIfVerb opts $ "files to read:" +++ show files ---- @@ -98,17 +92,13 @@ compileModule opts1 env@(_,rfs) file = if exists then return file else if isRelative file - then do - lib_dirs <- getLibraryDirectory opts1 - let candidates = [ lib_dir file | lib_dir <- lib_dirs ] - putIfVerb opts1 (render ("looking for: " $$ nest 2 candidates)) - file1s <- filterM doesFileExist candidates - case length file1s of - 0 -> raise (render ("Unable to find: " $$ nest 2 candidates)) - 1 -> do return $ head file1s - _ -> do putIfVerb opts1 ("matched multiple candidates: " +++ show file1s) - return $ head file1s - else raise (render ("File" <+> file <+> "does not exist")) + then do lib_dir <- getLibraryDirectory opts1 + let file1 = lib_dir file + exists <- doesFileExist file1 + if exists + then return file1 + else raise (render ("None of these files exists:" $$ nest 2 (file $$ file1))) + else raise (render ("File" <+> file <+> "does not exist.")) compileOne' :: Options -> CompileEnv -> FullPath -> IOE CompileEnv compileOne' opts env@(gr,_) = extendCompileEnv env <=< compileOne opts gr diff --git a/src/compiler/GF/Compile/CFGtoPGF.hs b/src/compiler/GF/Compile/CFGtoPGF.hs index afc9de41f..eb97b72a4 100644 --- a/src/compiler/GF/Compile/CFGtoPGF.hs +++ b/src/compiler/GF/Compile/CFGtoPGF.hs @@ -1,8 +1,10 @@ -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts, ImplicitParams #-} module GF.Compile.CFGtoPGF (cf2pgf) where import GF.Grammar.CFG import GF.Infra.UseIO +import GF.Infra.Option +import GF.Compile.OptimizePGF import PGF import PGF.Internal @@ -12,88 +14,97 @@ import qualified Data.Map as Map import qualified Data.IntMap as IntMap import Data.Array.IArray import Data.List +import Data.Maybe(fromMaybe) -------------------------- -- the compiler ---------- -------------------------- -cf2pgf :: FilePath -> ParamCFG -> PGF -cf2pgf fpath cf = - let pgf = PGF Map.empty aname (cf2abstr cf) (Map.singleton cname (cf2concr cf)) - in updateProductionIndices pgf +cf2pgf :: Options -> FilePath -> ParamCFG -> Map.Map CId Double -> PGF +cf2pgf opts fpath cf probs = + build (let abstr = cf2abstr cf probs + in newPGF [] aname abstr [(cname, cf2concr opts abstr cf)]) where name = justModuleName fpath aname = mkCId (name ++ "Abs") cname = mkCId name -cf2abstr :: ParamCFG -> Abstr -cf2abstr cfg = Abstr aflags afuns acats +cf2abstr :: (?builder :: Builder s) => ParamCFG -> Map.Map CId Double -> B s AbstrInfo +cf2abstr cfg probs = newAbstr aflags acats afuns where - aflags = Map.singleton (mkCId "startcat") (LStr (fst (cfgStartCat cfg))) + aflags = [(mkCId "startcat", LStr (fst (cfgStartCat cfg)))] - acats = Map.fromList [(cat, ([], [(0,mkRuleName rule) | rule <- rules], 0)) - | (cat,rules) <- (Map.toList . Map.fromListWith (++)) - [(cat2id cat, catRules cfg cat) | - cat <- allCats' cfg]] - afuns = Map.fromList [(mkRuleName rule, (cftype [cat2id c | NonTerminal c <- ruleRhs rule] (cat2id (ruleLhs rule)), 0, Nothing, 0)) - | rule <- allRules cfg] + acats = [(c', [], toLogProb (fromMaybe 0 (Map.lookup c' probs))) | cat <- allCats' cfg, let c' = cat2id cat] + afuns = [(f', dTyp [hypo Explicit wildCId (dTyp [] (cat2id c) []) | NonTerminal c <- ruleRhs rule] (cat2id (ruleLhs rule)) [], 0, toLogProb (fromMaybe 0 (Map.lookup f' funs_probs))) + | rule <- allRules cfg + , let f' = mkRuleName rule] + + funs_probs = (Map.fromList . concat . Map.elems . fmap pad . Map.fromListWith (++)) + [(cat,[(f',Map.lookup f' probs)]) | rule <- allRules cfg, + let cat = cat2id (ruleLhs rule), + let f' = mkRuleName rule] + where + pad :: [(a,Maybe Double)] -> [(a,Double)] + pad pfs = [(f,fromMaybe deflt mb_p) | (f,mb_p) <- pfs] + where + deflt = case length [f | (f,Nothing) <- pfs] of + 0 -> 0 + n -> max 0 ((1 - sum [d | (f,Just d) <- pfs]) / fromIntegral n) + + toLogProb = realToFrac . negate . log cat2id = mkCId . fst -cf2concr :: ParamCFG -> Concr -cf2concr cfg = Concr Map.empty Map.empty - cncfuns lindefsrefs lindefsrefs - sequences productions - IntMap.empty Map.empty - cnccats - IntMap.empty - totalCats +cf2concr :: (?builder :: Builder s) => Options -> B s AbstrInfo -> ParamCFG -> B s ConcrInfo +cf2concr opts abstr cfg = + let (lindefs',linrefs',productions',cncfuns',sequences',cnccats') = + (if flag optOptimizePGF opts then optimizePGF (mkCId (fst (cfgStartCat cfg))) else id) + (lindefsrefs,lindefsrefs,IntMap.toList productions,cncfuns,sequences,cnccats) + in newConcr abstr [] [] + lindefs' linrefs' + productions' cncfuns' + sequences' cnccats' totalCats where cats = allCats' cfg rules = allRules cfg - sequences0 = Set.fromList (listArray (0,0) [SymCat 0 0] : - map mkSequence rules) - sequences = listArray (0,Set.size sequences0-1) (Set.toList sequences0) + idSeq = [SymCat 0 0] - idFun = CncFun [wildCId] (listArray (0,0) [seqid]) - where - seq = listArray (0,0) [SymCat 0 0] - seqid = binSearch seq sequences (bounds sequences) + sequences0 = Set.fromList (idSeq : + map mkSequence rules) + sequences = Set.toList sequences0 + + idFun = (wildCId,[Set.findIndex idSeq sequences0]) ((fun_cnt,cncfuns0),productions0) = mapAccumL (convertRule cs) (1,[idFun]) rules productions = foldl addProd IntMap.empty (concat (productions0++coercions)) - cncfuns = listArray (0,fun_cnt-1) (reverse cncfuns0) + cncfuns = reverse cncfuns0 - lbls = listArray (0,0) ["s"] - (fid,cnccats0) = (mapAccumL mkCncCat 0 . Map.toList . Map.fromListWith max) - [(c,p) | (c,ps) <- cats, p <- ps] + lbls = ["s"] + (fid,cnccats) = (mapAccumL mkCncCat 0 . Map.toList . Map.fromListWith max) + [(c,p) | (c,ps) <- cats, p <- ps] ((totalCats,cs), coercions) = mapAccumL mkCoercions (fid,Map.empty) cats - cnccats = Map.fromList cnccats0 - lindefsrefs = - IntMap.fromList (map mkLinDefRef cats) + lindefsrefs = map mkLinDefRef cats convertRule cs (funid,funs) rule = let args = [PArg [] (cat2arg c) | NonTerminal c <- ruleRhs rule] prod = PApply funid args - seqid = binSearch (mkSequence rule) sequences (bounds sequences) - fun = CncFun [mkRuleName rule] (listArray (0,0) [seqid]) + seqid = Set.findIndex (mkSequence rule) sequences0 + fun = (mkRuleName rule, [seqid]) funid' = funid+1 in funid' `seq` ((funid',fun:funs),let (c,ps) = ruleLhs rule in [(cat2fid c p, prod) | p <- ps]) - mkSequence rule = listArray (0,length syms-1) syms + mkSequence rule = snd $ mapAccumL convertSymbol 0 (ruleRhs rule) where - syms = snd $ mapAccumL convertSymbol 0 (ruleRhs rule) - convertSymbol d (NonTerminal (c,_)) = (d+1,if c `elem` ["Int","Float","String"] then SymLit d 0 else SymCat d 0) convertSymbol d (Terminal t) = (d, SymKS t) mkCncCat fid (cat,n) - | cat == "Int" = (fid, (mkCId cat, CncCat fidInt fidInt lbls)) - | cat == "Float" = (fid, (mkCId cat, CncCat fidFloat fidFloat lbls)) - | cat == "String" = (fid, (mkCId cat, CncCat fidString fidString lbls)) + | cat == "Int" = (fid, (mkCId cat, fidInt, fidInt, lbls)) + | cat == "Float" = (fid, (mkCId cat, fidFloat, fidFloat, lbls)) + | cat == "String" = (fid, (mkCId cat, fidString, fidString, lbls)) | otherwise = let fid' = fid+n+1 - in fid' `seq` (fid', (mkCId cat,CncCat fid (fid+n) lbls)) + in fid' `seq` (fid', (mkCId cat, fid, fid+n, lbls)) mkCoercions (fid,cs) c@(cat,[p]) = ((fid,cs),[]) mkCoercions (fid,cs) c@(cat,ps ) = @@ -102,25 +113,16 @@ cf2concr cfg = Concr Map.empty Map.empty mkLinDefRef (cat,_) = (cat2fid cat 0,[0]) - + addProd prods (fid,prod) = case IntMap.lookup fid prods of - Just set -> IntMap.insert fid (Set.insert prod set) prods - Nothing -> IntMap.insert fid (Set.singleton prod) prods - - binSearch v arr (i,j) - | i <= j = case compare v (arr ! k) of - LT -> binSearch v arr (i,k-1) - EQ -> k - GT -> binSearch v arr (k+1,j) - | otherwise = error "binSearch" - where - k = (i+j) `div` 2 + Just set -> IntMap.insert fid (prod:set) prods + Nothing -> IntMap.insert fid [prod] prods cat2fid cat p = - case Map.lookup (mkCId cat) cnccats of - Just (CncCat fid _ _) -> fid+p - _ -> error "cat2fid" + case [start | (cat',start,_,_) <- cnccats, mkCId cat == cat'] of + (start:_) -> fid+p + _ -> error "cat2fid" cat2arg c@(cat,[p]) = cat2fid cat p cat2arg c@(cat,ps ) = @@ -132,3 +134,4 @@ mkRuleName rule = case ruleName rule of CFObj n _ -> n _ -> wildCId + diff --git a/src/compiler/GF/Compile/CheckGrammar.hs b/src/compiler/GF/Compile/CheckGrammar.hs index 1348d8e41..5c1743b74 100644 --- a/src/compiler/GF/Compile/CheckGrammar.hs +++ b/src/compiler/GF/Compile/CheckGrammar.hs @@ -21,7 +21,6 @@ ----------------------------------------------------------------------------- module GF.Compile.CheckGrammar(checkModule) where -import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint import GF.Infra.Ident import GF.Infra.Option diff --git a/src/compiler/GF/Compile/Compute/ConcreteNew.hs b/src/compiler/GF/Compile/Compute/ConcreteNew.hs index f9edc931c..a77da88bf 100644 --- a/src/compiler/GF/Compile/Compute/ConcreteNew.hs +++ b/src/compiler/GF/Compile/Compute/ConcreteNew.hs @@ -5,7 +5,6 @@ module GF.Compile.Compute.ConcreteNew normalForm, Value(..), Bind(..), Env, value2term, eval, vapply ) where -import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint import GF.Grammar hiding (Env, VGen, VApp, VRecType) import GF.Grammar.Lookup(lookupResDefLoc,allParamValues) diff --git a/src/compiler/GF/Compile/Export.hs b/src/compiler/GF/Compile/Export.hs index d844e300a..4de408db4 100644 --- a/src/compiler/GF/Compile/Export.hs +++ b/src/compiler/GF/Compile/Export.hs @@ -1,7 +1,6 @@ module GF.Compile.Export where import PGF -import PGF.Internal(ppPGF) import GF.Compile.PGFtoHaskell import GF.Compile.PGFtoJava import GF.Compile.PGFtoProlog @@ -33,7 +32,7 @@ exportPGF :: Options -> [(FilePath,String)] -- ^ List of recommended file names and contents. exportPGF opts fmt pgf = case fmt of - FmtPGFPretty -> multi "txt" (render . ppPGF) + FmtPGFPretty -> multi "txt" (showPGF) FmtJavaScript -> multi "js" pgf2js FmtPython -> multi "py" pgf2python FmtHaskell -> multi "hs" (grammar2haskell opts name) diff --git a/src/compiler/GF/Compile/GenerateBC.hs b/src/compiler/GF/Compile/GenerateBC.hs index 3e13ea9e8..d6d6f255c 100644 --- a/src/compiler/GF/Compile/GenerateBC.hs +++ b/src/compiler/GF/Compile/GenerateBC.hs @@ -1,14 +1,15 @@ +{-# LANGUAGE CPP #-} module GF.Compile.GenerateBC(generateByteCode) where import GF.Grammar import GF.Grammar.Lookup(lookupAbsDef,lookupFunType) import GF.Data.Operations -import PGF(CId,utf8CId) import PGF.Internal(CodeLabel,Instr(..),IVal(..),TailInfo(..),Literal(..)) import qualified Data.Map as Map import Data.List(nub,mapAccumL) import Data.Maybe(fromMaybe) +#if C_RUNTIME generateByteCode :: SourceGrammar -> Int -> [L Equation] -> [[Instr]] generateByteCode gr arity eqs = let (bs,instrs) = compileEquations gr arity (arity+1) is @@ -63,7 +64,7 @@ compileEquations gr arity st (i:is) eqs fl bs = whilePP eqs Map.empty case_instr t = case t of - (Q (_,id)) -> CASE (i2i id) + (Q (_,id)) -> CASE (showIdent id) (EInt n) -> CASE_LIT (LInt n) (K s) -> CASE_LIT (LStr s) (EFloat d) -> CASE_LIT (LFlt d) @@ -105,7 +106,7 @@ compileFun gr eval st vs (App e1 e2) h0 bs args = compileFun gr eval st vs (Q (m,id)) h0 bs args = case lookupAbsDef gr m id of Ok (_,Just _) - -> (h0,bs,eval st (GLOBAL (i2i id)) args) + -> (h0,bs,eval st (GLOBAL (showIdent id)) args) _ -> let Ok ty = lookupFunType gr m id (ctxt,_,_) = typeForm ty c_arity = length ctxt @@ -114,14 +115,14 @@ compileFun gr eval st vs (Q (m,id)) h0 bs args = diff = c_arity-n_args in if diff <= 0 then if n_args == 0 - then (h0,bs,eval st (GLOBAL (i2i id)) []) + then (h0,bs,eval st (GLOBAL (showIdent id)) []) else let h1 = h0 + 2 + n_args - in (h1,bs,PUT_CONSTR (i2i id):is1++eval st (HEAP h0) []) + in (h1,bs,PUT_CONSTR (showIdent id):is1++eval st (HEAP h0) []) else let h1 = h0 + 1 + n_args is2 = [SET (FREE_VAR i) | i <- [0..n_args-1]] ++ [SET (ARG_VAR (i+1)) | i <- [0..diff-1]] b = CHECK_ARGS diff : ALLOC (c_arity+2) : - PUT_CONSTR (i2i id) : + PUT_CONSTR (showIdent id) : is2 ++ TUCK (ARG_VAR 0) diff : EVAL (HEAP h0) (TailCall diff) : @@ -167,16 +168,16 @@ compileFun gr eval st vs e _ _ _ = error (show e) compileArg gr st vs (Q(m,id)) h0 bs = case lookupAbsDef gr m id of - Ok (_,Just _) -> (h0,bs,GLOBAL (i2i id),[]) + Ok (_,Just _) -> (h0,bs,GLOBAL (showIdent id),[]) _ -> let Ok ty = lookupFunType gr m id (ctxt,_,_) = typeForm ty c_arity = length ctxt in if c_arity == 0 - then (h0,bs,GLOBAL (i2i id),[]) + then (h0,bs,GLOBAL (showIdent id),[]) else let is2 = [SET (ARG_VAR (i+1)) | i <- [0..c_arity-1]] b = CHECK_ARGS c_arity : ALLOC (c_arity+2) : - PUT_CONSTR (i2i id) : + PUT_CONSTR (showIdent id) : is2 ++ TUCK (ARG_VAR 0) c_arity : EVAL (HEAP h0) (TailCall c_arity) : @@ -224,12 +225,12 @@ compileArg gr st vs e h0 bs = diff = c_arity-n_args in if diff <= 0 then let h2 = h1 + 2 + n_args - in (h2,bs1,HEAP h1,is1 ++ (PUT_CONSTR (i2i id) : is2)) + in (h2,bs1,HEAP h1,is1 ++ (PUT_CONSTR (showIdent id) : is2)) else let h2 = h1 + 1 + n_args is2 = [SET (FREE_VAR i) | i <- [0..n_args-1]] ++ [SET (ARG_VAR (i+1)) | i <- [0..diff-1]] b = CHECK_ARGS diff : ALLOC (c_arity+2) : - PUT_CONSTR (i2i id) : + PUT_CONSTR (showIdent id) : is2 ++ TUCK (ARG_VAR 0) diff : EVAL (HEAP h0) (TailCall diff) : @@ -298,9 +299,10 @@ freeVars xs (Vr x) | not (elem x xs) = [x] freeVars xs e = collectOp (freeVars xs) e -i2i :: Ident -> CId -i2i = utf8CId . ident2utf8 - push_is :: Int -> Int -> [IVal] -> [IVal] push_is i 0 is = is push_is i n is = ARG_VAR i : push_is (i-1) (n-1) is + +#else +generateByteCode = error "generateByteCode is not implemented" +#endif diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs index f0c256775..0e20ea5e4 100644 --- a/src/compiler/GF/Compile/GeneratePMCFG.hs +++ b/src/compiler/GF/Compile/GeneratePMCFG.hs @@ -14,7 +14,7 @@ module GF.Compile.GeneratePMCFG ) where --import PGF.CId -import PGF.Internal as PGF(CncCat(..),Symbol(..),fidVar) +import PGF.Internal as PGF(CId,Symbol(..),fidVar) import GF.Infra.Option import GF.Grammar hiding (Env, mkRecord, mkTable) @@ -157,12 +157,15 @@ convert opts gr cenv loc term ty@(_,val) pargs = args = map Vr vars vars = map (\(bt,x,t) -> x) context -pgfCncCat :: SourceGrammar -> Type -> Int -> CncCat -pgfCncCat gr lincat index = +pgfCncCat :: SourceGrammar -> CId -> Type -> Int -> (CId,Int,Int,[String]) +pgfCncCat gr id lincat index = let ((_,size),schema) = computeCatRange gr lincat - in PGF.CncCat index (index+size-1) - (mkArray (map (renderStyle style{mode=OneLineMode} . ppPath) - (getStrPaths schema))) + in ( id + , index + , index+size-1 + , map (renderStyle style{mode=OneLineMode} . ppPath) + (getStrPaths schema) + ) where getStrPaths :: Schema Identity s c -> [Path] getStrPaths = collect CNil [] @@ -500,13 +503,11 @@ mapAccumL' f s (x:xs) = (s'',y:ys) !(s'',ys) = mapAccumL' f s' xs addSequence :: SeqSet -> [Symbol] -> (SeqSet,SeqId) -addSequence seqs lst = +addSequence seqs seq = case Map.lookup seq seqs of Just id -> (seqs,id) Nothing -> let !last_seq = Map.size seqs in (Map.insert seq last_seq seqs, last_seq) - where - seq = mkArray lst ------------------------------------------------------------ diff --git a/src/compiler/GF/Compile/GetGrammar.hs b/src/compiler/GF/Compile/GetGrammar.hs index 191c3aff9..0813d15d2 100644 --- a/src/compiler/GF/Compile/GetGrammar.hs +++ b/src/compiler/GF/Compile/GetGrammar.hs @@ -52,11 +52,9 @@ getSourceModule opts file0 = let mi =mi0 {mflags=mflags mi0 `addOptions` opts, msrc=file0} optCoding' = renameEncoding `fmap` flag optEncoding (mflags mi0) case (optCoding,optCoding') of - {- (Nothing,Nothing) -> unless (BS.all isAscii raw) $ ePutStrLn $ file0++":\n Warning: default encoding has changed from Latin-1 to UTF-8" - -} (_,Just coding') -> when (coding/=coding') $ raise $ "Encoding mismatch: "++coding++" /= "++coding' diff --git a/src/compiler/GF/Compile/GrammarToPGF.hs b/src/compiler/GF/Compile/GrammarToPGF.hs index eb127f7bd..70a3047b1 100644 --- a/src/compiler/GF/Compile/GrammarToPGF.hs +++ b/src/compiler/GF/Compile/GrammarToPGF.hs @@ -1,17 +1,14 @@ -{-# LANGUAGE BangPatterns, FlexibleContexts #-} -module GF.Compile.GrammarToPGF (mkCanon2pgf) where +{-# LANGUAGE ImplicitParams, BangPatterns, FlexibleContexts #-} +module GF.Compile.GrammarToPGF (grammar2PGF) where ---import GF.Compile.Export import GF.Compile.GeneratePMCFG import GF.Compile.GenerateBC +import GF.Compile.OptimizePGF -import PGF(CId,mkCId,utf8CId) -import PGF.Internal(fidInt,fidFloat,fidString,fidVar) -import PGF.Internal(updateProductionIndices) -import qualified PGF.Internal as C +import PGF(CId,mkCId,Type,Hypo,Expr) +import PGF.Internal import GF.Grammar.Predef ---import GF.Grammar.Printer -import GF.Grammar.Grammar +import GF.Grammar.Grammar hiding (Production) import qualified GF.Grammar.Lookup as Look import qualified GF.Grammar as A import qualified GF.Grammar.Macros as GM @@ -26,104 +23,132 @@ import qualified Data.Set as Set import qualified Data.Map as Map import qualified Data.IntMap as IntMap import Data.Array.IArray +import Data.Maybe(fromMaybe) -mkCanon2pgf :: Options -> SourceGrammar -> ModuleName -> IOE C.PGF -mkCanon2pgf opts gr am = do - (an,abs) <- mkAbstr am - cncs <- mapM mkConcr (allConcretes gr am) - return $ updateProductionIndices (C.PGF Map.empty an abs (Map.fromList cncs)) +grammar2PGF :: Options -> SourceGrammar -> ModuleName -> Map.Map CId Double -> IO PGF +grammar2PGF opts gr am probs = do + cnc_infos <- getConcreteInfos gr am + return $ + build (let gflags = if flag optSplitPGF opts + then [(mkCId "split", LStr "true")] + else [] + (an,abs) = mkAbstr am probs + cncs = map (mkConcr opts abs) cnc_infos + in newPGF gflags an abs cncs) where cenv = resourceValues opts gr + aflags = err (const noOptions) mflags (lookupModule gr am) - mkAbstr am = return (mi2i am, C.Abstr flags funs cats) + mkAbstr :: (?builder :: Builder s) => ModuleName -> Map.Map CId Double -> (CId, B s AbstrInfo) + mkAbstr am probs = (mi2i am, newAbstr flags cats funs) where - aflags = err (const noOptions) mflags (lookupModule gr am) - adefs = [((cPredefAbs,c), AbsCat (Just (L NoLoc []))) | c <- [cFloat,cInt,cString]] ++ Look.allOrigInfos gr am - flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF aflags] + flags = [(mkCId f,x) | (f,x) <- optionsPGF aflags] - funs = Map.fromList [(i2i f, (mkType [] ty, arity, mkDef gr arity mdef, 0)) | + toLogProb = realToFrac . negate . log + + cats = [(c', snd (mkContext [] cont), toLogProb (fromMaybe 0 (Map.lookup c' probs))) | + ((m,c),AbsCat (Just (L _ cont))) <- adefs, let c' = i2i c] + + funs = [(f', mkType [] ty, arity, {-mkDef gr arity mdef,-} toLogProb (fromMaybe 0 (Map.lookup f' funs_probs))) | ((m,f),AbsFun (Just (L _ ty)) ma mdef _) <- adefs, - let arity = mkArity ma mdef ty] + let arity = mkArity ma mdef ty, + let f' = i2i f] + + funs_probs = (Map.fromList . concat . Map.elems . fmap pad . Map.fromListWith (++)) + [(i2i cat,[(i2i f,Map.lookup f' probs)]) | ((m,f),AbsFun (Just (L _ ty)) _ _ _) <- adefs, + let (_,(_,cat),_) = GM.typeForm ty, + let f' = i2i f] + where + pad :: [(a,Maybe Double)] -> [(a,Double)] + pad pfs = [(f,fromMaybe deflt mb_p) | (f,mb_p) <- pfs] + where + deflt = case length [f | (f,Nothing) <- pfs] of + 0 -> 0 + n -> max 0 ((1 - sum [d | (f,Just d) <- pfs]) / fromIntegral n) - cats = Map.fromList [(i2i c, (snd (mkContext [] cont),catfuns c, 0)) | - ((m,c),AbsCat (Just (L _ cont))) <- adefs] - - catfuns cat = - [(0,i2i f) | ((m,f),AbsFun (Just (L _ ty)) _ _ (Just True)) <- adefs, snd (GM.valCat ty) == cat] - - mkConcr cm = do - let cflags = err (const noOptions) mflags (lookupModule gr cm) - - (ex_seqs,cdefs) <- addMissingPMCFGs - Map.empty - ([((cPredefAbs,c), CncCat (Just (L NoLoc GM.defLinType)) Nothing Nothing Nothing Nothing) | c <- [cInt,cFloat,cString]] ++ - Look.allOrigInfos gr cm) - - let flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF cflags] + mkConcr opts abs (cm,ex_seqs,cdefs) = + let cflags = err (const noOptions) mflags (lookupModule gr cm) + flags = [(mkCId f,x) | (f,x) <- optionsPGF cflags] seqs = (mkSetArray . Set.fromList . concat) $ - (Map.keys ex_seqs : [maybe [] elems (mseqs mi) | (m,mi) <- allExtends gr cm]) - - ex_seqs_arr = mkMapArray ex_seqs :: Array SeqId Sequence + (elems (ex_seqs :: Array SeqId [Symbol]) : [maybe [] elems (mseqs mi) | (m,mi) <- allExtends gr cm]) !(!fid_cnt1,!cnccats) = genCncCats gr am cm cdefs + cnccat_ranges = Map.fromList (map (\(cid,s,e,_) -> (cid,(s,e))) cnccats) !(!fid_cnt2,!productions,!lindefs,!linrefs,!cncfuns) - = genCncFuns gr am cm ex_seqs_arr seqs cdefs fid_cnt1 cnccats - + = genCncFuns gr am cm ex_seqs seqs cdefs fid_cnt1 cnccat_ranges + printnames = genPrintNames cdefs - return (mi2i cm, C.Concr flags - printnames - cncfuns - lindefs - linrefs - seqs - productions - IntMap.empty - Map.empty - cnccats - IntMap.empty - fid_cnt2) + + startCat = mkCId (fromMaybe "S" (flag optStartCat aflags)) + + (lindefs',linrefs',productions',cncfuns',sequences',cnccats') = + (if flag optOptimizePGF opts then optimizePGF startCat else id) + (lindefs,linrefs,productions,cncfuns,elems seqs,cnccats) + + in (mi2i cm, newConcr abs + flags + printnames + lindefs' + linrefs' + productions' + cncfuns' + sequences' + cnccats' + fid_cnt2) + + getConcreteInfos gr am = mapM flatten (allConcretes gr am) where + flatten cm = do + (seqs,infos) <- addMissingPMCFGs cm Map.empty + (lit_infos ++ Look.allOrigInfos gr cm) + return (cm,mkMapArray seqs :: Array SeqId [Symbol],infos) + + lit_infos = [((cPredefAbs,c), CncCat (Just (L NoLoc GM.defLinType)) Nothing Nothing Nothing Nothing) | c <- [cInt,cFloat,cString]] + -- if some module was compiled with -no-pmcfg, then -- we have to create the PMCFG code just before linking - addMissingPMCFGs seqs [] = return (seqs,[]) - addMissingPMCFGs seqs (((m,id), info):is) = do - (seqs,info) <- addPMCFG opts gr cenv Nothing am cm seqs id info - (seqs,is ) <- addMissingPMCFGs seqs is - return (seqs, ((m,id), info) : is) + addMissingPMCFGs cm seqs [] = return (seqs,[]) + addMissingPMCFGs cm seqs (((m,id), info):is) = do + (seqs,info) <- addPMCFG opts gr cenv Nothing am cm seqs id info + (seqs,infos) <- addMissingPMCFGs cm seqs is + return (seqs, ((m,id), info) : infos) + +mkSetArray set = listArray (0,Set.size set-1) (Set.toList set) +mkMapArray map = array (0,Map.size map-1) [(k,v) | (v,k) <- Map.toList map] i2i :: Ident -> CId -i2i = utf8CId . ident2utf8 +i2i = mkCId . showIdent mi2i :: ModuleName -> CId mi2i (MN i) = i2i i -mkType :: [Ident] -> A.Type -> C.Type +mkType :: (?builder :: Builder s) => [Ident] -> A.Type -> B s PGF.Type mkType scope t = case GM.typeForm t of (hyps,(_,cat),args) -> let (scope',hyps') = mkContext scope hyps - in C.DTyp hyps' (i2i cat) (map (mkExp scope') args) + in dTyp hyps' (i2i cat) (map (mkExp scope') args) -mkExp :: [Ident] -> A.Term -> C.Expr -mkExp scope t = +mkExp :: (?builder :: Builder s) => [Ident] -> A.Term -> B s Expr +mkExp scope t = case t of - Q (_,c) -> C.EFun (i2i c) - QC (_,c) -> C.EFun (i2i c) + Q (_,c) -> eFun (i2i c) + QC (_,c) -> eFun (i2i c) Vr x -> case lookup x (zip scope [0..]) of - Just i -> C.EVar i - Nothing -> C.EMeta 0 - Abs b x t-> C.EAbs b (i2i x) (mkExp (x:scope) t) - App t1 t2-> C.EApp (mkExp scope t1) (mkExp scope t2) - EInt i -> C.ELit (C.LInt (fromIntegral i)) - EFloat f -> C.ELit (C.LFlt f) - K s -> C.ELit (C.LStr s) - Meta i -> C.EMeta i - _ -> C.EMeta 0 - + Just i -> eVar i + Nothing -> eMeta 0 + Abs b x t-> eAbs b (i2i x) (mkExp (x:scope) t) + App t1 t2-> eApp (mkExp scope t1) (mkExp scope t2) + EInt i -> eLit (LInt (fromIntegral i)) + EFloat f -> eLit (LFlt f) + K s -> eLit (LStr s) + Meta i -> eMeta i + _ -> eMeta 0 +{- mkPatt scope p = case p of A.PP (_,c) ps->let (scope',ps') = mapAccumL mkPatt scope ps @@ -138,147 +163,146 @@ mkPatt scope p = A.PImplArg p-> let (scope',p') = mkPatt scope p in (scope',C.PImplArg p') A.PTilde t -> ( scope,C.PTilde (mkExp scope t)) - -mkContext :: [Ident] -> A.Context -> ([Ident],[C.Hypo]) +-} +mkContext :: (?builder :: Builder s) => [Ident] -> A.Context -> ([Ident],[B s PGF.Hypo]) mkContext scope hyps = mapAccumL (\scope (bt,x,ty) -> let ty' = mkType scope ty in if x == identW - then ( scope,(bt,i2i x,ty')) - else (x:scope,(bt,i2i x,ty'))) scope hyps - + then ( scope,hypo bt (i2i x) ty') + else (x:scope,hypo bt (i2i x) ty')) scope hyps +{- mkDef gr arity (Just eqs) = Just ([C.Equ ps' (mkExp scope' e) | L _ (ps,e) <- eqs, let (scope',ps') = mapAccumL mkPatt [] ps] ,generateByteCode gr arity eqs ) mkDef gr arity Nothing = Nothing - +-} mkArity (Just a) _ ty = a -- known arity, i.e. defined function mkArity Nothing (Just _) ty = 0 -- defined function with no arity - must be an axiom mkArity Nothing _ ty = let (ctxt, _, _) = GM.typeForm ty -- constructor in length ctxt -genCncCats gr am cm cdefs = - let (index,cats) = mkCncCats 0 cdefs - in (index, Map.fromList cats) +genCncCats gr am cm cdefs = mkCncCats 0 cdefs where mkCncCats index [] = (index,[]) mkCncCats index (((m,id),CncCat (Just (L _ lincat)) _ _ _ _):cdefs) | id == cInt = - let cc = pgfCncCat gr lincat fidInt + let cc = pgfCncCat gr (i2i id) lincat fidInt (index',cats) = mkCncCats index cdefs - in (index', (i2i id,cc) : cats) + in (index', cc : cats) | id == cFloat = - let cc = pgfCncCat gr lincat fidFloat + let cc = pgfCncCat gr (i2i id) lincat fidFloat (index',cats) = mkCncCats index cdefs - in (index', (i2i id,cc) : cats) + in (index', cc : cats) | id == cString = - let cc = pgfCncCat gr lincat fidString + let cc = pgfCncCat gr (i2i id) lincat fidString (index',cats) = mkCncCats index cdefs - in (index', (i2i id,cc) : cats) + in (index', cc : cats) | otherwise = - let cc@(C.CncCat _s e _) = pgfCncCat gr lincat index - (index',cats) = mkCncCats (e+1) cdefs - in (index', (i2i id,cc) : cats) - mkCncCats index (_ :cdefs) = mkCncCats index cdefs + let cc@(_, _s, e, _) = pgfCncCat gr (i2i id) lincat index + (index',cats) = mkCncCats (e+1) cdefs + in (index', cc : cats) + mkCncCats index (_ :cdefs) = mkCncCats index cdefs genCncFuns :: Grammar -> ModuleName -> ModuleName - -> Array SeqId Sequence - -> Array SeqId Sequence + -> Array SeqId [Symbol] + -> Array SeqId [Symbol] -> [(QIdent, Info)] -> FId - -> Map.Map CId C.CncCat + -> Map.Map CId (Int,Int) -> (FId, - IntMap.IntMap (Set.Set C.Production), - IntMap.IntMap [FunId], - IntMap.IntMap [FunId], - Array FunId C.CncFun) -genCncFuns gr am cm ex_seqs seqs cdefs fid_cnt cnccats = - let (fid_cnt1,lindefs,linrefs,fun_st1) = mkCncCats cdefs fid_cnt IntMap.empty IntMap.empty Map.empty - ((fid_cnt2,crc,prods),fun_st2) = mkCncFuns cdefs lindefs ((fid_cnt1,Map.empty,IntMap.empty),fun_st1) - in (fid_cnt2,prods,lindefs,linrefs,array (0,Map.size fun_st2-1) (Map.elems fun_st2)) + [(FId, [Production])], + [(FId, [FunId])], + [(FId, [FunId])], + [(CId,[SeqId])]) +genCncFuns gr am cm ex_seqs seqs cdefs fid_cnt cnccat_ranges = + let (fid_cnt1,funs_cnt1,funs1,lindefs,linrefs) = mkCncCats cdefs fid_cnt 0 [] IntMap.empty IntMap.empty + (fid_cnt2,funs_cnt2,funs2,prods0) = mkCncFuns cdefs fid_cnt1 funs_cnt1 funs1 lindefs Map.empty IntMap.empty + prods = [(fid,Set.toList prodSet) | (fid,prodSet) <- IntMap.toList prods0] + in (fid_cnt2,prods,IntMap.toList lindefs,IntMap.toList linrefs,reverse funs2) where - mkCncCats [] fid_cnt lindefs linrefs fun_st = - (fid_cnt,lindefs,linrefs,fun_st) - mkCncCats (((m,id),CncCat _ _ _ _ (Just (PMCFG prods0 funs0))):cdefs) fid_cnt lindefs linrefs fun_st = - let mseqs = case lookupModule gr m of - Ok (ModInfo{mseqs=Just mseqs}) -> mseqs - _ -> ex_seqs - (lindefs',fun_st1) = foldl' (toLinDef (m,id) funs0 mseqs) (lindefs,fun_st ) prods0 - (linrefs',fun_st2) = foldl' (toLinRef (m,id) funs0 mseqs) (linrefs,fun_st1) prods0 - in mkCncCats cdefs fid_cnt lindefs' linrefs' fun_st2 - mkCncCats (_ :cdefs) fid_cnt lindefs linrefs fun_st = - mkCncCats cdefs fid_cnt lindefs linrefs fun_st + mkCncCats [] fid_cnt funs_cnt funs lindefs linrefs = + (fid_cnt,funs_cnt,funs,lindefs,linrefs) + mkCncCats (((m,id),CncCat _ _ _ _ (Just (PMCFG prods0 funs0))):cdefs) fid_cnt funs_cnt funs lindefs linrefs = + let !funs_cnt' = let (s_funid, e_funid) = bounds funs0 + in funs_cnt+(e_funid-s_funid+1) + lindefs' = foldl' (toLinDef (am,id) funs_cnt) lindefs prods0 + linrefs' = foldl' (toLinRef (am,id) funs_cnt) linrefs prods0 + funs' = foldl' (toCncFun funs_cnt (m,mkLinDefId id)) funs (assocs funs0) + in mkCncCats cdefs fid_cnt funs_cnt' funs' lindefs' linrefs' + mkCncCats (_ :cdefs) fid_cnt funs_cnt funs lindefs linrefs = + mkCncCats cdefs fid_cnt funs_cnt funs lindefs linrefs - mkCncFuns [] lindefs st = st - mkCncFuns (((m,id),CncFun _ _ _ (Just (PMCFG prods0 funs0))):cdefs) lindefs st = - let ty_C = err error (\x -> x) $ fmap GM.typeForm (Look.lookupFunType gr am id) - mseqs = case lookupModule gr m of - Ok (ModInfo{mseqs=Just mseqs}) -> mseqs - _ -> ex_seqs - bundles = [([(args0,res0) | Production res0 funid0 args0 <- prods0, funid0==funid],lins) | (funid,lins) <- assocs funs0] - !st' = foldl' (toProd id lindefs mseqs ty_C) st bundles - in mkCncFuns cdefs lindefs st' - mkCncFuns (_ :cdefs) lindefs st = - mkCncFuns cdefs lindefs st + mkCncFuns [] fid_cnt funs_cnt funs lindefs crc prods = + (fid_cnt,funs_cnt,funs,prods) + mkCncFuns (((m,id),CncFun _ _ _ (Just (PMCFG prods0 funs0))):cdefs) fid_cnt funs_cnt funs lindefs crc prods = + let ty_C = err error (\x -> x) $ fmap GM.typeForm (Look.lookupFunType gr am id) + !funs_cnt' = let (s_funid, e_funid) = bounds funs0 + in funs_cnt+(e_funid-s_funid+1) + !(fid_cnt',crc',prods') + = foldl' (toProd lindefs ty_C funs_cnt) + (fid_cnt,crc,prods) prods0 + funs' = foldl' (toCncFun funs_cnt (m,id)) funs (assocs funs0) + in mkCncFuns cdefs fid_cnt' funs_cnt' funs' lindefs crc' prods' + mkCncFuns (_ :cdefs) fid_cnt funs_cnt funs lindefs crc prods = + mkCncFuns cdefs fid_cnt funs_cnt funs lindefs crc prods - toLinDef mid funs0 mseqs st@(lindefs,fun_st) (Production res0 funid0 [arg0]) - | arg0 == [fidVar] = - let res = mkFId mid res0 - - lins = amap (newSeqId mseqs) (funs0 ! funid0) - - !funid = Map.size fun_st - !fun_st' = Map.insert ([([C.PArg [] fidVar],res)],lins) (funid, C.CncFun [] lins) fun_st - - !lindefs' = IntMap.insertWith (++) res [funid] lindefs - in (lindefs',fun_st') - toLinDef res funs0 mseqs st _ = st - - toLinRef mid funs0 mseqs st (Production res0 funid0 [arg0]) - | res0 == fidVar = - let arg = map (mkFId mid) arg0 - - lins = amap (newSeqId mseqs) (funs0 ! funid0) - - in foldr (\arg (linrefs,fun_st) -> - let !funid = Map.size fun_st - !fun_st' = Map.insert ([([C.PArg [] arg],fidVar)],lins) (funid, C.CncFun [] lins) fun_st - - !linrefs' = IntMap.insertWith (++) arg [funid] linrefs - in (linrefs',fun_st')) - st arg - toLinRef res funs0 mseqs st _ = st - - toProd id lindefs mseqs (ctxt_C,res_C,_) (prod_st,fun_st) (sigs0,lins0) = - let (prod_st',sigs) = mapAccumL mkCncSig prod_st sigs0 - lins = amap (newSeqId mseqs) lins0 - in addBundle id (prod_st',fun_st) (concat sigs,lins) + toProd lindefs (ctxt_C,res_C,_) offs st (A.Production fid0 funid0 args0) = + let !((fid_cnt,crc,prods),args) = mapAccumL mkArg st (zip ctxt_C args0) + set0 = Set.fromList (map (PApply (offs+funid0)) (sequence args)) + fid = mkFId res_C fid0 + !prods' = case IntMap.lookup fid prods of + Just set -> IntMap.insert fid (Set.union set0 set) prods + Nothing -> IntMap.insert fid set0 prods + in (fid_cnt,crc,prods') where - mkCncSig prod_st (args0,res0) = - let !(prod_st',args) = mapAccumL mkArg prod_st (zip ctxt_C args0) - res = mkFId res_C res0 - in (prod_st',[(args,res) | args <- sequence args]) - mkArg st@(fid_cnt,crc,prods) ((_,_,ty),fid0s) = case fid0s of - [fid0] -> (st,map (flip C.PArg (mkFId arg_C fid0)) ctxt) + [fid0] -> (st,map (flip PArg (mkFId arg_C fid0)) ctxt) fid0s -> case Map.lookup fids crc of - Just fid -> (st,map (flip C.PArg fid) ctxt) + Just fid -> (st,map (flip PArg fid) ctxt) Nothing -> let !crc' = Map.insert fids fid_cnt crc - !prods' = IntMap.insert fid_cnt (Set.fromList (map C.PCoerce fids)) prods - in ((fid_cnt+1,crc',prods'),map (flip C.PArg fid_cnt) ctxt) + !prods' = IntMap.insert fid_cnt (Set.fromList (map PCoerce fids)) prods + in ((fid_cnt+1,crc',prods'),map (flip PArg fid_cnt) ctxt) where (hargs_C,arg_C) = GM.catSkeleton ty - ctxt = mapM mkCtxt hargs_C + ctxt = mapM (mkCtxt lindefs) hargs_C fids = map (mkFId arg_C) fid0s - mkCtxt (_,cat) = - case Map.lookup (i2i cat) cnccats of - Just (C.CncCat s e _) -> [(C.fidVar,fid) | fid <- [s..e], Just _ <- [IntMap.lookup fid lindefs]] - Nothing -> error "GrammarToPGF.mkCtxt failed" + mkLinDefId id = prefixIdent "lindef " id - newSeqId mseqs i = binSearch (mseqs ! i) seqs (bounds seqs) + toLinDef res offs lindefs (A.Production fid0 funid0 args) = + if args == [[fidVar]] + then IntMap.insertWith (++) fid [offs+funid0] lindefs + else lindefs where + fid = mkFId res fid0 + + toLinRef res offs linrefs (A.Production fid0 funid0 [fargs]) = + if fid0 == fidVar + then foldr (\fid -> IntMap.insertWith (++) fid [offs+funid0]) linrefs fids + else linrefs + where + fids = map (mkFId res) fargs + + mkFId (_,cat) fid0 = + case Map.lookup (i2i cat) cnccat_ranges of + Just (s,e) -> s+fid0 + Nothing -> error ("GrammarToPGF.mkFId: missing category "++showIdent cat) + + mkCtxt lindefs (_,cat) = + case Map.lookup (i2i cat) cnccat_ranges of + Just (s,e) -> [(fid,fid) | fid <- [s..e], Just _ <- [IntMap.lookup fid lindefs]] + Nothing -> error "GrammarToPGF.mkCtxt failed" + + toCncFun offs (m,id) funs (funid0,lins0) = + let mseqs = case lookupModule gr m of + Ok (ModInfo{mseqs=Just mseqs}) -> mseqs + _ -> ex_seqs + in (i2i id, map (newIndex mseqs) (elems lins0)):funs + where + newIndex mseqs i = binSearch (mseqs ! i) seqs (bounds seqs) + binSearch v arr (i,j) | i <= j = case compare v (arr ! k) of LT -> binSearch v arr (i,k-1) @@ -288,26 +312,9 @@ genCncFuns gr am cm ex_seqs seqs cdefs fid_cnt cnccats = where k = (i+j) `div` 2 - addBundle id ((fid_cnt,crc,prods),fun_st) bundle@(sigs,lins) = - case Map.lookup bundle fun_st of - Just (funid, C.CncFun funs lins) -> - let !fun_st' = Map.insert bundle (funid, C.CncFun (i2i id:funs) lins) fun_st - !prods' = foldl' (\prods (args,res) -> IntMap.insert res (Set.singleton (C.PApply funid args)) prods) prods sigs - in ((fid_cnt,crc,prods'),fun_st') - Nothing -> - let !funid = Map.size fun_st - !fun_st' = Map.insert bundle (funid, C.CncFun [i2i id] lins) fun_st - !prods' = foldl' (\prods (args,res) -> IntMap.insert res (Set.singleton (C.PApply funid args)) prods) prods sigs - in ((fid_cnt,crc,prods'),fun_st') - - mkFId (_,cat) fid0 = - case Map.lookup (i2i cat) cnccats of - Just (C.CncCat s e _) -> s+fid0 - Nothing -> error ("GrammarToPGF.mkFId: missing category "++showIdent cat) - genPrintNames cdefs = - Map.fromAscList [(i2i id, name) | ((m,id),info) <- cdefs, name <- prn info] + [(i2i id, name) | ((m,id),info) <- cdefs, name <- prn info] where prn (CncFun _ _ (Just (L _ tr)) _) = [flatten tr] prn (CncCat _ _ _ (Just (L _ tr)) _) = [flatten tr] @@ -316,7 +323,3 @@ genPrintNames cdefs = flatten (K s) = s flatten (Alts x _) = flatten x flatten (C x y) = flatten x +++ flatten y - ---mkArray lst = listArray (0,length lst-1) lst -mkMapArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] -mkSetArray set = listArray (0,Set.size set-1) [v | v <- Set.toList set] diff --git a/src/compiler/GF/Compile/PGFtoHaskell.hs b/src/compiler/GF/Compile/PGFtoHaskell.hs index fc17e4e4e..7520d6894 100644 --- a/src/compiler/GF/Compile/PGFtoHaskell.hs +++ b/src/compiler/GF/Compile/PGFtoHaskell.hs @@ -16,13 +16,14 @@ module GF.Compile.PGFtoHaskell (grammar2haskell) where -import PGF(showCId) +import PGF import PGF.Internal import GF.Data.Operations import GF.Infra.Option -import Data.List --(isPrefixOf, find, intersperse) +import Data.List +import Data.Maybe(mapMaybe) import qualified Data.Map as Map type Prefix = String -> String @@ -39,7 +40,7 @@ grammar2haskell opts name gr = foldr (++++) [] $ lexical cat = haskellOption opts HaskellLexical && isLexicalCat opts cat gId | haskellOption opts HaskellNoPrefix = id | otherwise = ("G"++) - pragmas | gadt = ["{-# OPTIONS_GHC -fglasgow-exts #-}","{-# LANGUAGE GADTs #-}"] + pragmas | gadt = ["{-# OPTIONS_GHC -fglasgow-exts #-}"] | otherwise = [] types | gadt = datatypesGADT gId lexical gr' | otherwise = datatypes gId lexical gr' @@ -262,18 +263,21 @@ fInstance gId lexical m (cat,rules) = --type HSkeleton = [(OIdent, [(OIdent, [OIdent])])] hSkeleton :: PGF -> (String,HSkeleton) hSkeleton gr = - (showCId (absname gr), + (showCId (abstractName gr), let fs = - [(showCId c, [(showCId f, map showCId cs) | (f, (cs,_)) <- fs]) | - fs@((_, (_,c)):_) <- fns] + [(showCId c, [(showCId f, map showCId cs) | (f, cs,_) <- fs]) | + fs@((_, _,c):_) <- fns] in fs ++ [(sc, []) | c <- cts, let sc = showCId c, notElem sc (["Int", "Float", "String"] ++ map fst fs)] ) where - cts = Map.keys (cats (abstract gr)) - fns = groupBy valtypg (sortBy valtyps (map jty (Map.assocs (funs (abstract gr))))) - valtyps (_, (_,x)) (_, (_,y)) = compare x y - valtypg (_, (_,x)) (_, (_,y)) = x == y - jty (f,(ty,_,_,_)) = (f,catSkeleton ty) + cts = categories gr + fns = groupBy valtypg (sortBy valtyps (mapMaybe jty (functions gr))) + valtyps (_,_,x) (_,_,y) = compare x y + valtypg (_,_,x) (_,_,y) = x == y + jty f = case functionType gr f of + Just ty -> let (hypos,valcat,_) = unType ty + in Just (f,[argcat | (_,_,ty) <- hypos, let (_,argcat,_) = unType ty],valcat) + Nothing -> Nothing {- updateSkeleton :: OIdent -> HSkeleton -> (OIdent, [OIdent]) -> HSkeleton updateSkeleton cat skel rule = diff --git a/src/compiler/GF/Compile/PGFtoJS.hs b/src/compiler/GF/Compile/PGFtoJS.hs index 0fc898aab..5b2aa3bf0 100644 --- a/src/compiler/GF/Compile/PGFtoJS.hs +++ b/src/compiler/GF/Compile/PGFtoJS.hs @@ -1,17 +1,9 @@ module GF.Compile.PGFtoJS (pgf2js) where -import PGF(showCId) -import PGF.Internal as M +import PGF +import PGF.Internal import qualified GF.JavaScript.AbsJS as JS import qualified GF.JavaScript.PrintJS as JS - ---import GF.Data.ErrM ---import GF.Infra.Option - ---import Control.Monad (mplus) ---import Data.Array.Unboxed (UArray) -import qualified Data.Array.IArray as Array ---import Data.Maybe (fromMaybe) import Data.Map (Map) import qualified Data.Set as Set import qualified Data.Map as Map @@ -21,54 +13,44 @@ pgf2js :: PGF -> String pgf2js pgf = JS.printTree $ JS.Program [JS.ElStmt $ JS.SDeclOrExpr $ JS.Decl [JS.DInit (JS.Ident n) grammar]] where - n = showCId $ absname pgf - as = abstract pgf - cs = Map.assocs (concretes pgf) - start = showCId $ M.lookStartCat pgf + n = showCId $ abstractName pgf + start = showType [] $ startCat pgf grammar = new "GFGrammar" [js_abstract, js_concrete] - js_abstract = abstract2js start as - js_concrete = JS.EObj $ map concrete2js cs + js_abstract = abstract2js start pgf + js_concrete = JS.EObj $ map (concrete2js pgf) (languages pgf) -abstract2js :: String -> Abstr -> JS.Expr -abstract2js start ds = new "GFAbstract" [JS.EStr start, JS.EObj $ map absdef2js (Map.assocs (funs ds))] +abstract2js :: String -> PGF -> JS.Expr +abstract2js start pgf = new "GFAbstract" [JS.EStr start, JS.EObj [absdef2js f ty | f <- functions pgf, Just ty <- [functionType pgf f]]] -absdef2js :: (CId,(Type,Int,Maybe ([Equation],[[M.Instr]]),Double)) -> JS.Property -absdef2js (f,(typ,_,_,_)) = - let (args,cat) = M.catSkeleton typ in - JS.Prop (JS.IdentPropName (JS.Ident (showCId f))) (new "Type" [JS.EArray [JS.EStr (showCId x) | x <- args], JS.EStr (showCId cat)]) +absdef2js :: CId -> Type -> JS.Property +absdef2js f typ = + let (hypos,cat,_) = unType typ + args = [cat | (_,_,typ) <- hypos, let (hypos,cat,_) = unType typ] + in JS.Prop (JS.IdentPropName (JS.Ident (showCId f))) (new "Type" [JS.EArray [JS.EStr (showCId x) | x <- args], JS.EStr (showCId cat)]) lit2js (LStr s) = JS.EStr s lit2js (LInt n) = JS.EInt n lit2js (LFlt d) = JS.EDbl d -concrete2js :: (CId,Concr) -> JS.Property -concrete2js (c,cnc) = - JS.Prop l (new "GFConcrete" [mapToJSObj (lit2js) $ cflags cnc, - JS.EObj $ [JS.Prop (JS.IntPropName cat) (JS.EArray (map frule2js (Set.toList set))) | (cat,set) <- IntMap.toList (productions cnc)], - JS.EArray $ (map ffun2js (Array.elems (cncfuns cnc))), - JS.EArray $ (map seq2js (Array.elems (sequences cnc))), - JS.EObj $ map cats (Map.assocs (cnccats cnc)), - JS.EInt (totalCats cnc)]) - where - l = JS.IdentPropName (JS.Ident (showCId c)) -{- +concrete2js :: PGF -> Language -> JS.Property +concrete2js pgf lang = + JS.Prop l (new "GFConcrete" [mapToJSObj (lit2js) $ concrFlags cnc, + JS.EObj [JS.Prop (JS.IntPropName cat) (JS.EArray (map frule2js (concrProductions cnc cat))) | cat <- [0..concrTotalCats cnc]], + JS.EArray [ffun2js (concrFunction cnc funid) | funid <- [0..concrTotalFuns cnc]], + JS.EArray [seq2js (concrSequence cnc seqid) | seqid <- [0..concrTotalSeqs cnc]], + JS.EObj $ map cats (concrCategories cnc), + JS.EInt (concrTotalCats cnc)]) + where + cnc = lookConcr pgf lang + l = JS.IdentPropName (JS.Ident (showCId lang)) + litslins = [JS.Prop (JS.StringPropName "Int") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]]), JS.Prop (JS.StringPropName "Float") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]]), JS.Prop (JS.StringPropName "String") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]])] --} - cats (c,CncCat start end _) = JS.Prop (JS.IdentPropName (JS.Ident (showCId c))) (JS.EObj [JS.Prop (JS.IdentPropName (JS.Ident "s")) (JS.EInt start) - ,JS.Prop (JS.IdentPropName (JS.Ident "e")) (JS.EInt end)]) -{- -mkStr :: String -> JS.Expr -mkStr s = new "Str" [JS.EStr s] -mkSeq :: [JS.Expr] -> JS.Expr -mkSeq [x] = x -mkSeq xs = new "Seq" xs + cats (c,start,end,_) = JS.Prop (JS.IdentPropName (JS.Ident (showCId c))) (JS.EObj [JS.Prop (JS.IdentPropName (JS.Ident "s")) (JS.EInt start) + ,JS.Prop (JS.IdentPropName (JS.Ident "e")) (JS.EInt end)]) -argIdent :: Integer -> JS.Ident -argIdent n = JS.Ident ("x" ++ show n) --} children :: JS.Ident children = JS.Ident "cs" @@ -78,10 +60,10 @@ frule2js (PCoerce arg) = new "Coerce" [JS.EInt arg] farg2js (PArg hypos fid) = new "PArg" (map (JS.EInt . snd) hypos ++ [JS.EInt fid]) -ffun2js (CncFun fns lins) = new "CncFun" [JS.EArray (map (JS.EStr . showCId) fns), JS.EArray (map JS.EInt (Array.elems lins))] +ffun2js (f,lins) = new "CncFun" [JS.EStr (showCId f), JS.EArray (map JS.EInt lins)] -seq2js :: Array.Array DotPos Symbol -> JS.Expr -seq2js seq = JS.EArray [sym2js s | s <- Array.elems seq] +seq2js :: [Symbol] -> JS.Expr +seq2js seq = JS.EArray [sym2js s | s <- seq] sym2js :: Symbol -> JS.Expr sym2js (SymCat n l) = new "SymCat" [JS.EInt n, JS.EInt l] @@ -103,3 +85,4 @@ new f xs = JS.ENew (JS.Ident f) xs mapToJSObj :: (a -> JS.Expr) -> Map CId a -> JS.Expr mapToJSObj f m = JS.EObj [ JS.Prop (JS.IdentPropName (JS.Ident (showCId k))) (f v) | (k,v) <- Map.toList m ] + diff --git a/src/compiler/GF/Compile/PGFtoProlog.hs b/src/compiler/GF/Compile/PGFtoProlog.hs index 1279e3d8a..72fadfa79 100644 --- a/src/compiler/GF/Compile/PGFtoProlog.hs +++ b/src/compiler/GF/Compile/PGFtoProlog.hs @@ -8,9 +8,8 @@ module GF.Compile.PGFtoProlog (grammar2prolog) where -import PGF(mkCId,wildCId,showCId) +import PGF import PGF.Internal ---import PGF.Macros import GF.Data.Operations @@ -29,70 +28,56 @@ grammar2prolog pgf [[plp name]] ++++ plFacts wildCId "concrete" 2 "(?AbstractName, ?ConcreteName)" [[plp name, plp cncname] | - cncname <- Map.keys (concretes pgf)] ++++ + cncname <- languages pgf] ++++ plFacts wildCId "flag" 2 "(?Flag, ?Value): global flags" [[plp f, plp v] | - (f, v) <- Map.assocs (gflags pgf)] ++++ - plAbstract name (abstract pgf) ++++ - unlines (map plConcrete (Map.assocs (concretes pgf))) + (f, v) <- Map.assocs (globalFlags pgf)] ++++ + plAbstract name pgf ++++ + unlines [plConcrete name (lookConcr pgf name) | name <- languages pgf] ) - where name = absname pgf + where name = abstractName pgf ---------------------------------------------------------------------- -- abstract syntax -plAbstract :: CId -> Abstr -> String -plAbstract name abs +plAbstract :: CId -> PGF -> String +plAbstract name pgf = (plHeader "Abstract syntax" ++++ plFacts name "flag" 2 "(?Flag, ?Value): flags for abstract syntax" [[plp f, plp v] | - (f, v) <- Map.assocs (aflags abs)] ++++ + (f, v) <- Map.assocs (abstrFlags pgf)] ++++ plFacts name "cat" 2 "(?Type, ?[X:Type,...])" - [[plType cat args, plHypos hypos'] | - (cat, (hypos,_,_)) <- Map.assocs (cats abs), - let ((_, subst), hypos') = mapAccumL alphaConvertHypo emptyEnv hypos, - let args = reverse [EFun x | (_,x) <- subst]] ++++ + [[plType cat, []] | cat <- categories pgf] ++++ plFacts name "fun" 3 "(?Fun, ?Type, ?[X:Type,...])" - [[plp fun, plType cat args, plHypos hypos] | - (fun, (typ, _, _, _)) <- Map.assocs (funs abs), - let (_, DTyp hypos cat args) = alphaConvert emptyEnv typ] ++++ - plFacts name "def" 2 "(?Fun, ?Expr)" - [[plp fun, plp expr] | - (fun, (_, _, Just (eqs,_), _)) <- Map.assocs (funs abs), - let (_, expr) = alphaConvert emptyEnv eqs] + [[plp fun, plType cat, plHypos hypos] | + fun <- functions pgf, Just typ <- [functionType pgf fun], + let (hypos,cat,_) = unType typ] ) - where plType cat args = plTerm (plp cat) (map plp args) + where plType cat = plTerm (plp cat) [] plHypos hypos = plList [plOper ":" (plp x) (plp ty) | (_, x, ty) <- hypos] ---------------------------------------------------------------------- -- concrete syntax -plConcrete :: (CId, Concr) -> String -plConcrete (name, cnc) +plConcrete :: CId -> Concr -> String +plConcrete name cnc = (plHeader ("Concrete syntax: " ++ plp name) ++++ plFacts name "flag" 2 "(?Flag, ?Value): flags for concrete syntax" [[plp f, plp v] | - (f, v) <- Map.assocs (cflags cnc)] ++++ - plFacts name "printname" 2 "(?AbsFun/AbsCat, ?Atom)" - [[plp f, plp n] | - (f, n) <- Map.assocs (printnames cnc)] ++++ - plFacts name "lindef" 2 "(?CncCat, ?CncFun)" - [[plCat cat, plFun fun] | - (cat, funs) <- IntMap.assocs (lindefs cnc), - fun <- funs] ++++ + (f, v) <- Map.assocs (concrFlags cnc)] ++++ plFacts name "prod" 3 "(?CncCat, ?CncFun, ?[CncCat])" [[plCat cat, fun, plTerm "c" (map plCat args)] | - (cat, set) <- IntMap.toList (productions cnc), - (fun, args) <- map plProduction (Set.toList set)] ++++ + cat <- [0..concrTotalCats cnc-1], + (fun, args) <- map plProduction (concrProductions cnc cat)] ++++ plFacts name "cncfun" 3 "(?CncFun, ?[Seq,...], ?AbsFun)" - [[plFun fun, plTerm "s" (map plSeq (Array.elems lins)), plp absfun] | - (fun, CncFun absfun lins) <- Array.assocs (cncfuns cnc)] ++++ + [[plFun funid, plTerm "s" (map plSeq lins), plp absfun] | + funid <- [0..concrTotalFuns cnc-1], let (absfun,lins) = concrFunction cnc funid] ++++ plFacts name "seq" 2 "(?Seq, ?[Term])" - [[plSeq seq, plp (Array.elems symbols)] | - (seq, symbols) <- Array.assocs (sequences cnc)] ++++ + [[plSeq seqid, plp (concrSequence cnc seqid)] | + seqid <- [0..concrTotalSeqs cnc-1]] ++++ plFacts name "cnccat" 2 "(?AbsCat, ?[CnCCat])" [[plp cat, plList (map plCat [start..end])] | - (cat, CncCat start end _) <- Map.assocs (cnccats cnc)] + (cat,start,end,_) <- concrCategories cnc] ) where plProduction (PCoerce arg) = ("-", [arg]) plProduction (PApply funid args) = (plFun funid, [fid | PArg hypos fid <- args]) @@ -101,26 +86,12 @@ plConcrete (name, cnc) -- prolog-printing pgf datatypes instance PLPrint Type where - plp (DTyp hypos cat args) - | null hypos = result - | otherwise = plOper " -> " plHypos result - where result = plTerm (plp cat) (map plp args) - plHypos = plList [plOper ":" (plp x) (plp ty) | (_,x,ty) <- hypos] - -instance PLPrint Expr where - plp (EFun x) = plp x - plp (EAbs _ x e)= plOper "^" (plp x) (plp e) - plp (EApp e e') = plOper " * " (plp e) (plp e') - plp (ELit lit) = plp lit - plp (EMeta n) = "Meta_" ++ show n - -instance PLPrint Patt where - plp (PVar x) = plp x - plp (PApp f ps) = plOper " * " (plp f) (plp ps) - plp (PLit lit) = plp lit - -instance PLPrint Equation where - plp (Equ patterns result) = plOper ":" (plp patterns) (plp result) + plp ty + | null hypos = result + | otherwise = plOper " -> " plHypos result + where (hypos,cat,_) = unType ty + result = plTerm (plp cat) [] + plHypos = plList [plOper ":" (plp x) (plp ty) | (_,x,ty) <- hypos] instance PLPrint CId where plp cid | isLogicalVariable str || cid == wildCId = plVar str @@ -213,50 +184,3 @@ isLogicalVariable = isPrefixOf logicalVariablePrefix logicalVariablePrefix :: String logicalVariablePrefix = "X" - ----------------------------------------------------------------------- --- alpha convert variables to (unique) logical variables --- * this is needed if we want to translate variables to Prolog variables --- * used for abstract syntax, not concrete --- * not (yet?) used for variables bound in pattern equations - -type ConvertEnv = (Int, [(CId,CId)]) - -emptyEnv :: ConvertEnv -emptyEnv = (0, []) - -class AlphaConvert a where - alphaConvert :: ConvertEnv -> a -> (ConvertEnv, a) - -instance AlphaConvert a => AlphaConvert [a] where - alphaConvert env [] = (env, []) - alphaConvert env (a:as) = (env'', a':as') - where (env', a') = alphaConvert env a - (env'', as') = alphaConvert env' as - -instance AlphaConvert Type where - alphaConvert env@(_,subst) (DTyp hypos cat args) - = ((ctr,subst), DTyp hypos' cat args') - where (env', hypos') = mapAccumL alphaConvertHypo env hypos - ((ctr,_), args') = alphaConvert env' args - -alphaConvertHypo env (b,x,typ) = ((ctr+1,(x,x'):subst), (b,x',typ')) - where ((ctr,subst), typ') = alphaConvert env typ - x' = createLogicalVariable ctr - -instance AlphaConvert Expr where - alphaConvert (ctr,subst) (EAbs b x e) = ((ctr',subst), EAbs b x' e') - where ((ctr',_), e') = alphaConvert (ctr+1,(x,x'):subst) e - x' = createLogicalVariable ctr - alphaConvert env (EApp e1 e2) = (env'', EApp e1' e2') - where (env', e1') = alphaConvert env e1 - (env'', e2') = alphaConvert env' e2 - alphaConvert env expr@(EFun i) = (env, maybe expr EFun (lookup i (snd env))) - alphaConvert env expr = (env, expr) - --- pattern variables are not alpha converted --- (but they probably should be...) -instance AlphaConvert Equation where - alphaConvert env@(_,subst) (Equ patterns result) - = ((ctr,subst), Equ patterns result') - where ((ctr,_), result') = alphaConvert env result diff --git a/src/compiler/GF/Compile/PGFtoPython.hs b/src/compiler/GF/Compile/PGFtoPython.hs index f977abead..eeed374cf 100644 --- a/src/compiler/GF/Compile/PGFtoPython.hs +++ b/src/compiler/GF/Compile/PGFtoPython.hs @@ -9,40 +9,34 @@ {-# LANGUAGE FlexibleContexts #-} module GF.Compile.PGFtoPython (pgf2python) where -import PGF(showCId) -import PGF.Internal as M - -import GF.Data.Operations - -import qualified Data.Array.IArray as Array -import qualified Data.Set as Set +import PGF +import PGF.Internal import qualified Data.Map as Map -import qualified Data.IntMap as IntMap ---import Data.List (intersperse) +import GF.Data.Operations pgf2python :: PGF -> String pgf2python pgf = ("# -*- coding: utf-8 -*-" ++++ "# This file was automatically generated by GF" +++++ showCId name +++ "=" +++ pyDict 1 pyStr id [ - ("flags", pyDict 2 pyCId pyLiteral (Map.assocs (gflags pgf))), + ("flags", pyDict 2 pyCId pyLiteral (Map.assocs (globalFlags pgf))), ("abstract", pyDict 2 pyStr id [ ("name", pyCId name), - ("start", pyCId start), - ("flags", pyDict 3 pyCId pyLiteral (Map.assocs (aflags abs))), - ("funs", pyDict 3 pyCId pyAbsdef (Map.assocs (funs abs))) + ("start", pyCId start), + ("flags", pyDict 3 pyCId pyLiteral (Map.assocs (abstrFlags pgf))), + ("funs", pyDict 3 pyCId pyAbsdef [(f,ty) | f <- functions pgf, Just ty <- [functionType pgf f]]) ]), - ("concretes", pyDict 2 pyCId pyConcrete (Map.assocs cncs)) + ("concretes", pyDict 2 pyCId pyConcrete [(lang,lookConcr pgf lang) | lang <- languages pgf]) ] ++ "\n") where - name = absname pgf - start = M.lookStartCat pgf - abs = abstract pgf - cncs = concretes pgf + name = abstractName pgf + (_,start,_) = unType (startCat pgf) +-- cncs = concretes pgf -pyAbsdef :: (Type, Int, Maybe ([Equation], [[M.Instr]]), Double) -> String -pyAbsdef (typ, _, _, _) = pyTuple 0 id [pyCId cat, pyList 0 pyCId args] - where (args, cat) = M.catSkeleton typ +pyAbsdef :: Type -> String +pyAbsdef typ = pyTuple 0 id [pyCId cat, pyList 0 pyCId args] + where (hypos,cat,_) = unType typ + args = [cat | (_,_,typ) <- hypos, let (_,cat,_) = unType typ] pyLiteral :: Literal -> String pyLiteral (LStr s) = pyStr s @@ -51,19 +45,17 @@ pyLiteral (LFlt d) = show d pyConcrete :: Concr -> String pyConcrete cnc = pyDict 3 pyStr id [ - ("flags", pyDict 0 pyCId pyLiteral (Map.assocs (cflags cnc))), - ("printnames", pyDict 4 pyCId pyStr (Map.assocs (printnames cnc))), - ("lindefs", pyDict 4 pyCat (pyList 0 pyFun) (IntMap.assocs (lindefs cnc))), - ("productions", pyDict 4 pyCat pyProds (IntMap.assocs (productions cnc))), - ("cncfuns", pyDict 4 pyFun pyCncFun (Array.assocs (cncfuns cnc))), - ("sequences", pyDict 4 pySeq pySymbols (Array.assocs (sequences cnc))), - ("cnccats", pyDict 4 pyCId pyCncCat (Map.assocs (cnccats cnc))), - ("size", show (totalCats cnc)) + ("flags", pyDict 0 pyCId pyLiteral (Map.assocs (concrFlags cnc))), + ("productions", pyDict 4 pyCat pyProds [(fid,concrProductions cnc fid) | fid <- [0..concrTotalCats cnc-1]]), + ("cncfuns", pyDict 4 pyFun pyCncFun [(funid,concrFunction cnc funid) | funid <- [0..concrTotalFuns cnc-1]]), + ("sequences", pyDict 4 pySeq pySymbols [(seqid,concrSequence cnc seqid) | seqid <- [0..concrTotalSeqs cnc-1]]), + ("cnccats", pyDict 4 pyCId pyCncCat [(cat,(s,e,lbls)) | (cat,s,e,lbls) <- concrCategories cnc]), + ("size", show (concrTotalCats cnc)) ] - where pyProds prods = pyList 5 pyProduction (Set.toList prods) - pyCncCat (CncCat start end _) = pyList 0 pyCat [start..end] - pyCncFun (CncFun fns lins) = pyTuple 0 id [pyList 0 pySeq (Array.elems lins), pyList 0 pyCId fns] - pySymbols syms = pyList 0 pySymbol (Array.elems syms) + where pyProds prods = pyList 5 pyProduction prods + pyCncCat (start,end,_) = pyList 0 pyCat [start..end] + pyCncFun (f,lins) = pyTuple 0 id [pyList 0 pySeq lins, pyCId f] + pySymbols syms = pyList 0 pySymbol syms pyProduction :: Production -> String pyProduction (PCoerce arg) = pyTuple 0 id [pyStr "", pyList 0 pyCat [arg]] diff --git a/src/compiler/GF/Compile/ToAPI.hs b/src/compiler/GF/Compile/ToAPI.hs index d59ee4305..83ae6539f 100644 --- a/src/compiler/GF/Compile/ToAPI.hs +++ b/src/compiler/GF/Compile/ToAPI.hs @@ -2,8 +2,7 @@ module GF.Compile.ToAPI (stringToAPI,exprToAPI) where -import PGF.Internal -import PGF(showCId) +import PGF import Data.Maybe --import System.IO --import Control.Monad diff --git a/src/compiler/GF/Compile/TypeCheck/RConcrete.hs b/src/compiler/GF/Compile/TypeCheck/RConcrete.hs index 88e324ff3..2fe08b256 100644 --- a/src/compiler/GF/Compile/TypeCheck/RConcrete.hs +++ b/src/compiler/GF/Compile/TypeCheck/RConcrete.hs @@ -1,6 +1,5 @@ {-# LANGUAGE PatternGuards #-} module GF.Compile.TypeCheck.RConcrete( checkLType, inferLType, computeLType, ppType ) where -import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint import GF.Infra.CheckM import GF.Data.Operations diff --git a/src/compiler/GF/CompileInParallel.hs b/src/compiler/GF/CompileInParallel.hs index fecce0a68..7986656ec 100644 --- a/src/compiler/GF/CompileInParallel.hs +++ b/src/compiler/GF/CompileInParallel.hs @@ -1,6 +1,6 @@ -- | Parallel grammar compilation module GF.CompileInParallel(parallelBatchCompile) where -import Prelude hiding (catch,(<>)) -- GHC 8.4.1 clash with Text.PrettyPrint +import Prelude hiding (catch) import Control.Monad(join,ap,when,unless) import Control.Applicative import GF.Infra.Concurrency @@ -34,11 +34,8 @@ import qualified Data.ByteString.Lazy as BS parallelBatchCompile jobs opts rootfiles0 = do setJobs jobs rootfiles <- mapM canonical rootfiles0 - lib_dirs1 <- getLibraryDirectory opts - lib_dirs2 <- mapM canonical lib_dirs1 - let lib_dir = head lib_dirs2 - when (length lib_dirs2 >1) $ ePutStrLn ("GF_LIB_PATH defines more than one directory; using the first, " ++ show lib_dir) - filepaths <- mapM (getPathFromFile [lib_dir] opts) rootfiles + lib_dir <- canonical =<< getLibraryDirectory opts + filepaths <- mapM (getPathFromFile lib_dir opts) rootfiles let groups = groupFiles lib_dir filepaths n = length groups when (n>1) $ ePutStrLn "Grammar mixes present and alltenses, dividing modules into two groups" diff --git a/src/compiler/GF/Compiler.hs b/src/compiler/GF/Compiler.hs index aa7b80268..f03230f76 100644 --- a/src/compiler/GF/Compiler.hs +++ b/src/compiler/GF/Compiler.hs @@ -1,8 +1,7 @@ -module GF.Compiler (mainGFC, linkGrammars, writePGF, writeOutputs) where +module GF.Compiler (mainGFC, linkGrammars, writeGrammar, writeOutputs) where import PGF -import PGF.Internal(concretes,optimizePGF,unionPGF) -import PGF.Internal(putSplitAbs,encodeFile,runPut) +import PGF.Internal(unionPGF,writePGF,writeConcr) import GF.Compile as S(batchCompile,link,srcAbsName) import GF.CompileInParallel as P(parallelBatchCompile) import GF.Compile.Export @@ -70,7 +69,7 @@ compileSourceFiles opts fs = -- in the 'Options') from the output of 'parallelBatchCompile'. -- If a @.pgf@ file by the same name already exists and it is newer than the -- source grammar files (as indicated by the 'UTCTime' argument), it is not --- recreated. Calls 'writePGF' and 'writeOutputs'. +-- recreated. Calls 'writeGrammar' and 'writeOutputs'. linkGrammars opts (t_src,~cnc_grs@(~(cnc,gr):_)) = do let abs = render (srcAbsName gr cnc) pgfFile = outputPath opts (grammarName' opts abs<.>"pgf") @@ -80,8 +79,8 @@ linkGrammars opts (t_src,~cnc_grs@(~(cnc,gr):_)) = if t_pgf >= Just t_src then putIfVerb opts $ pgfFile ++ " is up-to-date." else do pgfs <- mapM (link opts) cnc_grs - let pgf = foldl1 unionPGF pgfs - writePGF opts pgf + let pgf = foldl1 (\one two -> fromMaybe two (unionPGF one two)) pgfs + writeGrammar opts pgf writeOutputs opts pgf compileCFFiles :: Options -> [FilePath] -> IOE () @@ -91,12 +90,11 @@ compileCFFiles opts fs = do startCat <- case rules of (Rule cat _ _ : _) -> return cat _ -> fail "empty CFG" - let pgf = cf2pgf (last fs) (mkCFG startCat Set.empty rules) + probs <- liftIO (maybe (return Map.empty) readProbabilitiesFromFile (flag optProbsFile opts)) + let pgf = cf2pgf opts (last fs) (mkCFG startCat Set.empty rules) probs unless (flag optStopAfterPhase opts == Compile) $ - do probs <- liftIO (maybe (return . defaultProbabilities) readProbabilitiesFromFile (flag optProbsFile opts) pgf) - let pgf' = setProbabilities probs $ if flag optOptimizePGF opts then optimizePGF pgf else pgf - writePGF opts pgf' - writeOutputs opts pgf' + do writeGrammar opts pgf + writeOutputs opts pgf unionPGFFiles :: Options -> [FilePath] -> IOE () unionPGFFiles opts fs = @@ -114,12 +112,11 @@ unionPGFFiles opts fs = doIt = do pgfs <- mapM readPGFVerbose fs - let pgf0 = foldl1 unionPGF pgfs - pgf = if flag optOptimizePGF opts then optimizePGF pgf0 else pgf0 + let pgf = foldl1 (\one two -> fromMaybe two (unionPGF one two)) pgfs pgfFile = outputPath opts (grammarName opts pgf <.> "pgf") if pgfFile `elem` fs then putStrLnE $ "Refusing to overwrite " ++ pgfFile - else writePGF opts pgf + else writeGrammar opts pgf writeOutputs opts pgf readPGFVerbose f = @@ -136,21 +133,20 @@ writeOutputs opts pgf = do -- | Write the result of compiling a grammar (e.g. with 'compileToPGF' or -- 'link') to a @.pgf@ file. -- A split PGF file is output if the @-split-pgf@ option is used. -writePGF :: Options -> PGF -> IOE () -writePGF opts pgf = +writeGrammar :: Options -> PGF -> IOE () +writeGrammar opts pgf = if flag optSplitPGF opts then writeSplitPGF else writeNormalPGF where writeNormalPGF = do let outfile = outputPath opts (grammarName opts pgf <.> "pgf") - writing opts outfile $ encodeFile outfile pgf + writing opts outfile (writePGF outfile pgf) writeSplitPGF = do let outfile = outputPath opts (grammarName opts pgf <.> "pgf") - writing opts outfile $ BSL.writeFile outfile (runPut (putSplitAbs pgf)) - --encodeFile_ outfile (putSplitAbs pgf) - forM_ (Map.toList (concretes pgf)) $ \cnc -> do - let outfile = outputPath opts (showCId (fst cnc) <.> "pgf_c") - writing opts outfile $ encodeFile outfile cnc + writing opts outfile $ writePGF outfile pgf + forM_ (languages pgf) $ \lang -> do + let outfile = outputPath opts (showCId lang <.> "pgf_c") + writing opts outfile (writeConcr outfile pgf lang) writeOutput :: Options -> FilePath-> String -> IOE () diff --git a/src/compiler/GF/Grammar/Binary.hs b/src/compiler/GF/Grammar/Binary.hs index 725ae9284..3ecdb0223 100644 --- a/src/compiler/GF/Grammar/Binary.hs +++ b/src/compiler/GF/Grammar/Binary.hs @@ -10,9 +10,9 @@ module GF.Grammar.Binary(VersionTagged(..),decodeModuleHeader,decodeModule,encodeModule) where import Prelude hiding (catch) +import Control.Monad import Control.Exception(catch,ErrorCall(..),throwIO) - -import PGF.Internal(Binary(..),Word8,putWord8,getWord8,encodeFile,decodeFile) +import Data.Binary import qualified Data.Map as Map(empty) import qualified Data.ByteString.Char8 as BS @@ -23,7 +23,7 @@ import GF.Infra.UseIO(MonadIO(..)) import GF.Grammar.Grammar import PGF() -- Binary instances -import PGF.Internal(Literal(..)) +import PGF.Internal(Literal(..),Symbol(..)) -- Please change this every time when the GFO format is changed gfoVersion = "GF04" @@ -298,6 +298,53 @@ instance Binary Label where 1 -> fmap LVar get _ -> decodingError +instance Binary BindType where + put Explicit = putWord8 0 + put Implicit = putWord8 1 + get = do tag <- getWord8 + case tag of + 0 -> return Explicit + 1 -> return Implicit + _ -> decodingError + +instance Binary Literal where + put (LStr s) = putWord8 0 >> put s + put (LInt i) = putWord8 1 >> put i + put (LFlt d) = putWord8 2 >> put d + get = do tag <- getWord8 + case tag of + 0 -> liftM LStr get + 1 -> liftM LInt get + 2 -> liftM LFlt get + _ -> decodingError + +instance Binary Symbol where + put (SymCat n l) = putWord8 0 >> put (n,l) + put (SymLit n l) = putWord8 1 >> put (n,l) + put (SymVar n l) = putWord8 2 >> put (n,l) + put (SymKS ts) = putWord8 3 >> put ts + put (SymKP d vs) = putWord8 4 >> put (d,vs) + put SymBIND = putWord8 5 + put SymSOFT_BIND = putWord8 6 + put SymNE = putWord8 7 + put SymSOFT_SPACE = putWord8 8 + put SymCAPIT = putWord8 9 + put SymALL_CAPIT = putWord8 10 + get = do tag <- getWord8 + case tag of + 0 -> liftM2 SymCat get get + 1 -> liftM2 SymLit get get + 2 -> liftM2 SymVar get get + 3 -> liftM SymKS get + 4 -> liftM2 (\d vs -> SymKP d vs) get get + 5 -> return SymBIND + 6 -> return SymSOFT_BIND + 7 -> return SymNE + 8 -> return SymSOFT_SPACE + 9 -> return SymCAPIT + 10-> return SymALL_CAPIT + _ -> decodingError + --putGFOVersion = mapM_ (putWord8 . fromIntegral . ord) gfoVersion --getGFOVersion = replicateM (length gfoVersion) (fmap (chr . fromIntegral) getWord8) --putGFOVersion = put gfoVersion diff --git a/src/compiler/GF/Grammar/Printer.hs b/src/compiler/GF/Grammar/Printer.hs index 4b19d215b..dcd419c42 100644 --- a/src/compiler/GF/Grammar/Printer.hs +++ b/src/compiler/GF/Grammar/Printer.hs @@ -22,7 +22,6 @@ module GF.Grammar.Printer , ppMeta , getAbs ) where -import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint import GF.Infra.Ident import GF.Infra.Option diff --git a/src/compiler/GF/Infra/CheckM.hs b/src/compiler/GF/Infra/CheckM.hs index c5f9ba255..3b6833f0f 100644 --- a/src/compiler/GF/Infra/CheckM.hs +++ b/src/compiler/GF/Infra/CheckM.hs @@ -18,7 +18,6 @@ module GF.Infra.CheckM checkIn, checkInModule, checkMap, checkMapRecover, parallelCheck, accumulateError, commitCheck, ) where -import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint import GF.Data.Operations --import GF.Infra.Ident diff --git a/src/compiler/GF/Infra/Ident.hs b/src/compiler/GF/Infra/Ident.hs index b856d3995..2bbbe33ad 100644 --- a/src/compiler/GF/Infra/Ident.hs +++ b/src/compiler/GF/Infra/Ident.hs @@ -13,17 +13,17 @@ ----------------------------------------------------------------------------- module GF.Infra.Ident (-- ** Identifiers - ModuleName(..), moduleNameS, - Ident, ident2utf8, showIdent, prefixIdent, - -- *** Normal identifiers (returned by the parser) - identS, identC, identW, - -- *** Special identifiers for internal use - identV, identA, identAV, - argIdent, isArgIdent, getArgIndex, - varStr, varX, isWildIdent, varIndex, - -- *** Raw identifiers - RawIdent, rawIdentS, rawIdentC, ident2raw, prefixRawIdent, - isPrefixOf, showRawIdent + ModuleName(..), moduleNameS, + Ident, ident2utf8, showIdent, prefixIdent, + -- *** Normal identifiers (returned by the parser) + identS, identC, identW, + -- *** Special identifiers for internal use + identV, identA, identAV, + argIdent, isArgIdent, getArgIndex, + varStr, varX, isWildIdent, varIndex, + -- *** Raw identifiers + RawIdent, rawIdentS, rawIdentC, ident2raw, prefixRawIdent, + isPrefixOf, showRawIdent ) where import qualified Data.ByteString.UTF8 as UTF8 @@ -31,7 +31,7 @@ import qualified Data.ByteString.Char8 as BS(append,isPrefixOf) -- Limit use of BS functions to the ones that work correctly on -- UTF-8-encoded bytestrings! import Data.Char(isDigit) -import PGF.Internal(Binary(..)) +import Data.Binary(Binary(..)) import GF.Text.Pretty diff --git a/src/compiler/GF/Infra/Location.hs b/src/compiler/GF/Infra/Location.hs index 8447a297c..0bf85b37f 100644 --- a/src/compiler/GF/Infra/Location.hs +++ b/src/compiler/GF/Infra/Location.hs @@ -1,6 +1,5 @@ -- | Source locations module GF.Infra.Location where -import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint import GF.Text.Pretty -- ** Source locations diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs index 27aa1c256..61ccd8f80 100644 --- a/src/compiler/GF/Infra/Option.hs +++ b/src/compiler/GF/Infra/Option.hs @@ -34,17 +34,14 @@ import Data.Maybe import GF.Infra.Ident import GF.Infra.GetOpt import GF.Grammar.Predef ---import System.Console.GetOpt import System.FilePath ---import System.IO +import PGF.Internal(Literal(..)) import GF.Data.Operations(Err,ErrorMonad(..),liftErr) import Data.Set (Set) import qualified Data.Set as Set -import PGF.Internal(Literal(..)) - usageHeader :: String usageHeader = unlines ["Usage: gf [OPTIONS] [FILE [...]]", @@ -75,7 +72,6 @@ errors = raise . unlines data Mode = ModeVersion | ModeHelp | ModeInteractive | ModeRun - | ModeInteractive2 | ModeRun2 | ModeCompiler | ModeServer {-port::-}Int deriving (Show,Eq,Ord) @@ -153,7 +149,7 @@ data Flags = Flags { optLiteralCats :: Set Ident, optGFODir :: Maybe FilePath, optOutputDir :: Maybe FilePath, - optGFLibPath :: Maybe [FilePath], + optGFLibPath :: Maybe FilePath, optDocumentRoot :: Maybe FilePath, -- For --server mode optRecomp :: Recomp, optProbsFile :: Maybe FilePath, @@ -208,10 +204,9 @@ parseModuleOptions args = do then return opts else errors $ map ("Non-option among module options: " ++) nonopts -fixRelativeLibPaths curr_dir lib_dirs (Options o) = Options (fixPathFlags . o) +fixRelativeLibPaths curr_dir lib_dir (Options o) = Options (fixPathFlags . o) where - fixPathFlags f@(Flags{optLibraryPath=path}) = f{optLibraryPath=concatMap (\dir -> [parent dir - | parent <- curr_dir : lib_dirs]) path} + fixPathFlags f@(Flags{optLibraryPath=path}) = f{optLibraryPath=concatMap (\dir -> [curr_dir dir, lib_dir dir]) path} -- Showing options @@ -307,8 +302,6 @@ optDescr = Option ['j'] ["jobs"] (OptArg jobs "N") "Compile N modules in parallel with -batch (default 1).", Option [] ["interactive"] (NoArg (mode ModeInteractive)) "Run in interactive mode (default).", Option [] ["run"] (NoArg (mode ModeRun)) "Run in interactive mode, showing output only (no other messages).", - Option [] ["cshell"] (NoArg (mode ModeInteractive2)) "Start the C run-time shell.", - Option [] ["crun"] (NoArg (mode ModeRun2)) "Start the C run-time shell, showing output only (no other messages).", Option [] ["server"] (OptArg modeServer "port") $ "Run in HTTP server mode on given port (default "++show defaultPort++").", Option [] ["document-root"] (ReqArg gfDocuRoot "DIR") @@ -424,7 +417,7 @@ optDescr = literalCat x = set $ \o -> o { optLiteralCats = foldr Set.insert (optLiteralCats o) ((map identS . splitBy (==',')) x) } lexicalCat x = set $ \o -> o { optLexicalCats = foldr Set.insert (optLexicalCats o) (splitBy (==',') x) } outDir x = set $ \o -> o { optOutputDir = Just x } - gfLibPath x = set $ \o -> o { optGFLibPath = Just $ splitInModuleSearchPath x } + gfLibPath x = set $ \o -> o { optGFLibPath = Just x } gfDocuRoot x = set $ \o -> o { optDocumentRoot = Just x } recomp x = set $ \o -> o { optRecomp = x } probsFile x = set $ \o -> o { optProbsFile = Just x } diff --git a/src/compiler/GF/Infra/UseIO.hs b/src/compiler/GF/Infra/UseIO.hs index e27b6e075..ad0c75fd5 100644 --- a/src/compiler/GF/Infra/UseIO.hs +++ b/src/compiler/GF/Infra/UseIO.hs @@ -38,7 +38,6 @@ import Control.Monad(when,liftM,foldM) import Control.Monad.Trans(MonadIO(..)) import Control.Monad.State(StateT,lift) import Control.Exception(evaluate) -import Data.List (nub) --putIfVerb :: MonadIO io => Options -> String -> io () putIfVerb opts msg = when (verbAtLeast opts Verbose) $ putStrLnE msg @@ -52,32 +51,28 @@ type FullPath = String gfLibraryPath = "GF_LIB_PATH" gfGrammarPathVar = "GF_GRAMMAR_PATH" -getLibraryDirectory :: MonadIO io => Options -> io [FilePath] +getLibraryDirectory :: MonadIO io => Options -> io FilePath getLibraryDirectory opts = case flag optGFLibPath opts of Just path -> return path - Nothing -> liftM splitSearchPath $ liftIO (catch (getEnv gfLibraryPath) - (\ex -> fmap ( "lib") getDataDir)) + Nothing -> liftIO $ catch (getEnv gfLibraryPath) + (\ex -> fmap ( "lib") getDataDir) -getGrammarPath :: MonadIO io => [FilePath] -> io [FilePath] -getGrammarPath lib_dirs = liftIO $ do +getGrammarPath :: MonadIO io => FilePath -> io [FilePath] +getGrammarPath lib_dir = liftIO $ do catch (fmap splitSearchPath $ getEnv gfGrammarPathVar) - (\_ -> return $ concat [[lib_dir "alltenses", lib_dir "prelude"] - | lib_dir <- lib_dirs ]) -- e.g. GF_GRAMMAR_PATH + (\_ -> return [lib_dir "alltenses",lib_dir "prelude"]) -- e.g. GF_GRAMMAR_PATH -- | extends the search path with the -- 'gfLibraryPath' and 'gfGrammarPathVar' -- environment variables. Returns only existing paths. extendPathEnv :: MonadIO io => Options -> io [FilePath] extendPathEnv opts = liftIO $ do - let opt_path = nub $ flag optLibraryPath opts -- e.g. paths given as options - lib_dirs <- getLibraryDirectory opts -- e.g. GF_LIB_PATH - grm_path <- getGrammarPath lib_dirs -- e.g. GF_GRAMMAR_PATH - let paths = opt_path ++ lib_dirs ++ grm_path - when (verbAtLeast opts Verbose) $ putStrLn ("extendPathEnv: opt_path is "++ show opt_path) - when (verbAtLeast opts Verbose) $ putStrLn ("extendPathEnv: lib_dirs is "++ show lib_dirs) - when (verbAtLeast opts Verbose) $ putStrLn ("extendPathEnv: grm_path is "++ show grm_path) - ps <- liftM (nub . concat) $ mapM allSubdirs (nub paths) + let opt_path = flag optLibraryPath opts -- e.g. paths given as options + lib_dir <- getLibraryDirectory opts -- e.g. GF_LIB_PATH + grm_path <- getGrammarPath lib_dir -- e.g. GF_GRAMMAR_PATH + let paths = opt_path ++ [lib_dir] ++ grm_path + ps <- liftM concat $ mapM allSubdirs paths mapM canonicalizePath ps where allSubdirs :: FilePath -> IO [FilePath] @@ -85,15 +80,11 @@ extendPathEnv opts = liftIO $ do allSubdirs p = case last p of '*' -> do let path = init p fs <- getSubdirs path - let starpaths = [path f | f <- fs] - when (verbAtLeast opts Verbose) $ putStrLn ("extendPathEnv: allSubdirs: * found "++show starpaths) - return starpaths + return [path f | f <- fs] _ -> do exists <- doesDirectoryExist p if exists - then do - when (verbAtLeast opts Verbose) $ putStrLn ("extendPathEnv: allSubdirs: found path "++show p) - return [p] - else do when (verbAtLeast opts Verbose) $ putStrLn ("extendPathEnv: allSubdirs: ignore path "++ show p) + then return [p] + else do when (verbAtLeast opts Verbose) $ putStrLn ("ignore path "++p) return [] getSubdirs :: FilePath -> IO [FilePath] diff --git a/src/compiler/GF/Interactive.hs b/src/compiler/GF/Interactive.hs index 184ff7c96..2a12257b7 100644 --- a/src/compiler/GF/Interactive.hs +++ b/src/compiler/GF/Interactive.hs @@ -1,10 +1,10 @@ {-# LANGUAGE CPP, ScopedTypeVariables, FlexibleInstances #-} -- | GF interactive mode module GF.Interactive (mainGFI,mainRunGFI,mainServerGFI) where + import Prelude hiding (putStrLn,print) import qualified Prelude as P(putStrLn) import GF.Command.Interpreter(CommandEnv(..),mkCommandEnv,interpretCommandLine) ---import GF.Command.Importing(importSource,importGrammar) import GF.Command.Commands(PGFEnv,HasPGFEnv(..),pgf,pgfEnv,pgfCommands) import GF.Command.CommonCommands(commonCommands,extend) import GF.Command.SourceCommands @@ -19,19 +19,13 @@ import GF.Infra.UseIO(ioErrorText,putStrLnE) import GF.Infra.SIO import GF.Infra.Option import qualified System.Console.Haskeline as Haskeline ---import GF.Text.Coding(decodeUnicode,encodeUnicode) - ---import GF.Compile.Coding(codeTerm) import PGF -import PGF.Internal(abstract,funs,lookStartCat,emptyPGF) import Data.Char import Data.List(isPrefixOf) import qualified Data.Map as Map import qualified Text.ParserCombinators.ReadP as RP ---import System.IO(utf8) ---import System.CPUTime(getCPUTime) import System.Directory({-getCurrentDirectory,-}getAppUserDataDirectory) import Control.Exception(SomeException,fromException,evaluate,try) import Control.Monad.State hiding (void) @@ -289,8 +283,9 @@ importInEnv opts files = do let opts' = addOptions (setOptimization OptCSE False) opts pgf1 <- importGrammar pgf0 opts' files if (verbAtLeast opts Normal) - then putStrLnFlush $ - unwords $ "\nLanguages:" : map showCId (languages pgf1) + then case pgf1 of + Just pgf -> putStrLnFlush $ unwords $ "\nLanguages:" : map showCId (languages pgf) + Nothing -> done else done return pgf1 @@ -301,10 +296,10 @@ tryGetLine = do Right l -> return l prompt env - | retain env || abs == wildCId = "> " - | otherwise = showCId abs ++ "> " - where - abs = abstractName (multigrammar env) + | retain env = "> " + | otherwise = case multigrammar env of + Just pgf -> showCId (abstractName pgf) ++ "> " + Nothing -> "> " type CmdEnv = (Grammar,PGFEnv) @@ -318,7 +313,7 @@ data GFEnv = GFEnv { emptyGFEnv opts = GFEnv opts False emptyCmdEnv emptyCommandEnv [] -emptyCmdEnv = (emptyGrammar,pgfEnv emptyPGF) +emptyCmdEnv = (emptyGrammar,pgfEnv Nothing) emptyCommandEnv = mkCommandEnv allCommands multigrammar = pgf . snd . pgfenv @@ -336,17 +331,32 @@ wordCompletion gfenv (left,right) = do CmplCmd pref -> ret (length pref) [Haskeline.simpleCompletion name | name <- Map.keys (commands cmdEnv), isPrefixOf pref name] CmplStr (Just (Command _ opts _)) s0 - -> do mb_state0 <- try (evaluate (initState pgf (optLang opts) (optType opts))) - case mb_state0 of - Right state0 -> let (rprefix,rs) = break isSpace (reverse s0) - s = reverse rs - prefix = reverse rprefix - ws = words s - in case loop state0 ws of - Nothing -> ret 0 [] - Just state -> let compls = getCompletions state prefix - in ret (length prefix) (map (\x -> Haskeline.simpleCompletion x) (Map.keys compls)) - Left (_ :: SomeException) -> ret 0 [] + -> case multigrammar gfenv of + Just pgf -> let optLang opts = case valStrOpts "lang" "" opts of + "" -> case languages pgf of + [] -> Nothing + (lang:_) -> Just lang + lang -> let cla = mkCId lang in + if elem cla (languages pgf) + then Just cla + else let cla = mkCId (showCId (abstractName pgf) ++ lang) + in if elem cla (languages pgf) + then Just cla + else Nothing + optType opts = let readOpt str = case readType str of + Just ty -> case checkType pgf ty of + Left _ -> Nothing + Right ty -> Just ty + Nothing -> Nothing + in maybeStrOpts "cat" (Just (startCat pgf)) readOpt opts + (rprefix,rs) = break isSpace (reverse s0) + s = reverse rs + prefix = reverse rprefix + in case (optLang opts, optType opts) of + (Just lang,Just cat) -> let (_,_,compls) = complete pgf lang cat s prefix + in ret (length prefix) (map Haskeline.simpleCompletion (Map.keys compls)) + _ -> ret 0 [] + Nothing -> ret 0 [] CmplOpt (Just (Command n _ _)) pref -> case Map.lookup n (commands cmdEnv) of Just inf -> do let flg_compls = [Haskeline.Completion ('-':flg++"=") ('-':flg) False | (flg,_) <- flags inf, isPrefixOf pref flg] @@ -357,23 +367,15 @@ wordCompletion gfenv (left,right) = do CmplIdent (Just (Command "i" _ _)) _ -- HACK: file name completion for command i -> Haskeline.completeFilename (left,right) CmplIdent _ pref - -> do mb_abs <- try (evaluate (abstract pgf)) - case mb_abs of - Right abs -> ret (length pref) [Haskeline.simpleCompletion name | cid <- Map.keys (funs abs), let name = showCId cid, isPrefixOf pref name] - Left (_ :: SomeException) -> ret (length pref) [] + -> case multigrammar gfenv of + Just pgf -> ret (length pref) [Haskeline.simpleCompletion name | cid <- functions pgf, let name = showCId cid, isPrefixOf pref name] + Nothing -> ret (length pref) [] _ -> ret 0 [] where - pgf = multigrammar gfenv cmdEnv = commandenv gfenv - optLang opts = valCIdOpts "lang" (head (languages pgf)) opts - optType opts = - let str = valStrOpts "cat" (showCId $ lookStartCat pgf) opts - in case readType str of - Just ty -> ty - Nothing -> error ("Can't parse '"++str++"' as type") loop ps [] = Just ps - loop ps (t:ts) = case nextState ps (simpleParseInput t) of + loop ps (t:ts) = case error "nextState ps (simpleParseInput t)" of Left es -> Nothing Right ps -> loop ps ts diff --git a/src/compiler/GF/Main.hs b/src/compiler/GF/Main.hs index 642ddf565..24f7b78f1 100644 --- a/src/compiler/GF/Main.hs +++ b/src/compiler/GF/Main.hs @@ -2,10 +2,7 @@ {-# LANGUAGE CPP #-} module GF.Main where import GF.Compiler -import qualified GF.Interactive as GFI1 -#ifdef C_RUNTIME -import qualified GF.Interactive2 as GFI2 -#endif +import GF.Interactive import GF.Data.ErrM import GF.Infra.Option import GF.Infra.UseIO @@ -47,17 +44,7 @@ mainOpts opts files = case flag optMode opts of ModeVersion -> putStrLn $ "Grammatical Framework (GF) version " ++ showVersion version ++ "\n" ++ buildInfo ModeHelp -> putStrLn helpMessage - ModeServer port -> GFI1.mainServerGFI opts port files + ModeServer port -> mainServerGFI opts port files ModeCompiler -> mainGFC opts files - ModeInteractive -> GFI1.mainGFI opts files - ModeRun -> GFI1.mainRunGFI opts files -#ifdef C_RUNTIME - ModeInteractive2 -> GFI2.mainGFI opts files - ModeRun2 -> GFI2.mainRunGFI opts files -#else - ModeInteractive2 -> noCruntime - ModeRun2 -> noCruntime - where - noCruntime = do ePutStrLn "GF configured without C run-time support" - exitFailure -#endif + ModeInteractive -> mainGFI opts files + ModeRun -> mainRunGFI opts files diff --git a/src/compiler/GF/Server.hs b/src/compiler/GF/Server.hs index c287e8001..de0ec6abc 100644 --- a/src/compiler/GF/Server.hs +++ b/src/compiler/GF/Server.hs @@ -3,7 +3,6 @@ module GF.Server(server) where import Data.List(partition,stripPrefix,isInfixOf) import qualified Data.Map as M -import Control.Applicative -- for GHC<7.10 import Control.Monad(when) import Control.Monad.State(StateT(..),get,gets,put) import Control.Monad.Error(ErrorT(..),Error(..)) @@ -34,7 +33,7 @@ import Network.Shed.Httpd(initServer,Request(..),Response(..),noCache) --import qualified Network.FastCGI as FCGI -- from hackage direct-fastcgi import Network.CGI(handleErrors,liftIO) import CGIUtils(handleCGIErrors)--,outputJSONP,stderrToFile -import Text.JSON(JSValue(..),Result(..),valFromObj,encode,decode,showJSON,makeObj) +import Text.JSON(encode,showJSON,makeObj) --import System.IO.Silently(hCapture) import System.Process(readProcessWithExitCode) import System.Exit(ExitCode(..)) @@ -284,17 +283,13 @@ handle logLn documentroot state0 cache execute1 stateVar skip_empty = filter (not.null.snd) jsonList = jsonList' return - jsonListLong ext = jsonList' (mapM (addTime ext)) ext + jsonListLong = jsonList' (mapM addTime) jsonList' details ext = fmap (json200) (details =<< ls_ext "." ext) - addTime ext path = + addTime path = do t <- getModificationTime path - if ext==".json" - then addComment (time t) <$> liftIO (try $ getComment path) - else return . makeObj $ time t + return $ makeObj ["path".=path,"time".=format t] where - addComment t = makeObj . either (const t) (\c->t++["comment".=c]) - time t = ["path".=path,"time".=format t] format = formatTime defaultTimeLocale rfc822DateFormat rm path | takeExtension path `elem` ok_to_delete = @@ -336,11 +331,6 @@ handle logLn documentroot state0 cache execute1 stateVar do paths <- getDirectoryContents dir return [path | path<-paths, takeExtension path==ext] - getComment path = - do Ok (JSObject obj) <- decode <$> readFile path - Ok cmnt <- return (valFromObj "comment" obj) - return (cmnt::String) - -- * Dynamic content jsonresult cwd dir cmd (ecode,stdout,stderr) files = diff --git a/src/compiler/GF/Speech/GSL.hs b/src/compiler/GF/Speech/GSL.hs index a898a4bb5..d9d6af0cc 100644 --- a/src/compiler/GF/Speech/GSL.hs +++ b/src/compiler/GF/Speech/GSL.hs @@ -7,7 +7,6 @@ ----------------------------------------------------------------------------- module GF.Speech.GSL (gslPrinter) where -import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint --import GF.Data.Utilities import GF.Grammar.CFG diff --git a/src/compiler/GF/Speech/JSGF.hs b/src/compiler/GF/Speech/JSGF.hs index 15f5ff69d..25168dbc8 100644 --- a/src/compiler/GF/Speech/JSGF.hs +++ b/src/compiler/GF/Speech/JSGF.hs @@ -11,7 +11,6 @@ ----------------------------------------------------------------------------- module GF.Speech.JSGF (jsgfPrinter) where -import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint --import GF.Data.Utilities import GF.Infra.Option diff --git a/src/compiler/GF/Speech/PGFToCFG.hs b/src/compiler/GF/Speech/PGFToCFG.hs index a63dc43e4..bc9df2abc 100644 --- a/src/compiler/GF/Speech/PGFToCFG.hs +++ b/src/compiler/GF/Speech/PGFToCFG.hs @@ -6,17 +6,13 @@ ---------------------------------------------------------------------- module GF.Speech.PGFToCFG (bnfPrinter, pgfToCFG) where -import PGF(showCId) -import PGF.Internal as PGF ---import GF.Infra.Ident +import PGF +import PGF.Internal import GF.Grammar.CFG hiding (Symbol) -import Data.Array.IArray as Array ---import Data.List import Data.Map (Map) import qualified Data.Map as Map import qualified Data.IntMap as IntMap ---import Data.Maybe import Data.Set (Set) import qualified Data.Set as Set @@ -31,35 +27,36 @@ type Profile = [Int] pgfToCFG :: PGF -> CId -- ^ Concrete syntax name -> CFG -pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ concatMap ruleToCFRule rules) +pgfToCFG pgf lang = mkCFG (showCId start_cat) extCats (startRules ++ concatMap ruleToCFRule rules) where + (_,start_cat,_) = unType (startCat pgf) cnc = lookConcr pgf lang rules :: [(FId,Production)] - rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.productions cnc) - , prod <- Set.toList set] + rules = [(fcat,prod) | fcat <- [0..concrTotalCats cnc], + prod <- concrProductions cnc fcat] fcatCats :: Map FId Cat fcatCats = Map.fromList [(fc, showCId c ++ "_" ++ show i) - | (c,CncCat s e lbls) <- Map.toList (cnccats cnc), - (fc,i) <- zip (range (s,e)) [1..]] + | (c,s,e,lbls) <- concrCategories cnc, + (fc,i) <- zip [s..e] [1..]] fcatCat :: FId -> Cat fcatCat c = Map.findWithDefault ("Unknown_" ++ show c) c fcatCats - fcatToCat :: FId -> LIndex -> Cat + fcatToCat :: FId -> Int -> Cat fcatToCat c l = fcatCat c ++ row where row = if catLinArity c == 1 then "" else "_" ++ show l -- gets the number of fields in the lincat for the given category catLinArity :: FId -> Int - catLinArity c = maximum (1:[rangeSize (bounds rhs) | (CncFun _ rhs, _) <- topdownRules c]) + catLinArity c = maximum (1:[length rhs | ((_,rhs), _) <- topdownRules c]) topdownRules cat = f cat [] where - f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions cnc)) + f cat rules = foldr g rules (concrProductions cnc cat) - g (PApply funid args) rules = (cncfuns cnc ! funid,args) : rules + g (PApply funid args) rules = (concrFunction cnc funid,args) : rules g (PCoerce cat) rules = f cat rules @@ -68,28 +65,25 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co startRules :: [CFRule] startRules = [Rule (showCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0) - | (c,CncCat s e lbls) <- Map.toList (cnccats cnc), - fc <- range (s,e), not (isPredefFId fc), + | (c,s,e,lbls) <- concrCategories cnc, + fc <- [s..e], not (isPredefFId fc), r <- [0..catLinArity fc-1]] ruleToCFRule :: (FId,Production) -> [CFRule] ruleToCFRule (c,PApply funid args) = - [Rule (fcatToCat c l) (mkRhs row) term - | (l,seqid) <- Array.assocs rhs - , let row = sequences cnc ! seqid - , not (containsLiterals row) - , f <- fns - , let term = profilesToTerm f [fixProfile row n | n <- [0..length args-1]] - ] + [Rule (fcatToCat c l) (mkRhs row) (profilesToTerm [fixProfile row n | n <- [0..length args-1]]) + | (l,seqid) <- zip [0..] rhs + , let row = concrSequence cnc seqid + , not (containsLiterals row)] where - CncFun fns rhs = cncfuns cnc ! funid + (f, rhs) = concrFunction cnc funid - mkRhs :: Array DotPos Symbol -> [CFSymbol] - mkRhs = concatMap symbolToCFSymbol . Array.elems + mkRhs :: [Symbol] -> [CFSymbol] + mkRhs = concatMap symbolToCFSymbol - containsLiterals :: Array DotPos Symbol -> Bool - containsLiterals row = not (null ([n | SymLit n _ <- Array.elems row] ++ - [n | SymVar n _ <- Array.elems row])) + containsLiterals :: [Symbol] -> Bool + containsLiterals row = not (null ([n | SymLit n _ <- row] ++ + [n | SymVar n _ <- row])) symbolToCFSymbol :: Symbol -> [CFSymbol] symbolToCFSymbol (SymCat n l) = [let PArg _ fid = args!!n in NonTerminal (fcatToCat fid l)] @@ -105,18 +99,19 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co symbolToCFSymbol SymALL_CAPIT = [Terminal "&|"] symbolToCFSymbol SymNE = [] - fixProfile :: Array DotPos Symbol -> Int -> Profile + fixProfile :: [Symbol] -> Int -> Profile fixProfile row i = [k | (k,j) <- nts, j == i] where - nts = zip [0..] [j | nt <- Array.elems row, j <- getPos nt] + nts = zip [0..] [j | nt <- row, j <- getPos nt] getPos (SymCat j _) = [j] getPos (SymLit j _) = [j] getPos _ = [] - profilesToTerm :: CId -> [Profile] -> CFTerm - profilesToTerm f ps = CFObj f (zipWith profileToTerm argTypes ps) - where (argTypes,_) = catSkeleton $ lookType (abstract pgf) f + profilesToTerm :: [Profile] -> CFTerm + profilesToTerm ps = CFObj f (zipWith profileToTerm argTypes ps) + where Just (hypos,_,_) = fmap unType (functionType pgf f) + argTypes = [cat | (_,_,ty) <- hypos, let (_,cat,_) = unType ty] profileToTerm :: CId -> Profile -> CFTerm profileToTerm t [] = CFMeta t diff --git a/src/compiler/GF/Speech/SRGS_ABNF.hs b/src/compiler/GF/Speech/SRGS_ABNF.hs index dc5c7bbd3..75d206a0c 100644 --- a/src/compiler/GF/Speech/SRGS_ABNF.hs +++ b/src/compiler/GF/Speech/SRGS_ABNF.hs @@ -18,7 +18,6 @@ ----------------------------------------------------------------------------- module GF.Speech.SRGS_ABNF (srgsAbnfPrinter, srgsAbnfNonRecursivePrinter) where -import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint --import GF.Data.Utilities import GF.Infra.Option diff --git a/src/compiler/GF/Speech/VoiceXML.hs b/src/compiler/GF/Speech/VoiceXML.hs index 84264c4d7..d05ba27ce 100644 --- a/src/compiler/GF/Speech/VoiceXML.hs +++ b/src/compiler/GF/Speech/VoiceXML.hs @@ -12,7 +12,6 @@ module GF.Speech.VoiceXML (grammar2vxml) where import GF.Data.XML --import GF.Infra.Ident import PGF -import PGF.Internal --import Control.Monad (liftM) import Data.List (intersperse) -- isPrefixOf, find @@ -28,7 +27,7 @@ grammar2vxml pgf cnc = showsXMLDoc (skel2vxml name language start skel qs) "" name = showCId cnc qs = catQuestions pgf cnc (map fst skel) language = languageCode pgf cnc - start = lookStartCat pgf + (_,start,_) = unType (startCat pgf) -- -- * VSkeleton: a simple description of the abstract syntax. @@ -37,8 +36,8 @@ grammar2vxml pgf cnc = showsXMLDoc (skel2vxml name language start skel qs) "" type Skeleton = [(CId, [(CId, [CId])])] pgfSkeleton :: PGF -> Skeleton -pgfSkeleton pgf = [(c,[(f,fst (catSkeleton (lookType (abstract pgf) f))) | (_,f) <- fs]) - | (c,(_,fs,_)) <- Map.toList (cats (abstract pgf))] +pgfSkeleton pgf = [(c,[(f,[cat | (_,_,ty) <- hypos, let (_,cat,_) = unType ty]) | f <- functionsByCat pgf c, Just (hypos,_,_) <- [fmap unType (functionType pgf f)]]) + | c <- categories pgf] -- -- * Questions to ask diff --git a/src/compiler/GF/Text/Transliterations.hs b/src/compiler/GF/Text/Transliterations.hs index 9b1b6e151..7645fc158 100644 --- a/src/compiler/GF/Text/Transliterations.hs +++ b/src/compiler/GF/Text/Transliterations.hs @@ -39,7 +39,6 @@ allTransliterations = Map.fromList [ ("amharic",transAmharic), ("ancientgreek", transAncientGreek), ("arabic", transArabic), - ("arabic_unvocalized", transArabicUnvoc), ("devanagari", transDevanagari), ("greek", transGreek), ("hebrew", transHebrew), @@ -179,13 +178,6 @@ transArabic = mkTransliteration "Arabic" allTrans allCodes where allCodes = [0x0621..0x062f] ++ [0x0630..0x063a] ++ [0x0641..0x064f] ++ [0x0650..0x0657] ++ [0x0671,0x061f] - -transArabicUnvoc :: Transliteration -transArabicUnvoc = transArabic{ - invisible_chars = ["a","u","i","v2","o","V+","V-","a:"], - printname = "unvocalized Arabic" - } - transPersian :: Transliteration transPersian = (mkTransliteration "Persian/Farsi" allTrans allCodes) {invisible_chars = ["a","u","i"]} where diff --git a/src/runtime/c/Makefile.am b/src/runtime/c/Makefile.am index 8f9c8bf56..edc4f88b2 100644 --- a/src/runtime/c/Makefile.am +++ b/src/runtime/c/Makefile.am @@ -87,14 +87,13 @@ libpgf_la_SOURCES = \ pgf/graphviz.c \ pgf/aligner.c \ pgf/pgf.c \ - pgf/pgf.h -libpgf_la_LDFLAGS = -no-undefined + pgf/pgf.h \ +libpgf_la_LDFLAGS = "-no-undefined" libpgf_la_LIBADD = libgu.la libsg_la_SOURCES = \ sg/sqlite3Btree.c \ sg/sg.c -libsg_la_LDFLAGS = -no-undefined libsg_la_LIBADD = libgu.la libpgf.la bin_PROGRAMS = diff --git a/src/runtime/c/gu/defs.h b/src/runtime/c/gu/defs.h index f5472a414..6b531979c 100644 --- a/src/runtime/c/gu/defs.h +++ b/src/runtime/c/gu/defs.h @@ -23,14 +23,6 @@ #define restrict __restrict -#elif defined(__MINGW32__) - -#define GU_API_DECL -#define GU_API - -#define GU_INTERNAL_DECL -#define GU_INTERNAL - #else #define GU_API_DECL @@ -38,9 +30,7 @@ #define GU_INTERNAL_DECL __attribute__ ((visibility ("hidden"))) #define GU_INTERNAL __attribute__ ((visibility ("hidden"))) - #endif - // end MSVC workaround #include diff --git a/src/runtime/c/pgf/expr.c b/src/runtime/c/pgf/expr.c index 92e92f04f..eb0a88bb6 100644 --- a/src/runtime/c/pgf/expr.c +++ b/src/runtime/c/pgf/expr.c @@ -30,8 +30,8 @@ pgf_expr_unwrap(PgfExpr expr) } } -PGF_API int -pgf_expr_arity(PgfExpr expr) +static PgfExprTag +pgf_expr_arity(PgfExpr expr, int *arity) { int n = 0; while (true) { @@ -44,10 +44,9 @@ pgf_expr_arity(PgfExpr expr) n = n + 1; break; } - case PGF_EXPR_FUN: - return n; default: - return -1; + *arity = n; + return i.tag; } } } @@ -55,8 +54,8 @@ pgf_expr_arity(PgfExpr expr) PGF_API PgfApplication* pgf_expr_unapply(PgfExpr expr, GuPool* pool) { - int arity = pgf_expr_arity(expr); - if (arity < 0) { + int arity; + if (pgf_expr_arity(expr, &arity) != PGF_EXPR_FUN) { return NULL; } PgfApplication* appl = gu_new_flex(pool, PgfApplication, args, arity); @@ -68,13 +67,38 @@ pgf_expr_unapply(PgfExpr expr, GuPool* pool) appl->args[n] = app->arg; expr = app->fun; } - PgfExpr e = pgf_expr_unwrap(expr); - gu_assert(gu_variant_tag(e) == PGF_EXPR_FUN); - PgfExprFun* fun = gu_variant_data(e); + appl->efun = pgf_expr_unwrap(expr); + gu_assert(gu_variant_tag(appl->efun) == PGF_EXPR_FUN); + PgfExprFun* fun = gu_variant_data(appl->efun); appl->fun = fun->fun; return appl; } +PGF_API PgfApplication* +pgf_expr_unapply_ex(PgfExpr expr, GuPool* pool) +{ + int arity; + pgf_expr_arity(expr, &arity); + + PgfApplication* appl = gu_new_flex(pool, PgfApplication, args, arity); + appl->n_args = arity; + for (int n = arity - 1; n >= 0; n--) { + PgfExpr e = pgf_expr_unwrap(expr); + gu_assert(gu_variant_tag(e) == PGF_EXPR_APP); + PgfExprApp* app = gu_variant_data(e); + appl->args[n] = app->arg; + expr = app->fun; + } + appl->efun = pgf_expr_unwrap(expr); + if (gu_variant_tag(appl->efun) == PGF_EXPR_FUN) { + PgfExprFun* fun = gu_variant_data(appl->efun); + appl->fun = fun->fun; + } else { + appl->fun = NULL; + } + return appl; +} + PGF_API PgfExpr pgf_expr_apply(PgfApplication* app, GuPool* pool) { @@ -675,6 +699,17 @@ pgf_expr_parser_binds(PgfExprParser* parser) return binds; } +PGF_API GuString +pgf_expr_parser_ident(PgfExprParser* parser) +{ + GuString ident = NULL; + if (parser->token_tag == PGF_TOKEN_IDENT) { + ident = gu_string_copy(gu_string_buf_data(parser->token_value), parser->expr_pool); + pgf_expr_parser_token(parser, true); + } + return ident; +} + PGF_API PgfExpr pgf_expr_parser_expr(PgfExprParser* parser, bool mark) { diff --git a/src/runtime/c/pgf/expr.h b/src/runtime/c/pgf/expr.h index e560d3a83..44fe440ae 100644 --- a/src/runtime/c/pgf/expr.h +++ b/src/runtime/c/pgf/expr.h @@ -126,12 +126,10 @@ typedef struct { PgfExpr expr; } PgfExprProb; -PGF_API_DECL int -pgf_expr_arity(PgfExpr expr); - typedef struct PgfApplication PgfApplication; struct PgfApplication { + PgfExpr efun; PgfCId fun; int n_args; PgfExpr args[]; @@ -140,6 +138,9 @@ struct PgfApplication { PGF_API_DECL PgfApplication* pgf_expr_unapply(PgfExpr expr, GuPool* pool); +PGF_API_DECL PgfApplication* +pgf_expr_unapply_ex(PgfExpr expr, GuPool* pool); + PGF_API_DECL PgfExpr pgf_expr_apply(PgfApplication*, GuPool* pool); diff --git a/src/runtime/c/pgf/linearizer.c b/src/runtime/c/pgf/linearizer.c index 12b047b13..ced2a8cf2 100644 --- a/src/runtime/c/pgf/linearizer.c +++ b/src/runtime/c/pgf/linearizer.c @@ -175,9 +175,8 @@ redo:; gu_buf_get(buf, PgfProductionApply*, index); gu_assert(n_args == gu_seq_length(papply->args)); - capp->abs_id = papply->fun->absfun->name; - capp->fun = papply->fun; - capp->fid = 0; + capp->fun = papply->fun; + capp->fid = 0; capp->n_args = n_args; for (size_t i = 0; i < n_args; i++) { @@ -223,10 +222,10 @@ redo:; static PgfCncTree pgf_cnc_resolve_def(PgfCnc* cnc, size_t n_vars, PgfPrintContext* context, - PgfCId abs_id, PgfCCat* ccat, GuString s, GuPool* pool) + PgfCCat* ccat, GuString s, GuPool* pool) { - PgfCncTree ret = gu_null_variant; PgfCncTree lit = gu_null_variant; + PgfCncTree ret = gu_null_variant; PgfCncTreeLit* clit = gu_new_variant(PGF_CNC_TREE_LIT, @@ -234,7 +233,7 @@ pgf_cnc_resolve_def(PgfCnc* cnc, &lit, pool); clit->n_vars = 0; clit->context = context; - clit->fid = -1; // don't report the literal in the bracket + clit->fid = cnc->fid++; PgfLiteralStr* lit_str = gu_new_flex_variant(PGF_LITERAL_STR, PgfLiteralStr, @@ -242,7 +241,7 @@ pgf_cnc_resolve_def(PgfCnc* cnc, &clit->lit, pool); strcpy((char*) lit_str->val, (char*) s); - if (ccat == NULL || ccat->lindefs == NULL) + if (ccat->lindefs == NULL) return lit; int index = @@ -254,10 +253,9 @@ pgf_cnc_resolve_def(PgfCnc* cnc, gu_new_flex_variant(PGF_CNC_TREE_APP, PgfCncTreeApp, args, 1, &ret, pool); - capp->ccat = ccat; - capp->abs_id= abs_id; - capp->fun = gu_seq_get(ccat->lindefs, PgfCncFun*, index); - capp->fid = cnc->fid++; + capp->ccat = ccat; + capp->fun = gu_seq_get(ccat->lindefs, PgfCncFun*, index); + capp->fid = cnc->fid++; capp->n_vars = n_vars; capp->context = context; capp->n_args = 1; @@ -297,7 +295,7 @@ pgf_lzr_wrap_linref(PgfCncTree ctree, GuPool* pool) PgfCncTreeApp* capp = cti.data; assert(gu_seq_length(capp->ccat->linrefs) > 0); - + // here we must apply the linref function PgfCncTree new_ctree; PgfCncTreeApp* new_capp = @@ -305,7 +303,6 @@ pgf_lzr_wrap_linref(PgfCncTree ctree, GuPool* pool) PgfCncTreeApp, args, 1, &new_ctree, pool); new_capp->ccat = NULL; - new_capp->abs_id = NULL; new_capp->fun = gu_seq_get(capp->ccat->linrefs, PgfCncFun*, 0); new_capp->fid = -1; new_capp->n_vars = 0; @@ -317,7 +314,7 @@ pgf_lzr_wrap_linref(PgfCncTree ctree, GuPool* pool) break; } } - + return ctree; } @@ -399,17 +396,6 @@ pgf_cnc_resolve(PgfCnc* cnc, goto done; } - PgfCId abs_id = "?"; - if (emeta->id > 0) { - GuPool* tmp_pool = gu_local_pool(); - GuExn* err = gu_new_exn(tmp_pool); - GuStringBuf* sbuf = gu_new_string_buf(tmp_pool); - GuOut* out = gu_string_buf_out(sbuf); - - gu_printf(out, err, "?%d", emeta->id); - abs_id = gu_string_buf_freeze(sbuf, pool); - } - int index = gu_choice_next(cnc->ch, gu_seq_length(ccat->lindefs)); if (index < 0) { @@ -420,7 +406,6 @@ pgf_cnc_resolve(PgfCnc* cnc, PgfCncTreeApp, args, 1, &ret, pool); capp->ccat = ccat; - capp->abs_id = abs_id; capp->fun = gu_seq_get(ccat->lindefs, PgfCncFun*, index); capp->fid = cnc->fid++; capp->n_vars = 0; @@ -450,7 +435,23 @@ pgf_cnc_resolve(PgfCnc* cnc, gu_putc(']', out, err); GuString s = gu_string_buf_freeze(sbuf, tmp_pool); - ret = pgf_cnc_resolve_def(cnc, n_vars, context, efun->fun, ccat, s, pool); + if (ccat != NULL) { + ret = pgf_cnc_resolve_def(cnc, n_vars, context, ccat, s, pool); + } else { + PgfCncTreeLit* clit = + gu_new_variant(PGF_CNC_TREE_LIT, + PgfCncTreeLit, + &ret, pool); + clit->n_vars = 0; + clit->context = context; + clit->fid = cnc->fid++; + PgfLiteralStr* lit = + gu_new_flex_variant(PGF_LITERAL_STR, + PgfLiteralStr, + val, strlen(s)+1, + &clit->lit, pool); + strcpy(lit->val, s); + } gu_pool_free(tmp_pool); goto done; @@ -498,7 +499,28 @@ redo:; index--; } - ret = pgf_cnc_resolve_def(cnc, n_vars, context, ctxt->name, ccat, ctxt->name, pool); + if (ccat != NULL && ccat->lindefs == NULL) { + goto done; + } + + if (ccat != NULL) { + ret = pgf_cnc_resolve_def(cnc, n_vars, context, ccat, ctxt->name, pool); + } else { + PgfCncTreeLit* clit = + gu_new_variant(PGF_CNC_TREE_LIT, + PgfCncTreeLit, + &ret, pool); + clit->n_vars = 0; + clit->context = context; + clit->fid = cnc->fid++; + PgfLiteralStr* lit = + gu_new_flex_variant(PGF_LITERAL_STR, + PgfLiteralStr, + val, strlen(ctxt->name)+1, + &clit->lit, pool); + strcpy(lit->val, ctxt->name); + } + goto done; } case PGF_EXPR_TYPED: { @@ -917,9 +939,9 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx) if ((*lzr->funcs)->begin_phrase && fapp->ccat != NULL) { (*lzr->funcs)->begin_phrase(lzr->funcs, - fapp->ccat->cnccat->abscat->name, + fun->absfun->type->cid, fapp->fid, lin_idx, - fapp->abs_id); + fun->absfun->name); } gu_require(lin_idx < fun->n_lins); @@ -927,9 +949,9 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx) if ((*lzr->funcs)->end_phrase && fapp->ccat != NULL) { (*lzr->funcs)->end_phrase(lzr->funcs, - fapp->ccat->cnccat->abscat->name, + fun->absfun->type->cid, fapp->fid, lin_idx, - fapp->abs_id); + fun->absfun->name); } break; } @@ -955,7 +977,7 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx) PgfCId cat = pgf_literal_cat(lzr->concr, flit->lit)->cnccat->abscat->name; - if ((*lzr->funcs)->begin_phrase && flit->fid >= 0) { + if ((*lzr->funcs)->begin_phrase) { (*lzr->funcs)->begin_phrase(lzr->funcs, cat, flit->fid, 0, ""); @@ -987,7 +1009,7 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx) (*lzr->funcs)->symbol_token(lzr->funcs, tok); } - if ((*lzr->funcs)->end_phrase && flit->fid >= 0) { + if ((*lzr->funcs)->end_phrase) { (*lzr->funcs)->end_phrase(lzr->funcs, cat, flit->fid, 0, ""); diff --git a/src/runtime/c/pgf/linearizer.h b/src/runtime/c/pgf/linearizer.h index 790dd5800..57fad962f 100644 --- a/src/runtime/c/pgf/linearizer.h +++ b/src/runtime/c/pgf/linearizer.h @@ -22,7 +22,6 @@ typedef enum { typedef struct { PgfCCat* ccat; - PgfCId abs_id; PgfCncFun* fun; int fid; diff --git a/src/runtime/c/pgf/lookup.c b/src/runtime/c/pgf/lookup.c index 5918275c1..f6f2d4da4 100644 --- a/src/runtime/c/pgf/lookup.c +++ b/src/runtime/c/pgf/lookup.c @@ -9,9 +9,6 @@ #include #include #include -#if defined(__MINGW32__) || defined(_MSC_VER) -#include -#endif //#define PGF_LOOKUP_DEBUG //#define PGF_LINEARIZER_DEBUG @@ -119,7 +116,7 @@ typedef struct { static PgfAbsProduction* pgf_lookup_new_production(PgfAbsFun* fun, GuPool *pool) { - size_t n_hypos = fun->type->hypos ? gu_seq_length(fun->type->hypos) : 0; + size_t n_hypos = gu_seq_length(fun->type->hypos); PgfAbsProduction* prod = gu_new_flex(pool, PgfAbsProduction, args, n_hypos); prod->fun = fun; prod->count = 0; @@ -699,12 +696,8 @@ pgf_lookup_tokenize(GuMap* lexicon_idx, GuString sentence, GuPool* pool) break; const uint8_t* start = p-1; - if (strchr(".!?,:",c) != NULL) + while (c != 0 && !gu_ucs_is_space(c)) { c = gu_utf8_decode(&p); - else { - while (c != 0 && strchr(".!?,:",c) == NULL && !gu_ucs_is_space(c)) { - c = gu_utf8_decode(&p); - } } const uint8_t* end = p-1; diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index cb59b2a55..0f5f6d1ac 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -65,7 +65,6 @@ typedef enum { BIND_NONE, BIND_HARD, BIND_SOFT } BIND_TYPE; typedef struct { PgfProductionIdx* idx; size_t offset; - size_t sym_idx; } PgfLexiconIdxEntry; typedef GuBuf PgfLexiconIdx; @@ -1061,13 +1060,13 @@ pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep) } static int -pgf_symbols_cmp(GuString* psent, PgfSymbols* syms, size_t* sym_idx, bool case_sensitive) +pgf_symbols_cmp(GuString* psent, PgfSymbols* syms, bool case_sensitive) { size_t n_syms = gu_seq_length(syms); - while (*sym_idx < n_syms) { - PgfSymbol sym = gu_seq_get(syms, PgfSymbol, *sym_idx); + for (size_t i = 0; i < n_syms; i++) { + PgfSymbol sym = gu_seq_get(syms, PgfSymbol, i); - if (*sym_idx > 0) { + if (i > 0) { if (!skip_space(psent)) { if (**psent == 0) return -1; @@ -1111,8 +1110,6 @@ pgf_symbols_cmp(GuString* psent, PgfSymbols* syms, size_t* sym_idx, bool case_se default: gu_impossible(); } - - (*sym_idx)++; } return 0; @@ -1133,8 +1130,7 @@ pgf_parsing_lookahead(PgfParsing *ps, PgfParseState* state, GuString start = ps->sentence + state->end_offset; GuString current = start; - size_t sym_idx = 0; - int cmp = pgf_symbols_cmp(¤t, seq->syms, &sym_idx, ps->case_sensitive); + int cmp = pgf_symbols_cmp(¤t, seq->syms, ps->case_sensitive); if (cmp < 0) { j = k-1; } else if (cmp > 0) { @@ -1155,9 +1151,8 @@ pgf_parsing_lookahead(PgfParsing *ps, PgfParseState* state, if (seq->idx != NULL) { PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx); - entry->idx = seq->idx; - entry->offset = (size_t) (current - ps->sentence); - entry->sym_idx = sym_idx; + entry->idx = seq->idx; + entry->offset = (size_t) (current - ps->sentence); } if (len+1 <= max) @@ -1236,7 +1231,6 @@ pgf_new_parse_state(PgfParsing* ps, size_t start_offset, PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx); entry->idx = seq->idx; entry->offset = state->start_offset; - entry->sym_idx= 0; } // Add non-epsilon lexical rules to the bottom up index @@ -1284,15 +1278,14 @@ pgf_parsing_add_transition(PgfParsing* ps, PgfToken tok, PgfItem* item) static void pgf_parsing_predict_lexeme(PgfParsing* ps, PgfItemConts* conts, PgfProductionIdxEntry* entry, - size_t offset, size_t sym_idx) + size_t offset) { GuVariantInfo i = { PGF_PRODUCTION_APPLY, entry->papp }; PgfProduction prod = gu_variant_close(i); PgfItem* item = pgf_new_item(ps, conts, prod); PgfSymbols* syms = entry->papp->fun->lins[conts->lin_idx]->syms; - item->sym_idx = sym_idx; - pgf_item_set_curr_symbol(item, ps->pool); + item->sym_idx = gu_seq_length(syms); prob_t prob = item->inside_prob+item->conts->outside_prob; PgfParseState* state = pgf_new_parse_state(ps, offset, BIND_NONE, prob); @@ -1365,7 +1358,7 @@ pgf_parsing_td_predict(PgfParsing* ps, PgfProductionIdxEntry, &key); if (value != NULL) { - pgf_parsing_predict_lexeme(ps, conts, value, lentry->offset, lentry->sym_idx); + pgf_parsing_predict_lexeme(ps, conts, value, lentry->offset); PgfProductionIdxEntry* start = gu_buf_data(lentry->idx); @@ -1376,7 +1369,7 @@ pgf_parsing_td_predict(PgfParsing* ps, while (left >= start && value->ccat->fid == left->ccat->fid && value->lin_idx == left->lin_idx) { - pgf_parsing_predict_lexeme(ps, conts, left, lentry->offset, lentry->sym_idx); + pgf_parsing_predict_lexeme(ps, conts, left, lentry->offset); left--; } @@ -1384,7 +1377,7 @@ pgf_parsing_td_predict(PgfParsing* ps, while (right <= end && value->ccat->fid == right->ccat->fid && value->lin_idx == right->lin_idx) { - pgf_parsing_predict_lexeme(ps, conts, right, lentry->offset, lentry->sym_idx); + pgf_parsing_predict_lexeme(ps, conts, right, lentry->offset); right++; } } @@ -1957,6 +1950,8 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat, start_ccat->prods = NULL; start_ccat->n_synprods = 0; + gu_assert(start_ccat->cnccat != NULL); + #ifdef PGF_COUNTS_DEBUG state->ps->ccat_full_count++; #endif @@ -2300,7 +2295,7 @@ pgf_parser_completions_next(GuEnum* self, void* to, GuPool* pool) } PGF_API GuEnum* -pgf_complete(PgfConcr* concr, PgfType* type, GuString sentence, +pgf_complete(PgfConcr* concr, PgfType* type, GuString sentence, GuString prefix, GuExn *err, GuPool* pool) { if (concr->sequences == NULL || @@ -2379,9 +2374,8 @@ pgf_sequence_cmp_fn(GuOrder* order, const void* p1, const void* p2) GuString sent = (GuString) p1; const PgfSequence* sp2 = p2; - size_t sym_idx = 0; - int res = pgf_symbols_cmp(&sent, sp2->syms, &sym_idx, self->case_sensitive); - if (res == 0 && (*sent != 0 || sym_idx != gu_seq_length(sp2->syms))) { + int res = pgf_symbols_cmp(&sent, sp2->syms, self->case_sensitive); + if (res == 0 && *sent != 0) { res = 1; } diff --git a/src/runtime/c/pgf/pgf.c b/src/runtime/c/pgf/pgf.c index 5317830fb..26ea6cfb7 100644 --- a/src/runtime/c/pgf/pgf.c +++ b/src/runtime/c/pgf/pgf.c @@ -46,7 +46,7 @@ pgf_read_in(GuIn* in, } PGF_API_DECL void -pgf_write(PgfPGF* pgf, const char* fpath, GuExn* err) +pgf_write(PgfPGF* pgf, size_t n_concrs, PgfConcr** concrs, const char* fpath, GuExn* err) { FILE* outfile = fopen(fpath, "wb"); if (outfile == NULL) { @@ -60,13 +60,70 @@ pgf_write(PgfPGF* pgf, const char* fpath, GuExn* err) GuOut* out = gu_file_out(outfile, tmp_pool); PgfWriter* wtr = pgf_new_writer(out, tmp_pool, err); - pgf_write_pgf(pgf, wtr); + pgf_write_pgf(pgf, n_concrs, concrs, wtr); gu_pool_free(tmp_pool); fclose(outfile); } +PGF_API void +pgf_concrete_save(PgfConcr* concr, const char* fpath, GuExn* err) +{ + FILE* outfile = fopen(fpath, "wb"); + if (outfile == NULL) { + gu_raise_errno(err); + return; + } + + GuPool* tmp_pool = gu_local_pool(); + + // Create an input stream from the input file + GuOut* out = gu_file_out(outfile, tmp_pool); + + PgfWriter* wtr = pgf_new_writer(out, tmp_pool, err); + pgf_write_concrete(concr, wtr, true); + + gu_pool_free(tmp_pool); + + fclose(outfile); +} + +PGF_API bool +pgf_have_same_abstract(PgfPGF *one, PgfPGF *two) +{ + if (strcmp(one->abstract.name, two->abstract.name) != 0) + return false; + + size_t n_cats = gu_seq_length(one->abstract.cats); + if (n_cats != gu_seq_length(two->abstract.cats)) + return false; + size_t n_funs = gu_seq_length(one->abstract.funs); + if (n_funs != gu_seq_length(two->abstract.funs)) + return false; + + for (size_t i = 0; i < n_cats; i++) { + PgfAbsCat* cat1 = gu_seq_index(one->abstract.cats, PgfAbsCat, i); + PgfAbsCat* cat2 = gu_seq_index(two->abstract.cats, PgfAbsCat, i); + + if (strcmp(cat1->name, cat2->name) != 0) + return false; + } + + for (size_t i = 0; i < n_funs; i++) { + PgfAbsFun* fun1 = gu_seq_index(one->abstract.funs, PgfAbsFun, i); + PgfAbsFun* fun2 = gu_seq_index(two->abstract.funs, PgfAbsFun, i); + + if (strcmp(fun1->name, fun2->name) != 0) + return false; + + if (!pgf_type_eq(fun1->type, fun2->type)) + return false; + } + + return true; +} + PGF_API GuString pgf_abstract_name(PgfPGF* pgf) { diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h index 6dd040b49..ea4c97335 100644 --- a/src/runtime/c/pgf/pgf.h +++ b/src/runtime/c/pgf/pgf.h @@ -19,14 +19,6 @@ #define PGF_INTERNAL_DECL #define PGF_INTERNAL -#elif defined(__MINGW32__) - -#define PGF_API_DECL -#define PGF_API - -#define PGF_INTERNAL_DECL -#define PGF_INTERNAL - #else #define PGF_API_DECL @@ -66,7 +58,10 @@ PGF_API_DECL void pgf_concrete_unload(PgfConcr* concr); PGF_API_DECL void -pgf_write(PgfPGF* pgf, const char* fpath, GuExn* err); +pgf_write(PgfPGF* pgf, size_t n_concrs, PgfConcr** concrs, const char* fpath, GuExn* err); + +PGF_API_DECL bool +pgf_have_same_abstract(PgfPGF *one, PgfPGF *two); PGF_API_DECL GuString pgf_abstract_name(PgfPGF*); @@ -249,7 +244,8 @@ pgf_callbacks_map_add_literal(PgfConcr* concr, PgfCallbacksMap* callbacks, PgfCId cat, PgfLiteralCallback* callback); PGF_API_DECL void -pgf_print(PgfPGF* pgf, GuOut* out, GuExn* err); +pgf_print(PgfPGF* pgf, size_t n_concrs, PgfConcr** concrs, + GuOut* out, GuExn* err); PGF_API_DECL void pgf_check_expr(PgfPGF* gr, PgfExpr* pe, PgfType* ty, diff --git a/src/runtime/c/pgf/printer.c b/src/runtime/c/pgf/printer.c index 153130b54..417d78e84 100644 --- a/src/runtime/c/pgf/printer.c +++ b/src/runtime/c/pgf/printer.c @@ -7,13 +7,17 @@ typedef struct { } PgfPrintFn; static void -pgf_print_flags(PgfFlags* flags, GuOut *out, GuExn* err) +pgf_print_flags(PgfFlags* flags, int indent, GuOut *out, GuExn* err) { size_t n_flags = gu_seq_length(flags); for (size_t i = 0; i < n_flags; i++) { PgfFlag* flag = gu_seq_index(flags, PgfFlag, i); - - gu_puts(" flag ", out, err); + + for (int k = 0; k < indent; k++) { + gu_putc(' ', out, err); + } + + gu_puts("flag ", out, err); pgf_print_cid(flag->name, out, err); gu_puts(" = ", out, err); pgf_print_literal(flag->value, out, err); @@ -70,7 +74,7 @@ pgf_print_abstract(PgfAbstr* abstr, GuOut* out, GuExn* err) pgf_print_cid(abstr->name, out, err); gu_puts(" {\n", out, err); - pgf_print_flags(abstr->aflags, out, err); + pgf_print_flags(abstr->aflags, 2, out, err); pgf_print_abscats(abstr->cats, out, err); pgf_print_absfuns(abstr->funs, out, err); @@ -358,7 +362,7 @@ pgf_print_concrete(PgfConcr* concr, GuOut* out, GuExn* err) pgf_print_cid(concr->name, out, err); gu_puts(" {\n", out, err); - pgf_print_flags(concr->cflags, out, err); + pgf_print_flags(concr->cflags, 2, out, err); gu_puts(" productions\n", out, err); PgfPrintFn clo2 = { { pgf_print_productions }, out }; @@ -396,13 +400,12 @@ pgf_print_concrete(PgfConcr* concr, GuOut* out, GuExn* err) } PGF_API void -pgf_print(PgfPGF* pgf, GuOut* out, GuExn* err) +pgf_print(PgfPGF* pgf, size_t n_concrs, PgfConcr** concrs, GuOut* out, GuExn* err) { + pgf_print_flags(pgf->gflags, 0, out, err); pgf_print_abstract(&pgf->abstract, out, err); - - size_t n_concrs = gu_seq_length(pgf->concretes); + for (size_t i = 0; i < n_concrs; i++) { - PgfConcr* concr = gu_seq_index(pgf->concretes, PgfConcr, i); - pgf_print_concrete(concr, out, err); + pgf_print_concrete(concrs[i], out, err); } } diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c index d7094c9d5..522d69b83 100644 --- a/src/runtime/c/pgf/reader.c +++ b/src/runtime/c/pgf/reader.c @@ -937,7 +937,7 @@ pgf_read_pargs(PgfReader* rdr, PgfConcr* concr) } PGF_API bool -pgf_production_is_lexical(PgfProductionApply *papp, +pgf_production_is_lexical(PgfProductionApply *papp, GuBuf* non_lexical_buf, GuPool* pool) { if (gu_seq_length(papp->args) > 0) @@ -1168,6 +1168,14 @@ pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err) // pgf_ccat_set_viterbi_prob(ccat); } +// The GF compiler needs to call this function when building in memory grammars. +PGF_API void +pgf_concrete_fix_internals(PgfConcr* concr) +{ + GuMapItor clo1 = { pgf_read_ccat_cb }; + gu_map_iter(concr->ccats, &clo1, NULL); +} + static void pgf_read_concrete_content(PgfReader* rdr, PgfConcr* concr) { @@ -1193,8 +1201,7 @@ pgf_read_concrete_content(PgfReader* rdr, PgfConcr* concr) concr->cnccats = pgf_read_cnccats(rdr, concr->abstr, concr); concr->total_cats = pgf_read_int(rdr); - GuMapItor clo1 = { pgf_read_ccat_cb }; - gu_map_iter(concr->ccats, &clo1, NULL); + pgf_concrete_fix_internals(concr); } static void diff --git a/src/runtime/c/pgf/writer.c b/src/runtime/c/pgf/writer.c index 57c7e3c76..ff6101155 100644 --- a/src/runtime/c/pgf/writer.c +++ b/src/runtime/c/pgf/writer.c @@ -72,10 +72,15 @@ pgf_write_cid(PgfCId id, PgfWriter* wtr) PGF_INTERNAL void pgf_write_string(GuString val, PgfWriter* wtr) { - size_t len = strlen(val); + size_t len = 0; + const uint8_t* buf = (const uint8_t*) val; + const uint8_t* p = buf; + while (gu_utf8_decode(&p) != 0) + len++; + pgf_write_len(len, wtr); gu_return_on_exn(wtr->err, ); - gu_out_bytes(wtr->out, (uint8_t*) val, len, wtr->err); + gu_out_bytes(wtr->out, (uint8_t*) val, (p-buf)-1, wtr->err); } PGF_INTERNAL void @@ -843,7 +848,7 @@ pgf_write_concrete_content(PgfConcr* concr, PgfWriter* wtr) pgf_write_int(concr->total_cats, wtr); } -static void +PGF_INTERNAL void pgf_write_concrete(PgfConcr* concr, PgfWriter* wtr, bool with_content) { if (with_content && @@ -865,34 +870,20 @@ pgf_write_concrete(PgfConcr* concr, PgfWriter* wtr, bool with_content) gu_return_on_exn(wtr->err, ); } -PGF_API void -pgf_concrete_save(PgfConcr* concr, GuOut* out, GuExn* err) -{ - GuPool* pool = gu_new_pool(); - - PgfWriter* wtr = pgf_new_writer(out, pool, err); - - pgf_write_concrete(concr, wtr, true); - - gu_pool_free(pool); -} - static void -pgf_write_concretes(PgfConcrs* concretes, PgfWriter* wtr, bool with_content) +pgf_write_concretes(size_t n_concrs, PgfConcr** concrs, PgfWriter* wtr, bool with_content) { - size_t n_concrs = gu_seq_length(concretes); pgf_write_len(n_concrs, wtr); gu_return_on_exn(wtr->err, ); for (size_t i = 0; i < n_concrs; i++) { - PgfConcr* concr = gu_seq_index(concretes, PgfConcr, i); - pgf_write_concrete(concr, wtr, with_content); + pgf_write_concrete(concrs[i], wtr, with_content); gu_return_on_exn(wtr->err, ); } } PGF_INTERNAL void -pgf_write_pgf(PgfPGF* pgf, PgfWriter* wtr) { +pgf_write_pgf(PgfPGF* pgf, size_t n_concrs, PgfConcr** concrs, PgfWriter* wtr) { gu_out_u16be(wtr->out, pgf->major_version, wtr->err); gu_return_on_exn(wtr->err, ); @@ -907,7 +898,7 @@ pgf_write_pgf(PgfPGF* pgf, PgfWriter* wtr) { bool with_content = (gu_seq_binsearch(pgf->gflags, pgf_flag_order, PgfFlag, "split") == NULL); - pgf_write_concretes(pgf->concretes, wtr, with_content); + pgf_write_concretes(n_concrs, concrs, wtr, with_content); gu_return_on_exn(wtr->err, ); } diff --git a/src/runtime/c/pgf/writer.h b/src/runtime/c/pgf/writer.h index de99ee266..8552e6c68 100644 --- a/src/runtime/c/pgf/writer.h +++ b/src/runtime/c/pgf/writer.h @@ -33,7 +33,10 @@ pgf_write_len(size_t len, PgfWriter* wtr); PGF_INTERNAL_DECL void pgf_write_cid(PgfCId id, PgfWriter* wtr); +PGF_INTERNAL void +pgf_write_concrete(PgfConcr* concr, PgfWriter* wtr, bool with_content); + PGF_INTERNAL_DECL void -pgf_write_pgf(PgfPGF* pgf, PgfWriter* wtr); +pgf_write_pgf(PgfPGF* pgf, size_t n_concrs, PgfConcr** concrs, PgfWriter* wtr); #endif // WRITER_H_ diff --git a/src/runtime/c/sg/sqlite3Btree.c b/src/runtime/c/sg/sqlite3Btree.c index ee6bd206a..999606791 100644 --- a/src/runtime/c/sg/sqlite3Btree.c +++ b/src/runtime/c/sg/sqlite3Btree.c @@ -4918,7 +4918,6 @@ SQLITE_PRIVATE int sqlite3PendingByte; # define SQLITE_UTF16NATIVE SQLITE_UTF16BE #endif #if !defined(SQLITE_BYTEORDER) -const int sqlite3one = 1; # define SQLITE_BYTEORDER 0 /* 0 means "unknown at compile-time" */ # define SQLITE_BIGENDIAN (*(char *)(&sqlite3one)==0) # define SQLITE_LITTLEENDIAN (*(char *)(&sqlite3one)==1) @@ -5041,30 +5040,6 @@ SQLITE_PRIVATE int sqlite3VdbeRecordCompareWithSkip(int, const void *, UnpackedR */ /* #include "sqliteInt.h" */ -/* An array to map all upper-case characters into their corresponding -** lower-case character. -** -** SQLite only considers US-ASCII (or EBCDIC) characters. We do not -** handle case conversions for the UTF character set since the tables -** involved are nearly as big or bigger than SQLite itself. -*/ -const unsigned char sqlite3UpperToLower[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, - 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, - 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, - 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103, - 104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121, - 122, 91, 92, 93, 94, 95, 96, 97, 98, 99,100,101,102,103,104,105,106,107, - 108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125, - 126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, - 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161, - 162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179, - 180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197, - 198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215, - 216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233, - 234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251, - 252,253,254,255 -}; /* EVIDENCE-OF: R-02982-34736 In order to maintain full backwards ** compatibility for legacy applications, the URI filename capability is ** disabled by default. @@ -9088,22 +9063,6 @@ SQLITE_PRIVATE int sqlite3Strlen30(const char *z){ return 0x3fffffff & (int)strlen(z); } -/* Convenient short-hand */ -#define UpperToLower sqlite3UpperToLower - -int sqlite3StrICmp(const char *zLeft, const char *zRight){ - unsigned char *a, *b; - int c; - a = (unsigned char *)zLeft; - b = (unsigned char *)zRight; - for(;;){ - c = (int)UpperToLower[*a] - (int)UpperToLower[*b]; - if( c || *a==0 ) break; - a++; - b++; - } - return c; -} /* ** The string z[] is an text representation of a real number. ** Convert this string to a double and write it into *pResult. @@ -17871,6 +17830,13 @@ struct winFile { #define WINFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */ #define WINFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ +/* + * The size of the buffer used by sqlite3_win32_write_debug(). + */ +#ifndef SQLITE_WIN32_DBG_BUF_SIZE +# define SQLITE_WIN32_DBG_BUF_SIZE ((int)(4096-sizeof(DWORD))) +#endif + /* * The value used with sqlite3_win32_set_directory() to specify that * the temporary directory should be changed. @@ -18819,6 +18785,43 @@ SQLITE_PRIVATE int sqlite3_win32_reset_heap(){ } #endif /* SQLITE_WIN32_MALLOC */ +/* +** This function outputs the specified (ANSI) string to the Win32 debugger +** (if available). +*/ + +SQLITE_PRIVATE void sqlite3_win32_write_debug(const char *zBuf, int nBuf){ + char zDbgBuf[SQLITE_WIN32_DBG_BUF_SIZE]; + int nMin = MIN(nBuf, (SQLITE_WIN32_DBG_BUF_SIZE - 1)); /* may be negative. */ + if( nMin<-1 ) nMin = -1; /* all negative values become -1. */ + assert( nMin==-1 || nMin==0 || nMin0 ){ + memset(zDbgBuf, 0, SQLITE_WIN32_DBG_BUF_SIZE); + memcpy(zDbgBuf, zBuf, nMin); + osOutputDebugStringA(zDbgBuf); + }else{ + osOutputDebugStringA(zBuf); + } +#elif defined(SQLITE_WIN32_HAS_WIDE) + memset(zDbgBuf, 0, SQLITE_WIN32_DBG_BUF_SIZE); + if ( osMultiByteToWideChar( + osAreFileApisANSI() ? CP_ACP : CP_OEMCP, 0, zBuf, + nMin, (LPWSTR)zDbgBuf, SQLITE_WIN32_DBG_BUF_SIZE/sizeof(WCHAR))<=0 ){ + return; + } + osOutputDebugStringW((LPCWSTR)zDbgBuf); +#else + if( nMin>0 ){ + memset(zDbgBuf, 0, SQLITE_WIN32_DBG_BUF_SIZE); + memcpy(zDbgBuf, zBuf, nMin); + fprintf(stderr, "%s", zDbgBuf); + }else{ + fprintf(stderr, "%s", zBuf); + } +#endif +} + /* ** The following routine suspends the current thread for at least ms ** milliseconds. This is equivalent to the Win32 Sleep() interface. @@ -19260,6 +19263,40 @@ SQLITE_PRIVATE char *sqlite3_win32_utf8_to_mbcs(const char *zFilename){ return zFilenameMbcs; } +/* +** This function sets the data directory or the temporary directory based on +** the provided arguments. The type argument must be 1 in order to set the +** data directory or 2 in order to set the temporary directory. The zValue +** argument is the name of the directory to use. The return value will be +** SQLITE_OK if successful. +*/ +SQLITE_PRIVATE int sqlite3_win32_set_directory(DWORD type, LPCWSTR zValue){ + char **ppDirectory = 0; +#ifndef SQLITE_OMIT_AUTOINIT + int rc = sqlite3BtreeInitialize(); + if( rc ) return rc; +#endif + if( type==SQLITE_WIN32_TEMP_DIRECTORY_TYPE ){ + ppDirectory = &sqlite3_temp_directory; + } + assert( !ppDirectory || type==SQLITE_WIN32_TEMP_DIRECTORY_TYPE + ); + assert( !ppDirectory || sqlite3MemdebugHasType(*ppDirectory, MEMTYPE_HEAP) ); + if( ppDirectory ){ + char *zValueUtf8 = 0; + if( zValue && zValue[0] ){ + zValueUtf8 = winUnicodeToUtf8(zValue); + if ( zValueUtf8==0 ){ + return SQLITE_NOMEM; + } + } + sqlite3_free(*ppDirectory); + *ppDirectory = zValueUtf8; + return SQLITE_OK; + } + return SQLITE_ERROR; +} + /* ** The return value of winGetLastErrorMsg ** is zero if the error message fits in the buffer, or non-zero @@ -22331,6 +22368,9 @@ static int winOpen( if( isReadonly ){ pFile->ctrlFlags |= WINFILE_RDONLY; } + if( sqlite3_uri_boolean(zName, "psow", SQLITE_POWERSAFE_OVERWRITE) ){ + pFile->ctrlFlags |= WINFILE_PSOW; + } pFile->lastErrno = NO_ERROR; pFile->zPath = zName; #if SQLITE_MAX_MMAP_SIZE>0 @@ -22549,6 +22589,43 @@ static BOOL winIsDriveLetterAndColon( return ( sqlite3Isalpha(zPathname[0]) && zPathname[1]==':' ); } +/* +** Returns non-zero if the specified path name should be used verbatim. If +** non-zero is returned from this function, the calling function must simply +** use the provided path name verbatim -OR- resolve it into a full path name +** using the GetFullPathName Win32 API function (if available). +*/ +static BOOL winIsVerbatimPathname( + const char *zPathname +){ + /* + ** If the path name starts with a forward slash or a backslash, it is either + ** a legal UNC name, a volume relative path, or an absolute path name in the + ** "Unix" format on Windows. There is no easy way to differentiate between + ** the final two cases; therefore, we return the safer return value of TRUE + ** so that callers of this function will simply use it verbatim. + */ + if ( winIsDirSep(zPathname[0]) ){ + return TRUE; + } + + /* + ** If the path name starts with a letter and a colon it is either a volume + ** relative path or an absolute path. Callers of this function must not + ** attempt to treat it as a relative path name (i.e. they should simply use + ** it verbatim). + */ + if ( winIsDriveLetterAndColon(zPathname) ){ + return TRUE; + } + + /* + ** If we get to this point, the path name should almost certainly be a purely + ** relative one (i.e. not a UNC name, not absolute, and not volume relative). + */ + return FALSE; +} + /* ** Turn a relative pathname into a full pathname. Write the full ** pathname into zOut[]. zOut[] will be at least pVfs->mxPathname diff --git a/src/runtime/haskell-bind/PGF.hs b/src/runtime/haskell-bind/PGF.hs index 8aeca7ab8..11eeefd35 100644 --- a/src/runtime/haskell-bind/PGF.hs +++ b/src/runtime/haskell-bind/PGF.hs @@ -1,3 +1,274 @@ -module PGF(module PGF2) where +module PGF (PGF, readPGF, showPGF, + abstractName, -import PGF2 + CId, mkCId, wildCId, showCId, readCId, + + categories, categoryContext, categoryProbability, + functions, functionsByCat, functionType, functionIsDataCon, browse, + + PGF2.Expr,Tree,showExpr,PGF2.readExpr,pExpr,pIdent, + mkAbs,unAbs, + mkApp,unApp,unapply, + PGF2.mkStr,PGF2.unStr, + PGF2.mkInt,PGF2.unInt, + PGF2.mkFloat,PGF2.unFloat, + PGF2.mkMeta,PGF2.unMeta, + PGF2.exprSize, exprFunctions,PGF2.exprSubstitute, + compute, + rankTreesByProbs,treeProbability, + + TcError, ppTcError, inferExpr, checkType, + + PGF2.Type, PGF2.Hypo, showType, showContext, PGF2.readType, + mkType, unType, + + Token, + + Language, readLanguage, showLanguage, + languages, startCat, languageCode, + linearize, bracketedLinearize, tabularLinearizes, showBracketedString, + ParseOutput(..), parse, parse_, complete, + PGF2.BracketedString(..), PGF2.flattenBracketedString, + hasLinearization, + showPrintName, + + Morpho, buildMorpho, + lookupMorpho, isInMorpho, morphoMissing, morphoKnown, fullFormLexicon, + + Labels, getDepLabels, CncLabels, getCncDepLabels, + + generateAllDepth, generateRandom, generateRandomFrom, generateRandomDepth, generateRandomFromDepth, + generateFromDepth, + + PGF2.GraphvizOptions(..), + graphvizAbstractTree, graphvizParseTree, graphvizAlignment, graphvizDependencyTree, graphvizParseTreeDep, + + -- * Tries + ATree(..),Trie(..),toATree,toTrie, + + readProbabilitiesFromFile, + + groupResults, conlls2latexDoc, gizaAlignment + ) where + +import PGF.Internal +import qualified PGF2 +import qualified Data.Map as Map +import qualified Text.ParserCombinators.ReadP as RP +import Data.List(sortBy) +import Text.PrettyPrint(text) +import Data.Char(isDigit) + +readPGF = PGF2.readPGF + +showPGF gr = PGF2.showPGF gr + +readLanguage = readCId +showLanguage (CId s) = s + +startCat = PGF2.startCat +languageCode pgf lang = Just (PGF2.languageCode (lookConcr pgf lang)) + +abstractName gr = CId (PGF2.abstractName gr) + +categories gr = map CId (PGF2.categories gr) +categoryContext gr (CId c) = PGF2.categoryContext gr c +categoryProbability gr (CId c) = PGF2.categoryProbability gr c + +functions gr = map CId (PGF2.functions gr) +functionsByCat gr (CId c) = map CId (PGF2.functionsByCat gr c) +functionType gr (CId f) = PGF2.functionType gr f +functionIsDataCon gr (CId f) = PGF2.functionIsDataCon gr f + +type Tree = PGF2.Expr +type Labels = Map.Map CId [String] +type CncLabels = [(String, String -> Maybe (String -> String,String,String))] + + +mkCId x = CId x +wildCId = CId "_" +showCId (CId x) = x +readCId s = Just (CId s) + +showExpr xs e = PGF2.showExpr [x | CId x <- xs] e + +pExpr = RP.readS_to_P PGF2.pExpr +pIdent = RP.readS_to_P PGF2.pIdent + +mkAbs bind_type (CId var) e = PGF2.mkAbs bind_type var e +unAbs e = case PGF2.unAbs e of + Just (bind_type, var, e) -> Just (bind_type, CId var, e) + Nothing -> Nothing + +mkApp (CId f) es = PGF2.mkApp f es +unApp e = case PGF2.unApp e of + Just (f,es) -> Just (CId f,es) + Nothing -> Nothing + +unapply = PGF2.unapply + +instance Read PGF2.Expr where + readsPrec _ s = case PGF2.readExpr s of + Just e -> [(e,"")] + Nothing -> [] + +showType xs ty = PGF2.showType [x | CId x <- xs] ty +showContext xs hypos = PGF2.showContext [x | CId x <- xs] hypos + +mkType hypos (CId var) es = PGF2.mkType [(bt,var,ty) | (bt,CId var,ty) <- hypos] var es +unType ty = case PGF2.unType ty of + (hypos,var,es) -> ([(bt,CId var,ty) | (bt,var,ty) <- hypos],CId var,es) + +linearize pgf lang e = PGF2.linearize (lookConcr pgf lang) e +bracketedLinearize pgf lang e = PGF2.bracketedLinearize (lookConcr pgf lang) e +tabularLinearizes pgf lang e = [PGF2.tabularLinearize (lookConcr pgf lang) e] +showBracketedString = PGF2.showBracketedString + +type TcError = String + +-- | This data type encodes the different outcomes which you could get from the parser. +data ParseOutput + = ParseFailed Int -- ^ The integer is the position in number of tokens where the parser failed. + | TypeError [(FId,TcError)] -- ^ The parsing was successful but none of the trees is type correct. + -- The forest id ('FId') points to the bracketed string from the parser + -- where the type checking failed. More than one error is returned + -- if there are many analizes for some phrase but they all are not type correct. + | ParseOk [Tree] -- ^ If the parsing and the type checking are successful we get a list of abstract syntax trees. + -- The list should be non-empty. + | ParseIncomplete -- ^ The sentence is not complete. Only partial output is produced + +parse pgf lang cat s = + case PGF2.parse (lookConcr pgf lang) cat s of + PGF2.ParseOk ts -> map fst ts + _ -> [] + +parse_ pgf lang cat dp s = + case PGF2.parse (lookConcr pgf lang) cat s of + PGF2.ParseFailed pos _ -> (ParseFailed pos, PGF2.Leaf s) + PGF2.ParseOk ts -> (ParseOk (map fst ts), PGF2.Leaf s) + PGF2.ParseIncomplete -> (ParseIncomplete, PGF2.Leaf s) + +complete pgf lang cat s prefix = + let compls = Map.fromListWith (++) [(tok,[CId fun]) | (tok,_,fun,_) <- PGF2.complete (lookConcr pgf lang) cat s prefix] + in (PGF2.Leaf [],s,compls) + +hasLinearization pgf lang (CId f) = PGF2.hasLinearization (lookConcr pgf lang) f + +ppTcError s = s + +inferExpr gr e = + case PGF2.inferExpr gr e of + Right res -> Right res + Left msg -> Left (text msg) + +checkType gr ty = + case PGF2.checkType gr ty of + Right res -> Right res + Left msg -> Left (text msg) + +showPrintName pgf lang (CId f) = + case PGF2.printName (lookConcr pgf lang) f of + Just n -> n + Nothing -> f + +getDepLabels :: String -> Labels +getDepLabels s = Map.fromList [(mkCId f,ls) | f:ls <- map words (lines s)] + +getCncDepLabels :: String -> CncLabels +getCncDepLabels = PGF2.getCncDepLabels + +generateAllDepth gr ty _ = map fst (PGF2.generateAll gr ty) +generateFromDepth = error "generateFromDepth is not implemented" +generateRandom = error "generateRandom is not implemented" +generateRandomFrom = error "generateRandomFrom is not implemented" +generateRandomDepth = error "generateRandomDepth is not implemented" +generateRandomFromDepth = error "generateRandomFromDepth is not implemented" + +exprFunctions e = [CId f | f <- PGF2.exprFunctions e] + +compute = error "compute is not implemented" + +-- | rank from highest to lowest probability +rankTreesByProbs :: PGF -> [PGF2.Expr] -> [(PGF2.Expr,Double)] +rankTreesByProbs pgf ts = sortBy (\ (_,p) (_,q) -> compare q p) + [(t, realToFrac (PGF2.treeProbability pgf t)) | t <- ts] + +treeProbability = PGF2.treeProbability + +languages pgf = fmap CId (Map.keys (PGF2.languages pgf)) + +type Morpho = PGF2.Concr + +buildMorpho pgf lang = lookConcr pgf lang +lookupMorpho cnc w = [(CId lemma,an) | (lemma,an,_) <- PGF2.lookupMorpho cnc w] +isInMorpho cnc w = not (null (PGF2.lookupMorpho cnc w)) +fullFormLexicon cnc = [(w, [(CId fun,an) | (fun,an,_) <- analyses]) | (w, analyses) <- PGF2.fullFormLexicon cnc] + +graphvizAbstractTree pgf (funs,cats) = PGF2.graphvizAbstractTree pgf PGF2.graphvizDefaults{PGF2.noFun=not funs,PGF2.noCat=not cats} +graphvizParseTree pgf lang = PGF2.graphvizParseTree (lookConcr pgf lang) +graphvizAlignment pgf langs = PGF2.graphvizWordAlignment (map (lookConcr pgf) langs) PGF2.graphvizDefaults +graphvizDependencyTree format debug lbls cnclbls pgf lang e = + let to_lbls' lbls = Map.fromList [(id,xs) | (CId id,xs) <- Map.toList lbls] + in PGF2.graphvizDependencyTree format debug (fmap to_lbls' lbls) cnclbls (lookConcr pgf lang) e +graphvizParseTreeDep = error "graphvizParseTreeDep is not implemented" + +browse :: PGF -> CId -> Maybe (String,[CId],[CId]) +browse = error "browse is not implemented" + +-- | A type for plain applicative trees +data ATree t = Other t | App CId [ATree t] deriving Show +data Trie = Oth Tree | Ap CId [[Trie ]] deriving Show +-- ^ A type for tries of plain applicative trees + +-- | Convert a 'Tree' to an 'ATree' +toATree :: Tree -> ATree Tree +toATree e = maybe (Other e) app (PGF2.unApp e) + where + app (f,es) = App (mkCId f) (map toATree es) + +-- | Combine a list of trees into a trie +toTrie = combines . map ((:[]) . singleton) + where + singleton t = case t of + Other e -> Oth e + App f ts -> Ap f [map singleton ts] + + combines [] = [] + combines (ts:tss) = ts1:combines tss2 + where + (ts1,tss2) = combines2 [] tss ts + combines2 ots [] ts1 = (ts1,reverse ots) + combines2 ots (ts2:tss) ts1 = + maybe (combines2 (ts2:ots) tss ts1) (combines2 ots tss) (combine ts1 ts2) + + combine ts us = mapM combine2 (zip ts us) + where + combine2 (Ap f ts,Ap g us) | f==g = Just (Ap f (combines (ts++us))) + combine2 _ = Nothing + +readProbabilitiesFromFile :: FilePath -> IO (Map.Map CId Double) +readProbabilitiesFromFile fpath = do + s <- readFile fpath + return $ Map.fromList [(mkCId f,read p) | f:p:_ <- map words (lines s)] + +groupResults :: [[(Language,String)]] -> [(Language,[String])] +groupResults = Map.toList . foldr more Map.empty . start . concat + where + start ls = [(l,[s]) | (l,s) <- ls] + more (l,s) = + Map.insertWith (\ [x] xs -> if elem x xs then xs else (x : xs)) l s + +conlls2latexDoc = error "conlls2latexDoc is not implemented" + + +morphoMissing :: Morpho -> [String] -> [String] +morphoMissing = morphoClassify False + +morphoKnown :: Morpho -> [String] -> [String] +morphoKnown = morphoClassify True + +morphoClassify :: Bool -> Morpho -> [String] -> [String] +morphoClassify k mo ws = [w | w <- ws, k /= null (lookupMorpho mo w), notLiteral w] where + notLiteral w = not (all isDigit w) ---- should be defined somewhere + +gizaAlignment = error "gizaAlignment is not implemented" diff --git a/src/runtime/haskell-bind/PGF/Internal.hs b/src/runtime/haskell-bind/PGF/Internal.hs index e8193b788..df736e788 100644 --- a/src/runtime/haskell-bind/PGF/Internal.hs +++ b/src/runtime/haskell-bind/PGF/Internal.hs @@ -1 +1,163 @@ -module PGF.Internal where +{-# LANGUAGE ImplicitParams #-} +module PGF.Internal(CId(..),Language,PGF2.PGF, + PGF2.Concr,lookConcr, + PGF2.FId,isPredefFId, + PGF2.FunId,PGF2.SeqId,PGF2.LIndex,PGF2.Token, + PGF2.Production(..),PGF2.PArg(..),PGF2.Symbol(..),PGF2.Literal(..),PGF2.BindType(..),Sequence, + globalFlags, abstrFlags, concrFlags, + concrTotalCats, concrCategories, concrProductions, + concrTotalFuns, concrFunction, + concrTotalSeqs, concrSequence, + + PGF2.CodeLabel, PGF2.Instr(..), PGF2.IVal(..), PGF2.TailInfo(..), + + PGF2.Builder, PGF2.B, PGF2.build, + eAbs, eApp, eMeta, eFun, eVar, eLit, eTyped, eImplArg, dTyp, hypo, + PGF2.AbstrInfo, newAbstr, PGF2.ConcrInfo, newConcr, newPGF, + + -- * Write an in-memory PGF to a file + writePGF, writeConcr, + + PGF2.fidString, PGF2.fidInt, PGF2.fidFloat, PGF2.fidVar, PGF2.fidStart, + + ppFunId, ppSeqId, ppFId, ppMeta, ppLit, ppSeq, + + unionPGF + ) where + +import qualified PGF2 +import qualified PGF2.Internal as PGF2 +import qualified Data.Map as Map +import PGF2.FFI(PGF(..)) +import Data.Array.IArray +import Data.Array.Unboxed +import Text.PrettyPrint + +newtype CId = CId String deriving (Show,Read,Eq,Ord) + +type Language = CId + +lookConcr (PGF _ langs _) (CId lang) = + case Map.lookup lang langs of + Just cnc -> cnc + Nothing -> error "Unknown language" + +globalFlags pgf = Map.fromAscList [(CId name,value) | (name,value) <- PGF2.globalFlags pgf] +abstrFlags pgf = Map.fromAscList [(CId name,value) | (name,value) <- PGF2.abstrFlags pgf] +concrFlags concr = Map.fromAscList [(CId name,value) | (name,value) <- PGF2.concrFlags concr] + +concrTotalCats = PGF2.concrTotalCats + +concrCategories :: PGF2.Concr -> [(CId,PGF2.FId,PGF2.FId,[String])] +concrCategories c = [(CId cat,start,end,lbls) | (cat,start,end,lbls) <- PGF2.concrCategories c] + +concrProductions :: PGF2.Concr -> PGF2.FId -> [PGF2.Production] +concrProductions = PGF2.concrProductions + +concrTotalFuns = PGF2.concrTotalFuns + +concrFunction :: PGF2.Concr -> PGF2.FunId -> (CId,[PGF2.SeqId]) +concrFunction c funid = + let (fun,seqids) = PGF2.concrFunction c funid + in (CId fun,seqids) + +concrTotalSeqs :: PGF2.Concr -> PGF2.SeqId +concrTotalSeqs = PGF2.concrTotalSeqs + +concrSequence = PGF2.concrSequence + +isPredefFId = PGF2.isPredefFId + +type Sequence = [PGF2.Symbol] + +eAbs :: (?builder :: PGF2.Builder s) => PGF2.BindType -> CId -> PGF2.B s PGF2.Expr -> PGF2.B s PGF2.Expr +eAbs bind_type (CId var) body = PGF2.eAbs bind_type var body + +eApp :: (?builder :: PGF2.Builder s) => PGF2.B s PGF2.Expr -> PGF2.B s PGF2.Expr -> PGF2.B s PGF2.Expr +eApp = PGF2.eApp + +eMeta :: (?builder :: PGF2.Builder s) => Int -> PGF2.B s PGF2.Expr +eMeta = PGF2.eMeta + +eFun (CId fun) = PGF2.eFun fun + +eVar :: (?builder :: PGF2.Builder s) => Int -> PGF2.B s PGF2.Expr +eVar = PGF2.eVar + +eLit :: (?builder :: PGF2.Builder s) => PGF2.Literal -> PGF2.B s PGF2.Expr +eLit = PGF2.eLit + +eTyped :: (?builder :: PGF2.Builder s) => PGF2.B s PGF2.Expr -> PGF2.B s PGF2.Type -> PGF2.B s PGF2.Expr +eTyped = PGF2.eTyped + +eImplArg :: (?builder :: PGF2.Builder s) => PGF2.B s PGF2.Expr -> PGF2.B s PGF2.Expr +eImplArg = PGF2.eImplArg + +dTyp :: (?builder :: PGF2.Builder s) => [PGF2.B s (PGF2.BindType,String,PGF2.Type)] -> CId -> [PGF2.B s PGF2.Expr] -> PGF2.B s PGF2.Type +dTyp hypos (CId cat) es = PGF2.dTyp hypos cat es + +hypo bind_type (CId var) ty = PGF2.hypo bind_type var ty + +newAbstr flags cats funs = PGF2.newAbstr [(flag,lit) | (CId flag,lit) <- flags] + [(cat,hypos,prob) | (CId cat,hypos,prob) <- cats] + [(fun,ty,arity,prob) | (CId fun,ty,arity,prob) <- funs] + +newConcr abs flags printnames lindefs linrefs prods cncfuns seqs cnccats total_ccats = + PGF2.newConcr abs [(flag,lit) | (CId flag,lit) <- flags] + [(id,name) | (CId id,name) <- printnames] + lindefs linrefs + prods + [(fun,seq_ids) | (CId fun,seq_ids) <- cncfuns] + seqs + [(cat,start,end,labels) | (CId cat,start,end,labels) <- cnccats] + total_ccats + +newPGF flags (CId name) abstr concrs = + PGF2.newPGF [(flag,lit) | (CId flag,lit) <- flags] + name + abstr + [(name,concr) | (CId name,concr) <- concrs] + +writePGF = PGF2.writePGF +writeConcr fpath pgf lang = PGF2.writeConcr fpath (lookConcr pgf lang) + + +ppFunId funid = char 'F' <> int funid +ppSeqId seqid = char 'S' <> int seqid + +ppFId fid + | fid == PGF2.fidString = text "CString" + | fid == PGF2.fidInt = text "CInt" + | fid == PGF2.fidFloat = text "CFloat" + | fid == PGF2.fidVar = text "CVar" + | fid == PGF2.fidStart = text "CStart" + | otherwise = char 'C' <> int fid + +ppMeta :: Int -> Doc +ppMeta n + | n == 0 = char '?' + | otherwise = char '?' <> int n + +ppLit (PGF2.LStr s) = text (show s) +ppLit (PGF2.LInt n) = int n +ppLit (PGF2.LFlt d) = double d + +ppSeq (seqid,seq) = + ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol seq) + +ppSymbol (PGF2.SymCat d r) = char '<' <> int d <> comma <> int r <> char '>' +ppSymbol (PGF2.SymLit d r) = char '{' <> int d <> comma <> int r <> char '}' +ppSymbol (PGF2.SymVar d r) = char '<' <> int d <> comma <> char '$' <> int r <> char '>' +ppSymbol (PGF2.SymKS t) = doubleQuotes (text t) +ppSymbol PGF2.SymNE = text "nonExist" +ppSymbol PGF2.SymBIND = text "BIND" +ppSymbol PGF2.SymSOFT_BIND = text "SOFT_BIND" +ppSymbol PGF2.SymSOFT_SPACE= text "SOFT_SPACE" +ppSymbol PGF2.SymCAPIT = text "CAPIT" +ppSymbol PGF2.SymALL_CAPIT = text "ALL_CAPIT" +ppSymbol (PGF2.SymKP syms alts) = text "pre" <+> braces (hsep (punctuate semi (hsep (map ppSymbol syms) : map ppAlt alts))) + +ppAlt (syms,ps) = hsep (map ppSymbol syms) <+> char '/' <+> hsep (map (doubleQuotes . text) ps) + +unionPGF = PGF2.unionPGF + diff --git a/src/runtime/haskell-bind/PGF2.hsc b/src/runtime/haskell-bind/PGF2.hsc index 186aa2b31..dd03c3f3b 100644 --- a/src/runtime/haskell-bind/PGF2.hsc +++ b/src/runtime/haskell-bind/PGF2.hsc @@ -21,25 +21,21 @@ module PGF2 (-- * PGF PGF,readPGF,showPGF, - -- * Identifiers - CId, - -- * Abstract syntax AbsName,abstractName, -- ** Categories - Cat,categories,categoryContext, + Cat,categories,categoryContext,categoryProbability, -- ** Functions Fun, functions, functionsByCat, - functionType, functionIsConstructor, hasLinearization, + functionType, functionIsDataCon, hasLinearization, -- ** Expressions - Expr,showExpr,readExpr,pExpr, + Expr,showExpr,readExpr,pExpr,pIdent, mkAbs,unAbs, - mkApp,unApp, + mkApp,unApp,unapply, mkStr,unStr, mkInt,unInt, mkFloat,unFloat, mkMeta,unMeta, - mkCId, exprHash, exprSize, exprFunctions, exprSubstitute, treeProbability, @@ -58,13 +54,13 @@ module PGF2 (-- * PGF ConcName,Concr,languages,concreteName,languageCode, -- ** Linearization - linearize,linearizeAll,tabularLinearize,tabularLinearizeAll,bracketedLinearize,bracketedLinearizeAll, + linearize,linearizeAll,tabularLinearize,tabularLinearizeAll,bracketedLinearize, FId, LIndex, BracketedString(..), showBracketedString, flattenBracketedString, printName, alignWords, -- ** Parsing - ParseOutput(..), parse, parseWithHeuristics, + ParseOutput(..), parse, parseWithHeuristics, complete, -- ** Sentence Lookup lookupSentence, -- ** Generation @@ -73,7 +69,9 @@ module PGF2 (-- * PGF MorphoAnalysis, lookupMorpho, fullFormLexicon, -- ** Visualizations GraphvizOptions(..), graphvizDefaults, - graphvizAbstractTree, graphvizParseTree, graphvizWordAlignment, + graphvizAbstractTree, graphvizParseTree, + graphvizDependencyTree, conlls2latexDoc, getCncDepLabels, + graphvizWordAlignment, -- * Exceptions PGFError(..), @@ -82,7 +80,7 @@ module PGF2 (-- * PGF LiteralCallback,literalCallbacks ) where -import Prelude hiding (fromEnum,(<>)) -- GHC 8.4.1 clash with Text.PrettyPrint +import Prelude hiding (fromEnum) import Control.Exception(Exception,throwIO) import Control.Monad(forM_) import System.IO.Unsafe(unsafePerformIO,unsafeInterleaveIO) @@ -97,7 +95,8 @@ import Data.Typeable import qualified Data.Map as Map import Data.IORef import Data.Char(isUpper,isSpace) -import Data.List(isSuffixOf,maximumBy,nub) +import Data.List(isSuffixOf,maximumBy,nub,mapAccumL,intersperse,groupBy,find) +import Data.Maybe(fromMaybe) import Data.Function(on) @@ -110,8 +109,8 @@ import Data.Function(on) -- to Concr but has lost its reference to PGF. -type AbsName = CId -- ^ Name of abstract syntax -type ConcName = CId -- ^ Name of concrete syntax +type AbsName = String -- ^ Name of abstract syntax +type ConcName = String -- ^ Name of concrete syntax -- | Reads file in Portable Grammar Format and produces -- 'PGF' structure. The file is usually produced with: @@ -136,7 +135,22 @@ readPGF fpath = throwIO (PGFError "The grammar cannot be loaded") else return pgf pgfFPtr <- newForeignPtr gu_pool_finalizer pool - return (PGF pgf (touchForeignPtr pgfFPtr)) + let touch = touchForeignPtr pgfFPtr + ref <- newIORef Map.empty + allocaBytes (#size GuMapItor) $ \itor -> + do fptr <- wrapMapItorCallback (getLanguages ref touch) + (#poke GuMapItor, fn) itor fptr + pgf_iter_languages pgf itor nullPtr + freeHaskellFunPtr fptr + langs <- readIORef ref + return (PGF pgf langs touch) + where + getLanguages :: IORef (Map.Map String Concr) -> Touch -> MapItorCallback + getLanguages ref touch itor key value exn = do + langs <- readIORef ref + name <- peekUtf8CString (castPtr key) + concr <- fmap (\ptr -> Concr ptr touch) $ peek (castPtr value) + writeIORef ref $! Map.insert name concr langs showPGF :: PGF -> String showPGF p = @@ -144,29 +158,15 @@ showPGF p = withGuPool $ \tmpPl -> do (sb,out) <- newOut tmpPl exn <- gu_new_exn tmpPl - pgf_print (pgf p) out exn + withArrayLen ((map concr . Map.elems . languages) p) $ \n_concrs concrs -> + pgf_print (pgf p) (fromIntegral n_concrs) concrs out exn touchPGF p s <- gu_string_buf_freeze sb tmpPl peekUtf8CString s -- | List of all languages available in the grammar. languages :: PGF -> Map.Map ConcName Concr -languages p = - unsafePerformIO $ - do ref <- newIORef Map.empty - allocaBytes (#size GuMapItor) $ \itor -> - do fptr <- wrapMapItorCallback (getLanguages ref) - (#poke GuMapItor, fn) itor fptr - pgf_iter_languages (pgf p) itor nullPtr - freeHaskellFunPtr fptr - readIORef ref - where - getLanguages :: IORef (Map.Map String Concr) -> MapItorCallback - getLanguages ref itor key value exn = do - langs <- readIORef ref - name <- peekUtf8CString (castPtr key) - concr <- fmap (\ptr -> Concr ptr (touchPGF p)) $ peek (castPtr value) - writeIORef ref $! Map.insert name concr langs +languages p = langs p -- | The abstract language name is the name of the top-level -- abstract module @@ -242,8 +242,8 @@ functionType p fn = else Just (Type c_type (touchPGF p))) -- | The type of a function -functionIsConstructor :: PGF -> Fun -> Bool -functionIsConstructor p fn = +functionIsDataCon :: PGF -> Fun -> Bool +functionIsDataCon p fn = unsafePerformIO $ withGuPool $ \tmpPl -> do c_fn <- newUtf8CString fn tmpPl @@ -253,15 +253,15 @@ functionIsConstructor p fn = -- | Checks an expression against a specified type. checkExpr :: PGF -> Expr -> Type -> Either String Expr -checkExpr (PGF p _) (Expr c_expr touch1) (Type c_ty touch2) = +checkExpr p (Expr c_expr touch1) (Type c_ty touch2) = unsafePerformIO $ alloca $ \pexpr -> withGuPool $ \tmpPl -> do exn <- gu_new_exn tmpPl exprPl <- gu_new_pool poke pexpr c_expr - pgf_check_expr p pexpr c_ty exn exprPl - touch1 >> touch2 + pgf_check_expr (pgf p) pexpr c_ty exn exprPl + touchPGF p >> touch1 >> touch2 status <- gu_exn_is_raised exn if not status then do exprFPl <- newForeignPtr gu_pool_finalizer exprPl @@ -280,15 +280,15 @@ checkExpr (PGF p _) (Expr c_expr touch1) (Type c_ty touch2) = -- possible to infer its type in the GF type system. -- In this case the function returns an error. inferExpr :: PGF -> Expr -> Either String (Expr, Type) -inferExpr (PGF p _) (Expr c_expr touch1) = +inferExpr p (Expr c_expr touch1) = unsafePerformIO $ alloca $ \pexpr -> withGuPool $ \tmpPl -> do exn <- gu_new_exn tmpPl exprPl <- gu_new_pool poke pexpr c_expr - c_ty <- pgf_infer_expr p pexpr exn exprPl - touch1 + c_ty <- pgf_infer_expr (pgf p) pexpr exn exprPl + touchPGF p >> touch1 status <- gu_exn_is_raised exn if not status then do exprFPl <- newForeignPtr gu_pool_finalizer exprPl @@ -306,15 +306,15 @@ inferExpr (PGF p _) (Expr c_expr touch1) = -- | Check whether a type is consistent with the abstract -- syntax of the grammar. checkType :: PGF -> Type -> Either String Type -checkType (PGF p _) (Type c_ty touch1) = +checkType p (Type c_ty touch1) = unsafePerformIO $ alloca $ \pty -> withGuPool $ \tmpPl -> do exn <- gu_new_exn tmpPl typePl <- gu_new_pool poke pty c_ty - pgf_check_type p pty exn typePl - touch1 + pgf_check_type (pgf p) pty exn typePl + touchPGF p >> touch1 status <- gu_exn_is_raised exn if not status then do typeFPl <- newForeignPtr gu_pool_finalizer typePl @@ -329,13 +329,13 @@ checkType (PGF p _) (Type c_ty touch1) = else throwIO (PGFError msg) compute :: PGF -> Expr -> Expr -compute (PGF p _) (Expr c_expr touch1) = +compute p (Expr c_expr touch1) = unsafePerformIO $ withGuPool $ \tmpPl -> do exn <- gu_new_exn tmpPl exprPl <- gu_new_pool - c_expr <- pgf_compute p c_expr exn tmpPl exprPl - touch1 + c_expr <- pgf_compute (pgf p) c_expr exn tmpPl exprPl + touchPGF p >> touch1 status <- gu_exn_is_raised exn if not status then do exprFPl <- newForeignPtr gu_pool_finalizer exprPl @@ -346,10 +346,10 @@ compute (PGF p _) (Expr c_expr touch1) = throwIO (PGFError msg) treeProbability :: PGF -> Expr -> Float -treeProbability (PGF p _) (Expr c_expr touch1) = +treeProbability p (Expr c_expr touch1) = unsafePerformIO $ do - res <- pgf_compute_tree_probability p c_expr - touch1 + res <- pgf_compute_tree_probability (pgf p) c_expr + touchPGF p >> touch1 return (realToFrac res) exprHash :: Int32 -> Expr -> Int32 @@ -447,6 +447,433 @@ graphvizWordAlignment cs opts e = s <- gu_string_buf_freeze sb tmpPl peekUtf8CString s + +type Labels = Map.Map Fun [String] + +-- | Visualize word dependency tree. +graphvizDependencyTree + :: String -- ^ Output format: @"latex"@, @"conll"@, @"malt_tab"@, @"malt_input"@ or @"dot"@ + -> Bool -- ^ Include extra information (debug) + -> Maybe Labels -- ^ abstract label information obtained with 'getDepLabels' + -> Maybe CncLabels -- ^ concrete label information obtained with ' ' (was: unused (was: @Maybe String@)) + -> Concr + -> Expr + -> String -- ^ Rendered output in the specified format +graphvizDependencyTree format debug mlab mclab concr t = + case format of + "latex" -> render . ppLaTeX $ conll2latex' conll + "svg" -> render . ppSVG . toSVG $ conll2latex' conll + "conll" -> printCoNLL conll + "malt_tab" -> render $ vcat (map (hcat . intersperse (char '\t') . (\ws -> [ws !! 0,ws !! 1,ws !! 3,ws !! 6,ws !! 7])) wnodes) + "malt_input" -> render $ vcat (map (hcat . intersperse (char '\t') . take 6) wnodes) + _ -> render $ text "digraph {" $$ + space $$ + nest 2 (text "rankdir=LR ;" $$ + text "node [shape = plaintext] ;" $$ + vcat nodes $$ + vcat links) $$ + text "}" + where + conll = maybe conll0 (\ls -> fixCoNLL ls conll0) mclab + conll0 = (map.map) render wnodes + nodes = map mkNode leaves + links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun),_,w) <- tail leaves] + +-- CoNLL format: ID FORM LEMMA PLEMMA POS PPOS FEAT PFEAT HEAD PHEAD DEPREL PDEPREL +-- P variants are automatically predicted rather than gold standard + + wnodes = [[int i, maltws ws, text fun, text (posCat cat), text cat, unspec, int parent, text lab, unspec, unspec] | + ((cat,fid,fun),i,ws) <- tail leaves, + let (lab,parent) = fromMaybe (dep_lbl,0) + (do (lbl,fid) <- lookup fid deps + (_,i,_) <- find (\((_,fid1,_),i,_) -> fid == fid1) leaves + return (lbl,i)) + ] + maltws = text . concat . intersperse "+" . words -- no spaces in column 2 + + nil = -1 + + bss = bracketedLinearize concr t + + root = ("_",nil,"_") + + leaves = (root,0,root_lbl) : (groupAndIndexIt 1 . concatMap (getLeaves root)) bss + deps = let (_,(h,deps)) = getDeps 0 [] t + in (h,(dep_lbl,nil)):deps + + groupAndIndexIt id [] = [] + groupAndIndexIt id ((p,w):pws) = (p,id,w) : groupAndIndexIt (id+1) pws +--- groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws +--- in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1 + where + collect pws@((p1,w):pws1) + | p == p1 = let (ws,pws2) = collect pws1 + in (w:ws,pws2) + collect pws = ([],pws) + + getLeaves parent bs = + case bs of + Leaf w -> [(parent,w)] + Bracket cat fid _ fun bss -> concatMap (getLeaves (cat,fid,fun)) bss + + mkNode ((_,p,_),i,w) = + tag p <+> brackets (text "label = " <> doubleQuotes (int i <> char '.' <+> text w)) <+> semi + + mkLink (x,(lbl,y)) = tag y <+> text "->" <+> tag x <+> text "[label = " <> doubleQuotes (text lbl) <> text "] ;" + + labels = maybe Map.empty id mlab + clabels = maybe [] id mclab + + posCat cat = case Map.lookup cat labels of + Just [p] -> p + _ -> cat + + getDeps n_fid xs e = + case unAbs e of + Just (_, x, e) -> getDeps n_fid (x:xs) e + Nothing -> case unApp e of + Just (f,es) -> let (n_fid_1,ds) = descend n_fid xs es + (mb_h, deps) = selectHead f ds + in case mb_h of + Just (fid,deps0) -> (n_fid_1+1,(fid,deps0++ + [(n_fid_1,(dep_lbl,fid))]++ + concat [(m,(lbl,fid)):ds | (lbl,(m,ds)) <- deps])) + Nothing -> (n_fid_1+1,(n_fid_1,concat [(m,(lbl,n_fid_1)):ds | (lbl,(m,ds)) <- deps])) + Nothing -> (n_fid+1,(n_fid,[])) + + descend n_fid xs es = mapAccumL (\n_fid e -> getDeps n_fid xs e) n_fid es + + selectHead f ds = + case Map.lookup f labels of + Just lbls -> extractHead (zip lbls ds) + Nothing -> extractLast ds + where + extractHead [] = (Nothing, []) + extractHead (ld@(l,d):lds) + | l == head_lbl = (Just d,lds) + | otherwise = let (mb_h,deps) = extractHead lds + in (mb_h,ld:deps) + + extractLast [] = (Nothing, []) + extractLast (d:ds) + | null ds = (Just d,[]) + | otherwise = let (mb_h,deps) = extractLast ds + in (mb_h,(dep_lbl,d):deps) + + dep_lbl = "dep" + head_lbl = "head" + root_lbl = "ROOT" + unspec = text "_" + + +---------------------- should be a separate module? + +-- visualization with latex output. AR Nov 2015 + +conlls2latexDoc :: [String] -> String +conlls2latexDoc = + render . + latexDoc . + vcat . + intersperse (text "" $+$ app "vspace" (text "4mm")) . + map conll2latex . + filter (not . null) + +conll2latex :: String -> Doc +conll2latex = ppLaTeX . conll2latex' . parseCoNLL + +conll2latex' :: CoNLL -> [LaTeX] +conll2latex' = dep2latex . conll2dep' + +data Dep = Dep { + wordLength :: Int -> Double -- length of word at position int -- was: fixed width, millimetres (>= 20.0) + , tokens :: [(String,String)] -- word, pos (0..) + , deps :: [((Int,Int),String)] -- from, to, label + , root :: Int -- root word position + } + +-- some general measures +defaultWordLength = 20.0 -- the default fixed width word length, making word 100 units +defaultUnit = 0.2 -- unit in latex pictures, 0.2 millimetres +spaceLength = 10.0 +charWidth = 1.8 + +wsize rwld w = 100 * rwld w + spaceLength -- word length, units +wpos rwld i = sum [wsize rwld j | j <- [0..i-1]] -- start position of the i'th word +wdist rwld x y = sum [wsize rwld i | i <- [min x y .. max x y - 1]] -- distance between words x and y +labelheight h = h + arcbase + 3 -- label just above arc; 25 would put it just below +labelstart c = c - 15.0 -- label starts 15u left of arc centre +arcbase = 30.0 -- arcs start and end 40u above the bottom +arcfactor r = r * 600 -- reduction of arc size from word distance +xyratio = 3 -- width/height ratio of arcs + +putArc :: (Int -> Double) -> Int -> Int -> Int -> String -> [DrawingCommand] +putArc frwld height x y label = [oval,arrowhead,labelling] where + oval = Put (ctr,arcbase) (OvalTop (wdth,hght)) + arrowhead = Put (endp,arcbase + 5) (ArrowDown 5) -- downgoing arrow 5u above the arc base + labelling = Put (labelstart ctr,labelheight (hght/2)) (TinyText label) + dxy = wdist frwld x y -- distance between words, >>= 20.0 + ndxy = 100 * rwld * fromIntegral height -- distance that is indep of word length + hdxy = dxy / 2 -- half the distance + wdth = dxy - (arcfactor rwld)/dxy -- longer arcs are wider in proportion + hght = ndxy / (xyratio * rwld) -- arc height is independent of word length + begp = min x y -- begin position of oval + ctr = wpos frwld begp + hdxy + (if x < y then 20 else 10) -- LR arcs are farther right from center of oval + endp = (if x < y then (+) else (-)) ctr (wdth/2) -- the point of the arrow + rwld = 0.5 ---- + +dep2latex :: Dep -> [LaTeX] +dep2latex d = + [Comment (unwords (map fst (tokens d))), + Picture defaultUnit (width,height) ( + [Put (wpos rwld i,0) (Text w) | (i,w) <- zip [0..] (map fst (tokens d))] -- words + ++ [Put (wpos rwld i,15) (TinyText w) | (i,w) <- zip [0..] (map snd (tokens d))] -- pos tags 15u above bottom + ++ concat [putArc rwld (aheight x y) x y label | ((x,y),label) <- deps d] -- arcs and labels + ++ [Put (wpos rwld (root d) + 15,height) (ArrowDown (height-arcbase))] + ++ [Put (wpos rwld (root d) + 20,height - 10) (TinyText "ROOT")] + )] + where + wld i = wordLength d i -- >= 20.0 + rwld i = (wld i) / defaultWordLength -- >= 1.0 + aheight x y = depth (min x y) (max x y) + 1 ---- abs (x-y) + arcs = [(min u v, max u v) | ((u,v),_) <- deps d] + depth x y = case [(u,v) | (u,v) <- arcs, (x < u && v <= y) || (x == u && v < y)] of ---- only projective arcs counted + [] -> 0 + uvs -> 1 + maximum (0:[depth u v | (u,v) <- uvs]) + width = {-round-} (sum [wsize rwld w | (w,_) <- zip [0..] (tokens d)]) + {-round-} spaceLength * fromIntegral ((length (tokens d)) - 1) + height = 50 + 20 * {-round-} (maximum (0:[aheight x y | ((x,y),_) <- deps d])) + +type CoNLL = [[String]] +parseCoNLL :: String -> CoNLL +parseCoNLL = map words . lines + +--conll2dep :: String -> Dep +--conll2dep = conll2dep' . parseCoNLL + +conll2dep' :: CoNLL -> Dep +conll2dep' ls = Dep { + wordLength = wld + , tokens = toks + , deps = dps + , root = head $ [read x-1 | x:_:_:_:_:_:"0":_ <- ls] ++ [1] + } + where + wld i = maximum (0:[charWidth * fromIntegral (length w) | w <- let (tok,pos) = toks !! i in [tok,pos]]) + toks = [(w,c) | _:w:_:c:_ <- ls] + dps = [((read y-1, read x-1),lab) | x:_:_:_:_:_:y:lab:_ <- ls, y /="0"] + --maxdist = maximum [abs (x-y) | ((x,y),_) <- dps] + + +-- * LaTeX Pictures (see https://en.wikibooks.org/wiki/LaTeX/Picture) + +-- We render both LaTeX and SVG from this intermediate representation of +-- LaTeX pictures. + +data LaTeX = Comment String | Picture UnitLengthMM Size [DrawingCommand] +data DrawingCommand = Put Position Object +data Object = Text String | TinyText String | OvalTop Size | ArrowDown Length + +type UnitLengthMM = Double +type Size = (Double,Double) +type Position = (Double,Double) +type Length = Double + + +-- * latex formatting +ppLaTeX = vcat . map ppLaTeX1 + where + ppLaTeX1 el = + case el of + Comment s -> comment s + Picture unit size cmds -> + app "setlength{\\unitlength}" (text (show unit ++ "mm")) + $$ hang (app "begin" (text "picture")<>text (show size)) 2 + (vcat (map ppDrawingCommand cmds)) + $$ app "end" (text "picture") + $$ text "" + + ppDrawingCommand (Put pos obj) = put pos (ppObject obj) + + ppObject obj = + case obj of + Text s -> text s + TinyText s -> small (text s) + OvalTop size -> text "\\oval" <> text (show size) <> text "[t]" + ArrowDown len -> app "vector(0,-1)" (text (show len)) + + put p@(_,_) = app ("put" ++ show p) + small w = text "{\\tiny" <+> w <> text "}" + comment s = text "%%" <+> text s -- line break show follow + +app macro arg = text "\\" <> text macro <> text "{" <> arg <> text "}" + + +latexDoc :: Doc -> Doc +latexDoc body = + vcat [text "\\documentclass{article}", + text "\\usepackage[utf8]{inputenc}", + text "\\begin{document}", + body, + text "\\end{document}"] + +-- * SVG (see https://www.w3.org/Graphics/SVG/IG/resources/svgprimer.html) + +-- | Render LaTeX pictures as SVG +toSVG = concatMap toSVG1 + where + toSVG1 el = + case el of + Comment s -> [] + Picture unit size@(w,h) cmds -> + [Elem "svg" ["width".=x1,"height".=y0+5, + ("viewBox",unwords (map show [0,0,x1,y0+5])), + ("version","1.1"), + ("xmlns","http://www.w3.org/2000/svg")] + (white_bg:concatMap draw cmds)] + where + white_bg = + Elem "rect" ["x".=0,"y".=0,"width".=x1,"height".=y0+5, + ("fill","white")] [] + + draw (Put pos obj) = objectSVG pos obj + + objectSVG pos obj = + case obj of + Text s -> [text 16 pos s] + TinyText s -> [text 10 pos s] + OvalTop size -> [ovalTop pos size] + ArrowDown len -> arrowDown pos len + + text h (x,y) s = + Elem "text" ["x".=xc x,"y".=yc y-2,"font-size".=h] + [CharData s] + + ovalTop (x,y) (w,h) = + Elem "path" [("d",path),("stroke","black"),("fill","none")] [] + where + x1 = x-w/2 + x2 = min x (x1+r) + x3 = max x (x4-r) + x4 = x+w/2 + y1 = y + y2 = y+r + r = h/2 + sx = show . xc + sy = show . yc + path = unwords (["M",sx x1,sy y1,"Q",sx x1,sy y2,sx x2,sy y2, + "L",sx x3,sy y2,"Q",sx x4,sy y2,sx x4,sy y1]) + + arrowDown (x,y) len = + [Elem "line" ["x1".=xc x,"y1".=yc y,"x2".=xc x,"y2".=y2, + ("stroke","black")] [], + Elem "path" [("d",unwords arrowhead)] []] + where + x2 = xc x + y2 = yc (y-len) + arrowhead = "M":map show [x2,y2,x2-3,y2-6,x2+3,y2-6] + + xc x = num x+5 + yc y = y0-num y + x1 = num w+10 + y0 = num h+20 + num x = round (scale*x) + scale = unit*5 + + infix 0 .= + n.=v = (n,show v) + +-- * SVG is XML + +data SVG = CharData String | Elem TagName Attrs [SVG] +type TagName = String +type Attrs = [(String,String)] + +ppSVG svg = + vcat [text "", + text "", + text "", + vcat (map ppSVG1 svg)] -- It should be a single element... + where + ppSVG1 svg1 = + case svg1 of + CharData s -> text (encode s) + Elem tag attrs [] -> + text "<"<>text tag<>cat (map attr attrs) <> text "/>" + Elem tag attrs svg -> + cat [text "<"<>text tag<>cat (map attr attrs) <> text ">", + nest 2 (cat (map ppSVG1 svg)), + text "text tag<>text ">"] + + attr (n,v) = text " "<>text n<>text "=\""<>text (encode v)<>text "\"" + + encode s = foldr encodeEntity "" s + + encodeEntity = encodeEntity' (const False) + encodeEntity' esc c r = + case c of + '&' -> "&"++r + '<' -> "<"++r + '>' -> ">"++r + _ -> c:r + + +---------------------------------- +-- concrete syntax annotations (local) on top of conll +-- examples of annotations: +-- UseComp {"not"} PART neg head +-- UseComp {*} AUX cop head + +type CncLabels = [(String, String -> Maybe (String -> String,String,String))] +-- (fun, word -> (pos,label,target)) +-- the pos can remain unchanged, as in the current notation in the article + +fixCoNLL :: CncLabels -> CoNLL -> CoNLL +fixCoNLL labels conll = map fixc conll where + fixc row = case row of + (i:word:fun:pos:cat:x_:"0":"dep":xs) -> (i:word:fun:pos:cat:x_:"0":"root":xs) --- change the root label from dep to root + (i:word:fun:pos:cat:x_:j:label:xs) -> case look (fun,word) of + Just (pos',label',"head") -> (i:word:fun:pos' pos:cat:x_:j :label':xs) + Just (pos',label',target) -> (i:word:fun:pos' pos:cat:x_: getDep j target:label':xs) + _ -> row + _ -> row + + look (fun,word) = case lookup fun labels of + Just relabel -> case relabel word of + Just row -> Just row + _ -> case lookup "*" labels of + Just starlabel -> starlabel word + _ -> Nothing + _ -> case lookup "*" labels of + Just starlabel -> starlabel word + _ -> Nothing + + getDep j label = maybe j id $ lookup (label,j) [((label,j),i) | i:word:fun:pos:cat:x_:j:label:xs <- conll] + +getCncDepLabels :: String -> CncLabels +getCncDepLabels = map merge . groupBy (\ (x,_) (a,_) -> x == a) . concatMap analyse . filter choose . lines where + --- choose is for compatibility with the general notation + choose line = notElem '(' line && elem '{' line --- ignoring non-local (with "(") and abstract (without "{") rules + + analyse line = case break (=='{') line of + (beg,_:ws) -> case break (=='}') ws of + (toks,_:target) -> case (words beg, words target) of + (fun:_,[ label,j]) -> [(fun, (tok, (id, label,j))) | tok <- getToks toks] + (fun:_,[pos,label,j]) -> [(fun, (tok, (const pos,label,j))) | tok <- getToks toks] + _ -> [] + _ -> [] + _ -> [] + merge rules@((fun,_):_) = (fun, \tok -> + case lookup tok (map snd rules) of + Just new -> return new + _ -> lookup "*" (map snd rules) + ) + getToks = words . map (\c -> if elem c "\"," then ' ' else c) + +printCoNLL :: CoNLL -> String +printCoNLL = unlines . map (concat . intersperse "\t") + + newGraphvizOptions :: Ptr GuPool -> GraphvizOptions -> IO (Ptr PgfGraphvizOptions) newGraphvizOptions pool opts = do c_opts <- gu_malloc pool (#size PgfGraphvizOptions) @@ -542,7 +969,7 @@ parseWithHeuristics :: Concr -- ^ the language with which we parse -- If a literal has been recognized then the output should -- be Just (expr,probability,end_offset) -> ParseOutput -parseWithHeuristics lang (Type ctype touchType) sent heuristic callbacks = +parseWithHeuristics lang (Type ctype _) sent heuristic callbacks = unsafePerformIO $ do exprPl <- gu_new_pool parsePl <- gu_new_pool @@ -550,7 +977,6 @@ parseWithHeuristics lang (Type ctype touchType) sent heuristic callbacks = sent <- newUtf8CString sent parsePl callbacks_map <- mkCallbacksMap (concr lang) callbacks parsePl enum <- pgf_parse_with_heuristics (concr lang) ctype sent heuristic callbacks_map exn parsePl exprPl - touchType failed <- gu_exn_is_raised exn if failed then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError @@ -618,6 +1044,26 @@ mkCallbacksMap concr callbacks pool = do predict_callback _ _ _ = return nullPtr +complete :: Concr -- ^ the language with which we do word completion + -> Type -- ^ the start category + -> String -- ^ the input sentence + -> String -- ^ prefix for the word to be completed + -> [(String, Cat, Fun, Float)] +complete lang (Type ctype _) sent prefix = + unsafePerformIO $ + do pl <- gu_new_pool + exn <- gu_new_exn pl + sent <- newUtf8CString sent pl + prefix <- newUtf8CString prefix pl + enum <- pgf_complete (concr lang) ctype sent prefix exn pl + failed <- gu_exn_is_raised exn + if failed + then do gu_pool_free pl + return [] + else do fpl <- newForeignPtr gu_pool_finalizer pl + tokens <- fromPgfTokenEnum enum fpl + return tokens + lookupSentence :: Concr -- ^ the language with which we parse -> Type -- ^ the start category -> String -- ^ the input sentence @@ -862,9 +1308,8 @@ type LIndex = Int -- mark the beginning and the end of each constituent. data BracketedString = Leaf String -- ^ this is the leaf i.e. a single token - | BIND -- ^ the surrounding tokens must be bound together - | Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [BracketedString] - -- ^ this is a bracket. The 'CId' is the category of + | Bracket Cat {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex Fun [BracketedString] + -- ^ this is a bracket. The 'Cat' is the category of -- the phrase. The 'FId' is an unique identifier for -- every phrase in the sentence. For context-free grammars -- i.e. without discontinuous constituents this identifier @@ -875,7 +1320,7 @@ data BracketedString -- the constituent index i.e. 'LIndex'. If the grammar is reduplicating -- then the constituent indices will be the same for all brackets -- that represents the same constituent. - -- The second 'CId' is the name of the abstract function that generated + -- The 'Fun' is the name of the abstract function that generated -- this phrase. -- | Renders the bracketed string as a string where @@ -885,13 +1330,11 @@ showBracketedString :: BracketedString -> String showBracketedString = render . ppBracketedString ppBracketedString (Leaf t) = text t -ppBracketedString BIND = text "&+" ppBracketedString (Bracket cat fid index _ bss) = parens (text cat <> colon <> int fid <+> hsep (map ppBracketedString bss)) -- | Extracts the sequence of tokens from the bracketed string flattenBracketedString :: BracketedString -> [String] flattenBracketedString (Leaf w) = [w] -flattenBracketedString BIND = [] flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString bss bracketedLinearize :: Concr -> Expr -> [BracketedString] @@ -909,8 +1352,27 @@ bracketedLinearize lang e = unsafePerformIO $ return [] else do ctree <- pgf_lzr_wrap_linref ctree pl ref <- newIORef ([],[]) - withBracketLinFuncs ref exn $ \ppLinFuncs -> - pgf_lzr_linearize (concr lang) ctree 0 ppLinFuncs pl + allocaBytes (#size PgfLinFuncs) $ \pLinFuncs -> + alloca $ \ppLinFuncs -> do + fptr_symbol_token <- wrapSymbolTokenCallback (symbol_token ref) + fptr_begin_phrase <- wrapPhraseCallback (begin_phrase ref) + fptr_end_phrase <- wrapPhraseCallback (end_phrase ref) + fptr_symbol_ne <- wrapSymbolNonExistCallback (symbol_ne exn) + fptr_symbol_meta <- wrapSymbolMetaCallback (symbol_meta ref) + (#poke PgfLinFuncs, symbol_token) pLinFuncs fptr_symbol_token + (#poke PgfLinFuncs, begin_phrase) pLinFuncs fptr_begin_phrase + (#poke PgfLinFuncs, end_phrase) pLinFuncs fptr_end_phrase + (#poke PgfLinFuncs, symbol_ne) pLinFuncs fptr_symbol_ne + (#poke PgfLinFuncs, symbol_bind) pLinFuncs nullPtr + (#poke PgfLinFuncs, symbol_capit) pLinFuncs nullPtr + (#poke PgfLinFuncs, symbol_meta) pLinFuncs fptr_symbol_meta + poke ppLinFuncs pLinFuncs + pgf_lzr_linearize (concr lang) ctree 0 ppLinFuncs pl + freeHaskellFunPtr fptr_symbol_token + freeHaskellFunPtr fptr_begin_phrase + freeHaskellFunPtr fptr_end_phrase + freeHaskellFunPtr fptr_symbol_ne + freeHaskellFunPtr fptr_symbol_meta failed <- gu_exn_is_raised exn if failed then do is_nonexist <- gu_exn_caught exn gu_exn_type_PgfLinNonExist @@ -919,65 +1381,6 @@ bracketedLinearize lang e = unsafePerformIO $ else throwExn exn else do (_,bs) <- readIORef ref return (reverse bs) - -bracketedLinearizeAll :: Concr -> Expr -> [[BracketedString]] -bracketedLinearizeAll lang e = unsafePerformIO $ - withGuPool $ \pl -> - do exn <- gu_new_exn pl - cts <- pgf_lzr_concretize (concr lang) (expr e) exn pl - failed <- gu_exn_is_raised exn - if failed - then do touchExpr e - throwExn exn - else do ref <- newIORef ([],[]) - bss <- withBracketLinFuncs ref exn $ \ppLinFuncs -> - collect ref cts ppLinFuncs exn pl - touchExpr e - return bss - where - collect ref cts ppLinFuncs exn pl = withGuPool $ \tmpPl -> do - ctree <- alloca $ \ptr -> do gu_enum_next cts ptr tmpPl - peek ptr - if ctree == nullPtr - then return [] - else do ctree <- pgf_lzr_wrap_linref ctree pl - pgf_lzr_linearize (concr lang) ctree 0 ppLinFuncs pl - failed <- gu_exn_is_raised exn - if failed - then do is_nonexist <- gu_exn_caught exn gu_exn_type_PgfLinNonExist - if is_nonexist - then collect ref cts ppLinFuncs exn pl - else throwExn exn - else do (_,bs) <- readIORef ref - writeIORef ref ([],[]) - bss <- collect ref cts ppLinFuncs exn pl - return (reverse bs : bss) - -withBracketLinFuncs ref exn f = - allocaBytes (#size PgfLinFuncs) $ \pLinFuncs -> - alloca $ \ppLinFuncs -> do - fptr_symbol_token <- wrapSymbolTokenCallback (symbol_token ref) - fptr_begin_phrase <- wrapPhraseCallback (begin_phrase ref) - fptr_end_phrase <- wrapPhraseCallback (end_phrase ref) - fptr_symbol_ne <- wrapSymbolNonExistCallback (symbol_ne exn) - fptr_symbol_bind <- wrapSymbolBindCallback (symbol_bind ref) - fptr_symbol_meta <- wrapSymbolMetaCallback (symbol_meta ref) - (#poke PgfLinFuncs, symbol_token) pLinFuncs fptr_symbol_token - (#poke PgfLinFuncs, begin_phrase) pLinFuncs fptr_begin_phrase - (#poke PgfLinFuncs, end_phrase) pLinFuncs fptr_end_phrase - (#poke PgfLinFuncs, symbol_ne) pLinFuncs fptr_symbol_ne - (#poke PgfLinFuncs, symbol_bind) pLinFuncs fptr_symbol_bind - (#poke PgfLinFuncs, symbol_capit) pLinFuncs nullPtr - (#poke PgfLinFuncs, symbol_meta) pLinFuncs fptr_symbol_meta - poke ppLinFuncs pLinFuncs - res <- f ppLinFuncs - freeHaskellFunPtr fptr_symbol_token - freeHaskellFunPtr fptr_begin_phrase - freeHaskellFunPtr fptr_end_phrase - freeHaskellFunPtr fptr_symbol_ne - freeHaskellFunPtr fptr_symbol_bind - freeHaskellFunPtr fptr_symbol_meta - return res where symbol_token ref _ c_token = do (stack,bs) <- readIORef ref @@ -1000,22 +1403,17 @@ withBracketLinFuncs ref exn f = gu_exn_raise exn gu_exn_type_PgfLinNonExist return () - symbol_bind ref _ = do - (stack,bs) <- readIORef ref - writeIORef ref (stack,BIND : bs) - return () - symbol_meta ref _ meta_id = do (stack,bs) <- readIORef ref writeIORef ref (stack,Leaf "?" : bs) -throwExn exn = do - is_exn <- gu_exn_caught exn gu_exn_type_PgfExn - if is_exn - then do c_msg <- (#peek GuExn, data.data) exn - msg <- peekUtf8CString c_msg - throwIO (PGFError msg) - else do throwIO (PGFError "The abstract tree cannot be linearized") + throwExn exn = do + is_exn <- gu_exn_caught exn gu_exn_type_PgfExn + if is_exn + then do c_msg <- (#peek GuExn, data.data) exn + msg <- peekUtf8CString c_msg + throwIO (PGFError msg) + else do throwIO (PGFError "The abstract tree cannot be linearized") alignWords :: Concr -> Expr -> [(String, [Int])] alignWords lang e = unsafePerformIO $ @@ -1128,16 +1526,17 @@ categories p = name <- peekUtf8CString (castPtr key) writeIORef ref $! (name : names) -categoryContext :: PGF -> Cat -> [Hypo] +categoryContext :: PGF -> Cat -> Maybe [Hypo] categoryContext p cat = unsafePerformIO $ withGuPool $ \tmpPl -> do c_cat <- newUtf8CString cat tmpPl c_hypos <- pgf_category_context (pgf p) c_cat if c_hypos == nullPtr - then return [] + then return Nothing else do n_hypos <- (#peek GuSeq, len) c_hypos - peekHypos (c_hypos `plusPtr` (#offset GuSeq, data)) 0 n_hypos + hypos <- peekHypos (c_hypos `plusPtr` (#offset GuSeq, data)) 0 n_hypos + return (Just hypos) where peekHypos :: Ptr a -> Int -> Int -> IO [Hypo] peekHypos c_hypo i n @@ -1152,8 +1551,8 @@ categoryContext p cat = toBindType (#const PGF_BIND_TYPE_EXPLICIT) = Explicit toBindType (#const PGF_BIND_TYPE_IMPLICIT) = Implicit -categoryProb :: PGF -> Cat -> Float -categoryProb p cat = +categoryProbability :: PGF -> Cat -> Float +categoryProbability p cat = unsafePerformIO $ withGuPool $ \tmpPl -> do c_cat <- newUtf8CString cat tmpPl @@ -1164,7 +1563,7 @@ categoryProb p cat = ----------------------------------------------------------------------------- -- Helper functions -fromPgfExprEnum :: Ptr GuEnum -> ForeignPtr GuPool -> IO () -> IO [(Expr, Float)] +fromPgfExprEnum :: Ptr GuEnum -> ForeignPtr GuPool -> Touch -> IO [(Expr, Float)] fromPgfExprEnum enum fpl touch = do pgfExprProb <- alloca $ \ptr -> withForeignPtr fpl $ \pl -> @@ -1178,6 +1577,22 @@ fromPgfExprEnum enum fpl touch = prob <- (#peek PgfExprProb, prob) pgfExprProb return ((Expr expr touch,prob) : ts) +fromPgfTokenEnum :: Ptr GuEnum -> ForeignPtr GuPool -> IO [(String, Cat, Fun, Float)] +fromPgfTokenEnum enum fpl = + do pgfTokenProb <- alloca $ \ptr -> + withForeignPtr fpl $ \pl -> + do gu_enum_next enum ptr pl + peek ptr + if pgfTokenProb == nullPtr + then do finalizeForeignPtr fpl + return [] + else do tok <- (#peek PgfTokenProb, tok) pgfTokenProb >>= peekUtf8CString + cat <- (#peek PgfTokenProb, cat) pgfTokenProb >>= peekUtf8CString + fun <- (#peek PgfTokenProb, fun) pgfTokenProb >>= peekUtf8CString + prob <- (#peek PgfTokenProb, prob) pgfTokenProb + ts <- unsafeInterleaveIO (fromPgfTokenEnum enum fpl) + return ((tok,cat,fun,prob) : ts) + ----------------------------------------------------------------------- -- Exceptions @@ -1256,3 +1671,7 @@ capitalized' test s@(c:_) | test c = case span isSpace rest1 of (space,rest2) -> Just (name++space,rest2) capitalized' not s = Nothing + +tag i + | i < 0 = char 'r' <> int (negate i) + | otherwise = char 'n' <> int i diff --git a/src/runtime/haskell-bind/PGF2/Expr.hsc b/src/runtime/haskell-bind/PGF2/Expr.hsc index 096d15bfa..10db1291a 100644 --- a/src/runtime/haskell-bind/PGF2/Expr.hsc +++ b/src/runtime/haskell-bind/PGF2/Expr.hsc @@ -8,19 +8,13 @@ import Foreign.C import Data.IORef import PGF2.FFI --- | An data type that represents --- identifiers for functions and categories in PGF. -type CId = String - -wildCId = "_" :: CId - -type Cat = CId -- ^ Name of syntactic category -type Fun = CId -- ^ Name of function +type Cat = String -- ^ Name of syntactic category +type Fun = String -- ^ Name of function data BindType = Explicit | Implicit - deriving Show + deriving (Show, Eq, Ord) ----------------------------------------------------------------------------- -- Expressions @@ -43,7 +37,7 @@ instance Eq Expr where return (res /= 0) -- | Constructs an expression by lambda abstraction -mkAbs :: BindType -> CId -> Expr -> Expr +mkAbs :: BindType -> String -> Expr -> Expr mkAbs bind_type var (Expr body bodyTouch) = unsafePerformIO $ do exprPl <- gu_new_pool @@ -58,7 +52,7 @@ mkAbs bind_type var (Expr body bodyTouch) = Implicit -> (#const PGF_BIND_TYPE_IMPLICIT) -- | Decomposes an expression into an abstraction and a body -unAbs :: Expr -> Maybe (BindType, CId, Expr) +unAbs :: Expr -> Maybe (BindType, String, Expr) unAbs (Expr expr touch) = unsafePerformIO $ do c_abs <- pgf_expr_unabs expr @@ -103,6 +97,17 @@ unApp (Expr expr touch) = c_args <- peekArray (fromIntegral arity) (appl `plusPtr` (#offset PgfApplication, args)) return $ Just (fun, [Expr c_arg touch | c_arg <- c_args]) +-- | Decomposes an expression into an application of a function +unapply :: Expr -> (Expr,[Expr]) +unapply (Expr expr touch) = + unsafePerformIO $ + withGuPool $ \pl -> do + appl <- pgf_expr_unapply_ex expr pl + efun <- (#peek PgfApplication, efun) appl + arity <- (#peek PgfApplication, n_args) appl :: IO CInt + c_args <- peekArray (fromIntegral arity) (appl `plusPtr` (#offset PgfApplication, args)) + return (Expr efun touch, [Expr c_arg touch | c_arg <- c_args]) + -- | Constructs an expression from a string literal mkStr :: String -> Expr mkStr str = @@ -184,9 +189,6 @@ unMeta (Expr expr touch) = touch return (Just (fromIntegral (id :: CInt))) --- | this functions is only for backward compatibility with the old Haskell runtime -mkCId x = x - -- | parses a 'String' as an expression readExpr :: String -> Maybe Expr readExpr str = @@ -204,6 +206,22 @@ readExpr str = else do gu_pool_free exprPl return Nothing +pIdent :: ReadS String +pIdent str = + unsafePerformIO $ + withGuPool $ \tmpPl -> + do ref <- newIORef (str,str,str) + exn <- gu_new_exn tmpPl + c_fetch_char <- wrapParserGetc (fetch_char ref) + c_parser <- pgf_new_parser nullPtr c_fetch_char tmpPl tmpPl exn + c_ident <- pgf_expr_parser_ident c_parser + status <- gu_exn_is_raised exn + if (not status && c_ident /= nullPtr) + then do ident <- peekUtf8CString c_ident + (str,_,_) <- readIORef ref + return [(ident,str)] + else do return [] + pExpr :: ReadS Expr pExpr str = unsafePerformIO $ @@ -221,19 +239,19 @@ pExpr str = return [(Expr c_expr (touchForeignPtr exprFPl),str)] else do gu_pool_free exprPl return [] - where - fetch_char :: IORef (String,String,String) -> Ptr () -> (#type bool) -> Ptr GuExn -> IO (#type GuUCS) - fetch_char ref _ mark exn = do - (str1,str2,str3) <- readIORef ref - let str1' = if mark /= 0 - then str2 - else str1 - case str3 of - [] -> do writeIORef ref (str1',str3,[]) - gu_exn_raise exn gu_exn_type_GuEOF - return (-1) - (c:cs) -> do writeIORef ref (str1',str3,cs) - return ((fromIntegral . fromEnum) c) + +fetch_char :: IORef (String,String,String) -> Ptr () -> (#type bool) -> Ptr GuExn -> IO (#type GuUCS) +fetch_char ref _ mark exn = do + (str1,str2,str3) <- readIORef ref + let str1' = if mark /= 0 + then str2 + else str1 + case str3 of + [] -> do writeIORef ref (str1',str3,[]) + gu_exn_raise exn gu_exn_type_GuEOF + return (-1) + (c:cs) -> do writeIORef ref (str1',str3,cs) + return ((fromIntegral . fromEnum) c) foreign import ccall "pgf/expr.h pgf_new_parser" pgf_new_parser :: Ptr () -> (FunPtr ParserGetc) -> Ptr GuPool -> Ptr GuPool -> Ptr GuExn -> IO (Ptr PgfExprParser) @@ -241,16 +259,20 @@ foreign import ccall "pgf/expr.h pgf_new_parser" foreign import ccall "pgf/expr.h pgf_expr_parser_expr" pgf_expr_parser_expr :: Ptr PgfExprParser -> (#type bool) -> IO PgfExpr +foreign import ccall "pgf/expr.h pgf_expr_parser_ident" + pgf_expr_parser_ident :: Ptr PgfExprParser -> IO CString + type ParserGetc = Ptr () -> (#type bool) -> Ptr GuExn -> IO (#type GuUCS) foreign import ccall "wrapper" wrapParserGetc :: ParserGetc -> IO (FunPtr ParserGetc) + -- | renders an expression as a 'String'. The list -- of identifiers is the list of all free variables -- in the expression in order reverse to the order -- of binding. -showExpr :: [CId] -> Expr -> String +showExpr :: [String] -> Expr -> String showExpr scope e = unsafePerformIO $ withGuPool $ \tmpPl -> diff --git a/src/runtime/haskell-bind/PGF2/FFI.hsc b/src/runtime/haskell-bind/PGF2/FFI.hsc index 39b18fcf3..f5a30b006 100644 --- a/src/runtime/haskell-bind/PGF2/FFI.hsc +++ b/src/runtime/haskell-bind/PGF2/FFI.hsc @@ -15,12 +15,13 @@ import Control.Exception import GHC.Ptr import Data.Int import Data.Word +import qualified Data.Map as Map type Touch = IO () -- | An abstract data type representing multilingual grammar -- in Portable Grammar Format. -data PGF = PGF {pgf :: Ptr PgfPGF, touchPGF :: Touch} +data PGF = PGF {pgf :: Ptr PgfPGF, langs :: Map.Map String Concr, touchPGF :: Touch} data Concr = Concr {concr :: Ptr PgfConcr, touchConcr :: Touch} ------------------------------------------------------------------ @@ -32,7 +33,6 @@ data GuIn data GuOut data GuKind data GuType -data GuString data GuStringBuf data GuMap data GuMapItor @@ -266,7 +266,13 @@ foreign import ccall "pgf/pgf.h pgf_read" pgf_read :: CString -> Ptr GuPool -> Ptr GuExn -> IO (Ptr PgfPGF) foreign import ccall "pgf/pgf.h pgf_write" - pgf_write :: Ptr PgfPGF -> CString -> Ptr GuExn -> IO () + pgf_write :: Ptr PgfPGF -> CSizeT -> Ptr (Ptr PgfConcr) -> CString -> Ptr GuExn -> IO () + +foreign import ccall "pgf/writer.h pgf_concrete_save" + pgf_concrete_save :: Ptr PgfConcr -> CString -> Ptr GuExn -> IO () + +foreign import ccall "pgf/pgf.h pgf_have_same_abstract" + pgf_have_same_abstract :: Ptr PgfPGF -> Ptr PgfPGF -> (#type bool) foreign import ccall "pgf/pgf.h pgf_abstract_name" pgf_abstract_name :: Ptr PgfPGF -> IO CString @@ -292,6 +298,9 @@ foreign import ccall "pgf/pgf.h pgf_language_code" foreign import ccall "pgf/pgf.h pgf_iter_categories" pgf_iter_categories :: Ptr PgfPGF -> Ptr GuMapItor -> Ptr GuExn -> IO () +foreign import ccall "pgf/pgf.h pgf_concrete_fix_internals" + pgf_concrete_fix_internals :: Ptr PgfConcr -> IO () + foreign import ccall "pgf/pgf.h pgf_start_cat" pgf_start_cat :: Ptr PgfPGF -> Ptr GuPool -> IO PgfType @@ -340,7 +349,6 @@ foreign import ccall "pgf/pgf.h pgf_lzr_get_table" type SymbolTokenCallback = Ptr (Ptr PgfLinFuncs) -> CString -> IO () type PhraseCallback = Ptr (Ptr PgfLinFuncs) -> CString -> CInt -> CSizeT -> CString -> IO () type NonExistCallback = Ptr (Ptr PgfLinFuncs) -> IO () -type BindCallback = Ptr (Ptr PgfLinFuncs) -> IO () type MetaCallback = Ptr (Ptr PgfLinFuncs) -> CInt -> IO () foreign import ccall "wrapper" @@ -352,9 +360,6 @@ foreign import ccall "wrapper" foreign import ccall "wrapper" wrapSymbolNonExistCallback :: NonExistCallback -> IO (FunPtr NonExistCallback) -foreign import ccall "wrapper" - wrapSymbolBindCallback :: BindCallback -> IO (FunPtr BindCallback) - foreign import ccall "wrapper" wrapSymbolMetaCallback :: MetaCallback -> IO (FunPtr MetaCallback) @@ -364,6 +369,9 @@ foreign import ccall "pgf/pgf.h pgf_align_words" foreign import ccall "pgf/pgf.h pgf_parse_with_heuristics" pgf_parse_with_heuristics :: Ptr PgfConcr -> PgfType -> CString -> Double -> Ptr PgfCallbacksMap -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum) +foreign import ccall "pgf/pgf.h pgf_complete" + pgf_complete :: Ptr PgfConcr -> PgfType -> CString -> CString -> Ptr GuExn -> Ptr GuPool -> IO (Ptr GuEnum) + foreign import ccall "pgf/pgf.h pgf_lookup_sentence" pgf_lookup_sentence :: Ptr PgfConcr -> PgfType -> CString -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum) @@ -425,6 +433,9 @@ foreign import ccall "pgf/pgf.h pgf_expr_apply" foreign import ccall "pgf/pgf.h pgf_expr_unapply" pgf_expr_unapply :: PgfExpr -> Ptr GuPool -> IO (Ptr PgfApplication) + +foreign import ccall "pgf/pgf.h pgf_expr_unapply_ex" + pgf_expr_unapply_ex :: PgfExpr -> Ptr GuPool -> IO (Ptr PgfApplication) foreign import ccall "pgf/pgf.h pgf_expr_abs" pgf_expr_abs :: PgfBindType -> CString -> PgfExpr -> Ptr GuPool -> IO PgfExpr @@ -450,12 +461,12 @@ foreign import ccall "pgf/pgf.h pgf_expr_float" foreign import ccall "pgf/pgf.h pgf_expr_unlit" pgf_expr_unlit :: PgfExpr -> CInt -> IO (Ptr a) -foreign import ccall "pgf/expr.h pgf_expr_arity" - pgf_expr_arity :: PgfExpr -> IO CInt - foreign import ccall "pgf/expr.h pgf_expr_eq" pgf_expr_eq :: PgfExpr -> PgfExpr -> IO CInt +foreign import ccall "pgf/expr.h pgf_type_eq" + pgf_type_eq :: PgfType -> PgfType -> IO (#type bool) + foreign import ccall "pgf/expr.h pgf_expr_hash" pgf_expr_hash :: GuHash -> PgfExpr -> IO GuHash @@ -499,7 +510,7 @@ foreign import ccall "pgf/pgf.h pgf_generate_all" pgf_generate_all :: Ptr PgfPGF -> PgfType -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum) foreign import ccall "pgf/pgf.h pgf_print" - pgf_print :: Ptr PgfPGF -> Ptr GuOut -> Ptr GuExn -> IO () + pgf_print :: Ptr PgfPGF -> CSizeT -> Ptr (Ptr PgfConcr) -> Ptr GuOut -> Ptr GuExn -> IO () foreign import ccall "pgf/expr.h pgf_read_expr" pgf_read_expr :: Ptr GuIn -> Ptr GuPool -> Ptr GuPool -> Ptr GuExn -> IO PgfExpr diff --git a/src/runtime/haskell-bind/PGF2/Internal.hsc b/src/runtime/haskell-bind/PGF2/Internal.hsc index c4aef323a..d22df08b3 100644 --- a/src/runtime/haskell-bind/PGF2/Internal.hsc +++ b/src/runtime/haskell-bind/PGF2/Internal.hsc @@ -2,18 +2,25 @@ module PGF2.Internal(-- * Access the internal structures FId,isPredefFId, - FunId,Token,Production(..),PArg(..),Symbol(..),Literal(..), + FunId,SeqId,Token,Production(..),PArg(..),Symbol(..),Literal(..), globalFlags, abstrFlags, concrFlags, concrTotalCats, concrCategories, concrProductions, concrTotalFuns, concrFunction, concrTotalSeqs, concrSequence, - + + -- * Byte code + CodeLabel, Instr(..), IVal(..), TailInfo(..), + -- * Building new PGFs in memory - build, eAbs, eApp, eMeta, eFun, eVar, eTyped, eImplArg, dTyp, hypo, + build, Builder, B, + eAbs, eApp, eMeta, eFun, eVar, eLit, eTyped, eImplArg, dTyp, hypo, AbstrInfo, newAbstr, ConcrInfo, newConcr, newPGF, -- * Write an in-memory PGF to a file - writePGF + unionPGF, writePGF, writeConcr, + + -- * Predefined concrete categories + fidString, fidInt, fidFloat, fidVar, fidStart ) where #include @@ -29,7 +36,7 @@ import Data.IORef import Data.Maybe(fromMaybe) import Data.List(sortBy) import Control.Exception(Exception,throwIO) -import Control.Monad(foldM) +import Control.Monad(foldM,when) import qualified Data.Map as Map type Token = String @@ -50,7 +57,7 @@ data Production = PApply {-# UNPACK #-} !FunId [PArg] | PCoerce {-# UNPACK #-} !FId deriving (Eq,Ord,Show) -data PArg = PArg [FId] {-# UNPACK #-} !FId deriving (Eq,Ord,Show) +data PArg = PArg [(FId,FId)] {-# UNPACK #-} !FId deriving (Eq,Ord,Show) type FunId = Int type SeqId = Int data Literal = @@ -59,6 +66,42 @@ data Literal = | LFlt Double -- ^ a floating point constant deriving (Eq,Ord,Show) +type CodeLabel = Int + +data Instr + = CHECK_ARGS {-# UNPACK #-} !Int + | CASE Fun {-# UNPACK #-} !CodeLabel + | CASE_LIT Literal {-# UNPACK #-} !CodeLabel + | SAVE {-# UNPACK #-} !Int + | ALLOC {-# UNPACK #-} !Int + | PUT_CONSTR Fun + | PUT_CLOSURE {-# UNPACK #-} !CodeLabel + | PUT_LIT Literal + | SET IVal + | SET_PAD + | PUSH_FRAME + | PUSH IVal + | TUCK IVal {-# UNPACK #-} !Int + | EVAL IVal TailInfo + | DROP {-# UNPACK #-} !Int + | JUMP {-# UNPACK #-} !CodeLabel + | FAIL + | PUSH_ACCUM Literal + | POP_ACCUM + | ADD + +data IVal + = HEAP {-# UNPACK #-} !Int + | ARG_VAR {-# UNPACK #-} !Int + | FREE_VAR {-# UNPACK #-} !Int + | GLOBAL Fun + deriving Eq + +data TailInfo + = RecCall + | TailCall {-# UNPACK #-} !Int + | UpdateCall + ----------------------------------------------------------------------- -- Access the internal structures @@ -181,7 +224,7 @@ concrProductions c fid = unsafePerformIO $ do hypos <- peekSequence (deRef peekFId) (#size int) c_hypos c_ccat <- (#peek PgfPArg, ccat) ptr fid <- peekFId c_ccat - return (PArg hypos fid) + return (PArg [(fid,fid) | fid <- hypos] fid) peekFId c_ccat = do c_fid <- (#peek PgfCCat, fid) c_ccat @@ -197,6 +240,9 @@ concrTotalFuns c = unsafePerformIO $ do concrFunction :: Concr -> FunId -> (Fun,[SeqId]) concrFunction c funid = unsafePerformIO $ do c_cncfuns <- (#peek PgfConcr, cncfuns) (concr c) + c_len <- (#peek GuSeq, len) c_cncfuns + when (funid >= fromIntegral (c_len :: CSizeT)) $ + throwIO (PGFError ("Invalid concrete function: F"++show funid)) c_cncfun <- peek (c_cncfuns `plusPtr` ((#offset GuSeq, data)+funid*(#size PgfCncFun*))) c_absfun <- (#peek PgfCncFun, absfun) c_cncfun c_name <- (#peek PgfAbsFun, name) c_absfun @@ -220,6 +266,9 @@ concrTotalSeqs c = unsafePerformIO $ do concrSequence :: Concr -> SeqId -> [Symbol] concrSequence c seqid = unsafePerformIO $ do c_sequences <- (#peek PgfConcr, sequences) (concr c) + c_len <- (#peek GuSeq, len) c_sequences + when (seqid >= fromIntegral (c_len :: CSizeT)) $ + throwIO (PGFError ("Invalid concrete sequence: S"++show seqid)) let c_sequence = c_sequences `plusPtr` ((#offset GuSeq, data)+seqid*(#size PgfSequence)) c_syms <- (#peek PgfSequence, syms) c_sequence res <- peekSequence (deRef peekSymbol) (#size GuVariant) c_syms @@ -288,6 +337,9 @@ isPredefFId = (`elem` [fidString, fidInt, fidFloat, fidVar]) data Builder s = Builder (Ptr GuPool) Touch newtype B s a = B a +instance Functor (B s) where + fmap f (B x) = B (f x) + build :: (forall s . (?builder :: Builder s) => B s a) -> a build f = unsafePerformIO $ do @@ -376,6 +428,21 @@ eVar var = where (Builder pool touch) = ?builder +eLit :: (?builder :: Builder s) => Literal -> B s Expr +eLit value = + unsafePerformIO $ + alloca $ \pptr -> do + ptr <- gu_alloc_variant (#const PGF_EXPR_LIT) + (fromIntegral (#size PgfExprLit)) + (#const gu_alignof(PgfExprLit)) + pptr pool + c_value <- newLiteral value pool + (#poke PgfExprLit, lit) ptr c_value + e <- peek pptr + return (B (Expr e touch)) + where + (Builder pool touch) = ?builder + eTyped :: (?builder :: Builder s) => B s Expr -> B s Type -> B s Expr eTyped (B (Expr e _)) (B (Type ty _)) = unsafePerformIO $ @@ -405,7 +472,7 @@ eImplArg (B (Expr e _)) = where (Builder pool touch) = ?builder -hypo :: BindType -> CId -> B s Type -> (B s Hypo) +hypo :: BindType -> String -> B s Type -> (B s Hypo) hypo bind_type var (B ty) = B (bind_type,var,ty) dTyp :: (?builder :: Builder s) => [B s Hypo] -> Cat -> [B s Expr] -> B s Type @@ -450,14 +517,14 @@ data AbstrInfo = AbstrInfo (Ptr GuSeq) (Ptr GuSeq) (Map.Map String (Ptr PgfAbsCa newAbstr :: (?builder :: Builder s) => [(String,Literal)] -> [(Cat,[B s Hypo],Float)] -> [(Fun,B s Type,Int,Float)] -> - AbstrInfo + B s AbstrInfo newAbstr aflags cats funs = unsafePerformIO $ do c_aflags <- newFlags aflags pool (c_cats,abscats) <- newAbsCats (sortByFst3 cats) pool (c_funs,absfuns) <- newAbsFuns (sortByFst4 funs) pool c_abs_lin_fun <- newAbsLinFun c_non_lexical_buf <- gu_make_buf (#size PgfProductionIdxEntry) pool - return (AbstrInfo c_aflags c_cats abscats c_funs absfuns c_abs_lin_fun c_non_lexical_buf touch) + return (B (AbstrInfo c_aflags c_cats abscats c_funs absfuns c_abs_lin_fun c_non_lexical_buf touch)) where (Builder pool touch) = ?builder @@ -525,7 +592,7 @@ newAbstr aflags cats funs = unsafePerformIO $ do data ConcrInfo = ConcrInfo (Ptr GuSeq) (Ptr GuMap) (Ptr GuMap) (Ptr GuSeq) (Ptr GuSeq) (Ptr GuMap) (Ptr PgfConcr -> Ptr GuPool -> IO ()) CInt -newConcr :: (?builder :: Builder s) => AbstrInfo -> +newConcr :: (?builder :: Builder s) => B s AbstrInfo -> [(String,Literal)] -> -- ^ Concrete syntax flags [(String,String)] -> -- ^ Printnames [(FId,[FunId])] -> -- ^ Lindefs @@ -535,8 +602,8 @@ newConcr :: (?builder :: Builder s) => AbstrInfo -> [[Symbol]] -> -- ^ Sequences (must be sorted) [(Cat,FId,FId,[String])] -> -- ^ Concrete categories FId -> -- ^ The total count of the categories - ConcrInfo -newConcr (AbstrInfo _ _ abscats _ absfuns c_abs_lin_fun c_non_lexical_buf _) cflags printnames lindefs linrefs prods cncfuns sequences cnccats total_cats = unsafePerformIO $ do + B s ConcrInfo +newConcr (B (AbstrInfo _ _ abscats _ absfuns c_abs_lin_fun c_non_lexical_buf _)) cflags printnames lindefs linrefs prods cncfuns sequences cnccats total_cats = unsafePerformIO $ do c_cflags <- newFlags cflags pool c_printname <- newMap (#size GuString) gu_string_hasher newUtf8CString (#size GuString) (pokeString pool) @@ -553,12 +620,12 @@ newConcr (AbstrInfo _ _ abscats _ absfuns c_abs_lin_fun c_non_lexical_buf _) cf mapM_ (addLinrefs c_ccats funs_ptr) linrefs mk_index <- foldM (addProductions c_ccats funs_ptr c_non_lexical_buf) (\concr pool -> return ()) prods c_cnccats <- newMap (#size GuString) gu_string_hasher newUtf8CString (#size PgfCncCat*) (pokeCncCat c_ccats) (map (\v@(k,_,_,_) -> (k,v)) cnccats) pool - return (ConcrInfo c_cflags c_printname c_ccats c_cncfuns c_seqs c_cnccats mk_index (fromIntegral total_cats)) + return (B (ConcrInfo c_cflags c_printname c_ccats c_cncfuns c_seqs c_cnccats mk_index (fromIntegral total_cats))) where (Builder pool touch) = ?builder - pokeCncFun seqs_ptr ptr cncfun = do - c_cncfun <- newCncFun absfuns nullPtr cncfun pool + pokeCncFun seqs_ptr ptr cncfun@(funid,_) = do + c_cncfun <- newCncFun absfuns seqs_ptr cncfun pool poke ptr c_cncfun pokeSequence c_seq syms = do @@ -583,7 +650,9 @@ newConcr (AbstrInfo _ _ abscats _ absfuns c_abs_lin_fun c_non_lexical_buf _) cf (#poke PgfCCat, prods) c_ccat c_prods pokeProductions c_ccat (c_prods `plusPtr` (#offset GuSeq, data)) 0 (n_prods-1) mk_index prods where - pokeProductions c_ccat ptr top bot mk_index [] = return mk_index + pokeProductions c_ccat ptr top bot mk_index [] = do + (#poke PgfCCat, n_synprods) c_ccat (fromIntegral top :: CSizeT) + return mk_index pokeProductions c_ccat ptr top bot mk_index (prod:prods) = do (is_lexical,c_prod) <- newProduction c_ccats funs_ptr c_non_lexical_buf prod pool let mk_index' = \concr pool -> do pgf_parser_index concr c_ccat c_prod is_lexical pool @@ -596,27 +665,29 @@ newConcr (AbstrInfo _ _ abscats _ absfuns c_abs_lin_fun c_non_lexical_buf _) cf pokeProductions c_ccat ptr top (bot-1) mk_index' prods pokeRefDefFunId funs_ptr ptr funid = do - let c_fun = funs_ptr `plusPtr` (funid * (#size PgfCncFun)) + c_fun <- peek (funs_ptr `plusPtr` (funid * (#size PgfCncFun*))) (#poke PgfCncFun, absfun) c_fun c_abs_lin_fun poke ptr c_fun pokeCncCat c_ccats ptr (name,start,end,labels) = do let n_lins = fromIntegral (length labels) :: CSizeT - c_cnccat <- gu_malloc_aligned pool + c_cnccat <- gu_malloc_aligned pool ((#size PgfCncCat)+n_lins*(#size GuString)) (#const gu_flex_alignof(PgfCncCat)) case Map.lookup name abscats of Just c_abscat -> (#poke PgfCncCat, abscat) c_cnccat c_abscat Nothing -> throwIO (PGFError ("The category "++name++" is not in the abstract syntax")) - c_ccats <- newSequence (#size PgfCCat*) pokeFId [start..end] pool + c_ccats <- newSequence (#size PgfCCat*) (pokeFId c_cnccat) [start..end] pool (#poke PgfCncCat, cats) c_cnccat c_ccats + (#poke PgfCncCat, n_lins) c_cnccat n_lins pokeLabels (c_cnccat `plusPtr` (#offset PgfCncCat, labels)) labels poke ptr c_cnccat where - pokeFId ptr fid = do + pokeFId c_cnccat ptr fid = do c_ccat <- getCCat c_ccats fid pool + (#poke PgfCCat, cnccat) c_ccat c_cnccat poke ptr c_ccat - + pokeLabels ptr [] = return [] pokeLabels ptr (l:ls) = do c_l <- newUtf8CString l pool @@ -626,10 +697,10 @@ newConcr (AbstrInfo _ _ abscats _ absfuns c_abs_lin_fun c_non_lexical_buf _) cf newPGF :: (?builder :: Builder s) => [(String,Literal)] -> AbsName -> - AbstrInfo -> - [(ConcName,ConcrInfo)] -> + B s AbstrInfo -> + [(ConcName,B s ConcrInfo)] -> B s PGF -newPGF gflags absname (AbstrInfo c_aflags c_cats _ c_funs _ c_abs_lin_fun _ _) concrs = +newPGF gflags absname (B (AbstrInfo c_aflags c_cats _ c_funs _ c_abs_lin_fun _ _)) concrs = unsafePerformIO $ do ptr <- gu_malloc_aligned pool (#size PgfPGF) @@ -637,7 +708,8 @@ newPGF gflags absname (AbstrInfo c_aflags c_cats _ c_funs _ c_abs_lin_fun _ _) c c_gflags <- newFlags gflags pool c_absname <- newUtf8CString absname pool let c_abstr = ptr `plusPtr` (#offset PgfPGF, abstract) - c_concrs <- newSequence (#size PgfConcr) (pokeConcr c_abstr) concrs pool + c_concrs <- gu_make_seq (#size PgfConcr) (fromIntegral (length concrs)) pool + langs <- pokeConcrs c_abstr (c_concrs `plusPtr` (#offset GuSeq, data)) Map.empty concrs (#poke PgfPGF, major_version) ptr (2 :: (#type uint16_t)) (#poke PgfPGF, minor_version) ptr (0 :: (#type uint16_t)) (#poke PgfPGF, gflags) ptr c_gflags @@ -648,11 +720,18 @@ newPGF gflags absname (AbstrInfo c_aflags c_cats _ c_funs _ c_abs_lin_fun _ _) c (#poke PgfPGF, abstract.abs_lin_fun) ptr c_abs_lin_fun (#poke PgfPGF, concretes) ptr c_concrs (#poke PgfPGF, pool) ptr pool - return (B (PGF ptr touch)) + return (B (PGF ptr langs touch)) where (Builder pool touch) = ?builder - pokeConcr c_abstr ptr (name, ConcrInfo c_cflags c_printnames c_ccats c_cncfuns c_seqs c_cnccats mk_index c_total_cats) = do + pokeConcrs c_abstr ptr langs [] = return langs + pokeConcrs c_abstr ptr langs ((name, B info):xs) = do + pokeConcr c_abstr ptr name info + pokeConcrs c_abstr (ptr `plusPtr` (fromIntegral (#size PgfConcr))) + (Map.insert name (Concr ptr touch) langs) + xs + + pokeConcr c_abstr ptr name (ConcrInfo c_cflags c_printnames c_ccats c_cncfuns c_seqs c_cnccats mk_index c_total_cats) = do c_name <- newUtf8CString name pool c_fun_indices <- gu_make_map (#size GuString) gu_string_hasher (#size PgfCncOverloadMap*) gu_null_struct @@ -674,7 +753,9 @@ newPGF gflags absname (AbstrInfo c_aflags c_cats _ c_funs _ c_abs_lin_fun _ _) c (#poke PgfConcr, cnccats) ptr c_cnccats (#poke PgfConcr, total_cats) ptr c_total_cats (#poke PgfConcr, pool) ptr nullPtr + mk_index ptr pool + pgf_concrete_fix_internals ptr newFlags :: [(String,Literal)] -> Ptr GuPool -> IO (Ptr GuSeq) @@ -715,15 +796,15 @@ newLiteral (LFlt val) pool = newProduction :: Ptr GuMap -> Ptr PgfCncFun -> Ptr GuBuf -> Production -> Ptr GuPool -> IO ((#type bool), GuVariant) -newProduction c_ccats funs_ptr c_non_lexical_buf (PApply fun_id args) pool = +newProduction c_ccats funs_ptr c_non_lexical_buf (PApply funid args) pool = alloca $ \pptr -> do - let c_fun = funs_ptr `plusPtr` (fun_id * (#size PgfCncFun)) + c_fun <- peek (funs_ptr `plusPtr` (funid * (#size PgfCncFun*))) c_args <- newSequence (#size PgfPArg) pokePArg args pool ptr <- gu_alloc_variant (#const PGF_PRODUCTION_APPLY) (fromIntegral (#size PgfProductionApply)) (#const gu_alignof(PgfProductionApply)) pptr pool - (#poke PgfProductionApply, fun) ptr c_fun + (#poke PgfProductionApply, fun) ptr (c_fun :: Ptr PgfCncFun) (#poke PgfProductionApply, args) ptr c_args is_lexical <- pgf_production_is_lexical ptr c_non_lexical_buf pool c_prod <- peek pptr @@ -732,7 +813,7 @@ newProduction c_ccats funs_ptr c_non_lexical_buf (PApply fun_id args) pool = pokePArg ptr (PArg hypos ccat) = do c_ccat <- getCCat c_ccats ccat pool (#poke PgfPArg, ccat) ptr c_ccat - c_hypos <- newSequence (#size PgfCCat*) pokeCCat hypos pool + c_hypos <- newSequence (#size PgfCCat*) pokeCCat (map snd hypos) pool (#poke PgfPArg, hypos) ptr c_hypos pokeCCat ptr ccat = do @@ -907,12 +988,18 @@ newMap key_size hasher newKey elem_size pokeElem values pool = do insert map values pool +unionPGF :: PGF -> PGF -> Maybe PGF +unionPGF one@(PGF ptr1 langs1 touch1) two@(PGF ptr2 langs2 touch2) + | pgf_have_same_abstract ptr1 ptr2 /= 0 = Just (PGF ptr1 (Map.union langs1 langs2) (touch1 >> touch2)) + | otherwise = Nothing + writePGF :: FilePath -> PGF -> IO () writePGF fpath p = do pool <- gu_new_pool exn <- gu_new_exn pool - withCString fpath $ \c_fpath -> - pgf_write (pgf p) c_fpath exn + withArrayLen ((map concr . Map.elems . languages) p) $ \n_concrs concrs -> + withCString fpath $ \c_fpath -> + pgf_write (pgf p) (fromIntegral n_concrs) concrs c_fpath exn touchPGF p failed <- gu_exn_is_raised exn if failed @@ -927,6 +1014,26 @@ writePGF fpath p = do else do gu_pool_free pool return () +writeConcr :: FilePath -> Concr -> IO () +writeConcr fpath c = do + pool <- gu_new_pool + exn <- gu_new_exn pool + withCString fpath $ \c_fpath -> + pgf_concrete_save (concr c) c_fpath exn + touchConcr c + failed <- gu_exn_is_raised exn + if failed + then do is_errno <- gu_exn_caught exn gu_exn_type_GuErrno + if is_errno + then do perrno <- (#peek GuExn, data.data) exn + errno <- peek perrno + gu_pool_free pool + ioError (errnoToIOError "writeConcr" (Errno errno) Nothing (Just fpath)) + else do gu_pool_free pool + throwIO (PGFError "The grammar cannot be stored") + else do gu_pool_free pool + return () + sortByFst = sortBy (\(x,_) (y,_) -> compare x y) sortByFst3 = sortBy (\(x,_,_) (y,_,_) -> compare x y) sortByFst4 = sortBy (\(x,_,_,_) (y,_,_,_) -> compare x y) diff --git a/src/runtime/haskell-bind/PGF2/Type.hsc b/src/runtime/haskell-bind/PGF2/Type.hsc index 57e7eeaa9..f39246183 100644 --- a/src/runtime/haskell-bind/PGF2/Type.hsc +++ b/src/runtime/haskell-bind/PGF2/Type.hsc @@ -17,11 +17,18 @@ import PGF2.FFI data Type = Type {typ :: PgfExpr, touchType :: Touch} -- | 'Hypo' represents a hypothesis in a type i.e. in the type A -> B, A is the hypothesis -type Hypo = (BindType,CId,Type) +type Hypo = (BindType,String,Type) instance Show Type where show = showType [] +instance Eq Type where + (Type ty1 ty1_touch) == (Type ty2 ty2_touch) = + unsafePerformIO $ do + res <- pgf_type_eq ty1 ty2 + ty1_touch >> ty2_touch + return (res /= 0) + -- | parses a 'String' as a type readType :: String -> Maybe Type readType str = @@ -43,7 +50,7 @@ readType str = -- of identifiers is the list of all free variables -- in the type in order reverse to the order -- of binding. -showType :: [CId] -> Type -> String +showType :: [String] -> Type -> String showType scope (Type ty touch) = unsafePerformIO $ withGuPool $ \tmpPl -> @@ -59,7 +66,7 @@ showType scope (Type ty touch) = -- a list of arguments for the category. The operation -- @mkType [h_1,...,h_n] C [e_1,...,e_m]@ will create -- @h_1 -> ... -> h_n -> C e_1 ... e_m@ -mkType :: [Hypo] -> CId -> [Expr] -> Type +mkType :: [Hypo] -> String -> [Expr] -> Type mkType hypos cat exprs = unsafePerformIO $ do typPl <- gu_new_pool let n_exprs = fromIntegral (length exprs) :: CSizeT @@ -94,7 +101,7 @@ touchHypo (_,_,ty) = touchType ty -- | Decomposes a type into a list of hypothesises, a category and -- a list of arguments for the category. -unType :: Type -> ([Hypo],CId,[Expr]) +unType :: Type -> ([Hypo],String,[Expr]) unType (Type c_type touch) = unsafePerformIO $ do cid <- (#peek PgfType, cid) c_type >>= peekUtf8CString c_hypos <- (#peek PgfType, hypos) c_type @@ -127,7 +134,7 @@ unType (Type c_type touch) = unsafePerformIO $ do -- of identifiers is the list of all free variables -- in the type in order reverse to the order -- of binding. -showContext :: [CId] -> [Hypo] -> String +showContext :: [String] -> [Hypo] -> String showContext scope hypos = unsafePerformIO $ withGuPool $ \tmpPl -> diff --git a/src/runtime/haskell-bind/pgf2.cabal b/src/runtime/haskell-bind/pgf2.cabal index 4022f0b9b..09b7cbb46 100644 --- a/src/runtime/haskell-bind/pgf2.cabal +++ b/src/runtime/haskell-bind/pgf2.cabal @@ -1,31 +1,31 @@ name: pgf2 version: 0.1.0.0 --- synopsis: --- description: +-- synopsis: +-- description: homepage: http://www.grammaticalframework.org license: LGPL-3 --license-file: LICENSE author: Krasimir Angelov, Inari -maintainer: --- copyright: +maintainer: +-- copyright: category: Language build-type: Simple extra-source-files: README cabal-version: >=1.10 library - exposed-modules: PGF2, PGF2.Internal, SG + exposed-modules: PGF2, PGF2.Internal, SG, -- backwards compatibility API: - --, PGF, PGF.Internal + PGF, PGF.Internal other-modules: PGF2.FFI, PGF2.Expr, PGF2.Type, SG.FFI - build-depends: base >=4.3, - containers, pretty - -- hs-source-dirs: + build-depends: base >=4.3, containers, pretty, array + -- hs-source-dirs: default-language: Haskell2010 build-tools: hsc2hs extra-libraries: sg pgf gu cc-options: -std=c99 + default-language: Haskell2010 c-sources: utils.c executable pgf-shell diff --git a/src/runtime/haskell/PGF.hs b/src/runtime/haskell/PGF.hs index 3cd417c73..235d662d5 100644 --- a/src/runtime/haskell/PGF.hs +++ b/src/runtime/haskell/PGF.hs @@ -16,8 +16,7 @@ module PGF( -- * PGF PGF, - readPGF, - parsePGF, + readPGF, showPGF, -- * Identifiers CId, mkCId, wildCId, @@ -54,12 +53,14 @@ module PGF( mkDouble, unDouble, mkFloat, unFloat, mkMeta, unMeta, + exprSubstitute, + -- extra pExpr, exprSize, exprFunctions, -- * Operations -- ** Linearization - linearize, linearizeAllLang, linearizeAll, bracketedLinearize, bracketedLinearizeAll, tabularLinearizes, + linearize, linearizeAllLang, linearizeAll, bracketedLinearize, tabularLinearizes, groupResults, -- lins of trees by language, removing duplicates showPrintName, @@ -166,17 +167,18 @@ import PGF.Macros import PGF.Expr (Tree) import PGF.Morphology import PGF.Data -import PGF.Binary () +import PGF.Binary() import qualified PGF.Forest as Forest import qualified PGF.Parse as Parse import PGF.Utilities(replace) +import PGF.Printer +import Text.PrettyPrint --import Data.Char import qualified Data.Map as Map --import qualified Data.IntMap as IntMap --import Data.Maybe import Data.Binary -import Data.ByteString.Lazy (ByteString) import Data.List(mapAccumL) --import System.Random (newStdGen) --import Control.Monad @@ -192,11 +194,6 @@ import Text.PrettyPrint -- > $ gf -make readPGF :: FilePath -> IO PGF --- | Like @readPGF@ but you have the manage file-handling. --- --- @since 3.9.1 -parsePGF :: ByteString -> PGF - -- | Tries to parse the given string in the specified language -- and to produce abstract syntax expression. parse :: PGF -> Language -> Type -> String -> [Tree] @@ -261,9 +258,9 @@ functionType :: PGF -> CId -> Maybe Type -- Implementation --------------------------------------------------- -readPGF = decodeFile +readPGF f = decodeFile f -parsePGF = decode +showPGF pgf = render (ppPGF pgf) parse pgf lang typ s = case parse_ pgf lang typ (Just 4) s of diff --git a/src/runtime/haskell/PGF/ByteCode.hs b/src/runtime/haskell/PGF/ByteCode.hs index ef21ab229..579d6b3bb 100644 --- a/src/runtime/haskell/PGF/ByteCode.hs +++ b/src/runtime/haskell/PGF/ByteCode.hs @@ -2,7 +2,7 @@ module PGF.ByteCode(Literal(..), CodeLabel, Instr(..), IVal(..), TailInfo(..), ppLit, ppCode, ppInstr ) where -import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint + import PGF.CId import Text.PrettyPrint diff --git a/src/runtime/haskell/PGF/Data.hs b/src/runtime/haskell/PGF/Data.hs index e85ee5aa2..8c8a98fb0 100644 --- a/src/runtime/haskell/PGF/Data.hs +++ b/src/runtime/haskell/PGF/Data.hs @@ -74,7 +74,7 @@ data Production deriving (Eq,Ord,Show) data PArg = PArg [(FId,FId)] {-# UNPACK #-} !FId deriving (Eq,Ord,Show) data CncCat = CncCat {-# UNPACK #-} !FId {-# UNPACK #-} !FId {-# UNPACK #-} !(Array LIndex String) -data CncFun = CncFun [CId] {-# UNPACK #-} !(UArray LIndex SeqId) deriving (Eq,Ord,Show) +data CncFun = CncFun CId {-# UNPACK #-} !(UArray LIndex SeqId) deriving (Eq,Ord,Show) type Sequence = Array DotPos Symbol type FunId = Int type SeqId = Int @@ -93,14 +93,6 @@ msgUnionPGF one two = case absname one of _ -> (two, -- abstracts don't match, discard the old one -- error msg in Importing.ioUnionPGF Just "Abstract changed, previous concretes discarded.") -emptyPGF :: PGF -emptyPGF = PGF { - gflags = Map.empty, - absname = wildCId, - abstract = error "empty grammar, no abstract", - concretes = Map.empty - } - -- sameness of function type signatures, checked when importing a new concrete in env haveSameFunsPGF :: PGF -> PGF -> Bool haveSameFunsPGF one two = diff --git a/src/runtime/haskell/PGF/Expr.hs b/src/runtime/haskell/PGF/Expr.hs index d015f18e0..7bd3d88ec 100644 --- a/src/runtime/haskell/PGF/Expr.hs +++ b/src/runtime/haskell/PGF/Expr.hs @@ -8,6 +8,7 @@ module PGF.Expr(Tree, BindType(..), Expr(..), Literal(..), Patt(..), Equation(.. mkDouble, unDouble, mkFloat, unFloat, mkMeta, unMeta, + exprSubstitute, normalForm, @@ -169,6 +170,16 @@ unMeta (ETyped e ty) = unMeta e unMeta (EImplArg e) = unMeta e unMeta _ = Nothing +exprSubstitute :: Expr -> [Expr] -> Expr +exprSubstitute e es = + case e of + EAbs b x e -> EAbs b x (exprSubstitute e es) + EApp e1 e2 -> EApp (exprSubstitute e1 es) (exprSubstitute e2 es) + ELit l -> ELit l + EMeta i -> es !! i + EFun x -> EFun x + + ----------------------------------------------------- -- Parsing ----------------------------------------------------- diff --git a/src/runtime/haskell/PGF/Forest.hs b/src/runtime/haskell/PGF/Forest.hs index f25bc05d7..2a680b7c9 100644 --- a/src/runtime/haskell/PGF/Forest.hs +++ b/src/runtime/haskell/PGF/Forest.hs @@ -71,10 +71,10 @@ bracketedTokn dp f@(Forest abs cnc forest root) = in (ct,fid',fun,es,(map getVar hypos,lin)) Nothing -> error ("wrong forest id " ++ show fid) where - descend forest (PApply funid args) = let (CncFun pfuns _lins) = cncfuns cnc ! funid - cat = case pfuns of - [] -> wildCId - (pfun:_) -> case Map.lookup pfun (funs abs) of + descend forest (PApply funid args) = let (CncFun fun _lins) = cncfuns cnc ! funid + cat = case isLindefCId fun of + Just cat -> cat + Nothing -> case Map.lookup fun (funs abs) of Just (DTyp _ cat _,_,_,_) -> cat largs = map (render forest) args ltable = mkLinTable cnc isTrusted [] funid largs @@ -103,6 +103,14 @@ bracketedTokn dp f@(Forest abs cnc forest root) = descend (PCoerce fid) = trustedSpots parents' (PArg [] fid) descend (PConst c e _) = IntSet.empty +isLindefCId id + | take l s == lindef = Just (mkCId (drop l s)) + | otherwise = Nothing + where + s = showCId id + lindef = "lindef " + l = length lindef + -- | This function extracts the list of all completed parse trees -- that spans the whole input consumed so far. The trees are also -- limited by the category specified, which is usually @@ -124,13 +132,13 @@ getAbsTrees (Forest abs cnc forest root) arg@(PArg _ fid) ty dp = | otherwise = do fid0 <- get put fid x <- foldForest (\funid args trees -> - do let CncFun fns _lins = cncfuns cnc ! funid - case fns of - [] -> do arg <- go (Set.insert fid rec_) scope mb_tty (head args) + do let CncFun fn _lins = cncfuns cnc ! funid + case isLindefCId fn of + Just _ -> do arg <- go (Set.insert fid rec_) scope mb_tty (head args) return (mkAbs arg) - fns -> do ty_fn <- lookupFunType (head fns) + Nothing -> do ty_fn <- lookupFunType fn (e,tty0) <- foldM (\(e1,tty) arg -> goArg (Set.insert fid rec_) scope fid e1 arg tty) - (EFun (head fns),TTyp [] ty_fn) args + (EFun fn,TTyp [] ty_fn) args case mb_tty of Just tty -> do i <- newGuardedMeta e eqType scope (scopeSize scope) i tty tty0 diff --git a/src/runtime/haskell/PGF/Internal.hs b/src/runtime/haskell/PGF/Internal.hs index 3b252a36b..57c8a9fe1 100644 --- a/src/runtime/haskell/PGF/Internal.hs +++ b/src/runtime/haskell/PGF/Internal.hs @@ -1,19 +1,169 @@ {-# OPTIONS_HADDOCK hide #-} +{-# LANGUAGE ImplicitParams, RankNTypes #-} ------------------------------------------------- -- | -- Stability : unstable -- ------------------------------------------------- -module PGF.Internal(module Internal) where -import PGF.Binary as Internal -import PGF.Data as Internal -import PGF.Macros as Internal -import PGF.Optimize as Internal -import PGF.Printer as Internal -import PGF.Utilities as Internal -import PGF.ByteCode as Internal +module PGF.Internal(CId,Language,PGF, + Concr,lookConcr, + FId,isPredefFId, + FunId,SeqId,LIndex,Token, + Production(..),PArg(..),Symbol(..),Literal(..),BindType(..),PGF.Internal.Sequence, + globalFlags, abstrFlags, concrFlags, + concrTotalCats, concrCategories, concrProductions, + concrTotalFuns, concrFunction, + concrTotalSeqs, concrSequence, -import Data.Binary as Internal -import Data.Binary.Get as Internal -import Data.Binary.IEEE754 as Internal -import Data.Binary.Put as Internal + CodeLabel, Instr(..), IVal(..), TailInfo(..), + + Builder, B, build, + eAbs, eApp, eMeta, eFun, eVar, eLit, eTyped, eImplArg, dTyp, hypo, + AbstrInfo, newAbstr, ConcrInfo, newConcr, newPGF, + dTyp, hypo, + + fidString, fidInt, fidFloat, fidVar, fidStart, + + ppFunId, ppSeqId, ppFId, ppMeta, ppLit, PGF.Internal.ppSeq + ) where + +import PGF.Data +import PGF.Macros +import PGF.Printer +import PGF.ByteCode +import qualified Data.Map as Map +import qualified Data.IntMap as IntMap +import qualified Data.Set as Set +import Data.Array.IArray +import Text.PrettyPrint + +globalFlags pgf = gflags pgf +abstrFlags pgf = aflags (abstract pgf) +concrFlags concr = cflags concr + +concrTotalCats = totalCats + +concrCategories :: Concr -> [(CId,FId,FId,[String])] +concrCategories c = [(cat,start,end,elems lbls) | (cat,CncCat start end lbls) <- Map.toList (cnccats c)] + +concrTotalFuns c = + let (s,e) = bounds (cncfuns c) + in e-s+1 + +concrFunction :: Concr -> FunId -> (CId,[SeqId]) +concrFunction c funid = + let CncFun fun lins = cncfuns c ! funid + in (fun,elems lins) + +concrTotalSeqs :: Concr -> SeqId +concrTotalSeqs c = + let (s,e) = bounds (sequences c) + in e-s+1 + +type Sequence = [Symbol] + +concrSequence :: Concr -> SeqId -> [Symbol] +concrSequence c seqid = elems (sequences c ! seqid) + +concrProductions :: Concr -> FId -> [Production] +concrProductions c fid = + case IntMap.lookup fid (productions c) of + Just set -> Set.toList set + Nothing -> [] + + +data Builder s +newtype B s a = B a + +build :: (forall s . (?builder :: Builder s) => B s a) -> a +build x = let ?builder = undefined + in case x of + B x -> x + +eAbs :: (?builder :: Builder s) => BindType -> CId -> B s Expr -> B s Expr +eAbs bind_type var (B body) = B (EAbs bind_type var body) + +eApp :: (?builder :: Builder s) => B s Expr -> B s Expr -> B s Expr +eApp (B f) (B x) = B (EApp f x) + +eMeta :: (?builder :: Builder s) => Int -> B s Expr +eMeta i = B (EMeta i) + +eFun :: (?builder :: Builder s) => CId -> B s Expr +eFun f = B (EFun f) + +eVar :: (?builder :: Builder s) => Int -> B s Expr +eVar i = B (EVar i) + +eLit :: (?builder :: Builder s) => Literal -> B s Expr +eLit l = B (ELit l) + +eTyped :: (?builder :: Builder s) => B s Expr -> B s Type -> B s Expr +eTyped (B e) (B ty) = B (ETyped e ty) + +eImplArg :: (?builder :: Builder s) => B s Expr -> B s Expr +eImplArg (B e) = B (EImplArg e) + +hypo :: BindType -> CId -> B s Type -> (B s Hypo) +hypo bind_type var (B ty) = B (bind_type,var,ty) + +dTyp :: (?builder :: Builder s) => [B s Hypo] -> CId -> [B s Expr] -> B s Type +dTyp hypos cat es = B (DTyp [hypo | B hypo <- hypos] cat [e | B e <- es]) + + +type AbstrInfo = Abstr + +newAbstr :: (?builder :: Builder s) => [(CId,Literal)] -> + [(CId,[B s Hypo],Float)] -> + [(CId,B s Type,Int,Float)] -> + B s AbstrInfo +newAbstr aflags cats funs = B (Abstr (Map.fromList aflags) + (Map.fromList [(fun,(ty,arity,Nothing,realToFrac prob)) | (fun,B ty,arity,prob) <- funs]) + (Map.fromList [(cat,([hypo | B hypo <- hypos],[],realToFrac prob)) | (cat,hypos,prob) <- cats])) + +type ConcrInfo = Concr + +newConcr :: (?builder :: Builder s) => B s AbstrInfo -> + [(CId,Literal)] -> -- ^ Concrete syntax flags + [(CId,String)] -> -- ^ Printnames + [(FId,[FunId])] -> -- ^ Lindefs + [(FId,[FunId])] -> -- ^ Linrefs + [(FId,[Production])] -> -- ^ Productions + [(CId,[SeqId])] -> -- ^ Concrete functions (must be sorted by Fun) + [[Symbol]] -> -- ^ Sequences (must be sorted) + [(CId,FId,FId,[String])] -> -- ^ Concrete categories + FId -> -- ^ The total count of the categories + B s ConcrInfo +newConcr _ cflags printnames lindefs linrefs productions cncfuns sequences cnccats totalCats = + B (Concr {cflags = Map.fromList cflags + ,printnames = Map.fromList printnames + ,lindefs = IntMap.fromList lindefs + ,linrefs = IntMap.fromList linrefs + ,productions = IntMap.fromList [(fid,Set.fromList prods) | (fid,prods) <- productions] + ,cncfuns = mkArray [CncFun fun (mkArray lins) | (fun,lins) <- cncfuns] + ,sequences = mkArray (map mkArray sequences) + ,cnccats = Map.fromList [(cat,CncCat s e (mkArray lbls)) | (cat,s,e,lbls) <- cnccats] + ,totalCats = totalCats + }) + {- + pproductions :: IntMap.IntMap (Set.Set Production), -- productions needed for parsing + lproductions :: Map.Map CId (IntMap.IntMap (Set.Set Production)), -- productions needed for linearization + lexicon :: IntMap.IntMap (IntMap.IntMap (TMap.TrieMap Token IntSet.IntSet)), +-} + +newPGF :: (?builder :: Builder s) => [(CId,Literal)] -> + CId -> + B s AbstrInfo -> + [(CId,B s ConcrInfo)] -> + B s PGF +newPGF gflags absname (B abstract) concretes = + B (PGF {gflags = Map.fromList gflags + ,absname = absname + ,abstract = abstract + ,concretes = Map.fromList [(cname,concr) | (cname,B concr) <- concretes] + }) + + +ppSeq (seqid,seq) = PGF.Printer.ppSeq (seqid,mkArray seq) + +mkArray l = listArray (0,length l-1) l diff --git a/src/runtime/haskell/PGF/Linearize.hs b/src/runtime/haskell/PGF/Linearize.hs index 5fdb186c1..e3e8d92db 100644 --- a/src/runtime/haskell/PGF/Linearize.hs +++ b/src/runtime/haskell/PGF/Linearize.hs @@ -4,7 +4,6 @@ module PGF.Linearize , linearizeAll , linearizeAllLang , bracketedLinearize - , bracketedLinearizeAll , tabularLinearizes ) where @@ -48,12 +47,6 @@ bracketedLinearize pgf lang = head . map (snd . untokn Nothing . firstLin cnc) . head [] = [] head (bs:bss) = bs --- | Linearizes given expression as a bracketed string in the language -bracketedLinearizeAll :: PGF -> Language -> Tree -> [[BracketedString]] -bracketedLinearizeAll pgf lang = map (snd . untokn Nothing . firstLin cnc) . linTree pgf cnc - where - cnc = lookMap (error "no lang") lang (concretes pgf) - firstLin cnc arg@(ct@(cat,n_fid),fid,fun,es,(xs,lin)) = case IntMap.lookup fid (linrefs cnc) of Just (funid:_) -> snd (mkLinTable cnc (const True) [] funid [arg]) ! 0 diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs index 3fc7a5804..de175616c 100644 --- a/src/runtime/haskell/PGF/Macros.hs +++ b/src/runtime/haskell/PGF/Macros.hs @@ -1,5 +1,4 @@ module PGF.Macros where -import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint import PGF.CId import PGF.Data diff --git a/src/runtime/haskell/PGF/Morphology.hs b/src/runtime/haskell/PGF/Morphology.hs index 9907d9fa6..2da6da44e 100644 --- a/src/runtime/haskell/PGF/Morphology.hs +++ b/src/runtime/haskell/PGF/Morphology.hs @@ -31,8 +31,7 @@ collectWords pinfo = Map.fromListWith (++) [(t, [(fun,lbls ! l)]) | (CncCat s e lbls) <- Map.elems (cnccats pinfo) , fid <- [s..e] , PApply funid _ <- maybe [] Set.toList (IntMap.lookup fid (productions pinfo)) - , let CncFun funs lins = cncfuns pinfo ! funid - , fun <- funs + , let CncFun fun lins = cncfuns pinfo ! funid , (l,seqid) <- assocs lins , sym <- elems (sequences pinfo ! seqid) , t <- sym2tokns sym] diff --git a/src/runtime/haskell/PGF/OldBinary.hs b/src/runtime/haskell/PGF/OldBinary.hs index 6acc18895..c727589f5 100644 --- a/src/runtime/haskell/PGF/OldBinary.hs +++ b/src/runtime/haskell/PGF/OldBinary.hs @@ -60,7 +60,7 @@ getConcr = cnccats <- getMap getCId getCncCat totalCats <- get let rseq = listToArray [SymCat 0 0] - rfun = CncFun [mkCId "linref"] (listToArray [scnt]) + rfun = CncFun (mkCId "linref") (listToArray [scnt]) linrefs = IntMap.fromList [(i,[fcnt])|i<-[0..totalCats-1]] return (Concr{ cflags=cflags, printnames=printnames , sequences=toArray (scnt+1,seqs++[rseq]) @@ -110,7 +110,7 @@ getBindType = 1 -> return Implicit _ -> decodingError "getBindType" -getCncFun = liftM2 CncFun (fmap (:[]) getCId) (getArray get) +getCncFun = liftM2 CncFun getCId (getArray get) getCncCat = liftM3 CncCat get get (getArray get) diff --git a/src/runtime/haskell/PGF/Optimize.hs b/src/runtime/haskell/PGF/Optimize.hs index 0573c5bf4..8739c8665 100644 --- a/src/runtime/haskell/PGF/Optimize.hs +++ b/src/runtime/haskell/PGF/Optimize.hs @@ -21,7 +21,6 @@ import qualified Data.IntMap as IntMap import qualified PGF.TrieMap as TrieMap import qualified Data.List as List import Control.Monad.ST -import Debug.Trace optimizePGF :: PGF -> PGF optimizePGF pgf = pgf{concretes=fmap (updateConcrete (abstract pgf) . @@ -179,26 +178,26 @@ topDownFilter startCat cnc = bottomUpFilter :: Concr -> Concr -bottomUpFilter cnc = cnc{productions=filterProductions IntMap.empty (productions cnc)} +bottomUpFilter cnc = cnc{productions=filterProductions IntMap.empty IntSet.empty (productions cnc)} -filterProductions prods0 prods +filterProductions prods0 hoc0 prods | prods0 == prods1 = prods0 - | otherwise = filterProductions prods1 prods + | otherwise = filterProductions prods1 hoc1 prods where - prods1 = IntMap.foldWithKey foldProdSet IntMap.empty prods - hoc = IntMap.fold (\set !hoc -> Set.fold accumHOC hoc set) IntSet.empty prods + (prods1,hoc1) = IntMap.foldWithKey foldProdSet (IntMap.empty,IntSet.empty) prods - foldProdSet fid set !prods - | Set.null set1 = prods - | otherwise = IntMap.insert fid set1 prods + foldProdSet fid set (!prods,!hoc) + | Set.null set1 = (prods,hoc) + | otherwise = (IntMap.insert fid set1 prods,hoc1) where set1 = Set.filter filterRule set + hoc1 = Set.fold accumHOC hoc set1 filterRule (PApply funid args) = all (\(PArg _ fid) -> isLive fid) args filterRule (PCoerce fid) = isLive fid filterRule _ = True - isLive fid = isPredefFId fid || IntMap.member fid prods0 || IntSet.member fid hoc + isLive fid = isPredefFId fid || IntMap.member fid prods0 || IntSet.member fid hoc0 accumHOC (PApply funid args) hoc = List.foldl' (\hoc (PArg hypos _) -> List.foldl' (\hoc (_,fid) -> IntSet.insert fid hoc) hoc hypos) hoc args accumHOC _ hoc = hoc @@ -242,7 +241,7 @@ splitLexicalRules cnc p_prods = seq2prefix (SymALL_CAPIT :syms) = TrieMap.fromList [wf ["&|"]] updateConcrete abs cnc = - let p_prods0 = filterProductions IntMap.empty (productions cnc) + let p_prods0 = filterProductions IntMap.empty IntSet.empty (productions cnc) (lex,p_prods) = splitLexicalRules cnc p_prods0 l_prods = linIndex cnc p_prods0 in cnc{pproductions = p_prods, lproductions = l_prods, lexicon = lex} @@ -253,7 +252,7 @@ updateConcrete abs cnc = , prod <- Set.toList prods , fun <- getFunctions prod] where - getFunctions (PApply funid args) = let CncFun funs _ = cncfuns cnc ! funid in funs + getFunctions (PApply funid args) = let CncFun fun _ = cncfuns cnc ! funid in [fun] getFunctions (PCoerce fid) = case IntMap.lookup fid productions of Nothing -> [] Just prods -> [fun | prod <- Set.toList prods, fun <- getFunctions prod] diff --git a/src/runtime/haskell/PGF/Parse.hs b/src/runtime/haskell/PGF/Parse.hs index c27cefba8..51b1d3273 100644 --- a/src/runtime/haskell/PGF/Parse.hs +++ b/src/runtime/haskell/PGF/Parse.hs @@ -503,14 +503,14 @@ type Continuation = TrieMap.TrieMap Token ActiveSet -- | Return the Continuation of a Parsestate with exportable types -- Used by PGFService getContinuationInfo :: ParseState -> Map.Map [Token] [(FunId, CId, String)] -getContinuationInfo pstate = Map.map (concatMap f . Set.toList) contMap +getContinuationInfo pstate = Map.map (map f . Set.toList) contMap where PState _abstr concr _chart cont = pstate contMap = Map.fromList (TrieMap.toList cont) -- always get [([], _::ActiveSet)] - f :: Active -> [(FunId,CId,String)] - f (Active int dotpos funid seqid pargs ak) = [(funid, fn, seq) | fn <- fns] + f :: Active -> (FunId,CId,String) + f (Active int dotpos funid seqid pargs ak) = (funid, cid, seq) where - CncFun fns _ = cncfuns concr ! funid + CncFun cid _ = cncfuns concr ! funid seq = showSeq dotpos (sequences concr ! seqid) showSeq :: DotPos -> Sequence -> String diff --git a/src/runtime/haskell/PGF/Printer.hs b/src/runtime/haskell/PGF/Printer.hs index 3501f49b0..6e394c2ba 100644 --- a/src/runtime/haskell/PGF/Printer.hs +++ b/src/runtime/haskell/PGF/Printer.hs @@ -1,6 +1,5 @@ {-# LANGUAGE FlexibleContexts #-} module PGF.Printer (ppPGF,ppCat,ppFId,ppFunId,ppSeqId,ppSeq,ppFun) where -import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint import PGF.CId import PGF.Data @@ -73,8 +72,8 @@ ppProduction (fid,PCoerce arg) = ppProduction (fid,PConst _ _ ss) = ppFId fid <+> text "->" <+> ppStrs ss -ppCncFun (funid,CncFun funs arr) = - ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (hsep (map ppCId funs)) +ppCncFun (funid,CncFun fun arr) = + ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun) ppLinDefs (fid,funids) = [ppFId fid <+> text "->" <+> ppFunId funid <> brackets (ppFId fidVar) | funid <- funids] @@ -82,6 +81,7 @@ ppLinDefs (fid,funids) = ppLinRefs (fid,funids) = [ppFId fidVar <+> text "->" <+> ppFunId funid <> brackets (ppFId fid) | funid <- funids] +ppSeq :: (SeqId,Sequence) -> Doc ppSeq (seqid,seq) = ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq)) diff --git a/src/runtime/haskell/PGF/VisualizeTree.hs b/src/runtime/haskell/PGF/VisualizeTree.hs index 520eb59c3..5d884fafe 100644 --- a/src/runtime/haskell/PGF/VisualizeTree.hs +++ b/src/runtime/haskell/PGF/VisualizeTree.hs @@ -23,7 +23,6 @@ module PGF.VisualizeTree , gizaAlignment , conlls2latexDoc ) where -import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint import PGF.CId (wildCId,showCId,ppCId,mkCId) --CId,pCId, import PGF.Data diff --git a/src/runtime/haskell/pgf.cabal b/src/runtime/haskell/pgf.cabal index 8a84dc278..d3146e9d4 100644 --- a/src/runtime/haskell/pgf.cabal +++ b/src/runtime/haskell/pgf.cabal @@ -1,5 +1,5 @@ name: pgf -version: 3.9.1-git +version: 3.9-git cabal-version: >= 1.20 build-type: Simple @@ -8,7 +8,7 @@ category: Natural Language Processing synopsis: Grammatical Framework description: A library for interpreting the Portable Grammar Format (PGF) homepage: http://www.grammaticalframework.org/ -bug-reports: https://github.com/GrammaticalFramework/gf-core/issues +bug-reports: https://github.com/GrammaticalFramework/GF/issues maintainer: Thomas Hallgren tested-with: GHC==7.6.3, GHC==7.8.3, GHC==7.10.3, GHC==8.0.2 @@ -30,6 +30,7 @@ Library exceptions if flag(custom-binary) + hs-source-dirs: ., binary other-modules: -- not really part of GF but I have changed the original binary library -- and we have to keep the copy for now. @@ -45,9 +46,9 @@ Library --if impl(ghc>=7.8) -- ghc-options: +RTS -A20M -RTS ghc-prof-options: -fprof-auto - extensions: + extensions: - exposed-modules: + exposed-modules: PGF PGF.Internal PGF.Haskell diff --git a/src/runtime/python/pypgf.c b/src/runtime/python/pypgf.c index a2f77aa42..c3223a827 100644 --- a/src/runtime/python/pypgf.c +++ b/src/runtime/python/pypgf.c @@ -2619,6 +2619,21 @@ PGF_dealloc(PGFObject* self) Py_TYPE(self)->tp_free((PyObject*)self); } +typedef struct { + GuMapItor fn; + PGFObject* grammar; + void* collection; +} PyPGFClosure; + +static void +pgf_collect_langs_seq(GuMapItor* fn, const void* key, void* value, GuExn* err) +{ + PgfConcr* concr = *((PgfConcr**) value); + PyPGFClosure* clo = (PyPGFClosure*) fn; + + gu_buf_push((GuBuf*) clo->collection, PgfConcr*, concr); +} + static PyObject * PGF_repr(PGFObject *self) { @@ -2628,7 +2643,14 @@ PGF_repr(PGFObject *self) GuStringBuf* sbuf = gu_new_string_buf(tmp_pool); GuOut* out = gu_string_buf_out(sbuf); - pgf_print(self->pgf, out, err); + GuBuf* languages = gu_new_buf(PgfConcr*, tmp_pool); + + PyPGFClosure clo = { { pgf_collect_langs_seq }, self, languages }; + pgf_iter_languages(self->pgf, &clo.fn, err); + + pgf_print(self->pgf, gu_buf_length(languages), + gu_buf_data(languages), + out, err); PyObject* pystr = PyString_FromStringAndSize(gu_string_buf_data(sbuf), gu_string_buf_length(sbuf)); @@ -2643,14 +2665,8 @@ PGF_getAbstractName(PGFObject *self, void *closure) return PyString_FromString(pgf_abstract_name(self->pgf)); } -typedef struct { - GuMapItor fn; - PGFObject* grammar; - PyObject* object; -} PyPGFClosure; - static void -pgf_collect_langs(GuMapItor* fn, const void* key, void* value, GuExn* err) +pgf_collect_langs_dict(GuMapItor* fn, const void* key, void* value, GuExn* err) { PgfCId name = (PgfCId) key; PgfConcr* concr = *((PgfConcr**) value); @@ -2675,7 +2691,7 @@ pgf_collect_langs(GuMapItor* fn, const void* key, void* value, GuExn* err) ((ConcrObject *) py_lang)->grammar = clo->grammar; Py_INCREF(clo->grammar); - if (PyDict_SetItem(clo->object, py_name, py_lang) != 0) { + if (PyDict_SetItem((PyObject*) clo->collection, py_name, py_lang) != 0) { gu_raise(err, PgfExn); goto end; } @@ -2697,7 +2713,7 @@ PGF_getLanguages(PGFObject *self, void *closure) // Create an exception frame that catches all errors. GuExn* err = gu_new_exn(tmp_pool); - PyPGFClosure clo = { { pgf_collect_langs }, self, languages }; + PyPGFClosure clo = { { pgf_collect_langs_dict }, self, languages }; pgf_iter_languages(self->pgf, &clo.fn, err); if (!gu_ok(err)) { Py_DECREF(languages); @@ -2727,7 +2743,7 @@ pgf_collect_cats(GuMapItor* fn, const void* key, void* value, GuExn* err) goto end; } - if (PyList_Append(clo->object, py_name) != 0) { + if (PyList_Append((PyObject*) clo->collection, py_name) != 0) { gu_raise(err, PgfExn); goto end; } @@ -2794,7 +2810,7 @@ pgf_collect_funs(GuMapItor* fn, const void* key, void* value, GuExn* err) goto end; } - if (PyList_Append(clo->object, py_name) != 0) { + if (PyList_Append((PyObject*) clo->collection, py_name) != 0) { gu_raise(err, PgfExn); goto end; } @@ -3142,7 +3158,7 @@ pgf_embed_funs(GuMapItor* fn, const void* key, void* value, GuExn* err) Py_INCREF(pyexpr->master); - if (PyModule_AddObject(clo->object, name, (PyObject*) pyexpr) != 0) { + if (PyModule_AddObject((PyObject*) clo->collection, name, (PyObject*) pyexpr) != 0) { Py_DECREF(pyexpr); gu_raise(err, PgfExn); } diff --git a/src/server/PGFService.hs b/src/server/PGFService.hs index 020349fbb..1774a8cff 100644 --- a/src/server/PGFService.hs +++ b/src/server/PGFService.hs @@ -317,15 +317,6 @@ cpgfMain qsem command (t,(pgf,pc)) = -------------------------------------------------------------------------------- -{- -instance JSON C.CId where - readJSON x = readJSON x >>= maybe (fail "Bad language.") return . C.readCId - showJSON = showJSON . C.showCId --} -instance JSON C.Expr where - readJSON x = readJSON x >>= maybe (fail "Bad expression.") return . C.readExpr - showJSON = showJSON . C.showExpr [] - -- | Convert a 'Tree' to an 'ATree' cToATree :: C.Expr -> PGF.ATree C.Expr @@ -333,10 +324,6 @@ cToATree e = maybe (PGF.Other e) app (C.unApp e) where app (f,es) = PGF.App (read f) (map cToATree es) -instance ToATree C.Expr where - showTree = show - toATree = cToATree - #endif -------------------------------------------------------------------------------- @@ -974,7 +961,11 @@ instance JSON PGF.Expr where instance JSON PGF.BracketedString where readJSON x = return (PGF.Leaf "") +#ifdef C_RUNTIME + showJSON (PGF.Bracket cat fid index fun bs) = +#else showJSON (PGF.Bracket cat fid index fun _ bs) = +#endif makeObj ["cat".=cat, "fid".=fid, "index".=index, "fun".=fun, "children".=bs] showJSON (PGF.Leaf s) = makeObj ["token".=s]