forked from GitHub/gf-core
316 lines
11 KiB
Haskell
316 lines
11 KiB
Haskell
-------------------------------------------------
|
|
-- |
|
|
-- Module : PGF
|
|
-- Maintainer : Aarne Ranta
|
|
-- Stability : stable
|
|
-- Portability : portable
|
|
--
|
|
-- This module is an Application Programming Interface to
|
|
-- load and interpret grammars compiled in Portable Grammar Format (PGF).
|
|
-- The PGF format is produced as a final output from the GF compiler.
|
|
-- The API is meant to be used for embedding GF grammars in Haskell
|
|
-- programs
|
|
-------------------------------------------------
|
|
|
|
module PGF(
|
|
-- * PGF
|
|
PGF,
|
|
readPGF,
|
|
|
|
-- * Identifiers
|
|
CId, mkCId, wildCId,
|
|
showCId, readCId,
|
|
|
|
-- * Languages
|
|
Language,
|
|
showLanguage, readLanguage,
|
|
languages, abstractName, languageCode,
|
|
|
|
-- * Types
|
|
Type, Hypo,
|
|
showType, readType,
|
|
mkType, mkHypo, mkDepHypo, mkImplHypo,
|
|
categories, startCat,
|
|
|
|
-- * Functions
|
|
functions, functionType,
|
|
|
|
-- * Expressions & Trees
|
|
-- ** Tree
|
|
Tree,
|
|
|
|
-- ** Expr
|
|
Expr,
|
|
showExpr, readExpr,
|
|
mkApp, unApp,
|
|
mkStr, unStr,
|
|
mkInt, unInt,
|
|
mkDouble, unDouble,
|
|
|
|
|
|
-- * Operations
|
|
-- ** Linearization
|
|
linearize, linearizeAllLang, linearizeAll,
|
|
showPrintName,
|
|
|
|
-- ** Parsing
|
|
parse, parseWithRecovery, canParse, parseAllLang, parseAll,
|
|
|
|
-- ** Evaluation
|
|
PGF.compute, paraphrase,
|
|
|
|
-- ** Type Checking
|
|
-- | The type checker in PGF does both type checking and renaming
|
|
-- i.e. it verifies that all identifiers are declared and it
|
|
-- distinguishes between global function or type indentifiers and
|
|
-- variable names. The type checker should always be applied on
|
|
-- expressions entered by the user i.e. those produced via functions
|
|
-- like 'readType' and 'readExpr' because otherwise unexpected results
|
|
-- could appear. All typechecking functions returns updated versions
|
|
-- of the input types or expressions because the typechecking could
|
|
-- also lead to metavariables instantiations.
|
|
checkType, checkExpr, inferExpr,
|
|
TcError(..), ppTcError,
|
|
|
|
-- ** Word Completion (Incremental Parsing)
|
|
complete,
|
|
Incremental.ParseState,
|
|
Incremental.initState, Incremental.nextState, Incremental.getCompletions, Incremental.recoveryStates, Incremental.extractTrees,
|
|
|
|
-- ** Generation
|
|
generateRandom, generateAll, generateAllDepth,
|
|
|
|
-- ** Morphological Analysis
|
|
Lemma, Analysis, Morpho,
|
|
lookupMorpho, buildMorpho,
|
|
|
|
-- ** Visualizations
|
|
graphvizAbstractTree,
|
|
graphvizParseTree,
|
|
graphvizDependencyTree,
|
|
graphvizAlignment
|
|
) where
|
|
|
|
import PGF.CId
|
|
import PGF.Linearize
|
|
import PGF.Generate
|
|
import PGF.TypeCheck
|
|
import PGF.Paraphrase
|
|
import PGF.VisualizeTree
|
|
import PGF.Macros
|
|
import PGF.Expr (Tree)
|
|
import PGF.Morphology
|
|
import PGF.Data hiding (functions)
|
|
import PGF.Binary
|
|
import qualified PGF.Parsing.FCFG.Active as Active
|
|
import qualified PGF.Parsing.FCFG.Incremental as Incremental
|
|
import qualified GF.Compile.GeneratePMCFG as PMCFG
|
|
|
|
import GF.Infra.Option
|
|
import GF.Data.Utilities (replace)
|
|
|
|
import Data.Char
|
|
import qualified Data.Map as Map
|
|
import qualified Data.IntMap as IntMap
|
|
import Data.Maybe
|
|
import Data.Binary
|
|
import System.Random (newStdGen)
|
|
import Control.Monad
|
|
|
|
---------------------------------------------------
|
|
-- Interface
|
|
---------------------------------------------------
|
|
|
|
-- | Reads file in Portable Grammar Format and produces
|
|
-- 'PGF' structure. The file is usually produced with:
|
|
--
|
|
-- > $ gf -make <grammar file name>
|
|
readPGF :: FilePath -> IO PGF
|
|
|
|
-- | Linearizes given expression as string in the language
|
|
linearize :: PGF -> Language -> Tree -> String
|
|
|
|
-- | Tries to parse the given string in the specified language
|
|
-- and to produce abstract syntax expression. An empty
|
|
-- list is returned if the parsing is not successful. The list may also
|
|
-- contain more than one element if the grammar is ambiguous.
|
|
-- Throws an exception if the given language cannot be used
|
|
-- for parsing, see 'canParse'.
|
|
parse :: PGF -> Language -> Type -> String -> [Tree]
|
|
|
|
parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> [Tree]
|
|
|
|
-- | Checks whether the given language can be used for parsing.
|
|
canParse :: PGF -> Language -> Bool
|
|
|
|
-- | The same as 'linearizeAllLang' but does not return
|
|
-- the language.
|
|
linearizeAll :: PGF -> Tree -> [String]
|
|
|
|
-- | Linearizes given expression as string in all languages
|
|
-- available in the grammar.
|
|
linearizeAllLang :: PGF -> Tree -> [(Language,String)]
|
|
|
|
-- | Show the printname of a type
|
|
showPrintName :: PGF -> Language -> Type -> String
|
|
|
|
-- | The same as 'parseAllLang' but does not return
|
|
-- the language.
|
|
parseAll :: PGF -> Type -> String -> [[Tree]]
|
|
|
|
-- | Tries to parse the given string with all available languages.
|
|
-- Languages which cannot be used for parsing (see 'canParse')
|
|
-- are ignored.
|
|
-- The returned list contains pairs of language
|
|
-- and list of abstract syntax expressions
|
|
-- (this is a list, since grammars can be ambiguous).
|
|
-- Only those languages
|
|
-- for which at least one parsing is possible are listed.
|
|
parseAllLang :: PGF -> Type -> String -> [(Language,[Tree])]
|
|
|
|
-- | The same as 'generateAllDepth' but does not limit
|
|
-- the depth in the generation.
|
|
generateAll :: PGF -> Type -> [Expr]
|
|
|
|
-- | Generates an infinite list of random abstract syntax expressions.
|
|
-- This is usefull for tree bank generation which after that can be used
|
|
-- for grammar testing.
|
|
generateRandom :: PGF -> Type -> IO [Expr]
|
|
|
|
-- | Generates an exhaustive possibly infinite list of
|
|
-- abstract syntax expressions. A depth can be specified
|
|
-- to limit the search space.
|
|
generateAllDepth :: PGF -> Type -> Maybe Int -> [Expr]
|
|
|
|
-- | List of all languages available in the given grammar.
|
|
languages :: PGF -> [Language]
|
|
|
|
-- | Gets the RFC 4646 language tag
|
|
-- of the language which the given concrete syntax implements,
|
|
-- if this is listed in the source grammar.
|
|
-- Example language tags include @\"en\"@ for English,
|
|
-- and @\"en-UK\"@ for British English.
|
|
languageCode :: PGF -> Language -> Maybe String
|
|
|
|
-- | The abstract language name is the name of the top-level
|
|
-- abstract module
|
|
abstractName :: PGF -> Language
|
|
|
|
-- | List of all categories defined in the given grammar.
|
|
-- The categories are defined in the abstract syntax
|
|
-- with the \'cat\' keyword.
|
|
categories :: PGF -> [CId]
|
|
|
|
-- | The start category is defined in the grammar with
|
|
-- the \'startcat\' flag. This is usually the sentence category
|
|
-- but it is not necessary. Despite that there is a start category
|
|
-- defined you can parse with any category. The start category
|
|
-- definition is just for convenience.
|
|
startCat :: PGF -> Type
|
|
|
|
-- | List of all functions defined in the abstract syntax
|
|
functions :: PGF -> [CId]
|
|
|
|
-- | The type of a given function
|
|
functionType :: PGF -> CId -> Maybe Type
|
|
|
|
-- | Complete the last word in the given string. If the input
|
|
-- is empty or ends in whitespace, the last word is considred
|
|
-- to be the empty string. This means that the completions
|
|
-- will be all possible next words.
|
|
complete :: PGF -> Language -> Type -> String
|
|
-> [String] -- ^ Possible completions,
|
|
-- including the given input.
|
|
|
|
|
|
---------------------------------------------------
|
|
-- Implementation
|
|
---------------------------------------------------
|
|
|
|
readPGF f = decodeFile f >>= addParsers
|
|
|
|
-- Adds parsers for all concretes that don't have a parser and that have parser=ondemand.
|
|
addParsers :: PGF -> IO PGF
|
|
addParsers pgf = do cncs <- sequence [if wantsParser cnc then addParser lang cnc else return (lang,cnc)
|
|
| (lang,cnc) <- Map.toList (concretes pgf)]
|
|
return pgf { concretes = Map.fromList cncs }
|
|
where
|
|
wantsParser cnc = isNothing (parser cnc) && Map.lookup (mkCId "parser") (cflags cnc) == Just "ondemand"
|
|
addParser lang cnc = do pinfo <- PMCFG.convertConcrete noOptions (abstract pgf) lang cnc
|
|
return (lang,cnc { parser = Just pinfo })
|
|
|
|
linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang
|
|
|
|
parse pgf lang typ s =
|
|
case Map.lookup lang (concretes pgf) of
|
|
Just cnc -> case parser cnc of
|
|
Just pinfo -> if Map.lookup (mkCId "erasing") (cflags cnc) == Just "on"
|
|
then Incremental.parse pgf lang typ (words s)
|
|
else Active.parse "t" pinfo typ (words s)
|
|
Nothing -> error ("No parser built for language: " ++ showCId lang)
|
|
Nothing -> error ("Unknown language: " ++ showCId lang)
|
|
|
|
parseWithRecovery pgf lang typ open_typs s = Incremental.parseWithRecovery pgf lang typ open_typs (words s)
|
|
|
|
canParse pgf cnc = isJust (lookParser pgf cnc)
|
|
|
|
linearizeAll mgr = map snd . linearizeAllLang mgr
|
|
linearizeAllLang mgr t =
|
|
[(lang,PGF.linearize mgr lang t) | lang <- languages mgr]
|
|
|
|
showPrintName pgf lang (DTyp _ c _) = realize $ lookPrintName pgf lang c
|
|
|
|
parseAll mgr typ = map snd . parseAllLang mgr typ
|
|
|
|
parseAllLang mgr typ s =
|
|
[(lang,ts) | lang <- languages mgr, canParse mgr lang, let ts = parse mgr lang typ s, not (null ts)]
|
|
|
|
generateRandom pgf cat = do
|
|
gen <- newStdGen
|
|
return $ genRandom gen pgf cat
|
|
|
|
generateAll pgf cat = generate pgf cat Nothing
|
|
generateAllDepth pgf cat = generate pgf cat
|
|
|
|
abstractName pgf = absname pgf
|
|
|
|
languages pgf = cncnames pgf
|
|
|
|
languageCode pgf lang =
|
|
fmap (replace '_' '-') $ lookConcrFlag pgf lang (mkCId "language")
|
|
|
|
categories pgf = [c | (c,hs) <- Map.toList (cats (abstract pgf))]
|
|
|
|
startCat pgf = DTyp [] (lookStartCat pgf) []
|
|
|
|
functions pgf = Map.keys (funs (abstract pgf))
|
|
|
|
functionType pgf fun =
|
|
case Map.lookup fun (funs (abstract pgf)) of
|
|
Just (ty,_,_) -> Just ty
|
|
Nothing -> Nothing
|
|
|
|
complete pgf from typ input =
|
|
let (ws,prefix) = tokensAndPrefix input
|
|
state0 = Incremental.initState pgf from typ
|
|
in case loop state0 ws of
|
|
Nothing -> []
|
|
Just state ->
|
|
(if null prefix && not (null (Incremental.extractTrees state typ)) then [unwords ws ++ " "] else [])
|
|
++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Incremental.getCompletions state prefix)]
|
|
where
|
|
tokensAndPrefix :: String -> ([String],String)
|
|
tokensAndPrefix s | not (null s) && isSpace (last s) = (ws, "")
|
|
| null ws = ([],"")
|
|
| otherwise = (init ws, last ws)
|
|
where ws = words s
|
|
|
|
loop ps [] = Just ps
|
|
loop ps (t:ts) = case Incremental.nextState ps t of
|
|
Left es -> Nothing
|
|
Right ps -> loop ps ts
|
|
|
|
-- | Converts an expression to normal form
|
|
compute :: PGF -> Expr -> Expr
|
|
compute pgf = PGF.Data.normalForm (funs (abstract pgf)) 0 []
|