mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-01 23:32:51 -06:00
reorganize the directories under src, and rescue the JavaScript interpreter from deprecated
This commit is contained in:
352
src/runtime/haskell/PGF.hs
Normal file
352
src/runtime/haskell/PGF.hs
Normal file
@@ -0,0 +1,352 @@
|
||||
-------------------------------------------------
|
||||
-- |
|
||||
-- Module : PGF
|
||||
-- Maintainer : Aarne Ranta
|
||||
-- Stability : stable
|
||||
-- Portability : portable
|
||||
--
|
||||
-- This module is an Application Programming Interface to
|
||||
-- load and interpret grammars compiled in Portable Grammar Format (PGF).
|
||||
-- The PGF format is produced as a final output from the GF compiler.
|
||||
-- The API is meant to be used for embedding GF grammars in Haskell
|
||||
-- programs
|
||||
-------------------------------------------------
|
||||
|
||||
module PGF(
|
||||
-- * PGF
|
||||
PGF,
|
||||
readPGF,
|
||||
|
||||
-- * Identifiers
|
||||
CId, mkCId, wildCId,
|
||||
showCId, readCId,
|
||||
|
||||
-- * Languages
|
||||
Language,
|
||||
showLanguage, readLanguage,
|
||||
languages, abstractName, languageCode,
|
||||
|
||||
-- * Types
|
||||
Type, Hypo,
|
||||
showType, readType,
|
||||
mkType, mkHypo, mkDepHypo, mkImplHypo,
|
||||
categories, startCat,
|
||||
|
||||
-- * Functions
|
||||
functions, functionType,
|
||||
|
||||
-- * Expressions & Trees
|
||||
-- ** Tree
|
||||
Tree,
|
||||
|
||||
-- ** Expr
|
||||
Expr,
|
||||
showExpr, readExpr,
|
||||
mkApp, unApp,
|
||||
mkStr, unStr,
|
||||
mkInt, unInt,
|
||||
mkDouble, unDouble,
|
||||
mkMeta, isMeta,
|
||||
|
||||
-- * Operations
|
||||
-- ** Linearization
|
||||
linearize, linearizeAllLang, linearizeAll,
|
||||
showPrintName,
|
||||
|
||||
-- ** Parsing
|
||||
parse, parseWithRecovery, canParse, parseAllLang, parseAll,
|
||||
|
||||
-- ** Evaluation
|
||||
PGF.compute, paraphrase,
|
||||
|
||||
-- ** Type Checking
|
||||
-- | The type checker in PGF does both type checking and renaming
|
||||
-- i.e. it verifies that all identifiers are declared and it
|
||||
-- distinguishes between global function or type indentifiers and
|
||||
-- variable names. The type checker should always be applied on
|
||||
-- expressions entered by the user i.e. those produced via functions
|
||||
-- like 'readType' and 'readExpr' because otherwise unexpected results
|
||||
-- could appear. All typechecking functions returns updated versions
|
||||
-- of the input types or expressions because the typechecking could
|
||||
-- also lead to metavariables instantiations.
|
||||
checkType, checkExpr, inferExpr,
|
||||
TcError(..), ppTcError,
|
||||
|
||||
-- ** Word Completion (Incremental Parsing)
|
||||
complete,
|
||||
Incremental.ParseState,
|
||||
Incremental.initState, Incremental.nextState, Incremental.getCompletions, Incremental.recoveryStates, Incremental.extractTrees,
|
||||
|
||||
-- ** Generation
|
||||
generateRandom, generateAll, generateAllDepth,
|
||||
|
||||
-- ** Morphological Analysis
|
||||
Lemma, Analysis, Morpho,
|
||||
lookupMorpho, buildMorpho,
|
||||
|
||||
-- ** Visualizations
|
||||
graphvizAbstractTree,
|
||||
graphvizParseTree,
|
||||
graphvizDependencyTree,
|
||||
graphvizAlignment,
|
||||
|
||||
-- * Browsing
|
||||
browse
|
||||
) where
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Linearize
|
||||
import PGF.Generate
|
||||
import PGF.TypeCheck
|
||||
import PGF.Paraphrase
|
||||
import PGF.VisualizeTree
|
||||
import PGF.Macros
|
||||
import PGF.Expr (Tree)
|
||||
import PGF.Morphology
|
||||
import PGF.Data hiding (functions)
|
||||
import PGF.Binary
|
||||
import qualified PGF.Parsing.FCFG.Active as Active
|
||||
import qualified PGF.Parsing.FCFG.Incremental as Incremental
|
||||
import qualified GF.Compile.GeneratePMCFG as PMCFG
|
||||
|
||||
import GF.Infra.Option
|
||||
import GF.Data.Utilities (replace)
|
||||
|
||||
import Data.Char
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.IntMap as IntMap
|
||||
import Data.Maybe
|
||||
import Data.Binary
|
||||
import Data.List(mapAccumL)
|
||||
import System.Random (newStdGen)
|
||||
import Control.Monad
|
||||
import Text.PrettyPrint
|
||||
|
||||
---------------------------------------------------
|
||||
-- Interface
|
||||
---------------------------------------------------
|
||||
|
||||
-- | Reads file in Portable Grammar Format and produces
|
||||
-- 'PGF' structure. The file is usually produced with:
|
||||
--
|
||||
-- > $ gf -make <grammar file name>
|
||||
readPGF :: FilePath -> IO PGF
|
||||
|
||||
-- | Linearizes given expression as string in the language
|
||||
linearize :: PGF -> Language -> Tree -> String
|
||||
|
||||
-- | Tries to parse the given string in the specified language
|
||||
-- and to produce abstract syntax expression. An empty
|
||||
-- list is returned if the parsing is not successful. The list may also
|
||||
-- contain more than one element if the grammar is ambiguous.
|
||||
-- Throws an exception if the given language cannot be used
|
||||
-- for parsing, see 'canParse'.
|
||||
parse :: PGF -> Language -> Type -> String -> [Tree]
|
||||
|
||||
parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> [Tree]
|
||||
|
||||
-- | Checks whether the given language can be used for parsing.
|
||||
canParse :: PGF -> Language -> Bool
|
||||
|
||||
-- | The same as 'linearizeAllLang' but does not return
|
||||
-- the language.
|
||||
linearizeAll :: PGF -> Tree -> [String]
|
||||
|
||||
-- | Linearizes given expression as string in all languages
|
||||
-- available in the grammar.
|
||||
linearizeAllLang :: PGF -> Tree -> [(Language,String)]
|
||||
|
||||
-- | Show the printname of a type
|
||||
showPrintName :: PGF -> Language -> Type -> String
|
||||
|
||||
-- | The same as 'parseAllLang' but does not return
|
||||
-- the language.
|
||||
parseAll :: PGF -> Type -> String -> [[Tree]]
|
||||
|
||||
-- | Tries to parse the given string with all available languages.
|
||||
-- Languages which cannot be used for parsing (see 'canParse')
|
||||
-- are ignored.
|
||||
-- The returned list contains pairs of language
|
||||
-- and list of abstract syntax expressions
|
||||
-- (this is a list, since grammars can be ambiguous).
|
||||
-- Only those languages
|
||||
-- for which at least one parsing is possible are listed.
|
||||
parseAllLang :: PGF -> Type -> String -> [(Language,[Tree])]
|
||||
|
||||
-- | The same as 'generateAllDepth' but does not limit
|
||||
-- the depth in the generation.
|
||||
generateAll :: PGF -> Type -> [Expr]
|
||||
|
||||
-- | Generates an infinite list of random abstract syntax expressions.
|
||||
-- This is usefull for tree bank generation which after that can be used
|
||||
-- for grammar testing.
|
||||
generateRandom :: PGF -> Type -> IO [Expr]
|
||||
|
||||
-- | Generates an exhaustive possibly infinite list of
|
||||
-- abstract syntax expressions. A depth can be specified
|
||||
-- to limit the search space.
|
||||
generateAllDepth :: PGF -> Type -> Maybe Int -> [Expr]
|
||||
|
||||
-- | List of all languages available in the given grammar.
|
||||
languages :: PGF -> [Language]
|
||||
|
||||
-- | Gets the RFC 4646 language tag
|
||||
-- of the language which the given concrete syntax implements,
|
||||
-- if this is listed in the source grammar.
|
||||
-- Example language tags include @\"en\"@ for English,
|
||||
-- and @\"en-UK\"@ for British English.
|
||||
languageCode :: PGF -> Language -> Maybe String
|
||||
|
||||
-- | The abstract language name is the name of the top-level
|
||||
-- abstract module
|
||||
abstractName :: PGF -> Language
|
||||
|
||||
-- | List of all categories defined in the given grammar.
|
||||
-- The categories are defined in the abstract syntax
|
||||
-- with the \'cat\' keyword.
|
||||
categories :: PGF -> [CId]
|
||||
|
||||
-- | The start category is defined in the grammar with
|
||||
-- the \'startcat\' flag. This is usually the sentence category
|
||||
-- but it is not necessary. Despite that there is a start category
|
||||
-- defined you can parse with any category. The start category
|
||||
-- definition is just for convenience.
|
||||
startCat :: PGF -> Type
|
||||
|
||||
-- | List of all functions defined in the abstract syntax
|
||||
functions :: PGF -> [CId]
|
||||
|
||||
-- | The type of a given function
|
||||
functionType :: PGF -> CId -> Maybe Type
|
||||
|
||||
-- | Complete the last word in the given string. If the input
|
||||
-- is empty or ends in whitespace, the last word is considred
|
||||
-- to be the empty string. This means that the completions
|
||||
-- will be all possible next words.
|
||||
complete :: PGF -> Language -> Type -> String
|
||||
-> [String] -- ^ Possible completions,
|
||||
-- including the given input.
|
||||
|
||||
|
||||
---------------------------------------------------
|
||||
-- Implementation
|
||||
---------------------------------------------------
|
||||
|
||||
readPGF f = decodeFile f >>= addParsers
|
||||
|
||||
-- Adds parsers for all concretes that don't have a parser and that have parser=ondemand.
|
||||
addParsers :: PGF -> IO PGF
|
||||
addParsers pgf = do cncs <- sequence [if wantsParser cnc then addParser lang cnc else return (lang,cnc)
|
||||
| (lang,cnc) <- Map.toList (concretes pgf)]
|
||||
return pgf { concretes = Map.fromList cncs }
|
||||
where
|
||||
wantsParser cnc = isNothing (parser cnc) && Map.lookup (mkCId "parser") (cflags cnc) == Just "ondemand"
|
||||
addParser lang cnc = do pinfo <- PMCFG.convertConcrete noOptions (abstract pgf) lang cnc
|
||||
return (lang,cnc { parser = Just pinfo })
|
||||
|
||||
linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang
|
||||
|
||||
parse pgf lang typ s =
|
||||
case Map.lookup lang (concretes pgf) of
|
||||
Just cnc -> case parser cnc of
|
||||
Just pinfo -> if Map.lookup (mkCId "erasing") (cflags cnc) == Just "on"
|
||||
then Incremental.parse pgf lang typ (words s)
|
||||
else Active.parse "t" pinfo typ (words s)
|
||||
Nothing -> error ("No parser built for language: " ++ showCId lang)
|
||||
Nothing -> error ("Unknown language: " ++ showCId lang)
|
||||
|
||||
parseWithRecovery pgf lang typ open_typs s = Incremental.parseWithRecovery pgf lang typ open_typs (words s)
|
||||
|
||||
canParse pgf cnc = isJust (lookParser pgf cnc)
|
||||
|
||||
linearizeAll mgr = map snd . linearizeAllLang mgr
|
||||
linearizeAllLang mgr t =
|
||||
[(lang,PGF.linearize mgr lang t) | lang <- languages mgr]
|
||||
|
||||
showPrintName pgf lang (DTyp _ c _) = realize $ lookPrintName pgf lang c
|
||||
|
||||
parseAll mgr typ = map snd . parseAllLang mgr typ
|
||||
|
||||
parseAllLang mgr typ s =
|
||||
[(lang,ts) | lang <- languages mgr, canParse mgr lang, let ts = parse mgr lang typ s, not (null ts)]
|
||||
|
||||
generateRandom pgf cat = do
|
||||
gen <- newStdGen
|
||||
return $ genRandom gen pgf cat
|
||||
|
||||
generateAll pgf cat = generate pgf cat Nothing
|
||||
generateAllDepth pgf cat = generate pgf cat
|
||||
|
||||
abstractName pgf = absname pgf
|
||||
|
||||
languages pgf = cncnames pgf
|
||||
|
||||
languageCode pgf lang =
|
||||
fmap (replace '_' '-') $ lookConcrFlag pgf lang (mkCId "language")
|
||||
|
||||
categories pgf = [c | (c,hs) <- Map.toList (cats (abstract pgf))]
|
||||
|
||||
startCat pgf = DTyp [] (lookStartCat pgf) []
|
||||
|
||||
functions pgf = Map.keys (funs (abstract pgf))
|
||||
|
||||
functionType pgf fun =
|
||||
case Map.lookup fun (funs (abstract pgf)) of
|
||||
Just (ty,_,_) -> Just ty
|
||||
Nothing -> Nothing
|
||||
|
||||
complete pgf from typ input =
|
||||
let (ws,prefix) = tokensAndPrefix input
|
||||
state0 = Incremental.initState pgf from typ
|
||||
in case loop state0 ws of
|
||||
Nothing -> []
|
||||
Just state ->
|
||||
(if null prefix && not (null (Incremental.extractTrees state typ)) then [unwords ws ++ " "] else [])
|
||||
++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Incremental.getCompletions state prefix)]
|
||||
where
|
||||
tokensAndPrefix :: String -> ([String],String)
|
||||
tokensAndPrefix s | not (null s) && isSpace (last s) = (ws, "")
|
||||
| null ws = ([],"")
|
||||
| otherwise = (init ws, last ws)
|
||||
where ws = words s
|
||||
|
||||
loop ps [] = Just ps
|
||||
loop ps (t:ts) = case Incremental.nextState ps t of
|
||||
Left es -> Nothing
|
||||
Right ps -> loop ps ts
|
||||
|
||||
-- | Converts an expression to normal form
|
||||
compute :: PGF -> Expr -> Expr
|
||||
compute pgf = PGF.Data.normalForm (funs (abstract pgf)) 0 []
|
||||
|
||||
browse :: PGF -> CId -> Maybe (String,[CId],[CId])
|
||||
browse pgf id = fmap (\def -> (def,producers,consumers)) definition
|
||||
where
|
||||
definition = case Map.lookup id (funs (abstract pgf)) of
|
||||
Just (ty,_,eqs) -> Just $ render (text "fun" <+> ppCId id <+> colon <+> ppType 0 [] ty $$
|
||||
if null eqs
|
||||
then empty
|
||||
else text "def" <+> vcat [let (scope,ds) = mapAccumL (ppPatt 9) [] patts
|
||||
in ppCId id <+> hsep ds <+> char '=' <+> ppExpr 0 scope res | Equ patts res <- eqs])
|
||||
Nothing -> case Map.lookup id (cats (abstract pgf)) of
|
||||
Just hyps -> Just $ render (text "cat" <+> ppCId id <+> hsep (snd (mapAccumL ppHypo [] hyps)))
|
||||
Nothing -> Nothing
|
||||
|
||||
(producers,consumers) = Map.foldWithKey accum ([],[]) (funs (abstract pgf))
|
||||
where
|
||||
accum f (ty,_,_) (plist,clist) =
|
||||
let !plist' = if id `elem` ps then f : plist else plist
|
||||
!clist' = if id `elem` cs then f : clist else clist
|
||||
in (plist',clist')
|
||||
where
|
||||
(ps,cs) = tyIds ty
|
||||
|
||||
tyIds (DTyp hyps cat es) = (foldr expIds (cat:concat css) es,concat pss)
|
||||
where
|
||||
(pss,css) = unzip [tyIds ty | (_,_,ty) <- hyps]
|
||||
|
||||
expIds (EAbs _ _ e) ids = expIds e ids
|
||||
expIds (EApp e1 e2) ids = expIds e1 (expIds e2 ids)
|
||||
expIds (EFun id) ids = id : ids
|
||||
expIds (ETyped e _) ids = expIds e ids
|
||||
expIds _ ids = ids
|
||||
Reference in New Issue
Block a user