remove the old Haskell runtime

This commit is contained in:
krangelov
2019-09-19 22:40:40 +02:00
parent acb70ccc1b
commit b3c07d45b9
41 changed files with 333 additions and 6353 deletions

View File

@@ -1,394 +1,219 @@
{-# LANGUAGE BangPatterns #-}
-------------------------------------------------
-- |
-- Module : PGF
-- Maintainer : Krasimir Angelov
-- Stability : stable
-- Portability : portable
--
-- This module is an Application Programming Interface to
-- load and interpret grammars compiled in Portable Grammar Format (PGF).
-- The PGF format is produced as a final output from the GF compiler.
-- The API is meant to be used for embedding GF grammars in Haskell
-- programs
-------------------------------------------------
module PGF (PGF, readPGF, showPGF,
abstractName,
module PGF(
-- * PGF
PGF,
readPGF, showPGF,
CId, mkCId, wildCId, showCId, readCId,
categories, categoryContext, categoryProbability,
functions, functionsByCat, functionType, functionIsDataCon, browse,
-- * Identifiers
CId, mkCId, wildCId,
showCId, readCId,
-- extra
ppCId, pIdent, utf8CId,
PGF2.Expr,Tree,showExpr,PGF2.readExpr,pExpr,pIdent,
mkAbs,unAbs,
mkApp,unApp,unapply,
PGF2.mkStr,PGF2.unStr,
PGF2.mkInt,PGF2.unInt,
PGF2.mkFloat,PGF2.unFloat,
PGF2.mkMeta,PGF2.unMeta,
PGF2.exprSize, exprFunctions,PGF2.exprSubstitute,
compute,
rankTreesByProbs,treeProbability,
-- * Languages
Language,
showLanguage, readLanguage,
languages, abstractName, languageCode,
TcError, ppTcError, inferExpr, checkType,
-- * Types
Type, Hypo,
showType, readType,
mkType, mkHypo, mkDepHypo, mkImplHypo,
unType,
categories, categoryContext, startCat,
PGF2.Type, PGF2.Hypo, showType, showContext, PGF2.readType,
mkType, unType,
-- * Functions
functions, functionsByCat, functionType, missingLins,
Token,
-- * Expressions & Trees
-- ** Tree
Tree,
Language, readLanguage, showLanguage,
languages, startCat, languageCode,
linearize, bracketedLinearize, tabularLinearizes, showBracketedString,
ParseOutput(..), parse, parse_, complete,
PGF2.BracketedString(..), PGF2.flattenBracketedString,
hasLinearization,
showPrintName,
Morpho, buildMorpho,
lookupMorpho, isInMorpho, morphoMissing, morphoKnown, fullFormLexicon,
-- ** Expr
Expr,
showExpr, readExpr,
mkAbs, unAbs,
mkApp, unApp, unapply,
mkStr, unStr,
mkInt, unInt,
mkDouble, unDouble,
mkFloat, unFloat,
mkMeta, unMeta,
exprSubstitute,
Labels, getDepLabels, CncLabels, getCncDepLabels,
-- extra
pExpr, exprSize, exprFunctions,
generateAllDepth, generateRandom, generateRandomFrom, generateRandomDepth, generateRandomFromDepth,
generateFromDepth,
-- * Operations
-- ** Linearization
linearize, linearizeAllLang, linearizeAll, bracketedLinearize, tabularLinearizes,
groupResults, -- lins of trees by language, removing duplicates
showPrintName,
BracketedString(..), FId, LIndex, Token,
Forest.showBracketedString,flattenBracketedString,
PGF2.GraphvizOptions(..),
graphvizAbstractTree, graphvizParseTree, graphvizAlignment, graphvizDependencyTree, graphvizParseTreeDep,
-- ** Parsing
parse, parseAllLang, parseAll, parse_, parseWithRecovery, complete,
-- * Tries
ATree(..),Trie(..),toATree,toTrie,
readProbabilitiesFromFile,
groupResults, conlls2latexDoc, gizaAlignment
) where
-- ** Evaluation
PGF.compute, paraphrase,
-- ** Type Checking
-- | The type checker in PGF does both type checking and renaming
-- i.e. it verifies that all identifiers are declared and it
-- distinguishes between global function or type indentifiers and
-- variable names. The type checker should always be applied on
-- expressions entered by the user i.e. those produced via functions
-- like 'readType' and 'readExpr' because otherwise unexpected results
-- could appear. All typechecking functions returns updated versions
-- of the input types or expressions because the typechecking could
-- also lead to metavariables instantiations.
checkType, checkExpr, inferExpr,
TcError(..), ppTcError,
-- ** Low level parsing API
Parse.ParseState,
Parse.initState, Parse.nextState, Parse.getCompletions, Parse.recoveryStates,
Parse.ParseInput(..), Parse.simpleParseInput, Parse.mkParseInput,
Parse.ParseOutput(..), Parse.getParseOutput,
Parse.getContinuationInfo,
-- ** Generation
-- | The PGF interpreter allows automatic generation of
-- abstract syntax expressions of a given type. Since the
-- type system of GF allows dependent types, the generation
-- is in general undecidable. In fact, the set of all type
-- signatures in the grammar is equivalent to a Turing-complete language (Prolog).
--
-- There are several generation methods which mainly differ in:
--
-- * whether the expressions are sequentially or randomly generated?
--
-- * are they generated from a template? The template is an expression
-- containing meta variables which the generator will fill in.
--
-- * is there a limit of the depth of the expression?
-- The depth can be used to limit the search space, which
-- in some cases is the only way to make the search decidable.
generateAll, generateAllDepth,
generateFrom, generateFromDepth,
generateRandom, generateRandomDepth,
generateRandomFrom, generateRandomFromDepth,
-- ** Morphological Analysis
Lemma, Analysis, Morpho,
lookupMorpho, buildMorpho, fullFormLexicon,
morphoMissing,
-- extra:
morphoKnown, isInMorpho,
-- ** Visualizations
graphvizAbstractTree,
graphvizParseTree,
graphvizParseTreeDep,
graphvizDependencyTree,
graphvizBracketedString,
graphvizAlignment,
gizaAlignment,
GraphvizOptions(..),
graphvizDefaults,
conlls2latexDoc,
-- extra:
Labels, getDepLabels,
CncLabels, getCncDepLabels,
-- * Probabilities
Probabilities,
mkProbabilities,
defaultProbabilities,
showProbabilities,
readProbabilitiesFromFile,
-- extra:
probTree, setProbabilities, rankTreesByProbs,
-- -- ** SortTop
-- forExample,
-- * Browsing
browse,
-- * Tries
ATree(..),Trie(..),toATree,toTrie
) where
import PGF.CId
import PGF.Linearize
--import PGF.SortTop
import PGF.Generate
import PGF.TypeCheck
import PGF.Paraphrase
import PGF.VisualizeTree
import PGF.Probabilistic
import PGF.Macros
import PGF.Expr (Tree)
import PGF.Morphology
import PGF.Data
import PGF.Binary()
import qualified PGF.Forest as Forest
import qualified PGF.Parse as Parse
import PGF.Utilities(replace)
import PGF.Printer
import Text.PrettyPrint
--import Data.Char
import PGF.Internal
import qualified PGF2
import qualified Data.Map as Map
--import qualified Data.IntMap as IntMap
--import Data.Maybe
import Data.Binary
import Data.List(mapAccumL)
--import System.Random (newStdGen)
--import Control.Monad
import Text.PrettyPrint
import qualified Text.ParserCombinators.ReadP as RP
import Data.List(sortBy)
import Text.PrettyPrint(text)
import Data.Char(isDigit)
---------------------------------------------------
-- Interface
---------------------------------------------------
readPGF = PGF2.readPGF
-- | Reads file in Portable Grammar Format and produces
-- 'PGF' structure. The file is usually produced with:
--
-- > $ gf -make <grammar file name>
readPGF :: FilePath -> IO PGF
showPGF gr = PGF2.showPGF gr
-- | Tries to parse the given string in the specified language
-- and to produce abstract syntax expression.
parse :: PGF -> Language -> Type -> String -> [Tree]
readLanguage = readCId
showLanguage (CId s) = s
-- | The same as 'parseAllLang' but does not return
-- the language.
parseAll :: PGF -> Type -> String -> [[Tree]]
startCat = PGF2.startCat
languageCode pgf lang = Just (PGF2.languageCode (lookConcr pgf lang))
-- | Tries to parse the given string with all available languages.
-- The returned list contains pairs of language
-- and list of abstract syntax expressions
-- (this is a list, since grammars can be ambiguous).
-- Only those languages
-- for which at least one parsing is possible are listed.
parseAllLang :: PGF -> Type -> String -> [(Language,[Tree])]
abstractName gr = CId (PGF2.abstractName gr)
-- | The same as 'parse' but returns more detailed information
parse_ :: PGF -> Language -> Type -> Maybe Int -> String -> (Parse.ParseOutput,BracketedString)
categories gr = map CId (PGF2.categories gr)
categoryContext gr (CId c) = PGF2.categoryContext gr c
categoryProbability gr (CId c) = PGF2.categoryProbability gr c
-- | This is an experimental function. Use it on your own risk
parseWithRecovery :: PGF -> Language -> Type -> [Type] -> Maybe Int -> String -> (Parse.ParseOutput,BracketedString)
functions gr = map CId (PGF2.functions gr)
functionsByCat gr (CId c) = map CId (PGF2.functionsByCat gr c)
functionType gr (CId f) = PGF2.functionType gr f
functionIsDataCon gr (CId f) = PGF2.functionIsDataCon gr f
-- | List of all languages available in the given grammar.
languages :: PGF -> [Language]
-- | Gets the RFC 4646 language tag
-- of the language which the given concrete syntax implements,
-- if this is listed in the source grammar.
-- Example language tags include @\"en\"@ for English,
-- and @\"en-UK\"@ for British English.
languageCode :: PGF -> Language -> Maybe String
-- | The abstract language name is the name of the top-level
-- abstract module
abstractName :: PGF -> Language
-- | List of all categories defined in the given grammar.
-- The categories are defined in the abstract syntax
-- with the \'cat\' keyword.
categories :: PGF -> [CId]
categoryContext :: PGF -> CId -> Maybe [Hypo]
-- | The start category is defined in the grammar with
-- the \'startcat\' flag. This is usually the sentence category
-- but it is not necessary. Despite that there is a start category
-- defined you can parse with any category. The start category
-- definition is just for convenience.
startCat :: PGF -> Type
-- | List of all functions defined in the abstract syntax
functions :: PGF -> [CId]
-- | List of all functions defined for a given category
functionsByCat :: PGF -> CId -> [CId]
-- | The type of a given function
functionType :: PGF -> CId -> Maybe Type
type Tree = PGF2.Expr
type Labels = Map.Map CId [String]
type CncLabels = [(String, String -> Maybe (String -> String,String,String))]
---------------------------------------------------
-- Implementation
---------------------------------------------------
mkCId x = CId x
wildCId = CId "_"
showCId (CId x) = x
readCId s = Just (CId s)
readPGF f = decodeFile f
showExpr xs e = PGF2.showExpr [x | CId x <- xs] e
showPGF pgf = render (ppPGF pgf)
pExpr = RP.readS_to_P PGF2.pExpr
pIdent = RP.readS_to_P PGF2.pIdent
parse pgf lang typ s =
case parse_ pgf lang typ (Just 4) s of
(Parse.ParseOk ts,_) -> ts
_ -> []
mkAbs bind_type (CId var) e = PGF2.mkAbs bind_type var e
unAbs e = case PGF2.unAbs e of
Just (bind_type, var, e) -> Just (bind_type, CId var, e)
Nothing -> Nothing
parseAll mgr typ = map snd . parseAllLang mgr typ
mkApp (CId f) es = PGF2.mkApp f es
unApp e = case PGF2.unApp e of
Just (f,es) -> Just (CId f,es)
Nothing -> Nothing
parseAllLang mgr typ s =
[(lang,ts) | lang <- languages mgr, (Parse.ParseOk ts,_) <- [parse_ mgr lang typ (Just 4) s]]
unapply = PGF2.unapply
parse_ pgf lang typ dp s =
case Map.lookup lang (concretes pgf) of
Just cnc -> Parse.parse pgf lang typ dp (words s)
Nothing -> error ("Unknown language: " ++ showCId lang)
instance Read PGF2.Expr where
readsPrec _ s = case PGF2.readExpr s of
Just e -> [(e,"")]
Nothing -> []
parseWithRecovery pgf lang typ open_typs dp s = Parse.parseWithRecovery pgf lang typ open_typs dp (words s)
showType xs ty = PGF2.showType [x | CId x <- xs] ty
showContext xs hypos = PGF2.showContext [x | CId x <- xs] hypos
complete :: PGF -> Language -> Type -> String -> String -> (BracketedString,String,Map.Map Token [CId])
complete pgf from typ input prefix =
let ws = words input
ps0 = Parse.initState pgf from typ
(ps,ws') = loop ps0 ws
bs = snd (Parse.getParseOutput ps typ Nothing)
in if not (null ws')
then (bs, unwords (if null prefix then ws' else ws'++[prefix]), Map.empty)
else (bs, prefix, fmap getFuns (Parse.getCompletions ps prefix))
where
loop ps [] = (ps,[])
loop ps (w:ws) = case Parse.nextState ps (Parse.simpleParseInput w) of
Left es -> (ps,w:ws)
Right ps -> loop ps ws
mkType hypos (CId var) es = PGF2.mkType [(bt,var,ty) | (bt,CId var,ty) <- hypos] var es
unType ty = case PGF2.unType ty of
(hypos,var,es) -> ([(bt,CId var,ty) | (bt,var,ty) <- hypos],CId var,es)
getFuns ps = [cid | (funid,cid,seq) <- snd . head $ Map.toList contInfo]
where
contInfo = Parse.getContinuationInfo ps
linearize pgf lang e = PGF2.linearize (lookConcr pgf lang) e
bracketedLinearize pgf lang e = PGF2.bracketedLinearize (lookConcr pgf lang) e
tabularLinearizes pgf lang e = [PGF2.tabularLinearize (lookConcr pgf lang) e]
showBracketedString = PGF2.showBracketedString
groupResults :: [[(Language,String)]] -> [(Language,[String])]
groupResults = Map.toList . foldr more Map.empty . start . concat
where
start ls = [(l,[s]) | (l,s) <- ls]
more (l,s) =
Map.insertWith (\ [x] xs -> if elem x xs then xs else (x : xs)) l s
type TcError = String
abstractName pgf = absname pgf
-- | This data type encodes the different outcomes which you could get from the parser.
data ParseOutput
= ParseFailed Int -- ^ The integer is the position in number of tokens where the parser failed.
| TypeError [(FId,TcError)] -- ^ The parsing was successful but none of the trees is type correct.
-- The forest id ('FId') points to the bracketed string from the parser
-- where the type checking failed. More than one error is returned
-- if there are many analizes for some phrase but they all are not type correct.
| ParseOk [Tree] -- ^ If the parsing and the type checking are successful we get a list of abstract syntax trees.
-- The list should be non-empty.
| ParseIncomplete -- ^ The sentence is not complete. Only partial output is produced
languages pgf = Map.keys (concretes pgf)
parse pgf lang cat s =
case PGF2.parse (lookConcr pgf lang) cat s of
PGF2.ParseOk ts -> map fst ts
_ -> []
languageCode pgf lang =
case lookConcrFlag pgf lang (mkCId "language") of
Just (LStr s) -> Just (replace '_' '-' s)
_ -> Nothing
parse_ pgf lang cat dp s =
case PGF2.parse (lookConcr pgf lang) cat s of
PGF2.ParseFailed pos _ -> (ParseFailed pos, PGF2.Leaf s)
PGF2.ParseOk ts -> (ParseOk (map fst ts), PGF2.Leaf s)
PGF2.ParseIncomplete -> (ParseIncomplete, PGF2.Leaf s)
categories pgf = [c | (c,hs) <- Map.toList (cats (abstract pgf))]
complete pgf lang cat s prefix =
let compls = Map.fromListWith (++) [(tok,[CId fun]) | (tok,_,fun,_) <- PGF2.complete (lookConcr pgf lang) cat s prefix]
in (PGF2.Leaf [],s,compls)
categoryContext pgf cat =
case Map.lookup cat (cats (abstract pgf)) of
Just (hypos,_,_) -> Just hypos
Nothing -> Nothing
hasLinearization pgf lang (CId f) = PGF2.hasLinearization (lookConcr pgf lang) f
startCat pgf = DTyp [] (lookStartCat pgf) []
ppTcError s = s
functions pgf = Map.keys (funs (abstract pgf))
inferExpr gr e =
case PGF2.inferExpr gr e of
Right res -> Right res
Left msg -> Left (text msg)
functionsByCat pgf cat =
case Map.lookup cat (cats (abstract pgf)) of
Just (_,fns,_) -> map snd fns
Nothing -> []
checkType gr ty =
case PGF2.checkType gr ty of
Right res -> Right res
Left msg -> Left (text msg)
functionType pgf fun =
case Map.lookup fun (funs (abstract pgf)) of
Just (ty,_,_,_) -> Just ty
Nothing -> Nothing
showPrintName pgf lang (CId f) =
case PGF2.printName (lookConcr pgf lang) f of
Just n -> n
Nothing -> f
-- | Converts an expression to normal form
compute :: PGF -> Expr -> Expr
compute pgf = PGF.Data.normalForm (funs (abstract pgf),const Nothing) 0 []
getDepLabels :: String -> Labels
getDepLabels s = Map.fromList [(mkCId f,ls) | f:ls <- map words (lines s)]
exprSize :: Expr -> Int
exprSize (EAbs _ _ e) = exprSize e
exprSize (EApp e1 e2) = exprSize e1 + exprSize e2
exprSize (ETyped e ty)= exprSize e
exprSize (EImplArg e) = exprSize e
exprSize _ = 1
getCncDepLabels :: String -> CncLabels
getCncDepLabels = PGF2.getCncDepLabels
exprFunctions :: Expr -> [CId]
exprFunctions (EAbs _ _ e) = exprFunctions e
exprFunctions (EApp e1 e2) = exprFunctions e1 ++ exprFunctions e2
exprFunctions (ETyped e ty)= exprFunctions e
exprFunctions (EImplArg e) = exprFunctions e
exprFunctions (EFun f) = [f]
exprFunctions _ = []
generateAllDepth gr ty _ = map fst (PGF2.generateAll gr ty)
generateFromDepth = error "generateFromDepth is not implemented"
generateRandom = error "generateRandom is not implemented"
generateRandomFrom = error "generateRandomFrom is not implemented"
generateRandomDepth = error "generateRandomDepth is not implemented"
generateRandomFromDepth = error "generateRandomFromDepth is not implemented"
--exprFunctions :: Expr -> [Fun]
exprFunctions e = [CId f | f <- PGF2.exprFunctions e]
compute = error "compute is not implemented"
-- | rank from highest to lowest probability
rankTreesByProbs :: PGF -> [PGF2.Expr] -> [(PGF2.Expr,Double)]
rankTreesByProbs pgf ts = sortBy (\ (_,p) (_,q) -> compare q p)
[(t, realToFrac (PGF2.treeProbability pgf t)) | t <- ts]
treeProbability = PGF2.treeProbability
languages pgf = fmap CId (Map.keys (PGF2.languages pgf))
type Morpho = PGF2.Concr
buildMorpho pgf lang = lookConcr pgf lang
lookupMorpho cnc w = [(CId lemma,an) | (lemma,an,_) <- PGF2.lookupMorpho cnc w]
isInMorpho cnc w = not (null (PGF2.lookupMorpho cnc w))
fullFormLexicon cnc = [(w, [(CId fun,an) | (fun,an,_) <- analyses]) | (w, analyses) <- PGF2.fullFormLexicon cnc]
graphvizAbstractTree pgf (funs,cats) = PGF2.graphvizAbstractTree pgf PGF2.graphvizDefaults{PGF2.noFun=not funs,PGF2.noCat=not cats}
graphvizParseTree pgf lang = PGF2.graphvizParseTree (lookConcr pgf lang)
graphvizAlignment pgf langs = PGF2.graphvizWordAlignment (map (lookConcr pgf) langs) PGF2.graphvizDefaults
graphvizDependencyTree format debug lbls cnclbls pgf lang e =
let to_lbls' lbls = Map.fromList [(id,xs) | (CId id,xs) <- Map.toList lbls]
in PGF2.graphvizDependencyTree format debug (fmap to_lbls' lbls) cnclbls (lookConcr pgf lang) e
graphvizParseTreeDep = error "graphvizParseTreeDep is not implemented"
browse :: PGF -> CId -> Maybe (String,[CId],[CId])
browse pgf id = fmap (\def -> (def,producers,consumers)) definition
where
definition = case Map.lookup id (funs (abstract pgf)) of
Just (ty,_,Just (eqs,_),_) -> Just $ render (text "fun" <+> ppCId id <+> colon <+> ppType 0 [] ty $$
if null eqs
then empty
else text "def" <+> vcat [let scope = foldl pattScope [] patts
ds = map (ppPatt 9 scope) patts
in ppCId id <+> hsep ds <+> char '=' <+> ppExpr 0 scope res | Equ patts res <- eqs])
Just (ty,_,Nothing,_) -> Just $ render (text "data" <+> ppCId id <+> colon <+> ppType 0 [] ty)
Nothing -> case Map.lookup id (cats (abstract pgf)) of
Just (hyps,_,_) -> Just $ render (text "cat" <+> ppCId id <+> hsep (snd (mapAccumL (ppHypo 4) [] hyps)))
Nothing -> Nothing
(producers,consumers) = Map.foldrWithKey accum ([],[]) (funs (abstract pgf))
where
accum f (ty,_,_,_) (plist,clist) =
let !plist' = if id `elem` ps then f : plist else plist
!clist' = if id `elem` cs then f : clist else clist
in (plist',clist')
where
(ps,cs) = tyIds ty
tyIds (DTyp hyps cat es) = (foldr expIds (cat:concat css) es,concat pss)
where
(pss,css) = unzip [tyIds ty | (_,_,ty) <- hyps]
expIds (EAbs _ _ e) ids = expIds e ids
expIds (EApp e1 e2) ids = expIds e1 (expIds e2 ids)
expIds (EFun id) ids = id : ids
expIds (ETyped e _) ids = expIds e ids
expIds _ ids = ids
browse = error "browse is not implemented"
-- | A type for plain applicative trees
data ATree t = Other t | App CId [ATree t] deriving Show
@@ -397,9 +222,9 @@ data Trie = Oth Tree | Ap CId [[Trie ]] deriving Show
-- | Convert a 'Tree' to an 'ATree'
toATree :: Tree -> ATree Tree
toATree e = maybe (Other e) app (unApp e)
toATree e = maybe (Other e) app (PGF2.unApp e)
where
app (f,es) = App f (map toATree es)
app (f,es) = App (mkCId f) (map toATree es)
-- | Combine a list of trees into a trie
toTrie = combines . map ((:[]) . singleton)
@@ -420,3 +245,30 @@ toTrie = combines . map ((:[]) . singleton)
where
combine2 (Ap f ts,Ap g us) | f==g = Just (Ap f (combines (ts++us)))
combine2 _ = Nothing
readProbabilitiesFromFile :: FilePath -> IO (Map.Map CId Double)
readProbabilitiesFromFile fpath = do
s <- readFile fpath
return $ Map.fromList [(mkCId f,read p) | f:p:_ <- map words (lines s)]
groupResults :: [[(Language,String)]] -> [(Language,[String])]
groupResults = Map.toList . foldr more Map.empty . start . concat
where
start ls = [(l,[s]) | (l,s) <- ls]
more (l,s) =
Map.insertWith (\ [x] xs -> if elem x xs then xs else (x : xs)) l s
conlls2latexDoc = error "conlls2latexDoc is not implemented"
morphoMissing :: Morpho -> [String] -> [String]
morphoMissing = morphoClassify False
morphoKnown :: Morpho -> [String] -> [String]
morphoKnown = morphoClassify True
morphoClassify :: Bool -> Morpho -> [String] -> [String]
morphoClassify k mo ws = [w | w <- ws, k /= null (lookupMorpho mo w), notLiteral w] where
notLiteral w = not (all isDigit w) ---- should be defined somewhere
gizaAlignment = error "gizaAlignment is not implemented"