From 00f15826af22805827a87fc15691afb2c992d68a Mon Sep 17 00:00:00 2001 From: krasimir Date: Wed, 25 Jan 2017 20:30:54 +0000 Subject: [PATCH] improve the documentation for PGF2 --- src/compiler/GF/Command/Commands2.hs | 8 +- src/runtime/haskell-bind/PGF2.hsc | 102 ++++++++++++++++++------- src/runtime/haskell-bind/PGF2/Expr.hsc | 68 ++++++++++++----- src/runtime/haskell-bind/PGF2/FFI.hs | 2 + 4 files changed, 130 insertions(+), 50 deletions(-) diff --git a/src/compiler/GF/Command/Commands2.hs b/src/compiler/GF/Command/Commands2.hs index 505c22286..4e68c05ff 100644 --- a/src/compiler/GF/Command/Commands2.hs +++ b/src/compiler/GF/Command/Commands2.hs @@ -844,7 +844,7 @@ pgfCommands = Map.fromList [ ts = [hsExpr t|Right ts<-rs,(t,p)<-takeOptNum opts ts] msgs = concatMap (either err ok) rs err msg = ["Parse failed: "++msg] - ok = map (C.showExpr . fst).takeOptNum opts + ok = map (C.showExpr [] . fst).takeOptNum opts cLins env@(pgf,cncs) opts ts = [l|t<-ts,l<-[abs++": "++show t|treebank]++[l|cnc<-cncs,l<-lin cnc t]] @@ -975,7 +975,11 @@ pgfCommands = Map.fromList [ optFile opts = valStrOpts "file" "_gftmp" opts -} - optCat pgf opts = valStrOpts "cat" (C.startCat pgf) opts + optCat pgf opts = + case listFlags "cat" opts of + v:_ -> C.DTyp [] (valueString v) [] + _ -> C.startCat pgf + {- optType pgf opts = let str = valStrOpts "cat" (H.showCId $ H.lookStartCat pgf) opts diff --git a/src/runtime/haskell-bind/PGF2.hsc b/src/runtime/haskell-bind/PGF2.hsc index b9c5412e0..c5da14abf 100644 --- a/src/runtime/haskell-bind/PGF2.hsc +++ b/src/runtime/haskell-bind/PGF2.hsc @@ -1,36 +1,56 @@ {-# LANGUAGE ExistentialQuantification, DeriveDataTypeable, ScopedTypeVariables #-} ------------------------------------------------- -- | +-- Module : PGF2 -- Maintainer : Krasimir Angelov -- Stability : stable -- Portability : portable -- --- This is the Haskell binding to the C run-time system for --- loading and interpreting grammars compiled in Portable Grammar Format (PGF). +-- This module is an Application Programming Interface to +-- load and interpret grammars compiled in the Portable Grammar Format (PGF). +-- The PGF format is produced as the final output from the GF compiler. +-- The API is meant to be used for embedding GF grammars in Haskell +-- programs ------------------------------------------------- + #include #include #include -module PGF2 (-- * CId +module PGF2 (-- * PGF + PGF,readPGF, + + -- * Identifiers CId, - -- * PGF - PGF,readPGF,AbsName,abstractName,Cat,startCat,categories, + + -- * Abstract syntax + AbsName,abstractName, + -- ** Categories + Cat,startCat,categories, + -- ** Functions + Fun,functions, functionsByCat, functionType, hasLinearization, + -- ** Expressions + Expr,showExpr,readExpr,mkApp,unApp,mkStr,mkInt,mkFloat, + -- ** Types + Type(..), Hypo, BindType(..), showType, + -- * Concrete syntax - ConcName,Concr,languages,parse, - parseWithHeuristics, parseWithOracle, - hasLinearization,linearize,linearizeAll,alignWords, - -- * Types - Type(..), Hypo, BindType(..), showType, functionType, - -- * Trees - Expr,Fun,readExpr,showExpr,mkApp,unApp,mkStr,mkInt,mkFloat, - graphvizAbstractTree,graphvizParseTree, - -- * Morphology + ConcName,Concr,languages, + -- ** Linearization + linearize,linearizeAll, + alignWords, + -- ** Parsing + parse, parseWithHeuristics, + -- ** Generation + generateAll, + -- ** Morphological Analysis MorphoAnalysis, lookupMorpho, fullFormLexicon, - -- * Generation - functions, functionsByCat, generateAll, + -- ** Visualizations + graphvizAbstractTree,graphvizParseTree, + -- * Exceptions PGFError(..), + -- * Grammar specific callbacks LiteralCallback,literalCallbacks ) where @@ -61,9 +81,13 @@ import Data.Function(on) -- to Concr but has lost its reference to PGF. -type AbsName = String -- ^ Name of abstract syntax -type ConcName = String -- ^ Name of concrete syntax +type AbsName = CId -- ^ Name of abstract syntax +type ConcName = CId -- ^ Name of concrete syntax +-- | Reads file in Portable Grammar Format and produces +-- 'PGF' structure. The file is usually produced with: +-- +-- > $ gf -make readPGF :: FilePath -> IO PGF readPGF fpath = do pool <- gu_new_pool @@ -85,6 +109,7 @@ readPGF fpath = master <- newForeignPtr gu_pool_finalizer pool return PGF {pgf = pgf, pgfMaster = master} +-- | List of all languages available in the grammar. languages :: PGF -> Map.Map ConcName Concr languages p = unsafePerformIO $ @@ -103,8 +128,11 @@ languages p = concr <- fmap (\ptr -> Concr ptr p) $ peek (castPtr value) writeIORef ref $! Map.insert name concr langs -generateAll :: PGF -> Cat -> [(Expr,Float)] -generateAll p cat = +-- | Generates an exhaustive possibly infinite list of +-- all abstract syntax expressions of the given type. +-- The expressions are ordered by their probability. +generateAll :: PGF -> Type -> [(Expr,Float)] +generateAll p (DTyp _ cat _) = unsafePerformIO $ do genPl <- gu_new_pool exprPl <- gu_new_pool @@ -115,11 +143,21 @@ generateAll p cat = exprFPl <- newForeignPtr gu_pool_finalizer exprPl fromPgfExprEnum enum genFPl (p,exprFPl) +-- | The abstract language name is the name of the top-level +-- abstract module abstractName :: PGF -> AbsName abstractName p = unsafePerformIO (peekUtf8CString =<< pgf_abstract_name (pgf p)) -startCat :: PGF -> Cat -startCat p = unsafePerformIO (peekUtf8CString =<< pgf_start_cat (pgf p)) +-- | The start category is defined in the grammar with +-- the \'startcat\' flag. This is usually the sentence category +-- but it is not necessary. Despite that there is a start category +-- defined you can parse with any category. The start category +-- definition is just for convenience. +startCat :: PGF -> Type +startCat p = unsafePerformIO $ do + cat <- pgf_start_cat (pgf p) + cat <- peekUtf8CString cat + return (DTyp [] cat []) loadConcr :: Concr -> FilePath -> IO () loadConcr c fpath = @@ -143,7 +181,8 @@ loadConcr c fpath = unloadConcr :: Concr -> IO () unloadConcr c = pgf_concrete_unload (concr c) -functionType :: PGF -> CId -> Type +-- | The type of a function +functionType :: PGF -> Fun -> Type functionType p fn = unsafePerformIO $ withGuPool $ \tmpPl -> do @@ -185,6 +224,7 @@ functionType p fn = ----------------------------------------------------------------------------- -- Graphviz +-- | Renders an abstract syntax tree in a Graphviz format. graphvizAbstractTree :: PGF -> Expr -> String graphvizAbstractTree p e = unsafePerformIO $ @@ -259,11 +299,11 @@ getAnalysis ref self c_lemma c_anal prob exn = do anal <- peekUtf8CString c_anal writeIORef ref ((lemma, anal, prob):ans) -parse :: Concr -> Cat -> String -> Either String [(Expr,Float)] -parse lang cat sent = parseWithHeuristics lang cat sent (-1.0) [] +parse :: Concr -> Type -> String -> Either String [(Expr,Float)] +parse lang ty sent = parseWithHeuristics lang ty sent (-1.0) [] parseWithHeuristics :: Concr -- ^ the language with which we parse - -> Cat -- ^ the start category + -> Type -- ^ the start category -> String -- ^ the input sentence -> Double -- ^ the heuristic factor. -- A negative value tells the parser @@ -277,7 +317,7 @@ parseWithHeuristics :: Concr -- ^ the language with which we parse -- If a literal has been recognized then the output should -- be Just (expr,probability,end_offset) -> Either String [(Expr,Float)] -parseWithHeuristics lang cat sent heuristic callbacks = +parseWithHeuristics lang (DTyp _ cat _) sent heuristic callbacks = unsafePerformIO $ do exprPl <- gu_new_pool parsePl <- gu_new_pool @@ -427,11 +467,13 @@ parseWithOracle lang cat sent (predict,complete,literal) = return ep Nothing -> do return nullPtr +-- | Returns True if there is a linearization defined for that function in that language hasLinearization :: Concr -> Fun -> Bool hasLinearization lang id = unsafePerformIO $ withGuPool $ \pl -> newUtf8CString id pl >>= pgf_has_linearization (concr lang) +-- | Linearizes an expression as a string in the language linearize :: Concr -> Expr -> String linearize lang e = unsafePerformIO $ withGuPool $ \pl -> @@ -452,6 +494,7 @@ linearize lang e = unsafePerformIO $ else do lin <- gu_string_buf_freeze sb pl peekUtf8CString lin +-- | Generates all possible linearizations of an expression linearizeAll :: Concr -> Expr -> [String] linearizeAll lang e = unsafePerformIO $ do pl <- gu_new_pool @@ -520,6 +563,7 @@ alignWords lang e = unsafePerformIO $ (fids :: [CInt]) <- peekArray (fromIntegral (n_fids :: CInt)) (ptr `plusPtr` (#offset PgfAlignmentPhrase, fids)) return (phrase, map fromIntegral fids) +-- | List of all functions defined in the abstract syntax functions :: PGF -> [Fun] functions p = unsafePerformIO $ @@ -540,6 +584,7 @@ functions p = name <- peekUtf8CString (castPtr key) writeIORef ref $! (name : names) +-- | List of all functions defined for a category functionsByCat :: PGF -> Cat -> [Fun] functionsByCat p cat = unsafePerformIO $ @@ -561,6 +606,9 @@ functionsByCat p cat = name <- peekUtf8CString (castPtr key) writeIORef ref $! (name : names) +-- | List of all categories defined in the grammar. +-- The categories are defined in the abstract syntax +-- with the \'cat\' keyword. categories :: PGF -> [Cat] categories pgf = -- !!! quick hack nub [cat | f<-functions pgf, let DTyp _ cat _=functionType pgf f] diff --git a/src/runtime/haskell-bind/PGF2/Expr.hsc b/src/runtime/haskell-bind/PGF2/Expr.hsc index 5914500de..338d4fa18 100644 --- a/src/runtime/haskell-bind/PGF2/Expr.hsc +++ b/src/runtime/haskell-bind/PGF2/Expr.hsc @@ -8,14 +8,17 @@ import Foreign hiding (unsafePerformIO) import Foreign.C import qualified Text.PrettyPrint as PP import PGF2.FFI +import Data.List(mapAccumL) +-- | An data type that represents +-- identifiers for functions and categories in PGF. type CId = String ppCId = PP.text wildCId = "_" :: CId -type Cat = String -- ^ Name of syntactic category -type Fun = String -- ^ Name of function +type Cat = CId -- ^ Name of syntactic category +type Fun = CId -- ^ Name of function ----------------------------------------------------------------------------- -- Expressions @@ -28,8 +31,9 @@ type Fun = String -- ^ Name of function data Expr = forall a . Expr {expr :: PgfExpr, exprMaster :: a} instance Show Expr where - show = showExpr + show = showExpr [] +-- | Constructs an expression by applying a function to a list of expressions mkApp :: Fun -> [Expr] -> Expr mkApp fun args = unsafePerformIO $ @@ -45,6 +49,7 @@ mkApp fun args = where len = length args +-- | Decomposes an expression into an application of a function unApp :: Expr -> Maybe (Fun,[Expr]) unApp (Expr expr master) = unsafePerformIO $ @@ -58,6 +63,7 @@ unApp (Expr expr master) = c_args <- peekArray (fromIntegral arity) (appl `plusPtr` (#offset PgfApplication, args)) return $ Just (fun, [Expr c_arg master | c_arg <- c_args]) +-- | Constructs an expression from a string literal mkStr :: String -> Expr mkStr str = unsafePerformIO $ @@ -67,6 +73,7 @@ mkStr str = exprFPl <- newForeignPtr gu_pool_finalizer exprPl return (Expr c_expr exprFPl) +-- | Constructs an expression from an integer literal mkInt :: Int -> Expr mkInt val = unsafePerformIO $ do @@ -75,6 +82,7 @@ mkInt val = exprFPl <- newForeignPtr gu_pool_finalizer exprPl return (Expr c_expr exprFPl) +-- | Constructs an expression from a real number mkFloat :: Double -> Expr mkFloat val = unsafePerformIO $ do @@ -83,6 +91,7 @@ mkFloat val = exprFPl <- newForeignPtr gu_pool_finalizer exprPl return (Expr c_expr exprFPl) +-- | parses a 'String' as an expression readExpr :: String -> Maybe Expr readExpr str = unsafePerformIO $ @@ -99,11 +108,15 @@ readExpr str = else do gu_pool_free exprPl return Nothing -ppExpr :: Int -> Expr -> PP.Doc -ppExpr d e = ppParens (d>0) (PP.text (showExpr e)) -- just a quick hack !!! +ppExpr :: Int -> [CId] -> Expr -> PP.Doc +ppExpr d xs e = ppParens (d>0) (PP.text (showExpr xs e)) -- just a quick hack !!! -showExpr :: Expr -> String -showExpr e = +-- | renders an expression as a 'String'. The list +-- of identifiers is the list of all free variables +-- in the expression in order reverse to the order +-- of binding. +showExpr :: [CId] -> Expr -> String +showExpr scope e = unsafePerformIO $ withGuPool $ \tmpPl -> do (sb,out) <- newOut tmpPl @@ -131,25 +144,38 @@ type Hypo = (BindType,CId,Type) -- | renders type as 'String'. showType :: Type -> String -showType = PP.render . ppType 0 +showType = PP.render . ppType 0 [] -ppType :: Int -> Type -> PP.Doc -ppType d (DTyp hyps cat args) - | null hyps = ppRes cat args - | otherwise = let hdocs = map (ppHypo 1) hyps - in ppParens (d > 0) (foldr (\hdoc doc -> hdoc PP.<+> PP.text "->" PP.<+> doc) (ppRes cat args) hdocs) +ppType :: Int -> [CId] -> Type -> PP.Doc +ppType d scope (DTyp hyps cat args) + | null hyps = ppRes scope cat args + | otherwise = let (scope',hdocs) = mapAccumL (ppHypo 1) scope hyps + in ppParens (d > 0) (foldr (\hdoc doc -> hdoc PP.<+> PP.text "->" PP.<+> doc) (ppRes scope cat args) hdocs) where - ppRes cat es + ppRes scope cat es | null es = ppCId cat - | otherwise = ppParens (d > 3) (ppCId cat PP.<+> PP.hsep (map (ppExpr 4) es)) + | otherwise = ppParens (d > 3) (ppCId cat PP.<+> PP.hsep (map (ppExpr 4 scope) es)) -ppHypo :: Int -> (BindType,CId,Type) -> PP.Doc -ppHypo d (Explicit,x,typ) = +ppHypo :: Int -> [CId]-> (BindType,CId,Type) -> ([CId],PP.Doc) +ppHypo d scope (Explicit,x,typ) = if x == wildCId - then ppType d typ - else PP.parens (ppCId x PP.<+> PP.char ':' PP.<+> ppType 0 typ) -ppHypo d (Implicit,x,typ) = - PP.parens (PP.braces (ppCId x) PP.<+> PP.char ':' PP.<+> ppType 0 typ) + then (scope, ppType d scope typ) + else let y = freshName x scope + in (y:scope, PP.parens (ppCId x PP.<+> PP.char ':' PP.<+> ppType 0 scope typ)) +ppHypo d scope (Implicit,x,typ) = + if x == wildCId + then (scope,PP.parens (PP.braces (ppCId x) PP.<+> PP.char ':' PP.<+> ppType 0 scope typ)) + else let y = freshName x scope + in (y:scope,PP.parens (PP.braces (ppCId x) PP.<+> PP.char ':' PP.<+> ppType 0 scope typ)) + +freshName :: CId -> [CId] -> CId +freshName x xs0 = loop 1 x + where + xs = wildCId : xs0 + + loop i y + | elem y xs = loop (i+1) (x++show i) + | otherwise = y ppParens True = PP.parens ppParens False = id diff --git a/src/runtime/haskell-bind/PGF2/FFI.hs b/src/runtime/haskell-bind/PGF2/FFI.hs index 8c4a1f5de..5e7dfe260 100644 --- a/src/runtime/haskell-bind/PGF2/FFI.hs +++ b/src/runtime/haskell-bind/PGF2/FFI.hs @@ -10,6 +10,8 @@ import Control.Exception import GHC.Ptr import Data.Int(Int32) +-- | An abstract data type representing multilingual grammar +-- in Portable Grammar Format. data PGF = PGF {pgf :: Ptr PgfPGF, pgfMaster :: ForeignPtr GuPool} data Concr = Concr {concr :: Ptr PgfConcr, concrMaster :: PGF}